第 13 章 不用语言之间的正则表达式的比较

目录

13.1. 一些关于正则表达式中的比较

摘要

13.1. 一些关于正则表达式中的比较

[提示] 旧帖

此部分的内容的旧帖为:【总结】关于(C#和Python中的)正则表达式

下面对于各种语言之间的正则表达式的语法,进行简单的比较:

表 13.1. 不同语言间正则表达式写法的比较

  Python C# PHP Javascript Notepad++
Named Group (?P<groupName>xxx)1 (?<groupName>xxx)2      
正则表达式字符串的表示方法 r"(.+?)" @"(.+?)"      
向前匹配(prev match) (?<=xxx)3 (?<=xxx)4      
向后匹配(post match) (?=xxx)3 (?=xxx)4      
向前一定不要匹配(prev non-match) (?<!xxx)3 (?<!xxx)4      
向后一定不要匹配(post non-match) (?<=xxx)3 (?<=xxx)4      
           

1

示例代码为:


import re;

#------------------------------------------------------------------------------
def testBackReference():
    # back reference (?P=name) test
    backrefValidStr = '"group":0,"iconType":"NonEmptyDocumentFolder","id":"9A8B8BF501A38A36!601","itemType":32,"name":"released","ownerCid":"9A8B8BF501A38A36"';
    backrefInvalidStr = '"group":0,"iconType":"NonEmptyDocumentFolder","id":"9A8B8BF501A38A36!601","itemType":32,"name":"released","ownerCid":"987654321ABCDEFG"';
    backrefP = r'"group":\d+,"iconType":"\w+","id":"(?P<userId>\w+)!\d+","itemType":\d+,"name":".+?","ownerCid":"(?P=userId)"'
    userId = "";
     
    foundBackref = re.search(backrefP, backrefValidStr);
    print "foundBackref=",foundBackref; # foundBackref= <_sre.SRE_Match object at 0x02B96660>
    if(foundBackref):
        userId = foundBackref.group("userId");
        print "userId=",userId; # userId= 9A8B8BF501A38A36
        print "can found userId here";
         
    foundBackref = re.search(backrefP, backrefInvalidStr);
    print "foundBackref=",foundBackref; # foundBackref= None
    if(not foundBackref):
        print "can NOT found userId here";
     
    return ;

            

2

示例代码为:


using System.Text.RegularExpressions;

void testBackReference()
{
    // back reference \k<name> test
    string backrefValidStr = "\"group\":0,\"iconType\":\"NonEmptyDocumentFolder\",\"id\":\"9A8B8BF501A38A36!601\",\"itemType\":32,\"name\":\"released\",\"ownerCid\":\"9A8B8BF501A38A36\"";
    string backrefInvalidStr = "\"group\":0,\"iconType\":\"NonEmptyDocumentFolder\",\"id\":\"9A8B8BF501A38A36!601\",\"itemType\":32,\"name\":\"released\",\"ownerCid\":\"987654321ABCDEFG\"";
    string backrefP = @"""group"":\d+,""iconType"":""\w+"",""id"":""(?<userId>\w+)!\d+"",""itemType"":\d+,""name"":"".+?"",""ownerCid"":""\k<userId>""";
    string userId = "";
    Regex backrefValidRx = new Regex(backrefP);
    Match foundBackref;
 
    foundBackref = backrefValidRx.Match(backrefValidStr);
    if (foundBackref.Success)
    {
        // can found the userId
        userId = foundBackref.Groups["userId"].Value;
        MessageBox.Show("can found the userId !");
    }
 
    foundBackref = backrefValidRx.Match(backrefInvalidStr);
    if (foundBackref.Success)
    {
        // can NOT found the userId
    }
    else
    {
        MessageBox.Show("can NOT found the userId !");
    }    
}

            

3

示例代码为:


import re;

#------------------------------------------------------------------------------
def testPrevPostMatch():
    # post match:       (?=xxx)
    # post non-match:   (?!xxx)
    # prev match:       (?<=xxx)
    # prev non-match:   (?<!xxx)
     
    #note that input string is:
    #src=\"http://b101.photo.store.qq.com/psb?/V10ppwxs00XiXU/5dbOIlYaLYVPWOz*1nHYeSFq09Z5rys72RIJszCsWV8!/b/YYUOOzy3HQAAYqsTPjz7HQAA\"
    qqPicUrlStr             = 'src=\\"http://b101.photo.store.qq.com/psb?/V10ppwxs00XiXU/5dbOIlYaLYVPWOz*1nHYeSFq09Z5rys72RIJszCsWV8!/b/YYUOOzy3HQAAYqsTPjz7HQAA\\"';
    qqPicUrlInvalidPrevStr  = '1234567http://b101.photo.store.qq.com/psb?/V10ppwxs00XiXU/5dbOIlYaLYVPWOz*1nHYeSFq09Z5rys72RIJszCsWV8!/b/YYUOOzy3HQAAYqsTPjz7HQAA\\"';
    qqPicUrlInvalidPostStr  = 'src=\\"http://b101.photo.store.qq.com/psb?/V10ppwxs00XiXU/5dbOIlYaLYVPWOz*1nHYeSFq09Z5rys72RIJszCsWV8!/b/YYUOOzy3HQAAYqsTPjz7HQAA123';
    canFindPrevPostP = r'(?<=src=\\")(?P<qqPicUrl>http://.+?\.qq\.com.+?)(?=\\")';
    qqPicUrl = "";
 
    foundPrevPost = re.search(canFindPrevPostP, qqPicUrlStr);
    print "foundPrevPost=",foundPrevPost; #
    if(foundPrevPost):
        qqPicUrl = foundPrevPost.group("qqPicUrl");
        print "qqPicUrl=",qqPicUrl; # qqPicUrl= http://b101.photo.store.qq.com/psb?/V10ppwxs00XiXU/5dbOIlYaLYVPWOz*1nHYeSFq09Z5rys72RIJszCsWV8!/b/YYUOOzy3HQAAYqsTPjz7HQAA
        print "can found qqPicUrl here";
 
    foundInvalidPrev = re.search(canFindPrevPostP, qqPicUrlInvalidPrevStr);
    print "foundInvalidPrev=",foundInvalidPrev; # foundInvalidPrev= None
    if(not foundInvalidPrev):
        print "can NOT found qqPicUrl here";
         
    foundInvalidPost = re.search(canFindPrevPostP, qqPicUrlInvalidPostStr);
    print "foundInvalidPost=",foundInvalidPost; # foundInvalidPost= None
    if(not foundInvalidPost):
        print "can NOT found qqPicUrl here";
 
    return ;

            

4

示例代码为:


using System.Text.RegularExpressions;

void testPrevPostMatch()
{
    //http://msdn.microsoft.com/en-us/library/bs2twtah(v=vs.71).aspx
    // post match:       (?=xxx)
    // post non-match:   (?!xxx)
    // prev match:       (?<=xxx)
    // prev non-match:   (?<!xxx)
     
    //src=\"http://b101.photo.store.qq.com/psb?/V10ppwxs00XiXU/5dbOIlYaLYVPWOz*1nHYeSFq09Z5rys72RIJszCsWV8!/b/YYUOOzy3HQAAYqsTPjz7HQAA\"
    string qqPicUrlStr              = "src=\\\"http://b101.photo.store.qq.com/psb?/V10ppwxs00XiXU/5dbOIlYaLYVPWOz*1nHYeSFq09Z5rys72RIJszCsWV8!/b/YYUOOzy3HQAAYqsTPjz7HQAA\\\"";
    string qqPicUrlInvalidPrevStr   = "12345678http://b101.photo.store.qq.com/psb?/V10ppwxs00XiXU/5dbOIlYaLYVPWOz*1nHYeSFq09Z5rys72RIJszCsWV8!/b/YYUOOzy3HQAAYqsTPjz7HQAA\\\"";
    string qqPicUrlInvalidPostStr   = "src=\\\"http://b101.photo.store.qq.com/psb?/V10ppwxs00XiXU/5dbOIlYaLYVPWOz*1nHYeSFq09Z5rys72RIJszCsWV8!/b/YYUOOzy3HQAAYqsTPjz7HQAA1234";
    string canFindPrevPostP = @"(?<=src=\\"")(?<qqPicUrl>http://.+?\.qq\.com.+?)(?=\\"")";
    string qqPicUrl = "";
 
    Regex prevPostRx = new Regex(canFindPrevPostP);
 
    Match foundPrevPost = prevPostRx.Match(qqPicUrlStr);
    if (foundPrevPost.Success)
    {
        qqPicUrl = foundPrevPost.Groups["qqPicUrl"].Value;
        MessageBox.Show("can found the qqPicUrl !");
    }
 
    Match foundInvalidPrev = prevPostRx.Match(qqPicUrlInvalidPrevStr);
    if (!foundInvalidPrev.Success)
    {
        MessageBox.Show("can NOT found the qqPicUrl !");
    }
 
    Match foundInvalidPost = prevPostRx.Match(qqPicUrlInvalidPostStr);
    if (!foundInvalidPost.Success)
    {
        MessageBox.Show("can NOT found the qqPicUrl !");
    }
}