加入收藏 | 设为首页 | 会员中心 | 我要投稿 李大同 (https://www.lidatong.com.cn/)- 科技、建站、经验、云计算、5G、大数据,站长网!
当前位置: 首页 > 百科 > 正文

re – Regular Expressions 理解完正则学会一半

发布时间:2020-12-14 02:20:06 所属栏目:百科 来源:网络整理
导读:理解完正则学会一半: import redef test_patterns(text,patterns=[]): """Given source text and a list of patterns,look for matches for each pattern within the text and print them to stdout. """ print print ''.join(str(i/10 or ' ') for i in ra

理解完正则学会一半:

import re

def test_patterns(text,patterns=[]):
    """Given source text and a list of patterns,look for
    matches for each pattern within the text and print
    them to stdout.
    """
    print
    print ''.join(str(i/10 or ' ') for i in range(len(text)))
    print ''.join(str(i%10) for i in range(len(text)))
    print text

    # Look for each pattern in the text and print the results
    for pattern in patterns:
        print
        print 'Matching "%s"' % pattern
        for match in re.finditer(pattern,text):
            s = match.start()
            e = match.end()
            print '  %2d : %2d = "%s"' % 
                (s,e-1,text[s:e])
    return

if __name__ == '__main__':
    # Demo driver: each section passes test_patterns() a sample text and a
    # list of patterns illustrating one family of regular-expression syntax.
    # Single-argument print(...) works on both Python 2 and Python 3.
    print("*" * 50)
    # Pattern Syntax
    test_patterns('abbaaabbbbaaaaa', ['ab'])

    print("*" * 50)
    # Repetition
    test_patterns('abbaaabbbbaaaaa', [
        'ab*',      # a followed by zero or more b
        'ab+',      # a followed by one or more b
        'ab?',      # a followed by zero or one b
        'ab{3}',    # a followed by three b
        'ab{2,3}',  # a followed by two to three b
    ])

    print("*" * 50)
    # Character Sets
    test_patterns('abbaaabbbbaaaaa', [
        '[ab]',     # either a or b
        'a[ab]+',   # a followed by one or more a or b
        'a[ab]+?',  # a followed by one or more a or b, not greedy
    ])

    print("*" * 50)
    test_patterns('This is some text -- with punctuation.', [
        '[^-. ]+',  # sequences without -, ., or space
    ])

    print("*" * 50)
    test_patterns('This is some text -- with punctuation.', [
        '[a-z]+',       # sequences of lower case letters
        '[A-Z]+',       # sequences of upper case letters
        '[a-zA-Z]+',    # sequences of lower or upper case letters
        '[A-Z][a-z]+',  # one upper case letter followed by lower case letters
    ])

    print("*" * 50)
    test_patterns('abbaaabbbbaaaaa', [
        'a.',     # a followed by any one character
        'b.',     # b followed by any one character
        'a.*b',   # a followed by anything, ending in b
        'a.*?b',  # a followed by anything, ending in b (not greedy)
    ])

    print("*" * 50)
    # Escape Codes
    # Code  Meaning
    # \d    a digit
    # \D    a non-digit
    # \s    whitespace (tab, space, newline, etc.)
    # \S    non-whitespace
    # \w    alphanumeric
    # \W    non-alphanumeric
    test_patterns('This is a prime #1 example!', [
        r'\d+',  # sequence of digits
        r'\D+',  # sequence of non-digits
        r'\s+',  # sequence of whitespace
        r'\S+',  # sequence of non-whitespace
        r'\w+',  # alphanumeric characters
        r'\W+',  # non-alphanumeric
    ])

    print("*" * 50)
    # To match characters that are part of the regex syntax literally,
    # escape them: r'\\d\+' matches the three characters backslash, d, plus.
    test_patterns(r'\d+ \D+ \s+ \S+ \w+ \W+', [
        r'\\d\+',
        r'\\D\+',
        r'\\s\+',
        r'\\S\+',
        r'\\w\+',
        r'\\W\+',
    ])

    # Anchoring
    # Code  Meaning
    # ^     start of string, or line
    # $     end of string, or line
    # \A    start of string
    # \Z    end of string
    # \b    empty string at the beginning or end of a word
    # \B    empty string not at the beginning or end of a word
    print("*" * 50)
    test_patterns('This is some text -- with punctuation.', [
        r'^\w+',      # word at start of string
        r'\A\w+',     # word at start of string
        r'\w+\S*$',   # word at end of string, with optional punctuation
        r'\w+\S*\Z',  # word at end of string, with optional punctuation
        r'\w*t\w*',   # word containing 't'
        r'\bt\w+',    # 't' at start of word
        r'\w+t\b',    # 't' at end of word
        r'\Bt\B',     # 't', not start or end of word
    ])
输出结果:

**************************************************

          11111
012345678901234
abbaaabbbbaaaaa

Matching "ab"
   0 :  1 = "ab"
   5 :  6 = "ab"
**************************************************

          11111
012345678901234
abbaaabbbbaaaaa

Matching "ab*"
   0 :  2 = "abb"
   3 :  3 = "a"
   4 :  4 = "a"
   5 :  9 = "abbbb"
  10 : 10 = "a"
  11 : 11 = "a"
  12 : 12 = "a"
  13 : 13 = "a"
  14 : 14 = "a"

Matching "ab+"
   0 :  2 = "abb"
   5 :  9 = "abbbb"

Matching "ab?"
   0 :  1 = "ab"
   3 :  3 = "a"
   4 :  4 = "a"
   5 :  6 = "ab"
  10 : 10 = "a"
  11 : 11 = "a"
  12 : 12 = "a"
  13 : 13 = "a"
  14 : 14 = "a"

Matching "ab{3}"
   5 :  8 = "abbb"

Matching "ab{2,3}"
   0 :  2 = "abb"
   5 :  8 = "abbb"
**************************************************

          11111
012345678901234
abbaaabbbbaaaaa

Matching "[ab]"
   0 :  0 = "a"
   1 :  1 = "b"
   2 :  2 = "b"
   3 :  3 = "a"
   4 :  4 = "a"
   5 :  5 = "a"
   6 :  6 = "b"
   7 :  7 = "b"
   8 :  8 = "b"
   9 :  9 = "b"
  10 : 10 = "a"
  11 : 11 = "a"
  12 : 12 = "a"
  13 : 13 = "a"
  14 : 14 = "a"

Matching "a[ab]+"
   0 : 14 = "abbaaabbbbaaaaa"

Matching "a[ab]+?"
   0 :  1 = "ab"
   3 :  4 = "aa"
   5 :  6 = "ab"
  10 : 11 = "aa"
  12 : 13 = "aa"
**************************************************

          1111111111222222222233333333
01234567890123456789012345678901234567
This is some text -- with punctuation.

Matching "[^-. ]+"
   0 :  3 = "This"
   5 :  6 = "is"
   8 : 11 = "some"
  13 : 16 = "text"
  21 : 24 = "with"
  26 : 36 = "punctuation"
**************************************************

          1111111111222222222233333333
01234567890123456789012345678901234567
This is some text -- with punctuation.

Matching "[a-z]+"
   1 :  3 = "his"
   5 :  6 = "is"
   8 : 11 = "some"
  13 : 16 = "text"
  21 : 24 = "with"
  26 : 36 = "punctuation"

Matching "[A-Z]+"
   0 :  0 = "T"

Matching "[a-zA-Z]+"
   0 :  3 = "This"
   5 :  6 = "is"
   8 : 11 = "some"
  13 : 16 = "text"
  21 : 24 = "with"
  26 : 36 = "punctuation"

Matching "[A-Z][a-z]+"
   0 :  3 = "This"
**************************************************

          11111
012345678901234
abbaaabbbbaaaaa

Matching "a."
   0 :  1 = "ab"
   3 :  4 = "aa"
   5 :  6 = "ab"
  10 : 11 = "aa"
  12 : 13 = "aa"

Matching "b."
   1 :  2 = "bb"
   6 :  7 = "bb"
   8 :  9 = "bb"

Matching "a.*b"
   0 :  9 = "abbaaabbbb"

Matching "a.*?b"
   0 :  1 = "ab"
   3 :  6 = "aaab"
**************************************************

          11111111112222222
012345678901234567890123456
This is a prime #1 example!

Matching "\d+"
  17 : 17 = "1"

Matching "\D+"
   0 : 16 = "This is a prime #"
  18 : 26 = " example!"

Matching "\s+"
   4 :  4 = " "
   7 :  7 = " "
   9 :  9 = " "
  15 : 15 = " "
  18 : 18 = " "

Matching "\S+"
   0 :  3 = "This"
   5 :  6 = "is"
   8 :  8 = "a"
  10 : 14 = "prime"
  16 : 17 = "#1"
  19 : 26 = "example!"

Matching "\w+"
   0 :  3 = "This"
   5 :  6 = "is"
   8 :  8 = "a"
  10 : 14 = "prime"
  17 : 17 = "1"
  19 : 25 = "example"

Matching "\W+"
   4 :  4 = " "
   7 :  7 = " "
   9 :  9 = " "
  15 : 16 = " #"
  18 : 18 = " "
  26 : 26 = "!"
**************************************************

          1111111111222
01234567890123456789012
\d+ \D+ \s+ \S+ \w+ \W+

Matching "\\d\+"
   0 :  2 = "\d+"

Matching "\\D\+"
   4 :  6 = "\D+"

Matching "\\s\+"
   8 : 10 = "\s+"

Matching "\\S\+"
  12 : 14 = "\S+"

Matching "\\w\+"
  16 : 18 = "\w+"

Matching "\\W\+"
  20 : 22 = "\W+"
**************************************************

          1111111111222222222233333333
01234567890123456789012345678901234567
This is some text -- with punctuation.

Matching "^\w+"
   0 :  3 = "This"

Matching "\A\w+"
   0 :  3 = "This"

Matching "\w+\S*$"
  26 : 37 = "punctuation."

Matching "\w+\S*\Z"
  26 : 37 = "punctuation."

Matching "\w*t\w*"
  13 : 16 = "text"
  21 : 24 = "with"
  26 : 36 = "punctuation"

Matching "\bt\w+"
  13 : 16 = "text"

Matching "\w+t\b"
  13 : 16 = "text"

Matching "\Bt\B"
  23 : 23 = "t"
  30 : 30 = "t"
  33 : 33 = "t"
待续...

(编辑:李大同)

【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容!

    推荐文章
      热点阅读