re – Regular Expressions 理解完正则学会一半
发布时间:2020-12-14 02:20:06 所属栏目:百科 来源:网络整理
导读:理解完正则学会一半: import re def test_patterns(text,patterns=[]): """Given source text and a list of patterns,look for matches for each pattern within the text and print them to stdout. """ print print ''.join(str(i/10 or ' ') for i in ra
理解完正则学会一半:
import re def test_patterns(text,patterns=[]): """Given source text and a list of patterns,look for matches for each pattern within the text and print them to stdout. """ print print ''.join(str(i/10 or ' ') for i in range(len(text))) print ''.join(str(i%10) for i in range(len(text))) print text # Look for each pattern in the text and print the results for pattern in patterns: print print 'Matching "%s"' % pattern for match in re.finditer(pattern,text): s = match.start() e = match.end() print ' %2d : %2d = "%s"' % (s,e-1,text[s:e]) return if __name__ == '__main__': print "*"*50 #Pattern Syntax test_patterns('abbaaabbbbaaaaa',['ab']) print "*"*50 #Repetition test_patterns('abbaaabbbbaaaaa',[ 'ab*',# a followed by zero or more b 'ab+',# a followed by one or more b 'ab?',# a followed by zero or one b 'ab{3}',# a followed by three b 'ab{2,3}',# a followed by two to three b ]) print "*"*50 #Character Sets test_patterns('abbaaabbbbaaaaa',[ '[ab]',# either a or b 'a[ab]+',# a followed by one or more a or b 'a[ab]+?',# a followed by one or more a or b,not greedy ]) print "*"*50 test_patterns('This is some text -- with punctuation.',[ '[^-. ]+',# sequences without -,.,or space ]) print "*"*50 test_patterns('This is some text -- with punctuation.',[ '[a-z]+',# sequences of lower case letters '[A-Z]+',# sequences of upper case letters '[a-zA-Z]+',# sequences of lower or upper case letters '[A-Z][a-z]+',# one upper case letter followed by lower case letters ]) print "*"*50 test_patterns('abbaaabbbbaaaaa',[ 'a.',# a followed by any one character 'b.',# b followed by any one character 'a.*b',# a followed by anything,ending in b 'a.*?b',ending in b ]) print "*"*50 #Escape Codes # Code Meaning # d a digit # D a non-digit # s whitespace (tab,space,newline,etc.) 
# S non-whitespace # w alphanumeric # W non-alphanumeric test_patterns('This is a prime #1 example!',[ r'd+',# sequence of digits r'D+',# sequence of non-digits r's+',# sequence of whitespace r'S+',# sequence of non-whitespace r'w+',# alphanumeric characters r'W+',# non-alphanumeric ]) print "*"*50 test_patterns(r'd+ D+ s+ S+ w+ W+',[ r'd+',r'D+',r's+',r'S+',r'w+',r'W+',]) #Anchoring # Code Meaning # ^ start of string,or line # $ end of string,or line # A start of string # Z end of string # b empty string at the beginning or end of a word # B empty string not at the beginning or end of a word print "*"*50 test_patterns('This is some text -- with punctuation.',[ r'^w+',# word at start of string r'Aw+',# word at start of string r'w+S*$',# word at end of string,with optional punctuation r'w+S*Z',with optional punctuation r'w*tw*',# word containing 't' r'btw+',# 't' at start of word r'w+tb',# 't' at end of word r'BtB',# 't',not start or end of word ])输出结果:
**************************************************

          11111
012345678901234
abbaaabbbbaaaaa

Matching "ab"
  0 :  1 = "ab"
  5 :  6 = "ab"
**************************************************

          11111
012345678901234
abbaaabbbbaaaaa

Matching "ab*"
  0 :  2 = "abb"
  3 :  3 = "a"
  4 :  4 = "a"
  5 :  9 = "abbbb"
 10 : 10 = "a"
 11 : 11 = "a"
 12 : 12 = "a"
 13 : 13 = "a"
 14 : 14 = "a"

Matching "ab+"
  0 :  2 = "abb"
  5 :  9 = "abbbb"

Matching "ab?"
  0 :  1 = "ab"
  3 :  3 = "a"
  4 :  4 = "a"
  5 :  6 = "ab"
 10 : 10 = "a"
 11 : 11 = "a"
 12 : 12 = "a"
 13 : 13 = "a"
 14 : 14 = "a"

Matching "ab{3}"
  5 :  8 = "abbb"

Matching "ab{2,3}"
  0 :  2 = "abb"
  5 :  8 = "abbb"
**************************************************

          11111
012345678901234
abbaaabbbbaaaaa

Matching "[ab]"
  0 :  0 = "a"
  1 :  1 = "b"
  2 :  2 = "b"
  3 :  3 = "a"
  4 :  4 = "a"
  5 :  5 = "a"
  6 :  6 = "b"
  7 :  7 = "b"
  8 :  8 = "b"
  9 :  9 = "b"
 10 : 10 = "a"
 11 : 11 = "a"
 12 : 12 = "a"
 13 : 13 = "a"
 14 : 14 = "a"

Matching "a[ab]+"
  0 : 14 = "abbaaabbbbaaaaa"

Matching "a[ab]+?"
  0 :  1 = "ab"
  3 :  4 = "aa"
  5 :  6 = "ab"
 10 : 11 = "aa"
 12 : 13 = "aa"
**************************************************

          1111111111222222222233333333
01234567890123456789012345678901234567
This is some text -- with punctuation.

Matching "[^-. ]+"
  0 :  3 = "This"
  5 :  6 = "is"
  8 : 11 = "some"
 13 : 16 = "text"
 21 : 24 = "with"
 26 : 36 = "punctuation"
**************************************************

          1111111111222222222233333333
01234567890123456789012345678901234567
This is some text -- with punctuation.

Matching "[a-z]+"
  1 :  3 = "his"
  5 :  6 = "is"
  8 : 11 = "some"
 13 : 16 = "text"
 21 : 24 = "with"
 26 : 36 = "punctuation"

Matching "[A-Z]+"
  0 :  0 = "T"

Matching "[a-zA-Z]+"
  0 :  3 = "This"
  5 :  6 = "is"
  8 : 11 = "some"
 13 : 16 = "text"
 21 : 24 = "with"
 26 : 36 = "punctuation"

Matching "[A-Z][a-z]+"
  0 :  3 = "This"
**************************************************

          11111
012345678901234
abbaaabbbbaaaaa

Matching "a."
  0 :  1 = "ab"
  3 :  4 = "aa"
  5 :  6 = "ab"
 10 : 11 = "aa"
 12 : 13 = "aa"

Matching "b."
  1 :  2 = "bb"
  6 :  7 = "bb"
  8 :  9 = "bb"

Matching "a.*b"
  0 :  9 = "abbaaabbbb"

Matching "a.*?b"
  0 :  1 = "ab"
  3 :  6 = "aaab"
**************************************************

          11111111112222222
012345678901234567890123456
This is a prime #1 example!

Matching "\d+"
 17 : 17 = "1"

Matching "\D+"
  0 : 16 = "This is a prime #"
 18 : 26 = " example!"

Matching "\s+"
  4 :  4 = " "
  7 :  7 = " "
  9 :  9 = " "
 15 : 15 = " "
 18 : 18 = " "

Matching "\S+"
  0 :  3 = "This"
  5 :  6 = "is"
  8 :  8 = "a"
 10 : 14 = "prime"
 16 : 17 = "#1"
 19 : 26 = "example!"

Matching "\w+"
  0 :  3 = "This"
  5 :  6 = "is"
  8 :  8 = "a"
 10 : 14 = "prime"
 17 : 17 = "1"
 19 : 25 = "example"

Matching "\W+"
  4 :  4 = " "
  7 :  7 = " "
  9 :  9 = " "
 15 : 16 = " #"
 18 : 18 = " "
 26 : 26 = "!"
**************************************************

          1111111111222
01234567890123456789012
\d+ \D+ \s+ \S+ \w+ \W+

Matching "\\d\+"
  0 :  2 = "\d+"

Matching "\\D\+"
  4 :  6 = "\D+"

Matching "\\s\+"
  8 : 10 = "\s+"

Matching "\\S\+"
 12 : 14 = "\S+"

Matching "\\w\+"
 16 : 18 = "\w+"

Matching "\\W\+"
 20 : 22 = "\W+"
**************************************************

          1111111111222222222233333333
01234567890123456789012345678901234567
This is some text -- with punctuation.

Matching "^\w+"
  0 :  3 = "This"

Matching "\A\w+"
  0 :  3 = "This"

Matching "\w+\S*$"
 26 : 37 = "punctuation."

Matching "\w+\S*\Z"
 26 : 37 = "punctuation."

Matching "\w*t\w*"
 13 : 16 = "text"
 21 : 24 = "with"
 26 : 36 = "punctuation"

Matching "\bt\w+"
 13 : 16 = "text"

Matching "\w+t\b"
 13 : 16 = "text"

Matching "\Bt\B"
 23 : 23 = "t"
 30 : 30 = "t"
 33 : 33 = "t"

待续... (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |