正则表达式
发布时间:2020-12-13 21:54:26 所属栏目:百科 来源:网络整理
导读:match search findall finditer re.match(pattern,string[,flags]):扫描字符串开头 re.search(): 扫描整个字符串,返回第一个匹配成功的结果 re.findall():返回所有结果的列表,失败返回空列表 finditer() 返回迭代器 失败返回空 flags 比如是否区分大小写
match search findall finditer re.match(pattern,string[,flags]):扫描字符串开头 import re tmp = 'http' print(re.match(tmp,'http://www.baidu.com')) # <_sre.SRE_Match object; span=(0,4),match='http'> print(re.match(tmp,'://wwwhttp.baidu.com')) # None print(re.match(tmp,'http://www.baidu.com').span()) # (0,4) print(re.search(tmp,'www.Http://baidu.com',re.I)) # <_sre.SRE_Match object; span=(4,8),match='Http'> print(re.findall(tmp,'http://www.http.com')) # ['http','http'] find = re.finditer(tmp,'http://HTTp.com0',re.I) for i in find: print(i) # <_sre.SRE_Match object; span=(0,4),match='http'> # <_sre.SRE_Match object; span=(7,11),match='HTTp'> 各种匹配符号
print(re.findall('.','http:\n//')) # ['h','t','t','p',':','/','/'] print(re.findall('[12345]','qhchg461905nkj')) # ['4','1','5'] # 保留原始字符的R/r在正则中无效 print(re.findall(r'\D','123qwr')) # ['q','w','r'] print(re.findall('\S','123 gh \n 78r')) # ['1','2','3','g','h','7','8','r']
# 看是否以nice开头 类似于match print(re.findall('^nice','to nice meet you')) # [] # 多行开头匹配 print(re.findall('^nice','nice to meet you\nnice to meet you',re.M)) # ['nice','nice'] # 是否是com结尾 print(re.findall('com$','http:baidu.com')) # ['com'] # \b 本身是个转义字符,但在正则中也有特殊意义,在正则字符串前面加r # 将转义字符的意义去掉 保留正则的意义 # 以空格为边界的ce只有一个 print(re.findall(r'ce\b','nice to meet niceto meet')) # ['ce']
print(re.findall('[abc]','qwracb')) # ['a','c','b'] print(re.findall('(abc)','qwrabc')) # ['abc'] print(re.findall('a?','a')) # ['a',''] print(re.findall('(aa)?','aaab')) # ['aa','','',''] print(re.findall('(aa)','aaaab')) # ['aa','aa'] print(re.findall('(aa)+','aaaab')) # 由于是贪婪匹配,相当于在最长的aaaa中找出: ['aa'] print(re.findall('aa+','aabaaaabaaaba')) # 最后一个a没有被匹配 ['aa','aaaa','aaa'] print(re.findall('a*','abaabbaaabaaaabb')) # 匹配任意多个a # ['a','','aa','','','aaa','','aaaa','','',''] print(re.findall('a{3}','aabaaacsdsaaa')) # ['aaa','aaa'] print(re.findall('a{3,}','aabbaaabbaaaa')) # ['aaa','aaaa'] print(re.findall('(good)','good--Good-good')) print(re.findall('good','good--Good-good')) # ['good','good'] print(re.findall('(good)|(Good)','good--Good')) # [('good',''),('','Good')] print(re.findall('good|Good','good--Good')) # ['good','Good'] 特殊元字符
print(re.findall('.','')) # [] print(re.findall('.*','')) # [''] r = 'who .* he' print(re.findall(r,'who is a girl he who is boy he ')) # ['who is a girl he who is boy he'] 贪婪 r = 'who .*? he' print(re.findall(r,'who is a girl he who is boy he who he')) # ['who is a girl he','who is boy he','who he'] 非贪婪 分组 正则中有组的概念 tmp = '\d{3}-\d{8}' str1 = '010-12345678' result = re.findall(tmp,str1) print(result) # ['010-12345678'] tmp2 ='(\d{3})-(\d{8})' # 用小括号括起来的内容为一组 result2 = re.findall(tmp2,str1) print(result2) # [('010','12345678')] tmp3 = '(\d{3})-(\d{8})' result3 = re.match(tmp3,str1) print(result3) # <_sre.SRE_Match object; span=(0,12),match='010-12345678'> # groups()可以查看匹配到的所有分组的情况 print(result3.groups()) # ('010','12345678') # group() 可以单独获取分组情况 # group(0) 一直代表匹配到的原始字符串 print(result3.group()) # 010-12345678 print(result3.group(0)) # 010-12345678 print(result3.group(1)) # 010 print(result3.group(2)) # 12345678 # 另起名称(?P<名称>) tmp4 = '(?P<love>\d{3})-(?P<like>\d{8})' result4 = re.match(tmp4,str1) print(result4.group(0)) print(result4.group(1)) print(result4.group(2)) print(result4.group('love')) 正则与字符串 str1 = 'ac b c d e' print(str1.split()) # ['ac','b','c','d','e'] print(re.split(' +',str1)) # ['ac','b','c','d','e'] # 替换字符串 # re.sub(pattern,repl,string) # pattern: 即将被替换的字符 # repl:替换的字符 # count 替换次数 # subn 将结果和替换的次数放到一个元组里 res2 = re.sub(' +','*',str1) print(res2) # ac*b*c*d*e res2 = re.sub(' +','*',str1,count=2) print(res2) # ac*b*c d e res2 = re.subn(' +','*',str1) print(res2) # ('ac*b*c*d*e',4) compile tmp = '\d{3}-\d{8}' str1 = '090-99999999' res = re.match(tmp,str1) print(res) tmp2 = '\d{3}-\d{8}' str2 = '090-99999999' re_tmp2 = re.compile(tmp2) res2 = re_tmp2.match(str2) print(res2) # <_sre.SRE_Match object; span=(0,12),match='090-99999999'> 例子 # 判断某一个字符串是不是11位,第一个是否是数字1 r= '^1\d{10}$' # 1开头 # 10个数字结尾 print(re.findall(r,'12534568545')) # 前面是3个数字 后面八个数字 中间为- r= '^\d{3}-\d{8}$' # 123456@qq.com # ^.$ 在正则中有实际意义,如果想要作为普通字符使用需要使用转义字符 r= '^\d{6}@qq\.com' print(re.findall(r,'125565@qq.com')) (编辑:李大同) 
【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容! |