正则表达式的使用
发布时间:2020-12-13 22:10:59 所属栏目:百科 来源:网络整理
导读:2016-2-27 常用符号 #-*-coding:utf8-*- import re # from re import findall,search,S print('hello world') secret_code = 'feafefexxixx23fe23xxlovexxafeifvaxxyouxx32fe' a = 'xz123' b = re.findall('x', a) # . 类似于占位符 print(b) a = 'xyxy1…
2016-2-27
常用符号
# -*- coding: utf8 -*-
"""Walkthrough of the `re` module: common symbols, findall/search/sub, and a
small scraping example that reads a saved HTML page from ``test.txt``.

The script is a flat sequence of demonstrations; each one prints its result.
"""
import re
# Alternative import style: from re import findall, search, S

print('hello world')

# ---------------------------------------------------------------------------
# Common symbols
# ---------------------------------------------------------------------------
secret_code = 'feafefexxixx23fe23xxlovexxafeifvaxxyouxx32fe'

# '.' acts as a placeholder for any single character (e.g. 'x.' would give
# ['xz'] here). NOTE(review): the published snippet shows the pattern 'x';
# it may originally have been 'x.' -- kept as published.
a = 'xz123'
b = re.findall(r'x', a)
print(b)

# 'x*' matches zero or more 'x', so findall also reports the empty matches
# at every position where no 'x' is present.
a = 'xyxy123'
b = re.findall(r'x*', a)
print(b)

# 'x?' matches zero or one 'x'.
b = re.findall(r'x?', a)
print(b)

# '.*' is greedy: it grabs the longest possible span between the delimiters.
b = re.findall(r'xx.*xx', secret_code)
print(b)

# '.*?' is non-greedy: it yields as many short matches as possible.
c = re.findall(r'xx.*?xx', secret_code)
print(c)

# Put the part you need inside (), and leave what you don't need outside.
d = re.findall(r'xx(.*?)xx', secret_code)
print(d)
for each in d:
    print(each)

# By default '.' does not match a newline, so the 'xx' on the second line
# would be treated as a new opening delimiter. With re.S the newline is
# matched as well, giving ['hello\n', 'world'].
s = '''sdfxxhello
xxfsdfxxworldxxasdf'''
e = re.findall(r'xx(.*?)xx', s, re.S)
print(e)

# ---------------------------------------------------------------------------
# search vs. findall
# ---------------------------------------------------------------------------
s2 = 'asdfxxixx123xxlovexxdfd'
# group(n) selects the n-th parenthesised group of the match.
f = re.search(r'xx(.*?)xx123xx(.*?)xx', s2).group(2)
print(f)
# search returns a single match object (first hit only); the same pattern
# with findall would instead return a list of group tuples.
f2 = re.search(r'xx(.*?)xx123xx(.*?)xx', s2)

# ---------------------------------------------------------------------------
# sub
# ---------------------------------------------------------------------------
s = '123abcssfasdfas123'
# Replace the characters between the two '123' markers with 789.
output = re.sub(r'123(.*?)123', '123%d123' % 789, s)
print(output)

# (\d+) matches runs of digits.
a = 'asdfasf1234567fasd55fas'
b = re.findall(r'(\d+)', a)
print(b)

# ---------------------------------------------------------------------------
# Scraping example
# ---------------------------------------------------------------------------
old_url = 'http://www.pythontab.com/html/2013/pythonhexinbiancheng001.html'
total_page = 20

# The context manager closes the file even if read() fails. A missing input
# file only skips this part of the demo instead of aborting the whole script.
try:
    with open('test.txt', 'r') as fh:
        html = fh.read()
except FileNotFoundError:
    print('test.txt not found; skipping the scraping example')
    html = None

title = None
links = []
if html is not None:
    # search stops at the first place that matches;
    # findall walks through the entire document.
    title_match = re.search(r'<title>(.*?)</title>', html, re.S)
    if title_match is not None:  # search returns None when nothing matches
        title = title_match.group(1)
    print(title)

    links = re.findall(r'href="(.*?)"', html, re.S)
    for each in links:
        print(each)

# Grab the big piece first, then the small ones: e.g. first isolate each
# <ul>...</ul> block with re.findall('<ul>(.*?)</ul>', html, re.S), then run
# a second, narrower findall inside that block.

# Pagination: rewrite the trailing page number in the URL.
# The fourth positional argument of re.sub is `count`, not `flags`, so the
# flag the snippet passed there is dropped (the pattern contains no '.', so
# re.S would have had no effect anyway). The loop keeps the original 0..20
# range but derives it from total_page.
for i in range(total_page + 1):
    new_link = re.sub(r'pythonhexinbiancheng00\d+',
                      'pythonhexinbiancheng00%d' % i,
                      old_url)
    print(new_link)

# (Editor: Li Datong)
# [Disclaimer] All content on this site is collected from the internet; the
# views expressed are those of the authors only and do not represent the
# position of this site. If your rights have been unintentionally infringed,
# please contact the webmaster promptly to have the content removed.