Ajax 请求分析抓取百度图片
发布时间:2020-12-16 02:45:11 所属栏目:百科 来源:网络整理
导读:import requests from urllib.parse import urlencode from multiprocessing import Pool # 开启多进程 from requests.exceptions import RequestException # import re import json from hashlib import md5 def page_get(url): try : html = requests.get(
"""Scrape Baidu image search via its Ajax JSON endpoint.

Fetches result pages for the query '清晰图片' in parallel with a process
pool, parses image URLs out of the JSON-ish response with a regex, and
saves each thumbnail to disk under an md5-of-content filename.
"""
import os
import re
from hashlib import md5
from multiprocessing import Pool  # process pool: fetch result pages in parallel
from urllib.parse import urlencode

import requests
from requests.exceptions import RequestException


def page_get(url):
    """GET *url* and return the response body text.

    Returns None on a non-200 status or on any requests-level failure.
    """
    try:
        html = requests.get(url)
        if html.status_code == 200:
            return html.text
        return None
    except RequestException:
        print('请求失败')  # request failed
        return None


def page_html(pn):
    """Fetch one search-result page at offset *pn* and print each parsed item."""
    data = {
        'tn': 'resultjson_com',
        'ipn': 'rj',
        'ct': 201326592,
        'is': '',
        'fp': 'result',
        'queryWord': '清晰图片',
        'cl': 2,
        'lm': -1,
        'ie': 'utf-8',
        'oe': 'utf-8',
        'adpicid': '',
        'st': -1,
        'z': 0,
        'ic': 0,
        'hd': 0,
        'latest': 0,
        'copyright': 0,
        'word': '清晰图片',
        's': '',
        'se': '',
        'tab': '',
        'width': 1920,
        'height': 1080,
        'face': '',
        'istype': '',
        'qc': '',
        'nc': 1,
        'fr': '',
        'expermode': '',
        'force': '',
        'pn': pn,   # result offset (multiples of rn)
        'rn': 30,   # results per page
        'gsm': '1e',
        '1561179768452': '',
    }
    url = 'https://image.baidu.com/search/acjson?' + urlencode(data)
    html = page_get(url)
    for item in page_re(html):
        print(item)


def page_re(html):
    """Parse image entries out of *html* and yield {'名称': title, 'img': url} dicts.

    Downloads each thumbnail as a side effect. Yields nothing when *html*
    is None (i.e. the page fetch failed).
    """
    if not html:
        # page_get returns None on failure; re.findall(None) would raise.
        return
    pattern = re.compile(
        '.*?fromPageTitle":"(.*?)",.*?thumbURL":"(.*?)",'
        '.*?middleURL":"(.*?)",.*?hoverURL":"(.*?)",',
        re.S)
    for title, thumb_url, _middle_url, _hover_url in re.findall(pattern, html):
        wrire_to(thumb_url)
        yield {
            '名称': title,      # page title of the image
            'img': thumb_url,   # thumbnail URL
        }


def wrire_to(url):
    """Download the image at *url* and save it. (Name kept for compatibility;
    'wrire' is a typo for 'write'.)"""
    try:
        html = requests.get(url)
        if html.status_code == 200:
            asve_img(html.content)
        return None
    except RequestException:
        print('请求失败')  # request failed
        return None


def asve_img(content):
    """Save raw image bytes *content* as <md5>.jpg. (Name kept for
    compatibility; 'asve' is a typo for 'save'.)"""
    # NOTE(review): the scraped original had lost its backslashes
    # (r'C:UsersAdministratorDesktopimg'); restored as a Windows path —
    # confirm the target directory.
    file_path = '{0}/{1}.{2}'.format(
        r'C:\Users\Administrator\Desktop\img',
        md5(content).hexdigest(),  # content hash: identical images dedupe to one file
        'jpg')
    if not os.path.exists(file_path):
        with open(file_path, 'wb') as f:
            f.write(content)


def main():
    """Fetch the first 10 result pages (30 items each) in parallel."""
    with Pool() as pool:
        pool.map(page_html, [i * 30 for i in range(10)])


if __name__ == '__main__':
    main()