加入收藏 | 设为首页 | 会员中心 | 我要投稿 李大同 (https://www.lidatong.com.cn/)- 科技、建站、经验、云计算、5G、大数据,站长网!
当前位置: 首页 > 百科 > 正文

Ajax 请求分析抓取百度图片

发布时间:2020-12-16 02:45:11 所属栏目:百科 来源:网络整理
导读:本文通过分析百度图片搜索页面的 Ajax 请求(acjson 接口),使用 requests 获取 JSON 数据、用正则表达式提取图片地址,并借助多进程(multiprocessing.Pool)批量下载保存图片。完整代码如下:
import requests
from urllib.parse import urlencode
from multiprocessing import Pool#开启多进程
from requests.exceptions import RequestException #  
import re
import json
from hashlib import md5
def page_get(url):
    """Fetch ``url`` with HTTP GET and return the response body as text.

    Args:
        url: Absolute URL to request.

    Returns:
        The response text when the server answers with status 200;
        ``None`` for any other status code or when the request fails.
    """
    try:
        # A timeout keeps a stalled connection from blocking the worker
        # forever; timeouts are RequestException subclasses, handled below.
        response = requests.get(url, timeout=10)
    except RequestException:
        print('请求失败')
        return None
    if response.status_code == 200:
        return response.text
    return None

def page_html(pn):
    """Request one page of Baidu image-search results and print each hit.

    Builds the query string for the ``acjson`` endpoint, downloads the
    response through ``page_get`` and prints every record that ``page_re``
    yields (``page_re`` also downloads the thumbnail of each record).

    Args:
        pn: Result offset sent as the ``pn`` query parameter; the caller in
            this file passes multiples of 30, matching ``rn`` (page size).

    Returns:
        None. Results are printed; nothing is returned.
    """
    # Insertion order is preserved by urlencode, so the keys stay in the
    # same order as the captured browser request.
    data = {
        'tn': 'resultjson_com',
        'ipn': 'rj',
        'ct': 201326592,
        'is': '',
        'fp': 'result',
        'queryWord': '清晰图片',
        'cl': 2,
        'lm': -1,
        'ie': 'utf-8',
        'oe': 'utf-8',
        'adpicid': '',
        'st': -1,
        'z': 0,
        'ic': 0,
        'hd': 0,
        'latest': 0,
        'copyright': 0,
        'word': '清晰图片',
        's': '',
        'se': '',
        'tab': '',
        'width': 1920,
        'height': 1080,
        'face': '',
        'istype': '',
        'qc': '',
        'nc': 1,
        'fr': '',
        'expermode': '',
        'force': '',
        'pn': pn,
        'rn': 30,
        'gsm': '1e',
        '1561179768452': '',
    }
    url = 'https://image.baidu.com/search/acjson?' + urlencode(data)
    html = page_get(url)
    if html is None:
        # page_get already reported the failure; there is nothing to parse.
        return
    for item in page_re(html):
        print(item)
def page_re(html):
    """Extract image records from a search response and download each one.

    For every regex match the thumbnail URL is passed to ``wrire_to`` (which
    downloads and saves it) before the record is yielded.

    Args:
        html: Raw response text returned by ``page_get``; ``None`` or an
            empty string yields nothing.

    Yields:
        dict: ``{'名称': <fromPageTitle>, 'img': <thumbURL>}`` per match.
    """
    if not html:
        # A failed request yields None; re.findall would raise TypeError.
        return
    # NOTE(review): the pattern expects "fromPageTitle" to appear before
    # "thumbURL" for a record - confirm against the live response layout.
    pattern = re.compile(
        '.*?fromPageTitle":"(.*?)",.*?thumbURL":"(.*?)",'
        '.*?middleURL":"(.*?)",.*?hoverURL":"(.*?)",',
        re.S,
    )
    for match in pattern.findall(html):
        title, thumb_url = match[0], match[1]
        wrire_to(thumb_url)
        yield {
            '名称': title,
            'img': thumb_url,
        }
def wrire_to(url):
    """Download the image at ``url`` and hand its bytes to ``asve_img``.

    Args:
        url: Image URL to fetch with HTTP GET.

    Returns:
        None. The image is saved only when the server answers with status
        200; request failures are reported with a printed message.
    """
    try:
        # A timeout keeps one unresponsive image host from stalling the
        # whole page; timeouts are RequestException subclasses.
        response = requests.get(url, timeout=10)
    except RequestException:
        print('请求失败')
        return None
    if response.status_code == 200:
        asve_img(response.content)
    return None
def asve_img(content, directory=r'C:\Users\Administrator\Desktop\img'):
    """Write image bytes to ``<directory>/<md5 of content>.jpg``.

    The file name is the MD5 hex digest of the content, so saving the same
    bytes twice overwrites the same file instead of creating a duplicate.

    Args:
        content: Raw image bytes.
        directory: Folder to save into; created if it does not exist.
            Defaults to the desktop folder the script originally used.

    Returns:
        None.
    """
    # Imported locally so this function does not rely on a file-level import.
    import os

    os.makedirs(directory, exist_ok=True)
    file_name = '{0}.{1}'.format(md5(content).hexdigest(), 'jpg')
    file_path = os.path.join(directory, file_name)
    # The with-statement closes the file; no explicit close() is needed.
    with open(file_path, 'wb') as f:
        f.write(content)
def main():
    """Scrape ten result pages in parallel worker processes.

    Offsets 0, 30, ..., 270 are mapped over ``page_html`` with a process
    pool. A sequential equivalent is calling ``page_html(i * 30)`` in a
    plain loop.
    """
    offsets = [i * 30 for i in range(10)]
    # The context manager releases the worker processes once map() returns.
    with Pool() as pool:
        pool.map(page_html, offsets)

# Run the scraper only when executed as a script, not on import; child
# processes started by multiprocessing may re-import this module.
if __name__ == '__main__':
    main()

(编辑:李大同)

【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!

    推荐文章
      热点阅读