Python抓妹子图+多线程(multiprocessing.dummy 线程池)
发布时间:2020-12-17 17:26:44 所属栏目:Python 来源:网络整理
导读:今天PHP站长网 52php.cn把收集自互联网的代码分享给大家,仅供参考。 # -*- coding: utf-8 -*- """ Created on Fri Aug 07 17:30:58 2015 @author: Dreace """ import urllib2 import sys import time import os import random from ……
# -*- coding: utf-8 -*-
# The code below was collected from the internet by 52php.cn and is shared
# for reference only.
"""
Created on Fri Aug 07 17:30:58 2015

@author: Dreace

Collect ``.jpg`` URLs from a set of listing pages and download them with a
thread pool (``multiprocessing.dummy`` provides threads, not processes).

The script runs on both Python 2.7 and Python 3.
"""
import os
import random
import sys
import threading
import time
from multiprocessing.dummy import Pool as ThreadPool

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 exposes the same Request/urlopen API under urllib.request.
    import urllib.request as urllib2

type_ = sys.getfilesystemencoding()


def rename():
    """Return the current local time as a ``YYYYmmddHHMMSS`` string."""
    return time.strftime("%Y%m%d%H%M%S")


def rename_2(name):
    """Return ``name`` left-padded with zeros to three characters plus ``.jpg``.

    ``name`` is expected to be a decimal counter rendered as a string,
    e.g. ``"7"`` -> ``"007.jpg"`` and ``"1234"`` -> ``"1234.jpg"``.
    """
    return name.zfill(3) + '.jpg'


def download_pic(i):
    """Download the image at URL ``i`` into the current directory.

    URLs rejected by ``Filter`` are skipped, as are payloads smaller than
    ``MIN_PIC_BYTES``. Each saved file increments the global ``count``.
    Errors are printed and swallowed on purpose so that one failing URL
    does not abort the whole pool.
    """
    global count
    if not Filter(i):
        return
    try:
        content = urllib2.urlopen(i, timeout=time_out)
        try:
            url_content = content.read()
        finally:
            content.close()
        # Check the size before touching the disk instead of writing the
        # file and deleting it again afterwards.
        if len(url_content) < MIN_PIC_BYTES:
            return
        file_name = "%d_%s" % (random.randint(10000, 999999999),
                               rename_2(str(count)))
        with open(file_name, "wb") as f:
            f.write(url_content)
        # download_pic runs on many threads at once; ``count += 1`` on a
        # global is not atomic, so serialise the update.
        with _count_lock:
            count += 1
    except Exception as e:
        print(e)


def Filter(content):
    """Return True when ``content`` contains none of the ``Filter_list`` hosts."""
    return not any(host in content for host in Filter_list)


def get_pic(url_address):
    """Fetch ``url_address`` and append every ``.jpg`` fragment to ``pic_list``.

    The page is split on single quotes and each piece containing ``.jpg``
    is collected. Errors are printed and swallowed on purpose.
    """
    try:
        req = urllib2.Request(url=url_address, headers=headers)
        response = urllib2.urlopen(req, timeout=time_out)
        try:
            str_ = response.read()
        finally:
            response.close()
        if not isinstance(str_, str):
            # Python 3 returns bytes; decode so the text split below works.
            str_ = str_.decode("utf-8", "replace")
        pic_list.extend(piece for piece in str_.split("'") if ".jpg" in piece)
    except Exception as e:
        print(e)


def _emit(message):
    """Print a unicode message, encoding it for the console on Python 2."""
    if not isinstance(message, str):
        # Python 2 ``unicode`` object: encode with the filesystem encoding.
        message = message.encode(type_)
    print(message)


def _run_pool(worker, items):
    """Map ``worker`` over ``items`` on ``thread_num`` threads and wait."""
    pool = ThreadPool(thread_num)
    try:
        pool.map(worker, items)
    finally:
        pool.close()
        pool.join()


MAX = 100
count = 0
time_out = 60
thread_num = 50
# Payloads smaller than this many bytes are not saved.
MIN_PIC_BYTES = 1024 * 11
pic_list = []
page_list = []
pic_kind = ["hot", "share", "mm", "taiwan", "japan", "model"]
Filter_list = ["imgsize.ph.126.net", "img.ph.126.net", "img2.ph.126.net"]
# NOTE(review): "C:Photos" is a drive-relative path on Windows; the path
# separators may have been lost when this code was republished -- confirm.
dir_name = "C:Photos" + rename()
# NOTE(review): combined with pic_kind this yields URLs such as
# ".../model/page/hot/page/1"; the prefix looks suspicious -- confirm.
url_address = "http://www.mzitu.com/model/page/"
headers = {"User-Agent": " Mozilla/5.0 (Windows NT 10.0; rv:39.0) Gecko/20100101 Firefox/39.0"}
_count_lock = threading.Lock()


def main():
    """Create the output directory, collect image URLs, then download them."""
    os.makedirs(dir_name)
    os.chdir(dir_name)
    start_time = time.time()
    for pic_i in pic_kind:
        for i in range(1, MAX + 1):
            page_list.append(url_address + pic_i + "/page/" + repr(i))
    _run_pool(get_pic, page_list)
    _emit(u"%s %d %s" % (u"获取到", len(pic_list), u"张图片,开始下载!"))
    _run_pool(download_pic, pic_list)
    _emit((u"%d %s" % (count, u"张图片保存在")) + dir_name)
    _emit(u"%s %s %s" % (u"共耗时", time.time() - start_time, u"s"))


if __name__ == "__main__":
    main()

# The content above was collected and organized by 52php.cn for reference
# and study. If it helped you, feel free to bookmark, like, recommend and
# share it. (Editor: 李大同)
# Disclaimer: all content on the site comes from the internet; the opinions
# expressed are the authors' own and do not represent the site's position.
# If your rights have been unintentionally infringed, please contact the
# webmaster promptly to have the content removed.