python实现下载指定网址所有图片的方法
发布时间:2020-12-16 21:34:46 所属栏目:Python 来源:网络整理
导读:本文讲解用 Python 下载指定网址中所有图片的方法,供大家参考研究。具体实现方法(完整脚本 downpicture.py,用法:python downpicture.py www.baidu.com)见下文。
#coding=utf-8
# This article explains how to download every picture referenced by a given
# web page with Python. It is shared here for reference; the implementation
# follows.
#
# download pictures of the url
# usage: python downpicture.py www.baidu.com [save_dir]
import os
import posixpath
import sys
from html.parser import HTMLParser
from urllib.parse import urljoin
from urllib.parse import urlparse
from urllib.request import urlopen

# Directory used when no save_dir argument is given.
# NOTE(review): the literal is kept exactly as published; it looks like the
# backslashes were lost when the article was scraped -- confirm the intended
# path (or always pass save_dir on the command line).
DEFAULT_SAVE_DIR = r'D:pythonsong'


def getpicname(path):
    """Return the file name that the URL *path* refers to.

    Only the path component of the URL is inspected, so the host name, the
    query string and the fragment never leak into the result.

    Returns:
        The last path segment (e.g. ``'logo.png'``), or ``None`` when that
        segment has no file extension.
    """
    # URL paths always use '/', so use posixpath regardless of the host OS.
    name = posixpath.basename(urlparse(path).path)
    if not posixpath.splitext(name)[1]:
        return None
    return name


def saveimgto(path, urls):
    """Download every URL in *urls* into the existing directory *path*.

    URLs without a usable file name and URLs that fail to download are
    reported on stdout and skipped, so one bad link does not abort the batch.

    Args:
        path: Destination directory; it must already exist.
        urls: Iterable of absolute image URLs.

    Returns:
        The number of files actually written.

    Raises:
        SystemExit: If *path* is not a directory.
    """
    if not os.path.isdir(path):
        print('path is invalid')
        sys.exit(1)

    saved = 0
    for url in urls:
        name = getpicname(url)
        if name is None:
            print('skip {}: no file name with an extension'.format(url))
            continue
        # Download before opening the output file so that a failed request
        # does not leave an empty file behind.
        try:
            with urlopen(url) as response:
                data = response.read()
        except (OSError, ValueError) as err:
            # URLError/HTTPError derive from OSError; urlopen raises
            # ValueError for a URL whose type it cannot determine.
            print('skip {}: {}'.format(url, err))
            continue
        with open(os.path.join(path, name), 'wb') as out:
            out.write(data)
        saved += 1
    return saved


class myhtmlparser(HTMLParser):
    """Collect the ``src`` of every ``<img>`` tag fed to the parser.

    After ``feed()``, ``urls`` holds the non-empty ``src`` values in document
    order and ``num`` holds the number of ``<img>`` tags encountered.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self.urls = []
        self.num = 0

    def handle_starttag(self, tag, attr):
        if tag.lower() != 'img':
            return
        # A valueless attribute (``<img src>``) is reported with value None;
        # drop those and empty strings, they cannot be downloaded.
        self.urls.extend(
            value for name, value in attr
            if name.lower() == 'src' and value
        )
        self.num += 1


def resolve_urls(base, urls):
    """Return *urls* made absolute against the page URL *base*.

    Handles scheme-relative (``//host/x``), root-relative (``/x``) and
    document-relative (``x/y``) references; absolute URLs pass through.
    """
    return [urljoin(base, u) for u in urls]


def main(argv=None):
    """Fetch the page named on the command line and save its pictures.

    Args:
        argv: Argument list ``[url, save_dir]`` (``save_dir`` optional);
            defaults to ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        print('usage: python downpicture.py <url> [save_dir]')
        sys.exit(1)

    url = args[0]
    # Only add a scheme when none was given; https:// URLs stay untouched.
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url
    savedir = args[1] if len(args) > 1 else DEFAULT_SAVE_DIR

    with urlopen(url) as q:
        content = q.read().decode('utf-8', 'ignore')
        # Resolve relative links against the final URL after any redirect.
        base = q.geturl()

    myparser = myhtmlparser()
    myparser.feed(content)

    saved = saveimgto(savedir, resolve_urls(base, myparser.urls))
    print('num of download pictures is {}'.format(saved))


if __name__ == '__main__':
    main()

# Sample output from the article:
#   num of download pictures is 19
#
# Hopefully this article is helpful for your Python programming.
# (Editor: Li Datong)
# [Disclaimer] All content on this site comes from the internet; the views
# expressed are those of the authors only and do not represent this site.
# If your rights have been infringed unintentionally, please contact the
# webmaster promptly to have the content removed.