多线程结合队列下载百度音乐
发布时间:2020-12-17 17:22:07 所属栏目:Python 来源:网络整理
导读:今天PHP站长网 52php.cn把收集自互联网的代码分享给大家,仅供参考。 #!/usr/bin/python # -*- coding: utf-8 -*- ''' 百度中批量下载某歌手的歌(目前只下载第一页,可以自行拓展) @author:admin @qq: 1243385033 ''' import …
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Source note: the code below was collected from the internet and shared by
# the site 52php.cn (PHP站长网); it is provided for reference only.
'''
Batch-download the songs of one singer from Baidu Music.

Only the first page of search results is processed; extend as needed.

@author: admin
@qq: 1243385033
'''
import contextlib
import os
import re
import sys
import threading

from bs4 import BeautifulSoup

try:  # Python 2
    from Queue import Queue
    from urllib2 import quote, urlopen
    text_type = unicode  # noqa: F821 -- only evaluated on Python 2
except ImportError:  # Python 3
    from queue import Queue
    from urllib.parse import quote
    from urllib.request import urlopen
    text_type = str

# Target singer.
SINGER = u'亚东'
# Directory under which a per-singer folder is created.
SAVE_FOLDER = 'F:/music/'
# Search URL; %s receives the URL-quoted singer name.
search_url = "http://music.baidu.com/search/song?key=%s&s=1"
# Baidu music-box lookup URL; the quoted song title is appended to it.
song_url = "http://box.zhangmen.baidu.com/x?op=12&count=1&mtype=1&title="


def _write_line(message):
    """Write *message* plus a newline to stdout and flush.

    On Python 2 the text is encoded first, so that a redirected stdout
    (which has no encoding) does not raise UnicodeEncodeError.
    """
    line = message + u"\n"
    if not isinstance(line, str):  # Python 2 ``unicode``
        encoding = getattr(sys.stdout, "encoding", None) or "utf-8"
        line = line.encode(encoding, "replace")
    sys.stdout.write(line)
    sys.stdout.flush()


class Downloader(threading.Thread):
    """Worker thread that downloads tasks taken from a shared queue.

    Each task is a one-entry dict ``{song_name: download_url}``.
    """

    def __init__(self, task):
        threading.Thread.__init__(self)
        self.task = task  # shared Queue of download tasks

    def run(self):
        """Override Thread.run: consume and download tasks in a loop."""
        while True:
            url = self.task.get()
            try:
                self.download(url)
            except Exception as exc:
                # Thread boundary: report the failure and keep the worker
                # alive so the remaining tasks are still processed.
                sys.stderr.write("download failed for %r: %r\n" % (url, exc))
            finally:
                # Always acknowledge the task; otherwise Queue.join() in
                # start_download() blocks forever after one failure.
                self.task.task_done()

    def build_path(self, filename):
        """Return the path SAVE_FOLDER/SINGER/<filename>.mp3.

        Characters that are path separators or illegal in Windows file
        names are replaced with '_' so the title cannot break the path.
        """
        safe_name = re.sub(r'[\\/:*?"<>|]', '_', filename)
        return os.path.join(SAVE_FOLDER, SINGER, safe_name + '.mp3')

    def download(self, url):
        """Download one task (``{song_name: download_url}``) to disk."""
        f_name, req_url = next(iter(url.items()))
        save_path = self.build_path(f_name)
        # closing() releases the connection even if a read or write fails.
        with contextlib.closing(urlopen(req_url)) as handle:
            with open(save_path, "wb") as handler:
                while True:
                    chunk = handle.read(1024)
                    if not chunk:
                        break
                    handler.write(chunk)
        _write_line(u"已经从 %s下载完成" % req_url)


class HttpRequest:
    """Searches Baidu Music for SINGER and resolves song download URLs."""

    def __init__(self):
        self.task = []
        # The CDATA brackets must be escaped: unescaped, ``[(.*?)]`` is a
        # character class rather than a capture group and never matches.
        self.reg_decode = re.compile(
            r'<decode>.*?CDATA\[(.*?)\]\].*?</decode>')
        self.reg_encode = re.compile(
            r'<encode>.*?CDATA\[(.*?)\]\].*?</encode>')
        self.init()
        self.target_url = search_url % quote(self.encode2utf8(SINGER))

    def encode2utf8(self, source):
        """Return text encoded as UTF-8 bytes; other values are unchanged."""
        if isinstance(source, text_type):
            return source.encode("utf8")
        return source

    def mkDir(self, dir_name):
        """Create *dir_name* (and missing parents) if it does not exist."""
        if not os.path.isdir(dir_name):
            os.makedirs(dir_name)

    def init(self):
        """Create SAVE_FOLDER and the per-singer sub-folder."""
        self.mkDir(SAVE_FOLDER)
        self.mkDir(os.path.join(SAVE_FOLDER, SINGER))

    def http_request(self):
        """Fetch the search page and resolve a download URL per song.

        Returns:
            ``self.task``: a list of ``{song_name: download_url}`` dicts.
            Songs whose lookup response lacks an <encode>/<decode> pair
            are skipped; the list is empty if no song list is found.
        """
        with contextlib.closing(urlopen(self.target_url)) as response:
            content = response.read()
        html = BeautifulSoup(content, "html.parser", from_encoding="utf8")
        song_list = html.find('div', {"monkey": "song-list"})
        if song_list is None:
            return self.task
        singer_part = quote(self.encode2utf8(SINGER))
        for span_tag in song_list.find_all('span', class_='song-title'):
            link = span_tag.find("a")
            if link is None:
                continue
            song_name = text_type(link.get_text())
            # Build the lookup URL from the base constant for every song;
            # appending to the shared global would accumulate old titles.
            request_url = (
                song_url + quote(self.encode2utf8(song_name))
                + '$$' + singer_part
                + '$$$$&url=&listenreelect=0&.r=0.1696378872729838'
            )
            with contextlib.closing(urlopen(request_url)) as xmlfile:
                xml_content = xmlfile.read().decode('utf-8', 'replace')
            url1 = self.reg_encode.findall(xml_content)
            url2 = self.reg_decode.findall(xml_content)
            if not url1 or not url2 or '/' not in url1[0]:
                continue
            # Final URL = directory part of <encode> + file part in <decode>.
            url = url1[0][:url1[0].rindex('/') + 1] + url2[0]
            self.task.append({song_name: url})
        return self.task


def start_download(urls):
    """Download every task in *urls*, using one daemon thread per task.

    Blocks until all queued tasks have been acknowledged by the workers.
    """
    queue = Queue()
    for _ in range(len(urls)):
        worker = Downloader(queue)
        worker.daemon = True  # do not keep the process alive on exit
        worker.start()
    for url in urls:
        queue.put(url)
    queue.join()


if __name__ == '__main__':
    http = HttpRequest()
    urls = http.http_request()
    start_download(urls)

# Site note: the content above was collected and organised by 52php.cn
# (PHP站长网) for reference and study. If it helped you, feel free to
# bookmark, like, recommend, or share it. (Editor: 李大同)
# Disclaimer: all content on the site comes from the internet; the views
# expressed are the author's own and do not represent the site's position.
# If your rights have been unintentionally infringed, please contact the
# site administrator to have the content removed.