下载全国城市空气质量历史数据
发布时间:2020-12-17 17:29:54 所属栏目:Python 来源:网络整理
导读:今天PHP站长网 52php.cn把收集自互联网的代码分享给大家,仅供参考。 import os; import shutil; import re; import urllib.request; home = "http://www.tianqihoubao.com"; def down2txt(code,tit,url): # 网页地址 page = urlli…
# Source note: the code below was collected from the Internet and republished
# by the "PHP webmaster" site (52php.cn); it is shared for reference only.
"""Download historical air-quality (AQI) tables for cities listed on tianqihoubao.com.

Run as a script, it walks the ``/aqi/`` index page, visits every city page,
and saves each monthly table as ``data/<city code>/<title>.txt`` under the
current working directory.  Cities that finished successfully are recorded
in ``dataindex.txt`` so that a re-run skips them.
"""

import os
import re
import shutil  # unused here, kept because it was part of the original import list
import urllib.request

home = "http://www.tianqihoubao.com"

# Number of consecutive table cells that form one output line.
_CELLS_PER_ROW = 9

# Record of cities that were downloaded completely.
# NOTE(review): the published source lost its backslashes, so this may
# originally have been "data\index.txt" -- confirm before changing the path.
_INDEX_FILE = "dataindex.txt"

# Seconds to wait on a request before giving up instead of hanging forever.
_TIMEOUT = 30

# Text sitting directly between two tags.  Excluding "<" and ">" keeps the
# match from swallowing tag names when two tags are adjacent ("<tr><td>").
_CELL_RE = re.compile(r">([^<>]+)<")


def _fetch(url):
    """Return the text of *url*, decoded as GBK with UTF-8 as the fallback."""
    with urllib.request.urlopen(url, timeout=_TIMEOUT) as response:
        raw = response.read()
    try:
        return raw.decode("gbk")
    except UnicodeDecodeError:
        # Best effort: a stray byte should not abort the whole download.
        return raw.decode("utf-8", errors="replace")


def _table_rows(html, cells_per_row=_CELLS_PER_ROW):
    """Split the first ``<table>`` of *html* into rows of *cells_per_row* cells.

    Empty cells are ignored and a trailing incomplete row is dropped.
    Returns an empty list when the page has no table.
    """
    start = html.find("<table")
    if start < 0:
        return []
    end = html.find("</table>", start)
    if end < 0:
        return []
    table = html[start:end]
    tag_close = table.find(">")
    if tag_close < 0:
        return []
    # Keep the ">" that closes the opening tag so the first cell can match.
    table = table[tag_close:]
    table = table.replace("\r\n", "").replace("</b>", "").replace("<b>", "")
    cells = [text.strip() for text in _CELL_RE.findall(table)]
    cells = [text for text in cells if text]
    usable = len(cells) - len(cells) % cells_per_row
    return [cells[i:i + cells_per_row] for i in range(0, usable, cells_per_row)]


def down2txt(code, tit, url):
    """Save the data table found at *url* as ``data/<code>/<tit>.txt``.

    Args:
        code: City code; used as the sub-directory name under ``data``.
        tit: Title of the page; used as the file name (without ``.txt``).
        url: Address of the page holding the table.

    Nothing is fetched when the target file already exists.  Each output
    line holds the cells of one row, every cell followed by a single space.
    """
    target_dir = os.path.join(os.getcwd(), "data", code)
    os.makedirs(target_dir, exist_ok=True)

    target = os.path.join(target_dir, tit + ".txt")
    if os.path.exists(target):
        # Checked before fetching so a re-run costs no network traffic.
        print("文件已存在:" + tit + ".txt")
        return

    rows = _table_rows(_fetch(url))
    if not rows:
        # Do not leave an empty file behind: it would block a later retry.
        print("no table data: " + url)
        return

    with open(target, "w", encoding="utf-8") as out:
        out.writelines(
            "".join(cell + " " for cell in row) + "\n" for row in rows
        )


def down_city(name, code):
    """Download every monthly table linked from the page of city *code*.

    Args:
        name: Display name of the city (not used for the download itself).
        code: City code as it appears in the site's URLs.
    """
    url = home + "/aqi/" + code + ".html"
    print(url)
    page = _fetch(url)
    link_re = re.compile(
        "href=['\"](/aqi/" + re.escape(code) + r"-[^'\"]+?\.html)['\"]"
    )
    for link in link_re.findall(page):
        month_url = home + link
        tit = link.replace("/aqi/", "").replace(".html", "")
        print(month_url)
        down2txt(code, tit, month_url)


def _parse_city_link(fragment):
    """Return ``(code, name)`` parsed from an index-page link, or ``None``.

    *fragment* is the text between ``href="/aqi/`` and ``</a>``, for example
    ``beijing.html">Beijing``.
    """
    parts = fragment.replace(" ", "").replace('.html">', " ").strip(" ").split(" ")
    if len(parts) != 2:
        return None
    return parts[0], parts[1]


def _load_finished(path=_INDEX_FILE):
    """Return the set of ``"<code> <name>"`` records already stored in *path*."""
    if not os.path.exists(path):
        return set()
    with open(path, "r", encoding="utf-8", errors="replace") as handle:
        return {line.rstrip("\r\n") for line in handle}


def main():
    """Walk the city index and download every city not yet recorded as done."""
    page = _fetch(home + "/aqi/")
    fragments = re.findall(r'href="/aqi/(.+?)</a>', page)
    finished = _load_finished()

    index = 0
    for fragment in fragments:
        parsed = _parse_city_link(fragment)
        if parsed is None:
            continue
        code, name = parsed
        index += 1
        print(str(index) + "/" + str(len(fragments)) + ": " + code + " " + name)

        record = code + " " + name
        if record in finished:
            print(name + " 已下载")
            continue

        try:
            down_city(name, code)
        except Exception as exc:
            # One failing city must not stop the run; Ctrl-C still works
            # because KeyboardInterrupt is not an Exception subclass.
            print("error!", exc)
            continue

        # Record the city only after all of its pages were processed.
        with open(_INDEX_FILE, "a", encoding="utf-8") as handle:
            handle.write(record + "\n")
        finished.add(record)

    print("finished!")


if __name__ == "__main__":
    main()

# Source note: the content above was collected and arranged by the "PHP
# webmaster" site (52php.cn) for reference and study; readers who find it
# helpful are invited to bookmark, like, recommend and share it.
# (Editor: Li Datong)
# Disclaimer: all content on that site comes from the Internet; the views
# expressed are those of the individual authors and do not represent the
# site.  If your rights were unintentionally infringed, contact the site
# administrator to have the content removed.