加入收藏 | 设为首页 | 会员中心 | 我要投稿 李大同 (https://www.lidatong.com.cn/)- 科技、建站、经验、云计算、5G、大数据,站长网!
当前位置: 首页 > 编程开发 > Python > 正文

下载全国城市经纬度

发布时间:2020-12-17 17:27:55 所属栏目:Python 来源:网络整理
导读:今天PHP站长网 52php.cn把收集自互联网的代码分享给大家,仅供参考。 import re import os import shutil import urllib.request home = "http://jingwei.supfree.net" def find_txt(file,s): b_find = False if os.path.exis…

以下代码由PHP站长网 52php.cn收集自互联网

现在PHP站长网小编把它分享给大家,仅供参考

?
import re
import os
import shutil
import urllib.request

# Base URL of the site the script downloads from; page paths are appended
# to it as home + "/" + path.
home = "http://jingwei.supfree.net"

def find_txt(file, s):
    """Return True if any line of the text file *file* contains *s*.

    Args:
        file: Path of the text file to search. It is opened in text mode
            with the platform's default encoding.
        s: Substring to look for within each line.

    Returns:
        True when at least one line contains *s*; False when no line does
        or when *file* does not exist.
    """
    if not os.path.exists(file):
        return False
    # The context manager closes the handle even if reading raises, and
    # iterating the file lets any() stop at the first matching line.
    with open(file, "r") as f:
        return any(s in line for line in f)

def append_txt(file, s):
    """Append the string *s* to the end of the text file *file*.

    The file is opened in append mode ("a"), so existing content is kept.

    Args:
        file: Path of the text file to append to.
        s: Text to write; no line terminator is added automatically.
    """
    # The context manager guarantees the handle is closed (and the data
    # flushed) even if write() raises.
    with open(file, "a") as f:
        f.write(s)

def getjw(p2):
    """Download the page at ``home + "/" + p2`` and extract its two values.

    The page bytes are decoded as gb2312, falling back to gbk and then
    utf-8. The text following each ``botitle18">`` marker (up to the next
    ``<``) is captured.

    Args:
        p2: Page path relative to ``home``.

    Returns:
        A 2-tuple of the two captured strings with leading and trailing
        spaces stripped, or None when the page does not contain exactly two
        matches. Presumably the pair is (longitude, latitude) -- the caller
        unpacks it as ``(j, w)``; confirm against the site's markup.

    Raises:
        UnicodeDecodeError: If the page cannot be decoded with any of the
            three encodings.
    """
    url = home + "/" + p2
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(url) as resp:
        raw = resp.read()

    # Try the encodings in order; only a decoding failure triggers the
    # fallback, so unrelated errors are no longer swallowed.
    for enc in ("gb2312", "gbk"):
        try:
            page = raw.decode(enc)
            break
        except UnicodeDecodeError:
            continue
    else:
        page = raw.decode("utf-8")

    ls = re.findall('botitle18">(.+?)<', page)
    if len(ls) == 2:
        return ls[0].strip(" "), ls[1].strip(" ")
    return None


def dwon_city(s_I, s_II, p):
    """Download one listing page and record each entry in jingwei.txt.

    The page at ``home + "/" + p`` is fetched and decoded (gb2312, then gbk,
    then utf-8). Every ``mengzi.asp...`` link is paired with the link text
    that follows a ``经纬度">`` marker. For each pair not yet present in
    jingwei.txt, the detail page is fetched through ``getjw`` and one line
    ``"<s_I> <s_II> <name> <j> <w>"`` is appended to the file.

    Args:
        s_I: First-level region name, used as the first field of a record.
        s_II: Second-level region name, used as the second field.
        p: Path of the listing page relative to ``home``.
    """
    file = "jingwei.txt"
    url = home + "/" + p
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(url) as resp:
        raw = resp.read()

    # Only a decoding failure should trigger the encoding fallback.
    for enc in ("gb2312", "gbk"):
        try:
            page = raw.decode(enc)
            break
        except UnicodeDecodeError:
            continue
    else:
        page = raw.decode("utf-8")

    links = re.findall('href="(mengzi.asp.+?)"', page)
    names = re.findall('经纬度">(.+?)</a', page)

    # zip() stops at the shorter list, so a page where the two patterns
    # match a different number of times cannot cause an IndexError.
    for link, name in zip(links, names):
        key = s_I + " " + s_II + " " + name
        if find_txt(file, key):
            print("    " + name + " 已存在")
            continue

        coords = getjw(link)
        if coords is None:
            # getjw found no usable pair on the detail page; skip this
            # entry instead of crashing on tuple unpacking.
            print("    " + name + " 未获取到经纬度")
            continue

        j, w = coords
        print("    " + name + " " + j + " " + w)
        # One record per line: the terminator must be a real newline.
        append_txt(file, key + " " + j + " " + w + "\n")


if __name__ == "__main__":
    # Entry point: walk the home page, and for every first-level region
    # download each of its second-level pages via dwon_city().
    url = home
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(url) as resp:
        page = resp.read().decode("gb2312")

    # Strip line breaks and spaces so each region's markup sits on one line
    # and the space-free patterns below (e.g. "bredbotitle14") can match.
    page = page.replace("\r\n", "")
    page = page.replace(" ", "")
    ls = re.findall('class="bredbotitle14">(.+?)</a><', page)
    print(len(ls))
    # log.txt lists the first-level regions that were fully processed, so a
    # re-run skips them.
    file = "log.txt"

    for l in ls:
        # Text before the first "<" is the first-level region name; the
        # remainder holds the links to its second-level pages.
        i_s = l.find("<")
        s_I = l[:i_s]
        print(s_I)
        if find_txt(file, s_I):
            continue

        # Append "<" so the last link is also terminated for the pattern.
        s2 = l[i_s:] + "<"
        ls2 = re.findall('href="(.+?)<', s2)
        for l2 in ls2:
            # Turn 'path">name' into 'path name' and split into two fields.
            l2 = l2.replace('">', " ")
            ls3 = l2.split(" ")
            if len(ls3) != 2:
                continue
            print("  " + ls3[1] + " " + ls3[0])
            s_II = ls3[1]
            # Skip entries whose name contains "?".
            if s_II.find("?") >= 0:
                continue

            # dwon_city takes (s_I, s_II, p); the second-level name must be
            # passed along with the page path.
            dwon_city(s_I, s_II, ls3[0])
        # Mark this region as done, one name per line.
        append_txt(file, s_I + "\n")
    print("finished!")


以上内容由PHP站长网【52php.cn】收集整理供大家参考研究

如果以上内容对您有帮助,欢迎收藏、点赞、推荐、分享。

(编辑:李大同)

【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!

    推荐文章
      热点阅读