同步重庆时时彩的数据到本地数据库
发布时间:2020-12-17 17:28:16 所属栏目:Python 来源:网络整理
导读:今天PHP站长网 52php.cn把收集自互联网的代码分享给大家,仅供参考。 #!/usr/bin/env python #-*-coding:utf-8-*- """ 从 cp.360.cn 同步时时彩的数据到表 haoma """ import datetime from datetime import timedelta import ti……
# The code below was collected from the internet by PHP站长网 (52php.cn) and is
# shared for reference only.
#!/usr/bin/env python
#-*-coding:utf-8-*-
"""Sync Shishicai (SSC) lottery results from cp.360.cn into the ``haoma`` table."""
import datetime
from datetime import timedelta
import re
import sys
import time

import requests
import torndb

from mylogger import get_logger

# Python 2 only: switch the implicit str/unicode codec to utf-8.  Guarded so
# the module can still be imported on Python 3, where ``reload`` is not a
# builtin and ``sys.setdefaultencoding`` does not exist.
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding('utf-8')

DBHOST = "localhost:3306"
SCHEMA = "CAIPIAO"
DBUSER = "user"
DBPASSWD = "passwd"

db = torndb.Connection(host=DBHOST, database=SCHEMA, user=DBUSER, password=DBPASSWD)
cplog = get_logger("caipiao")


class Data_Sync(object):
    """Incrementally copy lottery draws from the 360 results page into MySQL.

    The newest row of ``haoma`` decides where to resume: every day between
    that row's date and today is downloaded, and for each day only the rows
    after the last stored period are inserted.
    """

    # A day is treated as complete once the stored period reaches this value.
    PERIODS_PER_DAY = 120
    # How many times a failed download of one day is retried before that day
    # is skipped.
    MAX_DOWNLOAD_RETRIES = 3
    # Pause between download retries, in seconds.
    RETRY_DELAY_SECS = 2

    # Each match is (period, opening <td> tag of the number cell, number).
    # The pattern is double-quoted because it contains single quotes.
    ssc_re = re.compile(
        r"<td class='gray'>(.*?)</td>"
        r"(<td class='red big'>|<td style='width:65px'>)(.*?)</td>.*?<tr>"
    )
    # The number is unpacked into five digit columns, so anything other than
    # exactly five digits (e.g. a not-yet-drawn placeholder) is rejected.
    number_re = re.compile(r"\d{5}\Z")

    def __init__(self, start_date="20150101", sleep_secs=10, run_ever=True):
        """Configure the syncer.

        Args:
            start_date: ``YYYYMMDD`` date to start from when the table is
                empty.  Values not greater than ``"20130101"`` fall back to
                ``"20150101"``.
            sleep_secs: Seconds to wait once there is nothing new to insert.
            run_ever: Keep polling forever when true; when false ``run``
                performs a single sync pass and returns.
        """
        self.start_date = start_date if start_date > "20130101" else "20150101"
        self.run_ever = run_ever
        self.base_url = "http://chart.cp.360.cn/kaijiang/kaijiang?lotId=255401&spanType=2&span="
        self.latest_date = ''
        self.latest_period = ''
        self.need_sleep = False
        self.sleep_secs = sleep_secs

    def run(self):
        """Sync repeatedly, sleeping whenever a pass found nothing new."""
        while True:
            self.sync_data_from_360()
            if not self.run_ever:
                break
            if self.need_sleep:
                time.sleep(self.sleep_secs)
                self.need_sleep = False

    def _day_url(self, day):
        """Return the results-page URL covering exactly the given day."""
        stamp = day.strftime("%Y-%m-%d")
        return self.base_url + stamp + "_" + stamp

    def sync_data_from_360(self):
        """Bring the database up to date, starting from its newest stored row.

        Update rules:
          1. While the stored date is before today, download that day, insert
             the rows after the stored period, then move to the next day.
          2. Finally download the current day and insert whatever lies after
             the stored period.
        """
        self.get_latest_haoma_from_mysql()
        if not self.latest_date:
            cplog.info("db has no data,so start at {0}".format(self.start_date))
            self.latest_date = self.start_date
            self.latest_period = "000"
        cplog.info("in db,item_date={0},period={1}".format(self.latest_date, self.latest_period))

        # "Today" is evaluated in UTC+8, whatever the host's timezone is.
        cur_date = datetime.datetime.utcnow() + timedelta(hours=8)
        latest_date = datetime.datetime.strptime(str(self.latest_date), "%Y%m%d")

        retries = 0
        while (cur_date - latest_date).days > 0:
            if int(self.latest_period) < self.PERIODS_PER_DAY:
                data = self.download_with_requests(self._day_url(latest_date))
                if not data and retries < self.MAX_DOWNLOAD_RETRIES:
                    retries += 1
                    time.sleep(self.RETRY_DELAY_SECS)
                    continue
                if data:
                    self.latest_date = latest_date.strftime('%Y%m%d')
                    # NOTE(review): slicing by period assumes the page lists
                    # periods in ascending order starting at 001 - confirm.
                    self.insert_into_mysql(self.latest_date, data[int(self.latest_period):])
            # Moving to a new day: it starts from the first period and gets a
            # fresh retry budget.  Without this reset the next day would be
            # sliced from the previous day's offset and lose its early draws.
            retries = 0
            latest_date += timedelta(1)
            self.latest_period = "000"

        # Update the current day.
        data = self.download_with_requests(self._day_url(latest_date))
        if data:
            self.latest_date = latest_date.strftime('%Y%m%d')
            self.insert_into_mysql(self.latest_date, data[int(self.latest_period):])
        else:
            # Back off instead of immediately re-polling after a failed or
            # empty download.
            self.need_sleep = True

    def insert_into_mysql(self, item_date, datas):
        """Insert the drawn numbers of one day.

        Args:
            item_date: ``YYYYMMDD`` string of the day the rows belong to.
            datas: ``ssc_re`` matches, i.e. ``(period, tag, number)`` tuples.

        Rows whose number is not exactly five digits are skipped.  When no
        row is left to insert, ``need_sleep`` is set so ``run`` pauses.
        A database error is logged and terminates the process with status 1.
        """
        insert_datas = []
        for row in datas:
            period = row[0].strip()
            lottery_number = row[2].strip()
            if not self.number_re.match(lottery_number):
                continue
            d1, d2, d3, d4, d5 = lottery_number
            insert_datas.append(
                (item_date, period, item_date + period, lottery_number, d1, d2, d3, d4, d5)
            )

        if not insert_datas:
            cplog.info("no more new data to sync,wait for {0} seconds".format(self.sleep_secs))
            self.need_sleep = True
            return

        cplog.info("current insert into haoma:{0},{1}".format(item_date, datas))
        # NOTE(review): the column list is reconstructed from the values being
        # inserted (the published statement was garbled) - verify the names
        # against the real ``haoma`` schema.
        sql = (
            "insert into haoma(item_date,period,date_period,lottery_number,a,b,c,d,e) "
            "values(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        )
        try:
            db.executemany(sql, insert_datas)
        except Exception as err:
            cplog.info("insert into haoma failed: {0}".format(err))
            print(err)
            sys.exit(1)

    def get_latest_haoma_from_mysql(self):
        """Load the date and period of the newest stored row, if there is one."""
        sql = "select * from haoma order by date_period desc limit 1"
        ret = db.get(sql)
        if ret:
            self.latest_date = ret.item_date
            self.latest_period = ret.period

    def download_with_requests(self, url):
        """Fetch ``url`` and return the list of ``ssc_re`` matches.

        Returns an empty list on a request error or a non-200 status; the
        failure is logged rather than raised so the caller can retry.
        """
        cplog.info("download: {0}".format(url))
        try:
            resp = requests.get(url, timeout=10)
        except Exception as err:
            cplog.info("call requests raise Exception: {0}".format(err))
            return []
        if resp.status_code != 200:
            cplog.info("download err,http status_code:{0}".format(resp.status_code))
            return []
        # ``text`` (decoded) rather than ``content`` (bytes): a str pattern
        # cannot be applied to bytes on Python 3.
        return self.ssc_re.findall(resp.text)


def run():
    """Entry point: sync from 2014-01-01 onwards, polling every 30 seconds."""
    sync = Data_Sync(start_date="20140101", sleep_secs=30)
    sync.run()


if __name__ == "__main__":
    run()

# The content above was collected and organised by PHP站长网 (52php.cn) for
# reference and study. If it helped you, feel free to bookmark, like,
# recommend or share it. (Editor: 李大同)
# Disclaimer: all content on the site comes from the internet; the views
# expressed are the authors' own and do not represent the site's position.
# If your rights are unintentionally infringed, please contact the webmaster
# promptly to have the content removed.