抓取简单游全站数据
发布时间:2020-12-17 17:18:57 所属栏目:Python 来源:网络整理
导读:今天PHP站长网 52php.cn把收集自互联网的代码分享给大家,仅供参考。 #!/usr/bin/python # -*- coding:utf-8 -*- from public import gethtml2 from dbconfig_waigua import * from scrapy.selector import Selector import My……
#!/usr/bin/python
# -*- coding:utf-8 -*-
#
# Provenance note from the hosting page: the following code was collected
# from the internet by PHP Webmaster Network (52php.cn) and is shared for
# reference only.
"""Crawl the game-script listings of jdyou.com and store them in MySQL.

The crawler walks the site's front page, follows every "ScriptList" link to
a per-game list page, follows every "scripts" link on that page to a script
detail page, and records the game and the script name in the ``fz_games``
and ``fz_name`` tables.
"""
from public import gethtml2
# The star import supplies dbname, dbuser, dbpasswd and dbhost.  It is kept
# ahead of the explicit imports below (as in the original) so that those
# imports win if the config module happens to export a clashing name.
from dbconfig_waigua import *
from scrapy.selector import Selector
import MySQLdb
import MySQLdb.cursors
import datetime
import re
import inspect
import logging

logger = logging.getLogger(__name__)


class getGames(object):
    """Scrape jdyou.com and persist games / scripts through one DB cursor."""

    # Root that the relative links found on the pages are appended to.
    base_url = 'http://www.jdyou.com/'
    # Encoding the pages are decoded with; undecodable bytes are dropped.
    page_encoding = 'gbk'

    conn = None
    db = None

    def __init__(self):
        """Open the MySQL connection and a dict-returning cursor."""
        self.conn = MySQLdb.connect(
            db=dbname,
            user=dbuser,
            passwd=dbpasswd,
            host=dbhost,
            charset="utf8",
            cursorclass=MySQLdb.cursors.DictCursor,
        )
        self.db = self.conn.cursor()

    @staticmethod
    def _now():
        """Return the current local time formatted for the ``updated`` column."""
        return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    def _fetch_html(self, url):
        """Download *url* and return its body decoded as text.

        Every page is decoded the same way (``page_encoding`` with the
        ``'ignore'`` error handler) so that the selector always receives
        text rather than raw bytes.
        """
        return gethtml2(url).read().decode(self.page_encoding, 'ignore')

    def saveGame(self, name):
        """Return the ``fz_games`` id for *name*, inserting the row if absent.

        Args:
            name: game name to look up.

        Returns:
            The id of the existing row, or the id of the row just inserted.
        """
        # Parameters must be a real sequence: "(name)" is just a string and
        # some MySQLdb versions would iterate it character by character.
        self.db.execute('select id from fz_games where name = %s', (name,))
        row = self.db.fetchone()
        if row:
            return row['id']
        self.db.execute(
            'insert into fz_games (name,updated) values (%s,%s)',
            (name, self._now()),
        )
        self.conn.commit()
        return self.db.lastrowid

    def savefuzhu(self, gametype, title, info, updated):
        """Store one script under its game unless the title already exists.

        Args:
            gametype: name of the game the script belongs to; the game row
                is created on demand.
            title: script name, used as the uniqueness key in ``fz_name``.
            info: script description.  NOTE(review): the visible column list
                of the insert is (gameid, name, updated), so this value is
                not persisted -- confirm against the real table schema.
            updated: update-time text scraped from the page.
        """
        gameid = self.saveGame(gametype)
        self.db.execute('select id from fz_name where name = %s', (title,))
        if self.db.fetchone():
            return
        self.db.execute(
            'insert into fz_name (gameid,name,updated) values (%s,%s,%s)',
            (gameid, title, updated),
        )
        self.conn.commit()

    # Parse a jdyou.com script detail page.
    def parsejdy(self, url):
        """Extract the script metadata from the detail page at *url*.

        Returns:
            A ``(gametype, title, info, updated)`` tuple of stripped strings.

        Raises:
            IndexError: if the page lacks the keywords meta tag, the
                keywords hold fewer than two comma-separated entries, or
                either of the expected ``div`` elements is missing.
        """
        sel = Selector(text=self._fetch_html(url))
        keywords = sel.xpath('//meta[@name="keywords"]/@content').extract()[0]
        # The first keyword is used as the game name, the second as the title.
        parts = keywords.split(',')
        gametype = parts[0].strip()
        title = parts[1].strip()
        updated = sel.xpath(
            '//div[@class="detailall"]/div[@class="syall"][position()=2]'
            '/div[@class="rights"]/text()'
        ).extract()[0].strip()
        info = sel.xpath(
            '//div[@class="abstract"][position()=1]/text()'
        ).extract()[0].strip()
        logger.info('%s %s %s %s', updated, gametype, title, info)
        return (gametype, title, info, updated)

    def _getgamelistjdy(self, url):
        """Save every script linked from the per-game list page at *url*.

        A detail page that cannot be fetched or parsed is logged and
        skipped so that the remaining scripts on the list are still saved.
        Database errors are not caught here and propagate to the caller.
        """
        sel = Selector(text=self._fetch_html(url))
        hrefs = sel.xpath('//a[contains(@href,"scripts")]/@href').extract()
        for href in hrefs:
            # NOTE(review): the first two characters of the href are
            # dropped, presumably a relative prefix -- confirm on the site.
            script_url = self.base_url + href[2:]
            try:
                parsed = self.parsejdy(script_url)
            except Exception:
                logger.exception('skipping script page %s', script_url)
                continue
            self.savefuzhu(parsed[0], parsed[1], parsed[2], parsed[3])

    # Crawl the whole jdyou.com site.
    def getgamefromjdy(self):
        """Walk every game list linked from the front page and save it.

        A list page that fails is logged and the crawl continues with the
        next one.
        """
        # Collect the links to all per-game list pages.
        sel = Selector(text=self._fetch_html(self.base_url))
        hrefs = sel.xpath(
            '//div[@class="clearfixs menulist"]'
            '/a[contains(@href,"ScriptList")]/@href'
        ).extract()
        for href in hrefs:
            list_url = self.base_url + href
            try:
                self._getgamelistjdy(list_url)
            except Exception:
                # Best effort, but no longer silent, and Ctrl-C still works.
                logger.exception('failed to process game list %s', list_url)


if __name__ == "__main__":
    # Show the per-script progress messages when run as a script.
    logging.basicConfig(level=logging.INFO)
    g = getGames()
    g.getgamefromjdy()

# Footer from the hosting page: the content above was collected and compiled
# by PHP Webmaster Network (52php.cn) for reference and study.  If it helped
# you, feel free to bookmark, like, recommend and share it.
# (Editor: Li Datong)
# Disclaimer: all content on the site comes from the internet; the views
# expressed are those of the authors only and do not represent the site.
# If your rights have been infringed unintentionally, please contact the
# webmaster promptly to have the content removed.