加入收藏 | 设为首页 | 会员中心 | 我要投稿 李大同 (https://www.lidatong.com.cn/)- 科技、建站、经验、云计算、5G、大数据,站长网!
当前位置: 首页 > 编程开发 > Python > 正文

抓取简单游全站数据

发布时间:2020-12-17 17:18:57 所属栏目:Python 来源:网络整理
导读:今天PHP站长网 52php.cn把收集自互联网的代码分享给大家,仅供参考。代码开头如下:#!/usr/bin/python;# -*- coding:utf-8 -*-;from public import gethtml2;from dbconfig_waigua import *;from scrapy.selector import Selector;import MySQLdb……

以下代码由PHP站长网 52php.cn收集自互联网

现在PHP站长网小编把它分享给大家,仅供参考

#!/usr/bin/python
# -*- coding:utf-8 -*-
from public import gethtml2
from dbconfig_waigua import *
from scrapy.selector import Selector
import MySQLdb,MySQLdb.cursors,datetime,re,inspect

class getGames:
    """Scrape game-script pages from www.jdyou.com and store them in MySQL.

    Two tables are written, as shown by the SQL in this class:

    * ``fz_games (id, name, updated)`` -- one row per game.
    * ``fz_name (id, gameid, name, updated)`` -- one row per script title.

    The code is kept Python 2 compatible (the file uses ``MySQLdb``) while
    avoiding syntax that Python 3 rejects.
    """

    # Root of the site; every scraped href is appended to it.
    BASE_URL = 'http://www.jdyou.com/'

    conn = None  # MySQLdb connection, set in __init__
    db = None    # DictCursor on ``conn``, set in __init__

    def __init__(self):
        """Open the MySQL connection and a cursor that returns rows as dicts.

        ``dbname``, ``dbuser``, ``dbpasswd`` and ``dbhost`` are module-level
        names -- presumably supplied by ``from dbconfig_waigua import *``.
        """
        self.conn = MySQLdb.connect(
            db=dbname,
            user=dbuser,
            passwd=dbpasswd,
            host=dbhost,
            charset="utf8",
            cursorclass=MySQLdb.cursors.DictCursor
        )
        self.db = self.conn.cursor()

    def saveGame(self, name):
        """Return the id of the ``fz_games`` row named *name*.

        The row is inserted (and committed) first when it does not exist yet.
        """
        # Parameters must be a real sequence: ``(name)`` is just a string.
        self.db.execute('select id from fz_games where name = %s', (name,))
        row = self.db.fetchone()
        if row:
            return row['id']

        stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        self.db.execute(
            'insert into fz_games (name,updated) values (%s,%s)',
            (name, stamp)
        )
        self.conn.commit()
        return self.db.lastrowid

    def savefuzhu(self, gametype, title, info, updated):
        """Store one script entry unless a row with the same title exists.

        gametype -- game name; looked up / created through ``saveGame``.
        title    -- script title, stored in ``fz_name.name``.
        info     -- script description; accepted for interface compatibility
                    but not stored (the INSERT has no column for it).
        updated  -- update timestamp text as scraped from the page.
        """
        gameid = self.saveGame(gametype)

        self.db.execute('select id from fz_name where name = %s', (title,))
        if self.db.fetchone():
            return

        self.db.execute(
            'insert into fz_name (gameid,name,updated) values (%s,%s,%s)',
            (gameid, title, updated)
        )
        self.conn.commit()

    def _fetch(self, url):
        """Download *url* and return the body decoded as GBK.

        Bytes that are not valid GBK are dropped rather than raising.
        """
        return gethtml2(url).read().decode('gbk', 'ignore')

    # Parse one jdyou script page.
    def parsejdy(self, url):
        """Scrape a single script page.

        Returns a ``(gametype, title, info, updated)`` tuple of stripped
        strings. ``gametype`` and ``title`` are the first two comma-separated
        entries of the ``keywords`` meta tag.

        Raises IndexError when an expected element is missing or the
        keywords tag has fewer than two entries.
        """
        sel = Selector(text=self._fetch(url))

        keywords = sel.xpath('//meta[@name="keywords"]/@content').extract()[0]
        parts = keywords.split(',')
        gametype = parts[0].strip()
        title = parts[1].strip()

        updated = sel.xpath(
            '//div[@class="detailall"]/div[@class="syall"][position()=2]'
            '/div[@class="rights"]/text()'
        ).extract()[0].strip()
        info = sel.xpath(
            '//div[@class="abstract"][position()=1]/text()'
        ).extract()[0]

        # Progress trace; encoded explicitly so a non-UTF-8 stdout on
        # Python 2 does not raise UnicodeEncodeError.
        print(u' '.join([updated, gametype, title, info]).encode('utf-8'))
        return (gametype, title, info.strip(), updated)

    def _getgamelistjdy(self, url):
        """Scrape every script linked from one game's list page and save it.

        A failure on a single script page is reported and skipped so the
        remaining scripts of the game are still processed.
        """
        sel = Selector(text=self._fetch(url))
        for href in sel.xpath('//a[contains(@href,"scripts")]/@href').extract():
            # The first two characters of the href are dropped --
            # presumably a relative "./" or ".." prefix; TODO confirm.
            page = self.BASE_URL + href[2:]
            try:
                gametype, title, info, updated = self.parsejdy(page)
                self.savefuzhu(gametype, title, info, updated)
            except Exception as exc:
                print('skipped script page %r: %r' % (page, exc))

    # Crawl the whole jdyou site.
    def getgamefromjdy(self):
        """Walk the home-page game menu and scrape every game's script list.

        Best effort: a game whose list page fails is reported and skipped.
        """
        # Collect the list of all games from the home page menu.
        sel = Selector(text=self._fetch(self.BASE_URL))
        hrefs = sel.xpath(
            '//div[@class="clearfixs menulist"]'
            '/a[contains(@href,"ScriptList")]/@href'
        ).extract()
        for href in hrefs:
            listurl = self.BASE_URL + href
            try:
                self._getgamelistjdy(listurl)
            except Exception as exc:
                # Narrower than the former bare ``except`` so Ctrl-C still
                # stops the crawl, and the failure is no longer silent.
                print('skipped game list %r: %r' % (listurl, exc))


if __name__ == "__main__":
    # Script entry point: connect to the database and crawl the whole site.
    getGames().getgamefromjdy()

以上内容由PHP站长网【52php.cn】收集整理供大家参考研究

如果以上内容对您有帮助,欢迎收藏、点赞、推荐、分享。

(编辑:李大同)

【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!

    推荐文章
      热点阅读