python根据京东商品url获取产品价格
发布时间:2020-12-16 21:34:43 所属栏目:Python 来源:网络整理
导读:京东商品详情页的请求处理过程是:先返回并显示html,然后再通过ajax请求获取并显示价格。因此获取价格有两种方式:1. 运行js,并解析执行之后得到的html;2. 模拟js的ajax请求,直接得到价格。代码开头如下: # -*- coding: utf-8 -*- """根据京东url地址,获取商品价格。京东请求处理过程:先显示html页面,然后通过ajax get请求获取相应的商品价格……
# Article text: JD.com serves a product detail page as plain HTML first and
# only afterwards fills in the price through a separate AJAX request.
# -*- coding: utf-8 -*-
"""
Fetch the price of a JD.com product from its product-page URL.

JD renders the HTML page first and then obtains the matching product price
with an AJAX GET request.

1. Format of the product data embedded in the HTML (example):
    # product: {
    #     skuid: 1310118868,
    #     name: 'u9999u5f71u77edu88d9u4e24u4ef6u5957u88c5u5973u0032u0030u0031u0034u51acu88c5u65b0u6b3eu97e9u7248u957fu8896u0054u6064u4e0au8863u8377u53f6u8fb9u534au8eabu88d9u6f6eu0020u85cfu9752u0020u004d',
    #     skuidkey:'7781F505B71CE37A3AFBADA119D3587F',
    #     href: 'http://item.jd.com/1310118868.html',
    #     src: 'jfs/t385/197/414081450/336886/3070537b/541be890N2995990c.jpg',
    #     cat: [1315,1343,1355],
    #     brand: 18247,
    #     nBrand: 18247,
    #     tips: false,
    #     type: 2,
    #     venderId:38824,
    #     shopId:'36786',
    #     TJ:'0',
    #     specialAttrs:["is7ToReturn-1"],
    #     videoPath:'',
    #     HM:'0'
    # }

2. The AJAX request code used by the page (the '暂无报价' literal is the
   page's "no price available" text):
    # // get the numeric price
    # var getPriceNum = function(skus,$wrap,perfix,callback) {
    #     skus = typeof skus === 'string' ? [skus]: skus;
    #     $wrap = $wrap || $('body');
    #     perfix = perfix || 'J-p-';
    #     $.ajax({
    #         url: 'http://p.3.cn/prices/mgets?skuIds=J_' + skus.join(',J_') + '&type=1',
    #         dataType: 'jsonp',
    #         success: function (r) {
    #             if (!r && !r.length) {
    #                 return false;
    #             }
    #             for (var i = 0; i < r.length; i++) {
    #                 var sku = r[i].id.replace('J_','');
    #                 var price = parseFloat(r[i].p,10);
    #
    #                 if (price > 0) {
    #                     $wrap.find('.'+ perfix + sku).html('¥' + r[i].p + '');
    #                 } else {
    #                     $wrap.find('.'+ perfix + sku).html('暂无报价');
    #                 }
    #
    #                 if ( typeof callback === 'function' ) {
    #                     callback(sku,price,r);
    #                 }
    #             }
    #         }
    #     });
    # };
"""
import json
import re
import urllib

try:
    from urllib.request import urlopen  # Python 3
except ImportError:  # Python 2
    from urllib import urlopen


def _to_text(raw):
    """Return *raw* as text so that ``str`` regex patterns can be applied.

    On Python 2 (where ``bytes is str``) and for values that are already
    text, the input is returned unchanged. On Python 3, bytes are decoded
    as UTF-8, falling back to GB18030 with undecodable bytes dropped.
    """
    if bytes is str or not isinstance(raw, bytes):
        return raw
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        # NOTE(review): the fallback codec follows the article's own snippets,
        # which decode JD pages as gb2312 / gb18030 -- confirm against the
        # charset the live site actually serves.
        return raw.decode('gb18030', 'ignore')


class JdPrice(object):
    """Small wrapper that looks up the price of one JD.com product page."""

    # Raw product description that follows "compatible: true," in the page.
    _PRODUCT_RE = re.compile(r'compatible: true,(.*?)};', re.S)
    # The skuid value inside that product description.
    _SKUID_RE = re.compile(r'skuid: (.*?),')
    # Price endpoint; %s is replaced with the skuid.
    _PRICE_URL = 'http://p.3.cn/prices/mgets?skuIds=J_%s&type=1'

    def __init__(self, url):
        """Download the product page at *url* and keep its HTML.

        :param url: product page address, e.g. http://item.jd.com/1044773.html
        """
        self.url = url
        self._response = urlopen(self.url)
        try:
            self.html = _to_text(self._response.read())
        finally:
            # The body has been read completely; release the connection.
            self._response.close()

    def get_product(self):
        """Return the product description embedded in the HTML.

        The data is not parsed any further; it is returned roughly, as the
        text found between ``compatible: true,`` and the next ``};``.

        :return: the raw product description text
        :raises IndexError: if the page contains no such section
        """
        match = self._PRODUCT_RE.search(_to_text(self.html))
        if match is None:
            raise IndexError(
                'product description not found in page: %s'
                % getattr(self, 'url', None))
        return match.group(1)

    def get_product_skuid(self):
        """Return the product's skuid taken from the product description.

        :return: the skuid as text
        :raises IndexError: if the description or its skuid field is missing
        """
        match = self._SKUID_RE.search(self.get_product())
        if match is None:
            raise IndexError(
                'skuid not found in product description: %s'
                % getattr(self, 'url', None))
        return match.group(1)

    def get_product_name(self):
        """Placeholder: not implemented, always returns None."""
        pass

    @staticmethod
    def _price_from_payload(payload):
        """Return the ``'p'`` value of the first entry of *payload*.

        :param payload: decoded JSON answer of the price endpoint
        :return: the price value, or None when *payload* is not a non-empty
            list of objects or the first entry has no (truthy) ``'p'``
        """
        if not isinstance(payload, list) or not payload:
            return None
        first = payload[0]
        if not isinstance(first, dict):
            return None
        return first.get('p') or None

    def get_product_price(self):
        """Request the product's price using its skuid.

        :return: the price reported by the price endpoint, or None when no
            price is available
        :raises IndexError: if the skuid cannot be found in the page
        """
        skuid = self.get_product_skuid()
        response = urlopen(self._PRICE_URL % skuid)
        try:
            payload = json.loads(_to_text(response.read()))
        finally:
            response.close()
        return self._price_from_payload(payload)


# Test code
if __name__ == '__main__':
    # Alternative sample page: 'http://item.jd.com/1310118868.html'
    url = 'http://item.jd.com/1044773.html'
    jp = JdPrice(url)
    print(jp.get_product_price())

    # htm.decode('gb2312','ignore').encode('utf-8')
    # f = open('jjs.html','w')
    # f.write(htm)
    # f.close()


# Article text: here is another JD price crawler to share.
#
# NOTE(review): the listing below reached this file with all whitespace and
# string quotes stripped (and probably the backslash in the URL filter), so
# the quoting and spacing shown here are reconstructed -- confirm against the
# original article. It is kept as a reference only because it needs the
# third-party, Python 2-only `creepy` and `BeautifulSoup` packages and starts
# crawling as soon as it is executed.
#
#     from creepy import Crawler
#     from BeautifulSoup import BeautifulSoup
#     import urllib2
#     import json
#
#     class MyCrawler(Crawler):
#         def process_document(self, doc):
#             if doc.status == 200:
#                 print '[%d] %s' % (doc.status, doc.url)
#                 try:
#                     soup = BeautifulSoup(doc.text.decode('gb18030').encode('utf-8'))
#                 except Exception as e:
#                     print e
#                     soup = BeautifulSoup(doc.text)
#                 print soup.find(id="product-intro").div.h1.text
#                 url_id = urllib2.unquote(doc.url).decode('utf8').split('/')[-1].split('.')[0]
#                 f = urllib2.urlopen('http://p.3.cn/prices/get?skuid=J_' + url_id, timeout=5)
#                 price = json.loads(f.read())
#                 f.close()
#                 print price[0]['p']
#             else:
#                 pass
#
#     crawler = MyCrawler()
#     crawler.set_follow_mode(Crawler.F_SAME_HOST)
#     crawler.set_concurrency_level(16)
#     crawler.add_url_filter('\.(jpg|jpeg|gif|png|js|css|swf)$')
#     crawler.crawl('http://item.jd.com/982040.html')
#
# (Editor: Li Datong)
# Disclaimer: all content on this site comes from the internet; the opinions
# expressed are the authors' own and do not represent this site's position.
# If your rights have been inadvertently infringed, please contact the
# webmaster promptly to have the content removed.