Python 爬取（自动化）豆瓣电影影评，并存储。

发布时间：2020-12-20 10:43:27 所属栏目：Python 来源：网络整理

导读：from selenium import webdriver / from selenium.webdriver import ActionChains / import time / driver = webdriver.Chrome(r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe')  # 自动化测试驱动程序在本地的所在地址 / driver.get('https://accou……

from selenium import webdriverfrom selenium.webdriver import ActionChainsimport timedriver = webdriver.Chrome(r‘C:Program Files (x86)GoogleChromeApplicationchromedriver.exe‘) #自动化侧是驱动程序本地所纺地址driver.get(‘https://accounts.douban.com/passport/login?source=movie‘)#豆瓣电影登录口time.sleep(4)#创建事件对象action = ActionChains(driver)#获取目标元素login = driver.find_element_by_class_name(‘account-tab-account‘)#执行运行action.click(login).perform()username = driver.find_element_by_name(‘username‘)password = driver.find_element_by_name(‘password‘)username.send_keys(‘**‘) #写入自己的用户名，自己更改password.send_keys(‘*‘) #写入自己的密码#获取登录按钮loginbtn = driver.find_element_by_link_text(‘登录豆瓣‘)#执行运行action.click(loginbtn).perform()time.sleep(5)urls = ["https://movie.douban.com/subject/26794435/comments?start=%s&limit=20&sort=new_score&status=P"%i for i in range(0,481,20)]index = 0for url in urls: index+=1 driver.get(url) time.sleep(3) data = driver.page_source with open("./temple/%s.html"%index,"w",encoding=‘utf-8‘) as f: f.write(data) time.sleep(3) with open(‘./评论/评论.text‘,‘a‘,encoding=‘utf-8‘) as h: read = driver.find_elements_by_class_name("short") for j in range(0,len(read)): h.write(‘‘.join(read[j].text).strip().replace(‘n‘,‘‘)) print(‘‘.join(read[j].text).strip().replace(‘n‘,‘‘)) time.sleep(3)driver.close()

（编辑：李大同）

【声明】本站内容均来自网络，其相关言论仅代表作者个人观点，不代表本站立场。若无意侵犯到您的权利，请及时联系站长删除相关内容！