共计 1047 个字符,预计需要花费 3 分钟才能阅读完成。
一、引入 selenium
from selenium import webdriver | |
from time import sleep | |
#from selenium.webdriver.chrome.options import Options | |
#import xlrd | |
import csv | |
import os | |
# Work from the desktop so the CSV output is saved there.
desktop_dir = r'C:\Users\Administrator\Desktop'
os.chdir(desktop_dir)
二、打开网页
# Start a Firefox session and load the first page of the list.
start_url = 'https://movie.douban.com/top250'
driver = webdriver.Firefox()
driver.get(start_url)
# Implicit wait: element lookups keep retrying for up to 20 before failing.
driver.implicitly_wait(20)
三、翻页、获取内容、写入 CSV
# Walk the paginated list, print two text fields for every entry and append
# them as one row each to 豆瓣.csv in the current working directory.
# Uses the `driver` session created earlier in the script.
from selenium.webdriver.common.by import By  # locator constants for find_element()

ITEMS_PER_PAGE = 25
# The URL names a 250-entry list and every page is read as 25 entries, which
# gives 10 pages. The original looped 12 times (its comments said 20 and 15),
# so it would try to click a "next" link after the last page.
# TODO confirm the page count against the live site.
PAGE_COUNT = 250 // ITEMS_PER_PAGE

# Shared selector prefix for the n-th entry; `{}` is the 1-based entry index.
ITEM_PREFIX = '.grid_view > li:nth-child({}) > div:nth-child(1) > div:nth-child(2)'
# The original note says these two fields are the title and the time --
# NOTE(review): verify what the second selector actually yields.
FIRST_FIELD = ITEM_PREFIX + ' > div:nth-child(1) > a:nth-child(1) > span:nth-child(1)'
SECOND_FIELD = ITEM_PREFIX + ' > div:nth-child(2) > p:nth-child(3) > span:nth-child(1)'
NEXT_LINK = '.next > a:nth-child(2)'

try:
    # Open the CSV once instead of once per row. newline='' is what the csv
    # module expects, and an explicit encoding keeps the Chinese text
    # independent of the platform's locale default.
    with open('豆瓣.csv', 'a', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        for page in range(1, PAGE_COUNT + 1):
            # Short pause before reading the page.
            sleep(0.5)
            for i in range(1, ITEMS_PER_PAGE + 1):
                # find_element(By..., ...) replaces the deprecated
                # find_element_by_css_selector helper.
                data1 = driver.find_element(By.CSS_SELECTOR, FIRST_FIELD.format(i)).text
                data2 = driver.find_element(By.CSS_SELECTOR, SECOND_FIELD.format(i)).text
                print(data1, data2)
                writer.writerow([data1, data2])
            sleep(0.5)
            # Move to the next page; skip the click after the final page.
            if page < PAGE_COUNT:
                driver.find_element(By.CSS_SELECTOR, NEXT_LINK).click()
finally:
    # Always close the browser, even if a lookup above raised.
    driver.quit()
正文完
星哥玩云-微信公众号
