# 爬取百度电影排行
import requests
from lxml import etree
# Scrape the Baidu movie ranking board and save the numbered titles to
# paihang.txt (one "<rank>.<title>" entry per line).
url = "https://top.baidu.com/board?tab=movie"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"
}

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() prevents parsing an HTTP error page as if it were
# the ranking board.
response = requests.get(url=url, headers=headers, timeout=10)
response.raise_for_status()
page_text = response.text

# Parse the fetched HTML and select one <div> per ranking entry.
tree = etree.HTML(page_text)
div_list = tree.xpath('//div[@class="container-bg_lQ801"]/div[2]/div')

titles = []
for div in div_list:
    # xpath() returns a list of text nodes; it is empty when an entry has
    # no title node, so skip such entries instead of failing on [0].
    matches = div.xpath('./div[@class="content_1YWBm"]/a/div[1]/text()')
    if not matches:
        continue
    titles.append(matches[0].strip())

# The context manager guarantees the file is closed even if a write fails.
# Entries are numbered from 1 in the order they were found.
with open("paihang.txt", "w", encoding="utf-8") as fp:
    for i, title in enumerate(titles, start=1):
        line = f"{i}.{title}"
        print(line)
        fp.write(line + "\n")
print("ok")
# 文章评论  ("Article comments" -- appears to be leftover page text, not part of the script)