爬取青年大学习

发布时间 2023-04-24 20:20:20 作者: 0x1e61
import requests
from lxml import etree

# Script: find the newest entry in the channel listing page, derive that
# episode's "end.jpg" image URL from its link, and save the image locally.

# Listing page whose "movie-list" element holds the episode links.
url = 'http://news.cyol.com/gb/channels/vrGlAKDl/index.html'

# Browser-like User-Agent sent with every request made by this script.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.49'
}

# Seconds to wait for the server; without a timeout requests can block
# indefinitely on an unresponsive host.
timeout = 10

resp = requests.get(url=url, headers=headers, timeout=timeout)
# Stop on a 4xx/5xx response instead of trying to parse an error page.
resp.raise_for_status()

# Parse the listing page.
tree = etree.HTML(resp.text)
# The latest episode is in the first list item, li[1].
li = 1
hrefs = tree.xpath(f'//*[@class="movie-list"]/li[{li}]/a/@href')
if not hrefs:
    # Previously this surfaced as a bare IndexError with no context.
    raise RuntimeError(f'no episode link found in movie-list li[{li}] of {url}')
href = str(hrefs[0])
# Example href: https://h5.cyol.com/special/daxuexi/fox4w2cd4t/m.html
# Splitting that on '/' puts the episode id at index 5 -> 'fox4w2cd4t'.
href_parts = href.split('/')
if len(href_parts) <= 5 or not href_parts[5]:
    raise RuntimeError(f'unexpected episode link format: {href}')
image_id = href_parts[5]
# Example image URL: https://h5.cyol.com/special/daxuexi/fr3np2q0vg/images/end.jpg
images_url = "https://h5.cyol.com/special/daxuexi/" + image_id + "/images/end.jpg"
image_resp = requests.get(images_url, headers=headers, timeout=timeout)
# Fail here rather than saving an HTML error body under a .jpg name.
image_resp.raise_for_status()
file_name = '青年大学习.jpg'
with open(file_name, 'wb') as f:
    f.write(image_resp.content)
# Report success only after the file has been written and closed.
print('保存成功!')