Python 之秀人网图片下载

发布时间: 2023-12-28 13:13:00 作者: 萧海~

import sys
from urllib.parse import urljoin

import requests
from lxml import etree
# Cookies attached to every request. The values are hard-coded (presumably
# captured from a browser session) and are never refreshed at runtime.
cookies = {
    "_pk_ref.2.90a9": "%5B%22%22%2C%22%22%2C1703739850%2C%22https%3A%2F%2Fwww.google.com.hk%2F%22%5D",
    "_pk_id.2.90a9": "b87f72074fff4914.1703739850.",
    "_pk_ses.2.90a9": "1",
}

# Browser-like request headers. There is deliberately no "cookie" entry here:
# the cookies above are handed to requests separately via its `cookies=`
# argument.
headers = {
    "authority": "www.xiurenwang.cc",
    "accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.7"
    ),
    "accept-language": "zh-CN,zh;q=0.9",
    # Ask intermediaries not to serve a cached copy.
    "cache-control": "no-cache",
    "pragma": "no-cache",
    "referer": "https://www.xiurenwang.cc/bang",
    # Client-hint and fetch-metadata headers as a desktop Chrome 120 sends them.
    "sec-ch-ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "same-origin",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}

# Fetch one listing page, follow every gallery link found on it, and print
# the list of image links extracted from each detail page.
LIST_URL = 'https://www.xiurenwang.cc/bang/page/2'
# (connect, read) timeouts in seconds; without a timeout requests waits
# indefinitely on a stalled server.
REQUEST_TIMEOUT = (5, 30)

response = requests.get(
    LIST_URL, cookies=cookies, headers=headers, timeout=REQUEST_TIMEOUT
)
# Fail fast: without a valid listing page there is nothing to iterate over,
# and parsing an HTTP error page would silently yield no links.
response.raise_for_status()
html = etree.HTML(response.text)
# hrefs of the thumbnail anchors; each one points at a detail page.
next_url = html.xpath('//div[@class="list"]/li/a[@class="img"]/@href')

for item in next_url:
    # Resolve against the listing URL so root-relative, relative and
    # already-absolute hrefs all produce a valid address.
    detail_url = urljoin(LIST_URL, item)

    try:
        response = requests.get(
            detail_url, cookies=cookies, headers=headers, timeout=REQUEST_TIMEOUT
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        # One broken detail page should not abort the remaining downloads.
        print(f'skipping {detail_url}: {exc}', file=sys.stderr)
        continue

    html = etree.HTML(response.text)
    img_url = html.xpath('//div[@id="image"]/a/@href')
    print(img_url)