【Python】转载一个 Python 爬虫的帖子

发布时间: 2023-07-24 12:17:09 作者: hayden_william

原帖地址

原帖标题:爬取图网的4K图片自动保存本地
https://www.52pojie.cn/thread-1809600-1-1.html
(出处: 吾爱破解论坛)

python 代码


import os.path
import random
import time

import requests
from lxml import etree
import webbrowser

# Open the site in the default browser so the user can copy a User-Agent
# string from it.
webbrowser.open('https://pic.netbian.com')

# Strip surrounding whitespace: pasted values often carry stray spaces, and
# requests refuses header values that start with whitespace.
ua = input("请输入浏览器的User-Agent,不会的请百度\n").strip()

print('''例如:https://pic.netbian.com/4kmeinv
              https://pic.netbian.com/4kfengjing
              https://pic.netbian.com
''')
# The prompt asks for a URL without a trailing '/', but users paste one anyway;
# drop it (and any surrounding whitespace) so page URLs built from this value
# do not end up with a double slash.
uuuu = input("请输入彼岸图的URL不带'/'\n").strip().rstrip('/')

# Request headers sent with the page fetches.
headers = {
    'User-Agent': ua
}
# Base listing URL that the rest of the script crawls.
url = uuuu

def huoqu(urll):
    """Download every image listed on one gallery page.

    Fetches ``urll``, looks at each ``<li>`` under ``<ul class="clearfix">``
    and saves the image referenced by its ``<img src>`` as
    ``彼岸图网/<alt text>.jpg`` relative to the current working directory.

    Args:
        urll: URL of the listing page to scrape.

    Returns:
        None. Progress is reported on stdout.

    List items without an ``<img>`` are skipped. An image whose download
    fails (network error or non-200 status) is reported and skipped so one
    bad image does not abort the rest of the page.
    """
    save_dir = '彼岸图网'
    # Characters that are invalid in Windows file names; '/' would also be
    # interpreted as a directory separator on POSIX systems.
    forbidden = '\\/:*?"<>|'

    # A timeout keeps the script from hanging forever on a stalled connection.
    respones = requests.get(urll, headers=headers, timeout=30)
    # The page is decoded as GBK; replace undecodable bytes instead of
    # crashing on a single bad character.
    mg = respones.content.decode("gbk", errors="replace")

    tree = etree.HTML(mg)
    items = tree.xpath('//ul[@class="clearfix"]//li')
    if not items:
        return

    # Create the output directory once, not once per image.
    os.makedirs(save_dir, exist_ok=True)

    for item in items:
        alts = item.xpath('./a//img/@alt')
        srcs = item.xpath('./a//img/@src')
        if not srcs:
            # Not an image entry; nothing to download.
            continue
        src = srcs[0]

        # Build a file-system-safe name from the alt text, falling back to
        # the image's own file name when the alt text is missing or empty.
        raw_name = alts[0] if alts else ''
        c = ''.join('_' if ch in forbidden else ch for ch in raw_name).strip()
        if not c:
            c = os.path.splitext(os.path.basename(src))[0] or 'image'

        try:
            # Send the same headers as the page request.
            img_mgg = requests.get('https://pic.netbian.com' + src,
                                   headers=headers, timeout=30)
        except requests.RequestException as exc:
            print(f'彼岸图网/{c}.jpg,下载失败: {exc}')
            continue
        if img_mgg.status_code != 200:
            # Do not save an error page under a .jpg name.
            print(f'彼岸图网/{c}.jpg,下载失败: HTTP {img_mgg.status_code}')
            continue

        with open(f'{save_dir}/{c}.jpg', 'wb') as f:
            f.write(img_mgg.content)
        print(f'彼岸图网/{c}.jpg,保存成功')

def zongpage(url):
    """Return the total number of listing pages for ``url`` as a string.

    Fetches ``url`` and reads the link texts inside ``<div class="page">``.
    The largest numeric link text is taken as the page count.

    Args:
        url: URL of the first listing page.

    Returns:
        The page count as a decimal string (callers convert it with
        ``int``). ``'1'`` is returned when no numeric pagination link is
        found, e.g. when the listing has a single page.
    """
    print(url)

    # A timeout keeps the script from hanging forever on a stalled connection.
    respones = requests.get(url, headers=headers, timeout=30)
    # Replace undecodable bytes instead of crashing on a single bad character.
    mg = respones.content.decode("gbk", errors="replace")

    tree = etree.HTML(mg)
    page = tree.xpath('//div[@class="page"]/a/text()')

    # Indexing page[-2] raised IndexError when fewer than two links were
    # present and could pick a non-numeric label; use the largest number
    # among the link texts instead.
    numbers = [int(text) for text in (p.strip() for p in page)
               if text.isdecimal()]
    total = str(max(numbers)) if numbers else '1'

    print('总共:' + total + '页')
    return total

# Total page count (a decimal string) reported by the first listing page.
zongpagee = zongpage(url)

# Tolerate a trailing '/' in the user-supplied URL so page URLs do not
# contain a double slash.
base_url = url.rstrip('/')

# range() excludes its stop value, so add 1 to include the last page
# (and to fetch anything at all when there is only one page).
for a in range(1, int(zongpagee) + 1):

    if a == 1:
        # The first page lives at the base URL itself.
        huoqu(url)
    else:
        # Later pages are addressed as <base>/index_<n>.html.
        uu = f'{base_url}/index_{a}.html'
        print(uu)
        huoqu(uu)
        # Random pause between pages to avoid hammering the server.
        time.sleep(random.randint(2, 5))