python爬虫示例-1

发布时间: 2024-01-08 20:36:14 作者: 右眼与明天
 1 import os 
 2 from bs4 import BeautifulSoup
 3 import requests as re 
 4 import time
 5 
 6 download_url_1= "https://umei.net/i/"
 7 # print(download_url[:-5])
 8 headers = {"User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
 9            "Accept" : "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
10            "Cookie" : "__vtins__K0KLKO0fwudqZoqt=%7B%22sid%22%3A%20%2221d0b33c-10ad-5fad-9a4c-bbe843634ecb%22%2C%20%22vd%22%3A%204%2C%20%22stt%22%3A%20594413%2C%20%22dr%22%3A%20157663%2C%20%22expires%22%3A%201703747854223%2C%20%22ct%22%3A%201703746054223%7D; __51uvsct__K0KLKO0fwudqZoqt=1; __51vcke__K0KLKO0fwudqZoqt=b3006d56-5dd1-54d9-ad5d-2ea6cb1d13c1; __51vuft__K0KLKO0fwudqZoqt=1703745459812"}
11 
def image_path(path):
    """Ensure the image directory exists and return it as an absolute prefix.

    Args:
        path: Directory in which downloaded images are stored; may be
            relative to the current working directory.

    Returns:
        The absolute form of ``path`` followed by ``os.sep``, so that a file
        name can be appended to it by plain string concatenation.
    """
    try:
        # makedirs() also creates missing parent directories, and relying on
        # FileExistsError avoids the check-then-create race of
        # os.path.exists() followed by os.mkdir().
        os.makedirs(path)
    except FileExistsError:
        # An existing directory is reused as-is; nothing is re-prompted, so
        # there is no reason to delay the caller here.
        print('路径已经存在!,请重新输入')
    # os.sep instead of a hard-coded backslash keeps the prefix valid on
    # non-Windows systems as well.
    return os.path.abspath(path) + os.sep
25 
26 # ---------------------------------------------------
def download(path,download_url_1):
    """Download the lazy-loaded images found in the list items of a page.

    Fetches ``download_url_1``, collects the ``data-original`` URL of every
    ``<img>`` nested inside an ``<li>``, and saves the images into ``path``
    as ``1.jpg``, ``2.jpg``, ... in the order they were found.  An image
    that cannot be fetched is reported and skipped; its number stays unused.

    NOTE: ``re`` in this file is the alias for ``requests``
    (``import requests as re``), not the standard-library regex module.

    Args:
        path: Destination directory, with or without a trailing separator.
        download_url_1: URL of the HTML page to scan for images.

    Raises:
        requests.exceptions.RequestException: Propagated from the request
            for the page itself (connection failure, timeout, ...).
    """
    from urllib.parse import urljoin

    page = re.get(download_url_1, headers=headers, timeout=20)
    page.encoding = page.apparent_encoding
    soup = BeautifulSoup(page.text, "html.parser")

    # Collect the image URLs first.  Tags without a usable ``data-original``
    # attribute are skipped instead of raising KeyError, and relative or
    # protocol-relative URLs are resolved against the page URL.
    image_urls = []
    for item in soup.find_all("li"):
        for tag in item.find_all("img"):
            source = tag.get("data-original")
            if source:
                image_urls.append(urljoin(download_url_1, source))

    # enumerate(..., start=1) visits every collected image, including the
    # last one, while keeping the 1-based file numbering.
    for index, image_url in enumerate(image_urls, start=1):
        try:
            response = re.get(image_url, headers=headers, timeout=20)
            # Do not store an HTTP error page under a .jpg name.
            response.raise_for_status()
        except re.exceptions.RequestException as exc:
            # Best effort: report the failure and carry on with the rest.
            print(f'下载失败: {image_url} ({exc})')
            continue

        img_path = os.path.join(path, str(index) + '.jpg')
        # Image data is bytes, so the file must be opened in binary mode.
        with open(img_path, 'wb') as f:
            f.write(response.content)
        print('下载完成')
58 
59 
60 # ----------------------------------------------------
if __name__ == "__main__":
    # Script entry point: prepare the target folder, report where it is,
    # then download the images of the page into it.
    download_url_1 = "https://umei.net/i/"
    # The folder name is fixed here rather than asked from the user.
    imgpth = image_path("background")
    print(f'文件夹路径:{imgpth}')
    download(imgpth, download_url_1)
68