爬虫爬取网页图片《滕王阁序》文徵明 行草

发布时间: 2023-09-14 17:55:54　作者: 嘿十三

Python 爬取网页图片

import urllib.request  # python自带的爬操作url的库
import re # 正则表达式

def getImage(url, save_dir='D:/Users/Desktop/tmp/imagebugliang', limit=20, timeout=30):
    """Download the images referenced by a web page into a local directory.

    The page at *url* is fetched with a mobile-browser User-Agent and scanned
    for double-quoted ``https://img...`` URLs. The first *limit* matches are
    downloaded in page order and stored as ``0.jpg``, ``1.jpg``, ... The
    number counts successful downloads only, so a failed download does not
    leave a gap in the numbering.

    Args:
        url: Address of the page to scan.
        save_dir: Directory that receives the images; created if missing.
        limit: Maximum number of matched URLs to try (``None`` for all).
        timeout: Per-request timeout in seconds.

    Returns:
        The paths of the files that were written, in download order.

    Raises:
        urllib.error.URLError: If the page itself cannot be fetched.
    """
    import os  # local import: nothing else in the file needs it

    headers = {
        'User-Agent': (
            'Mozilla/5.0(linux; android 6.0; Nexus 5 Build/MRA58N) '
            'AppleWebKit/537.36(KHTML, like Gecko) Chrome/56.0.2924.87 '
            'Mobile Safari/537.36'
        ),
    }

    request = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(request, timeout=timeout) as response:
        # Only ASCII URLs are extracted, so a stray undecodable byte in the
        # page must not abort the whole run.
        page = response.read().decode('UTF-8', errors='replace')

    imageList = re.findall(r'(https://img[^\s]*?)"', page)

    # Without this, every download fails when the target folder is missing.
    os.makedirs(save_dir, exist_ok=True)

    saved = []
    for imageUrl in imageList[:limit]:
        print('正在下载: %s' % imageUrl)
        image_save_path = '%s/%d.jpg' % (save_dir, len(saved))
        try:
            # Send the same headers as for the page: urlretrieve() would use
            # the default Python User-Agent, which image hosts often reject.
            image_request = urllib.request.Request(imageUrl, headers=headers)
            with urllib.request.urlopen(image_request, timeout=timeout) as response:
                data = response.read()
            with open(image_save_path, 'wb') as image_file:
                image_file.write(data)
        except Exception as exc:
            # Best-effort per image: report the failure and move on, but let
            # KeyboardInterrupt / SystemExit propagate.
            print('下载失败: %s (%s)' % (imageUrl, exc))
            continue
        saved.append(image_save_path)

    return saved


# Run the scraper against the article page only when executed as a script.
if __name__ == '__main__':
    getImage("https://www.zmkm8.com/article-7194-1.html")