python爬取电影演员一生拍摄电影的数量

发布时间 2023-05-21 20:14:30作者: YE-
import requests
from bs4 import BeautifulSoup
import random

url = "https://www.imdb.com/search/name/?gender=male,female"

# 向IMDb发送请求并获取响应
response = requests.get(url)

# 使用BeautifulSoup解析响应内容
soup = BeautifulSoup(response.text, "html.parser")

# 找到所有演员姓名的元素
names = soup.find_all("h3", class_="lister-item-header")

# 创建字典来存储演员姓名和电影数量
actors = {}

# 遍历所有的演员姓名元素,并将它们添加到演员字典中
for name in names:
    actor_name = name.find("a").text.strip()
    actor_link = "https://www.imdb.com" + name.find("a").get("href")
    actors[actor_name] = actor_link

# 从演员字典中随机选择五个演员
random_actors = random.sample(list(actors.keys()), 5)

# 获取每个演员的电影数量并添加到演员字典中
for actor in random_actors:
    actor_link = actors[actor]
    response = requests.get(actor_link)
    soup = BeautifulSoup(response.text, "html.parser")
    movie_num = soup.find("div", class_="desc").find("span").text.strip() if soup.find("div", class_="desc") else None
    actors[actor] = int(movie_num.replace(",", "")) if movie_num is not None else None

# 将含有None值的项去除
actors = {k:v for k,v in actors.items() if v is not None}

# 按演员电影数量降序排列
sorted_actors = sorted(actors.items(), key=lambda x: x[1], reverse=True)

# 打印排名结果
print("五位随机选取的演员及其生涯电影数量:")
for i, (actor, num) in enumerate(sorted_actors):
    print(f"{i+1}. {actor}: {num}")

运行测试截图