# "点击查看代码" ("Click to view code") - leftover blog widget label from the paste, not part of the script
# Author:Jasy
# 爬取网易云音乐飙升榜歌曲
import os
import re

import requests
from lxml import etree
# Outer-link endpoint used to fetch a track's audio:
#   https://music.163.com/song/media/outer/url?id=<song id>
# 1. Target page.
# The address shown in the browser is
# https://music.163.com/#/discover/toplist?id=19723756 ; the '#' there is only
# the first hop of the page load, not the final page URL, so it is removed.
url = 'https://music.163.com/discover/toplist?id=19723756'
base_url = 'https://music.163.com/song/media/outer/url?id='
# 2.1 Browser-like User-Agent header sent with every request.
ua = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36'}
# Directory the .mp3 files are written to (absolute path; use './...' for a
# relative one).
save_dir = 'E:/python爬虫练习/wangyiyun'
# Seconds to wait on any single HTTP request before giving up.
request_timeout = 15

# Matches the numeric id in hrefs such as '/song?id=123456'. Template
# placeholders such as '/song?id=${x.id}' contain no digits after 'id=' and
# therefore do not match.
_SONG_ID_RE = re.compile(r'song\?id=(\d+)')
# Characters that are not allowed (or are path separators) in file names.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')


def _song_id_from_href(href):
    """Return the numeric song id contained in *href*, or None if absent.

    A regular expression is used instead of ``str.strip('/song?id=')``:
    ``strip`` removes any of the given *characters* from both ends, not the
    literal prefix, so it only worked by accident on purely numeric ids.
    """
    match = _SONG_ID_RE.search(href)
    return match.group(1) if match else None


def _safe_filename(name):
    """Return *name* with characters that are unsafe in file names replaced.

    The result may be empty when *name* consists only of unsafe characters,
    spaces or dots; the caller is expected to supply a fallback.
    """
    return _UNSAFE_FILENAME_CHARS.sub('_', name).strip(' .')


# 2.2 Request the chart page; fail loudly on an HTTP error instead of
# parsing an error page.
response = requests.get(url, headers=ua, timeout=request_timeout)
response.raise_for_status()
html_str = response.text

# 3. Extract the data. XPath works on a parsed tree, so the response string
# is converted with lxml first.
html = etree.HTML(html_str)
anchors = html.xpath('//a[contains(@href,"song?id=")]') if html is not None else []

# Read the id and the title from the SAME <a> element. Running two separate
# XPath queries and zipping the results mis-pairs names and ids as soon as
# one anchor has no text node.
song_ids = []
song_names = []
seen_ids = set()
for anchor in anchors:
    song_id = _song_id_from_href(anchor.get('href', ''))
    song_name = ''.join(anchor.xpath('text()')).strip()
    # Skip template placeholders, untitled links and repeated entries.
    if song_id is None or not song_name or song_id in seen_ids:
        continue
    seen_ids.add(song_id)
    song_ids.append(song_id)
    song_names.append(song_name)

# 4. Download and save each track.
os.makedirs(save_dir, exist_ok=True)
for song_id, song_name in zip(song_ids, song_names):
    song_url = base_url + song_id
    print(song_url)
    try:
        # Binary payloads (audio, video, images) are read through .content.
        audio = requests.get(song_url, headers=ua, timeout=request_timeout)
        audio.raise_for_status()
    except requests.RequestException as exc:
        # One failed track must not abort the remaining downloads.
        print(f'skipped {song_name}: {exc}')
        continue
    # NOTE(review): presumably the outer link answers with an HTML page when a
    # track is unavailable - confirm; a text response is not audio either way.
    if audio.headers.get('Content-Type', '').startswith('text/'):
        print(f'skipped {song_name}: response is not audio')
        continue
    mp3 = audio.content
    # Fall back to the id when nothing usable is left of the title.
    file_stem = _safe_filename(song_name) or song_id
    with open(f'{save_dir}/{file_stem}.mp3', 'wb') as f:
        f.write(mp3)