1. requests的快速使用
/1 爬虫定义:可见即可爬
/2 安装requests模块
在命令行中(当前使用的Python环境下)输入 pip install requests
/3 用requests发送get请求
import requests
# `res` is the response object: requests wraps the HTTP response (response
# headers, response body, etc.) in a Python object.
# An explicit timeout is passed because requests applies none by default, so
# an unresponsive server would otherwise block this script indefinitely.
res = requests.get('https://www.cnblogs.com/abc683871/', timeout=10)
print(res.text)  # response body as a string
/4 用requests发送携带参数的get请求
# `res` is the response object: requests wraps the HTTP response (response
# headers, response body, etc.) in a Python object.
# A dict passed as `params` is automatically appended to the URL as a query
# string. A timeout is set on every call so a stalled server cannot hang the
# script forever (requests has no default timeout).
res = requests.get(
    'https://www.cnblogs.com/abc683871/',
    params={'name': 'jack'},
    timeout=10,
)
# Same idea with a URL that already carries a query string: the extra
# parameters are added after the existing ones.
res = requests.get(
    'https://www.baidu.com/s?wd=%E5%A1%9E%E5%B0%94%E8%BE%BE%E4%BC%A0%E8%AF%B4',
    params={'name': 'jack'},
    timeout=10,
)
# Only the second request's final URL is shown; `res` was rebound above.
print(res.url)
/5 url的编码和解码
from urllib.parse import quote,unquote
# Decoding: turn a percent-encoded string back into readable text.
decoded_text = unquote('%E5%A1%9E%E5%B0%94%E8%BE%BE%E4%BC%A0%E8%AF%B4')
print(decoded_text)
# Encoding: percent-encode text so it can be placed in a URL.
encoded_text = quote('王国之泪')
print(encoded_text)
/6 携带请求头发送get请求
# Headers a request normally carries:
#   1. User-Agent - the client type
#   2. Referer    - the address of the previous page
#   3. Cookie
header = {
    'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'
}
# Send the User-Agent along with the request. The timeout keeps the script
# from waiting forever, since requests does not apply one by default.
res = requests.get('https://dig.chouti.com/', headers=header, timeout=10)
print(res.text)
/7 发送post请求
# Headers sent with the login request: the page we claim to come from and the
# browser we claim to be.
header = {
    'Referer':
        'http://www.aa7a.cn/user.php?&ref=http%3A%2F%2Fwww.aa7a.cn%2F',
    'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'
}
# Login form fields.
# NOTE(review): an account name and password are hard-coded here; they look
# like real credentials - consider loading them from the environment or a
# config file that is not committed.
data = {
    'username': '616564099@qq.com',
    'password': 'lqz123',
    'captcha': '1111',
    'remember': 1,
    'ref': 'http://www.aa7a.cn/',
    'act': 'act_login',
}
# Two ways to encode the request body. Both calls set a timeout because
# requests has none by default and would otherwise wait indefinitely.
# 1) `data=` sends the body form-encoded (urlencoded).
res = requests.post('http://www.aa7a.cn/user.php', headers=header, data=data, timeout=10)
# 2) `json=` sends the body as JSON. This call rebinds `res`, so the text
#    printed below is the response to the JSON request.
res = requests.post('http://www.aa7a.cn/user.php', headers=header, json=data, timeout=10)
print(res.text)
/8 获取登录成功的cookie
# Keep the cookies attached to the most recent response `res` so they can be
# sent back on later requests.
# NOTE(review): presumably `res` is the login response from the previous
# section - confirm it is the one that actually logged in.
cookie = res.cookies
print(cookie)
方式一:
通过cookies参数携带cookie发送get请求
# Pass the saved cookies through the dedicated `cookies` argument. A timeout
# is set because requests would otherwise wait indefinitely on a stalled
# connection.
res1 = requests.get('http://www.aa7a.cn/', cookies=cookie, timeout=10)
print('616564099@qq.com' in res1.text)  # True if the account name appears in the page
方式二:
在请求头中携带cookie发送post请求
# Request headers that carry the cookies as a raw `Cookie` header string
# (alongside the User-Agent) instead of using a separate cookies argument.
# NOTE(review): the Cookie value embeds what look like session tokens
# (`deviceId`, `token`), presumably copied from a logged-in browser session -
# confirm they are expired or move them out of the source file.
header={
'User-Agent':
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
'Cookie':
'deviceId=web.eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJqaWQiOiJhNTA5NzE0YS1hYTNmLTQzMDItODgzYy00YjM5MzcxZDExMDUiLCJleHBpcmUiOiIxNjkxMjM2ODQyMzAzIn0.StoeBF6lSJKrGGnUvzSgzEqg5Lgj12mUqy8R10itVPM; Hm_lvt_03b2668f8e8699e91d479d62bc7630f1=1688644843; gdxidpyhxdE=bz4yL%5C90nH9u1PHGl0STR%2Bxo6%2B%5CqTYWugpLZKpWh5wgsvYnQ3pzf8UHev2bSYH4WOk%2B%5CMRVRKgX%2FgWR5QEbxzNSTMC9p4%5CiVEkmRd24VDvM0gqc25Qey13h%2BepaJZjQnqDp%5C4d6gyIYu992Abc8vcD8WYNRdEUGG7uEXT0rDc8xHBsqI%3A1688648636718; YD00000980905869%3AWM_NI=nLEqy1%2Fuqjh3pGpEPH3Yqzhc7k73Uz%2BR2yHA6Bz1tk5pxLeicd440md3V7lCLEql9yM%2BPK4j0Zws5pBWkYZzoZ2U8%2FK71LdgJc5A1E9vrD%2FfymE3hPtf9dJZmeaQL%2F1eS2w%3D; YD00000980905869%3AWM_NIKE=9ca17ae2e6ffcda170e2e6ee90e1509ab9afa2d63abbbc8ba3d45f969f9e83d13bf3a6aaa8f5219b8c9aafce2af0fea7c3b92aae948b92fc3bedabbf8beb39b499c0a6d344f187bf9beb69f5b5f9d4d872e98efe9acb4bf69eb6d2f761f7b0b8ade863a1bcfcd5b367a8ed9ad8ef49909bbf8dbc738ff5a0acb762f486b994b87c8289a5bad972ad86feb6bb2589e9ffade76d8abdfba2b44ffbaabea5ae6ebbb38bd4eb5cb8baf785c952afbbafb2cf25f3bb9dd3f637e2a3; YD00000980905869%3AWM_TID=I%2BxMlFVQaPRFAQBUVFaVwhzbXu%2BJGQdb; token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJqaWQiOiJjZHVfNTMyMDcwNzg0NjAiLCJleHBpcmUiOiIxNjkxMjM5Nzc0ODk3In0.rFLp1jS4okBUVpvsiL8EN-Of2ZwCTyaLBGqyxjaCYUw; Hm_lpvt_03b2668f8e8699e91d479d62bc7630f1=1688647927'
}
# Form body for the vote request: the id of the link being voted on.
data = {
    'linkId': 39201026,
}
# POST the vote using the `header` dict defined just above. A timeout is set
# because requests applies none by default and could otherwise block forever.
res = requests.post(
    'https://dig.chouti.com/link/vote',
    headers=header,
    data=data,
    timeout=10,
)
print(res.text)