7.6 爬虫基础知识学习 requests的使用

发布时间 2023-07-06 21:17:31作者: ranbo145

1. requests的快速使用 

/1 爬虫定义:可见即可爬

/2 安装requests模块

在项目所使用的Python环境(例如对应的虚拟环境)的终端中输入 pip install requests

/3 用requests发送get请求

import requests

# requests.get() returns a response object: the HTTP response (headers, body,
# etc.) wrapped as a Python object.
homepage_url = "https://www.cnblogs.com/abc683871/"
res = requests.get(homepage_url)
body_text = res.text  # response body converted to a string
print(body_text)

/4 用requests发送携带参数的get请求

# `res` is the response object: the HTTP response (headers, body, etc.)
# wrapped as a Python object.
# Passing a dict through `params` makes requests append it to the URL as a
# query string.
res = requests.get('https://www.cnblogs.com/abc683871/', params={'name': 'jack'})
# Print the final URL right away; `res` is rebound by the next request, so
# without this the first request's result would never be looked at.
print(res.url)

# The URL here already contains a query string (?wd=...); the `params` entries
# are appended to it.
res = requests.get(
    'https://www.baidu.com/s?wd=%E5%A1%9E%E5%B0%94%E8%BE%BE%E4%BC%A0%E8%AF%B4',
    params={'name': 'jack'},
)
print(res.url)

/5 url的编码和解码

from urllib.parse import quote, unquote

# Decoding: turn a percent-encoded string back into readable text.
encoded_keyword = '%E5%A1%9E%E5%B0%94%E8%BE%BE%E4%BC%A0%E8%AF%B4'
print(unquote(encoded_keyword))

# Encoding: percent-encode text so it can be placed in a URL.
plain_keyword = '王国之泪'
print(quote(plain_keyword))

/6 携带请求头发送get请求

# 请求头中正常有哪些东西:1 user-Agent:客户端类型 2 referer;上一个页面的地址 3 cookie
# Headers a request normally carries:
#   1. User-Agent: the client type
#   2. Referer: the address of the previous page
#   3. Cookie
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
    ),
}

# Send the GET request with the User-Agent header attached.
res = requests.get("https://dig.chouti.com/", headers=header)
page_text = res.text
print(page_text)

/7 发送post请求

# Request headers sent with the login POST: the referring page and the
# client type.
header = {
    'Referer': 'http://www.aa7a.cn/user.php?&ref=http%3A%2F%2Fwww.aa7a.cn%2F',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
}

# Login form fields.
# NOTE(review): the account and password are hard-coded here; outside of a
# demo they should come from configuration or the environment.
data = {
    'username': '616564099@qq.com',
    'password': 'lqz123',
    'captcha': '1111',
    'remember': 1,
    'ref': 'http://www.aa7a.cn/',
    'act': 'act_login',
}

# requests can encode the request body in two ways.

# 1) data=...: the body is form-encoded (urlencoded).
res = requests.post('http://www.aa7a.cn/user.php', headers=header, data=data)
print(res.text)

# 2) json=...: the body is sent as JSON.
# The result is stored under its own name so that `res` keeps pointing at the
# form-encoded login response; the code further down reads `res.cookies` to
# get the login cookies, and previously it received the cookies of this JSON
# attempt instead.
# NOTE(review): presumably this endpoint only accepts the form-encoded
# variant -- confirm against the site.
res_json = requests.post('http://www.aa7a.cn/user.php', headers=header, json=data)
print(res_json.text)

 /8 获取登录成功的cookie

# Take the cookies carried by the response object `res` so they can be sent
# along with later requests.
cookie = res.cookies
print(cookie)

 方式一:

通过requests的cookies参数携带cookie发送get请求
# Send the saved cookies along through the `cookies` argument.
res1 = requests.get("http://www.aa7a.cn/", cookies=cookie)
# Check whether the account name shows up in the returned page.
account_shown = "616564099@qq.com" in res1.text
print(account_shown)

 方式二:

在请求头中携带cookie发送post请求

# Request headers: the client type plus a raw Cookie header string.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
    "Cookie": "deviceId=web.eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJqaWQiOiJhNTA5NzE0YS1hYTNmLTQzMDItODgzYy00YjM5MzcxZDExMDUiLCJleHBpcmUiOiIxNjkxMjM2ODQyMzAzIn0.StoeBF6lSJKrGGnUvzSgzEqg5Lgj12mUqy8R10itVPM; Hm_lvt_03b2668f8e8699e91d479d62bc7630f1=1688644843; gdxidpyhxdE=bz4yL%5C90nH9u1PHGl0STR%2Bxo6%2B%5CqTYWugpLZKpWh5wgsvYnQ3pzf8UHev2bSYH4WOk%2B%5CMRVRKgX%2FgWR5QEbxzNSTMC9p4%5CiVEkmRd24VDvM0gqc25Qey13h%2BepaJZjQnqDp%5C4d6gyIYu992Abc8vcD8WYNRdEUGG7uEXT0rDc8xHBsqI%3A1688648636718; YD00000980905869%3AWM_NI=nLEqy1%2Fuqjh3pGpEPH3Yqzhc7k73Uz%2BR2yHA6Bz1tk5pxLeicd440md3V7lCLEql9yM%2BPK4j0Zws5pBWkYZzoZ2U8%2FK71LdgJc5A1E9vrD%2FfymE3hPtf9dJZmeaQL%2F1eS2w%3D; YD00000980905869%3AWM_NIKE=9ca17ae2e6ffcda170e2e6ee90e1509ab9afa2d63abbbc8ba3d45f969f9e83d13bf3a6aaa8f5219b8c9aafce2af0fea7c3b92aae948b92fc3bedabbf8beb39b499c0a6d344f187bf9beb69f5b5f9d4d872e98efe9acb4bf69eb6d2f761f7b0b8ade863a1bcfcd5b367a8ed9ad8ef49909bbf8dbc738ff5a0acb762f486b994b87c8289a5bad972ad86feb6bb2589e9ffade76d8abdfba2b44ffbaabea5ae6ebbb38bd4eb5cb8baf785c952afbbafb2cf25f3bb9dd3f637e2a3; YD00000980905869%3AWM_TID=I%2BxMlFVQaPRFAQBUVFaVwhzbXu%2BJGQdb; token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJqaWQiOiJjZHVfNTMyMDcwNzg0NjAiLCJleHBpcmUiOiIxNjkxMjM5Nzc0ODk3In0.rFLp1jS4okBUVpvsiL8EN-Of2ZwCTyaLBGqyxjaCYUw; Hm_lpvt_03b2668f8e8699e91d479d62bc7630f1=1688647927",
}

# Form body of the POST request.
data = {
    "linkId": 39201026,
}

# POST with the cookie carried inside the request headers, then show the reply.
res = requests.post(
    "https://dig.chouti.com/link/vote",
    headers=header,
    data=data,
)
print(res.text)