requests模块基本使用

发布时间: 2023-12-12 16:36:58  作者: 秃头不爱学

1.requests模块基本使用

  1.1 使用requests发送get请求

import requests

# requests wraps the HTTP response in a Python object: the response headers,
# the response body, etc. are all available as attributes of that object.
article_url = 'https://www.cnblogs.com/Hao12345/p/17661461.html'
response = requests.get(article_url)
print(response.text)  # .text is the response body as a str

  1.2 get请求携带参数

import requests

# `params` is URL-encoded and appended to the address as the query string.
query = {'name': "ydh", "age": 19}
response = requests.get(
    'https://www.cnblogs.com/Hao12345/p/17661461.html',
    params=query,
)
# Prints the final URL that was requested:
# https://www.cnblogs.com/Hao12345/p/17661461.html?name=ydh&age=19
print(response.url)

  1.3 编码和解码

from urllib.parse import quote, unquote

# quote() percent-encodes text (UTF-8 by default); unquote() reverses it.
# The two calls must be separate statements: putting both print() calls on
# one line without a separator is a SyntaxError.
print(unquote('%E5%B8%85%E5%93%A5'))  # 帅哥
print(quote("帅哥"))  # %E5%B8%85%E5%93%A5

  1.4 get请求携带请求头

import requests

# Commonly used request headers:
#   User-Agent: the client type
#   Referer:    URL of the previous page
#   Cookie:     data stored on the local client

# Present the request as coming from a mobile browser.
mobile_user_agent = "Mozilla/5.0 (Linux; Android 10; SM-G981B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Mobile Safari/537.36 Edg/118.0.0.0"
request_headers = {"User-Agent": mobile_user_agent}

response = requests.get('https://dig.chouti.com/', headers=request_headers)
print(response.text)

   1.5 使用post请求携带数据登录并获取cookies

import requests

# NOTE(review): the account, password and captcha below are hard-coded sample
# values (kept exactly as written, including the leading spaces in "captcha"
# and "remember"); the captcha presumably changes per attempt -- confirm and
# replace them before running.
login_headers = {
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36 Edg/118.0.2088.46",
    "Referer": "http://www.aa7a.cn/user.php?&ref=http%3A%2F%2Fwww.aa7a.cn%2F",
}
login_form = {
    'username': "2022213049@qq.com",
    "password": "hao09148079",
    "captcha": " kv6n",
    "remember": " 1",
    "ref": "http://www.aa7a.cn/",
    "act": "act_login",
}

# Submit the login form as a POST body.
login_response = requests.post(
    'http://www.aa7a.cn/user.php',
    data=login_form,
    headers=login_headers,
)

# Take the cookies from the login response ...
login_cookies = login_response.cookies
# ... and pass them explicitly on the follow-up GET request.
home_response = requests.get('http://www.aa7a.cn/', cookies=login_cookies)
print(home_response.text)

  1.6 携带cookie的两种方式:

import requests

# Method 1: send the cookie as a raw "Cookie" request header.
# NOTE(review): the cookie value is a captured browser session string and is
# presumably expired by now -- replace it with a fresh one before running.
vote_headers = {
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36 Edg/118.0.2088.46",
    "Cookie": "deviceId=web.eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJqaWQiOiIyNGZkMzY0MS04Zjg2LTRiM2ItOGNhMC0zMTQ2MjhjYjk2YTEiLCJleHBpcmUiOiIxNzAwOTg4ODIwNTcxIn0.8JFzEL_xSn2HtSbVG54g_e6mOsNaEouO2ESk6rWuHVU; Hm_lvt_03b2668f8e8699e91d479d62bc7630f1=1698396823; __snaker__id=t6eAqJjjUPoZmCI4; gdxidpyhxdE=0fqbq3Vt78ByCEtiN%2F6v%2FA%2F4XTCsqP55A%2Bq2iA%5Cqx0%2BTDTPfR7xEY84cNJ9Ac5KMBMMvLz8KZU7GeV2qH74nrEjS5PwzAdwoSaEh4biwmzwEsr%5CcYzDADVEfCA2e4Y3VRrS4Yy54B%2Fevlavi%2B8%5CDPd%5CqAh6M8i7Yno7Q%2B64ekcSqBaRl%3A1698397753185; YD00000980905869%3AWM_NI=%2FBZjrPmHDbdrQvsBw2R1hZRyhG10%2BaBM5lsXpkVV99G7AeVr48aG37t4Go2%2FKYLTQg7gzg0jPL7k4uMF5b2RMFc3jgDzHhm3qVICve9XTmZv8RJTqjP2RvUlp8BMYU%2FMS1U%3D; YD00000980905869%3AWM_NIKE=9ca17ae2e6ffcda170e2e6eed1e944a5f0f890e821b3eb8aa7d14b838a8facd839afe79b84f979f295a5b9d92af0fea7c3b92aed9e968bf15caf988c83e77a93aabda3f63a92efa8bbd173ac99ffd1aa6b8293fb8df254b4b59ab7bc3385e98f86d57af8e8afa8b23db8ec8990b125898784b5d46df5b9b688b3629494f98cb77f8f9afb83b15a8f9aadd4e96d88a68889f039b38a87a4f56ffb889c8ed97efc8e86b5f46294ee9e97d868b4e88eaedc5fed939dd2d837e2a3; YD00000980905869%3AWM_TID=46M6MDMPrOdFRRFVABbUmisUY2sjLhGJ; token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJqaWQiOiJjdHVfNjk4Mzk2OTE2MDgiLCJleHBpcmUiOiIxNzAwOTg4OTE2MjM5In0.WzH1t9AAoy2Q91YhERM4AU1YZqMf6TP_N4FigkRwNko; Hm_lpvt_03b2668f8e8699e91d479d62bc7630f1=1698396920",
}
vote_form = {"linkId": "40419683"}
vote_response = requests.post(
    'https://dig.chouti.com/link/vote',
    headers=vote_headers,
    data=vote_form,
)
print(vote_response.text)

# Method 2: pass the cookies through the `cookies=` argument of the request.
# NOTE(review): the credentials and captcha are hard-coded sample values,
# reproduced exactly as written -- confirm and replace before running.
login_headers = {
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36 Edg/118.0.2088.46",
    "Referer": "http://www.aa7a.cn/user.php?&ref=http%3A%2F%2Fwww.aa7a.cn%2F",
}
login_form = {
    'username': "2022213049@qq.com",
    "password": "hao09148079",
    "captcha": " kv6n",
    "remember": " 1",
    "ref": "http://www.aa7a.cn/",
    "act": "act_login",
}
login_response = requests.post(
    'http://www.aa7a.cn/user.php',
    data=login_form,
    headers=login_headers,
)

# Cookies taken from the login response ...
login_cookies = login_response.cookies
# ... are handed to the next GET request explicitly.
home_response = requests.get('http://www.aa7a.cn/', cookies=login_cookies)
print(home_response.text)

2.requests模块的进阶用法:

  2.1 自动携带cookie 的session对象

import requests

# NOTE(review): the credentials and captcha are hard-coded sample values,
# reproduced exactly as written -- confirm and replace before running.
headers = {
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36 Edg/118.0.2088.46",
    "Referer": "http://www.aa7a.cn/user.php?&ref=http%3A%2F%2Fwww.aa7a.cn%2F"
}
data = {'username': "2022213049@qq.com",
        "password": "hao09148079",
        "captcha": " kv6n",
        "remember": " 1",
        "ref": "http://www.aa7a.cn/",
        "act": "act_login"}

# A Session keeps cookies between requests, so the cookies set by the login
# POST are sent automatically with the following GET. Using it as a context
# manager closes the session (and its pooled connections) when done.
with requests.Session() as session:
    res = session.post('http://www.aa7a.cn/user.php', data=data, headers=headers)
    # No cookies= argument is needed here: the session carries them.
    res_get = session.get('http://www.aa7a.cn/')
print(res_get.text)

  2.2 响应Response

import requests

response = requests.get('http://www.aa7a.cn/')
# Body decoded to str using response.encoding (not necessarily utf-8).
print(response.text)
# Raw body as bytes.
print(response.content)
# Cookies carried by the response.
print(response.cookies)
# HTTP status code.
print(response.status_code)
# Response headers.
print(response.headers)
# Cookies converted to a plain dict.
print(response.cookies.get_dict())
# Cookies as (name, value) pairs.
print(response.cookies.items())
# URL of the response.
print(response.url)
# If the request was redirected, the earlier responses are listed here.
print(response.history)
# Text encoding used to decode .text (this is not the page source).
print(response.encoding)

3.爬虫小案例:   

  3.1 爬取图片

import requests

# Download the image; the pasted "1 ", "2 ", ... line-number prefixes were
# removed because they made every line a syntax error.
res = requests.get('https://ts4.cn.mm.bing.net/th?id=OIP-C.WOiZvWcRE0EhFei1CzT_twHaNK&w=187&h=333&c=8&rs=1&qlt=90&o=6&dpr=1.3&pid=3.1&rm=2')
# Open in binary mode so the raw bytes of the body are written unchanged.
with open('美女.png', 'wb') as f:
    f.write(res.content)

  3.2 爬取肯德基餐厅地址

import json

import requests

# Form body is already URL-encoded; keyword=%E5%91%A8%E6%B5%A6 is "周浦".
data = 'cname=&pid=&keyword=%E5%91%A8%E6%B5%A6&pageIndex=1&pageSize=10'
# NOTE(review): the Cookie holds a captured session id that has presumably
# expired -- confirm whether the endpoint still needs it.
headers = {
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Cookie': 'route-cell=ksa; ASP.NET_SessionId=sir55y2z3ppkk0zdgq5pbxkj; VOLCALB=839681b35f197b4ed33d4bc5335bdf66|1702364346|1702364224',
    'Referer': 'http://www.kfc.com.cn/kfccda/storelist/index.aspx',
}
res = requests.post(
    'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword',
    data=data,
    headers=headers,
)

payload = json.loads(res.text)
# 'Table1' is iterated as a sequence of store records; fall back to an empty
# list so a missing/None value prints nothing instead of raising TypeError.
store_list = payload.get('Table1') or []
for store in store_list:
    print(f"餐厅名称:{store.get('storeName')},餐厅地址:{store.get('addressDetail')}")