urllib发送请求

发布时间: 2023-09-30 13:50:58 作者: sgj191024
import urllib.request

# Fetch the Baidu home page over plain HTTP and print its HTML.
url = "http://www.baidu.com"
# The context manager closes the connection once the body has been read,
# so the underlying socket is not leaked. `response` stays bound afterwards.
with urllib.request.urlopen(url) as response:
    # read() returns raw bytes; decode them to obtain text.
    content = response.read().decode('utf-8')
print(content)

  如果不调用decode()解码,read()返回的是字节数据(bytes),而不是字符串

# HTTP status code of the response.
# The `status` attribute replaces getcode(), which is deprecated since Python 3.9.
code = response.status
print(code)

  

# Response headers as a list of (name, value) tuples.
headers = list(response.headers.items())
print(headers)

  下载:

import urllib.request
# Download the resource at `url` and save it directly to a local file.
url = "http://www.baidu.com"
urllib.request.urlretrieve(url=url, filename="baidu1.html")

  下载图片:

import urllib.request
# Download the image at `url` and save it directly to a local file.
url = "https://img1.baidu.com/it/u=1095880180,3931424613&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=889"
urllib.request.urlretrieve(url=url, filename="daimei.jpg")

  百度的url改用https后会触发反爬:如果请求不带浏览器请求头(如User-Agent),返回的页面内容就不完整

# Request the HTTPS home page with urlopen() alone, i.e. without custom headers.
url = 'https://www.baidu.com'
# Close the response deterministically instead of leaking the connection.
with urllib.request.urlopen(url) as res:
    content = res.read().decode('utf-8')
print(content)

  百度搜索的中文关键字需要先做URL编码(百分号编码,例如 %E5%91%A8)才能放进url

import urllib.request
import urllib.parse

# The URL this snippet builds once the keyword is percent-encoded:
#   https://www.baidu.com/s?wd=%E5%91%A8%E6%9D%B0%E4%BC%A6

# quote() percent-encodes the non-ASCII keyword so it can be placed in the URL.
key = urllib.parse.quote('周杰伦')
url = 'https://www.baidu.com/s?wd=' + key

# Send a browser-like User-Agent header along with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62',
}
request = urllib.request.Request(url=url, headers=headers)
# The context manager closes the connection after the body is read.
with urllib.request.urlopen(request) as response:
    content = response.read().decode('utf-8')
print(content)

  百度搜索带多个查询参数时,使用urlencode一次性编码并拼接:

import urllib.request
import urllib.parse

# Send a browser-like User-Agent header along with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62',
}
# Query parameters for the search URL.
data = {
    'wd': '周杰伦',
    'sex': '男',
    'location': '台湾',
}
# urlencode() percent-encodes every key/value and joins the pairs with '&'.
reqData = urllib.parse.urlencode(data)
url = 'https://www.baidu.com/s?' + reqData
print(reqData)
request = urllib.request.Request(url=url, headers=headers)
# The context manager closes the connection after the body is read.
with urllib.request.urlopen(request) as response:
    content = response.read().decode('utf-8')
print(content)