# 大数据分析与可视化 之 有道翻译-2 (Big Data Analysis and Visualization: Youdao Translate, part 2)
# coding:utf8
import base64
import hashlib
import json
import time
from hashlib import md5
import requests
from Crypto.Cipher import AES
class YoudaoSpider(object):
    """Small client for the Youdao web-translate endpoint.

    A translation request is signed with an MD5 digest built from a
    millisecond timestamp, posted as a form, and the response body
    (URL-safe Base64 of AES-128-CBC ciphertext) is decrypted and parsed
    as JSON.
    """

    # Seeds whose MD5 digests serve as the AES-128-CBC key and IV for
    # decrypting the response body.
    _AES_KEY_SEED = b'ydsecret://query/key/B*RGygVywfNBwpmBaZg*WT7SIOUP2T0C9WHMZN39j^DAdaZhAnxvGcCY6VYFwnHl'
    _AES_IV_SEED = b'ydsecret://query/iv/C@lZe2YzHtZ2CYgaXKSVfsb7Y4QWHjITPPZ0nQp87fBeJ!Iv6v^6fvi2WN@bYpJ4'

    # AES block size in bytes; PKCS#7 padding length is always 1..16.
    _BLOCK_SIZE = 16

    # Seconds to wait for the server before giving up; without a timeout
    # requests may block forever.
    _TIMEOUT = 10

    def __init__(self):
        """Set the endpoint URL and the browser-like request headers."""
        # The URL must be the POST address seen when capturing the browser
        # traffic.
        # NOTE(review): the original note said a "_o" suffix has to be
        # removed from the captured URL (requesting the "_o" variant
        # directly reportedly returns {"errorCode":50}); the URL below has
        # no such suffix, so that note may refer to an older endpoint.
        self.url = 'https://dict.youdao.com/webtranslate'
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.81",
            "Cookie": "OUTFOX_SEARCH_USER_ID=-695549188@10.108.162.133; OUTFOX_SEARCH_USER_ID_NCOO=1438427639.5089219; "
                      "UM_distinctid=18a93ceffd272c-0851d10d932231-26031f51-384000-18a93ceffd39f1; "
                      "__yadk_uid=8hXAsOgNThoPb2WV5P6XAoREbUSlHjNe; rollNum=true",
            # Fixed: the value used to contain embedded spaces
            # ("https: // fanyi.youdao.com"), which is not a valid origin.
            "Origin": "https://fanyi.youdao.com",
            "Referer": "https://fanyi.youdao.com/"
        }

    def get_lts_salt_sign(self):
        """Return ``(lts, sign)`` for one request.

        ``lts`` is the current time in milliseconds as a decimal string;
        ``sign`` is the hex MD5 digest of the query string
        ``client=fanyideskweb&mysticTime=<lts>&product=webfanyi&key=...``.
        """
        lts = str(int(time.time() * 1000))
        to_sign = "client=fanyideskweb&mysticTime=" + lts + "&product=webfanyi&key=fsdsogkndfokasodnaso"
        sign = md5(to_sign.encode()).hexdigest()
        return lts, sign

    @staticmethod
    def _strip_pkcs7(raw):
        """Return *raw* without its trailing PKCS#7 padding.

        Raises:
            ValueError: if *raw* is empty or the padding bytes are malformed.
        """
        if not raw:
            raise ValueError("decrypted payload is empty")
        pad_len = raw[-1]
        expected_tail = bytes([pad_len]) * pad_len
        if pad_len < 1 or pad_len > YoudaoSpider._BLOCK_SIZE or raw[-pad_len:] != expected_tail:
            raise ValueError("invalid PKCS#7 padding in decrypted payload")
        return raw[:-pad_len]

    def _decrypt(self, body):
        """Decode and decrypt the response text *body* into a JSON string."""
        # An MD5 digest is exactly 16 bytes, i.e. an AES-128 key / CBC IV.
        cipher = AES.new(
            hashlib.md5(self._AES_KEY_SEED).digest(),
            AES.MODE_CBC,
            hashlib.md5(self._AES_IV_SEED).digest(),
        )
        padded = cipher.decrypt(base64.urlsafe_b64decode(body))
        # Strip the padding on the raw bytes; counting braces in the text
        # breaks as soon as a brace appears inside a JSON string.
        return self._strip_pkcs7(padded).decode('utf-8')

    def attack_yd(self, word):
        """Translate *word*, print the result and return it.

        Args:
            word: the text to translate.

        Returns:
            The ``tgt`` field of the first entry in ``translateResult``.

        Raises:
            requests.RequestException: on network failure, timeout or a
                non-success HTTP status.
            ValueError: if the response cannot be decoded, decrypted or
                parsed as JSON.
            KeyError, IndexError: if the decrypted JSON has no
                ``translateResult[0][0]["tgt"]`` entry.
        """
        lts, sign = self.get_lts_salt_sign()
        data = {
            "i": word,
            "from": "auto",
            "to": "",
            "dictResult": "true",
            "keyid": "webfanyi",
            "sign": sign,
            "client": "fanyideskweb",
            "product": "webfanyi",
            "appVersion": "1.0.0",
            "vendor": "web",
            "pointParam": "client,mysticTime,product",
            "mysticTime": lts,
            "keyfrom": "fanyi.web"
        }
        # Submit the form with requests.post().
        response = requests.post(
            url=self.url,
            data=data,
            headers=self.headers,
            timeout=self._TIMEOUT,
        )
        response.raise_for_status()

        # The body is encrypted text rather than JSON; decrypt it first and
        # then convert the JSON string into Python data.
        plaintext = self._decrypt(response.text)
        print(plaintext)
        res_json = json.loads(plaintext)
        # Example of an expected payload:
        # {"translateResult":[[{"tgt":"hello","src":"你好"}]],"errorCode":0,"type":"zh-CHS2en"}
        result = str(res_json["translateResult"][0][0]["tgt"])
        print('翻译结果:', result)
        return result

    def run(self):
        """Prompt for a word on stdin and translate it.

        Any exception is printed instead of propagated, so the script
        ends with a message rather than a traceback.
        """
        try:
            word = input('请输入要翻译的单词:')
            self.attack_yd(word)
        except Exception as e:
            print(e)
if __name__ == '__main__':
    # Script entry point: build the spider and run one interactive
    # translation.
    YoudaoSpider().run()