python爬取谷歌翻译 爬虫实践

Julia ·
更新时间:2024-09-20
· 814 次阅读

python爬取谷歌翻译 爬虫实践

首先安装第三方库 PyExecJS(用于在 Python 中执行 JavaScript 代码):

pip3 install PyExecJS -i https://pypi.tuna.tsinghua.edu.cn/simple

如果在 macOS 系统上运行代码时报错(例如 SSL 证书验证失败),请在终端运行如下命令:

/Applications/Python\ 3.7/Install\ Certificates.command ; exit;

英译中代码:

# ----------------------------------------------------------------------
# Script 1: English -> Chinese.
#
# Prompts for English text, computes the "tk" request token with the
# JavaScript routine below (run through PyExecJS), calls the Google
# Translate web endpoint and prints the raw response body.
#
# NOTE(review): the endpoint host and the hard-coded token seed
# (406644 / 3293161072) come from the original article; confirm that the
# service still accepts them before relying on this script.
# ----------------------------------------------------------------------
import urllib.parse
import urllib.request

import execjs


# Google Translate
class Py4Js():
    """Wrap the JavaScript routine that derives the ``tk`` token for a text."""

    def __init__(self):
        # TL() turns the input into UTF-8 byte values, folds every byte into
        # an accumulator seeded with ``b`` via RL(), then formats the result
        # as "<number>.<number ^ b>".  RL() applies the shift/add/xor steps
        # described by its three-character op-codes ("+-a", "^+6", ...).
        self.ctx = execjs.compile("""
function TL(a) {
    var k = "";
    var b = 406644;
    var b1 = 3293161072;
    var jd = ".";
    var $b = "+-a^+6";
    var Zb = "+-3^+b+-f";
    for (var e = [], f = 0, g = 0; g < a.length; g++) {
        var m = a.charCodeAt(g);
        128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023), e[f++] = m >> 18 | 240, e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224, e[f++] = m >> 6 & 63 | 128), e[f++] = m & 63 | 128)
    }
    a = b;
    for (f = 0; f < e.length; f++) a += e[f], a = RL(a, $b);
    a = RL(a, Zb);
    a ^= b1 || 0;
    0 > a && (a = (a & 2147483647) + 2147483648);
    a %= 1E6;
    return a.toString() + jd + (a ^ b)
};

function RL(a, b) {
    var t = "a";
    var Yb = "+";
    for (var c = 0; c < b.length - 2; c += 3) {
        var d = b.charAt(c + 2),
            d = d >= t ? d.charCodeAt(0) - 87 : Number(d),
            d = b.charAt(c + 1) == Yb ? a >>> d : a << d;
        a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
    }
    return a
}
""")

    def getTk(self, text):
        """Return the tk token string for *text* by calling the JS ``TL`` function."""
        return self.ctx.call("TL", text)


# Read the text to translate.
key = input("请输入要翻译的英文文字(英译中):")

# Compute the token once and reuse it for both the printout and the URL.
py = Py4Js()
tk = py.getTk(key)
print(tk)  # show the tk value

# Percent-encode the query text: spaces or non-ASCII characters in a raw
# URL make the request invalid.
data = urllib.parse.urlencode({"q": key})

# Request URL.
url = (
    "https://translate.google.cn/translate_a/single?client=webapp&sl=en&tl=zh-CN&hl=zh-CN"
    "&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t"
    "&otf=1&ssel=0&tsel=0&kc=7&tk=" + tk + "&" + data
)

# Request headers.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
}

# Build the request object.
req = urllib.request.Request(url, headers=header)

# The endpoint answers with JSON text; decode it explicitly as UTF-8.
# The context manager closes the connection once the body has been read.
with urllib.request.urlopen(req) as response:
    resp = response.read().decode("utf-8")

# Print the raw response.
print(resp)


# ----------------------------------------------------------------------
# 中译英代码:
# Script 2: Chinese -> English.  Save and run it as a separate file; it
# is self-contained and mirrors script 1 with the language pair swapped.
# ----------------------------------------------------------------------
import urllib.parse
import urllib.request

import execjs


# Google Translate
class Py4Js():
    """Wrap the JavaScript routine that derives the ``tk`` token for a text."""

    def __init__(self):
        # Same token routine as in the English -> Chinese script.
        self.ctx = execjs.compile("""
function TL(a) {
    var k = "";
    var b = 406644;
    var b1 = 3293161072;
    var jd = ".";
    var $b = "+-a^+6";
    var Zb = "+-3^+b+-f";
    for (var e = [], f = 0, g = 0; g < a.length; g++) {
        var m = a.charCodeAt(g);
        128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023), e[f++] = m >> 18 | 240, e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224, e[f++] = m >> 6 & 63 | 128), e[f++] = m & 63 | 128)
    }
    a = b;
    for (f = 0; f < e.length; f++) a += e[f], a = RL(a, $b);
    a = RL(a, Zb);
    a ^= b1 || 0;
    0 > a && (a = (a & 2147483647) + 2147483648);
    a %= 1E6;
    return a.toString() + jd + (a ^ b)
};

function RL(a, b) {
    var t = "a";
    var Yb = "+";
    for (var c = 0; c < b.length - 2; c += 3) {
        var d = b.charAt(c + 2),
            d = d >= t ? d.charCodeAt(0) - 87 : Number(d),
            d = b.charAt(c + 1) == Yb ? a >>> d : a << d;
        a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
    }
    return a
}
""")

    def getTk(self, text):
        """Return the tk token string for *text* by calling the JS ``TL`` function."""
        return self.ctx.call("TL", text)


# Read the text to translate.
key = input("请输入要翻译的中文文字(中译英):")

# Compute the token once and reuse it for both the printout and the URL.
py = Py4Js()
tk = py.getTk(key)
print(tk)  # show the tk value

# Percent-encode the Chinese text so it can be placed in the query string.
data = urllib.parse.urlencode({"q": key})

# Request URL.
url = (
    "https://translate.google.cn/translate_a/single?client=webapp&sl=zh-CN&tl=en&hl=zh-CN"
    "&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t"
    "&ssel=6&tsel=3&kc=1&tk=" + tk + "&" + data
)
print(url)  # show the final URL

# Request headers.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
}

# Build the request object.
req = urllib.request.Request(url, headers=header)

# The endpoint answers with JSON text; decode it explicitly as UTF-8.
# The context manager closes the connection once the body has been read.
with urllib.request.urlopen(req) as response:
    resp = response.read().decode("utf-8")

# Print the raw response.
print(resp)
作者:Ricardo.Baby



爬虫 谷歌 Python

需要 登录 后方可回复, 如果你还没有账号请 注册新账号