百度翻译api抓取分析

本文最后更新于:2024年6月8日 晚上

开始

自从上次写了有道翻译后,这次来弄个百度翻译的api

要开学了


抓取数据

这里的抓包软件,我就不多说了,推荐大家用小黄鸟电脑版,地址->点我

打开小黄鸟和百度翻译,先翻译一个单词先,最好翻译两个不同的单词

这边小黄鸟抓到了一个api,名为:v2transapi

这里可以看到有两个参数 sign 和 ts 每次都不一样,query 是我们要翻译的内容

对于这种情况,我尝试去除ts参数,发现还是可以访问的,但去除sign就不行了,因此我们需要分析sign参数


解密sign参数

先找到这个api的来源js文件

发现文件都是同一个,那就直接进去就好了

先格式化代码

先全局搜索sign,发现有41个,一个一个查找

这里的sign调用了b函数并传入了e参数,e参数对应的字段为query,就是我们要翻译的内容。按下暂停键开始调试

被跳转到其他js文件了,点回去,发现token也是和抓的参数一样的

唯独这个b函数没东西,这里可以尝试再翻译一次:在左边点第一个图标,然后输入新内容,果然不出所料,e参数正常显示了

此时b函数也有地址,点进去

这个类应该就是加密算法所在的地方了,先对第一行的var o,i打断点,可以看到t参数就是我们要翻译的内容,我们复制整个类

同时把t.exports改成sign

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
// Signature routine as copied out of the site bundle, with the module's
// `t.exports` renamed to `sign`.
// NOTE: `sign` is assigned without a declaration, and the body reads three free
// names that this snippet does not define: `r` (seed string), `n` (mixing
// helper) and `e` (array-copy helper used by the spread chain below). Called on
// its own it throws a ReferenceError as soon as `r` is read.
// @param {string} t  Text to sign.
// @returns {string}  "<b>.<b ^ f>", where b is the mixed hash reduced modulo 1e6.
sign = function(t) {
// i: every surrogate pair found in t, or null when there is none.
var o, i = t.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
if (null === i) {
// No surrogate pairs: inputs longer than 30 UTF-16 units are shortened to the
// first 10 units + 10 units around the middle + the last 10 units.
var a = t.length;
a > 30 && (t = "".concat(t.substr(0, 10)).concat(t.substr(Math.floor(a / 2) - 5, 10)).concat(t.substr(-10, 10)))
} else {
// Surrogate pairs present: split t around the pairs and rebuild it as a list `l`
// in which every pair is a single element, so the 10/10/10 cut below never
// splits a pair. The chained function expressions look like a transpiler-
// generated array-spread helper for `o`; they call the undefined `e`.
for (var s = t.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), c = 0, u = s.length, l = []; c < u; c++)
"" !== s[c] && l.push.apply(l, function(t) {
if (Array.isArray(t))
return e(t)
}(o = s[c].split("")) || function(t) {
if ("undefined" != typeof Symbol && null != t[Symbol.iterator] || null != t["@@iterator"])
return Array.from(t)
}(o) || function(t, n) {
if (t) {
if ("string" == typeof t)
return e(t, n);
var r = Object.prototype.toString.call(t).slice(8, -1);
return "Object" === r && t.constructor && (r = t.constructor.name),
"Map" === r || "Set" === r ? Array.from(t) : "Arguments" === r || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(r) ? e(t, n) : void 0
}
}(o) || function() {
throw new TypeError("Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")
}()),
c !== u - 1 && l.push(i[c]);
// Same 10 + 10 + 10 shortening, counted in list elements instead of UTF-16 units.
var p = l.length;
p > 30 && (t = l.slice(0, 10).join("") + l.slice(Math.floor(p / 2) - 5, Math.floor(p / 2) + 5).join("") + l.slice(-10).join(""))
}
// d spells "gtk". The seed `r` (falling back to window.gtk when r is null) is
// split on "." into the numbers f and m. The loop then writes t into g as a
// UTF-8 style byte sequence (1 to 4 bytes per character).
for (var d = "".concat(String.fromCharCode(103)).concat(String.fromCharCode(116)).concat(String.fromCharCode(107)), h = (null !== r ? r : (r = window[d] || "") || "").split("."), f = Number(h[0]) || 0, m = Number(h[1]) || 0, g = [], y = 0, v = 0; v < t.length; v++) {
var _ = t.charCodeAt(v);
_ < 128 ? g[y++] = _ : (_ < 2048 ? g[y++] = _ >> 6 | 192 : (55296 == (64512 & _) && v + 1 < t.length && 56320 == (64512 & t.charCodeAt(v + 1)) ? (_ = 65536 + ((1023 & _) << 10) + (1023 & t.charCodeAt(++v)),
g[y++] = _ >> 18 | 240,
g[y++] = _ >> 12 & 63 | 128) : g[y++] = _ >> 12 | 224,
g[y++] = _ >> 6 & 63 | 128),
g[y++] = 63 & _ | 128)
}
// w spells "+-a^+6" and k spells "+-3^+b+-f"; both are operation strings passed
// to n. Starting from f, every byte is added to b and mixed with w.
for (var b = f, w = "".concat(String.fromCharCode(43)).concat(String.fromCharCode(45)).concat(String.fromCharCode(97)) + "".concat(String.fromCharCode(94)).concat(String.fromCharCode(43)).concat(String.fromCharCode(54)), k = "".concat(String.fromCharCode(43)).concat(String.fromCharCode(45)).concat(String.fromCharCode(51)) + "".concat(String.fromCharCode(94)).concat(String.fromCharCode(43)).concat(String.fromCharCode(98)) + "".concat(String.fromCharCode(43)).concat(String.fromCharCode(45)).concat(String.fromCharCode(102)), x = 0; x < g.length; x++)
b = n(b += g[x], w);
// Final mix with k, XOR with m, map a negative result to its unsigned 32-bit
// value, reduce modulo 1e6, then emit "<b>.<b ^ f>".
return b = n(b, k),
(b ^= m) < 0 && (b = 2147483648 + (2147483647 & b)),
"".concat((b %= 1e6).toString(), ".").concat(b ^ f)
}

接下来开始调试这个js代码,我这里用浏览器调试:先把上面的js代码写到1.js,再创建一个1.html引用该js文件,同时也可以在这段js代码下面加一句console.log(sign("abc"));

这样按你键盘上的F12,点击console,就能看到输出结果了

错误 r变量未定义

我们回到该函数,对r变量打断点,打两次,发现值不变,说明该变量是个定值,得到r="320305.131321201"

再把r变量补充回去,然后n函数也报错,同样是未定义

可以看到n函数属于一个类,返回到刚刚的js,把n函数复制过来

再调用一下,发现没报错了,获得计算出来的sign,可以看到和官方计算的一致

最后的js

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
 /**
 * Runs the operation string `e` over the accumulator `t`.
 *
 * `e` is read three characters at a time:
 *   [0] "+" adds the shifted value (truncated to signed 32 bits), anything
 *       else XORs it in;
 *   [1] "+" shifts right unsigned (>>>), anything else shifts left (<<);
 *   [2] shift amount: a digit, or a letter from "a" upwards meaning 10, 11, ...
 * A trailing group shorter than three characters is ignored.
 *
 * @param {number} t  Accumulator.
 * @param {string} e  Operation string, e.g. "+-a^+6".
 * @returns {number}  The mixed accumulator.
 */
function n(t, e) {
  for (var i = 0; i < e.length - 2; i += 3) {
    var amountChar = e.charAt(i + 2);
    var amount = "a" <= amountChar ? amountChar.charCodeAt(0) - 87 : Number(amountChar);
    var shifted = "+" === e.charAt(i + 1) ? t >>> amount : t << amount;
    // `& 4294967295` goes through ToInt32, so the sum wraps to signed 32 bits.
    t = "+" === e.charAt(i) ? (t + shifted) & 4294967295 : t ^ shifted;
  }
  return t;
}

// Seed observed in the page while debugging, in the form "<f>.<m>".
// sign() splits it on "." and uses both halves as numbers.
var r = "320305.131321201";

/**
 * Computes the `sign` request parameter for the text `t`.
 *
 * Steps:
 *   1. Inputs longer than 30 characters are shortened to the first 10, the 10
 *      around the middle and the last 10. When the text contains surrogate
 *      pairs, each pair counts as one character so a pair is never cut in half.
 *   2. The (possibly shortened) text is turned into UTF-8 style bytes.
 *   3. Starting from f, every byte is added and mixed with n(..., "+-a^+6"),
 *      followed by a final n(..., "+-3^+b+-f"), an XOR with m, conversion to an
 *      unsigned value and a reduction modulo 1e6.
 *
 * Differences from the copied bundle code:
 *   - `sign` is a declared function instead of an implicit global;
 *   - the transpiled spread helper chain, which called the undefined helper
 *     `e` and therefore threw for any text mixing surrogate pairs with other
 *     characters, is replaced by a plain loop;
 *   - the `window.gtk` fallback (used only when `r` is null/undefined) no
 *     longer throws where `window` does not exist.
 * Only ES5 syntax is used.
 *
 * @param {string} t  Text to sign.
 * @returns {string}  "<b>.<b ^ f>".
 */
function sign(t) {
  var pairs = t.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
  if (pairs === null) {
    var len = t.length;
    if (len > 30) {
      var half = Math.floor(len / 2);
      t = t.slice(0, 10) + t.slice(half - 5, half + 5) + t.slice(-10);
    }
  } else {
    // Rebuild the text as a list in which every surrogate pair is one element.
    var segments = t.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/);
    var units = [];
    for (var s = 0; s < segments.length; s++) {
      for (var c = 0; c < segments[s].length; c++) {
        units.push(segments[s].charAt(c));
      }
      // split() yields one more segment than there are pairs.
      if (s !== segments.length - 1) {
        units.push(pairs[s]);
      }
    }
    if (units.length > 30) {
      var mid = Math.floor(units.length / 2);
      t = units.slice(0, 10).join("") +
        units.slice(mid - 5, mid + 5).join("") +
        units.slice(-10).join("");
    }
  }

  // Resolve the seed; fall back to window.gtk only when r was cleared.
  if (r == null) {
    r = (typeof window !== "undefined" && window.gtk) || "";
  }
  var seed = String(r).split(".");
  var f = Number(seed[0]) || 0;
  var m = Number(seed[1]) || 0;

  // Encode the text as bytes: 1 byte below 0x80, 2 below 0x800, 4 for a valid
  // surrogate pair, otherwise 3 (which also covers an unpaired surrogate).
  var bytes = [];
  for (var v = 0; v < t.length; v++) {
    var code = t.charCodeAt(v);
    if (code < 128) {
      bytes.push(code);
    } else if (code < 2048) {
      bytes.push((code >> 6) | 192, (63 & code) | 128);
    } else if (55296 === (64512 & code) && v + 1 < t.length && 56320 === (64512 & t.charCodeAt(v + 1))) {
      code = 65536 + ((1023 & code) << 10) + (1023 & t.charCodeAt(++v));
      bytes.push((code >> 18) | 240, ((code >> 12) & 63) | 128, ((code >> 6) & 63) | 128, (63 & code) | 128);
    } else {
      bytes.push((code >> 12) | 224, ((code >> 6) & 63) | 128, (63 & code) | 128);
    }
  }

  // The bundle builds these two operation strings with String.fromCharCode.
  var b = f;
  for (var x = 0; x < bytes.length; x++) {
    b = n(b + bytes[x], "+-a^+6");
  }
  b = n(b, "+-3^+b+-f");
  b ^= m;
  if (b < 0) {
    // Reinterpret the signed 32-bit value as unsigned.
    b = 2147483648 + (2147483647 & b);
  }
  b %= 1e6;
  return b.toString() + "." + (b ^ f);
}


示例文件

参数都分析完了,可以去写成python爬虫,python有个可以执行js的库,叫execjs

以下是示例代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import execjs
import httpx

a_ = "123454"  # text to translate

# Read the signing script (the final JavaScript listing saved as 1.js).
# The `with` block closes the file, so no explicit close() is needed.
with open("1.js", "r", encoding="utf_8") as f:
    l = f.read()

# Compile once and reuse the context for every signature.
_sign_ctx = execjs.compile(l)


def get_sign(t):
    """Return the `sign` value that the JavaScript `sign` function computes for `t`.

    The text is handed over as a call argument instead of being pasted into
    JavaScript source, so quotes, backslashes and newlines in `t` cannot break
    or alter the evaluated script.
    """
    return _sign_ctx.call("sign", t)


# NOTE(review): the Cookie and token below are fixed values captured from a
# browser session; presumably they have to be refreshed when they expire.
a = httpx.post(
    url="https://fanyi.baidu.com/v2transapi?from=zh&to=en",
    headers={
        "Content-Type":"application/x-www-form-urlencoded",
        "Referer":"https://fanyi.baidu.com/?aldtype=16047&ext_channel=Aldtype",
        "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
        "Cookie":"BIDUPSID=CC748520E2E056F7B2519245A2119494; PSTM=1705042679; newlogin=1; BAIDUID=CC748520E2E056F7AF71947BEC84B6DB:SL=0:NR=10:FG=1; APPGUIDE_10_6_9=1; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; H_PS_PSSID=40124_40201_40210_40207_40215_40224_40059; H_WISE_SIDS_BFESS=40124_40201_40210_40207_40215_40224; MCITY=-%3A; BA_HECTOR=2ga58ga1a02ga48g008hak85la62q21it1klb1s; BAIDUID_BFESS=CC748520E2E056F7AF71947BEC84B6DB:SL=0:NR=10:FG=1; ZFY=zQm4gfD8FyBjiADoVhXYxys90nEGWKbjkR6Chhyw1GY:C; H_WISE_SIDS=40124_40201_40210_40207_40215_40224_40059; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1708188279,1708188554,1708188691,1708191917; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1708191920; ab_sr=1.0.1_YWM1YTg2NzQwNjFkZGU3MzhmMzljMGU1YzAyZDk5NWQ0M2NiODYyMGQyZDJlYmFlZGYyN2ExOTZiMWFmMWQ2ZTU1M2MzYjdjMzBhYmQ0NzE2YWZlNDg3NmQ5ZTBmYjVjNmE1OTgzZjczZTcwMzdlZjhiMDhiNDBhZTgzZDZjNTMzYTg1ZmRmODVlYjY4MGM2ZjEzNTczYmRkNDQ5YjY3OA=="
    },
    data={
        "from":"zh",
        "to":"en",
        "query":a_,
        "transtype":"translang",
        "simple_means_flag":3,
        "sign":get_sign(a_),
        "token":"d8536631afc6444316453f86746e3a4a",
        "domain":"common"
    }
)

print(a.text)

最后

编写不易


百度翻译api抓取分析
http://blog.bingyue.top/2024/02/18/baidu_fanyi/
作者
bingyue
发布于
2024年2月18日
许可协议