失眠网,内容丰富有趣,生活中的好帮手!
失眠网 > python爬虫JS逆向加密破解之百度翻译

python爬虫JS逆向加密破解之百度翻译

时间:2021-03-14 13:41:09

相关推荐

python爬虫JS逆向加密破解之百度翻译

最近在从基础学习JS逆向,来分享一下百度翻译JS逆向的整个过程,也有助于自己加深记忆。

JS逆向可以说是爬虫工程师必备的知识点了,但是如果对前端知识不够了解还是学起来很有难度的。

想学习的话可以在B站找找JS逆向的课程

废话不多说,我们正式开始。

首先,我们进入百度翻译(https://fanyi.baidu.com/),然后按F12打开开发者工具抓包:

因为是异步加载,所以需要抓XHR,找到数据接口链接:https://fanyi.baidu.com/v2transapi?from=en&to=zh,然后对两次抓包的数据进行对比分析:

可以看到,在form表单里其他数据都是不变的,只有sign这个参数是变化的,下面先写代码不带sign请求一下这个网址,看看能不能得到结果:

看来不带sign这个参数是拿不到数据的,下面我们就来破解一下这个参数:

首先:全局搜索sign,找到对应的js文件

分析这些js文件,发现sign很可能出现在第一个index文件中,我们先分析index.js文件:

ctrl+f搜索sign:找来找去找到这个函数,看起来跟form表单的数据非常像,可以看出,sign是f(n)这个函数生成的:

我们先打个断点确认一下是不是这个sign:

下面我们找到f(n)这个函数(可以直接点击f(n)找到):

非常复杂对吧,如果用python代码来还原整个JS逻辑,那就太难了,所以接下来就需要用到一个第三方库,叫:pyexecjs,这个库可以执行我们的js文件,直接 pip install pyexecjs 就可以安装

创建一个js文件后将这段js代码放进去:

/**
 * Sign generator as copied from the site's minified index.js (first, incomplete paste).
 *
 * What the code below does, in order:
 *   1. Shortens `r` when it is longer than 30 units to its first 10, middle 10 and
 *      last 10 units. If `r` contains surrogate pairs the units are counted per pair
 *      (that branch calls a helper `a`, which is not part of this snippet).
 *   2. Reads the free variable `i`; if it is null it falls back to `window["gtk"]`
 *      (the key is assembled from char codes 103, 116, 107). The value is split on "."
 *      into two numbers `m` and `s`.
 *   3. Encodes `r` into UTF-8 style byte values.
 *   4. Starting from `m`, adds each byte and mixes with `n(p, "+-a^+6")`, then applies
 *      `n(p, "+-3^+b+-f")`, XORs with `s`, makes the value non-negative and reduces it
 *      modulo 1e6.
 *
 * NOTE: `i`, `n`, `a` and `window` are not defined anywhere in this snippet, so running
 * it on its own throws a ReferenceError until they are supplied.
 *
 * @param {string} r  Text to be signed.
 * @returns {string}  "<p>.<p ^ m>" where p is the reduced hash.
 */
function e(r) {var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);if (null === o) {var t = r.length;t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))} else {for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++)"" !== e[C] && f.push.apply(f, a(e[C].split(""))),C !== h - 1 && f.push(o[C]);var g = f.length;g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""))}var u = void 0, l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);u = null !== i ? i : (i = window[l] || "") || "";for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {var A = r.charCodeAt(v);128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)),S[c++] = A >> 18 | 240,S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224,S[c++] = A >> 6 & 63 | 128),S[c++] = 63 & A | 128)}for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++)p += S[b],p = n(p, F);return p = n(p, D),p ^= s,0 > p && (p = (2147483647 & p) + 2147483648),p %= 1e6,p.toString() + "." + (p ^ m)}

这是我没用execjs库用requests直接请求的代码:

import os

import requests

# Baidu Translate XHR endpoint; the from/to query parameters are added per request.
API_URL = "https://fanyi.baidu.com/v2transapi"

# Never hard-code a logged-in Cookie header (it carries session credentials such as
# BDUSS). Copy your own from the browser's dev tools into this environment variable.
COOKIE = os.environ.get("BAIDU_FANYI_COOKIE", "")

# Form token sent with every request. NOTE(review): presumably tied to the browser
# session the cookie came from - replace it together with the cookie.
TOKEN = os.environ.get("BAIDU_FANYI_TOKEN", "9a20246ac075f19baf61fd6ea99bd648")

USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"
)


def request(word):
    """POST ``word`` to the translate endpoint WITHOUT a ``sign`` field and print the JSON reply.

    This is the "does it work without sign?" experiment, so ``sign`` is
    deliberately omitted from the form data.

    :param word: text to translate; a leading ASCII letter selects en -> zh,
        anything else (including an empty string) selects zh -> en.
    """
    # word[:1] is safe on empty input; isalpha() also accepts capital letters.
    first = word[:1]
    src = "en" if first.isascii() and first.isalpha() else "zh"
    dst = "zh" if src == "en" else "en"

    headers = {"User-Agent": USER_AGENT, "Cookie": COOKIE}
    form_data = {
        "from": src,
        "to": dst,
        "query": word,
        "transtype": "realtime",
        "simple_means_flag": 3,
        # "sign" intentionally missing - see docstring.
        "token": TOKEN,
        "domain": "common",
    }
    response = requests.post(
        API_URL,
        params={"from": src, "to": dst},
        headers=headers,
        data=form_data,
        timeout=10,  # requests has no default timeout and could hang forever
    )
    print(response.json())


if __name__ == "__main__":
    request("cat")

下面我们就来引入这个库,让这个第三方库去执行js代码,看看返回了什么结果:

import execjs


def get_sign(word):
    """Return the ``sign`` form field for ``word``.

    Reads ``demo01.js`` from the current working directory, compiles it with
    PyExecJS and calls the JavaScript function ``e`` with ``word``.

    :param word: text that is going to be translated.
    :returns: whatever the JavaScript function ``e`` returns.
    """
    with open("demo01.js", "r", encoding="utf8") as f:
        jscode = f.read()
    # execjs.compile() takes the JavaScript source text read from the file;
    # .call() takes the name of the JS function first, then that function's arguments.
    sign = execjs.compile(jscode).call("e", word)
    return sign

这是完整的代码:

import os

import execjs
import requests

# Baidu Translate XHR endpoint; the from/to query parameters are added per request.
API_URL = "https://fanyi.baidu.com/v2transapi"

# Never hard-code a logged-in Cookie header (it carries session credentials such as
# BDUSS). Copy your own from the browser's dev tools into this environment variable.
COOKIE = os.environ.get("BAIDU_FANYI_COOKIE", "")

# Form token sent with every request. NOTE(review): presumably tied to the browser
# session the cookie came from - replace it together with the cookie.
TOKEN = os.environ.get("BAIDU_FANYI_TOKEN", "9a20246ac075f19baf61fd6ea99bd648")

USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"
)


def get_sign(word):
    """Compute the ``sign`` field by running the JS function ``e`` from ``demo01.js``."""
    with open("demo01.js", "r", encoding="utf8") as f:
        jscode = f.read()
    # compile() receives the JS source; call() receives the function name and its arguments.
    return execjs.compile(jscode).call("e", word)


def request(word):
    """POST ``word`` together with its computed ``sign`` and print the JSON reply.

    :param word: text to translate; a leading ASCII letter selects en -> zh,
        anything else (including an empty string) selects zh -> en.
    """
    sign = get_sign(word)

    # word[:1] is safe on empty input; isalpha() also accepts capital letters.
    first = word[:1]
    src = "en" if first.isascii() and first.isalpha() else "zh"
    dst = "zh" if src == "en" else "en"

    headers = {"User-Agent": USER_AGENT, "Cookie": COOKIE}
    form_data = {
        "from": src,
        "to": dst,
        "query": word,
        "transtype": "realtime",
        "simple_means_flag": 3,
        "sign": sign,
        "token": TOKEN,
        "domain": "common",
    }
    response = requests.post(
        API_URL,
        params={"from": src, "to": dst},
        headers=headers,
        data=form_data,
        timeout=10,  # requests has no default timeout and could hang forever
    )
    print(response.json())


if __name__ == "__main__":
    request("cat")

我们再来请求一下:可以发现报错了!i参数未定义

我们来找找i在哪,回到js文件中,可以发现,i就在function e(r)这个函数中,继续打断点调试:

可以发现i和u是一个判断逻辑,我们在console控制台打印一下i的值:

这样我们就找到了i,然后在function e(r)中加入i:var i = "320305.131321201";

再次运行:

发现又报错了,缺少对象,那缺少什么对象呢?这儿就比较难分析了,我们从这个函数的返回值来分析,return p = n(p, D),可以看到这个n也是一个函数,但是这个n并没有出现在我们的js文件里面,所以我们去找这个n函数,发现它就在function e(r)的上面,我们把这个函数复制下来,加到js文件里面

再次运行,就发现得到了返回的结果:

然后我们优化一下返回值代码,得到最终结果:

我把最终的两个文件都发一下吧:

demo01.py

import os

import execjs
import requests

# Baidu Translate XHR endpoint; the from/to query parameters are added per request.
API_URL = "https://fanyi.baidu.com/v2transapi"

# Never hard-code a logged-in Cookie header (it carries session credentials such as
# BDUSS). Copy your own from the browser's dev tools into this environment variable.
COOKIE = os.environ.get("BAIDU_FANYI_COOKIE", "")

# Form token sent with every request. NOTE(review): presumably tied to the browser
# session the cookie came from - replace it together with the cookie.
TOKEN = os.environ.get("BAIDU_FANYI_TOKEN", "9a20246ac075f19baf61fd6ea99bd648")

USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"
)


def get_sign(word):
    """Compute the ``sign`` field by running the JS function ``e`` from ``demo01.js``."""
    with open("demo01.js", "r", encoding="utf8") as f:
        jscode = f.read()
    # compile() receives the JS source; call() receives the function name and its arguments.
    return execjs.compile(jscode).call("e", word)


def request(word):
    """Translate ``word`` and print the first entry of ``trans_result.data``.

    :param word: text to translate; a leading ASCII letter selects en -> zh,
        anything else (including an empty string) selects zh -> en.
    """
    sign = get_sign(word)

    # word[:1] is safe on empty input; isalpha() also accepts capital letters.
    first = word[:1]
    src = "en" if first.isascii() and first.isalpha() else "zh"
    dst = "zh" if src == "en" else "en"

    headers = {"User-Agent": USER_AGENT, "Cookie": COOKIE}
    form_data = {
        "from": src,
        "to": dst,
        "query": word,
        "transtype": "realtime",
        "simple_means_flag": 3,
        "sign": sign,
        "token": TOKEN,
        "domain": "common",
    }
    response = requests.post(
        API_URL,
        params={"from": src, "to": dst},
        headers=headers,
        data=form_data,
        timeout=10,  # requests has no default timeout and could hang forever
    )
    print(response.json()["trans_result"]["data"][0])


if __name__ == "__main__":
    request("cat")

demo01.js

/**
 * Run the bit-mixing program `o` over the hash state `r`.
 *
 * `o` is consumed three characters at a time: [combine][direction][amount].
 *   - amount:    a digit, or a letter at or above "a" where "a" means 10, "b" 11, ...
 *   - direction: "+" shifts `r` right with >>> ; anything else shifts it left with <<
 *   - combine:   "+" adds the shifted value to `r` and wraps to 32 bits;
 *                anything else XORs the shifted value into `r`
 *
 * @param {number} r  Current hash state.
 * @param {string} o  Mixing program, e.g. "+-a^+6".
 * @returns {number}  New state (a signed 32-bit integer once at least one step ran).
 */
function n(r, o) {
  for (var t = 0; t < o.length - 2; t += 3) {
    var amountChar = o.charAt(t + 2);
    var amount = amountChar >= "a" ? amountChar.charCodeAt(0) - 87 : Number(amountChar);
    var shifted = o.charAt(t + 1) === "+" ? r >>> amount : r << amount;
    r = o.charAt(t) === "+" ? (r + shifted) & 4294967295 : r ^ shifted;
  }
  return r;
}

/**
 * Split `text` into an array in which every valid surrogate pair is ONE element and
 * every other UTF-16 code unit (including a lone surrogate) is its own element.
 */
function splitCodePoints(text) {
  var out = [];
  for (var idx = 0; idx < text.length; idx++) {
    var unit = text.charCodeAt(idx);
    var isPair =
      unit >= 0xd800 && unit <= 0xdbff &&
      idx + 1 < text.length &&
      text.charCodeAt(idx + 1) >= 0xdc00 && text.charCodeAt(idx + 1) <= 0xdfff;
    if (isPair) {
      out.push(text.charAt(idx) + text.charAt(idx + 1));
      idx++; // the low surrogate has been consumed too
    } else {
      out.push(text.charAt(idx));
    }
  }
  return out;
}

/**
 * Encode `text` as an array of UTF-8 byte values. A lone surrogate is written as a
 * three-byte sequence, exactly like the site's original encoder does.
 */
function utf8Bytes(text) {
  var bytes = [];
  for (var idx = 0; idx < text.length; idx++) {
    var code = text.charCodeAt(idx);
    if (code < 128) {
      bytes.push(code);
    } else if (code < 2048) {
      bytes.push((code >> 6) | 192, (code & 63) | 128);
    } else if (
      (code & 64512) === 55296 &&
      idx + 1 < text.length &&
      (text.charCodeAt(idx + 1) & 64512) === 56320
    ) {
      // Combine the surrogate pair into one code point and emit four bytes.
      code = 65536 + ((code & 1023) << 10) + (text.charCodeAt(++idx) & 1023);
      bytes.push(
        (code >> 18) | 240,
        ((code >> 12) & 63) | 128,
        ((code >> 6) & 63) | 128,
        (code & 63) | 128
      );
    } else {
      bytes.push((code >> 12) | 224, ((code >> 6) & 63) | 128, (code & 63) | 128);
    }
  }
  return bytes;
}

/**
 * Compute the `sign` form field for the text `r`.
 *
 * Steps:
 *   1. If `r` is longer than 30 characters (a surrogate pair counts as one), only its
 *      first 10, middle 10 and last 10 characters are hashed.
 *   2. The shortened text is UTF-8 encoded.
 *   3. Starting from the integer part of `gtk`, every byte is added and mixed with
 *      n(p, "+-a^+6"); the result is finished with n(p, "+-3^+b+-f"), XORed with the
 *      fractional part of `gtk`, made non-negative and reduced modulo 1e6.
 *
 * Written without ES2015+ syntax and without `window`, so it also runs in the
 * non-browser script engines PyExecJS may pick.
 *
 * @param {string} r      Text to sign.
 * @param {string} [gtk]  Seed in the form "<m>.<s>"; defaults to the value observed in
 *                        the browser console ("320305.131321201").
 * @returns {string}      "<p>.<p ^ m>".
 * @throws {TypeError}    If `r` is not a string.
 */
function e(r, gtk) {
  if (typeof r !== "string") {
    throw new TypeError("e(r): r must be a string");
  }
  var seed = gtk === undefined || gtk === null ? "320305.131321201" : String(gtk);

  // 1. Shorten long input. Working on code points keeps surrogate pairs intact.
  var text = r;
  var chars = splitCodePoints(r);
  if (chars.length > 30) {
    var mid = Math.floor(chars.length / 2);
    text =
      chars.slice(0, 10).join("") +
      chars.slice(mid - 5, mid + 5).join("") +
      chars.slice(-10).join("");
  }

  // 2. Seed halves and input bytes.
  var seedParts = seed.split(".");
  var m = Number(seedParts[0]) || 0;
  var s = Number(seedParts[1]) || 0;
  var bytes = utf8Bytes(text);

  // 3. Mix.
  var p = m;
  for (var b = 0; b < bytes.length; b++) {
    p += bytes[b];
    p = n(p, "+-a^+6");
  }
  p = n(p, "+-3^+b+-f");
  p ^= s;
  if (p < 0) {
    // Reinterpret the signed 32-bit value as unsigned.
    p = (p & 2147483647) + 2147483648;
  }
  p %= 1e6;
  return p.toString() + "." + (p ^ m);
}

百度翻译这个JS的破解还是比较简单的,因为在form表单里只有sign这一个参数在发生变化,其他都是固定不变的。后面我会分享更加难破解的加密数据,也欢迎大家关注我的微信公众号,一起学习,共同进步!

如果觉得《python爬虫JS逆向加密破解之百度翻译》对你有帮助,请点赞、收藏,并留下你的观点哦!

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。