失眠网,内容丰富有趣,生活中的好帮手!
失眠网 > python爬虫进阶-1688工厂信息(JS逆向-sign签名验证)

python爬虫进阶-1688工厂信息(JS逆向-sign签名验证)

时间:2021-03-28 14:10:48

相关推荐

python爬虫进阶-1688工厂信息(JS逆向-sign签名验证)

目的

获取1688工厂名片的相关信息

详细需求

一、进入1688网站

https://www.1688.com/

二、使用“工厂”这个搜索框

三、输入工厂名称进行搜索,如“深圳市杰之美时装有限公司”

四、返回搜索结果,并获取逐个店铺/工厂的连接

五、获取有关数据

思路解析

一、搜索关键词,获取返回网页中的工厂ID

二、链接拼接-进入工厂名片详情页

三、目标信息定位

四、模拟构建请求

这里的难点就是sign值的获取

五、思路汇总

1. 请求工厂关键词,解析得到工厂ID
2. 需要进行三个json请求
   2.1 json1获取工厂基本信息
   2.2 json7获取粉丝数
   2.3 json2获取生产实力
3. 获取cookie(这里是直接复制使用的,具有时效性,会过期)
4. 对每个json请求的data进行处理
5. 拼接完成后,交给本地js进行处理得到sign值
6. 构建请求
7. 信息提取与打印

源码

1688.js

/**
 * Compute the MD5 digest of a string and return it as 32 lowercase hex digits.
 *
 * The Python scraper loads this file through execjs and calls
 * `ctx.call('u', p)` to obtain the request signature, where `p` is
 * `token + "&" + timestamp + "&" + appKey + "&" + data`.
 *
 * Example (formerly kept as commented-out debug code after the function):
 *
 *   var token = '8a0539ff99e9241f36538eee5f490e48';
 *   var t = (new Date).getTime();
 *   var appKey = '12574478';
 *   var data = '{"cid":"TpFacCoreInfosService:TpFacCoreInfosService",' +
 *       '"methodName":"execute","params":"{\\"facAliId\\":\\"106328\\"}"}';
 *   console.log(u(token + "&" + t + "&" + appKey + "&" + data));
 *
 * Input handling:
 *  - every "\r\n" is replaced with "\n" before hashing;
 *  - the text is encoded to bytes one UTF-16 code unit at a time (1, 2 or 3
 *    bytes), so for characters up to U+FFFF the digest equals MD5 over the
 *    UTF-8 bytes; surrogate halves are each encoded as a 3-byte sequence.
 *
 * The code deliberately sticks to ES5 syntax (`var`, function declarations)
 * because it is evaluated by whatever JavaScript runtime execjs finds.
 *
 * @param {string} e - Text to hash.
 * @returns {string} Lowercase hexadecimal MD5 digest.
 * @throws {TypeError} If `e` has no `replace` method (i.e. is not a string).
 */
function u(e) {
    // Left-rotation amounts: one row per round, cycled every four steps.
    var SHIFTS = [
        [7, 12, 17, 22],
        [5, 9, 14, 20],
        [4, 11, 16, 23],
        [6, 10, 15, 21]
    ];

    // Additive constants for the 64 steps, in step order.
    var K = [
        3614090360, 3905402710, 606105819, 3250441966,
        4118548399, 1200080426, 2821735955, 4249261313,
        1770035416, 2336552879, 4294925233, 2304563134,
        1804603682, 4254626195, 2792965006, 1236535329,
        4129170786, 3225465664, 643717713, 3921069994,
        3593408605, 38016083, 3634488961, 3889429448,
        568446438, 3275163606, 4107603335, 1163531501,
        2850285829, 4243563512, 1735328473, 2368359562,
        4294588738, 2272392833, 1839030562, 4259657740,
        2763975236, 1272893353, 4139469664, 3200236656,
        681279174, 3936430074, 3572445317, 76029189,
        3654602809, 3873151461, 530742520, 3299628645,
        4096336452, 1126891415, 2878612391, 4237533241,
        1700485571, 2399980690, 4293915773, 2240044497,
        1873313359, 4264355552, 2734768916, 1309151649,
        4149444226, 3174756917, 718787259, 3951481745
    ];

    // Rotate a 32-bit value left by `count` bits.
    function rotateLeft(value, count) {
        return (value << count) | (value >>> (32 - count));
    }

    // Encode text to a byte array, one UTF-16 code unit at a time.
    function encodeBytes(text) {
        var bytes = [];
        var i;
        var unit;
        for (i = 0; i < text.length; i++) {
            unit = text.charCodeAt(i);
            if (unit < 128) {
                bytes.push(unit);
            } else if (unit < 2048) {
                bytes.push((unit >> 6) | 192, (unit & 63) | 128);
            } else {
                bytes.push(
                    (unit >> 12) | 224,
                    ((unit >> 6) & 63) | 128,
                    (unit & 63) | 128
                );
            }
        }
        return bytes;
    }

    // Pack bytes into little-endian 32-bit words and append the MD5 padding:
    // a 0x80 byte, zero fill, then the message length in bits (two words).
    function toWords(bytes) {
        var byteCount = bytes.length;
        var wordCount = (((byteCount + 8) >>> 6) + 1) * 16;
        var words = [];
        var i;
        for (i = 0; i < wordCount; i++) {
            words.push(0);
        }
        for (i = 0; i < byteCount; i++) {
            words[i >> 2] |= bytes[i] << ((i % 4) * 8);
        }
        words[byteCount >> 2] |= 128 << ((byteCount % 4) * 8);
        words[wordCount - 2] = byteCount << 3;
        words[wordCount - 1] = byteCount >>> 29;
        return words;
    }

    // Render a 32-bit word as 8 hex digits, least-significant byte first.
    function wordToHex(word) {
        var hex = "";
        var i;
        var octet;
        for (i = 0; i < 4; i++) {
            octet = (word >>> (8 * i)) & 255;
            hex += (octet < 16 ? "0" : "") + octet.toString(16);
        }
        return hex;
    }

    var words = toWords(encodeBytes(e.replace(/\r\n/g, "\n")));

    // Running digest state.
    var h0 = 1732584193;
    var h1 = 4023233417;
    var h2 = 2562383102;
    var h3 = 271733878;

    var offset;
    var step;
    var round;
    var a;
    var b;
    var c;
    var d;
    var mix;
    var index;
    var sum;

    for (offset = 0; offset < words.length; offset += 16) {
        a = h0;
        b = h1;
        c = h2;
        d = h3;

        for (step = 0; step < 64; step++) {
            round = step >> 4;
            if (round === 0) {
                mix = (b & c) | (~b & d);
                index = step;
            } else if (round === 1) {
                mix = (b & d) | (c & ~d);
                index = (5 * step + 1) % 16;
            } else if (round === 2) {
                mix = b ^ c ^ d;
                index = (3 * step + 5) % 16;
            } else {
                mix = c ^ (b | ~d);
                index = (7 * step) % 16;
            }

            // `| 0` keeps every addition modulo 2^32; the operands are small
            // enough that the intermediate double is always exact.
            sum = (a + mix + K[step] + words[offset + index]) | 0;
            a = d;
            d = c;
            c = b;
            b = (b + rotateLeft(sum, SHIFTS[round][step % 4])) | 0;
        }

        h0 = (h0 + a) | 0;
        h1 = (h1 + b) | 0;
        h2 = (h2 + c) | 0;
        h3 = (h3 + d) | 0;
    }

    return wordToHex(h0) + wordToHex(h1) + wordToHex(h2) + wordToHex(h3);
}

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author : jia666
# @Time : /10/15 17:07
"""Look up a factory on 1688 by name and print its factory-card details.

Flow (see ``S1688.Main``):
  1. search for the keyword and extract the factory id;
  2. issue three signed JSONP requests (basic info, production strength,
     follower count);
  3. pull the fields out of the responses with regular expressions and print
     a short report.

Each request is signed with the ``u`` function from the local ``1688.js``
file, executed through the third-party ``execjs`` package.
"""
import json
import re
import time
from urllib import parse


class S1688(object):
    """Scraper for one factory's card on 1688.

    The request cookie carries the ``_m_h5_tk`` token used for signing. It
    expires, so the built-in default has to be refreshed by hand (or a fresh
    cookie passed to the constructor).
    """

    # Fixed appKey; part of both the signed string and the query string.
    APP_KEY = '12574478'

    # NOTE(review): the host part of both URLs was missing from the published
    # listing ('/company/company_search.htm...' and 'https://h5api./h5/...').
    # The hosts below are restored from the site's known endpoints - confirm
    # before relying on them.
    SEARCH_URL = 'https://s.1688.com/company/company_search.htm?keywords={}&charset=utf8'
    API_URL = 'https://h5api.m.1688.com/h5/mtop.taobao.widgetservice.getjsoncomponent/1.0/?'

    # Cookie copied from a browser session; time-limited, kept verbatim from
    # the published listing.
    DEFAULT_COOKIE = 'cookie2=15b9e05276e7c4893f44f1509e7846f6; t=2189917eb616063e3da62aa4f41673d9; _tb_token_=7b657778a3376; __cn_logon__=false; cna=dFkQGGsGrnACARsm+oOaXAW/; xlly_s=1; h_keys="%u6df1%u5733%u5e02%u6770%u4e4b%u7f8e%u65f6%u88c5%u6709%u9650%u516c%u53f8"; _csrf_token=1603072773208; alicnweb=touch_tb_at%3D1603088557128; _m_h5_tk=bd327dc391fc8b112121750e88805f27_1603099001009; _m_h5_tk_enc=7095c3c810519260432599239fa2c840; ad_prefer="/10/19 14:23:12"; isg=BCcnARTANLN3qbA-Gyksu_xDtlvxrPuOwp-EYfmU17bd6EeqAX-p3mWpCuj2ANMG; l=eBEQxAnqOmPZIkKZBO5CFurza779uIRb4sPzaNbMiInca10FaF6eCNQVOwXJudtjgtCUIetybUMLyRLHR3fRwxDDB5JEV7vS3xvO.; tfstk=cJ6cBRAuSsRjwbAlO-9fv3snaPbcaGoykdJPUT8LyYvDAYBJ0s4xaXPjD0YAoOh1.'
    USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36"

    # Text that appears in a response when the signing token has expired.
    TOKEN_EXPIRED_MARK = '令牌过期'

    # (label, unit) for each production-strength value, in response order.
    STRENGTH_FIELDS = (
        ('厂房面积', '平方'),
        ('生产人数', '人'),
        ('设备总数', '台'),
        ('仓储类型', ''),
        ('加工方式', ''),
        ('代工模式', ''),
        ('质检类型', ''),
        ('售后服务', ''),
    )

    def __init__(self, word, cookie=None, js_path='1688.js', timeout=10):
        """
        :param word: factory name to search for.
        :param cookie: cookie header value; defaults to ``DEFAULT_COOKIE``.
        :param js_path: path of the JavaScript file that defines ``u``.
        :param timeout: per-request timeout in seconds.
        """
        self.head = {
            'cookie': self.DEFAULT_COOKIE if cookie is None else cookie,
            "user-agent": self.USER_AGENT,
        }
        self.word = word  # factory keyword
        self.api = self.API_URL
        self.js_path = js_path
        self.timeout = timeout
        self._js_ctx = None  # compiled JS context, created on first use

    def Main(self):
        """Run the whole flow: search, three JSONP requests, report."""
        self.Get_word()        # 1. keyword search -> factory id
        self.Get_mtopjsonp1()  # 2. basic info (name, address, ...)
        self.Get_mtopjsonp2()  # 3. production strength
        self.Get_mtopjsonp7()  # 4. follower count
        self.RE_html()         # 5. extract fields and print

    def Get_html(self, url):
        """Fetch ``url`` and return the body with all whitespace removed."""
        # Third-party package; imported here so the parsing/formatting
        # methods stay importable without it.
        import requests

        resp = requests.get(url, headers=self.head, timeout=self.timeout)
        return re.sub(r'\s', '', resp.text)

    def Init_js(self, data):
        """Compute the signature for ``data``; sets ``self.a`` and ``self.sign``.

        ``self.a`` is the millisecond timestamp and ``self.sign`` the result
        of the JS function ``u`` applied to ``token&timestamp&appKey&data``.

        :raises ValueError: if the cookie contains no ``_m_h5_tk`` token.
        """
        # Third-party package (PyExecJS); imported lazily like ``requests``.
        import execjs

        found = re.search(r'_m_h5_tk=(.*?)_', self.head['cookie'], re.S)
        if found is None:
            raise ValueError('cookie does not contain an _m_h5_tk token')
        token = found.group(1)

        self.a = str(int(time.time() * 1000))
        raw = token + "&" + self.a + "&" + self.APP_KEY + "&" + data

        # Compile the JS once per instance instead of once per request.
        ctx = getattr(self, '_js_ctx', None)
        if ctx is None:
            with open(self.js_path, 'r', encoding='utf-8') as f:
                ctx = execjs.compile(f.read())
            self._js_ctx = ctx
        self.sign = ctx.call('u', raw)

    def Get_word(self):
        """Search for the keyword and store the first factory id found.

        :raises IndexError: if the result page contains no ``realUserId``.
        """
        url = self.SEARCH_URL.format(parse.quote(self.word))
        html = self.Get_html(url)
        self.factory_id = self._first('"realUserId":"(.*?)"', html, 'realUserId')

    def Get_mtopjsonp1(self):
        """Request the factory's basic information into ``self.html1``."""
        params = json.dumps({'facAliId': str(self.factory_id)}, separators=(',', ':'))
        data = self._payload('TpFacCoreInfosService:TpFacCoreInfosService', params)
        self.html1 = self.Get_html(self.Get_url(data, 1))
        self.Check_html(self.html1)

    def Check_html(self, html):
        """Report an expired token; return ``True`` when the response is usable."""
        if self.TOKEN_EXPIRED_MARK in html:
            print('令牌过期,更新cookie后重试')
            return False
        return True

    def Get_url(self, data, n):
        """Build the signed request URL for payload ``data``.

        :param data: JSON text, or a JSON-serialisable object (serialised
            here so that the signed text and the sent text are the same
            valid JSON).
        :param n: number appended to the ``mtopjsonp`` callback name.
        """
        payload = data if isinstance(data, str) else json.dumps(data, separators=(',', ':'))
        self.Init_js(payload)
        query = 'jsv=2.6.0&appKey={}&t={}&sign={}&api=mtop.taobao.widgetService.getJsonComponent&v=1.0&type=jsonp&timeout=5000&dataType=jsonp&callback=mtopjsonp{}&'.format(
            self.APP_KEY, self.a, self.sign, n)
        return self.api + query + 'data=' + parse.quote(payload)

    def Get_mtopjsonp2(self):
        """Request the production-strength data into ``self.html2``."""
        params = json.dumps({'extParam': {'factoryUserId': str(self.factory_id)}},
                            separators=(',', ':'))
        data = self._payload('FactoryStrengthServiceWidget:FactoryStrengthServiceWidget', params)
        self.html2 = self.Get_html(self.Get_url(data, 2))
        self.Check_html(self.html2)

    def Get_mtopjsonp7(self):
        """Request the follower count into ``self.html7``."""
        params = json.dumps(
            {'extParams': {'method': 'readFavourite', 'targetUserId': str(self.factory_id)}},
            separators=(',', ':'))
        data = self._payload('ShopFavouriteServiceWidget:ShopFavouriteServiceWidget', params)
        self.html7 = self.Get_html(self.Get_url(data, 7))
        self.Check_html(self.html7)

    def RE_html(self):
        """Extract the fields from the three responses, print and return the report.

        :raises IndexError: if a required field is missing from a response.
        """
        name = self._first('"facName":"(.*?)"', self.html1, 'facName')
        values = re.findall('"data":"(.*?)"', self.html1, re.S)
        labels = re.findall('"desc":"(.*?)"', self.html1, re.S)
        views = self._first('"factoryPv":"(.*?)"', self.html1, 'factoryPv')
        address = self._first('"factoryDetailedAddress":"(.*?)"', self.html1,
                              'factoryDetailedAddress')
        fans = self._first('"favCount":"(.*?)"', self.html7, 'favCount')
        strengths = re.findall('"value":"(.*?)"', self.html2, re.S)
        if len(strengths) < len(self.STRENGTH_FIELDS):
            raise IndexError('expected {} production-strength values, found {}'.format(
                len(self.STRENGTH_FIELDS), len(strengths)))

        pieces = []
        for position, (label, value) in enumerate(zip(labels, values)):
            # Every entry after the first is followed by a percent sign.
            suffix = '\t' if position == 0 else '%\t'
            pieces.append(str(label) + ':' + str(value) + suffix)
        pieces.append('粉丝数:' + fans + '\t' + '浏览数:' + views + '\t')
        summary = ''.join(pieces)

        strength_line = ''.join(
            label + value + unit + '\t'
            for (label, unit), value in zip(self.STRENGTH_FIELDS, strengths))

        report = (name + '\n' + '*' * 50 + '\n' + summary + '\n' + address + '\n'
                  + '-' * 50 + '\n' + strength_line)
        print(report)
        return report

    @staticmethod
    def _payload(cid, params):
        """Return the compact JSON request body for component ``cid``."""
        return json.dumps({'cid': cid, 'methodName': 'execute', 'params': params},
                          separators=(',', ':'))

    @staticmethod
    def _first(pattern, text, label):
        """Return group 1 of the first match of ``pattern`` or raise ``IndexError``."""
        found = re.search(pattern, text, re.S)
        if found is None:
            raise IndexError('{} not found in response'.format(label))
        return found.group(1)


if __name__ == '__main__':
    word = '深圳市杰之美时装有限公司'
    ex = S1688(word)
    ex.Main()

实现效果

注意:cookie需要手动更新

如果觉得《python爬虫进阶-1688工厂信息(JS逆向-sign签名验证)》对你有帮助,请点赞、收藏,并留下你的观点哦!

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。