用 Python 爬取数据后需要通过 JS 解密:在 JS 中直接运行解密并用 console.log 输出没有问题,但是通过 Python(execjs)调用时就出问题了。
```python
"""Fetch the xiniudata industry list and decrypt the response via Node.js helpers."""
import subprocess


class _Utf8Popen(subprocess.Popen):
    """``subprocess.Popen`` whose text pipes default to UTF-8.

    PyExecJS starts Node.js through ``subprocess`` in text mode without an
    explicit encoding, so Python decodes Node's UTF-8 output with the locale
    codec (GBK on Chinese Windows). That raises ``UnicodeDecodeError`` in the
    reader thread and then ``IndexError: list index out of range`` inside
    ``communicate()``. Defaulting the encoding to UTF-8 avoids both.
    """

    def __init__(self, *args, **kwargs):
        # setdefault keeps any encoding a caller passes explicitly.
        kwargs.setdefault('encoding', 'utf-8')
        super().__init__(*args, **kwargs)


# Must run BEFORE ``import execjs``: execjs is expected to bind ``Popen`` when
# it is imported, so patching afterwards would have no effect.
subprocess.Popen = _Utf8Popen

import execjs  # noqa: E402  (deliberately imported after the patch above)
import requests  # noqa: E402

url = 'https://www.xiniudata.com/api2/service/x_service/person_industry_list/list_industries_by_sort'

# Cookies are sent through ``cookies=`` below, so no raw 'cookie' header is needed.
cookies = {
    'btoken': '71PQ5C8SPR689W3I3X0Y5HN31RX9EDE2',
    'hy_data_2020_id': '186bbb954696da-052dd36111ead6-26031951-2073600-186bbb9546ac80',
    'hy_data_2020_js_sdk': '%7B%22distinct_id%22%3A%22186bbb954696da-052dd36111ead6-26031951-2073600-186bbb9546ac80%22%2C%22site_id%22%3A211%2C%22user_company%22%3A105%2C%22props%22%3A%7B%7D%2C%22device_id%22%3A%22186bbb954696da-052dd36111ead6-26031951-2073600-186bbb9546ac80%22%7D',
    'sajssdk_2020_cross_new_user': '1',
    'Hm_lvt_42317524c1662a500d12d3784dbea0f8': '1678186731',
    'Hm_lpvt_42317524c1662a500d12d3784dbea0f8': '1678186731',
}

headers = {
    'authority': 'www.xiniudata.com',
    'accept': 'application/json',
    'accept-language': 'zh-CN,zh;q=0.9',
    'content-type': 'application/json',
    'origin': 'https://www.xiniudata.com',
    'referer': 'https://www.xiniudata.com/industry/newest?from=data',
    'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Google Chrome";v="110"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
}


def _run_js(js_path, func_name, *args):
    """Compile the JS file at *js_path* and return ``func_name(*args)``."""
    # The context manager closes the file handle the original left open.
    with open(js_path, 'r', encoding='utf-8') as js_file:
        context = execjs.compile(js_file.read())
    return context.call(func_name, *args)


# Build the encrypted request body; the JS helper returns {'pyload': ..., 'sig': ...}.
fenye_dict = _run_js('犀牛加密.js', 'fenye')
print(fenye_dict)

json_data = {
    'payload': fenye_dict['pyload'],
    'sig': fenye_dict['sig'],
    'v': 1,
}

response = requests.post(
    url,
    cookies=cookies,
    headers=headers,
    json=json_data,
    timeout=15,  # fail instead of hanging forever if the server stalls
).json()

# 'd' holds the encrypted response payload that the JS helper decrypts.
data = response['d']
print(data)

result = _run_js('犀牛response解密.js', 'getdata', data)
print(result)
```
以上是 Python 代码,下面是 JS 代码(犀牛加密.js):
```javascript
const crypto = require('crypto');
// Key string: used as the repeating XOR key in e2() and appended to the
// payload before hashing in sig123().
var _p = "W5D80NFZHAYB8EUI2T649RT2MNRMVE2O"
/**
 * Compute the MD5 digest of a string.
 *
 * @param {string} text - Input to hash.
 * @returns {string} Hexadecimal digest.
 */
function md5(text) {
    var hasher = crypto.createHash('md5');
    hasher.update(text);
    return hasher.digest('hex');
}
/**
 * Base64-encode a byte string (one character per byte).
 *
 * @param {?string} e - Byte string to encode; null/undefined is passed through as null.
 * @returns {?string} Base64 text padded with '=', or null when the input is null/undefined.
 */
function e1(e) {
    var _keyStr = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
    if (e == null) {
        return null;
    }
    var encoded = "";
    var pos = 0;
    while (pos < e.length) {
        // Read up to three bytes; charCodeAt past the end yields NaN.
        var b0 = e.charCodeAt(pos++);
        var b1 = e.charCodeAt(pos++);
        var b2 = e.charCodeAt(pos++);

        // Split the 24 bits into four 6-bit alphabet indexes.
        var i0 = b0 >> 2;
        var i1 = (3 & b0) << 4 | b1 >> 4;
        var i2 = (15 & b1) << 2 | b2 >> 6;
        var i3 = 63 & b2;

        // Index 64 is '=' in the alphabet: pad for the missing bytes.
        if (isNaN(b1)) {
            i2 = 64;
            i3 = 64;
        } else if (isNaN(b2)) {
            i3 = 64;
        }

        encoded = encoded + _keyStr.charAt(i0) + _keyStr.charAt(i1)
            + _keyStr.charAt(i2) + _keyStr.charAt(i3);
    }
    return encoded;
}
/**
 * Encode a string with _u_e() and XOR every resulting character with the
 * repeating key _p.
 *
 * @param {?string} e - Plain text; null/undefined yields null.
 * @returns {?string} XOR-ed byte string, or null when _u_e() returns null.
 */
function e2(e) {
    var bytes = _u_e(e);
    if (bytes == null) {
        return null;
    }
    var pieces = [];
    for (var idx = 0; idx < bytes.length; idx += 1) {
        // The key repeats cyclically over the input.
        var keyCode = _p.charCodeAt(idx % _p.length);
        pieces.push(String.fromCharCode(bytes.charCodeAt(idx) ^ keyCode));
    }
    return pieces.join("");
}
/**
 * Convert a JavaScript string into a UTF-8 style byte string (one character
 * per byte), after normalising "\r\n" to "\n".
 *
 * Each UTF-16 code unit is encoded on its own as 1, 2 or 3 bytes.
 *
 * @param {?string} e - Text to encode; null/undefined yields null.
 * @returns {?string} Byte string, or null for null/undefined input.
 */
function _u_e(e) {
    if (e == null) {
        return null;
    }
    var text = e.replace(/\r\n/g, "\n");
    var out = "";
    for (var idx = 0; idx < text.length; idx += 1) {
        var code = text.charCodeAt(idx);
        if (code < 128) {
            // Single byte.
            out += String.fromCharCode(code);
        } else if (code < 2048) {
            // Two bytes: 110xxxxx 10xxxxxx.
            out += String.fromCharCode(code >> 6 | 192, 63 & code | 128);
        } else {
            // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
            out += String.fromCharCode(
                code >> 12 | 224,
                code >> 6 & 63 | 128,
                63 & code | 128
            );
        }
    }
    return out;
}
/**
 * Build the request signature: upper-cased MD5 of the payload followed by _p.
 *
 * @param {string} e - Encoded payload.
 * @returns {string} Upper-case hexadecimal MD5 digest.
 */
function sig123(e) {
    var digest = md5(e + _p);
    return digest.toUpperCase();
}
/**
 * Build the encrypted paging request for the industry list.
 *
 * Called with no arguments it produces exactly the original request
 * (limit 20, sort 2, start 0). The variables are now local instead of
 * leaking into the global scope as `d` and `page_n`.
 *
 * @param {number} [start=0] - Offset of the first record.
 * @param {number} [limit=20] - Number of records per page.
 * @param {number} [sort=2] - Sort mode sent to the server.
 * @returns {{pyload: string, sig: string}} Encoded payload and its signature.
 *     The 'pyload' key spelling is kept because callers read it by that name.
 */
function fenye(start = 0, limit = 20, sort = 2) {
    // Key order matters: it determines the JSON text that is encrypted and signed.
    const pageQuery = {
        'limit': limit,
        'sort': sort,
        'start': start
    };
    const payload = e1(e2(JSON.stringify(pageQuery)));
    return {
        'pyload': payload,
        'sig': sig123(payload)
    };
}
// console.log(fenye());
```
下面是解密用的 JS 代码(犀牛response解密.js):
```javascript
// _keyStr: Base64 alphabet looked up by d1(); index 64 ('=') marks padding.
// _p: key string used as the repeating XOR key in d2().
var _keyStr = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
, _p = "W5D80NFZHAYB8EUI2T649RT2MNRMVE2O";
/**
 * XOR every character of a byte string with the repeating key _p, then
 * decode the result with _u_d().
 *
 * @param {string} e - XOR-ed byte string (output of d1()).
 * @returns {string} Decoded text.
 */
function d2(e) {
    var pieces = [];
    for (var idx = 0; idx < e.length; idx += 1) {
        // The key repeats cyclically over the input.
        var keyCode = _p.charCodeAt(idx % _p.length);
        pieces.push(String.fromCharCode(e.charCodeAt(idx) ^ keyCode));
    }
    return _u_d(pieces.join(""));
}
/**
 * Base64-decode text into a byte string (one character per byte).
 *
 * Characters outside the Base64 alphabet are removed before decoding.
 *
 * @param {string} e - Base64 text.
 * @returns {string} Decoded byte string.
 */
function d1(e) {
    var decoded = "";
    var pos = 0;
    var cleaned = e.replace(/[^A-Za-z0-9\+\/\=]/g, "");
    while (pos < cleaned.length) {
        // Look up four 6-bit alphabet indexes.
        var s0 = _keyStr.indexOf(cleaned.charAt(pos++));
        var s1 = _keyStr.indexOf(cleaned.charAt(pos++));
        var s2 = _keyStr.indexOf(cleaned.charAt(pos++));
        var s3 = _keyStr.indexOf(cleaned.charAt(pos++));

        // Reassemble them into three bytes.
        var b0 = s0 << 2 | s1 >> 4;
        var b1 = (15 & s1) << 4 | s2 >> 2;
        var b2 = (3 & s2) << 6 | s3;

        decoded += String.fromCharCode(b0);
        // Index 64 is the '=' padding marker: that byte is not present.
        if (64 != s2) {
            decoded += String.fromCharCode(b1);
        }
        if (64 != s3) {
            decoded += String.fromCharCode(b2);
        }
    }
    return decoded;
}
/**
 * Decode a UTF-8 style byte string (one character per byte) into a
 * JavaScript string.
 *
 * Lead bytes below 128 are taken as one-byte characters, lead bytes in
 * 192..223 as two-byte sequences, and everything else as three-byte sequences.
 *
 * @param {string} e - Byte string to decode.
 * @returns {string} Decoded text.
 */
function _u_d(e) {
    var out = "";
    var pos = 0;
    while (pos < e.length) {
        var lead = e.charCodeAt(pos);
        if (lead < 128) {
            out += String.fromCharCode(lead);
            pos += 1;
        } else if (lead > 191 && lead < 224) {
            var next = e.charCodeAt(pos + 1);
            out += String.fromCharCode((31 & lead) << 6 | 63 & next);
            pos += 2;
        } else {
            var mid = e.charCodeAt(pos + 1);
            var last = e.charCodeAt(pos + 2);
            out += String.fromCharCode((15 & lead) << 12 | (63 & mid) << 6 | 63 & last);
            pos += 3;
        }
    }
    return out;
}
/**
 * Decrypt an encrypted API response and parse it as JSON.
 *
 * The parsed value is kept in a local constant instead of the implicit
 * global `result` the function used to create.
 *
 * NOTE(review): the parsed object can contain non-ASCII text. A host that
 * reads this script's output through a pipe (e.g. PyExecJS on Windows)
 * presumably has to decode it as UTF-8 — confirm on the caller side.
 *
 * @param {string} data - Base64 text from the response's `d` field.
 * @returns {*} The value produced by JSON.parse on the decrypted text.
 * @throws {SyntaxError} If the decrypted text is not valid JSON.
 */
function getdata(data) {
    const parsed = JSON.parse(d2(d1(data)));
    return parsed;
}
// console.log(getdata(data));
```
我把 data 传给 getdata:在 JS 里面直接运行没有问题,但在 Python 里面调用时报错:
```python
Exception in thread Thread-3:
Traceback (most recent call last):
File "D:\Python37\lib\threading.py", line 926, in _bootstrap_inner
self.run()
File "D:\Python37\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "D:\Python37\lib\subprocess.py", line 1267, in _readerthread
buffer.append(fh.read())
UnicodeDecodeError: 'gbk' codec can't decode byte 0xbb in position 2257: illegal multibyte sequence
Traceback (most recent call last):
File "D:/pythonProject/爬虫实战/犀牛数据/犀牛数据.py", line 53, in <module>
result=execjs.compile(open('犀牛response解密.js','r',encoding='utf-8').read()).call('getdata',data1)
File "D:\pythonProject\venv\lib\site-packages\execjs\_abstract_runtime_context.py", line 37, in call
return self._call(name, *args)
File "D:\pythonProject\venv\lib\site-packages\execjs\_external_runtime.py", line 92, in _call
return self._eval("{identifier}.apply(this, {args})".format(identifier=identifier, args=args))
File "D:\pythonProject\venv\lib\site-packages\execjs\_external_runtime.py", line 78, in _eval
return self.exec_(code)
File "D:\pythonProject\venv\lib\site-packages\execjs\_abstract_runtime_context.py", line 18, in exec_
return self._exec_(source)
File "D:\pythonProject\venv\lib\site-packages\execjs\_external_runtime.py", line 87, in _exec_
output = self._exec_with_pipe(source)
File "D:\pythonProject\venv\lib\site-packages\execjs\_external_runtime.py", line 103, in _exec_with_pipe
stdoutdata, stderrdata = p.communicate(input=input)
File "D:\Python37\lib\subprocess.py", line 964, in communicate
stdout, stderr = self._communicate(input, endtime, timeout)
File "D:\Python37\lib\subprocess.py", line 1317, in _communicate
stdout = stdout[0]
IndexError: list index out of range
```
请问亲们,是哪里出了问题?谢谢哦,要疯啊!