qq_21209015 2023-03-08 13:26 采纳率: 33.3%
浏览 27
已结题

python 爬虫 js解析数据没问题 py中execjs 报错

python 爬取数据通过js 解密,在js中解密log可以输出没有问题 但是在py中 就出问题了


```python

import requests

url='https://www.xiniudata.com/api2/service/x_service/person_industry_list/list_industries_by_sort'
import requests
import execjs

cookies = {
    'btoken': '71PQ5C8SPR689W3I3X0Y5HN31RX9EDE2',
    'hy_data_2020_id': '186bbb954696da-052dd36111ead6-26031951-2073600-186bbb9546ac80',
    'hy_data_2020_js_sdk': '%7B%22distinct_id%22%3A%22186bbb954696da-052dd36111ead6-26031951-2073600-186bbb9546ac80%22%2C%22site_id%22%3A211%2C%22user_company%22%3A105%2C%22props%22%3A%7B%7D%2C%22device_id%22%3A%22186bbb954696da-052dd36111ead6-26031951-2073600-186bbb9546ac80%22%7D',
    'sajssdk_2020_cross_new_user': '1',
    'Hm_lvt_42317524c1662a500d12d3784dbea0f8': '1678186731',
    'Hm_lpvt_42317524c1662a500d12d3784dbea0f8': '1678186731',
}

headers = {
    'authority': 'www.xiniudata.com',
    'accept': 'application/json',
    'accept-language': 'zh-CN,zh;q=0.9',
    'content-type': 'application/json',
    # 'cookie': 'btoken=71PQ5C8SPR689W3I3X0Y5HN31RX9EDE2; hy_data_2020_id=186bbb954696da-052dd36111ead6-26031951-2073600-186bbb9546ac80; hy_data_2020_js_sdk=%7B%22distinct_id%22%3A%22186bbb954696da-052dd36111ead6-26031951-2073600-186bbb9546ac80%22%2C%22site_id%22%3A211%2C%22user_company%22%3A105%2C%22props%22%3A%7B%7D%2C%22device_id%22%3A%22186bbb954696da-052dd36111ead6-26031951-2073600-186bbb9546ac80%22%7D; sajssdk_2020_cross_new_user=1; Hm_lvt_42317524c1662a500d12d3784dbea0f8=1678186731; Hm_lpvt_42317524c1662a500d12d3784dbea0f8=1678186731',
    'origin': 'https://www.xiniudata.com',
    'referer': 'https://www.xiniudata.com/industry/newest?from=data',
    'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Google Chrome";v="110"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
}
fenye_dict=execjs.compile(open('犀牛加密.js','r',encoding='utf-8').read()).call('fenye')
print(fenye_dict)
json_data = {
    'payload': fenye_dict['pyload'],
    'sig': fenye_dict['sig'],
    'v': 1,
}

response = requests.post(
    'https://www.xiniudata.com/api2/service/x_service/person_industry_list/list_industries_by_sort',
    cookies=cookies,
    headers=headers,
    json=json_data,
).json()
data=response['d']
print(data)


result=execjs.compile(open('犀牛response解密.js','r',encoding='utf-8').read()).call('getdata',data)

print(result)

``python代码,下面是js代码犀牛加密.js
```javascript
const crypto = require('crypto');
var  _p = "W5D80NFZHAYB8EUI2T649RT2MNRMVE2O"
function md5(text) {
    return crypto.createHash('md5').update(text).digest('hex');
}

function e1(e) {
    var _keyStr = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
    if (null == e)
        return null;
    for (var t, n, r, o, i, a, c, u = "", s = 0; s < e.length;)
        o = (t = e.charCodeAt(s++)) >> 2,
            i = (3 & t) << 4 | (n = e.charCodeAt(s++)) >> 4,
            a = (15 & n) << 2 | (r = e.charCodeAt(s++)) >> 6,
            c = 63 & r,
            isNaN(n) ? a = c = 64 : isNaN(r) && (c = 64),
            u = u + _keyStr.charAt(o) + _keyStr.charAt(i) + _keyStr.charAt(a) + _keyStr.charAt(c);
    return u
}

function e2(e) {

    if (null == (e = _u_e(e)))
        return null;
    for (var t = "", n = 0; n < e.length; n++) {
        var r = _p.charCodeAt(n % _p.length);
        t += String.fromCharCode(e.charCodeAt(n) ^ r)
    }
    return t
}
function _u_e(e) {
                if (null == e)
                    return null;
                e = e.replace(/\r\n/g, "\n");
                for (var t = "", n = 0; n < e.length; n++) {
                    var r = e.charCodeAt(n);
                    r < 128 ? t += String.fromCharCode(r) : r > 127 && r < 2048 ? (t += String.fromCharCode(r >> 6 | 192),
                    t += String.fromCharCode(63 & r | 128)) : (t += String.fromCharCode(r >> 12 | 224),
                    t += String.fromCharCode(r >> 6 & 63 | 128),
                    t += String.fromCharCode(63 & r | 128))
                }
                return t
            }

function sig123(e) {
    return md5(e + _p).toUpperCase()
}

function fenye(){
    d={}

    page_n = {
    'limit': 20,
    'sort': 2,
    'start': 0
    }
    d['pyload']=e1(e2(JSON.stringify(page_n)))
    d['sig']=sig123(d['pyload'])
    return d
}

// console.log(fenye());

这是解密。js

 var _keyStr = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
              , _p = "W5D80NFZHAYB8EUI2T649RT2MNRMVE2O";
function d2(e) {
                for (var t = "", n = 0; n < e.length; n++) {
                    var r = _p.charCodeAt(n % _p.length);
                    t += String.fromCharCode(e.charCodeAt(n) ^ r)
                }
                return t = _u_d(t)
            }
function d1(e) {
                var t, n, r, o, i, a, c = "", u = 0;
                for (e = e.replace(/[^A-Za-z0-9\+\/\=]/g, ""); u < e.length; )
                    t = _keyStr.indexOf(e.charAt(u++)) << 2 | (o = _keyStr.indexOf(e.charAt(u++))) >> 4,
                    n = (15 & o) << 4 | (i = _keyStr.indexOf(e.charAt(u++))) >> 2,
                    r = (3 & i) << 6 | (a = _keyStr.indexOf(e.charAt(u++))),
                    c += String.fromCharCode(t),
                    64 != i && (c += String.fromCharCode(n)),
                    64 != a && (c += String.fromCharCode(r));
                return c
        }
function _u_d(e) {
                for (var t = "", n = 0, r = 0, o = 0, i = 0; n < e.length; )
                    (r = e.charCodeAt(n)) < 128 ? (t += String.fromCharCode(r),
                    n++) : r > 191 && r < 224 ? (o = e.charCodeAt(n + 1),
                    t += String.fromCharCode((31 & r) << 6 | 63 & o),
                    n += 2) : (o = e.charCodeAt(n + 1),
                    i = e.charCodeAt(n + 2),
                    t += String.fromCharCode((15 & r) << 12 | (63 & o) << 6 | 63 & i),
                    n += 3);
                return t
            }
function getdata(data){
    result=JSON.parse(d2(d1(data)))
    return result
}
// console.log(getdata(data));


我传递过data 在js里面没有问题 在py里面 报错


```python
Exception in thread Thread-3:
Traceback (most recent call last):
  File "D:\Python37\lib\threading.py", line 926, in _bootstrap_inner
    self.run()
  File "D:\Python37\lib\threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "D:\Python37\lib\subprocess.py", line 1267, in _readerthread
    buffer.append(fh.read())
UnicodeDecodeError: 'gbk' codec can't decode byte 0xbb in position 2257: illegal multibyte sequence

Traceback (most recent call last):
  File "D:/pythonProject/爬虫实战/犀牛数据/犀牛数据.py", line 53, in <module>
    result=execjs.compile(open('犀牛response解密.js','r',encoding='utf-8').read()).call('getdata',data1)
  File "D:\pythonProject\venv\lib\site-packages\execjs\_abstract_runtime_context.py", line 37, in call
    return self._call(name, *args)
  File "D:\pythonProject\venv\lib\site-packages\execjs\_external_runtime.py", line 92, in _call
    return self._eval("{identifier}.apply(this, {args})".format(identifier=identifier, args=args))
  File "D:\pythonProject\venv\lib\site-packages\execjs\_external_runtime.py", line 78, in _eval
    return self.exec_(code)
  File "D:\pythonProject\venv\lib\site-packages\execjs\_abstract_runtime_context.py", line 18, in exec_
    return self._exec_(source)
  File "D:\pythonProject\venv\lib\site-packages\execjs\_external_runtime.py", line 87, in _exec_
    output = self._exec_with_pipe(source)
  File "D:\pythonProject\venv\lib\site-packages\execjs\_external_runtime.py", line 103, in _exec_with_pipe
    stdoutdata, stderrdata = p.communicate(input=input)
  File "D:\Python37\lib\subprocess.py", line 964, in communicate
    stdout, stderr = self._communicate(input, endtime, timeout)
  File "D:\Python37\lib\subprocess.py", line 1317, in _communicate
    stdout = stdout[0]
IndexError: list index out of range

请问亲们 是哪里除了问题 谢谢哦 要疯啊!


  • 写回答

3条回答 默认 最新

  • 文盲老顾 WEB应用领新星创作者 2023-03-08 13:32
    关注

    img

    他还引用了另一个包,你一起弄进来

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(2条)

报告相同问题?

问题事件

  • 系统已结题 3月18日
  • 已采纳回答 3月10日
  • 创建了问题 3月8日

悬赏问题

  • ¥30 模拟电路 logisim
  • ¥15 PVE8.2.7无法成功使用a5000的vGPU,什么原因
  • ¥15 is not in the mmseg::model registry。报错,模型注册表找不到自定义模块。
  • ¥15 安装quartus II18.1时弹出此error,怎么解决?
  • ¥15 keil官网下载psn序列号在哪
  • ¥15 想用adb命令做一个通话软件,播放录音
  • ¥30 Pytorch深度学习服务器跑不通问题解决?
  • ¥15 部分客户订单定位有误的问题
  • ¥15 如何在maya程序中利用python编写领子和褶裥的模型的方法
  • ¥15 Bug traq 数据包 大概什么价