我的油猴脚本需要用中文分词,目前用的是Chrome自带的Intl.Segmenter
:const str_separated = Array.from(new Intl.Segmenter('cn', { granularity: 'word' }).segment(str));
但是感觉准确率特别低,很多常见词汇都分不出来。
所以我试了下讯飞的在线分词:
function sending_xml(input_text) {
// const input_text = "你好";
const XAppid = "..."
const APIKey = "...";
const XCurTime = Math.floor(new Date().getTime() / 1000);
const XParam = btoa(JSON.stringify({"type": "dependent"}));
const XCheckSum = md5(APIKey + XCurTime + XParam);
const headers = {
"Content-Type": "application/json",
"X-Appid": XAppid,
"X-CurTime": XCurTime,
"X-Param":XParam,
"X-CheckSum": XCheckSum
};
var data = {};
data.text = input_text;
GM_log(data);
GM.xmlHttpRequest ({
method: "POST",
url: "https://ltpapi.xfyun.cn/v1/cws",
data: JSON.stringify(data),
headers: headers,
onload: function (response) {
if (response.status === 200) {
GM_log(response.responseText);
} else {
GM_log("Error: " + response.statusText);
}
}
});
}
结果是:{"code":"10109","data":{},"desc":"illegal text length","sid":"..."}
请问饿为什么是illegal text length?这个请求哪里不对?