想用 Jsoup 解析天猫商品搜索页面,但是解析后返回的 Document 中,所有通过 URL 传递的搜索关键字都变成了乱码。项目使用的编码是 UTF-8。
/**
 * Builds the first-page search URLs for JD, Amazon.cn and Tmall from
 * {@code pagedata.key_utf8} and hands them to {@code getFromURLs}.
 *
 * <p>The keyword is appended to each URL as-is; it is presumably already
 * percent-encoded as UTF-8 (as the field name suggests) — TODO confirm.
 *
 * @return whatever {@code getFromURLs} returns for the three search URLs
 * @throws IOException          propagated from {@code getFromURLs}
 * @throws InterruptedException propagated from {@code getFromURLs}
 */
private List<ItemInfo> getFirstPageItemInfoList() throws IOException, InterruptedException {
    final String keyword = this.pagedata.key_utf8;

    final String jdUrl = "http://search.jd.com/Search?keyword=" + keyword + "&enc=utf-8";

    // The __mk_zh_CN value is the UTF-8 percent-encoded form of "亚马逊网站".
    // Keeping raw non-ASCII characters in a URL literal makes the bytes on the
    // wire depend on the source-file encoding and on the HTTP client.
    final String amazonUrl = "http://www.amazon.cn/s/ref=nb_sb_noss_1"
            + "?__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99"
            + "&url=search-alias%3Daps&field-keywords=" + keyword;

    // NOTE(review): the Tmall page reportedly declares charset=gbk; a keyword
    // encoded as UTF-8 may be decoded incorrectly there. Confirm, and if so,
    // percent-encode the raw keyword with GBK for this URL only.
    final String tmallUrl = "https://list.tmall.com/search_product.htm?q=" + keyword;

    return getFromURLs(jdUrl, amazonUrl, tmallUrl);
}
public List<ItemInfo> getItemInfoList()
throws IOException, InterruptedException {
/**
* 通过URL获得Document对象
*/
// try {
// String gbk=URLEncoder.encode(this.pageurl,"GBK");
// System.out.println(gbk);
// } catch (UnsupportedEncodingException e) {
// e.printStackTrace();
// }
Document doc = Jsoup.connect(this.pageurl).userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36").timeout(3000).maxBodySize(0).post();
//Document doc = Jsoup.parse(new URL(this.pageurl).openStream(), "GBK", this.pageurl);
pagedata.tbpage = this.getPageNum(doc);
用浏览器查看天猫页面的源代码,发现它的网页编码是 <meta charset="gbk">。乱码是因为这个原因吗?应该怎么修改才好?