congyiwei 2015-04-14 04:37 采纳率: 100%
浏览 2616
已采纳

求助jsoup取页面之后输出的问题

取出的表格数据为空,请问这是 String body 的问题吗?
如果是,应该怎样修改才能解决?

import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;


public class Main {

    public static void main(String[] args) throws Throwable {

        for (int i = 1; i <= 3; i++) {
            System.out.println(getPrice(i));
        }

    }

    static List<String> getPrice(int pageNo) throws Throwable {

        Document doc = Jsoup.parse(getText(pageNo));

        Elements trs = doc.select("#ctl00_cphMainFrame_Table1 tr");

        List<String> result = new ArrayList<String>(trs.size());

        for (int i = 1, l = trs.size(); i < l; i++) {
            Element tr = trs.get(i);

            result.add(tr.child(5).text());
        }

        return result;

    }

    static String getText(int pageNo) throws Throwable {

        URL url = new URL("http://www.lnprice.gov.cn/wjjc/jgjc/ReportByDateOfPivot.aspx?PriceBureauMainType_Id=101&YM=201502&DP=28");

        HttpURLConnection conn = (HttpURLConnection) url.openConnection();

        conn.setRequestMethod("POST");

        conn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko");

        conn.setDoOutput(true);

        conn.connect();

        String body = "ctl00%24cphMainFrame%24ScriptManager1=ctl00%24cphMainFrame%24UpdatePanel1%7Cctl00%24cphMainFrame%24aspnetpager1&ctl00%24cphMainFrame%24ddlYear=2015&ctl00%24cphMainFrame%24ddlMonth=02&ctl00%24cphMainFrame%24ddlTimePoint=28&__EVENTTARGET=ctl00%24cphMainFrame%24aspnetpager1&__LASTFOCUS=&__VIEWSTATE=%2FwEPDwULLTEwNTcyNDc4NjkPZBYCZg9kFgICAQ9kFgQCAQ8WAh4LXyFJdGVtQ291bnQCCxYWZg9kFgJmDxUCAzEwMQzlhpzlia%2Fkuqflk4FkAgEPZBYCZg8VAgMxMDcP5bel5Lia5raI6LS55ZOBZAICD2QWAmYPFQIDMTA4EuW3peS4mueUn%2BS6p%2Bi1hOaWmWQCAw9kFgJmDxUCAzEwORjln47luILlsYXmsJHmnI3liqHku7fmoLxkAgQPZBYCZg8VAgMxMTAY5Yac5p2R5bGF5rCR5pyN5Yqh5Lu35qC8ZAIFD2QWAmYPFQIDMTExDOa2ieWGnOS6p%2BWTgWQCBg9kFgJmDxUCAzEwMhwyMDEz5bm05Lul5YmN5bel5Lia5raI6LS55ZOBZAIHD2QWAmYPFQIDMTAzHzIwMTPlubTku6XliY3lt6XkuJrnlJ%2FkuqfotYTmlplkAggPZBYCZg8VAgMxMDQZMjAxM%2BW5tOS7peWJjeacjeWKoeS7t%2BagvGQCCQ9kFgJmDxUCAzEwNR8yMDEz5bm05Lul5YmN5Yac5Lia55Sf5Lqn6LWE5paZZAIKD2QWAmYPFQIDMTA2GTIwMTPlubTku6XliY3mtonlhpzkuqflk4FkAgMPZBYQAgEPDxYCHgRUZXh0BQzlhpzlia%2Fkuqflk4FkZAIDDw8WBB8BBVw8c3BhbiBzdHlsZT0ibWFyZ2luLWxlZnQ6MjBweDsiICBjbGFzcz0ibXNqZ19jaGF4dW5feHhrMV9iZzAwIiA%2B5oyJ5YiG57G75YWo55yB5p%2Bl6K%2BiPC9zcGFuPh4LTmF2aWdhdGVVcmwFM1JlcG9ydEJ5RGF0ZU9mUGl2b3QuYXNweD9QcmljZUJ1cmVhdU1haW5UeXBlX0lkPTEwMWRkAgQPDxYEHwEFQTxzcGFuIGNsYXNzPSJtc2pnX2NoYXh1bl94eGsxX2JnMTEiID7mjInllYblk4HliIbluILmn6Xor6I8L3NwYW4%2BHwIFNVJlcG9ydEdvb2RzSW5mb0J5Q2l0eS5hc3B4P1ByaWNlQnVyZWF1TWFpblR5cGVfSWQ9MTAxZGQCBQ8PFgQfAQVCPHNwYW4gY2xhc3M9Im1zamdfY2hheHVuX3h4azFfYmcxMSIgPuaMieaXtumXtOWIhuW4guafpeivoiA8L3NwYW4%2BHwIFMFJlcG9ydEluZm9ieVRpbWUuYXNweD9QcmljZUJ1cmVhdU1haW5UeXBlX0lkPTEwMWRkAgcPEA8WBh4NRGF0YVRleHRGaWVsZAUKQ3JlYXRlWWVhch4ORGF0YVZhbHVlRmllbGQFCkNyZWF0ZVllYXIeC18hRGF0YUJvdW5kZ2QQFRAEMjAwMAQyMDAxBDIwMDIEMjAwMwQyMDA0BDIwMDUEMjAwNgQyMDA3BDIwMDgEMjAwOQQyMDEwBDIwMTEEMjAxMgQyMDEzBDIwMTQEMjAxNRUQBDIwMDAEMjAwMQQyMDAyBDIwMDMEMjAwNAQyMDA1BDIwMDYEMjAwNwQyMDA4BDIwMDkEMjAxMAQyMDExBDIwMTIEMjAxMwQyMDE0BDIwMTUUKwMQZ2dnZ2dnZ2dnZ2dnZ2dnZxYBAg9kAgkPEGRkFgECAWQCCw8QDxYCHwVnZBAVBAnor7fp
gInmi6kCMDUCMTUCMjUVBAEwAjI4AjI5AjMwFCsDBGdnZ2dkZAIPDxYCHgdWaXNpYmxlZxYCZg9kFgJmD2QWAgIDD2QWAmYPZBYCAgMPFgIfBmcWAmYPZBYCZg9kFgICAQ8PFgYeCFBhZ2VTaXplAhQeEEN1cnJlbnRQYWdlSW5kZXgCAh4LUmVjb3JkY291bnQCL2RkZM%2FO1WQW50DLN7G3eiSyS6q2rewQ&__EVENTVALIDATION=%2FwEWJAKb97l9ArjilMkFApDM2c4FApDMreUCApDMsZgLApDMhT8CkMzp0wgCkMz99gECkMzBrQ4CkMzVwAYCkMz5KQKQzM3MCAL79f%2FVDwL79cOIBAL79devDQL79bvCBQL79Y%2F5AgL79ZOcCwLWm967DgLG9LjWAgLG9LzWAgLG9IDWAgLG9ITWAgLG9IjWAgLG9IzWAgLG9JDWAgLG9NTVAgLG9NjVAgLZ9LTWAgLZ9LjWAgLZ9LzWAgLSx8%2BzDgLMx%2B%2BzDgLMx%2BOzDgLPx4%2BwDgLy%2BZrvCEFw0vATX2wSsTwyj9sMOqdXBRc0&__ASYNCPOST=true&__EVENTARGUMENT=" + pageNo;

        conn.getOutputStream().write(body.getBytes());

        byte[] buff = new byte[4096];
        int count;

        ByteArrayOutputStream out = new ByteArrayOutputStream(4096);
        InputStream in = conn.getInputStream();

        while((count = in.read(buff)) != -1) {
            out.write(buff, 0, count);
        }

        conn.disconnect();

        return out.toString("UTF-8");

    }

}
  • 写回答

3条回答 默认 最新

  • 毕小宝 博客专家认证 2015-04-14 06:42
    关注

    代码没有问题,可能是因为网站数据是实时变动的。
    body 内容试试下面这个参数,这是刚刚抓到的(如果不行,再抓一个最新的试试):

     String body = "ctl00%24cphMainFrame%24ScriptManager1=ctl00%24cphMainFrame%24UpdatePanel1%7Cctl00%24cphMainFrame%24aspnetpager1&ctl00%24cphMainFrame%24ddlYear=2015&ctl00%24cphMainFrame%24ddlMonth=02&ctl00%24cphMainFrame%24ddlTimePoint=28&__EVENTTARGET=ctl00%24cphMainFrame%24aspnetpager1&__LASTFOCUS=&__VIEWSTATE=%2FwEPDwULLTEwNTcyNDc4NjkPZBYCZg9kFgICAQ9kFgQCAQ8WAh4LXyFJdGVtQ291bnQCCxYWZg9kFgJmDxUCAzEwMQzlhpzlia%2Fkuqflk4FkAgEPZBYCZg8VAgMxMDcP5bel5Lia5raI6LS55ZOBZAICD2QWAmYPFQIDMTA4EuW3peS4mueUn%2BS6p%2Bi1hOaWmWQCAw9kFgJmDxUCAzEwORjln47luILlsYXmsJHmnI3liqHku7fmoLxkAgQPZBYCZg8VAgMxMTAY5Yac5p2R5bGF5rCR5pyN5Yqh5Lu35qC8ZAIFD2QWAmYPFQIDMTExDOa2ieWGnOS6p%2BWTgWQCBg9kFgJmDxUCAzEwMhwyMDEz5bm05Lul5YmN5bel5Lia5raI6LS55ZOBZAIHD2QWAmYPFQIDMTAzHzIwMTPlubTku6XliY3lt6XkuJrnlJ%2FkuqfotYTmlplkAggPZBYCZg8VAgMxMDQZMjAxM%2BW5tOS7peWJjeacjeWKoeS7t%2BagvGQCCQ9kFgJmDxUCAzEwNR8yMDEz5bm05Lul5YmN5Yac5Lia55Sf5Lqn6LWE5paZZAIKD2QWAmYPFQIDMTA2GTIwMTPlubTku6XliY3mtonlhpzkuqflk4FkAgMPZBYQAgEPDxYCHgRUZXh0BQzlhpzlia%2Fkuqflk4FkZAIDDw8WBB8BBVw8c3BhbiBzdHlsZT0ibWFyZ2luLWxlZnQ6MjBweDsiICBjbGFzcz0ibXNqZ19jaGF4dW5feHhrMV9iZzAwIiA%2B5oyJ5YiG57G75YWo55yB5p%2Bl6K%2BiPC9zcGFuPh4LTmF2aWdhdGVVcmwFM1JlcG9ydEJ5RGF0ZU9mUGl2b3QuYXNweD9QcmljZUJ1cmVhdU1haW5UeXBlX0lkPTEwMWRkAgQPDxYEHwEFQTxzcGFuIGNsYXNzPSJtc2pnX2NoYXh1bl94eGsxX2JnMTEiID7mjInllYblk4HliIbluILmn6Xor6I8L3NwYW4%2BHwIFNVJlcG9ydEdvb2RzSW5mb0J5Q2l0eS5hc3B4P1ByaWNlQnVyZWF1TWFpblR5cGVfSWQ9MTAxZGQCBQ8PFgQfAQVCPHNwYW4gY2xhc3M9Im1zamdfY2hheHVuX3h4azFfYmcxMSIgPuaMieaXtumXtOWIhuW4guafpeivoiA8L3NwYW4%2BHwIFMFJlcG9ydEluZm9ieVRpbWUuYXNweD9QcmljZUJ1cmVhdU1haW5UeXBlX0lkPTEwMWRkAgcPEA8WBh4NRGF0YVRleHRGaWVsZAUKQ3JlYXRlWWVhch4ORGF0YVZhbHVlRmllbGQFCkNyZWF0ZVllYXIeC18hRGF0YUJvdW5kZ2QQFRAEMjAwMAQyMDAxBDIwMDIEMjAwMwQyMDA0BDIwMDUEMjAwNgQyMDA3BDIwMDgEMjAwOQQyMDEwBDIwMTEEMjAxMgQyMDEzBDIwMTQEMjAxNRUQBDIwMDAEMjAwMQQyMDAyBDIwMDMEMjAwNAQyMDA1BDIwMDYEMjAwNwQyMDA4BDIwMDkEMjAxMAQyMDExBDIwMTIEMjAxMwQyMDE0BDIwMTUUKwMQZ2dnZ2dnZ2dnZ2dnZ2dnZxYBAg9kAgkPEGRkFgECAWQCCw8QDxYCHwVnZBAVBAnor7fpgIn
mi6kCMDUCMTUCMjUVBAEwAjI4AjI5AjMwFCsDBGdnZ2dkZAIPDxYCHgdWaXNpYmxlZxYCZg9kFgJmD2QWAgIDD2QWAmYPZBYCAgMPFgIfBmcWAmYPZBYCZg9kFgICAQ8PFgYeCFBhZ2VTaXplAhQeEEN1cnJlbnRQYWdlSW5kZXgCAh4LUmVjb3JkY291bnQCL2RkZMVUAu8HLwRzj1xEKpBi8MSr0fYD&__EVENTVALIDATION=%2FwEWJAKS79jeCgK44pTJBQKQzNnOBQKQzK3lAgKQzLGYCwKQzIU%2FApDM6dMIApDM%2FfYBApDMwa0OApDM1cAGApDM%2BSkCkMzNzAgC%2B%2FX%2F1Q8C%2B%2FXDiAQC%2B%2FXXrw0C%2B%2FW7wgUC%2B%2FWP%2BQIC%2B%2FWTnAsC1pveuw4CxvS41gICxvS81gICxvSA1gICxvSE1gICxvSI1gICxvSM1gICxvSQ1gICxvTU1QICxvTY1QIC2fS01gIC2fS41gIC2fS81gIC0sfPsw4CzMfvsw4CzMfjsw4Cz8ePsA4C8vma7whOBwA2O0BJTn5kLqZv1C98W2UbZQ%3D%3D&__ASYNCPOST=true&&__EVENTARGUMENT=" + pageNo;
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(2条)

报告相同问题?

悬赏问题

  • ¥15 metadata提取的PDF元数据,如何转换为一个Excel
  • ¥15 关于arduino编程toCharArray()函数的使用
  • ¥100 vc++混合CEF采用CLR方式编译报错
  • ¥15 coze 的插件输入飞书多维表格 app_token 后一直显示错误,如何解决?
  • ¥15 vite+vue3+plyr播放本地public文件夹下视频无法加载
  • ¥15 c#逐行读取txt文本,但是每一行里面数据之间空格数量不同
  • ¥50 如何openEuler 22.03上安装配置drbd
  • ¥20 ING91680C BLE5.3 芯片怎么实现串口收发数据
  • ¥15 无线连接树莓派,无法执行update,如何解决?(相关搜索:软件下载)
  • ¥15 Windows11, backspace, enter, space键失灵