Lomon---- 2020-03-26 22:11 采纳率: 0%
浏览 548
已结题

org.apache.http.NoHttpResponseException: liuyan.people.com.cn:80 failed to respond

需要为这个页面(http://liuyan.people.com.cn/threads/list?fid=565)设计采集程序。页面的“加载更多”是以当前页最后一条记录的TID作为lastItem参数去请求下一页,如:
http://liuyan.people.com.cn/threads/list?fid=565
的JSON数据页面为
http://liuyan.people.com.cn/threads/queryThreadsList?fid=565&lastItem=0
JSON数据最后一个TID是7127843,那第二页即是
http://liuyan.people.com.cn/threads/queryThreadsList?fid=565&lastItem=7127843
JSON数据最后一个TID是7125108,那第三页即是
http://liuyan.people.com.cn/threads/queryThreadsList?fid=565&lastItem=7125108
依此类推,当responseData中数据条目小于10条或空时,即结束。

以下代码在采集并翻过5~6页之后,执行DealjsonData()中的response = httpClient.execute(httpPost);时会报错:org.apache.http.NoHttpResponseException: liuyan.people.com.cn:80 failed to respond

有什么好的解决办法,如测试通过就全部给分,谢谢。

以下是部分代码借鉴。

/**
 * Fetches the first JSON page from {@code queryurl}, hands it to {@code getListData},
 * then keeps requesting further pages through {@code DealjsonData} for as long as
 * {@code getListData} returns a positive cursor (the last TID of the page, per the
 * original comments). Rows are written to a temporary table and finally merged into
 * {@code tablename} with {@code Dedup_Add_table}.
 *
 * <p>Any exception is printed and logged; nothing is rethrown to the caller.
 *
 * @param tablename   target table name, passed to {@code DataBaseHelper}
 * @param province    passed through to {@code getListData} / {@code Dedup_Add_table}
 * @param cityname    passed through to {@code getListData} / {@code Dedup_Add_table}
 * @param district    passed through to {@code getListData} / {@code Dedup_Add_table}
 * @param leaderstype passed through to {@code getListData} / {@code Dedup_Add_table}
 * @param leadersname passed through to {@code getListData} / {@code Dedup_Add_table}
 * @param queryurl    URL of the first JSON page; also the base URL for later pages
 * @param url         passed through to {@code getListData}; also used in the error log
 * @param proxyname   currently unused (the proxy setup was already disabled)
 * @param proxyport   currently unused (the proxy setup was already disabled)
 */
public static void gettitle(String tablename,String province,String cityname,String district,String leaderstype,String leadersname,String queryurl,String url,String proxyname,int proxyport)
{
    boolean tmptable = true;
    SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    // Declared outside the try so the finally block can release them on every path.
    WebClient webClient = null;
    org.apache.http.impl.client.CloseableHttpClient httpClient = null;
    try {
        DataBaseHelper dbh = new DataBaseHelper(tablename);
        dbh.onCreate(tablename);
        // Per the original comment: creates the temporary table and empties it.
        String tmp_tablename = dbh.onCreatetmp(tablename);
        System.out.println(df.format(System.currentTimeMillis())+" "+"queryurl:"+queryurl+" tablename:"+tablename+" leadersname:"+leadersname);

        // NOTE(review): this WebClient is configured but never used to fetch anything in
        // the visible code; it is kept (and now always closed) in case trimmed code needs it.
        webClient = new WebClient(BrowserVersion.CHROME);
        webClient.getOptions().setTimeout(3000000);
        webClient.getOptions().setJavaScriptEnabled(false);
        webClient.getOptions().setCssEnabled(false);

        httpClient = HttpClients.createDefault();
        HttpPost httpPost = new HttpPost(queryurl);
        // Fixed: the original sent the misspelled media type "pplication/json".
        httpPost.addHeader("Accept",
                "application/json, text/javascript, */*; q=0.01");
        httpPost.addHeader("Accept-Encoding", "gzip, deflate");
        httpPost.addHeader("Accept-Language",
                "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2");
        // NOTE(review): the Referer is hard-coded to fid=1079 regardless of queryurl — confirm.
        httpPost.addHeader("Referer",
                "http://liuyan.people.com.cn/threads/list?fid=1079");

        String reponseContent;
        org.apache.http.client.methods.CloseableHttpResponse response = httpClient.execute(httpPost);
        try {
            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode != HttpStatus.SC_OK) {
                log.error("url:"+queryurl+" unexpected HTTP status: "+response.getStatusLine());
            }
            HttpEntity httpEntity = response.getEntity();
            // UTF-8 is only a fallback for responses that declare no charset.
            reponseContent = EntityUtils.toString(httpEntity, "UTF-8");
            EntityUtils.consume(httpEntity);
        } finally {
            // Releases the connection even when reading the body fails.
            response.close();
        }
        // Blunt text-level substitution kept from the original: every "null" becomes "0".
        reponseContent = reponseContent.replaceAll("null", "0");

        if (reponseContent.length() > 20) {
            JSONObject dealList_findjson = JSONObject.fromObject(reponseContent);
            // Result is unused; the call is kept because its side effects are not visible here.
            dbh.DBcount(tablename, leadersname);
            if (dealList_findjson.size() > 0) {
                System.out.println("开始采集:");
                // Per the original comments: last TID of the page, or 0 when the list has ended.
                int continues = getListData(dbh,queryurl,url,dealList_findjson,tmp_tablename,tablename,province,cityname,district,leaderstype,leadersname);
                while (continues > 0) {
                    reponseContent = DealjsonData(queryurl, continues);
                    // DealjsonData returns null when the request failed; treat that as a
                    // failed page rather than letting indexOf throw a NullPointerException.
                    if (reponseContent == null || reponseContent.indexOf("success") <= 1) {
                        System.out.println("获取XHR的JSON数据失败!");
                        break;
                    }
                    reponseContent = reponseContent.replaceAll("null", "0");
                    if (reponseContent.length() <= 20) {
                        System.out.println("获取JSON数据异常l!");
                        // Fixed: the original looped again with an unchanged cursor here,
                        // requesting the same URL forever.
                        break;
                    }
                    dealList_findjson = JSONObject.fromObject(reponseContent);
                    int next = getListData(dbh,queryurl,url,dealList_findjson,tmp_tablename,tablename,province,cityname,district,leaderstype,leadersname);
                    if (next == continues) {
                        // A cursor that does not advance would re-fetch the same page endlessly.
                        break;
                    }
                    continues = next;
                }
            }

            // Merge the temporary table into the target table with de-duplication.
            dbh.Dedup_Add_table(tablename,tmp_tablename,province,leaderstype,leadersname,cityname,district,tmptable);
        }
    } catch (Exception e) {
        e.printStackTrace();

        StringWriter sw = new StringWriter();
        e.printStackTrace(new PrintWriter(sw, true));
        String strs = sw.toString();
        log.error("url:"+url+" gettitle error:"+ strs);
    } finally {
        if (webClient != null) {
            webClient.close();
        }
        if (httpClient != null) {
            try {
                httpClient.close();
            } catch (Exception closeError) {
                log.error("url:"+url+" gettitle close error:"+ closeError);
            }
        }
    }
}

/**
 * Requests one page of the XHR JSON list and returns the raw response body.
 *
 * <p>The request URL is {@code url} cut off right after its last {@code "lastItem="}
 * with {@code pageNo} appended.
 *
 * <p>The HTTP client and response are closed on every path. Transient I/O failures such
 * as {@code NoHttpResponseException} are retried up to three times; the default retry
 * handler would not resend a POST, so resending is enabled explicitly. This is safe here
 * because the POST carries no body and only reads data.
 * NOTE(review): if the server keeps dropping requests after several pages, it may be
 * rate limiting; a pause between pages in the caller may still be needed — confirm.
 *
 * @param url    base query URL, expected to contain {@code "lastItem="}
 * @param pageNo cursor value appended after {@code "lastItem="} (the last TID of the
 *               previous page, per the caller)
 * @return the response body, or {@code null} when the request failed before a body was read
 */
static String DealjsonData(String url,int pageNo) {
    String url1=url.substring(0,url.lastIndexOf("lastItem=")+9)+pageNo;
    System.out.println("url:"+url1);
    String reponseContent=null;
    // try-with-resources closes the client (and its sockets) after every call.
    try (org.apache.http.impl.client.CloseableHttpClient httpClient = HttpClients.custom()
            .setRetryHandler(new org.apache.http.impl.client.DefaultHttpRequestRetryHandler(3, true))
            .build()) {

        HttpPost httpPost = new HttpPost(url1);
        // Fixed: the original sent the misspelled media type "pplication/json".
        httpPost.addHeader("Accept","application/json, text/javascript, */*; q=0.01");
        httpPost.addHeader("Accept-Encoding", "gzip, deflate");
        httpPost.addHeader("Accept-Language","zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2");
        httpPost.addHeader("Connection", "keep-alive");
        httpPost.addHeader("Content-Type","application/x-www-form-urlencoded; charset=utf-8");
        // NOTE(review): the Referer is hard-coded to fid=1079 regardless of url — confirm.
        httpPost.addHeader("Referer",
                "http://liuyan.people.com.cn/threads/list?fid=1079");

        try (org.apache.http.client.methods.CloseableHttpResponse response = httpClient.execute(httpPost)) {
            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode != HttpStatus.SC_OK) {
                // The body is still returned as before; the status is now logged.
                log.error("url1:"+url1+" unexpected HTTP status: "+response.getStatusLine());
            }
            HttpEntity httpEntity = response.getEntity();
            // UTF-8 is only a fallback for responses that declare no charset.
            reponseContent = EntityUtils.toString(httpEntity, "UTF-8");
            EntityUtils.consume(httpEntity);
        }
        return reponseContent;
    } catch(Exception e){
        e.printStackTrace();

        StringWriter sw = new StringWriter();
        e.printStackTrace(new PrintWriter(sw, true));
        String strs = sw.toString();
        log.error("table_name:"+url+"url1:"+url1+" ListData error:"+ strs);
        return reponseContent;
    }
}
  • 写回答

1条回答 默认 最新

  • threenewbee 2020-03-27 00:19
    关注

    你用浏览器访问 liuyan.people.com.cn,看看有问题么。
    是不是有反机器人的机制,比如说留言太频繁会有验证码或者让你休息一下的提示,或者封你的ip
    抓包分析下。

    评论

报告相同问题?

悬赏问题

  • ¥15 用土力学知识进行土坡稳定性分析与挡土墙设计
  • ¥15 帮我写一个c++工程
  • ¥30 Eclipse官网打不开,官网首页进不去,显示无法访问此页面,求解决方法
  • ¥15 关于smbclient 库的使用
  • ¥15 微信小程序协议怎么写
  • ¥15 c语言怎么用printf(“\b \b”)与getch()实现黑框里写入与删除?
  • ¥20 怎么用dlib库的算法识别小麦病虫害
  • ¥15 华为ensp模拟器中S5700交换机在配置过程中老是反复重启
  • ¥15 uniapp uview http 如何实现统一的请求异常信息提示?
  • ¥15 有了解d3和topogram.js库的吗?有偿请教