Lomon---- 2018-09-07 15:58 采纳率: 0%
浏览 2199
已采纳

java采集页面显示202状态

最近在测试采集公共资源交易平台的页面时,请求返回 202 状态码,无法采集到最终的页面内容。希望能给出采集该页面的完整 JAVA 代码,谢谢!

附测试代码:
package asptest;

import java.io.IOException;
import java.net.URI;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.params.CookiePolicy;
import org.apache.http.client.protocol.ClientContext;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.cookie.BasicClientCookie2;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.ExecutionContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
/*
 * author:合肥工业大学 管院学院 钱洋
 * 1563178220@qq.com
 */
    /**
     * Small test harness that downloads one page with Apache HttpClient 4.x and
     * prints the HTTP status code followed by the page body.
     */
    public class Testggzy {

        /** Browser-like User-Agent, sent as a real request header. */
        private static final String USER_AGENT =
                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36";

        /** Charset used to decode the body when the response does not declare one. */
        private static final String DEFAULT_CHARSET = "UTF-8";

        /** Pause after a non-200 response: 20 minutes, in milliseconds. */
        private static final long ERROR_BACKOFF_MILLIS = 20L * 60 * 1000;

        /**
         * Fetches a single hard-coded page from www.ggzy.gov.cn.
         *
         * @param args ignored
         */
        public static void main(String[] args) throws ClientProtocolException, IOException, InterruptedException {
            getRawHTML("http://www.ggzy.gov.cn/information/html/b/500000/0201/201808/30/005073ad2bc4036b4335a46cf421674b341f.shtml");
        }

        /**
         * Issues a GET request for {@code url} and returns the response body.
         *
         * <p>The status code is always printed. When it is 200 the body is decoded
         * and returned; for any other status the body is discarded, the method
         * sleeps for {@link #ERROR_BACKOFF_MILLIS} and then returns an empty
         * string. The returned text is printed once before returning.
         *
         * @param url absolute URL of the page to download
         * @return the page body on a 200 response, otherwise {@code ""}
         * @throws ClientProtocolException on an HTTP protocol error
         * @throws IOException if the connection fails or is aborted
         * @throws InterruptedException if the thread is interrupted during the back-off sleep
         */
        public static String getRawHTML(String url) throws ClientProtocolException, IOException, InterruptedException {
            String rawHTML = "";
            int statusCode;

            DefaultHttpClient httpclient = new DefaultHttpClient();
            try {
                HttpParams params = httpclient.getParams();
                params.setParameter("http.protocol.cookie-policy", CookiePolicy.BROWSER_COMPATIBILITY);
                // Connect timeout: 6 s; socket read timeout: 120 s.
                HttpConnectionParams.setConnectionTimeout(params, 6000);
                HttpConnectionParams.setSoTimeout(params, 6000 * 20);

                // Retry a failed request up to 5 times, including requests already sent.
                httpclient.setHttpRequestRetryHandler(new DefaultHttpRequestRetryHandler(5, true));

                // Per-request context with its own (initially empty) cookie store.
                HttpContext localContext = new BasicHttpContext();
                localContext.setAttribute(ClientContext.COOKIE_STORE, new BasicCookieStore());

                HttpGet request = new HttpGet();
                request.setURI(URI.create(url));
                // User-Agent must be a request header; storing it as a cookie does
                // not put it on the wire as a header. No Content-Type is set
                // because a GET request carries no body.
                request.setHeader("User-Agent", USER_AGENT);

                HttpResponse response = httpclient.execute(request, localContext);
                try {
                    statusCode = response.getStatusLine().getStatusCode();
                    System.out.println(statusCode);
                    if (statusCode == 200 && response.getEntity() != null) {
                        rawHTML = EntityUtils.toString(response.getEntity(), DEFAULT_CHARSET);
                    }
                } finally {
                    // Drain whatever is left so the connection is released; null-safe.
                    EntityUtils.consume(response.getEntity());
                }
            } finally {
                // Close the client even when the request or decoding throws.
                httpclient.close();
            }

            if (statusCode != 200) {
                // Back off before the caller tries again; the client is already closed.
                Thread.sleep(ERROR_BACKOFF_MILLIS);
            }
            System.out.println(rawHTML);
            return rawHTML;
        }
    }

  • 写回答

3条回答 默认 最新

  • Lomon---- 2019-01-01 15:07
    关注

    http://deal.ggzy.gov.cn/ds/deal/dealList_find.jsp
    改为请求上面这个接口:先找到请求数据的格式,再直接读取返回的 JSON,即可解决链接的问题。

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(2条)

报告相同问题?

悬赏问题

  • ¥15 如何在scanpy上做差异基因和通路富集?
  • ¥20 关于#硬件工程#的问题,请各位专家解答!
  • ¥15 关于#matlab#的问题:期望的系统闭环传递函数为G(s)=wn^2/s^2+2¢wn+wn^2阻尼系数¢=0.707,使系统具有较小的超调量
  • ¥15 FLUENT如何实现在堆积颗粒的上表面加载高斯热源
  • ¥30 截图中的mathematics程序转换成matlab
  • ¥15 动力学代码报错,维度不匹配
  • ¥15 Power query添加列问题
  • ¥50 Kubernetes&Fission&Eleasticsearch
  • ¥15 報錯:Person is not mapped,如何解決?
  • ¥15 c++头文件不能识别CDialog