Lomon---- 2018-09-07 15:58 采纳率: 0%
浏览 2199
已采纳

java采集页面显示202状态

最近在测试采集公共资源交易平台的页面时,服务器返回了 202 状态码,无法采集到最终的页面内容。希望能给出采集该页面的完整 Java 代码,谢谢!

附测试代码:
package asptest;

import java.io.IOException;
import java.net.URI;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.params.CookiePolicy;
import org.apache.http.client.protocol.ClientContext;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.cookie.BasicClientCookie2;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.ExecutionContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
/*
 * author: 合肥工业大学 管院学院 钱洋
 * 1563178220@qq.com
 */
    /**
     * Small demo that downloads a single page with Apache HttpClient 4.x and
     * prints the HTTP status code and the response body to standard output.
     */
    public class Testggzy {

        /** Browser-like User-Agent sent as a real request header. */
        private static final String USER_AGENT =
                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36";

        /** Charset used to decode the body when the response does not declare one. */
        private static final String DEFAULT_CHARSET = "UTF-8";

        /** Connect timeout in milliseconds. */
        private static final int CONNECT_TIMEOUT_MS = 6000;

        /** Socket read timeout in milliseconds (two minutes). */
        private static final int SOCKET_TIMEOUT_MS = 6000 * 20;

        /** Number of automatic retries on I/O failure. */
        private static final int RETRY_COUNT = 5;

        /** Back-off applied after a non-200 response: 20 minutes. */
        private static final long ERROR_BACKOFF_MS = 20L * 60 * 1000;

        public static void main(String[] args) throws ClientProtocolException, IOException, InterruptedException {
            getRawHTML("http://www.ggzy.gov.cn/information/html/b/500000/0201/201808/30/005073ad2bc4036b4335a46cf421674b341f.shtml");
        }

        /**
         * Fetches {@code url} with an HTTP GET and returns the response body.
         *
         * <p>The status code is always printed. On a 200 response the body is
         * decoded, printed once and returned. On any other status the entity is
         * discarded, the calling thread sleeps for 20 minutes, and an empty
         * string is printed and returned.
         *
         * @param url absolute URL of the page to download
         * @return the response body on HTTP 200, otherwise an empty string
         * @throws ClientProtocolException on an HTTP protocol error
         * @throws IOException on a connection or read failure
         * @throws InterruptedException if the thread is interrupted during the back-off sleep
         */
        public static String getRawHTML(String url) throws ClientProtocolException, IOException, InterruptedException {
            DefaultHttpClient httpclient = new DefaultHttpClient();
            try {
                // Client configuration: cookie policy, timeouts and retry handler.
                HttpParams params = httpclient.getParams();
                params.setParameter("http.protocol.cookie-policy", CookiePolicy.BROWSER_COMPATIBILITY);
                HttpConnectionParams.setConnectionTimeout(params, CONNECT_TIMEOUT_MS);
                HttpConnectionParams.setSoTimeout(params, SOCKET_TIMEOUT_MS);
                httpclient.setHttpRequestRetryHandler(new DefaultHttpRequestRetryHandler(RETRY_COUNT, true));

                // Per-request context with its own (initially empty) cookie store.
                HttpContext localContext = new BasicHttpContext();
                localContext.setAttribute(ClientContext.COOKIE_STORE, new BasicCookieStore());

                HttpGet request = new HttpGet();
                request.setURI(URI.create(url));
                // User-Agent is an HTTP header, not a cookie: putting it into the
                // cookie store never places it on the outgoing request.
                request.setHeader("User-Agent", USER_AGENT);

                String rawHTML = "";
                HttpResponse response = httpclient.execute(request, localContext);
                int statusCode = response.getStatusLine().getStatusCode();
                System.out.println(statusCode);

                if (statusCode == 200) {
                    if (response.getEntity() != null) {
                        // Fall back to UTF-8 when the server omits the charset.
                        rawHTML = EntityUtils.toString(response.getEntity(), DEFAULT_CHARSET);
                    }
                    // Make sure the content stream is fully released.
                    EntityUtils.consume(response.getEntity());
                } else {
                    // Discard the body so the connection can be released, then back off.
                    EntityUtils.consume(response.getEntity());
                    Thread.sleep(ERROR_BACKOFF_MS);
                }

                System.out.println(rawHTML);
                return rawHTML;
            } finally {
                // Always release the client, including when the request throws.
                httpclient.close();
            }
        }
    }

  • 写回答

3条回答 默认 最新

  • Lomon---- 2019-01-01 15:07
    关注

    http://deal.ggzy.gov.cn/ds/deal/dealList_find.jsp
    改为请求上面这个接口:在这里可以找到请求数据的格式,直接读取它返回的 JSON,就能解决链接的问题。

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(2条)

报告相同问题?

悬赏问题

  • ¥60 请查询全国几个煤炭大省近十年的煤炭铁路及公路的货物周转量
  • ¥15 请帮我看看我这道c语言题到底漏了哪种情况吧!
  • ¥66 如何制作支付宝扫码跳转到发红包界面
  • ¥15 pnpm 下载element-plus
  • ¥15 解决编写PyDracula时遇到的问题
  • ¥15 有没有人能解决下这个问题吗,本人不会编程
  • ¥15 plotBAPC画图出错
  • ¥30 关于#opencv#的问题:使用大疆无人机拍摄水稻田间图像,拼接成tif图片,用什么方法可以识别并框选出水稻作物行
  • ¥15 Python卡尔曼滤波融合
  • ¥20 iOS绕地区网络检测