枫叶梨花 2016-07-28 13:14 采纳率: 100%
浏览 1092
已采纳

Java做的简单爬虫的问题

图片说明
这是我的程序的错误!不知道怎么回事
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

import Icrawl.ICrawler;
import pojos.CrawlResultPojo;
import pojos.UrlPojo;

/**
 * {@link ICrawler} implementation that downloads a page with Apache HttpClient
 * and returns its body as text.
 *
 * <p>Both entry points ({@link #crawl} for GET, {@link #crawl4Post} for POST)
 * never throw: any failure is printed to standard error and reported through
 * {@code CrawlResultPojo.setSuccess(false)}. The HTTP status code is not
 * inspected, so an error page that carries a body still counts as a success.
 */
public class HttpClientCrawlerImpl implements ICrawler {

    /** Shared client used for every request issued by this crawler. */
    public CloseableHttpClient httpclient = HttpClients.custom().build();

    /**
     * Fetches the URL held by {@code urlPojo} with an HTTP GET.
     *
     * @param urlPojo holder of the target URL; may be {@code null}
     * @return {@code null} when {@code urlPojo} is {@code null}; otherwise a
     *         result whose success flag tells whether the page was read, with
     *         the page content set only on success
     */
    @Override
    public CrawlResultPojo crawl(UrlPojo urlPojo) {
        if (urlPojo == null) {
            return null;
        }
        CrawlResultPojo result = new CrawlResultPojo();
        try {
            // Building the request stays inside the try block so that a
            // malformed URL is reported as a failed crawl, not as an exception.
            result.setPageContent(fetchBody(new HttpGet(urlPojo.getUrl())));
            result.setSuccess(true);
        } catch (Exception e) {
            e.printStackTrace();
            result.setSuccess(false);
        }
        return result;
    }

    /**
     * Fetches the URL held by {@code urlPojo} with an HTTP POST, sending every
     * entry of {@code urlPojo.getParasMap()} as a request parameter.
     *
     * <p>Parameter values are converted with {@code toString()}; a
     * {@code null} value is sent as an empty string.
     *
     * @param urlPojo holder of the target URL and the optional parameter map;
     *        may be {@code null}
     * @return {@code null} when {@code urlPojo} is {@code null}; otherwise a
     *         result whose success flag tells whether the page was read, with
     *         the page content set only on success
     */
    public CrawlResultPojo crawl4Post(UrlPojo urlPojo) {
        if (urlPojo == null) {
            return null;
        }
        CrawlResultPojo result = new CrawlResultPojo();
        try {
            RequestBuilder builder = RequestBuilder.post().setUri(
                    new URI(urlPojo.getUrl()));

            Map<String, Object> parasMap = urlPojo.getParasMap();
            if (parasMap != null) {
                for (Entry<String, Object> entry : parasMap.entrySet()) {
                    Object value = entry.getValue();
                    // A null value must not abort the whole request.
                    builder.addParameter(entry.getKey(),
                            value == null ? "" : value.toString());
                }
            }

            result.setPageContent(fetchBody(builder.build()));
            result.setSuccess(true);
        } catch (Exception e) {
            e.printStackTrace();
            result.setSuccess(false);
        }
        return result;
    }

    /**
     * Executes {@code request} and reads the response body as UTF-8 text.
     *
     * <p>Every line read is followed by a single {@code '\n'}, whatever line
     * terminator the server used.
     *
     * @param request the request to execute
     * @return the response body, or an empty string when the response carries
     *         no entity
     * @throws IOException if executing the request or reading the body fails
     */
    private String fetchBody(HttpUriRequest request) throws IOException {
        CloseableHttpResponse response = httpclient.execute(request);
        BufferedReader reader = null;
        try {
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                // Body-less response: nothing to read.
                return "";
            }
            reader = new BufferedReader(new InputStreamReader(
                    entity.getContent(), "utf-8"));

            StringBuilder body = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line).append('\n');
            }
            return body.toString();
        } finally {
            // Reader first, response second: closing the content stream lets
            // the connection be released cleanly before the response is closed.
            closeAndReport(reader);
            closeAndReport(response);
        }
    }

    /**
     * Closes {@code resource} if it is non-null, printing (not propagating)
     * any failure so that cleanup never hides the outcome of the request.
     */
    private static void closeAndReport(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Manual smoke test: posts one paged query and prints the result.
     *
     * @param args ignored
     * @throws Exception if closing the HTTP client fails
     */
    public static void main(String[] args) throws Exception {
        HttpClientCrawlerImpl httpClientCrawlerImpl = new HttpClientCrawlerImpl();
        try {
            String url = "http://www.wdzj.com/front_select-platt";
            UrlPojo urlPojo = new UrlPojo(url);

            Map<String, Object> parasMap = new HashMap<String, Object>();
            parasMap.put("currPage", 30);
            parasMap.put("params", "");
            parasMap.put("sort", 0);
            urlPojo.setParasMap(parasMap);

            CrawlResultPojo resultPojo = httpClientCrawlerImpl.crawl4Post(urlPojo);

            if (resultPojo != null) {
                System.out.println(resultPojo);
            }
        } finally {
            // Release the client's connections once the run is over.
            httpClientCrawlerImpl.httpclient.close();
        }
    }

}

这是代码,怎么解决这个报错啊???

  • 写回答

2条回答 默认 最新

  • 枫叶梨花 2016-07-29 06:37
    关注

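    原因是 Apache HttpClient 的版本问题:代码中用到的 CloseableHttpClient、HttpClients、RequestBuilder 都是 HttpClient 4.3 才引入的类,请确认工程依赖的 httpclient(以及配套的 httpcore)版本不低于 4.3,且 classpath 中没有混入旧版本的 jar。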

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(1条)

报告相同问题?

悬赏问题

  • ¥15 微信会员卡等级和折扣规则
  • ¥15 微信公众平台自制会员卡可以通过收款码收款码收款进行自动积分吗
  • ¥15 随身WiFi网络灯亮但是没有网络,如何解决?
  • ¥15 gdf格式的脑电数据如何处理matlab
  • ¥20 重新写的代码替换了之后运行hbuliderx就这样了
  • ¥100 监控抖音用户作品更新可以微信公众号提醒
  • ¥15 UE5 如何可以不渲染HDRIBackdrop背景
  • ¥70 2048小游戏毕设项目
  • ¥20 mysql架构,按照姓名分表
  • ¥15 MATLAB实现区间[a,b]上的Gauss-Legendre积分