xuan0506 2010-12-06 17:18
浏览 239
已采纳

httpclient的疑问求解答!

    我使用 HttpClient 来爬取一些固定网站的数据信息,由于使用了线程池,在抓取的时候经常碰到异常!


Exception in thread "pool-226-thread-200" java.lang.IllegalStateException: Invalid use of SingleClientConnManager: connection still allocated.
Make sure to release the connection before allocating another one.
at org.apache.http.impl.conn.SingleClientConnManager.getConnection(SingleClientConnManager.java:199)
at org.apache.http.impl.conn.SingleClientConnManager$1.getConnection(SingleClientConnManager.java:173)
at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:390)
at org.apache.http.impl.client.AbstractHttpClient.execute(AbstractHttpClient.java:641)
at org.apache.http.impl.client.AbstractHttpClient.execute(AbstractHttpClient.java:576)
at org.apache.http.impl.client.AbstractHttpClient.execute(AbstractHttpClient.java:554)
at com.hc.HttpClientFactory.httpGet(HttpClientFactory.java:127)
at com.hc.Fetcher.fetchType(Fetcher.java:449)
at com.hc.Fetcher.fetchType(Fetcher.java:466)
at com.hc.Fetcher.fetchType(Fetcher.java:466)
at com.hc.Fetcher.httpByCode(Fetcher.java:286)
at com.hc.Fetcher.FetchHtml(Fetcher.java:72)
at com.thread.FetchHtml.fetchStart(FetchHtml.java:64)
at com.thread.FetchHtml.run(FetchHtml.java:209)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:619)



这里有我的线程池的实现:

logger.debug("启动");
// Load the crawl configuration: the ring-tone records and the per-province settings.
List songsList = songsService.getSongs();
List provList = provService.getProvs();
if (!songsList.isEmpty() && !provList.isEmpty()) {
    // The work queue is sized for the worst case of one task per (song, province) pair.
    int maxTasks = songsList.size() * provList.size();
    ThreadPoolExecutor producerPool = new ThreadPoolExecutor(
            40, 80, 1, TimeUnit.SECONDS,
            new ArrayBlockingQueue<Runnable>(maxTasks),
            new ThreadPoolExecutor.CallerRunsPolicy());
    // Submit one fetch task for every song/province pair with the same TELTYPE.
    for (Object songEntry : songsList) {
        // A single ring-tone record.
        Map song = (Map) songEntry;
        for (Object provEntry : provList) {
            // The crawl settings of a single province.
            Map province = (Map) provEntry;
            if (!song.get("TELTYPE").equals(province.get("TELTYPE"))) {
                continue;
            }
            producerPool.execute(new FetchHtml(song, province));
        }
    }
    // Stop accepting new tasks; tasks already submitted still run to completion.
    producerPool.shutdown();
}
logger.debug("结束");


这里是我httpclient的配置信息:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.net.ssl.SSLHandshakeException;
import org.apache.http.HttpEntity;
import org.apache.http.HttpEntityEnclosingRequest;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.HttpVersion;
import org.apache.http.NameValuePair;
import org.apache.http.NoHttpResponseException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.HttpRequestRetryHandler;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.params.CoreProtocolPNames;
import org.apache.http.protocol.ExecutionContext;
import org.apache.http.protocol.HTTP;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
import com.anal.UtilComm;

public class HttpClientFactory {

private static final String CHARSET_GBK = "GBK";

/**
 *  异常自动恢复处理
 *  使用HttpRequestRetryHandler接口实现请求的异常恢复
 */
private static HttpRequestRetryHandler requestRetryHandler = new HttpRequestRetryHandler() {
    // 自定义的恢复策略
    public synchronized boolean retryRequest(IOException exception, int executionCount, HttpContext context) {
        // 设置恢复策略,在发生异常时候将自动重试3次
        if (executionCount &gt; 3) {  
            // 超过最大次数则不需要重试  
            return false;  
        }  
        if (exception instanceof NoHttpResponseException) {  
            // 服务停掉则重新尝试连接  
            return true;  
        }  
        if (exception instanceof SSLHandshakeException) {  
            // SSL异常不需要重试  
            return false;  
        } 
        HttpRequest request = (HttpRequest) context.getAttribute(ExecutionContext.HTTP_REQUEST);
        boolean idempotent = (request instanceof HttpEntityEnclosingRequest);
        if (!idempotent) {
            // 请求内容相同则重试
            return true;
        }
        return false;
    }
};

/**
 *  使用ResponseHandler接口处理响应
 *  HttpClient使用ResponseHandler会自动管理连接的释放
 *  解决了对连接的释放管理
 */
private static ResponseHandler&lt;String&gt; responseHandler = new ResponseHandler&lt;String&gt;() {
    // 自定义响应处理
    public synchronized String handleResponse(HttpResponse response)    throws ClientProtocolException, IOException {
        HttpEntity entity = response.getEntity();
        if (entity != null) {
            String charset = EntityUtils.getContentCharSet(entity) == null ? CHARSET_GBK : EntityUtils.getContentCharSet(entity);
            return new String(EntityUtils.toByteArray(entity), charset);
        } else {
            return null;
        }
    }
};

/**
* 获取DefaultHttpClient实例
* 
* @param charset
* 参数编码集, 可空
* @return DefaultHttpClient 对象
*/
public static DefaultHttpClient getDefaultHttpClient(final String charset){
    DefaultHttpClient httpclient = new DefaultHttpClient();
    ArrayList headers = new ArrayList();
    headers.add(new BasicHeader("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*"));
    headers.add(new BasicHeader("Accept-Language", "zh-cn,en-us,zh-tw,en-gb,en;"));
    headers.add(new BasicHeader("Accept-Charset","gbk,gb2312,utf-8,BIG5,ISO-8859-1;"));
    headers.add(new BasicHeader("Connection","Close"));
    headers.add(new BasicHeader("Cache-Control","no-cache"));
    headers.add(new BasicHeader("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; CIBA)"));
    httpclient.getParams().setParameter("http.default-headers", headers);

    //设置http头信息
    httpclient.getParams().setParameter(CoreProtocolPNames.PROTOCOL_VERSION, HttpVersion.HTTP_1_1);
    //模拟浏览器,解决一些服务器程序只允许浏览器访问的问题
    httpclient.getParams().setParameter(CoreProtocolPNames.HTTP_CONTENT_CHARSET, charset == null ? HTTP.UTF_8 : charset);
    httpclient.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT,30000);
    httpclient.getParams().setIntParameter(CoreConnectionPNames.SO_TIMEOUT,60000);
    httpclient.setHttpRequestRetryHandler(requestRetryHandler);
    return httpclient;
}

/**
 * get方式提交抓取网页
 * 
 * @param url
 * @param charset
 * @throws IOException
 * @throws ClientProtocolException
 * @throws IOException
 */
public static String httpGet(HttpClient httpClient, String url,
        String charset) throws ClientProtocolException, IOException {
    HttpGet httpget = new HttpGet(url);
    String content = null;
    // 发送请求,得到响应
    HttpResponse response = httpClient.execute(httpget);
    HttpEntity entity = response.getEntity();
    if (entity != null &amp;&amp; HttpStatus.SC_OK == response.getStatusLine().getStatusCode()) {
        charset = EntityUtils.getContentCharSet(entity) == null ? 
                CHARSET_GBK : EntityUtils.getContentCharSet(entity);
        content = UtilComm.getString(entity.getContent(),charset);
    }

    abortRequest(httpget);
    return content;

}

/**
 * post方式提交抓取网页
 * 
 * @param url
 * @param charset
 * @throws IOException
 * @throws ClientProtocolException
 */
public static String httpPost(HttpClient httpClient, String url,
        String charset) throws ClientProtocolException, IOException {
    HttpPost httppost = new HttpPost(url);
    // 得到提交的POST值
    List&lt;NameValuePair&gt; nvpsList = UtilComm.getNameValuePairs(url);
    httppost.setEntity(new UrlEncodedFormEntity(nvpsList, charset));
    // 得到返回值
    String content = null;
    HttpResponse response = httpClient.execute(httppost);
    HttpEntity entity = response.getEntity();
    if (entity != null &amp;&amp; HttpStatus.SC_OK == response.getStatusLine().getStatusCode()) {
        charset = EntityUtils.getContentCharSet(entity) == null ? 
                CHARSET_GBK : EntityUtils.getContentCharSet(entity);
        content = UtilComm.getString(entity.getContent(),charset);
    }

    abortRequest(httppost);
    return content;
}

/**
* 释放HttpClient连接
* 
* @param hrb
* 请求对象
* @param httpclient
*           client对象
*/
public static void abortRequest(final HttpRequestBase hrb){
    if (hrb != null &amp;&amp; hrb.isAborted()) {
        hrb.abort();
    }
}

public static void shutdown(final HttpClient httpclient) {
    if (httpclient != null) {
        httpclient.getConnectionManager().shutdown();
    }
}

}



经常碰到这类异常,帮忙看看,什么原因造成的呢?
  • 写回答

1条回答 默认 最新

  • beneo 2010-12-06 18:16
    关注

    你在多线程环境下使用 HttpClient 的方式不对:默认的 SingleClientConnManager 同一时刻只能分配一个连接,不能被多个线程共用。

    如果你需要多线程使用httpclient的话,请看这个例子
    [code="java"]
    /**
     * Demonstrates sharing one HttpClient between several threads by backing it
     * with a ThreadSafeClientConnManager. Each URI is fetched by its own worker thread.
     */
    public class ClientMultiThreadedExecution {

        public static void main(String[] args) throws Exception {
            // HTTP parameters: at most 100 pooled connections, HTTP/1.1.
            HttpParams params = new BasicHttpParams();
            ConnManagerParams.setMaxTotalConnections(params, 100);
            HttpProtocolParams.setVersion(params, HttpVersion.HTTP_1_1);

            // Only plain "http" on port 80 is registered.
            SchemeRegistry schemeRegistry = new SchemeRegistry();
            Scheme plainHttp = new Scheme("http", PlainSocketFactory.getSocketFactory(), 80);
            schemeRegistry.register(plainHttp);

            // A client that several threads use at once must be built on
            // the thread-safe connection manager.
            ClientConnectionManager cm = new ThreadSafeClientConnManager(params, schemeRegistry);
            HttpClient httpClient = new DefaultHttpClient(cm, params);

            // The addresses to GET, one worker per address.
            String[] urisToGet = {
                    "http://hc.apache.org/",
                    "http://hc.apache.org/httpcomponents-core/",
                    "http://hc.apache.org/httpcomponents-client/",
                    "http://svn.apache.org/viewvc/httpcomponents/"
            };

            // Build every worker first; worker ids are 1-based.
            GetThread[] workers = new GetThread[urisToGet.length];
            int slot = 0;
            for (String uri : urisToGet) {
                workers[slot] = new GetThread(httpClient, new HttpGet(uri), slot + 1);
                slot++;
            }

            // Launch them all ...
            for (GetThread worker : workers) {
                worker.start();
            }

            // ... and wait until each one has finished.
            for (GetThread worker : workers) {
                worker.join();
            }

            // The client is no longer needed: shut the connection manager down so
            // all system resources are deallocated immediately.
            httpClient.getConnectionManager().shutdown();
        }

        /**
         * Worker thread that performs a single GET with its own HttpContext.
         */
        static class GetThread extends Thread {

            private final HttpClient httpClient;
            private final HttpContext context;
            private final HttpGet httpget;
            private final int id;

            public GetThread(HttpClient httpClient, HttpGet httpget, int id) {
                this.id = id;
                this.httpget = httpget;
                this.httpClient = httpClient;
                this.context = new BasicHttpContext();
            }

            /**
             * Runs the GET and prints progress lines prefixed with this worker's id.
             * On any exception the request is aborted and the error is printed.
             */
            @Override
            public void run() {
                System.out.println(id + " - about to get something from " + httpget.getURI());
                try {
                    HttpResponse response = httpClient.execute(httpget, context);
                    System.out.println(id + " - get executed");

                    HttpEntity body = response.getEntity();
                    if (body == null) {
                        // Nothing to read for a response without an entity.
                        return;
                    }
                    // Read the whole response body as a byte array.
                    byte[] payload = EntityUtils.toByteArray(body);
                    System.out.println(id + " - " + payload.length + " bytes read");
                } catch (Exception e) {
                    httpget.abort();
                    System.out.println(id + " - error: " + e);
                }
            }

        }

    }
    [/code]

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

悬赏问题

  • ¥30 python代码,帮调试
  • ¥15 #MATLAB仿真#车辆换道路径规划
  • ¥15 java 操作 elasticsearch 8.1 实现 索引的重建
  • ¥15 数据可视化Python
  • ¥15 要给毕业设计添加扫码登录的功能!!有偿
  • ¥15 kafka 分区副本增加会导致消息丢失或者不可用吗?
  • ¥15 微信公众号自制会员卡没有收款渠道啊
  • ¥100 Jenkins自动化部署—悬赏100元
  • ¥15 关于#python#的问题:求帮写python代码
  • ¥20 MATLAB画图图形出现上下震荡的线条