package com.lucene.web;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.Socket;
import java.net.UnknownHostException;
import java.nio.charset.StandardCharsets;

/**
 * Minimal HTTP client that opens a plain TCP socket to a web server, sends a
 * hand-written {@code GET /} request, and prints the raw response (status
 * line, headers and body) to standard output.
 */
public class WebHttpClient {

    /**
     * Fetches the front page of {@code www.bnu.edu.cn} over port 80 and prints
     * the complete raw response to the console.
     *
     * <p>The response bytes are decoded exactly once, as UTF-8, by the reader
     * wrapped around the socket's input stream. Decoding with the platform
     * default charset and then "repairing" each line with
     * {@code new String(line.getBytes(), "utf-8")} is lossy: bytes that are
     * not valid in the default charset are replaced during the first decode
     * and can never be recovered, which shows up as stray {@code ?}
     * characters in the output.
     *
     * <p>NOTE(review): UTF-8 is assumed for the page; the charset announced in
     * the response's {@code Content-Type} header is not inspected. Characters
     * that the console's own output encoding cannot represent may still be
     * printed as {@code ?} by {@code System.out}.
     *
     * @param args command-line arguments; not used
     * @throws UnknownHostException if the host name cannot be resolved
     * @throws IOException if connecting, sending the request, or reading the
     *         response fails
     */
    public static void main(String[] args) throws UnknownHostException, IOException {
        StringBuilder sb = new StringBuilder(8096);

        // try-with-resources closes the socket (and with it both streams)
        // even when an I/O error interrupts the exchange.
        try (Socket webClient = new Socket("www.bnu.edu.cn", 80)) {
            // Auto-flush is off: the request is flushed once, after the blank
            // line that terminates the header section.
            PrintWriter result = new PrintWriter(webClient.getOutputStream(), false);
            // Decode the response as UTF-8 here, once, instead of relying on
            // the platform default charset.
            BufferedReader receiver = new BufferedReader(new InputStreamReader(
                    webClient.getInputStream(), StandardCharsets.UTF_8));

            // Send the HTTP request. HTTP requires CRLF line endings, so they
            // are written explicitly rather than via println(), which uses the
            // platform line separator.
            result.print("GET / HTTP/1.1\r\n");
            result.print("Host: bnu.edu.cn\r\n");
            // Ask the server to close the connection once the response is
            // complete, so that reading until end-of-stream terminates.
            result.print("Connection: Close\r\n");
            result.print("\r\n");
            // PrintWriter never throws on write failure; checkError() flushes
            // the request and reports whether anything went wrong.
            if (result.checkError()) {
                throw new IOException("Failed to send HTTP request");
            }

            // Read the response line by line. readLine() blocks until data is
            // available and returns null at end-of-stream, so no polling of
            // ready() is needed.
            String line;
            while ((line = receiver.readLine()) != null) {
                sb.append(line).append('\n');
            }
        }

        // Print the fetched response text to the console.
        System.out.println(sb.toString());
    }
}
运行后，抓取下来的网页里出现了几个无法解码的汉字，以问号的形式显示，但数量很少，对此问题表示不解。