package com.lucene.web;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.Socket;
import java.net.UnknownHostException;
/**
 * Minimal HTTP/1.1 client: opens a raw socket to a fixed host, sends a
 * {@code GET /} request and prints the complete response (status line,
 * headers and body) to standard output.
 */
public class WebHttpClient {

    /** Host name the socket connects to. */
    private static final String SERVER_HOST = "www.bnu.edu.cn";

    /**
     * Value sent in the {@code Host} request header.
     * NOTE(review): this differs from {@link #SERVER_HOST} (no "www." prefix);
     * kept exactly as in the original request -- confirm this is intended.
     */
    private static final String HOST_HEADER = "bnu.edu.cn";

    /** TCP port the socket connects to. */
    private static final int HTTP_PORT = 80;

    /**
     * Charset used to decode the response bytes.
     * NOTE(review): assumes the server answers in UTF-8; the charset announced
     * in the response's Content-Type header is not inspected.
     */
    private static final String RESPONSE_CHARSET = "UTF-8";

    /** Initial capacity of the buffer that collects the response text. */
    private static final int RESPONSE_BUFFER_CAPACITY = 8096;

    /**
     * Fetches the page and prints the decoded response to the console.
     *
     * @param args ignored
     * @throws UnknownHostException if the server host name cannot be resolved
     * @throws IOException if connecting, sending the request or reading the
     *         response fails
     */
    public static void main(String[] args) throws UnknownHostException,
            IOException {
        Socket webClient = new Socket(SERVER_HOST, HTTP_PORT);
        try {
            // Send the HTTP request. The request is pure ASCII, so it is
            // written as bytes with an explicit charset instead of relying on
            // the platform default.
            webClient.getOutputStream().write(
                    buildRequest(HOST_HEADER).getBytes("US-ASCII"));
            webClient.getOutputStream().flush();

            // Receive the response and print the fetched page to the console.
            // readLine() blocks until data arrives, so no ready() polling is
            // needed; "Connection: Close" makes the server end the stream.
            System.out.println(readResponse(webClient.getInputStream()));
        } finally {
            // Always release the socket, even when an I/O call above throws.
            webClient.close();
        }
    }

    /**
     * Builds the text of the {@code GET /} request.
     *
     * @param host value to send in the {@code Host} header
     * @return the request head; every line ends with CRLF and the head is
     *         terminated by an empty line
     */
    static String buildRequest(String host) {
        // "Connection: Close" asks the server to close the connection once
        // the response has been sent.
        return "GET / HTTP/1.1\r\n"
                + "Host: " + host + "\r\n"
                + "Connection: Close\r\n"
                + "\r\n";
    }

    /**
     * Reads {@code in} line by line until end of stream and returns the text.
     *
     * <p>The bytes are decoded exactly once, as UTF-8, directly on the stream.
     * Decoding with the platform default charset and then re-encoding and
     * decoding the resulting string again is lossy whenever the platform
     * charset is not UTF-8, which shows up as '?' characters in the output.
     *
     * @param in stream to read; it is not closed by this method
     * @return all lines read, each followed by a single {@code '\n'}; the
     *         empty string if the stream has no data
     * @throws IOException if reading from the stream fails
     */
    static String readResponse(InputStream in) throws IOException {
        BufferedReader receiver = new BufferedReader(
                new InputStreamReader(in, RESPONSE_CHARSET));
        StringBuilder sb = new StringBuilder(RESPONSE_BUFFER_CAPACITY);
        String line;
        while ((line = receiver.readLine()) != null) {
            sb.append(line).append('\n');
        }
        return sb.toString();
    }
}
运行完后,抓取下来的网页里出现了几个不能解码的汉字,以问号的形式出现,但数量很少,对此问题表示不解。