我使用java写一个很简单的http代理,只是http代理,所以很简单,
逻辑就是 有缓存文件,读缓存文件,没有缓存文件就转发请求到原始网站。获取回复,保存成本地文件。
完整代码如下:
import java.io.*;
import java.net.ServerSocket;
import java.net.Socket;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.zip.GZIPInputStream;
public class Proxy {
static int TIMEOUT = 5000;
static int BUFFSIZE = 8000;
static boolean writeCache = false;
static File folder;
static File cache;
static BufferedWriter bw;
public static void main(String[] args) {
// the socket server for the browser;
System.out.println("listening the port");
int port = 3000;
if(args.length > 0){
port = Integer.parseInt( args[0] );
}
ServerSocket server = getServerInstant(port);
Socket client = null;
while (true){
try{
client = server.accept();
}catch (Exception e){
e.printStackTrace();
}
try{
// 获取请求的时间
System.out.println( "Timestamp: " + new Date().toString() );
// 整理浏览器发来的请求到一个数据结构中
ArrayList<String> request = getRequest( client );
client.setSoTimeout( TIMEOUT );
server.setSoTimeout( TIMEOUT );
BufferedReader reader;
// 读取缓存或者转发请求得到的流的
InputStream html = parsing( request );
StringBuffer text = new StringBuffer();
int t =0;
byte[] packet = new byte[BUFFSIZE];
String tmp ;
// 准备根据得到的流写入浏览器
PrintWriter out = new PrintWriter( client.getOutputStream(), true );
//GZIPInputStream gzip = new GZIPInputStream( html );
reader = new BufferedReader( new InputStreamReader(html,StandardCharsets.UTF_8) );
// 如果需要写缓存就创建文件创建buffereWriter
if(writeCache)
creatFileWrieter();
// 给浏览器发流 以及写入缓存
while( ( tmp = reader.readLine()) != null ) {
System.out.println(">>"+tmp);
if(writeCache)
saveCache( tmp );
out.println(tmp);
}
bw.close();
// out.close();
}catch (java.net.UnknownHostException jnu){
System.out.println("ignore this request");
}
catch (Exception e){
e.printStackTrace();
}
System.out.println("finish one case.");
}
}
private static void creatFileWrieter() throws IOException {
if(!folder.exists())
folder.mkdir();
if(!cache.exists())
cache.createNewFile();
bw = new BufferedWriter( new FileWriter( cache.getPath(),true ) );
}
private static void saveCache(String onLineCache) throws Exception{
bw.write( onLineCache + "\r\n");
}
// 根据行程list的请求获取反馈(读缓存,或者转发回复)
private static InputStream parsing(ArrayList<String> list) throws Exception {
if(list.size() <= 1){
return null;
}
String htmlContent ="";
InputStream htmlStream;
String requestLine = list.get(0);
// split the request into different parts.
String splits[] = getFirst(requestLine);
System.out.println("request elements :" + Arrays.toString(splits));
folder = new File( "./"+splits[2]+splits[3] );
// System.out.println("cache path: " + folder.getPath());
cache = new File( folder.getPath()+"/default");
// cache is stored
// System.out.println("check if there is cache");
// 有缓存文件直接打开文件获得流
if(folder.exists() && cache.exists())
htmlStream = new DataInputStream( new FileInputStream( cache ) );
// 没有缓存
else{
writeCache = true;
// forward the request and get response
// System.out.println("-------from original");
// 从通过转发方法从服务器获取流
htmlStream = forwardRequest( splits,list );
}
// return it back
return htmlStream;
}
// forward the request if there is not a cache
// and return the response from the original server. return the InputStream
// 转发到服务器,然后获得流
private static InputStream forwardRequest(String[] splits, ArrayList<String> headers) throws IOException {
headers.set(0,splits[0] + splits[3] + " " + splits[5] );
headers.set(1,"Host: " + splits[2] );
String newRequest = "";
for(String x : headers){
newRequest += x+"\r\n";
}
int port = Integer.parseInt( splits[4] );
// System.out.println("-------------->"+newRequest);
Socket proxyclient = new Socket(splits[2],port);
// forwarding the request
PrintWriter headSender = new PrintWriter(proxyclient.getOutputStream(),true);
headSender.println(newRequest);
InputStream dis = new DataInputStream( proxyclient.getInputStream() );
return dis;
}
// get the first line of the headers.
// 根据第一行header获取方法名,协议,域名,文件菜单,端口号等相关信息。
private static String[] getFirst(String requestLine) {
//[method,http,domain,port,path,version]
String[] splits = new String[6];
int i = requestLine.indexOf(" ");
int j = requestLine.indexOf(" ",++i);
// GET
splits[0] = requestLine.substring(0, i);
// Protocol
splits[5] = requestLine.substring(j+1);
String url = requestLine.substring(i, j);
String[] urlParts = parseUrl( url );
// http
splits[1] = urlParts[0];
// domain
splits[2] = urlParts[1];
int k = urlParts[2].lastIndexOf( ":" );
// if there is a port number;
// sub domain[3] and port[4].
if(k > 0){
splits[3] = urlParts[2].substring( 0,k );
splits[4] = urlParts[2].substring( k+1 );
}else{
splits[3] = urlParts[2];
splits[4] = "80";
}
return splits;
}
// parse url part in the request
// 根据url判断端口号,域名
private static String[] parseUrl(String url){
url = url.substring( 1 );
int i=0,j=0;
String[] urlParts = new String[3];
while(i < url.length() && url.charAt(i)!='/')i++;
if(i==url.length()){
urlParts[0] = "";
urlParts[1] = url;
urlParts[2] = "/";
}
// this is the http:// or https://
if(i>0 && url.charAt(i-1)==':' && i+1 < url.length() && url.charAt(i+1)=='/'){
urlParts[0] = url.substring(0,i-1);
j = url.indexOf('/',i+=2);
if(j>0){
urlParts[1] = url.substring(i, j);
urlParts[2] = url.substring(j);
}
else{
urlParts[1] = url.substring(i);
urlParts[2] = "/";
}
}
else{
urlParts[0] = "";
urlParts[1] = url.substring(0, i);
urlParts[2] = url.substring(i);
}
return urlParts;
}
// separate the request into the array list.
// 根据浏览器发来的内容,将每行请求分别放入list中。
static ArrayList<String> getRequest(Socket client) throws IOException {
InputStream is = client.getInputStream();
BufferedReader reader = new BufferedReader( new InputStreamReader( is,"UTF-8" ));
ArrayList<String> list = new ArrayList();
int t = 0 ;
String tmp ;
while((tmp = reader.readLine()) != null){
System.out.println( "into list: " + tmp);
list.add( tmp );
// "/r/n" empty line for the end.
if(tmp.length() == 0)
t++;
if(t==1)
break;
System.out.println(t);
}
System.out.println("finish");
return list;
}
// return a ServerSocket Instance
static ServerSocket getServerInstant(int port){
ServerSocket server = null;
int c = 0;
while(server==null && ++c<100){
try {
server = new ServerSocket(port);
return server;
} catch (IOException e) {
// renew the port
System.out.println("invalid port retry another port: " + port );
}
}
return null;
}
}
打开浏览器 输入http://localhost:3000/http://www.bom.gov.au,缓存的内容为乱码。而实际的html文件内容应该是一下代码段的内容,
通过 正常访问,获取 http://www.bom.gov.au的html,如果把内容复制到缓存中,用IE浏览器打开,只能得到一半的html文件,也就是说 浏览器没有收全信息。
只收到了122 行 其实总共有800多行。
可能是读取流文件的时候, 有问题,但是有的网站又能直接打开。
比如输入http://localhost:3000/http://autoidlab.cs.adelaide.edu.au
我怀疑是这个方法:
private static InputStream forwardRequest(String[] splits, ArrayList<String> headers) throws IOException {
headers.set(0,splits[0] + splits[3] + " " + splits[5] );
headers.set(1,"Host: " + splits[2] );
String newRequest = "";
for(String x : headers){
newRequest += x+"\r\n";
}
int port = Integer.parseInt( splits[4] );
// System.out.println("-------------->"+newRequest);
Socket proxyclient = new Socket(splits[2],port);
// forwarding the request
PrintWriter headSender = new PrintWriter(proxyclient.getOutputStream(),true);
headSender.println(newRequest);
InputStream dis = new DataInputStream( proxyclient.getInputStream() );
return dis;
}
或者主方法中:
while( ( tmp = reader.readLine()) != null ) {
System.out.println(">>"+tmp);
if(writeCache)
saveCache( tmp );
out.println(tmp);
}
这块的问题。
但是我试过很对,甚至一个用byte去发, 也是不行。
望大神赐教