ALU_1112 2021-04-27 17:46 采纳率: 50%
浏览 41

java 代理 读取流是乱码,而且浏览器无法接收发送的缓存文件。

我使用java写一个很简单的http代理,只是http代理,所以很简单, 

逻辑就是 有缓存文件,读缓存文件,没有缓存文件就转发请求到原始网站。获取回复,保存成本地文件。

完整代码如下:

import java.io.*;
import java.net.ServerSocket;
import java.net.Socket;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.zip.GZIPInputStream;

public class Proxy {
    static int TIMEOUT = 5000;
    static int BUFFSIZE = 8000;
    static boolean writeCache = false;
    static File folder;
    static File cache;
    static BufferedWriter bw;

    public static void main(String[] args) {
        // the socket server for the browser;
        System.out.println("listening the port");
        int port = 3000;

        if(args.length > 0){
            port = Integer.parseInt( args[0] );
        }

        ServerSocket server = getServerInstant(port);
        Socket client = null;
        while (true){
            try{
                client = server.accept();
            }catch (Exception e){
                e.printStackTrace();
            }

            try{
                // 获取请求的时间
                System.out.println( "Timestamp: " + new Date().toString() );
                // 整理浏览器发来的请求到一个数据结构中
                ArrayList<String> request = getRequest( client );
                client.setSoTimeout( TIMEOUT );
                server.setSoTimeout( TIMEOUT );
                BufferedReader reader;
                
                // 读取缓存或者转发请求得到的流的
                InputStream html = parsing( request );

                StringBuffer text = new StringBuffer();
                int t =0;
                byte[] packet = new byte[BUFFSIZE];
                String tmp ;
            
                // 准备根据得到的流写入浏览器
                PrintWriter out = new PrintWriter( client.getOutputStream(), true );
                //GZIPInputStream gzip = new GZIPInputStream( html );
                reader = new BufferedReader( new InputStreamReader(html,StandardCharsets.UTF_8) );
                // 如果需要写缓存就创建文件创建buffereWriter
                if(writeCache)
                    creatFileWrieter();
                // 给浏览器发流 以及写入缓存
                while( ( tmp = reader.readLine()) != null ) {
                    System.out.println(">>"+tmp);
                    if(writeCache)
                        saveCache( tmp );
                    out.println(tmp);
                }
                bw.close();
//                        out.close();
            }catch (java.net.UnknownHostException jnu){
                System.out.println("ignore this request");
            }
            catch (Exception e){
                e.printStackTrace();
            }

            System.out.println("finish one case.");
        }
    }

    private static void creatFileWrieter() throws IOException {
        if(!folder.exists())
            folder.mkdir();
        if(!cache.exists())
            cache.createNewFile();
        bw = new BufferedWriter( new FileWriter( cache.getPath(),true ) );
    }

    private static void saveCache(String onLineCache) throws Exception{

        bw.write( onLineCache + "\r\n");
    }

    // 根据行程list的请求获取反馈(读缓存,或者转发回复)
    private static InputStream parsing(ArrayList<String> list) throws Exception {

        if(list.size() <= 1){
            return null;
        }

        String htmlContent ="";
        InputStream htmlStream;
        String requestLine = list.get(0);

        // split the request into different parts.
        String splits[] = getFirst(requestLine);
        System.out.println("request elements :" + Arrays.toString(splits));

        folder = new File( "./"+splits[2]+splits[3] );
//        System.out.println("cache path: " +  folder.getPath());
        cache = new File( folder.getPath()+"/default");

        // cache is stored
        //  System.out.println("check if there is cache");
        // 有缓存文件直接打开文件获得流
        if(folder.exists() && cache.exists())
            htmlStream = new DataInputStream( new FileInputStream( cache ) );
        // 没有缓存
        else{
            writeCache = true;

            // forward the request and get response
            // System.out.println("-------from original");
            // 从通过转发方法从服务器获取流
            htmlStream =  forwardRequest( splits,list );
        }
        //  return it back
        return htmlStream;
    }

    // forward the request if there is not a cache
    // and return the response from the original server. return the InputStream
    // 转发到服务器,然后获得流
    private static InputStream forwardRequest(String[] splits, ArrayList<String> headers) throws IOException {

        headers.set(0,splits[0] + splits[3] + " " + splits[5] );
        headers.set(1,"Host: " + splits[2] );

        String newRequest = "";
        for(String x : headers){
            newRequest += x+"\r\n";
        }

        int port = Integer.parseInt( splits[4] );
//        System.out.println("-------------->"+newRequest);
        Socket proxyclient = new Socket(splits[2],port);
        // forwarding the request
        PrintWriter headSender = new PrintWriter(proxyclient.getOutputStream(),true);
        headSender.println(newRequest);

        InputStream dis = new DataInputStream( proxyclient.getInputStream() );
        return dis;
    }

    // get the first line of the headers.
    // 根据第一行header获取方法名,协议,域名,文件菜单,端口号等相关信息。
    private static String[] getFirst(String requestLine) {
        //[method,http,domain,port,path,version]
        String[] splits = new String[6];
        int i = requestLine.indexOf(" ");
        int j = requestLine.indexOf(" ",++i);
        // GET
        splits[0] = requestLine.substring(0, i);
        // Protocol
        splits[5] = requestLine.substring(j+1);
        String url = requestLine.substring(i, j);
        String[] urlParts = parseUrl( url );
        // http
        splits[1] = urlParts[0];
        // domain
        splits[2] = urlParts[1];
        int k = urlParts[2].lastIndexOf( ":" );
        // if there is a port number;
        // sub domain[3] and port[4].
        if(k > 0){
            splits[3] = urlParts[2].substring( 0,k );
            splits[4] = urlParts[2].substring( k+1 );
        }else{
            splits[3] = urlParts[2];
            splits[4] = "80";
        }
        return splits;
    }

    // parse url part in the request
    // 根据url判断端口号,域名
    private static String[] parseUrl(String url){
        url = url.substring( 1 );
        int i=0,j=0;
        String[] urlParts = new String[3];
        while(i < url.length() && url.charAt(i)!='/')i++;
        if(i==url.length()){
            urlParts[0] = "";
            urlParts[1] = url;
            urlParts[2] = "/";
        }
        // this is the http:// or https://
        if(i>0 && url.charAt(i-1)==':' && i+1 < url.length() && url.charAt(i+1)=='/'){
            urlParts[0] = url.substring(0,i-1);
            j = url.indexOf('/',i+=2);
            if(j>0){
                urlParts[1] = url.substring(i, j);
                urlParts[2] = url.substring(j);
            }
            else{
                urlParts[1] = url.substring(i);
                urlParts[2] = "/";
            }
        }
        else{
            urlParts[0] = "";
            urlParts[1] = url.substring(0, i);
            urlParts[2] = url.substring(i);
        }
        return urlParts;
    }

    // separate the request into the array list.
    // 根据浏览器发来的内容,将每行请求分别放入list中。
    static ArrayList<String> getRequest(Socket client) throws IOException {
        InputStream is = client.getInputStream();
        BufferedReader reader = new BufferedReader( new InputStreamReader( is,"UTF-8" ));
        ArrayList<String> list = new ArrayList();
        int t = 0 ;
        String tmp ;
        while((tmp = reader.readLine()) != null){
            System.out.println( "into list: " + tmp);
            list.add( tmp  );
            // "/r/n" empty line for the end.
            if(tmp.length() == 0)
                t++;
            if(t==1)
                break;
            System.out.println(t);
        }
        System.out.println("finish");

        return list;
    }

    // return a ServerSocket Instance
    static ServerSocket getServerInstant(int port){
        ServerSocket server = null;
        int c = 0;
        while(server==null && ++c<100){
            try {
                server = new ServerSocket(port);
                return server;
            } catch (IOException e) {
                // renew the port
                System.out.println("invalid port retry another port: " + port );
            }
        }
        return null;
    }
}

打开浏览器 输入http://localhost:3000/http://www.bom.gov.au,缓存的内容为乱码。而实际的html文件内容应该是一下代码段的内容,

通过 正常访问,获取 http://www.bom.gov.au的html,如果把内容复制到缓存中,用IE浏览器打开,只能得到一半的html文件,也就是说 浏览器没有收全信息。

只收到了122 行 其实总共有800多行。

可能是读取流文件的时候, 有问题,但是有的网站又能直接打开。

比如输入http://localhost:3000/http://autoidlab.cs.adelaide.edu.au 

我怀疑是这个方法:

private static InputStream forwardRequest(String[] splits, ArrayList<String> headers) throws IOException {

        headers.set(0,splits[0] + splits[3] + " " + splits[5] );
        headers.set(1,"Host: " + splits[2] );

        String newRequest = "";
        for(String x : headers){
            newRequest += x+"\r\n";
        }

        int port = Integer.parseInt( splits[4] );
//        System.out.println("-------------->"+newRequest);
        Socket proxyclient = new Socket(splits[2],port);
        // forwarding the request
        PrintWriter headSender = new PrintWriter(proxyclient.getOutputStream(),true);
        headSender.println(newRequest);

        InputStream dis = new DataInputStream( proxyclient.getInputStream() );
        return dis;
    }

或者主方法中:

                while( ( tmp = reader.readLine()) != null ) {
                    System.out.println(">>"+tmp);
                    if(writeCache)
                        saveCache( tmp );
                    out.println(tmp);
                }

这块的问题。

但是我试过很对,甚至一个用byte去发, 也是不行。

望大神赐教

  • 写回答

2条回答 默认 最新

  • BCS-点心 2021-04-28 09:09
    关注

    这个网站http://www.bom.gov.au/你想获取它的HTML代码,用来干嘛呢?

    评论

报告相同问题?

悬赏问题

  • ¥15 delta降尺度计算的一些细节,有偿
  • ¥15 Arduino红外遥控代码有问题
  • ¥15 数值计算离散正交多项式
  • ¥30 数值计算均差系数编程
  • ¥15 redis-full-check比较 两个集群的数据出错
  • ¥15 Matlab编程问题
  • ¥15 训练的多模态特征融合模型准确度很低怎么办
  • ¥15 kylin启动报错log4j类冲突
  • ¥15 超声波模块测距控制点灯,灯的闪烁很不稳定,经过调试发现测的距离偏大
  • ¥15 import arcpy出现importing _arcgisscripting 找不到相关程序