AugustLeo_cheng 2015-08-26 02:37 采纳率: 100%
浏览 4088

爬虫,跪求搜索功能,怎么往网页搜索框注入值

之前找到了一段代码(已忘记出处),想在它的基础上添加一个搜索功能。目前我只写了从控制台读取搜索内容的部分,但不知道怎么把这个值提交到网页的搜索框里。跪求补全。

  • 写回答

2条回答 默认 最新

  • AugustLeo_cheng 2015-08-26 02:37
    关注

    package http.demo;

    import java.io.BufferedReader;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.io.OutputStream;
    import java.io.PrintWriter;
    import java.net.HttpURLConnection;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.net.URLConnection;
    import java.net.URLEncoder;
    import java.nio.charset.Charset;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.StringTokenizer;

    public class CookieUtil {

    /** Name of the HTTP response header that is passed to getEncodingFromContentType. */
    public final static String CONTENT_TYPE = "Content-Type";

    /** Connection assigned by connect(String) and read by readContents(); null until connect succeeds. */
    private static URLConnection connection;

    /**
     * Opens a connection to the given address, stores it in the shared
     * {@code connection} field and prints the runtime class of that connection.
     * Any failure is reported through {@code printStackTrace()} and otherwise ignored.
     *
     * @param urlString address to open
     */
    private static void connect(String urlString) {
        try {
            connection = new URL(urlString).openConnection();
            System.out.println(connection.getClass());
        } catch (IOException failure) {
            // MalformedURLException is an IOException, so one handler covers both cases.
            failure.printStackTrace();
        }
    }

    /**
     * Prints the response of the shared {@code connection} to standard output,
     * one line at a time.
     *
     * <p>Does nothing (apart from a message on standard error) when no connection
     * has been opened yet. I/O failures are reported through
     * {@code printStackTrace()}. The reader is always closed, even on failure.
     */
    private static void readContents() {
        if (connection == null) {
            // connect(String) leaves the field null when it fails.
            System.err.println("readContents: no open connection");
            return;
        }

        BufferedReader in = null;
        try {
            in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                System.out.println(inputLine);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Interactive entry point.
     *
     * <ol>
     *   <li>Fetches the start page with {@code getRandom} (which also stores the
     *       response body as {@code code.bmp} under {@code d:/}) and turns its
     *       {@code Set-Cookie} headers into a single {@code Cookie} request header.</li>
     *   <li>Reads the captcha text and the search text from standard input.</li>
     *   <li>POSTs the form values to the search URL, then POSTs to the start page
     *       again and saves that response body to {@code D:/浙江.html}.</li>
     * </ol>
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String loginUrl = "http://gsxt.zjaic.gov.cn/search/doGetAppSearchResult.do";
        String rateReviewUrl = "http://gsxt.zjaic.gov.cn/zhejiang.jsp;jsessionid=E43ABD225C6526877C02283D8D266D43-n1.gsxt44";

        // Fetch the page / captcha image; the response body is saved under d:/.
        Content content = getRandom("GET", rateReviewUrl, null, null, false, "d:/");
        if (content == null) {
            // getRandom returns null on any failure; nothing sensible can follow.
            System.err.println("Request failed: " + rateReviewUrl);
            return;
        }

        // Carry the session cookies over to the following requests.
        Map<String, String> resmap = new HashMap<String, String>();
        List<String> lsit = content.getHeaders().get("Set-Cookie");
        if (lsit != null) {
            String cookieHeader = buildCookieHeader(lsit);
            System.out.println("sb.toString() = " + cookieHeader);
            resmap.put("Cookie", cookieHeader);
        }

        // A single reader for both prompts: a second BufferedReader on System.in
        // could miss input already buffered by the first one.
        BufferedReader console = new BufferedReader(new InputStreamReader(System.in));

        System.out.print("请输入验证码:");
        String a = readLineOrEmpty(console);
        System.out.println("输入的数是:" + a);

        String userCode = "yourname";
        String password = "yourpass";

        System.out.print("请输入搜索内容:");
        String search = readLineOrEmpty(console);

        // NOTE(review): each key must equal the name attribute of the matching
        // <input> in the target page's form (the search box included) - confirm
        // the real field names against the page's HTML.
        Map<String, String> paramMap = new HashMap<String, String>();
        paramMap.put("userCode", userCode);
        paramMap.put("password", password);
        paramMap.put("search", search);
        paramMap.put("random", a);

        content = curl("POST", loginUrl, paramMap, resmap, false, "");
        // Parentheses are required: '+' binds tighter than '==' and '?:'.
        System.out.println("第一次 content.getBody()= " + (content == null ? "no body" : content.getBody()));

        content = curl("POST", rateReviewUrl, new HashMap<String, String>(), resmap, false, "");
        if (content != null) {
            inFile(content.getBody(), "D:/浙江.html");
        }
        System.out.println("第二次content.getBody() = " + (content == null ? "no body" : content.getBody()));
    }

    /**
     * Builds the value of a {@code Cookie} request header from {@code Set-Cookie}
     * response header values: keeps only the leading {@code name=value} pair of
     * each entry and joins the pairs with {@code ;}. Entries without {@code =}
     * are skipped. Each raw name/value is echoed to standard output.
     */
    private static String buildCookieHeader(List<String> setCookieValues) {
        StringBuilder header = new StringBuilder();
        for (String val : setCookieValues) {
            if (val == null) {
                continue;
            }
            int pos = val.indexOf("=");
            if (pos == -1) {
                continue;
            }
            String cookieName = val.substring(0, pos);
            String cookieVal = val.substring(pos + 1);
            System.out.println(cookieName + ":" + cookieVal);

            // Drop the attributes (Path, Expires, ...) that follow the value.
            int attrStart = cookieVal.indexOf(';');
            if (attrStart != -1) {
                cookieVal = cookieVal.substring(0, attrStart);
            }
            if (header.length() > 0) {
                header.append(";");
            }
            header.append(cookieName).append("=").append(cookieVal);
        }
        return header.toString();
    }

    /** Reads one line from the reader; returns "" on end of input or on an I/O error. */
    private static String readLineOrEmpty(BufferedReader reader) {
        try {
            String line = reader.readLine();
            return line == null ? "" : line;
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Sends a GET or POST request and returns the response wrapped in a {@code Content}.
     *
     * <p>For POST the entries of {@code paramMap} are sent as a URL-encoded
     * (UTF-8) form body. 301/302 responses are followed manually, re-sending the
     * request as POST to the new location, for at most ten hops.
     *
     * @param method             "GET" or "POST" (case-insensitive); anything else, including
     *                           {@code null}, is treated as POST
     * @param sUrl               URL to request
     * @param paramMap           form fields for a POST body; may be {@code null} or empty
     * @param requestHeaderMap   extra request headers (e.g. the Cookie header); may be {@code null}
     * @param isOnlyReturnHeader when {@code true} the response body is not read and the
     *                           returned body is empty
     * @param path               not used by this method
     * @return the response for status 200/201, or {@code null} for any other status
     *         or when the request fails (the exception is printed)
     */
    public static Content curl(String method, String sUrl, Map<String, String> paramMap,
            Map<String, String> requestHeaderMap, boolean isOnlyReturnHeader, String path) {
        // Cap on chained 301/302 responses so a redirect loop cannot recurse forever.
        final int maxRedirects = 10;
        return curlWithRedirectBudget(method, sUrl, paramMap, requestHeaderMap,
                isOnlyReturnHeader, path, maxRedirects);
    }

    /** Performs one request for curl; redirectsLeft is the number of further redirects allowed. */
    private static Content curlWithRedirectBudget(String method, String sUrl, Map<String, String> paramMap,
            Map<String, String> requestHeaderMap, boolean isOnlyReturnHeader, String path, int redirectsLeft) {
        System.out.println("-------------" + sUrl + "-------------------");

        HttpURLConnection httpUrlConnection = null;
        try {
            // Normalise the verb before deriving isPost, so null / "post" / unknown
            // verbs really are sent as POST with a body.
            boolean isPost = !"GET".equalsIgnoreCase(method);
            String verb = isPost ? "POST" : "GET";

            URL url = new URL(sUrl);
            httpUrlConnection = (HttpURLConnection) url.openConnection();
            httpUrlConnection.setRequestMethod(verb);
            httpUrlConnection.setRequestProperty("Accept-Language", "zh-cn,zh;q=0.5");

            // Do not follow redirects automatically; they are handled below.
            httpUrlConnection.setInstanceFollowRedirects(false);
            httpUrlConnection.setDoInput(true);
            httpUrlConnection.setConnectTimeout(5000);
            httpUrlConnection.setReadTimeout(5000);
            httpUrlConnection.setUseCaches(false);
            httpUrlConnection.setDefaultUseCaches(false);

            if (requestHeaderMap != null) {
                for (Map.Entry<String, String> entry : requestHeaderMap.entrySet()) {
                    String key = entry.getKey();
                    String val = entry.getValue();
                    if (key != null && val != null) {
                        httpUrlConnection.setRequestProperty(key, val);
                    }
                }
            }

            if (isPost) {
                byte[] postContent = encodeFormBody(paramMap);
                httpUrlConnection.setDoOutput(true);
                httpUrlConnection.setFixedLengthStreamingMode(postContent.length);
                OutputStream postOut = httpUrlConnection.getOutputStream();
                try {
                    postOut.write(postContent);
                    postOut.flush();
                } finally {
                    postOut.close();
                }
            }

            int responseCode = httpUrlConnection.getResponseCode();

            if (responseCode == HttpURLConnection.HTTP_MOVED_PERM
                    || responseCode == HttpURLConnection.HTTP_MOVED_TEMP) {
                String location = httpUrlConnection.getHeaderField("Location");
                if (location == null || redirectsLeft <= 0) {
                    System.err.println("Giving up on redirect from " + sUrl);
                    return null;
                }
                // Location may be relative; resolve it against the current URL.
                URL newAction = new URL(url, location);
                return curlWithRedirectBudget("POST", newAction.toExternalForm(), paramMap,
                        requestHeaderMap, isOnlyReturnHeader, path, redirectsLeft - 1);
            }

            if (responseCode == HttpURLConnection.HTTP_OK
                    || responseCode == HttpURLConnection.HTTP_CREATED) {
                byte[] bytes = new byte[0];
                if (!isOnlyReturnHeader) {
                    bytes = readResponseBody(httpUrlConnection.getInputStream());
                }
                String encoding = getEncodingFromContentType(httpUrlConnection.getHeaderField(CONTENT_TYPE));
                if (encoding == null) {
                    // No usable charset information: decode as UTF-8.
                    encoding = "UTF-8";
                }
                return new Content(sUrl, new String(bytes, encoding), httpUrlConnection.getHeaderFields());
            }

            // Any other status yields no content.
            return null;
        } catch (Exception e) {
            // Callers only see null, so at least report what went wrong.
            e.printStackTrace();
            return null;
        } finally {
            if (httpUrlConnection != null) {
                httpUrlConnection.disconnect();
            }
        }
    }

    /** URL-encodes the map as name=value pairs joined by '&'; null map gives an empty body. */
    private static byte[] encodeFormBody(Map<String, String> paramMap) throws IOException {
        StringBuilder form = new StringBuilder();
        if (paramMap != null) {
            for (Map.Entry<String, String> entry : paramMap.entrySet()) {
                if (entry.getKey() == null) {
                    continue;
                }
                if (form.length() > 0) {
                    form.append('&');
                }
                String value = entry.getValue() == null ? "" : entry.getValue();
                form.append(URLEncoder.encode(entry.getKey(), "UTF-8"));
                form.append('=');
                form.append(URLEncoder.encode(value, "UTF-8"));
            }
        }
        return form.toString().getBytes("UTF-8");
    }

    /** Reads the stream to its end, closes it and returns the bytes read. */
    private static byte[] readResponseBody(InputStream in) throws IOException {
        try {
            ByteArrayOutputStream bout = new ByteArrayOutputStream();
            byte[] buf = new byte[1024];
            int rc;
            while ((rc = in.read(buf)) > 0) {
                bout.write(buf, 0, rc);
            }
            return bout.toByteArray();
        } finally {
            in.close();
        }
    }

    /**
     * Requests {@code sUrl} and, unless {@code isOnlyReturnHeader} is set, saves the
     * response body as the file {@code code.bmp} inside directory {@code path}
     * (the captcha image location).
     *
     * <p>Redirects are not followed. The returned {@code Content} always has an
     * empty body; it carries the response headers (e.g. {@code Set-Cookie}).
     *
     * @param method             "GET" or "POST" (case-insensitive); anything else, including
     *                           {@code null}, is treated as POST
     * @param sUrl               URL to request
     * @param paramMap           not used by this method
     * @param requestHeaderMap   not used by this method
     * @param isOnlyReturnHeader when {@code true} the body is neither read nor saved
     * @param path               directory in which {@code code.bmp} is written
     * @return the response headers wrapped in a {@code Content} for status 200/201,
     *         or {@code null} for any other status or when the request or the
     *         file write fails (the exception is printed)
     */
    public static Content getRandom(String method, String sUrl, Map<String, String> paramMap,
            Map<String, String> requestHeaderMap, boolean isOnlyReturnHeader, String path) {
        HttpURLConnection httpUrlConnection = null;
        try {
            String verb = "GET".equalsIgnoreCase(method) ? "GET" : "POST";

            httpUrlConnection = (HttpURLConnection) new URL(sUrl).openConnection();
            httpUrlConnection.setRequestMethod(verb);
            httpUrlConnection.setRequestProperty("Accept-Language", "zh-cn,zh;q=0.5");

            // Redirects are not followed here.
            httpUrlConnection.setInstanceFollowRedirects(false);
            // No request body is ever written, so output is not enabled.
            httpUrlConnection.setDoInput(true);
            httpUrlConnection.setConnectTimeout(5000);
            httpUrlConnection.setReadTimeout(5000);
            httpUrlConnection.setUseCaches(false);
            httpUrlConnection.setDefaultUseCaches(false);
            httpUrlConnection.connect();

            int responseCode = httpUrlConnection.getResponseCode();
            if (responseCode != HttpURLConnection.HTTP_OK
                    && responseCode != HttpURLConnection.HTTP_CREATED) {
                return null;
            }

            if (!isOnlyReturnHeader) {
                // Location of the captcha image.
                saveStreamToFile(httpUrlConnection.getInputStream(), new File(path, "code.bmp"));
            }

            // The body goes to the file, never into the Content object.
            return new Content(sUrl, "", httpUrlConnection.getHeaderFields());
        } catch (Exception e) {
            // Callers only see null, so at least report what went wrong.
            e.printStackTrace();
            return null;
        } finally {
            if (httpUrlConnection != null) {
                httpUrlConnection.disconnect();
            }
        }
    }

    /** Copies the stream into the target file and closes both, also on failure. */
    private static void saveStreamToFile(InputStream in, File target) throws IOException {
        OutputStream out = null;
        try {
            out = new FileOutputStream(target);
            byte[] buffer = new byte[4096];
            int count;
            while ((count = in.read(buffer)) > 0) {
                out.write(buffer, 0, count);
            }
        } finally {
            try {
                if (out != null) {
                    out.close();
                }
            } finally {
                in.close();
            }
        }
    }

    /**
     * Extracts the charset from a {@code Content-Type} header value such as
     * {@code text/html; charset=GBK}.
     *
     * <p>The first {@code ;}-separated token (the media type) is skipped. Among the
     * remaining {@code name=value} parameters the last {@code charset} whose value
     * names a charset supported by this JVM wins; surrounding double quotes are
     * removed from the value.
     *
     * @param contentType header value, may be {@code null}
     * @return the charset name as written in the header, or {@code "UTF-8"} when the
     *         header is {@code null}, has no charset parameter, or names a charset
     *         that is illegal or unsupported; never {@code null}
     */
    public static String getEncodingFromContentType(String contentType) {
        final String fallback = "UTF-8";
        if (contentType == null) {
            // Callers pass the result straight to new String(bytes, charsetName).
            return fallback;
        }

        String encoding = null;
        StringTokenizer tok = new StringTokenizer(contentType, ";");
        if (tok.hasMoreTokens()) {
            tok.nextToken(); // skip the media type
            while (tok.hasMoreTokens()) {
                String assignment = tok.nextToken().trim();
                int eqIdx = assignment.indexOf('=');
                if (eqIdx == -1) {
                    continue;
                }
                String varName = assignment.substring(0, eqIdx).trim();
                if (!"charset".equalsIgnoreCase(varName)) {
                    continue;
                }
                String varValue = assignment.substring(eqIdx + 1).trim();
                // The length check keeps a lone quote character from matching as both
                // the opening and the closing quote.
                if (varValue.length() >= 2 && varValue.startsWith("\"") && varValue.endsWith("\"")) {
                    varValue = varValue.substring(1, varValue.length() - 1);
                }
                try {
                    if (Charset.isSupported(varValue)) {
                        encoding = varValue;
                    }
                } catch (IllegalArgumentException illegalName) {
                    // Charset.isSupported rejects syntactically illegal names (e.g. an
                    // empty value) with an exception; treat them as "not supported".
                }
            }
        }
        return encoding == null ? fallback : encoding;
    }

    // Output helper.

    /**
     * Writes {@code content} to the file at {@code path}, replacing any existing
     * content. The text is encoded with the platform default charset
     * ({@code FileWriter}).
     *
     * @param content text to write
     * @param path    target file path
     * @return {@code true} when the text was written without error; {@code false}
     *         when an argument is {@code null}, the file cannot be opened, or
     *         writing fails (exceptions are printed)
     */
    public static boolean inFile(String content, String path) {
        if (content == null || path == null) {
            return false;
        }

        PrintWriter out = null;
        try {
            // FileWriter creates the file when it does not exist yet.
            out = new PrintWriter(new FileWriter(new File(path)));
            out.write(content);
            // PrintWriter never throws on write errors; checkError() flushes and
            // reports whether one occurred.
            return !out.checkError();
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        } finally {
            // out is still null when the file could not be opened.
            if (out != null) {
                out.close();
            }
        }
    }

    /**
     * Downloads the document at {@code httpurl}, decoding it as UTF-8, and returns
     * its lines joined with {@code '\n'} (each line, including the last, is
     * followed by a newline). The HTTP status code is printed to standard output.
     *
     * @param httpurl HTTP URL to fetch
     * @return the document text, or an empty string when anything fails (the
     *         exception is printed)
     */
    public static String getHtmlReadLine(String httpurl) {
        HttpURLConnection conn = null;
        BufferedReader reader = null;
        try {
            URL url = new URL(httpurl);
            conn = (HttpURLConnection) url.openConnection();
            conn.connect();
            System.out.println(conn.getResponseCode());

            reader = new BufferedReader(new InputStreamReader(conn.getInputStream(), "utf-8"));
            StringBuilder page = new StringBuilder();
            String currentLine;
            while ((currentLine = reader.readLine()) != null) {
                page.append(currentLine).append("\n");
            }
            return page.toString();
        } catch (Exception e) {
            // Also covers non-HTTP URLs, for which the cast above fails.
            e.printStackTrace();
            return "";
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    }

    /**
     * Immutable holder for one HTTP response: the requested URL, the decoded body
     * and the response header fields.
     */
    class Content {

        private final String url;

        private final String body;

        /** Header name mapped to the list of its values; never null. */
        private final Map<String, List<String>> headers;

        /**
         * @param url     URL that was requested
         * @param body    decoded response body
         * @param headers response header fields; {@code null} is stored as an empty map
         */
        public Content(String url, String body, Map<String, List<String>> headers) {
            this.url = url;
            this.body = body;
            this.headers = headers != null ? headers : new HashMap<String, List<String>>();
        }

        /** Returns the URL that was requested. */
        public String getUrl() {
            return url;
        }

        /** Returns the decoded response body. */
        public String getBody() {
            return body;
        }

        /** Returns the response header fields (header name to list of values). */
        public Map<String, List<String>> getHeaders() {
            return headers;
        }

    }

    评论

报告相同问题?

悬赏问题

  • ¥15 安卓adb backup备份应用数据失败
  • ¥15 eclipse运行项目时遇到的问题
  • ¥15 关于#c##的问题:最近需要用CAT工具Trados进行一些开发
  • ¥15 南大pa1 小游戏没有界面,并且报了如下错误,尝试过换显卡驱动,但是好像不行
  • ¥15 没有证书,nginx怎么反向代理到只能接受https的公网网站
  • ¥50 成都蓉城足球俱乐部小程序抢票
  • ¥15 yolov7训练自己的数据集
  • ¥15 esp8266与51单片机连接问题(标签-单片机|关键词-串口)(相关搜索:51单片机|单片机|测试代码)
  • ¥15 电力市场出清matlab yalmip kkt 双层优化问题
  • ¥30 ros小车路径规划实现不了,如何解决?(操作系统-ubuntu)