wangbaosongmsn
哈哈和呵呵
2017-02-23 09:15

Java HtmlUnit 抓取不到页面元素

2
  • java

下面的代码能获取到百度页面的 form 表单,但是获取不到另外一个网页(有赞登录页)的 form 表单,此时该如何处理?

PS:把另外一个网页的源码保存为 html 文件后,用浏览器打开,能看到 form 表单。

package com.xttx.cn.fetchpro.fetchImp;

import java.io.IOException;
import java.net.URL;
import java.util.List;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.CollectingAlertHandler;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.HttpWebConnection;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebRequest;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.xttx.cn.fetchpro.exception.PageNotFoundException;

/**
 * Small HtmlUnit harness that loads two pages and reports how many
 * {@code <form>} elements each one exposes.
 *
 * <p>The {@link WebClient} is kept in a static field, so every instance of this
 * class (and {@link #main}) shares the same client.
 */
public class SimulationFetch {

    /** Login page on which no form was being found. */
    private static final String YOUZAN_LOGIN_URL =
            "https://login.youzan.com/sso/index?service=kdt&from_source=pzshouye";

    /** Reference page on which the form lookup is known to work. */
    private static final String BAIDU_URL = "https://www.baidu.com/";

    /** Upper bound, in milliseconds, to let background JavaScript finish after a page load. */
    private static final long BACKGROUND_JS_WAIT_MILLIS = 10 * 1000L;

    /** Client shared by all users of this class; {@code null} until assigned. */
    protected static WebClient webClient = null;

    /** Shared request slot; not used inside this class. */
    static WebRequest request = null;

    /**
     * Returns the shared client.
     *
     * @return the current client, or {@code null} if none has been assigned yet
     */
    public WebClient getWebClient() {
        return webClient;
    }

    /**
     * Replaces the shared client.
     *
     * @param webClient the client to use from now on
     */
    public void setWebClient(WebClient webClient) {
        // The field is static, so qualify it with the class rather than `this`.
        SimulationFetch.webClient = webClient;
    }

    /**
     * Configures a client, loads both pages and prints the number of forms found on each.
     *
     * @param args ignored
     * @throws FailingHttpStatusCodeException if a page answers with a failing HTTP status
     * @throws IOException if a page cannot be fetched
     * @throws PageNotFoundException declared for callers; not thrown by this method
     * @throws InterruptedException declared for callers; not thrown by this method
     */
    public static void main(String[] args) throws FailingHttpStatusCodeException, IOException, PageNotFoundException, InterruptedException {
        // NOTE(review): these setters mutate the shared BrowserVersion.FIREFOX_24
        // constant, so any other WebClient built from it sees the same values.
        BrowserVersion.FIREFOX_24.setBrowserLanguage("zh-CN");
        BrowserVersion.FIREFOX_24.setSystemLanguage("zh-CN");
        BrowserVersion.FIREFOX_24.setUserLanguage("zh-CN");
        BrowserVersion.FIREFOX_24.setUserAgent("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0");
        BrowserVersion.FIREFOX_24.setBrowserVersion(46.0f);
        BrowserVersion.FIREFOX_24.setCpuClass("x64");
        webClient = new WebClient(BrowserVersion.FIREFOX_24);

        try {
            configureClient();

            HtmlPage youzanLoginPage = loadPage(YOUZAN_LOGIN_URL);
            List<HtmlForm> youzanForms = youzanLoginPage.getForms();
            System.out.println("youzan login page forms: " + youzanForms.size());

            HtmlPage baiduPage = loadPage(BAIDU_URL);
            List<HtmlForm> baiduForms = baiduPage.getForms();
            // Guard the lookup: get(0) on an empty list would throw IndexOutOfBoundsException.
            if (baiduForms.isEmpty()) {
                System.out.println("baidu page forms: 0");
            } else {
                HtmlForm firstBaiduForm = baiduForms.get(0);
                System.out.println("baidu page forms: " + baiduForms.size()
                        + ", first form: " + firstBaiduForm);
            }

            System.out.println("---");
        } finally {
            // Release the client's windows and background JavaScript threads.
            // closeAllWindows() is the release call in the HtmlUnit 2.x line that
            // still ships FIREFOX_24; newer releases expose close() instead.
            webClient.closeAllWindows();
        }
    }

    /** Applies the connection, JavaScript, cookie and SSL settings to the shared client. */
    private static void configureClient() {
        webClient.setWebConnection(new HttpWebConnection(webClient));
        webClient.getCache().clear();
        webClient.getCookieManager().clearCookies();
        webClient.getOptions().setJavaScriptEnabled(true);
        webClient.setJavaScriptTimeout(60 * 1000);
        webClient.getOptions().setCssEnabled(true);
        webClient.getOptions().setActiveXNative(true);
        webClient.getOptions().setPopupBlockerEnabled(true);
        webClient.getOptions().setRedirectEnabled(true);
        webClient.getOptions().setTimeout(10000);
        webClient.getOptions().setDoNotTrackEnabled(true);
        webClient.getCookieManager().setCookiesEnabled(true);
        webClient.getOptions().setThrowExceptionOnFailingStatusCode(true);
        // Script errors on third-party pages should not abort the page load.
        webClient.getOptions().setThrowExceptionOnScriptError(false);
        webClient.getOptions().setUseInsecureSSL(true);
        webClient.getOptions().setSSLInsecureProtocol("TLSv1.2");
        webClient.setAjaxController(new NicelyResynchronizingAjaxController());
        webClient.setAlertHandler(new CollectingAlertHandler());
    }

    /**
     * Loads {@code url} with the shared client and gives background JavaScript
     * time to finish before the page is handed back.
     *
     * @param url absolute URL of the page to load
     * @return the loaded page
     * @throws IOException if the URL is malformed or the page cannot be fetched
     */
    private static HtmlPage loadPage(String url) throws IOException {
        HtmlPage page = (HtmlPage) webClient.getPage(new WebRequest(new URL(url)));
        // NOTE(review): presumably the missing form is inserted by script after
        // the initial load; waiting here lets such scripts run before the DOM is
        // inspected. Confirm against the target page.
        webClient.waitForBackgroundJavaScript(BACKGROUND_JS_WAIT_MILLIS);
        return page;
    }
}

  • 点赞
  • 回答
  • 收藏
  • 复制链接分享

1条回答