爬取网页，其中一个是：http://www.csdn.net/article/2016-11-17/2826670 。
出错的代码是：Element detail = doc.getElementsByClass("detail").get(0);
程序运行时在网页数据中找不到 detail 类。但是同样的程序在 Eclipse 中运行正确，放到 Android Studio 中却出错。
/**
 * Fetches the HTML for {@code urlStr}, parses it with jsoup and flattens the article into an
 * ordered list of {@code NewsContent} items: title, date/author line, summary, and then the
 * images and text of each child of the article body.
 *
 * <p>Sections that are absent from the parsed page are skipped rather than raising
 * {@code IndexOutOfBoundsException}. When {@code DataUtil.getDataOfHtml} returns {@code null}
 * or an empty string, or the page has no element with class {@code detail}, the returned list
 * is empty.
 *
 * @param urlStr address of the article page, passed unchanged to {@code DataUtil.getDataOfHtml}
 * @return the list that was stored in {@code this.content}; never {@code null}, possibly empty
 */
public List getNewsContent(String urlStr) {
    String dataHtml = DataUtil.getDataOfHtml(urlStr);
    this.content = new ArrayList<>();
    if (dataHtml == null || dataHtml.isEmpty()) {
        // Nothing to parse; Jsoup.parse would reject a null argument.
        return this.content;
    }

    Document doc = Jsoup.parse(dataHtml);

    // Elements.first() yields null for an empty result, unlike get(0) which throws.
    Element detail = doc.getElementsByClass("detail").first();
    if (detail == null) {
        // NOTE(review): the fetched HTML does not contain the expected article container;
        // verify what DataUtil.getDataOfHtml actually returned on this platform.
        return this.content;
    }

    NewsContent newc;

    // Title
    Element title = detail.getElementsByTag("h1").first();
    if (title != null) {
        newc = new NewsContent();
        newc.setTitle(title.text());
        newc.setType(NewsType.TYPE_TITLE);
        this.content.add(newc);
    }

    // Date, author and related information
    Element date = detail.getElementsByTag("h4").first();
    if (date != null) {
        newc = new NewsContent();
        newc.setDate(date.text());
        newc.setType(NewsType.TYPE_DATE);
        this.content.add(newc);
    }

    // Summary
    Element summary = detail.getElementsByClass("summary").first();
    if (summary != null) {
        newc = new NewsContent();
        newc.setSummary(summary.text());
        newc.setType(NewsType.TYPE_SUMMARY);
        this.content.add(newc);
    }

    // Article body. getElementsByClass is documented for a single class name, so the
    // two-class lookup is expressed as a CSS selector that requires both classes.
    Element body = detail.select(".con.news_content").first();
    if (body == null) {
        return this.content;
    }

    for (Element ele : body.children()) {
        // Emit every image found in this child before its text.
        Elements imgs = ele.getElementsByTag("img");
        for (Element img : imgs) {
            newc = new NewsContent();
            newc.setImg(img.attr("src"));
            newc.setType(NewsType.TYPE_IMG);
            this.content.add(newc);
        }
        // Detach the images from the document now that they have been recorded.
        imgs.remove();

        String text = ele.text();
        if (text.isEmpty()) {
            continue;
        }

        newc = new NewsContent();
        newc.setType(NewsType.TYPE_CONTENT);

        // A child containing a non-empty <h2> is marked as a bold sub-heading.
        Element heading = ele.getElementsByTag("h2").first();
        if (heading != null && !heading.text().isEmpty()) {
            System.out.println("Tag: " + heading.text());
            newc.setType(NewsType.TYPE_BOLD_TITLE);
        }

        newc.setContent(text);
        this.content.add(newc);
    }
    return this.content;
}