package cn.xls.util;
import cn.xls.pojo.City;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawls a three-level listing (province, city, district) with jsoup and prints
 * what it finds to standard output.
 *
 * <p>Every page is requested through {@link #fetch(String)}, which retries failed
 * requests with a growing pause. A page that still cannot be loaded is reported
 * and skipped, so a single timeout no longer aborts the whole crawl.
 *
 * @author l
 */
public class CityInfoUtil {

    /** Timeout, in milliseconds, applied to every page request. */
    private static final int TIMEOUT_MILLIS = 50000;

    /** Number of times one page is requested before it is given up on. */
    private static final int MAX_ATTEMPTS = 3;

    /** Base pause between attempts; it is multiplied by the attempt number. */
    private static final long RETRY_DELAY_MILLIS = 1000L;

    /**
     * Walks the province index at {@code requestUrl}, then each province page and
     * each city page linked from it, printing province, city and district names.
     *
     * <p>NOTE(review): nothing is ever added to the returned list, so callers
     * always receive an empty list. Populating it needs the {@code City} API,
     * which is not visible from this file - confirm and fill in.
     *
     * @param requestUrl URL of the province index page
     * @return the (currently always empty) list of cities; never {@code null}
     */
    public static List<City> getProvinceData(String requestUrl) {
        List<City> lists = new ArrayList<City>();

        Document indexPage;
        try {
            indexPage = fetch(requestUrl);
        } catch (IOException e) {
            // Without the index there is nothing to crawl; keep the original
            // contract of reporting the failure and returning the empty list.
            e.printStackTrace();
            return lists;
        }

        // Every link inside a province row of the index page.
        Elements provinceList = indexPage.select("tr[class='provincetr']").select("td").select("a");
        for (Element provinceLink : provinceList) {
            printProvince(provinceLink);
        }
        return lists;
    }

    /** Prints one province, then every city (and its districts) linked from its page. */
    private static void printProvince(Element provinceLink) {
        // absUrl resolves the href against the page it was found on, so this also
        // works when the start URL does not literally contain "index.html".
        String provinceUrl = provinceLink.absUrl("href");
        if (provinceUrl.isEmpty()) {
            return;
        }

        Document provincePage;
        try {
            provincePage = fetch(provinceUrl);
        } catch (IOException e) {
            // Skip only this province instead of aborting the whole crawl.
            e.printStackTrace();
            return;
        }

        // Second cell of each city row holds the link whose text is the city name.
        Elements citys = provincePage.select("tr[class='citytr']").select("td:eq(1)").select("a");
        // text() rather than html(): the name must not carry markup nested in the link.
        System.out.println("当前省份 : " + provinceLink.text());
        System.out.println("当前省份下有 : " + citys.size() + " 个城市");

        int cityCount = 0;
        for (Element cityLink : citys) {
            cityCount++;
            printCity(cityLink, cityCount);
        }
        System.out.println("-----------");
    }

    /** Prints one city, prefixed with its running number, and the districts on its page. */
    private static void printCity(Element cityLink, int cityNumber) {
        String cityUrl = cityLink.absUrl("href");
        if (cityUrl.isEmpty()) {
            return;
        }

        Document cityPage;
        try {
            cityPage = fetch(cityUrl);
        } catch (IOException e) {
            // Skip only this city; the remaining cities are still processed.
            e.printStackTrace();
            return;
        }

        // Second cell of each district row holds the link whose text is the district name.
        Elements areas = cityPage.select("tr[class=countytr] td:eq(1)").select("a");
        System.out.println("当前城市 : " + cityNumber + cityLink.text());
        System.out.println("当前城市下有 : " + areas.size() + " 个区");
        for (Element areaLink : areas) {
            System.out.println("区域 : " + areaLink.text());
        }
    }

    /**
     * Downloads and parses one page, retrying on I/O failure.
     *
     * <p>Presumably the remote server throttles rapid successive requests, which
     * would explain timeouts after a handful of pages; pausing longer before each
     * retry gives it time to recover.
     *
     * @param url absolute URL of the page
     * @return the parsed document
     * @throws IOException if every attempt failed, or the thread was interrupted
     *     while waiting to retry
     */
    private static Document fetch(String url) throws IOException {
        IOException lastFailure = null;
        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            try {
                // maxBodySize(0) lifts jsoup's response size cap.
                return Jsoup.connect(url).timeout(TIMEOUT_MILLIS).maxBodySize(0).get();
            } catch (IOException e) {
                lastFailure = e;
                if (attempt < MAX_ATTEMPTS) {
                    pauseBeforeRetry(url, attempt);
                }
            }
        }
        throw new IOException(
                "Giving up on " + url + " after " + MAX_ATTEMPTS + " attempts", lastFailure);
    }

    /** Sleeps {@code attempt * RETRY_DELAY_MILLIS}; an interrupt is restored and reported. */
    private static void pauseBeforeRetry(String url, int attempt) throws IOException {
        try {
            Thread.sleep(RETRY_DELAY_MILLIS * attempt);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted while waiting to retry " + url, e);
        }
    }

    /** Manual smoke test: crawls the 2018 listing and prints it. */
    public static void main(String[] args) {
        getProvinceData("http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2018/index.html");
    }
}
测试时总是在打印了七八个城市的数据之后就超时,请问该怎么解决?我刚从网上学的 jsoup,请大佬们多多指点。