import requests,time
from bs4 import BeautifulSoup
from pymongo import MongoClient
def city(url, city_list_name=None, city_list_url=None):
    """Download the page at *url* and collect the text and href of its city links.

    Args:
        url: Address of the page to download.
        city_list_name: Optional list that the link texts are appended to.
            A new list is created when omitted.
        city_list_url: Optional list that the link hrefs are appended to.
            A new list is created when omitted.

    Returns:
        The tuple ``(city_list_name, city_list_url)``; both lists are filled
        in step, so entries at the same index belong to the same link.
    """
    # Create the accumulators per call. Using ``[]`` as a default value would
    # share one list between all calls and return duplicated entries on the
    # second and later calls.
    if city_list_name is None:
        city_list_name = []
    if city_list_url is None:
        city_list_url = []

    headers = {
        'Cookie': 'Hm_lvt_ab6a683aa97a52202eab5b3a9042a8d2=1502525654; Hm_lpvt_ab6a683aa97a52202eab5b3a9042a8d2=1502525654',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    html = requests.get(url, headers=headers)
    soup = BeautifulSoup(html.text, 'html5lib')
    for each in soup.select('body > div#cityall > div > div > ul > li > a'):
        # Only anchors without a ``class`` attribute are collected.
        if not each.get('class'):
            city_list_name.append(each.get_text())
            city_list_url.append(each.get('href'))
    return city_list_name, city_list_url
# Page passed to city() below.
url = 'http://lishi.tianqi.com/'
# Weather conditions
# Module-level call: runs whenever the file is executed; the returned lists
# are discarded.
city(url)
def year_month(url, url_list=None):
    """Download the page at *url* and collect the link elements in its ``div#tool_site`` lists.

    Args:
        url: Address of the page to download.
        url_list: Optional list that the matched elements are appended to.
            A new list is created when omitted.

    Returns:
        ``url_list``. Its items are the matched ``<a>`` elements themselves,
        not strings; read the target with ``item.get('href')`` and the label
        with ``item.get_text()``.
    """
    # Create the accumulator per call. A ``[]`` default would be shared by
    # every call, so each call would also return all earlier calls' links.
    if url_list is None:
        url_list = []

    html = requests.get(url)
    soup = BeautifulSoup(html.text, 'html5lib')
    for each in soup.select('body > div#main > div.left > div#tool_site > div > ul > li > a '):
        url_list.append(each)
    return url_list
# Page passed to year_month() below.
url = 'http://lishi.tianqi.com/batang/index.html'
# Module-level call: runs whenever the file is executed; the returned list is
# discarded.
year_month(url)
def day(url, city, detail_list=None):
    """Download the page at *url* and build one record per data row of its ``div.tqtongji2`` table.

    Each record is printed as it is built and then appended to the result.

    Args:
        url: Address of the page to download.
        city: Value stored under the ``'city'`` key of every record.
        detail_list: Optional list that the records are appended to.
            A new list is created when omitted.

    Returns:
        ``detail_list``. Every record is a separate dict with the keys
        ``'city'``, ``'date'``, ``'up-temperature'``, ``'low-temperature'``
        and ``'weather'``, filled from the first four ``<li>`` cells of a row.

    Raises:
        IndexError: If a collected row has fewer than four ``<li>`` cells.
    """
    # Create the accumulator per call. A ``[]`` default would be shared by
    # every call and keep growing across pages.
    if detail_list is None:
        detail_list = []

    html = requests.get(url)
    soup = BeautifulSoup(html.text, 'html5lib')
    for each in soup.select('body > div#main > div.left > div#tool_site > div.tqtongji2 > ul'):
        # Only ``<ul>`` rows without a ``class`` attribute are collected.
        if each.get('class'):
            continue
        x = each.select('li')
        # Build a new dict for every row. Reusing one dict would make every
        # list entry point at the same object, holding the last row's values.
        detail = {
            'city': city,
            'date': x[0].get_text(),
            'up-temperature': x[1].get_text(),
            'low-temperature': x[2].get_text(),
            'weather': x[3].get_text(),
        }
        print(detail)
        detail_list.append(detail)
    return detail_list
# One-off call of day() on a single page; day() prints each record and the
# returned list is discarded.
url = 'http://lishi.tianqi.com/aohan/201101.html'
day(url, '敖汉')

# Full crawl: city list -> per-city link list -> per-page records.
url = 'http://lishi.tianqi.com/'
# Weather conditions
# Fresh lists are passed explicitly so that nothing collected by earlier
# calls is carried into this run.
city_list_name, city_list_url = city(url, [], [])
print(city_list_name)
print(city_list_url)
detail_list = []
for city_name, city_url in zip(city_list_name, city_list_url):
    print(city_name)
    print(city_url)
    time.sleep(1)  # pause between cities
    year_month_list = year_month(city_url, [])
    # The loop variable must not be called ``year_month``: that would rebind
    # the function name, and the next city's ``year_month(city_url)`` would
    # call a parsed tag instead of the function.
    for month_link in year_month_list:
        # day() appends straight into detail_list, so no merging is needed.
        day(month_link.get('href'), city_name, detail_list)
for each in detail_list:
    print(each)