import requests
from bs4 import BeautifulSoup
import pandas as pd
def method_name():
    """Fetch the 12 monthly AQI pages for Xingtai (2019) and collect table rows.

    Side effect: populates the module-level ``tr_list`` with every ``<tr>``
    element across all months (the top-level script below reads it).
    The first entry is the table header row; subsequent pages' header rows
    are dropped so ``tr_list[1:]`` is pure data.
    """
    global tr_list
    tr_list = []
    for month in range(1, 13):
        # Months in the URL are zero-padded (e.g. 201901, not 20191) —
        # see the site's URL format, e.g. .../aqi/xingtai-202102.html.
        url = 'http://www.tianqihoubao.com/aqi/xingtai-2019{:02d}.html'.format(month)
        print(url)
        resp = requests.get(url)
        resp.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page
        # The site serves GBK-encoded pages.
        html = resp.content.decode('gbk')
        soup = BeautifulSoup(html, 'html.parser')
        rows = soup.find_all('tr')
        # Accumulate instead of overwriting (the original kept only the last
        # month); keep the header row only from the first page.
        tr_list.extend(rows if not tr_list else rows[1:])
        print(tr_list)
#获取网页源代码
# Fetch the pages; method_name() populates the module-level tr_list.
method_name()

# One list per output column, in the same order as the fields of each
# table row: date, quality level, AQI, daily AQI rank, PM10, SO2, NO2, CO, O3.
dates, conditions, temp, paiming = [], [], [], []
pm10, So2, No2, Co, O3 = [], [], [], [], []
columns = [dates, conditions, temp, paiming, pm10, So2, No2, Co, O3]

for row in tr_list[1:]:  # tr_list[0] is the table header row
    fields = row.text.split()
    if len(fields) < len(columns):
        # Skip malformed rows rather than crash on a short field list.
        continue
    for column, value in zip(columns, fields):
        column.append(value)

_data = pd.DataFrame()
_data['日期'] = dates
_data['质量等级'] = conditions
_data['AQI指数'] = temp
_data['当天AQI排名'] = paiming
_data['PM10'] = pm10
_data['So2'] = So2
_data['No2'] = No2
_data['Co'] = Co
_data['O3'] = O3
print(_data)