现在能爬取数据，但需要手动一个一个地更换城市和时间，希望能加个循环，一次性爬完选定城市所有时间的天气。
网站来自 天气网->历史天气。 网址格式: 'http://lishi.tianqi.com/'+position+'/'+date+'.html'
我不知道怎么加循环
代码现在如下:
from selenium import webdriver
from bs4 import BeautifulSoup
import time
import csv
import os
from openpyxl import workbook # 写入Excel表所用
# Shared Chrome WebDriver used by the scraper below.
chrome_options = webdriver.ChromeOptions()
# Value 2 asks Chrome to block image loading. The preference key is
# "..._content_settings.images" (plural "settings"); the previous spelling
# "..._content_setting.images" is not a Chrome preference, so images were
# still being downloaded.
prefs = {"profile.managed_default_content_settings.images": 2}
chrome_options.add_experimental_option("prefs", prefs)
# `options=` replaces the `chrome_options=` keyword, which Selenium
# deprecated and later removed.
driver = webdriver.Chrome(options=chrome_options)
# Give up on a page load or an injected script after 60 seconds.
driver.set_page_load_timeout(60)
driver.set_script_timeout(60)
def _month_range(start, end):
    """Return every month from *start* to *end* inclusive as 'YYYYMM' strings.

    Both arguments are 'YYYYMM' strings, e.g. ``_month_range('201811',
    '201902')`` gives ``['201811', '201812', '201901', '201902']``.
    An empty list is returned when *start* is later than *end*.
    """
    year, month = int(start[:4]), int(start[4:6])
    last = (int(end[:4]), int(end[4:6]))
    months = []
    while (year, month) <= last:
        months.append('%04d%02d' % (year, month))
        month += 1
        if month > 12:
            year, month = year + 1, 1
    return months


def getId(position='wulumuqi', date='201901', output_dir=''):
    """Scrape one city's weather table for one month and save it as .xlsx.

    Args:
        position: City name as it appears in the site URL (pinyin),
            e.g. 'wulumuqi'.
        date: Month to fetch as a 'YYYYMM' string, e.g. '201901'.
        output_dir: Directory for the workbook; the default '' saves into
            the current working directory.

    The workbook is saved as ``<position><date>.xlsx``. Any error while
    loading, parsing or saving is printed and swallowed, so one failed
    page does not stop a batch run; in that case no file is written.
    Relies on the module-level ``driver``.
    """
    query_url = 'http://lishi.tianqi.com/' + position + '/' + date + '.html'
    wb = workbook.Workbook()  # new Excel workbook
    ws = wb.active  # the sheet rows are appended to
    ws.append(['日期', '最高气温', '最低气温', '天气', '风向', '风力'])
    try:
        # The home page is visited before the data page; presumably this
        # sets up cookies/session for the site -- TODO confirm.
        driver.get('http://tianqi.com/')
        time.sleep(2)
        driver.get(query_url)
        # Scroll far down, then wait before reading the page source.
        driver.execute_script("window.scrollTo(0, 1800000);")
        time.sleep(2)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        table = soup.find('div', class_='tqtongji2')
        if table is None:
            # Raise a readable message instead of the AttributeError that
            # calling .find_all on None would produce.
            raise ValueError('no weather table found at ' + query_url)
        # The first <ul> is skipped (the sheet already has its own header).
        for item in table.find_all('ul')[1:]:
            cells = [li.get_text().strip() for li in item.find_all('li')]
            if len(cells) < 6:
                # Skip short rows so one bad row does not lose the month.
                continue
            row = cells[:6]
            if row[5] == '微风':  # wind force: record a light breeze as level 0
                row[5] = '0级'
            if row[4] == '无持续风向':  # wind direction: no sustained direction
                row[4] = '无'
            for value in row:
                print(value)
            ws.append(row)
        # Save once, after every row has been collected.
        wb.save(os.path.join(output_dir, position + date + '.xlsx'))
    except Exception as e:
        # Best-effort by design: report the problem and let the caller go on.
        print(e)


def crawl_all(positions, dates):
    """Call getId for every combination of city in *positions* and month in *dates*."""
    for position in positions:
        for date in dates:
            getId(position, date)


# Cities (pinyin, as used in the site URL) and months to fetch. Add more
# cities or widen the month range to crawl everything in one run.
POSITIONS = ['wulumuqi']
DATES = _month_range('201901', '201901')
# NOTE: the shared driver is left open afterwards, as before; call
# driver.quit() once no further pages are needed.
crawl_all(POSITIONS, DATES)