qq_37673876 2017-08-12 10:55
浏览 952

一个爬虫for循环外层循环都执行了,但内层循环只执行了一次

import requests,time
from bs4 import BeautifulSoup
from pymongo import MongoClient

def city(url, city_list_name=None, city_list_url=None):
    """Scrape the page at ``url`` and collect city names and their links.

    Every ``<a>`` matched by the ``div#cityall`` selector that carries no
    ``class`` attribute contributes its text to the name list and its
    ``href`` to the URL list.

    Args:
        url: Address of the page to download.
        city_list_name: Optional list to append city names to. A new list
            is created when omitted.
        city_list_url: Optional list to append city links to. A new list
            is created when omitted.

    Returns:
        A ``(city_list_name, city_list_url)`` tuple of parallel lists.
    """
    # Default arguments are evaluated once, at definition time. Using ``[]``
    # as a default made every call share (and keep growing) the same lists,
    # so calling city() twice returned each city twice.
    if city_list_name is None:
        city_list_name = []
    if city_list_url is None:
        city_list_url = []

    headers = {
        'Cookie': 'Hm_lvt_ab6a683aa97a52202eab5b3a9042a8d2=1502525654; Hm_lpvt_ab6a683aa97a52202eab5b3a9042a8d2=1502525654',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    html = requests.get(url, headers=headers)
    soup = BeautifulSoup(html.text, 'html5lib')
    for each in soup.select('body > div#cityall > div > div > ul > li > a'):
        # Anchors that have a class attribute are skipped.
        if not each.get('class'):
            city_list_name.append(each.get_text())
            city_list_url.append(each.get('href'))
    return city_list_name, city_list_url

# Page handed to city() below.
url = 'http://lishi.tianqi.com/'

# Weather conditions

# Trial call of city(); the returned lists are discarded here.
city(url)

def year_month(url, url_list=None):
    """Scrape the page at ``url`` and collect its link elements.

    Args:
        url: Address of the page to download.
        url_list: Optional list to append the matched ``<a>`` elements to.
            A new list is created when omitted.

    Returns:
        ``url_list`` extended with every ``<a>`` element matched by the
        ``div#tool_site`` selector. The items are BeautifulSoup elements,
        not strings; read the address with ``element.get('href')``.
    """
    # A ``[]`` default is created once and shared by all calls, which made
    # each call return the links of every page scraped so far.
    if url_list is None:
        url_list = []

    html = requests.get(url)
    soup = BeautifulSoup(html.text, 'html5lib')
    url_list.extend(
        soup.select('body > div#main > div.left > div#tool_site > div > ul > li > a '))
    return url_list

# Sample page used to try out year_month().
url = 'http://lishi.tianqi.com/batang/index.html'

# Trial call; the returned list is discarded here.
year_month(url)

def day(url, city, detail_list=None):
    """Scrape the page at ``url`` into one record per table row.

    Each ``<ul>`` matched by the ``div.tqtongji2`` selector that has no
    ``class`` attribute is read as a row whose first four ``<li>`` cells
    are, in order, date, maximum temperature, minimum temperature and
    weather. Every record is printed as it is parsed.

    Args:
        url: Address of the page to download.
        city: City name stored in each record under the ``'city'`` key.
        detail_list: Optional list to append the records to. A new list is
            created when omitted.

    Returns:
        ``detail_list`` extended with one dict per parsed row, with keys
        ``'city'``, ``'date'``, ``'up-temperature'``, ``'low-temperature'``
        and ``'weather'``.

    Raises:
        IndexError: If a matched row has fewer than four ``<li>`` cells.
    """
    # A ``[]`` default is shared between calls, so earlier pages' rows leaked
    # into every later result.
    if detail_list is None:
        detail_list = []

    html = requests.get(url)
    soup = BeautifulSoup(html.text, 'html5lib')
    for each in soup.select('body > div#main > div.left > div#tool_site > div.tqtongji2 > ul'):
        # Rows that have a class attribute are skipped.
        if each.get('class'):
            continue
        x = each.select('li')
        # Build a new dict for every row. Reusing one dict and appending it
        # repeatedly left the list full of references to the same object,
        # all showing the values of the last row.
        detail = {
            'city': city,
            'date': x[0].get_text(),
            'up-temperature': x[1].get_text(),
            'low-temperature': x[2].get_text(),
            'weather': x[3].get_text(),
        }
        print(detail)
        detail_list.append(detail)
    return detail_list

# Trial call of day() on a single sample page; the result is discarded.
url = 'http://lishi.tianqi.com/aohan/201101.html'

day(url, '敖汉')

url = 'http://lishi.tianqi.com/'

# Weather conditions

# Fresh lists are passed explicitly throughout so the results never depend on
# state left behind in a function's default arguments by earlier calls.
city_list_name, city_list_url = city(url, [], [])

print(city_list_name)

print(city_list_url)

detail_list = []
for city_name, city_url in zip(city_list_name, city_list_url):
    print(city_name)
    print(city_url)
    # Pause between cities (presumably to throttle requests to the site).
    time.sleep(1)
    month_links = year_month(city_url, [])
    # The loop variable must NOT be named `year_month`. Doing so rebinds the
    # function name to a BeautifulSoup element; on the next city the "call"
    # then runs the element's find_all() and yields nothing, so the inner
    # loop only ever ran for the first city.
    for month_link in month_links:
        detail_list += day(month_link.get('href'), city_name, [])

for each in detail_list:
    print(each)


  • 写回答

0条回答 默认 最新

    报告相同问题?

    悬赏问题

    • ¥20 有关区间dp的问题求解
    • ¥15 多电路系统共用电源的串扰问题
    • ¥15 slam rangenet++配置
    • ¥15 有没有研究水声通信方面的帮我改俩matlab代码
    • ¥15 对于相关问题的求解与代码
    • ¥15 ubuntu子系统密码忘记
    • ¥15 信号傅里叶变换在matlab上遇到的小问题请求帮助
    • ¥15 保护模式-系统加载-段寄存器
    • ¥15 电脑桌面设定一个区域禁止鼠标操作
    • ¥15 求NPF226060磁芯的详细资料