qq_37673876 2017-08-12 10:55
浏览 952

一个爬虫for循环外层循环都执行了,但内层循环只执行了一次

import requests,time
from bs4 import BeautifulSoup
from pymongo import MongoClient

def city(url,city_list_name=[],city_list_url=[]):
headers = {
'Cookie': 'Hm_lvt_ab6a683aa97a52202eab5b3a9042a8d2=1502525654; Hm_lpvt_ab6a683aa97a52202eab5b3a9042a8d2=1502525654',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
html = requests.get(url, headers=headers)
soup = BeautifulSoup(html.text, 'html5lib')
for each in soup.select('body > div#cityall > div > div > ul > li > a'):
if not each.get('class'):
# city_list.append(each)
city_list_name.append(each.get_text())
city_list_url.append(each.get('href'))
return city_list_name,city_list_url

url = 'http://lishi.tianqi.com/'

#天气状况

city(url)

def year_month(url,url_list=[]):
html = requests.get(url)
soup = BeautifulSoup(html.text, 'html5lib')
for each in soup.select('body > div#main > div.left > div#tool_site > div > ul > li > a '):
url_list.append(each)
# print(each.get_text())
# print(each.get('href'))
return url_list

url = 'http://lishi.tianqi.com/batang/index.html'

year_month(url)

def day(url,city,detail_list=[]):
detail = {
'city':city,
'date':'DATE',
'up-temperature':'MAXIMUM',
'low-temperature':'MINIMUM',
'weather':'WEATHER',
}
html = requests.get(url)
soup = BeautifulSoup(html.text, 'html5lib')
for each in soup.select('body > div#main > div.left > div#tool_site > div.tqtongji2 > ul'):
if not each.get('class'):
x = each.select('li')
detail['date'] = x[0].get_text()
detail['up-temperature'] = x[1].get_text()
detail['low-temperature'] = x[2].get_text()
detail['weather'] = x[3].get_text()
print(detail)
detail_list.append(detail)
return detail_list

url = 'http://lishi.tianqi.com/aohan/201101.html'

day(url,'敖汉')

url = 'http://lishi.tianqi.com/'

天气状况

city_list_name,city_list_url = city(url)

print(city_list_name)

print(city_list_url)

detail_list = []
for city_name,city_url in zip(city_list_name,city_list_url):
print(city_name)
print(city_url)
time.sleep(1)
year_month_list = year_month(city_url)
for year_month in year_month_list:
detail_list_part = day(year_month.get('href'),city_name)
detail_list += detail_list_part

for each in detail_list:

print(each)


  • 写回答

0条回答 默认 最新

    报告相同问题?

    悬赏问题

    • ¥15 关于#matlab#的问题:在模糊控制器中选出线路信息,在simulink中根据线路信息生成速度时间目标曲线(初速度为20m/s,15秒后减为0的速度时间图像)我想问线路信息是什么
    • ¥15 banner广告展示设置多少时间不怎么会消耗用户价值
    • ¥16 mybatis的代理对象无法通过@Autowired装填
    • ¥15 可见光定位matlab仿真
    • ¥15 arduino 四自由度机械臂
    • ¥15 wordpress 产品图片 GIF 没法显示
    • ¥15 求三国群英传pl国战时间的修改方法
    • ¥15 matlab代码代写,需写出详细代码,代价私
    • ¥15 ROS系统搭建请教(跨境电商用途)
    • ¥15 AIC3204的示例代码有吗,想用AIC3204测量血氧,找不到相关的代码。