用 Python 爬取微博 Ajax 数据之后得到了多个 DataFrame,如果只是 print 可以看到全部结果,但是输出到 Excel 时只保留了最后一个,请问该怎么办?
# coding: utf-8
import requests
from time import sleep
from urllib.parse import urlencode
from bs4 import BeautifulSoup as bs
from pyquery import PyQuery as pq
import pandas
def get_page(page=1):
    """Fetch one page of the Weibo timeline from the m.weibo.cn container API.

    Args:
        page: Page number to request (sent as the ``page`` query parameter).

    Returns:
        A ``(payload, page)`` tuple. ``payload`` is the decoded JSON body when
        the server answers with HTTP 200, and ``None`` when the request fails,
        times out, returns another status code, or the body is not valid
        JSON. A tuple is returned in every case so that
        ``parse_page(*get_page(n))`` never tries to unpack a bare ``None``.
    """
    base_url = "https://m.weibo.cn/api/container/getIndex?"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.1 Safari/605.1.15',
        'Referer': 'https://m.weibo.cn/u/1821094164',
        'X-Requested-With': 'XMLHttpRequest'
    }
    params = {
        'type': 'uid',
        'value': 1821094164,
        'containerid': 1076031821094164,
        'page': str(page),
    }
    url = base_url + urlencode(params)
    try:
        # A timeout keeps a stalled connection from blocking the crawl forever.
        resp = requests.get(url, headers=headers, timeout=10)
        # Pause between requests so consecutive pages are not fetched back to back.
        sleep(1)
        if resp.status_code == 200:
            return resp.json(), page
    except (requests.ConnectionError, requests.Timeout) as e:
        print('Error: {}'.format(e.args))
    except ValueError as e:
        # resp.json() raises a ValueError subclass when the body is not JSON.
        print('Error: {}'.format(e.args))
    return None, page
def parse_page(json, page):
    """Yield one flat record for every post found in an API payload.

    Args:
        json: Decoded JSON payload obtained with ``requests`` (a dict), or a
            falsy value when nothing was fetched.
        page: Page number the payload belongs to.

    Yields:
        dict: A record with the keys ``id``, ``text`` (markup stripped via
        PyQuery, or ``''`` when the post has no text), ``attitudes``,
        ``comments`` and ``reposts``.
    """
    if not json:
        return
    # Error payloads may lack 'data' or 'cards'; treat them as "no posts".
    cards = (json.get('data') or {}).get('cards') or []
    for index, card in enumerate(cards):
        # Kept from the original: the second card of the first page is
        # skipped (presumably a non-post card -- TODO confirm).
        if page == 1 and index == 1:
            continue
        mblog = card.get('mblog')
        if not mblog:
            # Cards without an 'mblog' entry carry no post to extract.
            continue
        raw_text = mblog.get('text')
        yield {
            'id': mblog.get('id'),
            # PyQuery cannot parse None or an empty document, so guard it.
            'text': pq(raw_text).text() if raw_text else '',
            'attitudes': mblog.get('attitudes_count'),
            'comments': mblog.get('comments_count'),
            'reposts': mblog.get('reposts_count'),
        }
if __name__ == '__main__':
    # Collect the posts of every page first and write the workbook once.
    # Calling to_excel inside the loop rewrites the same file on each
    # iteration, which is why only the last result survived.
    records = []
    for page in range(1, 11):
        fetched = get_page(page)
        if not fetched:
            # Nothing usable came back for this page; move on to the next.
            continue
        payload, page_no = fetched
        records.extend(parse_page(payload, page_no))
    # A list of dicts becomes one row per post, one column per key.
    data = pandas.DataFrame(records)
    # The .xlsx extension lets pandas choose an Excel writer engine.
    data.to_excel(excel_writer="微博.xlsx", index=False)