from datetime import datetime,timedelta
import re
import requests
import xlwt
# Cookie string sent along with every request (stored under the "Cookie"
# key of ``headers`` below).
cookie = (
    'Hm_lvt_eafafe9dd9041f948d8897cb295170d5=1627023856,1627027237; '
    'Hm_lpvt_eafafe9dd9041f948d8897cb295170d5=1627028614'
)

# HTTP request headers: a desktop Chrome user-agent string plus the cookie.
headers = {
    'User-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.164 Safari/537.36'
    ),
    'Cookie': cookie,
}
def gen_dates(b_date, days):
    """Yield ``days`` consecutive dates, starting at ``b_date``.

    Args:
        b_date: First value to produce; anything that supports
            ``+ timedelta`` (a ``date`` or ``datetime``).
        days: How many values to produce. Zero or a negative number
            yields nothing.

    Yields:
        ``b_date``, ``b_date + 1 day``, ... ``b_date + (days - 1) days``.
    """
    for offset in range(days):
        yield b_date + timedelta(days=offset)
def get_date_list(start="2021-7-20", end="2021-7-23"):
    """Return every date from ``start`` up to, but not including, ``end``.

    Args:
        start: First day of the range, written as ``%Y-%m-%d``.
        end: Day after the last one wanted, written as ``%Y-%m-%d``.

    Returns:
        A list of ``datetime.date`` objects; empty when ``end`` is not
        later than ``start``.

    Raises:
        ValueError: If either string does not match ``%Y-%m-%d``.
    """
    # Set the time span here (or pass it in). ``.date()`` keeps only the
    # calendar date and drops the time-of-day part.
    first = datetime.strptime(start, "%Y-%m-%d").date()
    last = datetime.strptime(end, "%Y-%m-%d").date()
    # NOTE(review): the day count is (last - first).days, so ``end`` itself
    # is excluded -- confirm that an exclusive end date is what is wanted.
    return list(gen_dates(first, (last - first).days))
def _extract_field(fragment, key):
    """Return the value stored under ``"key":`` in a JSON fragment, or None."""
    import json  # local import: the file's import header does not bring in json

    # Value is either a quoted JSON string (escapes allowed) or a bare token
    # such as a number, ending at the next comma / brace / bracket / space.
    pattern = r'"%s"\s*:\s*("(?:[^"\\]|\\.)*"|[^,}\]\s]+)' % re.escape(key)
    found = re.search(pattern, fragment)
    if found is None:
        return None
    raw = found.group(1)
    try:
        # Decodes JSON escapes such as \uXXXX inside the string literal.
        return json.loads(raw)
    except ValueError:
        return raw.strip('"')


# Pull out the text part (the hot-search topic) and the time of one record;
# only these two values are needed.
def GetMiddleStr(final_set, content, time):
    """Extract topic and time from one record and add them to ``final_set``.

    Args:
        final_set: Set that receives a ``(topic, time)`` tuple.
        content: Text of a single record. Assumed to contain
            ``"topic": ...`` and, optionally, ``"date": ...`` fields --
            TODO confirm against the real response.
        time: Fallback time value used when the record has no ``date``.

    Returns:
        ``0`` when no topic could be found (nothing is added), otherwise
        ``None``.
    """
    # The field name is the pattern and the record is the text searched.
    topic = _extract_field(content, 'topic')
    if topic is None:
        return 0
    when = _extract_field(content, 'date')
    if when is None:
        when = time
    final_set.add((topic, when))
def export(result_set, date_str, out_dir='D:/360/'):
    """Write the collected records to an ``.xls`` workbook for one date.

    The sheet gets a header row followed by one row per record: column 0
    holds ``record[0]`` and column 1 holds ``record[1]``.

    Args:
        result_set: Iterable of indexable records with at least two items.
        date_str: Date label; it is made part of the file name so that
            exporting several dates does not overwrite one file.
        out_dir: Directory prefix of the output file, including the
            trailing separator.
    """
    excel = xlwt.Workbook(encoding="utf-8")
    sheet = excel.add_sheet("sheet1")
    sheet.write(0, 0, "热搜")
    sheet.write(0, 1, "时间")
    # Row 0 is the header, so data rows start at 1.
    for row, record in enumerate(result_set, start=1):
        sheet.write(row, 0, record[0])
        sheet.write(row, 1, record[1])
    excel.save(out_dir + 'Test_' + date_str + '.xls')
def main():
    """Fetch the data for every date in the configured range and export it.

    For each date: POST the date to the site, split the response text into
    per-record fragments, collect the records with ``GetMiddleStr`` and
    hand them to ``export``.
    """
    url = 'https://weibo.zhaoyizhe.com/'
    # Build the date list once instead of recomputing it for every access.
    for day in get_date_list():
        # Non-zero-padded "Y-M-D", e.g. "2021-7-20".
        date_str = '{}-{}-{}'.format(day.year, day.month, day.day)
        print(url)
        # A timeout keeps an unresponsive server from blocking forever.
        r = requests.post(url, data={'date': date_str}, headers=headers,
                          timeout=30)
        result = r.content.decode('utf-8')
        print(result)
        # Cut the response into one fragment per record and trim the
        # opening "[{" from the first fragment.
        fragments = result.split('},{')
        fragments[0] = fragments[0].strip('[{')
        # Fresh set per date so each export holds only that date's records.
        final_set = set()
        for fragment in fragments:
            GetMiddleStr(final_set, fragment, date_str)
        export(final_set, date_str)
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
我想爬取历史微博热搜,我需要把下面的代码(网上找的代码,自己改不动了)跑起来
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
5条回答 默认 最新
- 忍气吞声埋头苦干 2021-07-24 01:33关注
import requests
# Request headers for the JSON endpoint queried below.
headers = {
    'Host': 'google-api.zhaoyizhe.com',
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
    # The wildcard media range is "*/*"; a bare "/" is not a valid value.
    'Accept': 'application/json, text/plain, */*',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
    'Origin': 'https://weibo.zhaoyizhe.com',
    'Sec-Fetch-Site': 'same-site',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Dest': 'empty',
    'Referer': 'https://weibo.zhaoyizhe.com/',
    # NOTE(review): "br" requests Brotli; requests decodes it only when a
    # Brotli package is installed -- drop "br" if resp.text looks garbled.
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
}
# A timeout keeps an unresponsive server from blocking forever.
resp = requests.get(
    'https://google-api.zhaoyizhe.com/google-api/index/mon/list',
    headers=headers,
    timeout=30,
)
print(resp.text)
再试试。
本回答被题主选为最佳回答,对您是否有帮助呢?解决 无用 评论 打赏 举报
悬赏问题
- ¥15 微信会员卡接入微信支付商户号收款
- ¥15 如何获取烟草零售终端数据
- ¥15 数学建模招标中位数问题
- ¥15 python路径名过长报错 不知道什么问题
- ¥15 深度学习中模型转换该怎么实现
- ¥15 HLS设计手写数字识别程序编译通不过
- ¥15 Stata外部命令安装问题求帮助!
- ¥15 从键盘随机输入A-H中的一串字符串,用七段数码管方法进行绘制。提交代码及运行截图。
- ¥15 Type-C母转母,插入认方向
- ¥15 如何用python向钉钉机器人发送可以放大的图片?