import aiohttp
import asyncio
import time
import multiprocessing as mp
import requests
from bs4 import BeautifulSoup
import socket
header = 'http://osu.ppy.sh/'
middle = 'p/pp/?'
mode = 'm=3'  # 0=standard 1=taiko 2=ctb 3=mania
url = header + middle + mode + '&'
page = [1, 3]  # start page - end page
badRequest = {}  # pageNum: resCode
htmls = {}
# planned MongoDB storage, one document per ranking row, e.g.:
# collection: {"_id": "1", "Rank": "1", "Player Name": "Jaka#ds", "Accuracy": "97.59%", "Play Count": "", "Performance": "17288pp"}
def getPages(pageNum):  # fetch one page per second and cache it
    global url
    # global badRequest
    # global htmls
    try:
        print('start fetching page, pageNum =', pageNum)
        res = requests.get(url=url + str(pageNum), timeout=10)
        time.sleep(1)
        # if the status is not 200, retry up to 3 times
        count = 0
        while res.status_code != 200 and count <= 3:
            # re-issue the request and replace the whole Response object
            res = requests.get(url=url + str(pageNum), timeout=10)
            print('restart get')
            count += 1
        if res.status_code == 200:
            print('200 ok')
            htmls[str(pageNum)] = res.content
        else:
            badRequest[str(pageNum)] = res.status_code
            print('pageNum :', pageNum, 'status code :', res.status_code)
        return res.status_code
    except Exception as e:
        print(e)
        return None
def findTags():  # placeholder: parse the cached pages in htmls with BeautifulSoup
    print()
if __name__ == '__main__':
    startTime = time.time()
    pool = mp.Pool()
    jobs = [pool.apply_async(getPages, args=(pageNum,)) for pageNum in range(page[0], page[-1] + 1)]
    results = [f.get() for f in jobs]
    print(htmls)
    print('elapsed time :', time.time() - startTime, 's')
    print('ok')
The code is above. After `if (res.status_code == 200): htmls[str(pageNum)] = res.content` runs, `res.content` is not stored in the `htmls` dict, even though a test `print` shows `res.content` correctly. Any help would be appreciated.
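For reference, a likely cause is that `mp.Pool` runs `getPages` in separate worker processes, so each worker mutates its own copy of the module-level `htmls`; the parent process's dict never sees those writes. Below is a minimal sketch of one common fix, returning the content from the worker and assembling the dict in the parent from the `apply_async` results (the `(pageNum, content)` tuple shape is my own choice, not from the original code):

```python
import multiprocessing as mp
import requests

url = 'http://osu.ppy.sh/p/pp/?m=3&'  # same URL as above
page = [1, 3]

def getPages(pageNum):
    # Runs in a separate process: module-level dicts here are copies,
    # so return the data instead of writing to a global.
    res = requests.get(url + str(pageNum), timeout=10)
    return pageNum, (res.content if res.status_code == 200 else None)

if __name__ == '__main__':
    with mp.Pool() as pool:
        jobs = [pool.apply_async(getPages, args=(n,)) for n in range(page[0], page[-1] + 1)]
        results = [j.get() for j in jobs]
    # Build htmls in the parent process, where the returned values arrive.
    htmls = {str(n): body for n, body in results if body is not None}
    print(list(htmls))
```

A `multiprocessing.Manager().dict()` shared between processes would also work as `htmls`, at the cost of proxy overhead; returning values through `apply_async` is usually the simpler route.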