```python
import requests
import json
import re
import time
import pandas as pd
def trans(timestamp):
    """Format a millisecond Unix timestamp as a local-time string.

    Args:
        timestamp: Milliseconds since the epoch, given as an int or as
            anything ``int()`` accepts (for example a string of digits).

    Returns:
        The moment rendered as ``"YYYY-MM-DD HH:MM:SS"`` in the machine's
        local time zone; the sub-second part is dropped by the format.
    """
    # time.localtime expects seconds, so scale the millisecond value down.
    time_local = time.localtime(int(timestamp) / 1000)
    return time.strftime("%Y-%m-%d %H:%M:%S", time_local)
# POI id notes: 75595 = 故宫 (Forbidden City), 76613 = 恭王府 (Prince Gong's Mansion).
# Each entry is [poiId, display name]. The display name fills the 景点 column
# and is used in the progress messages and the output file name.
view_list = [
    ['4456', '桂林'],  # Guilin
    # Other attractions that can be enabled by uncommenting:
    # ['76613', '恭王府'],   # Prince Gong's Mansion
    # ['75597', '颐和园'],   # Summer Palace
    # ['75599', '天坛'],     # Temple of Heaven
    # ['76625', '圆明园'],   # Old Summer Palace
    # ['75598', '北海公园'], # Beihai Park
]

# Request constants are the same for every page, so build them once.
url = "https://m.ctrip.com/restapi/soa2/13444/json/getCommentCollapseList"
# NOTE(review): the referer names tmall.com although the request goes to
# ctrip.com, and the cookie is a captured browser session -- presumably both
# were copied from elsewhere; confirm whether the endpoint needs them.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4173.2 Safari/537.36',
    'cookie': '_RSG=AcK9V_YEBG7ur9DSkpbO1A; _RGUID=740b3a2f-df25-498a-a058-973b9c349d78; _RDG=281b4b8ffa2be023af3024778c85d3535f; _ga=GA1.2.2094831093.1618393109; MKT_CKID=1618393108670.2qaku.gj3w; ibulanguage=CN; ibulocale=zh_cn; cookiePricesDisplayed=CNY; Union=OUID=index&AllianceID=4897&SID=155952&SourceID=&createtime=1621216936&Expires=1621821735909; MKT_OrderClick=ASID=4897155952&AID=4897&CSID=155952&OUID=index&CT=1621216935915&CURL=https%3A%2F%2Fwww.ctrip.com%2F%3Fsid%3D155952%26allianceid%3D4897%26ouid%3Dindex&VAL={"pc_vid":"1618393095351.3issth"}; _RF1=203.93.121.22; MKT_CKID_LMT=1621216935930; _gid=GA1.2.232947876.1621216936; MKT_Pagesource=PC; StartCity_Pkg=PkgStartCity=2; GUID=09031023113948444957; _abtest_userid=8ffeb95c-a3d5-4a29-9b98-d8b1e37c1495; intl_ht1=h4=1_375126; __utma=1.2094831093.1618393109.1621225649.1621225649.1; __utmc=1; __utmz=1.1621225649.1.1.utmcsr=ctrip.com|utmccn=(referral)|utmcmd=referral|utmcct=/; __utmt=1; __utmb=1.1.10.1621225649; nfes_isSupportWebP=1; _gat=1; appFloatCnt=2; _bfs=1.7; _bfa=1.1618393095351.3issth.1.1621216932364.1621225645294.3.20; _bfi=p1%3D290510%26p2%3D290510%26v1%3D20%26v2%3D19; _jzqco=%7C%7C%7C%7C1621216936294%7C1.1123308241.1618393108722.1621225749459.1621225775042.1621225749459.1621225775042.undefined.0.0.8.8; __zpspc=9.3.1621225651.1621225775.6%232%7Cwww.baidu.com%7C%7C%7C%25E6%2590%25BA%25E7%25A8%258B%25E7%25BD%2591%7C%23',
    'referer': 'https://detail.tmall.com/',
    'Content-Type': 'text/plain'
}
max_pages = 500          # upper bound on pages requested per attraction
request_timeout = 30     # seconds; without a timeout a stalled socket hangs forever
digits = re.compile(r'([0-9]+)')

for data in view_list:
    # One (time, attraction, score, content) tuple per comment. Keeping each
    # comment as a single row means a comment with a missing field can never
    # shift the columns against each other.
    rows = []
    for x in range(1, max_pages + 1):
        payload = {"arg":{"channelType":2,"collapseType":0,"commentTagId":0,"pageIndex":x,"pageSize":1000,"poiId":data[0],"sourceType":1,"sortType":3,"starType":0},"head":{"cid":"","ctok":"","cver":"1.0","lang":"01","sid":"8888","syscode":"09","auth":"","xsid":"","extension":[]}}
        try:
            response = requests.request("POST", url, headers=headers, data=json.dumps(payload), timeout=request_timeout).text
            html1 = json.loads(response)
        except (requests.RequestException, ValueError) as exc:
            # Stop paging but still fall through to the export below, so the
            # comments collected so far are not lost.
            print('{}请求失败: {}'.format(data[1], exc))
            break

        # A missing/empty result or items list is treated as "no more pages".
        result_part = html1.get('result') if isinstance(html1, dict) else None
        comments = result_part.get('items') if isinstance(result_part, dict) else None
        if not comments:
            print('{}无数据了'.format(data[1]))
            break

        for element in comments:
            try:
                # The first run of digits in publishTime is used as the
                # millisecond timestamp (presumably a "/Date(...)/"-style
                # value -- confirm against a live response).
                stamp = digits.findall(element["publishTime"])[0]
                row = (trans(stamp), data[1], element['score'], element['content'])
            except (IndexError, KeyError, TypeError, ValueError):
                # Skip a malformed comment instead of abandoning the scrape.
                continue
            rows.append(row)

    # Column order matches the exported sheet: time, attraction, score, content.
    columns = ['评论时间', '景点', '用户评分', '评论内容']
    pd.DataFrame(rows, columns=columns).to_excel('{}数据.xlsx'.format(data[1]))
```

![img](https://img-mid.csdnimg.cn/release/static/image/mid/ask/114776298556170.png "#left")