Marson丶 2022-05-12 11:22 采纳率: 42.9%
浏览 60
已结题

json.load(html)时报错json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

源码

import requests, json, time, re, datetime
import pandas as pd


# 请求评论api接口
def requestApi(url):
    headers = {
        'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
        'accept': '*/*',
    }

    try:
        r = requests.get(url, headers=headers)
        r.raise_for_status()
        return r.text

    except requests.HTTPError as e:
        print(e)
    except requests.RequestException as e:
        print(e)
    except:
        print("出错了")


# 解析接口返回数据
def getData(html):
    json_data = json.loads(html)['cmts']
    comments = []

    # 解析数据并存入数组
    try:
        for item in json_data:
            comment = []
            comment.append(item['nickName'])
            comment.append(item['cityName'] if 'cityName' in item else '')
            comment.append(item['content'].strip().replace('\n', ''))
            comment.append(item['score'])
            comment.append(item['startTime'])
            comments.append(comment)

        return comments

    except Exception as e:
        print(comment)
        print(e)


# 保存数据,写入excel
def saveData(comments):
    filename = './cnm.csv'

    dataObject = pd.DataFrame(comments)
    dataObject.to_csv(filename, mode='a', index=False, sep=',', header=False, encoding='utf_8_sig')


# 爬虫主函数
def main():
    # 当前时间
    start_time = datetime.datetime.now().strftime('%Y-%m-%d  %H:%M:%S')
    # 电影上映时间
    end_time = '2019-04-24  00:00:00'

    while start_time > end_time:
        url = 'http://m.maoyan.com/mmdb/comments/movie/248172.json?_v_=yes&offset=0&startTime=' + start_time.replace(
            '  ', '%20')
        html = None
        print(url)
        try:
            html = requestApi(url)
            print(html)

        except Exception as e:  # 如果有异常,暂停一会再爬
            time.sleep(1)
            html = requestApi(url)

        # else: #开启慢速爬虫
        # time.sleep(0.5)

        comments = getData(html)
        # print(url)
        start_time = comments[14][4]  # 获取每页中最后一条评论时间,每页有15条评论
        #print(start_time)

        # 最后一条评论时间减一秒,避免爬取重复数据
        start_time = datetime.datetime.strptime(start_time, '%Y-%m-%d  %H:%M:%S') + datetime.timedelta(seconds=-1)
        start_time = datetime.datetime.strftime(start_time, '%Y-%m-%d  %H:%M:%S')
        print(start_time)
        saveData(comments)


if __name__ == '__main__':
    main()   

报错

Traceback (most recent call last):
  File "C:/Users/22178/Desktop/MaoYan-MovieComments-master/comment.py", line 92, in <module>
    main()   
  File "C:/Users/22178/Desktop/MaoYan-MovieComments-master/comment.py", line 79, in main
    comments = getData(html)
  File "C:/Users/22178/Desktop/MaoYan-MovieComments-master/comment.py", line 27, in getData
    json_data = json.loads(html, strict=False)['cmts']
  File "F:\AnacondaData\lib\json\__init__.py", line 370, in loads
    return cls(**kw).decode(s)
  File "F:\AnacondaData\lib\json\decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "F:\AnacondaData\lib\json\decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

求帮助谢谢~

  • 写回答

2条回答 默认 最新

  • 溪风沐雪 2022-05-12 11:31
    关注

    需要先确定请求是否返回数据了

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(1条)

报告相同问题?

问题事件

  • 系统已结题 6月6日
  • 已采纳回答 5月29日
  • 创建了问题 5月12日

悬赏问题

  • ¥50 有数据,怎么建立模型求影响全要素生产率的因素
  • ¥50 有数据,怎么用matlab求全要素生产率
  • ¥15 TI的insta-spin例程
  • ¥15 完成下列问题完成下列问题
  • ¥15 C#算法问题, 不知道怎么处理这个数据的转换
  • ¥15 YoloV5 第三方库的版本对照问题
  • ¥15 请完成下列相关问题!
  • ¥15 drone 推送镜像时候 purge: true 推送完毕后没有删除对应的镜像,手动拷贝到服务器执行结果正确在样才能让指令自动执行成功删除对应镜像,如何解决?
  • ¥15 求daily translation(DT)偏差订正方法的代码
  • ¥15 js调用html页面需要隐藏某个按钮