Marson丶 2022-05-12 11:22 采纳率: 42.9%
浏览 60
已结题

json.load(html)时报错json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

源码

import requests, json, time, re, datetime
import pandas as pd


# 请求评论api接口
def requestApi(url):
    headers = {
        'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
        'accept': '*/*',
    }

    try:
        r = requests.get(url, headers=headers)
        r.raise_for_status()
        return r.text

    except requests.HTTPError as e:
        print(e)
    except requests.RequestException as e:
        print(e)
    except:
        print("出错了")


# 解析接口返回数据
def getData(html):
    json_data = json.loads(html)['cmts']
    comments = []

    # 解析数据并存入数组
    try:
        for item in json_data:
            comment = []
            comment.append(item['nickName'])
            comment.append(item['cityName'] if 'cityName' in item else '')
            comment.append(item['content'].strip().replace('\n', ''))
            comment.append(item['score'])
            comment.append(item['startTime'])
            comments.append(comment)

        return comments

    except Exception as e:
        print(comment)
        print(e)


# 保存数据,写入excel
def saveData(comments):
    filename = './cnm.csv'

    dataObject = pd.DataFrame(comments)
    dataObject.to_csv(filename, mode='a', index=False, sep=',', header=False, encoding='utf_8_sig')


# 爬虫主函数
def main():
    # 当前时间
    start_time = datetime.datetime.now().strftime('%Y-%m-%d  %H:%M:%S')
    # 电影上映时间
    end_time = '2019-04-24  00:00:00'

    while start_time > end_time:
        url = 'http://m.maoyan.com/mmdb/comments/movie/248172.json?_v_=yes&offset=0&startTime=' + start_time.replace(
            '  ', '%20')
        html = None
        print(url)
        try:
            html = requestApi(url)
            print(html)

        except Exception as e:  # 如果有异常,暂停一会再爬
            time.sleep(1)
            html = requestApi(url)

        # else: #开启慢速爬虫
        # time.sleep(0.5)

        comments = getData(html)
        # print(url)
        start_time = comments[14][4]  # 获取每页中最后一条评论时间,每页有15条评论
        #print(start_time)

        # 最后一条评论时间减一秒,避免爬取重复数据
        start_time = datetime.datetime.strptime(start_time, '%Y-%m-%d  %H:%M:%S') + datetime.timedelta(seconds=-1)
        start_time = datetime.datetime.strftime(start_time, '%Y-%m-%d  %H:%M:%S')
        print(start_time)
        saveData(comments)


if __name__ == '__main__':
    main()   

报错

Traceback (most recent call last):
  File "C:/Users/22178/Desktop/MaoYan-MovieComments-master/comment.py", line 92, in <module>
    main()   
  File "C:/Users/22178/Desktop/MaoYan-MovieComments-master/comment.py", line 79, in main
    comments = getData(html)
  File "C:/Users/22178/Desktop/MaoYan-MovieComments-master/comment.py", line 27, in getData
    json_data = json.loads(html, strict=False)['cmts']
  File "F:\AnacondaData\lib\json\__init__.py", line 370, in loads
    return cls(**kw).decode(s)
  File "F:\AnacondaData\lib\json\decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "F:\AnacondaData\lib\json\decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

求帮助谢谢~

  • 写回答

2条回答 默认 最新

  • 溪风沐雪 2022-05-12 11:31
    关注

    需要先确定请求是否返回数据了

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(1条)

报告相同问题?

问题事件

  • 系统已结题 6月6日
  • 已采纳回答 5月29日
  • 创建了问题 5月12日

悬赏问题

  • ¥15 关于cpci总线的几个问题,有点迷糊
  • ¥15 乳腺癌数据集 相关矩阵 特征选择
  • ¥15 我的游戏账号被盗取,请问我该怎么做
  • ¥15 通关usb3.0.push文件,导致usb频繁断连
  • ¥15 有没有能解决微信公众号,只能实时拍照,没有选择相册上传功能,我不懂任何技术,,有没有给我发个软件就能搞定的方法
  • ¥15 Pythontxt文本可视化
  • ¥15 如何基于Ryu环境下使用scapy包进行数据包构造
  • ¥15 springboot国际化
  • ¥15 搭建QEMU环境运行OP-TEE出现错误
  • ¥15 Minifilter文件保护