渴望春天247 2021-07-26 12:03 采纳率: 84.6%
浏览 51
已结题

刚学习爬虫,出现很多报错?


from typing import Any, Union

import requests
from bs4 import BeautifulSoup
import time
import random
def get_html(url, soup):#获取html
    header = {
        "User - Agent: Mozilla/5.0(Windows NT 10.0;Win64;x64) AppleWebKit / 537.36(KHTML, likeGecko) Chrome / 92.0.4515.107Safari / 537.36Edg / 92.0.902.55"}
    html = requests.get(url, headers=header).content
    try:
        html = requests.get(url,timeout = 5)
        soup = BeautifulSoup(html.content,'html.parser') #解析
 #       soup1 = soup.prettify()
  #      print(soup1)
        return soup
    except Exception as e:
        pass 

def xlfirstline(soup)
    get_line = []
    get_jc_a = soup.find_all('div',class_='jc_a')
    get_tr = get_jc_a.find_all('tr')
    tr = get_tr.find_all('th')
    for each_th in tr:
        th = each_th.get_text
        get_line.append(th)
    return get_line
    
def get_content(soup):
    get_result = []
    get_jc_a = soup.find('div',class_='jc_a')
    tr = get_jc_a.find_all('td')
    for each_td in tr:
        td = each_td.get_text()
        get_result.append(td)  
    return get_result
    
def prt_ret(get_result):    
    with open(r'C:\Users\DELL\Desktop\国家社科基金项目数据库.txt','a') as f:
        while get_result:
            for i in range(20):         
                f.write(get_result.pop(0) + '\t')
            f.write('\n')
    
def main():
    url = 'http://fz.people.com.cn/skygb/sk/index.php/Index/index?&p=1' 
    soup = get_html(url, None)
    xlfirstline(soup)
    get_line = xlfirstline(soup)
    prt_ret(get_line)
    for i in range(3):
        wait_time: Union[int, Any] = random.randint(3,10)
        time.sleep(wait_time)
        url = 'http://fz.people.com.cn/skygb/sk/index.php/Index/index?&p=' + str(i + 1)
        soup = get_html(url, None)
        get_result = get_content(soup)
        prt_ret(get_result)


if __name__ == "__main__":
    main()

line 67, in
main()
line 53, in main
soup = get_html(url, None)
line 11, in get_html
html = requests.get(url, headers=header).content
line 75, in get
return request('get', url, params=params, **kwargs)
line 61, in request
return session.request(method=method, url=url, **kwargs)
line 528, in request
prep = self.prepare_request(req)
line 456, in prepare_request
p.prepare(
line 317, in prepare
self.prepare_headers(headers)
line 449, in prepare_headers
for header in headers.items():
AttributeError: 'set' object has no attribute 'items'

  • 写回答

2条回答 默认 最新

  • zhu6201976 博客专家认证 2021-07-26 16:17
    关注

    你的headers写成了set 应该是dict 检查下

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(1条)

报告相同问题?

问题事件

  • 系统已结题 8月6日
  • 已采纳回答 7月29日
  • 修改了问题 7月26日
  • 创建了问题 7月26日

悬赏问题

  • ¥50 buildozer打包kivy app失败
  • ¥30 在vs2022里运行python代码
  • ¥15 不同尺寸货物如何寻找合适的包装箱型谱
  • ¥15 求解 yolo算法问题
  • ¥15 虚拟机打包apk出现错误
  • ¥15 用visual studi code完成html页面
  • ¥15 聚类分析或者python进行数据分析
  • ¥15 三菱伺服电机按启动按钮有使能但不动作
  • ¥15 js,页面2返回页面1时定位进入的设备
  • ¥50 导入文件到网吧的电脑并且在重启之后不会被恢复