羅漢果茶 2020-12-03 10:55 · acceptance rate: 66.7%
29 views

Beginner's Python crawler raises an exception at runtime, please advise!

I'm learning web scraping by following a video tutorial on Bilibili. Everything worked until I tried to scrape the actor information from a page, at which point it started failing. Here is my current code:

import bs4
import re
import urllib.request,urllib.error
import sqlite3
from bs4 import BeautifulSoup
import sys
import requests
import importlib
importlib.reload(sys)


def main():
# crawl the page
    baseurl="https://baike.baidu.com/item/平凡的荣耀"
    datalist=getData(baseurl)
    askURL("https://baike.baidu.com/item/平凡的荣耀")
#savepath=".\\平凡的荣耀数据.json"
#saveData(savepath)


# parse the page
# save the page

def askURL(url):  # fetch the content of the given page
    head={
        "User-Agent":"Mozilla/5.0(Windows NT 10.0;WOW64)AppleWebKit/537.36(KHTML, likeGecko)Chrome/70.0.3538.25Safari/537.36Core/1.70.3741.400QQBrowser/10.5.3863.400"
    }  # pretend to be a browser
    request=urllib.request.Request(url,headers=head)
    html=""
    try:
        response = requests.get(url,headers=head)
        html=BeautifulSoup(response.text,'html')
        #print(html)
    except urllib.error.URLError as e:
        if hasattr(e,"code"):
            print(e.code)
        if hasattr(e,"reason"):
            print(e.reason)
    return html

def getData(url):  # scrape the content
    datalist=[]
    return datalist

def getTVData(url):
    datalist=[]
    html=askURL(url)
    soup=BeautifulSoup(html,"html.parser")
    for item in soup.find_all('div',class_="viewport"):  # viewership ratings
        print(item)
    return
    #return datalist

def saveData(savepath):
    print('save...')

# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    baseurl="https://baike.baidu.com/item/平凡的荣耀"
    getTVData(baseurl)
    


Running it raises: TypeError: 'NoneType' object is not callable

Any pointers would be appreciated……


2 answers

  • 囚生CY 2020-12-03 11:11
    import bs4
    import re
    import urllib.request,urllib.error
    import sqlite3
    from bs4 import BeautifulSoup
    import sys
    import requests
    import importlib
    importlib.reload(sys)


    def main():
    # crawl the page
        baseurl="https://baike.baidu.com/item/平凡的荣耀"
        datalist=getData(baseurl)
        askURL("https://baike.baidu.com/item/平凡的荣耀")
    #savepath=".\\平凡的荣耀数据.json"
    #saveData(savepath)


    # parse the page
    # save the page

    def askURL(url):  # fetch the content of the given page
        head={
            "User-Agent":"Mozilla/5.0(Windows NT 10.0;WOW64)AppleWebKit/537.36(KHTML, likeGecko)Chrome/70.0.3538.25Safari/537.36Core/1.70.3741.400QQBrowser/10.5.3863.400"
        }  # pretend to be a browser
        request=urllib.request.Request(url,headers=head)
        html=""
        try:
            response = requests.get(url,headers=head)
            html=BeautifulSoup(response.text,'lxml')
            #print(html)
        except urllib.error.URLError as e:
            if hasattr(e,"code"):
                print(e.code)
            if hasattr(e,"reason"):
                print(e.reason)
        return html

    def getData(url):  # scrape the content
        datalist=[]
        return datalist

    def getTVData(url):
        datalist=[]
        soup=askURL(url)  # askURL already returns a BeautifulSoup object
        for item in soup.find_all('div',class_="viewport"):  # viewership ratings
            print(item)
        return
        #return datalist

    def saveData(savepath):
        print('save...')

    # Press the green button in the gutter to run the script.
    if __name__ == '__main__':
        baseurl="https://baike.baidu.com/item/平凡的荣耀"
        getTVData(baseurl)

    askURL already returns a BeautifulSoup object, so in getTVData just change that line to soup=askURL(url).
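
For the curious, here is why that exact TypeError appears (a sketch of the mechanism, based on how the bs4 versions of that era resolve unknown attributes). Looking up a nonexistent attribute on a soup object is treated as a tag search and returns None, so hasattr(soup, "read") is True; BeautifulSoup.__init__ then mistakes the soup for a file-like object and calls markup.read(), i.e. None():

from bs4 import BeautifulSoup

soup = BeautifulSoup("<p>hello</p>", "html.parser")

# Unknown attribute names on a soup/Tag are treated as tag searches,
# so soup.read is soup.find("read"), which is None for this document.
print(soup.read)  # -> None

# BeautifulSoup.__init__ sees hasattr(markup, "read") is True, assumes a
# file-like object, and calls markup.read() -- that is, None():
BeautifulSoup(soup, "html.parser")  # TypeError: 'NoneType' object is not callable

This is why feeding askURL's return value into BeautifulSoup a second time fails, while using it directly works.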

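A side note beyond the accepted fix: askURL sends the request with requests.get but catches urllib.error.URLError, which requests never raises, so any network failure would propagate uncaught (and the urllib.request.Request object it builds is never used). A minimal sketch of one way to make the function internally consistent, assuming requests stays as the HTTP client (the shortened User-Agent is illustrative only):

import requests
from bs4 import BeautifulSoup

def askURL(url):
    # Pretend to be a browser; header value shortened for readability.
    head = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64)"}
    try:
        response = requests.get(url, headers=head, timeout=10)
        response.raise_for_status()  # turn HTTP 4xx/5xx into exceptions
        return BeautifulSoup(response.text, "html.parser")
    except requests.exceptions.RequestException as e:
        # requests' own exception hierarchy replaces urllib.error.URLError
        print(e)
        return None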

