小白写python网络爬虫权威指南出错,求大佬们看一下

# -*- coding: GBK -*-
from bs4 import BeautifulSoup

class Website:
	
	def __init__(self,name,url,targetPattern,absoluteUrl,
		titleTag,bodyTag):
		self.name = name
		self.url = url
		self.targetPattren = targetPattern
		self.absoluteUrl = absoluteUrl
		self.titleTag = titleTag
		self.bodyTag = bodyTag
		
class Content:
	def __init__(self,url,title,body):
		self.url = url
		self.title = title
		self.body = body
		
	def print(self):
		print("URL: {}".format(self.url))
		print("TITLE: {}".format(self.title))
		print("BODY: {}".format(self.body))
		
import re
import requests

class Crawler:
	def __init__(self,site):
		self.site = site
		self.visited = []
		
	def getPage(self,url):
		try:
			req = requests.get(url)
		except requests.exceptions.RequestException:
			return None
		return BeautifulSoup(req.text, 'html.parser')
		
	def safeGet(self,pageObj,selector):
		selectedElems = pageObj.select(selector)
		if selectedElems is not None and len(selectedElems) > 0:
			return '\n'.join([elem.get_text() for elem in selectedElems])
		return ''
		
	def parse(self,url):
		bs = self.getPage(url)
		if bs is not None:
			title = self.safeGet(bs,self.site.titleTag)
			body = self.safeGet(bs,self.site.bodyTag)
			if title != '' and body != '':
				content = Content(url,title,body)
				content.print()

	def crawl(self):
		"""获取网站主页的页面链接"""
		
		bs = self.getPage(self.site.url)
		targetPages = bs.findALL('a',href=re.compile(self.site.targetPattern))
		for targetPage in targetPages:
			targetPage = targetPate.attrs['href']
			if targetPage not in self.visited:
				self.visited.append(targetPage)
				if not self.site.absolutedUrl:
					targetPage = '{}{}'.format(self.site.url,targetPage)
				self.parse(targetPage)
				
reuters = Website('Reuters', 'https://www.reuters.com', '^(/artilce/)', False,
	'h1', 'div.StandardArticleBody_body_1gnLA')

crawler = Crawler(reuters)
crawler.crawl()

代码如上,按照书上打的,运行后是这样的:

 

findALL是书上这么写的,我也试过改成find_all,findall,但都没用,还是报一样的错误

查看全部
JK_laile
JK_laile
2020/11/26 21:27
  • python
  • 点赞
  • 收藏
  • 回答
    私信
满意答案
查看全部

2个回复