代码如下:
import requests
from requests.exceptions import RequestException
import time
from bs4 import BeautifulSoup
def get_one_page(url):
try:
headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
response = requests.get(url,headers=headers)
if response.status_code==200:
return response.text
return None
except RequestException:
return None
def page(offset):
url='http://maoyan.com/board/6?offset='+str(offset)
return url
for j in range(10):
html_doc = get_one_page(page(j*10))
soup = BeautifulSoup(html_doc,'lxml')
i = 1
for dd in soup.select("dd"):
print(dd.find("i","board-index board-index-"+str(i+j*10)).get_text()
+dd.find("p","name").get_text()
+dd.find("p","star").get_text().strip()
+dd.find("p","releasetime").string
+dd.find("p","score").get_text()+'\n')
i = i + 1
time.sleep(1)
运行反馈结果为:
Traceback (most recent call last):
File "<ipython-input-8-95f75b1c7bd0>", line 1, in <module>
runfile('H:/程序语言学习用文件夹/Spider/beautifulSoup.py', wdir='H:/程序语言学习用文件夹/Spider')
File "C:\Users\pc1\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 705, in runfile
execfile(filename, namespace)
File "C:\Users\pc1\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "H:/程序语言学习用文件夹/Spider/beautifulSoup.py", line 29, in <module>
soup = BeautifulSoup(html_doc,'lxml')
File "C:\Users\pc1\Anaconda3\lib\site-packages\bs4\__init__.py", line 192, in __init__
elif len(markup) <= 256 and (
TypeError: object of type 'NoneType' has no len()