问题
返回的网页内容不完整，这是为什么啊？
爬虫代码
import scrapy
class wangyiyun_spider(scrapy.Spider):
    """Spider that requests https://music.163.com/ and saves the body to disk."""

    # Name used to select this spider, e.g. ``scrapy crawl wy``.
    name = 'wy'

    def start_requests(self):
        """Yield one request per start URL, each handled by :meth:`parse`."""
        urls = ['https://music.163.com/']
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Write the raw response body to ``wz.html`` in the working directory.

        Args:
            response: The response delivered for a request from
                :meth:`start_requests`.
        """
        # Binary mode: ``response.body`` is bytes, so no re-encoding happens.
        with open('wz.html', 'wb') as f:
            f.write(response.body)
MiddleWares代码
from selenium import webdriver
from scrapy.http.response.html import HtmlResponse
import time
class SeleniumParseMiddleware_req(object):
    """Downloader middleware that renders each request in Chrome via Selenium."""

    def process_request(self, request, spider):
        """Load ``request.url`` in a fresh Chrome instance and return its HTML.

        Args:
            request: The Scrapy request being processed; its URL is the page
                that gets rendered.
            spider: The spider that issued the request (unused).

        Returns:
            An ``HtmlResponse`` whose body is the UTF-8 encoded page source
            captured from the browser.
        """
        # Render the page that was actually requested rather than a fixed URL,
        # so every request receives the body that belongs to it.
        url = request.url

        options = webdriver.ChromeOptions()
        options.add_argument('--log-level=3')
        brower = webdriver.Chrome(options=options)  # start a browser session
        try:
            brower.maximize_window()
            brower.get(url)
            # Scroll to the bottom, then wait a fixed 10 seconds before
            # capturing the page.
            brower.execute_script('window.scrollTo(0,document.body.scrollHeight)')
            time.sleep(10)
            # NOTE(review): page_source only serializes the document of the
            # current browsing context. If the saved page looks incomplete,
            # check whether the target site renders its main content inside
            # an <iframe>; if so, brower.switch_to.frame(...) is needed before
            # reading page_source -- confirm against the live page.
            data = brower.page_source.encode('utf-8')
        finally:
            # Always end the session so a failed page load does not leave a
            # Chrome process running; quit() also closes the open window.
            brower.quit()

        return HtmlResponse(url=url, body=data, request=request, encoding='utf-8')
class SeleniumParseMiddleware_res(object):
    """Middleware whose response hook passes every response through unchanged."""

    def process_response(self, request, response, spider):
        """Return ``response`` as-is.

        Args:
            request: The request that produced the response (unused).
            response: The response being processed.
            spider: The spider the response is intended for (unused).

        Returns:
            The same ``response`` object that was passed in.
        """
        return response
settings.py 里的 MiddleWares 已经打开了。