写了一个scrapy程序,主要是爬取几个网站的新闻内容,一共是四个网站。
我有搞了一个脚本启动的run.py程序,如下
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
from twisted.internet import reactor
import time
def run(spider_list):
runner = CrawlerRunner(get_project_settings())
for spider in spider_list:
runner.crawl(spider)
d = runner.join()
d.addBoth(lambda _: reactor.stop())
reactor.run()
我又搞了一个界面,界面中给他们几个网站分别搞了一个复选框,没事的时候,勾选启动的,一两个网站,点击“确定”, 就可以实现启动scrapy,完成爬取。
界面程序如下所示。
# _*_ coding:utf-8 _*_
# ------------------------
# 左侧窗体六
# 关键字预警页面
# ------------------------
from PyQt5 import QtWidgets
from PyQt5.QtCore import Qt
import qtawesome
from news_spider.Fenghuang import run
import time
from news_spider.Fenghuang.Fenghuang.spiders.fenghuang import FenghuangSpider
from news_spider.Fenghuang.Fenghuang.spiders.huanqiu import HuanqiuSpider
from news_spider.Fenghuang.Fenghuang.spiders.sputnik import SputnikSpider
from news_spider.Fenghuang.Fenghuang.spiders.kyodonews import KyodoSpider
# ----------------------------------------右侧窗体五(关键字搜索)-------------------------------------------
class Right_widget_6(QtWidgets.QWidget):
def right_widget_window_6(self):
self.right_branch_widget_6 = QtWidgets.QWidget()
self.right_branch_widget_6.setGeometry(0, 0, 760, 700)
self.right_branch_layout_6 = QtWidgets.QGridLayout() # 右侧顶部搜索框网格布局
self.right_branch_widget_6.setLayout(self.right_branch_layout_6)
self.search_icon = QtWidgets.QLabel(chr(0xf002) + ' ' + '搜索') # chr(0xf002)代表样式
self.search_icon.setFont(qtawesome.font('fa', 16))
self.right_bar_widget_search_input = QtWidgets.QLineEdit()
self.right_bar_widget_search_input.setPlaceholderText("输入关键字,回车进行搜索")
self.confirm_btn = QtWidgets.QPushButton("确 定")
self.confirm_btn.setObjectName("confirm_btn")
# 复选框
self.web_n_0 = QtWidgets.QCheckBox("全选")
self.web_n_1 = QtWidgets.QCheckBox("环球网")
self.web_n_2 = QtWidgets.QCheckBox("凤凰网")
self.web_n_3 = QtWidgets.QCheckBox("俄星社")
self.web_n_4 = QtWidgets.QCheckBox("共同社")
self.confirm_btn.clicked.connect(lambda: self.run())
self.web_n_0.stateChanged.connect(lambda: self.change_web_name_1())
self.web_n_1.stateChanged.connect(lambda: self.change_web_name_2())
self.web_n_2.stateChanged.connect(lambda: self.change_web_name_2())
self.web_n_3.stateChanged.connect(lambda: self.change_web_name_2())
self.web_n_4.stateChanged.connect(lambda: self.change_web_name_2())
# self.confirm_btn.clicked.connect(self.go)
# ----------------------------将所有组件添加到总布局中--------------------------------
self.right_branch_layout_6.addWidget(self.search_icon, 0, 0, 1, 1)
self.right_branch_layout_6.addWidget(self.right_bar_widget_search_input, 0, 1, 1, 10)
self.right_branch_layout_6.addWidget(self.confirm_btn, 0, 11, 1, 1)
self.right_branch_layout_6.addWidget(self.web_n_0, 2, 0, 1, 1)
self.right_branch_layout_6.addWidget(self.web_n_1, 3, 0, 1, 1)
self.right_branch_layout_6.addWidget(self.web_n_2, 3, 1, 1, 1)
self.right_branch_layout_6.addWidget(self.web_n_3, 3, 2, 1, 1)
self.right_branch_layout_6.addWidget(self.web_n_4, 3, 3, 1, 1)
def change_web_name_1(self):
if self.web_n_0.checkState() == Qt.Checked:
self.web_n_1.setChecked(True)
self.web_n_2.setChecked(True)
self.web_n_3.setChecked(True)
self.web_n_4.setChecked(True)
elif self.web_n_0.checkState() == Qt.Unchecked:
self.web_n_1.setChecked(False)
self.web_n_2.setChecked(False)
self.web_n_3.setChecked(False)
self.web_n_4.setChecked(False)
# isChecked()主要是判断复选框是否被选中,要是选中就返回True,否则返回False。
def change_web_name_2(self):
if self.web_n_1.isChecked() and self.web_n_2.isChecked() and self.web_n_3.isChecked() and self.web_n_4.isChecked():
self.web_n_0.setCheckState(Qt.Checked)
else:
self.web_n_0.setTristate(False)
self.web_n_0.setCheckState(Qt.Unchecked)
def run(self):
web_name = ['self.web_n_1', 'self.web_n_2', 'self.web_n_3', "self.web_n_4"]
names = {
"环球网": HuanqiuSpider,
"凤凰网": FenghuangSpider,
"俄星社": SputnikSpider,
"共同社": KyodoSpider,
}
# for run_name in run_name_list:
# run.run(run_name)
run_name_list = []
if self.web_n_1.isChecked():
run_name_list.append(names[self.web_n_1.text()])
if self.web_n_2.isChecked():
run_name_list.append(names[self.web_n_2.text()])
if self.web_n_3.isChecked():
run_name_list.append(names[self.web_n_3.text()])
if self.web_n_4.isChecked():
run_name_list.append(names[self.web_n_4.text()])
print(run_name_list)
run.run(run_name_list)
效果如下所示。
(当然很丑),我复选框选择其中几个,点击“确定”,程序就会执行。
如下所示。
当我返回界面接着选择一个或者几个,想着再执行以下,这个时候就执行不了了,程序还会自动退出,如下所示
我确实也是搞不懂到底是怎么回事?如果有大神知道,请告诉小弟,小弟在此谢过了!!!