用pyqt5给爬虫做个界面,但是在界面中的lineEdit文本传不到爬虫中去(要爬微博所以得传一个用于搜索的关键字)
方法是设一个全局变量KEYWORD然后再在界面中用lineEdit修改这个全局变量,最后开启爬虫,读取这个修改后的KEYWORD
无关的函数我都改成pass方便查看- -,为什么这方法有错误,是因为开了另一个线程然后爬虫默认赋值为原来的关键字1 ?
-*- coding: utf-8 -*-
KEYWORD = '关键字1'
class Ui_Form(object):
def setupUi(self, Form):
Form.setObjectName("Form")
Form.resize(769, 575)
self.lineEdit = QLineEdit(Form)
self.lineEdit.setGeometry(QRect(130, 50, 161, 21))
self.lineEdit.setObjectName("lineEdit")
self.label = QLabel(Form)
self.label.setGeometry(QRect(30, 50, 91, 21))
self.label.setObjectName("label")
self.pushButton_2 = QPushButton(Form)
self.pushButton_2.setGeometry(QRect(550, 40, 81, 41))
self.pushButton_2.setObjectName("pushButton_2")
self.pushButton_3 = QPushButton(Form)
self.pushButton_3.setGeometry(QRect(330, 40, 81, 41))
self.pushButton_3.setObjectName("pushButton_3")
self.pushButton_4 = QPushButton(Form)
self.pushButton_4.setGeometry(QRect(440, 40, 81, 41))
self.pushButton_4.setObjectName("pushButton_4")
self.pushButton_5 = QPushButton(Form)
self.pushButton_5.setGeometry(QRect(660, 40, 81, 41))
self.pushButton_5.setObjectName("pushButton_5")
self.pushButton_4.clicked.connect(self.pop2) #开启爬虫
self.pushButton_2.clicked.connect(self.pop1)
self.pushButton_3.clicked.connect(self.pop4) #开启cookiespool和修改关键字值
self.pushButton_5.clicked.connect(self.pop5)
self.tableView = QTableView(Form)
self.tableView.setGeometry(QRect(15, 131, 731, 421))
#设置tableView
self.model = QStandardItemModel(1, 6)
self.model.setHorizontalHeaderLabels(['作者id', '评论数', '正文', '转发数', '点赞数', 'user'])
self.tableView.setEditTriggers(QAbstractItemView.NoEditTriggers) # 只读
self.tableView.resizeColumnsToContents() # 宽度和长度和显示内容相同
self.tableView.setModel(self.model)
#设置tableView结束
self.tableView.setObjectName("tableView")
self.label_2 = QLabel(Form)
self.label_2.setGeometry(QRect(30, 110, 72, 15))
self.label_2.setObjectName("label_2")
self.retranslateUi(Form)
QMetaObject.connectSlotsByName(Form)
def retranslateUi(self, Form):
_translate = QCoreApplication.translate
Form.setWindowTitle(_translate("Form", "Form"))
self.label.setText(_translate("Form", "输入关键字"))
self.pushButton_2.setText(_translate("Form", "显示结果"))
self.pushButton_3.setText(_translate("Form", "启动服务"))
self.pushButton_4.setText(_translate("Form", "开始抓取"))
self.pushButton_5.setText(_translate("Form", "结果分析"))
self.label_2.setText(_translate("Form", "结果显示"))
#槽函数部分
def pop1(self): #从数据库显示数据
pass
def pop2(self): #开启爬虫
new.run()
def pop3(self): #退出
pass
def pop4(self): #开启服务 在这修改关键字 比如传入的时关键字2
global KEYWORD
KEYWORD = self.lineEdit.text()
print(KEYWORD) #输出会显示关键字2 而不是关键字1
s.start()
def pop5(self): #结果显示
pass
if name == '__main__':
app = QApplication(sys.argv)
MainWindow = QMainWindow()
ui = Ui_Form()
ui.setupUi(MainWindow)
MainWindow.show()
sys.exit(app.exec_())
#爬虫部分
class WeiboSpider(Spider):
client = pymongo.MongoClient(host='127.0.0.1', port=27017)
db = client.weibo
p = db.weibo
name = 'weibo'
allowed_domains = ["weibo.cn"]
start_url='https://weibo.cn/search/mblog'
max_page = 100
count = 0
def start_requests(self):
global KEYWORD
keyword = KEYWORD #这里获取不到已经修改的关键字
print(keyword) #输出的还是关键字1
url='{url}?keyword={keyword}'.format(url=self.start_url, keyword=keyword)
for page in range(self.max_page + 1):
data = {
'mp' : str(self.max_page),
'page' : str(page)
}
yield FormRequest(url, callback=self.parse_index, formdata=data)
def parse_index(self, response):
pass
def comment_detail(self, response):
pass
new.py 文件内容
from scrapy.crawler import CrawlerProcess
from weibosearch.spiders.weibo import WeiboSpider
def run():
process = CrawlerProcess()
process.crawl(WeiboSpider)
process.start()