m0_75199394 2024-12-05 00:38 采纳率: 100%
浏览 54
已结题

抓取数据时发生错误: get_mooncake_data() missing 1 required positional argument: 'driver'的问题,怎么改出正确的爬虫代码?

为什么会出现“抓取数据时发生错误: get_mooncake_data() missing 1 required positional argument: 'driver'”这个错误?应该如何修改,才能得到正确的爬虫代码?

1.0  scraper.py
import os
import pickle
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
import logging

# Configure logging: INFO level, timestamped "time - level - message" lines.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Hard-coded values
GECKODRIVER_PATH = r'D:\Mozilla Firefox\geckodriver.exe'  # raw string so the backslashes are not treated as escapes
USERNAME = ''  # replace with your Taobao username
PASSWORD = ''  # replace with your Taobao password

def save_cookies(driver, filename='cookies.pkl'):
    """Pickle the browser session's current cookies into ``filename``.

    Args:
        driver: Object exposing ``get_cookies()`` (a Selenium WebDriver).
        filename: Path of the pickle file to create or overwrite.
    """
    with open(filename, 'wb') as sink:
        session_cookies = driver.get_cookies()
        sink.write(pickle.dumps(session_cookies))

def load_cookies(driver, filename='cookies.pkl'):
    """Restore cookies saved in ``filename`` into the browser session.

    Args:
        driver: Object exposing ``add_cookie(cookie)`` and ``refresh()``
            (a Selenium WebDriver).
        filename: Path of the pickle file to read.

    Returns:
        True when at least one cookie was added and the page was refreshed.
        False when the file is missing or unreadable, holds no cookies, or
        the browser rejected a cookie; the caller is then expected to log in.
    """
    try:
        # NOTE: unpickling can execute arbitrary code; only load cookie files
        # that this program wrote itself.
        with open(filename, 'rb') as f:
            cookies = pickle.load(f)

        # An empty cookie file carries no session, so it must not count as a
        # successful restore (otherwise the caller would skip the login).
        if not cookies:
            logging.warning(f"cookies 文件 {filename} 中没有可用的 cookie")
            return False

        # The file is already closed here; only browser calls remain.
        for cookie in cookies:
            driver.add_cookie(cookie)
        driver.refresh()
        return True
    except Exception as e:
        # Best-effort by design: any failure falls back to a fresh login.
        logging.error(f"加载 cookies 失败: {e}")
        return False

def setup_driver():
    """Create and return a Firefox WebDriver configured for scraping.

    The browser is started through the geckodriver binary at
    ``GECKODRIVER_PATH``, in private-browsing mode, accepting insecure TLS
    certificates, and with ``navigator.webdriver`` masked.

    Returns:
        The started ``webdriver.Firefox`` instance. The caller owns it and
        must call ``quit()`` when done.
    """
    options = Options()
    options.set_preference('dom.webdriver.enabled', False)
    # '--ignore-certificate-errors' is a Chrome switch that Firefox does not
    # act on; the WebDriver capability is the supported equivalent.
    options.accept_insecure_certs = True
    # Likewise '--incognito' is Chrome-only; Firefox's flag is '-private'.
    options.add_argument('-private')

    service = Service(GECKODRIVER_PATH)
    driver = webdriver.Firefox(service=service, options=options)
    # NOTE(review): this patches only the document loaded at this moment;
    # pages opened afterwards presumably get a fresh navigator object.
    driver.execute_script('Object.defineProperty(navigator, "webdriver", {get: () => undefined})')
    return driver

def login(driver):
    """Log in to Taobao with ``USERNAME``/``PASSWORD`` and save the cookies.

    Opens the login page, fills in both credential fields, clicks the login
    button, waits (up to 30 seconds per step) for a post-login element to
    appear, then stores the session cookies via ``save_cookies``.

    Args:
        driver: The Selenium WebDriver to drive.
    """
    username_locator = (By.XPATH, '//input[@type="text"]')
    wait = WebDriverWait(driver, 30)

    driver.get('https://login.taobao.com/member/login.jhtml')
    wait.until(EC.presence_of_element_located(username_locator))

    user_box = driver.find_element(*username_locator)
    pass_box = driver.find_element(By.XPATH, '//input[@type="password"]')
    user_box.send_keys(USERNAME)
    pass_box.send_keys(PASSWORD)

    submit_locator = (By.XPATH, '//button[text()="登录"]')
    wait.until(EC.element_to_be_clickable(submit_locator)).click()

    # This element is used as the signal that the login has completed.
    logged_in_marker = (By.CLASS_NAME, 'site-nav-menu-hd')
    wait.until(EC.presence_of_element_located(logged_in_marker))

    save_cookies(driver)
    logging.info("登录成功并已保存 cookies.")

def get_mooncake_data(driver):
    """Scrape title and price of the mooncake search results on Taobao.

    Args:
        driver: A started Selenium WebDriver. This argument is required;
            callers obtain one from ``setup_driver()``.

    Returns:
        A list of ``{'title': str, 'price': str}`` dicts, one per result
        item whose title and price elements could both be read.
    """
    url = "https://s.taobao.com/search?q=%E6%9C%88%E9%A5%BC"
    # Open the site first: Selenium only accepts cookies for the domain of
    # the page that is currently loaded.
    driver.get(url)

    # Reuse previously saved cookies when they exist and load cleanly.
    if not load_cookies(driver):
        logging.info("没有找到有效的 Cookies,正在进行自动登录...")
        login(driver)
        # login() navigated away to the login page, so return to the search
        # results; otherwise the wait below runs against the wrong page.
        driver.get(url)

    # Wait for the result list to render.
    WebDriverWait(driver, 30).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'item'))
    )

    items = driver.find_elements(By.CLASS_NAME, 'item')
    logging.info(f"Found {len(items)} items.")

    mooncake_data = []
    for position, item in enumerate(items, start=1):
        try:
            title = item.find_element(By.CSS_SELECTOR, '.title').text.strip()
            price = item.find_element(By.CLASS_NAME, 'price').text.strip()
        except Exception as e:
            # Best-effort: one malformed item must not abort the whole scrape.
            logging.error(f"错误: {e} 在处理 item {position} 时")
            continue
        mooncake_data.append({'title': title, 'price': price})

    return mooncake_data

def main():
    """Run one scrape from the command line and print every record."""
    driver = setup_driver()
    try:
        # get_mooncake_data handles cookie loading itself, so nothing is
        # loaded here; the driver must be passed in explicitly.
        for record in get_mooncake_data(driver):
            print(record)
    finally:
        # Close the browser whether or not the scrape succeeded.
        driver.quit()


if __name__ == "__main__":
    main()

2.0 main.py
from PyQt5.QtWidgets import QApplication, QWidget
import sys
from gui import MooncakeApp  # 确保你有这个类

if __name__ == "__main__":
    # The QApplication is created first, before the main window.
    qt_app = QApplication(sys.argv)

    # Build the main window and put it on screen.
    main_window = MooncakeApp()
    main_window.show()

    # Enter the PyQt5 event loop and exit with its return code.
    exit_status = qt_app.exec_()
    sys.exit(exit_status)
3.0 gui.py
from PyQt5.QtWidgets import QApplication, QWidget, QVBoxLayout, QPushButton, QLabel, QTableWidget, QTableWidgetItem
import sys
from scraper import get_mooncake_data
from visualizer import visualize_data

class MooncakeApp(QWidget):
    """Main window: a scrape button, a results table and a chart button."""

    # Record keys, in the same order as the table's column headers.
    _COLUMN_KEYS = ('title', 'price', 'sales_volume', 'shop_name', 'rating')

    def __init__(self):
        """Build the widgets and wire the two buttons to their handlers."""
        super().__init__()
        self.setWindowTitle("月饼销售数据爬虫")
        self.setGeometry(100, 100, 800, 600)

        self.layout = QVBoxLayout()

        self.label = QLabel("月饼销售数据", self)
        self.layout.addWidget(self.label)

        self.btn_scrape = QPushButton("抓取数据", self)
        self.btn_scrape.clicked.connect(self.scrape_data)
        self.layout.addWidget(self.btn_scrape)

        self.table = QTableWidget(self)
        self.layout.addWidget(self.table)

        self.btn_visualize = QPushButton("展示可视化图", self)
        self.btn_visualize.clicked.connect(visualize_data)
        self.layout.addWidget(self.btn_visualize)

        self.setLayout(self.layout)

    def scrape_data(self):
        """Run one scrape and show the result in the table.

        ``get_mooncake_data`` requires a WebDriver argument; calling it with
        no arguments raises "missing 1 required positional argument:
        'driver'". A driver is therefore created here, passed in, and always
        closed afterwards. Errors are printed rather than raised so a failed
        scrape does not take the window down.

        NOTE: this runs synchronously in the button's click handler, so the
        window does not process events until the scrape returns.
        """
        # Imported locally because the module-level import only brings in
        # get_mooncake_data.
        from scraper import setup_driver

        driver = None
        try:
            driver = setup_driver()
            data = get_mooncake_data(driver)  # fetch the records
            if data:  # only fill the table when something was scraped
                self.show_data_in_table(data)
            else:
                print("没有抓取到数据")
        except Exception as e:
            print(f"抓取数据时发生错误: {e}")
        finally:
            # Never leave a browser process behind, even on failure.
            if driver is not None:
                driver.quit()

    def show_data_in_table(self, data):
        """Fill the table with one row per record.

        Args:
            data: A sequence of records. Each record is either a dict keyed
                by ``_COLUMN_KEYS`` (missing keys are shown as empty cells)
                or a plain sequence of cell values in column order.
        """
        self.table.setRowCount(len(data))  # one table row per record
        self.table.setColumnCount(5)  # five fixed columns
        self.table.setHorizontalHeaderLabels(['标题', '价格', '销量', '店铺', '评分'])

        for i, row in enumerate(data):
            if isinstance(row, dict):
                # Iterating a dict yields its keys, which would print
                # 'title'/'price' instead of the scraped values.
                cells = [row.get(key, '') for key in self._COLUMN_KEYS]
            else:
                cells = row
            for j, col in enumerate(cells):
                self.table.setItem(i, j, QTableWidgetItem(str(col)))  # fill the cell

if __name__ == "__main__":
    # Allows gui.py to be launched directly, without going through main.py.
    qt_app = QApplication(sys.argv)
    main_window = MooncakeApp()
    main_window.show()
    exit_status = qt_app.exec_()
    sys.exit(exit_status)
4.0 visualizer.py
import matplotlib.pyplot as plt
import pandas as pd
import mysql.connector

def visualize_data():
    """Plot price against sales volume for the stored mooncake records.

    Reads ``title``, ``price`` and ``sales_volume`` from the
    ``mooncake_sales`` table of the local ``taobao_sales`` MySQL database
    and shows a scatter plot in a matplotlib window.
    """
    # NOTE(review): the password here ("1ot") differs from the "root" used
    # by config.py and db_handler.py for the same database; confirm which
    # one is correct.
    conn = mysql.connector.connect(
        host="localhost", user="root", password="1ot", database="taobao_sales"
    )
    try:
        query = "SELECT title, price, sales_volume FROM mooncake_sales"
        df = pd.read_sql(query, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # Scatter plot of sales volume versus price.
    plt.figure(figsize=(10, 6))
    plt.scatter(df['price'], df['sales_volume'], c='blue', alpha=0.5)
    plt.title('月饼价格与销量关系图')
    plt.xlabel('价格 (¥)')
    plt.ylabel('销量')
    plt.show()

5.0 config.py
# Example configuration file

# Database connection settings. Host, user and database name match the
# values db_handler.py passes to mysql.connector.connect; none of the other
# modules shown here imports this dict.
DB_CONFIG = {
    'host': 'localhost',
    'user': 'root',
    'password': 'root',
    'database': 'taobao_sales'
}

# Browser User-Agent string; not referenced by the other modules shown here.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
6.0 db_handler.py
import mysql.connector

def save_to_mysql(mooncake_data):
    """Insert scraped records into the ``mooncake_sales`` table.

    Args:
        mooncake_data: An iterable of records. Each record is either a
            5-item sequence ``(title, price, sales_volume, shop_name,
            rating)`` or a dict keyed by those column names; keys missing
            from a dict are inserted as NULL.

    All rows are committed together. If any insert fails, nothing is
    committed and the exception propagates; the cursor and connection are
    closed either way.
    """
    columns = ('title', 'price', 'sales_volume', 'shop_name', 'rating')
    # Positional %s placeholders need a sequence, so dict records (the shape
    # the scraper produces) are flattened into column order first.
    rows = [
        tuple(record.get(column) for column in columns)
        if isinstance(record, dict) else record
        for record in mooncake_data
    ]

    conn = mysql.connector.connect(
        host="localhost", user="root", password="root", database="taobao_sales"
    )
    try:
        cursor = conn.cursor()
        try:
            for row in rows:
                cursor.execute("""
                    INSERT INTO mooncake_sales (title, price, sales_volume, shop_name, rating)
                    VALUES (%s, %s, %s, %s, %s)
                """, row)
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
  • 写回答

20条回答 默认 最新

  • 到点就困告 2024-12-05 09:05
    关注

    爬虫私聊dd

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(19条)

报告相同问题?

问题事件

  • 系统已结题 12月14日
  • 已采纳回答 12月6日
  • 创建了问题 12月5日