weixin_51171409 2024-11-05 22:35 采纳率: 0%
浏览 6
已结题

支持向量机底层复现问题

请问各路,谁能帮忙看一下我的SVM复现代码?
不知道是SMO的理解有问题,还是循环多了,或者是哪里参数没调整好,用乳腺癌的数据跑出来只有40%左右的准确率;训练速度也非常慢,差不多1分钟迭代30次左右。
不调SVM包,想知道对SVM的底层哪里有理解偏差

import numpy as np
from sklearn import datasets
from sklearn.metrics import accuracy_score, precision_score, recall_score, cohen_kappa_score, f1_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings('ignore')

class SupportVectorMachine:
    def __init__(self, x_train, y_train):
        self.x_train = x_train
        self.y_train = y_train

    def kernel(self, name, sample1, sample2, **kwargs):
        if name == 'Gauss':
            sigma = kwargs.get('sigma', 1)
            delta = sample1 - sample2
            norm2 = np.dot(delta.T, delta)
            gauss_kernel = np.exp(-norm2 / (2 * sigma ** 2))
            return gauss_kernel
        elif name == 'polynomial':
            p = kwargs.get('p', 2)
            dot = np.dot(sample1.T, sample2) + 1
            polynomial_kernel = dot ** p
            return polynomial_kernel
        elif name == 'linear':
            linear_kernal = np.dot(sample1, sample2)
            return linear_kernal
        else:
            return 'Please input Gauss or polynomial or linear'

    def find_hyperplane(self, alpha):
        label = self.y_train
        train_set = self.x_train
        m, n = train_set.shape
        weight_sample = train_set * alpha
        labeled_weight = weight_sample * label
        w = np.sum(labeled_weight, axis=0).reshape(n, 1)
        w_mult_x = np.dot(train_set, w)
        pos = []; neg = []
        for i in range(len(w_mult_x)):
            if label[i] == 1:
                pos.append(w_mult_x[i][0])
            else:
                neg.append(w_mult_x[i][0])
        max_neg = np.min(neg)
        min_pos = np.max(pos)
        b = -(max_neg + min_pos) / 2
        return w, b

    def predict(self, sample, alpha):
        train_set = self.x_train
        label = self.y_train
        m, n = train_set.shape
        w, b = self.find_hyperplane(alpha)
        predict = 0
        for i in range(m):
            emp = alpha[i] * label[i] * self.kernel('linear', sample, train_set[i], sigma=1)
            predict += emp
        predict += b
        return predict

    def find_alpha1(self, alpha, toler=0.01, C=1):
        train_set = self.x_train
        label = self.y_train
        w, b = self.find_hyperplane(alpha)
        find_target = False
        for alpha1_index in range(len(alpha)):
            if 0 < alpha[alpha1_index] < C:
                emp = label[alpha1_index] * (np.dot(w.T, train_set[alpha1_index]) + b)
                if not (1 - toler < emp < 1 + toler):
                    alpha1 = alpha[alpha1_index]
                    find_target = True
                    break
        if find_target:
            return alpha1, alpha1_index, find_target
        else:
            for alpha1_index in range(len(alpha)):
                emp = label[alpha1_index] * (np.dot(w.T, train_set[alpha1_index]) + b)
                if (alpha[alpha1_index] == 0 and emp < 1 + toler) or \
                   (alpha[alpha1_index] == C and emp > 1 - toler):
                    find_target = True
                    break
            alpha1 = alpha[alpha1_index]
            return alpha1, alpha1_index, find_target

    def update_alpha(self, alpha, alpha1_index, alpha2_index, error, C):
        error = np.array(error)
        alpha1 = alpha[alpha1_index]
        alpha2 = alpha[alpha2_index]
        train_set = self.x_train
        label = self.y_train
        kernal_x1_x1 = self.kernel('linear', train_set[alpha1_index], train_set[alpha1_index], sigma=1)
        kernal_x1_x2 = self.kernel('linear', train_set[alpha1_index], train_set[alpha2_index], sigma=1)
        kernal_x2_x2 = self.kernel('linear', train_set[alpha2_index], train_set[alpha2_index], sigma=1)
        alpha2_new_unc = alpha2 + (label[alpha2_index] * (error[alpha1_index] - error[alpha2_index]) / (kernal_x1_x1 - 2 * kernal_x1_x2 + kernal_x2_x2))
        y1 = label[alpha1_index]; y2 = label[alpha2_index]
        if y1 == y2:
            L = max(0, alpha2 - alpha1)
            H = min(C, C + alpha2 - alpha1)
        else:
            L = max(0, alpha2 + alpha1 - C)
            H = min(C, alpha2 + alpha1)
        if alpha2_new_unc > H:
            alpha2_new = H
        elif alpha2_new_unc < L:
            alpha2_new = L
        else:
            alpha2_new = alpha2_new_unc
        alpha1_new = alpha1 + label[alpha1_index] * label[alpha2_index] * (alpha2 - alpha2_new)
        alpha[alpha1_index] = alpha1_new
        alpha[alpha2_index] = alpha2_new
        return alpha

    def SMO_algorithm(self, iteration=100, C=1, sigma=1):
        train_set = self.x_train
        label = self.y_train
        m, n = train_set.shape
        alpha = np.zeros((m, 1))  # Initialize alpha to 0
        for k in range(iteration):
            print('迭代{}次'.format(k))
            error = []
            for i in range(m):
                predict = self.predict(train_set[i], alpha)
                error.append(predict - label[i])
            alpha1, alpha1_index, find_target = self.find_alpha1(alpha, C=C)
            if not find_target:
                break
            else:
                error1 = error[alpha1_index]
                if error1 >= 0:
                    min_error = np.min(error)
                    alpha2_index = error.index(min_error)
                else:
                    max_error = np.max(error)
                    alpha2_index = error.index(max_error)
            alpha = self.update_alpha(alpha, alpha1_index, alpha2_index, error, C)
        return alpha

# Cross-validation to find the best C
def cross_validate_C(C_values, x_train, y_train, x_test, y_test, sigma=1):
    best_C = None
    best_score = 0
    for C in C_values:
        svm = SupportVectorMachine(x_train, y_train)
        alpha = svm.SMO_algorithm(C=C, sigma=sigma)
        w, b = svm.find_hyperplane(alpha)
        y_predict = []
        for i in range(x_test.shape[0]):
            pred = svm.predict(x_test[i], alpha)
            if pred > 0:
                y_predict.append(1)
            else:
                y_predict.append(-1)
        score = accuracy_score(y_predict, y_test)
        if score > best_score:
            best_score = score
            best_C = C
    return best_C

# Load data and preprocess
cancer = datasets.load_breast_cancer()
data = cancer["data"]
label = cancer.target.reshape(data.shape[0], 1)
for i in range(len(label)):
    if label[i] == 0:
        label[i] = -1

m, n = data.shape
scaler = MinMaxScaler()
data = scaler.fit_transform(data)

C_values = [0.001,0.1,1,10,100]  # Possible C values to test
sigma = 1  # Sigma for the Gaussian kernel
folder = KFold(n_splits=10, shuffle=True, random_state=50)
best_C = 0

for train_index, test_index in folder.split(data):
    x_train = data[train_index, :].astype('float64')
    x_test = data[test_index, :].astype('float64')
    y_train = label[train_index].astype('float64')
    y_test = label[test_index].astype('float64')
    best_C = cross_validate_C(C_values, x_train, y_train, x_test, y_test, sigma)
    break  # We only need to find the best C once

# Now use the best C to train the final model
svm = SupportVectorMachine(data, label)
alpha = svm.SMO_algorithm(C=best_C, sigma=sigma)
w, b = svm.find_hyperplane(alpha)

# Evaluate the final model
y_predict = []
for i in range(data.shape[0]):
    pred = svm.predict(data[i], alpha)
    if pred > 0:
        y_predict.append(1)
    else:
        y_predict.append(-1)

accuracy = accuracy_score(y_predict, label)
precision = precision_score(y_predict, label)
recall = recall_score(y_predict, label)
f1 = f1_score(y_predict, label)
kappa = cohen_kappa_score(y_predict, label)

print('Best C is {}; 准确率为{};查全率为{};召回率为{};f1系数为{};kappa系数为{}'.format(best_C, accuracy, precision, recall, f1, kappa))


  • 写回答

23条回答 默认 最新

  • GISer Liu 2024-11-06 10:25
    关注
    获得0.45元问题酬金

    该回答引用自GPT-4o, 由博主 GISer Liu 编写:

    问题分析

    用户在实现支持向量机(SVM)的底层算法时遇到了以下问题:

    1. 准确率低:使用乳腺癌数据集进行训练和测试,准确率只有40%左右。
    2. 训练速度慢:训练速度非常慢,大约1分钟迭代30次。

    用户怀疑问题可能出在以下几个方面:

    • SMO算法的理解有误。
    • 循环次数过多。
    • 参数设置不当。

    解决方案

    1. 检查SMO算法的实现

    SMO(Sequential Minimal Optimization)算法是SVM的核心算法,用于优化拉格朗日乘子(alpha)。我们需要检查SMO算法的实现是否正确。

    1.1 find_alpha1 方法

    find_alpha1 方法用于选择第一个拉格朗日乘子 alpha1。代码中存在一些逻辑问题:

    def find_alpha1(self, alpha, toler=0.01, C=1):
        train_set = self.x_train
        label = self.y_train
        w, b = self.find_hyperplane(alpha)
        find_target = False
        for alpha1_index in range(len(alpha)):
            if 0 < alpha[alpha1_index] < C:
                emp = label[alpha1_index] * (np.dot(w.T, train_set[alpha1_index]) + b)
                if not (1 - toler < emp < 1 + toler):
                    alpha1 = alpha[alpha1_index]
                    find_target = True
                    break
        if find_target:
            return alpha1, alpha1_index, find_target
        else:
            for alpha1_index in range(len(alpha)):
                emp = label[alpha1_index] * (np.dot(w.T, train_set[alpha1_index]) + b)
                if (alpha[alpha1_index] == 0 and emp < 1 + toler) or \
                   (alpha[alpha1_index] == C and emp > 1 - toler):
                    find_target = True
                    break
            alpha1 = alpha[alpha1_index]
            return alpha1, alpha1_index, find_target
    

    问题

    • 在选择 alpha1 时,代码首先检查 0 < alpha[alpha1_index] < C,然后检查 emp 是否在 (1 - toler, 1 + toler) 范围内。如果不在范围内,则选择该 alpha1。但如果没有找到合适的 alpha1,代码会再次遍历所有 alpha,这次检查的条件是 alpha[alpha1_index] == 0alpha[alpha1_index] == C,这可能会导致选择不合适的 alpha1

    解决方案

    • 修改 find_alpha1 方法,确保选择 alpha1 的逻辑更加合理。
    def find_alpha1(self, alpha, toler=0.01, C=1):
        train_set = self.x_train
        label = self.y_train
        w, b = self.find_hyperplane(alpha)
        find_target = False
        for alpha1_index in range(len(alpha)):
            if 0 < alpha[alpha1_index] < C:
                emp = label[alpha1_index] * (np.dot(w.T, train_set[alpha1_index]) + b)
                if not (1 - toler < emp < 1 + toler):
                    alpha1 = alpha[alpha1_index]
                    find_target = True
                    break
        if not find_target:
            for alpha1_index in range(len(alpha)):
                emp = label[alpha1_index] * (np.dot(w.T, train_set[alpha1_index]) + b)
                if (alpha[alpha1_index] == 0 and emp < 1 + toler) or \
                   (alpha[alpha1_index] == C and emp > 1 - toler):
                    alpha1 = alpha[alpha1_index]
                    find_target = True
                    break
        return alpha1, alpha1_index, find_target
    
    1.2 update_alpha 方法

    update_alpha 方法用于更新拉格朗日乘子 alpha1alpha2。代码中存在一些计算错误:

    def update_alpha(self, alpha, alpha1_index, alpha2_index, error, C):
        error = np.array(error)
        alpha1 = alpha[alpha1_index]
        alpha2 = alpha[alpha2_index]
        train_set = self.x_train
        label = self.y_train
        kernal_x1_x1 = self.kernel('linear', train_set[alpha1_index], train_set[alpha1_index], sigma=1)
        kernal_x1_x2 = self.kernel('linear', train_set[alpha1_index], train_set[alpha2_index], sigma=1)
        kernal_x2_x2 = self.kernel('linear', train_set[alpha2_index], train_set[alpha2_index], sigma=1)
        alpha2_new_unc = alpha2 + (label[alpha2_index] * (error[alpha1_index] - error[alpha2_index]) / (kernal_x1_x1 - 2 * kernal_x1_x2 + kernal_x2_x2))
        y1 = label[alpha1_index]; y2 = label[alpha2_index]
        if y1 == y2:
            L = max(0, alpha2 - alpha1)
            H = min(C, C + alpha2 - alpha1)
        else:
            L = max(0, alpha2 + alpha1 - C)
            H = min(C, alpha2 + alpha1)
        if alpha2_new_unc > H:
            alpha2_new = H
        elif alpha2_new_unc < L:
            alpha2_new = L
        else:
            alpha2_new = alpha2_new_unc
        alpha1_new = alpha1 + label[alpha1_index] * label[alpha2_index] * (alpha2 - alpha2_new)
        alpha[alpha1_index] = alpha1_new
        alpha[alpha2_index] = alpha2_new
        return alpha
    

    问题

    • 在计算 alpha2_new_unc 时,分母部分 (kernal_x1_x1 - 2 * kernal_x1_x2 + kernal_x2_x2) 可能会导致除零错误。

    解决方案

    • 在计算 alpha2_new_unc 时,添加一个小的常数 epsilon 以避免除零错误。
    def update_alpha(self, alpha, alpha1_index, alpha2_index, error, C, epsilon=1e-6):
        error = np.array(error)
        alpha1 = alpha[alpha1_index]
        alpha2 = alpha[alpha2_index]
        train_set = self.x_train
        label = self.y_train
        kernal_x1_x1 = self.kernel('linear', train_set[alpha1_index], train_set[alpha1_index], sigma=1)
        kernal_x1_x2 = self.kernel('linear', train_set[alpha1_index], train_set[alpha2_index], sigma=1)
        kernal_x2_x2 = self.kernel('linear', train_set[alpha2_index], train_set[alpha2_index], sigma=1)
        alpha2_new_unc = alpha2 + (label[alpha2_index] * (error[alpha1_index] - error[alpha2_index]) / (kernal_x1_x1 - 2 * kernal_x1_x2 + kernal_x2_x2 + epsilon))
        y1 = label[alpha1_index]; y2 = label[alpha2_index]
        if y1 == y2:
            L = max(0, alpha2 - alpha1)
            H = min(C, C + alpha2 - alpha1)
        else:
            L = max(0, alpha2 + alpha1 - C)
            H = min(C, alpha2 + alpha1)
        if alpha2_new_unc > H:
            alpha2_new = H
        elif alpha2_new_unc < L:
            alpha2_new = L
        else:
            alpha2_new = alpha2_new_unc
        alpha1_new = alpha1 + label[alpha1_index] * label[alpha2_index] * (alpha2 - alpha2_new)
        alpha[alpha1_index] = alpha1_new
        alpha[alpha2_index] = alpha2_new
        return alpha
    

    2. 检查核函数的选择

    用户在代码中使用了线性核函数,但在实际应用中,高斯核函数(RBF)通常更适合处理非线性问题。

    解决方案

    • 尝试使用高斯核函数(RBF)代替线性核函数。
    def SMO_algorithm(self, iteration=100, C=1, sigma=1):
        train_set = self.x_train
        label = self.y_train
        m, n = train_set.shape
        alpha = np.zeros((m, 1))  # Initialize alpha to 0
        for k in range(iteration):
            print('迭代{}次'.format(k))
            error = []
            for i in range(m):
                predict = self.predict(train_set[i], alpha)
                error.append(predict - label[i])
            alpha1, alpha1_index, find_target = self.find_alpha1(alpha, C=C)
            if not find_target:
                break
            else:
                error1 = error[alpha1_index]
                if error1 >= 0:
                    min_error = np.min(error)
                    alpha2_index = error.index(min_error)
                else:
                    max_error = np.max(error)
                    alpha2_index = error.index(max_error)
            alpha = self.update_alpha(alpha, alpha1_index, alpha2_index, error, C, epsilon=1e-6)
        return alpha
    

    3. 检查参数设置

    用户在代码中使用了固定的 Csigma 值。这些参数对SVM的性能有很大影响。

    解决方案

    • 使用交叉验证来选择最佳的 Csigma 值。
    def cross_validate_C_sigma(C_values, sigma_values, x_train, y_train, x_test, y_test):
        best_C = None
        best_sigma = None
        best_score = 0
        for C in C_values:
            for sigma in sigma_values:
                svm = SupportVectorMachine(x_train, y_train)
                alpha = svm.SMO_algorithm(C=C, sigma=sigma)
                w, b = svm.find_hyperplane(alpha)
                y_predict = []
                for i in range(x_test.shape[0]):
                    pred = svm.predict(x_test[i], alpha)
                    if pred > 0:
                        y_predict.append(1)
                    else:
                        y_predict.append(-1)
                score = accuracy_score(y_predict, y_test)
                if score > best_score:
                    best_score = score
                    best_C = C
                    best_sigma = sigma
        return best_C, best_sigma
    
    # Load data and preprocess
    cancer = datasets.load_breast_cancer()
    data = cancer["data"]
    label = cancer.target.reshape(data.shape[0], 1)
    for i in range(len(label)):
        if label[i] == 0:
            label[i] = -1
    
    m, n = data.shape
    scaler = MinMaxScaler()
    data = scaler.fit_transform(data)
    
    C_values = [0.001, 0.1, 1, 10, 100]  # Possible C values to test
    sigma_values = [0.1, 1, 10]  # Possible sigma values to test
    folder = KFold(n_splits=10, shuffle=True, random_state=50)
    best_C = 0
    best_sigma = 0
    
    for train_index, test_index in folder.split(data):
        x_train = data[train_index, :].astype('float64')
        x_test = data[test_index, :].astype('float64')
        y_train = label[train_index].astype('float64')
        y_test = label[test_index].astype('float64')
        best_C, best_sigma = cross_validate_C_sigma(C_values, sigma_values, x_train, y_train, x_test, y_test)
        break  # We only need to find the best C and sigma once
    
    # Now use the best C and sigma to train the final model
    svm = SupportVectorMachine(data, label)
    alpha = svm.SMO_algorithm(C=best_C, sigma=best_sigma)
    w, b = svm.find_hyperplane(alpha)
    
    # Evaluate the final model
    y_predict = []
    for i in range(data.shape[0]):
        pred = svm.predict(data[i], alpha)
        if pred > 0:
            y_predict.append(1)
        else:
            y_predict.append(-1)
    
    accuracy = accuracy_score(y_predict, label)
    precision = precision_score(y_predict, label)
    recall = recall_score(y_predict, label)
    f1 = f1_score(y_predict, label)
    kappa = cohen_kappa_score(y_predict, label)
    
    print('Best C is {}; Best sigma is {}; 准确率为{};查全率为{};召回率为{};f1系数为{};kappa系数为{}'.format(best_C, best_sigma, accuracy, precision, recall, f1, kappa))
    

    总结

    通过以上修改,可以提高SVM的准确率和训练速度。主要修改点包括:

    1. 修正 find_alpha1update_alpha 方法中的逻辑错误。
    2. 尝试使用高斯核函数(RBF)代替线性核函数。
    3. 使用交叉验证选择最佳的 Csigma 值。

    希望这些修改能帮助用户解决SVM底层实现中的问题。

    如果该回答解决了您的问题,请采纳!如果没有,请私信联系或评论您的疑惑

    评论

报告相同问题?

问题事件

  • 系统已结题 11月14日
  • 修改了问题 11月6日
  • 修改了问题 11月6日
  • 赞助了问题酬金15元 11月6日
  • 展开全部