求帮忙看一个 Python 的 SVM 程序：想修改输入特征（主成分）的维度，应该改哪里？

从zouxy大神那里拷贝来的程序

源程序在这里

from numpy import *
import time
import matplotlib.pyplot as plt 


# calculate kernel value
def calcKernelValue(matrix_x, sample_x, kernelOption):
    """Evaluate the kernel between every row of ``matrix_x`` and ``sample_x``.

    The number of feature columns is not fixed anywhere in this function, so
    it works unchanged for 2-D, 5-D or any other feature dimension.

    Parameters
    ----------
    matrix_x : numpy.matrix
        One sample per row.  The linear branch relies on ``*`` being the
        matrix product, so a ``numpy.matrix`` is expected.
    sample_x : numpy.matrix
        A single sample stored as one row.
    kernelOption : sequence
        ``kernelOption[0]`` is the kernel name, ``'linear'`` or ``'rbf'``.
        For ``'rbf'``, ``kernelOption[1]`` is sigma; a sigma of 0 is replaced
        by 1.0.

    Returns
    -------
    numpy.matrix
        Column matrix with one kernel value per row of ``matrix_x``.

    Raises
    ------
    NameError
        If the kernel name is neither ``'linear'`` nor ``'rbf'``.
    """
    kernelType = kernelOption[0]

    if kernelType == 'linear':
        return matrix_x * sample_x.T

    if kernelType == 'rbf':
        sigma = kernelOption[1]
        if sigma == 0:
            sigma = 1.0
        # Broadcasting subtracts the single sample row from every row at once;
        # the row-wise sum of squares is the squared Euclidean distance.
        diff = asmatrix(matrix_x) - sample_x
        squaredDist = multiply(diff, diff).sum(axis=1)
        return exp(squaredDist / (-2.0 * sigma ** 2))

    raise NameError('Not support kernel type! You can use linear or rbf!')


# calculate kernel matrix given train set and kernel type
def calcKernelMatrix(train_x, kernelOption):
    """Build the square kernel matrix of the training set.

    Column ``i`` holds the kernel values between every training sample and
    sample ``i``, as returned by ``calcKernelValue``.

    Parameters
    ----------
    train_x : numpy.matrix
        Training samples, one per row.
    kernelOption : sequence
        Kernel description passed through to ``calcKernelValue``.

    Returns
    -------
    numpy.matrix
        Matrix of shape (numSamples, numSamples).
    """
    numSamples = train_x.shape[0]
    # asmatrix/range instead of mat/xrange: both exist on Python 2 and 3 and
    # on old and new NumPy releases.
    kernelMatrix = asmatrix(zeros((numSamples, numSamples)))
    for i in range(numSamples):
        kernelMatrix[:, i] = calcKernelValue(train_x, train_x[i, :], kernelOption)
    return kernelMatrix


# define a struct just for storing variables and data
class SVMStruct:
    """Plain container for the training data and the SMO working state."""

    def __init__(self, dataSet, labels, C, toler, kernelOption):
        """Store the inputs and precompute the kernel matrix.

        Parameters
        ----------
        dataSet : numpy.matrix
            Training samples, one per row.
        labels : numpy.matrix
            Label of each sample; the rest of the module indexes it as a
            column (one label per row).
        C : float
            Upper bound applied to every alpha during optimization.
        toler : float
            Tolerance used when checking the KKT conditions.
        kernelOption : sequence
            Kernel description forwarded to ``calcKernelMatrix``.
        """
        self.train_x = dataSet # each row stands for a sample
        self.train_y = labels  # corresponding label
        self.C = C             # penalty parameter: alphas are kept within [0, C]
        self.toler = toler     # termination condition for iteration
        self.numSamples = dataSet.shape[0] # number of samples
        self.alphas = asmatrix(zeros((self.numSamples, 1))) # Lagrange factors for all samples
        self.b = 0
        # one row per sample: [valid flag, cached error]
        self.errorCache = asmatrix(zeros((self.numSamples, 2)))
        self.kernelOpt = kernelOption
        self.kernelMat = calcKernelMatrix(self.train_x, self.kernelOpt)


# calculate the error for alpha k
def calcError(svm, alpha_k):
    """Return the prediction error of training sample ``alpha_k``.

    The error is the current decision value of the sample (computed from the
    alphas, labels, cached kernel column and ``svm.b``) minus its label.

    Parameters
    ----------
    svm : SVMStruct
        Current model state.
    alpha_k : int
        Row index of the training sample.

    Returns
    -------
    float
    """
    weightedLabels = multiply(svm.alphas, svm.train_y)
    # The product is a 1x1 matrix; .item() extracts the scalar without the
    # array-to-scalar conversion that newer NumPy releases deprecate.
    output_k = float((weightedLabels.T * svm.kernelMat[:, alpha_k] + svm.b).item())
    error_k = output_k - float(svm.train_y[alpha_k].item())
    return error_k


# refresh the cached error of alpha k once alpha k has been optimized
def updateError(svm, alpha_k):
    """Recompute the error of sample ``alpha_k`` and store it as valid.

    The cache row is written as ``[1, error]``: the leading 1 flags the entry
    as valid, the second value is the freshly computed error.
    """
    svm.errorCache[alpha_k] = [1, calcError(svm, alpha_k)]


# select alpha j which has the biggest step
def selectAlpha_j(svm, alpha_i, error_i):
    """Choose the second alpha to optimize together with ``alpha_i``.

    Among the samples whose error-cache entry is flagged valid, the one whose
    error differs most from ``error_i`` is chosen.  When ``alpha_i`` is the
    only valid entry, another sample is picked at random.

    Parameters
    ----------
    svm : SVMStruct
        Current model state; the cache entry of ``alpha_i`` is marked valid.
    alpha_i : int
        Index of the first alpha.
    error_i : float
        Error of sample ``alpha_i``.

    Returns
    -------
    tuple
        ``(alpha_j, error_j)``: the chosen index and its current error.

    Raises
    ------
    ValueError
        If the random fallback is needed but there is only one training
        sample, so no distinct partner exists.
    """
    svm.errorCache[alpha_i] = [1, error_i] # mark as valid(has been optimized)
    candidateAlphaList = nonzero(svm.errorCache[:, 0].A)[0] # mat.A return array

    # find the alpha with max iterative step
    if len(candidateAlphaList) > 1:
        # Start below any possible step so the first candidate is always
        # accepted.  Starting at 0 with a strict '>' returned index 0 with a
        # fabricated error of 0 whenever every candidate tied with error_i.
        maxStep = -1.0
        alpha_j = alpha_i
        error_j = error_i
        for alpha_k in candidateAlphaList:
            if alpha_k == alpha_i:
                continue
            error_k = calcError(svm, alpha_k)
            step = abs(error_k - error_i)
            if step > maxStep:
                maxStep = step
                alpha_j = alpha_k
                error_j = error_k
        return alpha_j, error_j

    # if came in this loop first time, we select alpha j randomly
    if svm.numSamples < 2:
        # the retry loop below could never terminate
        raise ValueError('need at least two training samples to pick a second alpha')
    alpha_j = alpha_i
    while alpha_j == alpha_i:
        # 'random' is numpy.random, brought in by the module's wildcard import
        alpha_j = int(random.uniform(0, svm.numSamples))
    error_j = calcError(svm, alpha_j)

    return alpha_j, error_j


# the inner loop for optimizing alpha i and alpha j
def innerLoop(svm, alpha_i):
    """Try one SMO step on ``alpha_i`` and a partner chosen by ``selectAlpha_j``.

    All arithmetic is done on scalars read with ``[row, 0]`` indexing, so the
    function does not depend on the truth value of 1x1 matrices or on the
    builtin ``min``/``max`` surviving the module's ``from numpy import *``.

    Parameters
    ----------
    svm : SVMStruct
        Model state; ``alphas``, ``b`` and ``errorCache`` are updated in place.
    alpha_i : int
        Index of the first alpha to examine.

    Returns
    -------
    int
        1 if the pair of alphas was changed, otherwise 0.
    """
    error_i = calcError(svm, alpha_i)
    y_i = svm.train_y[alpha_i, 0]
    alpha_i_old = svm.alphas[alpha_i, 0]

    ### check and pick up the alpha who violates the KKT condition
    ## satisfy KKT condition
    # 1) yi*f(i) >= 1 and alpha == 0 (outside the boundary)
    # 2) yi*f(i) == 1 and 0<alpha< C (on the boundary)
    # 3) yi*f(i) <= 1 and alpha == C (between the boundary)
    ## violate KKT condition
    # because y[i]*E_i = y[i]*f(i) - y[i]^2 = y[i]*f(i) - 1, so
    # 1) if y[i]*E_i < 0, so yi*f(i) < 1, if alpha < C, violate!(alpha = C will be correct)
    # 2) if y[i]*E_i > 0, so yi*f(i) > 1, if alpha > 0, violate!(alpha = 0 will be correct)
    # 3) if y[i]*E_i = 0, so yi*f(i) = 1, it is on the boundary, needless optimized
    yE_i = y_i * error_i
    violatesKKT = (yE_i < -svm.toler and alpha_i_old < svm.C) or \
                  (yE_i > svm.toler and alpha_i_old > 0)
    if not violatesKKT:
        return 0

    # step 1: select alpha j
    alpha_j, error_j = selectAlpha_j(svm, alpha_i, error_i)
    y_j = svm.train_y[alpha_j, 0]
    alpha_j_old = svm.alphas[alpha_j, 0]

    # step 2: calculate the boundary L and H for alpha j
    if y_i != y_j:
        gap = alpha_j_old - alpha_i_old
        L = gap if gap > 0 else 0.0                 # max(0, aj - ai)
        H = svm.C + gap if gap < 0 else svm.C       # min(C, C + aj - ai)
    else:
        total = alpha_j_old + alpha_i_old
        L = total - svm.C if total > svm.C else 0.0 # max(0, aj + ai - C)
        H = total if total < svm.C else svm.C       # min(C, aj + ai)
    if L == H:
        return 0

    # step 3: calculate eta (the similarity of sample i and j)
    K_ii = svm.kernelMat[alpha_i, alpha_i]
    K_ij = svm.kernelMat[alpha_i, alpha_j]
    K_jj = svm.kernelMat[alpha_j, alpha_j]
    eta = 2.0 * K_ij - K_ii - K_jj
    if eta >= 0:
        return 0

    # step 4: update alpha j
    alpha_j_new = alpha_j_old - y_j * (error_i - error_j) / eta

    # step 5: clip alpha j
    if alpha_j_new > H:
        alpha_j_new = H
    if alpha_j_new < L:
        alpha_j_new = L
    svm.alphas[alpha_j, 0] = alpha_j_new

    # step 6: if alpha j not moving enough, just return
    # (the clipped value stays stored, and its cached error is refreshed)
    if abs(alpha_j_old - alpha_j_new) < 0.00001:
        updateError(svm, alpha_j)
        return 0

    # step 7: update alpha i after optimizing alpha j
    alpha_i_new = alpha_i_old + y_i * y_j * (alpha_j_old - alpha_j_new)
    svm.alphas[alpha_i, 0] = alpha_i_new

    # step 8: update threshold b
    delta_i = y_i * (alpha_i_new - alpha_i_old)
    delta_j = y_j * (alpha_j_new - alpha_j_old)
    b1 = svm.b - error_i - delta_i * K_ii - delta_j * K_ij
    b2 = svm.b - error_j - delta_i * K_ij - delta_j * K_jj
    if (0 < alpha_i_new) and (alpha_i_new < svm.C):
        svm.b = b1
    elif (0 < alpha_j_new) and (alpha_j_new < svm.C):
        svm.b = b2
    else:
        svm.b = (b1 + b2) / 2.0

    # step 9: update error cache for alpha i, j after optimize alpha i, j and b
    updateError(svm, alpha_j)
    updateError(svm, alpha_i)

    return 1


# the main training procedure
def trainSVM(train_x, train_y, C, toler, maxIter, kernelOption=('rbf', 1.0)):
    """Train an SVM with the SMO procedure and return the model state.

    The feature dimension is taken from ``train_x`` itself; nothing here is
    tied to 2-D data.

    Parameters
    ----------
    train_x : array-like
        Training samples, one per row (converted with ``asmatrix``).
    train_y : array-like
        Labels, one per row (converted with ``asmatrix``).
    C : float
        Upper bound for every alpha.
    toler : float
        Tolerance for the KKT check.
    maxIter : int
        Maximum number of passes.
    kernelOption : sequence, optional
        Kernel description, ``('linear', 0)`` or ``('rbf', sigma)``.

    Returns
    -------
    SVMStruct
        The trained model state.
    """
    # calculate training time
    startTime = time.time()

    # init data struct for svm
    svm = SVMStruct(asmatrix(train_x), asmatrix(train_y), C, toler, kernelOption)

    # start training
    entireSet = True
    alphaPairsChanged = 0
    iterCount = 0
    # Iteration termination condition:
    #   Condition 1: reach max iteration
    #   Condition 2: no alpha changed after going through all samples,
    #                in other words, all alpha (samples) fit KKT condition
    while (iterCount < maxIter) and ((alphaPairsChanged > 0) or entireSet):
        alphaPairsChanged = 0

        # update alphas over all training examples
        if entireSet:
            for i in range(svm.numSamples):
                alphaPairsChanged += innerLoop(svm, i)
            # print(...) with a single pre-formatted argument behaves the same
            # on Python 2 and Python 3
            print('---iter:%d entire set, alpha pairs changed:%d' % (iterCount, alphaPairsChanged))
        # update alphas over examples where alpha is not 0 & not C (not on boundary)
        else:
            nonBoundAlphasList = nonzero((svm.alphas.A > 0) * (svm.alphas.A < svm.C))[0]
            for i in nonBoundAlphasList:
                alphaPairsChanged += innerLoop(svm, i)
            print('---iter:%d non boundary, alpha pairs changed:%d' % (iterCount, alphaPairsChanged))
        iterCount += 1

        # alternate loop over all examples and non-boundary examples
        if entireSet:
            entireSet = False
        elif alphaPairsChanged == 0:
            entireSet = True

    print('Congratulations, training complete! Took %fs!' % (time.time() - startTime))
    return svm


# testing your trained svm model given test set
def testSVM(svm, test_x, test_y):
    """Return the fraction of test samples whose predicted sign matches the label.

    Parameters
    ----------
    svm : SVMStruct
        Trained model state.
    test_x : array-like
        Test samples, one per row, with the same number of columns as the
        training data.
    test_y : array-like
        Test labels, one per row.

    Returns
    -------
    float
        Accuracy in the range [0, 1].

    Raises
    ------
    ZeroDivisionError
        If ``test_x`` has no rows.
    """
    test_x = asmatrix(test_x)
    test_y = asmatrix(test_y)
    numTestSamples = test_x.shape[0]
    # only samples with a positive alpha contribute to the decision value
    supportVectorsIndex = nonzero(svm.alphas.A > 0)[0]
    supportVectors      = svm.train_x[supportVectorsIndex]
    supportVectorLabels = svm.train_y[supportVectorsIndex]
    supportVectorAlphas = svm.alphas[supportVectorsIndex]
    weightedLabels = multiply(supportVectorLabels, supportVectorAlphas)
    matchCount = 0
    for i in range(numTestSamples):
        kernelValue = calcKernelValue(supportVectors, test_x[i, :], svm.kernelOpt)
        # 1x1 result; .item() gives a plain scalar for the sign comparison
        predict = (kernelValue.T * weightedLabels + svm.b).item()
        if sign(predict) == sign(test_y[i].item()):
            matchCount += 1
    accuracy = float(matchCount) / numTestSamples
    return accuracy


# show your trained svm model only available with 2-D data
def showSVM(svm):
    """Plot the training samples, the support vectors and a separating line.

    Only 2-column training data can be drawn.  For any other dimension (for
    example 5-D features) a message is printed and 1 is returned.

    NOTE: the line is built from ``w = sum(alpha_i * y_i * x_i)`` over the raw
    samples, regardless of ``svm.kernelOpt``; that corresponds to the decision
    boundary of the linear kernel only.

    Parameters
    ----------
    svm : SVMStruct
        Trained model state.

    Returns
    -------
    int or None
        1 when the data is not 2-D, otherwise None after the plot is shown.
    """
    if svm.train_x.shape[1] != 2:
        print("Sorry! I can not draw because the dimension of your data is not 2!")
        return 1

    # draw all samples
    for i in range(svm.numSamples):
        label = svm.train_y[i, 0]
        if label == -1:
            plt.plot(svm.train_x[i, 0], svm.train_x[i, 1], 'or')
        elif label == 1:
            plt.plot(svm.train_x[i, 0], svm.train_x[i, 1], 'ob')

    # mark support vectors
    supportVectorsIndex = nonzero(svm.alphas.A > 0)[0]
    for i in supportVectorsIndex:
        plt.plot(svm.train_x[i, 0], svm.train_x[i, 1], 'oy')

    # draw the classify line
    w = zeros((2, 1))
    for i in supportVectorsIndex:
        w += svm.alphas[i, 0] * svm.train_y[i, 0] * svm.train_x[i, :].T.A
    # svm.b may be a plain number or a 1x1 matrix; normalise it to a float
    bias = float(asarray(svm.b, dtype=float).item())
    # .min()/.max() methods avoid relying on the builtin min/max, which
    # `from numpy import *` can shadow
    min_x = svm.train_x[:, 0].min()
    max_x = svm.train_x[:, 0].max()
    w0 = w[0, 0]
    w1 = w[1, 0]
    if w1 != 0:
        y_min_x = (-bias - w0 * min_x) / w1
        y_max_x = (-bias - w0 * max_x) / w1
        plt.plot([min_x, max_x], [y_min_x, y_max_x], '-g')
    elif w0 != 0:
        # vertical boundary: x is constant, span the observed y range
        x_line = -bias / w0
        min_y = svm.train_x[:, 1].min()
        max_y = svm.train_x[:, 1].max()
        plt.plot([x_line, x_line], [min_y, max_y], '-g')
    # when both weights are 0 there is no line to draw
    plt.show()

测试代码在这里

from numpy import *
import SVM

################## test svm #####################
## step 1: load data
print("step 1: load data...")
dataSet = []
labels = []
# Raw string: in an ordinary literal the "\t" in "\testSet.txt" is read as a
# TAB character, so the file would never be found.
with open(r'D:\Python33\SVM\testSet.txt') as fileIn:
    for line in fileIn:
        # split() accepts tabs as well as spaces between the columns
        lineArr = line.split()
        if not lineArr:
            continue  # skip blank lines
        # Every column except the last is a feature and the last one is the
        # label, so the same loader reads 2-D, 5-D or any other dimension.
        dataSet.append([float(value) for value in lineArr[:-1]])
        labels.append(float(lineArr[-1]))

dataSet = asmatrix(dataSet)
labels = asmatrix(labels).T
# Train on the first 80 samples and test on the rest, so that no sample is
# used for both training and testing.
numTrain = 80
train_x = dataSet[:numTrain, :]
train_y = labels[:numTrain, :]
test_x = dataSet[numTrain:, :]
test_y = labels[numTrain:, :]

## step 2: training...
print("step 2: training...")
C = 0.6
toler = 0.001
maxIter = 50
svmClassifier = SVM.trainSVM(train_x, train_y, C, toler, maxIter, kernelOption = ('linear', 0))

## step 3: testing
print("step 3: testing...")
accuracy = SVM.testSVM(svmClassifier, test_x, test_y)

## step 4: show the result
print("step 4: show the result...")
print('The classify accuracy is: %.3f%%' % (accuracy * 100))
# showSVM only draws 2-D data; for other dimensions it prints a message
SVM.showSVM(svmClassifier)

测试的数据在这里

3.542485 1.977398 -1
3.018896 2.556416 -1
7.551510 -1.580030 1
2.114999 -0.004466 -1
8.127113 1.274372 1
7.108772 -0.986906 1
8.610639 2.046708 1
2.326297 0.265213 -1
3.634009 1.730537 -1
0.341367 -0.894998 -1
3.125951 0.293251 -1
2.123252 -0.783563 -1
0.887835 -2.797792 -1
..........

现在我想把这个对二维主成分数据做分类的 SVM，改成能对五维主成分数据做分类的

就是把测试的数据改成比如
3.125951 0.293251 2.123252 -0.783563 0.887835 -1
0.887835 -2.797792 3.634009 1.730537 -2.797792 -1
但还是二分类，
请问源程序代码和测试代码应该改哪里？

写回答
好问题 0 提建议
追加酬金
关注问题
分享
邀请回答
编辑收藏删除
收藏举报

报告相同问题？

关注问题

python 具有多个返回值，只用一个参数去接受，为什么会出错 python 有问必答
2021-08-24 11:14

回答 5 已采纳星号表达式(*expressoin)不可单独使用可以先执行完函数再赋值
使用SVM对手写体数字图片分类，python报错 python 机器学习
2022-11-11 15:16

回答 1 已采纳参考：使用svm对手写体数字图片进行分类_Walt_像道光的博客-CSDN博客 import pandas as pdfrom sklea
用python做SVM模型的分类结果，该怎么做误差分析呀 python 分类有问必答机器学习
2022-11-20 22:23

回答 1 已采纳你好，我是有问必答小助手，非常抱歉，本次您提出的有问必答问题，技术专家团超时未为您做出解答本次提问扣除的有问必答次数，已经为您补发到账户，我们后续会持续优化，扩大我们的服务范围，为您带来更好地服务。
简明KPCA及其python实现（核主成分分析）
2020-04-07 22:33

ZachhhBweg的博客文章目录KPCAKPCA， PCA与...核主成分分析-kernel principal component analysis，是一种用于非线性分类的降维工具，实现非线性映射降维右图维典型的非线性分类问题 KPCA， PCA与LDA PCA：主要用于线性非监督学习...
修改svm识别手写数字 jupyter python 支持向量机
2021-12-21 22:25

回答 4 已采纳你试试这个呢 import os import cv2 import matplotlib.pyplot as plt import numpy as np from sklearn import
pyqt+Python两个类之间传值 python
2020-01-10 21:30

回答 1 已采纳 https://blog.csdn.net/u014041346/article/details/83659937
请问一下这个错误应该怎么修改呢？ python windows 有问必答机器学习
2021-06-03 21:30

回答 5 已采纳 No such file or directory: '\\svm_model.pkl' 检查一下你的model是否存在呢训练集不存在！！！！
python主成分分析法降维_机器学习之路：python 特征降维主成分分析 PCA
2021-01-14 22:36

梨漾的博客 1 from sklearn.svm importLinearSVC2 from sklearn.metrics importclassification_report3 from sklearn.decomposition importPCA4 importpandas as pd5 importnumpy as np6 ‘‘‘7 主成分分析：8 特征降低维度的...
svm的这个问题的求解怎么实现啊 python
2023-03-31 02:08

回答 1 已采纳这有个类似的问题, 你可以参考下: https://ask.csdn.net/questions/1056427这篇博客也不错, 你可以看下SVM做多标签回归时遇到的问题及相关解决方式和解释说明除此之
python绘制的ROC曲线与结果不符 python 分类机器学习
2023-03-16 21:13

回答 9 已采纳该回答引用chatGpt根据您提供的信息，所有评价指标都为1，但是ROC曲线不符合预期，这表明评价指标的结果可能不正确。有以下几个问题需要检查和调整：对于多类别问题，需要使用label_binari
为什么同样的数据，用随机森林跑了几次只需要一个小时，换成svm却跑了十几个小时还没训练完成呢。 python 有问必答机器学习
2022-03-28 10:12

回答 3 已采纳 SVM这种使用核技巧的决策函数计算成本关于训练样本数目是线性的，在选择多项式核、次数一多时效果尤为明显，根本算不出来相比决策树的计算复杂度就没那么高
python 3d pca_python机器学习——主成分分析PCA实现
2021-03-04 09:25

weixin_39599166的博客主成分分析PCA实现一、数据降维二、应用“手写体数字图像”数据进行PCA操作参考文献：一、数据降维降维/压缩问题是选取具有代表性的特征，在保持数据多样性( Variance )的基础上，规避掉大量的特征冗余和噪声，不过...
python支持向量机为什么输出结果会变来变去？ python 人工智能有问必答机器学习
2021-06-06 01:55

回答 2 已采纳你可以设置一下random state这个参数，划分数据集的时候也可以设置一下，这样每次得到的结果就是一样的了，有用的话麻烦给个采纳，谢谢
PCA主成分分析算法专题【Python机器学习系列（十五）】
2022-09-18 16:00

侯小啾的博客 PCA主成分分析算法专题【Python机器学习系列（十五）】文章目录 1. PCA简介 1.2 python 实现鸢尾花数据集PCA降维 1.3 sklearn库实现鸢尾花数据集PCA降维案例
支持向量机SVM代码详解——多分类/降维可视化/参数优化【python】
2023-07-08 19:44

lichensun的博客主要介绍数学建模以及大数据比赛中常用的SVM支持向量机模型算法，并使用python实现实例二分类、多分类、可视化以及参数优化。
没有解决我的问题, 去提问

悬赏问题

¥100 Jenkins自动化部署—悬赏100元
¥15 关于#python#的问题：求帮写python代码
¥20 MATLAB画图图形出现上下震荡的线条
¥15 关于#windows#的问题：怎么用WIN 11系统的电脑克隆WIN NT3.51-4.0系统的硬盘
¥15 perl MISA分析p3_in脚本出错
¥15 k8s部署jupyterlab，jupyterlab保存不了文件
¥15 ubuntu虚拟机打包apk错误
¥199 rust编程架构设计的方案有偿
¥15 回答4f系统的像差计算
¥15 java如何提取出pdf里的文字？

码龄粉丝数原力等级 --

求帮忙看一个python的SVM程序改主成分维度改哪里

源程序在这里

测试代码在这里

测试的数据在这里

现在我想把这个二维主成分的SVM改成分类五维主成分的

0条回答默认最新

悬赏问题

求帮忙看一个python的SVM程序改主成分维度改哪里

源程序在这里

测试代码在这里

测试的数据在这里

现在我想把这个二维主成分的SVM改成分类五维主成分的

0条回答 默认 最新

悬赏问题

0条回答默认最新