使用 R 语言中的蘑菇数据(Mushroom)运行 KNN 时,最后的预测准确率一直为 1,能帮我看看是为什么吗?非常感谢。下面是我的代码:
# Load the Mushroom data set via the cba package and take a first look at it.
library(cba)
data(Mushroom)
Mushroom
str(Mushroom) # basic structure of the data
dim(Mushroom)
# Remove the veil-type column, then drop every row that still contains an NA.
Mushroom[["veil-type"]] <- NULL
Mushroom <- na.omit(Mushroom)
sum(is.na(Mushroom))
dim(Mushroom)
# The class column is kept as a factor.
Mushroom$class <- as.factor(Mushroom$class)
# Every column except the first is recoded to its numeric factor level code.
Mushroom[-1] <- lapply(
  Mushroom[-1],
  function(column) as.numeric(factor(column))
)
Mushroom
str(Mushroom)
# Standardise every column except the first.
# NOTE(review): the scaling statistics are computed on all rows, i.e. before
# the train/test split performed later in this script -- confirm that this is
# acceptable for the evaluation.
Mushroom[, -1] <- scale(Mushroom[, -1])
Mushroom
# Split the rows into a training set (80%) and a test set (remaining rows).
set.seed(123) # fixed seed so the random split is reproducible
n_obs <- nrow(Mushroom)
# seq_len() stays correct for a zero-row data frame (1:0 would yield c(1, 0)),
# and floor() gives a whole number of rows instead of passing a fractional
# size to sample().
train_index <- sample(
  seq_len(n_obs),
  size = floor(n_obs * 0.8),
  replace = FALSE
)
train <- Mushroom[train_index, ]
test <- Mushroom[-train_index, ]
dim(train)
dim(test)
train
# Classify the test rows with k-nearest neighbours (k = 5). The first column
# is left out of both feature sets; the training labels come from train$class.
library(class)
knn_pred <- knn(
  train = train[, -1],
  test = test[, -1],
  cl = train$class,
  k = 5
)
# Prediction accuracy: share of test rows whose prediction equals column 1
# of the test set.
# NOTE(review): a perfect score is not necessarily a bug -- consider repeating
# the split with other seeds or cross-validating to confirm it.
mean(knn_pred == test[, 1])
# Cross table of the test set's first column against the predictions.
library(gmodels)
CrossTable(x = test[, 1], y = knn_pred, prop.chisq = FALSE)
#结果
> sum(knn_pred == test[,1]) /dim(test)[1]
[1] 1
##交叉表
             | knn_pred
   test[, 1] |    edible | poisonous | Row Total |
-------------|-----------|-----------|-----------|
      edible |       669 |         0 |       669 |
             |     1.000 |     0.000 |     0.593 |
             |     1.000 |     0.000 |           |
             |     0.593 |     0.000 |           |
-------------|-----------|-----------|-----------|
   poisonous |         0 |       460 |       460 |
             |     0.000 |     1.000 |     0.407 |
             |     0.000 |     1.000 |           |
             |     0.000 |     0.407 |           |
-------------|-----------|-----------|-----------|
Column Total |       669 |       460 |      1129 |
             |     0.593 |     0.407 |           |
-------------|-----------|-----------|-----------|