最近在学习用随机森林做多分类,任务是多类型设备识别。代码是自己摸索着写的,想请大家帮忙看看有没有问题或写错的地方。
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import pickle
import pandas as pd
from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
# Load the labelled dataset: every file in `input_directory` is read as a
# tab-separated numeric table, and all rows of the k-th file (in sorted
# order, 1-based) receive class label k.
input_directory = "IoT_Labels_Extend"
# Sorted so the file -> class-label mapping is identical on every run.
all_files = sorted(os.listdir(input_directory))
if not all_files:
    # Fail here with a clear message instead of passing None downstream.
    raise ValueError(f"no data files found in {input_directory!r}")

# Print the label -> file-name mapping so the numeric labels can be read back.
for i, item in enumerate(all_files, start=1):
    # splitext strips the real extension whatever its length.
    print(f"{i}: {os.path.splitext(item)[0]}")

# Collect the per-file arrays and concatenate once at the end; concatenating
# inside the loop would recopy the whole accumulated array for every file.
feature_chunks = []
labels = []
for counter, item in enumerate(all_files, start=1):
    samples = np.genfromtxt(os.path.join(input_directory, item), delimiter='\t')
    if samples.ndim == 1:
        # genfromtxt collapses a one-row file to 1-D; restore (1, n_columns)
        # so shape[0] counts samples rather than columns.
        # NOTE(review): a single-column file is also returned as 1-D and
        # would be misread here - confirm files always have several columns.
        samples = samples.reshape(1, -1)
    feature_chunks.append(samples)
    # One label per row of this file.
    labels.extend([counter] * samples.shape[0])

# Stack all files row-wise into one sample matrix.
data = np.concatenate(feature_chunks, axis=0)
# Preserve the original end state: counter is one past the last class label.
counter = len(all_files) + 1
# Split, train and evaluate a random forest on `data` / `labels`.

# Hold out 30% of the samples for testing. The split is stratified so every
# class keeps its proportion in both parts; stratification needs at least two
# samples per class, so fall back to a plain random split otherwise.
_, class_counts = np.unique(labels, return_counts=True)
stratify_labels = labels if class_counts.min() >= 2 else None
x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.3, random_state=42, stratify=stratify_labels
)

# 100 trees, class weights balanced, out-of-bag score enabled. random_state is
# fixed so repeated runs give the same forest, matching the seeded split.
rf = RandomForestClassifier(
    n_estimators=100, class_weight='balanced', oob_score=True, random_state=42
)
# Train on the training part only.
rf.fit(x_train, y_train)

# Predict the class of every held-out sample.
predicted = rf.predict(x_test)
print(predicted)

# Accuracy of the predictions against the true test labels.
accuracy = accuracy_score(y_test, predicted)
# Out-of-bag score (computed during fit because oob_score=True).
print(rf.oob_score_)
# Accuracy on the held-out test set.
print(accuracy)
# Full classification report for the test set.
print(classification_report(y_test, predicted))