最近在复习 Python 数据建模。请问交叉验证 cross_val_score 通过 scoring 参数得到的得分(例如 r2)是在训练集上计算的,还是在测试集(每一折留出的验证部分)上计算的?我看的讲解只说这是“模型的得分”。
另外,GridSearchCV 网格搜索后得到的模型最优分数(best_score_)同样是训练集分数还是测试集分数?代码附在下方。有帮助必采纳!
1.cross_val_score
#交叉验证01示例:K折交叉验证
import numpy as np
import sklearn.model_selection as ms
import sklearn.naive_bayes as nb
x, y = [], []  # raw feature rows and raw labels as parsed from the file
# Read the comma-separated data file: every column except the last is an
# input feature, the last column is the class label.
with open("D:\\pythonProject3\\learning\\resource\\multiple1.txt", "r") as f:
    for line in f:
        if not line.strip():
            continue  # tolerate blank / trailing empty lines
        data = [float(s) for s in line.split(",")]
        x.append(data[:-1])  # inputs: first column up to the second-to-last
        y.append(data[-1])   # output: last column
train_x = np.array(x)
train_y = np.array(y, dtype=int)  # class labels as integers
# No separate hold-out split is made here; cross_val_score performs its own
# train/validation splitting on the data it is given.
# train_x, test_x, train_y, test_y = ms.train_test_split(x, y, test_size=0.25, random_state=7)

# Gaussian naive Bayes classifier (unfitted; cross_val_score clones and fits
# it once per fold).
model = nb.GaussianNB()

# Run cross-validation first; only go on to train/predict if the scores are
# acceptable.
# NOTE: per the scikit-learn documentation, each entry of the returned array
# is the score obtained on the held-out fold of one split, i.e. a
# validation ("test") score, not a training-set score.
pws = ms.cross_val_score(model, train_x, train_y,
                         cv=5,  # number of folds
                         scoring='precision_weighted')  # weighted precision
print("precision:", pws.mean())
rws = ms.cross_val_score(model, train_x, train_y, cv=5,
                         scoring='recall_weighted')  # weighted recall
print("recall:", rws.mean())
f1s = ms.cross_val_score(model, train_x, train_y, cv=5,
                         scoring='f1_weighted')  # weighted F1 score
print("f1:", f1s.mean())
acc = ms.cross_val_score(model, train_x, train_y, cv=5,
                         scoring='accuracy')  # accuracy
print("acc:", acc.mean())
2.GridSearchCV
#网格搜索示例
import numpy as np
import sklearn.model_selection as ms
import sklearn.svm as svm
x, y = [], []  # feature rows and labels, filled from the data file
# Read the comma-separated data file: every column except the last is an
# input feature, the last column is the class label.
with open("D:\\pythonProject3\\learning\\resource\\multiple2.txt", "r") as f:
    for line in f:
        if not line.strip():
            continue  # tolerate blank / trailing empty lines
        data = [float(s) for s in line.split(",")]
        x.append(data[:-1])  # inputs
        y.append(data[-1])   # output
x = np.array(x)
y = np.array(y, dtype=int)  # class labels as integers

# Use grid search to pick the best hyper-parameter combination.
# Each dict is one sub-grid; all combinations inside a dict are tried.
params = [
    {
        "kernel": ["linear"],
        "C": [1, 10, 100, 1000],
    },
    {
        "kernel": ["poly"],
        "C": [1],
        "degree": [2, 3],
    },
    {
        "kernel": ["rbf"],
        "C": [1, 10, 100, 1000],
        "gamma": [1, 0.1, 0.01, 0.001],
    },
]

# Grid-search object: every candidate is evaluated with 5-fold
# cross-validation.
model = ms.GridSearchCV(svm.SVC(), params, cv=5)
model.fit(x, y)  # run the search

# NOTE: per the scikit-learn documentation, best_score_ is the mean
# cross-validated score of the best candidate, i.e. the average of its
# scores on the held-out folds, not a score on the training data.
print("best_score_:", model.best_score_)  # best mean cross-validation score
print("best_params_:\n", model.best_params_)  # best parameter combination