我的数据经筛选后是96行7列,神经网络建模的时候怎么寻找最佳参数?
现在建模的效果很差,应该怎么进行调整?
数据概况:第一列为y;原有601个特征,经筛选后保留7个特征用于神经网络建模。
#MSC处理后进行特征波段筛选
###1.数据读入
#导入pandas读取数据
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import StandardScaler
# Load the spectral dataset from the Excel workbook.
data = pd.read_excel("./data/XZQ集装箱3-1MSC.xlsx")
# print(data[:5])  # first five rows

### 2. Separate the target from the features
# Original author's note: 50 samples, 600 bands; the first column is the
# peach sugar-content value and has to be split off from the spectra.
# print("data.shape:", data.shape)
data_matrix = data.values
y = data_matrix[:, 0]   # target: every row, first column only
X = data_matrix[:, 1:]  # features: every row, second column onwards
# print(f"X.shape:{X.shape}, y.shape:{y.shape}")
### 3. Import SPA
# Project-local SPA package.
import SPA

# Create the SPA selector object.
spa = SPA.SPA()

# Band labels are the column headers, minus the first (target) column.
absorbances = data.columns.values[1:]
# print("bands (first 5):", absorbances[:5])

# Split into calibration and validation sets.
# Original author's note: if the selection collapses to the minimum number of
# bands, try adjusting the split ratio (test_size between 0.3 and 0.5).
Xcal, Xval, ycal, yval = train_test_split(
    X, y, test_size=0.3, random_state=0
)
# print(Xcal.shape, Xval.shape)

### 4. Run the selection
# Original author's note: m_max defaults to 50 (when Xcal has more than 52
# samples); if Xcal has m < 50 rows, use m_max = m - 2.
var_sel, var_sel_phase2 = spa.spa(
    Xcal,
    ycal,
    m_min=2,
    m_max=28,
    Xval=Xval,
    yval=yval,
    autoscaling=1,
)

### 5. Export the selected bands
# var_sel is used below as column indices, not as spectral data: it indexes
# both the band labels and the feature matrix.
print("spa 筛选出的波段:", absorbances[var_sel])

# Spectral data restricted to the selected bands.
X_select = X[:, var_sel]
print("X_select.shape:", X_select.shape)
# print(X_select)
# Build the neural-network regression model on the SPA-selected bands.
# Hold out 30% of the samples as the test set.
# NOTE(review): this split uses random_state=1, whereas the SPA selection
# earlier in the file split the same X/y with random_state=0, so rows in this
# test set may already have been seen during band selection -- confirm whether
# that is acceptable for the reported test scores.
train_x, test_x, train_y, test_y = train_test_split(
    X_select, y, test_size=0.3, random_state=1
)

# Baseline model: features are NOT standardized.
# NOTE(review): warnings are silenced at the top of the file, so a
# non-convergence warning from this fit would not be shown.
mlpr1 = MLPRegressor(
    solver='sgd',
    learning_rate="adaptive",
    max_iter=1000,
    alpha=1e-5,
    hidden_layer_sizes=(5, 2),
    random_state=1,
)
mlpr1.fit(train_x, train_y)

# Predictions on the training and test sets.
mlpr1_lab = mlpr1.predict(train_x)
mlpr1_pre = mlpr1.predict(test_x)

# The printed labels say root-mean-squared error, so take the square root of
# the MSE; previously the raw MSE was printed under the RMSE label.
print("训练集上的均方根误差:", np.sqrt(metrics.mean_squared_error(train_y, mlpr1_lab)))
print("测试集上的均方根误差:", np.sqrt(metrics.mean_squared_error(test_y, mlpr1_pre)))
# Coefficient of determination (R^2) on the test set.
print(metrics.r2_score(test_y, mlpr1_pre))
# Same model, but with standardized features.
# The scaler is fitted on the training features only and then applied to the
# test features, so no test-set statistics enter the scaling.
std = StandardScaler()
train_x_s = std.fit_transform(train_x)
test_x_s = std.transform(test_x)

mlpr2 = MLPRegressor(
    solver='sgd',
    learning_rate="adaptive",
    max_iter=1000,
    alpha=1e-5,
    hidden_layer_sizes=(5, 2),
    random_state=1,
)
mlpr2.fit(train_x_s, train_y)

# Predictions on the standardized training and test sets.
mlpr2_lab = mlpr2.predict(train_x_s)
mlpr2_pre = mlpr2.predict(test_x_s)

# The printed labels say root-mean-squared error, so take the square root of
# the MSE; previously the raw MSE was printed under the RMSE label.
print("训练集上的均方根误差:", np.sqrt(metrics.mean_squared_error(train_y, mlpr2_lab)))
print("测试集上的均方根误差:", np.sqrt(metrics.mean_squared_error(test_y, mlpr2_pre)))
# Coefficient of determination (R^2) on the test set.
print(metrics.r2_score(test_y, mlpr2_pre))
目前的效果