数据集是 CSV 文件,共十三列、十几万行,第十三列是要预测的值。
试过很多种方法(都是从百度上查到的),包括更改网络层数、节点数、学习率等,但效果都没有什么提升。
不知道问题出在哪里,请大神指点。
import numpy as np
import keras as ks
from keras.models import Sequential
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from keras.layers import Dense, Activation,Dropout
# Load the CSV once, by path, so NumPy opens and closes the file itself
# (the header row is skipped). Columns 0-11 are the features and column 12
# is the regression target.
data = np.loadtxt("shaixuandata.csv", delimiter=",", skiprows=1,
                  usecols=range(13), ndmin=2)
x_yuan = data[:, :12]
y = data[:, 12]

# Split BEFORE scaling so the held-out rows do not influence the scaling
# statistics (otherwise test-set information leaks into training).
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x_yuan, y, test_size=0.25, random_state=43)

# Standardise with the mean/std of the training rows only, then apply the
# same transform to the held-out rows.
scaler = preprocessing.StandardScaler().fit(x_train_raw)
x_train = scaler.transform(x_train_raw)
x_test = scaler.transform(x_test_raw)
# Scaled copy of the full feature matrix, kept under its original name.
x = scaler.transform(x_yuan)
# Small fully connected regression network:
# 12 inputs -> 30 ReLU units (10% dropout) -> 1 linear output.
model = Sequential()
model.add(Dense(units=30, input_dim=12))
model.add(Activation('relu'))
model.add(Dropout(0.1))
model.add(Dense(units=1))
model.add(Activation('linear'))

# Keep a reference to the callback so it can actually be handed to fit();
# a callback that is only constructed has no effect on training.
# NOTE(review): with patience=10 it cannot fire within the 3 epochs used
# below -- train for more epochs if the LR schedule is meant to matter.
reduce_lr = ks.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.1, patience=10, verbose=0, mode='auto',
    epsilon=0.0001, cooldown=0, min_lr=0)

sgd = ks.optimizers.SGD(lr=0.001, clipnorm=1., decay=1e-6, momentum=0.9)

# Pass the configured optimizer object: the string 'sgd' would build a
# default SGD and silently ignore lr/momentum/clipnorm/decay set above.
model.compile(optimizer=sgd, loss='mae', metrics=['mae'])

# The held-out split doubles as validation data, so 'val_loss' monitored by
# the callback is computed on (x_test, y_test).
model.fit(x_train, y_train, batch_size=30, epochs=3,
          callbacks=[reduce_lr], validation_data=(x_test, y_test),
          shuffle=True, class_weight=None, sample_weight=None,
          initial_epoch=0)
# Evaluate on the held-out split: mean squared error between targets and
# model predictions.
predict = model.predict(x_test)

# The network ends in a single-unit layer, so `predict` has one column while
# `y_test` is 1-D. Flatten the predictions first: subtracting (n,) - (n, 1)
# directly would broadcast to an (n, n) matrix instead of per-sample errors.
errors = np.asarray(y_test) - np.asarray(predict).ravel()

# Vectorised mean of squared errors; yields a scalar rather than a
# one-element array and avoids shadowing the builtin `sum`.
mse = np.mean(errors ** 2)
print(mse)