最近在学习用kerasd搭建神经网络,有一个简单的二输出二输入函数拟合问题:x1,x2在[-2,2]之间,y1是x1*exp(-x1^2-x2^2),y2是cos(x1^2+x2^2)/x2。结果就遇到了些问题,最后的模型预测结果直接是一个平面,找了好久也看不出来哪里的问题。训练数据集做了归一化处理,到[-1 1]之间。这个真的困扰了我好久,给同学看也没看出来哪里的问题。完整代码在后面,有多个绘图部分,都是检测数据集有效性用的。
我初学机器学习,这个问题本身难度不应该算大。希望同仁可以帮帮我,让我过了这个坎。(我可以私发我的训练集,其实很简单,但不知道怎么加上来)
训练集画出来的散点图 vs 模型测试结果画出来的散点图
代码中的模型本体部分
##load the dataset
dataset=loadtxt('Q2.csv',delimiter=',',dtype='float')
##split into input (x) and output(y) variables
x=dataset[0:2,:].T
y=dataset[2:4,:].T
(trainx,testx,trainy,testy)=train_test_split(x,y,test_size=0.05)
print(trainx)
epoch=999
l_r0=1e-2
##define the keras model
model = Sequential()
regularizer=regularizers.l2(0.01)
initializer=initializers.TruncatedNormal(mean=0.0,stddev=0.5)
input_layer = Dense(units=160, kernel_regularizer=regularizer,input_shape=(2,),kernel_initializer=initializer,activation='tanh',
bias_initializer=initializers.Zeros(),name='input_layer')
model.add(input_layer)
model.add(Dropout(0.5))
output_layer = Dense(units=2,kernel_regularizer=regularizer,kernel_initializer=initializer,activation='relu',
bias_initializer=initializers.Zeros(),name='output_layer')
model.add(output_layer)
model.summary()
##compile the keras model
opt = tf.keras.optimizers.Adam(learning_rate=l_r0)
def root_mean_squared_error(y_true, y_pred):
return k.sqrt(k.mean(k.square(y_pred - y_true)))
model.compile(loss=root_mean_squared_error, optimizer=opt,metrics=['accuracy'])
#fit the keras model on the dataset
H=model.fit(trainx,trainy,validation_data=(testx,testy),epochs=epoch,batch_size=32)
完整代码
from numpy import loadtxt
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from tensorflow.keras.optimizers import SGD
import keras.backend as k
import tensorflow as tf
from keras import initializers
from keras import regularizers
import random
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
##load the dataset
dataset=loadtxt('Q2.csv',delimiter=',',dtype='float')
##split into input (x) and output(y) variables
x=dataset[0:2,:].T
y=dataset[2:4,:].T
(trainx,testx,trainy,testy)=train_test_split(x,y,test_size=0.05)
print(trainx)
epoch=999
l_r0=1e-2
#opt=SGD(learning_rate=l_r0)
#training set plot
trainy1=tf.slice(trainy,[0,0],[972,1])
trainy2=tf.slice(trainy,[0,1],[972,1])
trainx1=tf.slice(trainx,[0,0],[972,1])
trainx2=tf.slice(trainx,[0,1],[972,1])
testy1=tf.slice(testy,[0,0],[52,1])
testx1=tf.slice(testx,[0,0],[52,1])
testx2=tf.slice(testx,[0,1],[52,1])
testy2=tf.slice(testy,[0,1],[52,1])
figure=plt.figure(1)
ax = Axes3D(figure)
ax.scatter(trainx1,trainx2,trainy1,s=20)
plt.show()
figure=plt.figure(2)
ax = Axes3D(figure)
ax.scatter(trainx1,trainx2,trainy2,s=20)
ax.set_xlim(-1, 1)
ax.set_ylim(-1, 1)
ax.set_zlim(-1, 1)
plt.show()
figure=plt.figure(3)
ax = Axes3D(figure)
ax.scatter(testx1,testx2,testy1,s=20)
plt.show()
figure=plt.figure(3)
ax = Axes3D(figure)
ax.scatter(testx1,testx2,testy2,s=20)
plt.show()
##define the keras model
model = Sequential()
regularizer=regularizers.l2(0.01)
initializer=initializers.TruncatedNormal(mean=0.0,stddev=0.5)
input_layer = Dense(units=160, kernel_regularizer=regularizer,input_shape=(2,),kernel_initializer=initializer,activation='tanh',
bias_initializer=initializers.Zeros(),name='input_layer')
model.add(input_layer)
model.add(Dropout(0.5))
output_layer = Dense(units=2,kernel_regularizer=regularizer,kernel_initializer=initializer,activation='relu',
bias_initializer=initializers.Zeros(),name='output_layer')
model.add(output_layer)
model.summary()
##compile the keras model
opt = tf.keras.optimizers.Adam(learning_rate=l_r0)
def root_mean_squared_error(y_true, y_pred):
return k.sqrt(k.mean(k.square(y_pred - y_true)))
model.compile(loss=root_mean_squared_error, optimizer=opt,metrics=['accuracy'])
#fit the keras model on the dataset
H=model.fit(trainx,trainy,validation_data=(testx,testy),epochs=epoch,batch_size=32)
##loss graph
N=np.arange(0,epoch)
plt.style.use("ggplot")
plt.figure(1)
plt.plot(N,H.history["loss"],label="train_loss")
plt.title("train_loss")
plt.figure(2)
plt.plot(N,H.history["val_loss"],label="val_loss")
plt.title("val_loss")
plt.figure(3)
plt.plot(N,H.history["accuracy"],label="train_accuracy")
plt.title("train_accuracy")
plt.figure(4)
plt.plot(N,H.history["val_accuracy"],label="val_accuracy")
plt.title("val_accuracy")
plt.ylabel("Loss/Accuracy")
plt.legend()
#predict plot vs train set plot
predict = model.predict(x)
predicty1=tf.slice(predict,[0,0],[1024,1])
predicty2=tf.slice(predict,[0,1],[1024,1])
predictx1=tf.slice(x,[0,0],[1024,1])
predictx2=tf.slice(x,[0,1],[1024,1])
figure=plt.figure(1)
ax = Axes3D(figure)
ax.scatter(trainx1,trainx2,trainy1,s=20)
plt.show()
figure=plt.figure(2)
ax = Axes3D(figure)
ax.scatter(predictx1,predictx2,predicty1,s=20)
plt.show()
figure=plt.figure(2)
ax = Axes3D(figure)
ax.scatter(trainx1,trainx2,trainy2,s=20)
ax.set_xlim(-1, 1)
ax.set_ylim(-1, 1)
ax.set_zlim(-1, 1)
plt.show()
figure=plt.figure(4)
ax = Axes3D(figure)
ax.scatter(predictx1,predictx2,predicty2,s=20)
ax.set_xlim(-1, 1)
ax.set_ylim(-1, 1)
ax.set_zlim(-5, 5)
plt.show()