Problem
I built a three-layer neural network in PyTorch to fit data for prediction. There are 1000 samples, with four inputs and one output, but no matter how I tune it, the network does not converge.
Code
# Deep neural network for data fitting
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
# Training
def train_loop(dataloader, model, loss_fn, optimizer, train_loss):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        y = torch.reshape(y, (-1, 1))  # -1 instead of a hard-coded 100, so a smaller final batch also works
        loss = loss_fn(pred, y)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())  # .item() stores a plain float; appending the tensor keeps the whole graph alive
        print('train_loss batch%s:' % batch, loss.item())
# Evaluation
def test_loop(dataloader, model, loss_fn, test_loss):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    with torch.no_grad():
        for X, y in dataloader:
            y = torch.reshape(y, (-1, 1))
            pred = model(X)
            loss = loss_fn(pred, y)
            test_loss.append(loss.item())
            # Show the loss:
            # print('test_loss:', loss.item())
if __name__ == '__main__':
    # 4 input parameters, 1 output (an S-parameter)
    path = r'total_data.csv'
    input_dim = 4
    hidden_dim = 8
    output_dim = 1
    # Model: three hidden layers
    ANN_model = nn.Sequential(
        nn.Linear(input_dim, hidden_dim), nn.ELU(),
        nn.Linear(hidden_dim, hidden_dim), nn.ELU(),
        nn.Linear(hidden_dim, hidden_dim), nn.ELU(),
        nn.Linear(hidden_dim, output_dim))
    data = pd.read_csv(path, index_col=False, header=None, dtype=np.float32)
    data_total = np.array(data)
    # Split columns into features and target
    data_input = data_total[:, 1:5]
    data_output = data_total[:, 5]
    train_input, test_input, train_output, test_output = train_test_split(
        data_input, data_output, test_size=0.2)
    # Convert to tensors
    train_input = torch.from_numpy(train_input).type(torch.float32)
    train_output = torch.from_numpy(train_output).type(torch.float32)
    test_input = torch.from_numpy(test_input).type(torch.float32)
    test_output = torch.from_numpy(test_output).type(torch.float32)
    # Normalize: note this rescales each sample (row) to unit L2 norm, not each feature column
    train_input = torch.nn.functional.normalize(train_input, p=2, dim=1)
    test_input = torch.nn.functional.normalize(test_input, p=2, dim=1)
    # Batch size and number of epochs
    batch_size = 100
    epochs = 200
    # Wrap into datasets
    train = torch.utils.data.TensorDataset(train_input, train_output)
    test = torch.utils.data.TensorDataset(test_input, test_output)
    # Wrap into dataloaders
    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test, batch_size=20, shuffle=True)
    loss_func = nn.MSELoss()
    learning_rate = 0.01
    optimizer = torch.optim.Adam(ANN_model.parameters(), lr=learning_rate)
    test_loss = []
    train_loss = []
    for i in range(epochs):
        train_loop(train_loader, ANN_model, loss_func, optimizer, train_loss)
        test_loop(test_loader, ANN_model, loss_func, test_loss)
        print('---------------------------------')
        print('epoch:%s' % i)
    print("Done!")
    # Plot the test loss curve
    px = np.array(range(len(test_loss)))
    ptest_y = np.array(test_loss)
    plt.plot(px, ptest_y, color='green')
    plt.show()
Output
The test set is evaluated at the end of each epoch; the results are as follows:
My reasoning and what I have tried
At first I suspected that the four input parameters had not been normalized, causing vanishing gradients, so I normalized them; this did not improve the network's performance. I then tried the following modifications:
1. Increasing the batch size: raised it gradually from 30 to 100, keeping the learning rate at 0.01.
2. Adding hidden layers and neurons: increased the number of hidden layers to seven, with neuron counts growing then shrinking as [8, 16, 32, 64, 32, 16, 8].
3. Changing the activation function: started with softmax(), then tried ReLU, ELU, LeakyReLU, and Tanh.
4. Normalizing the data (see the standardization sketch after this list).
None of the above methods made the network converge.
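For reference on point 4: torch.nn.functional.normalize(..., dim=1) rescales each sample row to unit L2 norm rather than standardizing each feature column. A per-feature z-score standardization is a variant I could still try; a minimal sketch, reusing the tensor names from the script above (the eps guard is my addition):

import torch

def standardize(train_x: torch.Tensor, test_x: torch.Tensor, eps: float = 1e-8):
    # Column-wise z-score; statistics come from the training split only
    mean = train_x.mean(dim=0, keepdim=True)  # per-feature mean, shape (1, num_features)
    std = train_x.std(dim=0, keepdim=True)    # per-feature std, shape (1, num_features)
    return (train_x - mean) / (std + eps), (test_x - mean) / (std + eps)

# Usage, replacing the two F.normalize calls:
# train_input, test_input = standardize(train_input, test_input)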
I am now trying to add dropout layers; a sketch of where they would go is below.
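Roughly like this (p=0.2 is only a placeholder value; with dropout present, the model has to be switched between train and eval modes so dropout is disabled during testing):

import torch.nn as nn

# The same three-hidden-layer model with Dropout after each activation
# (p=0.2 is a placeholder and would need tuning)
ANN_model = nn.Sequential(
    nn.Linear(4, 8), nn.ELU(), nn.Dropout(p=0.2),
    nn.Linear(8, 8), nn.ELU(), nn.Dropout(p=0.2),
    nn.Linear(8, 8), nn.ELU(), nn.Dropout(p=0.2),
    nn.Linear(8, 1))

# With dropout, switch modes explicitly:
# ANN_model.train() before train_loop, ANN_model.eval() before test_loop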
What I want to achieve
Reduce the MSE to the order of a few tenths (0.x).
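Since MSE scales with the variance of the target, whether 0.x is reachable also depends on the numeric range of the output, so I assume the target may need the same standardization treatment. A sketch, using the variable names from the script above:

# Standardize the target so MSE is measured on roughly unit-variance data
y_mean = train_output.mean()
y_std = train_output.std()
train_output = (train_output - y_mean) / y_std
test_output = (test_output - y_mean) / y_std
# To report predictions in the original units afterwards:
# pred_original = pred * y_std + y_mean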