2021-03-01 00:35

# 神经网络反向传播法梯度错误和Loss上升问题

```照着书打的代码。。。

```
``````'''对应误差反向传播法的神经网络的实现'''
import numpy as np
from collections import OrderedDict
import matplotlib.pyplot as plt

class SoftmaxWithLoss:
def __init__(self):
self.loss = None  # 损失
self.y = None  # softmax的输出
self.t = None  # 监督数据（one-hot vector）

def softmax(self, x):
x = x - np.max(x)
exp_x = np.exp(x)
softmax_x = exp_x / np.sum(exp_x)
return softmax_x

def cross_entropy_error(self, y, t):
if y.ndim == 1:
t = t.reshape(1, t.size)
y = y.reshape(1, y.size)
batch_size = y.shape[0]
cee = -np.sum(t * np.log(y + 1e-7) / batch_size)

return cee

def forward(self, x, t):
self.t = t
self.y = self.softmax(x)
self.loss = self.cross_entropy_error(self.y, self.t)

return self.loss

def backward(self, dout=1):
batch_size = self.t.shape[0]
dx = (self.y - self.t) / batch_size

return dx

class Affine:
"""定义仿射层（矩阵乘积）"""

def __init__(self, W, b):
self.W = W
self.b = b
self.x = None
self.dW = None
self.db = None

def forward(self, x):
self.x = x
out = np.dot(x, self.W) + self.b

return out

def backward(self, dout):
dx = np.dot(dout, self.W.T)
self.dW = np.dot(self.x.T, dout)
self.db = np.sum(dout, axis=0)
return dx

class Relu:
def __init(self):

def forward(self, x):
out = x.copy()

return out

def backward(self, dout):
dx = dout

return dx

class TwoLayerNet:
def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
self.params = {}
self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
self.params['b1'] = np.zeros(hidden_size)
self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
self.params['b2'] = np.zeros(output_size)

# 生成层
self.layers = OrderedDict()
self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
self.layers['Relu1'] = Relu()
self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

self.lastLayer = SoftmaxWithLoss()

def predict(self, x):
for layer in self.layers.values():
x = layer.forward(x)

return x

# x:输入数据，t：监督数据
def loss(self, x, t):
y = self.predict(x)
return self.lastLayer.forward(y, t)

def accuracy(self, x, t):
y = self.predict(x)
y = np.argmax(y, axis=1)
if t.ndim != 1:
t = np.argmax(t, axis=1)
accuracy = np.sum(y == t) / float(x.shape[0])
return accuracy

# x:输入数据，t：监督数据
# forward
self.loss(x, t)

# backward
dout = 1
dout = self.lastLayer.backward(dout)

layers = list(self.layers.values())
layers.reverse()
for layer in layers:
dout = layer.backward(dout)

# 设定

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):

# 通过误差反向传播求梯度

# 更新
for key in ('W1', 'b1', 'W2', 'b2'):

loss = network.loss(x_batch, t_batch)
train_loss_list.append(loss)

if i % iter_per_epoch == 0:
train_acc = network.accuracy(x_train, t_train)
test_acc = network.accuracy(x_test, t_test)
train_acc_list.append(train_acc)
test_acc_list.append(test_acc)
print(train_acc, test_acc)

plt.plot(train_loss_list)
plt.show()``````
• 点赞
• 写回答
• 关注问题
• 收藏
• 复制链接分享
• 邀请回答