MNIST 两层神经网络梯度一直为零

``````python
from keras.datasets import mnist
import keras
import numpy as np
from PIL import Image
import matplotlib.pylab as plt

# Display an image
def img_show(img):
    """Show *img* through Pillow's default image viewer.

    The input is cast to ``uint8`` before it is handed to
    ``Image.fromarray``.
    """
    pixels = np.uint8(img)
    Image.fromarray(pixels).show()

# Two-layer neural network
class TwoLayerNet:
    """Two-layer fully connected network (sigmoid hidden layer, softmax output).

    All trainable arrays are stored in ``self.params`` under the keys
    ``'w1'``, ``'b1'``, ``'w2'`` and ``'b2'``.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameters.

        Weights are drawn from a standard normal distribution and scaled by
        ``weight_init_std``; biases start at zero.
        """
        self.params = {}
        self.params['w1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['w2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Run the forward pass for input ``x`` and return the softmax output.

        Uses the module-level ``sigmoid`` and ``softmax`` functions.
        """
        w1, w2 = self.params['w1'], self.params['w2']  # weights
        b1, b2 = self.params['b1'], self.params['b2']  # biases
        a1 = np.dot(x, w1) + b1
        z1 = sigmoid(a1)  # hidden-layer output
        a2 = np.dot(z1, w2) + b2
        z2 = softmax(a2)  # output-layer output
        return z2

    def loss(self, x, t):
        """Return ``cross_entropy_error`` between the prediction for ``x`` and ``t``."""
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows whose predicted class matches ``t``.

        Both the prediction and ``t`` are reduced with ``argmax`` along
        axis 1, so ``x`` must be a 2-D batch and ``t`` must have one row
        per sample.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        """Return a dict of numerical loss gradients, one entry per parameter.

        NOTE(review): only the ``loss_w = lambda w: self.loss(x, t)`` line of
        this method survived in the pasted source (followed by a stray ``}``).
        The method name, signature and the rest of the body are reconstructed
        -- confirm against the original code.
        """

        def loss_w(w):
            # The argument is ignored on purpose: the loss reads the
            # parameters straight from self.params, which the finite
            # difference perturbs in place.
            return self.loss(x, t)

        return {
            key: self._central_difference(loss_w, param)
            for key, param in self.params.items()
        }

    @staticmethod
    def _central_difference(f, w, h=1e-4):
        """Central-difference gradient of scalar ``f`` w.r.t. every element of ``w``.

        ``w`` is perturbed in place one element at a time and restored
        afterwards. It should be a floating-point array, otherwise the
        ``h`` perturbation is truncated.
        """
        grad = np.zeros_like(w, dtype=float)
        for idx in np.ndindex(w.shape):
            saved = w[idx]  # scalar copy, not a view
            w[idx] = saved + h
            upper = f(w)
            w[idx] = saved - h
            lower = f(w)
            grad[idx] = (upper - lower) / (2 * h)
            w[idx] = saved  # restore the element
        return grad

# Numerical gradient
def numerical_gradient(f, x):
    """Return the central-difference gradient of scalar ``f`` at ``x``.

    NOTE(review): the ``def`` line, the allocation of ``grad`` and the
    ``return`` were missing from the pasted source; the function name and
    signature are reconstructed from the names the loop body uses (``f``,
    ``x``, ``grad``) -- confirm against the original code.

    Every element of ``x`` is perturbed by ``+h`` and ``-h`` in place and
    restored afterwards, so ``f`` may read ``x`` through a closure. ``x``
    should be a floating-point array, otherwise the perturbation is
    truncated.

    The loop visits each element through a full index tuple. Indexing only
    the first axis would make ``x[idx]`` a row view of a 2-D array, which
    aliases the perturbed data and corrupts the difference.
    """
    h = 1e-4
    grad = np.zeros_like(x, dtype=float)
    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]  # scalar copy of the element
        x[idx] = tmp_val + h
        fxh1 = f(x)
        x[idx] = tmp_val - h
        fxh2 = f(x)
        grad[idx] = (fxh1 - fxh2) / (2 * h)  # central difference
        x[idx] = tmp_val  # restore x
    return grad

# Cross-entropy error
def cross_entropy_error(y, t):
    """Return the cross-entropy between prediction ``y`` and target ``t``.

    The result is summed over all elements. A small constant is added
    inside the logarithm so that ``log(0)`` never occurs.
    """
    eps = 1e-7
    log_y = np.log(y + eps)
    return -np.sum(t * log_y)

# Softmax
def softmax(a):
    """Return the softmax of ``a`` along its last axis.

    For a 1-D input this is the ordinary softmax. For a 2-D batch each row
    is normalised on its own, so every row sums to 1. Reducing over the
    whole array would instead make the entire batch sum to 1.
    """
    a = np.asarray(a)
    # A 0-d input has no axis to reduce over; fall back to a full reduction.
    axis = -1 if a.ndim > 0 else None
    # Subtract the per-row maximum to prevent overflow in exp.
    shifted = a - np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(shifted)
    return exp_a / np.sum(exp_a, axis=axis, keepdims=True)

# Sigmoid
def sigmoid(a):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-a))``.

    The input is clipped to [-100, 100] before exponentiation so that
    ``np.exp`` cannot overflow. At those bounds the result is already
    indistinguishable from 0 or 1 in double precision.
    """
    clipped = np.clip(a, -100, 100)
    return 1 / (1 + np.exp(-clipped))

# Load MNIST and convert the labels to one-hot vectors
(x_train, y_train), (x_test, y_test) = mnist.load_data()
y_train = keras.utils.to_categorical(y_train)
y_test = keras.utils.to_categorical(y_test)

# Flatten the training images to (num_samples, 784) floats.
# NOTE(review): pixel values are left in their raw range here; unscaled
# inputs are likely to saturate the sigmoid -- consider scaling to [0, 1].
x_train = x_train.reshape(x_train.shape[0], 784).astype('float')

train_loss_list = []

# Hyperparameters
iters_num = 100  # number of training iterations
train_size = x_train.shape[0]  # total number of training samples
batch_size = 32  # samples drawn per iteration
learning_rate = 0.1  # gradient-descent step size
network = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)

for _ in range(iters_num):
    # Draw a mini-batch.
    # NOTE(review): the sampling lines were missing from the pasted source;
    # reconstructed from the x_batch / y_batch names used below -- confirm.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    y_batch = y_train[batch_mask]

    # Compute the gradient.
    # NOTE(review): the gradient call was also missing; this assumes
    # TwoLayerNet exposes numerical_gradient(x, t) returning a dict keyed
    # like network.params -- confirm.
    grad = network.numerical_gradient(x_batch, y_batch)

    # Update the parameters
    for key in ('w1', 'b1', 'w2', 'b2'):
        network.params[key] = network.params[key] - learning_rate * grad[key]

    # Record the loss on the current mini-batch
    loss = network.loss(x_batch, y_batch)
    train_loss_list.append(loss)

# Plot the loss curve. The x axis is derived from the number of recorded
# losses so the two arrays always have the same length (a float-step
# arange can be off by one element).
x = np.arange(len(train_loss_list)) / 10
y = np.array(train_loss_list)
plt.plot(x, y)
plt.show()

``````
导包的代码我省略了。主要修改有三处：数据需要归一化；权重初始化系数 weight_init_std 由 0.01 修改为 1；学习率降低（修改为 0.05）。如果想做得更好一些，可以增加 dropout。

``````python
# Display an image
def img_show(img):
    """Show *img* through Pillow's default image viewer.

    The input is cast to ``uint8`` before it is handed to
    ``Image.fromarray``.
    """
    pixels = np.uint8(img)
    Image.fromarray(pixels).show()

# Two-layer neural network
class TwoLayerNet:
    """Two-layer fully connected network (sigmoid hidden layer, softmax output).

    All trainable arrays are stored in ``self.params`` under the keys
    ``'w1'``, ``'b1'``, ``'w2'`` and ``'b2'``.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=1):
        """Create the parameters.

        Weights are drawn from a standard normal distribution and scaled by
        ``weight_init_std``; biases start at zero.
        """
        self.params = {}
        self.params['w1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['w2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Run the forward pass for input ``x`` and return the softmax output.

        Uses the module-level ``sigmoid`` and ``softmax`` functions.
        """
        w1, w2 = self.params['w1'], self.params['w2']  # weights
        b1, b2 = self.params['b1'], self.params['b2']  # biases
        a1 = np.dot(x, w1) + b1
        z1 = sigmoid(a1)  # hidden-layer output
        a2 = np.dot(z1, w2) + b2
        z2 = softmax(a2)  # output-layer output
        return z2

    def loss(self, x, t):
        """Return ``cross_entropy_error`` between the prediction for ``x`` and ``t``."""
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows whose predicted class matches ``t``.

        Both the prediction and ``t`` are reduced with ``argmax`` along
        axis 1, so ``x`` must be a 2-D batch and ``t`` must have one row
        per sample.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        """Return a dict of numerical loss gradients, one entry per parameter.

        NOTE(review): only the ``loss_w = lambda w: self.loss(x, t)`` line of
        this method survived in the pasted source (followed by a stray ``}``).
        The method name, signature and the rest of the body are reconstructed
        -- confirm against the original code.
        """

        def loss_w(w):
            # The argument is ignored on purpose: the loss reads the
            # parameters straight from self.params, which the finite
            # difference perturbs in place.
            return self.loss(x, t)

        return {
            key: self._central_difference(loss_w, param)
            for key, param in self.params.items()
        }

    @staticmethod
    def _central_difference(f, w, h=1e-4):
        """Central-difference gradient of scalar ``f`` w.r.t. every element of ``w``.

        ``w`` is perturbed in place one element at a time and restored
        afterwards. It should be a floating-point array, otherwise the
        ``h`` perturbation is truncated.
        """
        grad = np.zeros_like(w, dtype=float)
        for idx in np.ndindex(w.shape):
            saved = w[idx]  # scalar copy, not a view
            w[idx] = saved + h
            upper = f(w)
            w[idx] = saved - h
            lower = f(w)
            grad[idx] = (upper - lower) / (2 * h)
            w[idx] = saved  # restore the element
        return grad

# Numerical gradient
def numerical_gradient(f, x):
    """Return the central-difference gradient of scalar ``f`` at ``x``.

    NOTE(review): the ``def`` line, the allocation of ``grad`` and the
    ``return`` were missing from the pasted source; the function name and
    signature are reconstructed from the names the loop body uses (``f``,
    ``x``, ``grad``) -- confirm against the original code.

    Every element of ``x`` is perturbed by ``+h`` and ``-h`` in place and
    restored afterwards, so ``f`` may read ``x`` through a closure. ``x``
    should be a floating-point array, otherwise the perturbation is
    truncated.

    The loop visits each element through a full index tuple. Indexing only
    the first axis would make ``x[idx]`` a row view of a 2-D array, which
    aliases the perturbed data and corrupts the difference.
    """
    h = 1e-4
    grad = np.zeros_like(x, dtype=float)
    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]  # scalar copy of the element
        x[idx] = tmp_val + h
        fxh1 = f(x)
        x[idx] = tmp_val - h
        fxh2 = f(x)
        grad[idx] = (fxh1 - fxh2) / (2 * h)  # central difference
        x[idx] = tmp_val  # restore x
    return grad

# Cross-entropy error
def cross_entropy_error(y, t):
    """Return the cross-entropy between prediction ``y`` and target ``t``.

    The result is summed over all elements. A small constant is added
    inside the logarithm so that ``log(0)`` never occurs.
    """
    eps = 1e-7
    log_y = np.log(y + eps)
    return -np.sum(t * log_y)

# Softmax
def softmax(a):
    """Return the softmax of ``a`` along its last axis.

    For a 1-D input this is the ordinary softmax. For a 2-D batch each row
    is normalised on its own, so every row sums to 1. Reducing over the
    whole array would instead make the entire batch sum to 1.
    """
    a = np.asarray(a)
    # A 0-d input has no axis to reduce over; fall back to a full reduction.
    axis = -1 if a.ndim > 0 else None
    # Subtract the per-row maximum to prevent overflow in exp.
    shifted = a - np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(shifted)
    return exp_a / np.sum(exp_a, axis=axis, keepdims=True)

# Sigmoid
def sigmoid(a):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-a))``.

    The input is clipped to [-100, 100] before exponentiation so that
    ``np.exp`` cannot overflow. At those bounds the result is already
    indistinguishable from 0 or 1 in double precision.
    """
    clipped = np.clip(a, -100, 100)
    return 1 / (1 + np.exp(-clipped))

# Load MNIST and convert the labels to one-hot vectors
(x_train, y_train), (x_test, y_test) = mnist.load_data()
y_train = keras.utils.to_categorical(y_train)
y_test = keras.utils.to_categorical(y_test)

# Flatten the training images to (num_samples, 784) floats and divide by
# 255 to normalise the pixel values.
x_train = x_train.reshape(x_train.shape[0], 784).astype('float')
x_train = x_train / 255.0

train_loss_list = []

# Hyperparameters
iters_num = 100  # number of training iterations
train_size = x_train.shape[0]  # total number of training samples
batch_size = 32  # samples drawn per iteration
learning_rate = 0.05  # gradient-descent step size
network = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)

for _ in range(iters_num):
    # Draw a mini-batch.
    # NOTE(review): the sampling lines were missing from the pasted source;
    # reconstructed from the x_batch / y_batch names used below -- confirm.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    y_batch = y_train[batch_mask]

    # Compute the gradient.
    # NOTE(review): the gradient call was also missing; this assumes
    # TwoLayerNet exposes numerical_gradient(x, t) returning a dict keyed
    # like network.params -- confirm.
    grad = network.numerical_gradient(x_batch, y_batch)

    # Update the parameters
    for key in ('w1', 'b1', 'w2', 'b2'):
        network.params[key] = network.params[key] - learning_rate * grad[key]

    # Record the loss on the current mini-batch
    loss = network.loss(x_batch, y_batch)
    train_loss_list.append(loss)

# Plot the loss curve. The x axis is derived from the number of recorded
# losses so the two arrays always have the same length (a float-step
# arange can be off by one element).
x = np.arange(len(train_loss_list)) / 10
y = np.array(train_loss_list)
plt.plot(x, y)
plt.show()
``````

