David_aik · asked 2022-08-03 19:10 · question closed

Gradients of a two-layer MNIST neural network are always zero

I recently started learning machine learning. While writing a simple two-layer neural network, I found that the loss stays stubbornly high, and when I inspected the gradient of the loss function it was always zero.
Along the way I tried changing the loss function, swapping the cross-entropy error for mean squared error; I also tried different activation functions (sigmoid, ReLU, and softmax), and varied the number of training iterations, the mini-batch size, and the learning rate, all to no avail. I'd like to find out where the problem is. My complete code is below, followed by a quick check that reproduces the zero gradients. I'd appreciate any pointers.

from keras.datasets import mnist
import keras
import numpy as np
from PIL import Image
import matplotlib.pylab as plt


# Display an image
def img_show(img):
    pil_img = Image.fromarray(np.uint8(img))
    pil_img.show()


# Two-layer neural network class
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        self.params = {}
        self.params['w1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['w2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):  # x = input
        w1, w2 = self.params['w1'], self.params['w2']  # weights
        b1, b2 = self.params['b1'], self.params['b2']  # biases
        a1 = np.dot(x, w1) + b1
        z1 = sigmoid(a1)  # first-layer output
        a2 = np.dot(z1, w2) + b2
        z2 = softmax(a2)  # second-layer output
        return z2

    def loss(self, x, t):  # x = input, t = target labels
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        loss_w = lambda w: self.loss(x, t)  # loss as a function of the weights
        grads = {
            'w1': numerical_gradient(loss_w, self.params['w1']),
            'b1': numerical_gradient(loss_w, self.params['b1']),
            'w2': numerical_gradient(loss_w, self.params['w2']),
            'b2': numerical_gradient(loss_w, self.params['b2'])
        }
        return grads


# Numerical gradient (central difference)
def numerical_gradient(f, x):
    h = 1e-4
    grad = np.zeros_like(x)
    for idx in range(x.shape[0]):
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)
        x[idx] = tmp_val - h
        fxh2 = f(x)
        grad[idx] = (fxh1 - fxh2) / (2 * h)  # central difference
        x[idx] = tmp_val  # restore x
    return grad


# Cross-entropy error
def cross_entropy_error(y, t):
    delta = 1e-7
    return -np.sum(t * np.log(y + delta))


# Softmax
def softmax(a):
    c = np.max(a)
    exp_a = np.exp(a - c)  # subtract the max to prevent overflow
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a
    return y


# Sigmoid
def sigmoid(a):
    out = a.copy()
    sel = ((a > 100) & (a < -100))
    out = 1 / (1 + np.exp(-a))  # sigmoid
    out[sel] = 1 / (1 + np.exp(-100))  # guard against exponent overflow
    return out


# Load MNIST and convert the labels to one-hot
(x_train, y_train), (x_test, y_test) = mnist.load_data()
y_train = keras.utils.to_categorical(y_train)
y_test = keras.utils.to_categorical(y_test)

# Reshape the data to 60000x784 floats
x_train = x_train.reshape(x_train.shape[0], 784).astype('float')

train_loss_list = []

# Initialize hyperparameters
iters_num = 100  # number of training iterations
train_size = x_train.shape[0]  # total number of training samples
batch_size = 32  # samples drawn per iteration
learning_rate = 0.1  # learning rate
network = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)  # build the network

for i in range(iters_num):
    # sample a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    y_batch = y_train[batch_mask]

    # compute the gradient
    grad = network.numerical_gradient(x_batch, y_batch)
    # update the parameters
    for key in ('w1', 'b1', 'w2', 'b2'):
        network.params[key] = network.params[key] - learning_rate * grad[key]
    # record the loss
    loss = network.loss(x_batch, y_batch)
    train_loss_list.append(loss)

# Plot the training loss
x = np.arange(0, iters_num / 10, 0.1)
y = np.array(train_loss_list)
plt.plot(x, y)
plt.show()
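For reference, the zero gradients can be confirmed by printing the largest entry of each gradient after a single batch (a minimal check reusing the network, x_batch, and y_batch defined above):

# Sanity check: largest absolute gradient per parameter after one mini-batch.
grad = network.numerical_gradient(x_batch, y_batch)
for key in ('w1', 'b1', 'w2', 'b2'):
    print(key, np.abs(grad[key]).max())  # expected to print 0.0 if the symptom reproduces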


2 answers (only the accepted answer is shown below)

  • herosunly (recognized Python contributor) replied 2022-08-04 13:45

    I've left out the import statements. The main changes: the input data needs to be normalized, the weight-initialization coefficient weight_init_std is changed from 0.01 to 1, and the learning rate is lowered to 0.05. If you want to do better still, you can add dropout (see the sketches after the code below).

    Everything else is identical to the code in the question; only the lines below change:

        # 1. Weight-initialization scale raised from 0.01 to 1
        class TwoLayerNet:
            def __init__(self, input_size, hidden_size, output_size, weight_init_std=1):
                ...

        # 2. Normalize the pixel values to [0, 1] after reshaping
        x_train = x_train.reshape(x_train.shape[0], 784).astype('float')
        x_train = x_train / 255.0

        # 3. Learning rate lowered from 0.1 to 0.05
        learning_rate = 0.05  # learning rate
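    To see why the normalization matters, compare the scale of the hidden pre-activations on raw versus scaled pixels (a rough sketch, assuming x_batch is drawn from the original unnormalized x_train; exact numbers will vary):

        # With weight_init_std=0.01, raw 0-255 pixels push the pre-activations
        # to the order of tens, far into sigmoid's flat tails, where a 1e-4
        # nudge to a weight barely moves the loss; pixels scaled to [0, 1]
        # keep a1 near zero, where the sigmoid still has usable slope.
        net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10, weight_init_std=0.01)
        for name, data in (('raw 0-255', x_batch), ('scaled 0-1', x_batch / 255.0)):
            a1 = np.dot(data, net.params['w1']) + net.params['b1']
            print(name, 'pre-activation std:', a1.std())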
    

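    The dropout suggestion could look like the sketch below (illustrative only; keep_prob and predict_train are made-up names). During training a random binary mask drops hidden units and scales the survivors by 1/keep_prob, so the plain predict can be used unchanged at test time (inverted dropout):

        # Inverted-dropout sketch for the hidden layer.
        keep_prob = 0.8  # fraction of hidden units kept each step

        def predict_train(x, params):
            a1 = np.dot(x, params['w1']) + params['b1']
            z1 = sigmoid(a1)
            # Drop units at random; rescale so the expected activation
            # matches predict(), which applies no mask.
            mask = (np.random.rand(*z1.shape) < keep_prob) / keep_prob
            z1 = z1 * mask
            a2 = np.dot(z1, params['w2']) + params['b2']
            return softmax(a2)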

    This answer was accepted by the asker as the best answer.
