pig314_ 2021-08-10 12:37 采纳率: 20%
浏览 944

神经网络loss下降到一定程度后不下降?

以下是loss变化图,迭代10000次

img

img

代码如下
我使用sigmoid作为激活函数,softmax作为输出层激活函数
数据集使用MNIST的手写数字,没有用神经网络模块
loss降到2,3就开始徘徊

from 解析idx import *
import pygame
import random
from time import sleep
from scipy import misc
import imageio
import json
import numpy as np
from numba import jit
pygame.init()
# 2000x750 window used only to plot the running loss curve during training.
pm=pygame.display.set_mode((2000,750))

# Activation functions and their derivatives (da/dz).
def sigmoid(x):
    """Logistic activation with temperature 2: 1 / (1 + exp(-x/2)).

    Bug fix: uses np.exp instead of the truncated constant 2.718 for
    accuracy; works elementwise on ndarrays and on plain floats.
    The /2 inside the exponent is kept from the original — presumably a
    deliberate "flatter" sigmoid, TODO confirm.
    """
    return 1 / (1 + np.exp(-x / 2))
def dsigmoid(x):
    """Derivative of the sigmoid defined above.

    d/dx [1/(1+exp(-x/2))] = 0.5 * s * (1 - s). Bug fix: the original
    returned s*(1-s), omitting the 0.5 chain-rule factor coming from the
    /2 inside sigmoid, so backprop used a wrongly scaled gradient.
    """
    s = sigmoid(x)
    return 0.5 * s * (1 - s)
def tanh(x):
    """Rescaled sigmoid: 2*sigmoid(2x) - 1 (equals math.tanh(x/2))."""
    return 2 * sigmoid(2 * x) - 1
def dtanh(x):
    """Derivative of the tanh defined above: 0.5 * (1 - f(x)**2).

    Bug fix: the original returned 1 - f**2, again omitting the 0.5
    chain-rule factor (since tanh(x) here equals math.tanh(x/2)).
    """
    f = tanh(x)
    return 0.5 * (1 - f * f)

# Output-layer activation (da/dz of the combined loss is handled below).
def softmax(a):
    """Numerically stable softmax: shift by max(a) before exponentiating.

    Subtracting the maximum leaves the result unchanged mathematically
    but prevents overflow in np.exp for large inputs.
    """
    shifted = a - np.max(a)
    exps = np.exp(shifted)
    return exps / np.sum(exps)
def softmax_loss(a,y):
    """Cross-entropy loss against a one-hot target vector.

    Scans y for the hot index (the last index with y[i] == 1 wins, matching
    the original loop; defaults to 0 if none is set) and returns -log of the
    predicted probability at that index, with a small epsilon to avoid log(0).
    """
    hot = 0
    for idx, val in enumerate(y):
        if val == 1:
            hot = idx
    return -np.log(a[hot] + 0.0001)


# dC/dz at the output layer (softmax + cross-entropy folded together).
def dsoftmax_loss(a,y):
    """Combined softmax/cross-entropy gradient w.r.t. pre-softmax z: a - y.

    Expects `a` to be the softmax output and `y` the one-hot target.
    """
    grad = a - y
    return grad


# Squared-error loss (its derivative dC/da is defined separately below).
def variance_loss(a,y):
    """Sum of squared differences between prediction a and target y."""
    diff = a - y
    return np.sum(diff ** 2)
def dvariance_loss(a,y):
    """Gradient of variance_loss w.r.t. a: 2*(a - y).

    Bug fix: the original returned ``2*(a,y)`` — tuple repetition
    ``(a, y, a, y)``, not a gradient — a comma typo for the minus sign.
    """
    return 2 * (a - y)
class Net():
    """Small fully-connected network trained with hand-rolled backprop.

    All per-layer arrays are padded to a fixed width of 800 units, so
    `layers` (e.g. [784, 16, 16, 10]) only records how many of those slots
    each layer actually uses; the unused slots still carry random weights.
    Gradients are accumulated into tw/tb across train() calls and applied
    (averaged) by update().
    """
    def __init__(self,layers,activation_function=sigmoid,d_activation_function=dsigmoid):
        self.activation_function=activation_function
        self.d_activation_function=d_activation_function
        self.learn_cent=0.005                  # initial learning rate
        self.learn_cent_decay=1              # learning-rate decay factor; NOTE(review): 1 means the rate never actually decays
        self.learn_cent_step=30                   # apply the decay after this many train() calls
        self.learn_times=0
        self.layers=layers
        self.b_learn_cent=0.005 # learning rate for the biases
        self.tot_cost=0   # running sum of per-sample losses (for averaging in get_cost)
        self.cost_num=0   # number of losses accumulated into tot_cost
        self.a=np.zeros([len(self.layers),800])                     # activations per layer (padded to 800)
        self.b=np.random.uniform(-1,1,[len(self.layers),800])       # biases
        #self.b=np.zeros([len(self.layers),800])
        self.z=np.zeros([len(self.layers),800])                     # pre-activations per layer
        self.w=np.random.uniform(-1,1,[len(self.layers),800,800])   # weights; w[i] maps layer i-1 -> layer i
        #self.w=np.zeros([len(self.layers),800,800])
        self.tw=np.zeros([len(self.layers),800,800])     # accumulated weight updates for the current batch
        self.tw_num=np.zeros([len(self.layers),800,800]) # per-weight update counts (for averaging)
        self.tb=np.zeros([len(self.layers),800])         # accumulated bias updates
        self.tb_num=np.zeros([len(self.layers),800])     # per-bias update counts
        self.cost=10
    def get_cost(self):
        """Return the mean loss since the last call and reset the accumulators."""
        if self.cost_num==0:
            self.cost_num=0.001  # avoid division by zero when nothing was accumulated yet
        pj_cost=self.tot_cost/self.cost_num
        self.tot_cost=0
        self.cost_num=0
        return pj_cost
    def train(self,msg,right_ans):
        """Run one forward pass on `msg`, log the loss, and backpropagate.

        msg: iterable of input activations (at most 800 values; 784 pixels here).
        right_ans: one-hot target, at least `layers[-1]` entries long.
        Returns the first 10 output activations (softmax probabilities).
        Writes the per-sample loss to the module-level file handle `file_`.
        """
        self.learn_times+=1
        if self.learn_times%self.learn_cent_step==0:
            self.learn_cent*=self.learn_cent_decay  # NOTE(review): no-op while decay factor is 1
        self.a=np.zeros([len(self.layers),800])
        self.z=np.zeros([len(self.layers),800])
        # Copy the input into a fixed-width 800 slot vector.
        ipt=np.zeros(800)
        i=-1
        for x in msg:
            i+=1
            ipt[i]=x
        self.a[0]=ipt
        
        # Hidden layers: z = W.a + b, a = activation(z).
        for i in range(1,len(self.layers)-1):
            self.z[i]=np.dot(self.w[i],self.a[i-1])+self.b[i]
            self.a[i]=self.activation_function(self.z[i])
        # Output layer: softmax over its first 10 slots only.
        i=len(self.layers)-1
        self.z[i]=np.dot(self.w[i],self.a[i-1])+self.b[i]
        self.a[i][0:10]=softmax(self.z[i][0:10])
        #print(self.a[-1][0:11])
        self.cost=0
        #for i in range(0,self.layers[-1]):
            #print(i)
        self.cost=softmax_loss(self.a[-1][0:self.layers[-1]],right_ans[0:self.layers[-1]])
        print(self.cost)
        self.tot_cost+=self.cost
        self.cost_num+=1
        #print(self.a[-1][0])
        file_.write(str(self.cost)+'\n')
        self.BP(right_ans)
        return self.a[-1][0:10]
    def update(self):
        """Apply the averaged accumulated updates to w and b, then reset tw/tb.

        NOTE(review): the outer loop starts at i=0, so `self.layers[i-1]`
        wraps around to the output layer's size on the first iteration —
        probably harmless here (layer 0 has no incoming weights used by the
        forward pass) but worth confirming.
        """
        for i in range(0,len(self.layers)):
            for j in range(0,self.layers[i]):
                for k in range(0,self.layers[i-1]):
                    if self.tw_num[i,k,j]!=0:
                        #print(self.tw_num[i,j,k])
                        self.w[i,k,j]+=self.tw[i,k,j]/self.tw_num[i,k,j]
                        self.tw[i,k,j]=0
                        self.tw_num[i,k,j]=0
                        
        for i in range(0,len(self.layers)):
            for j in range(0,self.layers[i]):
                if self.tb_num[i,j]!=0:
                    self.b[i,j]+=self.tb[i,j]/self.tb_num[i,j]
                    self.tb[i,j]=0
                    self.tb_num[i,j]=0
    def BP(self,right_ans):
        """Backpropagate the error of the last forward pass into tw/tb.

        NOTE(review): two likely bugs that would explain the loss plateau:
        1. dsoftmax_loss computes (a - y) and therefore expects the softmax
           OUTPUT, but it is passed self.z[-1] (the pre-activation) below;
           self.a[-1][0:10] looks like the intended argument.
        2. The weight gradient delta[i,j]*a[i-1,k] belongs to w[i,j,k] given
           the forward pass np.dot(self.w[i], self.a[i-1]), yet it is
           accumulated into tw[i,k,j] / applied to w[i,k,j] — transposed
           indices (only type-safe because w is square 800x800).
        """
        delta=np.zeros([len(self.layers),800])
        delta[-1][0:10]=dsoftmax_loss(self.z[-1][0:10],right_ans[0:10])# dC/dz at the output layer
        #print(self.a[-1])
        # Propagate deltas backwards through the full padded 800-wide layers.
        for i in range(len(self.layers)-2,-1,-1):
            delta[i]=np.dot(self.w[i+1].T,delta[i+1])*self.d_activation_function(self.z[i])
        print(self.learn_times)
        #print(delta)
        
        self.tb-=self.b_learn_cent*delta
        self.tb_num+=1
        for i in range(1,len(self.layers)):
            for j in range(0,self.layers[i]):
                for k in range(0,self.layers[i-1]):
                    self.tw[i,k,j]-=self.learn_cent*delta[i,j]*self.a[i-1,k]
                    #print('three',self.learn_cent,delta[i,j],self.a[i-1,k])
                    self.tw_num[i,k,j]+=1
        #print(self.w[2])
        #sleep(0.3)
        #print(delta)
#print(dir(np))                                    
# ---- Data loading and the main training loop ----
group=[]
data=decode_idx3_ubyte('train-images-idx3-ubyte/train-images.idx3-ubyte')
number=decode_idx1_ubyte('train-labels-idx1-ubyte/train-labels.idx1-ubyte')
# Split the samples into mini-groups of 2.
# NOTE(review): when i%3==0 a new group is started but sample i itself is
# skipped (it falls in the if-branch, not the else), so every third MNIST
# sample is silently dropped.
for i in range(0,len(data)):
    if i%3==0:
        group.append([])
    else:
        #imageio.imwrite(str(i)+'.png',data[i])
        group[-1].append([data[i],number[i]])
file_=open('Net_.txt','w+')  # per-sample loss log, written to by Net.train
q=[]
net=Net([784,16,16,10])
lines=[]  # screen-space points of the loss curve plotted below
random.shuffle(group)
#print(group)
while 1:
    #print(mkt.dic['money_you_have'])
    c=-1
    for g in group:
        c+=1
        # Every 30 groups: add a point to the loss curve and redraw the window.
        if c%30==0:
            lines.append([c/6,800-net.get_cost()*100])
            pm.fill((255,255,255))
            for l in range(0,len(lines)-1):
                pygame.draw.line(pm,(0,0,0),lines[l],lines[l+1])
            for event in pygame.event.get():
                if event.type==pygame.QUIT:
                    sys.exit()  # NOTE(review): `sys` is never imported — this raises NameError on quit
            pygame.display.flip()
        
        # Train on the two samples in this group.
        for n in range(0,2):
            p=[]
            # Flatten the 28x28 image; sigmoid squashes raw 0-255 pixels into (0,1).
            for i in g[n][0]:
                for j in i:
                    p.append(sigmoid(j))
            f=np.array([0]*800)  # one-hot target padded to width 800
            f[int(g[n][1])]=1
            m=0
            mi=0
            o=net.train(p,f)
            #print(o[1])
            # Argmax over the 10 output probabilities = predicted digit.
            for i in range(0,10):
                #print(o[i])
                if o[i]>m:
                    m=o[i]
                    mi=i
            print(g[n][1],mi)  # true label vs prediction
        net.update()
        


        



  • 写回答

1条回答 默认 最新

  • CSDN专家-kaily 2021-08-11 14:28
    关注

    self.learn_cent_decay=1 #学习率衰减率
    等于1的话,你的学习率真的衰减了吗?

    self.learn_cent*=self.learn_cent_decay
    学习率不一直都没变吗?

    评论

报告相同问题?

问题事件

  • 创建了问题 8月10日

悬赏问题

  • ¥20 BAPI_PR_CHANGE how to add account assignment information for service line
  • ¥500 火焰左右视图、视差(基于双目相机)
  • ¥100 set_link_state
  • ¥15 虚幻5 UE美术毛发渲染
  • ¥15 CVRP 图论 物流运输优化
  • ¥15 Tableau online 嵌入ppt失败
  • ¥100 支付宝网页转账系统不识别账号
  • ¥15 基于单片机的靶位控制系统
  • ¥15 真我手机蓝牙传输进度消息被关闭了,怎么打开?(关键词-消息通知)
  • ¥15 装 pytorch 的时候出了好多问题,遇到这种情况怎么处理?