以下是loss变化图,迭代10000次
代码如下
我使用sigmoid作为激活函数,softmax作为输出层激活函数
数据集使用MNIST的手写数字,没有用神经网络模块
loss降到2,3就开始徘徊
from 解析idx import *
import pygame
import random
from time import sleep
from scipy import misc
import imageio
import json
import numpy as np
from numba import jit
pygame.init()
pm=pygame.display.set_mode((2000,750))
#激活函数及导数(dajl/dzjl)
def sigmoid(x):
#print(x)
return 1/(1+2.718**(-x/2))
def dsigmoid(x):
return sigmoid(x)*(1-sigmoid(x))
def tanh(x):
return 2*sigmoid(2*x)-1
def dtanh(x):
return 1-tanh(x)*tanh(x)
#输出层激活函数及导数(dajl/dzjl)
def softmax(a):
c=np.max(a) #求数组中的最大值
exp=np.exp(a-c) #指数运算
sum_exp=np.sum(exp) #指数求和
y=exp/sum_exp #softmax函数值
return y
def softmax_loss(a,y):
#print(a)
mi=0
for i in range(0,len(y)):
if y[i]==1:
mi=i
return -np.log(a[mi]+0.0001)
#dC/dzjl
def dsoftmax_loss(a,y):
return a-y
#损失函数及导数(dC/dajl)
def variance_loss(a,y):
return np.sum((a-y)*(a-y))
def dvariance_loss(a,y):
return 2*(a,y)
class Net():
def __init__(self,layers,activation_function=sigmoid,d_activation_function=dsigmoid):
self.activation_function=activation_function
self.d_activation_function=d_activation_function
self.learn_cent=0.005 #最初的学习率设置
self.learn_cent_decay=1 #学习率衰减率
self.learn_cent_step=30 #喂入多少轮min_batch后,更新一次学习率
self.learn_times=0
self.layers=layers
self.b_learn_cent=0.005 #偏置学习率
self.tot_cost=0
self.cost_num=0
self.a=np.zeros([len(self.layers),800])
self.b=np.random.uniform(-1,1,[len(self.layers),800])
#self.b=np.zeros([len(self.layers),800])
self.z=np.zeros([len(self.layers),800])
self.w=np.random.uniform(-1,1,[len(self.layers),800,800])
#self.w=np.zeros([len(self.layers),800,800])
self.tw=np.zeros([len(self.layers),800,800])
self.tw_num=np.zeros([len(self.layers),800,800])
self.tb=np.zeros([len(self.layers),800])
self.tb_num=np.zeros([len(self.layers),800])
self.cost=10
def get_cost(self):
if self.cost_num==0:
self.cost_num=0.001
pj_cost=self.tot_cost/self.cost_num
self.tot_cost=0
self.cost_num=0
return pj_cost
def train(self,msg,right_ans):
self.learn_times+=1
if self.learn_times%self.learn_cent_step==0:
self.learn_cent*=self.learn_cent_decay
self.a=np.zeros([len(self.layers),800])
self.z=np.zeros([len(self.layers),800])
ipt=np.zeros(800)
i=-1
for x in msg:
i+=1
ipt[i]=x
self.a[0]=ipt
for i in range(1,len(self.layers)-1):
self.z[i]=np.dot(self.w[i],self.a[i-1])+self.b[i]
self.a[i]=self.activation_function(self.z[i])
i=len(self.layers)-1
self.z[i]=np.dot(self.w[i],self.a[i-1])+self.b[i]
self.a[i][0:10]=softmax(self.z[i][0:10])
#print(self.a[-1][0:11])
self.cost=0
#for i in range(0,self.layers[-1]):
#print(i)
self.cost=softmax_loss(self.a[-1][0:self.layers[-1]],right_ans[0:self.layers[-1]])
print(self.cost)
self.tot_cost+=self.cost
self.cost_num+=1
#print(self.a[-1][0])
file_.write(str(self.cost)+'\n')
self.BP(right_ans)
return self.a[-1][0:10]
def update(self):
for i in range(0,len(self.layers)):
for j in range(0,self.layers[i]):
for k in range(0,self.layers[i-1]):
if self.tw_num[i,k,j]!=0:
#print(self.tw_num[i,j,k])
self.w[i,k,j]+=self.tw[i,k,j]/self.tw_num[i,k,j]
self.tw[i,k,j]=0
self.tw_num[i,k,j]=0
for i in range(0,len(self.layers)):
for j in range(0,self.layers[i]):
if self.tb_num[i,j]!=0:
self.b[i,j]+=self.tb[i,j]/self.tb_num[i,j]
self.tb[i,j]=0
self.tb_num[i,j]=0
def BP(self,right_ans):
delta=np.zeros([len(self.layers),800])
delta[-1][0:10]=dsoftmax_loss(self.z[-1][0:10],right_ans[0:10])#dC/dzjl
#print(self.a[-1])
for i in range(len(self.layers)-2,-1,-1):
delta[i]=np.dot(self.w[i+1].T,delta[i+1])*self.d_activation_function(self.z[i])
print(self.learn_times)
#print(delta)
self.tb-=self.b_learn_cent*delta
self.tb_num+=1
for i in range(1,len(self.layers)):
for j in range(0,self.layers[i]):
for k in range(0,self.layers[i-1]):
self.tw[i,k,j]-=self.learn_cent*delta[i,j]*self.a[i-1,k]
#print('three',self.learn_cent,delta[i,j],self.a[i-1,k])
self.tw_num[i,k,j]+=1
#print(self.w[2])
#sleep(0.3)
#print(delta)
#print(dir(np))
group=[]
data=decode_idx3_ubyte('train-images-idx3-ubyte/train-images.idx3-ubyte')
number=decode_idx1_ubyte('train-labels-idx1-ubyte/train-labels.idx1-ubyte')
for i in range(0,len(data)):
if i%3==0:
group.append([])
else:
#imageio.imwrite(str(i)+'.png',data[i])
group[-1].append([data[i],number[i]])
file_=open('Net_.txt','w+')
q=[]
net=Net([784,16,16,10])
lines=[]
random.shuffle(group)
#print(group)
while 1:
#print(mkt.dic['money_you_have'])
c=-1
for g in group:
c+=1
if c%30==0:
lines.append([c/6,800-net.get_cost()*100])
pm.fill((255,255,255))
for l in range(0,len(lines)-1):
pygame.draw.line(pm,(0,0,0),lines[l],lines[l+1])
for event in pygame.event.get():
if event.type==pygame.QUIT:
sys.exit()
pygame.display.flip()
for n in range(0,2):
p=[]
for i in g[n][0]:
for j in i:
p.append(sigmoid(j))
f=np.array([0]*800)
f[int(g[n][1])]=1
m=0
mi=0
o=net.train(p,f)
#print(o[1])
for i in range(0,10):
#print(o[i])
if o[i]>m:
m=o[i]
mi=i
print(g[n][1],mi)
net.update()