大家好,我尝试自己写了一个神经网络来做XOR的例子,但是输出结果始终不如人意,不知问题出在哪里?望各位大牛解答!
先附上循环10次的输出结果:
[[ 0.63892076]
[ 0.67854515]
[ 0.6880628 ]
[ 0.71555396]]
以下是代码:
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 18 23:29:20 2017
@author: 32641
"""
import numpy as np
def sigmoid(x):
    """Element-wise logistic sigmoid, 1 / (1 + exp(-x)).

    Uses the numerically stable two-branch form: the naive
    1/(1+np.exp(-x)) overflows (RuntimeWarning -> inf) for large
    negative x, while exp(-|x|) is always <= 1 and cannot overflow.
    """
    z = np.exp(-np.abs(x))
    # x >= 0: 1/(1+exp(-x));  x < 0: exp(x)/(1+exp(x)) — mathematically equal.
    return np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
# Helper used to assemble the weight gradient from per-sample outer products.
def matrix_X(X, Y):
    """Return an array whose n-th row is the flattened outer product
    X[n] ⊗ Y[n], for n in range(len(Y)).

    Row n is [x0*y0, x0*y1, ..., x1*y0, x1*y1, ...] (row-major), matching
    the original triple-loop ordering.  Vectorized with broadcasting —
    pure element-wise products, so values are identical to the loop form.
    """
    Xa = np.asarray(X)[:len(Y)]   # original iterated range(len(Y)) only
    Ya = np.asarray(Y)
    # (n, i, 1) * (n, 1, j) -> (n, i, j); reshape flattens each outer product.
    outer = Xa[:, :, None] * Ya[:, None, :]
    return outer.reshape(len(Ya), -1)
# ---- initialization -------------------------------------------------------
# Four 2-bit inputs with the poster's labelling (equal bits -> 1,
# differing bits -> 0, i.e. XNOR rather than XOR).
X = np.array([[0, 0], [1, 0], [0, 1], [1, 1]], dtype=np.float32)
Y = np.array([1, 0, 0, 1], dtype=np.float32).reshape(4, 1)

V = np.random.rand(2, 2)   # input -> hidden weights
W = np.random.rand(2, 1)   # hidden -> output weights
b1 = np.zeros((1, 2))      # hidden bias (broadcasts over the 4 samples)
b2 = np.zeros((1, 1))      # output bias

# ---- initial forward pass -------------------------------------------------
D = np.dot(X, V) + b1          # hidden pre-activation, shape (4, 2)
O = np.dot(sigmoid(D), W) + b2  # output pre-activation, shape (4, 1)
output = sigmoid(O)
error = np.sum(np.square(Y - output))

# ---- training loop --------------------------------------------------------
# NOTE(review): 10 iterations at eta=0.01 are kept from the original for
# comparability, but are far too few/small to actually solve XOR — raise
# both (e.g. ~10000 iterations, eta ~0.5) to see convergence.
eta = 0.01
for step in range(10):
    H = sigmoid(D)  # hidden activations for the current parameters

    # Output-layer error signal: (Y - output) * sigmoid'(O).
    # Note sigmoid'(O) = output * (1 - output).
    delta2 = (Y - output) * output * (1 - output)            # (4, 1)

    # Hidden-layer error signal: back-propagate delta2 through W.
    # (The W.T factor was missing in the original, so gradient_v and
    # gradient_b1 were not the true gradients.)
    delta1 = np.dot(delta2, W.T) * H * (1 - H)               # (4, 2)

    gradient_w = np.dot(H.T, delta2)                         # (2, 1)
    gradient_b2 = np.sum(delta2, axis=0, keepdims=True)      # (1, 1)
    gradient_v = np.dot(X.T, delta1)                         # (2, 2)
    gradient_b1 = np.sum(delta1, axis=0, keepdims=True)      # (1, 2)

    # All four gradients are oriented as ascent on (Y - output), i.e.
    # descent on the squared error, so EVERY parameter uses "+=".
    # (The original mixed "+" for W with "-" for V/b1/b2 — a sign bug
    # that pushed three of the four parameters the wrong way.)
    W += eta * gradient_w
    b2 += eta * gradient_b2
    V += eta * gradient_v
    b1 += eta * gradient_b1

    # Forward pass with the updated parameters.
    D = np.dot(X, V) + b1
    O = np.dot(sigmoid(D), W) + b2
    output = sigmoid(O)
    error = np.sum(np.square(Y - output))
    print(error)

print(output)