Loss function problem in a BP neural network
I'm writing a BP (backpropagation) neural network and have run into a problem during backpropagation. My loss function is daf(x) * (T - O), where daf(x) is the derivative of the activation function at that point, T is the target output, and O is the actual output. My activation function is tanh. The network seems to have misunderstood me, though: it drives the output toward 1, where the derivative is zero, so the loss becomes zero and learning stops.
I'd like to know: is my loss function wrong, and if so what is the correct one? Or is the problem somewhere in my code?
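For reference, I derived that term from the squared error, the same E = (T - O)^2 / 2 that my train() method computes (so strictly, daf(x) * (T - O) is the error signal the loss produces, not the loss itself):

E = (T - O)^2 / 2,  O = tanh(net)
dE/dnet = -(T - O) * tanh'(net) = -(T - O) * (1 - O^2)

So every weight update is scaled by (T - O) * (1 - O^2), and once O saturates near 1 the factor (1 - O^2) vanishes, even when T = 0 and the output is as wrong as possible.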
My code is as follows:
import numpy as np

def tanh(x: np.ndarray) -> np.ndarray:
    return np.tanh(x)

def derived_tanh(x: np.ndarray) -> np.ndarray:
    # expects the tanh *output*, not the pre-activation: tanh'(net) = 1 - tanh(net)**2
    return 1 - (x**2)

def ReLU(x: np.ndarray) -> np.ndarray:
    return (np.abs(x) + x) / 2

def derived_ReLU(x: np.ndarray) -> np.ndarray:
    # likewise evaluated on the activation value; for ReLU, output > 0 iff input > 0
    return (x > 0) * 1
class BPNN:
    def __init__(self, input_num: int, output_num: int, storey: int, num: int, activation_function, derived_Activation_Function) -> None:
        self.input_num = input_num    # size of the input vector
        self.output_num = output_num  # size of the output vector
        self.storey = storey          # number of hidden layers
        self.num = num                # neurons per hidden layer
        self.weight_in = np.random.randn(self.num, self.input_num)             # input -> first hidden layer
        self.weight_mi = np.random.randn(self.storey - 1, self.num, self.num)  # hidden -> hidden
        self.weight_ou = np.random.randn(self.output_num, self.num)            # last hidden layer -> output
        self.bias = np.random.randn(self.storey, self.num)                     # one bias vector per hidden layer
        self.activation_function = activation_function
        self.derived_Activation_Function = derived_Activation_Function
    def out(self, inputs: np.ndarray, right_output: np.ndarray = None, learnning_rate: float = None) -> np.ndarray:
        # forward pass: out[i] holds the activations of hidden layer i
        out = np.array(self.bias)  # copy of the bias array, reused as the activation buffer
        out[0] = self.activation_function(np.sum(self.weight_in * inputs, axis=1))
        for i in range(1, self.storey - 1):
            out[i] = self.activation_function(np.sum(self.weight_mi[i - 1] * out[i - 1], axis=1))
        output = self.activation_function(np.sum(self.weight_ou * out[self.storey - 1], axis=1))
        if right_output is None:
            return output
        else:
            # backward pass: output-layer error signal, then propagate it back layer by layer
            e_ou = (right_output - output) * self.derived_Activation_Function(output)
            e_mi = np.zeros([self.storey, self.num])
            e_mi[self.storey - 1] = np.sum(e_ou * self.weight_ou, axis=0) * self.derived_Activation_Function(out[self.storey - 1])
            for i in range(self.storey - 2, -1, -1):
                e_mi[i] = np.sum(e_mi[i + 1] * self.weight_mi[i], axis=0) * self.derived_Activation_Function(out[i])
            # delta-rule weight and bias updates
            self.weight_ou += (learnning_rate * e_ou).reshape([-1, 1]) * out[self.storey - 1]
            for i in range(self.storey - 2, -1, -1):
                self.weight_mi[i] += (learnning_rate * e_mi[i + 1]).reshape([-1, 1]) * out[i]
                self.bias[i + 1] += learnning_rate * e_mi[i + 1]
            self.bias[0] += learnning_rate * e_mi[0]
            self.weight_in += (learnning_rate * e_mi[0]).reshape([-1, 1]) * inputs
            return output
    def train(self, inputs: np.ndarray, right_output: np.ndarray, learning_rate: float, garter: float) -> None:
        # shuffle the sample indices, then split into 70% train / 30% test
        data = np.random.shuffle(np.arange(0, inputs.shape[0]))
        train_in = inputs[data[np.arange(0, int(inputs.shape[0] * 0.7))]]
        train_ou = right_output[data[np.arange(int(inputs.shape[0] * 0.7), inputs.shape[0])]]
        test_in = inputs[data[np.arange(0, int(inputs.shape[0] * 0.7))]]
        test_ou = right_output[data[np.arange(int(inputs.shape[0] * 0.7), inputs.shape[0])]]
        while True:
            e = 0
            for i in range(0, train_in.shape[0]):
                e += (train_ou - self.out(test_in[i], train_ou, learning_rate))**2 / 2
            print("training set error " + str(e))
            e = 0
            for i in range(0, train_in.shape[0]):
                e += (test_ou - self.out(test_in[i]))**2 / 2
            print("test set error " + str(e))
inputs = np.array([[0, 1], [1, 1], [0, 1], [0, 0]])
outputs = np.array([[1], [0], [1], [0]])
BP = BPNN(2, 1, 4, 10, tanh, derived_tanh)
# 10 passes over the 4 samples, learning rate 0.9
for i in range(0, 10):
    e = 0
    for j in range(0, 4):
        e += np.abs(outputs[j] - BP.out(inputs[j], outputs[j], 0.9))
    print('error ' + str(e))
e = 0
for i in range(0, 4):
    out = BP.out(inputs[i])
    print('input ' + str(inputs[i]) + '\texpected output ' + str(outputs[i]) + '\tactual output ' + str(out))
    e += np.abs(outputs[i] - out)
print('total error ' + str(e))
The output is:
error [2.69006214]
error [2.]
error [2.]
error [2.]
error [2.]
error [2.]
error [2.]
error [2.]
error [2.]
error [2.]
input [0 1]    expected output [1]    actual output [0.99992796]
input [1 1]    expected output [0]    actual output [0.99992796]
input [0 1]    expected output [1]    actual output [0.99992796]
input [0 0]    expected output [0]    actual output [0.99992796]
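To put a number on the saturation, here is a standalone check (a separate snippet, not part of the code above), plugging in the output value my network actually converges to:

T = 0.0          # target for the [1, 1] and [0, 0] samples
O = 0.99992796   # what the network actually outputs
delta = (T - O) * (1 - O**2)  # my error term, using the tanh-output derivative
print(delta)     # about -1.44e-04: the weight update is effectively zero

Even for the samples where the output is maximally wrong (T = 0, O ≈ 1), the (1 - O^2) factor shrinks every update to almost nothing, which matches the reported error staying stuck at 2.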