Judging from the predictions, the accuracy is extremely low, around 0.1. I feel 1 - 0.1 = 0.9 is what it should be, so something is probably written backwards somewhere, but I can't find the problem.
Here is the code, with a lot of non-essential parts (cost computation and the like) removed:
import numpy as np
from function_set.activate_functions import sigmoid, reLU, tanh, softmax  # resolved via eval(), do not remove
from function_set.activate_differential import sigmoid_d, reLU_d, tanh_d  # resolved via eval(), do not remove
class neural_network:
    def __init__(self,
                 layer_num=3,
                 dim_num_list=None,
                 funct_list=None,
                 a_functs=("tanh", "sigmoid"),
                 ):
        self.funct_list = funct_list      # activation function name for each layer
        self.layer_num = layer_num        # number of layers
        self.dim_num_list = dim_num_list  # number of units in each layer
        self.a_functs = a_functs          # (hidden activation name, output activation name)
        self.paras = dict()               # parameters W, b
        self.cache = dict()               # forward-pass values kept for backprop
        self.epsilon = 1e-8               # guards against division by zero and log(0)
        self.iter_cnt = 0                 # iteration counter, for momentum-style optimizers
    def init(self, X, Y):
        if not self.dim_num_list:
            self.dim_num_list = np.random.randint(20, 31, self.layer_num + 1)  # default layer widths drawn from [20, 30]
        else:
            self.layer_num = len(self.dim_num_list) - 1
        if not self.funct_list:
            self.funct_list = list()
            for i in range(0, self.layer_num):
                self.funct_list.append(self.a_functs[0])
            self.funct_list.append(self.a_functs[1])
        self.dim_num_list[0] = X.shape[0]               # input width = number of features
        self.dim_num_list[self.layer_num] = Y.shape[0]  # output width = number of classes
        self.init_paras()
    def init_paras(self):
        for i in range(1, self.layer_num + 1):
            self.paras["W" + str(i)] = np.random.randn(self.dim_num_list[i], self.dim_num_list[i - 1])  # standard-normal init, no variance scaling
            self.paras["b" + str(i)] = np.zeros(self.dim_num_list[i], float).reshape(-1, 1)
    def forward_propagate(self, X):
        self.cache["A0"] = X  # X: (n_0, m), one sample per column
        for i in range(1, self.layer_num + 1):
            self.cache["Z" + str(i)] = np.dot(self.paras["W" + str(i)], self.cache["A" + str(i - 1)])  # (n_i, m)
            self.cache["A" + str(i)] = eval(self.funct_list[i])(self.cache["Z" + str(i)])
    def backward_propagate(self, Y):
        A = self.cache["A" + str(self.layer_num)]
        m = A.shape[1]
        dA = (-1 / m) * Y * (1 / (A + self.epsilon))  # dJ/dA of cross-entropy; only used when the output layer is not softmax
        i = self.layer_num
        while i > 0:
            function_name = self.funct_list[i]
            if (i == self.layer_num) and (function_name == "softmax"):  # output layer: softmax Jacobian folded into dJ/dZ
                dZ = (1 / m) * (A - Y)
            else:  # hidden layer
                dZ = dA * eval(function_name + "_d")(self.cache["A" + str(i)], self.cache["Z" + str(i)])
            self.cache["dW" + str(i)] = np.dot(dZ, self.cache["A" + str(i - 1)].T)
            self.cache["db" + str(i)] = np.sum(dZ, axis=1).reshape(-1, 1)
            dA = np.dot(self.paras["W" + str(i)].T, dZ)
            i -= 1
    def update_parameters(self, learning_rate):
        for i in range(1, self.layer_num + 1):
            self.paras["W" + str(i)] = self.paras["W" + str(i)] - learning_rate * self.cache["dW" + str(i)]
            self.paras["b" + str(i)] = self.paras["b" + str(i)] - learning_rate * self.cache["db" + str(i)]
    def fit(self, X, Y, learning_rate=0.5, iter_num=1000):
        self.init(X, Y)
        self.iter_cnt = 0
        for epoch in range(iter_num):
            self.iter_cnt += 1
            self.forward_propagate(X)
            self.backward_propagate(Y)
            self.update_parameters(learning_rate)
    def predict_probability(self, X):
        self.forward_propagate(X)
        return self.cache["A" + str(self.layer_num)]

    def predict(self, X):
        probability = self.predict_probability(X)
        print(probability)
        return self.map_to_int(probability)
    def map_to_int(self, A):  # probabilities -> predicted class indices
        result = None
        if self.funct_list[self.layer_num] == "softmax":
            result = np.argmax(A, axis=0)
        return result
    def get_one_hot(self, y, class_num):  # integer labels -> one-hot encoding
        n = class_num
        m = len(y)
        result = np.zeros((n, m))
        for i in range(m):
            result[y[i]][i] = 1
        return result
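    # Example of the layout (my note, not in the original project):
    # get_one_hot([0, 1, 1], 2) returns
    #     [[1., 0., 0.],
    #      [0., 1., 1.]]
    # i.e. shape (class_num, m), with row k being the indicator of class k.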
def test(X_train, Y_train, X_test, Y_test):
    L = [2, 30, 1]  # the last entry is overwritten with Y.shape[0] inside init()
    nn = neural_network(dim_num_list=L, a_functs=("tanh", "softmax"))  # a_functs[0] is the hidden activation, a_functs[1] the output activation
    nn.fit(X_train, Y_train, learning_rate=0.1, iter_num=1000)
    result = nn.predict(X_test)
    y = Y_test[0]
    print("actual = ", y)
    print("predicted = ", result)
    cnt = 0
    for e in range(len(y)):
        if result[e] == y[e]:
            cnt += 1
    print("accuracy = ", cnt / len(y))
from planar_utils import load_planar_dataset

X_train, Y_train = load_planar_dataset()
X_train /= 4
# plt.scatter(X_train[0, :], X_train[1, :], c=Y_train, s=40, cmap=plt.cm.Spectral)  # scatter plot of the dataset
# plt.show()
X_test, Y_test = load_planar_dataset()
X_test /= 4
Y_train = neural_network().get_one_hot(Y_train[0], 2)
Y_test = neural_network().get_one_hot(Y_test[0], 2)
test(X_train, Y_train, X_test, Y_test)
Run output:
actual =  [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
predicted =  [0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0
0 0 0 1 1 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0
0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 1 1 0 1 1 1 0 1 0 0 1 1 1
1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1
1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0]
accuracy =  0.1175
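Since I suspect the comparison itself is flipped rather than the network, one check I'm considering (an untested sketch of mine): by the time test() runs, Y_test has already been one-hot encoded, so Y_test[0] is the indicator row for class 0, not the raw label vector. Recovering the integer labels with argmax instead would look like this, assuming the (class_num, m) layout that get_one_hot produces; check_accuracy is a hypothetical helper name of mine:

import numpy as np

def check_accuracy(result, Y_onehot):
    # result: (m,) integer class predictions from nn.predict
    # Y_onehot: (class_num, m) matrix from get_one_hot, row k = class k
    y_true = np.argmax(Y_onehot, axis=0)  # back from one-hot to integer labels
    return np.mean(result == y_true)

# usage inside test(): print("accuracy = ", check_accuracy(result, Y_test))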
Below are the activation functions and their derivatives. Because the output-layer gradient is taken directly with respect to Z, the softmax derivative function is just pass.
# function_set/activate_functions.py
import numpy as np

def sigmoid(Z):
    # numerically stable sigmoid: use 1 / (1 + exp(-Z)) for Z >= 0 and
    # exp(Z) / (exp(Z) + 1) for Z < 0, so exp() never sees a large positive argument
    positive_mask = (Z >= 0)
    negative_mask = Z < 0
    result_positive = 1 / (1 + np.exp(-Z * positive_mask))
    result_positive[~positive_mask] = 0
    result_negative = np.exp(Z * negative_mask) / (np.exp(Z * negative_mask) + 1)
    result_negative[~negative_mask] = 0
    result = result_negative + result_positive
    return result
def reLU(Z):
    return np.maximum(0, Z)

def tanh(Z):
    return np.tanh(Z)

def softmax(Z):
    max_Z = np.max(Z, axis=0)
    return np.exp(Z - max_Z) / np.sum(np.exp(Z - max_Z), axis=0)
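# Note (my addition): subtracting the per-column max before exponentiating keeps
# np.exp from overflowing, and it does not change the result, since
# softmax(Z) == softmax(Z - c) for any per-column constant c.
# Quick check: np.sum(softmax(np.random.randn(3, 5)), axis=0) should be all ones.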
# function_set/activate_differential.py
import numpy as np

def sigmoid_d(A, Z):
    return A * (1 - A)  # A = sigmoid(Z)

def reLU_d(A, Z):
    result = (Z > 0) * 1
    return result

def tanh_d(A, Z):
    return 1 - A * A  # A = tanh(Z)

def softmax_d(A, Z):
    pass  # never called: the softmax output layer computes dZ directly
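To double-check the claim that the output layer can take the gradient directly with respect to Z, here is a small finite-difference test I wrote (my own snippet, not part of the project): for the cost J = -(1/m) * sum(Y * log(softmax(Z))), the analytic gradient should be (softmax(Z) - Y) / m, which is exactly the dZ used in backward_propagate.

import numpy as np

def softmax(Z):
    max_Z = np.max(Z, axis=0)
    return np.exp(Z - max_Z) / np.sum(np.exp(Z - max_Z), axis=0)

def cross_entropy(Z, Y):
    return -np.sum(Y * np.log(softmax(Z))) / Z.shape[1]

rng = np.random.default_rng(0)
Z = rng.standard_normal((3, 4))
Y = np.eye(3)[:, [0, 2, 1, 0]]  # one-hot labels in the (class_num, m) layout
analytic = (softmax(Z) - Y) / Z.shape[1]

eps = 1e-6
numeric = np.zeros_like(Z)
for i in range(Z.shape[0]):
    for j in range(Z.shape[1]):
        Zp, Zm = Z.copy(), Z.copy()
        Zp[i, j] += eps
        Zm[i, j] -= eps
        numeric[i, j] = (cross_entropy(Zp, Y) - cross_entropy(Zm, Y)) / (2 * eps)

print(np.max(np.abs(analytic - numeric)))  # prints something on the order of 1e-10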