weixin_51793354 2022-07-27 18:30 采纳率: 54.5%
浏览 54
已结题

softmax不知道哪里写错了,二分类预测的正确率为0.1左右。不知道哪里反了

从预测结果上看,预测的准确率极低,0.1左右。我觉得1-0.1=0.9才是理想结果,可能是哪里写反了,但我找不到哪里有问题。
代码, 删去了很多不必要的部分(求cost之类的):


import numpy as np
from function_set.activate_functions import sigmoid, reLU, tanh, softmax  # eval()会用到,不删
from function_set.activate_differential import sigmoid_d, reLU_d, tanh_d  # eval()会用到,不删


class neural_network:
    def __init__(self,
                 layer_num=3,
                 dim_num_list=None,
                 funct_list=None,
                 a_functs=("tanh", "sigmoid"),
                 ):
        self.funct_list = funct_list  # 激活函数列表,记录每一层的激活函数
        self.layer_num = layer_num  # 层数
        self.dim_num_list = dim_num_list  # 每一层的结点数
        self.a_functs = a_functs  # 激活函数名
        self.paras = dict()  # 保存参数 W,b
        self.cache = dict()  # 存储前向传播的数据,供反向传播使用
        self.epsilon = 1e-8  # 精度,防止除0或log
        self.iter_cnt = 0  # 迭代计数, 用于momentum等优化

    def init(self, X, Y):
        if not self.dim_num_list:
            self.dim_num_list = np.random.randint(20, 31, self.layer_num + 1)  # 默认维度为30
        else:
            self.layer_num = len(self.dim_num_list) - 1
        if not self.funct_list:
            self.funct_list = list()
            for i in range(0, self.layer_num):
                self.funct_list.append(self.a_functs[0])
            self.funct_list.append(self.a_functs[1])
        self.dim_num_list[0] = X.shape[0]
        self.dim_num_list[self.layer_num] = Y.shape[0]
        self.init_paras()

    def init_paras(self):
        for i in range(1, self.layer_num + 1):
            self.paras["W" + str(i)] = np.random.randn(self.dim_num_list[i], self.dim_num_list[i - 1])
            self.paras["b" + str(i)] = np.zeros(self.dim_num_list[i], float).reshape(-1, 1)

    def forward_propagate(self, X):
        self.cache["A0"] = X
        for i in range(1, self.layer_num + 1):
            self.cache["Z" + str(i)] = np.dot(self.paras["W" + str(i)], self.cache["A" + str(i - 1)])
            self.cache["A" + str(i)] = eval(self.funct_list[i])(self.cache["Z" + str(i)])


    def backward_propagate(self, Y):
        A = self.cache["A" + str(self.layer_num)]
        m = A.shape[1]
        dA = (-1 / m) * Y * (1 / (A + self.epsilon))
        i = self.layer_num
        while (i > 0):
            function_name = self.funct_list[i]
            if (i == self.layer_num) and (function_name == "softmax"):  # 输出层
                dZ = (1 / m) * (A - Y)
            else:  # 隐藏层
                dZ = dA * eval(function_name + "_d")(self.cache["A" + str(i)], self.cache["Z" + str(i)])
            self.cache["dW" + str(i)] = np.dot(dZ, self.cache["A" + str(i - 1)].T)
            self.cache["db" + str(i)] = np.sum(dZ, axis=1).reshape(-1, 1)
            dA = np.dot(self.paras["W" + str(i)].T, dZ)
            i -= 1

    def update_parameters(self, learning_rate):
        for i in range(1, self.layer_num + 1):
            self.paras["W" + str(i)] = self.paras["W" + str(i)] - learning_rate * self.cache["dW" + str(i)]
            self.paras["b" + str(i)] = self.paras["b" + str(i)] - learning_rate * self.cache["db" + str(i)]

    def fit(self, X, Y, learning_rate=0.5, iter_num=1000):
        self.init(X, Y)
        self.iter_cnt = 0
        for epoch in range(iter_num):
            self.iter_cnt += 1
            self.forward_propagate(X)
            self.backward_propagate(Y)
            self.update_parameters(learning_rate)

    def predict_probability(self, X):
        self.forward_propagate(X)
        return self.cache["A" + str(self.layer_num)]

    def predict(self, X):
        probabilty = self.predict_probability(X)
        print(probabilty)
        return self.map_to_int(probabilty)

    def map_to_int(self, A):  # 概率转预测结果
        result = None  #
        if self.funct_list[self.layer_num] == "softmax":
            result = np.argmax(A, axis=0)
        return result

    def get_one_hot(self, y, class_num):  # 将整数类别转换为独热编码
        n = class_num
        m = len(y)
        result = np.zeros((n, m))
        for i in range(m):
            result[y[i]][i] = 1
        return result


def test(X_train, Y_train, X_test, Y_test):
    L = [2, 30 , 1]
    nn = neural_network(dim_num_list=L, a_functs=("tanh", "softmax"))  # a_functs[0]是隐藏层函数名,a_functs[1]是输出层函数名
    nn.fit(X_train, Y_train, learning_rate=0.1, iter_num=1000)
    result = nn.predict(X_test)
    y = Y_test[0]
    print("实际值 = ", y)
    print("预测值 = ", result)
    cnt = 0
    for e in range(len(y)):
        if result[e] == y[e]:
            cnt += 1
    print("准确率 = ", cnt / len(y))


from planar_utils import load_planar_dataset

X_train, Y_train = load_planar_dataset()
X_train /= 4
# plt.scatter(X_train[0, :], X_train[1, :], c=Y_train, s=40, cmap=plt.cm.Spectral) #绘制散点图
# plt.show()
X_test, Y_test = load_planar_dataset()
X_test /= 4

Y_train = neural_network().get_one_hot(Y_train[0], 2)
Y_test = neural_network().get_one_hot(Y_test[0], 2)

test(X_train, Y_train, X_test, Y_test)

运行结果:

实际值 =  [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
预测值 =  [0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0
 0 0 0 1 1 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 1 1 0 1 1 1 0 1 0 0 1 1 1
 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1
 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0]
准确率 =  0.1175


下面是激活函数和对应微分的代码,因为最后是直接对Z求的导,所以softmax的微分函数就pass了


import numpy as np


def sigmoid(Z):
    positive_mask = (Z >= 0)
    negative_mask = Z < 0
    result_positive = 1 / (1 + np.exp(-Z * positive_mask))
    result_positive[~positive_mask] = 0
    result_negative = np.exp(Z * negative_mask) / (np.exp(Z * negative_mask) + 1)
    result_negative[~negative_mask] = 0
    result = result_negative + result_positive
    return result


def reLU(Z):
    return np.maximum(0, Z)


def tanh(Z):
    return np.tanh(Z)


def softmax(Z):
    max_Z = np.max(Z, axis=0)
    return np.exp(Z - max_Z) / np.sum(np.exp(Z - max_Z), axis=0)


import numpy as np


def sigmoid_d(A, Z):
    return A * (1 - A)


def reLU_d(A ,Z):
    result = (Z > 0) * 1
    return result


def tanh_d(A, Z):
    return 1 - A * A


def softmax_d(A, Z):
    pass

  • 写回答

1条回答 默认 最新

  • 东方佑 2022-07-29 00:10
    关注

    很明显你训练的时候标注的 和 验证时标注的是反的调整一下就可以了 0 看做1 1 看做0 就好

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

问题事件

  • 已结题 (查看结题原因) 7月29日
  • 已采纳回答 7月29日
  • 赞助了问题酬金10元 7月29日
  • 赞助了问题酬金20元 7月28日
  • 展开全部

悬赏问题

  • ¥15 腾讯云如何建立同一个项目中物模型之间的联系
  • ¥30 VMware 云桌面水印如何添加
  • ¥15 用ns3仿真出5G核心网网元
  • ¥15 matlab答疑 关于海上风电的爬坡事件检测
  • ¥88 python部署量化回测异常问题
  • ¥30 酬劳2w元求合作写文章
  • ¥15 在现有系统基础上增加功能
  • ¥15 远程桌面文档内容复制粘贴,格式会变化
  • ¥15 这种微信登录授权 谁可以做啊
  • ¥15 请问我该如何添加自己的数据去运行蚁群算法代码