weixin_51793354 2022-07-25 22:48 · Acceptance rate: 54.5%
155 views
Closed

Softmax binary classification gives very low accuracy; swapping the 0s and 1s in the predictions gives good results.

I built a binary classifier with a softmax output layer. Prediction accuracy is only about 0.2, yet swapping the 0s and 1s in the predictions makes the results look quite good. Where might the problem be?

The network (about 5 layers, ReLU hidden layers) behaves normally when the output layer is sigmoid. All I changed was the output layer to softmax, and the cost still decreases during gradient descent.

I also tried swapping the 0/1 labels in the raw training data; it made no difference, and accuracy stayed around 0.2.
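
A quick way to tell whether the inversion comes from the network or from the scoring is to evaluate the same predictions under both label conventions. A minimal sketch; nn, X_test and Y_test refer to the objects set up in the code below, with Y_test already one-hot encoded:

import numpy as np

pred = nn.predict(X_test)           # class indices via argmax
y_true = np.argmax(Y_test, axis=0)  # recover integer labels from the one-hot matrix
print(np.mean(pred == y_true))      # accuracy as-is
print(np.mean(pred == 1 - y_true))  # accuracy with 0/1 swapped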

The code is a bit long. Here is the class code:

import numpy as np
from function_set.activate_functions import sigmoid, reLU, tanh, softmax  # used via eval(); do not remove
from function_set.activate_differential import sigmoid_d, reLU_d, tanh_d  # used via eval(); do not remove


class neural_network:
    def __init__(self,
                 layer_num=3,
                 dim_num_list=None,
                 funct_list=None,
                 a_functs=("tanh", "sigmoid"),
                 reg="L2",
                 lambda_reg=0.0,
                 beta_momentum=0.9,
                 batch_size=-1,
                 keep_prob=1.0
                 ):
        self.funct_list = funct_list  # activation function name for each layer
        self.layer_num = layer_num  # number of layers
        self.dim_num_list = dim_num_list  # number of units in each layer
        self.a_functs = a_functs  # (hidden activation, output activation) names
        self.paras = dict()  # stores the parameters W, b
        self.cache = dict()  # stores forward-pass values for use in backprop
        self.reg = reg.upper()
        self.epsilon = 1e-8  # guard against division by zero and log(0)
        self.lambda_reg = lambda_reg  # lambda for L2 regularization
        self.beta_momentum = beta_momentum  # momentum hyperparameter
        self.iter_cnt = 0  # iteration counter, used for momentum bias correction
        self.batch_size = batch_size  # mini-batch size
        self.keep_prob = keep_prob  # dropout keep probability

    def init(self, X, Y):
        if not self.dim_num_list:
            self.dim_num_list = np.random.randint(30, 31, self.layer_num + 1)  # default: 30 units per layer
        else:
            self.layer_num = len(self.dim_num_list) - 1
        if not self.funct_list:
            self.funct_list = list()
            for i in range(0, self.layer_num):
                self.funct_list.append(self.a_functs[0])
            self.funct_list.append(self.a_functs[1])
        self.dim_num_list[0] = X.shape[0]
        self.dim_num_list[self.layer_num] = Y.shape[0]
        for i in range(1, self.layer_num + 1):
            self.cache["v_dW" + str(i)] = np.zeros((self.dim_num_list[i], self.dim_num_list[i - 1]))
            self.cache["v_db" + str(i)] = np.zeros((self.dim_num_list[i], 1))
        self.init_paras()

    def init_paras(self):
        for i in range(1, self.layer_num + 1):
            self.paras["W" + str(i)] = np.random.randn(self.dim_num_list[i], self.dim_num_list[i - 1])
            self.paras["b" + str(i)] = np.zeros(self.dim_num_list[i], float).reshape(-1, 1)

    def forward_propagate(self, X):
        self.cache["A0"] = X
        for i in range(1, self.layer_num + 1):
            self.cache["Z" + str(i)] = np.dot(self.paras["W" + str(i)], self.cache["A" + str(i - 1)])
            self.cache["A" + str(i)] = eval(self.funct_list[i])(self.cache["Z" + str(i)])
            if self.keep_prob < 1:  # inverted dropout: mask, then rescale by 1/keep_prob
                D = np.random.rand(self.cache["A" + str(i)].shape[0], self.cache["A" + str(i)].shape[1])
                D = (D < self.keep_prob) * 1
                self.cache["D" + str(i)] = D
                self.cache["A" + str(i)] = self.cache["A" + str(i)] * D / self.keep_prob

    def get_cost(self, Y):
        m = self.cache["A0"].shape[1]
        A = self.cache["A" + str(self.layer_num)]
        Z = self.cache["Z" + str(self.layer_num)]
        cost1 = 0  # cost1: the cross-entropy term
        loss_matrix = None
        if self.funct_list[self.layer_num] == "sigmoid":
            # loss_matrix = -Y * (np.maximum(Z, 0) - np.log(1 + np.exp(-np.abs(Z))))
            loss_matrix = np.maximum(Z, 0) - Z * Y + np.log(1 + np.exp(-np.abs(Z)))
        elif self.funct_list[self.layer_num] == "softmax":
            max_Z = np.max(Z, axis=0)
            loss_matrix = Y * (max_Z - Z + np.log(np.sum(np.exp(Z - max_Z), axis=0)))
        cost1 += (1 / m) * np.sum(np.sum(loss_matrix, axis=0), axis=0)
        cost2 = 0  # cost2: the regularization term
        if self.reg == "L2":
            for i in range(1, self.layer_num + 1):
                cost2 += self.lambda_reg * np.sum(np.sum(self.paras["W" + str(i)] * self.paras["W" + str(i)], axis=0),
                                                  axis=0)
        else:
            pass
        cost = cost1 + cost2
        print("cost1 = ", cost1, "\tcost2=", cost2)
        return cost

    def backward_propagate(self, Y):
        A = self.cache["A" + str(self.layer_num)]
        m = A.shape[1]
        dA = (-1 / m) * Y * (1 / (A + self.epsilon))
        i = self.layer_num
        while i > 0:
            function_name = self.funct_list[i]
            if (i == self.layer_num) and (function_name == "softmax" or function_name == "sigmoid"):
                dZ = (1 / m) * (A - Y)  # combined cross-entropy + softmax/sigmoid shortcut
            else:
                if self.keep_prob < 1:
                    dA = dA * self.cache["D" + str(i)] / self.keep_prob
                dZ = dA * eval(function_name + "_d")(self.cache["A" + str(i)], self.cache["Z" + str(i)])
            dW_reg = 2 * self.lambda_reg * self.paras["W" + str(i)]
            db_reg = 2 * self.lambda_reg * self.paras["b" + str(i)]
            self.cache["dW" + str(i)] = np.dot(dZ, self.cache["A" + str(i - 1)].T) + dW_reg
            self.cache["db" + str(i)] = np.sum(dZ, axis=1).reshape(-1, 1) + db_reg
            dA = np.dot(self.paras["W" + str(i)].T, dZ)
            i -= 1

    def update_parameters(self, learning_rate):
        for i in range(1, self.layer_num + 1):
            self.cache["v_dW" + str(i)] = self.beta_momentum * self.cache["v_dW" + str(i)] + (1 - self.beta_momentum) * \
                                          self.cache["dW" + str(i)]
            self.cache["v_db" + str(i)] = self.beta_momentum * self.cache["v_db" + str(i)] + (1 - self.beta_momentum) * \
                                          self.cache["db" + str(i)]
            self.cache["v_c_dW" + str(i)] = self.cache["v_dW" + str(i)] / (1 - self.beta_momentum ** self.iter_cnt)
            self.cache["v_c_db" + str(i)] = self.cache["v_db" + str(i)] / (1 - self.beta_momentum ** self.iter_cnt)
            self.paras["W" + str(i)] = self.paras["W" + str(i)] - learning_rate * self.cache["v_c_dW" + str(i)]
            self.paras["b" + str(i)] = self.paras["b" + str(i)] - learning_rate * self.cache["v_c_db" + str(i)]

    def fit(self, X, Y, learning_rate=0.5, iter_num=1000):
        self.init(X, Y)
        batch_generator = self.get_batch(X, Y)
        for X, Y in batch_generator:  # note: runs iter_num updates on each mini-batch before moving to the next
            self.iter_cnt = 0
            for epoch in range(iter_num):
                self.iter_cnt += 1
                self.forward_propagate(X)
                cost = self.get_cost(Y)
                self.backward_propagate(Y)
                self.update_parameters(learning_rate)

    def predict_probability(self, X):
        self.forward_propagate(X)
        return self.cache["A" + str(self.layer_num)]

    def predict(self, X):
        probability = self.predict_probability(X)
        return self.map_to_int(probability)

    def get_batch(self, X, Y):  # mini-batch generator
        m = X.shape[1]
        if self.batch_size == -1:
            self.batch_size = m
        batch_num = (m + self.batch_size - 1) // self.batch_size
        index_list = np.random.permutation(m)
        start = 0
        for i in range(batch_num):
            end = min(start + self.batch_size, m)
            yield X[:, index_list[start:end]], Y[:, index_list[start:end]]
            start = end

    def map_to_int(self, A):  # map probabilities to class predictions
        result = None
        if self.funct_list[self.layer_num] == "softmax":
            result = np.argmax(A, axis=0)
        elif self.funct_list[self.layer_num] == "sigmoid":
            result = (A >= 0.5) * 1
            result = result[0]
        return result

    def get_one_hot(self, y, class_num):  # convert integer labels to one-hot encoding
        n = class_num
        m = len(y)
        result = np.zeros((n, m))
        for i in range(m):
            result[y[i]][i] = 1
        return result
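
As an aside, the softmax branch of get_cost is the standard log-sum-exp stabilization of cross-entropy. A minimal standalone sketch with made-up logits, showing it matches the naive formula while staying finite for large inputs:

import numpy as np

Z = np.array([[2.0, -1.0], [0.5, 3.0]])  # hypothetical logits, shape (classes, m)
Y = np.array([[1.0, 0.0], [0.0, 1.0]])   # one-hot labels
max_Z = np.max(Z, axis=0)
stable = np.sum(Y * (max_Z - Z + np.log(np.sum(np.exp(Z - max_Z), axis=0))))
naive = -np.sum(Y * np.log(np.exp(Z) / np.sum(np.exp(Z), axis=0)))
print(np.isclose(stable, naive))  # True: identical loss, overflow-safe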



Here is the test function:

def test(X_train, Y_train, X_test, Y_test):
    L = [2, 5, 5, 5, 5, 1]
    nn = neural_network(dim_num_list=L, a_functs=("reLU", "softmax"), batch_size=200)
    nn.fit(X_train, Y_train, learning_rate=0.1, iter_num=1000)
    result = nn.predict(X_test)
    print("result = ", result)
    y = Y_test[0]
    cnt = 0
    for e in range(len(y)):
        if result[e] == y[e]:
            cnt += 1
    print(cnt / len(y))
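
One detail worth double-checking in this scoring loop (an observation, not a confirmed diagnosis): with a softmax head, result holds argmax class indices, while y = Y_test[0] is the first row of the one-hot matrix, i.e. the indicator of class 0, which for two classes equals 1 - true_label. Comparing the two would invert the reported accuracy, and swapping the raw labels before one-hot encoding would reproduce the same inversion. A sketch of an argmax-consistent comparison:

import numpy as np

y = np.argmax(Y_test, axis=0)  # integer labels, consistent with predict()'s argmax
print(np.mean(result == y))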

Here is the activation function code:

import numpy as np


def sigmoid(Z):  # numerically stable sigmoid: split positive and negative inputs
    positive_mask = (Z >= 0)
    negative_mask = Z < 0
    result_positive = 1 / (1 + np.exp(-Z * positive_mask))
    result_positive[~positive_mask] = 0
    result_negative = np.exp(Z * negative_mask) / (np.exp(Z * negative_mask) + 1)
    result_negative[~negative_mask] = 0
    result = result_negative + result_positive
    return result


def reLU(Z):
    return np.maximum(0, Z)


def tanh(Z):
    return np.tanh(Z)

def softmax(Z):
    max_Z = np.max(Z, axis=0)
    return np.exp(Z - max_Z) / np.sum(np.exp(Z - max_Z), axis=0)
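
A quick usage check of this max-shifted softmax (hypothetical inputs): subtracting the per-column maximum leaves the result mathematically unchanged but keeps exp from overflowing on large logits:

import numpy as np

Z = np.array([[1000.0, -5.0], [999.0, 5.0]])  # naive exp(Z) would overflow here
P = softmax(Z)
print(P)              # finite probabilities
print(P.sum(axis=0))  # each column sums to 1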


Here is the derivative code for the activation functions. Since the backward pass never differentiates softmax directly, softmax_d is left unimplemented:


import numpy as np


def sigmoid_d(A, Z):
    return A * (1 - A)


def reLU_d(A, Z):
    result = (Z > 0) * 1
    return result


def tanh_d(A, Z):
    return 1 - A * A


def softmax_d(A, Z):
    pass
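
That shortcut is standard: when softmax feeds a cross-entropy loss, the gradient with respect to the logits collapses to A - Y, which is exactly what backward_propagate uses at the output layer, so softmax alone never needs differentiating. A minimal finite-difference check of that identity on made-up values:

import numpy as np

def xent(Z, Y):  # cross-entropy of softmax(Z) against one-hot Y (single column)
    e = np.exp(Z - np.max(Z))
    return float(-np.sum(Y * np.log(e / np.sum(e))))

Z = np.array([1.2, -0.3, 0.5])
Y = np.array([0.0, 1.0, 0.0])
A = np.exp(Z - np.max(Z)) / np.sum(np.exp(Z - np.max(Z)))
numeric = np.array([(xent(Z + 1e-6 * np.eye(3)[i], Y) -
                     xent(Z - 1e-6 * np.eye(3)[i], Y)) / 2e-6 for i in range(3)])
print(np.allclose(A - Y, numeric, atol=1e-5))  # True: dZ = A - Y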

Here is the main script:

from nn import neural_network, test
from deep_learning_course.course_1_3.planar_utils import load_planar_dataset


X_train, Y_train = load_planar_dataset()
X_train /= 4
# plt.scatter(X_train[0, :], X_train[1, :], c=Y_train, s=40, cmap=plt.cm.Spectral)  # scatter plot
# plt.show()
X_test, Y_test = load_planar_dataset()
X_test /= 4
Y_train = neural_network().get_one_hot(Y_train[0], 2)
Y_test = neural_network().get_one_hot(Y_test[0], 2)


test(X_train, Y_train, X_test, Y_test)


Here is the content of the planar_utils file:

import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model

def plot_decision_boundary(model, X, y):
    # Set min and max values and give it some padding
    x_min, x_max = X[0, :].min() - 1, X[0, :].max() + 1
    y_min, y_max = X[1, :].min() - 1, X[1, :].max() + 1
    h = 0.01
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the function value for the whole grid
    Z = model(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    plt.scatter(X[0, :], X[1, :], c=y, cmap=plt.cm.Spectral)


def sigmoid(Z):
    positive_mask = (Z >= 0)
    negative_mask = Z < 0
    result_positive = 1 / (1 + np.exp(-Z * positive_mask))
    result_positive[~positive_mask] = 0
    result_negative = np.exp(Z * negative_mask) / (np.exp(Z * negative_mask) + 1)
    result_negative[~negative_mask] = 0
    result = result_negative + result_positive
    return result

def load_planar_dataset():
    # np.random.seed(1)
    m = 400 # number of examples
    N = int(m/2) # number of points per class
    D = 2 # dimensionality
    X = np.zeros((m,D)) # data matrix where each row is a single example
    Y = np.zeros((m,1), dtype='uint8') # labels vector (0 for red, 1 for blue)
    a = 4 # maximum ray of the flower

    for j in range(2):
        ix = range(N*j,N*(j+1))
        t = np.linspace(j*3.12,(j+1)*3.12,N) + np.random.randn(N)*0.2 # theta
        r = a*np.sin(4*t) + np.random.randn(N)*0.2 # radius
        X[ix] = np.c_[r*np.sin(t), r*np.cos(t)]
        Y[ix] = j

    X = X.T
    Y = Y.T

    return X, Y

def load_extra_datasets():  
    N = 200
    noisy_circles = sklearn.datasets.make_circles(n_samples=N, factor=.5, noise=.3)
    noisy_moons = sklearn.datasets.make_moons(n_samples=N, noise=.2)
    blobs = sklearn.datasets.make_blobs(n_samples=N, random_state=5, n_features=2, centers=6)
    gaussian_quantiles = sklearn.datasets.make_gaussian_quantiles(mean=None, cov=0.5, n_samples=N, n_features=2, n_classes=2, shuffle=True, random_state=None)
    no_structure = np.random.rand(N, 2), np.random.rand(N, 2)

    return noisy_circles, noisy_moons, blobs, gaussian_quantiles, no_structure



2 answers

  • 迪菲赫尔曼 (recognized contributor in the AI field) 2022-07-26 05:56

    Did you retrain after switching to softmax?

Question events

  • Closed Jul 29
  • Question edited Jul 26
  • Question edited Jul 26
  • Question created Jul 25
