Kevin__Cao
2021-06-09 10:53
采纳率: 83.3%
浏览 116

如何将训练好的BP神经网络模型保存并可以在其他py文件中直接调用?

下面是我写的BP神经网络代码,想请教一下:如何将训练好的神经网络模型保存下来,并且可以在其他py文件中直接调用?请老师给出详细的代码,谢谢。

import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
from pandas.plotting import radviz

'''
    构建一个具有1个隐藏层的神经网络,隐层的大小由参数 n_h 指定(下方主程序中为30)
    输入层的特征数由参数 n_input 指定(下方主程序中为672),输出层为3个分类
    (1,0,0)为第一类,(0,1,0)为第二类,(0,0,1)为第三类
'''


# 1.初始化参数
def initialize_parameters(n_x, n_h, n_y):
    """Create the initial weights and biases of the two-layer network.

    The global NumPy random generator is re-seeded with 2 on every call, so
    the same layer sizes always yield the same weights.

    Args:
        n_x: Number of input units.
        n_h: Number of hidden units.
        n_y: Number of output units.

    Returns:
        dict with the keys 'w1' (n_h, n_x), 'b1' (n_h, 1), 'w2' (n_y, n_h)
        and 'b2' (n_y, 1). Weights are standard-normal draws scaled by 0.01;
        biases are all zero.
    """
    np.random.seed(2)

    scale = 0.01
    # Draw order matters for reproducibility: hidden layer first, then output.
    hidden_weights = scale * np.random.randn(n_h, n_x)
    output_weights = scale * np.random.randn(n_y, n_h)

    return {
        'w1': hidden_weights,
        'b1': np.zeros((n_h, 1)),
        'w2': output_weights,
        'b2': np.zeros((n_y, 1)),
    }


# 2.前向传播
def forward_propagation(X, parameters):
    """Run one forward pass through the two-layer network.

    Args:
        X: Input matrix. It is left-multiplied by 'w1', so its first
            dimension must equal the number of columns of 'w1'.
        parameters: dict holding 'w1', 'b1', 'w2' and 'b2'.

    Returns:
        A tuple (a2, cache). a2 is the sigmoid activation of the second
        layer; cache is a dict with the intermediate values under the keys
        'z1', 'a1', 'z2' and 'a2'.
    """
    w1, b1, w2, b2 = (parameters[key] for key in ('w1', 'b1', 'w2', 'b2'))

    # Hidden layer: affine map followed by tanh. The bias is broadcast
    # against the matrix product even though their shapes differ.
    hidden_pre = np.dot(w1, X) + b1
    hidden_act = np.tanh(hidden_pre)

    # Output layer: affine map followed by the logistic sigmoid.
    output_pre = np.dot(w2, hidden_act) + b2
    output_act = 1 / (1 + np.exp(-output_pre))

    cache = dict(z1=hidden_pre, a1=hidden_act, z2=output_pre, a2=output_act)
    return output_act, cache


# 3.计算代价函数
def compute_cost(a2, Y, parameters=None):
    """Return the cross-entropy cost of the predictions, averaged per sample.

    The predicted probabilities are clipped to [1e-12, 1 - 1e-12] before the
    logarithm is taken. Without the clip, a saturated sigmoid output of
    exactly 0 or 1 produces ``0 * log(0) = nan`` and the cost becomes NaN.
    Values strictly inside that interval are not altered.

    Args:
        a2: Predicted probabilities, same shape as Y.
        Y: Target matrix of 0/1 entries; its number of columns is used as
            the sample count.
        parameters: Unused. Kept (now optional) so existing three-argument
            calls keep working.

    Returns:
        The summed binary cross-entropy divided by the number of samples.
    """
    m = Y.shape[1]  # number of columns of Y == number of samples

    eps = 1e-12
    # Force float64 so that ``1 - eps`` is not rounded back to 1.0.
    probs = np.clip(np.asarray(a2, dtype=float), eps, 1 - eps)

    log_likelihood = Y * np.log(probs) + (1 - Y) * np.log(1 - probs)
    cost = -np.sum(log_likelihood) / m

    return cost


# 4.反向传播(计算代价函数的导数)
def backward_propagation(parameters, cache, X, Y):
    """Compute the gradients of the cost for both layers.

    Args:
        parameters: dict of network parameters; only 'w2' is read.
        cache: dict from the forward pass; 'a1' and 'a2' are read.
        X: Input matrix that produced the cached activations.
        Y: Target matrix; its number of columns is used as the sample count.

    Returns:
        dict with the keys 'dw1', 'db1', 'dw2' and 'db2'.
    """
    inv_m = 1 / Y.shape[1]

    output_weights = parameters['w2']
    hidden_act = cache['a1']
    output_act = cache['a2']

    # Error signal at the output layer.
    output_err = output_act - Y

    # Push the error back through w2 and scale by the tanh derivative,
    # 1 - a1^2.
    tanh_slope = 1 - np.power(hidden_act, 2)
    hidden_err = np.dot(output_weights.T, output_err) * tanh_slope

    return {
        'dw1': inv_m * np.dot(hidden_err, X.T),
        'db1': inv_m * np.sum(hidden_err, axis=1, keepdims=True),
        'dw2': inv_m * np.dot(output_err, hidden_act.T),
        'db2': inv_m * np.sum(output_err, axis=1, keepdims=True),
    }


# 5.更新参数
def update_parameters(parameters, grads, learning_rate=0.06):
    """Apply one gradient-descent step and return the new parameters.

    Args:
        parameters: dict holding 'w1', 'b1', 'w2' and 'b2'.
        grads: dict holding the matching gradients 'dw1', 'db1', 'dw2'
            and 'db2'.
        learning_rate: Step size multiplied onto every gradient.

    Returns:
        A new dict with the keys 'w1', 'b1', 'w2', 'b2'; the arrays in the
        input dict are not modified.
    """
    names = ('w1', 'b1', 'w2', 'b2')
    # Each gradient is stored under the parameter's name prefixed with 'd'.
    return {
        name: parameters[name] - grads['d' + name] * learning_rate
        for name in names
    }


# 6.模型评估
def predict(parameters, x_test, y_test):
    """Classify the test samples, print a report and return one-hot labels.

    A forward pass is run on ``x_test``. For every column (sample) the class
    with the largest output probability is set to 1 and all others to 0.
    If several classes share the maximum, the highest class index wins.
    A sample counts as correct only when its whole one-hot column equals the
    corresponding column of ``y_test``; this works for any number of classes.

    Printed, in order: the predicted matrix, the true matrix, the column
    index of every misclassified sample, and the accuracy in percent.

    Args:
        parameters: dict holding 'w1', 'b1', 'w2' and 'b2'.
        x_test: Test inputs, one sample per column.
        y_test: 2-D one-hot targets, one sample per column.

    Returns:
        Integer array with the shape of ``y_test`` holding the one-hot
        predictions.

    Raises:
        ValueError: If the network output and ``y_test`` differ in shape.
    """
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']

    # Forward pass: tanh hidden layer, sigmoid output layer.
    a1 = np.tanh(np.dot(w1, x_test) + b1)
    a2 = 1 / (1 + np.exp(-(np.dot(w2, a1) + b2)))

    n_rows, n_cols = y_test.shape
    if a2.shape != (n_rows, n_cols):
        raise ValueError(
            'network output has shape %s but y_test has shape %s'
            % (a2.shape, (n_rows, n_cols))
        )

    # argmax returns the first maximum; searching the row-reversed matrix
    # and mapping the index back selects the last maximum instead.
    winners = (n_rows - 1) - np.argmax(a2[::-1], axis=0)
    output = np.zeros((n_rows, n_cols), dtype=int)
    output[winners, np.arange(n_cols)] = 1

    print('预测结果:')
    print(output)
    print('真实结果:')
    print(y_test)

    # A column is correct only if every class row matches.
    matches = np.all(output == y_test, axis=0)
    for k in np.flatnonzero(~matches):
        print(int(k))

    count = int(np.count_nonzero(matches))
    # Avoid dividing by zero when the test set is empty.
    acc = count / n_cols * 100 if n_cols else 0.0
    print('准确率:%.2f%%' % acc)

    return output


# 建立神经网络
def nn_model(X, Y, n_h, n_input, n_output, num_iterations=10000, print_cost=False,
             learning_rate=0.06):
    """Train the two-layer network with batch gradient descent.

    Args:
        X: Training inputs, one sample per column.
        Y: Training targets, one sample per column.
        n_h: Number of hidden units.
        n_input: Number of input units.
        n_output: Number of output units.
        num_iterations: Number of gradient-descent steps.
        print_cost: When true, print the cost every 1000 iterations.
        learning_rate: Step size passed to ``update_parameters``. The default
            of 0.06 is the value that was previously used implicitly.

    Returns:
        dict with the trained 'w1', 'b1', 'w2' and 'b2'.
    """
    # No seeding here: initialize_parameters seeds the generator itself
    # before drawing any weights.
    parameters = initialize_parameters(n_input, n_h, n_output)

    for i in range(num_iterations):
        # Forward pass on the full batch.
        a2, cache = forward_propagation(X, parameters)

        # The cost is only reported, never used for the update, so it is
        # evaluated on reporting iterations only.
        if print_cost and i % 1000 == 0:
            cost = compute_cost(a2, Y, parameters)
            print('迭代第%i次,代价函数为:%f' % (i, cost))

        # Backward pass and parameter update.
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads, learning_rate)

    return parameters



# 结果可视化
# 特征有4个维度,类别有1个维度,一共5个维度,故采用了RadViz图
# def result_visualization(x_test, y_test, result):
#     cols = y_test.shape[1]
#     y = []
#     pre = []
#
#     # 反转换类别的独热编码
#     for i in range(cols):
#         if y_test[0][i] == 0 and y_test[1][i] == 0 and y_test[2][i] == 1:
#             y.append('setosa')
#         elif y_test[0][i] == 0 and y_test[1][i] == 1 and y_test[2][i] == 0:
#             y.append('versicolor')
#         elif y_test[0][i] == 1 and y_test[1][i] == 0 and y_test[2][i] == 0:
#             y.append('virginica')
#
#     for j in range(cols):
#         if result[0][j] == 0 and result[1][j] == 0 and result[2][j] == 1:
#             pre.append('setosa')
#         elif result[0][j] == 0 and result[1][j] == 1 and result[2][j] == 0:
#             pre.append('versicolor')
#         elif result[0][j] == 1 and result[1][j] == 0 and result[2][j] == 0:
#             pre.append('virginica')
#         else:
#             pre.append('unknown')
#
#     # 将特征和类别矩阵拼接起来
#     real = np.column_stack((x_test.T, y))
#     prediction = np.column_stack((x_test.T, pre))
#
#     # 转换成DataFrame类型,并添加columns
#     df_real = pd.DataFrame(real, index=None,
#                            columns=['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width', 'Species'])
#     df_prediction = pd.DataFrame(prediction, index=None,
#                                  columns=['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width', 'Species'])
#
#     # 将特征列转换为float类型,否则radviz会报错
#     df_real[['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']] = df_real[
#         ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']].astype(float)
#     df_prediction[['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']] = df_prediction[
#         ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']].astype(float)
#
#     # 绘图
#     plt.figure('真实分类')
#     radviz(df_real, 'Species', color=['blue', 'green', 'red', 'yellow'])
#     plt.figure('预测分类')
#     radviz(df_prediction, 'Species', color=['blue', 'green', 'red', 'yellow'])
#     plt.show()


if __name__ == "__main__":
    # Load the data set once; both the training and the test split are
    # sliced from this single frame.
    # NOTE(review): from the slicing below, rows 0-671 are treated as
    # features and rows 672-674 as one-hot labels, one sample per column;
    # confirm against the CSV layout.
    data_set = pd.read_csv('C:/Users/29291/Desktop/sjwl.csv', header=None)

    # Training split: columns 0-269. `.loc` slices are label-based and
    # inclusive at both ends.
    X = data_set.loc[0:671, 0:269].values
    # Standardise along axis 0, i.e. each column with its own mean and std.
    # NOTE(review): a column with zero standard deviation would divide by
    # zero here.
    x_mean = np.mean(X, axis=0)
    x_std = np.std(X, axis=0)
    X = (X - x_mean) / x_std
    Y = data_set.loc[672:674, 0:269].values
    Y = Y.astype('uint8')

    # Train. Layer sizes are taken from the data (one input unit per feature
    # row, one output unit per label row); the hidden layer has 30 units.
    start_time = datetime.datetime.now()
    parameters = nn_model(X, Y, n_h=30, n_input=X.shape[0], n_output=Y.shape[0],
                          num_iterations=10000, print_cost=True)
    end_time = datetime.datetime.now()
    print("用时:" + str((end_time - start_time).seconds) + 's' + str(
        round((end_time - start_time).microseconds / 1000)) + 'ms')

    # Evaluate on the held-out columns 270-299, standardised the same way
    # (each column with its own mean and std).
    x_test = data_set.loc[0:671, 270:299].values
    x_mean = np.mean(x_test, axis=0)
    x_std = np.std(x_test, axis=0)
    x_test = (x_test - x_mean) / x_std
    y_test = data_set.loc[672:674, 270:299].values
    y_test = y_test.astype('uint8')
    result = predict(parameters, x_test, y_test)
  • 收藏

2条回答 默认 最新

  • CSDN专家-AlanMax 2021-06-09 11:20
    已采纳

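    这个需要把训练得到的参数用字典封装起来并保存到文件:例如 nn_model 返回的 parameters 字典可以用 np.savez('model.npz', **parameters) 保存,在其他py文件中用 np.load('model.npz') 读出 w1、b1、w2、b2 后,直接做前向传播即可得到预测结果。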

    打赏 评论
  • 有问必答小助手 2021-06-10 13:53

    您好,我是有问必答小助手,您的问题已经有小伙伴解答了,您看下是否解决,可以追评进行沟通哦~

    如果有您比较满意的答案 / 帮您提供解决思路的答案,可以点击【采纳】按钮,给回答的小伙伴一些鼓励哦~~

    ps:问答VIP仅需29元,即可享受5次/月 有问必答服务,了解详情>>>https://vip.csdn.net/askvip?utm_source=1146287632

    打赏 评论

相关推荐 更多相似问题