¥YRQ¥ 2024-03-19 12:10

BP network not converging (Python)


import numpy as np
import torch
import globals
import math
import torch.nn as nn
import torch.optim as optim
import torch.multiprocessing as mp
from OrbitPredict import *
import random
from datetime import datetime, timedelta
import os
import torch.nn.utils as nn_utils

'''
Test code for the high-precision orbit propagation function
'''
# x0 = [42166,0,0,0,3.07459,0]
# startTime = np.array([2019,1,1,0,0,0])
# globals.orbitModel = 'HPOP'
# Predict = OrbitPredict()
# x,_ = Predict.OrbitPrediction(x0,360,60,[1 ,1],'RK7',startTime);
#
# print(x)
'''
Build the network
'''
class IntegralNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(IntegralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.fc4 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = torch.sigmoid(self.fc4(x))
        return x*200
    def save(self, file_path):
        torch.save(self.state_dict(), file_path)
'''
Generate time data
'''
def random_time():
    # Random hour, minute and second
    hour = random.randint(0, 23)
    minute = random.randint(0, 59)
    second = random.randint(0, 59)

    # Random date within the last 30 days
    today = datetime.now()
    random_days = random.randint(0, 29)
    random_date = today - timedelta(days=random_days)

    # Pack the date and time components into a tensor
    random_datetime = torch.tensor([random_date.year, random_date.month, random_date.day,
        hour, minute, second])

    return random_datetime
'''
Training code
'''
def train(ranks,epochs,integral_model, integral_optimizer):
    Predict = OrbitPredict()
    for rank in range(ranks):
        # Randomly generate the satellite's six orbital elements
        RAAN = np.random.randint(0, 180)
        inclination = np.random.randint(0, 180)
        x0 = torch.tensor([30000 + np.random.randint(0, 15000), random.random(), inclination, RAAN, np.random.randint(0, 360),
               np.random.randint(0, 360)])
        # Random propagation time (seconds)
        time = np.random.randint(1, 10)*60
        step = 60
        # Random start epoch
        startTime = random_time()
        input = np.concatenate((x0, startTime))
        input = np.append(input, time)
        
        final_position = torch.tensor(100, dtype=torch.float64, requires_grad=True)
        for epoch in range(epochs):

            # integral_optimizer.zero_grad()

            output = integral_model(torch.tensor(input, dtype=torch.float))

            integral_loss = torch.sum((final_position - output)**2)

            # print(integral_loss.requires_grad)
            if epoch % 100 == 0:

                print(f"Process {rank}, Epoch {epoch}, Loss: {integral_loss.item()}")
                # Print the gradients of the model parameters
                for name, param in integral_model.named_parameters():
                    if param.grad is not None:
                        print(f"Parameter: {name}, Gradient: {param.grad}")#全是0
                    else:
                        print(f"Parameter: {name}, Gradient: None")

                if rank % 100 ==0:
                    print(f"Process {rank}, Epoch {epoch}, Loss: {integral_loss.item()}")
                    checkpoint_path = "model_{}.pth".format(rank)
                    integral_model.save(checkpoint_path)


            integral_loss.backward(retain_graph=True)
            nn_utils.clip_grad_norm_(integral_model.parameters(), max_norm=10.0)  # max_norm is the clipping threshold
            integral_optimizer.step()


device = "cuda" if torch.cuda.is_available() else "cpu"

input_dim = 13
hidden_dim = 256
output_dim = 1
integral_model = IntegralNetwork(input_dim, hidden_dim, output_dim)
integral_model.to(device)
integral_optimizer = optim.Adam(integral_model.parameters(), lr=0.01)

train(100,1000000,integral_model,integral_optimizer)


Question: I don't understand why the loss is 10000 while the gradients are all 0, so the network never converges.
The loss is 10000 because final_position is 100 and 100 squared is 10000; in other words, the network's output is 0.
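
A minimal sanity check makes this diagnosis visible (a sketch that reuses the IntegralNetwork class above; the input values below are only plausible magnitudes, not data from the post). With unnormalized inputs on the scale of the orbital elements and calendar dates, the fc4 pre-activation is huge, the sigmoid saturates at 0 or 1, the output is pinned at 0 or 200, and the loss gradient collapses to zero:

import torch

# Sketch: push one input with the same magnitudes as the training data through
# an untrained network and inspect the output and the gradient of fc1.
model = IntegralNetwork(13, 256, 1)
x = torch.tensor([40000., 0.5, 90., 90., 180., 180.,   # six orbital elements
                  2024., 3., 19., 12., 0., 0.,          # epoch
                  300.])                                # propagation time, s
out = model(x)                                          # sigmoid(fc4) * 200
loss = (100.0 - out).pow(2).sum()
loss.backward()
print("output:", out.item(), "loss:", loss.item())
print("fc1 weight grad norm:", model.fc1.weight.grad.norm().item())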


3 answers

  • Seal^_^ 2024-03-19 12:57

    In the code you provided, the network output goes through a sigmoid and is then multiplied by 200. Because of the shape of the sigmoid, its output saturates near 0 or 1 when the pre-activation is very large or very small. If the network output is close to 0, multiplying by 200 still leaves it very close to 0, which explains the loss of 10000: (100 - 0)^2 = 10000.

    As for the zero gradients, the most likely cause is vanishing gradients. This is a common problem in neural-network training, especially with the sigmoid activation: when its output is close to 0 or 1 the derivative becomes extremely small, essentially 0, so the parameters stop updating and the network cannot converge.
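
    To see concretely how quickly the sigmoid gradient vanishes, here is a small illustrative check (not part of the original code). The derivative sigmoid(x) * (1 - sigmoid(x)) peaks at 0.25 at x = 0 and is numerically zero once |x| is large, which is the regime that large unnormalized inputs push the output layer into:

    import torch

    # Derivative of sigmoid at a few points: at most 0.25 (at x = 0),
    # essentially zero for large |x|.
    for x in [0.0, 5.0, 20.0, 100.0]:
        t = torch.tensor(x, requires_grad=True)
        torch.sigmoid(t).backward()
        print(f"x = {x:6.1f}  d(sigmoid)/dx = {t.grad.item():.3e}")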

    Below is the complete optimized code, which uses the ReLU activation function (removing the sigmoid from the output layer), adjusts the learning rate, adds parameter initialization, and re-enables optimizer.zero_grad() in the training loop:

    import numpy as np
    import torch
    import math
    import torch.nn as nn
    import torch.optim as optim
    import random
    from datetime import datetime, timedelta
    import torch.nn.utils as nn_utils
    
    '''
    Build the network
    '''
    class IntegralNetwork(nn.Module):
        def __init__(self, input_dim, hidden_dim, output_dim):
            super(IntegralNetwork, self).__init__()
            self.fc1 = nn.Linear(input_dim, hidden_dim)
            self.fc2 = nn.Linear(hidden_dim, hidden_dim)
            self.fc3 = nn.Linear(hidden_dim, hidden_dim)
            self.fc4 = nn.Linear(hidden_dim, output_dim)
    
        def forward(self, x):
            x = torch.relu(self.fc1(x))
            x = torch.relu(self.fc2(x))
            x = torch.relu(self.fc3(x))
            x = self.fc4(x)
            return x * 200
    
    '''
    Generate time data
    '''
    def random_time():
        hour = random.randint(0, 23)
        minute = random.randint(0, 59)
        second = random.randint(0, 59)
    
        today = datetime.now()
        random_days = random.randint(0, 29)
        random_date = today - timedelta(days=random_days)
    
        random_datetime = torch.tensor([random_date.year, random_date.month, random_date.day, hour, minute, second])
    
        return random_datetime
    
    '''
    Training code
    '''
    def train(ranks, epochs, integral_model, integral_optimizer):
        for rank in range(ranks):
            RAAN = np.random.randint(0, 180)
            inclination = np.random.randint(0, 180)
            x0 = torch.tensor([30000 + np.random.randint(0, 15000), random.random(), inclination, RAAN, np.random.randint(0, 360),
                   np.random.randint(0, 360)])
            
            startTime = random_time()
            time = np.random.randint(1, 10) * 60
            # Cast everything to float32 and keep it on the same device as the model
            input_data = torch.cat((x0.float(), startTime.float(),
                                    torch.tensor([time], dtype=torch.float)), dim=0).to(device)

            final_position = torch.tensor(100.0, device=device)
            for epoch in range(epochs):
    
                integral_optimizer.zero_grad()
    
                output = integral_model(input_data)
    
                integral_loss = torch.sum((final_position - output)**2)
    
                if epoch % 100 == 0:
                    print(f"Process {rank}, Epoch {epoch}, Loss: {integral_loss.item()}")
    
                integral_loss.backward()
                nn_utils.clip_grad_norm_(integral_model.parameters(), max_norm=5.0)
                integral_optimizer.step()
    
    device = "cuda" if torch.cuda.is_available() else "cpu"
    
    input_dim = 13
    hidden_dim = 256
    output_dim = 1
    integral_model = IntegralNetwork(input_dim, hidden_dim, output_dim)
    integral_model.to(device)
    
    integral_optimizer = optim.Adam(integral_model.parameters(), lr=0.001)
    
    # Xavier initialization for all linear layers
    def init_weights(m):
        if isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight)
    
    integral_model.apply(init_weights)
    
    train(100, 1000000, integral_model, integral_optimizer)
    
    
    

    The network has been optimized: the sigmoid on the output layer was removed (ReLU is kept in the hidden layers), the learning rate was lowered, Xavier initialization was added, gradient clipping is applied, and optimizer.zero_grad() is called at the start of every iteration.
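
    As a quick check that gradients actually flow after these changes (a hypothetical snippet, assuming the integral_model and device defined in the script above), run one forward/backward pass and print the gradient norm of every layer; with the linear output head none of them should be zero:

    # Hypothetical check: one forward/backward pass on a dummy input.
    probe = torch.randn(13, device=device)
    target = torch.tensor(100.0, device=device)
    integral_model.zero_grad()
    loss = torch.sum((target - integral_model(probe)) ** 2)
    loss.backward()
    for name, param in integral_model.named_parameters():
        print(f"{name}: grad norm = {param.grad.norm().item():.3e}")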

    Accepted by the asker as the best answer.