Problem description and background
I was pretraining the encoder part of CookGAN, a model that generates food images, but when I run it the loss backpropagation fails with an error I cannot resolve.
The source code is here:
https://github.com/klory/CookGAN
Relevant code
# train_retrieval.py
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch import optim
from tqdm import tqdm
import numpy as np
import os
import pdb
import wandb
from args_retrieval import get_parser
from datasets_retrieval import Dataset, train_transform
from models_retrieval import TextEncoder, ImageEncoder
from triplet_loss import global_loss, TripletLoss
from modules import DynamicSoftMarginLoss
import sys
sys.path.append('../')
from common import param_counter, sample_data
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
"""
RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
"""
def create_model(ckpt_args, device='cuda'):
text_encoder = TextEncoder(
# self, data_dir, text_info, hid_dim, emb_dim, z_dim, with_attention, ingr_enc_type
data_dir= 'E:/CookGAN/retrieval_model/models',
emb_dim=ckpt_args.word2vec_dim, # 300
hid_dim=ckpt_args.rnn_hid_dim, # 300
z_dim=ckpt_args.feature_dim, # 1024
# word2vec_file=ckpt_args.word2vec_file,
text_info=ckpt_args.text_info,
with_attention=ckpt_args.with_attention,
ingr_enc_type=ckpt_args.ingrs_enc_type)
image_encoder = ImageEncoder(
z_dim=ckpt_args.feature_dim)
text_encoder, image_encoder = [x.to(device) for x in [text_encoder, image_encoder]]
print('# text_encoder', param_counter(text_encoder.parameters()))
print('# image_encoder', param_counter(image_encoder.parameters()))
if device == 'cuda':
text_encoder, image_encoder = [nn.DataParallel(x) for x in [text_encoder, image_encoder]]
optimizer = torch.optim.Adam([
{'params': text_encoder.parameters()},
{'params': image_encoder.parameters()},
], lr=ckpt_args.lr, betas=(0.5, 0.999))
return text_encoder, image_encoder, optimizer
def load_model(ckpt_path, device='cuda'):
print('load retrieval model from:', ckpt_path)
ckpt = torch.load(ckpt_path)
ckpt_args = ckpt['args']
batch_idx = ckpt['batch_idx']
text_encoder, image_encoder, optimizer = create_model(ckpt_args, device)
if device=='cpu':
text_encoder.load_state_dict(ckpt['text_encoder'])
image_encoder.load_state_dict(ckpt['image_encoder'])
else:
text_encoder.module.load_state_dict(ckpt['text_encoder'])
image_encoder.module.load_state_dict(ckpt['image_encoder'])
optimizer.load_state_dict(ckpt['optimizer'])
return ckpt_args, batch_idx, text_encoder, image_encoder, optimizer
def save_model(args, batch_idx, text_encoder, image_encoder, optimizer, ckpt_path):
print('save retrieval model to:', ckpt_path)
ckpt = {
'args': args,
'batch_idx': batch_idx,
'text_encoder': text_encoder.state_dict(),
'image_encoder': image_encoder.state_dict(),
'optimizer': optimizer.state_dict(),
}
torch.save(ckpt, ckpt_path)
# hinge loss
def compute_loss(txt_feat, img_feat, device='cuda'):
BS = txt_feat.shape[0]
denom = img_feat.norm(p=2, dim=1, keepdim=True) @ txt_feat.norm(p=2, dim=1, keepdim=True).t()
numer = img_feat @ txt_feat.t()
sim = numer / (denom + 1e-12)
margin = 0.3 * torch.ones_like(sim)
mask = torch.eye(margin.shape[0], margin.shape[1]).bool().to(device)
margin.masked_fill_(mask, 0)
pos_sim = (torch.diag(sim) * torch.ones(BS, BS).to(device)).t() # [BS, BS]
loss_retrieve_txt = torch.max(
torch.tensor(0.0).to(device),
margin + sim - pos_sim)
loss_retrieve_img = torch.max(
torch.tensor(0.0).to(device),
margin + sim.t() - pos_sim)
loss = loss_retrieve_img + loss_retrieve_txt
# effective number of pairs is BS*BS-BS; those on the diagonal are never counted and always zero
loss = loss.sum() / (BS*BS-BS) / 2.0
return loss
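# For reference (not in the original script): a quick sanity check of
# compute_loss with random features of a hypothetical shape [4, 1024];
# random vectors have cosine similarity near 0, so the result should be
# a positive scalar close to the 0.3 margin:
#   compute_loss(torch.randn(4, 1024), torch.randn(4, 1024), device='cpu')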
def train(args, start_batch_idx, text_encoder, image_encoder, optimizer, train_loader, device='cuda'):
if args.loss_type == 'hinge':
criterion = compute_loss
elif args.loss_type == 'hardmining+hinge':
triplet_loss = TripletLoss(margin=args.margin)
elif args.loss_type == 'dynamic_soft_margin':
criterion = DynamicSoftMarginLoss(is_binary=False, nbins=args.batch_size // 2)
criterion = criterion.to(device)
#####################
# train
#####################
wandb.init(project="cookgan_retrieval_model")
wandb.config.update(args)
# create process bar
pbar = range(args.batches)
pbar = tqdm(pbar, initial=start_batch_idx, dynamic_ncols=True, smoothing=0.3)
text_encoder.train()
image_encoder.train()
if device == 'cuda':
text_module = text_encoder.module
image_module = image_encoder.module
else:
text_module = text_encoder
image_module = image_encoder
train_loader = sample_data(train_loader)
for batch_idx in pbar:
# test = next(train_loader)
txt, img = next(train_loader)
for i in range(len(txt)):
txt[i] = txt[i].to(device)
img = img.to(device)
# print('shape of txt: ', len(txt))
txt_feat = text_encoder([txt])
img_feat = image_encoder(img)
bs = img.shape[0]
if args.loss_type == 'hinge':
loss = criterion(img_feat, txt_feat, device)
elif args.loss_type == 'hardmining+hinge':
label = list(range(0, bs))
label.extend(label)
label = np.array(label)
label = torch.tensor(label).long().to(device)
loss = global_loss(triplet_loss, torch.cat((img_feat, txt_feat)), label, normalize_feature=True)[0]
elif args.loss_type == 'dynamic_soft_margin':
out = torch.cat((img_feat, txt_feat))
loss = criterion(out)
optimizer.zero_grad() # clear
print(loss)
loss.backward()
# loss.backward(retain_graph=True)
optimizer.step() # update
wandb.log({
'training loss': loss,
'batch_idx': batch_idx
})
if batch_idx % 10_000 == 0:
ckpt_path = f'{wandb.run.dir}/{batch_idx:>08d}.ckpt'
save_model(args, batch_idx, text_module, image_module, optimizer, ckpt_path)
if __name__ == '__main__':
##############################
# setup
##############################
args = get_parser().parse_args()
torch.manual_seed(args.seed)
np.random.seed(args.seed)
torch.backends.cudnn.benchmark = True
device = args.device
##############################
# dataset
##############################
print('loading datasets')
train_set = Dataset(
part='train',
recipe_file=args.recipe_file,
img_dir=args.img_dir,
word2vec_file=args.word2vec_file,
transform=train_transform,
permute_ingrs=args.permute_ingrs)
if args.debug:
print('in debug mode')
train_set = torch.utils.data.Subset(train_set, range(2000))
train_loader = DataLoader(
train_set, batch_size=args.batch_size, shuffle=True,
num_workers=args.workers, pin_memory=True, drop_last=False)
print('train data:', len(train_set), len(train_loader)) # 281161 8787
##########################
# model
##########################
if args.ckpt_path:
ckpt_args, batch_idx, text_encoder, image_encoder, optimizer = load_model(args.ckpt_path, device)
else:
text_encoder, image_encoder, optimizer = create_model(args, device)
batch_idx = 0
train(args, batch_idx, text_encoder, image_encoder, optimizer, train_loader, device='cuda')
# models_retrieval.py
import json
import os
import torch
from torch import nn
from torch.nn import functional as F
from torch.nn.utils import rnn
from torchvision import models
from gensim.models.keyedvectors import KeyedVectors
import pdb
def clean_state_dict(state_dict):
# create new OrderedDict that does not contain `module.`
from collections import OrderedDict
new_state_dict = OrderedDict()
for k, v in state_dict.items():
name = k[7:] if k[:min(6,len(k))] == 'module' else k # remove `module.`
new_state_dict[name] = v
return new_state_dict
class AttentionLayer(nn.Module):
def __init__(self, input_dim):
super(AttentionLayer, self).__init__()
self.u = torch.nn.Parameter(torch.randn(input_dim)) # u = [2*hid_dim], a shared contextual vector
# torch.randn returns a tensor filled with samples from the standard normal distribution (mean 0, variance 1)
# torch.nn.Parameter registers the tensor as a learnable parameter of the layer
self.u.requires_grad = True
self.fc = nn.Linear(input_dim, input_dim)
def forward(self, x):
# x = [BS, num_vec, 2*hid_dim]
mask = (x!=0)
# a trick used to find the mask for the softmax
mask = mask[:,:,0].bool()
h = torch.tanh(self.fc(x)) # h = [BS, num_vec, 2*hid_dim]
tmp = h @ self.u # tmp = [BS, num_vec], unnormalized importance
masked_tmp = tmp.masked_fill(~mask, -1e32)
alpha = F.softmax(masked_tmp, dim=1) # alpha = [BS, num_vec], normalized importance
alpha = alpha.unsqueeze(-1) # alpha = [BS, num_vec, 1]
out = x * alpha # out = [BS, num_vec, 2*hid_dim]
out = out.sum(dim=1) # out = [BS, 2*hid_dim]
# pdb.set_trace()
return out
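# Shape example for AttentionLayer (hypothetical sizes):
#   layer = AttentionLayer(input_dim=4)
#   x = torch.randn(2, 3, 4); x[1, 2] = 0  # second sample has one all-zero (padded) row
#   layer(x).shape                         # torch.Size([2, 4]); the padded row gets ~0 weight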
class InstEmbedLayer(nn.Module):
def __init__(self, data_dir, emb_dim):
super(InstEmbedLayer, self).__init__()
self.data_dir = data_dir
path = os.path.join(self.data_dir, 'word2vec_recipes.bin')
# model = KeyedVectors.load_word2vec_format(path, binary=True)
wv = KeyedVectors.load(path, mmap='r')
vec = torch.from_numpy(wv.vectors).float()
# the first three indices have special meanings, see utils.py
emb = nn.Embedding(vec.shape[0]+3, vec.shape[1], padding_idx=0)
emb.weight.data[3:].copy_(vec)
for p in emb.parameters():
p.requires_grad = False
self.embed_layer = emb
print('==> Inst embed layer', emb)
def forward(self, sent_list): # 14557, 300 -> 35549, 300
# sent_list [BS, max_len]
# print(self.embed_layer(sent_list).shape) torch.Size([64, 20, 300])
return self.embed_layer(sent_list) # x=[BS, max_len, emb_dim]
class IngrEmbedLayer(nn.Module):
def __init__(self, data_dir, emb_dim):
super(IngrEmbedLayer, self).__init__()
path = os.path.join(data_dir, 'vocab_ingr.txt')
with open(path, 'r') as f:
num_ingr = len(f.read().split('\n'))
print('num_ingr = ', num_ingr)
# the first three indices have special meanings, see utils.py
emb = nn.Embedding(35549, emb_dim, padding_idx=0)
# emb = nn.Embedding(num_ingr+3, emb_dim, padding_idx=0)
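# note: the repo's version (commented out above) sizes this embedding by the
# ingredient vocab; if any ingredient index is >= num_embeddings, the lookup
# fails on GPU with a device-side assert like the one quoted in train_retrieval.py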
self.embed_layer = emb
print('==> Ingr embed layer', emb)
def forward(self, sent_list): # 1992, 300
# sent_list [BS, max_len] 64, 20
return self.embed_layer(sent_list) # x=[BS, max_len, emb_dim]
class SentEncoder(nn.Module):
def __init__(
self,
data_dir,
emb_dim,
hid_dim,
with_attention=True,
source='inst'):
assert source in ('inst', 'ingr')
super(SentEncoder, self).__init__()
if source=='inst':
self.embed_layer = InstEmbedLayer(data_dir=data_dir, emb_dim=emb_dim)
elif source=='ingr':
self.embed_layer = IngrEmbedLayer(data_dir=data_dir, emb_dim=emb_dim)
self.rnn = nn.LSTM(
input_size=emb_dim,
hidden_size=hid_dim,
bidirectional=True,
batch_first=True)
if with_attention:
self.atten_layer = AttentionLayer(2*hid_dim)
self.with_attention = with_attention
def forward(self, sent_list):
# sent_list [BS, max_len]
x = self.embed_layer(sent_list) # x=[BS, max_len, emb_dim]
# print(sent_list)
# lens = (sent_list==1).nonzero()[:,1] + 1
lens = sent_list.count_nonzero(dim=1) + 1
# IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
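# note: count_nonzero(dim=1) + 1 equals max_len + 1 for any row without
# padding zeros, i.e. one step past the time dimension that
# pack_padded_sequence below can index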
# print(lens.shape)
sorted_len, sorted_idx = lens.sort(0, descending=True) # sorted_idx=[BS], for sorting
_, original_idx = sorted_idx.sort(0, descending=False) # original_idx=[BS], for unsorting
# print(sorted_idx.shape, x.shape)
index_sorted_idx = sorted_idx.view(-1,1,1).expand_as(x) # sorted_idx=[BS, max_len, emb_dim]
sorted_inputs = x.gather(0, index_sorted_idx.long()) # sort by num_words
packed_seq = rnn.pack_padded_sequence(
sorted_inputs, sorted_len.cpu().numpy(), batch_first=True)
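# pack_padded_sequence defaults to enforce_sorted=True, i.e. it expects
# lengths in descending order; hence the manual sort above and the
# unsorting via original_idx after the RNN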
if self.with_attention:
out, _ = self.rnn(packed_seq)
y, _ = rnn.pad_packed_sequence(
out, batch_first=True) # y=[BS, max_len, 2*hid_dim], currently in WRONG order!
unsorted_idx = original_idx.view(-1,1,1).expand_as(y)
output = y.gather(0, unsorted_idx).contiguous() # [BS, max_len, 2*hid_dim], now in correct order
feat = self.atten_layer(output)
else:
_, (h,_) = self.rnn(packed_seq) # [2, BS, hid_dim], currently in WRONG order!
h = h.transpose(0,1) # [BS, 2, hid_dim], still in WRONG order!
# unsort the output
unsorted_idx = original_idx.view(-1,1,1).expand_as(h)
output = h.gather(0, unsorted_idx).contiguous() # [BS, 2, hid_dim], now in correct order
feat = output.view(output.size(0), output.size(1)*output.size(2)) # [BS, 2*hid_dim]
# print('sent', feat.shape) # [BS, 2*hid_dim]
return feat
class SentEncoderFC(nn.Module):
def __init__(
self,
data_dir,
emb_dim,
hid_dim,
with_attention=True,
source='inst'):
assert source in ('inst', 'ingr')
super(SentEncoderFC, self).__init__()
if source=='inst':
self.embed_layer = InstEmbedLayer(data_dir=data_dir, emb_dim=emb_dim)
elif source=='ingr':
self.embed_layer = IngrEmbedLayer(data_dir=data_dir, emb_dim=emb_dim)
self.fc = nn.Linear(emb_dim, 2*hid_dim)
if with_attention:
self.atten_layer = AttentionLayer(2*hid_dim)
self.with_attention = with_attention
def forward(self, sent_list):
# sent_list [BS, max_len]
x = self.embed_layer(sent_list) # x=[BS, max_len, emb_dim]
x = self.fc(x) # [BS, max_len, 2*hid_dim]
if not self.with_attention:
feat = x.sum(dim=1) # [BS, 2*hid_dim]
else:
feat = self.atten_layer(x) # [BS, 2*hid_dim]
# print('ingredients', feat.shape)
return feat
class DocEncoder(nn.Module):
def __init__(self, sent_encoder, hid_dim, with_attention):
super(DocEncoder, self).__init__()
self.sent_encoder = sent_encoder
self.rnn = nn.LSTM(
input_size=2*hid_dim,
hidden_size=hid_dim,
bidirectional=True,
batch_first=True)
self.atten_layer_sent = AttentionLayer(2*hid_dim)
self.with_attention = with_attention
def forward(self, doc_list):
# doc_list=[BS, max_len, max_len]
embs = []
lens = []
for doc in doc_list:
len_doc = doc.nonzero()[:,0].max().item() + 1
lens.append(len_doc)
emb_doc = self.sent_encoder(doc[:len_doc]) # [?, 2*hid_dim]
embs.append(emb_doc)
embs = sorted(embs, key=lambda x: -x.shape[0]) # [BS, [?, 2*hid_dim]]
packed_seq = rnn.pack_sequence(embs)
lens = torch.tensor(lens).long().to(embs[0].device)
_, sorted_idx = lens.sort(0, descending=True) # sorted_idx=[BS], for sorting
_, original_idx = sorted_idx.sort(0, descending=False) # original_idx=[BS], for unsorting
if not self.with_attention:
_, (h,_) = self.rnn(packed_seq) # [2, BS, hid_dim], currently in WRONG order!
h = h.transpose(0,1) # [BS, 2, hid_dim], still in WRONG order!
# unsort the output
unsorted_idx = original_idx.view(-1,1,1).expand_as(h)
output = h.gather(0, unsorted_idx).contiguous() # [BS, 2, hid_dim], now in correct order
feat = output.view(output.size(0), output.size(1)*output.size(2)) # [BS, 2*hid_dim]
else:
out, _ = self.rnn(packed_seq)
y, _ = rnn.pad_packed_sequence(
out, batch_first=True) # y=[BS, max_valid_len, 2*hid_dim], currently in WRONG order!
unsorted_idx = original_idx.view(-1,1,1).expand_as(y)
output = y.gather(0, unsorted_idx).contiguous() # [BS, max_valid_len, 2*hid_dim], now in correct order
feat = self.atten_layer_sent(output)
# print('instructions', feat.shape)
return feat
class TextEncoder(nn.Module):
def __init__(
self, data_dir, text_info, hid_dim, emb_dim, z_dim, with_attention, ingr_enc_type):
super(TextEncoder, self).__init__()
self.text_info = text_info
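# text_info is a 3-character flag string: '111' uses title + ingredients +
# instructions, '010' uses ingredients only (see the branches below)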
if self.text_info == '111':
self.sent_encoder = SentEncoder(
data_dir,
emb_dim, # 300
hid_dim, # 300
with_attention,
source='inst')
self.doc_encoder = DocEncoder(
self.sent_encoder,
hid_dim,
with_attention
)
if ingr_enc_type=='rnn':
self.ingr_encoder = SentEncoder(
data_dir,
emb_dim,
hid_dim,
with_attention,
source='ingr')
elif ingr_enc_type == 'fc':
self.ingr_encoder = SentEncoderFC(
data_dir,
emb_dim,
hid_dim,
with_attention,
source='ingr')
self.bn = nn.BatchNorm1d((2+2+2)*hid_dim)
self.fc = nn.Linear((2+2+2)*hid_dim, z_dim)
elif self.text_info == '010':
if ingr_enc_type=='rnn':
self.ingr_encoder = SentEncoder(
data_dir,
emb_dim,
hid_dim,
with_attention,
source='ingr')
elif ingr_enc_type == 'fc':
self.ingr_encoder = SentEncoderFC(
data_dir,
emb_dim,
hid_dim,
with_attention,
source='ingr')
self.bn = nn.BatchNorm1d(2*hid_dim)
self.fc = nn.Linear(2*hid_dim, z_dim)
def forward(self, recipe_list):
title_list = recipe_list[0][0]
ingredients_list = recipe_list[0][2]
instructions_list = recipe_list[0][4]
# title_list, ingredients_list, instructions_list = recipe_list
if self.text_info == '111':
feat_title = self.sent_encoder(title_list)
feat_ingredients = self.ingr_encoder(ingredients_list)
feat_instructions = self.doc_encoder(instructions_list)
feat = torch.cat([feat_title, feat_ingredients, feat_instructions], dim=1)
feat = torch.tanh(self.fc(self.bn(feat)))
elif self.text_info == '010':
feat_ingredients = self.ingr_encoder(ingredients_list)
feat = torch.tanh(self.fc(self.bn(feat_ingredients)))
# print('recipe', feat.shape)
return feat
class Resnet(nn.Module):
def __init__(self, ckpt_path=None):
super(Resnet, self).__init__()
resnet = models.resnet50(pretrained=False)
num_feat = resnet.fc.in_features
resnet.fc = nn.Linear(num_feat, 101)
if ckpt_path:
resnet.load_state_dict(clean_state_dict(torch.load(ckpt_path)))
modules = list(resnet.children())[:-1] # we do not use the last fc layer.
self.encoder = nn.Sequential(*modules)
def forward(self, image_list):
BS = image_list.shape[0]
return self.encoder(image_list).view(BS, -1)
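# resnet50's pooled features are 2048-d, matching the BatchNorm1d(2048) at the
# start of ImageEncoder's bottleneck below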
class ImageEncoder(nn.Module):
def __init__(self, z_dim, ckpt_path=None):
super(ImageEncoder, self).__init__()
self.resnet = Resnet(ckpt_path)
self.bottleneck = nn.Sequential(
nn.BatchNorm1d(2048),
nn.Linear(2048, z_dim),
nn.Tanh()
)
def forward(self, image_list):
feat = self.resnet(image_list)
feat = self.bottleneck(feat)
# print('image', feat.shape)
return feat
TextEncoder has been modified: because the input recipe_list has length 7, the original unpacking was replaced with direct indexing of the title, ingredients, and instructions:
# title_list, ingredients_list, instructions_list = recipe_list
title_list = recipe_list[0][0]
ingredients_list = recipe_list[0][2]
instructions_list = recipe_list[0][4]
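One way to see what each of the seven elements holds (a debugging sketch, assuming the train_loader built in train_retrieval.py):
import torch
txt, img = next(iter(train_loader))
for i, t in enumerate(txt):
    print(i, t.shape if torch.is_tensor(t) else type(t))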
Output and error message
0%| | 0/400000 [00:00<?, ?it/s]tensor(0.5797, device='cuda:0', grad_fn=<MeanBackward0>)
save retrieval model to: E:\CookGAN\retrieval_model\wandb\run-20220824_153726-s0ojwz5g\files/00000000.ckpt
0%| | 1/400000 [00:08<930:13:11, 8.37s/it]tensor(0.4852, device='cuda:0', grad_fn=<MeanBackward0>)
Traceback (most recent call last):
File "E:/CookGAN/retrieval_model/train_retrieval.py", line 206, in <module>
train(args, batch_idx, text_encoder, image_encoder, optimizer, train_loader, device='cuda')
File "E:/CookGAN/retrieval_model/train_retrieval.py", line 153, in train
loss.backward()
File "C:\Users\10706\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\_tensor.py", line 396, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "C:\Users\10706\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\autograd\__init__.py", line 175, in backward
allow_unreachable=True, accumulate_grad=True) # Calls into the C++ engine to run the backward pass
IndexError: select(): index 20 out of range for tensor of size [20, 32, 300] at dimension 0
Here the three dimensions of [20, 32, 300] are:
- the maximum length of the embedding input
- the batch size
- the embedding dimension (emb_dim)
I tried shrinking the batch size while keeping everything else unchanged; the number of batches that run before the crash grows as the batch size shrinks, so I can only guess the problem is in the input data...
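If it really is the input data, the lens computation in SentEncoder.forward looks suspicious: for a row of sent_list with no padding zeros, count_nonzero(dim=1) + 1 gives max_len + 1, which matches the out-of-range index 20 for the size-20 time dimension in the traceback. A minimal check plus one possible guard (a sketch with hypothetical sizes taken from the error message):
import torch

sent_list = torch.randint(1, 100, (32, 20))   # hypothetical batch, no padding zeros
lens = sent_list.count_nonzero(dim=1) + 1
print(lens.max().item(), sent_list.shape[1])  # 21 vs. max_len 20: one past the end
lens = lens.clamp(max=sent_list.shape[1])     # possible guard before pack_padded_sequence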
Can anyone help?