Big就是Big · 2022-03-18 14:51

Why does for i, (input, label) in enumerate(dataloader) raise IndexError: list index out of range?

Problem description and background

The for i, (input, label) in enumerate(dataloader) loop in the training part of my program runs fine, but the same loop in the validation part raises IndexError: list index out of range. I have not been able to figure out why and would appreciate some guidance.

Relevant code (text, not a screenshot)

# Imports assumed by the code below; net and AverageMeter are project-specific modules not shown here
import os
import time
import random
from collections import OrderedDict

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torchvision.transforms as T
from torch.utils.data import DataLoader, random_split, SubsetRandomSampler
from torchvision.datasets import ImageFolder
from tqdm import tqdm

def validate(model, classifier, val_loader, criterion, epoch):

    # switch to evaluate mode
    model.eval()
    classifier.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    with torch.no_grad():

        end = time.time()

        # for batch_idx, (input, target) in enumerate(tqdm(val_loader, disable=False)):   # <-- error occurs here
        for batch_idx, (input, target) in enumerate(val_loader):                          # <-- error occurs here

            # Get the inputs and labels
            input, target = input.float(), target.long()

            # Reshape the tensors
            input, target = input.reshape(-1, 3, 224, 224), target.reshape(-1, )

            # Move to the GPU
            input, target = input.cuda(), target.cuda()

            # Compute the model output
            feats = model(input)
            output = classifier(feats)
            loss = criterion(output, target)

            # Update the loss and accuracy meters
            batch_size = target.size(0)
            losses.update(loss.item(), batch_size)

            pred = torch.argmax(output, dim=1)
            acc.update(torch.sum(target == pred).item() / batch_size , batch_size)

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # Print statistics every 10 batches
            if (batch_idx + 1) % 10 == 0:
                print('Val: [{0}][{1}/{2}]\t'
                      'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'DT {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'loss {loss.val:.3f} ({loss.avg:.3f})\t'
                      'acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                          epoch, batch_idx + 1, len(val_loader),
                          batch_time=batch_time, data_time=data_time,
                          loss=losses, acc=acc))
    return losses.avg, acc.avg

def train(model, classifier, train_loader, optimizer, epoch):
    model.train()
    classifier.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    end = time.time()

    for batch_idx, (input, target ) in enumerate(tqdm(train_loader, disable=False)):

        # Get inputs and target
        input, target = input.float(), target.long()

        #Reshape augmented tensors
        input, target = input.reshape(-1, 3, 224, 224), target.reshape(-1, )
        
        # Move the variables to cuda
        input, target = input.cuda(), target.cuda()
        
        # compute output
        feats = model(input)
        output = classifier(feats)
        
        ## Calculate the loss
        loss = F.cross_entropy(output, target, reduction= 'none')

        loss_sorted, indices = torch.sort(loss, descending=True)

        # Select top_K values for determining the hardness in mini-batch (alpha x batch_size)
        top_k = round(0.1 * target.size(0))

        # Calculate the adaptive hardness threshold
        a = 0.7
        b = 0.2
        thres = a*(1-(batch_idx/len(train_loader))) + b

        # Select the hardness in each mini-batch based on the threshold (thres)
        hard_samples = loss_sorted[0:top_k]
        total_sum_hard_samples = sum(hard_samples)

        # Check whether total sum exceeds the threshold and update the loss accordingly (Eq. 2 in the paper)
        if total_sum_hard_samples > (thres * sum(loss_sorted)):
            output = output[indices, :]
            target = target[indices]
            top_k_output = output[0:top_k]
            top_k_target = target[0:top_k]
            loss = F.cross_entropy(top_k_output, top_k_target, reduction='mean')
            print(' curriculum loss')
        else:
            loss = F.cross_entropy(output, target, reduction='mean')

        # compute gradient and do SGD step #####
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        #compute loss and accuracy #####
        batch_size = target.size(0)
        losses.update(loss.item(), batch_size)

        pred = torch.argmax(output, dim=1)
        acc.update(torch.sum(target == pred).item() / batch_size, batch_size)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print statistics every 10 batches
        if (batch_idx + 1) % 10 == 0:
            print('Train: [{0}][{1}/{2}]\t'
                  'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'DT {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'loss {loss.val:.3f} ({loss.avg:.3f})\t'
                  'acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch, batch_idx + 1, len(train_loader),
                      batch_time=batch_time, data_time=data_time,
                      loss=losses, acc=acc))
    return losses.avg,acc.avg

def main():

    mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
    transforms = T.Compose([
        T.Resize((224,224)),
        T.ToTensor(),
        T.Normalize(mean, std)
    ])
    dataset = ImageFolder(
        r"H:\panqiwei-speaker\birdsong recognition\birdsimages",
        transform=transforms,
    )
    
    length = len(dataset)
    tr_len = int(0.7 * length)
    val_len = int((length - tr_len) / 2)
    te_len = length - tr_len - val_len

    tr_dataset, val_dataset, te_dataset = random_split(dataset, [tr_len, val_len, te_len])

    indices = list(range(tr_len))
    split = int(np.floor(val_len))
    np.random.shuffle(indices)
    train_idx, val_idx = indices[split:], indices[:split]

    train_sampler = SubsetRandomSampler(train_idx)
    val_sampler = SubsetRandomSampler(val_idx)

    print('total number of train samples in the dataset', len(train_idx))
    print('total number of val samples in the dataset', len(val_idx))

    # Training settings
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    params ={
        'batch_size': 128,
        'lr': 3e-4,
        'pre_training': False,
        'device': device,
        'checkpoint': False,
        'epochs': 200,
        'warm_up': False        
    }

    # Data loaders
    tr_loader = DataLoader(tr_dataset, batch_size=params['batch_size'],
                           shuffle=True if train_sampler is None else False,
                           num_workers=0, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=params['batch_size'], sampler=val_sampler,
                            shuffle=False, num_workers=0, pin_memory=True)
    te_loader = DataLoader(te_dataset, batch_size=params['batch_size'])
    loaders = [tr_loader, val_loader, te_loader]

    # Network model
    # model = resnet34(pretrained=params['pre_training'], num_class=20).to(device)
    # train_epochs(model, loaders, params)

    model = net.TripletNet_Finetune('resnet18')

    state_dict = torch.load('./models/model_9_0.0289.pt')

    new_state_dict = OrderedDict()
    
    for k, v in state_dict['model'].items():
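        # k[7:] drops the first 7 characters, presumably a "module." prefix added by DataParallel during pre-training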
        name = k[7:]
        new_state_dict[name] = v

    print('==> loading pre-trained model') 
    model.load_state_dict(new_state_dict, strict=False)


    idx = 0
    for layer_name, param in model.named_parameters():
        print(layer_name, '-->',idx)
        idx += 1

    for name, param in enumerate(model.named_parameters()):
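        # name is the enumerate index here, so the < 0 check never triggers and every parameter stays trainable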
        if name < 0:
            print("module", name,"was frozen")
            param = param[1]
            param.requires_grad = False

        else:
            print("module", name, "was not frozen")
            param = param[1]
            param.requires_grad = True
    
    print('==> finetuning classification')
    classifier = net.FinetuneResNet(20)

    # loss 
    criterion = nn.CrossEntropyLoss()

    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()
        classifier = classifier.cuda()
        cudnn.benchmark = True

    #optimizer 
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, list(model.parameters()) + list(classifier.parameters())),
                           lr=3e-4, betas=(0.9, 0.999), weight_decay=1e-4)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 40, 60, 80], gamma=0.95)

    #training model
    start_epoch = 1
    best_val_acc = -1

    if os.path.isfile('./Save_Result'):
        print("=> loading checkpoint '{}'".format('./Save_Result') )
        checkpoint = torch.load('./Save_Result')
        model.load_state_dict(checkpoint['model'])
        classifier.load_state_dict(checkpoint['classifier'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch'] + 1
        best_val_acc = checkpoint['val_acc']
        print("=> loaded checkpoint '{}' (epoch {})"
                .format('./Save_Result', checkpoint['epoch']))
        del checkpoint
        torch.cuda.empty_cache()
    else:
        print("=> no checkpoint found at '{}'".format('./Save_Result'))

    #start log 
    with open(os.path.join('./Save_Results/', 'fine_tuned_results.csv'), 'w') as f:
        f.write('epoch, train_loss, train_acc, val_loss, val_acc\n')

    #routine
    for epoch in range(start_epoch, 100 +1):
        
        time_start = time.time()

        train_losses, train_acc = train(model, classifier, tr_loader, optimizer, epoch)
        print('Epoch time:{:.2f} s.'.format(time.time() - time_start))

        print("==> validating the fine-tuned model ...")
        val_losses, val_acc = validate(model, classifier, val_loader, criterion, epoch)

        #log result
        with open(os.path.join('./Save_Results/', 'fine_tuned_results.csv'), 'a') as f:
            f.write('%03d,%0.6f,%0.6f,%0.6f,%0.6f\n' % (epoch, train_losses, train_acc, val_losses, val_acc))

        scheduler.step()

        #save model every 10 epochs
        if epoch % 10 == 0:
            print('==> Saving ...')
            state = {
                'params': params,
                'model': model.state_dict(),
                'classifier': classifier.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch,
                'train_loss': train_losses,
                'train_acc': train_acc,
                'val_acc': val_acc,
                'val_loss': val_losses
            }
            torch.save(state, '{}/fine_tuned_model_{}.pt'.format('./Save_Results/', epoch))

        #save model for the best val 
        if val_acc > best_val_acc:
            print('==> Saving...')
            state = {
                'params': params,
                'model': model.state_dict(),
                'classifier': classifier.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch,
                'train_loss': train_losses,
                'train_acc': train_acc,
                'val_acc': val_acc,
                'val_loss': val_losses
            }
            torch.save(state, '{}/best_fine_tuned_model_{}.pt'.format('./Save_Results/', epoch))
            best_val_acc = val_acc

            #help release GPU memory
            del state
        torch.cuda.empty_cache()

if __name__ == "__main__" :

    seed = 42
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    torch.cuda.manual_seed_all(seed)

    main()
Output and error message

IndexError: list index out of range

My approach and what I have tried
What I want to achieve
Answers

深度狂想 2023-05-18 16:16

    I ran into a similar error. It turned out that the data used at test time came from the test split, but the indices assigned to each client followed the training set's partition scheme, i.e. I was using training-set indices to look up samples in the test set, so going out of range was only natural.
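    The posted code matches this pattern: val_sampler is built from indices drawn from range(tr_len) (the training split), but it is attached to val_dataset, which only holds val_len items, so any sampled index >= val_len runs past the end of that Subset and raises IndexError. A minimal sketch of two possible fixes, assuming the same variable names as in main() (not tested against the original project):

    # Option 1: random_split already returns disjoint Subsets, so drop the samplers
    # and let the DataLoader shuffle the training subset directly.
    tr_loader = DataLoader(tr_dataset, batch_size=params['batch_size'], shuffle=True,
                           num_workers=0, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=params['batch_size'], shuffle=False,
                            num_workers=0, pin_memory=True)

    # Option 2: keep SubsetRandomSampler, but build both samplers over the full dataset
    # with disjoint index ranges taken from range(len(dataset)).
    indices = list(range(len(dataset)))
    np.random.shuffle(indices)
    train_idx, val_idx = indices[:tr_len], indices[tr_len:tr_len + val_len]

    train_sampler = SubsetRandomSampler(train_idx)
    val_sampler = SubsetRandomSampler(val_idx)

    tr_loader = DataLoader(dataset, batch_size=params['batch_size'], sampler=train_sampler,
                           num_workers=0, pin_memory=True)
    val_loader = DataLoader(dataset, batch_size=params['batch_size'], sampler=val_sampler,
                            num_workers=0, pin_memory=True)

    Either way, the indices a sampler yields must index the dataset its DataLoader actually wraps; mixing the random_split subsets with samplers built over the training range is what makes the validation loop go out of range.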

