Problem description and background
The training part of my program runs fine through its for i, (input, label) in enumerate(dataloader) loop, but the same kind of loop in the validation stage, for i, (input, label) in enumerate(dataloader), raises IndexError: list index out of range. I have not been able to figure out why and would appreciate some guidance.
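To make the question easier to follow, this is a quick sanity check I am considering adding right before the validation loop. It is only a hypothetical snippet (not yet in my script); the names val_dataset and val_idx follow the main() code below:

    # Hypothetical check: compare how many samples the validation loader can
    # actually index against the indices the sampler draws from.
    print('val_dataset size:', len(val_dataset))
    print('val sampler indices:', len(val_idx), 'max index:', max(val_idx))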
Relevant code (pasted as text, not a screenshot)
# Imports used by the snippet below. `net` (my model definitions) and `AverageMeter`
# come from my own project files, so their imports are omitted here.
import os
import random
import time
from collections import OrderedDict

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as T
from torch.utils.data import DataLoader, SubsetRandomSampler, random_split
from torchvision.datasets import ImageFolder
from tqdm import tqdm


def validate(model, classifier, val_loader, criterion, epoch):
    # switch to evaluate mode
    model.eval()
    classifier.eval()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    with torch.no_grad():
        end = time.time()
        # for batch_idx, (input, target) in enumerate(tqdm(val_loader, disable=False)):  # error is raised here
        for batch_idx, (input, target) in enumerate(val_loader):  # error is raised here
            # get inputs and labels
            input, target = input.float(), target.long()
            # reshape the tensors
            input, target = input.reshape(-1, 3, 224, 224), target.reshape(-1)
            # move to the GPU
            input, target = input.cuda(), target.cuda()
            # compute the output
            feats = model(input)
            output = classifier(feats)
            loss = criterion(output, target)
            # update loss and accuracy
            batch_size = target.size(0)
            losses.update(loss.item(), batch_size)
            pred = torch.argmax(output, dim=1)
            acc.update(torch.sum(target == pred).item() / batch_size, batch_size)
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            # statistics
            if (batch_idx + 1) % 10 == 0:
                print('Val: [{0}][{1}/{2}]\t'
                      'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'DT {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'loss {loss.val:.3f} ({loss.avg:.3f})\t'
                      'acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                          epoch, batch_idx + 1, len(val_loader), batch_time=batch_time,
                          data_time=data_time, loss=losses, acc=acc))
    return losses.avg, acc.avg
def train(model, classifier, train_loader, optimizer, epoch):
    model.train()
    classifier.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    end = time.time()  # was `end = time.time` (missing call), which broke the timing below
    for batch_idx, (input, target) in enumerate(tqdm(train_loader, disable=False)):
        # Get inputs and target
        input, target = input.float(), target.long()
        # Reshape augmented tensors
        input, target = input.reshape(-1, 3, 224, 224), target.reshape(-1)
        # Move the variables to cuda
        input, target = input.cuda(), target.cuda()
        # compute output
        feats = model(input)
        output = classifier(feats)
        # Calculate the per-sample loss
        loss = F.cross_entropy(output, target, reduction='none')
        loss_sorted, indices = torch.sort(loss, descending=True)
        # Select top-k values for determining the hardness in the mini-batch (alpha x batch_size)
        top_k = round(0.1 * target.size(0))
        # Calculate the adaptive hardness threshold
        a = 0.7
        b = 0.2
        thres = a * (1 - (batch_idx / len(train_loader))) + b
        # Select the hard samples in each mini-batch based on the threshold (thres)
        hard_samples = loss_sorted[0:top_k]
        total_sum_hard_samples = sum(hard_samples)
        # Check whether the total sum exceeds the threshold and update the loss accordingly (Eq. 2 in the paper)
        if total_sum_hard_samples > (thres * sum(loss_sorted)):
            output = output[indices, :]
            target = target[indices]
            top_k_output = output[0:top_k]
            top_k_target = target[0:top_k]
            loss = F.cross_entropy(top_k_output, top_k_target, reduction='mean')
            print(' curriculum loss')
        else:
            loss = F.cross_entropy(output, target, reduction='mean')
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # update loss and accuracy
        batch_size = target.size(0)
        losses.update(loss.item(), batch_size)
        pred = torch.argmax(output, dim=1)
        acc.update(torch.sum(target == pred).item() / batch_size, batch_size)
        # measure elapsed time
        batch_time.update(time.time() - end)  # was `time.time() - end()`
        end = time.time()
        # print statistics every N batches
        if (batch_idx + 1) % 10 == 0:
            print('Train: [{0}][{1}/{2}]\t'
                  'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'DT {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'loss {loss.val:.3f} ({loss.avg:.3f})\t'
                  'acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch, batch_idx + 1, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, acc=acc))
    return losses.avg, acc.avg
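AverageMeter above lives in one of my own helper files. So the snippet is self-contained, here is a minimal version consistent with how it is used here (update(val, n), .val, .avg); my actual class may differ slightly:

class AverageMeter:
    """Keep the latest value and a running weighted average."""
    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count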
def main():
    mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
    transforms = T.Compose([
        T.Resize((224, 224)),
        T.ToTensor(),
        T.Normalize(mean, std)
    ])
    dataset = ImageFolder(
        "H:\\panqiwei-speaker\\birdsong recognition\\birdsimages",
        transform=transforms,
    )
    length = len(dataset)
    tr_len = int(0.7 * length)
    val_len = int((length - tr_len) / 2)
    te_len = length - tr_len - val_len
    tr_dataset, val_dataset, te_dataset = random_split(dataset, [tr_len, val_len, te_len])
    indices = list(range(tr_len))
    split = int(np.floor(val_len))
    np.random.shuffle(indices)
    train_idx, val_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    val_sampler = SubsetRandomSampler(val_idx)
    print('total number of train samples in the dataset', len(train_idx))
    print('total number of val samples in the dataset', len(val_idx))
    # hyper-parameters
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    params = {
        'batch_size': 128,
        'lr': 3e-4,
        'pre_training': False,
        'device': device,
        'checkpoint': False,
        'epochs': 200,
        'warm_up': False
    }
    # data loaders
    tr_loader = DataLoader(tr_dataset, batch_size=params['batch_size'],
                           shuffle=True if train_sampler is None else False,
                           num_workers=0, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=params['batch_size'], sampler=val_sampler,
                            shuffle=False, num_workers=0, pin_memory=True)
    te_loader = DataLoader(te_dataset, batch_size=params['batch_size'])
    loaders = [tr_loader, val_loader, te_loader]
    # network model
    # model = resnet34(pretrained=params['pre_training'], num_class=20).to(device)
    # train_epochs(model, loaders, params)
    model = net.TripletNet_Finetune('resnet18')
    state_dict = torch.load('./models/model_9_0.0289.pt')
    new_state_dict = OrderedDict()
    for k, v in state_dict['model'].items():
        name = k[7:]  # drop the first 7 characters of each key (the 'module.' prefix)
        new_state_dict[name] = v
    print('==> loading pre-trained model')
    model.load_state_dict(new_state_dict, strict=False)
    idx = 0
    for layer_name, param in model.named_parameters():
        print(layer_name, '-->', idx)
        idx += 1
    for name, param in enumerate(model.named_parameters()):
        if name < 0:
            print("module", name, "was frozen")
            param = param[1]
            param.requires_grad = False
        else:
            print("module", name, "was not frozen")
            param = param[1]
            param.requires_grad = True
    print('==> finetuning classification')
    classifier = net.FinetuneResNet(20)
    # loss
    criterion = nn.CrossEntropyLoss()
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()
        classifier = classifier.cuda()
        cudnn.benchmark = True
    # optimizer
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  list(model.parameters()) + list(classifier.parameters())),
                           lr=3e-4, betas=(0.9, 0.999), weight_decay=1e-4)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 40, 60, 80], gamma=0.95)
    # training
    start_epoch = 1
    best_val_acc = -1
    if os.path.isfile('./Save_Result'):
        print("=> loading checkpoint '{}'".format('./Save_Result'))
        checkpoint = torch.load('./Save_Result')
        model.load_state_dict(checkpoint['model'])
        classifier.load_state_dict(checkpoint['classifier'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch'] + 1  # was checkpoint['epoh']
        best_val_acc = checkpoint['val_acc']
        print("=> loaded checkpoint '{}' (epoch {})"
              .format('./Save_Result', checkpoint['epoch']))
        del checkpoint
        torch.cuda.empty_cache()
    else:
        print("=> no checkpoint found at '{}'".format('./Save_Result'))
    # start log
    with open(os.path.join('./Save_Results/', 'fine_tuned_results.csv'), 'w') as f:
        f.write('epoch, train_loss, train_acc, val_loss, val_acc\n')
    # routine
    for epoch in range(start_epoch, 100 + 1):
        time_start = time.time()
        train_losses, train_acc = train(model, classifier, tr_loader, optimizer, epoch)
        print('Epoch time: {:.2f} s.'.format(time.time() - time_start))
        print("==> validating the fine-tuned model ...")
        val_losses, val_acc = validate(model, classifier, val_loader, criterion, epoch)
        # log results
        with open(os.path.join('./Save_Results/', 'fine_tuned_results.csv'), 'a') as f:
            f.write('%03d,%0.6f,%0.6f,%0.6f,%0.6f,\n' % ((epoch + 1), train_losses, train_acc, val_losses, val_acc))
        scheduler.step()
        # save the model every 10 epochs
        if epoch % 10 == 0:
            print('==> Saving ...')
            state = {
                'args': args,  # NOTE: args is not defined anywhere in this snippet
                'model': model.state_dict(),
                'classifier': classifier.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch,
                'train_loss': train_losses,
                'train_acc': train_acc,
                'val_acc': val_acc,
                'val_loss': val_losses
            }
            torch.save(state, '{}/fine_tuned_model_{}.pt'.format('./Save_Results/', epoch))
        # save the model with the best val accuracy
        if val_acc > best_val_acc:
            print('==> Saving...')
            state = {
                'args': args,
                'model': model.state_dict(),
                'classifier': classifier.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch,
                'train_loss': train_losses,
                'train_acc': train_acc,
                'val_acc': val_acc,
                'val_loss': val_losses
            }
            torch.save(state, '{}/best_fine_tuned_model_{}.pt'.format('./Save_Results/', epoch))
            best_val_acc = val_acc
            # help release GPU memory
            del state
            torch.cuda.empty_cache()
if __name__ == "__main__":
    seed = 42
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    main()
Run result and error message
IndexError: list index out of range
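For reference, my understanding of the indexing path is that the DataLoader asks the sampler for indices and uses them directly to index the dataset it wraps, so with random_split the sampler's indices have to be valid positions inside that Subset. A tiny self-contained example with made-up sizes (not my data), just to illustrate the mechanism:

import torch
from torch.utils.data import DataLoader, SubsetRandomSampler, TensorDataset, random_split

toy = TensorDataset(torch.randn(100, 3), torch.zeros(100).long())
tr_part, val_part, te_part = random_split(toy, [70, 15, 15])
# The indices handed to the sampler are positions *within* val_part (0..len(val_part)-1),
# not positions in the full 100-sample dataset.
sampler = SubsetRandomSampler(list(range(len(val_part))))
loader = DataLoader(val_part, batch_size=4, sampler=sampler)
for x, y in loader:
    pass  # iterates without an error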