Training with gradients accumulated over many single samples and training with one large batch should give the same result, but in practice the accumulation version does not train properly (the loss does not decrease), while the large-batch version converges normally. BatchNorm has already been commented out of the network. Could this be caused by the optimizer?
# Accumulate gradients over 600 samples
# batch_size = 1
def Train_batch(self, epoch):
    self.net.train()
    loss_ = 0
    self.optim.zero_grad()
    for i, (data, label) in enumerate(self.train):
        data = data.cuda() if tr.cuda.is_available() else data
        label = label.cuda() if tr.cuda.is_available() else label
        _, prediction = self.net(data)
        loss = train.compute_loss(prediction, label)
        loss.backward()  # gradients accumulate across iterations
        if (i + 1) % 600 == 0:
            # average the accumulated gradients over the 600 samples
            for param in self.net.parameters():
                if param.grad is not None:
                    param.grad /= 600
            # update parameters
            self.optim.step()
            self.optim.zero_grad()
        loss_ += loss.item()
    return loss_ / len(self.train)  # mean loss over the epoch
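For comparison, the more common accumulation pattern scales the loss before backward instead of dividing the gradients afterwards; mathematically the accumulated gradient is the same. This is only a minimal sketch reusing the interfaces above (self.net, self.optim, self.train, train.compute_loss, tr); accum_steps is a hypothetical name for the accumulation length.

# Equivalent accumulation: divide the loss by the number of accumulated samples
def Train_batch(self, epoch, accum_steps=600):
    self.net.train()
    loss_ = 0
    self.optim.zero_grad()
    for i, (data, label) in enumerate(self.train):
        data = data.cuda() if tr.cuda.is_available() else data
        label = label.cuda() if tr.cuda.is_available() else label
        _, prediction = self.net(data)
        loss = train.compute_loss(prediction, label)
        (loss / accum_steps).backward()  # gradients accumulate already averaged
        if (i + 1) % accum_steps == 0:
            self.optim.step()   # update parameters once per accum_steps samples
            self.optim.zero_grad()
        loss_ += loss.item()
    return loss_ / len(self.train)  # mean loss over the epoch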
# batch_size = 600
def Train_batch(self, epoch):
    self.net.train()
    loss_ = 0
    batch_count = 0  # number of batches seen
    for data, label in self.train:
        data = data.cuda() if tr.cuda.is_available() else data
        label = label.cuda() if tr.cuda.is_available() else label
        self.optim.zero_grad()
        _, prediction = self.net(data)
        loss = train.compute_loss(prediction, label)
        loss.backward()    # compute gradients
        self.optim.step()  # update parameters
        loss_ = loss_ + loss.item()
        batch_count += 1   # count batches
    return loss_ / batch_count  # mean loss over the epoch
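One way to check whether the two setups really produce the same update is to compare the gradients directly on the same 600 samples. The sketch below is only illustrative: net, compute_loss, data_600 and label_600 are hypothetical stand-ins for the actual model, loss function, and one large batch, and it assumes compute_loss uses mean reduction and the model has no stochastic layers (dropout) or BatchNorm.

import torch as tr

def compare_grads(net, compute_loss, data_600, label_600):
    # Gradient from one large batch of 600 samples
    net.zero_grad()
    _, pred = net(data_600)
    compute_loss(pred, label_600).backward()
    big_batch_grads = [p.grad.clone() for p in net.parameters() if p.grad is not None]

    # Gradient accumulated sample by sample, averaged over the batch size
    net.zero_grad()
    for x, y in zip(data_600.split(1), label_600.split(1)):
        _, pred = net(x)
        (compute_loss(pred, y) / data_600.size(0)).backward()
    accum_grads = [p.grad.clone() for p in net.parameters() if p.grad is not None]

    # Under the assumptions above these should match up to floating-point error
    for g_big, g_acc in zip(big_batch_grads, accum_grads):
        print((g_big - g_acc).abs().max().item())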