我是初学者,用网上的代码在自己的数据集上训练,使用预训练的 ResNet50 进行微调。已经调整过学习率、batch_size、img_size 等参数,也换过优化器,但训练效果都差不多。还有什么办法可以提高准确率呢?
以下是训练代码:
import time
import torch
from torch import nn
from torch.utils.data import DataLoader
from PIL import Image
import torchvision.transforms as transforms
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from torchvision.models import resnet50 # ResNet系列
from torchvision import models
from torch.utils.data import Dataset
# Per-channel normalization applied after ToTensor(); the statistics below
# are specific to the dataset being trained on.
transform_BZ = transforms.Normalize(
    mean=[0.44660836, 0.30800995, 0.21277349],
    std=[0.18726136, 0.13087553, 0.09021662],
)
class LoadData(Dataset):
    """Image-classification dataset driven by a tab-separated index file.

    Every non-blank line of the index file holds an image path and an
    integer class label separated by a tab.  Each image is converted to
    RGB, letterboxed onto a black square canvas of side ``img_size`` and
    then passed through the training or validation transform pipeline.

    Args:
        txt_path: Path of the UTF-8 encoded index file.
        train_flag: When true, ``__getitem__`` applies the training pipeline
            (random horizontal/vertical flips); otherwise the validation
            pipeline without augmentation.
        img_size: Side length, in pixels, of the square image fed to the
            network.  Defaults to 512, the value that was previously
            hard-coded.
    """

    def __init__(self, txt_path, train_flag=True, img_size=512):
        self.imgs_info = self.get_images(txt_path)
        self.train_flag = train_flag
        self.img_size = img_size
        # padding_black() already yields an img_size x img_size image, so the
        # Resize step below leaves it unchanged; it is kept as a safeguard.
        self.train_tf = transforms.Compose([
            transforms.Resize(self.img_size),
            transforms.RandomHorizontalFlip(),  # random left-right flip
            transforms.RandomVerticalFlip(),    # random up-down flip
            transforms.ToTensor(),              # PIL image -> tensor
            transform_BZ,                       # per-channel normalization
        ])
        self.val_tf = transforms.Compose([
            transforms.Resize(self.img_size),
            transforms.ToTensor(),
            transform_BZ,
        ])

    def get_images(self, txt_path):
        """Read the index file and return a list of ``[path, label]`` rows.

        Blank lines (for example a trailing empty line) are skipped; they
        would otherwise become ``['']`` and break the two-value unpacking
        in ``__getitem__``.
        """
        with open(txt_path, 'r', encoding='utf-8') as f:
            return [line.strip().split('\t') for line in f if line.strip()]

    def padding_black(self, img):
        """Fit *img* inside an ``img_size`` square and pad the rest with black.

        The image is scaled (up or down) so its longer side equals
        ``img_size`` while keeping the aspect ratio, then pasted centred on a
        black RGB canvas.
        """
        w, h = img.size
        scale = self.img_size / max(w, h)
        # Clamp to 1 px: for extreme aspect ratios int() can truncate the
        # shorter side to 0, which PIL's resize() rejects.
        new_w = max(1, int(w * scale))
        new_h = max(1, int(h * scale))
        img_fg = img.resize((new_w, new_h))
        canvas = Image.new("RGB", (self.img_size, self.img_size))
        canvas.paste(img_fg, ((self.img_size - new_w) // 2,
                              (self.img_size - new_h) // 2))
        return canvas

    def __getitem__(self, index):
        """Return ``(image_tensor, label)`` for the sample at *index*."""
        img_path, label = self.imgs_info[index]
        # The context manager closes the underlying file as soon as the
        # pixel data has been copied by convert(), so file handles are not
        # left open until garbage collection.
        with Image.open(img_path) as src:
            img = src.convert('RGB')
        img = self.padding_black(img)
        pipeline = self.train_tf if self.train_flag else self.val_tf
        return pipeline(img), int(label)

    def __len__(self):
        """Return the number of samples listed in the index file."""
        return len(self.imgs_info)
def WriteData(fname, *args):
    """Append one log record to *fname*.

    Every positional value is converted with ``str`` and followed by a tab;
    the record is terminated by a newline.
    """
    with open(fname, 'a+') as log_file:
        record = "".join(str(item) + "\t" for item in args)
        log_file.write(record + "\n")
def train(dataloader, model, loss_fn, optimizer, device):
    """Run one training epoch and return the mean per-sample loss.

    Args:
        dataloader: Iterable of ``(X, y)`` batches exposing a ``dataset``
            attribute (its length is used only for the progress print-out).
        model: Network to optimise; it is switched to training mode here.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer holding the model's parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The epoch's average loss per sample as a Python ``float`` (``0.0``
        when the loader yields no batches).
    """
    size = len(dataloader.dataset)
    # validate() puts the model in eval mode; without switching back, every
    # epoch after the first would train with BatchNorm/Dropout in inference
    # behaviour.
    model.train()
    # A loss with reduction="sum" is already a per-batch total; otherwise the
    # batch mean is weighted by the batch size below.
    sum_reduced = getattr(loss_fn, "reduction", "mean") == "sum"
    total_loss = 0.0
    seen = 0
    # `batch` is the index of the batch, not the batch size.
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)
        # Back-propagate and update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # .item() yields a plain float, so the running total does not keep
        # autograd history alive across the whole epoch.
        batch_loss = loss.item()
        batch_len = len(X)
        total_loss += batch_loss if sum_reduced else batch_loss * batch_len
        seen += batch_len
        # Report progress every 10 batches.
        if batch % 10 == 0:
            current = batch * batch_len
            print(f"loss: {batch_loss:>7f} [{current:>5d}/{size:>5d}]")
    return total_loss / seen if seen else 0.0
def validate(dataloader, model, loss_fn, device):
    """Evaluate *model* on *dataloader* and report accuracy and mean loss.

    Args:
        dataloader: Iterable of ``(X, y)`` batches.
        model: Network to evaluate; it is left in eval mode on return.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(accuracy, mean_loss)`` where ``accuracy`` is the fraction of
        correctly classified samples in ``[0, 1]`` and ``mean_loss`` is the
        average loss per sample.
    """
    # Switch to inference behaviour for layers such as BatchNorm/Dropout.
    model.eval()
    # A loss with reduction="sum" is already a per-batch total; otherwise the
    # batch mean is weighted by the batch size so the result is a true
    # per-sample average even when the last batch is smaller.
    sum_reduced = getattr(loss_fn, "reduction", "mean") == "sum"
    test_loss, correct = 0.0, 0.0
    seen = 0
    # No parameter updates happen here, so gradient tracking is disabled.
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            batch_loss = loss_fn(pred, y).item()
            batch_len = len(X)
            test_loss += batch_loss if sum_reduced else batch_loss * batch_len
            # Count predictions whose arg-max class equals the label.
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            seen += batch_len
    # Guard against an empty loader instead of dividing by zero.
    denom = max(seen, 1)
    test_loss /= denom
    correct /= denom
    print(f"correct = {correct}, Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return correct, test_loss
if __name__ == '__main__':
    # Fine-tune a ResNet-50 on a 5-class dataset, logging metrics and saving
    # checkpoints after every epoch.
    batch_size = 16

    # Build the training and validation datasets and their loaders.
    train_data = LoadData("/content/drive/MyDrive/data/train2500.txt", True)
    valid_data = LoadData("/content/drive/MyDrive/data/test2500.txt", False)
    train_dataloader = DataLoader(dataset=train_data, num_workers=4, pin_memory=True,
                                  batch_size=batch_size, shuffle=True)
    valid_dataloader = DataLoader(dataset=valid_data, num_workers=4, pin_memory=True,
                                  batch_size=batch_size)

    # Train on the GPU when one is available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using {device} device")

    finetune_net = resnet50(num_classes=5)
    # NOTE(review): the checkpoint is loaded into a 5-class model, so it
    # presumably already has a 5-way fc layer; an ImageNet (1000-class)
    # checkpoint would likely fail here on the fc shapes - confirm.
    state_dict = torch.load("/content/drive/MyDrive/data/resnet50_best.pth", map_location='cpu')
    finetune_net.load_state_dict(state_dict)
    # Re-initialise the classifier weights; the loaded fc bias is kept.
    nn.init.xavier_normal_(finetune_net.fc.weight)
    # Move to the target device once, before the optimizer captures the
    # parameters.
    finetune_net = finetune_net.to(device)

    head_names = ("fc.weight", "fc.bias")
    # Backbone parameters use the base learning rate ...
    parms_1x = [value for name, value in finetune_net.named_parameters()
                if name not in head_names]
    # ... while the final fc layer uses 10x the base learning rate.
    parms_10x = [value for name, value in finetune_net.named_parameters()
                 if name in head_names]

    # Cross-entropy loss for classification.
    loss_fn = nn.CrossEntropyLoss()

    # Adam optimizer with two parameter groups (backbone / classifier head).
    learning_rate = 1e-3
    optimizer = torch.optim.Adam([
        {'params': parms_1x},
        {'params': parms_10x, 'lr': learning_rate * 10},
    ], lr=learning_rate)

    epochs = 3
    # The "best" checkpoint is chosen by validation loss; training loss
    # keeps decreasing as the model overfits and would nearly always pick
    # the latest epoch.
    best_val_loss = float("inf")
    save_root = "/content/drive/MyDrive/data/"

    for t in range(epochs):
        print(f"Epoch {t + 1}\n-------------------------------")
        time_start = time.time()
        avg_loss = train(train_dataloader, finetune_net, loss_fn, optimizer, device)
        time_end = time.time()
        print(f"train time: {(time_end - time_start)}")

        val_accuracy, val_loss = validate(valid_dataloader, finetune_net, loss_fn, device)

        # Append this epoch's metrics to the log file.
        WriteData(save_root + "resnet50_3.txt",
                  "epoch", t,
                  "train_loss", avg_loss,
                  "val_loss", val_loss,
                  "val_accuracy", val_accuracy)

        # Periodic snapshot every 5 epochs.
        if t % 5 == 0:
            torch.save(finetune_net.state_dict(),
                       save_root + "resnet50_3_epoch" + str(t) + "_loss_" + str(avg_loss) + ".pth")

        # Always keep the most recent weights.
        torch.save(finetune_net.state_dict(), save_root + "resnet50_3_last.pth")

        # Keep the weights with the lowest validation loss seen so far.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(finetune_net.state_dict(), save_root + "resnet50_3_best.pth")