於菟601 2022-05-13 14:26 采纳率: 50%
浏览 88
已结题

字符识别训练出来的模型识别率太差,是否配置有误?

问题遇到的现象和发生背景

正在进行一个字符识别模型的训练,跑了两个多小时最后保存的模型却识别率极低,而且不见中文(省)结果:

img

怀疑可能配置或者数据集出了点问题

问题相关代码,请勿粘贴截图

配置如下:
import warnings
import torch as t

class DefaultConfig(object):
env = 'default' # visdom 环境
vis_port =8097 # visdom 端口
model = 'SqueezeNetGray' # 使用的模型,名字必须与models/init.py中的名字一致
classifier_num = 65 # 分类器最终的分类数量
gray = True # 读取图片是否为灰度图

train_data_root = './imgs/images/cnn_char_train/'  # 训练集存放路径
test_data_root = './data/test/char/'  # 测试集存放路径
load_model_path = None  # 加载预训练的模型的路径,为None代表不加载

batch_size = 16  # batch size
use_gpu = True  # user GPU or not
num_workers = 0  # how many workers for loading data
print_freq = 20  # print info every N batch

debug_file = '/tmp/debug'  # if os.path.exists(debug_file): enter ipdb
result_file = 'result.csv'
id_file = './findplate/plate.csv'

max_epoch = 100
lr = 0.001  # initial learning rate
lr_decay = 0.5  # when val_loss increase, lr = lr*lr_decay
weight_decay = 0e-5  # 损失函数


def _parse(self, kwargs):
    """
    根据字典kwargs 更新 config参数
    """
    for k, v in kwargs.items():
        if not hasattr(self, k):
            warnings.warn("Warning: opt has not attribut %s" % k)
        setattr(self, k, v)
    
    self.device =t.device('cuda') if self.use_gpu else t.device('cpu')


    print('user config:')
    for k, v in self.__class__.__dict__.items():
        if not k.startswith('_'):
            print(k, getattr(self, k))

opt = DefaultConfig()

训练部分如下:

from findplate.config import opt
import os
import torch as t
from findplate import models
from findplate.data.dataset import MyDataset
from torch.utils.data import DataLoader
from torchnet import meter
from findplate.utils.visualize import Visualizer
from tqdm import tqdm
from torchvision import transforms as T

def write_csv(results,file_name,col1_name,col2_name):
import csv
with open(file_name,'w',newline='') as f:
writer = csv.writer(f)
writer.writerow([col1_name,col2_name])
writer.writerows(results)

def train(**kwargs):
opt._parse(kwargs)
vis = Visualizer(opt.env,port = opt.vis_port)

# step1: configure model
model = getattr(models, opt.model)()
if opt.load_model_path:
    model.load(opt.load_model_path)
model.to(opt.device)

# step2: data
train_data = MyDataset(opt.train_data_root,train=True)
val_data = MyDataset(opt.train_data_root,train=False)
train_dataloader = DataLoader(train_data,opt.batch_size,
                    shuffle=True,num_workers=opt.num_workers)
val_dataloader = DataLoader(val_data,opt.batch_size,
                    shuffle=False,num_workers=opt.num_workers)
# write id and classes into csv file
data_id_to_class = []
label_idx = 0
for label_name in train_data.data_classes:
    data_id_to_class.append([label_idx, label_name])
    label_idx += 1
print(data_id_to_class)
id_file_name = opt.id_file
write_csv(data_id_to_class,id_file_name,'label_idx','label_name')

# step3: criterion and optimizer
criterion = t.nn.CrossEntropyLoss()
lr = opt.lr
optimizer = model.get_optimizer(lr, opt.weight_decay)
    
# step4: meters
loss_meter = meter.AverageValueMeter()
confusion_matrix = meter.ConfusionMeter(opt.classifier_num)
previous_loss = 1e10

# train
for epoch in range(opt.max_epoch):
    
    loss_meter.reset()
    confusion_matrix.reset()

    for ii,(data,label) in tqdm(enumerate(train_dataloader)):

        # train model 
        input = data.to(opt.device)
        target = label.to(opt.device)


        optimizer.zero_grad()
        score = model(input)
        loss = criterion(score,target)
        loss.backward()
        optimizer.step()
        
        
        # meters update and visualize
        loss_meter.add(loss.item())
        # detach 一下更安全保险
        confusion_matrix.add(score.detach(), target.detach()) 

        if (ii + 1)%opt.print_freq == 0:
            vis.plot('loss', loss_meter.value()[0])
            
            # 进入debug模式
            if os.path.exists(opt.debug_file):
                import ipdb;
                ipdb.set_trace()


    model.save()

    # validate and visualize
    val_cm,val_accuracy = val(model,val_dataloader)

    vis.plot('val_accuracy',val_accuracy)
    vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
                epoch = epoch,loss = loss_meter.value()[0],val_cm = str(val_cm.value()),train_cm=str(confusion_matrix.value()),lr=lr))
    
    # update learning rate
    if loss_meter.value()[0] > previous_loss:          
        lr = lr * opt.lr_decay
        # 第二种降低学习率的方法:不会有moment等信息的丢失
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    

    previous_loss = loss_meter.value()[0]

@t.no_grad()
def val(model,dataloader):
"""
计算模型在验证集上的准确率等信息
"""
model = model.eval()

confusion_matrix = meter.ConfusionMeter(opt.classifier_num)
for ii, (val_input, label) in tqdm(enumerate(dataloader)):
    val_input = val_input.to(opt.device)
    score = model(val_input)
    confusion_matrix.add(score.detach().squeeze(), label.type(t.LongTensor))

model.train()
cm_value = confusion_matrix.value()
cm_value_sum = 0
for i in range(opt.classifier_num):
    cm_value_sum += cm_value[i][i]
accuracy = 100. * (cm_value_sum) / (cm_value.sum())
return confusion_matrix, accuracy

def help():
"""
打印帮助的信息: python file.py help
"""

print("""
usage : python file.py <function> [--args=value]
<function> := train | test | help
example: 
        python {0} train --env='env0701' --lr=0.01
        python {0} test --dataset='path/to/dataset/root/'
        python {0} help
avaiable args:""".format(__file__))

from inspect import getsource
source = (getsource(opt.__class__))
print(source)

if name=='main':
import fire
fire.Fire()

所采用的卷积网络模型如下:

from torchvision.models import squeezenet1_1
from findplate.models.basic_module import BasicModule
from torch import nn
from torch.optim import Adam

class SqueezeNetGray(BasicModule):
def init(self, num_classes=65):
super(SqueezeNetGray, self).init()
self.model_name = 'squeezenet_gray'
self.model = squeezenet1_1(pretrained=False)
# 修改 原始的num_class: 预训练模型是1000分类
self.model.num_classes = num_classes
self.model.classifier = nn.Sequential(
nn.Dropout(p=0.5),
nn.Conv2d(512, num_classes, 1),
nn.ReLU(inplace=True),
nn.AvgPool2d(13, stride=1)
)

def forward(self,x):
    return self.model(x)

def get_optimizer(self, lr, weight_decay):
    # 因为使用了预训练模型,我们只需要训练后面的分类
    # 前面的特征提取部分可以保持不变
    return Adam(self.model.classifier.parameters(), lr, weight_decay=weight_decay) 

测试集:

img

img

测试集同名,数据较少,路径在配置中

我想要达到的结果

希望各位能给点改进的思路,或者指出我的错误,谢谢。

  • 写回答

5条回答 默认 最新

  • 於菟601 2022-05-13 14:41
    关注

    现在放的测试图片是整个系统的测试图片,训练字符模块使用的是二值图,详见数据集部分,在进行字符识别模块的测试时,也会把输入的车牌模块提取车牌、字符分割、转二值图后再输入字符识别模块

    评论

报告相同问题?

问题事件

  • 系统已结题 5月21日
  • 创建了问题 5月13日

悬赏问题

  • ¥15 chaquopy python 安卓
  • ¥50 Kubernetes&Fission&Eleasticsearch
  • ¥15 CSS实现渐隐虚线框
  • ¥15 有没有帮写代码做实验仿真的
  • ¥15 報錯:Person is not mapped,如何解決?
  • ¥30 vmware exsi重置后登不上
  • ¥15 易盾点选的cb参数怎么解啊
  • ¥15 MATLAB运行显示错误,如何解决?
  • ¥15 c++头文件不能识别CDialog
  • ¥15 Excel发现不可读取的内容