Why does ResNet50 with an SE attention module added have a very low training accuracy (around 0.01) for the first few epochs, and why does accuracy climb so slowly, when the original model's final training accuracy was about 0.90?
import os

import torch
import torch.nn as nn
from torchvision import models

# Custom squeeze-and-excitation (SE) attention module
class SELayer(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        self.fc1 = nn.Linear(channel, channel // reduction, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(channel // reduction, channel, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        b, c, _, _ = x.size()
        y = x.mean(dim=(2, 3), keepdim=True)  # squeeze: global average pooling
        y = self.fc1(y.view(b, c))            # excitation: bottleneck down...
        y = self.relu(y)
        y = self.fc2(y)                       # ...and back up to `channel`
        y = self.sigmoid(y)                   # per-channel gates in (0, 1)
        return x * y.view(b, c, 1, 1)         # rescale the input channel-wise
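As a quick sanity check (a minimal sketch; the batch size and spatial size below are arbitrary assumptions), the SE layer should return a tensor with exactly the input's shape, only rescaled channel-wise:

# Hypothetical sizes, chosen only for this check
se = SELayer(channel=2048, reduction=16)
feat = torch.randn(4, 2048, 7, 7)   # shaped like ResNet50's layer4 output
out = se(feat)
assert out.shape == feat.shape      # SE only rescales channels, never reshapes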
class ResNetWithSE(nn.Module):
    def __init__(self, num_classes, pretrained=True):
        super(ResNetWithSE, self).__init__()
        self.resnet = models.resnet50(pretrained=pretrained)
        self.se = SELayer(2048)  # 2048 = channel count of ResNet50's layer4 output
        self.resnet.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(self.resnet.fc.in_features, num_classes)
        )

    def forward(self, x):
        # Standard ResNet50 stem and stages
        x = self.resnet.conv1(x)
        x = self.resnet.bn1(x)
        x = self.resnet.relu(x)
        x = self.resnet.maxpool(x)
        x = self.resnet.layer1(x)
        x = self.resnet.layer2(x)
        x = self.resnet.layer3(x)
        x = self.resnet.layer4(x)
        # Apply the squeeze-and-excitation module to the final feature map
        x = self.se(x)
        x = self.resnet.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.resnet.fc(x)
        return x
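A dummy forward pass (a minimal smoke test; num_classes=10 is an arbitrary assumption, and pretrained=False just avoids a download) confirms the wrapper produces logits of the expected shape:

model = ResNetWithSE(num_classes=10, pretrained=False)
logits = model(torch.randn(2, 3, 224, 224))
assert logits.shape == (2, 10)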
# Revised build_model function
def build_model(num_classes, previous_weights_file=None):
    model = ResNetWithSE(num_classes=num_classes, pretrained=True)
    # If a previous checkpoint file exists, load it
    if previous_weights_file and os.path.exists(previous_weights_file):
        model = model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
        print(f"Loading previous weights onto model device: {next(model.parameters()).device}")
        # Load the checkpoint's parameter dict
        pretrained_dict = torch.load(previous_weights_file)
        # Parameter dict of the current model
        model_dict = model.state_dict()
        # Drop entries whose name or shape doesn't match (e.g. the replaced fc head)
        pretrained_dict = {k: v for k, v in pretrained_dict.items()
                           if k in model_dict and v.size() == model_dict[k].size()}
        # Merge the surviving weights into the current state dict
        model_dict.update(pretrained_dict)
        # Load the merged weights back into the model
        model.load_state_dict(model_dict)
    return model
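A usage sketch, under assumed names (the class count and checkpoint path below are hypothetical and need to be adapted to the actual project):

# Hypothetical arguments; replace with your own values
model = build_model(num_classes=10, previous_weights_file='checkpoints/last.pth')
model.train()  # ready to resume training with the filtered weights loaded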