我现在正在使用预训练完毕的fcn模型对cityscapes数据集进行预测。该模型由* https://github.com/pytorch/vision/tree/main/torchvision/models/segmentation的源码修改而得,数据集也从PASCAL VOC2012数据集换成了cityscapes进行语义分割,以下是我的代码:
fnc_models.py
`
from collections import OrderedDict
from typing import Dict
import torch
from torch import nn, Tensor
from torch.nn import functional as F
from .backbone import resnet50, resnet101
class IntermediateLayerGetter(nn.ModuleDict):
"""
Module wrapper that returns intermediate layers from a model
It has a strong assumption that the modules have been registered
into the model in the same order as they are used.
This means that one should **not** reuse the same nn.Module
twice in the forward if you want this to work.
Additionally, it is only able to query submodules that are directly
assigned to the model. So if `model` is passed, `model.feature1` can
be returned, but not `model.feature1.layer2`.
Args:
model (nn.Module): model on which we will extract the features
return_layers (Dict[name, new_name]): a dict containing the names
of the modules for which the activations will be returned as
the key of the dict, and the value of the dict is the name
of the returned activation (which the user can specify).
"""
_version = 2
__annotations__ = {
"return_layers": Dict[str, str],
}
def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None:
if not set(return_layers).issubset([name for name, _ in model.named_children()]):
raise ValueError("return_layers are not present in model")
orig_return_layers = return_layers
return_layers = {str(k): str(v) for k, v in return_layers.items()}
# 重新构建backbone,将没有使用到的模块全部删掉
layers = OrderedDict()
for name, module in model.named_children():
layers[name] = module
if name in return_layers:
del return_layers[name]
if not return_layers:
break
super(IntermediateLayerGetter, self).__init__(layers)
self.return_layers = orig_return_layers
def forward(self, x: Tensor) -> Dict[str, Tensor]:
out = OrderedDict()
for name, module in self.items():
x = module(x)
if name in self.return_layers:
out_name = self.return_layers[name]
out[out_name] = x
return out
class FCN(nn.Module):
"""
Implements a Fully-Convolutional Network for semantic segmentation.
Args:
backbone (nn.Module): the network used to compute the features for the model.
The backbone should return an OrderedDict[Tensor], with the key being
"out" for the last feature map used, and "aux" if an auxiliary classifier
is used.
classifier (nn.Module): module that takes the "out" element returned from
the backbone and returns a dense prediction.
aux_classifier (nn.Module, optional): auxiliary classifier used during training
"""
__constants__ = ['aux_classifier']
def __init__(self, backbone, classifier, aux_classifier=None):
super(FCN, self).__init__()
self.backbone = backbone
self.classifier_new = classifier
self.aux_classifier_new = aux_classifier
def forward(self, x: Tensor) -> Dict[str, Tensor]:
input_shape = x.shape[-2:]
# contract: features is a dict of tensors
features = self.backbone(x)
result = OrderedDict()
x = features["out"]
x = self.classifier_new(x)
# 原论文中虽然使用的是ConvTranspose2d,但权重是冻结的,所以就是一个bilinear插值
x = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=False)
result["out"] = x
if self.aux_classifier_new is not None:
x = features["aux"]
x = self.aux_classifier_new(x)
# 原论文中虽然使用的是ConvTranspose2d,但权重是冻结的,所以就是一个bilinear插值
x = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=False)
result["aux"] = x
return result
# class FCNHead(nn.Sequential):
# def __init__(self, in_channels, channels):
# inter_channels = in_channels // 4
# layers = [
# nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False),
# nn.BatchNorm2d(inter_channels),
# nn.ReLU(),
# nn.Dropout(0.1),
# nn.Conv2d(inter_channels, channels, 1)
# ]
# super(FCNHead, self).__init__(*layers)
# def fcn_resnet50(aux, num_classes=21, pretrain_backbone=False):
# # 'resnet50_imagenet': 'https://download.pytorch.org/models/resnet50-0676ba61.pth'
# # 'fcn_resnet50_coco': 'https://download.pytorch.org/models/fcn_resnet50_coco-1167a1af.pth'
# backbone = resnet50(replace_stride_with_dilation=[False, True, True])
# if pretrain_backbone:
# # 载入resnet50 backbone预训练权重
# backbone.load_state_dict(torch.load("resnet50.pth", map_location='cpu'))
# out_inplanes = 2048
# aux_inplanes = 1024
# return_layers = {'layer4': 'out'}
# if aux:
# return_layers['layer3'] = 'aux'
# backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)
# aux_classifier = None
# # why using aux: https://github.com/pytorch/vision/issues/4292
# if aux:
# aux_classifier = FCNHead(aux_inplanes, num_classes)
# classifier = FCNHead(out_inplanes, num_classes)
# model = FCN(backbone, classifier, aux_classifier)
# return model
# def fcn_resnet101(aux, num_classes=21, pretrain_backbone=False):
# # 'resnet101_imagenet': 'https://download.pytorch.org/models/resnet101-63fe2227.pth'
# # 'fcn_resnet101_coco': 'https://download.pytorch.org/models/fcn_resnet101_coco-7ecb50ca.pth'
# backbone = resnet101(replace_stride_with_dilation=[False, True, True])
# if pretrain_backbone:
# # 载入resnet101 backbone预训练权重
# backbone.load_state_dict(torch.load("resnet101.pth", map_location='cpu'))
# out_inplanes = 2048
# aux_inplanes = 1024
# return_layers = {'layer4': 'out'}
# if aux:
# return_layers['layer3'] = 'aux'
# backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)
# aux_classifier = None
# # why using aux: https://github.com/pytorch/vision/issues/4292
# if aux:
# aux_classifier = FCNHead(aux_inplanes, num_classes)
# classifier = FCNHead(out_inplanes, num_classes)
# model = FCN(backbone, classifier, aux_classifier)
# return model
class FCNHead(nn.Sequential):
def __init__(self, in_channels, out_channels, num_classes):
inter_channels = in_channels // 4
layers = [
nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False),
nn.BatchNorm2d(inter_channels),
nn.ReLU(),
nn.Dropout(0.1),
nn.Conv2d(inter_channels, num_classes, 1)
]
super(FCNHead, self).__init__(*layers)
def fcn_resnet50(aux, num_classes=21, pretrain_backbone=False):
# 'resnet50_imagenet': 'https://download.pytorch.org/models/resnet50-0676ba61.pth'
# 'fcn_resnet50_coco': 'https://download.pytorch.org/models/fcn_resnet50_coco-1167a1af.pth'
backbone = resnet50(replace_stride_with_dilation=[False, True, True])
if pretrain_backbone:
# 载入resnet50 backbone预训练权重
backbone.load_state_dict(torch.load("resnet50.pth", map_location='cpu'))
out_inplanes = 2048
aux_inplanes = 1024
return_layers = {'layer4': 'out'}
if aux:
return_layers['layer3'] = 'aux'
backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)
aux_classifier = None
# why using aux: https://github.com/pytorch/vision/issues/4292
if aux:
aux_classifier = FCNHead(aux_inplanes, num_classes)
classifier = FCNHead(out_inplanes=2048, num_classes=num_classes) # 添加 num_classes 参数
model = FCN(backbone, classifier, aux_classifier)
return model
def fcn_resnet101(aux, num_classes=21, pretrain_backbone=False):
# 'resnet101_imagenet': 'https://download.pytorch.org/models/resnet101-63fe2227.pth'
# 'fcn_resnet101_coco': 'https://download.pytorch.org/models/fcn_resnet101_coco-7ecb50ca.pth'
backbone = resnet101(replace_stride_with_dilation=[False, True, True])
if pretrain_backbone:
# 载入resnet101 backbone预训练权重
backbone.load_state_dict(torch.load("resnet101.pth", map_location='cpu'))
out_inplanes = 2048
aux_inplanes = 1024
return_layers = {'layer4': 'out'}
if aux:
return_layers['layer3'] = 'aux'
backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)
aux_classifier = None
# why using aux: https://github.com/pytorch/vision/issues/4292
if aux:
aux_classifier = FCNHead(aux_inplanes, num_classes)
classifier = FCNHead(out_inplanes, num_classes=num_classes) # 添加 num_classes 参数
model = FCN(backbone, classifier, aux_classifier)
return model
predict.py如下:
import os
import time
import json
import torch
from torchvision import transforms
import numpy as np
from PIL import Image
from src import fcn_resnet50
def time_synchronized():
torch.cuda.synchronize() if torch.cuda.is_available() else None
return time.time()
def main():
aux = False # inference time not need aux_classifier
classes = 19
weights_path = "/root/autodl-tmp/test/fcn_4_24/fcn/save_weights/best1-model_eval1_city_69_270.pth"
img_path = "./test1.png"
palette_path = "./palette.json"
assert os.path.exists(weights_path), f"weights {weights_path} not found."
assert os.path.exists(img_path), f"image {img_path} not found."
assert os.path.exists(palette_path), f"palette {palette_path} not found."
with open(palette_path, "rb") as f:
pallette_dict = json.load(f)
pallette = []
for v in pallette_dict.values():
pallette += v
# get devices
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using {} device.".format(device))
# create model
model = fcn_resnet50(aux=aux, num_classes=21)
# delete weights about aux_classifier
weights_dict = torch.load(weights_path, map_location='cpu')['model']
for k in list(weights_dict.keys()):
if "aux" in k:
del weights_dict[k]
# load weights
model.load_state_dict(weights_dict)
model.to(device)
# load image
original_img = Image.open(img_path)
# from pil image to tensor and normalize
# data_transform = transforms.Compose([transforms.Resize(512),
# transforms.ToTensor(),
# transforms.Normalize(mean=(0.485, 0.456, 0.406),
# std=(0.229, 0.224, 0.225))])
# 修改这里
data_transform = transforms.Compose([
transforms.Resize((512, 1024)), # 将图像调整为512x1024
transforms.ToTensor(),
transforms.Normalize(mean=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225))])
img = data_transform(original_img)
# expand batch dimension
img = torch.unsqueeze(img, dim=0)
model.eval() # 进入验证模式
with torch.no_grad():
# init model
img_height, img_width = img.shape[-2:]
init_img = torch.zeros((1, 3, img_height, img_width), device=device)
model(init_img)
t_start = time_synchronized()
output = model(img.to(device))
t_end = time_synchronized()
print("inference time: {}".format(t_end - t_start))
print("fps: {}".format(1//(t_end - t_start)))
prediction = output['out'].argmax(1).squeeze(0)
prediction = prediction.to("cpu").numpy().astype(np.uint8)
mask = Image.fromarray(prediction)
mask.putpalette(pallette)
mask.save("test_result.png")
if __name__ == '__main__':
main()
报错如下,,cityscapes的数据集我将它分成了19类,请帮我修改代码
(py38) root@autodl-container-3132448cc7-14f214ad:~/autodl-tmp/test/fcn_4_24/fcn# python predict-Copy1.py
using cuda:0 device.
Traceback (most recent call last):
File "predict-Copy1.py", line 94, in <module>
main()
File "predict-Copy1.py", line 38, in main
model = fcn_resnet50(aux=aux, num_classes=21)
File "/root/autodl-tmp/test/fcn_4_24/fcn/src/fcn_model.py", line 216, in fcn_resnet50
classifier = FCNHead(out_inplanes=2048, num_classes=num_classes) # 添加 num_classes 参数
TypeError: __init__() got an unexpected keyword argument 'out_inplanes'