我正在用 PyTorch 做图像多分类。网上找到的模板是二分类的,我把它改成了多分类;模型训练没有问题,但在做训练过程可视化时一直报错,始终无法解决。有没有人可以帮忙看看?有偿也可以。
现在报错信息是:RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
报错这部分的代码如下:
```python
def visualize_model(model, num_images=6):
    """Plot the model's predictions for the first ``num_images`` test images.

    Images are drawn into a new matplotlib figure laid out as a two-column
    grid, each titled with the predicted class name. The model is switched
    to evaluation mode for the duration of the call and its previous
    training/evaluation mode is restored on exit, even if an error occurs.

    Relies on module-level names defined elsewhere in the file:
    ``dataloaders``, ``test_path``, ``device``, ``class_names``, ``imshow``
    and ``plt``.

    Args:
        model: The network to run; called on each batch of inputs.
        num_images: Maximum number of images to draw. Must be at least 1.

    Raises:
        ValueError: If ``num_images`` is smaller than 1.
    """
    if num_images < 1:
        raise ValueError("num_images must be a positive integer")

    was_training = model.training
    model.eval()
    images_handled = 0
    # Round up so an odd num_images still has a grid slot for its last image
    # (num_images // 2 rows would be one slot short, or zero rows for 1).
    num_rows = (num_images + 1) // 2
    plt.figure()
    try:
        with torch.no_grad():
            for inputs, _labels in dataloaders[test_path]:
                inputs = inputs.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                # Move the batch results to the host once instead of once
                # per image; plain ints also index class_names directly.
                pred_indices = preds.cpu().tolist()
                images = inputs.detach().cpu()
                for image, pred in zip(images, pred_indices):
                    images_handled += 1
                    ax = plt.subplot(num_rows, 2, images_handled)
                    ax.axis('off')
                    ax.set_title('predicted: {}'.format(class_names[pred]))
                    imshow(image)
                    if images_handled == num_images:
                        return
    finally:
        # Restore whichever mode the caller had the model in.
        model.train(mode=was_training)
# Run train_model for 6 epochs and keep its return value for visualization.
# NOTE(review): per the traceback in this post, the CUDA device-side assert
# surfaces inside this call (at copy.deepcopy(model.state_dict())), not in
# visualize_model. The error text itself warns that the reported location may
# be wrong because CUDA errors are reported asynchronously; presumably the
# failing kernel ran earlier -- confirm by rerunning in a fresh process with
# CUDA_LAUNCH_BLOCKING=1.
base_model = train_model(resnet50, criterion, optimizer, exp_lr_scheduler, num_epochs=6)
# Draw predictions using the object returned by train_model.
visualize_model(base_model)
# Display the figure created by visualize_model.
plt.show()
```

报错的具体信息如下:
RuntimeError Traceback (most recent call last)
Input In [56], in <cell line: 27>()
24 return
25 model.train(mode=was_training)
---> 27 base_model = train_model(resnet50, criterion, optimizer, exp_lr_scheduler, num_epochs=6)
28 visualize_model(base_model)
29 plt.show()
Input In [54], in train_model(model, criterion, optimizer, scheduler, num_epochs)
3 def train_model(model, criterion, optimizer, scheduler, num_epochs=20):
4 since = time.time()
----> 6 best_model_wts = copy.deepcopy(model.state_dict())
7 best_acc = 0.0
9 for epoch in range(num_epochs):
File ~\anaconda3\lib\copy.py:172, in deepcopy(x, memo, _nil)
170 y = x
171 else:
--> 172 y = _reconstruct(x, memo, *rv)
174 # If is its own copy, don't memoize.
175 if y is not x:
File ~\anaconda3\lib\copy.py:296, in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
294 for key, value in dictiter:
295 key = deepcopy(key, memo)
--> 296 value = deepcopy(value, memo)
297 y[key] = value
298 else:
File ~\anaconda3\lib\copy.py:153, in deepcopy(x, memo, _nil)
151 copier = getattr(x, "__deepcopy__", None)
152 if copier is not None:
--> 153 y = copier(memo)
154 else:
155 reductor = dispatch_table.get(cls)
File ~\anaconda3\lib\site-packages\torch\_tensor.py:134, in Tensor.__deepcopy__(self, memo)
125 raise RuntimeError(
126 "The default implementation of __deepcopy__() for wrapper subclasses "
127 "only works for subclass types that implement clone() and for which "
(...)
131 "different type."
132 )
133 else:
--> 134 new_storage = self.storage().__deepcopy__(memo)
135 if self.is_quantized:
136 # quantizer_params can be different type based on torch attribute
137 quantizer_params: Union[
138 Tuple[torch.qscheme, float, int],
139 Tuple[torch.qscheme, Tensor, Tensor, int],
140 ]
File ~\anaconda3\lib\site-packages\torch\storage.py:597, in TypedStorage.__deepcopy__(self, memo)
596 def __deepcopy__(self, memo):
--> 597 return self._new_wrapped_storage(copy.deepcopy(self._storage, memo))
File ~\anaconda3\lib\copy.py:153, in deepcopy(x, memo, _nil)
151 copier = getattr(x, "__deepcopy__", None)
152 if copier is not None:
--> 153 y = copier(memo)
154 else:
155 reductor = dispatch_table.get(cls)
File ~\anaconda3\lib\site-packages\torch\storage.py:97, in _StorageBase.__deepcopy__(self, memo)
95 if self._cdata in memo:
96 return memo[self._cdata]
---> 97 new_storage = self.clone()
98 memo[self._cdata] = new_storage
99 return new_storage
File ~\anaconda3\lib\site-packages\torch\storage.py:111, in _StorageBase.clone(self)
109 def clone(self):
110 """Returns a copy of this storage"""
--> 111 return type(self)(self.nbytes(), device=self.device).copy_(self)
RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.