import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder
from tqdm import tqdm

# `parser`, `Model`, and `to_variable` are defined elsewhere in my project.

def main():
    args = parser.parse_args()
    input_dir = args.input_dir
    total_epoch = args.epochs
    patch_size = args.patch_size
    batch_size = args.batch_size
    num_workers = args.num_workers

    trainable_set = True
    model = Model(trainable_set)
    model.to('cuda:0')

    train_data = ImageFolder(
        root=input_dir,
        transform=transforms.Compose([
            transforms.RandomCrop(patch_size),
            transforms.ToTensor(),
        ]),
    )
    train_loader = DataLoader(train_data, batch_size=batch_size,
                              shuffle=True, num_workers=num_workers)
    opt = optim.Adam(model.parameters(), lr=1e-5)

    for epoch in range(1, total_epoch + 1):
        model.train()
        pbar = tqdm(train_loader)
        for input_img, _ in pbar:
            opt.zero_grad()
            ori_img = to_variable(input_img)
            loss, recon_img = model(ori_img, wavelet_trainable=1, coding=0)
            # The loss already carries grad_fn (see the log below), so this
            # call should be unnecessary; if it were ever truly needed, the
            # graph would be detached and no gradients would reach the model.
            loss.requires_grad_(True)
            loss.backward()
            # Clip the global L2 norm of the gradients to 5.0.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0, norm_type=2)
            opt.step()
            pbar.set_description("Processing %s" % loss.item())
            # Printing the raw tensor every step interleaves with the tqdm
            # bar, which produces the mixed output below.
            print(loss)
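One detail in the script worth double-checking is `loss.requires_grad_(True)`: a loss produced by the forward pass already requires grad, so the call should never be needed. If it ever were, the graph would be detached and `backward()` would not update the model at all. A minimal sanity check (a sketch reusing the names from the script above, not part of the original code):

    # Sketch: place right after the forward pass in the training loop.
    loss, recon_img = model(ori_img, wavelet_trainable=1, coding=0)
    assert loss.grad_fn is not None, "loss is detached from the computation graph"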
The loss I'm getting also looks strange:
Processing 0.23131124675273895: 0%| | 1/200 [00:02<09:51, 2.97s/it]tensor(0.2313, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.1524205356836319: 1%| | 2/200 [00:03<05:28, 1.66s/it]tensor(0.1524, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.06570276618003845: 2%|▏ | 3/200 [00:04<04:13, 1.29s/it]tensor(0.0657, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.11781097948551178: 2%|▏ | 4/200 [00:05<03:42, 1.14s/it]tensor(0.1178, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.10447573661804199: 2%|▎ | 5/200 [00:06<03:38, 1.12s/it]tensor(0.1045, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.07497256249189377: 3%|▎ | 6/200 [00:07<03:24, 1.05s/it]tensor(0.0750, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.08797897398471832: 4%|▎ | 7/200 [00:08<03:17, 1.02s/it]tensor(0.0880, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.09975536912679672: 4%|▍ | 8/200 [00:09<03:14, 1.01s/it]tensor(0.0998, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.06214721128344536: 4%|▍ | 9/200 [00:10<03:14, 1.02s/it]tensor(0.0621, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.03598678484559059: 5%|▌ | 10/200 [00:11<03:13, 1.02s/it]tensor(0.0360, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.06032150983810425: 6%|▌ | 11/200 [00:12<03:04, 1.03it/s]tensor(0.0603, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.05920260399580002: 6%|▌ | 12/200 [00:13<03:09, 1.01s/it]tensor(0.0592, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.047266583889722824: 6%|▋ | 13/200 [00:14<03:01, 1.03it/s]tensor(0.0473, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.06426531076431274: 7%|▋ | 14/200 [00:15<02:54, 1.06it/s]tensor(0.0643, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.08503642678260803: 8%|▊ | 15/200 [00:16<02:58, 1.04it/s]tensor(0.0850, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.07649430632591248: 8%|▊ | 16/200 [00:17<03:11, 1.04s/it]tensor(0.0765, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.055424176156520844: 8%|▊ | 17/200 [00:18<03:19, 1.09s/it]tensor(0.0554, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.026697305962443352: 9%|▉ | 18/200 [00:19<03:21, 1.11s/it]tensor(0.0267, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.04741383716464043: 10%|▉ | 19/200 [00:20<03:17, 1.09s/it]tensor(0.0474, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.05615292116999626: 10%|█ | 20/200 [00:22<03:21, 1.12s/it]tensor(0.0562, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.08515559136867523: 10%|█ | 21/200 [00:23<03:13, 1.08s/it]tensor(0.0852, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.04595249891281128: 11%|█ | 22/200 [00:23<03:06, 1.05s/it]tensor(0.0460, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.05469346046447754: 12%|█▏ | 23/200 [00:25<03:10, 1.08s/it]tensor(0.0547, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.0492982491850853: 12%|█▏ | 24/200 [00:26<03:06, 1.06s/it]tensor(0.0493, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.044051021337509155: 12%|█▎ | 25/200 [00:27<03:01, 1.04s/it]tensor(0.0441, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.024460002779960632: 13%|█▎ | 26/200 [00:28<03:04, 1.06s/it]tensor(0.0245, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.0649680346250534: 14%|█▎ | 27/200 [00:29<02:54, 1.01s/it]tensor(0.0650, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.15364143252372742: 14%|█▍ | 28/200 [00:30<02:52, 1.00s/it]tensor(0.1536, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.1933668553829193: 14%|█▍ | 29/200 [00:31<02:52, 1.01s/it]tensor(0.1934, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.2110283076763153: 15%|█▌ | 30/200 [00:32<02:47, 1.02it/s]tensor(0.2110, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.22534942626953125: 16%|█▌ | 31/200 [00:33<02:48, 1.00it/s]tensor(0.2253, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.1966424435377121: 16%|█▌ | 32/200 [00:34<02:55, 1.04s/it]tensor(0.1966, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.13029730319976807: 16%|█▋ | 33/200 [00:35<02:58, 1.07s/it]tensor(0.1303, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.03669431805610657: 17%|█▋ | 34/200 [00:36<02:58, 1.08s/it]tensor(0.0367, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.12538409233093262: 18%|█▊ | 35/200 [00:37<02:53, 1.05s/it]tensor(0.1254, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.07871457934379578: 18%|█▊ | 36/200 [00:38<02:57, 1.08s/it]tensor(0.0787, device='cuda:0', grad_fn=<MseLossBackward0>)
Processing 0.1417844593524933: 18%|█▊ | 37/200 [00:39<02:52, 1.06s/it]tensor(0.1418, device='cuda:0', grad_fn=<MseLossBackward0>)
I have already tried lowering the learning rate and using gradient clipping, but after training for a few epochs the gradients still explode.
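To localize where things blow up, one option (a sketch under the same names as the script above, not part of the original code) is to log the pre-clip gradient norm every step, since `clip_grad_norm_` returns the total norm it measured before clipping; `torch.autograd.set_detect_anomaly(True)` can additionally pinpoint the first op that produces a NaN/Inf:

    import math

    # Sketch: watch the pre-clip gradient norm to see exactly when it explodes.
    # clip_grad_norm_ returns the total norm computed *before* clipping.
    total_norm = torch.nn.utils.clip_grad_norm_(
        model.parameters(), 5.0, norm_type=2).item()
    if not math.isfinite(total_norm):
        print("non-finite gradient norm at epoch %d" % epoch)
    pbar.set_description("loss %.4f | grad norm %.2f" % (loss.item(), total_norm))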