CenterFusion運行train.sh報錯
驅動 NVIDIA-SMI 470.63.01 Driver Version: 470.63.01 CUDA Version: 11.4
cuda版本 11.0
cudnn 8.0.4
pytorch 1.7.1
python 3.7
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Thu_Jun_11_22:26:38_PDT_2020
Cuda compilation tools, release 11.0, V11.0.194
Build cuda_11.0_bu.TC445_37.28540450_0
PyTorch 可用：

```python
Python 3.7.16 (default, Jan 17 2023, 22:20:44)
[GCC 11.2.0] :: Anaconda, Inc. on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> print(torch.cuda.is_available())
True
>>> exit()
```

訓練參數如下：

```bash
# Launches CenterFusion training (task "ddd") on the mini_train / mini_val
# splits, initialised from ../models/centernet_baseline_e170.pth.
# Expects a src/ directory containing main.py in the current directory.

# Restrict the run to a single GPU. PCI_BUS_ID ordering is intended to make
# device index 0 refer to the same card that nvidia-smi lists as GPU 0.
export CUDA_DEVICE_ORDER=PCI_BUS_ID
export CUDA_VISIBLE_DEVICES=0

# Stop here if src/ is missing: otherwise main.py would be looked up in the
# wrong directory and the final `cd ..` would leave the caller one level
# above where it started.
cd src || exit 1

# train
python main.py \
  ddd \
  --exp_id centerfusion \
  --shuffle_train \
  --train_split mini_train \
  --val_split mini_val \
  --val_intervals 1 \
  --run_dataset_eval \
  --nuscenes_att \
  --velocity \
  --batch_size 1 \
  --lr 2.5e-4 \
  --num_epochs 60 \
  --lr_step 50 \
  --save_point 20,40,50 \
  --gpus 0 \
  --not_rand_crop \
  --flip 0.5 \
  --shift 0.1 \
  --pointcloud \
  --radar_sweeps 3 \
  --pc_z_offset 0.0 \
  --pillar_dims 1.0,0.2,0.2 \
  --max_pc_dist 60.0 \
  --load_model ../models/centernet_baseline_e170.pth

# Optional flags, currently disabled. They are kept outside the command
# because a backslash inside a comment does not continue the line. To enable
# one, add a trailing backslash to the last argument above and append the
# flag on the next line.
# --freeze_backbone
# --resume

# Return to the directory the script was started from.
cd ..
```

訓練之後報錯是這樣子的，不太懂怎麼調試，請求指教：
Using tensorboardX
/home/sqyjy/anaconda3/envs/env1/lib/python3.7/site-packages/sklearn/utils/linear_assignment_.py:21: DeprecationWarning: The linear_assignment_ module is deprecated in 0.21 and will be removed from 0.23. Use scipy.optimize.linear_sum_assignment instead.
DeprecationWarning)
Fix size testing.
training chunk_sizes: [1]
input h w: 448 800
heads {'hm': 10, 'reg': 2, 'wh': 2, 'dep': 1, 'rot': 8, 'dim': 3, 'amodel_offset': 2, 'dep_sec': 1, 'rot_sec': 8, 'nuscenes_att': 8, 'velocity': 3}
weights {'hm': 1, 'reg': 1, 'wh': 0.1, 'dep': 1, 'rot': 1, 'dim': 1, 'amodel_offset': 1, 'dep_sec': 1, 'rot_sec': 1, 'nuscenes_att': 1, 'velocity': 1}
head conv {'hm': [256], 'reg': [256], 'wh': [256], 'dep': [256], 'rot': [256], 'dim': [256], 'amodel_offset': [256], 'dep_sec': [256, 256, 256], 'rot_sec': [256, 256, 256], 'nuscenes_att': [256, 256, 256], 'velocity': [256, 256, 256]}
Namespace(K=100, amodel_offset_weight=1, arch='dla_34', aug_rot=0, backbone='dla34', batch_size=1, chunk_sizes=[1], custom_dataset_ann_path='', custom_dataset_img_path='', custom_head_convs={'dep_sec': 3, 'rot_sec': 3, 'velocity': 3, 'nuscenes_att': 3}, data_dir='/home/sqyjy/project/CenterFusion/src/lib/../../data', dataset='nuscenes', dataset_version='', debug=0, debug_dir='/home/sqyjy/project/CenterFusion/src/lib/../../exp/ddd/centerfusion/debug', debugger_theme='white', demo='', dense_reg=1, dep_res_weight=1, dep_weight=1, depth_scale=1, dim_weight=1, disable_frustum=False, dla_node='dcn', down_ratio=4, eval=False, eval_n_plots=0, eval_render_curves=False, exp_dir='/home/sqyjy/project/CenterFusion/src/lib/../../exp/ddd', exp_id='centerfusion', fix_res=True, fix_short=-1, flip=0.5, flip_test=False, fp_disturb=0, freeze_backbone=False, frustumExpansionRatio=0.0, gpus=[0], gpus_str='0', head_conv={'hm': [256], 'reg': [256], 'wh': [256], 'dep': [256], 'rot': [256], 'dim': [256], 'amodel_offset': [256], 'dep_sec': [256, 256, 256], 'rot_sec': [256, 256, 256], 'nuscenes_att': [256, 256, 256], 'velocity': [256, 256, 256]}, head_kernel=3, heads={'hm': 10, 'reg': 2, 'wh': 2, 'dep': 1, 'rot': 8, 'dim': 3, 'amodel_offset': 2, 'dep_sec': 1, 'rot_sec': 8, 'nuscenes_att': 8, 'velocity': 3}, hm_dist_thresh={0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 1, 6: 1, 7: 1, 8: 0, 9: 0}, hm_disturb=0, hm_hp_weight=1, hm_to_box_ratio=0.3, hm_transparency=0.7, hm_weight=1, hp_weight=1, hungarian=False, ignore_loaded_cats=[], img_format='jpg', input_h=448, input_res=800, input_w=800, iou_thresh=0, keep_res=False, kitti_split='3dop', layers_to_freeze=['base', 'dla_up', 'ida_up'], load_model='../models/centernet_baseline_e170.pth', load_results='', lost_disturb=0, lr=0.00025, lr_step=[50], ltrb=False, ltrb_amodal=False, ltrb_amodal_weight=0.1, ltrb_weight=0.1, master_batch_size=1, max_age=-1, max_frame_dist=3, max_pc=1000, max_pc_dist=60.0, model_output_list=False, msra_outchannel=256, neck='dlaup', 
new_thresh=0.3, nms=False, no_color_aug=False, no_pause=False, no_pre_img=False, non_block_test=False, normalize_depth=True, not_cuda_benchmark=False, not_max_crop=False, not_prefetch_test=False, not_rand_crop=True, not_set_cuda_env=False, not_show_bbox=False, not_show_number=False, num_classes=10, num_epochs=60, num_head_conv=1, num_img_channels=3, num_iters=-1, num_resnet_layers=101, num_stacks=1, num_workers=4, nuscenes_att=True, nuscenes_att_weight=1, off_weight=1, optim='adam', out_thresh=-1, output_h=112, output_res=200, output_w=200, pad=31, pc_atts=['x', 'y', 'z', 'dyn_prop', 'id', 'rcs', 'vx', 'vy', 'vx_comp', 'vy_comp', 'is_quality_valid', 'ambig_state', 'x_rms', 'y_rms', 'invalid_state', 'pdh0', 'vx_rms', 'vy_rms'], pc_feat_channels={'pc_dep': 0, 'pc_vx': 1, 'pc_vz': 2}, pc_feat_lvl=['pc_dep', 'pc_vx', 'pc_vz'], pc_roi_method='pillars', pc_z_offset=0.0, pillar_dims=[1.5, 0.2, 0.2], pointcloud=True, pre_hm=False, pre_img=False, pre_thresh=-1, print_iter=0, prior_bias=-4.6, public_det=False, qualitative=False, r_a=250, r_b=5, radar_sweeps=3, reg_loss='l1', reset_hm=False, resize_video=False, resume=False, reuse_hm=False, root_dir='/home/sqyjy/project/CenterFusion/src/lib/../..', rot_weight=1, rotate=0, run_dataset_eval=True, same_aug_pre=False, save_all=False, save_dir='/home/sqyjy/project/CenterFusion/src/lib/../../exp/ddd/centerfusion', save_framerate=30, save_img_suffix='', save_imgs=[], save_point=[20, 40, 50], save_results=False, save_video=False, scale=0, secondary_heads=['velocity', 'nuscenes_att', 'dep_sec', 'rot_sec'], seed=317, shift=0.1, show_track_color=False, show_velocity=False, shuffle_train=True, sigmoid_dep_sec=True, skip_first=-1, sort_det_by_dist=False, tango_color=False, task='ddd', test_dataset='nuscenes', test_focal_length=-1, test_scales=[1.0], track_thresh=0.3, tracking=False, tracking_weight=1, train_split='mini_train', trainval=False, transpose_video=False, use_loaded_results=False, val_intervals=1, val_split='mini_val', 
velocity=True, velocity_weight=1, video_h=512, video_w=512, vis_gt_bev='', vis_thresh=0.3, warm_start_weights=False, weights={'hm': 1, 'reg': 1, 'wh': 0.1, 'dep': 1, 'rot': 1, 'dim': 1, 'amodel_offset': 1, 'dep_sec': 1, 'rot_sec': 1, 'nuscenes_att': 1, 'velocity': 1}, wh_weight=0.1, zero_pre_hm=False, zero_tracking=False)
fatal: 不是一个 git 仓库(或者任何父目录):.git
Creating model...
Using node type: (<class 'model.networks.dla.DeformConv'>, <class 'model.networks.dla.DeformConv'>)
Warning: No ImageNet pretrain!!
loaded ../models/centernet_baseline_e170.pth, epoch 28
Skip loading parameter nuscenes_att.0.weight, required shapetorch.Size([256, 67, 3, 3]), loaded shapetorch.Size([256, 64, 3, 3]).
Skip loading parameter nuscenes_att.2.weight, required shapetorch.Size([256, 256, 1, 1]), loaded shapetorch.Size([8, 256, 1, 1]).
Skip loading parameter nuscenes_att.2.bias, required shapetorch.Size([256]), loaded shapetorch.Size([8]).
Skip loading parameter velocity.0.weight, required shapetorch.Size([256, 67, 3, 3]), loaded shapetorch.Size([256, 64, 3, 3]).
Skip loading parameter velocity.2.weight, required shapetorch.Size([256, 256, 1, 1]), loaded shapetorch.Size([3, 256, 1, 1]).
Skip loading parameter velocity.2.bias, required shapetorch.Size([256]), loaded shapetorch.Size([3]).
No param dep_sec.0.weight.
No param dep_sec.0.bias.
No param dep_sec.2.weight.
No param dep_sec.2.bias.
No param dep_sec.4.weight.
No param dep_sec.4.bias.
No param dep_sec.6.weight.
No param dep_sec.6.bias.
No param rot_sec.0.weight.
No param rot_sec.0.bias.
No param rot_sec.2.weight.
No param rot_sec.2.bias.
No param rot_sec.4.weight.
No param rot_sec.4.bias.
No param rot_sec.6.weight.
No param rot_sec.6.bias.
No param nuscenes_att.4.weight.
No param nuscenes_att.4.bias.
No param nuscenes_att.6.weight.
No param nuscenes_att.6.bias.
No param velocity.4.weight.
No param velocity.4.bias.
No param velocity.6.weight.
No param velocity.6.bias.
Setting up validation data...
Dataset version
==> initializing mini_val data from /home/sqyjy/project/CenterFusion/src/lib/../../data/nuscenes/annotations_3sweeps/mini_val.json,
images from /home/sqyjy/project/CenterFusion/src/lib/../../data/nuscenes ...
loading annotations into memory...
Done (t=0.37s)
creating index...
index created!
Loaded mini_val 486 samples
Setting up train data...
Dataset version
==> initializing mini_train data from /home/sqyjy/project/CenterFusion/src/lib/../../data/nuscenes/annotations_3sweeps/mini_train.json,
images from /home/sqyjy/project/CenterFusion/src/lib/../../data/nuscenes ...
loading annotations into memory...
Done (t=1.61s)
creating index...
index created!
Loaded mini_train 1938 samples
Starting training...
Traceback (most recent call last):
File "main.py", line 140, in <module>
main(opt)
File "main.py", line 84, in main
log_dict_train, _ = trainer.train(epoch, train_loader)
File "/home/sqyjy/project/CenterFusion/src/lib/trainer.py", line 406, in train
return self.run_epoch('train', epoch, data_loader)
File "/home/sqyjy/project/CenterFusion/src/lib/trainer.py", line 178, in run_epoch
output, loss, loss_stats = model_with_loss(batch, phase)
File "/home/sqyjy/anaconda3/envs/env1/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/sqyjy/project/CenterFusion/src/lib/trainer.py", line 123, in forward
outputs = self.model(batch['image'], pc_hm=pc_hm, pc_dep=pc_dep, calib=calib)
File "/home/sqyjy/anaconda3/envs/env1/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/sqyjy/project/CenterFusion/src/lib/model/networks/base_model.py", line 91, in forward
feats = self.img2feats(x)
File "/home/sqyjy/project/CenterFusion/src/lib/model/networks/dla.py", line 622, in img2feats
x = self.dla_up(x)
File "/home/sqyjy/anaconda3/envs/env1/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/sqyjy/project/CenterFusion/src/lib/model/networks/dla.py", line 572, in forward
ida(layers, len(layers) -i - 2, len(layers))
File "/home/sqyjy/anaconda3/envs/env1/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/sqyjy/project/CenterFusion/src/lib/model/networks/dla.py", line 543, in forward
layers[i] = upsample(project(layers[i]))
File "/home/sqyjy/anaconda3/envs/env1/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/sqyjy/project/CenterFusion/src/lib/model/networks/dla.py", line 516, in forward
x = self.conv(x)
File "/home/sqyjy/anaconda3/envs/env1/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/sqyjy/project/CenterFusion/src/lib/model/networks/DCNv2/dcn_v2.py", line 170, in forward
self.deformable_groups,
File "/home/sqyjy/project/CenterFusion/src/lib/model/networks/DCNv2/dcn_v2.py", line 37, in forward
ctx.deformable_groups,
RuntimeError: Not compiled with GPU support