嘿,朋友们,我在尝试复现一个关于 RAG + 动态图的项目时遇到了一些问题。执行 bash 命令后,代码能够正常运行,但得到的评估指标始终为 0(top_k_scores_test: {'MAP': [0.0], 'NDCG': [0.0], 'jaccard': [0.0]})。我尝试了很多办法都没能解决,能不能请大家帮我看看问题出在哪里?下面是一些可能有用的信息,如果需要我提供其他信息,请告诉我。再次感谢!
vocab size: 50257
model.config GPT2Config {
"_num_labels": 2,
"activation_function": "gelu_new",
"architectures": [
"GPT2LMHeadModel"
],
"attn_pdrop": 0.1,
"bos_token_id": 50256,
"do_sample": false,
"early_stopping": false,
"embd_pdrop": 0.1,
"eos_token_ids": [
50256
],
"finetuning_task": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"initializer_range": 0.02,
"is_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"layer_norm_epsilon": 1e-05,
"length_penalty": 1.0,
"max_length": 20,
"max_token_id": 50257,
"model_type": "gpt2",
"n_ctx": 1024,
"n_embd": 256,
"n_head": 2,
"n_layer": 2,
"n_positions": 1024,
"num_beams": 1,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_past": true,
"pad_token_id": null,
"pruned_heads": {},
"repetition_penalty": 1.0,
"resid_pdrop": 0.1,
"summary_activation": null,
"summary_first_dropout": 0.1,
"summary_proj_to_labels": true,
"summary_type": "cls_index",
"summary_use_proj": true,
"task_specific_params": {
"text-generation": {
"do_sample": true,
"max_length": 50
}
},
"temperature": 1.0,
"top_k": 50,
"top_p": 1.0,
"torchscript": false,
"use_bfloat16": false,
"vocab_size": 50278
}
model GPT2LMHeadModel(
(transformer): GPT2Model(
(wte): Embedding(50278, 256)
(wpe): Embedding(1024, 256)
(drop): Dropout(p=0.1, inplace=False)
(h): ModuleList(
(0-1): 2 x Block(
(ln_1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(attn): Attention(
(c_attn): Conv1D()
(c_proj): Conv1D()
(attn_dropout): Dropout(p=0.1, inplace=False)
(resid_dropout): Dropout(p=0.1, inplace=False)
)
(ln_2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(mlp): MLP(
(c_fc): Conv1D()
(c_proj): Conv1D()
(act): NewGELUActivation()
(dropout): Dropout(p=0.1, inplace=False)
)
)
)
(ln_f): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
)
(lm_head): Linear(in_features=256, out_features=50278, bias=False)
)
Training/evaluation parameters Namespace(run_seed=True, n_gpu=1, timestamp='15', dataset='dialog', train_data_file='./resources/dialog/15/train.link_prediction', output_dir='simpledyg_ckpt/dialog/15/{7}/gpt2', model_type='gpt2', eval_data_file='./resources/dialog/15/val.link_prediction', eval_data_gt_file='./resources/dialog/15/val_gt.link_prediction', test_data_file='./resources/dialog/15/test.link_prediction', test_data_gt_file='./resources/dialog/15/test_gt.link_prediction', n_layer=2, n_head=2, n_embed=256, node_feat_file=None, should_continue=False, model_name_or_path='/home/wpk/RAG4DyG-main/gpt2_local', config_name=None, tokenizer_name=None, cache_dir=None, block_size=512, do_train=True, do_eval=False, evaluate_during_training=False, per_gpu_train_batch_size=32, per_gpu_eval_batch_size=32, gradient_accumulation_steps=1, learning_rate=0.0001, weight_decay=0.0, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=2.0, max_steps=-1, warmup_steps=0, logging_steps=500, save_steps=250, save_total_limit=None, eval_all_checkpoints=True, no_cuda=False, overwrite_cache=False, seed=7, fp16=False, fp16_opt_level='O1', local_rank=-1, patience=10, device=device(type='cuda'), para_names=['dataset', 'method', 'time', 'nlayer', 'nhead', 'nemb', 'bz', 'lr', 'seed'], para_values=['dialog', 'SimpleDyG', '15', 2, 2, 256, 32, 0.0001, 7], run_name='dataset_dialog_method_SimpleDyG_time_15_nlayer_2_nhead_2_nemb_256_bz_32_lr_0.0001_seed_7_', spl_tokens=['<|history|>', '<|endofhistory|>', '<|pre|>', '<|endofpre|>', '<|time0|>', '<|time1|>', '<|time2|>', '<|time3|>', '<|time4|>', '<|time5|>', '<|time6|>', '<|time7|>', '<|time8|>', '<|time9|>', '<|time10|>', '<|time11|>', '<|time12|>', '<|time13|>', '<|time14|>', '<|time15|>'])
file_path ./resources/dialog/15/train.link_prediction
***** Running training *****
Num examples = 7464
Num Epochs = 2.0
Instantaneous batch size per GPU = 32
Total train batch size (w. parallel, distributed & accumulation) = 32
Gradient Accumulation steps = 1
Total optimization steps = 466.0