j1234567890987cbyl 2023-01-31 00:52 采纳率: 100%
浏览 65
已结题

使用DWY100k数据集对UEA进行测试,出现报错:IndexError: index 125000 is out of bounds for axis 0 with size 95500

问题遇到的现象和发生背景

下载官方代码使用DWY100k数据集对UEA进行测试

遇到的现象和发生背景,请写出第一个错误信息

出现如下报错:IndexError: index 125000 is out of bounds for axis 0 with size 95500

用代码块功能插入代码,请勿粘贴截图。 不用代码块回答率下降 50%

utils代码

import tensorflow as tf
from include.Model import build_SE, training
import time
from include.Load import *
import json
import scipy
from scipy import spatial
import copy
from collections import defaultdict

def get_hits_gen(simM, test1, test2, id2confi, confi, correct, gap):
    """Collect mutual-nearest-neighbour pairs whose distance is below ``gap``.

    The rows of ``simM`` belonging to ``test1`` and the columns belonging to
    ``test2`` are selected.  A pair ``(test1[i], test2[j])`` is accepted when
    column ``j`` holds the smallest value of row ``i``, row ``i`` holds the
    smallest value of column ``j``, and that value is strictly below ``gap``.

    Args:
        simM: 2-D array indexed ``[row, column]``; smaller values are treated
            as better matches.
        test1: Ids selecting rows.  Ids ``>= 25500`` are shifted down by
            15000 to obtain the row position; smaller ids are used as is.
        test2: Ids selecting columns; each id is shifted down by 10500.
        id2confi: Dict updated in place with ``test1 id -> test2 id`` for
            every accepted pair.
        confi: List extended in place with the accepted ``[id1, id2]`` pairs.
        correct: List extended in place with the accepted pairs for which
            ``id1 + 10500 == id2``.
        gap: Exclusive upper bound on the accepted distance.

    Returns:
        The (mutated) ``confi``, ``correct`` and ``id2confi`` objects.

    Raises:
        IndexError: If a shifted id falls outside ``simM``.
    """
    # NOTE(review): 25500 / 15000 / 10500 are hard-coded id offsets,
    # presumably tied to one particular dataset's id layout -- confirm they
    # match the data before running on a different dataset.
    rowindex = [iii - 15000 if iii >= 25500 else iii for iii in test1]
    columnindex = [iii - 10500 for iii in test2]

    # Select whole rows first, then columns.  ``simM[0, rowindex]`` would pick
    # entries of row 0 only and return a 1-D array, which breaks the column
    # selection below.
    sim = simM[rowindex][:, columnindex]

    counn = 0
    for i, left_id in enumerate(test1):
        row = sim[i, :]
        minrank = row.argsort()[0]
        minscore = row[minrank]
        # Row i must also be the best candidate of its best column.
        minrank_col = sim[:, minrank].argsort()[0]
        if minrank_col == i and minscore < gap:
            right_id = test2[minrank]
            confi.append([left_id, right_id])
            id2confi[left_id] = right_id
            if left_id + 10500 == right_id:
                correct.append([left_id, right_id])
        counn += 1

    matchable = sum(1 for item in confi if item[0] < 10500)

    print("Evaluated: " + str(counn))
    print("Confi " + str(len(confi)))
    print("Among which matchable " + str(matchable))
    print("Correct " + str(len(correct)))
    return confi, correct, id2confi


def get_hits_gen_nochange(simM, test1, test2, id2confi, correct, gap):
    """Collect mutual-nearest-neighbour pairs below ``gap`` into a new list.

    Works like a mutual-best-match filter on the sub-matrix of ``simM``
    selected by ``test1`` (rows) and ``test2`` (columns): a pair
    ``(test1[i], test2[j])`` is accepted when ``j`` is the smallest entry of
    row ``i``, ``i`` is the smallest entry of column ``j``, and that value is
    strictly below ``gap``.  The accepted pairs are returned in a fresh list;
    ``id2confi`` and ``correct`` are still updated in place.

    Args:
        simM: 2-D array indexed ``[row, column]``; smaller values are treated
            as better matches.
        test1: Ids selecting rows.  Ids ``>= 25500`` are shifted down by
            15000 to obtain the row position; smaller ids are used as is.
        test2: Ids selecting columns; each id is shifted down by 10500.
        id2confi: Dict updated in place with ``test1 id -> test2 id``.
        correct: List extended in place with the accepted pairs for which
            ``id1 + 10500 == id2``.
        gap: Exclusive upper bound on the accepted distance.

    Returns:
        ``(confi, correct, id2confi)`` where ``confi`` is a new list of the
        ``[id1, id2]`` pairs accepted by this call.

    Raises:
        IndexError: If a shifted id falls outside ``simM``.
    """
    # NOTE(review): 25500 / 15000 / 10500 are hard-coded id offsets,
    # presumably tied to one particular dataset's id layout -- confirm they
    # match the data before running on a different dataset.
    rowindex = [iii - 15000 if iii >= 25500 else iii for iii in test1]
    columnindex = [iii - 10500 for iii in test2]
    sim = simM[rowindex][:, columnindex]

    counn = 0
    confi = []
    for i, left_id in enumerate(test1):
        row = sim[i, :]
        minrank = row.argsort()[0]
        minscore = row[minrank]
        # Row i must also be the best candidate of its best column.  (The
        # former second-best-gap computation was unused and failed on
        # single-row matrices, so it is gone.)
        minrank_col = sim[:, minrank].argsort()[0]
        if minrank_col == i and minscore < gap:
            right_id = test2[minrank]
            confi.append([left_id, right_id])
            id2confi[left_id] = right_id
            if left_id + 10500 == right_id:
                correct.append([left_id, right_id])
        counn += 1

    print("Evaluated: " + str(counn))
    print("Confi " + str(len(confi)))
    print("Correct " + str(len(correct)))

    return confi, correct, id2confi

def getsim_matrix_cosine(vec, test_left, test_right):
    """Return the pairwise cosine distance (``1 - cosine similarity``).

    Args:
        vec: 2-D embedding array; ``vec[e]`` must be the embedding of id
            ``e`` and ``vec.shape[1]`` the embedding width.
        test_left: Ids whose embeddings form the rows of the result.
        test_right: Ids whose embeddings form the columns of the result.

    Returns:
        Array with one row per id in ``test_left`` and one column per id in
        ``test_right``; smaller values mean more similar embeddings.

    Raises:
        IndexError: If an id is not a valid index into ``vec``.
    """
    Lvec = tf.placeholder(tf.float32, [None, vec.shape[1]])
    Rvec = tf.placeholder(tf.float32, [None, vec.shape[1]])
    he = tf.nn.l2_normalize(Lvec, dim=-1)
    norm_e_em = tf.nn.l2_normalize(Rvec, dim=-1)
    # Dot products of unit vectors are cosine similarities.
    cosine = tf.matmul(he, tf.transpose(norm_e_em))

    Lv = np.array([vec[e1] for e1 in test_left])
    Rv = np.array([vec[e2] for e2 in test_right])

    # Close the session when done; this function is called repeatedly and an
    # unclosed session keeps its resources alive.
    with tf.Session() as sess:
        aep = sess.run(cosine, feed_dict={Lvec: Lv, Rvec: Rv})
    return 1 - aep

def get_hits_ma(sim, test_pair, top_k=(1, 10)):
    """Print Hits@k and MRR for a distance matrix with a diagonal gold match.

    Row ``i`` is expected to match column ``i``: its rank is the position of
    column ``i`` when the row is sorted in ascending order.  Only rows with
    index below 10500 are scored.  The first report normalises by
    ``len(test_pair)``; the second prints Hits@1 normalised by the fixed
    value 14888.

    Args:
        sim: 2-D array indexed ``[row, column]``; smaller means better.
        test_pair: Sized collection; only its length is used.
        top_k: Rank cut-offs; the report reads the first two entries.

    Returns:
        None. The results are printed.
    """
    hit_counts = [0 for _ in range(len(top_k))]
    reciprocal_rank_total = 0
    for row_id in range(sim.shape[0]):
        order = sim[row_id, :].argsort()
        if row_id >= 10500:
            continue
        # Zero-based position of the gold column in the sorted row.
        position = np.where(order == row_id)[0][0]
        reciprocal_rank_total += 1.0 / (position + 1)
        for slot, cutoff in enumerate(top_k):
            if position < cutoff:
                hit_counts[slot] += 1

    n_pairs = len(test_pair)
    summary = 'Hits@1:%.3f, Hits@10:%.3f, MRR:%.3f\n' % (
        hit_counts[0] / n_pairs,
        hit_counts[1] / n_pairs,
        reciprocal_rank_total / n_pairs,
    )
    print(summary)
    print('Hits@1:%.3f\n' % (hit_counts[0] / 14888))


main代码

import tensorflow as tf
from include.Model import build_SE, training
from include.utils import get_hits_gen, getsim_matrix_cosine, get_hits_ma
import time
from include.Load import *
import json
import scipy
from scipy import spatial
import copy
from collections import defaultdict


import os
# Set the CUDA_VISIBLE_DEVICES environment variable for this process to the
# device list "0,1,3".
# NOTE(review): this assignment runs after `import tensorflow` above --
# confirm it still takes effect before any GPU device is initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,3"

# def make_print_to_file(fileName, path='./'):
#     import sys
#     import os
#     import sys
#     import datetime
#
#     class Logger(object):
#         def __init__(self, filename="Default.log", path="./"):
#             self.terminal = sys.stdout
#             self.log = open(os.path.join(path, filename), "a", encoding='utf8',)
#
#         def write(self, message):
#             self.terminal.write(message)
#             self.log.write(message)
#
#         def flush(self):
#             pass
#     sys.stdout = Logger(fileName + '.log', path=path)
#     print(fileName.center(60,'*'))

# Seed NumPy and TensorFlow with the same fixed value so runs are repeatable.
# NOTE(review): `np` is not imported explicitly in this file; presumably it
# comes from `from include.Load import *` -- confirm.
seed = 12306
np.random.seed(seed)
tf.set_random_seed(seed)

import argparse


def _str2bool(value):
    """Parse a command-line boolean.

    ``type=bool`` is not usable with argparse: ``bool("False")`` is ``True``
    because any non-empty string is truthy.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % value)


# Script entry point: build a text-based distance matrix, then alternately
# (1) pick confident mutual-nearest pairs and (2) retrain structural
# embeddings on those pairs, until fewer than ~30 new pairs are found.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='UEA')
    parser.add_argument('--lan', type=str, default='zh_en')
    parser.add_argument('--alpha', type=float, default=0.5)
    parser.add_argument('--beta', type=float, default=0.5)

    parser.add_argument('--thres', type=float, default=0.05) # initial threshold
    parser.add_argument('--inc', type=float, default=0.1)  # the increment of threshold
    parser.add_argument('--stopThres', type=float, default=0.45)  # the maximum of threshold

    parser.add_argument('--adj', type=_str2bool, default=True) # whether dynamically adjust the threshold
    parser.add_argument('--fixedThres', type=float, default=0.45) # if adj is false, one should set a fixed weight


    args = parser.parse_args()
    print(args)

    language = args.lan
    e1 = 'data/' + language + '/ent_ids_1'
    e2 = 'data/' + language + '/ent_ids_2'
    r1 = 'data/' + language + '/rel_ids_1'
    r2 = 'data/' + language + '/rel_ids_2'
    ill = 'data/' + language + '/ref_ent_ids'
    kg1 = 'data/' + language + '/triples_1'
    kg2 = 'data/' + language + '/triples_2'
    # e1_trans = 'data/' + language + '/ent_ids_1_trans_goo'
    sup = 'data/' + language + '/sup_ent_ids'
    epochs_se = 300
    epochs_ae = 600
    se_dim = 300
    ae_dim = 100
    act_func = tf.nn.relu
    gamma = 3.0  # margin based loss
    k = 25  # number of negative samples for each positive one
    seed = 3  # 30% of seeds
    beta = 0.9  # weight of SE (not read below; args.beta is what gets applied)

    t = time.time()
    e = len(set(loadfile(e1, 1)) | set(loadfile(e2, 1))) # print(e)
    ILL = loadfile(ill, 2)
    illL = len(ILL)
    test = ILL

    # NOTE(review): the line-number cut-offs 10500 / 15000 used below (and the
    # 10499 / 10500 totals further down) are hard-coded; presumably they match
    # one specific dataset split -- confirm before using other data.
    test_left = []
    test_right = []
    with open(e1, "rb") as inf:
        for i, line in enumerate(inf):
            strs = line.decode().strip().split('\t')
            if i < 10500 or i >= 15000:
                test_left.append(int(strs[0]))
    with open(e2, "rb") as inf:
        for i, line in enumerate(inf):
            strs = line.decode().strip().split('\t')
            if i < 10500:  # or i >= 15000:
                test_right.append(int(strs[0]))

    seedss = loadfile(sup, 2)
    KG1 = loadfile(kg1, 3)
    KG2 = loadfile(kg2, 3)

    path = 'data' #'entity-alignment-full-data'
    lang = language.split('_')[0]
    with open(file='data/' + lang + '_en/' + lang + '_vectorList.json', mode='r', encoding='utf-8') as f:
        embedding_list = json.load(f)
    ne_vec = np.array(embedding_list)

    # Blend the embedding distance with the string distance (1 - similarity).
    str_sim = np.load('./'+path+'/' + language + '/string_mat.npy')
    str_sim = 1 - str_sim
    aep_n = getsim_matrix_cosine(ne_vec, test_left, test_right)
    text_combine = aep_n * args.alpha + str_sim * (1 - args.alpha)

    clenold = 0
    id2confi = dict()
    confi = []
    correct = []
    if args.adj:
        thres = args.thres
    else:
        thres = args.fixedThres

    confi, correct, id2confi = get_hits_gen(text_combine, test_left, test_right, id2confi, confi, correct, thres)
    countt = 0

    if len(confi) < 10499:
        # Keep iterating while a round adds more than 30 new confident pairs.
        while len(confi) - clenold > 30:
            print('ROUND ' + str(countt))
            train = copy.deepcopy(confi)
            train = np.array(train)
            clenold = len(confi)

            # Entities not yet aligned on either side; a set makes the
            # right-hand membership test O(1) instead of scanning all values.
            matched_right = set(id2confi.values())
            test1 = [ee for ee in test_left if ee not in id2confi]
            test2 = [ee for ee in test_right if ee not in matched_right]

            print("Generating structural embeddings.... ")
            output_layer, loss, = build_SE(se_dim, act_func, gamma, k, e, train, KG1 + KG2)
            se_vec, J = training(output_layer, loss, 25, epochs_se, train, e, k)
            countt += 1

            aep = getsim_matrix_cosine(se_vec, test_left, test_right)
            combine = aep * (1-args.beta) + text_combine * (args.beta)
            if args.adj:
                # Raise the threshold by args.inc per round, capped at stopThres.
                if thres >= args.stopThres:
                    thres = args.stopThres
                else:
                    thres = thres + args.inc
            else:
                thres = args.fixedThres

            confi, correct, id2confi = get_hits_gen(combine, test1, test2, id2confi, confi, correct,thres)
            print()

    conf = sum(1 for item in confi if item[0] < 10500)
    corr = sum(1 for item in correct if item[0] < 10500)

    print("Confi: " + str(len(confi)))
    print("Matchable: " + str(conf))
    print("Correct: " + str(corr))

    # Avoid ZeroDivisionError when no pair was accepted at all.
    precision = corr * 1.0 / len(confi) if confi else 0.0
    print("Precision: " + str(precision))
    print("Recall: " + str(corr * 1.0 / 10500))
    print("total time elapsed: {:.4f} s".format(time.time() - t))
运行结果及详细报错内容

Traceback (most recent call last):
  File "main.py", line 119, in <module>
    confi, correct, id2confi = get_hits_gen(text_combine, test_left, test_right, id2confi, confi, correct, thres)
  File "/home/cclsol/zzy/UEA-main/include/utils.py", line 21, in get_hits_gen
    partialsim = simM[rowindex]
IndexError: index 125000 is out of bounds for axis 0 with size 95500

我的解答思路和尝试过的方法,不写自己思路的,回答率下降 60%

该错误是由于索引超出了列表的长度引起的,但是不会改。

我想要达到的结果,如果你需要快速回答,请尝试 “付费悬赏”
  • 写回答

4条回答 默认 最新

  • 王富贵儿619 2023-01-31 01:20
    关注

    这个错误是由于使用的数据集大小不匹配导致的。报错提示的 index 125000 超出了索引范围,数组大小为 95500。

    解决方案是检查数据集的大小是否与代码预期的大小匹配,并修正代码中的数据集引用,以避免超出索引范围。如果需要,也可以考虑重新核对数据集的大小是否正确。

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(3条)

报告相同问题?

问题事件

  • 系统已结题 2月9日
  • 已采纳回答 2月1日
  • 创建了问题 1月31日

悬赏问题

  • ¥20 机器学习能否像多层线性模型一样处理嵌套数据
  • ¥20 西门子S7-Graph,S7-300,梯形图
  • ¥50 用易语言http 访问不了网页
  • ¥50 safari浏览器fetch提交数据后数据丢失问题
  • ¥15 matlab不知道怎么改,求解答!!
  • ¥15 永磁直线电机的电流环pi调不出来
  • ¥15 用stata实现聚类的代码
  • ¥15 请问paddlehub能支持移动端开发吗?在Android studio上该如何部署?
  • ¥20 docker里部署springboot项目,访问不到扬声器
  • ¥15 netty整合springboot之后自动重连失效