使用DWY100k数据集对UEA进行测试，出现报错：IndexError: index 125000 is out of bounds for axis 0 with size 95500

问题遇到的现象和发生背景

下载官方代码使用DWY100k数据集对UEA进行测试

遇到的现象和发生背景，请写出第一个错误信息

出现如下报错：IndexError: index 125000 is out of bounds for axis 0 with size 95500

用代码块功能插入代码，请勿粘贴截图。不用代码块回答率下降 50%

utils代码

import tensorflow as tf
from include.Model import build_SE, training
import time
from include.Load import *
import json
import scipy
from scipy import spatial
import copy
from collections import defaultdict

def get_hits_gen(simM, test1, test2, id2confi, confi, correct, gap,
                 n_aligned=10500, extra_start=25500, extra_shift=15000):
    """Add mutual-nearest-neighbour pairs below a distance threshold to ``confi``.

    Entity ids are first translated into row / column positions of ``simM``:
    a left id ``>= extra_start`` maps to row ``id - extra_shift`` and any
    other left id maps to row ``id``; a right id maps to column
    ``id - n_aligned``.  For every selected row the smallest entry is located;
    the pair is accepted when that row is also the smallest entry of the
    chosen column and the value is below ``gap``.

    NOTE(review): the default constants presumably describe the id layout of
    the dataset the project ships with -- confirm and pass different values
    for other datasets, otherwise the translated positions fall outside
    ``simM`` (IndexError) or silently wrap around for negative results.

    Args:
        simM: 2-D NumPy array of distances (smaller means more similar).
        test1: left entity ids to evaluate.
        test2: right entity ids that may be chosen as a match.
        id2confi: dict updated in place with ``left_id -> right_id``.
        confi: list extended in place with accepted ``[left_id, right_id]``.
        correct: list extended in place with accepted pairs for which
            ``left_id + n_aligned == right_id``.
        gap: upper bound (exclusive) on the distance of an accepted pair.
        n_aligned: offset between a left id and its right counterpart; also
            the column offset and the bound used for the "matchable" count.
        extra_start: first left id that is shifted by ``extra_shift``.
        extra_shift: amount subtracted from left ids ``>= extra_start``.

    Returns:
        The (mutated) ``confi``, ``correct`` and ``id2confi`` objects.
    """
    rowindex = [ent - extra_shift if ent >= extra_start else ent for ent in test1]
    columnindex = [ent - n_aligned for ent in test2]

    # Select the rows first and the columns second; indexing with
    # ``simM[0, rowindex]`` would return a 1-D array and break the column step.
    sim = simM[rowindex][:, columnindex]

    counn = 0
    # Without any candidate column there is nothing to rank.
    if sim.shape[1] > 0:
        for i in range(len(rowindex)):
            # argsort()[0] is kept (rather than argmin) so that ties resolve
            # exactly as they always did.
            minrank = sim[i, :].argsort()[0]
            minscore = sim[i, minrank]
            minrank_col = sim[:, minrank].argsort()[0]

            # Accept only mutual nearest neighbours that are close enough.
            if minrank_col == i and minscore < gap:
                confi.append([test1[i], test2[minrank]])
                id2confi[test1[i]] = test2[minrank]
                if test1[i] + n_aligned == test2[minrank]:
                    correct.append([test1[i], test2[minrank]])
            counn += 1

    matchable = sum(1 for item in confi if item[0] < n_aligned)

    print("Evaluated: " + str(counn))
    print("Confi " + str(len(confi)))
    print("Among which matchable " + str(matchable))
    print("Correct " + str(len(correct)))
    return confi, correct, id2confi


def get_hits_gen_nochange(simM, test1, test2, id2confi, correct, gap,
                          n_aligned=10500, extra_start=25500, extra_shift=15000):
    """Find mutual-nearest-neighbour pairs below ``gap`` using a fresh ``confi`` list.

    Entity ids are translated into positions of ``simM`` before ranking:
    a left id ``>= extra_start`` maps to row ``id - extra_shift``, any other
    left id maps to row ``id``, and a right id maps to column
    ``id - n_aligned``.  A pair is accepted when the row's smallest entry is
    also the smallest entry of its column and the value is below ``gap``.

    NOTE(review): the default constants presumably describe the id layout of
    the dataset the project ships with -- confirm and pass different values
    for other datasets.

    Args:
        simM: 2-D NumPy array of distances (smaller means more similar).
        test1: left entity ids to evaluate.
        test2: right entity ids that may be chosen as a match.
        id2confi: dict updated in place with ``left_id -> right_id``.
        correct: list extended in place with accepted pairs for which
            ``left_id + n_aligned == right_id``.
        gap: upper bound (exclusive) on the distance of an accepted pair.
        n_aligned: offset between a left id and its right counterpart; also
            the column offset.
        extra_start: first left id that is shifted by ``extra_shift``.
        extra_shift: amount subtracted from left ids ``>= extra_start``.

    Returns:
        ``(confi, correct, id2confi)`` where ``confi`` is a new list holding
        only the pairs accepted by this call.
    """
    rowindex = [ent - extra_shift if ent >= extra_start else ent for ent in test1]
    columnindex = [ent - n_aligned for ent in test2]
    sim = simM[rowindex][:, columnindex]

    counn = 0
    confi = []
    # Without any candidate column there is nothing to rank.
    if sim.shape[1] > 0:
        for i in range(len(rowindex)):
            # argsort()[0] is kept (rather than argmin) so that ties resolve
            # exactly as they always did.
            minrank = sim[i, :].argsort()[0]
            minscore = sim[i, minrank]
            # Column-wise check: is this row also the best one for the column?
            minrank_col = sim[:, minrank].argsort()[0]

            if minrank_col == i and minscore < gap:
                confi.append([test1[i], test2[minrank]])
                id2confi[test1[i]] = test2[minrank]
                if test1[i] + n_aligned == test2[minrank]:
                    correct.append([test1[i], test2[minrank]])
            counn += 1

    print("Evaluated: " + str(counn))
    print("Confi " + str(len(confi)))
    print("Correct " + str(len(correct)))

    return confi, correct, id2confi

def getsim_matrix_cosine(vec, test_left, test_right):
    """Return the cosine-distance matrix between two groups of embeddings.

    Row ``r`` of the result belongs to ``test_left[r]`` and column ``c`` to
    ``test_right[c]``; each entry is ``1 - cosine_similarity`` of the two
    corresponding rows of ``vec``.

    Args:
        vec: embedding table indexed by entity id; ``vec.shape[1]`` is the
            embedding width.
        test_left: entity ids that form the rows of the result.
        test_right: entity ids that form the columns of the result.

    Returns:
        The array produced by ``sess.run`` subtracted from 1.
    """
    Lvec = tf.placeholder(tf.float32, [None, vec.shape[1]])
    Rvec = tf.placeholder(tf.float32, [None, vec.shape[1]])
    # Cosine similarity is the dot product of the L2-normalised vectors.
    he = tf.nn.l2_normalize(Lvec, dim=-1)
    norm_e_em = tf.nn.l2_normalize(Rvec, dim=-1)
    aep = tf.matmul(he, tf.transpose(norm_e_em))

    Lv = np.array([vec[e1] for e1 in test_left])
    Rv = np.array([vec[e2] for e2 in test_right])

    # Close the session once the result is fetched; this function is called
    # repeatedly and an unclosed session keeps its resources alive.
    with tf.Session() as sess:
        cosine = sess.run(aep, feed_dict={Lvec: Lv, Rvec: Rv})
    return 1 - cosine

def get_hits_ma(sim, test_pair, top_k=(1, 10)):
    """Print Hits@k and MRR figures computed from a distance matrix.

    For every row index ``r`` below 10500 the columns of that row are ranked
    in ascending order and the position of column ``r`` in the ranking is
    looked up.  That position feeds the hit counters (one per entry of
    ``top_k``) and the reciprocal-rank sum.  Nothing is returned; two
    messages are printed.

    NOTE(review): presumably ``sim`` is laid out so that the correct match
    of row ``r`` sits in column ``r`` -- confirm against the caller.  The
    origin of the hard-coded bound 10500 and denominator 14888 is not
    visible here.

    Args:
        sim: 2-D NumPy array of distances (smaller means more similar).
        test_pair: only its length is used, as the denominator of the first
            message.
        top_k: rank cut-offs; the first message reads the first two counters.
    """
    hit_counts = [0 for _ in top_k]
    reciprocal_total = 0
    for row in range(sim.shape[0]):
        order = sim[row, :].argsort()
        if row >= 10500:
            continue
        position = np.where(order == row)[0][0]
        reciprocal_total = reciprocal_total + 1.0 / (position + 1)
        for slot, cutoff in enumerate(top_k):
            if position < cutoff:
                hit_counts[slot] += 1
    msg = 'Hits@1:%.3f, Hits@10:%.3f, MRR:%.3f\n' % (
        hit_counts[0] / len(test_pair),
        hit_counts[1] / len(test_pair),
        reciprocal_total / len(test_pair),
    )
    print(msg)
    msg = 'Hits@1:%.3f\n' % (hit_counts[0] / 14888)
    print(msg)

main代码

import tensorflow as tf
from include.Model import build_SE, training
from include.utils import get_hits_gen, getsim_matrix_cosine, get_hits_ma
import time
from include.Load import *
import json
import scipy
from scipy import spatial
import copy
from collections import defaultdict


import os
# Expose only CUDA devices 0, 1 and 3 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,3"

# def make_print_to_file(fileName, path='./'):
#     import sys
#     import os
#     import sys
#     import datetime
#
#     class Logger(object):
#         def __init__(self, filename="Default.log", path="./"):
#             self.terminal = sys.stdout
#             self.log = open(os.path.join(path, filename), "a", encoding='utf8',)
#
#         def write(self, message):
#             self.terminal.write(message)
#             self.log.write(message)
#
#         def flush(self):
#             pass
#     sys.stdout = Logger(fileName + '.log', path=path)
#     print(fileName.center(60,'*'))

# Seed NumPy's and TensorFlow's random generators with one fixed value.
# NOTE(review): `np` is not imported explicitly in this file; presumably it
# comes from `from include.Load import *` -- confirm.
seed = 12306
np.random.seed(seed)
tf.set_random_seed(seed)

import argparse
if __name__ == '__main__':
    def _str2bool(value):
        """Parse a command-line boolean; ``type=bool`` would treat "False" as True."""
        text = str(value).strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        # The empty string used to be the only way to switch the flag off.
        if text in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))

    parser = argparse.ArgumentParser(description='UEA')
    parser.add_argument('--lan', type=str, default='zh_en')
    parser.add_argument('--alpha', type=float, default=0.5)
    parser.add_argument('--beta', type=float, default=0.5)

    parser.add_argument('--thres', type=float, default=0.05)  # initial threshold
    parser.add_argument('--inc', type=float, default=0.1)  # the increment of threshold
    parser.add_argument('--stopThres', type=float, default=0.45)  # the maximum of threshold

    parser.add_argument('--adj', type=_str2bool, default=True)  # whether dynamically adjust the threshold
    parser.add_argument('--fixedThres', type=float, default=0.45)  # if adj is false, one should set a fixed weight

    args = parser.parse_args()
    print(args)

    language = args.lan
    e1 = 'data/' + language + '/ent_ids_1'
    e2 = 'data/' + language + '/ent_ids_2'
    r1 = 'data/' + language + '/rel_ids_1'
    r2 = 'data/' + language + '/rel_ids_2'
    ill = 'data/' + language + '/ref_ent_ids'
    kg1 = 'data/' + language + '/triples_1'
    kg2 = 'data/' + language + '/triples_2'
    # e1_trans = 'data/' + language + '/ent_ids_1_trans_goo'
    sup = 'data/' + language + '/sup_ent_ids'
    epochs_se = 300
    epochs_ae = 600
    se_dim = 300
    ae_dim = 100
    act_func = tf.nn.relu
    gamma = 3.0  # margin based loss
    k = 25  # number of negative samples for each positive one
    seed = 3  # 30% of seeds
    beta = 0.9  # weight of SE (not read below; the combination uses args.beta)

    t = time.time()
    e = len(set(loadfile(e1, 1)) | set(loadfile(e2, 1)))  # total number of distinct entity ids
    ILL = loadfile(ill, 2)
    illL = len(ILL)
    test = ILL

    # Pick the evaluated entities by their line position in the id files.
    # NOTE(review): the bounds 10500 / 15000 are hard-coded and have to agree
    # with the id arithmetic in include.utils.get_hits_gen; they presumably fit
    # the default dataset only -- adjust them for other datasets.
    test_left = []
    test_right = []
    with open(e1, "rb") as inf:
        for i, line in enumerate(inf):
            strs = line.decode().strip().split('\t')
            if i < 10500 or i >= 15000:
                test_left.append(int(strs[0]))
    with open(e2, "rb") as inf:
        for i, line in enumerate(inf):
            strs = line.decode().strip().split('\t')
            if i < 10500:  # or i >= 15000:
                test_right.append(int(strs[0]))

    seedss = loadfile(sup, 2)
    KG1 = loadfile(kg1, 3)
    KG2 = loadfile(kg2, 3)

    path = 'data'  # 'entity-alignment-full-data'
    lang = language.split('_')[0]
    with open(file='data/' + lang + '_en/' + lang + '_vectorList.json', mode='r', encoding='utf-8') as f:
        embedding_list = json.load(f)
    ne_vec = np.array(embedding_list)

    # Textual distance: weighted mix of the name-embedding cosine distance and
    # the (inverted) string matrix loaded from disk.
    str_sim = np.load('./' + path + '/' + language + '/string_mat.npy')
    str_sim = 1 - str_sim
    aep_n = getsim_matrix_cosine(ne_vec, test_left, test_right)
    text_combine = aep_n * args.alpha + str_sim * (1 - args.alpha)

    clenold = 0
    id2confi = dict()
    confi = []
    correct = []
    if args.adj:
        thres = args.thres
    else:
        thres = args.fixedThres

    confi, correct, id2confi = get_hits_gen(text_combine, test_left, test_right, id2confi, confi, correct, thres)
    countt = 0

    if len(confi) < 10499:
        # Keep iterating while a round still adds more than 30 confident pairs.
        while len(confi) - clenold > 30:
            print('ROUND ' + str(countt))
            train = copy.deepcopy(confi)
            train = np.array(train)
            clenold = len(confi)

            # Entities that are not part of a confident pair yet.
            matched_right = set(id2confi.values())  # built once: O(1) membership tests
            test1 = [ee for ee in test_left if ee not in id2confi]
            test2 = [ee for ee in test_right if ee not in matched_right]

            print("Generating structural embeddings.... ")
            output_layer, loss, = build_SE(se_dim, act_func, gamma, k, e, train, KG1 + KG2)
            se_vec, J = training(output_layer, loss, 25, epochs_se, train, e, k)
            countt += 1

            aep = getsim_matrix_cosine(se_vec, test_left, test_right)
            combine = aep * (1 - args.beta) + text_combine * (args.beta)
            if args.adj:
                # Raise the threshold by args.inc per round, capped at stopThres.
                if thres >= args.stopThres:
                    thres = args.stopThres
                else:
                    thres = thres + args.inc
            else:
                thres = args.fixedThres

            confi, correct, id2confi = get_hits_gen(combine, test1, test2, id2confi, confi, correct, thres)
            print()

    conf = 0
    for item in confi:
        if item[0] < 10500:
            conf += 1
    corr = 0
    for item in correct:
        if item[0] < 10500:
            corr += 1

    print("Confi: " + str(len(confi)))
    print("Matchable: " + str(conf))
    print("Correct: " + str(corr))

    # Report 0.0 instead of dividing by zero when no pair was accepted.
    precision = corr * 1.0 / len(confi) if confi else 0.0
    print("Precision: " + str(precision))
    print("Recall: " + str(corr * 1.0 / 10500))
    print("total time elapsed: {:.4f} s".format(time.time() - t))

运行结果及详细报错内容

Traceback (most recent call last):
File "main.py", line 119, in <module>
confi, correct, id2confi = get_hits_gen(text_combine, test_left, test_right, id2confi, confi, correct, thres)
File "/home/cclsol/zzy/UEA-main/include/utils.py", line 21, in get_hits_gen
partialsim = simM[rowindex]
IndexError: index 125000 is out of bounds for axis 0 with size 95500

我的解答思路和尝试过的方法，不写自己思路的，回答率下降 60%

该错误是由于索引超出了列表的长度引起的，但是不会改。

我想要达到的结果，如果你需要快速回答，请尝试 “付费悬赏”

写回答
好问题 0 提建议
追加酬金
关注问题
分享
邀请回答
编辑收藏删除
收藏举报

4条回答默认最新

关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
王富贵儿619 2023-01-31 01:20
关注
这个错误是由于使用的数据集大小不匹配导致的。报错提示的 index 125000 超出了索引范围，数组大小为 95500。

解决方案是检查数据集的大小是否与代码预期的大小匹配，并修正代码中的数据集引用，以避免超出索引范围。例如，utils 和 main 中写死的 10500、15000、25500 等常数是按默认数据集（zh_en）的实体编号设定的，换用 DWY100k 后需要按新数据集的实体数量和编号方式相应修改。如果需要，也可以考虑重新核对数据集的大小是否正确。

本回答被题主选为最佳回答 , 对您是否有帮助呢?

解决无用
评论打赏
分享
举报

评论

按下Enter换行，Ctrl+Enter发表内容

查看更多回答(3条)

报告相同问题？

关注问题

使用DWY100k数据集对UEA进行测试，出现报错：IndexError: index 125000 is out of bounds for axis 0 with size 95500 python tensorflow 深度学习
2023-01-31 00:52

回答 4 已采纳这个错误是由于使用的数据集大小不匹配导致的。报错提示的 index 125000 超出了索引范围，数组大小为 95500。解决方案是检查数据集的大小是否与代码预期的大小匹配，并修正代码中的数据集引用，
yolo video python cv2图像转PIL fromarray报错 python
2019-04-02 20:26

回答 1 已采纳 frame 对象是个NONE fromarray 这个方法不支持none对象
如何使用PHP反转strrev php
2015-05-05 08:41

回答 1 已采纳 Not very complicated and quite malicius if you ask me... the principle is to eval() that base64 en
dwy-cache:javascript数据缓存
2021-05-11 17:30

dwy-cachea javascript data cache plugin插件已经实现了三种缓存过期策略，可以根据项目需求选用合适的策略FIFO：First In First Out，先进先出LRU：Least Recently Used，最近最少使用LFU：Least Frequently Used...
最先进的实体对齐方法的实验研究综述 An Experimental Study of State-of-the-Art Entity Alignment Approaches
2022-06-01 11:09

boywaiter的博客实体对齐 (EA) 寻找位于不同知识图谱 (KG) 中的等价实体，这是提高 KG 质量的重要步骤，因此对下游应用程序（例如，问答和推荐）具有重要意义。本研究力求清晰地展示当前 EA 方法的优缺点，以激发高质量的后续研究。...
论文简读-COTSAE-《COTSAE: CO-Training of Structure and Attribute Embeddings for Entity Alignment》
2020-06-19 23:46

六娃_lw的博客 COTSAE: CO-Training of Structure and Attribute Embeddings for Entity Alignment 1. 摘要实体对齐是知识图构建和融合中的一项基本而重要的任务。以往的工作主要是通过学习关系三元组上的实体嵌入(entity ...
python36.dll 0xc000005_使用python运行时出现0xc000005错误
2021-02-03 06:40

寒水微痕的博客在但是，由于我对这个领域了解不多，所以我试图修改源代码以使它们能够正常工作(因为实际上，在github上发布的python下的所有runPE项目目前都无法工作)。在So I decided to train under the project: ...
基于图卷积的属性增强实体对齐方法研究+人工智能+知识图谱+预训练模型
2023-01-18 16:11

最后给出另外一个数据集DWY100K上的实验效果。实验表明，图卷积对关系结构比属性结构的对齐效果更加显著，属性结构对关系结构的辅助能有效提升对齐效果。同时对比规模不同的数据集，发现数据集的规模越大，实体...
微信小游戏 dwy （源码）.zip
2023-01-25 14:40

微信小游戏（源码）
小游戏源码 dwy （微信公众号小游戏）.rar
2023-01-25 14:07

微信公众号小游戏源码
论文笔记011-《COTSAE CO-Training of Structure and Attribute Embeddings for Entity Alignment》
2020-12-11 15:24

Jack_Can的博客题目：《COTSAE CO-Training of Structure and Attribute Embeddings for Entity Alignment》来源：AAAI-2020 链接：论文链接代码：Code和Dataset 关键字：Structure and Attribute Embeddings、Entity Alignment ...
DWY系列全液压轮斗挖掘机在国投哈密一矿的应用
2020-05-04 07:34

介绍了DWY系列全液压轮斗挖掘机的主要构成、工艺流程及性能参数,总结了全液压轮斗挖掘机在国投哈密一矿选型、应用及发展情况,论述了全液压轮斗挖掘机在露天开采工艺中的优势及前景。
使用python控制其他软件运行并操作处理数据_Python 运行其他程序
2021-02-05 05:07

星小呆bot的博客 10.4 运行其他程序在Python中可以方便地使用os模块运行其他的脚本或者程序，这样就可以在脚本中直接使用其他脚本，或者程序提供的功能，而不必再次编写实现该功能的代码。为了更好地控制运行的进程，可以使用win32...
《内网安全攻防：渗透测试实战指南》读书笔记（四）：权限提升分析及防御
2022-04-12 21:41

思源湖的鱼的博客本篇继续阅读学习《内网安全攻防：渗透测试实战指南》，是第四章权限提升分析及防御，本章主要分析了系统的内核溢出漏洞提权、利用Windows操作系统错误配置提权、利用组策略首选项提权、绕过UAC提权、令牌窃取及无...
实体对齐（Entity Alignment）相关论文与数据集整理
2021-03-21 21:46

BISTU_CD的博客实体对齐（Entity Alignment）、知识图谱融合方法总结整理年份模型 ...实体对齐数据集整理名称 — DBpe-dia（DBP） LinkedGeoData（LGD） Geonames（GEO） YAGO ...
DWY3000型全液压轮斗挖掘机工艺及其适用性分析
2020-05-13 21:05

DWY3000型全液压轮斗挖掘机可以大幅度降低单斗液压反铲挖掘机在露天矿采煤工程的使用数量,不仅能够明显提高露天煤矿开采的生产效率、降低生产成本还在挖掘过程中通过轮斗挖掘机的切割破碎有效控制了供煤粒径。
bert-ini:一种基于bert的实体对齐交互模型
2022-04-04 22:19

未来科技工作室的博客本次分享一项来自IJCAI-20的实体对齐工作，该工作提出了一种不同于以往的采用图结构信息进行实体对齐的技术方案，该方案取得了当时的最好效果；不仅如此，模型本身的对齐效率在该项工作中也被考虑，实际工作中非常...
DWY12864I-c.rar_嵌入式/单片机/硬件编程_C/C++_
2021-08-10 01:44

51单片机使用C语言实现128×64点阵LCM控制
2023-12-12 使用Android studio ndk命令编译jni C:\Users\x\AppData\Local\Android\Sdk\ndk-bundle\ndk-build.cmd
2023-12-12 10:43

Donald Linux的博客【代码】2023-12-12 使用Android studio ndk命令编译jni C:\Users\x\AppData\Local\Android\Sdk\ndk-bundle\ndk-build.cmd。
论文简读-BERT-INT-《 A BERT-based Interaction Model For Knowledge Graph Alignment》
2020-08-23 17:00

六娃_lw的博客论文简读-BERT-INT-《 A BERT-based Interaction Model For Knowledge Graph Alignment》 IJCAI 2020 1. 动机 (1). 在实体对齐任务中，知识图谱的side information（边缘信息：包括名称、描述和属性）比structural ...
没有解决我的问题, 去提问

问题事件

关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
系统已结题 2月9日
关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
已采纳回答 2月1日
关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
创建了问题 1月31日

悬赏问题

¥20 机器学习能否像多层线性模型一样处理嵌套数据
¥20 西门子S7-Graph,S7-300，梯形图
¥50 用易语言http 访问不了网页
¥50 safari浏览器fetch提交数据后数据丢失问题
¥15 matlab不知道怎么改，求解答！！
¥15 永磁直线电机的电流环pi调不出来
¥15 用stata实现聚类的代码
¥15 请问paddlehub能支持移动端开发吗？在Android studio上该如何部署？
¥20 docker里部署springboot项目，访问不到扬声器
¥15 netty整合springboot之后自动重连失效