问题遇到的现象和发生背景
我在参加【零基础入门推荐系统 - 新闻推荐】时,特征工程这一块给训练集打标签是出现这个问题
问题相关代码,请勿粘贴截图
#get_user_recall_item_label_df()是我定义的一个打标签函数
# 给训练验证数据打标签,并负采样(这一部分时间比较久)
trn_user_item_label_df, val_user_item_label_df, tst_user_item_label_df = get_user_recall_item_label_df(click_trn_hist,
click_val_hist,
click_tst_hist,
click_trn_last,
click_val_last,
recall_list_df)
trn_user_item_label_df.label
#将召回数据转换成字典
# 将最终的召回的df数据转换成字典的形式做排序特征
def make_tuple_func(group_df):
row_data = []
for name, row_df in group_df.iterrows():
row_data.append((row_df['sim_item'], row_df['score'], row_df['label']))
return row_data
trn_user_item_label_tuples = trn_user_item_label_df.groupby('user_id').apply(make_tuple_func).reset_index()
trn_user_item_label_tuples_dict = dict(zip(trn_user_item_label_tuples['user_id'], trn_user_item_label_tuples[0]))
if val_user_item_label_df is not None:
val_user_item_label_tuples = val_user_item_label_df.groupby('user_id').apply(make_tuple_func).reset_index()
val_user_item_label_tuples_dict = dict(zip(val_user_item_label_tuples['user_id'], val_user_item_label_tuples[0]))
else:
val_user_item_label_tuples_dict = None
tst_user_item_label_tuples = tst_user_item_label_df.groupby('user_id').apply(make_tuple_func).reset_index()
tst_user_item_label_tuples_dict = dict(zip(tst_user_item_label_tuples['user_id'], tst_user_item_label_tuples[0]))
运行结果及报错内容
KeyError Traceback (most recent call last)
~/anaconda3/envs/zkl/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3079 try:
-> 3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 0
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
/tmp/ipykernel_32117/567686863.py in <module>
9
10 tst_user_item_label_tuples = tst_user_item_label_df.groupby('user_id').apply(make_tuple_func).reset_index()
---> 11 tst_user_item_label_tuples_dict = dict(zip(tst_user_item_label_tuples['user_id'], tst_user_item_label_tuples[0]))
~/anaconda3/envs/zkl/lib/python3.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
3022 if self.columns.nlevels > 1:
3023 return self._getitem_multilevel(key)
-> 3024 indexer = self.columns.get_loc(key)
3025 if is_integer(indexer):
3026 indexer = [indexer]
~/anaconda3/envs/zkl/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
-> 3082 raise KeyError(key) from err
3083
3084 if tolerance is not None:
KeyError: 0
我的解答思路和尝试过的方法
我查到这个是无索引的问题,需要重新连接索引,但是我不会,请问具体怎么操作呢?