对基线baseline的推荐系统,已经求出其RMSE,怎样求他的准确率和召回率或者损失函数值呢?
import numpy as np
import pandas as pd
title=['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv("D:/Datamovies/ml-100k/u3.base",sep='\t',names = title)
test_df=pd.read_csv('D:/Datamovies/ml-100k/u3.test',sep='\t',names=title)
print (np.max(df['user_id']),np.max(df['item_id']))
ratings = np.zeros((943, 1682))
for row in df.itertuples():
ratings[row[1]-1,row[2]-1] = row[3]
sparsity = float(len(ratings.nonzero()[0]))
sparsity /= (ratings.shape[0] * ratings.shape[1])
sparsity *= 100
print('训练集矩阵密度为: {:4.2f}%'.format(sparsity))
all_mean = np.mean(ratings[ratings!=0])
user_mean = sum(ratings.T)/sum((ratings!=0).T)
item_mean = sum(ratings)/sum((ratings!=0))
#用all_mean填充user_mean和item_mean可能存在的空值Nan
user_mean = np.where(np.isnan(user_mean), all_mean, user_mean)
item_mean = np.where(np.isnan(item_mean), all_mean, item_mean)
def predict_naive(user, item):
prediction = item_mean[item] + user_mean[user] - all_mean
return prediction
def rmse(pred, actual):
'''计算预测结果的rmse'''
from sklearn.metrics import mean_squared_error
pred = pred[actual.nonzero()].flatten()
actual = actual[actual.nonzero()].flatten()
return np.sqrt(mean_squared_error(pred, actual))
# 用测试集测试
for row in test_df.itertuples():
user,item,actual = row[1]-1,row[2]-1,row[3]
predictions=[]
predictions.append(predict_naive(user, item))
actuals=[]
actuals.append(actual)
print('测试结果的rmse为 %.4f' % rmse(np.array(predictions), np.array(actuals)))