基于统计分析的电影推荐算法:代码没有报错,但是没有输出结果,是什么原因?
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
# Load the MovieLens 100K ratings and build a movie-to-movie similarity matrix
def load_movielens_data(path='D:/Datamovies/ml-100k/u1.base', test_size=0.2):
    """Load a ratings file, split it, and compute item-item similarity.

    Args:
        path: Tab-separated file without a header row whose four columns are
            read as user_id, movie_id, rating and timestamp.
        test_size: Fraction of the rows held out for the test set.

    Returns:
        A ``(train, test, movie_similarity_df)`` tuple. ``train`` and ``test``
        are row subsets of the ratings table. ``movie_similarity_df`` is a
        square DataFrame labelled by movie_id on both axes, holding the cosine
        similarity between the rating vectors of every pair of movies that
        occur in ``train``.
    """
    # The file holds one rating per row (not user/movie metadata).
    # NOTE(review): `u1.base` looks like the training half of a predefined
    # MovieLens split - confirm that splitting it again is intended.
    ratings_df = pd.read_csv(
        path,
        sep='\t',
        header=None,
        names=['user_id', 'movie_id', 'rating', 'timestamp'],
    )

    # Fixed random_state keeps the split reproducible between runs.
    train, test = train_test_split(ratings_df, test_size=test_size, random_state=42)

    # Users as rows, movies as columns; a missing rating becomes 0 so that it
    # contributes nothing to the dot products behind the cosine similarity.
    user_item = train.pivot_table(
        index='user_id', columns='movie_id', values='rating', aggfunc='mean'
    ).fillna(0)

    # Transpose so each row is one movie's rating vector. Feeding only the
    # flat 'rating' column (as before) yields a meaningless 1x1 matrix that
    # cannot be labelled with the movie ids.
    movie_similarity = cosine_similarity(user_item.to_numpy().T)

    movie_similarity_df = pd.DataFrame(
        movie_similarity, index=user_item.columns, columns=user_item.columns
    )
    return train, test, movie_similarity_df
# Recommend movies to a user
def recommend_movies(movie_similarity_df, user_id, num_recommendations=10, ratings=None):
    """Return the movie ids recommended for ``user_id``.

    Args:
        movie_similarity_df: Square movie-by-movie similarity matrix whose
            columns are labelled with movie ids. Rows are taken positionally
            in the same order as the columns.
        user_id: Id of the user to recommend for.
        num_recommendations: Maximum number of movie ids to return.
        ratings: Optional DataFrame with 'user_id', 'movie_id' and 'rating'
            columns. When given, unseen movies are ranked by the
            similarity-weighted average of the ratings the user has given.
            When omitted, the legacy behaviour is kept (see below).

    Returns:
        A pandas Index of movie ids, best first. It is empty when ``ratings``
        is given but contains no usable rating for ``user_id``.

    Raises:
        ValueError: If ``ratings`` is given and the matrix is not square.
        IndexError: On the legacy path, if ``user_id`` is not a valid row
            position of ``movie_similarity_df``.
    """
    if ratings is None:
        # Legacy path: `user_id` is used as a row *position* in the movie
        # matrix, so the result is the columns most correlated with that row.
        # It does not look at any user's ratings; pass `ratings` to get
        # per-user recommendations.
        reference_row = movie_similarity_df.iloc[user_id]
        correlations = movie_similarity_df.apply(
            lambda column: column.corr(reference_row)
        )
        return correlations.nlargest(num_recommendations).index

    movie_ids = pd.Index(movie_similarity_df.columns)
    similarity = np.nan_to_num(movie_similarity_df.to_numpy(dtype=float))
    if similarity.shape[0] != similarity.shape[1]:
        raise ValueError('movie_similarity_df must be a square movie-by-movie matrix')

    # The user's ratings aligned to the matrix columns (NaN = not rated).
    user_ratings = (
        ratings.loc[ratings['user_id'] == user_id]
        .groupby('movie_id')['rating']
        .mean()
        .reindex(movie_ids)
    )
    rated = user_ratings.notna().to_numpy()
    if not rated.any():
        # Unknown user, or none of the rated movies are in the matrix.
        return movie_ids[:0]

    rated_similarity = similarity[rated]
    weighted_sum = user_ratings.to_numpy(dtype=float)[rated] @ rated_similarity
    weight_total = np.abs(rated_similarity).sum(axis=0)

    # Score only movies the user has not rated and that are similar to at
    # least one rated movie; everything else stays at -inf and is dropped.
    scores = np.full(len(movie_ids), -np.inf)
    np.divide(weighted_sum, weight_total, out=scores, where=(weight_total > 0) & ~rated)

    ranked = pd.Series(scores, index=movie_ids)
    ranked = ranked[np.isfinite(ranked)]
    return ranked.nlargest(num_recommendations).index
# Evaluate the recommender's performance
def evaluate_performance(train, test, movie_similarity_df,
                         num_recommendations=10, like_threshold=4):
    """Score item-based predictions made from ``train`` against ``test``.

    A rating is predicted as the similarity-weighted average of the ratings
    the same user gave in ``train``. When no prediction is possible (unknown
    user, unknown movie, or no similar rated movie) the mean train rating is
    used instead.

    Args:
        train: DataFrame with 'user_id', 'movie_id' and 'rating' columns.
        test: DataFrame with the same columns, used as ground truth.
        movie_similarity_df: Square movie-by-movie similarity matrix whose
            columns are labelled with movie ids. Rows are taken positionally
            in the same order as the columns.
        num_recommendations: Length of the per-user recommendation list.
        like_threshold: A test rating at or above this value counts as a
            relevant movie for the precision/recall computation.

    Returns:
        A ``(rmse, precision, recall)`` tuple of floats. ``rmse`` is the root
        mean squared error over all test rows (NaN when ``test`` is empty).
        ``precision`` and ``recall`` are averaged over the users that have at
        least one relevant test movie and at least one recommendation; both
        are 0.0 when there is no such user. ``precision`` takes the place of
        the value previously returned as "accuracy".

    Raises:
        ValueError: If ``movie_similarity_df`` is not square.
    """
    movie_ids = pd.Index(movie_similarity_df.columns)
    similarity = np.nan_to_num(movie_similarity_df.to_numpy(dtype=float))
    if similarity.shape[0] != similarity.shape[1]:
        raise ValueError('movie_similarity_df must be a square movie-by-movie matrix')
    if len(test) == 0:
        return float('nan'), 0.0, 0.0

    global_mean = float(train['rating'].mean())

    # Users as rows, movies as columns, aligned to the similarity matrix.
    user_item = train.pivot_table(
        index='user_id', columns='movie_id', values='rating', aggfunc='mean'
    ).reindex(columns=movie_ids)
    rated = user_item.notna().to_numpy()
    ratings = user_item.fillna(0.0).to_numpy(dtype=float)

    # predicted[u, m] = sum_i(r[u, i] * sim[i, m]) / sum_i(|sim[i, m]|),
    # summing over the movies i that user u rated in train.
    weighted_sum = ratings @ similarity
    weight_total = rated.astype(float) @ np.abs(similarity)
    predicted = np.full(weighted_sum.shape, global_mean)
    np.divide(weighted_sum, weight_total, out=predicted, where=weight_total > 0)

    # --- Rating accuracy (RMSE) ---
    # get_indexer returns -1 for users/movies that never appear in train.
    user_pos = user_item.index.get_indexer(test['user_id'])
    movie_pos = movie_ids.get_indexer(test['movie_id'])
    known = (user_pos >= 0) & (movie_pos >= 0)
    y_true = test['rating'].to_numpy(dtype=float)
    y_pred = np.full(len(test), global_mean)
    y_pred[known] = predicted[user_pos[known], movie_pos[known]]
    # Computed with numpy so the result does not depend on which
    # scikit-learn release still accepts `squared=False`.
    rmse = float(np.sqrt(np.mean((y_true - y_pred) ** 2)))

    # --- Ranking quality (precision / recall at N) ---
    # Movies already rated in train must never be recommended again.
    scores = np.where(rated, -np.inf, predicted)
    liked = test[test['rating'] >= like_threshold]

    precisions = []
    recalls = []
    # One pass per user instead of re-filtering `train` for every test row.
    for uid, user_rows in liked.groupby('user_id'):
        row = user_item.index.get_indexer([uid])[0]
        if row < 0:
            continue  # user has no train ratings to recommend from
        user_scores = scores[row]
        candidates = np.flatnonzero(np.isfinite(user_scores))
        best_first = candidates[np.argsort(-user_scores[candidates], kind='stable')]
        top_n = best_first[:num_recommendations]
        if top_n.size == 0:
            continue
        recommended = set(movie_ids[top_n])
        relevant = set(user_rows['movie_id'])
        hits = len(recommended & relevant)
        precisions.append(hits / len(recommended))
        recalls.append(hits / len(relevant))

    precision = float(np.mean(precisions)) if precisions else 0.0
    recall = float(np.mean(recalls)) if recalls else 0.0
    return rmse, precision, recall
# 执行推荐系统
def run_recommender_system(path='D:/Datamovies/ml-100k/u1.base', test_size=0.2):
train, test = load_movielens