程式码程度:初学
要做一个基于内容的推荐系统(还在测试)
我要将两个资料表中的值做矩阵相乘,算出推荐相似度,但不知为何一直报错
資料表如下:
代碼如下:
import pandas as pd
import pyodbc
import pymysql
# Connection parameters for the MySQL server that hosts `final_db`.
db_settings = dict(
    host="127.0.0.1",
    port=3306,
    user="root",
    password="",
    db="final_db",
    charset="utf8",
)

# Open the connection (later queries reuse `conn`) and create a cursor.
conn = pymysql.connect(**db_settings)
cursor = conn.cursor()

# Load the complete `dishes` table into a DataFrame.
command = "SELECT * FROM `dishes`"
food_df = pd.read_sql(command, con=conn)
# Placeholder tokens that stand for "no value" rather than a real genre.
missing_values = ['na', '--', '?', '-', 'None', 'none', 'non']

# Turn the '.'-separated genre string of every dish into a list of tokens.
food_df['genres'] = food_df.genres.str.split('.')
# Per-column count of missing values; the result is only shown when this
# line is evaluated in an interactive session.
food_df.isna().sum()
food_df.foodId = food_df.foodId.astype('int32')

# One-hot encode the genres: one column per genre, set to 1 for every dish
# that carries the genre.
food_with_genres = food_df.copy(deep=True)
for index, row in food_df.iterrows():
    genres = row['genres']
    # A NULL/NaN genres value is not a list after str.split; iterating it
    # would raise TypeError, so such dishes simply get no genre flags.
    if not isinstance(genres, list):
        continue
    for genre in genres:
        # Remove all whitespace so "a b" and "ab" map to the same column.
        out = "".join(genre.split())
        # Skip empty tokens (e.g. from a trailing '.') and placeholder
        # tokens so they do not become bogus genre columns.
        if not out or out in missing_values:
            continue
        food_with_genres.at[index, out] = 1

pd.set_option('display.max_columns', 10)
print(food_with_genres)
# Every remaining NaN (including "dish does not have this genre") becomes 0.
food_with_genres = food_with_genres.fillna(0)
print(food_with_genres)
# Fetch the titles and ratings stored for member 1.
command1 = "SELECT title,rating FROM `user_ratings` WHERE member_id = 1"
df = pd.read_sql(command1, con=conn)
print(df)
Lawrence_food_ratings = pd.DataFrame(df, columns=['title', 'rating'])
print(Lawrence_food_ratings)

# Rows of food_df whose title appears among the user's rated titles; this is
# how the foodId of each rated dish is looked up.
Lawrence_food_Id = food_df[food_df['title'].isin(Lawrence_food_ratings['title'])]

# Attach the ratings to those rows. No key is given, so pandas joins on every
# column name the two frames share.
# NOTE(review): presumably that is only `title` - confirm against the
# `dishes` schema.
Lawrence_food_ratings = pd.merge(Lawrence_food_Id, Lawrence_food_ratings)

# The genre lists are not needed here. `columns=` is used because pandas 2.0
# no longer accepts the axis as a second positional argument
# (`drop(['genres'], 1)` raises TypeError there).
Lawrence_food_ratings = Lawrence_food_ratings.drop(columns=['genres'])

# Final ratings table for this user.
print(Lawrence_food_ratings)
# Keep only the genre rows of dishes the user has rated, renumber the rows
# from 0 and drop the non-genre columns. The steps are chained so each one
# returns a new object: the original in-place calls ran on a filtered slice
# of food_with_genres, which triggers SettingWithCopyWarning.
# NOTE(review): any other column returned by `SELECT *` stays in the frame
# and takes part in the dot product below - confirm the `dishes` schema.
Lawrence_genres_df = (
    food_with_genres[food_with_genres.foodId.isin(Lawrence_food_ratings.foodId)]
    .reset_index(drop=True)
    .drop(['foodId', 'title', 'genres'], axis=1)
)

# Show both shapes; the row counts must agree for the multiplication.
print('Shape of Lawrence_food_ratings is:', Lawrence_food_ratings.shape)
print('Shape of Lawrence_genres_df is:', Lawrence_genres_df.shape)

# DataFrame.dot would reject differing row counts with the opaque message
# "matrices are not aligned"; raise the same exception type with the cause.
if len(Lawrence_genres_df) != len(Lawrence_food_ratings):
    raise ValueError(
        'Cannot build the user profile: '
        f'{len(Lawrence_genres_df)} rated dishes in the genre matrix but '
        f'{len(Lawrence_food_ratings)} rating rows '
        '(check for duplicate titles or duplicate ratings).'
    )

# User profile: for every genre, the sum of the ratings of the rated dishes
# that carry it (transposed genre matrix times the rating column).
Lawrence_profile = Lawrence_genres_df.T.dot(Lawrence_food_ratings.rating)
# Index the genre matrix by foodId and discard the remaining non-genre
# columns (title and the raw genre list).
food_with_genres = food_with_genres.set_index('foodId').drop(
    columns=['title', 'genres']
)

# Score every dish: genre flags weighted by the user profile, divided by the
# total profile weight so the result is a weighted average.
profile_total = Lawrence_profile.sum()
recommendation_table_df = food_with_genres.dot(Lawrence_profile) / profile_total

# Highest scores first.
recommendation_table_df = recommendation_table_df.sort_values(ascending=False)
# Work on an independent copy of food_df that is indexed by foodId.
copy = food_df.copy(deep=True).set_index('foodId', drop=True)

# foodIds of the 20 best-scoring dishes (the scores are already sorted).
top_20_index = recommendation_table_df.head(20).index.tolist()

# Look those dishes up in the copy, keeping the ranking order.
recommended_food = copy.loc[top_20_index]

# Show the recommendations, best match first.
print('推荐的食譜列表:',recommended_food)
报错:
拜托各位大神解救我