[ 协同过滤与隐语义模型推荐系统实例1: 数据处理 ]
[ 协同过滤与隐语义模型推荐系统实例2: 基于相似度的推荐 ]
基于矩阵分解(SVD)的推荐
# Score each (user, song) pair as the share of that user's total plays
# that went to the song: listen_count / total_listen_count.
triplet_dataset_sub_song_merged_sum_df = (
    triplet_dataset_sub_song_merged[['user', 'listen_count']]
    .groupby('user')
    .sum()
    .reset_index()
    .rename(columns={'listen_count': 'total_listen_count'})
)

# No explicit key is given, so the merge joins on the columns the two frames
# share, attaching each user's total to every one of their rows.
triplet_dataset_sub_song_merged = triplet_dataset_sub_song_merged.merge(
    triplet_dataset_sub_song_merged_sum_df
)
# Bare preview expression (notebook-style); it does not modify anything.
triplet_dataset_sub_song_merged.head()

triplet_dataset_sub_song_merged['fractional_play_count'] = (
    triplet_dataset_sub_song_merged['listen_count']
    .div(triplet_dataset_sub_song_merged['total_listen_count'])
)
# Bare preview expression: the first 8 scored rows of one sample user.
triplet_dataset_sub_song_merged.loc[
    triplet_dataset_sub_song_merged.user == 'd6589314c0a9bcbca4fee0c93b14bc402363afea',
    ['user', 'song', 'listen_count', 'fractional_play_count'],
].head(8)
from scipy.sparse import coo_matrix
small_set = triplet_dataset_sub_song_merged

# One row per distinct user / song. reset_index() keeps the original row label
# in an 'index' column, which is renamed so the two tables do not collide.
user_codes = (
    small_set.user
    .drop_duplicates()
    .reset_index()
    .rename(columns={'index': 'user_index'})
)
song_codes = (
    small_set.song
    .drop_duplicates()
    .reset_index()
    .rename(columns={'index': 'song_index'})
)

# The positional index of each code table becomes the matrix coordinate:
# us_index_value is the row (user), so_index_value is the column (song).
song_codes['so_index_value'] = song_codes.index.to_numpy()
user_codes['us_index_value'] = user_codes.index.to_numpy()

# Attach both coordinates to every play record (joined on the shared columns).
small_set = (
    small_set
    .merge(song_codes, how='left')
    .merge(user_codes, how='left')
)

mat_candidate = small_set[['us_index_value', 'so_index_value', 'fractional_play_count']]
data_array = mat_candidate['fractional_play_count'].values
row_array = mat_candidate['us_index_value'].values
col_array = mat_candidate['so_index_value'].values

# users x songs matrix of fractional play counts; the shape is inferred from
# the largest row / column coordinate.
data_sparse = coo_matrix((data_array, (row_array, col_array)), dtype=float)

# Bare preview expression: look up the code-table row of one sample user.
user_codes.loc[user_codes.user == '2a2f776cbac6df64d6cb505e7e834e01684673b6']
import math as mt
from scipy.sparse.linalg import *
from scipy.sparse.linalg import svds
from scipy.sparse import csc_matrix
def compute_svd(urm, K):
    """Factorize ``urm`` with a rank-``K`` truncated SVD.

    Args:
        urm: The matrix to factorize, in any form accepted by
            ``scipy.sparse.linalg.svds`` (the script passes a sparse
            users x songs matrix).
        K: Number of singular values / vectors to compute.

    Returns:
        A tuple ``(U, S, Vt)`` of ``float32`` ``csc_matrix`` objects, where
        ``U`` has ``K`` columns, ``Vt`` has ``K`` rows, and ``S`` is a
        ``K x K`` diagonal matrix holding the *square roots* of the singular
        values.

    NOTE(review): because ``S`` stores square roots, ``U @ S @ Vt`` is not the
    rank-K reconstruction of ``urm`` (that would be ``U @ S @ S @ Vt``). The
    scaling is kept exactly as the original code had it.
    """
    U, s, Vt = svds(urm, k=K)

    # Take the square root in float64 and only then narrow to float32,
    # matching the precision of the original element-by-element fill.
    sqrt_s = np.sqrt(np.asarray(s, dtype=np.float64))
    S = np.diag(sqrt_s).astype(np.float32)

    return (
        csc_matrix(U, dtype=np.float32),
        csc_matrix(S, dtype=np.float32),
        csc_matrix(Vt, dtype=np.float32),
    )
def compute_estimated_matrix(urm, U, S, Vt, uTest, K, test):
    """Rank items for the requested users from the factor matrices.

    For every user index in ``uTest`` the estimated scores are computed as
    ``U[user, :] @ (S @ Vt)`` and the item indices are sorted by descending
    score.

    Args:
        urm: The user x item matrix that was factorized; only its ``shape``
            is read, to size the result.
        U: Sparse user-factor matrix (one row per user).
        S: Sparse ``K x K`` scaling matrix.
        Vt: Sparse item-factor matrix (one column per item).
        uTest: Iterable of user row indices to produce recommendations for.
        K: Unused; kept so existing callers keep working.
        test: Unused; kept so existing callers keep working.

    Returns:
        An integer array of shape ``(n_users, n_recommend)`` where
        ``n_recommend = min(250, n_items)``. Row ``u`` holds the item column
        indices recommended to user ``u``, best first. Rows of users that are
        not listed in ``uTest`` are left as zeros.
    """
    max_recommendation = 250
    n_users, n_items = urm.shape
    # There cannot be more recommendations than items; without this cap the
    # row assignment below fails for catalogues smaller than 250 items.
    n_recommend = min(max_recommendation, n_items)

    rightTerm = S @ Vt

    # Item indices must be stored as integers: float16 represents integers
    # exactly only up to 2048, so larger indices would be rounded to a
    # different item.
    recomendRatings = np.zeros(shape=(n_users, n_recommend), dtype=np.int64)

    for userTest in uTest:
        prod = U[userTest, :] @ rightTerm
        # Only this user's scores are needed, so no dense users x items
        # buffer is allocated. The scores are still narrowed to float16, the
        # precision the original dense buffer used, so the ordering produced
        # by the sort is unchanged.
        scores = np.asarray(prod.todense(), dtype=np.float16).ravel()
        # Negate so that an ascending argsort yields the highest scores first.
        recomendRatings[userTest, :] = (-scores).argsort()[:n_recommend]

    return recomendRatings
# Rank of the truncated SVD.
K = 50
urm = data_sparse
MAX_PID = urm.shape[1]  # number of columns (songs) in the matrix
MAX_UID = urm.shape[0]  # number of rows (users) in the matrix
U, S, Vt = compute_svd(urm, K)

# Row indices (us_index_value) of the users to print recommendations for.
uTest = [4, 23]
uTest_recommended_items = compute_estimated_matrix(urm, U, S, Vt, uTest, K, True)

# Build the song-index -> (title, artist) table once, instead of filtering the
# whole play-record frame again for every recommended song. Keeping the first
# row per song index selects the same row the per-item filter would.
song_lookup = (
    small_set
    .drop_duplicates('so_index_value')
    .set_index('so_index_value')[['title', 'artist_name']]
)

for user in uTest:
    print('Recommendation for user with user id {}'.format(user))
    # Show the ten best-ranked songs, numbered from 1.
    for rank_value, i in enumerate(uTest_recommended_items[user, 0:10], start=1):
        # int() so the lookup works whether the indices come back as an
        # integer or a floating-point array.
        song_details = song_lookup.loc[int(i)]
        print('The number {} recommended song in {} BY {}'.format(rank_value, song_details['title'], song_details['artist_name']))
Recommendation for user with user id 4
The number 1 recommended song in Fireflies BY Charttraxx Karaoke
The number 2 recommended song in Hey_ Soul Sister BY Train
The number 3 recommended song in Lucky (Album Version) BY Jason Mraz & Colbie Caillat
The number 4 recommended song in OMG BY Usher featuring will.i.am
The number 5 recommended song in Vanilla Twilight BY Owl City
The number 6 recommended song in Billionaire [feat. Bruno Mars] (Explicit Album Version) BY Travie McCoy
The number 7 recommended song in Crumpshit BY Philippe Rochard
The number 8 recommended song in Love Story BY Taylor Swift
The number 9 recommended song in Use Somebody BY Kings Of Leon
The number 10 recommended song in The Big Gundown BY The Prodigy
Recommendation for user with user id 23
The number 1 recommended song in Aber Dich Gibt’s Nur Einmal Für Mich BY Guildo Horn
The number 2 recommended song in Let’s Go [from ‘Salvation’] BY New Order
The number 3 recommended song in Crumpshit BY Philippe Rochard
The number 4 recommended song in Otherside (Album Version) BY Red Hot Chili Peppers
The number 5 recommended song in Yellow BY Coldplay
The number 6 recommended song in Bring Me To Life BY Evanescence
The number 7 recommended song in Luvstruck BY Southside Spinners
The number 8 recommended song in The Scientist BY Coldplay
The number 9 recommended song in Creep (Explicit) BY Radiohead
The number 10 recommended song in Sultans of El Sur BY The Mercury Program