Getting Started

Introduction

ReSyPE (pronounced recipe) is a Python library built for both practitioners and researchers who wish to quickly develop and deploy ML-based recommender systems.

The library provides an end-to-end pipeline that includes:

  1. Loading transaction, user feature, and item feature datasets

  2. Interchangeable methods for user and item clustering

  3. Modular framework for machine learning models

  4. Iterative and decomposition-based techniques

Installation

pip install resype

Load the Data

import pandas as pd
import numpy as np
from resype.collab_filtering import CollabFilteringModel

%load_ext autoreload
%autoreload 2 
# load transaction list
transaction_list = pd.read_csv("sample_data/ratings.csv")[['userId', 'movieId', 'rating']]
transaction_list = transaction_list.sample(20)
transaction_list.columns = ["user_id", 'item_id', 'rating']

Preprocess

re = CollabFilteringModel(transaction_list)
re.transaction_list.head(3)
user_id item_id rating
58565 381 78266 3.0
18628 119 54272 4.5
35027 234 2123 3.0
# construct utility matrix
re.construct_utility_matrix()
re.utility_matrix.head(3)
item_id 196 208 266 355 434 586 588 1701 1856 2123 2455 2502 2867 4025 6188 54272 78266 80463 106100 157108
user_id
18 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 4.5
107 NaN 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
111 NaN NaN NaN NaN NaN NaN NaN NaN 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

Train on Unclustered Matrix

Iterative Approach

# import sklearn Model
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor()
# fit and predict
re.fit(model, method='iterative')
re.utility_matrix_preds.head(3)
item_id 196 208 266 355 434 586 588 1701 1856 2123 2455 2502 2867 4025 6188 54272 78266 80463 106100 157108
user_id
18 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
107 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
111 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
# recommend
user_list = [0, 1, 2] # indices
top_n = 10
re.get_rec(user_list, top_n)
re.df_rec
user_id rank_1 rank_2 rank_3 rank_4 rank_5 rank_6 rank_7 rank_8 rank_9 rank_10
0 18 2123.0 106100.0 208.0 266.0 355.0 434.0 586.0 588.0 1701.0 1856.0
1 107 157108.0 106100.0 266.0 355.0 434.0 586.0 588.0 1701.0 1856.0 2123.0
2 111 157108.0 106100.0 208.0 266.0 355.0 434.0 586.0 588.0 1701.0 2123.0

Train on Clustered Matrix

from sklearn.cluster import KMeans
km_users = KMeans(n_clusters=10)
km_items = KMeans(n_clusters=10)

user_model, user_cluster_map, util_matrix_w_users = re.cluster_users(km_users)
item_model, item_cluster_map, util_matrix_w_items = re.cluster_items(km_items)
re.cluster_assignment()
re.utility_matrix_agg(u_agg="mean", i_agg="mean")
re.utility_matrix.head(3)
i_cluster 0 1 2 3 4 5 6 7 8 9
u_cluster
0 0.000000 0.000000 0.0 5.0 0.00000 0.0 0.0 0.0 0.0 0.000000
1 0.363636 0.272727 0.0 0.0 0.18595 0.0 0.0 0.0 0.0 0.363636
2 0.000000 0.000000 5.0 0.0 0.00000 0.0 0.0 0.0 0.0 0.000000
from sklearn.ensemble import RandomForestRegressor
model1 = RandomForestRegressor()
re.fit(model_object=model1, method='iterative', n_synth_data=5, p=0.1)
re.utility_matrix_preds.head(3)
i_cluster 0 1 2 3 4 5 6 7 8 9
u_cluster
0 -0.430173 -0.417114 -0.458333 3.538939 -0.458333 -0.458333 -0.458333 0.209448 -0.230966 -0.417366
1 0.225689 0.134780 -0.080534 -0.088732 0.067354 -0.099382 -0.114118 -0.137948 -0.116099 0.225689
2 -0.486881 -0.522222 4.477778 -0.522222 -0.522222 0.130154 -0.522222 -0.522222 -0.522222 -0.522222
# predict top item clusters per user index
user_list = [0, 1, 2] # index
top_n = 5 # top n clusters
re.get_rec(user_list, top_n, re.user_assignment)
user_id rank_1 rank_2 rank_3 rank_4 rank_5
0 0 2.0 9.0 7.0 6.0 5.0
1 1 2.0 3.0 5.0 6.0 8.0
2 2 5.0 9.0 0.0 1.0 3.0
# predict top items per user_id
top_n = 5 # top n items
re.get_rec_item(top_n)
user_id rank_1 rank_2 rank_3 rank_4 rank_5
0 18 2502.0 588.0 54272.0 106100.0 2455.0
1 107 2502.0 2867.0 2455.0 106100.0 157108.0
2 111 2455.0 588.0 1701.0 78266.0 2867.0