Get Item Recommendations from Clusters

import numpy as np
import pandas as pd

from random_gen import *
from get_rec import *

def get_rec_item(df_rec, top_k, ic_assignment):
    """Return the top K item recommendations for each user in ``df_rec``.

    For every user, items are taken from the highest ranked item cluster
    first. A cluster that fits entirely into the remaining slots is added
    whole (in index order); the first cluster that does not fit is sampled
    at random without replacement to fill the remaining slots. If all ranked
    clusters are exhausted before ``top_k`` items are found, the remainder is
    sampled at random from the items not selected yet.

            Parameters:
                    df_rec (pandas.DataFrame): Table containing the top N item cluster recommendations for each user. Must have a 'user_id' column; the first column is skipped and every following column is read as a cluster rank, best first
                    top_k (int): Number of items to recommend per user
                    ic_assignment (pandas.Series, pandas.DataFrame or array-like): Cluster assignment of each item. Item ids are taken from the index; a plain array-like is wrapped in a Series, so items are identified by position

            Returns:
                    df_rec_item (pandas.DataFrame): Table with the 'user_id' column of df_rec followed by columns rank_1 .. rank_<top_k> holding the recommended item ids

            Raises:
                    ValueError: If a user needs more items than there are unselected items left

    """

    # Plain lists / arrays carry no item ids; give them a positional index.
    if not isinstance(ic_assignment, (pd.Series, pd.DataFrame)):
        ic_assignment = pd.Series(ic_assignment)

    item_ids = ic_assignment.index
    n_rank = df_rec.shape[1] - 1  # every column after the first is a cluster rank

    # Create recommendation table.
    # NOTE: user_id values are copied from df_rec unchanged; no look-up table
    # translation is applied here.
    df_rec_item = pd.DataFrame()
    df_rec_item['user_id'] = df_rec['user_id']
    for i in range(top_k):
        df_rec_item['rank_' + str(i + 1)] = np.zeros(df_rec_item.shape[0])

    # Get items
    for j in range(df_rec_item.shape[0]):
        item_rec = []
        chosen = set()  # guards against recommending the same item twice

        for rank in range(n_rank):
            n_missing = top_k - len(item_rec)
            if n_missing <= 0:
                break

            in_cluster = item_ids[np.where(ic_assignment == df_rec.iloc[j, rank + 1])[0]]
            # Skipping already chosen items only matters when the same cluster
            # shows up in more than one rank column.
            candidates = [item for item in in_cluster if item not in chosen]

            if n_missing > len(candidates):
                # The whole cluster fits: take it and cascade to the next rank.
                picked = candidates
            else:
                picked = list(np.random.choice(candidates, size=n_missing, replace=False))

            item_rec = item_rec + picked
            chosen.update(picked)

        n_missing = top_k - len(item_rec)
        if n_missing > 0:
            # Ranked clusters are exhausted. Build the pool in index order
            # (not via a set) so a seeded run always draws from the same
            # sequence.
            leftovers = [item for item in item_ids.unique() if item not in chosen]
            if n_missing > len(leftovers):
                raise ValueError(
                    'Cannot recommend {} items: only {} distinct items are available'.format(
                        top_k, len(chosen) + len(leftovers)))
            item_rec = item_rec + list(np.random.choice(leftovers, size=n_missing, replace=False))

        df_rec_item.iloc[j, 1:] = item_rec

    return df_rec_item

Example

# Example set-up: sizes of the synthetic data set and the seed handed to the
# random generators below.
n_user = 100
n_item = 50
sample_size = 10
n_user_cluster = 5
n_item_cluster = 5
top_n = 3  # number of item clusters recommended per user (rank_1 .. rank_3 below)
random_seed = 1

user_id_list = list(range(n_user))  # NOTE(review): not referenced again in the visible part of this file
# The helpers come from random_gen / get_rec (star imports at the top of the
# file); presumably they build a random user sample, random user clusters and
# a random cluster-level utility matrix -- confirm against those modules.
user_list = random_user_list(n_user, sample_size, random_seed)
uc_assignment = random_user_cluster(n_user, n_user_cluster, random_seed)
utility_matrix_o, utility_matrix = random_utility_matrix(n_user_cluster, n_item_cluster, random_seed)
# Top-N item clusters per sampled user; this table is the input of get_rec_item.
df_rec = get_rec(utility_matrix, utility_matrix_o, user_list, top_n, uc_assignment)
df_rec  # bare expression: shown as the cell output when run in a notebook
user_id rank_1 rank_2 rank_3
0 80 1.0 2.0 4.0
1 84 3.0 4.0 1.0
2 33 3.0 4.0 1.0
3 81 0.0 3.0 1.0
4 93 3.0 1.0 0.0
5 17 0.0 3.0 1.0
6 36 3.0 1.0 0.0
7 82 1.0 2.0 4.0
8 69 4.0 2.0 0.0
9 65 3.0 1.0 0.0
# Item clusters are generated with the same helper as the user clusters, just
# sized by n_item / n_item_cluster and drawn with a different seed.
ic_assignment = random_user_cluster(n_item, n_item_cluster, random_seed=2)
ic_assignment  # bare expression: shown as the cell output when run in a notebook
array([0, 0, 3, 2, 3, 0, 2, 1, 3, 2, 4, 4, 4, 3, 4, 2, 3, 3, 2, 1, 2, 4,
       3, 0, 4, 3, 1, 2, 0, 4, 4, 2, 4, 2, 1, 0, 2, 2, 1, 0, 1, 0, 2, 1,
       1, 1, 4, 2, 3, 0])
top_k = 10  # number of items to recommend per user
# The assignment array is wrapped in a DataFrame because get_rec_item reads the
# item ids from ic_assignment.index.
df_rec_item = get_rec_item(df_rec, top_k, pd.DataFrame(ic_assignment))
df_rec_item  # bare expression: shown as the cell output when run in a notebook
user_id rank_1 rank_2 rank_3 rank_4 rank_5 rank_6 rank_7 rank_8 rank_9 rank_10
0 80 7.0 19.0 26.0 34.0 38.0 40.0 43.0 44.0 45.0 15.0
1 84 2.0 4.0 8.0 13.0 16.0 17.0 22.0 25.0 48.0 32.0
2 33 2.0 4.0 8.0 13.0 16.0 17.0 22.0 25.0 48.0 21.0
3 81 0.0 1.0 5.0 23.0 28.0 35.0 39.0 41.0 49.0 48.0
4 93 2.0 4.0 8.0 13.0 16.0 17.0 22.0 25.0 48.0 38.0
5 17 0.0 1.0 5.0 23.0 28.0 35.0 39.0 41.0 49.0 2.0
6 36 2.0 4.0 8.0 13.0 16.0 17.0 22.0 25.0 48.0 43.0
7 82 7.0 19.0 26.0 34.0 38.0 40.0 43.0 44.0 45.0 47.0
8 69 21.0 46.0 29.0 24.0 32.0 10.0 14.0 30.0 11.0 12.0
9 65 2.0 4.0 8.0 13.0 16.0 17.0 22.0 25.0 48.0 43.0

Unit Test

import unittest

class TestGetRecItem(unittest.TestCase):
    """Regression tests for get_rec_item on randomly generated fixtures.

    The expected values are pinned from recorded runs, so the tests rely on
    the fixture helpers and get_rec_item producing the same random draws for
    the same seeds.
    """

    @staticmethod
    def _make_inputs(random_seed, item_seed, n_user=100, n_item=50, sample_size=10,
                     n_user_cluster=5, n_item_cluster=5, top_n=3):
        """Build (df_rec, ic_assignment) for one seeded test configuration.

        The helper calls are kept in this exact order because the pinned
        expectations depend on the sequence of random draws.
        """
        user_list = random_user_list(n_user, sample_size, random_seed)
        uc_assignment = random_user_cluster(n_user, n_user_cluster, random_seed)
        utility_matrix_o, utility_matrix = random_utility_matrix(n_user_cluster, n_item_cluster, random_seed)
        df_rec = get_rec(utility_matrix, utility_matrix_o, user_list, top_n, uc_assignment)
        ic_assignment = random_user_cluster(n_item, n_item_cluster, random_seed=item_seed)
        return df_rec, ic_assignment

    def test_1(self):
        """Seed 1 / item seed 2 reproduces the table shown in the example."""

        top_k = 10
        df_rec, ic_assignment = self._make_inputs(random_seed=1, item_seed=2)

        df_rec_item = get_rec_item(df_rec, top_k, pd.DataFrame(ic_assignment))

        test_case = np.array([
            [80.,  7., 19., 26., 34., 38., 40., 43., 44., 45., 15.],
            [84.,  2.,  4.,  8., 13., 16., 17., 22., 25., 48., 32.],
            [33.,  2.,  4.,  8., 13., 16., 17., 22., 25., 48., 21.],
            [81.,  0.,  1.,  5., 23., 28., 35., 39., 41., 49., 48.],
            [93.,  2.,  4.,  8., 13., 16., 17., 22., 25., 48., 38.],
            [17.,  0.,  1.,  5., 23., 28., 35., 39., 41., 49.,  2.],
            [36.,  2.,  4.,  8., 13., 16., 17., 22., 25., 48., 43.],
            [82.,  7., 19., 26., 34., 38., 40., 43., 44., 45., 47.],
            [69., 21., 46., 29., 24., 32., 10., 14., 30., 11., 12.],
            [65.,  2.,  4.,  8., 13., 16., 17., 22., 25., 48., 43.]
        ])

        self.assertEqual(df_rec_item.to_numpy().tolist(), test_case.tolist())

    def test_2(self):
        """Seed 2 / item seed 3: check the first row and structural invariants.

        The previous expectation was a copy of test_1's table, which belongs
        to different seeds and therefore could never match.
        """

        top_k = 10
        n_item = 50
        df_rec, ic_assignment = self._make_inputs(random_seed=2, item_seed=3, n_item=n_item)

        df_rec_item = get_rec_item(df_rec, top_k, pd.DataFrame(ic_assignment))
        rows = df_rec_item.to_numpy().tolist()

        # One row per user in df_rec, user_id followed by top_k item columns.
        self.assertEqual(df_rec_item.shape, (df_rec.shape[0], top_k + 1))
        self.assertEqual(df_rec_item['user_id'].tolist(), df_rec['user_id'].tolist())

        # First row as recorded from a run of this exact configuration.
        self.assertEqual(
            rows[0],
            [83., 21., 8., 28., 0., 19., 17., 12., 25., 43., 20.])

        for row in rows:
            items = row[1:]
            # Items are never repeated for a user ...
            self.assertEqual(len(set(items)), top_k)
            # ... and are valid positions of the item assignment array.
            self.assertTrue(all(0 <= item < n_item for item in items))
        
# Run the tests inside the notebook: argv=[''] keeps unittest from parsing the
# host process's command-line arguments, and exit=False stops it from calling
# sys.exit() once the tests have finished.
unittest.main(argv=[''], verbosity=2, exit=False)
test_1 (__main__.TestGetRecItem) ... ok
test_2 (__main__.TestGetRecItem) ... FAIL

======================================================================
FAIL: test_2 (__main__.TestGetRecItem)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "<ipython-input-8-280d80a78120>", line 77, in test_2
    self.assertEqual(df_rec_item.to_numpy().tolist(), test_case.tolist())
AssertionError: Lists differ: [[83.0, 21.0, 8.0, 28.0, 0.0, 19.0, 17.0, 12[617 chars]2.0]] != [[80.0, 7.0, 19.0, 26.0, 34.0, 38.0, 40.0, 4[607 chars]3.0]]

First differing element 0:
[83.0, 21.0, 8.0, 28.0, 0.0, 19.0, 17.0, 12.0, 25.0, 43.0, 20.0]
[80.0, 7.0, 19.0, 26.0, 34.0, 38.0, 40.0, 43.0, 44.0, 45.0, 15.0]

Diff is 1513 characters long. Set self.maxDiff to None to see it.

----------------------------------------------------------------------
Ran 2 tests in 0.088s

FAILED (failures=1)
<unittest.main.TestProgram at 0x1c8aa1c0a90>