Cluster Assignments

def cluster_assignment(cluster_res, data_name='user_id'):
    """
    Build an indexed pandas DataFrame from a cluster-membership mapping.

    Parameters
    ----------
    cluster_res : dictionary
                  Mapping whose keys are the user/item ids and whose
                  values are the cluster each id belongs to

    data_name   : string
                  Name given to the id column, which becomes the index
                  of the result. 'user_id' labels the cluster column
                  'ucluster'; any other value labels it 'icluster'

    Returns
    -------
    result      : pandas DataFrame
                  Indexed by `data_name`, with a single column holding
                  the cluster assignment of each id
    """
    import pandas as pd

    # Only the exact name 'user_id' selects the user cluster label.
    cluster_name = 'ucluster' if data_name == 'user_id' else 'icluster'

    # One (id, cluster) row per dictionary entry, in dictionary order.
    pairs = list(cluster_res.items())
    frame = pd.DataFrame(pairs, columns=[data_name, cluster_name])
    return frame.set_index(data_name)
# Build the cluster-assignment frames for users and for items.
# NOTE(review): y_u and y_i are defined outside this view -- presumably the
# {id: cluster} dictionaries produced by the clustering step; confirm.
uc_assignment = cluster_assignment(y_u, data_name='user_id')
ic_assignment = cluster_assignment(y_i, data_name='item_id')
# Bare expression: presumably the last line of a notebook cell, where it
# displays the item frame (it has no effect when run as a plain script).
ic_assignment
icluster
item_id
1 4
2 4
3 3
4 0
5 3
... ...
193581 0
193583 0
193585 0
193587 0
193609 0

9742 rows × 1 columns

Unit Test

import unittest
import pandas as pd
from pandas._testing import assert_frame_equal

class Test_cluster_assign(unittest.TestCase):
    """Unit tests for `cluster_assignment`."""

    def test_cluster_assignment(self):
        """Check the index name, column label and values for users and items."""
        dict_cluster_i = {0: 2, 1: 1, 2: 1, 3: 2, 4: 1, 5: 1, 6: 2, 7: 1, 8: 3, 9: 3}
        dict_cluster_u = {0: 1, 1: 1, 2: 1, 3: 2, 4: 3, 5: 2, 6: 2, 7: 3, 8: 1, 9: 2}

        # (input mapping, data_name passed in, expected cluster column label)
        cases = [
            (dict_cluster_u, 'user_id', 'ucluster'),
            (dict_cluster_i, 'item_id', 'icluster'),
        ]

        for clusters, id_name, cluster_name in cases:
            with self.subTest(data_name=id_name):
                # Build the expected frame through a different constructor
                # path than the function under test, so the comparison is
                # not a tautology.
                expected = pd.DataFrame(
                    {cluster_name: list(clusters.values())},
                    index=pd.Index(list(clusters.keys()), name=id_name),
                )
                actual = cluster_assignment(clusters, data_name=id_name)

                # Use the public pandas.testing API rather than the private
                # pandas._testing module, which carries no stability
                # guarantee.
                pd.testing.assert_frame_equal(expected, actual)
        
# Run the tests defined above in-process.
# argv=[''] supplies a placeholder program name so unittest does not parse the
# host process's own command-line arguments; exit=False stops unittest from
# calling sys.exit() when the run finishes; verbosity=2 prints one line per test.
unittest.main(argv=[''], verbosity=2, exit=False)
test_cluster_assignment (__main__.Test_cluster_assign) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.010s

OK
<unittest.main.TestProgram at 0x7f30b8f1eca0>