# Cluster Assignments
def cluster_assignment(cluster_res, data_name='user_id'):
    """
    Convert a dictionary of id -> cluster membership into a pandas
    DataFrame indexed by id.

    Parameters
    ----------
    cluster_res : dictionary
        Result from clustering function with keys being the
        user/item id and values their cluster membership
    data_name : string, default 'user_id'
        Name given to the index of the result. When it is 'user_id'
        the cluster column is named 'ucluster'; any other value
        yields 'icluster'.

    Returns
    -------
    result : pandas DataFrame
        A single column ('ucluster' or 'icluster') holding the
        cluster assignments, indexed by the ids under the name
        `data_name`
    """
    import pandas as pd

    cluster_name = 'ucluster' if data_name == 'user_id' else 'icluster'
    c_assignment = pd.DataFrame(list(cluster_res.items()),
                                columns=[data_name, cluster_name])
    # Return the re-indexed frame directly instead of mutating in place.
    return c_assignment.set_index(data_name)
# Build the user and item cluster-assignment tables. `y_u` and `y_i` are
# defined outside this section; presumably they are the id -> cluster-label
# dictionaries produced by an earlier clustering step -- confirm there.
uc_assignment = cluster_assignment(y_u, data_name='user_id')
ic_assignment = cluster_assignment(y_i, data_name='item_id')
# Bare expression: presumably kept so the notebook displays the item table.
ic_assignment
| item_id | icluster |
|---|---|
| 1 | 4 |
| 2 | 4 |
| 3 | 3 |
| 4 | 0 |
| 5 | 3 |
| ... | ... |
| 193581 | 0 |
| 193583 | 0 |
| 193585 | 0 |
| 193587 | 0 |
| 193609 | 0 |
9742 rows × 1 columns
# Unit Test
import unittest
import pandas as pd
from pandas._testing import assert_frame_equal
class Test_cluster_assign(unittest.TestCase):
    """Unit tests for `cluster_assignment`."""

    def test_cluster_assignment(self):
        """User and item dictionaries become one-column frames indexed by id."""
        dict_cluster_i = {0: 2, 1: 1, 2: 1, 3: 2, 4: 1,
                          5: 1, 6: 2, 7: 1, 8: 3, 9: 3}
        dict_cluster_u = {0: 1, 1: 1, 2: 1, 3: 2, 4: 3,
                          5: 2, 6: 2, 7: 3, 8: 1, 9: 2}

        # Expected frames are written out from literals rather than built
        # with the same DataFrame/set_index steps as the function under
        # test, so a wrong column name, index name or value is detected.
        ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        df_ex_u = pd.DataFrame(
            {'ucluster': [1, 1, 1, 2, 3, 2, 2, 3, 1, 2]},
            index=pd.Index(ids, name='user_id'),
        )
        df_ex_i = pd.DataFrame(
            {'icluster': [2, 1, 1, 2, 1, 1, 2, 1, 3, 3]},
            index=pd.Index(ids, name='item_id'),
        )

        df_assignment_u = cluster_assignment(dict_cluster_u,
                                             data_name='user_id')
        df_assignment_i = cluster_assignment(dict_cluster_i,
                                             data_name='item_id')

        # Use the public pandas.testing API (via the `pd` namespace) rather
        # than the private pandas._testing module.
        pd.testing.assert_frame_equal(df_ex_u, df_assignment_u)
        pd.testing.assert_frame_equal(df_ex_i, df_assignment_i)
# Run the tests in-process. argv=[''] supplies only a placeholder program
# name, so unittest does not parse the host process's command-line arguments;
# exit=False stops unittest.main from calling sys.exit() when the run ends.
unittest.main(argv=[''], verbosity=2, exit=False)
test_cluster_assignment (__main__.Test_cluster_assign) ... ok
----------------------------------------------------------------------
Ran 1 test in 0.010s
OK
<unittest.main.TestProgram at 0x7f30b8f1eca0>