Evaluation

This notebook contains the second and third of three functions:

  1. split_train_test: creates train and test sets by splitting the raw data ‘user_feature.csv’.

  2. evaluate: calculates the mse and mae of the final recommendations to the actual recommendations based on the test set.

  3. append_error_to_df: for visualization purposes and for further exploration of the errors.

Generating input data for unittesting purposes.

The commented cells are for the purpose of testing the function and unittest only.

# import pandas as pd
# data = pd.read_csv('user_feature.csv')
# features = ['userId', 'movieId', 'rating']
# # data
# new_data=data[features]
# new_data

Splitting

# import pandas as pd

# def split_train_test(data, train_ratio=0.7):
#     """
#     Splits the transaction data into train and test sets.
    
#     Parameters
#     ----------
#     data         : pandas DataFrame for transaction table containing user, item, and ratings
    
#     train_ratio  : the desired ratio of training set, while 1-train ratio is automatically set for the test set 
    
    
#     Returns
#     ---------
#     df_train_fin : dataframe for the training set
    
#     df_test_fin  : dataframe for the test set
    
#     """
    
#     list_df_train = []
#     list_df_test = []
    
#     #group by user id
#     d = dict(tuple(data.groupby(data.userId)))
    
#     #splitting randomly per user
#     for i in (d):
#         df_train = d[i].sample(frac=train_ratio)
#         ind = df_train.index
#         df_test = d[i].drop(ind)
#         list_df_train.append(df_train) 
#         list_df_test.append(df_test)

#     # 2. merge selected train set per user to a single dataframe
#     df_train_fin = pd.concat(list_df_train)
#     df_test_fin = pd.concat(list_df_test)

#     return df_train_fin, df_test_fin
# df_train, df_test = split_train_test(new_data, 0.70)
# df_test
# df_test.pivot(index='userId', columns='movieId', values='rating')

Metrics for the output of the recommender system

Sample test is created using a subset of the test set, while synthetic result is created by inducing few modifications in the test set.

# sample_test = df_test[(df_test.userId>= 2) & (df_test.userId<=4)].pivot(index='userId', columns='movieId', values='rating')
# sample_test
movieId 58 106 222 342 417 441 450 492 553 593 ... 5764 6874 8798 46970 58559 60756 70946 86345 106782 131724
userId
2 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN 4.0 3.5 4.0 4.5 5.0 NaN 4.0 5.0 5.0
3 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 4.5 NaN NaN NaN NaN NaN 5.0 NaN NaN NaN
4 3.0 4.0 1.0 5.0 2.0 1.0 2.0 5.0 2.0 5.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

3 rows × 86 columns

# # random change in the data for measurement of accuracy
# synthetic_result=sample_test-0.5
# synthetic_result.iloc[0,1] = 5.0
# synthetic_result.iloc[0,5] = 2.0
# synthetic_result.iloc[2,0] = 3.0
# synthetic_result
movieId 58 106 222 342 417 441 450 492 553 593 ... 5764 6874 8798 46970 58559 60756 70946 86345 106782 131724
userId
2 NaN 5.0 NaN NaN NaN 2.0 NaN NaN NaN NaN ... NaN 3.5 3.0 3.5 4.0 4.5 NaN 3.5 4.5 4.5
3 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 4.0 NaN NaN NaN NaN NaN 4.5 NaN NaN NaN
4 3.0 3.5 0.5 4.5 1.5 0.5 1.5 4.5 1.5 4.5 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

3 rows × 86 columns

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

def evaluate(df_test_result, df_test_data):
    """
    Calculates the mse and mae per user of the results of the recommender system for a given test set.
    
    Parameters
    ----------
    
    df_test_result   : utility matrix (users as index, items as columns) containing the
                       result of the recommender systems
    
    df_test_data     : pivoted test data generated from splitting the transaction table
                       and tested on the recommender systems
    
    Returns
    ---------
    
    mse_list         : list of mean squared error for each user
    
    mae_list         : list of mean absolute error for each user
    
    Raises
    ------
    ValueError       : if any user id present in the test data is missing from the
                       result matrix (errors could not be computed for that user)
    """
    mse_list = []
    mae_list = []

    # Every user id in the test data must be represented in the result matrix;
    # otherwise per-user errors cannot be computed.
    missing_users = df_test_data.index.difference(df_test_result.index)
    if len(missing_users) > 0:
        # BUGFIX: the original branch did `print(error)` with an undefined name,
        # which raised NameError instead of reporting the actual problem.
        raise ValueError(
            'user ids missing from the result matrix: {}'.format(list(missing_users))
        )

    print('proceed')

    for i in df_test_result.index:
        # NaN means "no rating"; fill with 0 so the sklearn metrics can compare rows.
        y_pred = df_test_result[df_test_result.index == i].fillna(0)
        y = df_test_data[df_test_data.index == i].fillna(0)
        # Align prediction columns to the test data's column order.
        y_pred = y_pred[y.columns]

        mse_list.append(mean_squared_error(y, y_pred))
        mae_list.append(mean_absolute_error(y, y_pred))

    return mse_list, mae_list
# NOTE(review): sample_test and synthetic_result are defined only in the
# commented-out cells above — uncomment and run those cells first.
# NOTE(review): per the signature, the first argument is the system's result and
# the second is the ground truth; here sample_test (the truth) is passed first.
# MSE/MAE are symmetric so the values match, but confirm the intended order.
mse, mae = evaluate(sample_test, synthetic_result)
print(mse)
print(mae)
proceed
[0.3633720930232558, 0.03488372093023256, 0.18604651162790697]
[0.13372093023255813, 0.06976744186046512, 0.37209302325581395]
def append_error_to_df(test_result, mse, mae):
    """
    Inserts the error values into the first two columns of the dataframe of the
    predictions of the system for easy visualization and for further computations.
    
    Parameters
    ----------
    
    test_result   : utility matrix for the result of the recommender systems on the
                    test set; the caller's dataframe is NOT modified
    
    mse           : mse computed from function evaluate (one value per user/row)
    
    mae           : mae computed from function evaluate (one value per user/row)
    
    Returns
    -------
    
    test_result   : new utility matrix with 'mae_u' and 'mse_u' as the first two columns
    """
    # BUGFIX: DataFrame.insert works in place, so the original mutated the caller's
    # dataframe and raised "cannot insert ... already exists" when called twice.
    # Work on a copy instead; the returned value is unchanged.
    test_result = test_result.copy()

    # Insert mse first, then mae at position 0, so the final order is mae_u, mse_u.
    test_result.insert(0, 'mse_u', mse)
    test_result.insert(0, 'mae_u', mae)

    return test_result
    
# NOTE(review): synthetic_result comes from a commented-out cell above, and mse/mae
# from the evaluate() call — run those first. Also note append_error_to_df modifies
# its input in place, so synthetic_result itself gains the error columns here.
df_error = append_error_to_df(synthetic_result, mse, mae)
df_error
movieId mae_u mse_u 58 106 222 342 417 441 450 492 ... 5764 6874 8798 46970 58559 60756 70946 86345 106782 131724
userId
2 0.133721 0.363372 NaN 5.0 NaN NaN NaN 2.0 NaN NaN ... NaN 3.5 3.0 3.5 4.0 4.5 NaN 3.5 4.5 4.5
3 0.069767 0.034884 NaN NaN NaN NaN NaN NaN NaN NaN ... 4.0 NaN NaN NaN NaN NaN 4.5 NaN NaN NaN
4 0.372093 0.186047 3.0 3.5 0.5 4.5 1.5 0.5 1.5 4.5 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

3 rows × 88 columns

Unittest

import unittest
import pandas as pd
from pandas._testing import assert_index_equal
from pandas._testing import assert_frame_equal

class Test_evaluate(unittest.TestCase):
    """Sanity checks for the inputs and outputs of the evaluate() pipeline."""

    @staticmethod
    def _make_pivot():
        # Shared fixture: a small user/item/rating table pivoted into a utility matrix.
        raw = pd.DataFrame({'u': [1, 1, 2, 2, 3, 3, 3, 5, 5, 6],
                            'i': [3, 4, 5, 6, 7, 1, 2, 3, 1, 0],
                            'r': [5, 6, 7, 8, 9, 3, 2, 1, 0, 9]})
        return raw.pivot(index=raw.columns[0], columns=raw.columns[1],
                         values=raw.columns[2])

    def test_index(self):
        # All userIds in the test matrix must also be present in the result matrix.
        test_matrix = self._make_pivot()
        result_matrix = test_matrix - 0.5
        assert_index_equal(test_matrix.index, result_matrix.index)

    def test_type_error(self):
        # evaluate() must return one mse and one mae per user, all non-null.
        test_matrix = self._make_pivot()
        result_matrix = test_matrix - 0.5
        mse, mae = evaluate(result_matrix, test_matrix)
        n_users = len(result_matrix)
        self.assertEqual(len(mse), n_users)
        self.assertEqual(len(mae), n_users)
        self.assertIsNotNone(sum(mae))
        self.assertIsNotNone(sum(mse))

    def test_same_df_shape(self):
        # The result matrix must keep the same shape as the test matrix.
        test_matrix = self._make_pivot()
        result_matrix = test_matrix - 0.5
        self.assertEqual(result_matrix.shape, test_matrix.shape)
    
unittest.main(argv=[''], verbosity=2, exit=False)
test_index (__main__.Test_evaluate) ... ok
test_same_df_shape (__main__.Test_evaluate) ... ok
test_type_error (__main__.Test_evaluate) ... 
proceed
ok

----------------------------------------------------------------------
Ran 3 tests in 0.057s

OK
<unittest.main.TestProgram at 0x7f851a21bac0>