Evaluation_arrays¶
This notebook contains the function for evaluating the predictions against the ground truth from the test data set. Using the indices from the split function, the ground-truth rating values are extracted from the original transaction dataframe. The MSE and MAE errors are then computed by comparing the model-predicted ratings with the ground truth.
Generating input data for unit-testing purposes.¶
The commented cells are for the purpose of testing the function and unittest only.
# import pandas as pd
# import numpy as np
# data = pd.read_csv('user_feature.csv')
# features = ['userId', 'movieId', 'rating']
# # data
# new_data=data[features]
# new_data
| userId | movieId | rating | |
|---|---|---|---|
| 0 | 1 | 1 | 4.0 |
| 1 | 1 | 3 | 4.0 |
| 2 | 1 | 6 | 4.0 |
| 3 | 1 | 47 | 5.0 |
| 4 | 1 | 50 | 5.0 |
| ... | ... | ... | ... |
| 100831 | 610 | 166534 | 4.0 |
| 100832 | 610 | 168248 | 5.0 |
| 100833 | 610 | 168250 | 5.0 |
| 100834 | 610 | 168252 | 5.0 |
| 100835 | 610 | 170875 | 3.0 |
100836 rows × 3 columns
Splitting¶
# import pandas as pd
# def split_train_test(data, train_ratio=0.7):
# """
# Splits the transaction data into train and test sets.
# Parameters
# ----------
# data : pandas DataFrame for transaction table containing user, item, and ratings
# train_ratio : the desired ratio of training set, while 1-train ratio is automatically set for the test set
# Returns
# ---------
# df_train_fin : dataframe for the training set
# df_test_fin : dataframe for the test set
# df_test_fin* : possible option is a pivoted df ready as the util matrix input of the recsys. In our case, the
# index='userId', columns='movieId', values='rating'. To generalize a transaction table,
# index=column[0], columns=itemId, values=rating.
# """
# list_df_train = []
# list_df_test = []
# #group by user id
# d = dict(tuple(data.groupby(data.columns[0]))) #assuming column[0] is the userId
# #splitting randomly per user
# for i in (d):
# if len(d[i])<2:
# print(len(d[i]))
# list_df_test.append(d[i])
# else:
# df_train = d[i].sample(frac=train_ratio)
# ind = df_train.index
# df_test = d[i].drop(ind)
# list_df_train.append(df_train)
# list_df_test.append(df_test)
# # 2. merge selected train set per user to a single dataframe
# df_train_fin = pd.concat(list_df_train)
# df_test_fin = pd.concat(list_df_test)
# # 3. Option to pivot it to create the utility matrix ready as input for recsys
# df_test_um = df_test_fin.pivot(index=df_test_fin.columns[0], columns=df_test_fin.columns[1], values=df_test_fin.columns[2])
# # 4. get indices of train and test sets
# indx_train = df_train_fin.index
# indx_test = df_test_fin.index
# return df_train_fin, df_test_fin, df_test_um, indx_train, indx_test #return indices
# df_train, df_test, df_test_um, indx_train, indx_test = split_train_test(new_data, 0.70)
# indx_test
Int64Index([ 1, 4, 9, 11, 14, 16, 20, 21,
22, 23,
...
100782, 100785, 100787, 100796, 100799, 100803, 100811, 100812,
100815, 100834],
dtype='int64', length=30256)
# df_test_truth = new_data.loc[pd.Index(indx_test), 'rating']
# df_test_truth.values
array([4. , 5. , 5. , ..., 4.5, 3.5, 5. ])
# synthetic_result = np.random.randint(1,11,len(df_test_truth))
# synthetic_result
array([10, 9, 9, 1])
Metrics for the output of the recommender system¶
A sample test is created using a subset of the test set, while the synthetic result is created by introducing a few modifications to the test set.
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
def evaluate_arrays(model_result_arr, df_data, indx_test):
    """
    Calculates the MSE and MAE of the recommender system for a given result and test set.

    Parameters
    ----------
    model_result_arr : array-like of ratings predicted by the recommender system
        for the test set, aligned element-for-element with ``indx_test``
    df_data : pandas DataFrame of the original transaction table (before splitting);
        the ground-truth ratings for the test set are extracted from it by index
    indx_test : row indices of the test set produced by the splitting step

    Returns
    -------
    mse : mean squared error computed with sklearn
    mae : mean absolute error computed with sklearn

    Raises
    ------
    ValueError
        If the prediction array and the ground-truth array differ in length.
    """
    # Ground truth is taken from the third column of df_data — this assumes the
    # ['userId', 'movieId', 'rating'] transaction layout used earlier in the notebook.
    df_test_truth = df_data.loc[pd.Index(indx_test), df_data.columns[2]]
    test_arr = df_test_truth.values
    # Both arrays must be aligned; otherwise the element-wise metrics are meaningless.
    result_len = len(model_result_arr)
    test_len = len(test_arr)
    if result_len != test_len:
        raise ValueError('the arrays are of different lengths: %s vs %s'
                         % (result_len, test_len))
    print('proceed')
    mse = mean_squared_error(test_arr, model_result_arr)
    mae = mean_absolute_error(test_arr, model_result_arr)
    return mse, mae
# Demo: score the synthetic predictions against the ground truth.
# NOTE(review): `synthetic_result`, `new_data`, and `indx_test` are produced by the
# commented-out cells above — this cell only runs if those were executed first.
mse, mae = evaluate_arrays(synthetic_result, new_data, indx_test)
print(mse)
print(mae)
proceed
13.235316961924907
2.9707330777366474
Unittest¶
import unittest
import pandas as pd
import numpy as np
# from pandas._testing import assert_index_equal
# from pandas._testing import assert_frame_equal
class Test_evaluate_arrays(unittest.TestCase):
def test_length(self):
df1 = pd.DataFrame({'u': [1,1,2,2,3,3,3,5,5,6], 'i': [3,4,5,6,7,1,2,3,1,0], 'r':[5,6,7,8,9,3,2,1,0,9]})
indx1= [2,3,4,5]
df_test_truth = df.loc[pd.Index(indx1), df.columns=='r']
arr_test = df_test_truth.values
arr_result = np.random.randint(1,11,len(df_test_truth))
self.assertEqual(len(arr_test), len(arr_result))
def test_type_error(self):
df2 = pd.DataFrame([[1,1,2], [2,3,3], [3,5,5], [3,4,5], [6,7,1], [2,3,1], [5,6,7], [8,9,3], [2,1,1]], index=[0,1,2,3,4,5,6,7,8], columns=['u', 'i', 'r'])
indx2=[2,3,4,5]
df_test_truth2 = df2.loc[pd.Index(indx2), df2.columns[2]]
test_arr = df_test_truth2.values
arr_result = np.random.randint(1,11,len(df_test_truth2))
mse, mae = evaluate_arrays(arr_result, df2, indx2)
self.assertIsNotNone(mae)
self.assertIsNotNone(mse)
# Run the suite in-notebook: empty argv keeps unittest from parsing Jupyter's
# arguments, and exit=False prevents it from killing the kernel via SystemExit.
unittest.main(argv=[''], verbosity=2, exit=False)
test_length (__main__.Test_evaluate_arrays) ... ok
test_type_error (__main__.Test_evaluate_arrays) ...
proceed
ok
----------------------------------------------------------------------
Ran 2 tests in 0.008s
OK
<unittest.main.TestProgram at 0x7fb1defaf7c0>