Evaluation¶
This notebook contains the 2nd and 3rd of three functions (the 1st is shown below in commented-out form):
split_train_test: creates train and test sets by splitting the raw data 'user_feature.csv'.
evaluate: calculates the mse and mae of the final recommendations against the actual ratings in the test set.
append_error_to_df: for visualization purposes and for further exploration of the errors.
Generating input data for unittesting purposes.¶
The commented cells are for the purpose of testing the function and unittest only.
# import pandas as pd
# data = pd.read_csv('user_feature.csv')
# features = ['userId', 'movieId', 'rating']
# # data
# new_data=data[features]
# new_data
Splitting¶
# import pandas as pd
# def split_train_test(data, train_ratio=0.7):
# """
# Splits the transaction data into train and test sets.
# Parameters
# ----------
# data : pandas DataFrame for transaction table containing user, item, and ratings
# train_ratio : the desired ratio of training set, while 1-train ratio is automatically set for the test set
# Returns
# ---------
# df_train_fin : dataframe for the training set
# df_test_fin : dataframe for the test set
# """
# list_df_train = []
# list_df_test = []
# #group by user id
# d = dict(tuple(data.groupby(data.userId)))
# #splitting randomly per user
# for i in (d):
# df_train = d[i].sample(frac=train_ratio)
# ind = df_train.index
# df_test = d[i].drop(ind)
# list_df_train.append(df_train)
# list_df_test.append(df_test)
# # 2. merge selected train set per user to a single dataframe
# df_train_fin = pd.concat(list_df_train)
# df_test_fin = pd.concat(list_df_test)
# return df_train_fin, df_test_fin
# df_train, df_test = split_train_test(new_data, 0.70)
# df_test
# df_test.pivot(index='userId', columns='movieId', values='rating')
Metrics for the output of recommender system¶
A sample test is created using a subset of the test set, while the synthetic result is created by introducing a few modifications to the test set.
# sample_test = df_test[(df_test.userId>= 2) & (df_test.userId<=4)].pivot(index='userId', columns='movieId', values='rating')
# sample_test
| movieId | 58 | 106 | 222 | 342 | 417 | 441 | 450 | 492 | 553 | 593 | ... | 5764 | 6874 | 8798 | 46970 | 58559 | 60756 | 70946 | 86345 | 106782 | 131724 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| userId | |||||||||||||||||||||
| 2 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | 4.0 | 3.5 | 4.0 | 4.5 | 5.0 | NaN | 4.0 | 5.0 | 5.0 |
| 3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 4.5 | NaN | NaN | NaN | NaN | NaN | 5.0 | NaN | NaN | NaN |
| 4 | 3.0 | 4.0 | 1.0 | 5.0 | 2.0 | 1.0 | 2.0 | 5.0 | 2.0 | 5.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 rows × 86 columns
# # random change in the data for measurement of accuracy
# synthetic_result=sample_test-0.5
# synthetic_result.iloc[0,1] = 5.0
# synthetic_result.iloc[0,5] = 2.0
# synthetic_result.iloc[2,0] = 3.0
# synthetic_result
| movieId | 58 | 106 | 222 | 342 | 417 | 441 | 450 | 492 | 553 | 593 | ... | 5764 | 6874 | 8798 | 46970 | 58559 | 60756 | 70946 | 86345 | 106782 | 131724 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| userId | |||||||||||||||||||||
| 2 | NaN | 5.0 | NaN | NaN | NaN | 2.0 | NaN | NaN | NaN | NaN | ... | NaN | 3.5 | 3.0 | 3.5 | 4.0 | 4.5 | NaN | 3.5 | 4.5 | 4.5 |
| 3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 4.0 | NaN | NaN | NaN | NaN | NaN | 4.5 | NaN | NaN | NaN |
| 4 | 3.0 | 3.5 | 0.5 | 4.5 | 1.5 | 0.5 | 1.5 | 4.5 | 1.5 | 4.5 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 rows × 86 columns
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
def evaluate(df_test_result, df_test_data):
    """
    Calculates the mse and mae per user of the results of the recommender
    system for a given test set.

    Parameters
    ----------
    df_test_result : pandas DataFrame
        Utility matrix (users x items) containing the result of the
        recommender system.
    df_test_data : pandas DataFrame
        Pivoted test data generated from splitting the transaction table
        and tested on the recommender system.

    Returns
    -------
    mse_list : list of mean squared error for each user
    mae_list : list of mean absolute error for each user

    Raises
    ------
    ValueError
        If any user in the test data is missing from the result matrix.
    """
    mse_list = []
    mae_list = []
    # Every userId in the test matrix must be present in the result matrix,
    # otherwise per-user errors cannot be computed.
    missing = df_test_data.index.difference(df_test_result.index)
    if len(missing) != 0:
        # The original code executed `print(error)` here with `error`
        # undefined, crashing with a NameError; raise an informative
        # exception instead.
        raise ValueError(f'users missing from result matrix: {list(missing)}')
    print('proceed')
    for user_id in df_test_result.index:
        # Single-row frames for this user; unrated items (NaN) count as 0.
        y_pred = df_test_result.loc[[user_id]].fillna(0)
        y = df_test_data.loc[[user_id]].fillna(0)
        # Align the prediction's column order with the test data.
        y_pred = y_pred[y.columns]
        diff = (y - y_pred).to_numpy()
        # Equivalent to sklearn's mean_squared_error / mean_absolute_error
        # for a single sample with uniform output averaging.
        mse_list.append(float((diff ** 2).mean()))
        mae_list.append(float(abs(diff).mean()))
    return mse_list, mae_list
# Compare the synthetic predictions with the sample test set.
# NOTE(review): sample_test is passed as df_test_result and
# synthetic_result as df_test_data — order looks swapped relative to the
# signature; mse/mae are symmetric so the values match either way here,
# but confirm the intended order.
mse, mae = evaluate(sample_test, synthetic_result)
print(mse)
print(mae)
proceed
[0.3633720930232558, 0.03488372093023256, 0.18604651162790697]
[0.13372093023255813, 0.06976744186046512, 0.37209302325581395]
def append_error_to_df(test_result, mse, mae):
    """
    Prepends the per-user error values as the first two columns of the
    prediction matrix for easy visualization and further computations.

    Parameters
    ----------
    test_result : utility matrix for the result of the recommender systems
        on the test set (modified in place)
    mse : mse computed from function evaluate
    mae : mae computed from function evaluate

    Returns
    -------
    test_result : modified utility matrix with errors
    """
    # Insert mse first, then mae, so the final column order is
    # mae_u, mse_u, <original columns>.
    for column, values in (('mse_u', mse), ('mae_u', mae)):
        test_result.insert(0, column, values)
    return test_result
# Attach the per-user errors to the synthetic result matrix.
# NOTE: append_error_to_df mutates synthetic_result in place; df_error
# and synthetic_result are the same object afterwards.
df_error = append_error_to_df(synthetic_result, mse, mae)
df_error
| movieId | mae_u | mse_u | 58 | 106 | 222 | 342 | 417 | 441 | 450 | 492 | ... | 5764 | 6874 | 8798 | 46970 | 58559 | 60756 | 70946 | 86345 | 106782 | 131724 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| userId | |||||||||||||||||||||
| 2 | 0.133721 | 0.363372 | NaN | 5.0 | NaN | NaN | NaN | 2.0 | NaN | NaN | ... | NaN | 3.5 | 3.0 | 3.5 | 4.0 | 4.5 | NaN | 3.5 | 4.5 | 4.5 |
| 3 | 0.069767 | 0.034884 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 4.0 | NaN | NaN | NaN | NaN | NaN | 4.5 | NaN | NaN | NaN |
| 4 | 0.372093 | 0.186047 | 3.0 | 3.5 | 0.5 | 4.5 | 1.5 | 0.5 | 1.5 | 4.5 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 rows × 88 columns
Unittest¶
import unittest
import pandas as pd
from pandas._testing import assert_index_equal
from pandas._testing import assert_frame_equal
class Test_evaluate(unittest.TestCase):
    """Unit tests for the evaluate function."""

    @staticmethod
    def _make_pivot():
        """Build the small pivoted rating matrix shared by all tests."""
        df = pd.DataFrame({'u': [1, 1, 2, 2, 3, 3, 3, 5, 5, 6],
                           'i': [3, 4, 5, 6, 7, 1, 2, 3, 1, 0],
                           'r': [5, 6, 7, 8, 9, 3, 2, 1, 0, 9]})
        return df.pivot(index=df.columns[0], columns=df.columns[1],
                        values=df.columns[2])

    def test_index(self):
        # Indices (userIds) should all be represented in the result matrix.
        df_test1 = self._make_pivot()
        df_result = df_test1 - 0.5
        assert_index_equal(df_test1.index, df_result.index)

    def test_type_error(self):
        # evaluate returns one error value per user, and the values sum.
        df_test1 = self._make_pivot()
        df_result1 = df_test1 - 0.5
        mse, mae = evaluate(df_result1, df_test1)
        length = len(df_result1)
        self.assertEqual(len(mse), length)
        self.assertEqual(len(mae), length)
        self.assertIsNotNone(sum(mae))
        self.assertIsNotNone(sum(mse))

    def test_same_df_shape(self):
        # Shifting every rating by a constant must not change the shape.
        df_test1 = self._make_pivot()
        df_result2 = df_test1 - 0.5
        self.assertEqual(df_result2.shape, df_test1.shape)
# Run the suite from inside the notebook: argv=[''] keeps unittest from
# parsing notebook arguments, exit=False keeps it from killing the kernel.
unittest.main(argv=[''], verbosity=2, exit=False)
test_index (__main__.Test_evaluate) ... ok
test_same_df_shape (__main__.Test_evaluate) ... ok
test_type_error (__main__.Test_evaluate) ...
proceed
ok
----------------------------------------------------------------------
Ran 3 tests in 0.057s
OK
<unittest.main.TestProgram at 0x7f851a21bac0>