Evaluation_arrays¶
This notebook contains the function for evaluating the predictions against the ground truth from the test data set. Using the indices from the split function, the ground-truth rating values are extracted from the original transaction dataframe. The MSE and MAE errors are then computed by comparing the model-predicted ratings with the ground truth.
Generating input data for unit-testing purposes.¶
The commented cells are for the purpose of testing the function and unittest only.
# import pandas as pd
# import numpy as np
# data = pd.read_csv('user_feature.csv')
# features = ['userId', 'movieId', 'rating']
# # data
# new_data=data[features]
# new_data
| userId | movieId | rating | |
|---|---|---|---|
| 0 | 1 | 1 | 4.0 |
| 1 | 1 | 3 | 4.0 |
| 2 | 1 | 6 | 4.0 |
| 3 | 1 | 47 | 5.0 |
| 4 | 1 | 50 | 5.0 |
| ... | ... | ... | ... |
| 100831 | 610 | 166534 | 4.0 |
| 100832 | 610 | 168248 | 5.0 |
| 100833 | 610 | 168250 | 5.0 |
| 100834 | 610 | 168252 | 5.0 |
| 100835 | 610 | 170875 | 3.0 |
100836 rows × 3 columns
Splitting¶
# import pandas as pd
# def split_train_test(data, train_ratio=0.7):
# """
# Splits the transaction data into train and test sets.
# Parameters
# ----------
# data : pandas DataFrame for transaction table containing user, item, and ratings
# train_ratio : the desired ratio of training set, while 1-train ratio is automatically set for the test set
# Returns
# ---------
# df_train_fin : dataframe for the training set
# df_test_fin : dataframe for the test set
# df_test_fin* : possible option is a pivoted df ready as the util matrix input of the recsys. In our case, the
# index='userId', columns='movieId', values='rating'. To generalize a transaction table,
# index=column[0], columns=itemId, values=rating.
# """
# list_df_train = []
# list_df_test = []
# #group by user id
# d = dict(tuple(data.groupby(data.columns[0]))) #assuming column[0] is the userId
# #splitting randomly per user
# for i in (d):
# if len(d[i])<2:
# print(len(d[i]))
# list_df_test.append(d[i])
# else:
# df_train = d[i].sample(frac=train_ratio)
# ind = df_train.index
# df_test = d[i].drop(ind)
# list_df_train.append(df_train)
# list_df_test.append(df_test)
# # 2. merge selected train set per user to a single dataframe
# df_train_fin = pd.concat(list_df_train)
# df_test_fin = pd.concat(list_df_test)
# # 3. Option to pivot it to create the utility matrix ready as input for recsys
# df_test_um = df_test_fin.pivot(index=df_test_fin.columns[0], columns=df_test_fin.columns[1], values=df_test_fin.columns[2])
# # 4. get indices of train and test sets
# indx_train = df_train_fin.index
# indx_test = df_test_fin.index
# return df_train_fin, df_test_fin, df_test_um, indx_train, indx_test #return indices
# df_train, df_test, df_test_um, indx_train, indx_test = split_train_test(new_data, 0.70)
# indx_test
Int64Index([ 1, 4, 9, 11, 14, 16, 20, 21,
22, 23,
...
100782, 100785, 100787, 100796, 100799, 100803, 100811, 100812,
100815, 100834],
dtype='int64', length=30256)
# df_test_truth = new_data.loc[pd.Index(indx_test), 'rating']
# df_test_truth.values
array([4. , 5. , 5. , ..., 4.5, 3.5, 5. ])
# synthetic_result = np.random.randint(1,11,len(df_test_truth))
# synthetic_result
array([10, 9, 9, 1])
Metrics for the output of the recommender system¶
A sample test is created using a subset of the test set, while the synthetic result is created by introducing a few modifications to the test set.
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
def evaluate_arrays(model_result_arr, df_data, indx_test):
    """
    Calculates the MSE and MAE of the recommender system for a given result and test set.

    Parameters
    ----------
    model_result_arr : array-like of ratings predicted by the recommender system
        for the test set, aligned element-for-element with ``indx_test``
    df_data : pandas DataFrame of the original transaction table (before splitting);
        the ground-truth ratings for the test set are extracted from it by index
    indx_test : row indices of the test set produced by the splitting step

    Returns
    -------
    mse : mean squared error computed with sklearn
    mae : mean absolute error computed with sklearn

    Raises
    ------
    ValueError
        If the prediction array and the ground-truth array differ in length.
    """
    # Ground truth is taken from the third column of df_data — this assumes the
    # ['userId', 'movieId', 'rating'] transaction layout used earlier in the notebook.
    df_test_truth = df_data.loc[pd.Index(indx_test), df_data.columns[2]]
    test_arr = df_test_truth.values
    # Both arrays must be aligned; otherwise the element-wise metrics are meaningless.
    result_len = len(model_result_arr)
    test_len = len(test_arr)
    if result_len != test_len:
        raise ValueError('the arrays are of different lengths: %s vs %s'
                         % (result_len, test_len))
    print('proceed')
    mse = mean_squared_error(test_arr, model_result_arr)
    mae = mean_absolute_error(test_arr, model_result_arr)
    return mse, mae
# Demo: score the synthetic predictions against the ground truth.
# NOTE(review): `synthetic_result`, `new_data`, and `indx_test` are produced by the
# commented-out cells above — this cell only runs if those were executed first.
mse, mae = evaluate_arrays(synthetic_result, new_data, indx_test)
print(mse)
print(mae)
proceed
13.235316961924907
2.9707330777366474
Unittest¶
import unittest
import pandas as pd
import numpy as np
# from pandas._testing import assert_index_equal
# from pandas._testing import assert_frame_equal
class Test_evaluate_arrays(unittest.TestCase):
def test_length(self):
df1 = pd.DataFrame({'u': [1,1,2,2,3,3,3,5,5,6], 'i': [3,4,5,6,7,1,2,3,1,0], 'r':[5,6,7,8,9,3,2,1,0,9]})
indx1= [2,3,4,5]
df_test_truth = df.loc[pd.Index(indx1), df.columns=='r']
arr_test = df_test_truth.values
arr_result = np.random.randint(1,11,len(df_test_truth))
self.assertEqual(len(arr_test), len(arr_result))
def test_type_error(self):
df2 = pd.DataFrame([[1,1,2], [2,3,3], [3,5,5], [3,4,5], [6,7,1], [2,3,1], [5,6,7], [8,9,3], [2,1,1]], index=[0,1,2,3,4,5,6,7,8], columns=['u', 'i', 'r'])
indx2=[2,3,4,5]
df_test_truth2 = df2.loc[pd.Index(indx2), df2.columns[2]]
test_arr = df_test_truth2.values
arr_result = np.random.randint(1,11,len(df_test_truth2))
mse, mae = evaluate_arrays(arr_result, df2, indx2)
self.assertIsNotNone(mae)
self.assertIsNotNone(mse)
# Run the suite in-notebook: empty argv keeps unittest from parsing Jupyter's
# arguments, and exit=False prevents it from killing the kernel via SystemExit.
unittest.main(argv=[''], verbosity=2, exit=False)
test_length (__main__.Test_evaluate_arrays) ... ok
test_type_error (__main__.Test_evaluate_arrays) ...
proceed
ok
----------------------------------------------------------------------
Ran 2 tests in 0.008s
OK
<unittest.main.TestProgram at 0x7fb1defaf7c0>