Cross-validation
This notebook contains the function that performs cross-validation tests. This is a dummy function that can be tested with the model(s).
def cross_val(df, k, model, split_method='random'):
    """
    Performs cross-validation for different train and test sets.

    Each round splits ``df`` 70/30 (the 0.7 passed to the splitter), fits a
    fresh clone of ``model`` on the training part, builds predictions for
    the test users and scores them against the test utility matrix.

    Parameters
    -----------
    df : the data to be split in the form of vanilla/transaction++ table
        (uid, iid, rating, timestamp)
    k : the number of times splitting and learning with the model is
        desired. Only used when ``split_method == 'random'``; the
        chronological split is run exactly once regardless of ``k``.
    model : an unfitted sklearn model (it is passed to ``clone`` before
        every fit, so the caller's instance is never fitted here)
    split_method : 'random' splitting or 'chronological' splitting of the
        data

    Returns
    --------
    mse, mae : two lists of error metrics, one entry per completed round
        (``k`` entries for 'random', one entry for 'chronological')

    Raises
    ------
    ValueError
        If ``split_method`` is neither 'random' nor 'chronological'.

    Notes
    -----
    ``user_df`` and ``item_df`` are not parameters: they are read from the
    enclosing (notebook/module) scope and must be defined before calling.
    The helpers ``split_train_test``, ``split_train_test_chronological``,
    ``clone``, ``fit_ml_cb``, ``reco_ml_cb`` and ``evaluate`` are likewise
    expected to be available in that scope.
    """
    mse = []
    mae = []
    if split_method == 'random':
        for i in range(k):
            print(i)
            mse_i, mae_i = _cross_val_round(df, model, split_train_test)
            mse.append(mse_i)
            mae.append(mae_i)
    elif split_method == 'chronological':
        # A chronological split is the same every time it is computed in
        # this function, so it is evaluated once and k is ignored.
        mse_i, mae_i = _cross_val_round(
            df, model, split_train_test_chronological)
        mse.append(mse_i)
        mae.append(mae_i)
    else:
        # Previously an unknown method fell through and returned two empty
        # lists, which hides typos in the argument.
        raise ValueError(
            "split_method must be 'random' or 'chronological', "
            "got {!r}".format(split_method))
    return mse, mae


def _cross_val_round(df, model, splitter):
    """Run one split/train/predict/evaluate round and return (mse, mae)."""
    # 1. split
    print('Starting splitting')
    df_train, df_test, df_test_um, _indx_train, _indx_test = splitter(df, 0.7)
    print('Finished splitting')
    # 2. train with model (on a clone, so `model` itself stays unfitted)
    model_clone = clone(model)
    print('Starting training')
    model_clone_fit = fit_ml_cb(df_train, model_clone)
    print('Finished training')
    print('Starting completing matrix')
    # user_df / item_df come from the enclosing scope (see cross_val Notes).
    result = reco_ml_cb(user_df, list(df_test.index), item_df, model_clone_fit)
    print('Finished completing matrix')
    print('Starting computing MAE and MSE')
    # 3. evaluate results (result is in the form of utility matrix)
    mse_i, mae_i = evaluate(result, df_test_um)
    print('Finished computing MAE and MSE')
    return mse_i, mae_i