Cross-validation

This notebook contains the function that performs cross-validation tests. It is a dummy function that can be tested with one or more models.

def cross_val(df, k: int, model, split_method: str = 'random'):
    """
    Performs cross-validation for different train and test sets.

    For every round the data is split, a fresh clone of ``model`` is fitted
    on the training part, the test rows are predicted, and the predictions
    are scored against the test utility matrix.

    Parameters
    -----------
    df                    : the data to be split in the form of vanilla/transaction++ table (uid, iid, rating, timestamp)

    k                     : the number of times splitting and learning with the model is desired.
                            Only used when ``split_method == 'random'``; the chronological
                            split is performed exactly once regardless of ``k``.

    model                 : an unfitted sklearn model; it is cloned before every fit, so the
                            object passed in is never fitted itself

    split_method          : 'random' splitting or 'chronological' splitting of the data


    Returns
    --------
    mse and mae           : two lists of error metrics, one entry per completed round
                            (``k`` entries for 'random', one entry for 'chronological')


    Raises
    -------
    ValueError            : if ``split_method`` is neither 'random' nor 'chronological'


    Notes
    ------
    ``user_df`` and ``item_df`` are not parameters: they are looked up in the
    surrounding module scope and must be defined before this function is called.
    """
    # Fail loudly on a typo instead of silently returning two empty lists.
    if split_method not in ('random', 'chronological'):
        raise ValueError(
            "split_method must be 'random' or 'chronological', "
            f"got {split_method!r}"
        )

    mse = []
    mae = []

    if split_method == 'random':

        for i in range(k):
            print(i)
            # 1. split
            # NOTE(review): 0.7 is presumably the train fraction -- confirm
            # against split_train_test.
            print('Starting splitting')
            df_train, df_test, df_test_um, _, _ = split_train_test(df, 0.7)
            print('Finished splitting')

            # 2. train with model and 3. evaluate results
            mse_i, mae_i = _cross_val_round(df_train, df_test, df_test_um, model)
            mse.append(mse_i)
            mae.append(mae_i)

    else:  # 'chronological'

        # 1. split (done once; ``k`` is not consulted in this branch)
        print('Starting splitting')
        df_train, df_test, df_test_um, _, _ = split_train_test_chronological(
            df, 0.7)
        print('Finished splitting')

        # 2. train with model and 3. evaluate results
        mse_i, mae_i = _cross_val_round(df_train, df_test, df_test_um, model)
        mse.append(mse_i)
        mae.append(mae_i)

    return mse, mae


def _cross_val_round(df_train, df_test, df_test_um, model):
    """Fit a clone of ``model`` on one train split and return its (mse, mae)."""
    # Clone so that every round starts from an unfitted estimator.
    model_clone = clone(model)
    print('Starting training')
    model_clone_fit = fit_ml_cb(df_train, model_clone)
    print('Finished training')

    # user_df and item_df come from module scope (see cross_val's Notes).
    print('Starting completing matrix')
    result = reco_ml_cb(user_df, list(df_test.index), item_df, model_clone_fit)
    print('Finished completing matrix')

    # result is in the form of a utility matrix and is compared with df_test_um.
    print('Starting computing MAE and MSE')
    mse_i, mae_i = evaluate(result, df_test_um)
    print('Finished computing MAE and MSE')

    return mse_i, mae_i