Skip to content

Commit

Permalink
Created first hybrids, best Kaggle MAP 0.081.
Browse files Browse the repository at this point in the history
  • Loading branch information
Lodz97 committed Nov 28, 2020
1 parent 482dd57 commit 1661c47
Show file tree
Hide file tree
Showing 7 changed files with 462 additions and 20 deletions.
55 changes: 55 additions & 0 deletions Hyb0.081.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from Data_manager.RecSys2020 import RecSys2020Reader
from Notebooks_utils.data_splitter import train_test_holdout
import matplotlib.pyplot as pyplot
import numpy as np
from SLIM_BPR.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
from GraphBased import P3alphaRecommender, RP3betaRecommender
from SLIM_ElasticNet import SLIMElasticNetRecommender
from Base.Evaluation.Evaluator import EvaluatorHoldout
from MatrixFactorization.Cython import MatrixFactorization_Cython
from MatrixFactorization.PyTorch import MF_MSE_PyTorch
from MatrixFactorization import IALSRecommender, NMFRecommender, PureSVDRecommender
from KNN import ItemKNNCBFRecommender, ItemKNNCFRecommender, ItemKNNCustomSimilarityRecommender,\
ItemKNNSimilarityHybridRecommender, UserKNNCFRecommender
from EASE_R import EASE_R_Recommender
import ItemKNNScoresHybridRecommender
import CreateCSV

# https://github.com/MaurizioFD/RecSys_Course_AT_PoliMi/blob/master/Practice%2009%20-%20SLIM%20BPR.ipynb
# https://github.com/nicolo-felicioni/recsys-polimi-2019/tree/master/Hybrid


if __name__ == '__main__':
    # Load interactions (URM), item content features (ICM) and the target
    # users for which the Kaggle submission must be produced.
    URM_all, user_id_unique, item_id_unique = RecSys2020Reader.load_urm()
    ICM_all = RecSys2020Reader.load_icm_asset()
    target_ids = RecSys2020Reader.load_target()

    # Fixed seed -> reproducible 97/3 holdout split.
    np.random.seed(12341)
    URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.97)
    ICM_train, ICM_test = train_test_holdout(ICM_all, train_perc=0.97)
    evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10], exclude_seen=True)

    # Item-based collaborative KNN (tuned hyper-parameters).
    itemKNNCF = ItemKNNCFRecommender.ItemKNNCFRecommender(URM_train)
    itemKNNCF.fit(**{"topK": 1000, "shrink": 732, "similarity": "cosine", "normalize": True,
                     "feature_weighting": "TF-IDF"})

    # User-based collaborative KNN.
    userKNNCF = UserKNNCFRecommender.UserKNNCFRecommender(URM_train)
    userKNNCF.fit(**{"topK": 131, "shrink": 2, "similarity": "cosine", "normalize": True})

    # Content-based item KNN built on the ICM.
    itemKNNCBF = ItemKNNCBFRecommender.ItemKNNCBFRecommender(URM_train, ICM_train)
    itemKNNCBF.fit(topK=700, shrink=200, similarity='jaccard', normalize=True, feature_weighting = "TF-IDF")

    # First-level score hybrid: content-based + user-based CF.
    hyb = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(URM_train, itemKNNCBF, userKNNCF)
    hyb.fit(alpha=0.5)

    # Second-level hybrid — this is the model that scored Kaggle MAP 0.081.
    hyb2 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(URM_train, hyb, itemKNNCF)
    hyb2.fit(alpha=0.5)


    print(evaluator_validation.evaluateRecommender(userKNNCF))
    print(evaluator_validation.evaluateRecommender(hyb))
    print(evaluator_validation.evaluateRecommender(hyb2))

    # BUG FIX: the submission was previously generated from `hyb`, but the
    # 0.081 score belongs to `hyb2` — recommend with the full two-level hybrid.
    item_list = hyb2.recommend(target_ids, cutoff=10)
    CreateCSV.create_csv(target_ids, item_list, 'Hyb_User_Item_KNNCF')
60 changes: 60 additions & 0 deletions ItemKNNScoresHybridRecommender.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from Base.BaseSimilarityMatrixRecommender import BaseItemSimilarityMatrixRecommender
from Base.Recommender_utils import check_matrix
import numpy as np

class ItemKNNScoresHybridRecommender(BaseItemSimilarityMatrixRecommender):
    """ ItemKNNScoresHybridRecommender

    Hybrid of two prediction scores: R = z(R1)*alpha + z(R2)*(1-alpha),
    where z(.) standardizes each score matrix (zero mean, unit std) so the
    two recommenders are blended on a comparable scale.

    NOTE(review): the `items_to_compute` branch assumes Recommender_1 is
    item-based (item-item `W_sparse`) and Recommender_2 is user-based
    (user-user `W_sparse`) — confirm against callers before relying on it.
    """

    RECOMMENDER_NAME = "ItemKNNScoresHybridRecommender"

    def __init__(self, URM_train, Recommender_1, Recommender_2):
        super(ItemKNNScoresHybridRecommender, self).__init__(URM_train)

        self.URM_train = check_matrix(URM_train.copy(), 'csr')
        self.Recommender_1 = Recommender_1
        self.Recommender_2 = Recommender_2

    def fit(self, alpha=0.5):
        """Set the blend weight: alpha for Recommender_1, (1-alpha) for Recommender_2."""
        self.alpha = alpha

    def _standardize(self, item_scores):
        """Z-normalize a dense score matrix; guard against zero std (constant scores)."""
        mean = np.mean(item_scores)
        std = np.std(item_scores)
        if std > 0:
            return (item_scores - mean) / std
        return item_scores - mean

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """
        Blend the two recommenders' scores for the given users.
        URM_train and W_sparse must have the same format, CSR.
        :param user_id_array: users to score
        :param items_to_compute: optional item subset; other items get -inf
        :return: dense (len(user_id_array), n_items) score matrix
        """

        if items_to_compute is not None:
            # Raw dense scores from the similarity matrices.
            # (Slices are only built on this path: the other path never uses
            # them, and Recommender_2 may not expose W_sparse there.)
            user_profile_array = self.URM_train[user_id_array]
            user_weights_array = self.Recommender_2.W_sparse[user_id_array]
            item_scores1 = user_profile_array.dot(self.Recommender_1.W_sparse).toarray()
            item_scores2 = user_weights_array.dot(self.Recommender_2.URM_train).toarray()
        else:
            item_scores1 = self.Recommender_1._compute_item_score(user_id_array, items_to_compute)
            item_scores2 = self.Recommender_2._compute_item_score(user_id_array, items_to_compute)

        # BUG FIX: normalize the *raw* scores. The original code masked
        # excluded items with -inf first, which made mean/std non-finite and
        # turned every blended score into NaN when items_to_compute was given.
        item_scores1 = self._standardize(item_scores1)
        item_scores2 = self._standardize(item_scores2)

        item_scores = item_scores1 * self.alpha + item_scores2 * (1 - self.alpha)

        if items_to_compute is not None:
            # Mask after blending: only the requested items keep real scores.
            masked_scores = - np.ones(item_scores.shape, dtype=np.float32) * np.inf
            masked_scores[:, items_to_compute] = item_scores[:, items_to_compute]
            item_scores = masked_scores

        return item_scores
53 changes: 41 additions & 12 deletions MyRec.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from MatrixFactorization import IALSRecommender, NMFRecommender, PureSVDRecommender
from KNN import ItemKNNCBFRecommender, ItemKNNCFRecommender, ItemKNNCustomSimilarityRecommender,\
ItemKNNSimilarityHybridRecommender, UserKNNCFRecommender
from EASE_R import EASE_R_Recommender
import ItemKNNScoresHybridRecommender
import CreateCSV

# https://github.com/MaurizioFD/RecSys_Course_AT_PoliMi/blob/master/Practice%2009%20-%20SLIM%20BPR.ipynb
Expand Down Expand Up @@ -38,6 +40,7 @@
pyplot.xlabel('Sorted User')
pyplot.show()

np.random.seed(1234)
URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.8)
ICM_train, ICM_test = train_test_holdout(ICM_all, train_perc=0.8)
evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10], exclude_seen=True)
Expand All @@ -50,13 +53,21 @@
}

# MAP 0.057, kaggle MAP 0.054
# recommender = SLIM_BPR_Cython(URM_train, recompile_cython=False)
# recommender.fit(**{"topK": 665, "epochs": 2000, "symmetric": False, "sgd_mode": "adagrad", "lambda_i": 0.01,
# "lambda_j": 1e-05, "learning_rate": 0.0001}, **earlystopping_keywargs)
recommender1 = SLIM_BPR_Cython(URM_train, recompile_cython=False)
recommender1.load_model('SavedModels', 'SLIM_BPR_Cyrhon')
#recommender1.fit(**{"topK": 865, "epochs": 1000, "symmetric": False, "sgd_mode": "adagrad", "lambda_i": 0.01,
# "lambda_j": 1e-05, "learning_rate": 0.0001})
#recommender.save_model('SavedModels', 'SLIM_BPR_Cyrhon')
w1 = recommender1.W_sparse



# MAP 0.052
# recommender = P3alphaRecommender.P3alphaRecommender(URM_train)
# recommender.fit(**{"topK": 998, "alpha": 0.08643815887780361, "normalize_similarity": False})
#recommender2 = P3alphaRecommender.P3alphaRecommender(URM_train)
#recommender2.fit(**{"topK": 998, "alpha": 0.08643815887780361, "normalize_similarity": False})
#recommender2.save_model('SavedModels', 'P3alpha')
#w2 = recommender2.W_sparse


# Bad MAP 0.035
# recommender = RP3betaRecommender.RP3betaRecommender(URM_train)
Expand Down Expand Up @@ -100,21 +111,39 @@
# recommender = PureSVDRecommender.PureSVDRecommender(URM_train)
# recommender.fit(num_factors=2000)

# MAP 0.026 (topK=700, shrink=300, similarity='jaccard', normalize=True, feature_weighting = "TF-IDF")
# Bad MAP 0.026 (topK=700, shrink=300, similarity='jaccard', normalize=True, feature_weighting = "TF-IDF")
# recommender = ItemKNNCBFRecommender.ItemKNNCBFRecommender(URM_train, ICM_train)
# recommender.fit(topK=700, shrink=200, similarity='jaccard', normalize=True, feature_weighting = "TF-IDF")

# MAP 0.0563 (**{"topK": 1000, "shrink": 732, "similarity": "cosine", "normalize": True,
# "feature_weighting": "TF-IDF"})
# recommender = ItemKNNCFRecommender.ItemKNNCFRecommender(URM_train)
# recommender.fit(**{"topK": 1000, "shrink": 732, "similarity": "cosine", "normalize": True,
# "feature_weighting": "TF-IDF"})
recommender3 = ItemKNNCFRecommender.ItemKNNCFRecommender(URM_train)
recommender3.load_model('SavedModels', 'ItemKNNCF')
#recommender.fit(**{"topK": 1000, "shrink": 732, "similarity": "cosine", "normalize": True,
# "feature_weighting": "TF-IDF"})
#recommender.save_model('SavedModels', 'ItemKNNCF')
w3 = recommender3.W_sparse

# MAP 0.058 (**{"topK": 305, "shrink": 0, "similarity": "cosine", "normalize": True,
# "feature_weighting": "TF-IDF"})
recommender = UserKNNCFRecommender.UserKNNCFRecommender(URM_train)
recommender.fit(**{"topK": 305, "shrink": 0, "similarity": "cosine", "normalize": True,
"feature_weighting": "TF-IDF"})
recommender4 = UserKNNCFRecommender.UserKNNCFRecommender(URM_train)
recommender4.fit(**{"topK": 305, "shrink": 0, "similarity": "cosine", "normalize": True,
"feature_weighting": "TF-IDF"})


# MAP 0.049 (topK=100, l2_norm = 1e3, normalize_matrix = False, verbose = True)
# recommender = EASE_R_Recommender.EASE_R_Recommender(URM_train)
# recommender.fit(topK=None, l2_norm = 3 * 1e3, normalize_matrix = False, verbose = True)

# MAP 0.053
#recommender = ItemKNNSimilarityHybridRecommender.ItemKNNSimilarityHybridRecommender(URM_train, w1, w2)
#recommender.fit(topK=300, alpha = 0.7)

recommendert = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(URM_train, recommender3, recommender4)
recommendert.fit(alpha = 0.6)

recommender = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(URM_train, recommender1, recommendert)
recommender.fit(alpha=0.6)

print(evaluator_validation.evaluateRecommender(recommender))

Expand Down
17 changes: 9 additions & 8 deletions ParameterTuning/run_parameter_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def run_KNNRecommender_on_similarity_type(similarity_type, parameterSearch,
hyperparameters_range_dictionary["similarity"] = Categorical([similarity_type])
hyperparameters_range_dictionary["normalize"] = Categorical([True, False])

is_set_similarity = similarity_type in ["tversky", "dice", "jaccard", "tanimoto"]
is_set_similarity = similarity_type in ["tversky", "dice", "jaccard", "tanimoto", 'cosine', 'adjusted', 'asymmetric']

if similarity_type == "asymmetric":
hyperparameters_range_dictionary["asymmetric_alpha"] = Real(low = 0, high = 2, prior = 'uniform')
Expand Down Expand Up @@ -616,11 +616,11 @@ def read_data_split_and_search():

URM_all, user_id_unique, item_id_unique = RecSys2020Reader.load_urm()
URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.90)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage = 0.80)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage = 0.85)


output_folder_path = "ParamResultsExperiments/SKOPT_Item_User_KNNCF/"
output_folder_path += datetime.now().strftime('%b%d_%H-%M-%S')
output_folder_path = "ParamResultsExperiments/SKOPT_BPR_Cython_"
output_folder_path += datetime.now().strftime('%b%d_%H-%M-%S/')


# If directory does not exist, create
Expand All @@ -633,12 +633,12 @@ def read_data_split_and_search():
# TopPop,
#P3alphaRecommender,
#RP3betaRecommender,
ItemKNNCFRecommender,
UserKNNCFRecommender,
#ItemKNNCFRecommender,
#UserKNNCFRecommender,
#MatrixFactorization_BPR_Cython,
#MatrixFactorization_FunkSVD_Cython,
# PureSVDRecommender,
# SLIM_BPR_Cython,
SLIM_BPR_Cython,
# SLIMElasticNetRecommender
]

Expand All @@ -653,7 +653,8 @@ def read_data_split_and_search():
runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
URM_train = URM_train,
metric_to_optimize = "MAP",
n_cases = 20,
n_cases = 60,
n_random_starts=20,
evaluator_validation_earlystopping = evaluator_validation,
evaluator_validation = evaluator_validation,
evaluator_test = evaluator_test,
Expand Down
64 changes: 64 additions & 0 deletions TryOneRec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from Data_manager.RecSys2020 import RecSys2020Reader
from Notebooks_utils.data_splitter import train_test_holdout
import matplotlib.pyplot as pyplot
import numpy as np
from SLIM_BPR.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
from GraphBased import P3alphaRecommender, RP3betaRecommender
from SLIM_ElasticNet import SLIMElasticNetRecommender
from Base.Evaluation.Evaluator import EvaluatorHoldout
from MatrixFactorization.Cython import MatrixFactorization_Cython
from MatrixFactorization.PyTorch import MF_MSE_PyTorch
from MatrixFactorization import IALSRecommender, NMFRecommender, PureSVDRecommender
from KNN import ItemKNNCBFRecommender, ItemKNNCFRecommender, ItemKNNCustomSimilarityRecommender,\
ItemKNNSimilarityHybridRecommender, UserKNNCFRecommender
from EASE_R import EASE_R_Recommender
import ItemKNNScoresHybridRecommender
import CreateCSV

# https://github.com/MaurizioFD/RecSys_Course_AT_PoliMi/blob/master/Practice%2009%20-%20SLIM%20BPR.ipynb
# https://github.com/nicolo-felicioni/recsys-polimi-2019/tree/master/Hybrid


if __name__ == '__main__':
    # Load interactions, content features and the submission target users.
    URM_all, user_id_unique, item_id_unique = RecSys2020Reader.load_urm()
    ICM_all = RecSys2020Reader.load_icm_asset()
    target_ids = RecSys2020Reader.load_target()

    def show_sorted_counts(counts, xlabel):
        # Visual sanity check: sorted interaction counts expose the long tail.
        pyplot.plot(np.sort(counts), 'ro')
        pyplot.ylabel('Num Interactions ')
        pyplot.xlabel(xlabel)
        pyplot.show()

    item_popularity = np.ediff1d(URM_all.tocsc().indptr)
    print(item_popularity)
    show_sorted_counts(item_popularity, 'Sorted Item')
    show_sorted_counts(np.ediff1d(URM_all.indptr), 'Sorted User')

    #np.random.seed(1234)
    URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.97)
    ICM_train, ICM_test = train_test_holdout(ICM_all, train_perc=0.8)
    evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10], exclude_seen=True)

    # Early-stopping configuration (kept around for experiments; this run
    # does not pass it to fit()).
    earlystopping_keywargs = {"validation_every_n": 10,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_validation,
                              "lower_validations_allowed": 5,
                              "validation_metric": "MAP",
                              }

    # Single user-based CF model; these hyper-parameters scored Kaggle MAP 0.07.
    recommender = UserKNNCFRecommender.UserKNNCFRecommender(URM_train)
    recommender.fit(topK=131, shrink=2, similarity="cosine", normalize=True)

    print(evaluator_validation.evaluateRecommender(recommender))

    item_list = recommender.recommend(target_ids, cutoff=10)
    CreateCSV.create_csv(target_ids, item_list, 'MyRec')
65 changes: 65 additions & 0 deletions TryOneRecBPR.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from Data_manager.RecSys2020 import RecSys2020Reader
from Notebooks_utils.data_splitter import train_test_holdout
import matplotlib.pyplot as pyplot
import numpy as np
from SLIM_BPR.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
from GraphBased import P3alphaRecommender, RP3betaRecommender
from SLIM_ElasticNet import SLIMElasticNetRecommender
from Base.Evaluation.Evaluator import EvaluatorHoldout
from MatrixFactorization.Cython import MatrixFactorization_Cython
from MatrixFactorization.PyTorch import MF_MSE_PyTorch
from MatrixFactorization import IALSRecommender, NMFRecommender, PureSVDRecommender
from KNN import ItemKNNCBFRecommender, ItemKNNCFRecommender, ItemKNNCustomSimilarityRecommender,\
ItemKNNSimilarityHybridRecommender, UserKNNCFRecommender
from EASE_R import EASE_R_Recommender
import ItemKNNScoresHybridRecommender
import CreateCSV

# https://github.com/MaurizioFD/RecSys_Course_AT_PoliMi/blob/master/Practice%2009%20-%20SLIM%20BPR.ipynb
# https://github.com/nicolo-felicioni/recsys-polimi-2019/tree/master/Hybrid


if __name__ == '__main__':
    # Load interactions (URM), content features (ICM) and the submission targets.
    URM_all, user_id_unique, item_id_unique = RecSys2020Reader.load_urm()
    ICM_all = RecSys2020Reader.load_icm_asset()
    target_ids = RecSys2020Reader.load_target()

    # Diagnostic plot: sorted item popularity (long-tail check).
    item_popularity = np.ediff1d(URM_all.tocsc().indptr)
    print(item_popularity)
    item_popularity = np.sort(item_popularity)
    pyplot.plot(item_popularity, 'ro')
    pyplot.ylabel('Num Interactions ')
    pyplot.xlabel('Sorted Item')
    pyplot.show()

    # Diagnostic plot: sorted user activity.
    user_activity = np.ediff1d(URM_all.indptr)
    user_activity = np.sort(user_activity)

    pyplot.plot(user_activity, 'ro')
    pyplot.ylabel('Num Interactions ')
    pyplot.xlabel('Sorted User')
    pyplot.show()

    # NOTE(review): seed is commented out, so the 97/3 holdout differs run-to-run.
    #np.random.seed(1234)
    URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.97)
    ICM_train, ICM_test = train_test_holdout(ICM_all, train_perc=0.8)
    evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10], exclude_seen=True)

    # Early-stopping kwargs: defined here but NOT passed to fit() below,
    # so training runs the full fixed number of epochs.
    earlystopping_keywargs = {"validation_every_n": 10,
                             "stop_on_validation": True,
                             "evaluator_object": evaluator_validation,
                             "lower_validations_allowed": 5,
                             "validation_metric": "MAP",
                             }

    # SLIM BPR trained for a fixed 130 epochs. (The previous comment here —
    # "MAP 0.07 Kaggle topK 131 shrink 2 cosine" — was copied from the
    # UserKNNCF script and did not describe these hyper-parameters.)
    recommender = SLIM_BPR_Cython(URM_train, recompile_cython=False)
    recommender.fit(**{"topK": 1000, "epochs": 130, "symmetric": False, "sgd_mode": "adagrad", "lambda_i": 1e-05,
                   "lambda_j": 0.01, "learning_rate": 0.0001})


    print(evaluator_validation.evaluateRecommender(recommender))

    item_list = recommender.recommend(target_ids, cutoff=10)
    CreateCSV.create_csv(target_ids, item_list, 'MyRec')
Loading

0 comments on commit 1661c47

Please sign in to comment.