-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Created first hybrids, best Kaggle MAP 0.081.
- Loading branch information
Showing
7 changed files
with
462 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
from Data_manager.RecSys2020 import RecSys2020Reader | ||
from Notebooks_utils.data_splitter import train_test_holdout | ||
import matplotlib.pyplot as pyplot | ||
import numpy as np | ||
from SLIM_BPR.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython | ||
from GraphBased import P3alphaRecommender, RP3betaRecommender | ||
from SLIM_ElasticNet import SLIMElasticNetRecommender | ||
from Base.Evaluation.Evaluator import EvaluatorHoldout | ||
from MatrixFactorization.Cython import MatrixFactorization_Cython | ||
from MatrixFactorization.PyTorch import MF_MSE_PyTorch | ||
from MatrixFactorization import IALSRecommender, NMFRecommender, PureSVDRecommender | ||
from KNN import ItemKNNCBFRecommender, ItemKNNCFRecommender, ItemKNNCustomSimilarityRecommender,\ | ||
ItemKNNSimilarityHybridRecommender, UserKNNCFRecommender | ||
from EASE_R import EASE_R_Recommender | ||
import ItemKNNScoresHybridRecommender | ||
import CreateCSV | ||
|
||
# https://github.com/MaurizioFD/RecSys_Course_AT_PoliMi/blob/master/Practice%2009%20-%20SLIM%20BPR.ipynb | ||
# https://github.com/nicolo-felicioni/recsys-polimi-2019/tree/master/Hybrid | ||
|
||
|
||
if __name__ == '__main__':
    # Load the interaction matrix (plus its id arrays), the item content
    # matrix and the users a submission must be produced for.
    URM_all, user_id_unique, item_id_unique = RecSys2020Reader.load_urm()
    ICM_all = RecSys2020Reader.load_icm_asset()
    target_ids = RecSys2020Reader.load_target()

    # Seed numpy before splitting (presumably train_test_holdout draws from
    # numpy's global RNG -- confirm in Notebooks_utils.data_splitter).
    np.random.seed(12341)
    URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.97)
    # NOTE(review): the ICM is split as well and only ICM_train is used below;
    # ICM_test is never read in this script.
    ICM_train, ICM_test = train_test_holdout(ICM_all, train_perc=0.97)
    evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10], exclude_seen=True)

    # Item-based collaborative KNN.
    item_cf = ItemKNNCFRecommender.ItemKNNCFRecommender(URM_train)
    item_cf.fit(
        topK=1000,
        shrink=732,
        similarity="cosine",
        normalize=True,
        feature_weighting="TF-IDF",
    )

    # User-based collaborative KNN.
    user_cf = UserKNNCFRecommender.UserKNNCFRecommender(URM_train)
    user_cf.fit(topK=131, shrink=2, similarity="cosine", normalize=True)

    # Item-based content KNN built on the item content matrix.
    item_cbf = ItemKNNCBFRecommender.ItemKNNCBFRecommender(URM_train, ICM_train)
    item_cbf.fit(
        topK=700,
        shrink=200,
        similarity='jaccard',
        normalize=True,
        feature_weighting="TF-IDF",
    )

    # First hybrid: content KNN blended with user KNN, equal weights.
    hybrid_cbf_user = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, item_cbf, user_cf
    )
    hybrid_cbf_user.fit(alpha=0.5)

    # Second hybrid: the first hybrid blended with item KNN, equal weights.
    # Kaggle MAP 0.081
    hybrid_all = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, hybrid_cbf_user, item_cf
    )
    hybrid_all.fit(alpha=0.5)

    # Print the hold-out evaluation of the user KNN baseline and both hybrids.
    for model in (user_cf, hybrid_cbf_user, hybrid_all):
        print(evaluator_validation.evaluateRecommender(model))

    # NOTE(review): the submission is produced by the FIRST hybrid, not by the
    # second one that carries the "Kaggle MAP 0.081" remark -- confirm intent.
    item_list = hybrid_cbf_user.recommend(target_ids, cutoff=10)
    CreateCSV.create_csv(target_ids, item_list, 'Hyb_User_Item_KNNCF')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
from Base.BaseSimilarityMatrixRecommender import BaseItemSimilarityMatrixRecommender | ||
from Base.Recommender_utils import check_matrix | ||
import numpy as np | ||
|
||
class ItemKNNScoresHybridRecommender(BaseItemSimilarityMatrixRecommender):
    """Hybrid recommender blending the scores of two already-fitted recommenders.

    The scores of each wrapped recommender are standardised (zero mean, unit
    standard deviation over the finite entries of the requested batch) and then
    combined as ``R = R1 * alpha + R2 * (1 - alpha)``.

    The wrapped recommenders are only required to expose
    ``_compute_item_score(user_id_array, items_to_compute)``; they may be
    item-based, user-based or hybrids themselves.
    """

    RECOMMENDER_NAME = "ItemKNNScoresHybridRecommender"

    def __init__(self, URM_train, Recommender_1, Recommender_2):
        """Store the training matrix and the two recommenders to blend.

        :param URM_train: user-item interaction matrix; a CSR copy is kept.
        :param Recommender_1: fitted recommender whose scores are weighted by ``alpha``.
        :param Recommender_2: fitted recommender whose scores are weighted by ``1 - alpha``.
        """
        super(ItemKNNScoresHybridRecommender, self).__init__(URM_train)

        self.URM_train = check_matrix(URM_train.copy(), 'csr')
        self.Recommender_1 = Recommender_1
        self.Recommender_2 = Recommender_2

    def fit(self, alpha=0.5):
        """Set the blending weight; no training happens here.

        :param alpha: weight of ``Recommender_1``; ``Recommender_2`` gets ``1 - alpha``.
        """
        self.alpha = alpha

    @staticmethod
    def _standardize(item_scores):
        """Return ``item_scores`` shifted and scaled using its finite entries only.

        Non-finite entries (e.g. ``-inf`` markers of excluded items) are left out
        of the statistics, otherwise the mean would be ``-inf`` and the standard
        deviation NaN, turning every score into NaN.
        """
        finite_mask = np.isfinite(item_scores)
        if not finite_mask.any():
            # Nothing to standardise on.
            return item_scores

        finite_values = item_scores[finite_mask]
        mean = finite_values.mean()
        std = finite_values.std()
        if std == 0:
            # All finite scores are equal: only centre them, avoid dividing by zero.
            std = 1

        # -inf entries stay -inf through the subtraction and the division.
        return (item_scores - mean) / std

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """Compute the blended, standardised scores for a batch of users.

        :param user_id_array: ids of the users to score.
        :param items_to_compute: optional item ids to restrict the scoring to;
            it is forwarded unchanged to both wrapped recommenders.
        :return: array of blended scores, one row per user; entries that either
            wrapped recommender reported as ``-inf`` are ``-inf`` in the result.
        """
        # Delegate to the wrapped recommenders instead of reading their W_sparse
        # directly: that only worked for one specific (item-based, user-based)
        # pairing and broke when a hybrid or an item-based model was passed as
        # the second recommender.
        item_scores1 = self.Recommender_1._compute_item_score(user_id_array, items_to_compute)
        item_scores2 = self.Recommender_2._compute_item_score(user_id_array, items_to_compute)

        # Remember which entries were excluded before the scores are rescaled.
        excluded = np.isneginf(item_scores1) | np.isneginf(item_scores2)

        item_scores1 = self._standardize(item_scores1)
        item_scores2 = self._standardize(item_scores2)

        # -inf * 0 (alpha equal to 0 or 1) would emit a warning and produce NaN;
        # those entries are overwritten right below.
        with np.errstate(invalid='ignore'):
            item_scores = item_scores1 * self.alpha + item_scores2 * (1 - self.alpha)

        item_scores[excluded] = -np.inf

        return item_scores
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
from Data_manager.RecSys2020 import RecSys2020Reader | ||
from Notebooks_utils.data_splitter import train_test_holdout | ||
import matplotlib.pyplot as pyplot | ||
import numpy as np | ||
from SLIM_BPR.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython | ||
from GraphBased import P3alphaRecommender, RP3betaRecommender | ||
from SLIM_ElasticNet import SLIMElasticNetRecommender | ||
from Base.Evaluation.Evaluator import EvaluatorHoldout | ||
from MatrixFactorization.Cython import MatrixFactorization_Cython | ||
from MatrixFactorization.PyTorch import MF_MSE_PyTorch | ||
from MatrixFactorization import IALSRecommender, NMFRecommender, PureSVDRecommender | ||
from KNN import ItemKNNCBFRecommender, ItemKNNCFRecommender, ItemKNNCustomSimilarityRecommender,\ | ||
ItemKNNSimilarityHybridRecommender, UserKNNCFRecommender | ||
from EASE_R import EASE_R_Recommender | ||
import ItemKNNScoresHybridRecommender | ||
import CreateCSV | ||
|
||
# https://github.com/MaurizioFD/RecSys_Course_AT_PoliMi/blob/master/Practice%2009%20-%20SLIM%20BPR.ipynb | ||
# https://github.com/nicolo-felicioni/recsys-polimi-2019/tree/master/Hybrid | ||
|
||
|
||
if __name__ == '__main__':
    # Load the interaction matrix (plus its id arrays), the item content
    # matrix and the users a submission must be produced for.
    URM_all, user_id_unique, item_id_unique = RecSys2020Reader.load_urm()
    ICM_all = RecSys2020Reader.load_icm_asset()
    target_ids = RecSys2020Reader.load_target()

    # Stored entries per column (after CSC conversion) and per row of the URM.
    # Reading URM_all.indptr directly assumes the loader returns a CSR matrix
    # -- TODO confirm.
    interactions_per_item = np.ediff1d(URM_all.tocsc().indptr)
    print(interactions_per_item)
    interactions_per_user = np.ediff1d(URM_all.indptr)

    # One plot per distribution, counts sorted in ascending order.
    for counts, x_label in (
        (interactions_per_item, 'Sorted Item'),
        (interactions_per_user, 'Sorted User'),
    ):
        pyplot.plot(np.sort(counts), 'ro')
        pyplot.ylabel('Num Interactions ')
        pyplot.xlabel(x_label)
        pyplot.show()

    #np.random.seed(1234)
    URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.97)
    ICM_train, ICM_test = train_test_holdout(ICM_all, train_perc=0.8)
    evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10], exclude_seen=True)

    # NOTE(review): these early-stopping settings are built but not passed to
    # any fit() call in this script.
    earlystopping_keywargs = dict(
        validation_every_n=10,
        stop_on_validation=True,
        evaluator_object=evaluator_validation,
        lower_validations_allowed=5,
        validation_metric="MAP",
    )

    # MAP 0.07 Kaggle "topK": 131, "shrink": 2, "similarity": "cosine", "normalize": true}
    user_knn_params = {"topK": 131, "shrink": 2, "similarity": "cosine", "normalize": True}

    recommender = UserKNNCFRecommender.UserKNNCFRecommender(URM_train)
    recommender.fit(**user_knn_params)

    # Hold-out evaluation of the fitted model.
    validation_result = evaluator_validation.evaluateRecommender(recommender)
    print(validation_result)

    # Top-10 recommendations for the target users, written out as a CSV.
    item_list = recommender.recommend(target_ids, cutoff=10)
    CreateCSV.create_csv(target_ids, item_list, 'MyRec')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
from Data_manager.RecSys2020 import RecSys2020Reader | ||
from Notebooks_utils.data_splitter import train_test_holdout | ||
import matplotlib.pyplot as pyplot | ||
import numpy as np | ||
from SLIM_BPR.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython | ||
from GraphBased import P3alphaRecommender, RP3betaRecommender | ||
from SLIM_ElasticNet import SLIMElasticNetRecommender | ||
from Base.Evaluation.Evaluator import EvaluatorHoldout | ||
from MatrixFactorization.Cython import MatrixFactorization_Cython | ||
from MatrixFactorization.PyTorch import MF_MSE_PyTorch | ||
from MatrixFactorization import IALSRecommender, NMFRecommender, PureSVDRecommender | ||
from KNN import ItemKNNCBFRecommender, ItemKNNCFRecommender, ItemKNNCustomSimilarityRecommender,\ | ||
ItemKNNSimilarityHybridRecommender, UserKNNCFRecommender | ||
from EASE_R import EASE_R_Recommender | ||
import ItemKNNScoresHybridRecommender | ||
import CreateCSV | ||
|
||
# https://github.com/MaurizioFD/RecSys_Course_AT_PoliMi/blob/master/Practice%2009%20-%20SLIM%20BPR.ipynb | ||
# https://github.com/nicolo-felicioni/recsys-polimi-2019/tree/master/Hybrid | ||
|
||
|
||
if __name__ == '__main__':
    # Load the interaction matrix (plus its id arrays), the item content
    # matrix and the users a submission must be produced for.
    URM_all, user_id_unique, item_id_unique = RecSys2020Reader.load_urm()
    ICM_all = RecSys2020Reader.load_icm_asset()
    target_ids = RecSys2020Reader.load_target()

    # Stored entries per column (after CSC conversion) and per row of the URM.
    # Reading URM_all.indptr directly assumes the loader returns a CSR matrix
    # -- TODO confirm.
    interactions_per_item = np.ediff1d(URM_all.tocsc().indptr)
    print(interactions_per_item)
    interactions_per_user = np.ediff1d(URM_all.indptr)

    # One plot per distribution, counts sorted in ascending order.
    for counts, x_label in (
        (interactions_per_item, 'Sorted Item'),
        (interactions_per_user, 'Sorted User'),
    ):
        pyplot.plot(np.sort(counts), 'ro')
        pyplot.ylabel('Num Interactions ')
        pyplot.xlabel(x_label)
        pyplot.show()

    #np.random.seed(1234)
    URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.97)
    ICM_train, ICM_test = train_test_holdout(ICM_all, train_perc=0.8)
    evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10], exclude_seen=True)

    # NOTE(review): these early-stopping settings are built but not passed to
    # the fit() call below.
    earlystopping_keywargs = dict(
        validation_every_n=10,
        stop_on_validation=True,
        evaluator_object=evaluator_validation,
        lower_validations_allowed=5,
        validation_metric="MAP",
    )

    # NOTE(review): the original carried a "MAP 0.07 Kaggle" remark listing
    # UserKNN hyper-parameters (topK 131, shrink 2, cosine, normalize); it does
    # not describe the SLIM BPR settings used here.
    slim_bpr_params = {
        "topK": 1000,
        "epochs": 130,
        "symmetric": False,
        "sgd_mode": "adagrad",
        "lambda_i": 1e-05,
        "lambda_j": 0.01,
        "learning_rate": 0.0001,
    }

    recommender = SLIM_BPR_Cython(URM_train, recompile_cython=False)
    recommender.fit(**slim_bpr_params)

    # Hold-out evaluation of the fitted model.
    validation_result = evaluator_validation.evaluateRecommender(recommender)
    print(validation_result)

    # Top-10 recommendations for the target users, written out as a CSV.
    item_list = recommender.recommend(target_ids, cutoff=10)
    CreateCSV.create_csv(target_ids, item_list, 'MyRec')
Oops, something went wrong.