Created
October 16, 2017 15:16
-
-
Save TheEdoardo93/7bce72c08ab4839a150034340634ecbd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def probabilistic_matrix_factorization_technique(ratings_matrix):
    """Train a PMF model on ``ratings_matrix`` and report its train/test RMSE.

    The function splits the non-zero ratings 80/20 into training and testing
    sets, fits a ``PMF`` model on the training part, prints the RMSE on both
    parts and prints the learned user/item latent feature matrices.

    Parameters:
        ratings_matrix: 2-D NumPy array whose column 0 holds user ids,
            column 1 holds item ids and column 2 holds the rating value.
            Ids are expected to be 1-based (they are shifted down by one
            below). NOTE: the id columns are decremented IN PLACE, so the
            caller's array is modified; calling this function twice on the
            same array would shift the ids twice.

    Returns:
        The ratings matrix as returned by
        ``delete_all_rows_in_the_ratings_matrix_which_have_0_as_rating_value``,
        with 0-based ids and rows shuffled.
    """
    # Fixed seed so the train/test shuffle is reproducible between runs.
    random_state = RandomState(0)

    # With 1-based ids the largest id in a column doubles as the count.
    # Cast to a plain int so the value is a valid size even when the matrix
    # has a floating-point dtype.
    number_of_users = int(ratings_matrix[:, 0].max())
    number_of_items = int(ratings_matrix[:, 1].max())

    # Shift user_id and item_id by 1, in order to let them start from 0.
    ratings_matrix[:, (0, 1)] -= 1

    # Delete all the rows in the ratings matrix which have 0 as rating value.
    final_ratings_matrix = delete_all_rows_in_the_ratings_matrix_which_have_0_as_rating_value(ratings_matrix)

    # Split available data into a training set (used to fit the PMF model)
    # and a testing set (used to evaluate the fitted model).
    training_set_percentage = 0.8
    # Shuffles the rows in place; if the helper above returned a view of
    # ``ratings_matrix`` the caller's array is reordered too (TODO confirm).
    random_state.shuffle(final_ratings_matrix)
    training_set_size = int(training_set_percentage * final_ratings_matrix.shape[0])
    training_set = final_ratings_matrix[:training_set_size]
    testing_set = final_ratings_matrix[training_set_size:]

    # PMF model settings.
    number_of_features = ask_information_to_the_user(
        "Enter the number of (user and item) latent features of the ratings matrix "
        "you want to obtain: ",
        "number of latent features")
    evaluation_iterations = 20
    min_rating_value = 1.0
    max_rating_value = 5.0
    initial_seed = 0
    epsilon_value = 25.

    print("Number of users = %d, Number of items = %d, Number of features = %d, Training size = %d, Testing size: %d" %
          (number_of_users, number_of_items, number_of_features, training_set.shape[0], testing_set.shape[0]))

    pmf = PMF(n_user=number_of_users, n_item=number_of_items, n_feature=number_of_features,
              epsilon=epsilon_value, min_rating=min_rating_value, max_rating=max_rating_value,
              seed=initial_seed)

    # Training and testing phase.
    # Single-argument print(...) parses identically on Python 2 and 3.
    print("PMF training and testing phases")
    pmf.fit(training_set, n_iters=evaluation_iterations)

    # Columns 0-1 (user id, item id) are the model inputs, column 2 the target.
    training_predictions = pmf.predict(training_set[:, :2])
    training_rmse = RMSE(training_predictions, training_set[:, 2])
    testing_predictions = pmf.predict(testing_set[:, :2])
    testing_rmse = RMSE(testing_predictions, testing_set[:, 2])
    print("After %d iterations, training phase RMSE: %.6f, testing phase RMSE: %.6f" %
          (evaluation_iterations, training_rmse, testing_rmse))

    # Latent feature matrices learned by the model.
    user_features = pmf.user_features_
    print("User features = " + str(user_features))
    item_features = pmf.item_features_
    # The original printed the user features here a second time.
    print("Item features = " + str(item_features))

    return final_ratings_matrix
if __name__ == "__main__":
    # Placeholder kept from the original excerpt: the elided setup code must
    # define ``final_ratings_matrix`` before the call below.
    ...
    # Applying Probabilistic Matrix Factorization (PMF) model
    final_ratings_matrix = probabilistic_matrix_factorization_technique(final_ratings_matrix)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment