Skip to content

Instantly share code, notes, and snippets.

@TheEdoardo93
Created October 16, 2017 15:16
Show Gist options
  • Save TheEdoardo93/7bce72c08ab4839a150034340634ecbd to your computer and use it in GitHub Desktop.
Save TheEdoardo93/7bce72c08ab4839a150034340634ecbd to your computer and use it in GitHub Desktop.
def probabilistic_matrix_factorization_technique(ratings_matrix):
    """Train and evaluate a Probabilistic Matrix Factorization (PMF) model.

    The ratings are split 80/20 into a training and a testing set, a PMF
    model is fitted on the training part, and the RMSE on both parts is
    printed together with the learned user and item latent features.

    Args:
        ratings_matrix: 2-D NumPy array whose first three columns are
            (user_id, item_id, rating). Ids are assumed to start from 1.
            NOTE: the two id columns are decremented IN PLACE, so the
            caller's array is modified by this call.

    Returns:
        The ratings matrix produced by
        ``delete_all_rows_in_the_ratings_matrix_which_have_0_as_rating_value``
        (zero-based ids), after it has been shuffled in place.
    """
    # Fixed seed so that the shuffle, and therefore the train/test split,
    # is reproducible from run to run.
    random_state = RandomState(0)

    # With 1-based ids the largest id in a column is used as the count.
    # Cast to a plain int so the value stays an integer even when the
    # matrix has a float dtype.
    number_of_users = int(ratings_matrix[:, 0].max())
    number_of_items = int(ratings_matrix[:, 1].max())

    # Shift user_id and item_id by 1, in order to let them start from 0.
    # This writes into the caller's array (see the docstring note).
    ratings_matrix[:, (0, 1)] -= 1

    # Delete all the rows in the ratings matrix which have 0 as rating value.
    final_ratings_matrix = delete_all_rows_in_the_ratings_matrix_which_have_0_as_rating_value(ratings_matrix)

    # Split the available data into a training set (used to fit the PMF
    # model) and a testing set (used to evaluate the fitted model).
    training_set_percentage = 0.8
    random_state.shuffle(final_ratings_matrix)  # shuffles the rows in place
    training_set_size = int(training_set_percentage * final_ratings_matrix.shape[0])
    training_set = final_ratings_matrix[:training_set_size]
    testing_set = final_ratings_matrix[training_set_size:]

    # PMF model settings.
    # The trailing space after "matrix" keeps the two adjacent literals from
    # being glued together as "matrixyou" in the prompt.
    # NOTE(review): the helper is assumed to return an integer (it is
    # formatted with %d below) - confirm against its definition.
    number_of_features = ask_information_to_the_user(
        "Enter the number of (user and item) latent features of the ratings matrix "
        "you want to obtain: ", "number of latent features")
    evaluation_iterations = 20
    min_rating_value = 1.0
    max_rating_value = 5.0
    initial_seed = 0
    epsilon_value = 25.

    print("Number of users = %d, Number of items = %d, Number of features = %d, Training size = %d, Testing size: %d" %
          (number_of_users, number_of_items, number_of_features, training_set.shape[0], testing_set.shape[0]))

    pmf = PMF(n_user=number_of_users, n_item=number_of_items, n_feature=number_of_features,
              epsilon=epsilon_value, min_rating=min_rating_value, max_rating=max_rating_value,
              seed=initial_seed)

    # Training and testing phase.
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("PMF training and testing phases")
    pmf.fit(training_set, n_iters=evaluation_iterations)

    # Columns 0-1 are the (user, item) pairs to predict; column 2 holds the
    # observed ratings the predictions are compared against.
    training_predictions = pmf.predict(training_set[:, :2])
    training_rmse = RMSE(training_predictions, training_set[:, 2])
    testing_predictions = pmf.predict(testing_set[:, :2])
    testing_rmse = RMSE(testing_predictions, testing_set[:, 2])
    print("After %d iterations, training phase RMSE: %.6f, testing phase RMSE: %.6f" %
          (evaluation_iterations, training_rmse, testing_rmse))

    user_features = pmf.user_features_
    print("User features = " + str(user_features))
    item_features = pmf.item_features_
    # Print the item features here; the previous code printed the user
    # features a second time under this label.
    print("Item features = " + str(item_features))

    return final_ratings_matrix
# Script entry point: the body runs only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    # Placeholder for the setup code elided from this listing. It has to
    # define final_ratings_matrix, which is read by the call below
    # (presumably by loading the ratings data - TODO confirm).
    ...
    # Applying Probabilistic Matrix Factorization (PMF) model
    # The name is rebound to the matrix returned by the function.
    final_ratings_matrix = probabilistic_matrix_factorization_technique(final_ratings_matrix)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment