Created
October 8, 2021 01:52
-
-
Save marcosan93/a6da9d6bf85ca86cf041a67c6a0457d3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def matchMan(men_df, women_df, ratings, new_man_answers, num_sim=10):
    """
    Rank women by predicted compatibility for a new male user.

    The new man's answers are correlated against every existing man's
    answers. The ``num_sim`` most similar men are kept, and each woman's
    score is the mean of the ratings those men gave her.

    Parameters
    ----------
    men_df : pandas.DataFrame
        One row per existing man, one categorical column per profile
        question (each column must support the ``.cat`` accessor).
    women_df : pandas.DataFrame
        Not used by the computation; kept only so existing callers that
        pass it positionally keep working.
    ratings : pandas.DataFrame
        Ratings whose row labels match ``men_df.index``; per the original
        description there is one column per woman. Cells are numeric or
        the string ``"unseen"``.
    new_man_answers : sequence
        The new man's answers, in the same order as ``men_df.columns``.
        Answers outside 'A'-'E' are encoded as -1 by ``pd.Categorical``.
    num_sim : int, default 10
        How many of the most similar men to average over.

    Returns
    -------
    pandas.Series
        Mean rating per column of ``ratings`` (float), sorted from highest
        to lowest. Entries that none of the similar men rated are NaN and
        sort last.

    Raises
    ------
    ValueError
        If the number of answers differs from the number of questions.
    """
    # Categorical answers to the profile questions
    answer_choices = ['A', 'B', 'C', 'D', 'E']

    # Replace the existing men's answers with their numerical category codes.
    # NOTE(review): these codes follow each column's own category order,
    # while the new man is coded against `answer_choices`; this assumes the
    # two orders agree -- confirm against how men_df is built.
    men_codes = men_df.apply(lambda col: col.cat.codes)

    answers = list(new_man_answers)
    if len(answers) != len(men_codes.columns):
        raise ValueError(
            f"expected {len(men_codes.columns)} answers, got {len(answers)}"
        )

    # Encode the new man's answers once, labelled by question so that
    # corrwith can align them with each existing man's row.
    new_man = pd.Series(
        pd.Categorical(answers, categories=answer_choices).codes,
        index=men_codes.columns,
    )

    # Getting the top N similar users (row-wise correlation with the new man)
    similarity = men_codes.corrwith(new_man, axis=1)
    sim_men = similarity.sort_values(ascending=False).iloc[:num_sim].index

    # Select the similar men's rows first (cheaper than transposing the
    # whole matrix), then turn "unseen" into NaN and force a float dtype so
    # the mean is a plain numeric reduction that skips unseen entries.
    sim_rate = ratings.loc[sim_men].T.replace("unseen", np.nan).astype(float)

    # The potentially most compatible women for the new man
    return sim_rate.mean(axis=1).sort_values(ascending=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment