Skip to content

Instantly share code, notes, and snippets.

@marcosan93
Created October 8, 2021 01:52
Show Gist options
  • Select an option

  • Save marcosan93/a6da9d6bf85ca86cf041a67c6a0457d3 to your computer and use it in GitHub Desktop.

Select an option

Save marcosan93/a6da9d6bf85ca86cf041a67c6a0457d3 to your computer and use it in GitHub Desktop.
def matchMan(men_df, women_df, ratings, new_man_answers, num_sim=10):
    """
    Rank women by their expected compatibility with a new male user.

    The new user's answers are compared against every existing man's answers;
    the `num_sim` men whose answers correlate best with his are selected, and
    the ratings those men gave are averaged per woman.

    Parameters
    ----------
    men_df : pandas.DataFrame
        One row per existing man, one categorical column per profile question
        (each column must support `.cat.codes`). The last index label must
        look like ``'m<number>'``; it is used to derive the new user's id.
        NOTE(review): the new user's answers are encoded against the fixed
        categories A-E, so the columns here are presumably categorical with
        those same categories in the same order -- confirm against the caller.
    women_df : pandas.DataFrame
        Accepted for interface compatibility; it does not influence the result.
    ratings : pandas.DataFrame
        Ratings indexed by the same ids as `men_df` (it is transposed and then
        selected by men's ids), presumably with one column per woman. Cells
        equal to the string ``"unseen"`` are treated as missing. This frame is
        not modified.
    new_man_answers : sequence of str
        The new user's answers, one per column of `men_df`, in column order.
        Answers outside 'A'..'E' are encoded as -1.
    num_sim : int, default 10
        How many of the most similar existing men to take ratings from.

    Returns
    -------
    pandas.Series
        Mean rating per column of `ratings`, as floats, sorted from highest
        to lowest; entries with no available rating are NaN and sort last.
    """
    # Replace every man's categorical answers with their integer codes.
    men_codes = men_df.apply(lambda col: col.cat.codes)

    # Id for the new man: one past the numeric part of the last existing id.
    new_man_id = f"m{int(men_df.index[-1][1:]) + 1}"

    # Categorical answers to the profile questions.
    ans = ['A', 'B', 'C', 'D', 'E']

    # Encode the new man's answers directly as codes, labelled by question so
    # that corrwith can align them with the columns of men_codes.
    new_man = pd.Series(
        pd.Categorical(new_man_answers, categories=ans).codes,
        index=men_df.columns,
        name=new_man_id,
    )

    # Row-wise correlation with the new man; keep the top N most similar men.
    similarity = men_codes.corrwith(new_man, axis=1)
    sim_men = similarity.sort_values(ascending=False).iloc[:num_sim].index

    # Ratings given by the similar men. A non-inplace replace avoids writing
    # into a frame derived from `ratings`, and the float cast keeps the mean
    # numeric instead of object-typed.
    sim_rate = ratings.T[sim_men].replace("unseen", np.nan).astype(float)

    # The potentially most compatible women for the new man.
    return sim_rate.mean(axis=1).sort_values(ascending=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment