Created
October 7, 2021 19:43
-
-
Save marcosan93/c3a0940f9ed468a6a2bf556232f202f3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a synthetic dating dataset: two profile tables (`men`, `women`) whose
# rows are users and whose columns are categorical answers to profile
# questions, plus a men-by-women `ratings` table of randomly drawn statuses.
# All values come from the unseeded `random` module, so every run produces
# different data.
import random

import pandas as pd

# Creating a Dataset of men and women
men = pd.DataFrame()
women = pd.DataFrame()

# Number of users (used for both the men and the women tables)
num = 1000

# Dating profile questions for each
qs = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']

# Answers to profile questions
ans = ['A', 'B', 'C', 'D', 'E']

for q in qs:
    # Making them categorical for preprocessing later; passing `categories`
    # explicitly keeps all five answers as known categories even if a random
    # draw happens to miss one of them.
    men[q] = pd.Categorical(random.choices(ans, k=num), categories=ans)
    women[q] = pd.Categorical(random.choices(ans, k=num), categories=ans)

# IDs: "m0".."m999" for men, "w0".."w999" for women
men['id'] = ["m" + str(i) for i in range(num)]
women['id'] = ["w" + str(i) for i in range(num)]

# Setting index so each profile row is addressed by its user id
men.set_index('id', inplace=True)
women.set_index('id', inplace=True)

# Creating match status between users: rows are men ids, columns are women ids.
ratings = pd.DataFrame(index=men.index, columns=women.index)

for i in ratings.columns:
    # One random status per man for this woman. Each cell is 0, 1 or the
    # string "unseen", so the columns hold mixed int/str objects.
    # NOTE(review): presumably 0/1 encode no-match/match -- confirm with author.
    ratings[i] = random.choices([0, 1, "unseen"], k=num)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment