Created
October 13, 2021 14:18
-
-
Save vinimonteiro/d7c890d54306ec61ea5aa73397fa6775 to your computer and use it in GitHub Desktop.
Cosine similarity example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cosine-similarity demo: take one "base" row from a small table of
# hand-entered numbers, compare it with every row of the table (itself
# included), and print each score.
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

# The four passages below are kept for reference only; nothing further down
# in this script reads them.
basetext = """
Quantum computers encode information in 0s and 1s at the same time, until you "measure" it
"""
text1 = """
A qubit stores "0 and 1 at the same time" in the same way how a car travelling north-west travels north and west at the same time
"""
text2 = """
Considering how quickly the brain reorganizes, it’s suggested that dreams are a defence mechanism
"""
text3 = """
Computers will come with more processing power due to more advanced processors
"""

# One row per passage, two numeric features per row.
# NOTE(review): the meaning of 'wc' and 'wbt' is not stated anywhere in this
# file; they look like hand-computed word statistics for the passages above —
# confirm with the author before relying on them.
df = pd.DataFrame(
    {
        'wc': [11, 4, 0, 1],
        'wbt': [11, 17, 9, 11],
    },
    index=["basetext", "text1", "text2", "text3"],
)

# Selecting with a one-element list of labels keeps each result a one-row
# DataFrame (two-dimensional) rather than collapsing it to a Series.
base_df_loc = df.loc[["basetext"]]
text1_df_loc = df.loc[["text1"]]
text2_df_loc = df.loc[["text2"]]
text3_df_loc = df.loc[["text3"]]

# Each call compares a single row with a single row.
cs_base_base = cosine_similarity(base_df_loc, base_df_loc)
cs_base_text1 = cosine_similarity(base_df_loc, text1_df_loc)
cs_base_text2 = cosine_similarity(base_df_loc, text2_df_loc)
cs_base_text3 = cosine_similarity(base_df_loc, text3_df_loc)

# Emit the four results, one per line, in the same order they were computed.
print(cs_base_base, cs_base_text1, cs_base_text2, cs_base_text3, sep="\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment