Last active
April 23, 2022 12:33
-
-
Save audhiaprilliant/074727ef235602ff6f600eed347aa56d to your computer and use it in GitHub Desktop.
How to Automatically Build Stopwords
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a rank/frequency table that puts each word's observed count next to
# the count predicted by a Zipf-style curve: max_freq * (1 / rank ** alpha).
# NOTE(review): assumes top_words is an indexable sequence of
# (word, frequency) pairs sorted by descending frequency, so that the first
# entry carries the highest frequency -- confirm against the producing cell.

# Highest frequency, read from the first entry. Falls back to 0 when there
# are no words so an empty input produces an empty table instead of raising
# IndexError. len() is used (rather than truthiness) so array-like inputs
# that supported the original indexing keep working.
max_freq = top_words[0][1] if len(top_words) > 0 else 0
# Alpha: exponent applied to the rank in the Zipf estimate
alpha = 1
# List of data: one record per word, with ranks starting at 1
l_data = [
    {
        'rank': index,
        'word': item[0],
        'actual_freq': item[1],
        'relative_freq': '1/{}'.format(index),
        # Expected frequency at this rank, rounded to 2 decimal places
        'zipf_freq': round(max_freq * (1 / (index ** alpha)), 2),
    }
    for index, item in enumerate(top_words, start=1)
]
# Convert list of dictionary into data frame. Columns are listed explicitly
# (same order as the record keys) so they exist even when l_data is empty.
df = pd.DataFrame(
    data=l_data,
    columns=['rank', 'word', 'actual_freq', 'relative_freq', 'zipf_freq'],
)
# Show the data
df.head()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment