Created
March 31, 2023 15:09
-
-
Save vitalbh/f49683f5c0d1aff4985a7994486d74ae to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk | |
from nltk.tokenize import word_tokenize | |
from nltk.corpus import wordnet as wn | |
import nltk | |
import ssl | |
# Make urllib's default HTTPS context an unverified one so the NLTK downloads
# below do not fail on certificate validation (presumably a workaround for
# machines without usable root certificates -- confirm it is still needed).
# NOTE(security): this turns off HTTPS certificate verification for the whole
# process, not just for NLTK. Prefer installing proper root certificates and
# removing this block when possible.
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    # This interpreter does not expose the private helper; keep the default
    # (verifying) HTTPS context.
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

# Download exactly the resources this script uses. A bare nltk.download()
# launches the interactive downloader and never guarantees that WordNet is
# installed, so each resource is requested by name instead.
nltk.download('punkt')      # tokenizer models used by word_tokenize
nltk.download('punkt_tab')  # tokenizer tables required by newer NLTK releases
nltk.download('wordnet')    # corpus queried through wn.synsets
def get_word_type(word):
    """Return the part-of-speech tag of the first WordNet synset for *word*.

    The word is looked up with ``wn.synsets``; the result is ``pos()`` of the
    first synset returned, or ``None`` when the lookup yields no synsets.
    """
    matches = wn.synsets(word)
    return matches[0].pos() if matches else None
def count_word_types(text):
    """Count the distinct tokens of *text* per part-of-speech tag.

    The text is split with ``word_tokenize`` and de-duplicated, so every
    distinct token contributes at most once. Tokens for which
    ``get_word_type`` returns a falsy value are ignored.

    Returns a dict mapping each tag to the number of distinct tokens that
    resolved to it.
    """
    tally = {}
    for tok in set(word_tokenize(text)):
        tag = get_word_type(tok)
        if not tag:
            # No tag for this token; it does not count toward any tag.
            continue
        tally[tag] = tally.get(tag, 0) + 1
    return tally
text = """We were good, we were gold | |
Kind of dream that can't be sold | |
We were right 'til we weren't | |
Built a home and watched it burn | |
Hum, I didn't wanna leave you | |
I didn't wanna lie | |
Started to cry, but then remembered I | |
I can buy myself flowers | |
Write my name in the sand | |
Talk to myself for hours | |
Say things you don't understand | |
I can take myself dancing | |
And I can hold my own hand | |
Yeah, I can love me better than you can | |
Can love me better | |
I can love me better, baby | |
Can love me better | |
I can love me better, baby | |
Paint my nails cherry-red | |
Match the roses that you left | |
No remorse, no regret | |
I forgive every word you said | |
Ooh, I didn't wanna leave, baby | |
I didn't wanna fight | |
Started to cry, but then remembered I | |
I can buy myself flowers | |
Write my name in the sand | |
Talk to myself for hours, yeah | |
Say things you don't understand | |
I can take myself dancing, yeah | |
I can hold my own hand | |
Yeah, I can love me better than you can | |
Can love me better | |
I can love me better, baby | |
Can love me better | |
I can love me better, baby | |
Can love me better | |
I can love me better, baby | |
Can love me better, ooh, I | |
I didn't wanna leave you | |
I didn't wanna fight | |
Started to cry, but then remembered I | |
I can buy myself flowers (uh, huh) | |
Write my name in the sand (ooh) | |
Talk to myself for hours (yeah) | |
Say things you don't understand (better than you) | |
I can take myself dancing (yeah) | |
I can hold my own hand | |
Yeah, I can love me better than | |
Yeah, I can love me better than you can | |
Can love me better | |
I can love me better, baby (oh) | |
Can love me better | |
I can love me better, baby (than you can) | |
Can love me better | |
I can love me better, baby | |
Can love me better, I""" | |
# Tally the sample text and print a header followed by one "tag: count" line
# per tag, in the order the tags appear in the returned dict.
word_types = count_word_types(text)
print("Word count by type:")
for word_type in word_types:
    count = word_types[word_type]
    print(f"{word_type}: {count}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment