Last active
November 17, 2022 17:11
-
-
Save mjdargen/a32d63e7864c6601a733e449cfbeeea8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
import random | |
import requests | |
from nltk import word_tokenize | |
from nltk import pos_tag | |
from nltk.tokenize import SyllableTokenizer | |
from random_word import RandomWords | |
# Shared helpers, constructed once at import time and reused by the
# functions below.
rw = RandomWords()  # word source used by random_word()
SSP = SyllableTokenizer()  # NLTK syllable splitter used for every syllable count

# Absolute path of the directory that contains this script.
DIR = os.path.dirname(os.path.abspath(__file__))
def get_word():
    """Pick a random short noun from a public English word list.

    Downloads the "google-10000-english" no-swears list on every call and
    samples it until a word is found whose first token is tagged as a noun
    (POS tag starting with ``"NN"``) and for which ``syllables_in_text``
    reports at most three syllables.

    Returns:
        str: the selected word.

    Raises:
        requests.RequestException: if the download fails, times out, or the
            server answers with an HTTP error status.
        IndexError: if the downloaded list contains no words at all.
    """
    url = "https://raw.githubusercontent.com/first20hours/google-10000-english/master/google-10000-english-usa-no-swears.txt"
    # A timeout keeps a stalled connection from hanging the program forever.
    r = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of sampling "words" from an error page.
    r.raise_for_status()
    # Drop blank lines: tokenizing an empty string yields no tokens, so the
    # [0] lookup below would raise IndexError on such an entry.
    words = [w.strip() for w in r.text.splitlines() if w.strip()]
    while True:
        word = random.choice(words)
        tag = pos_tag(word_tokenize(word))[0][1]
        if tag.startswith("NN") and syllables_in_text(word) <= 3:
            return word
def random_word():
    """Draw words from the random-word generator until one is acceptable.

    A candidate is accepted when its first token is tagged as a noun (POS tag
    starting with ``"NN"``) and ``syllables_in_text`` reports at most three
    syllables for it.

    Returns:
        The first accepted word.
    """
    while True:
        candidate = rw.get_random_word()
        first_tag = pos_tag(word_tokenize(candidate))[0][1]
        # The syllable count is only evaluated for nouns (short-circuit).
        if first_tag.startswith("NN") and syllables_in_text(candidate) <= 3:
            return candidate
def syllables_in_word(word):
    """Return the number of pieces the shared syllable tokenizer splits *word* into."""
    pieces = SSP.tokenize(word)
    return len(pieces)
def syllables_in_text(text):
    """Return the total syllable count over every token of *text*.

    The text is split with ``word_tokenize`` and each token is counted with
    ``syllables_in_word``; text with no tokens yields 0.
    """
    return sum(syllables_in_word(token) for token in word_tokenize(text))
def get_haiku(word, max_attempts=None, timeout=30):
    """Request haikus about *word* until one scans as 5-7-5 syllables.

    Each attempt POSTs ``{"subject": word}`` to the haiku API, strips
    punctuation from the response text, drops lines of one character or
    less, and counts syllables per remaining line with
    ``syllables_in_text``. The raw response text of the first attempt whose
    counts equal ``[5, 7, 5]`` is returned.

    Args:
        word: subject sent to the API.
        max_attempts: maximum number of requests to make; ``None`` (the
            default) retries without limit, as the function always did.
        timeout: per-request timeout in seconds, passed to ``requests.post``.

    Returns:
        str: the unmodified response text of the accepted haiku.

    Raises:
        requests.RequestException: if a request fails, times out, or the
            server answers with an HTTP error status.
        RuntimeError: if ``max_attempts`` requests produced no 5-7-5 haiku.
    """
    url = "https://dataviz.ei.columbia.edu/api/haiku"
    headers = {'Content-type': 'application/json'}
    data = {'subject': word}
    attempts = 0
    while max_attempts is None or attempts < max_attempts:
        attempts += 1
        # The timeout keeps a stalled connection from blocking forever.
        r = requests.post(url, headers=headers, json=data, timeout=timeout)
        # An error page can never scan as 5-7-5; stop instead of hammering
        # the server with retries that cannot succeed.
        r.raise_for_status()
        # Remove punctuation so it is not tokenized and counted as syllables.
        lines = re.sub(r'[^\w\s]', '', r.text).splitlines()
        lines = [line for line in lines if len(line) > 1]
        counts = [syllables_in_text(line) for line in lines]
        # Diagnostic output for every attempt: raw text, per-line syllable
        # counts, and the syllable split of each token.
        print(r.text)
        print(counts)
        print([[SSP.tokenize(token)
                for token in word_tokenize(line)] for line in lines])
        if counts == [5, 7, 5]:
            return r.text
    raise RuntimeError(
        f"no 5-7-5 haiku for {word!r} after {attempts} attempt(s)")
def main():
    """Prompt for a subject word (or pick one at random) and print a haiku.

    Entering ``r`` (any case, surrounding whitespace ignored) selects a
    random word via ``random_word``; any other input is used as the subject
    after surrounding whitespace is removed.
    """
    # Strip once so the same normalized value is used for the 'r' check,
    # the echo below, and the subject sent to the haiku API.
    word = input("Enter a word or type 'r' to randomly choose a word: ").strip()
    if word.lower() == 'r':
        word = random_word()  # from library
        # Alternative source: word = get_word()  (from the downloaded word list)
    # NOTE(review): the original indentation was lost; this echo is assumed
    # to run for typed words as well as random ones - confirm.
    print(f"The word is {word}.")
    haiku = get_haiku(word)
    print(haiku)
# Run the interactive prompt only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment