Skip to content

Instantly share code, notes, and snippets.

@btbytes
Created September 16, 2024 11:41
Show Gist options
  • Save btbytes/2ed6c8f5a73b2e61a45653b4e15ae9ac to your computer and use it in GitHub Desktop.
Save btbytes/2ed6c8f5a73b2e61a45653b4e15ae9ac to your computer and use it in GitHub Desktop.
nlp stuff
from collections import defaultdict

# Sample text
text = "the cat sat on the mat the dog sat on the floor"
# Tokenize the text on whitespace
words = text.split()
# Build bigram model: map each word to the list of words that directly
# follow it in the text. Repeated successors are kept, so a uniform pick
# from a list is weighted by how often that pair occurred.
bigrams = defaultdict(list)
for current, following in zip(words, words[1:]):
    bigrams[current].append(following)
# Function to generate text
def generate_text(start_word, num_words, model=None):
    """Generate a space-separated word sequence by walking a bigram model.

    Starting from ``start_word``, repeatedly pick a random successor of the
    current word until ``num_words`` words have been produced or the current
    word has no recorded successor.

    Args:
        start_word: First word of the output.
        num_words: Maximum number of words to produce, counting
            ``start_word`` itself.
        model: Optional mapping from a word to a sequence of its successor
            words. When omitted, the module-level ``bigrams`` table is used.

    Returns:
        The generated words joined by single spaces, or an empty string
        when ``num_words`` is less than 1.
    """
    import random  # imported locally; the script has no import section

    if num_words < 1:
        return ""
    if model is None:
        model = bigrams

    current_word = start_word
    result = [current_word]
    for _ in range(num_words - 1):
        # .get() avoids inserting a key into a defaultdict model, and the
        # emptiness check keeps random.choice from raising on an empty list.
        successors = model.get(current_word)
        if not successors:
            break
        current_word = random.choice(successors)
        result.append(current_word)
    return ' '.join(result)
# Print a sample of at most five words, seeded with "the"
print(generate_text(start_word="the", num_words=5))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment