Created
September 16, 2024 11:41
-
-
Save btbytes/2ed6c8f5a73b2e61a45653b4e15ae9ac to your computer and use it in GitHub Desktop.
NLP stuff: a minimal bigram-model text generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict

# Sample text used as the training corpus.
text = "the cat sat on the mat the dog sat on the floor"

# Tokenize the text on whitespace.
words = text.split()

# Build the bigram model: map each word to the list of words that directly
# follow it in the corpus. Repeated followers are kept on purpose, so a
# uniform random pick from the list is weighted by observed frequency.
bigrams = defaultdict(list)
for current, following in zip(words, words[1:]):
    bigrams[current].append(following)
# Function to generate text
def generate_text(start_word, num_words, model=None):
    """Generate text by randomly walking a bigram model.

    Starting from ``start_word``, repeatedly pick a random follower of the
    current word and append it, until ``num_words`` words have been produced
    or the current word has no known follower.

    Args:
        start_word: First word of the generated text.
        num_words: Maximum number of words in the result, including
            ``start_word``.
        model: Optional mapping of word -> list of follower words. When
            omitted, the module-level ``bigrams`` table is used.

    Returns:
        The generated words joined by single spaces. The result may contain
        fewer than ``num_words`` words if the walk reaches a word with no
        followers, and is the empty string when ``num_words`` is not positive.
    """
    import random  # local import: the script has no top-level import section

    if num_words <= 0:
        return ""

    table = bigrams if model is None else model
    current_word = start_word
    result = [current_word]
    for _ in range(num_words - 1):
        # .get() never inserts a key (unlike defaultdict indexing), and the
        # truthiness test also stops on an empty follower list, which would
        # otherwise make random.choice raise IndexError.
        followers = table.get(current_word)
        if not followers:
            break
        current_word = random.choice(followers)
        result.append(current_word)
    return ' '.join(result)
# Generate a sample of up to 5 words starting from "the" and print it.
print(generate_text("the", 5))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment