Last active
September 5, 2024 15:00
-
-
Save omc8db/a359a96fbf64f67ccddfb572814a977e to your computer and use it in GitHub Desktop.
Simple markov chain text generator in python, bash script friendly. We have copilot at home.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Simple Markov chain text generator, friendly to shell pipelines.

Reads one or more text sources (files, or standard input when the name is
"-"), records which token follows each window of preceding tokens, and then
emits a random walk over that table.  Every emitted token is prefixed with an
ANSI color escape identifying the source it was read from.
"""
import argparse
import random
import sys
from collections import defaultdict

# Each input source is assigned one color, in order; output tokens are color
# coded by which source they came from.  The palette wraps around when there
# are more sources than colors.
COLORS = [
    "\033[0;32m",  # GREEN
    "\033[1;33m",  # YELLOW
    "\033[0;31m",  # RED
    "\033[0;34m",  # BLUE
    "\033[0;33m",  # BROWN
    "\033[1;32m",  # LIGHT_GREEN
    "\033[1;31m",  # LIGHT_RED
    "\033[1;34m",  # LIGHT_BLUE
    "\033[0;37m",  # LIGHT_GRAY
    "\033[1;35m",  # LIGHT_PURPLE
]


def feed(freq, buf, lines, color):
    """Record every token of *lines* in *freq* and return the new window.

    Args:
        freq: mapping of lookbehind tuple -> list of (token, color) pairs;
            updated in place.
        buf: tuple holding the current lookbehind window.
        lines: iterable of text lines; each line is split on single spaces,
            so a line's last token keeps its trailing newline.
        color: string stored alongside every token read from *lines*.

    Returns:
        The lookbehind window after the last token, so a following source
        can continue from where this one stopped.
    """
    for line in lines:
        for token in line.split(" "):
            freq.setdefault(buf, []).append((token, color))
            buf = buf[1:] + (token,)
    return buf


def build_chain(inputs, window):
    """Build the transition table from the named input sources.

    Args:
        inputs: iterable of file names; "-" means standard input.
        window: number of preceding tokens used as the lookup key.

    Returns:
        A dict-like mapping of lookbehind tuple -> list of (token, color).
    """
    # freq[lookbehind] = [(word, color), ...]
    freq = defaultdict(list)
    buf = ("",) * window
    for index, fname in enumerate(inputs):
        # Wrap around instead of running out of colors after ten sources.
        color = COLORS[index % len(COLORS)]
        if fname == "-":
            buf = feed(freq, buf, sys.stdin, color)
        else:
            # The context manager closes the file once it has been consumed.
            with open(fname) as f:
                buf = feed(freq, buf, f, color)
    return freq


def generate_tokens(freq, output_size):
    """Yield up to *output_size* random (token, color) pairs from *freq*.

    The walk starts at a randomly chosen known window.  When it reaches a
    window that was never followed by anything (for example the very end of
    the source text), it restarts from another random known window instead
    of failing.  Nothing is yielded when *freq* is empty.
    """
    if not freq:
        return
    keys = list(freq)
    buf = random.choice(keys)
    for _ in range(output_size):
        # .get() avoids inserting empty entries into a defaultdict on a miss.
        choices = freq.get(buf)
        if not choices:
            buf = random.choice(keys)
            choices = freq[buf]
        token, color = random.choice(choices)
        yield token, color
        buf = buf[1:] + (token,)


def main(argv=None):
    """Parse command-line arguments, build the chain and print the output.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        SystemExit: on invalid arguments, or when the input has no tokens.
    """
    parser = argparse.ArgumentParser()
    # With nargs="*" the parsed value is a list, so the default is one too.
    parser.add_argument("input", nargs="*", default=["-"], help="source text")
    parser.add_argument("-n", "--window", type=int, default=3, help="windows size")
    parser.add_argument(
        "-o", "--output-size", type=int, default=500, help="number of tokens to output"
    )
    args = parser.parse_args(argv)

    freq = build_chain(args.input, args.window)
    if not freq:
        # A string argument is printed to stderr and the exit status is 1.
        sys.exit("no tokens found in input")

    for token, color in generate_tokens(freq, args.output_size):
        sys.stdout.write(color + token)
        # Tokens that ended a source line already carry their own newline.
        if not token.endswith("\n"):
            sys.stdout.write(" ")
    # Reset terminal colors and finish with a newline.
    sys.stdout.write("\033[0m\n")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment