Skip to content

Instantly share code, notes, and snippets.

@omc8db
Last active September 5, 2024 15:00
Show Gist options
  • Save omc8db/a359a96fbf64f67ccddfb572814a977e to your computer and use it in GitHub Desktop.
Save omc8db/a359a96fbf64f67ccddfb572814a977e to your computer and use it in GitHub Desktop.
Simple markov chain text generator in python, bash script friendly. We have copilot at home.
#!/usr/bin/env python3
import argparse
import random
import sys
from collections import defaultdict
# If multiple input files are given, output tokens are color
# coded by which source they came from.
COLORS = [
    "\033[0;32m",  # GREEN
    "\033[1;33m",  # YELLOW
    "\033[0;31m",  # RED
    "\033[0;34m",  # BLUE
    "\033[0;33m",  # BROWN
    "\033[1;32m",  # LIGHT_GREEN
    "\033[1;31m",  # LIGHT_RED
    "\033[1;34m",  # LIGHT_BLUE
    "\033[0;37m",  # LIGHT_GRAY
    "\033[1;35m",  # LIGHT_PURPLE
]


def parse_args(argv=None):
    """Parse command-line arguments.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Namespace with ``input`` (list of file names, ``"-"`` meaning
        stdin), ``window`` (lookbehind size) and ``output_size``.
    """
    parser = argparse.ArgumentParser()
    # The default is a list so that ``input`` has the same type whether or
    # not file names were given on the command line.
    parser.add_argument("input", nargs="*", default=["-"], help="source text")
    parser.add_argument("-n", "--window", type=int, default=3, help="windows size")
    parser.add_argument(
        "-o", "--output-size", type=int, default=500, help="number of tokens to output"
    )
    return parser.parse_args(argv)


def iter_sources(inputs):
    """Yield ``(lines, color)`` for every input, closing files when done.

    Args:
        inputs: Iterable of file names; ``"-"`` selects standard input.

    Yields:
        Pairs of an iterable of text lines and the ANSI color assigned to
        that input. Colors wrap around once the palette is exhausted, so
        any number of inputs is accepted.
    """
    for index, fname in enumerate(inputs):
        color = COLORS[index % len(COLORS)]
        if fname == "-":
            yield sys.stdin, color
        else:
            # The file stays open only while the consumer reads from it.
            with open(fname) as f:
                yield f, color


def build_chain(sources, window):
    """Build the transition table of the Markov chain.

    Args:
        sources: Iterable of ``(lines, color)`` pairs, where ``lines`` is
            an iterable of strings.
        window: Number of preceding tokens used as the lookbehind key.

    Returns:
        Mapping ``freq[lookbehind] = [(token, color), ...]``. A token that
        follows a lookbehind several times appears several times, which is
        what weights the random choice during generation.
    """
    freq = defaultdict(list)
    buf = ("",) * window
    # The lookbehind buffer deliberately carries over from one source to
    # the next, linking the end of one input to the start of the following.
    for lines, color in sources:
        for line in lines:
            # Splitting on a single space keeps the trailing "\n" attached
            # to the last token of each line; the output loop relies on it.
            for token in line.split(" "):
                freq[buf].append((token, color))
                buf = buf[1:] + (token,)
    return freq


def generate(freq, size):
    """Yield up to ``size`` random ``(token, color)`` pairs from the chain.

    Args:
        freq: Transition table as returned by ``build_chain``.
        size: Number of tokens to produce.

    Yields:
        ``(token, color)`` pairs. Nothing is yielded when the table holds
        no transitions (e.g. empty input).
    """
    keys = [key for key, followers in freq.items() if followers]
    if not keys:
        return
    buf = random.choice(keys)
    for _ in range(size):
        # ``get`` avoids inserting empty entries into a defaultdict.
        followers = freq.get(buf)
        if not followers:
            # Dead end: this window was never followed by anything (always
            # true for the very end of the input). Restart from a random
            # known state instead of crashing.
            buf = random.choice(keys)
            followers = freq[buf]
        token, color = random.choice(followers)
        yield token, color
        buf = buf[1:] + (token,)


def main(argv=None):
    """Read the inputs, build the chain and write generated text to stdout."""
    args = parse_args(argv)
    freq = build_chain(iter_sources(args.input), args.window)
    for token, color in generate(freq, args.output_size):
        sys.stdout.write(color + token)
        if not token.endswith("\n"):
            sys.stdout.write(" ")
    # Reset terminal colors before exiting.
    sys.stdout.write("\033[0m\n")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment