Created
August 30, 2018 22:21
-
-
Save SmileyChris/01329ace51df56653abb80c8044bbe21 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
from typing import Optional, Any, List, Tuple | |
# Multi-letter consonant spellings that may open a syllable.
# ("thr" has three letters but is kept in the same pool.)
consonant_initial_digraphs = {
    "ch", "sh", "th", "thr", "ph",
    "wh", "ck", "kn", "wr",
}
# Multi-letter consonant spellings that may close a syllable.
consonant_final_digraphs = {"ch", "ng", "sh", "th", "tch"}
# Two-letter vowel spellings, in a fixed order, followed by the lone "y",
# which is not a digraph but is handled as a special case alongside them.
vowel_digraphs = "ai au aw ay ea ee ei ew ie oa oo ou ow".split() + ["y"]
# Two-consonant blends that may open a syllable, grouped by family.
consonant_initial_blends = {
    # s + consonant
    "sc", "sm", "st", "sk", "sn", "sw", "sl", "sp",
    # consonant + l
    "bl", "gl", "cl", "pl", "fl",
    # consonant + r
    "br", "fr", "tr", "cr", "gr", "dr", "pr",
}
# Two-consonant blends that may close a syllable, grouped by first letter.
consonant_final_blends = {
    # s + consonant
    "sk", "sp", "st",
    # l + consonant
    "ld", "lf", "lk", "lp", "lt",
    # n + consonant
    "nd", "nk", "nt",
    # remaining endings
    "ft", "mp", "pt", "rt",
}
# Relative weight of each lowercase letter, in alphabetical order.
# NOTE(review): the values look like English letter-frequency percentages;
# confirm the source before relying on that.
weighted_letters = {
    "a": 8.167, "b": 1.492, "c": 2.782, "d": 4.253, "e": 12.702,
    "f": 2.228, "g": 2.015, "h": 6.094, "i": 6.966, "j": 0.153,
    "k": 0.772, "l": 4.025, "m": 2.406, "n": 6.749, "o": 7.507,
    "p": 1.929, "q": 0.095, "r": 5.987, "s": 6.327, "t": 9.056,
    "u": 2.758, "v": 0.978, "w": 2.360, "x": 0.150, "y": 1.974,
    "z": 0.074,
}
class WeightedRandomizer:
    """Draw items at random with probability proportional to their weight.

    The items are sorted by ascending weight and stored next to a running
    total of the weights.  A draw picks a uniform number in ``[0, total)``
    and returns the first item whose running total exceeds it.
    """

    def __init__(self, weighted_items: List[Tuple[Any, float]]) -> None:
        """Build the cumulative-weight table.

        Args:
            weighted_items: ``(item, weight)`` pairs.  The pairs are only
                iterated once (via ``sorted``), so any iterable of pairs
                works.  An item whose weight is zero is stored but is not
                drawn as long as some other item has a positive weight.
        """
        self._max = 0.0
        self._weights: List[Tuple[float, Any]] = []
        for item, weight in sorted(weighted_items, key=lambda pair: pair[1]):
            self._max += weight
            self._weights.append((self._max, item))

    def random(self) -> Any:
        """Return one randomly chosen item.

        Raises:
            IndexError: If the randomizer holds no items (the same exception
                ``random.choice`` raises for an empty sequence).
        """
        if not self._weights:
            raise IndexError("cannot choose from an empty WeightedRandomizer")
        r = random.random() * self._max
        for ceil, item in self._weights:
            if ceil > r:
                return item
        # Only reachable when no running total exceeds the draw (e.g. the
        # total weight is zero).  Return the last, heaviest entry instead of
        # silently falling through to ``None``.
        return self._weights[-1][1]

    def __str__(self) -> str:
        """Return the items joined in sorted order (items must be strings)."""
        return "".join(sorted(item for (_ceil, item) in self._weights))
# Single-letter pickers weighted by ``weighted_letters``.  The pairs are
# materialised as lists so the arguments match the ``List[Tuple[Any, float]]``
# signature of ``WeightedRandomizer.__init__``.
# Every letter outside "aoeiu" (including "y") is treated as a consonant.
simple_vowels = WeightedRandomizer(
    [
        (letter, weight)
        for letter, weight in weighted_letters.items()
        if letter in "aoeiu"
    ]
)
simple_consonants = WeightedRandomizer(
    [
        (letter, weight)
        for letter, weight in weighted_letters.items()
        if letter not in "aoeiu"
    ]
)
# Sentence terminators: mostly full stops, occasionally "?" or "!".
punctuation = WeightedRandomizer([(".", 0.9), ("?", 0.07), ("!", 0.03)])
def new_sylable(force_hard: bool = False) -> Tuple[str, bool]:
    """Build one random pseudo-syllable.

    Args:
        force_hard: When true, only the structures that begin with a
            consonant are allowed (the vowel-first structure is excluded).

    Returns:
        A ``(syllable, open_sylable)`` pair.  ``open_sylable`` is true when
        the syllable has no final consonant, or when an ``"e"`` was appended
        after a simple final consonant.  ``new_word`` passes this flag back
        in as ``force_hard`` for the following syllable.
    """
    # 8 times out of 9 use a single weighted vowel, otherwise a vowel digraph.
    if random.randint(0, 8):
        vowel = simple_vowels.random()
    else:
        vowel = random.choice(vowel_digraphs)

    # 0: consonant + vowel, 1: consonant + vowel + consonant,
    # 2: vowel + consonant (not available when force_hard is set).
    structure = random.randint(0, 1 if force_hard else 2)

    # One roll decides both ends: 0 -> complex start, 1 -> complex end,
    # anything else -> both simple.  Both ends are never complex together.
    consonant_simplicity = random.randint(0, 9)
    simple_initial_consonant = consonant_simplicity != 0
    simple_final_consonant = consonant_simplicity != 1

    if structure in (0, 1):
        if simple_initial_consonant:
            start = simple_consonants.random()
        else:
            # Sorted so the choice does not depend on set iteration order,
            # which varies between interpreter runs for strings; this keeps
            # output reproducible under a fixed random seed.
            start = random.choice(
                sorted(consonant_initial_digraphs | consonant_initial_blends)
            )
    else:
        start = ""

    open_sylable = structure == 0
    if structure in (1, 2):
        if simple_final_consonant:
            end = simple_consonants.random()
            # A final w/h/y always takes a trailing "e"; any other consonant
            # takes one 1 time in 5.
            open_sylable = end[-1] in ("w", "h", "y") or not random.randint(0, 4)
            if open_sylable:
                end += "e"
        else:
            end = random.choice(
                sorted(consonant_final_digraphs | consonant_final_blends)
            )
    else:
        end = ""

    return start + vowel + end, open_sylable
def new_word(length: Optional[int] = None, plural: Optional[bool] = None) -> str:
    """Assemble a pseudo-word from random syllables.

    Args:
        length: Number of syllables; chosen from 1 to 3 when ``None``.
        plural: Whether to append an ``"s"``; decided at random (1 in 5)
            when ``None``.  Nothing is appended if the word already ends
            in ``"s"``.

    Returns:
        The generated word.
    """
    syllable_count = random.randint(1, 3) if length is None else length
    pluralise = (not random.randint(0, 4)) if plural is None else plural

    parts = []
    consonant_first = False
    for _ in range(syllable_count):
        # Each syllable reports whether the next one must open with a consonant.
        part, consonant_first = new_sylable(consonant_first)
        parts.append(part)

    result = "".join(parts)
    if not pluralise or result.endswith("s"):
        return result
    return result + "s"
def new_text(paragraphs: int) -> List[str]:
    """Generate paragraphs of filler text from a freshly invented vocabulary.

    Args:
        paragraphs: Number of paragraphs to produce.

    Returns:
        One string per paragraph, each made of 1 to 5 sentences joined by
        single spaces.
    """
    # The vocabulary grows with the paragraph count and is capped at 40.
    vocabulary_size = min(40, 20 * paragraphs // 3)

    short = []
    for _ in range(vocabulary_size // 2):
        short.append(new_word(length=1, plural=False))

    longer = []
    for _ in range(vocabulary_size):
        syllables = random.randint(2, 3)
        entry = new_word(syllables)
        if random.randint(0, 6) == 0:
            entry = entry.capitalize()
        if random.randint(0, 2) == 0:
            # Prefix with one of the first three short words.
            entry = "{} {}".format(short[random.randint(0, 2)], entry)
        longer.append(entry)

    # Quote words carry weights 0, 4 and 8; the zero-weight word is never
    # drawn by WeightedRandomizer.
    weighted_quote_words = []
    for step in range(3):
        weighted_quote_words.append((new_word(length=1, plural=False), step * 4))
    quote = WeightedRandomizer(weighted_quote_words)

    text = []
    for _ in range(paragraphs):
        sentence_count = random.randint(1, 5)
        text.append(
            " ".join(new_sentence(short, longer, quote) for _ in range(sentence_count))
        )
    return text
def _capitalize_first(text: str) -> str:
    """Upper-case the first character of *text*, leaving the rest untouched."""
    return text[:1].upper() + text[1:]


def new_sentence(
    short: List[str], longer: List[str], quote: WeightedRandomizer
) -> str:
    """Build one sentence of 4 to 8 vocabulary entries.

    Args:
        short: Pool of short words.
        longer: Pool of longer words or phrases.
        quote: Picker for the word that accompanies a quoted sentence.

    Returns:
        The sentence with closing punctuation.  Roughly 1 time in 9 it is
        rendered as a quotation, with a two-word attribution placed either
        after or before the quoted text.
    """
    longer_chance = 0.4
    words = []
    for _ in range(random.randint(4, 8)):
        # Pick a longer word with probability ``longer_chance``.  Picking one
        # lowers the chance of another; picking a short word raises it.  The
        # comparison previously read ``>``, so each longer word made the
        # next one *more* likely and sentences became all-longer runs.
        if random.random() < longer_chance:
            longer_chance -= 0.3
            words.append(random.choice(longer))
        else:
            longer_chance += 0.1
            words.append(random.choice(short))
    sentence = " ".join(words) + punctuation.random()

    # ``str.capitalize`` would lowercase the rest of the string and wipe out
    # words that were capitalised on purpose, so only the first character is
    # upper-cased.
    if random.randint(0, 8):
        return _capitalize_first(sentence)

    quote_text = [random.choice(longer), quote.random()]
    random.shuffle(quote_text)
    quote_text = " ".join(quote_text)
    if random.randint(0, 1):
        # Quotation first: a closing full stop becomes a comma inside the
        # quotes, and the attribution ends the sentence.
        if sentence.endswith("."):
            sentence = "{},".format(sentence[:-1])
        return '"{}" {}.'.format(_capitalize_first(sentence), quote_text)
    # Attribution first, followed by the quoted sentence as generated.
    return '{} "{}"'.format(_capitalize_first(quote_text), sentence)
if __name__ == "__main__":
    import sys

    # The optional first command-line argument is the paragraph count.
    # Fall back to 10 when it is missing or not an integer.
    cli_args = sys.argv[1:]
    try:
        paragraph_count = int(cli_args[0])
    except (IndexError, ValueError):
        paragraph_count = 10

    print("\n\n".join(new_text(paragraph_count)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment