Skip to content

Instantly share code, notes, and snippets.

@elfsternberg
Created March 15, 2018 02:07
Show Gist options
  • Save elfsternberg/74dbb2204880d9d6aba3a9da1d1cc0be to your computer and use it in GitHub Desktop.
Save elfsternberg/74dbb2204880d9d6aba3a9da1d1cc0be to your computer and use it in GitHub Desktop.
A simple Python library using Markov chains to generate new names.
#!/usr/bin/env python3
"""
Takes a filename as its required argument. The file should contain
names from a sample list, one name per line. It creates a Markov
chain of one, two, and three-letter character sequences, including
octothorpe-anchored sequences for the starts of names. It then
generates 'n' random names, each using the probabilistic distribution of
three-letter character sequences, to make names that match the
patterns fed in the original.
For example, given the 200 most popular girls' names in the USA for
the last decade, this script gives the following new and unique names:
Adall, Adriah, Alanie, Alliana, Camilian, Charlet, Delila, Destina,
Ellian, Emila, Faitlyn, Genelody, Isabeth, Izabelle, Kaith, Kaylor,
Kendalyn, Kendalynn, Kennifer, Lailey, Lillison, Madie, Melana, Paisy,
Sadison, Scarlotte, Stephine, Valentiny, Vanesis, Viole
This is a modified version of the code presented in
https://github.com/Tw1ddle/MarkovNameGenerator.git, made more portable
(who programs in Haxe voluntarily?) and just a bit streamlined. I am
impressed with Sam Twidale's getting around the whole tries thing with
a simple map.
The original was proposed for generating names for video games, but
as a writer I've been using the namelur algorithm forever for generating
lists of character names, and this algorithm is much better.
"""
from typing import Dict, DefaultDict, List, Optional
from collections import defaultdict
import random
class MarkovModel(object):
    """A fixed-order character model trained on a list of words.

    For every context of ``order`` characters seen in the training data
    the model stores one weight per alphabet symbol: ``prior`` plus the
    number of times that symbol followed the context.
    """

    def __init__(self, data: List[str], order: int, prior: float, alphabet: List[str]) -> None:
        """Store the configuration and immediately train on ``data``."""
        self.order = order
        self.prior = prior
        self.alphabet = alphabet
        self.chains = defaultdict(list)  # type: DefaultDict[str, List[float]]
        self.train(data)

    @staticmethod
    def _countMatches(arr: List[str], v: str) -> float:
        """Return how many entries of ``arr`` equal ``v``, counted in 1.0 steps."""
        return sum(1.0 for entry in arr if entry == v)

    def train(self, data: List[str]):
        """Rebuild ``self.chains`` from scratch using the words in ``data``.

        Each word is padded with ``order`` leading '#' characters and one
        trailing '#', so starts and ends of words are learned as ordinary
        transitions.
        """
        followers = defaultdict(list)  # type: DefaultDict[str, List[str]]
        self.chains = defaultdict(list)  # type: DefaultDict[str, List[float]]
        padding = '#' * self.order
        for word in data:
            padded = padding + word + '#'
            for start in range(len(padded) - self.order):
                stop = start + self.order
                # The character at `stop` is what followed padded[start:stop].
                followers[padded[start:stop]].append(padded[stop])
        for context, seen in followers.items():
            weights = [self.prior + self._countMatches(seen, symbol)
                       for symbol in self.alphabet]
            self.chains[context].extend(weights)

    @staticmethod
    def _selectIndex(chain: List[float]) -> int:
        """Pick an index of ``chain`` at random, proportionally to its weights.

        Returns 0 when no cumulative weight exceeds the drawn threshold
        (for instance when ``chain`` is empty or all weights are zero).
        """
        running = 0.0
        cumulative = []  # type: List[float]
        for weight in chain:
            running += weight
            cumulative.append(running)
        threshold = random.random() * running
        hits = (index for index, bound in enumerate(cumulative) if threshold < bound)
        return next(hits, 0)

    def generate(self, context: str) -> Optional[str]:
        """Return a random next symbol for ``context``, or None if it is unknown."""
        weights = self.chains.get(context)
        if weights is None:
            return None
        return self.alphabet[self._selectIndex(weights)]
class MarkovGenerator(object):
    """Generates words from a stack of models of decreasing order.

    ``self.models`` holds one MarkovModel per order, from ``order`` down
    to 1. When the highest-order model has never seen the current
    context, the generator falls back to the next shorter context.
    """

    def __init__(self, data: List[str], order: int, prior: float) -> None:
        """Build the alphabet from ``data`` and train one model per order.

        The alphabet is '#' followed by every distinct character of the
        training words in sorted order.
        """
        self.order = order
        self.prior = prior
        domain = ["#"] + sorted({letter for word in data for letter in word})
        self.models = [MarkovModel(data, self.order - i, self.prior, domain)
                       for i in range(self.order)]

    def generate(self) -> str:
        """Return one generated word, including its leading '#' padding.

        Generation stops at the terminating '#' (which is not appended),
        or as soon as no model can supply a next letter. The second case
        previously looped forever because the word never changed.
        """
        word = '#' * self.order
        letter = self.getLetter(word)
        while letter is not None and letter != '#':
            word += letter
            letter = self.getLetter(word)
        return word

    def getLetter(self, context: str) -> Optional[str]:
        """Return the next letter for the tail of ``context``, or None.

        The last ``order`` characters are offered to the highest-order
        model first. On a miss the oldest character is dropped so the
        next, lower-order model sees the most recent characters at the
        key length it was trained on.
        """
        # Clamp at 0 so a context shorter than `order` is used whole
        # instead of producing a negative start index.
        start = max(0, len(context) - self.order)
        mtext = context[start:]
        for model in self.models:
            letter = model.generate(mtext)
            if letter is not None:
                return letter
            # Back off: forget the oldest character, keep the recent ones.
            mtext = mtext[1:]
        return None
class MarkovNameGenerator(object):
    """Produces names from a MarkovGenerator, filtered by simple constraints."""

    def __init__(self, data: List[str], order: int, prior: float) -> None:
        """Train the underlying generator on ``data``."""
        self.generator = MarkovGenerator(data, order, prior)

    def generateName(self,
                     minLength: int = 5,
                     maxLength: int = 10,
                     startsWith: str = "",
                     endsWith: str = "",
                     includes: str = "",
                     excludes: str = "") -> Optional[str]:
        """Generate one candidate and return it only if it passes every filter.

        The candidate has all '#' characters removed. It is rejected
        (None is returned) when its length is outside
        ``minLength``..``maxLength``, when it lacks ``includes``, when it
        contains a non-empty ``excludes``, or when it does not start and
        end with the requested strings.
        """
        candidate = self.generator.generate().replace('#', '')
        size = len(candidate)
        if size < minLength or size > maxLength:
            return None
        if includes not in candidate:
            return None
        if excludes != "" and excludes in candidate:
            return None
        if not candidate.startswith(startsWith):
            return None
        if not candidate.endswith(endsWith):
            return None
        return candidate

    def generateNames(self,
                      count,
                      minLength: int = 5,
                      maxLength: int = 10,
                      startsWith: str = "",
                      endsWith: str = "",
                      includes: str = "",
                      excludes: str = "") -> List[str]:
        """Keep generating until ``count`` accepted names have been collected.

        Rejected candidates (None or empty) are silently retried, so
        this does not return until enough candidates pass the filters.
        """
        accepted = []  # type: List[str]
        while len(accepted) < count:
            candidate = self.generateName(minLength, maxLength, startsWith,
                                          endsWith, includes, excludes)
            if not candidate:
                continue
            accepted.append(candidate)
        return accepted
if __name__ == '__main__':
    import sys

    # Usage: <script> NAMES_FILE [COUNT]
    # NAMES_FILE holds one sample name per line; COUNT defaults to 10.
    datafile = sys.argv[1]
    count = int(sys.argv[2]) if len(sys.argv) > 2 else 10

    # Surrounding whitespace (including the newline) is stripped from
    # every line before training.
    with open(datafile, "r") as d:
        data = [line.strip() for line in d]  # type: List[str]

    # Order-3 model with no smoothing prior.
    generator = MarkovNameGenerator(data, 3, 0.0)
    print("\n".join(generator.generateNames(count)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment