elfsternberg · March 15, 2018 02:07
diff --git a/markow-name-generator.py b/markow-name-generator.py
 #!/usr/bin/env python3

 """
 Takes a filename as its required argument.  The file should contain
 names from a sample list, one name per line.  It creates a Markov
 chain of one, two, and three-letter character sequences, including
 octothorpe-anchored sequences for the starts of names.  It then
 generates 'n' random names, each using the probabilistic distribution of
 three-letter character sequences, to make names that match the
 patterns fed in the original.

 For example, given the 200 most popular girls' names in the USA for
 the last decade, this script gives the following new and unique names:

 Adall, Adriah, Alanie, Alliana, Camilian, Charlet, Delila, Destina,
 Ellian, Emila, Faitlyn, Genelody, Isabeth, Izabelle, Kaith, Kaylor,
 Kendalyn, Kendalynn, Kennifer, Lailey, Lillison, Madie, Melana, Paisy,
 Sadison, Scarlotte, Stephine, Valentiny, Vanesis, Viole

 This is a modified version of the code presented in
 https://github.com/Tw1ddle/MarkovNameGenerator.git, made more portable
 (who programs in Haxe voluntarily?) and just a bit streamlined.  I am
 impressed with Sam Twidale's getting around the whole tries thing with
 a simple map.

 The original was proposed for generating names for video games, but
 as a writer I've been using the namelur algorithm forever for generating 
 lists of character names, and this algorithm is much better.
 """

 from typing import Dict, DefaultDict, List, Optional
 from collections import defaultdict
 import random


 class MarkovModel(object):
    """Fixed-order character model: context string -> next-symbol weights.

    ``chains`` maps each context of ``order`` characters observed in the
    training words to a list of weights, one per entry of ``alphabet``
    and in the same index order.  A weight is ``prior`` plus the number
    of times that symbol followed the context in the training words.
    """

    def __init__(self, data: List[str], order: int, prior: float, alphabet: List[str]) -> None:
        """Remember the parameters, then train on ``data`` right away."""
        self.order = order
        self.prior = prior
        self.alphabet = alphabet
        self.chains = defaultdict(list)   # type: DefaultDict[str, List[float]]
        self.train(data)

    @staticmethod
    def _countMatches(arr: List[str], v: str) -> float:
        """Return how many entries of ``arr`` are equal to ``v``."""
        return sum(1.0 for entry in arr if entry == v)

    def train(self, data: List[str]) -> None:
        """Discard any existing chains and rebuild them from ``data``."""
        followers = defaultdict(list)    # type: DefaultDict[str, List[str]]
        self.chains = defaultdict(list)  # type: DefaultDict[str, List[float]]
        width = self.order

        for word in data:
            # Leading '#' padding gives the first letters a full-width
            # context; the single trailing '#' marks the end of the word.
            padded = '#' * width + word + '#'
            for start in range(len(padded) - width):
                stop = start + width
                followers[padded[start:stop]].append(padded[stop])

        for context, seen in followers.items():
            self.chains[context] += [self.prior + self._countMatches(seen, symbol)
                                     for symbol in self.alphabet]

    @staticmethod
    def _selectIndex(chain: List[float]) -> int:
        """Pick an index of ``chain`` at random, weighted by its values.

        Falls back to index 0 when no running total exceeds the drawn
        threshold (for instance when ``chain`` is empty or all zero).
        """
        running = 0.0
        cumulative = []  # type: List[float]
        for weight in chain:
            running += weight
            cumulative.append(running)

        threshold = random.random() * running
        return next((position for position, bound in enumerate(cumulative)
                     if threshold < bound), 0)

    def generate(self, context: str) -> Optional[str]:
        """Draw the next symbol for ``context``, or None if it is unknown."""
        if context in self.chains:
            picked = self._selectIndex(self.chains[context])
            return self.alphabet[picked]
        return None


 class MarkovGenerator(object):
    """Generate words letter by letter from a stack of Markov models.

    ``models`` holds one MarkovModel per order, from ``order`` down to 1.
    When the highest-order model does not know the current context, the
    next lower order is asked with a correspondingly shorter context.
    """

    def __init__(self, data: List[str], order: int, prior: float) -> None:
        """Build the alphabet from ``data`` and train one model per order.

        data:  training words.
        order: highest context length; models of order ``order`` .. 1 are built.
        prior: weight added to every symbol in every known context.
        """
        self.order = order
        self.prior = prior
        # '#' is the padding/terminator symbol and always sits at index 0.
        # Remove it from the observed letters so it is never listed twice,
        # which would double its weight in every chain.
        letters = {ch for word in data for ch in word} - {"#"}
        domain = ["#"] + sorted(letters)
        self.models = [MarkovModel(data, self.order - i, self.prior, domain)
                       for i in range(self.order)]

    def generate(self) -> str:
        """Return one generated word, still prefixed by ``order`` '#' characters.

        Generation ends when the terminator '#' is drawn, or when no model
        can supply a letter at all.
        """
        word = '#' * self.order
        letter = self.getLetter(word)
        # None means no model of any order knows the context (for example
        # when there was no training data).  Asking again would give the
        # same answer forever, so stop instead of looping.
        while letter is not None and letter != '#':
            word += letter
            letter = self.getLetter(word)
        return word

    def getLetter(self, context: str) -> Optional[str]:
        """Return the next letter for ``context``, or None if no model knows it.

        Only the last ``order`` characters of ``context`` are used.  Models
        are tried from highest order to lowest.
        """
        mtext = context[len(context) - self.order:]
        for model in self.models:
            letter = model.generate(mtext)
            if letter is not None:
                return letter
            # Back off: the next model is one order lower, so it needs the
            # same suffix with its oldest (leftmost) character dropped.
            mtext = mtext[1:]
        return None


 class MarkovNameGenerator(object):
    """Produce names from a MarkovGenerator, filtered by simple constraints."""

    def __init__(self, data: List[str], order: int, prior: float) -> None:
        """Train the underlying generator on ``data`` with the given order and prior."""
        self.generator = MarkovGenerator(data, order, prior)

    def generateName(self,
                     minLength: int = 5,
                     maxLength: int = 10,
                     startsWith: str = "",
                     endsWith: str = "",
                     includes: str = "",
                     excludes: str = "") -> Optional[str]:
        """Generate one candidate and return it only if it passes every filter.

        The candidate must have between ``minLength`` and ``maxLength``
        characters (inclusive), start with ``startsWith``, end with
        ``endsWith``, contain ``includes`` and, unless ``excludes`` is
        empty, not contain ``excludes``.  Returns None when the candidate
        is rejected; no retry happens here.
        """
        # The generator pads its output with '#'; strip that before filtering.
        name = self.generator.generate().replace('#', '')

        if not (minLength <= len(name) <= maxLength):
            return None
        if includes not in name:
            return None
        if excludes != "" and excludes in name:
            return None
        if not (name.startswith(startsWith) and name.endswith(endsWith)):
            return None
        return name

    def generateNames(self,
                      count: int,
                      minLength: int = 5,
                      maxLength: int = 10,
                      startsWith: str = "",
                      endsWith: str = "",
                      includes: str = "",
                      excludes: str = "",
                      maxAttempts: Optional[int] = None) -> List[str]:
        """Collect ``count`` accepted names (duplicates are possible).

        The filter arguments are passed through to ``generateName``.

        maxAttempts: upper bound on the number of candidates drawn.  With
            the default None the loop keeps drawing until ``count`` names
            are accepted, which never ends if the filters cannot be
            satisfied.  When a bound is given and reached, the names
            collected so far are returned, possibly fewer than ``count``.
        """
        names = []  # type: List[str]
        attempts = 0
        while len(names) < count:
            if maxAttempts is not None and attempts >= maxAttempts:
                break
            attempts += 1
            name = self.generateName(minLength, maxLength, startsWith,
                                     endsWith, includes, excludes)
            if name:
                names.append(name)

        return names


 if __name__ == '__main__':
    import sys

    # Command line: NAMEFILE [COUNT]
    #   NAMEFILE  text file with one sample name per line (required)
    #   COUNT     how many names to print (default 10)
    if len(sys.argv) < 2:
        sys.exit("usage: {} NAMEFILE [COUNT]".format(sys.argv[0]))
    datafile = sys.argv[1]
    count = int(sys.argv[2]) if len(sys.argv) > 2 else 10

    # Strip surrounding whitespace and skip blank lines; a blank line would
    # otherwise be trained as an empty name.
    with open(datafile, "r") as d:
        data = [name for name in map(str.strip, d) if name]  # type: List[str]

    # Without any sample names nothing can be generated and the request
    # for names could never be satisfied, so stop here with a message.
    if not data:
        sys.exit("no names found in {}".format(datafile))

    # Order-3 model with no prior: only letter sequences seen in the
    # samples can be produced.
    generator = MarkovNameGenerator(data, 3, 0.0)
    print("\n".join(generator.generateNames(count)))
	#!/usr/bin/env python3

	"""
	Takes a filename as its required argument. The file should contain
	names from a sample list, one name per line. It creates a Markov
	chain of one, two, and three-letter character sequences, including
	octothorpe-anchored sequences for the starts of names. It then
	generates 'n' random names, each using the probabilistic distribution of
	three-letter character sequences, to make names that match the
	patterns fed in the original.

	For example, given the 200 most popular girls' names in the USA for
	the last decade, this script gives the following new and unique names:

	Adall, Adriah, Alanie, Alliana, Camilian, Charlet, Delila, Destina,
	Ellian, Emila, Faitlyn, Genelody, Isabeth, Izabelle, Kaith, Kaylor,
	Kendalyn, Kendalynn, Kennifer, Lailey, Lillison, Madie, Melana, Paisy,
	Sadison, Scarlotte, Stephine, Valentiny, Vanesis, Viole

	This is a modified version of the code presented in
	https://github.com/Tw1ddle/MarkovNameGenerator.git, made more portable
	(who programs in Haxe voluntarily?) and just a bit streamlined. I am
	impressed with Sam Twidale's getting around the whole tries thing with
	a simple map.

	The original was proposed for generating names for video games, but
	as a writer I've been using the namelur algorithm forever for generating
	lists of character names, and this algorithm is much better.
	"""

	from typing import Dict, DefaultDict, List, Optional
	from collections import defaultdict
	import random


	class MarkovModel(object):
	    """Fixed-order character model: context string -> next-symbol weights.

	    ``chains`` maps each context of ``order`` characters observed in the
	    training words to a list of weights, one per entry of ``alphabet``
	    and in the same index order.  A weight is ``prior`` plus the number
	    of times that symbol followed the context in the training words.
	    """

	    def __init__(self, data: List[str], order: int, prior: float, alphabet: List[str]) -> None:
	        """Remember the parameters, then train on ``data`` right away."""
	        self.order = order
	        self.prior = prior
	        self.alphabet = alphabet
	        self.chains = defaultdict(list)  # type: DefaultDict[str, List[float]]
	        self.train(data)

	    @staticmethod
	    def _countMatches(arr: List[str], v: str) -> float:
	        """Return how many entries of ``arr`` are equal to ``v``."""
	        return sum(1.0 for entry in arr if entry == v)

	    def train(self, data: List[str]) -> None:
	        """Discard any existing chains and rebuild them from ``data``."""
	        observations = defaultdict(list)  # type: DefaultDict[str, List[str]]
	        self.chains = defaultdict(list)   # type: DefaultDict[str, List[float]]

	        for word in data:
	            # Leading '#' padding gives the first letters a full-width
	            # context; the single trailing '#' marks the end of the word.
	            item = '#' * self.order + word + '#'
	            for i in range(len(item) - self.order):
	                observations[item[i:(i + self.order)]].append(item[i + self.order])

	        for context, seen in observations.items():
	            self.chains[context] += [self.prior + self._countMatches(seen, prediction)
	                                     for prediction in self.alphabet]

	    @staticmethod
	    def _selectIndex(chain: List[float]) -> int:
	        """Pick an index of ``chain`` at random, weighted by its values.

	        Falls back to index 0 when no running total exceeds the drawn
	        threshold (for instance when ``chain`` is empty or all zero).
	        """
	        totals = []  # type: List[float]
	        accumulator = 0.0
	        for weight in chain:
	            accumulator += weight
	            totals.append(accumulator)

	        seek = random.random() * accumulator
	        for counter, bound in enumerate(totals):
	            if seek < bound:
	                return counter

	        return 0

	    def generate(self, context: str) -> Optional[str]:
	        """Draw the next symbol for ``context``, or None if it is unknown."""
	        if context not in self.chains:
	            return None
	        return self.alphabet[self._selectIndex(self.chains[context])]


	class MarkovGenerator(object):
	    """Generate words letter by letter from a stack of Markov models.

	    ``models`` holds one MarkovModel per order, from ``order`` down to 1.
	    When the highest-order model does not know the current context, the
	    next lower order is asked with a correspondingly shorter context.
	    """

	    def __init__(self, data: List[str], order: int, prior: float) -> None:
	        """Build the alphabet from ``data`` and train one model per order.

	        data:  training words.
	        order: highest context length; models of order ``order`` .. 1 are built.
	        prior: weight added to every symbol in every known context.
	        """
	        self.order = order
	        self.prior = prior
	        # '#' is the padding/terminator symbol and always sits at index 0.
	        # Remove it from the observed letters so it is never listed twice,
	        # which would double its weight in every chain.
	        letters = {ch for word in data for ch in word} - {"#"}
	        domain = ["#"] + sorted(letters)
	        self.models = [MarkovModel(data, self.order - i, self.prior, domain)
	                       for i in range(self.order)]

	    def generate(self) -> str:
	        """Return one generated word, still prefixed by ``order`` '#' characters.

	        Generation ends when the terminator '#' is drawn, or when no model
	        can supply a letter at all.
	        """
	        word = '#' * self.order
	        letter = self.getLetter(word)
	        # None means no model of any order knows the context (for example
	        # when there was no training data).  Asking again would give the
	        # same answer forever, so stop instead of looping.
	        while letter is not None and letter != '#':
	            word += letter
	            letter = self.getLetter(word)
	        return word

	    def getLetter(self, context: str) -> Optional[str]:
	        """Return the next letter for ``context``, or None if no model knows it.

	        Only the last ``order`` characters of ``context`` are used.  Models
	        are tried from highest order to lowest.
	        """
	        mtext = context[len(context) - self.order:]
	        for model in self.models:
	            letter = model.generate(mtext)
	            if letter is not None:
	                return letter
	            # Back off: the next model is one order lower, so it needs the
	            # same suffix with its oldest (leftmost) character dropped.
	            mtext = mtext[1:]
	        return None


	class MarkovNameGenerator(object):
	    """Produce names from a MarkovGenerator, filtered by simple constraints."""

	    def __init__(self, data: List[str], order: int, prior: float) -> None:
	        """Train the underlying generator on ``data`` with the given order and prior."""
	        self.generator = MarkovGenerator(data, order, prior)

	    def generateName(self,
	                     minLength: int = 5,
	                     maxLength: int = 10,
	                     startsWith: str = "",
	                     endsWith: str = "",
	                     includes: str = "",
	                     excludes: str = "") -> Optional[str]:
	        """Generate one candidate and return it only if it passes every filter.

	        The candidate must have between ``minLength`` and ``maxLength``
	        characters (inclusive), start with ``startsWith``, end with
	        ``endsWith``, contain ``includes`` and, unless ``excludes`` is
	        empty, not contain ``excludes``.  Returns None when the candidate
	        is rejected; no retry happens here.
	        """
	        # The generator pads its output with '#'; strip that before filtering.
	        name = self.generator.generate().replace('#', '')

	        if (len(name) >= minLength and
	                len(name) <= maxLength and
	                includes in name and
	                (excludes == "" or excludes not in name) and
	                name.startswith(startsWith) and
	                name.endswith(endsWith)):
	            return name

	        return None

	    def generateNames(self,
	                      count: int,
	                      minLength: int = 5,
	                      maxLength: int = 10,
	                      startsWith: str = "",
	                      endsWith: str = "",
	                      includes: str = "",
	                      excludes: str = "",
	                      maxAttempts: Optional[int] = None) -> List[str]:
	        """Collect ``count`` accepted names (duplicates are possible).

	        The filter arguments are passed through to ``generateName``.

	        maxAttempts: upper bound on the number of candidates drawn.  With
	            the default None the loop keeps drawing until ``count`` names
	            are accepted, which never ends if the filters cannot be
	            satisfied.  When a bound is given and reached, the names
	            collected so far are returned, possibly fewer than ``count``.
	        """
	        names = []  # type: List[str]
	        attempts = 0
	        while len(names) < count:
	            if maxAttempts is not None and attempts >= maxAttempts:
	                break
	            attempts += 1
	            name = self.generateName(minLength, maxLength, startsWith,
	                                     endsWith, includes, excludes)
	            if name:
	                names.append(name)

	        return names


	if __name__ == '__main__':
	    import sys

	    # Command line: NAMEFILE [COUNT]
	    #   NAMEFILE  text file with one sample name per line (required)
	    #   COUNT     how many names to print (default 10)
	    if len(sys.argv) < 2:
	        sys.exit("usage: {} NAMEFILE [COUNT]".format(sys.argv[0]))
	    datafile = sys.argv[1]
	    count = int(sys.argv[2]) if len(sys.argv) > 2 else 10

	    # Strip surrounding whitespace and skip blank lines; a blank line would
	    # otherwise be trained as an empty name.
	    with open(datafile, "r") as d:
	        data = [name for name in map(str.strip, d) if name]  # type: List[str]

	    # Without any sample names nothing can be generated and the request
	    # for names could never be satisfied, so stop here with a message.
	    if not data:
	        sys.exit("no names found in {}".format(datafile))

	    # Order-3 model with no prior: only letter sequences seen in the
	    # samples can be produced.
	    generator = MarkovNameGenerator(data, 3, 0.0)
	    print("\n".join(generator.generateNames(count)))