Skip to content

Instantly share code, notes, and snippets.

@boredzo
Forked from fluffy-critter/namegen.py
Last active April 12, 2016 05:46
Show Gist options
  • Save boredzo/5d0efa528a572ad4aaee278ee8d1f9fa to your computer and use it in GitHub Desktop.
Save boredzo/5d0efa528a572ad4aaee278ee8d1f9fa to your computer and use it in GitHub Desktop.
Dissociator to generate names by example
#!/usr/bin/env python
#
# Based on https://gist.github.com/plaidfluff/aa9983161b2b56f8f2750d661279cb9e
#
# Silly thing to generate random names from examples. Uses the CSV files obtained from
# http://catalog.data.gov/dataset/baby-names-from-social-security-card-applications-national-level-data
#
# Any corpus will work if it's formatted like:
# Name,[ignored],weight
import csv
import sys
import collections
import random
import argparse
# Command-line interface: -n/--count selects how many names to print.  Every
# argument the parser does not recognise is collected into `filenames` and is
# later opened as a corpus file.
parser = argparse.ArgumentParser(
    description=(
        'Generate one or more random names. Takes data such as '
        'http://catalog.data.gov/dataset/baby-names-from-social-security-card-applications-national-level-data'
        ' as input.'
    )
)
parser.add_argument(
    '-n', '--count',
    type=int,
    default=1,
    help='Number of names to generate.'
)
options, filenames = parser.parse_known_args()
if not filenames:
    # parser.error() prints the usage message and exits the process.
    parser.error('Must pass at least one file containing raw name data like "Sam,G,9999".')
class Node(object):
    """One state of the letter-transition table.

    Attributes are plain counters that the corpus loader fills in:

    total      -- integer, starts at 0; the loader adds a weight here every
                  time it adds the same weight to an entry of ``next_nodes``.
    next_nodes -- ``defaultdict(int)`` mapping a follow-up key (a letter, or
                  None for "the name ends here") to its accumulated weight.
    """

    def __init__(self):
        self.total = 0
        # defaultdict(int) lets callers write `next_nodes[key] += weight`
        # without first checking whether the key exists.
        self.next_nodes = collections.defaultdict(int)
# Transition table of the name model.  The key None is the start state; every
# other key is a (letter, position) tuple naming the state reached after that
# letter was seen at that zero-based position in a name.
nodes = collections.defaultdict(Node)
for arg in filenames:
    with open(arg, 'r') as corpus:
        for row in csv.reader(corpus):
            if not row:
                # csv.reader yields an empty list for a blank line; skip it
                # instead of failing with IndexError on row[2].
                continue
            # Rows look like Name,[ignored],weight.
            weight = int(row[2])
            node = nodes[None]
            for pos, c in enumerate(row[0]):
                node.total += weight
                node.next_nodes[c] += weight
                node = nodes[(c, pos)]
            # The state after the last letter records an end-of-name
            # transition under the key None.
            node.total += weight
            node.next_nodes[None] += weight
# for letter,weights in nodes.items():
# print "{} = {}".format(letter, weights.total)
# for nn,wt in weights.next_nodes.items():
# print " -> {} = {}".format(nn, wt)
# sys.exit(0)
def pick_weighted(node):
    """Pick one key of ``node.next_nodes`` at random, proportionally to weight.

    Args:
        node: object with an integer ``total`` and a ``next_nodes`` mapping of
            key -> integer weight.  ``total`` is expected to equal the sum of
            the weights.

    Returns:
        One key of ``node.next_nodes``.  Returns None when ``node.total`` is
        less than 1 or when the weights sum to less than the drawn number;
        note that None may also be a real key in the mapping.
    """
    if node.total < 1:
        # Nothing to choose from; randint(1, 0) would raise ValueError.
        return None
    # random.randint includes both endpoints, so drawing from 1..total gives
    # exactly `total` outcomes and each key owns precisely `weight` of them.
    # (Drawing from 0..total hands one extra outcome to the first key.)
    rnd = random.randint(1, node.total)
    for key, weight in node.next_nodes.items():
        rnd -= weight
        if rnd <= 0:
            return key
    return None
def make_up_one_name():
    """Generate one name by walking the module-level ``nodes`` table.

    Starts at ``nodes[None]`` and repeatedly asks ``pick_weighted`` for the
    next letter, moving to ``nodes[(letter, position)]`` after each one.  The
    walk stops as soon as a falsy value (such as None) is picked.

    Returns:
        The picked letters concatenated into a string; may be empty.
    """
    letters = []
    state = nodes[None]
    index = 0
    while True:
        choice = pick_weighted(state)
        if not choice:
            return ''.join(letters)
        letters.append(choice)
        state = nodes[(choice, index)]
        index += 1
def make_up_whole_name():
    """Assemble a full name out of repeatedly generated single names.

    Generated names longer than ``len('Joseana')`` characters become the
    family name.  Shorter ones become the given name first and the middle
    name afterwards; each later short name replaces the previous middle name.
    Generation continues until both a given and a family name exist.

    NOTE(review): the loop has no upper bound; it only ends once the generator
    has produced at least one name on each side of the length cutoff.

    Returns:
        Tuple ``(given_name, middle_name, family_name)``; ``middle_name`` is
        None when only one short name was generated.
    """
    max_short_length = len('Joseana')
    given_name = middle_name = family_name = None
    while True:
        candidate = make_up_one_name()
        if len(candidate) > max_short_length:
            family_name = candidate
        elif given_name is None:
            given_name = candidate
        else:
            middle_name = candidate
        if given_name is not None and family_name is not None:
            return given_name, middle_name, family_name
def generate_names(count):
    """Yield ``count`` freshly generated whole names, one at a time.

    Args:
        count: integer number of names to produce; zero or a negative value
            yields nothing.

    Yields:
        Whatever ``make_up_whole_name()`` returns, once per requested name.
    """
    for _ in range(count):
        yield make_up_whole_name()
# Print each generated name on its own line, space-separated, leaving the
# middle name out when it is None or empty.
for given_name, middle_name, family_name in generate_names(options.count):
    parts = [given_name]
    if middle_name:
        parts.append(middle_name)
    parts.append(family_name)
    print(' '.join(parts))
@boredzo
Copy link
Author

boredzo commented Apr 12, 2016

Example output:

  • Gelviss Mesele Maizaeler
  • Ellon Frysthas
  • Midob Alil Dariqung
  • Jamera Ischua Degareus
  • Jesene Dohn Eriandas
  • Gatold Anialice
  • Jonney Elvie Rynneland
  • Moharam Anily Mandicin
  • Janan Phyc Milianera
  • Cydnze Tachon Degartto
  • Jolly Brishas Deonolla
  • Beles Chrrie Flizamiater
  • Whede Dosstn Ermenigh
  • Kichas Jasherdra
  • Saman Chexa Pebrinether
  • Hary Kesse Carrryne
  • Aammy Jouirtina
  • Genort Lathara Jarobyny
  • Rimmen Angenet Mishabene
  • Larkabe Genien Elexarder

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment