Skip to content

Instantly share code, notes, and snippets.

@bsidhom
Last active June 12, 2023 03:23
Show Gist options
  • Save bsidhom/f30989cae39f1a2ba841eb00744c0ba0 to your computer and use it in GitHub Desktop.
Save bsidhom/f30989cae39f1a2ba841eb00744c0ba0 to your computer and use it in GitHub Desktop.
Anagram finder inspired by the Ink Black Heart Novel (Cormoran Strike)
#!/usr/bin/env python3
# Exact anagram finder. We start with a target name ("vilepechora") and consider
# all two-name exact anagrams of this target. Specifically, we do not consider
# anagrams of subsets of the original character set. We consider first and last
# names as different types to narrow the search space (there are 1712 matches if
# you consider first and last names to be interchangeable). There are "only" 582
# matches by considering first and last names separately (based on which dataset
# they're found in).
from collections import defaultdict
from collections.abc import Iterable
import csv
import io
import os
from typing import Mapping
import unicodedata
import zipfile
def main():
splits = compute_splits("vilepechora")
# Taken from https://www.ssa.gov/oact/babynames/names.zip
first_names = read_first_names("names.zip")
# Taken from https://github.com/fivethirtyeight/data/blob/master/most-common-name/surnames.csv
last_names = read_surnames("surnames.csv")
# NOTE: We consider the union of first and last names in a global list.
# names = first_names | surnames
first_name_clusters = multiset_map(first_names)
last_name_clusters = multiset_map(last_names)
# name_clusters = multiset_map(names)
for (first, last) in splits:
if first not in first_name_clusters:
continue
if last not in last_name_clusters:
continue
first_matches = first_name_clusters[first]
last_matches = last_name_clusters[last]
for a in first_matches:
for b in last_matches:
print(f"{a} {b}")
def read_first_names(first_names_zip: os.PathLike):
names = set()
with zipfile.ZipFile(first_names_zip) as z:
for finfo in z.infolist():
fname = finfo.filename
if not fname.startswith("yob") or not fname.endswith(".txt"):
continue
with io.TextIOWrapper(z.open(finfo.filename),
encoding="utf-8") as f:
reader = csv.reader(f)
for row in reader:
name = normalize_string(row[0])
names.add(name)
return names
def read_surnames(csv_file: os.PathLike):
names = set()
with open(csv_file, newline="") as f:
reader = csv.reader(f)
# Discard header
next(reader)
for row in reader:
name = normalize_string(row[0])
names.add(name)
return names
def normalize_string(s: str):
result = ""
for c in unicodedata.normalize("NFKD", s.casefold()):
if unicodedata.category(c) != "Ll":
continue
result += c
return result
# Compute all possible two-part multiset partitions of the given target. This is
# only feasible in O(2^n), where n is the length of the target.
def compute_splits(target):
splits = set()
for (a, b) in powerset_pairs(target):
splits.add((char_multiset(a), char_multiset(b)))
return splits
# Given a list of strings, creates a mapping from the character multiset of each
# string to a list of strings matching that character multiset.
def multiset_map(ss: Iterable[str]) -> Mapping[str, list[str]]:
result = defaultdict(list)
for s in ss:
key = char_multiset(s)
result[key].append(s)
return result
# Turns the given string into a unique string that corresponds to the multiset
# of characters in the given string.
def char_multiset(s: str) -> str:
return "".join(sorted(s))
# Adapted from https://gist.github.com/bsidhom/a9da0c92eab358d18564285487799db4
def powerset_pairs(items):
count = len(items)
def rec(subset):
if len(subset) == count:
result = [items[i] for i in range(count) if subset[i]]
complement = [items[i] for i in range(count) if not subset[i]]
yield (result, complement)
else:
yield from rec(subset + [False])
yield from rec(subset + [True])
yield from rec([])
if __name__ == "__main__":
main()
ophia clever
porcia hevel
velora piche
velora piech
lovera piche
lovera piech
levora piche
levora piech
volare piche
volare piech
avorie pelch
capri hoevel
ralphie voce
ralphie cove
raphiel voce
raphiel cove
lea perovich
ale perovich
ela perovich
havoc leiper
havoc lepire
havoc pierle
havoc peiler
ophelie cvar
rochele piva
ivola pecher
oliva pecher
olvia pecher
olavi pecher
viola pecher
ovila pecher
lovia pecher
voila pecher
volia pecher
orvie plecha
orvie chaple
orvie pechal
orvie chapel
eivor plecha
eivor chaple
eivor pechal
eivor chapel
chalei prevo
ichael prevo
lechia prevo
cheila prevo
haciel prevo
chelia prevo
chaeli prevo
leicha prevo
chalie prevo
cherie pavlo
cheire pavlo
havier lopec
havier polce
harvie lopec
harvie polce
olive pechar
olive pearch
olive percha
olive pacher
olvie pechar
olvie pearch
olvie percha
olvie pacher
evilo pechar
evilo pearch
evilo percha
evilo pacher
levio pechar
levio pearch
levio percha
levio pacher
lovie pechar
lovie pearch
lovie percha
lovie pacher
elvio pechar
elvio pearch
elvio percha
elvio pacher
ovie placher
ovie palcher
eera povlich
aree povlich
chevi polera
chevi lopera
chevi lapore
chevi repola
chavie loper
chavie ropel
chavie poler
chavie lepro
oliver pecha
oliver cheap
oliver pache
oliver peach
orvile pecha
orvile cheap
orvile pache
orvile peach
hovie capler
hovie placer
hovie parcel
chia polvere
chai polvere
ciah polvere
olivea perch
ovelia perch
veolia perch
olevia perch
aivree ploch
earvie ploch
aviree ploch
averie ploch
pao leverich
ivalee porch
ivalee proch
avelie porch
avelie proch
evelia porch
evelia proch
evalei porch
evalei proch
evalie porch
evalie proch
leavie porch
leavie proch
avilee porch
avilee proch
laree povich
ralee povich
earle povich
leera povich
arlee povich
lerae povich
chloi pevear
evelio prach
elovie prach
chevel proia
vachel piero
vachel perio
vachel poire
cheval piero
cheval perio
cheval poire
clovie harpe
clovie haper
lovice harpe
lovice haper
elvire pacho
elvire pocha
elvire poach
elvire chapo
verlie pacho
verlie pocha
verlie poach
verlie chapo
everli pacho
everli pocha
everli poach
everli chapo
virlee pacho
virlee pocha
virlee poach
virlee chapo
chloee pivar
laporche vie
verah pelico
verah police
rheva pelico
rheva police
harve pelico
harve police
ree palovich
alphee corvi
velcie pohar
velcie pharo
clevie pohar
clevie pharo
porchea live
porchea veil
porchea levi
porchea viel
porchea vile
porchae live
porchae veil
porchae levi
porchae viel
porchae vile
loveah peric
loveah price
loveah cripe
loveah repic
pavi loecher
pecolia vehr
caliope vehr
elve porchia
evel porchia
leve porchia
hope clavier
cope hirvela
ore pavelich
ero pavelich
reo pavelich
roe pavelich
leveria poch
leveria chop
veleria poch
veleria chop
valerie poch
valerie chop
valiree poch
valiree chop
elveria poch
elveria chop
verilea poch
verilea chop
averlie poch
averlie chop
valiere poch
valiere chop
valeire poch
valeire chop
velarie poch
velarie chop
pavlo reiche
pavlo cherie
pavlo eicher
pervie ochal
pervie loach
valree pioch
lavere pioch
velera pioch
everal pioch
elvera pioch
levera pioch
valere pioch
chapel orive
leveah cipro
levaeh cipro
harvee colip
harvee polic
pavel chiero
pavle chiero
olachi preve
picola hever
viora pelech
avori pelech
arvol pechie
valor pechie
vlora pechie
lavor pechie
orval pechie
cove raphiel
evora pichel
veora pichel
overa pichel
oreva pichel
vica poehler
clevia opher
clevia hoper
valice opher
valice hoper
raphel voice
ralphe voice
elphie varco
elphie covar
elphie cravo
elphie carvo
orlee pavich
leeor pavich
loree pavich
chira voepel
richa voepel
archi voepel
cahir voepel
cahri voepel
chari voepel
leviah corpe
leviah proce
leviah pecor
leviah coper
po chevalier
ivel porchea
liev porchea
levi porchea
leiv porchea
elvi porchea
eveli poarch
eveli porcha
eveli chopra
levie poarch
levie porcha
levie chopra
livee poarch
livee porcha
livee chopra
lieve poarch
lieve porcha
lieve chopra
iveel poarch
iveel porcha
iveel chopra
elvie poarch
elvie porcha
elvie chopra
leevi poarch
leevi porcha
leevi chopra
paree lovich
oliv peacher
ivol peacher
lovi peacher
horice pavel
horice pleva
levai porche
levai copher
alvie porche
alvie copher
elvia porche
elvia copher
vaile porche
vaile copher
eliav porche
eliav copher
aviel porche
aviel copher
livea porche
livea copher
lavie porche
lavie copher
levia porche
levia copher
velia porche
velia copher
evila porche
evila copher
elpha ciervo
aleph ciervo
alphe ciervo
lepha ciervo
elaph ciervo
violar peche
olivar peche
valori peche
charee volpi
cherea volpi
cherae volpi
chaveli rope
chaveli opre
chaveli pore
chaveli poer
chaveli pero
chaveli proe
love charpie
ovel charpie
porcha viele
porcha levie
porcha velie
porcha veile
chloie paver
percival heo
percival hoe
percival ohe
ovia prechel
ovia pelcher
oiva prechel
oiva pelcher
verlia chope
verlia pecho
verlia poche
elvira chope
elvira pecho
elvira poche
avriel chope
avriel pecho
avriel poche
valire chope
valire pecho
valire poche
averil chope
averil pecho
averil poche
valier chope
valier pecho
valier poche
valrie chope
valrie pecho
valrie poche
valeri chope
valeri pecho
valeri poche
arvile chope
arvile pecho
arvile poche
laeh perovic
leah perovic
elah perovic
hela perovic
hale perovic
leha perovic
lhea perovic
hael perovic
charliee pov
achol prieve
chloa prieve
avice pohler
avice proehl
avice holper
avice hopler
vacie pohler
vacie proehl
vacie holper
vacie hopler
roee pavlich
oree pavlich
eero pavlich
velah perico
velah pecori
revi pachelo
iver pachelo
helvie crapo
helvie parco
helvie rocap
helvie carpo
hervie calpo
hervie lopac
price holeva
covie harpel
covie halper
covie harple
covie pahler
covie raphel
elorah pivec
leorah pivec
harloe pivec
hiep clavero
hiep alcover
ceriah volpe
harice volpe
herica volpe
ericha volpe
richae volpe
cheria volpe
charie volpe
archie volpe
cierah volpe
ericah volpe
ovalee pirch
philo vecera
revie polcha
revie polach
ervie polcha
ervie polach
ivree polcha
ivree polach
levar pioche
verla pioche
vearl pioche
laver pioche
arvle pioche
alver pioche
veral pioche
arvel pioche
icee vorpahl
cova hiepler
cova piehler
valorie pech
veloria pech
alveiro pech
valerio pech
olivera pech
perola evich
vice harpole
chavi lepore
chavi repole
chavi loeper
philece rova
philece varo
philece ravo
philece vora
pearce viohl
capree viohl
hopie carvel
hopie calver
hopie claver
ophie carvel
ophie calver
ophie claver
phor eclevia
helvi pecora
chevie prola
chevie parlo
chevie polar
ivoree pachl
porche vaile
porche leiva
porche lavie
porche viale
porche viela
porche levia
rohi pavelec
hiro pavelec
riho pavelec
iroh pavelec
ihor pavelec
heer lipovac
cevera philo
cevera hipol
reveca philo
reveca hipol
cleopha vire
cleopha vier
cleopha veri
alphie cervo
alphie cover
alphie vorce
ophira cleve
orphia cleve
pacie hervol
chap olivere
chap overlie
elvir pacheo
ervil pacheo
veril pacheo
peach oliver
valoree pich
valoree chip
porchia vele
porchia leve
halvor piece
avoree pilch
hopi cleaver
ophir cleave
vache riopel
vache repoli
vache lepori
chave riopel
chave repoli
chave lepori
chevalier po
revea polich
veera polich
reeva polich
varee polich
evera polich
aveer polich
revae polich
avree polich
vilho pearce
vilho parece
orva peichel
avor peichel
arvo peichel
vora peichel
veachel piro
perceval hoi
rephael voci
rephael vico
rephael covi
phila vercoe
prevail choe
prevail echo
ieva plocher
eiva plocher
evia plocher
avie plocher
piero chavel
piero cheval
orphie clave
orphie levac
lovea picher
lovea perich
loeva picher
loeva perich
evola picher
evola perich
oleva picher
oleva perich
veola picher
veola perich
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment