Last active
June 12, 2023 03:23
-
-
Save bsidhom/f30989cae39f1a2ba841eb00744c0ba0 to your computer and use it in GitHub Desktop.
Anagram finder inspired by the Ink Black Heart Novel (Cormoran Strike)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Exact anagram finder. We start with a target name ("vilepechora") and consider | |
# all two-name exact anagrams of this target. Specifically, we do not consider | |
# anagrams of subsets of the original character set. We consider first and last | |
# names as different types to narrow the search space (there are 1712 matches if | |
# you consider first and last names to be interchangeable). There are "only" 582 | |
# matches by considering first and last names separately (based on which dataset | |
# they're found in). | |
from collections import defaultdict | |
from collections.abc import Iterable | |
import csv | |
import io | |
import os | |
from typing import Mapping | |
import unicodedata | |
import zipfile | |
def main(): | |
splits = compute_splits("vilepechora") | |
# Taken from https://www.ssa.gov/oact/babynames/names.zip | |
first_names = read_first_names("names.zip") | |
# Taken from https://github.com/fivethirtyeight/data/blob/master/most-common-name/surnames.csv | |
last_names = read_surnames("surnames.csv") | |
# NOTE: We consider the union of first and last names in a global list. | |
# names = first_names | surnames | |
first_name_clusters = multiset_map(first_names) | |
last_name_clusters = multiset_map(last_names) | |
# name_clusters = multiset_map(names) | |
for (first, last) in splits: | |
if first not in first_name_clusters: | |
continue | |
if last not in last_name_clusters: | |
continue | |
first_matches = first_name_clusters[first] | |
last_matches = last_name_clusters[last] | |
for a in first_matches: | |
for b in last_matches: | |
print(f"{a} {b}") | |
def read_first_names(first_names_zip: os.PathLike): | |
names = set() | |
with zipfile.ZipFile(first_names_zip) as z: | |
for finfo in z.infolist(): | |
fname = finfo.filename | |
if not fname.startswith("yob") or not fname.endswith(".txt"): | |
continue | |
with io.TextIOWrapper(z.open(finfo.filename), | |
encoding="utf-8") as f: | |
reader = csv.reader(f) | |
for row in reader: | |
name = normalize_string(row[0]) | |
names.add(name) | |
return names | |
def read_surnames(csv_file: os.PathLike): | |
names = set() | |
with open(csv_file, newline="") as f: | |
reader = csv.reader(f) | |
# Discard header | |
next(reader) | |
for row in reader: | |
name = normalize_string(row[0]) | |
names.add(name) | |
return names | |
def normalize_string(s: str): | |
result = "" | |
for c in unicodedata.normalize("NFKD", s.casefold()): | |
if unicodedata.category(c) != "Ll": | |
continue | |
result += c | |
return result | |
# Compute all possible two-part multiset partitions of the given target. This is | |
# only feasible in O(2^n), where n is the length of the target. | |
def compute_splits(target): | |
splits = set() | |
for (a, b) in powerset_pairs(target): | |
splits.add((char_multiset(a), char_multiset(b))) | |
return splits | |
# Given a list of strings, creates a mapping from the character multiset of each | |
# string to a list of strings matching that character multiset. | |
def multiset_map(ss: Iterable[str]) -> Mapping[str, list[str]]: | |
result = defaultdict(list) | |
for s in ss: | |
key = char_multiset(s) | |
result[key].append(s) | |
return result | |
# Turns the given string into a unique string that corresponds to the multiset | |
# of characters in the given string. | |
def char_multiset(s: str) -> str: | |
return "".join(sorted(s)) | |
# Adapted from https://gist.github.com/bsidhom/a9da0c92eab358d18564285487799db4 | |
def powerset_pairs(items): | |
count = len(items) | |
def rec(subset): | |
if len(subset) == count: | |
result = [items[i] for i in range(count) if subset[i]] | |
complement = [items[i] for i in range(count) if not subset[i]] | |
yield (result, complement) | |
else: | |
yield from rec(subset + [False]) | |
yield from rec(subset + [True]) | |
yield from rec([]) | |
if __name__ == "__main__": | |
main() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ophia clever | |
porcia hevel | |
velora piche | |
velora piech | |
lovera piche | |
lovera piech | |
levora piche | |
levora piech | |
volare piche | |
volare piech | |
avorie pelch | |
capri hoevel | |
ralphie voce | |
ralphie cove | |
raphiel voce | |
raphiel cove | |
lea perovich | |
ale perovich | |
ela perovich | |
havoc leiper | |
havoc lepire | |
havoc pierle | |
havoc peiler | |
ophelie cvar | |
rochele piva | |
ivola pecher | |
oliva pecher | |
olvia pecher | |
olavi pecher | |
viola pecher | |
ovila pecher | |
lovia pecher | |
voila pecher | |
volia pecher | |
orvie plecha | |
orvie chaple | |
orvie pechal | |
orvie chapel | |
eivor plecha | |
eivor chaple | |
eivor pechal | |
eivor chapel | |
chalei prevo | |
ichael prevo | |
lechia prevo | |
cheila prevo | |
haciel prevo | |
chelia prevo | |
chaeli prevo | |
leicha prevo | |
chalie prevo | |
cherie pavlo | |
cheire pavlo | |
havier lopec | |
havier polce | |
harvie lopec | |
harvie polce | |
olive pechar | |
olive pearch | |
olive percha | |
olive pacher | |
olvie pechar | |
olvie pearch | |
olvie percha | |
olvie pacher | |
evilo pechar | |
evilo pearch | |
evilo percha | |
evilo pacher | |
levio pechar | |
levio pearch | |
levio percha | |
levio pacher | |
lovie pechar | |
lovie pearch | |
lovie percha | |
lovie pacher | |
elvio pechar | |
elvio pearch | |
elvio percha | |
elvio pacher | |
ovie placher | |
ovie palcher | |
eera povlich | |
aree povlich | |
chevi polera | |
chevi lopera | |
chevi lapore | |
chevi repola | |
chavie loper | |
chavie ropel | |
chavie poler | |
chavie lepro | |
oliver pecha | |
oliver cheap | |
oliver pache | |
oliver peach | |
orvile pecha | |
orvile cheap | |
orvile pache | |
orvile peach | |
hovie capler | |
hovie placer | |
hovie parcel | |
chia polvere | |
chai polvere | |
ciah polvere | |
olivea perch | |
ovelia perch | |
veolia perch | |
olevia perch | |
aivree ploch | |
earvie ploch | |
aviree ploch | |
averie ploch | |
pao leverich | |
ivalee porch | |
ivalee proch | |
avelie porch | |
avelie proch | |
evelia porch | |
evelia proch | |
evalei porch | |
evalei proch | |
evalie porch | |
evalie proch | |
leavie porch | |
leavie proch | |
avilee porch | |
avilee proch | |
laree povich | |
ralee povich | |
earle povich | |
leera povich | |
arlee povich | |
lerae povich | |
chloi pevear | |
evelio prach | |
elovie prach | |
chevel proia | |
vachel piero | |
vachel perio | |
vachel poire | |
cheval piero | |
cheval perio | |
cheval poire | |
clovie harpe | |
clovie haper | |
lovice harpe | |
lovice haper | |
elvire pacho | |
elvire pocha | |
elvire poach | |
elvire chapo | |
verlie pacho | |
verlie pocha | |
verlie poach | |
verlie chapo | |
everli pacho | |
everli pocha | |
everli poach | |
everli chapo | |
virlee pacho | |
virlee pocha | |
virlee poach | |
virlee chapo | |
chloee pivar | |
laporche vie | |
verah pelico | |
verah police | |
rheva pelico | |
rheva police | |
harve pelico | |
harve police | |
ree palovich | |
alphee corvi | |
velcie pohar | |
velcie pharo | |
clevie pohar | |
clevie pharo | |
porchea live | |
porchea veil | |
porchea levi | |
porchea viel | |
porchea vile | |
porchae live | |
porchae veil | |
porchae levi | |
porchae viel | |
porchae vile | |
loveah peric | |
loveah price | |
loveah cripe | |
loveah repic | |
pavi loecher | |
pecolia vehr | |
caliope vehr | |
elve porchia | |
evel porchia | |
leve porchia | |
hope clavier | |
cope hirvela | |
ore pavelich | |
ero pavelich | |
reo pavelich | |
roe pavelich | |
leveria poch | |
leveria chop | |
veleria poch | |
veleria chop | |
valerie poch | |
valerie chop | |
valiree poch | |
valiree chop | |
elveria poch | |
elveria chop | |
verilea poch | |
verilea chop | |
averlie poch | |
averlie chop | |
valiere poch | |
valiere chop | |
valeire poch | |
valeire chop | |
velarie poch | |
velarie chop | |
pavlo reiche | |
pavlo cherie | |
pavlo eicher | |
pervie ochal | |
pervie loach | |
valree pioch | |
lavere pioch | |
velera pioch | |
everal pioch | |
elvera pioch | |
levera pioch | |
valere pioch | |
chapel orive | |
leveah cipro | |
levaeh cipro | |
harvee colip | |
harvee polic | |
pavel chiero | |
pavle chiero | |
olachi preve | |
picola hever | |
viora pelech | |
avori pelech | |
arvol pechie | |
valor pechie | |
vlora pechie | |
lavor pechie | |
orval pechie | |
cove raphiel | |
evora pichel | |
veora pichel | |
overa pichel | |
oreva pichel | |
vica poehler | |
clevia opher | |
clevia hoper | |
valice opher | |
valice hoper | |
raphel voice | |
ralphe voice | |
elphie varco | |
elphie covar | |
elphie cravo | |
elphie carvo | |
orlee pavich | |
leeor pavich | |
loree pavich | |
chira voepel | |
richa voepel | |
archi voepel | |
cahir voepel | |
cahri voepel | |
chari voepel | |
leviah corpe | |
leviah proce | |
leviah pecor | |
leviah coper | |
po chevalier | |
ivel porchea | |
liev porchea | |
levi porchea | |
leiv porchea | |
elvi porchea | |
eveli poarch | |
eveli porcha | |
eveli chopra | |
levie poarch | |
levie porcha | |
levie chopra | |
livee poarch | |
livee porcha | |
livee chopra | |
lieve poarch | |
lieve porcha | |
lieve chopra | |
iveel poarch | |
iveel porcha | |
iveel chopra | |
elvie poarch | |
elvie porcha | |
elvie chopra | |
leevi poarch | |
leevi porcha | |
leevi chopra | |
paree lovich | |
oliv peacher | |
ivol peacher | |
lovi peacher | |
horice pavel | |
horice pleva | |
levai porche | |
levai copher | |
alvie porche | |
alvie copher | |
elvia porche | |
elvia copher | |
vaile porche | |
vaile copher | |
eliav porche | |
eliav copher | |
aviel porche | |
aviel copher | |
livea porche | |
livea copher | |
lavie porche | |
lavie copher | |
levia porche | |
levia copher | |
velia porche | |
velia copher | |
evila porche | |
evila copher | |
elpha ciervo | |
aleph ciervo | |
alphe ciervo | |
lepha ciervo | |
elaph ciervo | |
violar peche | |
olivar peche | |
valori peche | |
charee volpi | |
cherea volpi | |
cherae volpi | |
chaveli rope | |
chaveli opre | |
chaveli pore | |
chaveli poer | |
chaveli pero | |
chaveli proe | |
love charpie | |
ovel charpie | |
porcha viele | |
porcha levie | |
porcha velie | |
porcha veile | |
chloie paver | |
percival heo | |
percival hoe | |
percival ohe | |
ovia prechel | |
ovia pelcher | |
oiva prechel | |
oiva pelcher | |
verlia chope | |
verlia pecho | |
verlia poche | |
elvira chope | |
elvira pecho | |
elvira poche | |
avriel chope | |
avriel pecho | |
avriel poche | |
valire chope | |
valire pecho | |
valire poche | |
averil chope | |
averil pecho | |
averil poche | |
valier chope | |
valier pecho | |
valier poche | |
valrie chope | |
valrie pecho | |
valrie poche | |
valeri chope | |
valeri pecho | |
valeri poche | |
arvile chope | |
arvile pecho | |
arvile poche | |
laeh perovic | |
leah perovic | |
elah perovic | |
hela perovic | |
hale perovic | |
leha perovic | |
lhea perovic | |
hael perovic | |
charliee pov | |
achol prieve | |
chloa prieve | |
avice pohler | |
avice proehl | |
avice holper | |
avice hopler | |
vacie pohler | |
vacie proehl | |
vacie holper | |
vacie hopler | |
roee pavlich | |
oree pavlich | |
eero pavlich | |
velah perico | |
velah pecori | |
revi pachelo | |
iver pachelo | |
helvie crapo | |
helvie parco | |
helvie rocap | |
helvie carpo | |
hervie calpo | |
hervie lopac | |
price holeva | |
covie harpel | |
covie halper | |
covie harple | |
covie pahler | |
covie raphel | |
elorah pivec | |
leorah pivec | |
harloe pivec | |
hiep clavero | |
hiep alcover | |
ceriah volpe | |
harice volpe | |
herica volpe | |
ericha volpe | |
richae volpe | |
cheria volpe | |
charie volpe | |
archie volpe | |
cierah volpe | |
ericah volpe | |
ovalee pirch | |
philo vecera | |
revie polcha | |
revie polach | |
ervie polcha | |
ervie polach | |
ivree polcha | |
ivree polach | |
levar pioche | |
verla pioche | |
vearl pioche | |
laver pioche | |
arvle pioche | |
alver pioche | |
veral pioche | |
arvel pioche | |
icee vorpahl | |
cova hiepler | |
cova piehler | |
valorie pech | |
veloria pech | |
alveiro pech | |
valerio pech | |
olivera pech | |
perola evich | |
vice harpole | |
chavi lepore | |
chavi repole | |
chavi loeper | |
philece rova | |
philece varo | |
philece ravo | |
philece vora | |
pearce viohl | |
capree viohl | |
hopie carvel | |
hopie calver | |
hopie claver | |
ophie carvel | |
ophie calver | |
ophie claver | |
phor eclevia | |
helvi pecora | |
chevie prola | |
chevie parlo | |
chevie polar | |
ivoree pachl | |
porche vaile | |
porche leiva | |
porche lavie | |
porche viale | |
porche viela | |
porche levia | |
rohi pavelec | |
hiro pavelec | |
riho pavelec | |
iroh pavelec | |
ihor pavelec | |
heer lipovac | |
cevera philo | |
cevera hipol | |
reveca philo | |
reveca hipol | |
cleopha vire | |
cleopha vier | |
cleopha veri | |
alphie cervo | |
alphie cover | |
alphie vorce | |
ophira cleve | |
orphia cleve | |
pacie hervol | |
chap olivere | |
chap overlie | |
elvir pacheo | |
ervil pacheo | |
veril pacheo | |
peach oliver | |
valoree pich | |
valoree chip | |
porchia vele | |
porchia leve | |
halvor piece | |
avoree pilch | |
hopi cleaver | |
ophir cleave | |
vache riopel | |
vache repoli | |
vache lepori | |
chave riopel | |
chave repoli | |
chave lepori | |
chevalier po | |
revea polich | |
veera polich | |
reeva polich | |
varee polich | |
evera polich | |
aveer polich | |
revae polich | |
avree polich | |
vilho pearce | |
vilho parece | |
orva peichel | |
avor peichel | |
arvo peichel | |
vora peichel | |
veachel piro | |
perceval hoi | |
rephael voci | |
rephael vico | |
rephael covi | |
phila vercoe | |
prevail choe | |
prevail echo | |
ieva plocher | |
eiva plocher | |
evia plocher | |
avie plocher | |
piero chavel | |
piero cheval | |
orphie clave | |
orphie levac | |
lovea picher | |
lovea perich | |
loeva picher | |
loeva perich | |
evola picher | |
evola perich | |
oleva picher | |
oleva perich | |
veola picher | |
veola perich |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment