Created
July 4, 2015 10:42
-
-
Save abcarroll/e1927f3f3871c9816306 to your computer and use it in GitHub Desktop.
Make a hopefully pronounceable word based on vowel/consonant patterns. Patterns are pregenerated by analyzing dictionary files.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// A.B. Carroll, <[email protected]> | |
// "Do whatever the fuck you want license" | |
// Generates a 'statistics chunk' for use in pronounceable word generation | |
// It's a one-time generation, so it doesn't need to be fast. | |
// Builds the "statistics chunk": for every word length up to 10, the most
// common vowel/consonant patterns found in the dictionary, keyed by the
// rounded percentage of same-length words that follow each pattern.
// Output shape (JSON): { "<length>": { "<percentage>": "<pattern>", ... }, ... }
$dictionaryPath = '/usr/share/dict/american-english';
$file = file($dictionaryPath);
if ($file === false) {
    // Without the dictionary there is nothing to analyse; fail loudly instead
    // of printing an empty chunk that looks like a valid result.
    file_put_contents('php://stderr', "Unable to read dictionary file: $dictionaryPath\n");
    exit(1);
}

$patterns = [];

// Initial pass: count how many words follow each pattern, grouped by length.
$vowels = str_split('aeiou');
foreach ($file as $word) {
    $word = trim($word);
    // Skip entries with apostrophes, accents, digits, etc.
    if (!preg_match('/^[A-Za-z]+$/', $word)) {
        continue;
    }

    // Lowercase first so that capitalised vowels (proper nouns such as
    // "Alice") are classified as vowels rather than as consonants.
    $pattern = '';
    foreach (str_split(strtolower($word)) as $letter) {
        $pattern .= in_array($letter, $vowels, true) ? 'v' : 'c';
    }

    $length = strlen($pattern);
    if (!isset($patterns[$length][$pattern])) {
        $patterns[$length][$pattern] = 0;
    }
    $patterns[$length][$pattern]++;
}

// Prune, sort, re-key
$sortedPatterns = [];
ksort($patterns, SORT_NUMERIC);
foreach ($patterns as $length => $iPatterns) {
    // Only word lengths up to 10 are published in the chunk.
    if ($length > 10) {
        continue;
    }

    // Most frequent pattern first.
    arsort($iPatterns, SORT_NUMERIC);
    $total = array_sum($iPatterns);

    foreach ($iPatterns as $pattern => $count) {
        $percentage = (int) round(($count / $total) * 100);
        if ($percentage <= 2) {
            // Counts are in descending order, so no later pattern can qualify.
            break;
        }

        // The chunk format has room for one pattern per percentage value.
        // When several patterns round to the same percentage, keep the first
        // (most frequent) one instead of letting a rarer one overwrite it.
        if (!isset($sortedPatterns[$length][$percentage])) {
            $sortedPatterns[$length][$percentage] = $pattern;
        }
    }
}

echo "Your statistics chunk:\n\n";
echo json_encode($sortedPatterns);
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// A.B. Carroll, <[email protected]> | |
// "Do whatever the fuck you want license" | |
// Generates a pronounceable word based on pregenerated vowel/consonant
// statistical data
// Sshh.. No words.. Only code now... | |
/**
 * Generates a random, hopefully pronounceable, lowercase word.
 *
 * A vowel/consonant pattern (e.g. "cvcvc") of the requested length is drawn
 * from the embedded statistics table, weighted by the percentage each pattern
 * is keyed under. Every slot of the pattern is then filled with a random
 * letter of the matching class.
 *
 * @param int $length Desired word length; the embedded table covers 1 to 10.
 *
 * @return string The generated word, exactly $length letters long.
 *
 * @throws \RuntimeException When the table holds no patterns for $length.
 */
function pronounceableWord($length) {
    $patternData = '{"1":{"90":"c","10":"v"},"2":{"51":"cc","35":"cv","15"' .
        ':"vc"},"3":{"64":"cvc","9":"cvv","7":"ccv","4":"vcv"},"4":{"39":"' .
        'cvcc","20":"cvcv","14":"ccvc","3":"vcvc"},"5":{"24":"cvcvc","17":' .
        '"cvccc","13":"ccvcc","9":"cvvcc","8":"cvccv","5":"ccvvc","3":"cvv' .
        'cv"},"6":{"27":"cvccvc","12":"cvcvcc","7":"ccvcvc","6":"cvvcvc","' .
        '5":"cvcvvc","3":"ccvvcc"},"7":{"20":"cvccvcc","9":"ccvccvc","8":"' .
        'cvcvcvc","7":"cvccvvc","4":"ccvcvcc","3":"ccvvcvc"},"8":{"9":"cvc' .
        'cvcvc","8":"ccvccvcc","7":"cvcvcvcc","6":"cvcccvcc","5":"cvcvccvc' .
        '","4":"cvccvccc","3":"cvcvcvcv"},"9":{"9":"cvccvcvcc","6":"cvccvc' .
        'cvc","5":"cvcvccvcc","4":"cvcvcvcvc","3":"ccvcccvcc"},"10":{"6":"' .
        'cvccvccvcc","4":"cvcvcvcvcc","3":"cvcvccvcvc"}}';
    $patternData = json_decode($patternData, true);

    if (!isset($patternData[$length])) {
        throw new \RuntimeException("Length of '$length' isn't supported by " .
            "the pattern data provided.");
    }
    $weightedPatterns = $patternData[$length];

    // The percentage keys do not add up to 100 (rare patterns were pruned),
    // so draw against their actual sum. mt_rand() is inclusive on both ends:
    // the upper bound must be sum - 1, otherwise a draw equal to the sum
    // would match no pattern at all.
    $totalWeight = array_sum(array_keys($weightedPatterns));
    $rand = mt_rand(0, $totalWeight - 1); // the magic deciding number

    // Walk the cumulative weights until the draw falls inside a bucket.
    $selectedPattern = '';
    $accumulatedWeight = 0;
    foreach ($weightedPatterns as $weight => $pattern) {
        $accumulatedWeight += $weight;
        if ($rand < $accumulatedWeight) {
            $selectedPattern = $pattern;
            break;
        }
    }

    $alphabet = [
        'v' => str_split('aeiou'),
        'c' => str_split('bcdfghjklmnpqrstvwxyz'),
    ];

    $generatedWord = '';
    foreach (str_split($selectedPattern) as $letterClass) {
        $candidates = $alphabet[$letterClass];
        $generatedWord .= $candidates[array_rand($candidates)];
    }

    return $generatedWord;
}
echo "And the winner is... "; | |
echo pronounceableWord(5); | |
echo "\n\n"; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment