Created
September 22, 2011 00:15
-
-
Save jmikola/1233709 to your computer and use it in GitHub Desktop.
Find the longest prefix(es) for a set of words.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env php | |
| <?php | |
| /** | |
| * This script calculates the longest prefixes for a set of words. The input is | |
| * expected to be a list of newline-delimited strings from either STDIN or a | |
| * file specified as the first command line parameter. | |
| */ | |
| call_user_func(function(){ | |
| $words = getUniqueWordsFromInput(); | |
| if (empty($words)) { | |
| fwrite(\STDERR, "There are no words to process.\n"); | |
| exit; | |
| } | |
| $shortestWords = getShortestWords($words); | |
| $prefixes = getPrefixesAmongWordsOfEqualLength($shortestWords); | |
| if (empty($prefixes)) { | |
| fwrite(\STDERR, "There are no common prefixes among the input's shortest words.\n"); | |
| exit; | |
| } | |
| $prefixes = getLongestValidPrefixes($prefixes, $words); | |
| if (empty($prefixes)) { | |
| fwrite(\STDERR, "There are no common prefixes for the input.\n"); | |
| exit; | |
| } | |
| echo implode("\n", $prefixes) . "\n"; | |
| }); | |
| /** | |
| * Returns all unique words from either a file argument or STDIN. | |
| * | |
| * @return array | |
| */ | |
| function getUniqueWordsFromInput() | |
| { | |
| $input = $_SERVER['argc'] > 2 ? $_SERVER['argv'][1] : 'php://stdin'; | |
| $words = array(); | |
| foreach (explode("\n", file_get_contents($input)) as $word) { | |
| $words[trim($word)] = 1; | |
| } | |
| unset($words['']); | |
| return array_keys($words); | |
| } | |
| /** | |
| * Return the shortest words within an array of words. | |
| * | |
| * @param array $words | |
| * @return array | |
| */ | |
| function getShortestWords(array $words) | |
| { | |
| // Get shortest words (prefix candidates) | |
| $shortestWordLength = mb_strlen($words[0]); | |
| $shortestWords = array(); | |
| foreach ($words as $word) { | |
| if (!isset($word[$shortestWordLength - 1])) { | |
| $shortestWordLength = mb_strlen($word); | |
| $shortestWords = array($word); | |
| } elseif (!isset($word[$shortestWordLength])) { | |
| $shortestWords[] = $word; | |
| } | |
| } | |
| return $shortestWords; | |
| } | |
| /** | |
| * Return all common prefixes for an array of words of equal length. | |
| * | |
| * @param array $words | |
| * @return array | |
| */ | |
| function getPrefixesAmongWordsOfEqualLength(array $words) | |
| { | |
| $candidates = array(); | |
| // Assume all strings in $words are of equal length | |
| $maxLength = mb_strlen($words[0]); | |
| foreach ($words as $word) { | |
| for ($i = $maxLength; $i > 0; --$i) { | |
| $prefix = mb_substr($word, 0, $i); | |
| if (!isset($candidates[$prefix]) && isPrefixValid($prefix, $words)) { | |
| $candidates[$prefix] = 1; | |
| // If a prefix is valid, it's derivatives are as well | |
| while (--$i > 0) { | |
| $candidates[mb_substr($prefix, 0, $i)] = 1; | |
| } | |
| } | |
| } | |
| } | |
| return array_keys($candidates); | |
| } | |
| /** | |
| * Return the longest valid prefixes (if any) for an array of words. | |
| * | |
| * @param array $prefixes | |
| * @param array $words | |
| * @return array | |
| */ | |
| function getLongestValidPrefixes($prefixes, array $words) | |
| { | |
| $longestValidPrefixLength = 0; | |
| $longestValidPrefixes = array(); | |
| foreach ($prefixes as $prefix) { | |
| // Ignore candidates shorter than a longer, validated prefix | |
| if (!isset($prefix[$longestValidPrefixLength])) { | |
| continue; | |
| } | |
| if (isPrefixValid($prefix, $words)) { | |
| if ($longestValidPrefixLength < mb_strlen($prefix)) { | |
| $longestValidPrefixLength = mb_strlen($prefix); | |
| $longestValidPrefixes = array(); | |
| } | |
| $longestValidPrefixes[] = $prefix; | |
| } | |
| } | |
| return $longestValidPrefixes; | |
| } | |
| /** | |
| * Return whether the prefix satisfies all words in the array. | |
| * | |
| * @param string $prefix | |
| * @param array $words | |
| * @return boolean | |
| */ | |
| function isPrefixValid($prefix, array $words) | |
| { | |
| // Use strlen() here, as we want the actual number of bytes in $prefix | |
| $nullBytes = str_repeat("\0", strlen($prefix)); | |
| foreach ($words as $word) { | |
| if ($nullBytes !== ($prefix ^ $word)) { | |
| return false; | |
| } | |
| } | |
| return true; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment