Skip to content

Instantly share code, notes, and snippets.

@jmikola
Created September 22, 2011 00:15
Show Gist options
  • Select an option

  • Save jmikola/1233709 to your computer and use it in GitHub Desktop.

Select an option

Save jmikola/1233709 to your computer and use it in GitHub Desktop.
Find the longest prefix(es) for a set of words.
#!/usr/bin/env php
<?php
/**
* This script calculates the longest prefixes for a set of words. The input is
* expected to be a list of newline-delimited strings from either STDIN or a
* file specified as the first command line parameter.
*/
call_user_func(function(){
$words = getUniqueWordsFromInput();
if (empty($words)) {
fwrite(\STDERR, "There are no words to process.\n");
exit;
}
$shortestWords = getShortestWords($words);
$prefixes = getPrefixesAmongWordsOfEqualLength($shortestWords);
if (empty($prefixes)) {
fwrite(\STDERR, "There are no common prefixes among the input's shortest words.\n");
exit;
}
$prefixes = getLongestValidPrefixes($prefixes, $words);
if (empty($prefixes)) {
fwrite(\STDERR, "There are no common prefixes for the input.\n");
exit;
}
echo implode("\n", $prefixes) . "\n";
});
/**
* Returns all unique words from either a file argument or STDIN.
*
* @return array
*/
function getUniqueWordsFromInput()
{
$input = $_SERVER['argc'] > 2 ? $_SERVER['argv'][1] : 'php://stdin';
$words = array();
foreach (explode("\n", file_get_contents($input)) as $word) {
$words[trim($word)] = 1;
}
unset($words['']);
return array_keys($words);
}
/**
* Return the shortest words within an array of words.
*
* @param array $words
* @return array
*/
function getShortestWords(array $words)
{
// Get shortest words (prefix candidates)
$shortestWordLength = mb_strlen($words[0]);
$shortestWords = array();
foreach ($words as $word) {
if (!isset($word[$shortestWordLength - 1])) {
$shortestWordLength = mb_strlen($word);
$shortestWords = array($word);
} elseif (!isset($word[$shortestWordLength])) {
$shortestWords[] = $word;
}
}
return $shortestWords;
}
/**
* Return all common prefixes for an array of words of equal length.
*
* @param array $words
* @return array
*/
function getPrefixesAmongWordsOfEqualLength(array $words)
{
$candidates = array();
// Assume all strings in $words are of equal length
$maxLength = mb_strlen($words[0]);
foreach ($words as $word) {
for ($i = $maxLength; $i > 0; --$i) {
$prefix = mb_substr($word, 0, $i);
if (!isset($candidates[$prefix]) && isPrefixValid($prefix, $words)) {
$candidates[$prefix] = 1;
// If a prefix is valid, it's derivatives are as well
while (--$i > 0) {
$candidates[mb_substr($prefix, 0, $i)] = 1;
}
}
}
}
return array_keys($candidates);
}
/**
* Return the longest valid prefixes (if any) for an array of words.
*
* @param array $prefixes
* @param array $words
* @return array
*/
function getLongestValidPrefixes($prefixes, array $words)
{
$longestValidPrefixLength = 0;
$longestValidPrefixes = array();
foreach ($prefixes as $prefix) {
// Ignore candidates shorter than a longer, validated prefix
if (!isset($prefix[$longestValidPrefixLength])) {
continue;
}
if (isPrefixValid($prefix, $words)) {
if ($longestValidPrefixLength < mb_strlen($prefix)) {
$longestValidPrefixLength = mb_strlen($prefix);
$longestValidPrefixes = array();
}
$longestValidPrefixes[] = $prefix;
}
}
return $longestValidPrefixes;
}
/**
* Return whether the prefix satisfies all words in the array.
*
* @param string $prefix
* @param array $words
* @return boolean
*/
function isPrefixValid($prefix, array $words)
{
// Use strlen() here, as we want the actual number of bytes in $prefix
$nullBytes = str_repeat("\0", strlen($prefix));
foreach ($words as $word) {
if ($nullBytes !== ($prefix ^ $word)) {
return false;
}
}
return true;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment