Created
December 5, 2008 19:23
-
-
Save filipsalo/32462 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Sort callback: longer strings come first; strings of the same byte
 * length are ordered ascending by strcmp().
 *
 * @param string $a
 * @param string $b
 * @return int negative when $a sorts before $b, positive when it sorts
 *             after, 0 when both are identical
 */
function strlen_cmp($a, $b) {
    $len_a = strlen($a);
    $len_b = strlen($b);
    if ($len_a === $len_b) {
        /* Same length: fall back to plain byte-wise ordering */
        return strcmp($a, $b);
    }
    /* Subtracting in reverse puts the longer string first */
    return $len_b - $len_a;
}
/**
 * Escape a literal string so it can be embedded in a "/"-delimited
 * regular expression; the "/" delimiter itself is escaped as well.
 *
 * @param string $str literal text
 * @return string the escaped text
 */
function quote_for_regex($str) {
    $delimiter = "/";
    $quoted = preg_quote($str, $delimiter);
    return $quoted;
}
/* Read the card data and build an array of (name => id) pairs.
   Each line of kortnamn.txt is expected to look like "<id> <card name>". */
$lines = file("kortnamn.txt", FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if ($lines === false) {
    /* Nothing has been echoed yet, so bail out instead of generating a
       definitions file from no data */
    error_log("Unable to read kortnamn.txt");
    exit(1);
}
$cards = array();
foreach ($lines as $line) {
    $fields = explode(" ", $line, 2);
    /* Skip lines without a card name: an empty alternative in the regex
       would match the empty string at every position */
    if (count($fields) < 2 || $fields[1] === "") {
        continue;
    }
    $cards[$fields[1]] = (int) $fields[0];
}
/* Sort the array by length, longest names first, so that within an
   alternation a long name is tried before any shorter name */
uksort($cards, "strlen_cmp");
/* Build the regular expressions: one alternation of all the quoted names,
   chopped up into several if it would exceed $RE_LIMIT bytes. Whole
   alternatives are packed greedily, so a regex is never cut in the middle
   of a name or at an escaped "\|" inside one. A single name longer than
   the limit gets a regex of its own. */
$RE_LIMIT = 30000;
$card_regexes = array();
$chunk = array();
$chunk_len = 0;
foreach (array_map("quote_for_regex", array_keys($cards)) as $alternative) {
    $alt_len = strlen($alternative);
    /* The "+ 1" accounts for the "|" separator */
    if ($chunk && $chunk_len + 1 + $alt_len > $RE_LIMIT) {
        $card_regexes[] = implode("|", $chunk);
        $chunk = array();
        $chunk_len = 0;
    }
    $chunk_len += $alt_len + ($chunk ? 1 : 0);
    $chunk[] = $alternative;
}
/* With no cards at all we emit an empty list rather than an empty regex */
if ($chunk) {
    $card_regexes[] = implode("|", $chunk);
}
/* Finish up the regexes with delimiters and the S option.
   (each() no longer exists in PHP 8, hence the plain foreach.) */
foreach ($card_regexes as $key => $regex) {
    $card_regexes[$key] = sprintf("/%s/S", $regex);
}
/* Output the generated php file */
echo "<?php\n";
printf("\$card_regexes = %s;\n", var_export($card_regexes, true));
echo "?>\n";
?> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
15094 "Ach! Hans, Run!" | |
17880 1996 World Champion | |
5629 Abandon Hope | |
8127 Abandoned Outpost | |
1421 Abbey Gargoyles | |
... | |
3465 Zuran Enchanter | |
3241 Zuran Orb | |
3466 Zuran Spellcaster | |
15048 Zzzyxas's Abyss | |
15001 _____ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/* Oh, dear, look at the time! */
/* microtime(true) returns the current Unix timestamp as a float, so there
   is no need to split and re-add the "usec sec" string form */
$starttime = microtime(true);
/**
 * Check that a regex match doesn't overlap with any of the spans in the
 * global $ignore list. (We use this to filter matches in step 2.)
 *
 * A span is a (string, byte offset) pair, the shape preg_match_all()
 * produces with PREG_OFFSET_CAPTURE; it covers the half-open byte range
 * [offset, offset + strlen(string)).
 *
 * @param array $span the candidate match
 * @return bool true if $span is clear of every ignored span, else false
 */
function no_overlaps($span) {
    global $ignore;
    list($s, $start) = $span;
    $end = $start + strlen($s);
    foreach ($ignore as $ispan) {
        list($is, $a) = $ispan;
        $b = $a + strlen($is);
        /* Two half-open ranges intersect iff each one starts before the
           other ends. Unlike testing whether an endpoint of the candidate
           lies inside the ignored span, this also catches a candidate
           that completely encloses an ignored span. */
        if ($start < $b && $a < $end) {
            return false;
        }
    }
    return true;
}
/* STEP 0: get the generated card and regex definitions */
require 'carddata.php';
/* STEP 1: Get the input data and identify any "untouchable" substrings */
$data = file_get_contents('spel_mini.txt');
if ($data === false) {
    error_log("Unable to read spel_mini.txt");
    exit(1);
}
/* Untouchable: a [tag ...]...[/tag] pair whose content contains no "[",
   or failing that any single [...] tag */
preg_match_all('/\[([^]\s]+)(\s+.*?)?\][^[]+\[\/\1\]|\[[^]]+\]/S',
    $data, $ignore, PREG_OFFSET_CAPTURE);
$ignore = $ignore[0];
/* STEP 2: Look for card names in the data */
$matches = array();
foreach ($card_regexes as $regex) {
    if (preg_match_all($regex, $data, $candidates, PREG_OFFSET_CAPTURE) === false) {
        /* The regex could not be run; skip it rather than filter garbage */
        error_log("preg_match_all failed, PCRE error code " . preg_last_error());
        continue;
    }
    /* Exclude any matches that overlap with untouchables */
    $candidates = array_filter($candidates[0], 'no_overlaps');
    /* Collect the remaining matches */
    $matches = array_merge($matches, $candidates);
    /* Add the new matches to the ignore list, so we can exclude any
       overlapping matches from the remaining regexes. Only this round's
       matches are added: earlier rounds are already on the list, and
       re-adding them every time would make it grow quadratically. */
    $ignore = array_merge($ignore, $candidates);
}
/* STEP 3: Go through and actually do the replacement on the matches | |
starting from the end, so we don't mess up the indices as we go | |
along */ | |
/* Sort callback for (string, offset) regex matches: the match with the
   larger offset sorts first, i.e. matches end up ordered back to front */
function position_cmp($a, $b) {
    $offset_a = $a[1];
    $offset_b = $b[1];
    return $offset_b - $offset_a;
}
usort($matches, "position_cmp");
/* Replace the matching substring with something else. Each match is a
   (string, byte offset) pair; because they are processed back to front,
   a replacement never shifts the offsets of the matches still to come. */
foreach ($matches as $match) {
    list($str, $start) = $match;
    $data = substr_replace($data, sprintf('[card]%s[/card]', $str),
        $start, strlen($str));
}
/* DONE: Output the result */
echo $data;
/* Brag about it. */
/* microtime(true) returns the current time as a float number of seconds */
$totaltime = microtime(true) - $starttime;
printf("Gone in %.3f seconds.\n\n", $totaltime);
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment