Skip to content

Instantly share code, notes, and snippets.

@filipsalo
Created December 5, 2008 19:23
Show Gist options
  • Save filipsalo/32462 to your computer and use it in GitHub Desktop.
Save filipsalo/32462 to your computer and use it in GitHub Desktop.
<?php
/**
 * Ordering callback: longer strings sort first; strings of equal
 * length fall back to a plain strcmp() (ascending) comparison.
 *
 * @param string $a First string
 * @param string $b Second string
 * @return int Negative if $a sorts before $b, positive if after, 0 if equal
 */
function strlen_cmp($a, $b) {
    $len_a = strlen($a);
    $len_b = strlen($b);
    if ($len_a != $len_b) {
        /* Descending by length: the longer string yields a negative result */
        return $len_b - $len_a;
    }
    return strcmp($a, $b);
}
/**
 * Escape a literal string so it can be embedded in a regular
 * expression that uses "/" as its delimiter.
 *
 * @param string $str Literal text
 * @return string The text with regex metacharacters and "/" escaped
 */
function quote_for_regex($str) {
    $delimiter = "/";
    $escaped = preg_quote($str, $delimiter);
    return $escaped;
}
/* Read the card data and build an array of (name => id) pairs.
   Each line is split at its first space into an id and a card name. */
$lines = file("kortnamn.txt", FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if ($lines === false) {
    /* Without the card list there is nothing sensible to generate */
    throw new RuntimeException("Unable to read kortnamn.txt");
}
/* Start from an explicit empty array so that an empty input file still
   leaves $cards defined for the code that follows */
$cards = array();
foreach ($lines as $line) {
    $parts = explode(" ", $line, 2);
    if (count($parts) < 2 || $parts[1] === "") {
        /* No card name on this line; an empty name would later become an
           empty regex alternative that matches everywhere, so skip it */
        continue;
    }
    list($id, $card) = $parts;
    $cards[$card] = (int) $id;
}
/* Sort the array by name length, longest first. PCRE tries alternatives
   from left to right, so longer names must precede their own prefixes. */
uksort($cards, "strlen_cmp");
/* Build the regular expression(s): the quoted names are joined with "|"
   into chunks of at most $RE_LIMIT bytes each. The chunks are assembled
   name by name rather than by cutting one long string at a "|" character,
   because preg_quote() emits "\|" for a literal pipe and a cut there would
   produce a broken pattern. */
$RE_LIMIT = 30000;
$card_regexes = array();
$chunk = "";
foreach (array_map("quote_for_regex", array_keys($cards)) as $alternative) {
    if ($chunk === "") {
        /* First name of a chunk; accepted even if it alone exceeds the limit */
        $chunk = $alternative;
    } elseif (strlen($chunk) + 1 + strlen($alternative) <= $RE_LIMIT) {
        $chunk .= "|" . $alternative;
    } else {
        /* The chunk is full: store it and start the next one */
        $card_regexes[] = $chunk;
        $chunk = $alternative;
    }
}
if ($chunk !== "") {
    $card_regexes[] = $chunk;
}
/* Finish up the regexes with delimiters and the S option.
   A plain foreach is used because each() no longer exists in PHP 8;
   foreach iterates by value, so writing back through the key is safe. */
foreach ($card_regexes as $key => $regex) {
    $card_regexes[$key] = sprintf("/%s/S", $regex);
}
/* Emit the generated PHP file on standard output: an opening tag, the
   $card_regexes assignment as parseable PHP code, and a closing tag */
$exported_regexes = var_export($card_regexes, true);
echo "<?php\n";
printf("\$card_regexes = %s;\n", $exported_regexes);
echo "?>\n";
?>
15094 "Ach! Hans, Run!"
17880 1996 World Champion
5629 Abandon Hope
8127 Abandoned Outpost
1421 Abbey Gargoyles
...
3465 Zuran Enchanter
3241 Zuran Orb
3466 Zuran Spellcaster
15048 Zzzyxas's Abyss
15001 _____
<?php
/* Note the start time. microtime() returns "<fraction> <seconds>" as a
   string; adding the two parts gives the timestamp as one number. */
list($usec, $sec) = explode(' ', microtime());
$starttime = $usec + $sec;
/**
 * Check that a regex match doesn't overlap with any of the spans in the
 * global $ignore list. (We use this to filter matches in step 2.)
 *
 * Both the match and the ignored spans are (text, offset) pairs as
 * produced by preg_match_all() with PREG_OFFSET_CAPTURE. Each one covers
 * the half-open byte range [offset, offset + strlen(text)).
 *
 * @param array $span The candidate match: array(text, offset)
 * @return bool True if the candidate shares no byte with any ignored span
 */
function no_overlaps($span) {
    global $ignore;
    list($s, $start) = $span;
    $end = $start + strlen($s);
    foreach ($ignore as $ispan) {
        list($is, $a) = $ispan;
        $b = $a + strlen($is);
        /* Two half-open ranges intersect exactly when each one starts
           before the other ends. This also catches a candidate that
           completely contains an ignored span. */
        if ($start < $b && $a < $end) {
            return false;
        }
    }
    return true;
}
/* STEP 0: get the generated card and regex definitions */
require 'carddata.php';
/* STEP 1: Get the input data and identify any "untouchable" substrings */
$data = file_get_contents('spel_mini.txt');
if ($data === false) {
    /* Nothing to process; stop instead of carrying on with a non-string */
    throw new RuntimeException("Unable to read spel_mini.txt");
}
/* Untouchable spans are either a "[tag ...]text[/tag]" pair whose text
   contains no "[", or any single "[...]" bracket expression. The matches
   are captured together with their byte offsets. */
preg_match_all('/\[([^]\s]+)(\s+.*?)?\][^[]+\[\/\1\]|\[[^]]+\]/S',
    $data, $ignore, PREG_OFFSET_CAPTURE);
/* Keep only the full-pattern matches, as (text, offset) pairs */
$ignore = $ignore[0];
/* STEP 2: Look for card names in the data */
$matches = array();
foreach ($card_regexes as $k => $regex) {
    if (preg_match_all($regex, $data, $candidates, PREG_OFFSET_CAPTURE) === false) {
        /* The regex could not be applied; report it and carry on with the
           remaining ones instead of indexing into a missing result */
        trigger_error("preg_match_all() failed for card regex #$k", E_USER_WARNING);
        continue;
    }
    /* Exclude any matches that overlap with untouchables */
    $candidates = array_filter($candidates[0], 'no_overlaps');
    /* Collect the remaining matches */
    $matches = array_merge($matches, $candidates);
    /* Add only the new matches to the ignore list, so we can exclude any
       overlapping matches from the remaining regexes. Merging the whole of
       $matches here would re-add every earlier match on each pass. */
    $ignore = array_merge($ignore, $candidates);
}
/* STEP 3: Go through and actually do the replacement on the matches
starting from the end, so we don't mess up the indices as we go
along */
/**
 * Ordering callback for (text, offset) match pairs: the pair with the
 * larger offset sorts first.
 *
 * @param array $a First match: array(text, offset)
 * @param array $b Second match: array(text, offset)
 * @return int Offset of $b minus offset of $a
 */
function position_cmp($a, $b) {
    list(, $offset_a) = $a;
    list(, $offset_b) = $b;
    return $offset_b - $offset_a;
}
/* Order the matches from the last offset to the first */
usort($matches, "position_cmp");
/* Wrap each matched card name in [card]...[/card]. Working backwards
   keeps the offsets of the not-yet-processed matches valid. */
foreach ($matches as $match) {
    $name = $match[0];
    $offset = $match[1];
    $tagged = sprintf('[card]%s[/card]', $name);
    $data = substr_replace($data, $tagged, $offset, strlen($name));
}
/* DONE: Print the processed text */
print $data;
/* Report the elapsed wall-clock time since $starttime was taken */
list($usec, $sec) = explode(' ', microtime());
$totaltime = $usec + $sec - $starttime;
printf("Gone in %.3f seconds.\n\n", $totaltime);
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment