Last active
July 14, 2022 23:42
-
-
Save smx-smx/a454a081812163662746b21d1e109e79 to your computer and use it in GitHub Desktop.
Better grep
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* grep.php | |
* author: Stefano Moioli <[email protected]> | |
* | |
* Custom variant of grep that supports: | |
* - utf8/utf16 strings | |
* - hex BE/LE patterns | |
*/ | |
define("CHUNKSIZE", 1024*1024); | |
function scangrep(string $path, array $queries){ | |
$ql = array_map('strlen', $queries); | |
sort($ql, SORT_NUMERIC); | |
$maxLength = end($ql); | |
$offset = 0; | |
$fh = fopen($path, 'rb'); | |
while(!feof($fh)){ | |
fseek($fh, $offset); | |
$chunk = fread($fh, CHUNKSIZE); | |
foreach($queries as $q){ | |
$match_pos = strpos($chunk, $q); | |
if($match_pos !== false){ | |
$match_offset = $offset + $match_pos; | |
// found, don't try other queries (TODO: flag?) | |
fclose($fh); | |
return $match_offset; | |
} | |
} | |
$read = strlen($chunk); | |
if($read > $maxLength){ | |
/** | |
* if we didn't match, and we read more than the query length | |
* we want to make sure the biggest query is included in the window | |
* so we don't skip a partial match | |
*/ | |
$advance = $read - $maxLength; | |
$offset += $advance; | |
} else { | |
$offset += $read; | |
} | |
} | |
fclose($fh); | |
return false; | |
} | |
function utf16(string $text){ | |
return mb_convert_encoding($text, "UTF-16LE", "UTF-8"); | |
} | |
// get queries | |
function get_qs(string $mode, string $text){ | |
$opts = substr($mode, 2); | |
$opts = str_split($opts); | |
$opts = array_flip($opts); | |
$has_opt = function(string $opt) use($opts){ | |
return isset($opts[$opt]); | |
}; | |
switch($mode[1]){ | |
// string | |
case "s": | |
yield $text; | |
// multi encoding | |
if($has_opt('s')){ | |
yield utf16($text); | |
} | |
// case insensitive | |
if($has_opt('i')){ | |
yield strtolower($text); | |
if($has_opt('s')){ | |
yield utf16(strtolower($text)); | |
} | |
} | |
break; | |
// hex | |
case "x": | |
$hex = str_replace(' ', '', $text); | |
yield hex2bin($hex); | |
// multi endian | |
if($has_opt('x')){ | |
$bytesHex = str_split($hex, 2); | |
$hexReverse = implode('', array_reverse($bytesHex)); | |
yield hex2bin($hexReverse); | |
} | |
break; | |
} | |
} | |
function get_qs_getopt(int $argc, array $argv){ | |
$opts = getopt("s::x::", [], $optind); | |
$has_mode = function(string $opt) use($opts){ | |
return isset($opts[$opt]); | |
}; | |
if($has_mode('s')){ | |
$mode = '-s' . $opts['s']; | |
} else if($has_mode('x')){ | |
$mode = '-x' . $opts['x']; | |
} else { | |
throw new InvalidArgumentException(); | |
} | |
$text = $argv[$optind]; | |
return get_qs($mode, $text); | |
} | |
if($argc > 2){ | |
$mode = $argv[1]; | |
$text = $argv[2]; | |
$where = $argv[3] ?? null; | |
} else { | |
$text = $argv[1]; | |
$mode = '-x'; | |
$where = $rgv[2] ?? null; | |
} | |
if($where === null){ | |
$where = __DIR__; | |
} | |
$qs = iterator_to_array(get_qs($mode, $text)); | |
foreach($qs as $q){ | |
print("- q: " . bin2hex($q) . "\n"); | |
} | |
$dit = new RecursiveDirectoryIterator($where); | |
$it = new RecursiveIteratorIterator($dit); | |
foreach($it as $file){ | |
/** @var SplFileObject $file */ | |
if(!$file->isFile()) continue; | |
$path = $file->getPathname(); | |
$ext = $file->getExtension(); | |
if($ext === 'dmp') continue; | |
if(($pos = scangrep($path, $qs)) !== FALSE){ | |
$dir = dirname($path); | |
$friendly_name = ""; | |
$uefi_ui_files = glob("{$dir}/*.ui"); | |
if(count($uefi_ui_files) > 0){ | |
$friendly_name = implode(",", array_map(function($f){ | |
$dat = file_get_contents($f); | |
return str_replace("\x00", "", $dat); | |
}, $uefi_ui_files)); | |
} | |
$path = preg_replace("|/cygdrive/(.*?)/|", '$1:/', $path); | |
printf("[%20s:0x%x] {$path}\n", $friendly_name, $pos); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment