Created
August 21, 2015 15:50
-
-
Save kmark/a01c1463242e435f6cb5 to your computer and use it in GitHub Desktop.
ProFinder - The ProGuard obfuscation tracker.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env php | |
<?php | |
/******************************************************************************************************************* | |
* ProFinder v1.0 * | |
* http://forum.xda-developers.com/android/software/profinder-proguard-obfuscation-tracker-t3183647 * | |
******************************************************************************************************************* | |
* Copyright 2015 Kevin Mark * | |
* * | |
* Licensed under the Apache License, Version 2.0 (the "License"); * | |
* you may not use this file except in compliance with the License. * | |
* You may obtain a copy of the License at * | |
* * | |
* http://www.apache.org/licenses/LICENSE-2.0 * | |
* * | |
* Unless required by applicable law or agreed to in writing, software * | |
* distributed under the License is distributed on an "AS IS" BASIS, * | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * | |
* See the License for the specific language governing permissions and * | |
* limitations under the License. * | |
******************************************************************************************************************* | |
* REQUIRED ARGUMENTS: * | |
* -c, --classes A comma-separated list of old classes to find in the new directory * | |
* -o, --old Path of the old version's smali directory * | |
* -n, --new Path of the new version's smali directory * | |
* * | |
* OPTIONS: * | |
* -l, --sig-len The minimum number of characters a particular signature string can be (default: 3) * | |
* -m, --sig-match The minimum pct of matches for a successful signature class match (default: 0.70) * | |
* -r, --sig-occr The minimum number of signature strings an old class must have (default: 3) * | |
* -t, --tolerance Only apply the diff method on new classes +/- this number of lines (default: 150) * | |
* * | |
* FLAGS: * | |
* -d, --always-try-diff Use the diff method even if signature matching was successful (default: off) * | |
* -v, --verbose Enable additional descriptive log output (default: off) * | |
*******************************************************************************************************************/ | |
// Parse the command line and publish every setting as a constant used by the functions below.
$opts = getopt('c:dl:m:n:o:r:t:v', [ 'classes:', 'always-try-diff', 'sig-len:', 'sig-match:', 'new:', 'old:', 'sig-occr:', 'tolerance:', 'verbose' ]);
if (!is_array($opts)) {
    // getopt() returns false when parsing fails; treat that the same as "no options supplied"
    // so the required-option checks below can print their usual messages.
    $opts = [];
}
define('VERBOSE', array_key_exists('v', $opts) || array_key_exists('verbose', $opts));

println('ProFinder v1.0');
verbose(' with PHP %s', PHP_VERSION);
println('-------------');

// Tolerate padded or empty entries such as "a, b" or "a,,b": trim each name and drop the blanks.
$oldClasses = array_values(array_filter(
    array_map('trim', explode(',',
        getSingleOpt($opts, 'c', 'classes', 'Specify a comma separated class list with -c or --classes'))),
    'strlen'
));
if (count($oldClasses) === 0) {
    println('Specify a comma separated class list with -c or --classes');
    exit(1);
}

// Strip trailing directory separators from a smali directory argument. The directory iterator
// drops a trailing separator from the pathnames it reports, while this script appends its own
// separator when building and splicing class paths, so "old/" would otherwise never match.
// A bare root ("/") or a drive specifier ("C:\") is left untouched.
$normalizeDir = function ($path) {
    $trimmed = rtrim($path, '/' . DIRECTORY_SEPARATOR);
    return ($trimmed === '' || substr($trimmed, -1) === ':') ? $path : $trimmed;
};

define('OLD_PATH', $normalizeDir(getSingleOpt($opts, 'o', 'old', 'Specify an old smali directory with -o or --old')));
define('NEW_PATH', $normalizeDir(getSingleOpt($opts, 'n', 'new', 'Specify a new smali directory with -n or --new')));
define('LINE_TOLERANCE', (int)getSingleOpt($opts, 't', 'tolerance', false, 150));
define('SIGNATURE_STR_MIN_LENGTH', (int)getSingleOpt($opts, 'l', 'sig-len', false, 3));
define('SIGNATURE_STR_MIN_OCCURRENCE', (int)getSingleOpt($opts, 'r', 'sig-occr', false, 3));
define('SIGNATURE_STR_MATCH_THRESHOLD', (double)getSingleOpt($opts, 'm', 'sig-match', false, 0.70));
define('ALWAYS_TRY_DIFF', array_key_exists('d', $opts) || array_key_exists('always-try-diff', $opts));

// Check the config constants for proper values or exit
sanityCheck();

// Get the smali statistics for both our old and new smali directories.
// Each entry maps a .smali pathname to [line count, signature strings].
verbose('Calculating old smali statistics...');
$oldSmaliStats = getSmaliStats(OLD_PATH);
verbose('Calculating new smali statistics...');
$newSmaliStats = getSmaliStats(NEW_PATH);
verbose('%d old classes, %d new classes', count($oldSmaliStats), count($newSmaliStats));
// Iterate over each old class we need to find in the new smali directory.
// For every class the signature string method is tried first; the (much slower) diff method is
// used when no good signature match exists or when ALWAYS_TRY_DIFF is enabled.
foreach ($oldClasses as $class) {
    verbose('-------------');
    verbose('Finding potential matches for %s...', $class);

    // Create the fully qualified old class file path from just the name
    $classPath = OLD_PATH . DIRECTORY_SEPARATOR . $class . '.smali';

    // Without statistics for the old class there is nothing to compare against. Report it and
    // move on instead of indexing a missing key (which would hand null to count() below).
    if (!isset($oldSmaliStats[$classPath])) {
        println('%s could not be found in the old smali directory.', $class);
        continue;
    }

    // A shortcut to the statistics for the old class: [0] = line count, [1] = signature strings
    $target = $oldSmaliStats[$classPath];

    // Arrays to store the matches for both methods
    $sigMatches = [];
    $diffMatches = [];

    $totalSigStrings = count($target[1]);
    verbose('This class has %d signature strings.', $totalSigStrings);

    // Only attempt the signature string method if we have enough signatures worth comparing
    if ($totalSigStrings >= SIGNATURE_STR_MIN_OCCURRENCE) {
        verbose('Finding classes with matching signature strings...');
        $sigMatches = getSigMatches($target[1], $newSmaliStats);
        $totalSigMatches = count($sigMatches);
        verbose('Found %d classes with at least one common signature.', $totalSigMatches);

        // The best match only exists when at least one new class shares a signature; reading
        // $sigMatches[0] on an empty list would be an undefined offset.
        if ($totalSigMatches > 0) {
            // $totalSigStrings cannot be zero here: sanityCheck confirms
            // SIGNATURE_STR_MIN_OCCURRENCE > 0 and we are inside the >= check above.
            $sigMatchPct = $sigMatches[0][1] / $totalSigStrings;

            // Have we found enough common strings to be considered a match?
            if ($sigMatchPct >= SIGNATURE_STR_MATCH_THRESHOLD) {
                println('%s -> %s', $class, spliceClassPath($sigMatches[0][0]));
                verbose(' - %.2f%% signature match', $sigMatchPct * 100.00);
                if ($totalSigMatches > 1) {
                    // List the second best match since it might be useful if our best match is incorrect
                    verbose(' - Next best match is %s with a %.2f%% signature match.', spliceClassPath($sigMatches[1][0]), $sigMatches[1][1] / $totalSigStrings * 100.00);
                }
                // Successful match. If ALWAYS_TRY_DIFF is false we will move onto the next old class
                if (!ALWAYS_TRY_DIFF) {
                    continue;
                }
            } else {
                // No new files meet the match threshold. Output the closest match anyway.
                verbose('Closest match is %s with %d intersecting signatures.', spliceClassPath($sigMatches[0][0]), $sigMatches[0][1]);
            }
        }
    }

    // If we've made it this far either ALWAYS_TRY_DIFF is true or we couldn't find a good signature match.
    // Apply the diff method.
    verbose('Finding syntactically similar classes...');
    $diffMatches = getDiffMatches($classPath, $target[0], $newSmaliStats);
    $totalDiffMatches = count($diffMatches);
    verbose('%d classes fall within the line tolerance.', $totalDiffMatches);

    if ($totalDiffMatches > 0) {
        println('%s -> %s', $class, spliceClassPath($diffMatches[0][0]));
        verbose(' - %d differences.', $diffMatches[0][1]);
        if ($totalDiffMatches > 1) {
            verbose(' - Next best match is %s with %d differences.', spliceClassPath($diffMatches[1][0]), $diffMatches[1][1]);
        }
    } else {
        // ¯\_(ツ)_/¯
        println('No matching class could be located.');
    }
}
// Rank the new classes by how many of the target's signature strings they share.
//   $targetSigs    - list of signature strings taken from the old class
//   $newSmaliStats - map of pathname => [line count, signature strings] for the new classes
// Returns a list of [pathname, shared signature count] pairs, best match (highest count) first.
// Classes that share no signature string at all are left out.
function getSigMatches($targetSigs, $newSmaliStats) {
    $ranked = [];

    foreach ($newSmaliStats as $candidatePath => $candidateStats) {
        // Every target signature that also occurs in the candidate counts as one hit
        $shared = count(array_intersect($targetSigs, $candidateStats[1]));
        if ($shared < 1) {
            continue;
        }
        $ranked[] = [$candidatePath, $shared];
    }

    // Descending by hit count so that index 0 holds the strongest candidate
    usort($ranked, function ($left, $right) {
        return $right[1] - $left[1];
    });

    return $ranked;
}
// Get a (asc) sorted list of new classes that are within the line tolerance of the old class,
// along with the number of differing lines reported by diff.
//   $targetPath    - pathname of the old class' smali file
//   $targetLines   - number of lines in the old class' smali file
//   $newSmaliStats - map of pathname => [line count, signature strings] for the new classes
// Returns a list of [pathname, difference count] pairs, best match (fewest differences) first.
// Candidates for which diff could not be run or reported trouble are skipped; previously the
// output was piped through "wc -l", so a failing diff looked like a perfect 0-difference match.
function getDiffMatches($targetPath, $targetLines, $newSmaliStats) {
    $diffMatches = [];
    $minLines = $targetLines - LINE_TOLERANCE;
    $maxLines = $targetLines + LINE_TOLERANCE;

    foreach ($newSmaliStats as $path => $stats) {
        // Ignore this file if it is more than LINE_TOLERANCE larger or smaller than the old one
        if ($stats[0] > $maxLines || $stats[0] < $minLines) {
            continue;
        }

        $output = [];
        $status = 0;
        $lastLine = exec(
            'diff -y --suppress-common-lines ' . escapeshellarg($targetPath) . ' ' . escapeshellarg($path),
            $output,
            $status
        );

        // diff exits with 0 (identical) or 1 (differences found); anything above that means
        // trouble (unreadable file, diff not installed, ...). exec() itself returns false when
        // the command could not be started at all.
        if ($lastLine === false || $status > 1) {
            continue;
        }

        // One output line per differing line, since common lines are suppressed
        $diffMatches[] = [$path, count($output)];
    }

    // Sort the matches in ascending order so the first index will be the best match (fewest diffs)
    usort($diffMatches, function ($a, $b) {
        return $a[1] - $b[1];
    });

    return $diffMatches;
}
// Recursively calculate the statistics needed to perform the class matching analysis.
//   $path - directory to scan for .smali files
// Returns a map of pathname => [number of lines, list of signature strings]. The pathname is used
// as the key since it is unique and conveniently contains the class name.
// Empty .smali files are recorded with zero lines (reading them with a zero-length fread() used
// to fail); files that cannot be read are skipped.
function getSmaliStats($path) {
    $stats = [];
    $files = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($path));

    // Recursively loop over every file
    foreach ($files as $file) {
        /** @var SplFileInfo $file */
        // Not a regular .smali file? Not interested.
        if ($file->getExtension() !== 'smali' || !$file->isFile()) {
            continue;
        }

        $pathname = $file->getPathname();

        // Read the whole file into memory; file_get_contents() copes with empty files
        $contents = file_get_contents($pathname);
        if ($contents === false) {
            // Unreadable file: PHP has already emitted a warning, so just leave it out
            continue;
        }

        $stats[$pathname] = [
            // [0] = number of lines in the file
            substr_count($contents, "\n"),
            // [1] = array of signature strings found in the file
            getSigStrings($contents)
        ];
    }

    return $stats;
}
// Use a regular expression to get an array of signature strings from the given smali contents.
//   $contents - the full text of one smali file
// Returns the literal of every const-string (and const-string/jumbo) instruction whose text is
// at least SIGNATURE_STR_MIN_LENGTH characters long, in file order; duplicates are kept.
// Lines may end in either LF or CRLF.
function getSigStrings($contents) {
    // "~" is used as the delimiter so the "/" in "/jumbo" needs no escaping. The optional \r
    // before the end-of-line anchor lets CRLF-terminated files match as well.
    $pattern = '~^\\s*const-string(?:/jumbo)? .+, "(.{' . SIGNATURE_STR_MIN_LENGTH . ',})"\\r?$~m';

    if (preg_match_all($pattern, $contents, $matches)) {
        return $matches[1];
    }
    return [];
}
// printf() with a newline appended to the format string.
//   $format - printf-style format; any further arguments are the values to substitute
function println($format) {
    // Everything after the format is forwarded to printf() untouched
    $printfArgs = array_slice(func_get_args(), 1);
    array_unshift($printfArgs, $format . "\n");
    call_user_func_array('printf', $printfArgs);
}
// Same as println(), but silent unless the VERBOSE constant is true.
//   $format - printf-style format; any further arguments are the values to substitute
function verbose($format) {
    if (!VERBOSE) {
        return;
    }
    $forwarded = func_get_args();
    call_user_func_array('println', $forwarded);
}
// Extract a single option value from the array returned by getopt().
//   $options - parsed options as returned by getopt()
//   $short   - short option name (without the dash); takes precedence over $long
//   $long    - long option name (without the dashes)
//   $die     - message to print before exiting with status 1 when the option is missing,
//              or false if the option is optional
//   $default - value returned when the option is missing and $die is false
// When an option is given more than once getopt() hands back an array of values; the last one
// wins so callers always receive a single value.
function getSingleOpt($options, $short, $long, $die = false, $default = null) {
    foreach ([$short, $long] as $name) {
        if (!array_key_exists($name, $options)) {
            continue;
        }
        $value = $options[$name];
        if (is_array($value)) {
            // Repeated option, e.g. "-t 10 -t 20": use the most recent occurrence
            $value = end($value);
        }
        return $value;
    }

    if ($die !== false) {
        println($die);
        exit(1);
    }
    return $default;
}
// Turn the pathname of a new smali file into its fully qualified class name.
//   $class - pathname of a .smali file located below NEW_PATH
// Drops the leading NEW_PATH plus one separator character and the trailing ".smali".
function spliceClassPath($class) {
    $prefixLength = strlen(NEW_PATH) + 1;
    $suffixLength = strlen('.smali');
    return substr($class, $prefixLength, -$suffixLength);
}
// Validate the runtime configuration constants.
// The checks run in order; the first one that fails has its message printed and the script
// exits with status 1. Returns normally (no value) when every check passes.
function sanityCheck() {
    // Each entry pairs a message with a lazily evaluated test that must hold
    $rules = [
        ['The old smali path is not a directory.', function () {
            return is_dir(OLD_PATH);
        }],
        ['The new smali path is not a directory.', function () {
            return is_dir(NEW_PATH);
        }],
        ['The line tolerance must be a non-negative whole number.', function () {
            return LINE_TOLERANCE >= 0;
        }],
        ['The minimum signature string length must be greater than zero.', function () {
            return SIGNATURE_STR_MIN_LENGTH >= 1;
        }],
        ['The minimum signature occurrence must be greater than zero.', function () {
            return SIGNATURE_STR_MIN_OCCURRENCE >= 1;
        }],
        ['The minimum signature match threshold must be from 0 (0%%) to 1 (100%%) in decimal form.', function () {
            return SIGNATURE_STR_MATCH_THRESHOLD <= 1 && SIGNATURE_STR_MATCH_THRESHOLD >= 0;
        }],
    ];

    foreach ($rules as $rule) {
        $message = $rule[0];
        $holds = $rule[1];
        if (!$holds()) {
            println($message);
            exit(1);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment