Created
March 1, 2014 17:00
-
-
Save Danack/9293028 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php

// A source line that contains this phrase is exempt from the unsafe-function check.
define('BYTE_SAFE_PHRASE', 'byte safe');

// Prefix prepended to every directory name listed below.
define('PATH_TO_ROOT', '../');

// I always close off my php files with "\?\>" so as to detect accidental truncations.
// Other people seem to leave that off, so only certain directories are checked for
// a missing "\?\>" at the end of files.
$directoriesToCheck = array_map(
    function ($directoryName) {
        return PATH_TO_ROOT.$directoryName;
    },
    [
        'basereality',
        'conf',
        'php_shared',
        'src',
        'templates',
        'tools',
    ]
);

// Shared state written by the individual checks and read by the final summary.
$GLOBALS['errorInfoArray'] = [];
$GLOBALS['unsafeFunctionCount'] = 0;

// Byte-oriented string functions mapped to the name suggested as their multi-byte
// replacement; null means no replacement is suggested. The order of the keys is
// significant: it is the order in which the names are tried by the detection regex.
$GLOBALS['unsafeFunctionsReplaceMap'] = [
    'mail'           => 'mb_send_mail',
    'split'          => null, // 'mb_split'; deprecated function - just don't use it
    // mb_strcut and mb_strimwidth have no direct non-mb equivalent
    'stripos'        => 'mb_stripos',
    'stristr'        => 'mb_stristr',
    'strlen'         => 'mb_strlen',
    'strpos'         => 'mb_strpos',
    'strrpos'        => 'mb_strrpos',
    'strrchr'        => 'mb_strrchr',
    'strripos'       => 'mb_strripos',
    'strstr'         => 'mb_strstr',
    'strtolower'     => 'mb_strtolower',
    'strtoupper'     => 'mb_strtoupper',
    // mb_strwidth and mb_substitute_character have no direct non-mb equivalent
    'substr_count'   => 'mb_substr_count',
    'substr'         => 'mb_substr',
    'str_ireplace'   => null,
    'str_split'      => 'mb_str_split',
    'strcasecmp'     => 'mb_strcasecmp',
    'strcspn'        => null, // TODO - implement alternative
    // 'strrev' => 'mb_strrev' is not enabled yet. TODO - check this works
    'strspn'         => null, // TODO - implement alternative
    'substr_replace' => 'mb_substr_replace',
    'lcfirst'        => null,
    'ucfirst'        => 'mb_ucfirst',
    'ucwords'        => 'mb_ucwords',
    'wordwrap'       => null,
];

//*******************************************
// Start of tool
//*******************************************
try {
    checkFiles($directoriesToCheck);
} catch (Exception $e) {
    echo 'Boom ', $e->getMessage();
}

// The functions below are declared at file level, so they are already available
// to the call above even though execution stops here.
exit(0);
//*******************************************
// End of tool
//*******************************************
/**
 * Run every check over the given directories and print a summary.
 *
 * Each directory is walked with scan_directory_recursively(), which calls
 * checkFile() for every file it finds. The summary is built from the two
 * globals the checks write to: $GLOBALS['errorInfoArray'] and
 * $GLOBALS['unsafeFunctionCount'].
 *
 * @param array $directoriesToCheck Paths of the directories to scan.
 */
function checkFiles($directoriesToCheck)
{
    foreach ($directoriesToCheck as $directoryToCheck) {
        // The scanner returns false for a path that is missing, not a directory or
        // not readable. Say so, otherwise a typo in the configuration silently
        // produces a clean report.
        if (scan_directory_recursively($directoryToCheck, 'checkFile') === false) {
            echo "Warning - could not scan directory ".$directoryToCheck."\r\n";
        }
    }

    echo "File checking complete\r\n";

    $numberOfIssues = count($GLOBALS['errorInfoArray']);
    $unsafeFunctionCount = $GLOBALS['unsafeFunctionCount'];

    if ($unsafeFunctionCount > 0) {
        // Terminated with a line ending so the next message starts on its own line.
        echo "There are ".$unsafeFunctionCount." unsafe functions to refactor.\r\n";
    }

    if ($numberOfIssues > 0) {
        echo "There were $numberOfIssues issues in total.\r\n";
    }

    if ($numberOfIssues == 0 && $unsafeFunctionCount == 0) {
        echo "No issues were detected.";
    }
}
/**
 * Run the checks that apply to one file, chosen by its extension.
 *
 * Files whose extension is not in the text-file list are ignored. Listed files
 * get the BOM check (without the start/end-of-file check) and the non-UTF-8
 * check; php and inc files additionally get the unsafe-function check.
 *
 * @param array $nextFile File details; the 'extension' and 'path' entries are read.
 */
function checkFile($nextFile)
{
    $scriptExtensions = ['php', 'inc'];
    $textExtensions = ['css', 'js', 'php', 'inc', 'tpl', 'html'];

    $extension = $nextFile['extension'];

    // Nothing to do for file types that are not on the list.
    if (!in_array($extension, $textExtensions)) {
        return;
    }

    $target = $nextFile['path'];

    checkForBom($target, false);
    checkFileForNonUTF8($target);

    if (in_array($extension, $scriptExtensions)) {
        checkForUnsafeFunctions($target);
    }
}
/**
 * Walk a directory tree and describe every readable entry found in it.
 *
 * Unreadable entries, and entries that are neither a directory nor a regular
 * file, are skipped. Directories are descended into recursively.
 *
 * @param string        $directory Directory to scan; trailing slashes are ignored.
 * @param callable|null $function  When given, called once per file with that
 *                                 file's details array.
 *
 * @return array|false List of entries, or false when $directory does not exist,
 *                     is not a directory, or cannot be read. A file entry has the
 *                     keys path, name, extension, size and kind ('file'); a
 *                     directory entry has path, name, kind ('directory') and
 *                     content (the result of scanning that directory).
 */
function scan_directory_recursively($directory, callable $function = null)
{
    // Paths are joined with '/' below, so drop any trailing slash first.
    $directory = rtrim($directory, '/');

    if (!is_dir($directory) || !is_readable($directory)) {
        return false;
    }

    // The directory can still fail to open (for example if it disappears between
    // the checks above and this call); treat that like any other unreadable path.
    $directoryHandle = opendir($directory);
    if ($directoryHandle === false) {
        return false;
    }

    $directoryTree = [];

    while (false !== ($file = readdir($directoryHandle))) {
        if ($file === '.' || $file === '..') {
            continue;
        }

        $path = $directory.'/'.$file;

        if (!is_readable($path)) {
            continue;
        }

        if (is_dir($path)) {
            $directoryTree[] = [
                'path'    => $path,
                'name'    => $file,
                'kind'    => 'directory',
                'content' => scan_directory_recursively($path, $function),
            ];
        } elseif (is_file($path)) {
            $nextFile = [
                'path'      => $path,
                'name'      => $file,
                // pathinfo() yields '' for a name without a dot, so a file that is
                // simply called "php" is not mistaken for one with a .php extension.
                'extension' => pathinfo($file, PATHINFO_EXTENSION),
                'size'      => filesize($path),
                'kind'      => 'file',
            ];

            if ($function !== null) {
                $function($nextFile);
            }

            $directoryTree[] = $nextFile;
        }
    }

    closedir($directoryHandle);

    return $directoryTree;
}
/**
 * Report a file that starts with a UTF-8 byte order mark and, optionally, one
 * that does not start with '<' and end with '>'.
 *
 * Every problem is echoed and appended to $GLOBALS['errorInfoArray'] so that it
 * is reflected in the final summary.
 *
 * @param string $filePath    File to inspect.
 * @param bool   $checkForEnd When true, also check the first and last byte of the file.
 */
function checkForBom($filePath, $checkForEnd = false)
{
    $handle = fopen($filePath, "r");
    if ($handle === false) {
        $errorString = "Error - could not open file ".$filePath." to check for a bom.\r\n";
        echo $errorString;
        $GLOBALS['errorInfoArray'][] = $errorString;

        return;
    }

    // The UTF-8 BOM is three bytes long; a leading 0xEF on its own is also how many
    // ordinary characters (U+F000 to U+FFFF) start, so all three bytes are compared.
    $leadingBytes = fread($handle, 3);
    if ($leadingBytes === false) {
        $leadingBytes = '';
    }

    $byteEnd = false;
    // fseek() fails on an empty file; $byteEnd then stays false and is reported below.
    if ($checkForEnd && fseek($handle, -1, SEEK_END) === 0) {
        $byteEnd = fread($handle, 1);
    }

    fclose($handle);

    if ($leadingBytes === "\xEF\xBB\xBF") {
        $errorString = "Error - php file ".$filePath." has bom!\r\n";
        echo $errorString;
        $GLOBALS['errorInfoArray'][] = $errorString;
    }

    if (!$checkForEnd) {
        return;
    }

    $byteOne = ($leadingBytes !== '') ? $leadingBytes[0] : '';

    if ($byteOne != '<') {
        $errorString = "php file ".$filePath." doesn't start with <, which is weird.\r\n";
        echo $errorString;
        $GLOBALS['errorInfoArray'][] = $errorString;
    }

    if ($byteEnd != '>') {
        $errorString = "php file ".$filePath." ends with $byteEnd instead of >, which is weird.\r\n";
        echo $errorString;
        $GLOBALS['errorInfoArray'][] = $errorString;
    }
}
/**
 * Scan a file for bytes that are not part of a well-formed UTF-8 sequence.
 *
 * At most one error is reported per line. It lists every offending byte on that
 * line in hexadecimal, plus the byte offset of the first one. Each error is
 * echoed and appended to $GLOBALS['errorInfoArray'].
 *
 * If the file cannot be read a message is printed and the script exits.
 *
 * @param string $filename File to scan.
 */
function checkFileForNonUTF8($filename)
{
    $fileLines = file($filename);

    // file() returns an empty array for an empty file; only a strict false means
    // that reading failed. An empty file must not abort the whole run.
    if ($fileLines === false) {
        echo "Failed to open file [$filename] for checking for ascii only text aborting.";
        exit(0);
    }

    // One well-formed UTF-8 sequence. Overlong forms, surrogates and code points
    // above U+10FFFF are deliberately not matched. The pattern has no repetition,
    // so long lines cannot trigger deep backtracking.
    $wellFormedSequence = '/
          [\x00-\x7F]
        | [\xC2-\xDF][\x80-\xBF]
        | \xE0[\xA0-\xBF][\x80-\xBF]
        | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}
        | \xED[\x80-\x9F][\x80-\xBF]
        | \xF0[\x90-\xBF][\x80-\xBF]{2}
        | [\xF1-\xF3][\x80-\xBF]{3}
        | \xF4[\x80-\x8F][\x80-\xBF]{2}
    /x';

    foreach ($fileLines as $index => $fileLine) {
        // Fast path: with the u modifier PCRE validates the subject first, so the
        // empty pattern only matches when the whole line is valid UTF-8.
        if (preg_match('//u', $fileLine) === 1) {
            continue;
        }

        // Splitting on well-formed sequences leaves only the runs of invalid bytes,
        // each paired with its byte offset in the line.
        $invalidRuns = preg_split(
            $wellFormedSequence,
            $fileLine,
            -1,
            PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE
        );
        if ($invalidRuns === false || count($invalidRuns) === 0) {
            continue;
        }

        $invalidBytes = '';
        foreach ($invalidRuns as $invalidRun) {
            $invalidBytes .= $invalidRun[0];
        }

        $line = $index + 1;
        $offset = $invalidRuns[0][1];

        // The bytes are printed as hex so that the report itself stays valid text.
        $errorString = "Non-utf8 character [0x".bin2hex($invalidBytes)."] in file $filename"
            ." at line $line, byte offset = $offset.\r\n";
        echo $errorString;
        $GLOBALS['errorInfoArray'][] = $errorString;
    }
}
/**
 * Check whether a file calls any function that is dangerous for multi-byte text.
 *
 * The function names come from the keys of $GLOBALS['unsafeFunctionsReplaceMap'];
 * the mapped value, when not null, is printed as the suggested replacement.
 * Every reported call increments $GLOBALS['unsafeFunctionCount'], whether or not
 * a replacement is known. A line that contains BYTE_SAFE_PHRASE is not reported.
 *
 * If the file cannot be read a message is printed and the script exits.
 *
 * @param string $filePath File to scan.
 */
function checkForUnsafeFunctions($filePath)
{
    $quotedNames = array_map(
        function ($functionName) {
            return preg_quote($functionName, '/');
        },
        array_keys($GLOBALS['unsafeFunctionsReplaceMap'])
    );

    // The lookbehind rejects a name that follows an identifier character or a quote
    // (so mb_strlen and 'strlen' are ignored) while still allowing a call at the
    // very start of a line. The name must be followed directly by '(' so that
    // longer identifiers such as strlens( are not mistaken for strlen(.
    $regex = '/(?<![\w\'])('.implode('|', $quotedNames).')\(/u';

    $fileLines = file($filePath);

    // file() returns an empty array for an empty file; only a strict false means
    // that reading failed. An empty file must not abort the whole run.
    if ($fileLines === false) {
        echo "Failed to open file [$filePath] for checking for ascii only text aborting.";
        exit(0);
    }

    foreach ($fileLines as $index => $fileLine) {
        $matches = [];

        // preg_match_all() finds every call on the line. It returns false when the
        // line is not valid UTF-8, which is treated as "nothing found".
        if (!preg_match_all($regex, $fileLine, $matches)) {
            continue;
        }

        if (mb_stripos($fileLine, BYTE_SAFE_PHRASE) !== false) {
            continue;
        }

        $line = $index + 1;

        foreach ($matches[1] as $functionFound) {
            echo "Unsafe function [$functionFound] detected on line $line in file $filePath\n";

            $replacement = $GLOBALS['unsafeFunctionsReplaceMap'][$functionFound];
            if ($replacement != null) {
                echo "Please replace with $replacement \n";
            }

            // Counted even without a known replacement, so the summary cannot claim
            // that nothing was found after a call has been reported.
            $GLOBALS['unsafeFunctionCount']++;
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment