Created
April 10, 2019 07:28
-
-
Save afbora/97c8dfee5d7f5c22e9a2192599975ccc to your computer and use it in GitHub Desktop.
Script to get list of files and file comparison
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* Script to get list of files and file comparison | |
* | |
* @version 2014.04.28 | |
* @author Fedik <getthesite at gmail dot com> | |
* @link http://www.getsite.org.ua | |
* @license GNU/GPL http://www.gnu.org/licenses/gpl.html | |
*/ | |
// Enable error reaporting, in case fault | |
error_reporting(E_ALL); | |
ini_set('display_errors', 'On'); | |
//base options | |
//The path of the folder to read. | |
$path = '.'; | |
//True to recursively search into sub-folders, or an integer to specify the maximum depth. | |
$recurse = true; | |
//A Regexp filter for allowed file names. '/./' - any | |
$filter_allow = '/./'; | |
//Regexp of files to exclude, have biger priority than $filter_allow | |
$filter_exclude = '/(\.gif$|\.jpg$|\.jpeg$|\.png$)/i'; | |
//Regexp of path for exclude, have biger priority than $filter_exclude | |
$filter_exclude_path = '/(\/\.svn|\/\.git|\/\CVS|\/\__MACOSX)/'; | |
//True to read the files, false to read the folders only | |
$findfiles = true; | |
//Scip folders from result | |
$scipfolders = true; | |
// Whether try find the removed files when map comparasion | |
$findremoved = true; | |
// Method for count HASH of each file, for compare in future | |
// Important for make $findremoved work, but can cause a error on big files | |
// Possible values: 'md5', 'sha256', 'haval160,4', 'sha1', 'crc32' ... | |
// see http://www.php.net/manual/en/function.hash-algos.php | |
// false - for disable, | |
$counthash = 'md5'; | |
// Allow file download | |
$download = true; | |
// base name for a map file | |
$map_file_name = 'files_map'; | |
// The date formating in the table | |
$date_format = 'Y-m-d H:i:s'; | |
// How much show the files on the page | |
$pager_limit = 80; | |
//profiling | |
$time_exec = array(); | |
$time_exec['start'] = getmicrotime(); | |
//utilite functions | |
/** | |
* Function to read the files/folders in a folder. | |
* @return array Files. | |
*/ | |
function getItems( | |
$path, //The path of the folder to read. | |
$recurse = true, //True to recursively search into sub-folders, or an integer to specify the maximum depth. | |
$filter_allow = '/./', //A filter for file names. | |
$filter_exclude = '', //Regexp of files to exclude | |
$filter_exclude_path = '', //Regexp of path to exclude | |
$findfiles = true, //True to read the files, false to read the folders | |
$scipfolders = true, //Scip folders from result | |
$counthash = false // Method used for count file hash, false - for disable | |
){ | |
//@set_time_limit(ini_get('max_execution_time')); | |
// Is the path a folder? | |
if (!is_dir($path)){ | |
echo '<b>No valid folder path</b>'; | |
return false; | |
} | |
// use RecursiveDirectoryIterator where it possible, php 5.2.x partiall suport | |
if (class_exists('RecursiveDirectoryIterator')) {//version_compare(PHP_VERSION, '5.3.0', 'ge') | |
return _getItemsDirectoryIterator($path, $recurse, $filter_allow, | |
$filter_exclude, $filter_exclude_path, $findfiles, $scipfolders, $counthash); | |
} | |
$arr = array(); | |
// Read the source directory | |
if (!($handle = @opendir($path))){ | |
return $arr; | |
} | |
while (($file = readdir($handle)) !== false){ | |
// Compute the fullpath | |
$fullpath = $path . '/' . $file; | |
// Compute the isDir flag | |
$isDir = is_dir($fullpath); | |
if ($file != '.' && $file != '..' | |
&& (empty($filter_exclude_path) || !preg_match($filter_exclude_path, $fullpath)) | |
&& (empty($filter_exclude) || !preg_match($filter_exclude, $file)) | |
){ | |
if ((($isDir && !$scipfolders) || ($findfiles && !$isDir)) | |
&& (empty($filter_allow) || preg_match($filter_allow, $file)) | |
){ | |
$about_file = array( | |
'path' => '', | |
'filename' => '', | |
'ext' => '', | |
'size' => 0, //size in bytes | |
'type' => '', //file, folder, link | |
'mtime' => '', //time of last modification (Unix timestamp) | |
'ctime' => '', //time of last inode change (Unix timestamp) | |
'mode' => '', //inode protection mode, permissions , base_convert($file_info['mode'],10,8); | |
'hash' => '', | |
'state' => '',//1.same;2.changed;3.new;4.removed | |
); | |
$about_file['path'] = $fullpath; | |
$about_file['filename'] = pathinfo($file, PATHINFO_FILENAME); | |
$about_file['ext'] = pathinfo($file, PATHINFO_EXTENSION); | |
$about_file['type'] = filetype($fullpath); | |
if($about_file['type'] != 'link'){ | |
$stat = stat($fullpath); | |
$about_file['size'] = $stat['size']; | |
$about_file['mtime'] = $stat['mtime']; | |
$about_file['ctime'] = $stat['ctime']; | |
$about_file['mode'] = $stat['mode']; | |
} | |
if (is_readable($fullpath)){ | |
$about_file['hash'] = $counthash ? hash_file($counthash, $fullpath) : ''; | |
} else { | |
//echo 'File not readable: '.$fullpath.'<br />'; | |
$about_file['hash'] = 'Not Readable!'; | |
} | |
$arr[$fullpath] = $about_file; | |
} | |
// Search recursively | |
if ($isDir && $recurse){ | |
if (is_int($recurse)){ | |
// Until depth 0 is reached | |
$arr = array_merge($arr, getItems($fullpath, $recurse - 1, $filter_allow, | |
$filter_exclude, $filter_exclude_path, $findfiles, $scipfolders, $counthash)); | |
}else{ | |
$arr = array_merge($arr, getItems($fullpath, $recurse, $filter_allow, | |
$filter_exclude, $filter_exclude_path, $findfiles, $scipfolders, $counthash)); | |
} | |
} | |
} | |
} | |
closedir($handle); | |
return $arr; | |
} | |
function _getItemsDirectoryIterator($path, $recurse, $filter_allow, | |
$filter_exclude, $filter_exclude_path, $findfiles, $scipfolders, $counthash) { | |
$arr = array(); | |
$dir_it = new RecursiveDirectoryIterator($path); | |
$dir_it = new RecursiveIteratorIterator($dir_it, | |
RecursiveIteratorIterator::SELF_FIRST , RecursiveIteratorIterator::CATCH_GET_CHILD); | |
//Apply $filter_allow | |
//RegexIterator not always available .. php > 5.2 | |
//if($filter_allow) { | |
//$dir_it = new RegexIterator($dir_it, $filter_allow, RegexIterator::MATCH); | |
//} | |
if(!$recurse) { | |
$dir_it->setMaxDepth(0); | |
} | |
foreach ($dir_it as $fullpath => $fileinfo) { | |
$file = pathinfo($fullpath); | |
if ($file['basename'] == '.' || $file['basename'] == '..' | |
|| (!$findfiles && $fileinfo->isFile()) | |
|| ($scipfolders && $fileinfo->isDir()) | |
|| (!empty($filter_allow) && !preg_match($filter_allow, $file['basename'])) | |
|| (!empty($filter_exclude_path) && preg_match($filter_exclude_path, $fullpath)) | |
|| (!empty($filter_exclude) && preg_match($filter_exclude, $file['basename'])) | |
) { | |
continue; | |
} | |
$about_file = array( | |
'path' => '', | |
'filename' => '', | |
'ext' => '', | |
'size' => 0, //size in bytes | |
'type' => '', //file, folder, link | |
'mtime' => '', //time of last modification (Unix timestamp) | |
'ctime' => '', //time of last inode change (Unix timestamp) | |
'mode' => '', //inode protection mode, permissions , base_convert($file_info['mode'],10,8); | |
'hash' => '', | |
'state' => '',//1.same;2.changed;3.new;4.removed | |
); | |
$about_file['path'] = $fullpath; | |
$about_file['filename'] = $file['filename']; | |
$about_file['ext'] = isset($file['extension']) ? $file['extension'] : ''; | |
$about_file['type'] = $fileinfo->getType(); | |
if($about_file['type'] != 'link'){ | |
$about_file['size'] = $fileinfo->getSize(); | |
$about_file['mtime'] = $fileinfo->getMTime(); | |
$about_file['ctime'] = $fileinfo->getCTime(); | |
$about_file['mode'] = $fileinfo->getPerms(); | |
} | |
if ($fileinfo->isReadable()){ | |
$about_file['hash'] = $counthash ? hash_file($counthash, $fullpath) : ''; | |
} else { | |
//echo 'File not readable: '.$fullpath.'<br />'; | |
$about_file['hash'] = 'Not Readable!'; | |
} | |
$arr[$fullpath] = $about_file; | |
} | |
return $arr; | |
} | |
/** | |
* Rename file | |
* @param $file | |
*/ | |
function renameOldFile($file_name, $path){ | |
$file = $file_name . '.map'; | |
if (!is_file($file)) { | |
return; | |
} | |
$new_name = $file_name .'_' . date('Y-m-d_H.i.s', filectime($file)).'-'. base64_encode($path) .'.map'; | |
return rename($file, $new_name); | |
} | |
/** | |
* Function for sorting files/folders by on of they properties. | |
* @return array sorted Files. | |
*/ | |
function itemsSort( | |
$files, //files Array | |
$key, // key for sorting | |
$dir = SORT_DESC // sorting direction SORT_DESC, SORT_ASC | |
) { | |
$sort_arr = array(); | |
foreach ($files as $k => $f) { | |
$sort_arr[$k] = $f[$key]; | |
} | |
array_multisort($sort_arr, $dir, $files); | |
return $files; | |
} | |
/** | |
* format bytes | |
* @return string | |
*/ | |
function formatBytes($bytes, $precision = 2) { | |
$units = array('B', 'KB', 'MB', 'GB', 'TB'); | |
$bytes = max($bytes, 0); | |
$pow = floor(($bytes ? log($bytes) : 0) / log(1024)); | |
$pow = min($pow, count($units) - 1); | |
// Uncomment one of the following alternatives | |
$bytes /= pow(1024, $pow); | |
//$bytes /= (1 << (10 * $pow)); | |
return round($bytes, $precision) . ' ' . $units[$pow]; | |
} | |
/** | |
* State format | |
* 1 = same, 2 = changed, 3 = new, 4 = removed | |
* @return HTML string | |
*/ | |
function stateFormat($state) { | |
$title = 'unknown'; | |
switch ($state){ | |
case 1: | |
$title = 'same'; | |
break; | |
case 2: | |
$title = 'changed'; | |
break; | |
case 3: | |
$title = 'new'; | |
break; | |
case 4: | |
$title = 'removed'; | |
break; | |
} | |
return '<span class="'.$title.'">'.$title.'</span>'; | |
} | |
/** | |
* Link to file, for allow download | |
* @param string $full_path path to the file | |
* @return string HTML link | |
*/ | |
function linkToFile($full_path){ | |
return '<a href="?download='. base64_encode($full_path) . '" target="_blank">' | |
. $full_path . '</a>'; | |
} | |
/** | |
* Download file method | |
* @param string base64_encode file path | |
*/ | |
function downloadFile($file){ | |
$file_path = realpath(base64_decode($file)); | |
if(!$file_path || !is_file($file_path) || !is_readable($file_path)){ | |
echo 'File "'.$file_path.'" not available!'; | |
exit; | |
} | |
//mime | |
$finfo = finfo_open(FILEINFO_MIME_TYPE); | |
$mime = finfo_file($finfo, $file_path); | |
//send to user | |
header('Content-Description: File Transfer'); | |
header('Content-Type: '.$mime); | |
header('Content-Disposition: attachment; filename="'.pathinfo($file_path, PATHINFO_BASENAME).'"'); | |
header('Content-Transfer-Encoding: binary'); | |
header('Expires: 0'); | |
header('Cache-Control: must-revalidate'); | |
header('Pragma: public'); | |
header('Content-Length: ' . filesize($file_path)); | |
ob_clean(); | |
flush(); | |
readfile($file_path); | |
exit; | |
} | |
/** | |
* build query | |
* @return new query array or query string | |
*/ | |
function buildUrlQuery($query, $build_query = false) { | |
$old_query = !empty($_GET) ? $_GET : array(); | |
foreach ($query as $k => $v){ | |
$old_query[$k] = $v; | |
} | |
return $build_query ? http_build_query($old_query) : $old_query; | |
} | |
/** | |
* Get the current time. | |
* @return float The current time | |
*/ | |
function getmicrotime(){ | |
list ($usec, $sec) = explode(' ', microtime()); | |
return ((float) $usec + (float) $sec); | |
} | |
// check download request | |
if(!empty($_GET['download']) && $download){ | |
downloadFile($_GET['download']); | |
} | |
// init variables | |
$files = array(); | |
$map_file = $map_file_name . '.map'; | |
$map_file_current = empty($_GET['filelist']) ? $map_file : $_GET['filelist'].'.map'; | |
$scan = isset($_GET['scan']); | |
$hashcompare = empty($_GET['hashcompare'])? false : $_GET['hashcompare'] .'.map'; | |
$sort_by = empty($_GET['sort']) ? 'path' : $_GET['sort']; | |
$sort_dir = empty($_GET['dir']) || $_GET['dir'] == 'asc' ? SORT_ASC : SORT_DESC ; | |
$page = empty($_GET['page']) || $_GET['page'] == 1 ? 0 : (int) ($_GET['page'] - 1); | |
$table = ''; | |
$stored_maps = ''; | |
$pager = ''; | |
$table_head = array( | |
'path' => 'Path', | |
'filename' => 'File Name', | |
'ext' => 'Ext', | |
'size' => 'Size', | |
'type' => 'Type', | |
'mtime' => 'Last modification', | |
'ctime' => 'Last inode change', | |
'mode' => 'Permissions', | |
'state' => 'File state', | |
'hash' => 'Hash (' . (string) $counthash . ')', | |
); | |
//prepare base URL query | |
$_GET['filelist'] = empty($_GET['filelist']) ? $map_file_name : $_GET['filelist']; | |
if (isset($_GET['scan'])) { | |
unset($_GET['scan']); | |
} | |
if ($scan && isset($_GET['hashcompare'])) { | |
unset($_GET['hashcompare']); | |
$hashcompare = false; | |
} | |
//run | |
if(is_file($map_file_current) && !$scan && $stored_data = file_get_contents($map_file)){ | |
$files = unserialize($stored_data); | |
$time_exec['Open Stored'] = getmicrotime(); | |
} elseif ($scan) { | |
$files = getItems($path, $recurse, $filter_allow, $filter_exclude, | |
$filter_exclude_path, $findfiles, $scipfolders, $counthash); | |
//keeep old file | |
renameOldFile($map_file_name, $path); | |
file_put_contents($map_file, serialize($files)); | |
$time_exec['Scan Files'] = getmicrotime(); | |
} | |
//'state' => 1.same, 2.changed, 3.new, 4.removed | |
if (!empty($files) && $hashcompare && $stored_map = file_get_contents($hashcompare)) { | |
$files_old = unserialize($stored_map); | |
//find changed and new | |
foreach ($files as $n => &$f) { | |
if (empty($files_old[$n])) { | |
$f['state'] = 3; | |
continue; | |
} | |
if ($f['hash'] != $files_old[$n]['hash']) { | |
$f['state'] = 2; | |
continue; | |
} | |
$f['state'] = 1; | |
} | |
unset($f); | |
//find removed | |
if ($findremoved) { | |
foreach ($files_old as $n => $f) { | |
if (empty($files[$n])) { | |
$files[$n] = $f; | |
$files[$n]['state'] = 4; | |
} | |
} | |
} | |
$time_exec['Comparison'] = getmicrotime(); | |
} | |
$files = itemsSort($files, $sort_by, $sort_dir); | |
$time_exec['Sorting'] = getmicrotime(); | |
$files_total = count($files); | |
//cut by pager | |
if ($pager_limit && $files_total > $pager_limit) { | |
$pager_start = $pager_limit * $page; | |
$files = array_slice($files, $pager_start, $pager_limit); | |
//build pager | |
$pages_total = round($files_total/$pager_limit, 0, PHP_ROUND_HALF_DOWN); | |
$pager .= '<a href="?'.buildUrlQuery(array('page' => 1), true).'"><< First</a> ... '; | |
//5 prev and 5 next | |
for($i = max(1, $page - 5); $i <= min($page + 5, $pages_total); $i++){ | |
$pager .= '<a href="?'.buildUrlQuery(array('page' => $i), true).'">'.$i.'</a>'; | |
} | |
$pager .= ' ... <a href="?'.buildUrlQuery(array('page' => $pages_total), true).'">Last >></a>'; | |
} | |
//render table | |
$table = '<table>'; | |
//head | |
$table .= '<thead><tr>'; | |
foreach ($table_head as $k => $h) { | |
$active = ($k == $sort_by); | |
$table .= '<th class="'.$k.'">' | |
. $h | |
. ' ' | |
. '<a href="?'.buildUrlQuery(array('sort' => $k, 'dir' => 'desc'), true).'">'.(($active && $sort_dir == SORT_DESC) ? '⬆' : '↑').'</a>'//desc | |
. ' ' | |
. '<a href="?'.buildUrlQuery(array('sort' => $k, 'dir' => 'asc'), true).'">'.(($active && $sort_dir == SORT_ASC) ? '⬇' : '↓') .'</a>'//asc | |
.'</th>'; | |
} | |
$table .= '</tr></thead>'; | |
if($files_total) { | |
//rows | |
$table .= '<tbody>'; | |
$columns = array_keys($table_head); | |
foreach ($files as $file) { | |
$table .= '<tr>'; | |
foreach ($columns as $col) { | |
switch ($col) { | |
case 'size': | |
$table .= '<td>'.formatBytes($file[$col]).'</td>'; | |
break; | |
case 'mtime': | |
case 'ctime': | |
$table .= '<td>'. ($file[$col] ? date($date_format, $file[$col]) : '') .'</td>'; | |
break; | |
case 'mode': | |
$table .= '<td>'.base_convert($file[$col],10,8).'</td>'; | |
break; | |
case 'state': | |
$table .= '<td>'.stateFormat($file[$col]).'</td>'; | |
break; | |
case 'path': | |
$table .= '<td>'. ( $download ? linkToFile($file[$col]) : $file[$col]) .'</td>'; | |
break; | |
default: | |
$table .= '<td>'.$file[$col].'</td>';; | |
break; | |
} | |
} | |
$table .= '</tr>'; | |
} | |
$table .= '</tbody>'; | |
} else { | |
$table .= '<tbody> | |
<tr> | |
<td colspan="'.count($table_head).'"> | |
<p>No filemap found. Please click <a href="?'. buildUrlQuery(array('scan' => true, 'hashcompare' => ''), true).'" title="click for scan">"Scan"</a></p> | |
</td> | |
</tr> | |
</tbody>'; | |
} | |
$table .= '</table>'; | |
//render stored map files list for hashcompare | |
$maps = getItems('.', false, '/('.base64_encode($path).'|'.$map_file_name.')\.map$/', null, null, true, true); | |
rsort($maps); | |
$tmp_arr = array(); | |
foreach ($maps as $map) { | |
$tmp_arr[] = '<li>'.$map['filename']. ' ' | |
.'<a href="?'.buildUrlQuery(array('sort' => 'state', 'dir' => 'desc', 'hashcompare' => $map['filename']), true).'">Compare</a> ' | |
.'<a href="?'.buildUrlQuery(array('sort' => 'path', 'dir' => 'desc', 'page' => 1, 'hashcompare' => '', 'filelist' => $map['filename']), true).'">Open</a>' | |
.'</li>'; | |
} | |
$stored_maps = '<ul>'.implode('', $tmp_arr).'</ul>'; | |
$time_exec['Render'] = getmicrotime(); | |
?> | |
<!doctype html> | |
<html lang="en-US"> | |
<head> | |
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
<title>Files list</title> | |
<style type="text/css"> | |
body { | |
font: 12px Helvetica, sans-serif; | |
background: #fff; | |
} | |
table { | |
width: 100%; | |
border: 1px solid grey; | |
border-spacing: 0; | |
border-collapse: collapse; | |
} | |
table th,table td { | |
text-align: left; | |
padding: 3px; | |
} | |
table th { | |
border: 1px solid grey; | |
background: #F2F2F2; | |
} | |
table th a {color: #000;} | |
table tr:nth-child(even) { | |
background: #F2F2F2; | |
} | |
table tr:hover{ | |
background: #EAF2D3; | |
} | |
.pager a{ | |
padding: 3px; | |
margin-right: 3px; | |
} | |
span.new, | |
span.changed, | |
span.removed {color: #bf1122;} | |
span.same{ color: #009159} | |
</style> | |
</head> | |
<body> | |
<div id="files-table"> | |
<p>Files total: <?php echo $files_total;?><br /> | |
Base path: <?php echo realpath($path);?><br /> | |
<a href="?<?php echo buildUrlQuery(array('scan' => true, 'hashcompare' => ''), true); ?>" title="click for scan again">Scan again</a></p> | |
<p>map file: <b><?php echo $map_file_current; ?></b> | |
<?php if ($hashcompare): ?> vs <b><?php echo $hashcompare; ?></b> <?php endif; ?> | |
</p> | |
<?php echo $table; ?> | |
</div> | |
<p class="pager"><?php echo $pager; ?></p> | |
<p> | |
Previously saved the map files:<br /> | |
<?php echo $stored_maps; ?> | |
</p> | |
<p> | |
<?php | |
//profiling info | |
foreach ($time_exec as $k => $t) { | |
if ($k == 'start'){ | |
$prev = $t; | |
continue; | |
} | |
echo 'Time <b>'. $k .'</b>: ' . round(($t - $prev), 6) .'<br />'; | |
$prev = $t; | |
} | |
echo 'Memory used: '. formatBytes(memory_get_usage()); | |
?> | |
</p> | |
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment