Created
December 25, 2019 02:48
-
-
Save vijinho/09f7b542d2671bf9b67db3ef58e6767d to your computer and use it in GitHub Desktop.
Script that lists files and folders whose names can be transliterated to non-accented ASCII characters, and optionally renames them. The rename feature was tested successfully on a filesystem of over 65,000 files - USE AT OWN RISK!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/php
<?php
// Show, and optionally rename, files and dirs below the current working
// folder to their transliterated, unaccented ASCII names.
// 2019-12-24 Vijay Mahrra [email protected]
// code is public domain

// Runtime switches. Nothing is renamed unless RENAME *and* the matching
// RENAME_FILES / RENAME_DIRS switch are both enabled.
define('DEBUG', true);         // print diagnostics (filter lists, tmp file, find command)
define('VERBOSE', true);       // print a line per path skipped / checked / selected
define('RENAME', false);       // master switch: allow renaming at all
define('RENAME_FILES', false); // rename files (requires RENAME)
define('RENAME_DIRS', false);  // rename directories (requires RENAME)
define('PRESERVE_TIME', true); // after a rename, set atime/mtime to the earliest of m/c/atime
/**
 * Write a timestamped message to STDOUT, optionally followed by a detail line.
 *
 * Arrays passed as either argument are rendered with print_r().
 *
 * @param mixed  $message     Text (or array) printed after the timestamp.
 * @param mixed  $str         Optional detail printed on its own tab-indented line.
 * @param string $date_format date() format used for the timestamp prefix.
 * @return void
 */
function output($message, $str = '', $date_format = 'Y-m-d h:i:s') {
    if (is_array($message)) {
        $message = print_r($message, true);
    }
    if (is_array($str)) {
        $str = print_r($str, true);
    }
    fwrite(STDOUT, sprintf("\n%s\t%s", date($date_format), $message));
    // Test for "no detail" explicitly: empty() would also discard legitimate
    // details such as "0" or 0.
    if ($str !== '' && $str !== null && $str !== false) {
        fwrite(STDOUT, "\n\t" . $str);
    }
    flush();
}
/**
 * Get the minimum (earliest) of a path's access, modification and
 * inode-change times.
 *
 * @param string $path File or directory to inspect.
 * @return int|false Unix timestamp, or false when the path does not exist or
 *                   none of its times could be read.
 */
function filetime_min($path) {
    if (!file_exists($path)) {
        return false;
    }
    $times = [];
    foreach ([fileatime($path), filemtime($path), filectime($path)] as $time) {
        // Each call returns false on failure, and min() ranks false below any
        // real timestamp, so a single failed call would poison the result.
        if ($time !== false) {
            $times[] = $time;
        }
    }
    return empty($times) ? false : min($times);
}
// Substring filters applied to every path read from the file list: a path is
// processed only when it contains at least one include string and none of
// the ignore strings.
$include_paths = [
    '/data/',
    '/media/',
    '/Users/',
    '/home/',
];
$ignore_paths = [
    '.debris',
    '/backup/',
];
if (DEBUG) {
    foreach (['Include Path: ' => $include_paths, 'Ignore Path: ' => $ignore_paths] as $label => $filters) {
        output($label, $filters);
    }
}
//$tmpfile = 'files.txt'; // find . -type f > files.txt
// Uncomment the line above to process a pre-generated list instead of
// scanning the current working directory.
if (!empty($tmpfile) && !file_exists($tmpfile)) {
    output("File to process does not exist: $tmpfile");
    goto end;
}

// generate a tmp file to be deleted after script ends
$i = 0; // number of lines read from the file list
if (empty($tmpfile)) {
    $cwd = getcwd();
    if ($cwd === false) {
        output("Could not determine the current working directory");
        goto end;
    }
    // get a temporary filename to process all files in subfolders
    $tmpfile = tempnam(sys_get_temp_dir(), 'php_transliterate_');
    if ($tmpfile === false) {
        output("Could not create tmp file in: " . sys_get_temp_dir());
        goto end;
    }
    $delete_file = true; // only set once there really is a file to delete
    $resolved = realpath($tmpfile);
    if ($resolved !== false) {
        $tmpfile = $resolved;
    }
    if (DEBUG) {
        output("Created tmpfile: $tmpfile");
    }
    // Single-quote both paths by hand: they may contain spaces or shell
    // metacharacters. escapeshellarg() is avoided because it can drop
    // multibyte characters when the locale is not UTF-8, and accented
    // directory names are exactly what this script is run against.
    $quote = function ($arg) {
        return "'" . str_replace("'", "'\\''", $arg) . "'";
    };
    $cmd = 'find ' . $quote($cwd) . ' -type f > ' . $quote($tmpfile);
    if (DEBUG) {
        output("Generating files list: $cmd");
    }
    system($cmd);
    if (0 == filesize($tmpfile)) {
        // Nothing to process: stop whether or not DEBUG is on (previously the
        // jump was nested inside the DEBUG check).
        if (DEBUG) {
            output("Could not generate tmp file: $tmpfile\n");
        }
        goto end;
    }
}
// dirs and files to be renamed
$rename_dirs = [];  // transliterated directory path => original directory path
$rename_files = []; // original file path => same path with transliterated file name
$h = fopen($tmpfile, 'r'); // process line by line to avoid memory issues
if ($h === false) {
    output("Could not open file list: $tmpfile");
    goto end;
}
while (($path = fgets($h)) !== false) {
    $i++;
    // Strip only the line terminator: trim() would also remove leading or
    // trailing whitespace that is genuinely part of the file name.
    $path = rtrim($path, "\r\n");

    // check if the path should be ignored or not
    $path_ok = false;
    foreach ($include_paths as $str) {
        if (!empty($str) && strpos($path, $str) !== false) {
            $path_ok = true;
            break;
        }
    }
    foreach ($ignore_paths as $str) {
        if (!empty($str) && strpos($path, $str) !== false) {
            $path_ok = false; // an ignore match overrides any include match
            break;
        }
    }
    if (!$path_ok) {
        if (VERBOSE) {
            output("Skipping:", $path);
        }
        continue;
    }

    // get the current dir and filename
    if (DEBUG && VERBOSE) {
        output("Checking:", $path);
    }
    $filename = basename($path);
    $dir = substr($path, 0, strlen($path) - strlen($filename) - 1);

    // check the dir has characters that can be transliterated, if so, add to list
    $newdir = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $dir);
    if ($newdir !== false && $newdir !== '' && $newdir !== $dir && !array_key_exists($newdir, $rename_dirs)) {
        if (VERBOSE) {
            output("Will transliterate directory: $dir");
        }
        // $newdir is already pure ASCII, so it is used as the key directly;
        // this is also the key tested by array_key_exists() above.
        $rename_dirs[$newdir] = $dir;
    }

    // if the filename can be transliterated, add to list
    $newfilename = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $filename);
    if ($newfilename !== false && $newfilename !== '' && $newfilename !== $filename) {
        if (VERBOSE) {
            output("Will transliterate file: $filename");
        }
        // The file stays in its original directory here; directories are
        // handled separately via $rename_dirs.
        $rename_files[$path] = $dir . '/' . $newfilename;
    }
}
fclose($h);
// list files normalized, and rename them when RENAME and RENAME_FILES are on
echo "\n";
foreach ($rename_files as $path => $newpath) {
    printf("\n%s\n%s\t\n%s\t\n", $path, basename($path), basename($newpath));
    if (!(RENAME && RENAME_FILES)) {
        continue; // dry run: listing only
    }
    // rename() replaces an existing destination without warning, so never
    // rename onto a name that is already taken (e.g. "é.txt" next to "e.txt").
    if (file_exists($newpath)) {
        if (VERBOSE) {
            output("Target already exists, not renaming!\n\t$path\n\t$newpath");
        }
        continue;
    }
    $ts = filetime_min($path); // read before the rename changes ctime
    if (!rename($path, $newpath)) {
        if (VERBOSE) {
            output("Could not rename file!\n\t$path\n\t$newpath");
        }
    } elseif (PRESERVE_TIME && $ts !== false && file_exists($newpath)) {
        // $ts is false when no timestamp could be read; skip touch() then
        // rather than passing false as a time.
        touch($newpath, $ts, $ts);
    }
}
echo "\n";
/**
 * Break directory renames down into single-component rename steps.
 *
 * For every (transliterated path => original path) pair, the paths are
 * compared component by component. Each differing component yields one step
 * "current path up to that component => transliterated path up to that
 * component". Steps are emitted parent first, and a child's source path
 * already uses the new names of its parents, so the steps can be applied in
 * order.
 *
 * Paths are expected to be absolute (leading "/"). Pairs whose component
 * counts differ are skipped because the components cannot be matched up.
 *
 * @param array $rename_dirs Map of transliterated path => original path.
 * @return array Ordered map of source path => destination path.
 */
function plan_dir_renames($rename_dirs) {
    $steps = [];
    foreach ($rename_dirs as $dest => $src) {
        $src_parts = explode('/', (string) $src);
        $dest_parts = explode('/', (string) $dest);
        if (count($src_parts) !== count($dest_parts)) {
            continue;
        }
        foreach ($dest_parts as $depth => $dest_part) {
            if ($src_parts[$depth] === $dest_part) {
                continue;
            }
            // Index 0 is the empty string before the leading "/", hence the
            // slice from 1; $depth elements reach up to and including $depth.
            $from = '/' . join('/', array_slice($src_parts, 1, $depth));
            $to = '/' . join('/', array_slice($dest_parts, 1, $depth));
            // Deeper steps must refer to this component by its new name.
            $src_parts[$depth] = $dest_part;
            $steps[$from] = $to;
        }
    }
    return $steps;
}

// Planning runs inside a function so its loop indices cannot overwrite the
// global $i line counter that is reported as "Processed files" at the end.
$renames = plan_dir_renames(isset($rename_dirs) ? $rename_dirs : []);
// List every planned directory rename; perform it only when RENAME and
// RENAME_DIRS are on and the source still exists as a directory.
foreach ($renames as $from => $to) {
    printf("\n%s\t\n%s\t\n", $from, $to);
    $enabled = RENAME && RENAME_DIRS;
    if (!$enabled || !file_exists($from) || !is_dir($from)) {
        continue;
    }
    $earliest = filetime_min($from); // captured before the rename
    if (rename($from, $to)) {
        if (file_exists($to) && PRESERVE_TIME) {
            touch($to, $earliest, $earliest);
        }
    } elseif (VERBOSE) {
        output("Could not rename file!\n\t$from\n\t$to");
    }
}
end:
// The label sits *before* the cleanup so that early exits (goto end) also
// remove a temporary file list created by this run.
// if tmp file was created
if (!empty($delete_file) && !empty($tmpfile) && file_exists($tmpfile)) {
    if (DEBUG) {
        output("Deleting tmp file: " . $tmpfile);
    }
    unlink($tmpfile);
}
// $i is not set yet when the script bails out before the counter is initialised.
output("Processed files: " . (isset($i) ? (int) $i : 0));
if (!empty($rename_files)) {
    output("Transliterated files: " . count($rename_files));
}
if (!empty($rename_dirs)) {
    output("Transliterated dirs: " . count($rename_dirs));
}
echo "\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment