Skip to content

Instantly share code, notes, and snippets.

@vijinho
Created May 5, 2019 22:53
Show Gist options
  • Save vijinho/3cabdde9fd09ea12c2ebccc1320d7760 to your computer and use it in GitHub Desktop.
Save vijinho/3cabdde9fd09ea12c2ebccc1320d7760 to your computer and use it in GitHub Desktop.
Find/remove duplicate files in subfolders and move/rename files exported from calibre ebook application folder in OPF files.
<?php
/**
* get-rename.php - get metadata from files readable with exiftool
* and presents options for renaming
* creates cache files, metadata.ser
*
* https://www.sno.phy.queensu.ca/~phil/exiftool/#filename
* see https://www.sno.phy.queensu.ca/~phil/exiftool/exiftool_pod.html
*
* relies on command-line tools, tested on MacOS.
*
* @license GPLv3 (http://www.gnu.org/licenses/gpl-3.0.html)
*/
// Runtime switches: debug() only prints when DEBUG is truthy and verbose()
// only prints when VERBOSE is truthy.
define('DEBUG',1);
define('VERBOSE',1);
// Name of the calibre export folder.
// NOTE(review): this variable is not read anywhere in the visible script; the
// matching below uses $calibre_export_folder instead - confirm it is still needed.
$import_folder = 'calibre'; // import files exported from calibre folder
// Optional dated backup folder: when $backup_folder is non-empty, an existing
// target file is moved there before it is overwritten. Disabled by default;
// un-comment the two lines below (and adjust the path) to enable it.
//$backup_folder = '/Users/vijay/MEGA/.debris/' . join('-', [date('Y'), date('m'), date('d')]); // folder to move overwritten dupes to
//@mkdir($backup_folder, 0777, true);
$backup_folder = ''; // no backup
// When truthy, the .opf file found next to a calibre source is moved along with it.
$move_opfs = true; // move opf files
// Environment setup: UTC timestamps and UTF-8 text handling.
// NOTE(review): mbstring.func_overload no longer exists in PHP 8 and
// auto_detect_line_endings is deprecated since PHP 8.1 - confirm these
// settings are still wanted on the PHP version in use.
date_default_timezone_set('UTC');
ini_set('default_charset', 'utf-8');
ini_set('mbstring.encoding_translation', 'On');
ini_set('mbstring.func_overload', 6);
ini_set('auto_detect_line_endings', TRUE);
// Check the CLI commands are available: getCommands() returns the paths of the
// tools it could resolve, and the script stops here when it resolved none.
$commands = getCommands();
if (empty($commands)) exit;
// The duplicate list consumed below is produced with:
//   jdupes -r . > dupes.list
// NOTE: this die() is unconditional, so nothing after this line runs until it
// is removed or commented out. It acts as a safety stop because the code below
// deletes and moves files.
die("Run: jdupes -r . > dupes.list");
// Path fragment that identifies a file as living in the calibre export folder.
$calibre_export_folder = '/calibre/';
$files = [];

// Parse dupes.list into $groups: each group is a run of consecutive non-blank
// lines (one file path per line), groups are separated by blank lines.
// Every group is an array keyed and valued by the file path.
$groups = [];
$group = [];
$lines = is_readable('dupes.list') ? file('dupes.list') : false;
if ($lines === false) {
    output("Cannot read dupes.list. Run: jdupes -r . > dupes.list\n");
    exit(1);
}
foreach ($lines as $l) {
    // Strip only the line terminator: leading/trailing spaces can be part of
    // a file name and must survive so the path still points at the file.
    $l = rtrim($l, "\r\n");
    if ($l === '') {
        // A blank line closes the current group; repeated blank lines must
        // not produce empty groups.
        if (!empty($group)) {
            $groups[] = $group;
        }
        $group = [];
        continue;
    }
    $group[$l] = $l;
}
// The list may end without a trailing blank line: keep the last group too.
if (!empty($group)) {
    $groups[] = $group;
}
$group = [];
unset($lines);
/**
 * Build the path of the OPF metadata file that belongs to $file, i.e. the
 * same path with its extension replaced by ".opf".
 *
 * @param string $file path of a book file
 * @return string path of the matching .opf file
 */
function opf_path_for($file)
{
    $suffix = '.' . pathinfo($file, PATHINFO_EXTENSION);
    // A file without any extension has nothing to strip; cutting a fixed
    // number of characters would eat the last character of its name.
    if ($suffix === '.' && substr($file, -1) !== '.') {
        return $file . '.opf';
    }
    return substr($file, 0, -strlen($suffix)) . '.opf';
}

/**
 * Keep the first path of a list and delete every other file in it.
 *
 * @param array $paths file paths that are copies of each other
 * @param string $label heading shown (with the list) when extras are removed
 * @return string|null the surviving first path, null when $paths is empty
 */
function keep_first_unlink_rest(array $paths, $label)
{
    if (empty($paths)) {
        return null;
    }
    $paths = array_values($paths);
    if (count($paths) > 1) {
        verbose($label, $paths);
        // Iterate over a slice so that removing entries cannot shorten the
        // range being walked (which would leave some extras behind).
        foreach (array_slice($paths, 1) as $extra) {
            if (!unlink($extra)) {
                verbose("Failed to remove duplicate:\n\t$extra\n");
            }
        }
    }
    return $paths[0];
}

// Process every duplicate group: files under the calibre export folder are
// "sources", all other files are "targets". Extra copies on each side are
// deleted, then the remaining source (and its .opf) replaces the remaining
// target (and its .opf).
$pending_groups = (isset($groups) && is_array($groups)) ? $groups : [];
foreach ($pending_groups as $group) {
    $sources = [];
    $opfs = [];
    $targets = [];
    foreach ($group as $path) {
        $path = (string) $path;
        if (false === stripos($path, $calibre_export_folder)) {
            $targets[] = $path;
            continue;
        }
        $sources[] = $path;
        $sidecar = opf_path_for($path);
        // A duplicate that is itself an .opf file is not its own sidecar.
        if ($sidecar !== $path && file_exists($sidecar)) {
            $opfs[] = $sidecar;
        }
    }

    // remove duplicate sources, opfs and targets
    $source = keep_first_unlink_rest($sources, "Sources:");
    if ($source !== null) {
        verbose("Source:\n\t$source\n");
    }
    $opf = keep_first_unlink_rest($opfs, "OPFs:");
    if ($opf !== null) {
        verbose("Source OPF:\n\t$opf\n");
    }
    $target = keep_first_unlink_rest($targets, "Targets:");
    if ($target !== null) {
        verbose("Target:\n\t$target\n");
    }

    // Without both a source and a target there is nothing to move; in
    // particular the target must not be sent to the backup folder when no
    // source exists to take its place.
    if ($source === null || $target === null) {
        continue;
    }

    $target_opf = opf_path_for($target);
    if (file_exists($target_opf)) {
        debug("OPF target file exists:\n\t$target_opf\n");
    }
    verbose("Target OPF:\n\t$target_opf\n");

    if (!empty($backup_folder) && file_exists($target)) {
        $backup = $backup_folder . '/' . basename($target);
        verbose(sprintf("\nBacking-up:\n\t%s\n\t%s\n", $target, $backup));
        if (!rename($target, $backup)) {
            verbose(sprintf("\nFailed to move file:\n\t%s\n\t%s\n", $target, $backup));
            exit(1);
        }
    }

    if (file_exists($source)) {
        verbose(sprintf("\nMoving:\n\t%s\n\t%s\n", $source, $target));
        if (!rename($source, $target)) {
            verbose(sprintf("\nFailed to move file:\n\t%s\n\t%s\n", $source, $target));
            exit(1);
        }
    }

    if (!empty($move_opfs) && $opf !== null && file_exists($opf)) {
        verbose(sprintf("\nMoving OPF:\n\t%s\n\t%s\n", $opf, $target_opf));
        if (!rename($opf, $target_opf)) {
            verbose(sprintf("\nFailed to move OPF file:\n\t%s\n\t%s\n", $opf, $target_opf));
            exit(1);
        }
    }
}
exit;
//-----------------------------------------------------------------------------
// functions used above
/**
 * Execute a command and return what it wrote to its output streams.
 *
 * Nothing is sent to the command's stdin: the pipe is closed straight away so
 * a command that reads stdin sees end-of-file instead of waiting forever.
 *
 * @param string $cmd command to execute
 * @return array|false array with keys 'stdin' (always ''), 'stdout', 'stderr'
 *                     and 'status' (value returned by proc_close) | boolean
 *                     false if the process could not be started
 * @see https://secure.php.net/manual/en/function.proc-open.php
 */
function shell_execute($cmd)
{
    $pipes = [];
    $process = proc_open(
        $cmd,
        [
            0 => ['pipe', 'r'], // child's stdin
            1 => ['pipe', 'w'], // child's stdout
            2 => ['pipe', 'w'], // child's stderr
        ],
        $pipes
    );
    if (!is_resource($process)) {
        return false;
    }

    // Our end of the stdin pipe is write-only, so there is nothing to read
    // from it; closing it signals end-of-input to the child.
    fclose($pipes[0]);

    // NOTE: stdout is drained before stderr, which is fine for the short
    // commands used here but could stall on a child that writes a very large
    // amount to stderr first.
    $stdout = stream_get_contents($pipes[1]);
    fclose($pipes[1]);
    $stderr = stream_get_contents($pipes[2]);
    fclose($pipes[2]);

    // All pipes must be closed before proc_close() to avoid a deadlock.
    $status = proc_close($process);

    return [
        'stdin' => '',
        'stdout' => (false === $stdout) ? '' : $stdout,
        'stderr' => (false === $stderr) ? '' : $stderr,
        'status' => $status
    ];
}
/**
 * Execute a command and return what it wrote to stdout; anything written to
 * stderr is treated as a failure.
 *
 * @param string $cmd command to execute
 * @param boolean $split split returned results? default on newline
 * @param string $exp regular expression to preg_split to split on
 * @return mixed string stdout when $split or $exp is empty or there was no
 *               output, otherwise the array of pieces produced by preg_split
 * @throws Exception when the command cannot be started or writes to stderr
 * @see shell_execute($cmd)
 */
function cmd_execute($cmd, $split = true, $exp = "/\n/")
{
    $result = shell_execute($cmd);
    if (!is_array($result)) {
        // shell_execute() returns false when the process could not be started
        throw new Exception("Unable to execute command: $cmd");
    }

    $stderr = isset($result['stderr']) ? (string) $result['stderr'] : '';
    if ($stderr !== '') {
        throw new Exception($stderr);
    }

    // Compare against '' rather than using empty(), so that an output of "0"
    // is handled like any other output.
    $data = isset($result['stdout']) ? (string) $result['stdout'] : '';
    if (empty($split) || empty($exp) || $data === '') {
        return $data;
    }
    return preg_split($exp, $data);
}
/**
 * Check the required command-line tools are installed and resolve their paths
 * using `which`. The result is cached for subsequent calls once at least one
 * tool has been found. Missing tools are reported through debug() and simply
 * left out of the result.
 *
 * @return array map of tool name => path for every tool that was found
 */
function getCommands()
{
    static $commands = []; // cli command paths
    if (!empty($commands)) {
        return $commands;
    }
    $requirements = [
        'exiftool' => 'https://sno.phy.queensu.ca/~phil/exiftool/',
        'gs' => 'http://manpages.ubuntu.com/manpages/bionic/man1/gs.1.html',
        'cp' => 'copy system command - cp',
        'mv' => 'move system command - mv',
        'find' => 'system find commmand'
    ];
    $errors = [];
    foreach ($requirements as $tool => $description) {
        try {
            $found = cmd_execute("which $tool");
        } catch (Exception $e) {
            // cmd_execute() throws whenever the command writes to stderr; a
            // `which` that reports "not found" that way means the tool is
            // missing, it must not abort the whole check.
            $found = [];
        }
        $path = (is_array($found) && isset($found[0])) ? trim($found[0]) : '';
        if ($path === '') {
            $errors[] = "Error: Missing requirement: $tool - " . $description;
        } else {
            $commands[$tool] = $path;
        }
    }
    if (!empty($errors)) {
        debug(join("\n", $errors) . "\n");
    }
    return $commands;
}
/**
 * Report the script's memory consumption in whole megabytes (rounded up),
 * formatted as "current/peak".
 *
 * @return string memory used, e.g. "2/3"
 */
function get_memory_used()
{
    $current = ceil(memory_get_usage() / 1024 / 1024);
    $peak = ceil(memory_get_peak_usage() / 1024 / 1024);

    return $current . '/' . $peak;
}
/**
 * Write a string to STDERR when requested and available, otherwise echo it.
 *
 * @param string $text string to output
 * @param boolean $STDERR write to stderr if it is available
 */
function output($text, $STDERR = true)
{
    $use_stderr = !empty($STDERR) && defined('STDERR');
    if (!$use_stderr) {
        echo $text;
        return;
    }
    fwrite(STDERR, $text);
}
/**
 * Print a debug line, prefixed with "[D <memory used>]", followed by an
 * optional dump of $data. Does nothing unless the DEBUG constant is truthy.
 *
 * @param string $string string to output (optional)
 * @param mixed $data data to dump with print_r when not empty (optional)
 * @return boolean true if something was output, false if not
 */
function debug($string = '', $data = [])
{
    if (!DEBUG) {
        return false;
    }

    $prefix = '[D ' . get_memory_used() . '] ';
    output(trim($prefix . $string) . "\n");

    if (!empty($data)) {
        output(print_r($data, 1));
    }

    return true;
}
/**
 * Print an informational line, prefixed with "[V]" (or "[V <memory used>]"
 * when DEBUG is truthy), followed by an optional dump of $data. Does nothing
 * unless the VERBOSE constant is truthy and $string is not empty.
 *
 * @param string $string string to output
 * @param mixed $data data to dump with print_r when not empty (optional)
 * @return boolean true if something was output, false if not
 */
function verbose($string, $data = [])
{
    if (!VERBOSE || empty($string)) {
        return false;
    }

    $memory = '';
    if (DEBUG) {
        $memory = ' ' . get_memory_used();
    }
    output(trim('[V' . $memory . '] ' . $string) . "\n");

    if (!empty($data)) {
        output(print_r($data, 1));
    }

    return true;
}
/**
 * Clear an array of empty values, recursively.
 *
 * A value is dropped when empty() is true for it, except the integer 0 which
 * is kept. Nested arrays are cleaned first and dropped when nothing is left
 * in them. Keys listed in $keys are removed at every level.
 *
 * @param array $array the array to clean
 * @param array $keys array keys to explicitly remove regardless
 * @return array the trimmed down array
 */
function array_clear($array, $keys = [])
{
    if (!is_array($array)) {
        // nothing to iterate over; hand the value back untouched
        return $array;
    }
    foreach ($array as $key => $value) {
        if (in_array($key, $keys, true)) {
            unset($array[$key]);
            continue;
        }
        if (is_array($value)) {
            // One recursive pass fully cleans the nested array; repeating it
            // gives the same result and only multiplies the work per level.
            $value = array_clear($value, $keys);
            $array[$key] = $value;
        }
        if (empty($value) && 0 !== $value) {
            unset($array[$key]);
        }
    }
    return $array;
}
/**
 * Convert the character encoding of a value, recursing into arrays and
 * objects.
 *
 * Integers and floats are returned unchanged. Numeric strings are turned into
 * the number they represent (integer or float, so "1.5" becomes 1.5 and is
 * not truncated). Other strings are converted with mb_convert_encoding().
 * Any other type is returned as is.
 *
 * @param mixed $data
 * @param string $to_charset convert to encoding
 * @param string $from_charset convert from encoding
 * @return mixed
 */
function to_charset($data, $to_charset = 'UTF-8', $from_charset = 'auto')
{
    if (is_int($data) || is_float($data)) {
        return $data;
    }
    if (is_string($data)) {
        if (is_numeric($data)) {
            // "+ 0" yields an int or a float depending on the string's content
            return $data + 0;
        }
        return mb_convert_encoding($data, $to_charset, $from_charset);
    }
    if (is_array($data)) {
        foreach ($data as $key => $value) {
            $data[$key] = to_charset($value, $to_charset, $from_charset);
        }
    } else if (is_object($data)) {
        foreach ($data as $key => $value) {
            $data->$key = to_charset($value, $to_charset, $from_charset);
        }
    }
    return $data;
}
/**
 * Load a php serialized data file and return its content with empty values
 * removed (see array_clear). A missing, unreadable, empty or corrupt file, or
 * a file that does not hold an array, yields an empty array.
 *
 * @param string $file the serialized data filename
 * @return array $data
 */
function serialize_load($file)
{
    if (!is_file($file) || !is_readable($file)) {
        return [];
    }
    $raw = file_get_contents($file);
    if ($raw === false || $raw === '') {
        return [];
    }
    // NOTE(security): unserialize() must only be fed files this script wrote
    // itself; it is not safe on data from an untrusted origin.
    $data = unserialize($raw);
    if (!is_array($data)) {
        // corrupt payload (false) or a serialized non-array value
        return [];
    }
    return array_clear($data);
}
/**
 * Save a data array to a file in php serialized form, after removing its
 * empty values (see array_clear).
 *
 * @param string $file the serialized data filename
 * @param array $data data to save
 * @return mixed the result of file_put_contents() (bytes written, or false on
 *               failure); the string 'No data to write to file.' when $data
 *               is empty and nothing was written
 */
function serialize_save($file, $data)
{
    if (!empty($data)) {
        $cleaned = array_clear($data);
        $payload = serialize($cleaned);

        return file_put_contents($file, $payload);
    }

    return 'No data to write to file.';
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment