Last active
May 6, 2019 14:18
-
-
Save vijinho/e5ee78e13b8c7ad9f171593234daa2ad to your computer and use it in GitHub Desktop.
Re-compress PDFs, shrinking them if possible (an improved version of https://gist.github.com/vijinho/724ebfac4739019fd36baa2ab8e2aca2 ). Just run ‘php /path/to/pdf-compress.php’ in the current working directory to re-compress all PDFs within it.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* pdf-compress.php - re-compress PDFs, shrinking if possible | |
* WARNING: May run extremely slowly due to very large file sizes generated on pdf2ps | |
* | |
* relies on command-line tools, tested on ubuntu. | |
* | |
* @license GPLv3 (http://www.gnu.org/licenses/gpl-3.0.html) | |
*/ | |
$folder = '.';
define('DEBUG', 1);
define('VERBOSE', 1);

date_default_timezone_set('UTC');
ini_set('default_charset', 'utf-8');
ini_set('mbstring.encoding_translation', 'On');
ini_set('mbstring.func_overload', 6);
ini_set('auto_detect_line_endings', TRUE);

// check CLI commands are available
$commands = getCommands();
if (empty($commands)) {
    exit(1);
}
// Every tool used below must have a resolved path; getCommands() has
// already reported anything that is missing, so just stop here.
foreach (['gs', 'qpdf', 'cp', 'mv', 'find'] as $tool) {
    if (empty($commands[$tool])) {
        exit(1);
    }
}

// Collect every PDF (case-insensitive extension match) below $folder.
$ext = '.pdf';
$cmd = sprintf(
    '%s %s -type f -iname %s -print',
    $commands['find'],
    escapeshellarg($folder),
    escapeshellarg('*' . $ext)
);
try {
    $files = cmd_execute($cmd);
} catch (Exception $e) {
    output("Unable to list PDF files: " . $e->getMessage() . "\n");
    exit(1);
}
// cmd_execute() hands back a plain string when there is no output, and the
// newline-terminated listing otherwise splits into a trailing '' entry;
// neither may be treated as a file to convert.
$files = is_array($files)
    ? array_values(array_filter($files, function ($candidate) {
        return $candidate !== '';
    }))
    : [];

// Ghostscript options are identical for every file, so build them once.
$gsOptions = implode(' ', [
    '-sDEVICE=pdfwrite',
    '-dPDFSETTINGS=/ebook',
    '-dNumRenderingThreads=4',
    '-dBandBufferSpace=2000000000',
    '-dBufferSpace=2000000000',
    '-sBandListStorage=memory',
    '-dNOPAUSE',
    '-dQUIET',
    '-dBATCH',
    '-dDoThumbnails=true',
    '-dCompressFonts=true',
    '-sProcessColorModel=DeviceRGB',
    '-sColorConversionStrategy=sRGB',
    '-sColorConversionStrategyForImages=sRGB',
    '-dConvertCMYKImagesToRGB=true',
    '-dDetectDuplicateImages=true',
    '-dDownsampleColorImages=true',
    '-dDownsampleGrayImages=true',
    '-dDownsampleMonoImages=true',
    '-dColorImageDownsampleThreshold=1',
    '-dGrayImageDownsampleThreshold=1',
    '-dMonoImageDownsampleThreshold=1',
    '-dColorImageDownsampleType=/Bicubic',
    '-dGrayImageDownsampleType=/Bicubic',
    '-dMonoImageDownsampleType=/Subsample',
]);

$tmpfile = '';
$remaining = count($files);
foreach ($files as $path) {
    $remaining--;
    output("Checking [" . $remaining . "]: $path\n");

    // Drop the intermediate file left behind by the previous iteration.
    if ($tmpfile !== '' && file_exists($tmpfile)) {
        unlink($tmpfile);
    }

    // Intermediate output of the Ghostscript pass. Despite the '.ps' suffix
    // this is a PDF, because the output device is pdfwrite.
    $tmpfile = $path . '.ps';
    $cmd = $commands['gs'] . ' ' . $gsOptions
        . ' -sOutputFile=' . escapeshellarg($tmpfile)
        . ' ' . escapeshellarg($path);

    $start_time = time();
    try {
        //debug($cmd);
        verbose("Converting file...");
        cmd_execute($cmd);
    } catch (Exception $e) {
        debug($e->getMessage());
        continue;
    }
    verbose('Time-taken: ' . (time() - $start_time) . 's');

    clearstatcache();
    if (!file_exists($tmpfile) || 0 == filesize($tmpfile)) {
        verbose("\tFailed to convert: $path");
        continue;
    }

    // Keep a copy of the original before qpdf overwrites it.
    verbose("Linearizing...");
    $bak = $path . '.bak';
    try {
        cmd_execute(sprintf(
            "%s -p %s %s",
            $commands['cp'],
            escapeshellarg($path),
            escapeshellarg($bak)
        ));
    } catch (Exception $e) {
        debug($e->getMessage());
        verbose("\tFailed to back up: $path");
        continue;
    }

    // linearize pdf
    $cmd = sprintf(
        "%s --linearize %s %s",
        $commands['qpdf'],
        escapeshellarg($tmpfile),
        escapeshellarg($path)
    );
    debug($cmd);
    try {
        $output = cmd_execute($cmd);
    } catch (Exception $e) {
        debug($e->getMessage());
        // Put the original back in place; the restore target is $path.
        try {
            cmd_execute(sprintf(
                "%s %s %s",
                $commands['mv'],
                escapeshellarg($bak),
                escapeshellarg($path)
            ));
        } catch (Exception $restoreError) {
            output("\tCould not restore $path from $bak: " . $restoreError->getMessage() . "\n");
        }
        continue;
    }
    debug(is_array($output) ? implode("\n", $output) : (string) $output);

    // Compare sizes on fresh stat data. The result is kept even when it is
    // larger, and the '.bak' copy of the original is left on disk.
    clearstatcache();
    $savedb = filesize($bak) - filesize($path);
    if ($savedb < 0) {
        verbose("\tFilesize is not smaller!\n");
        output("\tGrew by: " . ceil(-$savedb / 1024) . " Kbytes\n");
    } else {
        output("\tShrinking, saved: " . ceil($savedb / 1024) . " Kbytes\n");
    }
}

// The loop only cleans up on the *next* pass, so remove the final temp file.
if ($tmpfile !== '' && file_exists($tmpfile)) {
    unlink($tmpfile);
}
exit;
//----------------------------------------------------------------------------- | |
// functions used above | |
/**
 * Execute a command through proc_open() and capture what it writes.
 *
 * The child's stdin is closed immediately, so a command that reads its
 * standard input sees end-of-file instead of blocking forever.
 *
 * Note: stdout is drained completely before stderr is read.
 *
 * @param string $cmd command to execute
 * @return array|false array with the keys 'stdin', 'stdout' and 'stderr',
 *                     or boolean false if the process could not be started.
 *                     'stdin' is kept for backward compatibility and is
 *                     always an empty string (nothing can be read from it).
 * @see https://secure.php.net/manual/en/function.proc-open.php
 */
function shell_execute($cmd)
{
    $pipes = [];
    $process = proc_open(
        $cmd,
        [
            0 => ['pipe', 'r'], // child's stdin  (we hold the write end)
            1 => ['pipe', 'w'], // child's stdout (we hold the read end)
            2 => ['pipe', 'w'], // child's stderr (we hold the read end)
        ],
        $pipes
    );
    if (!is_resource($process)) {
        return false;
    }

    // Nothing is ever sent to the child, so signal EOF on its stdin now.
    fclose($pipes[0]);

    $stdout = stream_get_contents($pipes[1]);
    $stderr = stream_get_contents($pipes[2]);

    // All pipes must be closed before proc_close() to avoid a deadlock.
    fclose($pipes[1]);
    fclose($pipes[2]);
    proc_close($process);

    return [
        'stdin'  => '',
        'stdout' => ($stdout === false) ? '' : $stdout,
        'stderr' => ($stderr === false) ? '' : $stderr,
    ];
}
/**
 * Execute a command and return its stdout, or throw if it wrote to stderr.
 *
 * Failure is detected purely from stderr output: any text on stderr is
 * thrown as an Exception whose message is that text. The exit status of
 * the command is not inspected.
 *
 * @param string $cmd command to execute
 * @param boolean $split split returned results? default on newline
 * @param string $exp regular expression to preg_split to split on
 * @return array|string array of pieces when splitting is enabled and there
 *                      is output; otherwise the raw stdout string (an empty
 *                      string when the command printed nothing)
 * @throws Exception if the command could not be started or wrote to stderr
 * @see shell_execute($cmd)
 */
function cmd_execute($cmd, $split = true, $exp = "/\n/")
{
    $result = shell_execute($cmd);
    if (!is_array($result)) {
        // shell_execute() returns false when the process cannot be started.
        throw new Exception("Unable to execute command: $cmd");
    }

    // Compare against '' instead of using empty(), which would also treat
    // a literal "0" as "no output".
    $stderr = isset($result['stderr']) ? (string) $result['stderr'] : '';
    if ($stderr !== '') {
        throw new Exception($stderr);
    }

    $data = isset($result['stdout']) ? (string) $result['stdout'] : '';
    if (empty($split) || empty($exp) || $data === '') {
        return $data;
    }
    return preg_split($exp, $data);
}
/**
 * Check that the required command-line tools are installed and resolve
 * their paths with `which`.
 *
 * A complete result is cached in a static variable, so the lookups run
 * only once per successful call. If any tool is missing, one error line
 * per missing tool is echoed and an empty array is returned, so a caller
 * testing the result with empty() reliably detects the failure.
 *
 * @return array map of tool name => path, or an empty array if any
 *               requirement is missing
 */
function getCommands()
{
    static $commands = []; // cli command paths
    if (!empty($commands)) {
        return $commands;
    }

    $requirements = [
        'gs' => 'http://manpages.ubuntu.com/manpages/bionic/man1/gs.1.html',
        'pdf2ps' => 'http://manpages.ubuntu.com/manpages/bionic/man1/pdf2ps.1.html',
        'ps2pdf' => 'http://manpages.ubuntu.com/manpages/bionic/man1/ps2pdf.1.html',
        'qpdf' => 'http://qpdf.sourceforge.net/',
        'cp' => 'copy system command - cp',
        'mv' => 'move system command - mv',
        'find' => 'system find commmand',
    ];

    $found = [];
    $errors = [];
    foreach ($requirements as $tool => $description) {
        try {
            $lines = cmd_execute('which ' . escapeshellarg($tool));
        } catch (Exception $e) {
            // cmd_execute() throws on any stderr output; a lookup that
            // complains there is treated as "tool not found".
            $lines = [];
        }

        // Only the first line of the lookup output is used as the path.
        $path = is_array($lines) ? trim((string) reset($lines)) : '';
        if ($path === '') {
            $errors[] = "Error: Missing requirement: $tool - " . $description;
        } else {
            $found[$tool] = $path;
        }
    }

    if (!empty($errors)) {
        echo join("\n", $errors) . "\n";
        // Leave the cache empty so a partial map is never handed out.
        return [];
    }

    $commands = $found;
    return $commands;
}
/**
 * Report the script's memory consumption as "current/peak".
 *
 * Both figures are in megabytes, rounded up to the next whole number.
 *
 * @return string memory used, e.g. "2/3"
 */
function get_memory_used()
{
    $currentMb = ceil(memory_get_usage() / 1024 / 1024);
    $peakMb = ceil(memory_get_peak_usage() / 1024 / 1024);

    return $currentMb . '/' . $peakMb;
}
/**
 * Write text to STDERR when requested and available, otherwise echo it.
 *
 * @param string $text string to output
 * @param boolean $STDERR write to stderr if it is available
 */
function output($text, $STDERR = true)
{
    $toStderr = !empty($STDERR) && defined('STDERR');
    if (!$toStderr) {
        echo $text;
        return;
    }
    fwrite(STDERR, $text);
}
/**
 * Emit a debug line, prefixed with "[D <memory used>]", when the DEBUG
 * constant is truthy.
 *
 * @param optional string $string string to output
 * @param optional mixed $data extra data, dumped with print_r() when non-empty
 * @return boolean true if something was output, false if DEBUG is off
 */
function debug($string = '', $data = [])
{
    if (!DEBUG) {
        return false;
    }

    $line = trim('[D ' . get_memory_used() . '] ' . $string);
    output($line . "\n");

    if (!empty($data)) {
        output(print_r($data, true));
    }

    return true;
}
/**
 * Emit a message, prefixed with "[V]", when the VERBOSE constant is truthy
 * and the message is non-empty. With DEBUG on, the prefix also carries the
 * memory usage.
 *
 * @param string $string string to output
 * @param optional mixed $data extra data, dumped with print_r() when non-empty
 * @return boolean true if something was output, false if not
 */
function verbose($string, $data = [])
{
    if (!VERBOSE || empty($string)) {
        return false;
    }

    if (DEBUG) {
        $memory = ' ' . get_memory_used();
    } else {
        $memory = '';
    }

    $line = trim('[V' . $memory . '] ' . $string);
    output($line . "\n");

    if (!empty($data)) {
        output(print_r($data, true));
    }

    return true;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment