Skip to content

Instantly share code, notes, and snippets.

@vijinho
Last active May 6, 2019 14:18
Show Gist options
  • Save vijinho/e5ee78e13b8c7ad9f171593234daa2ad to your computer and use it in GitHub Desktop.
Save vijinho/e5ee78e13b8c7ad9f171593234daa2ad to your computer and use it in GitHub Desktop.
Re-compress PDFs, shrinking them if possible (improved version of https://gist.github.com/vijinho/724ebfac4739019fd36baa2ab8e2aca2 ). Just run ‘php /path/to/pdf-compress.php’ in the current working directory to re-compress all PDFs within it.
<?php
/**
* pdf-compress.php - re-compress PDFs, shrinking if possible
* WARNING: May run extremely slowly due to very large file sizes generated on pdf2ps
*
* relies on command-line tools, tested on ubuntu.
*
* @license GPLv3 (http://www.gnu.org/licenses/gpl-3.0.html)
*/
$folder = '.';
define('DEBUG',1);
define('VERBOSE',1);
date_default_timezone_set('UTC');
ini_set('default_charset', 'utf-8');
ini_set('mbstring.encoding_translation', 'On');
ini_set('mbstring.func_overload', 6);
ini_set('auto_detect_line_endings', TRUE);

// check CLI commands are available
$commands = getCommands();
if (empty($commands)) {
    exit(1);
}
// Every tool invoked below must have a resolved path; stop early otherwise
// instead of building a broken command line for each file.
foreach (['find', 'gs', 'qpdf', 'cp', 'mv'] as $tool) {
    if (empty($commands[$tool])) {
        output("Error: Missing requirement: $tool\n");
        exit(1);
    }
}

// Collect every PDF (case-insensitive extension match) below $folder.
$ext = '.pdf';
$cmd = sprintf(
    '%s %s -type f -iname %s -print',
    $commands['find'],
    escapeshellarg($folder),
    escapeshellarg('*' . $ext)
);
try {
    $files = cmd_execute($cmd);
} catch (Exception $e) {
    output("Error: unable to list PDF files: " . $e->getMessage() . "\n");
    exit(1);
}
// cmd_execute() hands back the raw (empty) output when nothing was found,
// and splitting on "\n" leaves an empty last entry after the final newline.
if (!is_array($files)) {
    $files = [];
}
$files = array_values(array_filter($files, function ($candidate) {
    return $candidate !== '';
}));

// Ghostscript options are identical for every file, so build them once.
$gsOptions = implode(' ', [
    '-sDEVICE=pdfwrite',
    '-dPDFSETTINGS=/ebook',
    '-dNumRenderingThreads=4',
    '-dBandBufferSpace=2000000000',
    '-dBufferSpace=2000000000',
    '-sBandListStorage=memory',
    '-dNOPAUSE',
    '-dQUIET',
    '-dBATCH',
    '-dDoThumbnails=true',
    '-dCompressFonts=true',
    '-sProcessColorModel=DeviceRGB',
    '-sColorConversionStrategy=sRGB',
    '-sColorConversionStrategyForImages=sRGB',
    '-dConvertCMYKImagesToRGB=true',
    '-dDetectDuplicateImages=true',
    '-dDownsampleColorImages=true',
    '-dDownsampleGrayImages=true',
    '-dDownsampleMonoImages=true',
    '-dColorImageDownsampleThreshold=1',
    '-dGrayImageDownsampleThreshold=1',
    '-dMonoImageDownsampleThreshold=1',
    '-dColorImageDownsampleType=/Bicubic',
    '-dGrayImageDownsampleType=/Bicubic',
    '-dMonoImageDownsampleType=/Subsample',
]);

$tmpfile = '';
foreach ($files as $k => $path) {
    // Drop the current entry so count($files) shows how many are still queued.
    unset($files[$k]);
    output("Checking [".count($files)."]: $path\n");

    // Remove the intermediate file left behind by the previous iteration.
    if ($tmpfile !== '' && file_exists($tmpfile)) {
        unlink($tmpfile);
    }
    $tmpfile = $path . '.ps';

    // Step 1: re-write the PDF with Ghostscript into the intermediate file.
    $cmd = $commands['gs'] . ' ' . $gsOptions
        . ' -sOutputFile=' . escapeshellarg($tmpfile)
        . ' ' . escapeshellarg($path);
    try {
        $start_time = time();
        verbose("Converting file...");
        cmd_execute($cmd);
    } catch (Exception $e) {
        debug(print_r($e,1));
        continue;
    }
    verbose(sprintf("Time-taken: %d seconds", time() - $start_time));
    if (!file_exists($tmpfile) || 0 == filesize($tmpfile)) {
        verbose("\tFailed to convert: $path");
        continue;
    }

    // Step 2: back up the original. qpdf writes straight over $path, so the
    // file is skipped when the backup could not be made.
    verbose("Linearizing...");
    $bak = $path . '.bak';
    try {
        cmd_execute(sprintf("%s -p %s %s", $commands['cp'], escapeshellarg($path), escapeshellarg($bak)));
    } catch (Exception $e) {
        debug(print_r($e,1));
        continue;
    }
    if (!file_exists($bak)) {
        verbose("\tFailed to back up: $path");
        continue;
    }

    // Step 3: linearize the intermediate file over the original path.
    $cmd = sprintf("%s --linearize %s %s", $commands['qpdf'], escapeshellarg($tmpfile), escapeshellarg($path));
    debug($cmd);
    try {
        $output = cmd_execute($cmd);
    } catch (Exception $e) {
        debug(print_r($e,1));
        // Put the untouched original back in place.
        try {
            cmd_execute(sprintf("%s %s %s", $commands['mv'], escapeshellarg($bak), escapeshellarg($path)));
        } catch (Exception $restoreError) {
            output("\tCould not restore $path from $bak: " . $restoreError->getMessage() . "\n");
        }
        continue;
    }
    debug('qpdf output:', $output);

    // Step 4: report the size difference. Stat results are cached by PHP, so
    // clear the cache before measuring files that were just rewritten.
    clearstatcache();
    $savedb = filesize($bak) - filesize($path);
    if ($savedb < 0) {
        // The larger result is kept in place; the original stays available
        // as the .bak file, which this script never deletes.
        verbose("\tFilesize is not smaller!\n");
        output("\tGrew by: " . ceil(-$savedb/1024) . " Kbytes\n");
    } else {
        output("\tShrinking, saved: ". ceil($savedb/1024) . " Kbytes\n");
    }
}
// Remove the intermediate file of the last processed PDF.
if ($tmpfile !== '' && file_exists($tmpfile)) {
    unlink($tmpfile);
}
exit;
//-----------------------------------------------------------------------------
// functions used above
/**
 * Execute a command via proc_open() and capture what it writes.
 *
 * Nothing is ever sent to the child's stdin: that pipe is closed immediately
 * so commands which read stdin see end-of-file instead of waiting forever.
 * stdout and stderr are drained together with stream_select() so a child that
 * fills one pipe while the other is being read cannot stall the script.
 *
 * @param string $cmd command to execute
 * @return array|false array with keys 'stdin' (always an empty string, kept
 *                     for backward compatibility), 'stdout' and 'stderr' |
 *                     boolean false if the process could not be started
 * @see https://secure.php.net/manual/en/function.proc-open.php
 */
function shell_execute($cmd)
{
    $pipes = [];
    $process = proc_open(
        $cmd,
        [
            0 => ['pipe', 'r'],
            1 => ['pipe', 'w'],
            2 => ['pipe', 'w'],
        ],
        $pipes
    );
    if (!is_resource($process)) {
        return false;
    }

    // The parent holds the write end of the child's stdin; nothing is written.
    fclose($pipes[0]);

    $buffers = [1 => '', 2 => ''];
    $open = [1 => $pipes[1], 2 => $pipes[2]];
    while (!empty($open)) {
        $read = $open;
        $write = null;
        $except = null;
        // Block until at least one of the remaining pipes has data or EOF.
        if (false === stream_select($read, $write, $except, null)) {
            break;
        }
        foreach ($read as $stream) {
            $id = array_search($stream, $open, true);
            $chunk = fread($stream, 8192);
            if ($chunk === false || ($chunk === '' && feof($stream))) {
                fclose($stream);
                unset($open[$id]);
                continue;
            }
            $buffers[$id] .= $chunk;
        }
    }
    // Only reached with entries left when stream_select() failed.
    foreach ($open as $stream) {
        fclose($stream);
    }
    proc_close($process);

    return [
        'stdin' => '',
        'stdout' => $buffers[1],
        'stderr' => $buffers[2]
    ];
}
/**
 * Execute a command and return its stdout, or throw when it reports an error.
 *
 * Any output on stderr is treated as failure and becomes the exception
 * message. A command that cannot be started at all also raises an exception
 * instead of being mistaken for a command that printed nothing.
 *
 * @param string $cmd command to execute
 * @param boolean $split split returned results? default on newline
 * @param string $exp regular expression to preg_split to split on
 * @return mixed array of pieces when splitting applies | the unsplit stdout
 *               when $split or $exp is empty or there is no output
 * @throws Exception if the command cannot be started or writes to stderr
 * @see shell_execute($cmd)
 */
function cmd_execute($cmd, $split = true, $exp = "/\n/")
{
    $result = shell_execute($cmd);
    // shell_execute() returns false when proc_open() fails.
    if (!is_array($result)) {
        throw new Exception("Unable to execute command: $cmd");
    }
    if (!empty($result['stderr'])) {
        throw new Exception($result['stderr']);
    }
    $data = $result['stdout'];
    if (empty($split) || empty($exp) || empty($data)) {
        return $data;
    }
    return preg_split($exp, $data);
}
/**
 * Check that the required command-line tools are installed and resolve their
 * paths with `which`.
 *
 * A complete result is cached for subsequent calls. When any requirement is
 * missing, one error line per missing tool is echoed and an empty array is
 * returned, so callers can stop with a simple empty() check; an incomplete
 * result is never cached.
 *
 * @return array map of tool name => path, or an empty array if a tool is missing
 */
function getCommands()
{
    static $commands = []; // cli command paths
    if (!empty($commands)) {
        return $commands;
    }
    $requirements = [
        'gs' => 'http://manpages.ubuntu.com/manpages/bionic/man1/gs.1.html',
        'pdf2ps' => 'http://manpages.ubuntu.com/manpages/bionic/man1/pdf2ps.1.html',
        'ps2pdf' => 'http://manpages.ubuntu.com/manpages/bionic/man1/ps2pdf.1.html',
        'qpdf' => 'http://qpdf.sourceforge.net/',
        'cp' => 'copy system command - cp',
        'mv' => 'move system command - mv',
        'find' => 'system find commmand',
    ];
    $found = [];
    $errors = [];
    foreach ($requirements as $tool => $description) {
        try {
            $located = cmd_execute("which $tool");
        } catch (Exception $e) {
            // cmd_execute() throws when `which` writes to stderr; count that
            // as "not installed" rather than aborting the whole check.
            $located = '';
        }
        // A successful lookup comes back as an array of lines; the first
        // line is the resolved path.
        $toolPath = is_array($located) ? trim($located[0]) : '';
        if ($toolPath === '') {
            $errors[] = "Error: Missing requirement: $tool - " . $description;
        } else {
            $found[$tool] = $toolPath;
        }
    }
    if (!empty($errors)) {
        echo join("\n", $errors) . "\n";
        return [];
    }
    $commands = $found;
    return $commands;
}
/**
 * Report the script's memory consumption in whole megabytes (rounded up),
 * formatted as "current/peak".
 *
 * @return string memory used
 */
function get_memory_used()
{
    $currentMb = ceil(memory_get_usage() / 1024 / 1024);
    $peakMb = ceil(memory_get_peak_usage() / 1024 / 1024);

    return $currentMb . '/' . $peakMb;
}
/**
 * Write text to STDERR when requested and available, otherwise echo it.
 *
 * @param string $text string to output
 * @param boolean $STDERR write to stderr if it is available
 */
function output($text, $STDERR = true)
{
    $toStderr = !empty($STDERR) && defined('STDERR');
    if (!$toStderr) {
        echo $text;
        return;
    }
    fwrite(STDERR, $text);
}
/**
 * Emit a debug line, prefixed with "[D current/peak]" memory usage, when the
 * DEBUG constant is truthy; optionally followed by a print_r() dump of $data.
 *
 * @param optional string $string string to output
 * @param optional mixed $data to dump
 * @return boolean true if string output, false if not
 */
function debug($string = '', $data = [])
{
    if (!DEBUG) {
        return false;
    }
    $line = '[D ' . get_memory_used() . '] ' . $string;
    output(trim($line) . "\n");
    if (!empty($data)) {
        output(print_r($data, 1));
    }
    return true;
}
/**
 * Emit a "[V]" prefixed line when the VERBOSE constant is truthy and the
 * string is non-empty; the prefix also carries memory usage when DEBUG is
 * truthy. Optionally followed by a print_r() dump of $data.
 *
 * @param string $string string to output
 * @param optional mixed $data to dump
 * @return boolean true if string output, false if not
 */
function verbose($string, $data = [])
{
    if (!VERBOSE || empty($string)) {
        return false;
    }
    $memory = (DEBUG) ? ' ' . get_memory_used() : '';
    $line = '[V' . $memory . '] ' . $string;
    output(trim($line) . "\n");
    if (!empty($data)) {
        output(print_r($data, 1));
    }
    return true;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment