<?php

// Short alias for the platform directory separator, used when building save paths.
define('DS', DIRECTORY_SEPARATOR);

// Check if in Docker Container.
// Two signals are used:
//  - Docker creates /.dockerenv inside its containers;
//  - on cgroup v1 hosts the cgroup file of PID 1 contains ":/docker/<id>" entries.
// preg_match() returns 1 on a match, 0 on no match and false on error, so the result has to be
// compared against 1 (comparing against false treats "no match" as a hit).
define('CGROUP_FILE', '/proc/1/cgroup');
define(
    'IN_DOCKER',
    file_exists('/.dockerenv')
        || (
            is_readable(CGROUP_FILE)
            && 1 === preg_match('#:/docker/#', (string) file_get_contents(CGROUP_FILE))
        )
);

// config values

// Parent dir of the ebooks and extras directories
$saveParentDir = __DIR__;

// Path of the ebooks directory relative to $saveParentDir
$ebooksDir = 'ebooks';

// Path of the extras directory relative to $saveParentDir
$extrasDir = 'ebooks' . DS . 'extras';

// Time (in seconds, passed to sleep()) to delay between page requests / different book downloads
$sleepDuration = 4;

// Book details to try requesting from the PacktPub API. This can be max 25
$booksPerListPage = 24;

// Different file types from BOOK_FORMATS_URL you want to download
$fileTypesWanted = ['epub', 'mobi', 'pdf', 'code', 'video'];

// Whether or not you want the book front cover downloading (if available)
$downloadFrontCover = true;

// If set to a number this will be the first book downloaded of a range
$startIndex = false;

// If set to a number this will be the last book downloaded of a range
$endIndex = false;

// These are required from the website cookies to correctly authenticate

// Access Token. Obtain using associated user.js with TamperMonkey, or use dev-tools console
$accessToken = '';

// Packt Session. Obtain using associated user.js with TamperMonkey, or use dev-tools console
$sessionId = '';

// Endpoint templates. The %d / %s placeholders are filled in with sprintf() by the code below.

// Paged list of owned products: first placeholder is the page size, second the offset.
define(
    'OWNED_BOOKS_URL',
    'https://www.packtpub.com/api/entitlements/users/me/owned'
        . '?sort=createdAt:desc&search=&limit=%d&offset=%d'
);

// File types available for a product id.
define('BOOK_FORMATS_URL', 'https://services.packtpub.com/products-v1/products/%d/types');

// Download details for a product id and file type.
define('FILE_DOWNLOAD_DETAILS_URL', 'https://services.packtpub.com/products-v1/products/%d/files/%s');

// Summary for a product id (read below for its coverImage field).
define('BOOK_SUMMARY_URL', 'https://static.packt-cdn.com/products/%d/summary');

// cURL options shared by every request this script makes: return the response body from
// curl_exec() and send a desktop browser User-Agent string.
$defaultCurlOptions = [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36',
];

// Used to clear the current output line: carriage return (0x0D) followed by the ANSI
// "erase entire line" sequence (ESC [ 2 K).
// pinched from symfony console: https://github.com/symfony/console/blob/master/Helper/ProgressIndicator.php#L211
define('CLEAR_LINE', "\x0D\x1B[2K");
// I might write this properly, and use symfony console someday!

echo 'Configuration Options:', PHP_EOL;

// Maps each long command line option to the name of the config variable it overrides.
// The trailing ":" is getopt() syntax for "this option requires a value".
$cliOptMap = [
    'save-parent-dir:' => 'saveParentDir',
    'ebooks-dir:' => 'ebooksDir',
    'extras-dir:' => 'extrasDir',
    'sleep-duration:' => 'sleepDuration',
    'books-per-list-page:' => 'booksPerListPage',
    'file-types-wanted:' => 'fileTypesWanted',
    'download-front-cover:' => 'downloadFrontCover',
    'start-index:' => 'startIndex',
    'end-index:' => 'endIndex',
];

// getopt() returns false on failure; normalise that to "no options given" so the lookups
// further down always operate on an array.
$cliOptValues = getopt('', array_keys($cliOptMap)) ?: [];

// Inside a Docker container the save location is fixed to /mnt; a --save-parent-dir argument is
// reported and dropped so that it cannot override it later on.
if (IN_DOCKER) {
    if (!empty($cliOptValues['save-parent-dir'])) {
        echo 'Ignoring --save-parent-dir as running inside docker container. Mount a volume to /mnt instead.', PHP_EOL;
        unset($cliOptValues['save-parent-dir']);
    }

    $saveParentDir = '/mnt';
}

// Copies every supplied command line option onto its config variable and prints the effective
// value of each setting.
//
// getopt() only yields strings (or an array of strings when an option is repeated), so typed
// settings are validated explicitly: a plain truthiness check would discard "0" and would treat
// "--download-front-cover=false" as enabled.

// Smallest accepted value for each integer setting.
$intOptionMinimums = [
    'sleepDuration' => 0,
    'booksPerListPage' => 1,
    'startIndex' => 1,
    'endIndex' => 1,
];

foreach ($cliOptMap as $cliOptName => $varName) {
    $nameToUse = str_replace(':', '', $cliOptName);
    $rawValue = $cliOptValues[$nameToUse] ?? null;

    if (null !== $rawValue && false !== $rawValue && '' !== $rawValue) {
        if ('fileTypesWanted' === $varName) {
            // The only list setting: keep repeated options as a list, wrap a single value.
            $valToAssign = is_array($rawValue) ? $rawValue : [$rawValue];
            $isValid = true;
        } else {
            // A repeated scalar option arrives as an array; the last occurrence wins.
            if (is_array($rawValue)) {
                $rawValue = end($rawValue);
            }

            if (array_key_exists($varName, $intOptionMinimums)) {
                $valToAssign = filter_var(
                    $rawValue,
                    FILTER_VALIDATE_INT,
                    ['options' => ['min_range' => $intOptionMinimums[$varName]]]
                );
                $isValid = false !== $valToAssign;
            } elseif ('downloadFrontCover' === $varName) {
                // Accepts 1/true/on/yes and 0/false/off/no; anything else yields null.
                $valToAssign = filter_var($rawValue, FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
                $isValid = null !== $valToAssign;
            } else {
                $valToAssign = $rawValue;
                $isValid = true;
            }
        }

        if ($isValid) {
            // I know, variable variables are awful, but it's got some protection as they're known
            // variable names, and at-least it's not PHP 4/5's register_globals! I'm just being lazy ;-P
            $$varName = $valToAssign;
        } else {
            echo 'Ignoring invalid value for --', $nameToUse, PHP_EOL;
        }
    }

    echo $nameToUse, ' = ', var_export($$varName, true), PHP_EOL;
}

// These are env vars rather than arguments due to their sensitive nature. If using docker run, use --env, -e or --env-file
$envVarMap = [
    'PP_ACCESS_TOKEN' => 'accessToken',
    'PP_SESSION_ID' => 'sessionId',
];

foreach ($envVarMap as $envVarName => $varName) {
    // $_ENV is only populated when the variables_order ini setting contains "E", so fall back to
    // getenv(), which reads the process environment directly (and returns false when unset).
    $envValue = $_ENV[$envVarName] ?? getenv($envVarName);

    if (is_string($envValue) && '' !== $envValue) {
        // Variable variables again: the target names come from the fixed map above.
        $$varName = $envValue;
    }

    // Only report whether the credential is present; the value itself is never printed.
    $isSet = isset($$varName) && '' !== $$varName;
    echo $varName, ' = ', $isSet ? '(set, value hidden)' : '(not set)', PHP_EOL;
}

/**
 * Prints an error message followed by a newline and terminates the script.
 *
 * The process exits with status 1 so that a calling shell or container runtime can tell the
 * run failed (a bare die/exit reports success).
 *
 * @param string $message Text to print before exiting.
 */
function errorAndDie($message)
{
    echo $message, PHP_EOL;

    exit(1);
}

/**
 * Extracts the expiry timestamp ("exp" claim) from a JWT access token.
 *
 * The token is not verified in any way; the second dot-separated segment is simply decoded
 * and read.
 *
 * @param string $accessToken Token made of dot-separated segments.
 *
 * @return int Unix timestamp taken from the "exp" claim, or 0 when the token is malformed or
 *             carries no numeric "exp" (0 is always in the past, so callers treat it as expired).
 */
function getAccessTokenExpiry($accessToken)
{
    $segments = explode('.', (string) $accessToken);
    if (count($segments) < 2) {
        return 0;
    }

    // JWT segments use the URL-safe base64 alphabet ("-" and "_" instead of "+" and "/") with
    // the "=" padding stripped. Map back to the standard alphabet and restore the padding,
    // otherwise base64_decode() silently drops the unknown characters and corrupts the payload.
    $base64 = strtr($segments[1], '-_', '+/');
    $base64 .= str_repeat('=', (4 - strlen($base64) % 4) % 4);

    $payloadJson = base64_decode($base64, true);
    if (false === $payloadJson) {
        return 0;
    }

    $tokenData = json_decode($payloadJson);
    if (!is_object($tokenData) || !isset($tokenData->exp) || !is_numeric($tokenData->exp)) {
        return 0;
    }

    return (int) $tokenData->exp;
}

// Human readable format, taken from here: https://stackoverflow.com/questions/15188033/human-readable-file-size#answer-23888858
/**
 * Formats a byte count using decimal (power of 1000) units.
 *
 * The unit is chosen from the number of digits in the whole part of the value: every three
 * additional digits move one unit up. Values greater than zero are printed with two decimals,
 * anything else with none.
 *
 * @param int|float|string $bytes Size in bytes (numeric strings are accepted).
 *
 * @return string For example "0 B", "999.00 B", "1.50 MB".
 */
function sizeToHuman($bytes)
{
    $units = ['B', 'kB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'];

    // Count digits of the whole part only, so a fractional input such as 100.5 is not
    // mistaken for a six digit number.
    $wholePart = is_int($bytes)
        ? ltrim((string) $bytes, '-')
        : sprintf('%.0f', floor(abs((float) $bytes)));

    // Clamp to the largest known unit instead of reading past the end of the list.
    $factor = min(intdiv(strlen($wholePart) - 1, 3), count($units) - 1);
    $decimals = ($bytes > 0) ? 2 : 0;

    return sprintf("%.{$decimals}f %s", $bytes / (1000 ** $factor), $units[$factor]);
}

// Stops the script (via errorAndDie) when the current access token has expired, otherwise returns
// the token's expiry timestamp. $accessToken is captured by reference, so the value it holds at
// call time is the one that gets checked.
// Pass true to additionally print the expiry date.
$checkAccessTokenExpiry = function (bool $outputExpiry = false) use (&$accessToken) {
    $expiryTimestamp = getAccessTokenExpiry($accessToken);

    if ($expiryTimestamp <= time()) {
        errorAndDie('Current Access Token has expired');
    }

    if ($outputExpiry) {
        echo 'Access Token expires at ', date('Y/m/d H:i:s T', $expiryTimestamp), PHP_EOL;
    }

    return $expiryTimestamp;
};

// Performs a GET request and returns the decoded JSON body.
//
//  $url          - address to request
//  $errorMessage - text passed to errorAndDie() when the request or the decoding fails
//  $extraOptions - additional cURL options; these take precedence over $defaultCurlOptions
//
// When an access token is set, its expiry is checked first and the request is sent with an
// "Authorization: Bearer" header plus the packt_session cookie.
// The script is terminated (errorAndDie) on a transport error, on any status other than 200 and
// when the body does not decode to a non-null JSON value.
$getJson = function ($url, $errorMessage, $extraOptions = []) use (
    $checkAccessTokenExpiry,
    $defaultCurlOptions,
    &$accessToken,
    &$sessionId
) {
    if (!empty($accessToken)) {
        $checkAccessTokenExpiry();

        $headers = $extraOptions[CURLOPT_HTTPHEADER] ?? [];
        $headers[] = 'Authorization: Bearer ' . $accessToken;
        $extraOptions[CURLOPT_HTTPHEADER] = $headers;
        $extraOptions[CURLOPT_COOKIE] = 'packt_session=' . $sessionId . '; path=/';
    }

    $ch = curl_init($url);
    // array_replace() lets the per-call options override the defaults; the "+" operator would
    // keep the default whenever both arrays define the same option.
    curl_setopt_array($ch, array_replace($defaultCurlOptions, $extraOptions));

    $response = curl_exec($ch);
    $responseCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $transportError = (false === $response) ? curl_error($ch) : '';
    curl_close($ch);

    if (false === $response) {
        // No HTTP exchange took place, so report cURL's own description of the failure.
        errorAndDie($errorMessage . ': ' . $transportError);
    }

    if ($responseCode !== 200) {
        errorAndDie($errorMessage. ': '. $responseCode);
    }

    $decodedJson = json_decode($response);
    if (null === $decodedJson) {
        errorAndDie($errorMessage);
    }

    return $decodedJson;
};

// Downloads $url to $savePath, following redirects, and prints the file size followed by a
// percentage progress indicator when the server announces a Content-Length.
//
//  $url          - address of the file
//  $savePath     - destination file (created or truncated)
//  $errorMessage - text printed when the download fails
//
// Returns true when the final response had status 200 and the saved file is not empty,
// false otherwise.
$downloadFile = function ($url, $savePath, $errorMessage) use ($defaultCurlOptions) {
    $fh = fopen($savePath, 'w+');
    if (false === $fh) {
        echo PHP_EOL, $errorMessage, ': unable to open ', $savePath, ' for writing', PHP_EOL;

        return false;
    }

    $filesize = 0;        // Content-Length of the response currently being received (0 = unknown)
    $downloadedSize = 0;  // body bytes received so far
    $announced = false;   // whether the ", filesize: ..." line has been printed

    $ch = curl_init($url);
    curl_setopt_array(
        $ch,
        // array_replace() so that the options below really override the defaults ("+" would
        // keep the default CURLOPT_RETURNTRANSFER => true).
        array_replace(
            $defaultCurlOptions,
            [
                CURLOPT_FOLLOWLOCATION => true,
                CURLOPT_RETURNTRANSFER => false,
                CURLOPT_FILE => $fh,
                CURLOPT_HEADERFUNCTION => function ($ch, $header) use (&$filesize, &$downloadedSize) {
                    // Every response starts with a status line. With redirects being followed
                    // several responses are seen, so forget what the previous one announced.
                    if (0 === stripos($header, 'HTTP/')) {
                        $filesize = 0;
                        $downloadedSize = 0;
                    }

                    $headerParts = explode(':', $header, 2);
                    if (2 === count($headerParts) && 'content-length' === strtolower(trim($headerParts[0]))) {
                        $filesize = intval(trim($headerParts[1]));
                    }

                    // cURL expects the number of bytes handled.
                    return strlen($header);
                },
                CURLOPT_WRITEFUNCTION => function ($ch, $data) use ($fh, &$filesize, &$downloadedSize, &$announced) {
                    if (!$announced) {
                        echo ' , filesize: ', ($filesize > 0) ? sizeToHuman($filesize) : 'unknown', PHP_EOL;
                        $announced = true;
                    }

                    $downloadedSize += strlen($data);

                    // Progress can only be shown when the total size is known; the data is
                    // written either way.
                    if ($filesize > 0) {
                        $downloadedPercentage = $downloadedSize / $filesize;
                        echo CLEAR_LINE;
                        if ($downloadedPercentage < 1) {
                            echo 'Downloaded ', number_format($downloadedPercentage * 100, 2, '.', ''), '%';
                        }
                    }

                    // Returning anything other than the chunk length makes cURL abort the transfer.
                    return fwrite($fh, $data);
                },
            ]
        )
    );

    curl_exec($ch);
    $responseCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);
    fclose($fh);

    if (!$announced) {
        // No body was received: finish the "Downloading ..." line started by the caller.
        echo PHP_EOL;
    }

    // filesize() results are cached per path; make sure the size just written is the one read.
    clearstatcache(true, $savePath);

    if ($responseCode !== 200 || !filesize($savePath)) {
        echo $errorMessage, ': ', $responseCode, PHP_EOL;

        return false;
    }

    return true;
};

// Nothing can be requested without an access token.
if (empty($accessToken)) {
    errorAndDie('Missing auth token. Stopping script.');
}

// Stops the script if the token has expired, otherwise prints when it will expire.
$checkAccessTokenExpiry(true);

echo 'Sleeping for ', $sleepDuration, ' seconds', PHP_EOL;
sleep($sleepDuration);

// Running count of books seen so far; also used as the offset when requesting a list page.
$bookCount = 0;

// A page size below 1 would make the page count calculation divide by zero.
$booksPerListPage = max(1, (int) $booksPerListPage);

echo 'Getting list of eBooks', PHP_EOL;
$booksInfo = $getJson(
    sprintf(OWNED_BOOKS_URL, $booksPerListPage, $bookCount),
    'Couldn\'t retrieve list of books'
);

// The first page also reports the overall number of owned books, which gives the page count.
$totalNumberOfBooks = $booksInfo->count;
$noOfPages = (int) ceil($totalNumberOfBooks / $booksPerListPage);

echo 'Total number of books: ', $totalNumberOfBooks, ', Total number of pages: ', $noOfPages, PHP_EOL;

// Walks through every list page and, for each book inside the requested index range, downloads
// the wanted file types and (optionally) the front cover.
for ($pageCount = 1; $pageCount <= $noOfPages; $pageCount++) {
    // $booksInfo already holds page 1 when the loop starts; later pages are requested using the
    // number of books seen so far as the offset.
    if ($pageCount > 1) {
        $booksInfo = $getJson(
            sprintf(OWNED_BOOKS_URL, $booksPerListPage, $bookCount),
            'Couldn\'t retrieve list of books'
        );
    }

    $pageBooks = $booksInfo->data ?? [];
    $pageBooksCount = count($pageBooks);

    echo 'Found ', $pageBooksCount, ' books on page ', $pageCount, PHP_EOL;

    if (0 === $pageBooksCount) {
        continue;
    }

    // Make sure both target directories exist (creating missing parents as well).
    foreach ([$ebooksDir, $extrasDir] as $relativeDir) {
        $targetDir = $saveParentDir . DS . $relativeDir;
        if (!is_dir($targetDir) && !mkdir($targetDir, 0777, true) && !is_dir($targetDir)) {
            errorAndDie('Unable to create directory ' . $targetDir);
        }
    }

    foreach ($pageBooks as $bookData) {
        $bookCount++;

        if ($startIndex !== false && $bookCount < $startIndex) {
            continue;
        }

        $name = $bookData->productName;
        echo $bookCount, '. Examining "', $name, '"', PHP_EOL;

        // Build the file name from the title: runs of < > : " / \ | ? * % become "-",
        // whitespace runs become "_" and square brackets are dropped.
        $fileName = preg_replace(['/[\<\>\:\"\/\\\|\?\*\%]+/', '/\s+/', '/[\[\]]/'], ['-', '_', ''], $name);

        $downloadFormatInfo = $getJson(
            sprintf(BOOK_FORMATS_URL, $bookData->productId),
            'Couldn\'t retrieve available book formats'
        );

        // Keep only the file types that were asked for; a response without any format entry
        // is treated as "nothing available".
        $downloadLinks = [];
        foreach ($downloadFormatInfo->data[0]->fileTypes ?? [] as $fileType) {
            if (in_array($fileType, $fileTypesWanted, true)) {
                $downloadLinks[$fileType] = sprintf(FILE_DOWNLOAD_DETAILS_URL, $bookData->productId, $fileType);
            }
        }

        $hasDownloads = count($downloadLinks) > 0;

        if (!$hasDownloads) {
            echo 'No Downloadable Books / Code', PHP_EOL;
        } else {
            foreach ($downloadLinks as $format => $downloadHref) {
                $downloadLinkInfo = $getJson(
                    $downloadHref,
                    'Couldn\'t retrieve book download link'
                );

                // Code bundles go to the extras directory as .zip; everything else goes to the
                // ebooks directory, with videos also saved as .zip.
                if ('code' === $format) {
                    $savePath = $saveParentDir . DS . $extrasDir . DS . $fileName . '.zip';
                } else {
                    $extension = ('video' === $format) ? 'zip' : $format;
                    $savePath = $saveParentDir . DS . $ebooksDir . DS . $fileName . '.' . $extension;
                }

                echo 'Downloading ', $format, ' to ', $savePath;

                $downloadFile($downloadLinkInfo->data, $savePath, $format . ' download failed');
            }

            if ($downloadFrontCover) {
                $frontCoverLinkInfo = $getJson(
                    sprintf(BOOK_SUMMARY_URL, $bookData->productId),
                    'Couldn\'t retrieve book summary link'
                );

                if (!empty($frontCoverLinkInfo->coverImage)) {
                    // Take the extension from the path part of the URL only, so that a query
                    // string or a path without an extension cannot leak into the file name.
                    $coverPath = (string) parse_url($frontCoverLinkInfo->coverImage, PHP_URL_PATH);
                    $fileExt = pathinfo($coverPath, PATHINFO_EXTENSION);
                    if ('' === $fileExt) {
                        $fileExt = 'jpg'; // fallback when the URL carries no extension
                    }

                    $savePath = $saveParentDir . DS . $extrasDir . DS . $fileName . '.' . $fileExt;
                    echo 'Downloading Front Cover to: ', $savePath;

                    $downloadFile(
                        $frontCoverLinkInfo->coverImage,
                        $savePath,
                        'Front cover download failed'
                    );
                }
            }
        }

        // Checked for every examined book, including those without anything to download,
        // so the range end is never overshot.
        if ($endIndex !== false && $bookCount >= $endIndex) {
            break 2;
        }

        if ($hasDownloads) {
            echo 'Sleeping for ', $sleepDuration, ' seconds', PHP_EOL;
            sleep($sleepDuration);
        }
    }
}
// NOTE: This script no longer works due to changes in the site. A quick hack I put together shows
// the new method against their new REST endpoints, which is much simpler than site parsing:
// https://gist.github.com/nneul/6eda98fd87a58a623b857523247f3471