Created
July 15, 2014 03:46
-
-
Save jwoglom/e007a616dfd940e95d09 to your computer and use it in GitHub Desktop.
Pearson Successnet Downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/* | |
* Pearson Successnet Downloader * | |
* Fully mirrors an HTML-based textbook stored on Pearson Successnet for offline viewing.
* Author: James Woglom <[email protected]> (wogloms.com) | |
*/ | |
require_once 'simple_html_dom.php'; /* http://simplehtmldom.sourceforge.net/ */

// Session cookies sent with every request, written as "name=value; name2=value2".
$cookies = 'name=value; name2=value2';

// Base URL of the text's content directory; page names are appended to it.
$prefix = 'https://www.pearsonsuccessnet.com/snpapp/iText/products/ISBN/';
/**
 * Fetch one page of the book and return its body.
 *
 * The request goes to $prefix.$url and carries the session cookies from the
 * global $cookies in a Cookie header.
 *
 * @param string $url Page path, relative to the global $prefix.
 * @return string|false Response body, or false when the transfer failed
 *                      (a warning describing the failure is raised as well).
 */
function get($url) {
    global $cookies, $prefix;

    $ch = curl_init();
    if ($ch === false) {
        trigger_error("get(): could not initialise cURL", E_USER_WARNING);
        return false;
    }

    curl_setopt_array($ch, array(
        CURLOPT_USERAGENT      => "MSIE 6.0 Windows",
        CURLOPT_URL            => $prefix.$url,
        CURLOPT_HTTPHEADER     => array("Cookie: ".$cookies),
        CURLOPT_RETURNTRANSFER => true,
        // The request carries session cookies, so the server certificate must
        // be verified; with verification off anyone on the path could read them.
        CURLOPT_SSL_VERIFYHOST => 2,
        CURLOPT_SSL_VERIFYPEER => true,
    ));

    $res = curl_exec($ch);
    if ($res === false) {
        // Surface the reason instead of handing back a bare false silently.
        trigger_error("get(): request for ".$prefix.$url." failed: ".curl_error($ch), E_USER_WARNING);
    }
    curl_close($ch);

    return $res;
}
$files = array(); // Body of every page fetched so far, indexed by $fi.
$fi = 0;          // Index in $files of the page currently being processed.
$prevpg = "";     // Name of the page processed just before the current one.

/**
 * Mirror the book page by page, starting at $name.
 *
 * For each page this fetches the HTML with get(), runs wget on the same URL
 * (recursive, with page requisites and link conversion), and then takes the
 * name of the following page from the page's value="..." attributes.
 *
 * The walk stops when a page's body equals one of the two bodies fetched
 * before it, when a fetch fails, or when no following page name is found.
 *
 * Reads the globals $cookies and $prefix; updates the globals $files, $fi and
 * $prevpg. wget's piped output goes to out-wget.txt in the working directory.
 *
 * @param string $name Page to start from, relative to the global $prefix.
 * @return void
 */
function download($name) {
    global $files, $fi, $cookies, $prefix, $prevpg;

    // Loop instead of recursing once per page, so a long book cannot grow the
    // call stack without bound.
    while (true) {
        $files[$fi] = get($name);
        if (!is_string($files[$fi])) {
            // Without a body there is nothing to parse for the next page.
            echo "\n\nCould not fetch $name; stopping.";
            return;
        }
        // file_put_contents($name, $files[$fi]);

        // Escape both arguments: the cookie string and the page name (which is
        // scraped from remote HTML) must not be interpreted by the shell.
        $cmd = "wget -r -e robots=off -p -k --keep-session-cookies --cookies=on --header="
            .escapeshellarg("Cookie: ".trim($cookies))." ".escapeshellarg($prefix.$name);
        echo $cmd;
        // $cmd already begins with "wget"; do not prepend it a second time.
        exec($cmd." | tee out-wget.txt 2> /dev/null > /dev/null");

        // Text following each value=" occurrence; a page may have fewer than three.
        $nextpgs = explode('value="', $files[$fi]);
        $first  = isset($nextpgs[1]) ? $nextpgs[1] : "";
        $second = isset($nextpgs[2]) ? $nextpgs[2] : "";
        $third  = isset($nextpgs[3]) ? $nextpgs[3] : "";
        echo $first."\n".$second."\n".$third;

        // Prefer the second value attribute; fall back to the first when the
        // second neither mentions "html" nor is at least 80 characters long.
        // NOTE(review): presumably tuned to Successnet's page markup — confirm.
        $parts = explode('"', $second);
        $nextpg = $parts[0];
        if (strpos($nextpg, "html") === false && strlen($nextpg) < 80) {
            $parts = explode('"', $first);
            $nextpg = $parts[0];
        }

        // Only compare against earlier pages that actually exist.
        $sameAsPrevious   = $fi >= 1 && $files[$fi] === $files[$fi - 1];
        $sameAsTwoEarlier = $fi >= 2 && $files[$fi] === $files[$fi - 2];
        if ($sameAsPrevious || $sameAsTwoEarlier) {
            // Looping
            echo "\n\nReached loop.";
            return;
        }

        echo "\n\n\n\n\nNext page: $nextpg \n\n\n";
        if ($nextpg === "") {
            // An empty name would just request the bare $prefix URL.
            echo "\n\nNo next page found; stopping.";
            return;
        }

        $prevpg = $name;
        $fi++;
        $name = $nextpg;
    }
}
// Entry point: begin mirroring at the book's first page.
download('firstpage.html');
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment