jwoglom · July 15, 2014 03:46
diff --git a/downloader.php b/downloader.php
 <?php
 /*
 * Pearson Successnet Downloader *
 * Fully mirrors a HTML-based textbook stored on Pearson Successnet for offline viewing.
 * Author: James Woglom <[email protected]> (wogloms.com)
 */

 require_once 'simple_html_dom.php'; /* http://simplehtmldom.sourceforge.net/ */
 // Session cookies sent with every request, written as one Cookie header value
 // in the form name=value; name2=value2.
 // NOTE(review): the value below matches the format example, so it is presumably
 // a placeholder to be replaced with real session cookies - confirm.
 $cookies = "name=value; name2=value2";
 // Base URL of the book's content directory. get() and download() append the
 // page name to this string to build each request URL.
 // NOTE(review): "ISBN" looks like a placeholder for the real product id - confirm.
 $prefix = "https://www.pearsonsuccessnet.com/snpapp/iText/products/ISBN/";
 /**
  * Fetches one page of the book over HTTP and returns the response body.
  *
  * The request URL is the global $prefix with $url appended, and the global
  * $cookies string is sent as a Cookie header. TLS peer and host verification
  * are switched off for the request.
  *
  * @param string $url Path appended to $prefix.
  * @return string|bool Whatever curl_exec() returns: the body, or false on failure.
  */
 function get($url) {
    global $cookies, $prefix;

    $handle = curl_init();
    curl_setopt_array($handle, array(
        CURLOPT_USERAGENT      => "MSIE 6.0 Windows",
        CURLOPT_URL            => $prefix.$url,
        CURLOPT_HTTPHEADER     => array("Cookie: ".$cookies),
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_SSL_VERIFYHOST => false,
        CURLOPT_SSL_VERIFYPEER => false,
    ));

    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
 }
 // Bodies of the pages fetched so far; download() stores each one at index $fi.
 $files = array();
 // Index in $files of the page currently being processed; download() increments it per page.
 $fi = 0;
 // Meant to track a previously downloaded page. It is not listed in download()'s
 // `global` statement, so this top-level variable is never updated or read in
 // the code shown here.
 $prevpg = "";
 /**
  * Mirrors a page and keeps following its "next page" link until the book
  * starts repeating.
  *
  * Every page is fetched once through get() so its markup can be scanned for
  * the next page name, and is then mirrored (with its requisites) by wget.
  * Fetched bodies are kept in the global $files array at index $fi.
  *
  * @param string $name Page path relative to the global $prefix.
  * @return void
  */
 function download($name) {
    global $files, $fi, $cookies, $prefix;

    // Iterate rather than recurse so call depth does not grow with the page count.
    while (true) {
        $files[$fi] = get($name);
        if (!is_string($files[$fi]) || $files[$fi] === '') {
            // get() returns false when the transfer fails; nothing to parse or follow.
            echo "\n\nCould not fetch $name; stopping.";
            return;
        }
        // file_put_contents($name, $files[$fi]);

        // The page name comes from remote HTML, so every dynamic part of the
        // command line is shell-escaped. The command starts with a single "wget".
        $cmd = "wget -r -e robots=off -p -k --keep-session-cookies --cookies=on"
            ." --header=".escapeshellarg("Cookie: ".trim($cookies))
            ." ".escapeshellarg($prefix.$name);
        echo $cmd;
        exec("$cmd | tee out-wget.txt 2> /dev/null > /dev/null");

        // The next page name is read from the page's value="..." attributes:
        // the second one is preferred, the first one is the fallback.
        $nextpgs = explode('value="', $files[$fi]);
        $first  = isset($nextpgs[1]) ? $nextpgs[1] : '';
        $second = isset($nextpgs[2]) ? $nextpgs[2] : '';
        $third  = isset($nextpgs[3]) ? $nextpgs[3] : '';
        echo $first."\n".$second."\n".$third;

        $nextpg = explode('"', $second)[0];
        if (strpos($nextpg, "html") === false && strlen($nextpg) < 80) {
            $nextpg = explode('"', $first)[0];
        }

        // Compare against the two previous pages only when they exist.
        $prev1 = isset($files[$fi - 1]) ? $files[$fi - 1] : null;
        $prev2 = isset($files[$fi - 2]) ? $files[$fi - 2] : null;
        if ($files[$fi] === $prev1 || $files[$fi] === $prev2) {
            // Looping
            echo "\n\nReached loop.";
            return;
        }

        if ($nextpg === '') {
            // No candidate link on this page; an empty name would only re-request $prefix.
            echo "\n\nNo next page found.";
            return;
        }

        echo "\n\n\n\n\nNext page: $nextpg \n\n\n";
        $fi++;
        $name = $nextpg;
    }
 }
 // Entry point: start at the book's first page. download() then follows each
 // page's next-page link until the fetched content repeats.
 // NOTE(review): "firstpage.html" is presumably a placeholder for the real first page name - confirm.
 download("firstpage.html");
 ?>
	<?php
	/*
	* Pearson Successnet Downloader *
	* Fully mirrors a HTML-based textbook stored on Pearson Successnet for offline viewing.
	* Author: James Woglom <[email protected]> (wogloms.com)
	*/

	require_once 'simple_html_dom.php'; /* http://simplehtmldom.sourceforge.net/ */
	// Session cookies sent with every request, written as one Cookie header value
	// in the form name=value; name2=value2.
	// NOTE(review): the value below matches the format example, so it is presumably
	// a placeholder to be replaced with real session cookies - confirm.
	$cookies = "name=value; name2=value2";
	// Base URL of the book's content directory. get() and download() append the
	// page name to this string to build each request URL.
	// NOTE(review): "ISBN" looks like a placeholder for the real product id - confirm.
	$prefix = "https://www.pearsonsuccessnet.com/snpapp/iText/products/ISBN/";
	/**
	 * Fetches one page of the book over HTTP and returns the response body.
	 *
	 * The request URL is the global $prefix with $url appended, and the global
	 * $cookies string is sent as a Cookie header. TLS peer and host verification
	 * are switched off for the request.
	 *
	 * @param string $url Path appended to $prefix.
	 * @return string|bool Whatever curl_exec() returns: the body, or false on failure.
	 */
	function get($url) {
	    global $cookies, $prefix;

	    $handle = curl_init();
	    curl_setopt_array($handle, array(
	        CURLOPT_USERAGENT      => "MSIE 6.0 Windows",
	        CURLOPT_URL            => $prefix.$url,
	        CURLOPT_HTTPHEADER     => array("Cookie: ".$cookies),
	        CURLOPT_RETURNTRANSFER => true,
	        CURLOPT_SSL_VERIFYHOST => false,
	        CURLOPT_SSL_VERIFYPEER => false,
	    ));

	    $body = curl_exec($handle);
	    curl_close($handle);

	    return $body;
	}
	// Bodies of the pages fetched so far; download() stores each one at index $fi.
	$files = array();
	// Index in $files of the page currently being processed; download() increments it per page.
	$fi = 0;
	// Meant to track a previously downloaded page. It is not listed in download()'s
	// `global` statement, so this top-level variable is never updated or read in
	// the code shown here.
	$prevpg = "";
	/**
	 * Mirrors a page and keeps following its "next page" link until the book
	 * starts repeating.
	 *
	 * Every page is fetched once through get() so its markup can be scanned for
	 * the next page name, and is then mirrored (with its requisites) by wget.
	 * Fetched bodies are kept in the global $files array at index $fi.
	 *
	 * @param string $name Page path relative to the global $prefix.
	 * @return void
	 */
	function download($name) {
	    global $files, $fi, $cookies, $prefix;

	    // Iterate rather than recurse so call depth does not grow with the page count.
	    while (true) {
	        $files[$fi] = get($name);
	        if (!is_string($files[$fi]) || $files[$fi] === '') {
	            // get() returns false when the transfer fails; nothing to parse or follow.
	            echo "\n\nCould not fetch $name; stopping.";
	            return;
	        }
	        // file_put_contents($name, $files[$fi]);

	        // The page name comes from remote HTML, so every dynamic part of the
	        // command line is shell-escaped. The command starts with a single "wget".
	        $cmd = "wget -r -e robots=off -p -k --keep-session-cookies --cookies=on"
	            ." --header=".escapeshellarg("Cookie: ".trim($cookies))
	            ." ".escapeshellarg($prefix.$name);
	        echo $cmd;
	        // A real, unescaped pipe: wget's stdout goes through tee into out-wget.txt.
	        exec("$cmd | tee out-wget.txt 2> /dev/null > /dev/null");

	        // The next page name is read from the page's value="..." attributes:
	        // the second one is preferred, the first one is the fallback.
	        $nextpgs = explode('value="', $files[$fi]);
	        $first  = isset($nextpgs[1]) ? $nextpgs[1] : '';
	        $second = isset($nextpgs[2]) ? $nextpgs[2] : '';
	        $third  = isset($nextpgs[3]) ? $nextpgs[3] : '';
	        echo $first."\n".$second."\n".$third;

	        $nextpg = explode('"', $second)[0];
	        if (strpos($nextpg, "html") === false && strlen($nextpg) < 80) {
	            $nextpg = explode('"', $first)[0];
	        }

	        // Compare against the two previous pages only when they exist.
	        $prev1 = isset($files[$fi - 1]) ? $files[$fi - 1] : null;
	        $prev2 = isset($files[$fi - 2]) ? $files[$fi - 2] : null;
	        if ($files[$fi] === $prev1 || $files[$fi] === $prev2) {
	            // Looping
	            echo "\n\nReached loop.";
	            return;
	        }

	        if ($nextpg === '') {
	            // No candidate link on this page; an empty name would only re-request $prefix.
	            echo "\n\nNo next page found.";
	            return;
	        }

	        echo "\n\n\n\n\nNext page: $nextpg \n\n\n";
	        $fi++;
	        $name = $nextpg;
	    }
	}
	// Entry point: start at the book's first page. download() then follows each
	// page's next-page link until the fetched content repeats.
	// NOTE(review): "firstpage.html" is presumably a placeholder for the real first page name - confirm.
	download("firstpage.html");
	?>