coryalder · August 17, 2011 07:37 · jgilmour · Jul 17, 2013 · hesyar · Dec 2, 2014
diff --git a/blogpost.md b/blogpost.md
diff --git a/safariparse.php b/safariparse.php
 <?php
 // code to accompany this blog post http://objectivesea.tumblr.com/post/9033067018/safaribooks
 // Entry point: open the Safari app database and export every book whose
 // ZOFFLINESTATUS is 'offline' by handing each row to save_a_book().
 try
 {
     // create or open the database (the SQLite3 constructor throws on failure)
     $database = new SQLite3('/Users/cory/Desktop/Safari.sqlite');
     echo "Database connection open...\n";

     // Single quotes make 'offline' an unambiguous string literal; SQLite treats
     // a double-quoted token as an identifier first.
     $booksQuery = "SELECT * from ZBOOK where ZOFFLINESTATUS = 'offline'";
     $result = $database->query($booksQuery);

     if ($result) {
         // Buffer the rows so a real book count can be reported;
         // numColumns() is the number of columns, not the number of rows.
         $books = array();
         while ($row = $result->fetchArray(SQLITE3_ASSOC)) {
             $books[] = $row;
         }
         echo "Found " . count($books) . " books stored offline\n";

         foreach ($books as $book) {
             echo "Saving book " . $book['ZTITLE'] . "\n";
             save_a_book($book['ZTITLE'], $book['Z_PK'], $database, $book['ZFPID']);
         }
     } else {
         echo "Database failed to return results for query: " . $booksQuery;
     }
 }
 catch (Exception $e)
 {
     // Print the actual failure; the original passed an undefined $error to die().
     die($e->getMessage());
 }


 /**
  * Export one book as a directory of standalone HTML pages.
  *
  * Every ZPAGE row belonging to the book is written, in ZORDERINDEX order, to
  * ./<sanitized title>/pg_<n>.html, wrapped in a minimal XHTML shell. After the
  * pages are written, <sanitized title>/images/<fpid> is created; nothing in
  * this function writes into that directory.
  *
  * Returns nothing. If the page query cannot be prepared or executed the
  * function returns without writing anything. Terminates the script via die()
  * when a page file cannot be opened for writing.
  *
  * @param string  $name     Book title; sanitized to form the directory name.
  * @param mixed   $book_id  Value matched against ZPAGE.ZBOOK.
  * @param SQLite3 $database Open database handle.
  * @param mixed   $fpid     Appended to "<dir>/images/" to name the images directory.
  */
 function save_a_book($name, $book_id, $database, $fpid) {
     $top = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/></head><body>';
     $bottom = '</body></html>';

     // Bind the book id rather than splicing it into the SQL text. The original
     // wrapped it in double quotes, which SQLite parses as an identifier first
     // and only treats as a string literal as a compatibility fallback.
     $statement = $database->prepare('SELECT ZHTMLBODY, Z_PK, ZORDERINDEX from ZPAGE where ZBOOK = :book ORDER BY ZORDERINDEX');
     if (!$statement) {
         return;
     }
     $statement->bindValue(':book', $book_id);
     $result = $statement->execute();
     if (!$result) {
         return;
     }

     // The sanitized title never changes inside the loop, so compute it once.
     $bookDir = sanitize($name, TRUE);
     mkdir_recursive($bookDir);

     $counter = 1;
     while ($row = $result->fetchArray()) {
         $myFile = "./" . $bookDir . "/pg_" . $counter . ".html";
         $fh = fopen($myFile, 'w') or die("can't open file ". $row['Z_PK']);
         fwrite($fh, $top . $row['ZHTMLBODY'] . $bottom);
         fclose($fh);
         $counter++;
         print("Wrote page #: {$row['Z_PK']} to file {$myFile}\n");
     }

     mkdir_recursive($bookDir . "/images/" . $fpid);
 }

 /**
  * Create a directory together with any missing parent directories.
  *
  * @param string $pathname Directory path to create.
  * @return bool True when the directory exists on return, false otherwise
  *              (including for an empty path).
  */
 function mkdir_recursive($pathname)
 {
     // dirname('') is '', so the original recursed without end on an empty path.
     if ($pathname === '' || $pathname === null) {
         return false;
     }

     if (is_dir($pathname)) {
         return true;
     }

     // Let mkdir() build the whole chain. Failure stays silent (best effort, as
     // before) and the trailing is_dir() covers a directory that appeared
     // between the check above and the mkdir() call.
     return @mkdir($pathname, 0777, true) || is_dir($pathname);
 }


 /**
  * Turn arbitrary text into a lowercase, dash-separated slug.
  *
  * Runs of characters outside the allowed set become a single dash, repeated
  * dashes are collapsed, and the result is lowercased as UTF-8.
  *
  * @param string $string      Text to clean up.
  * @param bool   $is_filename When true, '~', '_' and '.' are kept as well.
  * @return string
  */
 function sanitize($string = '', $is_filename = FALSE)
 {
     // Extra characters tolerated only in filenames.
     $extra = '';
     if ($is_filename) {
         $extra = '~_\.';
     }

     $dashed = preg_replace('/[^\w\-'. $extra . ']+/u', '-', $string);
     $collapsed = preg_replace('/--+/u', '-', $dashed);

     return mb_strtolower($collapsed, 'UTF-8');
 }

 ?>
diff --git a/safariparse_single_file.php b/safariparse_single_file.php
 <?php

 // Same as safariparse.php, except it outputs one big HTML file per book
 // instead of putting each individual chapter in a separate HTML file.

 // Entry point: open the Safari app database and export every book whose
 // ZOFFLINESTATUS is 'offline' by handing each row to save_a_book().
 try
 {
     // create or open the database (the SQLite3 constructor throws on failure)
     $database = new SQLite3('/Users/cory/Desktop/Safari.sqlite');
     echo "Database connection open...\n";

     // Single quotes make 'offline' an unambiguous string literal; SQLite treats
     // a double-quoted token as an identifier first.
     $booksQuery = "SELECT * from ZBOOK where ZOFFLINESTATUS = 'offline'";
     $result = $database->query($booksQuery);

     if ($result) {
         // Collect the rows up front so the message reports how many books were
         // found; numColumns() only gives the width of the result set.
         $books = array();
         while ($row = $result->fetchArray(SQLITE3_ASSOC)) {
             $books[] = $row;
         }
         echo "Found " . count($books) . " books stored offline\n";

         foreach ($books as $book) {
             echo "Saving book " . $book['ZTITLE'] . "\n";
             save_a_book($book['ZTITLE'], $book['Z_PK'], $database, $book['ZFPID']);
         }
     } else {
         echo "Database failed to return results for query: " . $booksQuery;
     }
 }
 catch (Exception $e)
 {
     // Print the actual failure; the original passed an undefined $error to die().
     die($e->getMessage());
 }


 /**
  * Export one book as a single HTML file.
  *
  * The bodies of all ZPAGE rows belonging to the book are concatenated, in
  * ZORDERINDEX order, into ./<sanitized title>.html between one XHTML header
  * (carrying the book title) and one footer. The directories
  * <sanitized title> and <sanitized title>/images/<fpid> are also created;
  * nothing in this function writes into them.
  *
  * Returns nothing. If the page query cannot be prepared or executed the
  * function returns without writing anything. Terminates the script via die()
  * when the output file cannot be opened for writing.
  *
  * @param string  $name     Book title; used for <title> and, sanitized, for the file name.
  * @param mixed   $book_id  Value matched against ZPAGE.ZBOOK.
  * @param SQLite3 $database Open database handle.
  * @param mixed   $fpid     Appended to "<dir>/images/" to name the images directory.
  */
 function save_a_book($name, $book_id, $database, $fpid) {
     // The title is placed inside <title>, so HTML metacharacters must be escaped.
     $top = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>' . htmlspecialchars($name, ENT_QUOTES, 'UTF-8') . '</title></head><body>';
     $bottom = '</body></html>';

     // Bind the book id rather than splicing it into the SQL text. The original
     // wrapped it in double quotes, which SQLite parses as an identifier first
     // and only treats as a string literal as a compatibility fallback.
     $statement = $database->prepare('SELECT ZHTMLBODY, Z_PK, ZORDERINDEX from ZPAGE where ZBOOK = :book ORDER BY ZORDERINDEX');
     if (!$statement) {
         return;
     }
     $statement->bindValue(':book', $book_id);
     $result = $statement->execute();
     if (!$result) {
         return;
     }

     // The sanitized title names both the output file and the side directory.
     $bookSlug = sanitize($name, TRUE);

     $myFile = "./" . $bookSlug . ".html";
     $fh = fopen($myFile, 'w') or die("can't open file ". $myFile);
     fwrite($fh, $top);

     // make dir
     mkdir_recursive($bookSlug);

     while ($row = $result->fetchArray()) {
         fwrite($fh, $row['ZHTMLBODY']);
         print("Wrote page #: {$row['Z_PK']} to file {$myFile}\n");
     }

     fwrite($fh, $bottom);
     fclose($fh);

     mkdir_recursive($bookSlug . "/images/" . $fpid);
 }

 /**
  * Create a directory together with any missing parent directories.
  *
  * @param string $pathname Directory path to create.
  * @return bool True when the directory exists on return, false otherwise
  *              (including for an empty path).
  */
 function mkdir_recursive($pathname)
 {
     // An empty path can never be created, and dirname('') is '' again, so the
     // original self-call never terminated for it.
     if ($pathname === '' || $pathname === null) {
         return false;
     }

     if (is_dir($pathname)) {
         return true;
     }

     // mkdir() creates the missing parents itself. Warnings stay suppressed
     // (best effort, as before); the final is_dir() tolerates a concurrent
     // creator winning the race.
     return @mkdir($pathname, 0777, true) || is_dir($pathname);
 }


 /**
  * Reduce a string to a lowercase slug made of word characters and dashes.
  *
  * @param string $string      Input text.
  * @param bool   $is_filename Also keep '~', '_' and '.' when true.
  * @return string Lowercased (UTF-8) text with disallowed runs replaced by one dash.
  */
 function sanitize($string = '', $is_filename = FALSE)
 {
     // Step 1: every run of disallowed characters becomes a dash.
     $pattern = '/[^\w\-'. ($is_filename ? '~_\.' : ''). ']+/u';
     $slug = preg_replace($pattern, '-', $string);

     // Step 2: squeeze consecutive dashes down to one.
     $slug = preg_replace('/--+/u', '-', $slug);

     // Step 3: lowercase with multibyte awareness.
     return mb_strtolower($slug, 'UTF-8');
 }

 ?>
	<?php
	// code to accompany this blog post http://objectivesea.tumblr.com/post/9033067018/safaribooks
	// Entry point: open the Safari app database and export every book whose
	// ZOFFLINESTATUS is 'offline' by handing each row to save_a_book().
	try
	{
	    // create or open the database (the SQLite3 constructor throws on failure)
	    $database = new SQLite3('/Users/cory/Desktop/Safari.sqlite');
	    echo "Database connection open...\n";

	    // Single quotes make 'offline' an unambiguous string literal; SQLite treats
	    // a double-quoted token as an identifier first.
	    $booksQuery = "SELECT * from ZBOOK where ZOFFLINESTATUS = 'offline'";
	    $result = $database->query($booksQuery);

	    if ($result) {
	        // Buffer the rows so a real book count can be reported;
	        // numColumns() is the number of columns, not the number of rows.
	        $books = array();
	        while ($row = $result->fetchArray(SQLITE3_ASSOC)) {
	            $books[] = $row;
	        }
	        echo "Found " . count($books) . " books stored offline\n";

	        foreach ($books as $book) {
	            echo "Saving book " . $book['ZTITLE'] . "\n";
	            save_a_book($book['ZTITLE'], $book['Z_PK'], $database, $book['ZFPID']);
	        }
	    } else {
	        echo "Database failed to return results for query: " . $booksQuery;
	    }
	}
	catch (Exception $e)
	{
	    // Print the actual failure; the original passed an undefined $error to die().
	    die($e->getMessage());
	}


	/**
	 * Export one book as a directory of standalone HTML pages.
	 *
	 * Every ZPAGE row belonging to the book is written, in ZORDERINDEX order, to
	 * ./<sanitized title>/pg_<n>.html, wrapped in a minimal XHTML shell. After the
	 * pages are written, <sanitized title>/images/<fpid> is created; nothing in
	 * this function writes into that directory.
	 *
	 * Returns nothing. If the page query cannot be prepared or executed the
	 * function returns without writing anything. Terminates the script via die()
	 * when a page file cannot be opened for writing.
	 *
	 * @param string  $name     Book title; sanitized to form the directory name.
	 * @param mixed   $book_id  Value matched against ZPAGE.ZBOOK.
	 * @param SQLite3 $database Open database handle.
	 * @param mixed   $fpid     Appended to "<dir>/images/" to name the images directory.
	 */
	function save_a_book($name, $book_id, $database, $fpid) {
	    $top = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/></head><body>';
	    $bottom = '</body></html>';

	    // Bind the book id rather than splicing it into the SQL text. The original
	    // wrapped it in double quotes, which SQLite parses as an identifier first
	    // and only treats as a string literal as a compatibility fallback.
	    $statement = $database->prepare('SELECT ZHTMLBODY, Z_PK, ZORDERINDEX from ZPAGE where ZBOOK = :book ORDER BY ZORDERINDEX');
	    if (!$statement) {
	        return;
	    }
	    $statement->bindValue(':book', $book_id);
	    $result = $statement->execute();
	    if (!$result) {
	        return;
	    }

	    // The sanitized title never changes inside the loop, so compute it once.
	    $bookDir = sanitize($name, TRUE);
	    mkdir_recursive($bookDir);

	    $counter = 1;
	    while ($row = $result->fetchArray()) {
	        $myFile = "./" . $bookDir . "/pg_" . $counter . ".html";
	        $fh = fopen($myFile, 'w') or die("can't open file ". $row['Z_PK']);
	        fwrite($fh, $top . $row['ZHTMLBODY'] . $bottom);
	        fclose($fh);
	        $counter++;
	        print("Wrote page #: {$row['Z_PK']} to file {$myFile}\n");
	    }

	    mkdir_recursive($bookDir . "/images/" . $fpid);
	}

	/**
	 * Create a directory together with any missing parent directories.
	 *
	 * @param string $pathname Directory path to create.
	 * @return bool True when the directory exists on return, false otherwise
	 *              (including for an empty path).
	 */
	function mkdir_recursive($pathname)
	{
	    // dirname('') is '', so a parent-first recursion never terminates on an
	    // empty path; reject it up front.
	    if ($pathname === '' || $pathname === null) {
	        return false;
	    }

	    if (is_dir($pathname)) {
	        return true;
	    }

	    // The original lines used "\|\|" (an escaped "||"), which does not parse
	    // as PHP. mkdir() with the recursive flag builds the parent chain itself;
	    // the trailing is_dir() covers a directory created concurrently.
	    return @mkdir($pathname, 0777, true) || is_dir($pathname);
	}


	/**
	 * Turn arbitrary text into a lowercase, dash-separated slug.
	 *
	 * @param string $string      Text to clean up.
	 * @param bool   $is_filename When true, '~', '_' and '.' are kept as well.
	 * @return string
	 */
	function sanitize($string = '', $is_filename = FALSE)
	{
	    // Characters allowed in addition to word characters and '-'.
	    $keep = $is_filename ? '~_\.' : '';

	    // Anything outside the allowed set collapses into a dash ...
	    $replaced = preg_replace('/[^\w\-'. $keep . ']+/u', '-', $string);

	    // ... and dash runs shrink to one before lowercasing.
	    return mb_strtolower(
	        preg_replace('/--+/u', '-', $replaced),
	        'UTF-8'
	    );
	}

	?>
	<?php

	// Same as safariparse.php, except it outputs one big HTML file per book
	// instead of putting each individual chapter in a separate HTML file.

	// Entry point: open the Safari app database and export every book whose
	// ZOFFLINESTATUS is 'offline' by handing each row to save_a_book().
	try
	{
	    // create or open the database (the SQLite3 constructor throws on failure)
	    $database = new SQLite3('/Users/cory/Desktop/Safari.sqlite');
	    echo "Database connection open...\n";

	    // Single quotes make 'offline' an unambiguous string literal; SQLite treats
	    // a double-quoted token as an identifier first.
	    $booksQuery = "SELECT * from ZBOOK where ZOFFLINESTATUS = 'offline'";
	    $result = $database->query($booksQuery);

	    if ($result) {
	        // Collect the rows up front so the message reports how many books were
	        // found; numColumns() only gives the width of the result set.
	        $books = array();
	        while ($row = $result->fetchArray(SQLITE3_ASSOC)) {
	            $books[] = $row;
	        }
	        echo "Found " . count($books) . " books stored offline\n";

	        foreach ($books as $book) {
	            echo "Saving book " . $book['ZTITLE'] . "\n";
	            save_a_book($book['ZTITLE'], $book['Z_PK'], $database, $book['ZFPID']);
	        }
	    } else {
	        echo "Database failed to return results for query: " . $booksQuery;
	    }
	}
	catch (Exception $e)
	{
	    // Print the actual failure; the original passed an undefined $error to die().
	    die($e->getMessage());
	}


	/**
	 * Export one book as a single HTML file.
	 *
	 * The bodies of all ZPAGE rows belonging to the book are concatenated, in
	 * ZORDERINDEX order, into ./<sanitized title>.html between one XHTML header
	 * (carrying the book title) and one footer. The directories
	 * <sanitized title> and <sanitized title>/images/<fpid> are also created;
	 * nothing in this function writes into them.
	 *
	 * Returns nothing. If the page query cannot be prepared or executed the
	 * function returns without writing anything. Terminates the script via die()
	 * when the output file cannot be opened for writing.
	 *
	 * @param string  $name     Book title; used for <title> and, sanitized, for the file name.
	 * @param mixed   $book_id  Value matched against ZPAGE.ZBOOK.
	 * @param SQLite3 $database Open database handle.
	 * @param mixed   $fpid     Appended to "<dir>/images/" to name the images directory.
	 */
	function save_a_book($name, $book_id, $database, $fpid) {
	    // The title is placed inside <title>, so HTML metacharacters must be escaped.
	    $top = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>' . htmlspecialchars($name, ENT_QUOTES, 'UTF-8') . '</title></head><body>';
	    $bottom = '</body></html>';

	    // Bind the book id rather than splicing it into the SQL text. The original
	    // wrapped it in double quotes, which SQLite parses as an identifier first
	    // and only treats as a string literal as a compatibility fallback.
	    $statement = $database->prepare('SELECT ZHTMLBODY, Z_PK, ZORDERINDEX from ZPAGE where ZBOOK = :book ORDER BY ZORDERINDEX');
	    if (!$statement) {
	        return;
	    }
	    $statement->bindValue(':book', $book_id);
	    $result = $statement->execute();
	    if (!$result) {
	        return;
	    }

	    // The sanitized title names both the output file and the side directory.
	    $bookSlug = sanitize($name, TRUE);

	    $myFile = "./" . $bookSlug . ".html";
	    $fh = fopen($myFile, 'w') or die("can't open file ". $myFile);
	    fwrite($fh, $top);

	    // make dir
	    mkdir_recursive($bookSlug);

	    while ($row = $result->fetchArray()) {
	        fwrite($fh, $row['ZHTMLBODY']);
	        print("Wrote page #: {$row['Z_PK']} to file {$myFile}\n");
	    }

	    fwrite($fh, $bottom);
	    fclose($fh);

	    mkdir_recursive($bookSlug . "/images/" . $fpid);
	}

	/**
	 * Create a directory together with any missing parent directories.
	 *
	 * @param string $pathname Directory path to create.
	 * @return bool True when the directory exists on return, false otherwise
	 *              (including for an empty path).
	 */
	function mkdir_recursive($pathname)
	{
	    // An empty path can never be created, and dirname('') is '' again, so a
	    // parent-first recursion would never terminate for it.
	    if ($pathname === '' || $pathname === null) {
	        return false;
	    }

	    if (is_dir($pathname)) {
	        return true;
	    }

	    // The original lines used "\|\|" (an escaped "||"), which does not parse
	    // as PHP. mkdir() creates the missing parents itself; warnings stay
	    // suppressed (best effort) and is_dir() tolerates a concurrent creator.
	    return @mkdir($pathname, 0777, true) || is_dir($pathname);
	}


	/**
	 * Reduce a string to a lowercase slug made of word characters and dashes.
	 *
	 * @param string $string      Input text.
	 * @param bool   $is_filename Also keep '~', '_' and '.' when true.
	 * @return string Lowercased (UTF-8) text with disallowed runs replaced by one dash.
	 */
	function sanitize($string = '', $is_filename = FALSE)
	{
	    // Filenames may additionally contain '~', '_' and '.'.
	    if ($is_filename) {
	        $filenameChars = '~_\.';
	    } else {
	        $filenameChars = '';
	    }

	    // Swap each run of disallowed characters for a dash, then merge dash runs.
	    $withDashes = preg_replace('/[^\w\-'. $filenameChars . ']+/u', '-', $string);
	    $singleDashes = preg_replace('/--+/u', '-', $withDashes);

	    return mb_strtolower($singleDashes, 'UTF-8');
	}

	?>