Created
June 20, 2010 23:54
-
-
Save zeen/446234 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * A command line interface for automatic downloads of image feeds
 *
 * @package easypopulate
 * @author Waqas Hussain
 * @copyright 2010
 * @license http://www.gnu.org/licenses/gpl-2.0.html GNU General Public License (v2 only)
 */
// Script configuration.

// Feed description file: JSON formatted as [{...}, {...}, ...].
$json_file = 'sample.json';

// Folder in which the extracted output goes.
$image_root = './out';

// Folder in which downloaded zips are cached.
$zip_root = './zips';

// Temporary file that receives the zip currently being downloaded.
$tmpfile = $zip_root . '/tmp.zip';
/**
 * Writes "debug: <message>" followed by a newline to standard output.
 */
function debug($message)
{
    echo 'debug: ' . $message . "\n";
}

/**
 * Writes "warn: <message>" followed by a newline to standard output.
 */
function warn($message)
{
    echo 'warn: ' . $message . "\n";
}

/**
 * Writes "error: <message>" followed by a newline to standard output.
 * Does not stop the script.
 */
function error($message)
{
    echo 'error: ' . $message . "\n";
}

/**
 * Stops the script immediately by handing $message to exit().
 */
function fatal($message)
{
    exit($message);
}
/**
 * Reads a JSON file and returns its decoded content.
 *
 * The script terminates through fatal() when the file cannot be read or when
 * its content does not decode to an array or object. An empty list ("[]") is
 * a valid result and is returned as an empty array.
 *
 * @param string $file Path of the JSON file.
 * @return array|object Decoded JSON (objects are decoded as stdClass).
 */
function load_json($file) {
    $contents = file_get_contents($file);
    // Compare against FALSE explicitly: a readable file whose content is
    // "" or "0" is not a read failure.
    if ($contents === FALSE) {
        fatal("failed to load json file");
    }

    $json = json_decode($contents);
    // json_decode() yields NULL for malformed input. Testing the decoded
    // value for truthiness would also reject the perfectly valid "[]".
    if (!is_array($json) && !is_object($json)) {
        fatal("failed to parse json file");
    }
    return $json;
}
/**
 * Asks the server for the modification time of a remote document.
 *
 * A body-less request is issued with curl and the value of
 * CURLINFO_FILETIME is returned. Callers in this file treat -1 as
 * "the server did not provide a time".
 *
 * @param string $url Location of the remote document.
 * @return int|bool The CURLINFO_FILETIME value, or FALSE when curl could not
 *                  be initialised or the request ended with a curl error.
 */
function get_remote_file_time($url) {
    $handle = curl_init($url);
    if (!$handle) {
        error("failed to intialize curl");
        return FALSE;
    }

    $options = array(
        CURLOPT_NOBODY         => 1, // HEAD request
        CURLOPT_RETURNTRANSFER => 1, // don't print to stdout
        CURLOPT_FAILONERROR    => 1,
        CURLOPT_FILETIME       => 1,
    );
    foreach ($options as $option => $value) {
        curl_setopt($handle, $option, $value);
    }

    curl_exec($handle);
    $remote_time = curl_errno($handle)
        ? FALSE
        : curl_getinfo($handle, CURLINFO_FILETIME);
    curl_close($handle);

    return $remote_time;
}
/**
 * Picks the archive entries worth extracting and maps each one to the path it
 * should be written to.
 *
 * Only entries whose name starts with $filter_prefix are considered; the
 * prefix is stripped from the output path. Entries named "<set>_<size>.jpg"
 * are grouped by <set> and only the entry with the largest <size> of each
 * group is kept (the later entry wins a tie). Every other entry is kept as it
 * is. Entries whose remaining path contains a ".." segment are dropped so that
 * a crafted archive cannot produce paths outside $destination.
 *
 * @param object      $zip           Archive exposing numFiles and getNameIndex($i)
 *                                   (a ZipArchive everywhere in this file).
 * @param string      $destination   Folder the output paths are rooted at.
 * @param string|null $filter_prefix Required entry-name prefix; NULL or ""
 *                                   means every entry is considered.
 * @return array Map of entry name => output path.
 */
function get_useful_zip_entries($zip, $destination, $filter_prefix) {
    // Normalise a missing prefix to "". The former "$x = $x or ''" did nothing
    // because "=" binds tighter than "or".
    $prefix = (string) $filter_prefix;
    $prefix_length = strlen($prefix);

    $files = array();
    $sets = array(); // set name => array(size, entry, output path)

    for ($i = 0; $i < $zip->numFiles; $i++) {
        $entry = $zip->getNameIndex($i);
        if ($entry === FALSE) {
            continue;
        }
        // Strict prefix test: "strpos(...) == 0" is also true when strpos()
        // returns FALSE, which let every entry through.
        if (strncmp($entry, $prefix, $prefix_length) !== 0) {
            continue;
        }

        $relative = (string) substr($entry, $prefix_length);
        // Refuse parent-directory segments (zip-slip).
        if (in_array("..", preg_split('#[/\\\\]#', $relative), TRUE)) {
            continue;
        }
        $path = $destination."/".$relative;

        // The dot before "jpg" is escaped so only a real ".jpg" suffix matches.
        if (preg_match('/^(.*)_(\d+)\.jpg$/', $entry, $matches) > 0) {
            $setname = $matches[1];
            $size = intval($matches[2]);
            if (!(isset($sets[$setname]) && $sets[$setname][0] > $size)) {
                $sets[$setname] = array($size, $entry, $path);
            }
        } else {
            $files[$entry] = $path;
        }
    }

    // Add the winning image of every set.
    foreach ($sets as $set) {
        $files[$set[1]] = $set[2];
    }
    return $files;
}
/**
 * Tells whether the local file is at least as recent as the remote document.
 *
 * @param string $url      Remote location, queried via get_remote_file_time().
 * @param string $filename Local file to compare against.
 * @return bool TRUE only when the local file exists, the server reported a
 *              modification time, and that time is not later than the local
 *              file's mtime. FALSE in every other case.
 */
function is_file_younger_than_url($url, $filename) {
    $local_time = @filemtime($filename);
    if ($local_time === FALSE) {
        // No local copy, so there is nothing to compare.
        return FALSE;
    }

    $server_time = get_remote_file_time($url);
    if ($server_time == -1) {
        debug("Server didn't provide last-modified time");
        return FALSE;
    }
    if ($server_time === FALSE || $server_time > $local_time) {
        return FALSE;
    }

    debug("No new file on server, skipping");
    return TRUE; // skip extraction
}
/**
 * Downloads a URL into a local file using curl.
 *
 * When the transfer succeeds and curl reports a remote modification time
 * (anything but -1), the local file's mtime is set to that time.
 *
 * @param string $url      Remote location to fetch.
 * @param string $filename Local file to write; opened in "w" mode.
 * @return bool Result of curl_exec(), or FALSE when curl could not be
 *              initialised or the file could not be opened.
 */
function download_file($url, $filename) {
    debug("Downloading $filename ($url)");

    $curl = curl_init($url);
    if (!$curl) {
        error("failed to intialize curl");
        return FALSE;
    }

    $succeeded = FALSE;
    $target = fopen($filename, "w");
    if (!$target) {
        error("failed to open $filename for writing");
    } else {
        $options = array(
            CURLOPT_FILE        => $target,
            CURLOPT_HEADER      => 0,
            CURLOPT_FAILONERROR => 1,
            CURLOPT_FILETIME    => 1,
        );
        foreach ($options as $option => $value) {
            curl_setopt($curl, $option, $value);
        }

        $succeeded = curl_exec($curl);
        fclose($target);

        if (!$succeeded) {
            error("download failed");
        } else {
            // Mirror the server's timestamp so later freshness checks work.
            $remote_time = curl_getinfo($curl, CURLINFO_FILETIME);
            if ($remote_time !== -1) {
                touch($filename, $remote_time);
            }
        }
    }

    curl_close($curl);
    return $succeeded;
}
/**
 * Extracts the useful entries of a zip archive below $destination.
 *
 * The entries and their output paths come from get_useful_zip_entries().
 * Missing output folders are created. A failure on an individual entry is
 * reported through error() and the remaining entries are still processed.
 *
 * @param string $filename      Path of the zip archive.
 * @param string $destination   Folder the entries are written below.
 * @param string $filter_prefix Entry-name prefix handed to get_useful_zip_entries().
 * @return bool TRUE when the archive could be opened, FALSE otherwise.
 */
function extract_file($filename, $destination, $filter_prefix) {
    debug("Extracting $filename to $destination");

    $zip = new ZipArchive;
    if ($zip->open($filename) !== TRUE) {
        error("failed to open zip for reading");
        return FALSE;
    }

    $files = get_useful_zip_entries($zip, $destination, $filter_prefix);
    foreach ($files as $inner_path => $outer_path) {
        $inner_path = (string) $inner_path;
        // Directory entries carry no data; their folders are created on
        // demand for the files they contain.
        if (substr($inner_path, -1) === "/") {
            continue;
        }

        // Compare against FALSE explicitly: a truthiness test would silently
        // drop empty files and files whose whole content is "0".
        $content = $zip->getFromName($inner_path);
        if ($content === FALSE) {
            error("failed to read $inner_path from zip");
            continue;
        }

        $directory = pathinfo($outer_path, PATHINFO_DIRNAME);
        if (!is_dir($directory) && !mkdir($directory, 0777, TRUE)) {
            error("failed to create directory $directory");
            continue;
        }
        if (file_put_contents($outer_path, $content) === FALSE) {
            error("failed to write $outer_path");
        }
    }

    $zip->close();
    return TRUE;
}
/**
 * Removes the files that an earlier extract_file() call with the same
 * arguments would have written.
 *
 * The list of candidate paths comes from get_useful_zip_entries(). Only
 * regular files are removed; folders are left in place.
 *
 * @param string $filename      Path of the zip archive.
 * @param string $destination   Folder the entries were written below.
 * @param string $filter_prefix Entry-name prefix handed to get_useful_zip_entries().
 * @return bool TRUE when the archive could be opened, FALSE otherwise.
 */
function unextract_file($filename, $destination, $filter_prefix) {
    debug("Unextracting $filename to $destination");

    $zip = new ZipArchive;
    if ($zip->open($filename) !== TRUE) {
        error("failed to open zip for reading");
        return FALSE;
    }

    $files = get_useful_zip_entries($zip, $destination, $filter_prefix);
    foreach ($files as $outer_path) {
        // is_file() rather than file_exists(): directory entries of the
        // archive map to folders, and unlink() on a folder always fails.
        if (is_file($outer_path) && !unlink($outer_path)) {
            error("failed to remove $outer_path");
        }
    }

    $zip->close();
    return TRUE;
}
/**
 * Refreshes the images of one provider.
 *
 * The provider's zip is downloaded to $tmpfile unless the cached copy in
 * $zip_root is already up to date. After a successful download the files of
 * the previously cached zip are removed, the new zip replaces the cached one,
 * and its useful entries are extracted below $image_root.
 *
 * Fields read from $provider: name, images_url, and the optional
 * images_file_path (entry-name prefix inside the zip) and image_path_prefix
 * (sub-folder of $image_root). Missing optional fields default to "".
 *
 * @param object $provider One decoded entry of the JSON feed list.
 * @return void
 */
function download($provider) {
    global $image_root, $zip_root, $tmpfile;

    $name = isset($provider->name) ? $provider->name : "";
    if (empty($provider->images_url)) {
        debug("skipping: $name, no images_url");
        return;
    }
    if ($name === "") {
        // The cache file is named after the provider, so a name is required.
        error("skipping provider without a name");
        return;
    }

    $images_url = $provider->images_url;
    // Optional fields: read them only when present to avoid undefined-property notices.
    $images_file_path = isset($provider->images_file_path) ? $provider->images_file_path : "";
    $image_path_prefix = isset($provider->image_path_prefix) ? $provider->image_path_prefix : "";

    // FIXME escape/validate paths/filenames?
    $zip_file = "$zip_root/$name.zip";
    $image_folder = "$image_root/$image_path_prefix";

    debug("processing: $name");
    if (is_file_younger_than_url($images_url, $zip_file)) {
        return;
    }
    if (!download_file($images_url, $tmpfile)) {
        return;
    }
    debug("downloaded file: $zip_file");

    // Remove what the old archive had put in place before it is overwritten.
    if (file_exists($zip_file)) {
        unextract_file($zip_file, $image_folder, $images_file_path);
    }
    if (!rename($tmpfile, $zip_file)) {
        // Extraction below still runs: if an older archive is in place its
        // files are restored, otherwise extract_file() reports the problem.
        error("failed to move $tmpfile to $zip_file");
    }
    if (extract_file($zip_file, $image_folder, $images_file_path)) {
        debug("extracted file: $zip_file");
    }
}
// Entry point: process every provider listed in the feed file, printing an
// empty debug line after each one as a separator.
$json = load_json($json_file);
foreach ($json as $provider) {
    download($provider);
    debug("");
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment