Last active
March 6, 2019 06:43
-
-
Save cesarmiquel/0ae439e103e37e22c603eb93a7744c41 to your computer and use it in GitHub Desktop.
A script to backup all your liked images and metadata from Tumblr.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Date to start from: first CLI argument (anything strtotime() understands),
// defaulting to today. The month of this date is the month being backed up.
$date = date('Y-m-d');
if (isset($argv[1]) && $argv[1] !== '') {
    $date = $argv[1];
}

$start_timestamp = strtotime($date);
if ($start_timestamp === false) {
    log_message("Could not parse date '$date'", 'error');
    exit(1);
}

// Get all posts for this month.
// Paging starts one minute before the given date; each following request
// asks for likes newer than the newest liked_timestamp collected so far.
$biggest_timestamp = $start_timestamp - 60;
$selected_month = date('Ym', $start_timestamp);
$index = 0;
$posts = [];

while (true) {
    $new_posts = get_more_posts($biggest_timestamp);
    $new_posts = remove_posts_not_in_current_month($new_posts, $selected_month);

    // No likes left in the selected month (or the request failed): stop.
    if (count($new_posts) == 0) {
        break;
    }

    $posts = array_merge($posts, $new_posts);

    $batch_newest = array_reduce($new_posts, function ($t, $post) {
        return $post->liked_timestamp > $t ? $post->liked_timestamp : $t;
    }, 0);
    log_message(" Last post so far ... " . date('d/m/Y', $batch_newest), 'info');

    // If the batch did not move the cursor forward, the next request would
    // return the same posts again; bail out instead of looping forever.
    if ($batch_newest <= $biggest_timestamp) {
        break;
    }
    $biggest_timestamp = $batch_newest;
}

log_message("Processing " . count($posts) . " for $selected_month ...");

// Newest like first.
usort($posts, function ($post1, $post2) {
    return $post1->liked_timestamp - $post2->liked_timestamp;
});
$posts = array_reverse($posts);

// Every post was already filtered down to the selected month above, so no
// further month check is needed here.
foreach ($posts as $post) {
    $index++;
    $dir = date('Y/m', $post->liked_timestamp);
    process_post($post, $dir, $index);
}
/**
 * Keep only the posts whose like date falls in the given month.
 *
 * @param array  $posts          Objects exposing a `liked_timestamp` property.
 * @param string $selected_month Month formatted as date('Ym'), e.g. "201903".
 * @return array Re-indexed list of the matching posts, in their input order.
 */
function remove_posts_not_in_current_month($posts, $selected_month) {
    $kept = [];
    foreach ($posts as $candidate) {
        $liked_month = date('Ym', $candidate->liked_timestamp);
        if ($liked_month != $selected_month) {
            continue;
        }
        $kept[] = $candidate;
    }
    return $kept;
}
/**
 * Collect the src attribute of every <img> tag found in an HTML fragment.
 *
 * @param mixed $html HTML text; anything that is not a non-empty string yields [].
 * @return array List of URL strings in document order (may contain empty strings).
 */
function extract_image_urls_from_html($html) {
    if (!is_string($html) || $html === '') {
        return [];
    }
    $found = preg_match_all('/< *img[^>]*src *= *["\']?([^"\']*)/i', $html, $matches);
    // preg_match_all() returns 0 for no match and false on a regex error.
    if (!$found) {
        return [];
    }
    return $matches[1];
}

/**
 * Save one liked post to disk: its JSON metadata plus every image it references.
 *
 * Metadata goes to "$dir/.meta/<liked_timestamp>.json"; images go to
 * "$dir/<liked_timestamp>-<blog_name>-<n>.<ext>". A post whose metadata file
 * already exists is treated as done and skipped. Note that the metadata is
 * written before the images are fetched, so a post whose downloads fail is
 * still skipped on the next run.
 *
 * @param object $post  Post object as decoded from the API response.
 * @param string $dir   Target directory (created if missing).
 * @param int    $index Unused: file names are derived from liked_timestamp.
 *                      Kept so existing callers keep working.
 * @return void
 */
function process_post($post, $dir, $index) {
    // Only process photo and text posts
    if ($post->type != 'photo' && $post->type != 'text') {
        log_message(" Skipping post $post->post_url of type $post->type");
        return;
    }

    // Check the .meta directory itself: $dir may already exist without it.
    // The recursive mkdir creates $dir as well when needed.
    $meta_dir = $dir . '/.meta';
    if (!is_dir($meta_dir) && !mkdir($meta_dir, 0755, true) && !is_dir($meta_dir)) {
        log_message("Could not create directory $meta_dir", 'error');
        return;
    }

    $file_id = sprintf("%010d", $post->liked_timestamp);
    $meta_filename = $meta_dir . '/' . $file_id . '.json';
    if (file_exists($meta_filename)) {
        log_message("Skipping $dir/$file_id.json....", 'info');
        return;
    }

    log_message("Processing $dir/$file_id.json....");
    if (file_put_contents($meta_filename, json_encode($post)) === false) {
        log_message("Could not write $meta_filename", 'error');
        return;
    }

    $image_urls = [];
    if ($post->type == 'photo') {
        if (isset($post->photos)) {
            foreach ($post->photos as $photo) {
                $image_urls[] = $photo->original_size->url;
            }
        }
        // Process caption. Some posts have images in caption
        $caption = isset($post->caption) ? $post->caption : '';
        $image_urls = array_merge($image_urls, extract_image_urls_from_html($caption));
    } else if ($post->type == 'text') {
        $body = isset($post->body) ? $post->body : '';
        $image_urls = extract_image_urls_from_html($body);
    }

    // $photo_index counts every URL, including ones that fail, so an image's
    // number always matches its position in the post.
    $photo_index = 0;
    foreach ($image_urls as $photo_url) {
        $photo_index++;

        if (!is_string($photo_url) || $photo_url === '') {
            log_message("Skipping empty image URL #$photo_index in $dir/$file_id.json", 'error');
            continue;
        }

        $contents = file_get_contents($photo_url);
        if ($contents === false) {
            // Do not leave a zero-byte file behind for a failed download.
            log_message("Could not download $photo_url", 'error');
            continue;
        }

        $photo_url_path = parse_url($photo_url, PHP_URL_PATH);
        $extension = is_string($photo_url_path) ? pathinfo($photo_url_path, PATHINFO_EXTENSION) : '';
        $suffix = $extension !== '' ? '.' . $extension : '';

        $target = $dir . '/' . $file_id . '-' . $post->blog_name . '-' . $photo_index . $suffix;
        if (file_put_contents($target, $contents) === false) {
            log_message("Could not write $target", 'error');
        }
    }
}
/**
 * Fetch one page of liked posts from the Tumblr API.
 *
 * @param int $timestamp Unix timestamp sent as the request's `after` parameter.
 * @return array The `liked_posts` list from the decoded response, or an empty
 *               array when the request fails, the body is not the expected
 *               JSON, or the API reports a status other than 200.
 */
function get_more_posts($timestamp) {
    $api_key = '[API KEY GOES HERE]';
    $blog = 'hypro';
    $limit = 50;

    // Encode every value so special characters cannot break the URL.
    $query = http_build_query([
        'api_key' => $api_key,
        'limit'   => $limit,
        'after'   => $timestamp,
    ], '', '&');
    $url = sprintf(
        'https://api.tumblr.com/v2/blog/%s/likes?%s',
        rawurlencode($blog),
        $query);

    log_message("Getting posts later than " . date('d/m/Y', $timestamp));

    $body = file_get_contents($url);
    if ($body === false) {
        log_message("Request to the Tumblr API failed", 'error');
        return [];
    }

    $response = json_decode($body);
    if (!is_object($response) || !isset($response->meta->status)) {
        log_message("Unexpected response from the Tumblr API", 'error');
        return [];
    }

    if ($response->meta->status != 200) {
        print_r($response->meta);
        return [];
    }

    // Always hand callers an array, even if the payload lacks liked_posts.
    if (!isset($response->response->liked_posts) || !is_array($response->response->liked_posts)) {
        return [];
    }
    return $response->response->liked_posts;
}
/**
 * Print a message to standard output wrapped in ANSI escape sequences.
 *
 * @param string $msg    Text to print.
 * @param string $status 'info' (default) and 'error' each get their own
 *                       prefix; any other value prints the bare message.
 * @return void
 */
function log_message($msg, $status = 'info') {
    switch ($status) {
        case 'info':
            $line = "\033[37m ✔ INFO \033[32m$msg\033[37m\n";
            break;
        case 'error':
            $line = "\033[31m ✖ \e[4mERROR\e[0m\033[0;37m $msg\033[37m\n";
            break;
        default:
            $line = "\033[37m\033[1;32m$msg\033[0;37m\n";
            break;
    }
    print($line);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment