Skip to content

Instantly share code, notes, and snippets.

@cesarmiquel
Last active March 6, 2019 06:43
Show Gist options
  • Save cesarmiquel/0ae439e103e37e22c603eb93a7744c41 to your computer and use it in GitHub Desktop.
Save cesarmiquel/0ae439e103e37e22c603eb93a7744c41 to your computer and use it in GitHub Desktop.
A script to backup all your liked images and metadata from Tumblr.
<?php
// Starting date: the first command-line argument when given, otherwise today.
$date = date('Y-m-d');
if (isset($argv[1]) && $argv[1]) {
    $date = $argv[1];
}

$start_timestamp = strtotime($date);
if ($start_timestamp === false) {
    // strtotime() returns false for unparseable input; without this check the
    // script would silently work with a bogus epoch-based month.
    log_message("Could not parse date '$date'", 'error');
    exit(1);
}

// Get all posts for this month.
// Start one minute before the requested date and page forward through the likes.
$biggest_timestamp = $start_timestamp - 60;
$selected_month = date('Ym', $start_timestamp);
$index = 0;
$posts = [];
while (true) {
    $new_posts = get_more_posts($biggest_timestamp);
    $new_posts = remove_posts_not_in_current_month($new_posts, $selected_month);
    if (count($new_posts) == 0) {
        // Nothing (left) in the selected month: stop paging.
        break;
    }
    $posts = array_merge($posts, $new_posts);

    // Newest like on this page; it becomes the cursor for the next request.
    $newest_timestamp = array_reduce(
        $new_posts,
        function ($t, $post) {
            return $post->liked_timestamp > $t ? $post->liked_timestamp : $t;
        },
        0
    );
    log_message(" Last post so far ... " . date('d/m/Y', $newest_timestamp), 'info');

    if ($newest_timestamp <= $biggest_timestamp) {
        // The cursor did not advance; requesting again would return the same
        // page forever, so stop here.
        break;
    }
    $biggest_timestamp = $newest_timestamp;
}

log_message("Processing " . count($posts) . " for $selected_month ...");

// Process the newest likes first.
usort($posts, function ($post1, $post2) {
    return $post1->liked_timestamp - $post2->liked_timestamp;
});
$posts = array_reverse($posts);

// Every post collected above has already been filtered down to the selected
// month, so no additional month check is needed here.
foreach ($posts as $post) {
    $index++;
    $dir = date('Y/m', $post->liked_timestamp);
    process_post($post, $dir, $index);
}
/**
 * Keep only the posts that were liked during the given month.
 *
 * @param array  $posts          Post objects exposing a liked_timestamp property.
 * @param string $selected_month Month to keep, in date('Ym') format (e.g. "201903").
 * @return array Matching posts in their original order, re-indexed from 0.
 */
function remove_posts_not_in_current_month($posts, $selected_month) {
    $kept = [];
    foreach ($posts as $candidate) {
        $liked_month = date('Ym', $candidate->liked_timestamp);
        if ($liked_month != $selected_month) {
            // Liked in another month: drop it.
            continue;
        }
        $kept[] = $candidate;
    }
    return $kept;
}
/**
 * Collect the src attribute of every <img> tag found in an HTML fragment.
 *
 * @param mixed $html HTML markup to scan; anything that is not a non-empty
 *                    string yields an empty list.
 * @return array List of URL strings in the order they appear.
 */
function extract_image_urls_from_html($html) {
    if (!is_string($html) || $html === '') {
        return [];
    }
    $found = preg_match_all('/< *img[^>]*src *= *["\']?([^"\']*)/i', $html, $matches);
    if (!$found) {
        // 0 matches, or false on a PCRE error.
        return [];
    }
    return $matches[1];
}

/**
 * Back up a single liked post: store its JSON metadata and download its images.
 *
 * Metadata is written to "$dir/.meta/<liked_timestamp>.json"; images are
 * written to "$dir/<liked_timestamp>-<blog_name>-<n>.<ext>". A post whose
 * metadata file already exists is skipped.
 *
 * @param object $post  Post object; reads type, post_url, liked_timestamp,
 *                      blog_name and, depending on type, photos/caption or body.
 * @param string $dir   Target directory for this post's month.
 * @param int    $index Unused; kept so existing callers keep working. File
 *                      names are derived from the post's liked_timestamp.
 * @return void
 */
function process_post($post, $dir, $index) {
    // Only process photo and text posts
    if ($post->type != 'photo' && $post->type != 'text') {
        log_message(" Skipping post $post->post_url of type $post->type");
        return;
    }

    // Create the metadata directory on its own (recursively creating $dir as
    // well), so a pre-existing $dir without ".meta" does not break the write.
    $meta_dir = $dir . '/.meta';
    if (!is_dir($meta_dir) && !mkdir($meta_dir, 0755, true) && !is_dir($meta_dir)) {
        log_message("Could not create directory $meta_dir", 'error');
        return;
    }

    // Zero-padded like timestamp used as the base name of every file.
    $stamp = sprintf("%010d", $post->liked_timestamp);
    $meta_filename = $meta_dir . '/' . $stamp . '.json';
    if (file_exists($meta_filename)) {
        log_message("Skipping $dir/$stamp.json....", 'info');
        return;
    }

    log_message("Processing $dir/$stamp.json....");
    if (file_put_contents($meta_filename, json_encode($post)) === false) {
        log_message("Could not write $meta_filename", 'error');
        return;
    }

    $image_urls = [];
    if ($post->type == 'photo') {
        $photos = isset($post->photos) ? $post->photos : [];
        foreach ($photos as $photo) {
            $image_urls[] = $photo->original_size->url;
        }
        // Process caption. Some posts have images in caption
        $caption = isset($post->caption) ? $post->caption : '';
        $image_urls = array_merge($image_urls, extract_image_urls_from_html($caption));
    } else {
        // Text post: images only appear inline in the body markup.
        $body = isset($post->body) ? $post->body : '';
        $image_urls = extract_image_urls_from_html($body);
    }

    foreach ($image_urls as $position => $photo_url) {
        // 1-based counter; it advances even when a download fails so the
        // remaining images keep stable file names.
        $photo_index = $position + 1;

        if (!is_string($photo_url) || $photo_url === '') {
            // An empty path makes file_get_contents() fail hard on PHP 8.
            log_message("Skipping empty image URL in $post->post_url", 'error');
            continue;
        }

        $photo_url_path = parse_url($photo_url, PHP_URL_PATH);
        $extension = pathinfo((string) $photo_url_path, PATHINFO_EXTENSION);

        $contents = file_get_contents($photo_url);
        if ($contents === false) {
            // Do not leave an empty file behind for a failed download.
            log_message("Could not download $photo_url", 'error');
            continue;
        }

        $target = $dir . '/' . $stamp . '-' . $post->blog_name . '-' . $photo_index . '.' . $extension;
        if (file_put_contents($target, $contents) === false) {
            log_message("Could not write $target", 'error');
        }
    }
}
/**
 * Request one page of liked posts from the Tumblr API.
 *
 * The blog name, API key and page size (50) are hard-coded below. The given
 * timestamp is sent as the "after" query parameter.
 *
 * @param int $timestamp Unix timestamp sent as the "after" parameter.
 * @return array List of post objects; empty when the request fails, the
 *               response cannot be decoded, or the API reports a non-200 status.
 */
function get_more_posts($timestamp) {
    $api_key = '[API KEY GOES HERE]';
    $blog = 'hypro';
    $limit = 50;
    $url = sprintf(
        'https://api.tumblr.com/v2/blog/%s/likes?api_key=%s&limit=%s&after=%s',
        $blog,
        $api_key,
        $limit,
        $timestamp
    );
    log_message("Getting posts later than " . date('d/m/Y', $timestamp));

    $raw = file_get_contents($url);
    if ($raw === false) {
        // Network failure or HTTP error status: nothing to decode.
        log_message("Request to $url failed", 'error');
        return [];
    }

    $response = json_decode($raw);
    if (!is_object($response) || !isset($response->meta->status)) {
        log_message("Unexpected response from the Tumblr API", 'error');
        return [];
    }

    if ($response->meta->status != 200) {
        // Dump the API's own status block to show what went wrong.
        print_r($response->meta);
        return [];
    }

    if (!isset($response->response->liked_posts) || !is_array($response->response->liked_posts)) {
        // Callers iterate over the result, so always hand back an array.
        return [];
    }
    return $response->response->liked_posts;
}
/**
 * Print a message as one ANSI-coloured line.
 *
 * @param string $msg    Text to print.
 * @param string $status 'info' (the default) or 'error'; any other value
 *                       prints the message without a label.
 * @return void
 */
function log_message($msg, $status = 'info') {
    switch ($status) {
        case 'info':
            $line = "\033[37m ✔ INFO \033[32m$msg\033[37m\n";
            break;
        case 'error':
            $line = "\033[31m ✖ \e[4mERROR\e[0m\033[0;37m $msg\033[37m\n";
            break;
        default:
            $line = "\033[37m\033[1;32m$msg\033[0;37m\n";
            break;
    }
    print($line);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment