-
-
Save jamiew/1080846 to your computer and use it in GitHub Desktop.
# Usage:
#   [sudo] gem install mechanize
#   ruby tumblr-photo-ripper.rb [subdomain]
#
# Pages through a Tumblr blog's /api/read photo feed and saves every
# photo-url whose max-width attribute is 1280 into a directory named after
# the blog, downloading up to `concurrency` files at a time.
require 'rubygems'
require 'fileutils' # FileUtils.mkdir_p is used below
require 'mechanize'

# Your Tumblr subdomain, e.g. "jamiew" for "jamiew.tumblr.com".
# Taken from the first command-line argument when given; otherwise the
# default below is used.
site = ARGV.first || "doctorwho"
FileUtils.mkdir_p(site)

concurrency = 8 # maximum number of download threads running at once
num = 50        # value sent as the API's num= parameter (page size)
start = 0       # value sent as the API's start= parameter (page offset)

loop do
  puts "start=#{start}"
  url = "http://#{site}.tumblr.com/api/read?type=photo&num=#{num}&start=#{start}"
  page = Mechanize.new.get(url)
  doc = Nokogiri::XML.parse(page.body)

  # Keep only the 1280px-wide rendition of each photo.
  images = (doc/'post photo-url').select { |node| node['max-width'].to_i == 1280 }
  # De-duplicate so two threads never write the same file at the same time
  # if a URL is listed more than once in a page.
  image_urls = images.map(&:content).uniq

  image_urls.each_slice(concurrency) do |group|
    threads = group.map do |image_url|
      Thread.new do
        puts "Saving photo #{image_url}"
        begin
          # One agent per thread: agents are not shared across threads.
          file = Mechanize.new.get(image_url)
          # Name the file after the final URL's last path segment, minus any query string.
          filename = File.basename(file.uri.to_s.split('?')[0])
          file.save_as("#{site}/#{filename}")
        rescue Mechanize::ResponseCodeError => e
          # Best effort: report the failed download and carry on with the rest.
          puts "Error getting file, #{e}"
        end
      end
    end
    # Wait for the whole group before starting the next one.
    threads.each(&:join)
  end

  puts "#{images.count} images found (num=#{num})"
  # A short page is treated as the end of the feed.
  if images.count < num
    puts "our work here is done"
    break
  end

  start += num
end
Very nice code, it is exactly what i need!
I will give you just a few pieces of advice, if you want to spread this beautiful piece of code.
-
Make a .exe .
-
A very simple Graphical Interface
-
Be happy
Why have constants when you could append the link at the end of the command and have the script strip off everything but the blog name?
Nice tool. Helped me save 500+ images. Thanks.
Great script! Thank you so much!
Successfully done for 16290 images (3.8G). Thank you!
Thanks a lot. Downloaded 5800 images.
Hi, I'm really sorry, I don't know anything about programming, but I'm desperately looking for some software or a way of downloading images from a Tumblr blog. Is there some way I can use this script despite my ignorance? Thanks in advance.
Version in Perl to grab every single image instead, bypassing XML parsing etc.:
#!/usr/bin/perl
# Fetch a Tumblr blog's /api/read photo feed page by page and store every
# https .jpg URL found in the raw response under pictures/, without parsing
# the XML.
use strict;
use warnings;
use LWP::Simple;

my $site = 'foo';
my $dir  = 'pictures';

# getstore() does not create missing directories, so make sure it exists.
unless (-d $dir) {
    mkdir $dir or die "Cannot create $dir: $!\n";
}

for (my $i = 0; $i < 9999; $i += 50) {
    my $url = "http://$site.tumblr.com/api/read?type=photo&num=50&start=$i";
    warn "Retrieving $url\n";
    my $src = get($url);

    # get() returns undef on failure; skip this page instead of matching undef.
    unless (defined $src) {
        warn "Failed to retrieve $url\n";
        next;
    }

    # Stop once a page carries no <post ...> elements.
    # NOTE(review): assumes an exhausted feed returns an empty <posts> wrapper - confirm.
    last unless $src =~ /<post\b/;

    # Exclude <, > and " so one match cannot run across several elements
    # when the XML has no whitespace between them.
    foreach my $image ($src =~ m{https://[^\s<>"]+\.jpg}g) {
        # Anchor at the end so the last path segment is the file name.
        my ($filename) = $image =~ m{/([^/]+\.jpg)\z};
        next unless defined $filename;
        print "found $filename\n";
        getstore($image, "$dir/$filename");
    }
}
Awesome, exactly what i needed, thanks