|
#!/usr/bin/ruby |
|
|
|
require "rubygems" |
|
require "bundler/setup" |
|
require "flickraw" |
|
|
|
# FileUtils is used right below; require it explicitly instead of relying on
# another library having loaded it as a side effect.
require "fileutils"

# Absolute path of the directory that receives all exported data. It is
# created on load if it does not exist yet.
$out_dir = File.expand_path "data"
FileUtils.mkdir_p $out_dir

# Pauses, in seconds, handed to `sleep` between Flickr API calls.
$short_sleep = 0.5
$long_sleep = 1
|
|
|
# Authorizes the `flickr` client.
#
# When FLICKR_TOKEN and FLICKR_SECRET are both exported they are used as the
# access token and secret. Otherwise the interactive flow runs: the user is
# asked to open an authorization URL and type the verification code back in,
# and the resulting token and secret are printed so they can be exported for
# next time. A failed exchange is reported on stdout and not re-raised.
#
# Largely borrowed from the flickraw README example:
# https://github.com/hanklords/flickraw
def authorize
  if ENV["FLICKR_TOKEN"] && ENV["FLICKR_SECRET"]
    flickr.access_token = ENV["FLICKR_TOKEN"]
    flickr.access_secret = ENV["FLICKR_SECRET"]
  else
    token = flickr.get_request_token
    auth_url = flickr.get_authorize_url(token["oauth_token"], perms: "delete")

    puts "Open this url in your browser to complete the authentication process:"
    puts auth_url
    puts "Copy here the number given when you complete the process."

    # Read from $stdin explicitly: bare `gets` reads from the files named in
    # ARGV whenever the script is given arguments. `to_s` covers end-of-input,
    # where `gets` returns nil.
    verify = $stdin.gets.to_s.strip

    begin
      flickr.get_access_token(token["oauth_token"], token["oauth_token_secret"], verify)

      puts "Save yourself some trouble next time--export these in your shell:"
      puts " FLICKR_TOKEN=#{flickr.access_token}"
      puts " FLICKR_SECRET=#{flickr.access_secret}"
    rescue FlickRaw::FailedResponse => e
      puts "Authentication failed: #{e.msg}"
    end
  end
end
|
|
|
# Configure FlickRaw from the environment, authorize, and remember the
# logged-in user's id in `$user_id`. Without both API credentials, print
# setup instructions instead.
api_key, shared_secret = ENV.values_at("FLICKR_API_KEY", "FLICKR_SHARED_SECRET")

if api_key.nil? || shared_secret.nil?
  puts "*** Be sure to export FLICKR_API_KEY and FLICKR_SHARED_SECRET in your shell."
  puts " You can get them for your app at: http://www.flickr.com/services/apps/"
else
  FlickRaw.api_key = api_key
  FlickRaw.shared_secret = shared_secret

  authorize

  login = flickr.test.login
  $user_id = login.id
  puts "*** Logged in as #{login.username} (#{$user_id})"
end
|
|
|
# Lookup table of Flickr licenses keyed by license id. Each value is a Hash
# holding that license's "name" and "url". Filled in by `get_licenses`.
$licenses = {}

# Fetches the license list from Flickr and records every entry in `$licenses`
# under its "id".
def get_licenses
  license_list = flickr.photos.licenses.getInfo["license"]

  license_list.each do |entry|
    details = { "name" => entry["name"], "url" => entry["url"] }
    $licenses.store(entry["id"], details)
  end
end
|
# Fill `$licenses` at load time; `get_photo_data` reads it when attaching
# license details to each photo.
get_licenses
|
|
|
# Serializes `data` as JSON into "#{$out_dir}/#{filename}.json".
#
# filename - base name of the output file, without directory or extension.
# data     - object to serialize; must respond to `to_json`.
#
# Failures (unwritable path, serialization errors, ...) are reported on
# stdout rather than raised. Returns nil.
def dump_to_file(filename, data)
  path = "#{$out_dir}/#{filename}.json"

  # The block form closes the file on every path, and there is no handle to
  # close when opening itself fails.
  File.open(path, "w") { |file| file.puts data.to_json }
rescue StandardError => e
  # StandardError only, so Ctrl-C and `exit` are never swallowed here.
  puts "!!! Failed to save data (#{path})"
  puts e.message
end
|
|
|
# Collects the ids of all photos belonging to `$user_id`.
#
# The photo count reported by `flickr.people.getInfo` decides how many pages
# to request; pages are fetched from the last one down to the first, pausing
# `$long_sleep` seconds after each request.
#
# Returns an Array of photo ids in the order they were received.
def get_photo_ids
  page_size = 500 # max: 500
  photo_count = flickr.people.getInfo(user_id: $user_id)["photos"]["count"]
  last_page = (photo_count / page_size.to_f).ceil

  puts "*** #{photo_count} photos"

  last_page.downto(1).each_with_object([]) do |page_number, collected|
    batch = flickr.people.getPhotos(user_id: $user_id, per_page: page_size, page: page_number)
    batch.each { |entry| collected.push(entry["id"]) }

    sleep $long_sleep
  end
end
|
|
|
# Downloads the original-size file of every photo into
# "#{$out_dir}/originals", one file per photo, named "<photo id>.jpg".
#
# A photo whose download does not come back with a successful HTTP status is
# reported on stdout and skipped, so an error page is never saved as if it
# were the image. Pauses `$long_sleep` seconds after each photo.
#
# Returns the Array of photo ids that were processed.
def download_photos
  photo_ids = get_photo_ids
  originals_dir = "#{$out_dir}/originals"
  FileUtils.mkdir_p originals_dir

  puts
  puts "*** Downloading #{photo_ids.length} originals"

  total = photo_ids.length

  photo_ids.each_with_index do |id, idx|
    data = flickr.photos.getInfo(photo_id: id)
    url = FlickRaw.url_o data

    puts " * #{id} [#{idx + 1}/#{total}]"

    # http://blog.sacaluta.com/2011/08/flickr-interestingness-downloader-in.html
    response = Net::HTTP.get_response(URI.parse(url))

    if response.is_a?(Net::HTTPSuccess)
      File.open("#{originals_dir}/#{id}.jpg", "wb") { |file| file.write response.body }
    else
      puts "!!! Failed to download #{id} (HTTP #{response.code})"
    end

    sleep $long_sleep
  end
end
|
|
|
# Gathers metadata for every photo and hands the resulting Array to
# `dump_to_file` under the name "photos". Each entry holds:
#
# * photo metadata
# * comments
# * favorites
# * EXIF data, if present and publicly available
# * license
#
# The last four live under the entry's "meta" key. Pauses `$long_sleep`
# seconds after each photo. Returns whatever `dump_to_file` returns.
def get_photo_data
  photo_ids = get_photo_ids
  total = photo_ids.length
  out = []

  photo_ids.each_with_index do |id, idx|
    photo = flickr.photos.getInfo(photo_id: id).to_hash
    opts = { photo_id: id, secret: photo["secret"] }

    # I wish flickraw just nested hashes.
    data = {}
    photo.each do |key, value|
      data[key] = case value
                  when FlickRaw::ResponseList then value.original_hash.values.last
                  when FlickRaw::Response then value.to_hash
                  else value
                  end
    end

    favorites = flickr.photos.getFavorites(opts).original_hash["person"] || []

    # Save a lookup if possible. Wish a count were included for favorites.
    # The printed form is compared so that both 0 and "0" count as "no
    # comments" -- the count may arrive as a String (TODO confirm against a
    # live response). Anything else still triggers the lookup.
    comments = if photo["comments"].to_s == "0"
                 []
               else
                 flickr.photos.comments.getList(opts).original_hash["comment"] || []
               end

    # begin/rescue because EXIF access can be blocked, making FlickRaw bomb out.
    # http://www.flickr.com/account/privacy/
    begin
      exif = flickr.photos.getExif(opts).original_hash["exif"] || []
    rescue FlickRaw::FailedResponse
      exif = []
    end

    puts " * #{id} (#{comments.length} comments, #{favorites.length} favorites) [#{idx + 1}/#{total}]"

    data.merge!("meta" => {
      "comments" => comments,
      "exif" => exif,
      "favorites" => favorites,
      "license" => $licenses[data["license"]]
    })

    out << data

    sleep $long_sleep
  end

  dump_to_file "photos", out
end
|
|
|
# Gathers every collection and hands the resulting Array to `dump_to_file`
# under the name "collections". Each entry holds:
#
# * collection metadata
# * metadata for contained photosets
#
# Note: ignores nested collections, since I don't have any. A collection
# without a "set" entry gets an empty "set" list.
#
# Pauses `$short_sleep` seconds before each photoset lookup and `$long_sleep`
# seconds after each collection. Returns whatever `dump_to_file` returns.
def get_collections
  collections = flickr.collections.getTree
  total = collections.length
  puts "*** #{total} collections"

  out = []

  collections.each_with_index do |collection, idx|
    item = collection.original_hash

    puts " * #{item["title"]} [#{idx + 1}/#{total}]"

    # Can't directly override
    item.delete "set"
    # `|| []` keeps a collection with no sets from crashing on `nil.map`.
    item["set"] = (collection["set"] || []).map do |set|
      sleep $short_sleep

      flickr.photosets.getInfo(photoset_id: set["id"]).to_hash
    end

    out << item

    sleep $long_sleep
  end

  dump_to_file "collections", out
end
|
|
|
# Gathers every photoset of `$user_id` and hands the resulting Array to
# `dump_to_file` under the name "photosets". Each entry holds:
#
# * photoset metadata
# * list of photos in each photoset
#
# Both the photoset pages and each set's photo pages are requested from the
# last page down to the first. Pauses `$short_sleep` seconds after each photo
# page and each photoset, and `$long_sleep` seconds after each photoset page.
# Returns whatever `dump_to_file` returns.
def get_photosets
  total = flickr.photosets.getList(user_id: $user_id, per_page: 1).original_hash["total"].to_i
  per_page = 500 # max: 500
  pages = (total / per_page.to_f).ceil

  puts "*** #{total} photosets"

  out = []

  pages.downto(1).each_with_index do |page, p_idx|
    photosets = flickr.photosets.getList({
      user_id: $user_id,
      per_page: per_page,
      page: page
    }).original_hash["photoset"]

    photosets.each_with_index do |item, i_idx|
      n = (p_idx * per_page) + i_idx + 1

      photo_count = item["photos"].to_i

      # Overwrites the existing `"photos"` key, which gives the number of
      # photos in the set.
      item["photos"] = []

      puts " * #{item["title"]} (#{photo_count} photos) [#{n}/#{total}]"

      photos_per_page = 500 # max: 500
      photo_pages = (photo_count / photos_per_page.to_f).ceil

      photo_pages.downto(1).each do |photo_page|
        page_of_photos = flickr.photosets.getPhotos({
          photoset_id: item["id"],
          page: photo_page,
          per_page: photos_per_page
        }).to_hash["photo"]

        # Append the page's photos directly rather than nesting arrays and
        # flattening afterwards.
        item["photos"].concat(page_of_photos.map { |photo| photo.to_hash })

        sleep $short_sleep
      end

      out << item

      sleep $short_sleep
    end

    sleep $long_sleep
  end

  dump_to_file "photosets", out
end
|
|
|
# Gathers every contact and hands the resulting Array to `dump_to_file` under
# the name "contacts". Each entry holds:
#
# * contact metadata
# * user info for each contact (under "person")
#
# Contact pages are requested from the last page down to the first. Pauses
# `$short_sleep` seconds after each contact and `$long_sleep` seconds after
# each page. Returns whatever `dump_to_file` returns.
def get_contacts
  total = flickr.contacts.getList(user_id: $user_id, per_page: 1).original_hash["total"].to_i
  per_page = 1000 # max: 1000
  pages = (total / per_page.to_f).ceil

  puts "*** #{total} contacts"

  out = []

  pages.downto(1).each_with_index do |page, p_idx|
    contacts = flickr.contacts.getList({
      user_id: $user_id,
      per_page: per_page,
      page: page,
      sort: "time"
    }).original_hash["contact"]

    contacts.each_with_index do |item, i_idx|
      n = (p_idx * per_page) + i_idx + 1

      # Show the same identifier that is used for the lookup below.
      puts " * #{item["nsid"]} [#{n}/#{total}]"
      item["person"] = flickr.people.getInfo(user_id: item["nsid"]).original_hash

      out << item

      sleep $short_sleep
    end

    sleep $long_sleep
  end

  dump_to_file "contacts", out
end
|
|
|
# Gathers every favorite of `$user_id` and hands the resulting Array to
# `dump_to_file` under the name "favorites". Each entry holds:
#
# * favorite metadata
# * photo metadata for each favorite (under "photo")
#
# Pages are requested from the last one down to the first. Pauses
# `$short_sleep` seconds after each favorite and `$long_sleep` seconds after
# each page. Returns whatever `dump_to_file` returns.
def get_favorites
  page_size = 500 # max: 500
  summary = flickr.favorites.getList(user_id: $user_id, per_page: 1).original_hash
  total = summary["total"].to_i
  page_count = (total / page_size.to_f).ceil

  puts "*** #{total} favorites"

  collected = []

  page_count.downto(1).each_with_index do |page_number, position|
    # Progress numbers start from this offset for the page being processed.
    offset = position * page_size

    listing = flickr.favorites.getList(user_id: $user_id, per_page: page_size, page: page_number)

    listing.original_hash["photo"].each_with_index do |favorite, slot|
      puts " * #{favorite["id"]} [#{offset + slot + 1}/#{total}]"

      details = flickr.photos.getInfo(photo_id: favorite["id"])
      favorite["photo"] = details.original_hash
      collected.push(favorite)

      sleep $short_sleep
    end

    sleep $long_sleep
  end

  dump_to_file "favorites", collected
end