stilist · December 15, 2015 06:49
diff --git a/README.md b/README.md
diff --git a/flickr_backup.rb b/flickr_backup.rb
 #!/usr/bin/ruby

 require "rubygems"
 require "bundler/setup"
 require "fileutils"
 require "json"
 require "net/http"
 require "flickraw"

 # Everything the script saves goes beneath "data", resolved against the
 # directory the script is started from; the directory is created up front.
 $out_dir = File.expand_path("data")
 FileUtils.mkdir_p($out_dir)

 # Pauses handed to `sleep` between API requests.
 $long_sleep = 1
 $short_sleep = 0.5

 # Authorizes the shared `flickr` client.
 #
 # When FLICKR_TOKEN and FLICKR_SECRET are both exported they are assigned to
 # the client as-is. Otherwise an interactive OAuth exchange runs: a request
 # token is fetched, the authorization URL is printed, and the verification
 # code is read from standard input. A failed exchange is reported on standard
 # output and the method returns without raising.
 #
 # Largely borrowed from the flickraw README example:
 # https://github.com/hanklords/flickraw
 def authorize
 	if ENV["FLICKR_TOKEN"] && ENV["FLICKR_SECRET"]
 		flickr.access_token = ENV["FLICKR_TOKEN"]
 		flickr.access_secret = ENV["FLICKR_SECRET"]
 	else
 		token = flickr.get_request_token
 		auth_url = flickr.get_authorize_url token["oauth_token"], perms: "delete"

 		puts "Open this url in your browser to complete the authentication process:"
 		puts auth_url
 		puts "Copy here the number given when you complete the process."
 		# Read from $stdin explicitly: a bare `gets` reads from files named in ARGV
 		# when there are any. `to_s` covers end-of-input, where `gets` returns nil.
 		verify = $stdin.gets.to_s.strip

 		begin
 			flickr.get_access_token token["oauth_token"],
 					token["oauth_token_secret"],
 					verify
 			puts "Save yourself some trouble next time--export these in your shell:"
 			puts "   FLICKR_TOKEN=#{flickr.access_token}"
 			puts "   FLICKR_SECRET=#{flickr.access_secret}"
 		rescue FlickRaw::FailedResponse => e
 			puts "Authentication failed: #{e.msg}"
 		end
 	end
 end

 # Configure flickraw from the environment, authorize, and remember the
 # logged-in user's id in `$user_id`. Every later step calls the API, so the
 # script stops right after printing the hint when the credentials are missing
 # instead of carrying on with an unconfigured client.
 if ENV["FLICKR_API_KEY"] && ENV["FLICKR_SHARED_SECRET"]
 	FlickRaw.api_key = ENV["FLICKR_API_KEY"]
 	FlickRaw.shared_secret = ENV["FLICKR_SHARED_SECRET"]

 	authorize

 	login = flickr.test.login
 	$user_id = login.id
 	puts "*** Logged in as #{login.username} (#{$user_id})"
 else
 	puts "*** Be sure to export FLICKR_API_KEY and FLICKR_SHARED_SECRET in your shell."
 	puts "    You can get them for your app at: http://www.flickr.com/services/apps/"
 	exit 1
 end

 # Lookup table filled by `get_licenses`: license id => { "name", "url" }.
 $licenses = {}

 # Reads the license list from flickr.photos.licenses.getInfo and stores each
 # entry in `$licenses` under its "id", keeping only "name" and "url".
 def get_licenses
 	listing = flickr.photos.licenses.getInfo["license"]
 	listing.each do |entry|
 		$licenses.store(entry["id"], { "name" => entry["name"], "url" => entry["url"] })
 	end
 end
 get_licenses

 # Writes `data` as JSON to "<$out_dir>/<filename>.json".
 #
 # filename - base name of the output file, without the ".json" extension.
 # data     - any object that responds to `to_json`.
 #
 # Failures while opening or writing the file are reported on standard output
 # instead of being raised. The block form of File.open closes the handle on
 # every path, and there is nothing to close when the file never opened.
 def dump_to_file filename, data
 	path = "#{$out_dir}/#{filename}.json"
 	File.open(path, "w") { |file| file.puts data.to_json }
 rescue StandardError => e
 	# StandardError, not Exception: interrupts and `exit` must still get through.
 	puts "!!! Failed to save data (#{path})"
 	puts e.message
 end

 # Collects the ids of the photos listed for `$user_id`.
 #
 # The photo count reported by flickr.people.getInfo decides how many pages of
 # flickr.people.getPhotos are requested. Pages are fetched from the last one
 # down to the first, pausing `$long_sleep` after each request.
 #
 # Returns an Array of photo ids.
 def get_photo_ids
 	page_size = 500 # max: 500
 	photo_count = flickr.people.getInfo(user_id: $user_id)["photos"]["count"]
 	page_count = photo_count.fdiv(page_size).ceil

 	puts "*** #{photo_count} photos"

 	page_count.downto(1).each_with_object([]) do |page_number, collected|
 		query = { user_id: $user_id, per_page: page_size, page: page_number }
 		batch = flickr.people.getPhotos(query)
 		batch.each { |entry| collected.push entry["id"] }

 		sleep $long_sleep
 	end
 end

 # Downloads the original of every photo into "<$out_dir>/originals/<id>.jpg".
 #
 # Ids come from `get_photo_ids`; the download URL is built by FlickRaw.url_o
 # from flickr.photos.getInfo. A response that is not a success is reported on
 # standard output and skipped, so an error page is never saved as a photo.
 # Pauses `$long_sleep` after every photo.
 def download_photos
 	photo_ids = get_photo_ids
 	originals_dir = "#{$out_dir}/originals"
 	FileUtils.mkdir_p originals_dir

 	total = photo_ids.length

 	puts
 	puts "*** Downloading #{total} originals"

 	photo_ids.each_with_index do |id, idx|
 		data = flickr.photos.getInfo(photo_id: id)
 		url = FlickRaw.url_o data

 		puts "  * #{id} [#{idx + 1}/#{total}]"

 		# http://blog.sacaluta.com/2011/08/flickr-interestingness-downloader-in.html
 		response = Net::HTTP.get_response(URI.parse(url))
 		if response.is_a? Net::HTTPSuccess
 			File.open("#{originals_dir}/#{id}.jpg", "wb") { |file| file.write response.body }
 		else
 			puts "!!! Failed to download #{id} (HTTP #{response.code})"
 		end

 		sleep $long_sleep
 	end
 end

 # Builds one record per photo and saves the list as "photos.json":
 # * photo metadata
 # * comments
 # * favorites
 # * EXIF data, if present and publicly available
 # * license
 #
 # Each record is the flattened flickr.photos.getInfo hash plus a "meta" key
 # holding the comments, EXIF entries, favorites and the `$licenses` entry for
 # the photo's license. Pauses `$long_sleep` after every photo.
 def get_photo_data
 	photo_ids = get_photo_ids
 	total = photo_ids.length
 	out = []

 	photo_ids.each_with_index do |id, idx|
 		photo = flickr.photos.getInfo(photo_id: id).to_hash
 		opts = { photo_id: id, secret: photo["secret"] }

 		# I wish flickraw just nested hashes.
 		data = {}
 		photo.each do |k, v|
 			data[k] = case v
 				when FlickRaw::ResponseList then v.original_hash.values.last
 				when FlickRaw::Response then v.to_hash
 				else v
 			end
 		end

 		favorites = flickr.photos.getFavorites(opts).original_hash["person"] || []

 		# Save a lookup if possible. Wish a count were included for favorites.
 		# The count is accepted as Integer 0 or String "0": comparing against the
 		# Integer alone never matches a String count.
 		comment_count = photo["comments"]
 		if comment_count == 0 || comment_count == "0"
 			comments = []
 		else
 			comments = flickr.photos.comments.getList(opts).original_hash["comment"] || []
 		end

 		# begin/rescue because EXIF access can be blocked, making FlickRaw bomb out.
 		# http://www.flickr.com/account/privacy/
 		begin
 			exif = flickr.photos.getExif(opts).original_hash["exif"] || []
 		rescue FlickRaw::FailedResponse
 			exif = []
 		end

 		puts "  * #{id} (#{comments.length} comments, #{favorites.length} favorites) [#{idx + 1}/#{total}]"

 		data.merge!("meta" => {
 			"comments" => comments,
 			"exif" => exif,
 			"favorites" => favorites,
 			"license" => $licenses[data["license"]]
 		})

 		out << data

 		sleep $long_sleep
 	end

 	dump_to_file "photos", out
 end

 # Saves "collections.json":
 # * collection metadata
 # * metadata for contained photosets
 #
 # Each collection's "set" entry is replaced by the flickr.photosets.getInfo
 # hash of every photoset it references. A collection without a "set" entry is
 # saved with an empty list instead of aborting the run.
 #
 # Note: ignores nested collections, since I don't have any.
 def get_collections
 	collections = flickr.collections.getTree
 	total = collections.length
 	puts "*** #{total} collections"

 	out = []

 	collections.each_with_index do |collection, idx|
 		item = collection.original_hash

 		puts "  * #{item["title"]} [#{idx + 1}/#{total}]"

 		# Can't directly override
 		item.delete "set"
 		item["set"] = (collection["set"] || []).map do |set|
 			sleep $short_sleep

 			flickr.photosets.getInfo(photoset_id: set["id"]).to_hash
 		end

 		out << item

 		sleep $long_sleep
 	end

 	dump_to_file "collections", out
 end

 # Saves "photosets.json":
 # * photoset metadata
 # * list of photos in each photoset
 #
 # Photosets are read page by page from flickr.photosets.getList, last page
 # first. Each photoset's "photos" count is replaced by the photo hashes
 # returned by flickr.photosets.getPhotos.
 def get_photosets
 	total = flickr.photosets.getList(user_id: $user_id, per_page: 1).original_hash["total"].to_i
 	per_page = 500 # max: 500
 	pages = (total / per_page.to_f).ceil

 	puts "*** #{total} photosets"

 	out = []
 	# Running position for the progress line. Deriving it from the page index
 	# overshoots `total`, because the partially filled last page comes first.
 	n = 0

 	pages.downto(1).each do |page|
 		photosets = flickr.photosets.getList({
 			user_id: $user_id,
 			per_page: per_page,
 			page: page
 		}).original_hash["photoset"]

 		photosets.each do |item|
 			n += 1

 			photo_count = item["photos"].to_i
 			item["photos"] = []

 			puts "  * #{item["title"]} (#{photo_count} photos) [#{n}/#{total}]"

 			# Overwrites the existing `"photos"` key, which gives the number of
 			# photos in the set.
 			photos_per_page = 500 # max: 500
 			photo_pages = (photo_count / photos_per_page.to_f).ceil
 			photo_pages.downto(1).each do |photo_page|
 				item["photos"].concat flickr.photosets.getPhotos({
 					photoset_id: item["id"],
 					page: photo_page,
 					per_page: photos_per_page
 				}).to_hash["photo"].map { |photo| photo.to_hash }

 				sleep $short_sleep
 			end
 			out << item

 			sleep $short_sleep
 		end

 		sleep $long_sleep
 	end

 	dump_to_file "photosets", out
 end

 # Saves "contacts.json":
 # * contact metadata
 # * user info for each contact
 #
 # Contacts are read page by page from flickr.contacts.getList, last page
 # first. Each contact gains a "person" key holding the flickr.people.getInfo
 # hash for its "nsid".
 def get_contacts
 	total = flickr.contacts.getList(user_id: $user_id, per_page: 1).original_hash["total"].to_i
 	per_page = 1000 # max: 1000
 	pages = (total / per_page.to_f).ceil

 	puts "*** #{total} contacts"

 	out = []
 	# Running position for the progress line. Deriving it from the page index
 	# overshoots `total`, because the partially filled last page comes first.
 	n = 0

 	pages.downto(1).each do |page|
 		contacts = flickr.contacts.getList({
 			user_id: $user_id,
 			per_page: per_page,
 			page: page,
 			sort: "time"
 		}).original_hash["contact"]

 		contacts.each do |item|
 			n += 1

 			# Falls back to "nsid" (the key used for the lookup below) when the
 			# contact has no "id" -- NOTE(review): confirm which key the API sends.
 			puts "  * #{item["id"] || item["nsid"]} [#{n}/#{total}]"
 			item["person"] = flickr.people.getInfo(user_id: item["nsid"]).original_hash

 			out << item

 			sleep $short_sleep
 		end

 		sleep $long_sleep
 	end

 	dump_to_file "contacts", out
 end

 # Saves "favorites.json":
 # * favorite metadata
 # * photo metadata for each favorite
 #
 # Favorites are read page by page from flickr.favorites.getList, last page
 # first. Each favorite gains a "photo" key holding the flickr.photos.getInfo
 # hash for its "id".
 def get_favorites
 	total = flickr.favorites.getList(user_id: $user_id, per_page: 1).original_hash["total"].to_i
 	per_page = 500 # max: 500
 	pages = (total / per_page.to_f).ceil

 	puts "*** #{total} favorites"

 	out = []
 	# Running position for the progress line. Deriving it from the page index
 	# overshoots `total`, because the partially filled last page comes first.
 	n = 0

 	pages.downto(1).each do |page|
 		favorites = flickr.favorites.getList({
 			user_id: $user_id,
 			per_page: per_page,
 			page: page
 		}).original_hash["photo"]

 		favorites.each do |item|
 			n += 1

 			puts "  * #{item["id"]} [#{n}/#{total}]"

 			item["photo"] = flickr.photos.getInfo(photo_id: item["id"]).original_hash
 			out << item

 			sleep $short_sleep
 		end

 		sleep $long_sleep
 	end

 	dump_to_file "favorites", out
 end
diff --git a/Gemfile b/Gemfile
 # Gems are resolved from rubygems.org.
 source "https://rubygems.org"

 # Ruby version this project declares to Bundler.
 ruby "1.9.3"

 # Flickr API client loaded by flickr_backup.rb (`require "flickraw"`).
 gem "flickraw"
diff --git a/LICENSE b/LICENSE
 Copyright (c) 2013 Jordan Cole

 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
	#!/usr/bin/ruby

	require "rubygems"
	require "bundler/setup"
	require "flickraw"

	# Everything the script saves goes beneath "data", resolved against the
	# directory the script is started from; the directory is created up front.
	$out_dir = File.expand_path("data")
	FileUtils.mkdir_p($out_dir)

	# Pauses handed to `sleep` between API requests.
	$long_sleep = 1
	$short_sleep = 0.5

	# Authorizes the shared `flickr` client.
	#
	# When FLICKR_TOKEN and FLICKR_SECRET are both exported they are assigned to
	# the client as-is. Otherwise an interactive OAuth exchange runs: a request
	# token is fetched, the authorization URL is printed, and the verification
	# code is read from standard input. A failed exchange is reported on standard
	# output and the method returns without raising.
	#
	# Largely borrowed from the flickraw README example:
	# https://github.com/hanklords/flickraw
	def authorize
		if ENV["FLICKR_TOKEN"] && ENV["FLICKR_SECRET"]
			flickr.access_token = ENV["FLICKR_TOKEN"]
			flickr.access_secret = ENV["FLICKR_SECRET"]
		else
			token = flickr.get_request_token
			auth_url = flickr.get_authorize_url token["oauth_token"], perms: "delete"

			puts "Open this url in your browser to complete the authentication process:"
			puts auth_url
			puts "Copy here the number given when you complete the process."
			# Read from $stdin explicitly: a bare `gets` reads from files named in ARGV
			# when there are any. `to_s` covers end-of-input, where `gets` returns nil.
			verify = $stdin.gets.to_s.strip

			begin
				flickr.get_access_token token["oauth_token"],
						token["oauth_token_secret"],
						verify
				puts "Save yourself some trouble next time--export these in your shell:"
				puts " FLICKR_TOKEN=#{flickr.access_token}"
				puts " FLICKR_SECRET=#{flickr.access_secret}"
			rescue FlickRaw::FailedResponse => e
				puts "Authentication failed: #{e.msg}"
			end
		end
	end

	# Configure flickraw from the environment, authorize, and remember the
	# logged-in user's id in `$user_id`. Every later step calls the API, so the
	# script stops right after printing the hint when the credentials are missing
	# instead of carrying on with an unconfigured client.
	if ENV["FLICKR_API_KEY"] && ENV["FLICKR_SHARED_SECRET"]
		FlickRaw.api_key = ENV["FLICKR_API_KEY"]
		FlickRaw.shared_secret = ENV["FLICKR_SHARED_SECRET"]

		authorize

		login = flickr.test.login
		$user_id = login.id
		puts "*** Logged in as #{login.username} (#{$user_id})"
	else
		puts "*** Be sure to export FLICKR_API_KEY and FLICKR_SHARED_SECRET in your shell."
		puts " You can get them for your app at: http://www.flickr.com/services/apps/"
		exit 1
	end

	# Lookup table filled by `get_licenses`: license id => { "name", "url" }.
	$licenses = {}

	# Reads the license list from flickr.photos.licenses.getInfo and stores each
	# entry in `$licenses` under its "id", keeping only "name" and "url".
	def get_licenses
		flickr.photos.licenses.getInfo["license"].each do |license|
			# Convert the `Array` of `Hash`es into an `"id"`-keyed `Hash`.
			$licenses[license["id"]] = {
				"name" => license["name"],
				"url" => license["url"]
			}
		end
	end
	get_licenses

	# Writes `data` as JSON to "<$out_dir>/<filename>.json".
	#
	# filename - base name of the output file, without the ".json" extension.
	# data     - any object that responds to `to_json`.
	#
	# Failures while opening or writing the file are reported on standard output
	# instead of being raised. The block form of File.open closes the handle on
	# every path, and there is nothing to close when the file never opened.
	def dump_to_file filename, data
		path = "#{$out_dir}/#{filename}.json"
		File.open(path, "w") { |file| file.puts data.to_json }
	rescue StandardError => e
		# StandardError, not Exception: interrupts and `exit` must still get through.
		puts "!!! Failed to save data (#{path})"
		puts e.message
	end

	# Collects the ids of the photos listed for `$user_id`.
	#
	# The photo count reported by flickr.people.getInfo decides how many pages of
	# flickr.people.getPhotos are requested. Pages are fetched from the last one
	# down to the first, pausing `$long_sleep` after each request.
	#
	# Returns an Array of photo ids.
	def get_photo_ids
		total_photos = flickr.people.getInfo(user_id: $user_id)["photos"]["count"]
		per_page = 500 # max: 500
		pages = (total_photos / per_page.to_f).ceil

		puts "*** #{total_photos} photos"

		ids = []
		pages.downto(1).each do |page|
			photos = flickr.people.getPhotos({
				user_id: $user_id,
				per_page: per_page,
				page: page
			})
			photos.each { |photo| ids << photo["id"] }

			sleep $long_sleep
		end

		ids
	end

	# Downloads the original of every photo into "<$out_dir>/originals/<id>.jpg".
	#
	# Ids come from `get_photo_ids`; the download URL is built by FlickRaw.url_o
	# from flickr.photos.getInfo. A response that is not a success is reported on
	# standard output and skipped, so an error page is never saved as a photo.
	# Pauses `$long_sleep` after every photo.
	def download_photos
		photo_ids = get_photo_ids
		originals_dir = "#{$out_dir}/originals"
		FileUtils.mkdir_p originals_dir

		total = photo_ids.length

		puts
		puts "*** Downloading #{total} originals"

		photo_ids.each_with_index do |id, idx|
			data = flickr.photos.getInfo(photo_id: id)
			url = FlickRaw.url_o data

			puts " * #{id} [#{idx + 1}/#{total}]"

			# http://blog.sacaluta.com/2011/08/flickr-interestingness-downloader-in.html
			response = Net::HTTP.get_response(URI.parse(url))
			if response.is_a? Net::HTTPSuccess
				File.open("#{originals_dir}/#{id}.jpg", "wb") { |file| file.write response.body }
			else
				puts "!!! Failed to download #{id} (HTTP #{response.code})"
			end

			sleep $long_sleep
		end
	end

	# Builds one record per photo and saves the list as "photos.json":
	# * photo metadata
	# * comments
	# * favorites
	# * EXIF data, if present and publicly available
	# * license
	#
	# Each record is the flattened flickr.photos.getInfo hash plus a "meta" key
	# holding the comments, EXIF entries, favorites and the `$licenses` entry for
	# the photo's license. Pauses `$long_sleep` after every photo.
	def get_photo_data
		photo_ids = get_photo_ids
		total = photo_ids.length
		out = []

		photo_ids.each_with_index do |id, idx|
			photo = flickr.photos.getInfo(photo_id: id).to_hash
			opts = { photo_id: id, secret: photo["secret"] }

			# I wish flickraw just nested hashes.
			data = {}
			photo.each do |k, v|
				data[k] = case v
					when FlickRaw::ResponseList then v.original_hash.values.last
					when FlickRaw::Response then v.to_hash
					else v
				end
			end

			favorites = flickr.photos.getFavorites(opts).original_hash["person"] || []

			# Save a lookup if possible. Wish a count were included for favorites.
			# The count is accepted as Integer 0 or String "0": comparing against the
			# Integer alone never matches a String count.
			comment_count = photo["comments"]
			if comment_count == 0 || comment_count == "0"
				comments = []
			else
				comments = flickr.photos.comments.getList(opts).original_hash["comment"] || []
			end

			# begin/rescue because EXIF access can be blocked, making FlickRaw bomb out.
			# http://www.flickr.com/account/privacy/
			begin
				exif = flickr.photos.getExif(opts).original_hash["exif"] || []
			rescue FlickRaw::FailedResponse
				exif = []
			end

			puts " * #{id} (#{comments.length} comments, #{favorites.length} favorites) [#{idx + 1}/#{total}]"

			data.merge!("meta" => {
				"comments" => comments,
				"exif" => exif,
				"favorites" => favorites,
				"license" => $licenses[data["license"]]
			})

			out << data

			sleep $long_sleep
		end

		dump_to_file "photos", out
	end

	# Saves "collections.json":
	# * collection metadata
	# * metadata for contained photosets
	#
	# Each collection's "set" entry is replaced by the flickr.photosets.getInfo
	# hash of every photoset it references. A collection without a "set" entry is
	# saved with an empty list instead of aborting the run.
	#
	# Note: ignores nested collections, since I don't have any.
	def get_collections
		collections = flickr.collections.getTree
		total = collections.length
		puts "*** #{total} collections"

		out = []

		collections.each_with_index do |collection, idx|
			item = collection.original_hash

			puts " * #{item["title"]} [#{idx + 1}/#{total}]"

			# Can't directly override
			item.delete "set"
			item["set"] = (collection["set"] || []).map do |set|
				sleep $short_sleep

				flickr.photosets.getInfo(photoset_id: set["id"]).to_hash
			end

			out << item

			sleep $long_sleep
		end

		dump_to_file "collections", out
	end

	# Saves "photosets.json":
	# * photoset metadata
	# * list of photos in each photoset
	#
	# Photosets are read page by page from flickr.photosets.getList, last page
	# first. Each photoset's "photos" count is replaced by the photo hashes
	# returned by flickr.photosets.getPhotos.
	def get_photosets
		total = flickr.photosets.getList(user_id: $user_id, per_page: 1).original_hash["total"].to_i
		per_page = 500 # max: 500
		pages = (total / per_page.to_f).ceil

		puts "*** #{total} photosets"

		out = []
		# Running position for the progress line. Deriving it from the page index
		# overshoots `total`, because the partially filled last page comes first.
		n = 0

		pages.downto(1).each do |page|
			photosets = flickr.photosets.getList({
				user_id: $user_id,
				per_page: per_page,
				page: page
			}).original_hash["photoset"]

			photosets.each do |item|
				n += 1

				photo_count = item["photos"].to_i
				item["photos"] = []

				puts " * #{item["title"]} (#{photo_count} photos) [#{n}/#{total}]"

				# Overwrites the existing `"photos"` key, which gives the number of
				# photos in the set.
				photos_per_page = 500 # max: 500
				photo_pages = (photo_count / photos_per_page.to_f).ceil
				photo_pages.downto(1).each do |photo_page|
					item["photos"].concat flickr.photosets.getPhotos({
						photoset_id: item["id"],
						page: photo_page,
						per_page: photos_per_page
					}).to_hash["photo"].map { |photo| photo.to_hash }

					sleep $short_sleep
				end
				out << item

				sleep $short_sleep
			end

			sleep $long_sleep
		end

		dump_to_file "photosets", out
	end

	# Saves "contacts.json":
	# * contact metadata
	# * user info for each contact
	#
	# Contacts are read page by page from flickr.contacts.getList, last page
	# first. Each contact gains a "person" key holding the flickr.people.getInfo
	# hash for its "nsid".
	def get_contacts
		total = flickr.contacts.getList(user_id: $user_id, per_page: 1).original_hash["total"].to_i
		per_page = 1000 # max: 1000
		pages = (total / per_page.to_f).ceil

		puts "*** #{total} contacts"

		out = []
		# Running position for the progress line. Deriving it from the page index
		# overshoots `total`, because the partially filled last page comes first.
		n = 0

		pages.downto(1).each do |page|
			contacts = flickr.contacts.getList({
				user_id: $user_id,
				per_page: per_page,
				page: page,
				sort: "time"
			}).original_hash["contact"]

			contacts.each do |item|
				n += 1

				# Falls back to "nsid" (the key used for the lookup below) when the
				# contact has no "id" -- NOTE(review): confirm which key the API sends.
				puts " * #{item["id"] || item["nsid"]} [#{n}/#{total}]"
				item["person"] = flickr.people.getInfo(user_id: item["nsid"]).original_hash

				out << item

				sleep $short_sleep
			end

			sleep $long_sleep
		end

		dump_to_file "contacts", out
	end

	# Saves "favorites.json":
	# * favorite metadata
	# * photo metadata for each favorite
	#
	# Favorites are read page by page from flickr.favorites.getList, last page
	# first. Each favorite gains a "photo" key holding the flickr.photos.getInfo
	# hash for its "id".
	def get_favorites
		total = flickr.favorites.getList(user_id: $user_id, per_page: 1).original_hash["total"].to_i
		per_page = 500 # max: 500
		pages = (total / per_page.to_f).ceil

		puts "*** #{total} favorites"

		out = []
		# Running position for the progress line. Deriving it from the page index
		# overshoots `total`, because the partially filled last page comes first.
		n = 0

		pages.downto(1).each do |page|
			favorites = flickr.favorites.getList({
				user_id: $user_id,
				per_page: per_page,
				page: page
			}).original_hash["photo"]

			favorites.each do |item|
				n += 1

				puts " * #{item["id"]} [#{n}/#{total}]"

				item["photo"] = flickr.photos.getInfo(photo_id: item["id"]).original_hash
				out << item

				sleep $short_sleep
			end

			sleep $long_sleep
		end

		dump_to_file "favorites", out
	end
	Copyright (c) 2013 Jordan Cole

	Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

	The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.