Created
September 20, 2010 06:15
-
-
Save rtanglao/587492 to your computer and use it in GitHub Desktop.
download flickr "sq" (square 75x75) photos using metadata from stdin or file specified as command line argument
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| require 'json' | |
| require 'pp' | |
| require 'curb' | |
| # requires serialized flickr json file to be $stdin or specified on the command line and then | |
| # downloads the flickr "sq" (75x75) files to the current directory | |
# Split +array+ into +pieces+ consecutive, roughly equal chunks.
# Earlier chunks absorb the remainder when the length is not evenly
# divisible. Returns an array of +pieces+ sub-arrays (possibly empty).
def chunk_array(array, pieces=2)
  len = array.length
  mid = (len/pieces)
  chunks = []
  start = 0
  1.upto(pieces) do |i|
    last = start+mid
    # Only the first (len % pieces) chunks keep the extra element.
    last = last-1 unless len%pieces >= i
    # Parenthesized fallback: the original `chunks << array[start..last] || []`
    # parsed as `(chunks << array[start..last]) || []` because `<<` binds
    # tighter than `||`, so a nil slice (start past the end) was pushed as-is.
    chunks << (array[start..last] || [])
    start = last+1
  end
  chunks
end
# Download every URL in +urls+ concurrently via Curl::Multi, following
# HTTP redirects. Logs each transfer's filename (last path segment,
# query string stripped) to stderr as it is fetched.
def fetch_parallel(urls)
  multi_options = { :pipeline => false }
  easy_options = { :follow_location => true }
  Curl::Multi.download(urls, easy_options, multi_options) do |easy, _code, _method|
    # Drop the "?query" part, then keep only the final path component.
    without_query = easy.url.split(/\?/).first
    filename = without_query.split(/\//).last
    $stderr.printf "fetching:%s\n", filename
  end
end
# Main loop: reads serialized Flickr API JSON (one page of results per
# line) from $stdin or from files named on the command line, then fetches
# each photo's "sq" (75x75) thumbnail URL into the current directory.
ARGF.each_line do |line|
  flickr_data_page = JSON.parse(line)
  total = flickr_data_page["photos"]["total"].to_i
  total_pages = flickr_data_page["photos"]["pages"].to_i
  page = flickr_data_page["photos"]["page"].to_i
  $stderr.printf "Total photos to download:%d page:%d of:%d\n", total, page, total_pages
  # 250 photos per API page; only the last page can be partial.
  if page == total_pages
    total_to_download_for_this_page = total % 250
    # BUGFIX: when total is an exact multiple of 250, the last page is
    # FULL, not empty — the bare modulo downloaded 0 photos for totals
    # of 250, 500, 750, ...
    total_to_download_for_this_page = 250 if total_to_download_for_this_page.zero? && total > 0
  else
    total_to_download_for_this_page = 250
  end
  photos = flickr_data_page["photos"]["photo"]
  urls = (0...total_to_download_for_this_page).map { |i| photos[i]["url_sq"] }
  # Retrieve 5 urls at a time: each page is up to 250, so 50 chunks of urls.
  chunks = chunk_array(urls, 50)
  chunks.each do |chunk_of_urls|
    fetch_parallel(chunk_of_urls) unless chunk_of_urls.empty?
    # sleep(15.75) #kludge! fix me!
  end
end
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
interestingly the same script ( http://gist.github.com/580315 ) works just fine for the 5 megapixel originals, bizarre it works for 5MP but not for 75x75 jpegs