Created
September 10, 2010 13:50
-
-
Save blech75/573657 to your computer and use it in GitHub Desktop.
a script to download images associated with a GSMLS ID
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# gsmls-image-downloader.rb
# -------------------------
# When passed a GSMLS ID, this script downloads all of the listing's
# associated images.
require 'open-uri'
# Fetches a single listing image over HTTP.
#
# @param image_url [String] absolute URL of the image to request
# @return [Array] two elements: the response body, and the time from the
#   response's Last-Modified header (nil when the header is absent)
# @raise [OpenURI::HTTPError] when the server answers with an HTTP error status
def fetch_gsmls_image(image_url)
  headers = {
    "User-Agent" => "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)", # heh
    "Referer" => "http://new.gsmls.com/publicsite/common/media.jsp"
  }

  # Kernel#open no longer accepts URLs as of Ruby 3.0; prefer URI.open where
  # open-uri provides it and fall back to Kernel.open on older Rubies.
  opener = URI.respond_to?(:open) ? URI : Kernel

  # the block form closes the remote handle as soon as it has been read
  opener.open(image_url, headers) { |remote| [remote.read, remote.last_modified] }
end

# Downloads every image of a GSMLS listing into a directory named after the
# MLS ID (relative to the current working directory). Images are requested
# with increasing indexes (<id>_0.jpg, <id>_1.jpg, ...) until the server
# answers with an HTTP error. A progress dot per image goes to stderr and a
# one-line summary goes to stdout.
#
# @param mls_id [String] the GSMLS ID; its last two characters select the
#   remote directory
# @return [nil]
def download_image(mls_id)
  # i've seen URLs with these variants:
  #
  #   http://imagehost.gsmls.com/pubhigh/80/2765180_0.jpg
  #   from http://new.gsmls.com/publicsite/common/media.jsp
  #
  #   http://img.gsmls.com/imagedb/highres/80/2765180_0.jpg
  #   from the real estate agent reports
  #
  # ...but they both seem to return the exact same image
  url_prefix = "http://imagehost.gsmls.com/pubhigh"

  # get last two digits of MLS ID; it's needed for the directory
  dir_name = mls_id[-2, 2]

  num = 0
  loop do
    filename = "#{mls_id}_#{num}.jpg"
    image_url = "#{url_prefix}/#{dir_name}/#{filename}"
    local_path = "#{mls_id}/#{filename}"

    begin
      # pull down the file
      data, modified_at = fetch_gsmls_image(image_url)

      # put all the images into a folder named the MLS ID.
      # only check the directory on the first iteration.
      Dir.mkdir(mls_id) if num.zero? && !File.directory?(mls_id)

      # write it out in binary mode, overwriting any existing file. the block
      # form closes (and therefore flushes) the file before we touch its
      # timestamps below.
      File.open(local_path, "wb") { |file| file.write(data) }

      # set the atime and mtime of the file to the Last-Modified date of the
      # HTTP response. this has to happen after the file is closed; a later
      # flush of buffered data would bump the mtime again.
      File.utime(modified_at, modified_at, local_path) if modified_at

      $stderr.putc "."
      $stderr.flush # flush the output after each char so it'll actually display
    rescue OpenURI::HTTPError
      # if we haven't downloaded any images yet and get an HTTP error, then
      # there's obviously no photos for the listing.
      # TODO: more granular HTTP error checking
      if num < 1
        puts "No photos for MLS ID #{mls_id}"
      else
        $stderr.putc "\n"
        puts "Downloaded #{num} images for GSMLS ID #{mls_id}"
      end

      # either way, we're finished attempting to download images, so break out of the loop
      break
    end

    num += 1

    # be nice to the remote server
    sleep 0.5
  end # loop
end # download_image
##### | |
# basic input validation | |
# Script entry point: the first command-line argument must consist solely of
# three or more digits. \A and \z anchor the whole argument; ^ and $ would only
# anchor individual lines and let a multi-line value slip through into the
# download URL and the local file paths.
if /\A\d{3,}\z/.match(ARGV[0])
  download_image(ARGV[0])
else
  $stderr.puts "Error parsing GSMLS ID #{ARGV[0]}."
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment