Created
September 12, 2012 12:14
-
-
Save 0xGGGGG/3706230 to your computer and use it in GitHub Desktop.
scrabing photos from movie catalog site
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "rubygems" | |
require "mechanize" | |
require "cgi" | |
require "csv" | |
require 'digest/sha1' | |
require "image_downloader" | |
class PhotoFetcher | |
def initialize | |
@main_url = "http://www.cinemasquid.com" | |
@agent = Mechanize.new | |
@data_file = "data/photos.csv" | |
end | |
def movie_list | |
index_page = @agent.get "#{@main_url}/blu-ray/movies/screenshots" | |
movie_nodes = index_page.search('ul.icon-list li a') # movie = { text: "sth", href: "link" } | |
@movie_list = [] | |
movie_nodes.each do |node| | |
@movie_list << { title: node.text, link: "#{@main_url}#{node['href']}" } | |
end | |
puts "#{@movie_list.length} Movies Found." | |
return @movie_list | |
end | |
def download_photos_from_movies | |
movie_list.each do |movie| | |
puts "Now downloading #{movie[:title]}..." | |
download_photos_of movie | |
end | |
end | |
def download_photos_of( movie = {} ) | |
movie_link = movie[:link] # in case of movie = { .. link: "link" } || "link" | |
movie_dir = "data/" + Digest::SHA1.hexdigest( "#{movie[:title]}}" ) | |
unless File::directory? movie_dir | |
puts "creating directory #{movie_dir}" | |
Dir.mkdir movie_dir | |
end | |
downloader = ImageDownloader::Process.new movie_link, movie_dir | |
downloader.parse( regexp: /[^'"]+screenshot-med-[^'"]+\.jpg/i ) | |
downloader.download | |
movie[:path] = "#{movie_dir}" | |
insert_movie_folder_map_of movie | |
end | |
def print_movie_list | |
@movie_list.each do |movie| | |
puts "#{movie[:title]} -> #{movie[:link]}" | |
end | |
return nil | |
end | |
private | |
def insert_movie_folder_map_of( movie = {} ) | |
CSV.open("data/movies.csv", "a+", encoding: "ISO8859-1") do |csv| | |
puts "Writing to csv: #{movie[:title]}, #{movie[:path]}" | |
csv << [ movie[:title], movie[:path] ] | |
end | |
end | |
end | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment