Created
September 20, 2011 16:16
-
-
Save venj/1229550 to your computer and use it in GitHub Desktop.
Yet another script for grabbing pictures.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
#
# Downloads the pictures linked from a web page into a local "images" directory.
#
# Usage: <script> http://link.to.webpage
#
# Steps:
#   1. Fetch the given page and collect the href of every <a> that wraps an
#      <img> inside <div class="entry-body">.
#   2. Fetch each linked page and save every image matched by //body/p/img as
#      "<first word of alt>_<page index>.jpg" inside the "images" directory.
#
# Exits with status 1 on bad usage, when "images" exists but is not a
# directory, or when any fetch/parse/write step fails.
require "rubygems"
require "open-uri"
require "fileutils"
require "hpricot"

# Opens a remote resource through open-uri and returns whatever URI#open
# returns (the block's value when a block is given).
#
# url - a String or URI. Only schemes that open-uri knows how to open are
#       accepted; anything else raises ArgumentError.
#
# Kernel#open is deliberately avoided: the URLs come from scraped HTML, and
# Kernel#open would run a value such as "|cmd" as a shell command.
def open_remote(url, &block)
  uri = url.is_a?(URI::Generic) ? url : URI.parse(url.to_s)
  raise ArgumentError, "Unsupported URL: #{url}" unless uri.respond_to?(:open)
  uri.open(&block)
end

# Fetches +url+ and returns the response body as a String, closing the handle.
def fetch_body(url)
  open_remote(url) { |io| io.read }
end

if ARGV.size != 1
  puts "Usage: #{File.basename $0} http://link.to.webpage"
  exit 1
end

begin
  page_url = ARGV[0]
  imagedir = "images"

  if File.exist?(imagedir)
    if File.directory?(imagedir)
      puts "Warning: image directory is already exists, continue anyway."
    else
      puts "A file named 'images' is already exists."
      exit 1
    end
  else
    FileUtils.mkdir imagedir
  end

  print "Fetching page..."
  page = Hpricot(fetch_body(page_url))

  # Keep only anchors that wrap an image; resolve each href against the page
  # URL so relative links work as well as absolute ones.
  imagelinks = []
  page.search("//div[@class='entry-body']/").each do |entry|
    entry.search("//a").each do |anchor|
      next unless anchor.search("img").any?
      imagelinks << URI.join(page_url, anchor.attributes['href'].to_s).to_s
    end
  end
  puts "Done!\nParseing page...#{imagelinks.size} images."

  FileUtils.cd(imagedir) do
    imagelinks.each_with_index do |link, position|
      index = position + 1
      print "Fetching image page #{index}..."
      image_page = Hpricot(fetch_body(link))
      puts "Done!"

      image_page.search("//body/p/img").each do |img|
        print "Fetching image #{index}..."
        # The alt text is untrusted: tolerate a missing value and strip path
        # separators so the file always lands inside the image directory.
        prefix = img.attributes['alt'].to_s.split(" ").first || "image"
        name = "#{prefix.gsub(%r{[/\\]}, '_')}_#{index}.jpg"
        ilink = URI.join(link, img.attributes['src'].to_s)
        open_remote(ilink) do |infile|
          File.open(name, "wb") { |outfile| outfile.write infile.read }
        end
        puts "Done!"
      end
    end
  end

  puts "\nAll Done!"
rescue StandardError => e
  # StandardError only: SystemExit (from `exit` above) and Interrupt must
  # propagate instead of being reported as an "Unknown error".
  puts "Unknown error: #{e.message}"
  exit 1
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment