Skip to content

Instantly share code, notes, and snippets.

@venj
Created September 20, 2011 16:16
Show Gist options
  • Save venj/1229550 to your computer and use it in GitHub Desktop.
Save venj/1229550 to your computer and use it in GitHub Desktop.
Yet another script for grabbing pictures...
#!/usr/bin/env ruby
require "rubygems"
require "open-uri"
require "fileutils"
require "hpricot"
# Image grabber: downloads the images linked from one web page into ./images.
#
# Usage: <script> http://link.to.webpage
#
# Steps:
#   1. Fetch the page given as the single command-line argument.
#   2. In every node under <div class="entry-body">, collect the href of each
#      <a> element that contains an <img>.
#   3. Fetch each collected link as an HTML page and save every image matched
#      by //body/p/img as "<first word of alt>_<page index>.jpg".
#
# Exit status is 1 on bad usage, when "images" exists but is not a directory,
# and on any error raised while fetching or saving.

# Directory (relative to the current working directory) that receives images.
IMAGE_DIR = "images"

# Makes sure +dir+ can be used as the download directory.
# Creates it when missing, warns when it already exists as a directory, and
# exits with status 1 when a non-directory of that name is in the way.
def prepare_image_dir(dir)
  if File.directory?(dir)
    puts "Warning: image directory is already exists, continue anyway."
  elsif File.exist?(dir) # File.exists? was removed in Ruby 3.2
    puts "A file named 'images' is already exists."
    exit 1
  else
    FileUtils.mkdir dir
  end
end

# Turns +url+ into a URI object that open-uri can fetch.
#
# @param url  [#to_s] absolute URL, or a reference relative to +base+
# @param base [URI, String, nil] URL of the page +url+ was found on
# @return [URI] a URI that responds to #open
# @raise [ArgumentError] when +url+ is blank or its scheme cannot be opened
# @raise [URI::InvalidURIError] when +url+ is not a well-formed URI
def resolve_url(url, base = nil)
  text = url.to_s.strip
  raise ArgumentError, "Missing URL" if text.empty?

  uri = base ? URI.join(base.to_s, text) : URI.parse(text)
  # Same capability check open-uri itself performs before opening a URI.
  raise ArgumentError, "Unsupported URL: #{text}" unless uri.respond_to?(:open)

  uri
end

# Downloads +uri+ and returns the body as a String.
# Uses URI#open rather than Kernel#open so a scraped value such as "| command"
# can never be executed as a subprocess, and the block form so the underlying
# handle is closed as soon as the body has been read.
def fetch_url(uri)
  uri.open(&:read)
end

# Returns the href of every <a> that wraps an <img> inside the entry body.
def collect_image_links(doc)
  links = []
  doc.search("//div[@class='entry-body']/").each do |entry|
    entry.search("//a").each do |anchor|
      links << anchor.attributes['href'] if anchor.search("img").any?
    end
  end
  links
end

# Builds the local file name for an image.
# The stem is the first whitespace-separated word of the alt text, or "image"
# when alt is missing or blank. Path separators and NUL bytes are replaced so
# a hostile alt text cannot escape the download directory.
# NOTE: all images found on the same page share +index+, so two images whose
# alt text starts with the same word end up with the same file name.
def image_filename(alt, index)
  stem = alt.to_s.split(" ").first || "image"
  safe_stem = stem.gsub(%r{[/\\\x00]}, "_")
  "#{safe_stem}_#{index}.jpg"
end

if ARGV.size != 1
  puts "Usage: #{File.basename $0} http://link.to.webpage"
  exit 1
end

begin
  prepare_image_dir(IMAGE_DIR)

  print "Fetching page..."
  page_uri = resolve_url(ARGV[0])
  page = Hpricot(fetch_url(page_uri))
  image_links = collect_image_links(page)
  puts "Done!\nParseing page...#{image_links.size} images."

  FileUtils.cd(IMAGE_DIR) do
    image_links.each.with_index(1) do |link, index|
      print "Fetching image page #{index}..."
      # Links are resolved against the page they were found on, so relative
      # hrefs work as well as absolute ones.
      image_page_uri = resolve_url(link, page_uri)
      image_page = Hpricot(fetch_url(image_page_uri))
      puts "Done!"

      image_page.search("//body/p/img").each do |img|
        print "Fetching image #{index}..."
        name = image_filename(img.attributes['alt'], index)
        # Download first: a failed fetch must not leave an empty file behind.
        data = fetch_url(resolve_url(img.attributes['src'], image_page_uri))
        File.open(name, "wb") { |file| file.write(data) }
        puts "Done!"
      end
    end
  end

  puts "\nAll Done!"
rescue StandardError => e
  # StandardError only: SystemExit (from `exit`) and Interrupt (Ctrl-C) must
  # propagate instead of being reported as an "Unknown error".
  puts "Unknown error: #{e.message}"
  exit 1
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment