Skip to content

Instantly share code, notes, and snippets.

@andreaseger
Last active January 2, 2016 05:29
Show Gist options
  • Save andreaseger/8257346 to your computer and use it in GitHub Desktop.
Save andreaseger/8257346 to your computer and use it in GitHub Desktop.
Download the images of One Piece manga chapters from kissmanga.
ruby=ruby-2.1.0
ruby-gemset=one_piece
#ruby-gem-install=bundler rake
#ruby-bundle-install=true
# Gemfile for the One Piece chapter downloader script.
source "https://rubygems.org"
# Not required by the downloader script itself; presumably kept for
# interactive debugging -- confirm before removing.
gem 'pry'
# Parses the chapter HTML pages (Nokogiri::HTML) to find the image tags.
gem 'nokogiri'
# Supplies EM.synchrony and EM::Synchrony::Iterator, which drive the
# concurrent image downloads.
gem 'em-synchrony'
# HTTP client for EventMachine (EventMachine::HttpRequest) used to fetch
# each image.
gem 'em-http-request'
GEM
remote: https://rubygems.org/
specs:
addressable (2.3.5)
coderay (1.1.0)
cookiejar (0.3.0)
em-http-request (1.1.2)
addressable (>= 2.3.4)
cookiejar
em-socksify (>= 0.3)
eventmachine (>= 1.0.3)
http_parser.rb (>= 0.6.0)
em-socksify (0.3.0)
eventmachine (>= 1.0.0.beta.4)
em-synchrony (1.0.3)
eventmachine (>= 1.0.0.beta.1)
eventmachine (1.0.3)
http_parser.rb (0.6.0)
method_source (0.8.2)
mini_portile (0.5.2)
nokogiri (1.6.1)
mini_portile (~> 0.5.0)
pry (0.9.12.4)
coderay (~> 1.0)
method_source (~> 0.8)
slop (~> 3.4)
slop (3.4.7)
PLATFORMS
ruby
DEPENDENCIES
em-http-request
em-synchrony
nokogiri
pry
#!/usr/bin/env ruby
require 'bundler/setup'
require 'open-uri'
require 'rss'
require 'fileutils'
require 'nokogiri'
require "em-synchrony"
require "em-synchrony/em-http"
RSS_URL = "http://kissmanga.com/RSS/Manga/One-Piece".freeze
DOWNLOAD_BASE = "./chapters"
# Collects the image URLs of a chapter page.
#
# chapter - the chapter page HTML, in any form Nokogiri::HTML accepts
#           (the script passes the IO object yielded by open-uri).
#
# Returns an Array holding the `src` attribute of every <img> found inside
# `div#divImage`. Images that carry no `src` attribute are skipped, so the
# result never contains nil in place of a URL.
def extract_chapter_image_urls(chapter)
  document = Nokogiri::HTML(chapter)
  # compact: an <img> without src would otherwise yield nil, which the
  # downloader cannot turn into a request.
  document.search('div#divImage img').map { |img| img['src'] }.compact
end
# Downloads all +urls+ inside an EventMachine reactor and returns the
# downloaded payloads.
#
# urls        - Array of image URL Strings.
# concurrency - concurrency level handed to EM::Synchrony::Iterator
#               (default: 5).
#
# Returns an Array of [body, filename] pairs, where +body+ is the response
# body and +filename+ is the last segment of the request path. Requests
# whose errback fired are reported on stderr and left out of the result, so
# callers never write an empty file for a download that failed.
def get_images_concurrent(urls, concurrency=5)
  completed = nil
  EM.synchrony do
    # The iterator runs the block for every URL and finishes once each
    # invocation has handed a value to iter.return.
    completed = EM::Synchrony::Iterator.new(urls, concurrency).map do |url, iter|
      http = EventMachine::HttpRequest.new(url).aget
      http.callback { iter.return(http) }
      http.errback do
        # Hand back nil instead of the failed request so it can be filtered
        # out below; the iterator still needs a value to advance.
        warn "Failed to download #{url}: #{http.error}"
        iter.return(nil)
      end
    end
    EventMachine.stop
  end

  completed.compact.map do |http|
    [http.response, http.req.path.split('/').last]
  end
end
# --- Command line -----------------------------------------------------------
# Usage: SCRIPT FIRST_CHAPTER [LAST_CHAPTER]
#   FIRST_CHAPTER  lowest chapter number to download (inclusive)
#   LAST_CHAPTER   optional highest chapter number to download (inclusive)
unless [1, 2].include?(ARGV.size)
  # abort prints to stderr and exits with status 1.
  abort "usage: #{File.basename($PROGRAM_NAME)} FIRST_CHAPTER [LAST_CHAPTER]"
end
first2load = ARGV[0].to_i
last2load = ARGV[1] && ARGV[1].to_i

# --- Download loop ----------------------------------------------------------
# Walks the RSS feed and downloads every chapter in the requested range into
# DOWNLOAD_BASE/<chapter title>/. The loop stops at the first chapter below
# FIRST_CHAPTER, i.e. it relies on the feed listing chapters in descending
# order. Chapters whose directory already exists are skipped.
#
# URI#open (from open-uri) is used rather than Kernel#open so that a string
# taken from the feed can only ever be treated as a URL.
URI.parse(RSS_URL).open do |rss|
  feed = RSS::Parser.parse(rss)
  feed.items.each do |item|
    # {3,} instead of {3}: a four-digit chapter number must not be cut down
    # to its first three digits.
    number_text = item.title[/\d{3,}/]
    unless number_text
      warn "Skipping feed item without a chapter number: #{item.title}"
      next
    end
    number = number_text.to_i

    # LAST_CHAPTER is inclusive, so only chapters strictly above it are skipped.
    next if last2load && number > last2load
    break if number < first2load

    title = item.title.gsub(/^One Piece /, '')
    chapter_path = "#{DOWNLOAD_BASE}/#{title}"
    next if File.exist?(chapter_path)

    #URI.parse(URI::DEFAULT_PARSER.escape(item.link)).open do |chapter|
    #  IO.write("#{chapter_path}/images.urls", extract_chapter_image_urls(chapter).join("\n"))
    #end
    chapter_url = URI::DEFAULT_PARSER.escape(item.link)
    images = URI.parse(chapter_url).open do |chapter|
      get_images_concurrent(extract_chapter_image_urls(chapter))
    end

    # Create the directory only once there is something to store; an empty
    # directory would make later runs treat the chapter as already downloaded.
    if images.empty?
      warn "Chapter #{title}: no images downloaded, skipping"
      next
    end

    FileUtils.mkdir_p(chapter_path)
    images.each do |data, filename|
      # binwrite: image bytes must not go through text-mode translation.
      File.binwrite(File.join(chapter_path, filename), data)
    end
    p "Chapter #{title} downloaded!"
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment