Youjizz Downloader
# encoding: UTF-8
require 'net/http'
require 'nokogiri'
require 'colorize'
require 'fileutils'
require 'tempfile'
require 'uri'
require 'json'
###
# Lsong
# [email protected]
# http://lsong.org
#
# MIT LICENSE
# http://lsong.mit-license.org/
#
# RUNTIME
# ==============
# $ ruby -v
# ruby 2.0.0p247 (2013-06-27 revision 41674) [x86_64-darwin12.4.0]
#
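# USAGE (a sketch; assumes the file is saved as, e.g., youjizz.rb)
# ==============
# $ gem install nokogiri colorize
# $ ruby youjizz.rb
#
# Needs wget on the PATH; each post is saved to ./<post_title>/{preview,videos}
# and a manifest.json is written once every file has downloaded.
#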
def fetch_site
  n = 1
  site = "http://m.youjizz.com"
  # fetch_page always returns an array, so stop once a page yields no posts
  until (posts_url = fetch_page(site, n)).empty?
    puts "index of page #{n} .".colorize(:yellow)
    posts_url.each do |post_url|
      post_title = get_title_from post_url
      if File.exist?("#{post_title}/manifest.json")
        puts "already exists #{post_title}".colorize(:red)
        next
      end
      #FileUtils.rm_rf("#{post_title}")
      puts "fetch of #{post_title}".colorize(:green)
      will_download_images = []
      will_download_videos = []
      downloaded_images = []
      downloaded_videos = []
      post_images, post_videos = fetch_images_and_videos post_url
      puts "discovered #{post_images.count} images and #{post_videos.count} videos".colorize(:yellow)
      post_images.each_with_index do |image_url, index|
        image_filename = "#{post_title}_#{index + 1}#{get_ext_from image_url}"
        will_download_images << { :filename => image_filename, :url => image_url }
      end
      post_videos.each do |video_url|
        suffix = 1
        video_filename = "#{post_title}#{get_ext_from video_url}"
        # disambiguate duplicate video filenames with an increasing suffix
        while will_download_videos.any? { |v| v[:filename] == video_filename }
          video_filename = "#{post_title}_x#{suffix}#{get_ext_from video_url}"
          suffix += 1
        end
        will_download_videos << { :filename => video_filename, :url => video_url }
      end
      puts "create album dir of #{post_title}".colorize(:yellow)
      FileUtils.mkdir_p("#{post_title}/preview")
      will_download_images.each do |image|
        puts "downloading image #{image[:filename]}".colorize(:yellow)
        image_path = "#{post_title}/preview/#{image[:filename]}"
        download_file(image[:url], image_path) unless File.exist? image_path
        downloaded_images << image[:url] if File.exist? image_path
      end
      FileUtils.mkdir_p("#{post_title}/videos")
      will_download_videos.each do |video|
        puts "downloading video #{video[:filename]}".colorize(:yellow)
        video_path = "#{post_title}/videos/#{video[:filename]}"
        download_file(video[:url], video_path) unless File.exist? video_path
        downloaded_videos << video[:url] if File.exist? video_path
      end
      files = post_images + post_videos
      failed_files = files - (downloaded_images + downloaded_videos)
      if failed_files.count > 0
        puts "download #{post_title} with #{failed_files.count}/#{files.count} failed".colorize(:red)
        next
      end
      puts "done, write to manifest.json".colorize(:yellow)
      manifest = {
        :title  => post_title,
        :url    => post_url,
        :images => will_download_images,
        :videos => will_download_videos
      }
      File.open("#{post_title}/manifest.json", "w") do |f|
        f.write(manifest.to_json)
      end
    end
    puts "waiting for 30s to continue .".colorize(:blue)
    sleep 30
    n = n + 1
  end
end

def get_ext_from(res_url)
  File.extname(res_url).split('?')[0]
end
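# e.g. (hypothetical URL, for illustration): get_ext_from("http://example.com/clip.flv?rnd=42") #=> ".flv"
#      the query string is stripped so the extension is usable as a filename suffix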

def fetch_page(site, num)
  posts_url = []
  page_url = "#{site}/page#{num}.html"
  uri = URI.parse(page_url)
  Net::HTTP.start(uri.host, uri.port) do |http|
    resp = http.get(uri.path)
    # parse the HTML listing page and collect the link of every post preview
    doc = Nokogiri::HTML(resp.body)
    posts = doc.css('.row .preview[href^=http]')
    posts.each do |post|
      post_url = post.attr('href')
      puts post_url
      posts_url << post_url
    end
  end
  posts_url
end

def get_title_from(post_url)
  File.basename(post_url, '.html').gsub(/-+\d+$/, '')
end
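# e.g. (hypothetical URL shape, for illustration only):
#   get_title_from("http://m.youjizz.com/videos/some-title-123456.html") #=> "some-title"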

def get_video_id_from(post_url)
  /(\d+)\.html$/.match(post_url)[1]
end
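# e.g. (same hypothetical URL as above):
#   get_video_id_from("http://m.youjizz.com/videos/some-title-123456.html") #=> "123456"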

def fetch_images_and_videos(post_url)
  images_url = []
  videos_url = []
  uri = URI.parse(post_url)
  Net::HTTP.start(uri.host, uri.port) do |http|
    resp = http.get(uri.path)
    doc = Nokogiri::HTML(resp.body)
    # preview images embedded in the post page
    images = doc.css('#preview img')
    images.each do |image|
      images_url << image.attr('src')
    end
    # mpeg4 video linked from the preview thumbnail
    video_mp4 = doc.css('.preview_thumb').first
    videos_url << video_mp4.attr('href') unless video_mp4.nil?
  end
  # flv video levels listed by the playlist endpoint
  video_id = get_video_id_from post_url
  Net::HTTP.start('www.youjizz.com') do |http|
    resp = http.get("/playlist.php?id=#{video_id}")
    doc = Nokogiri::HTML(resp.body)
    levels = doc.css('level')
    levels.each do |flv|
      flv_file = flv.attr('file')
      flv_url = URI.decode flv_file
      videos_url << flv_url
    end
  end
  return images_url, videos_url
end
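
# download_file shells out to wget, writing to a temporary "<to>.tmp" file and
# only moving it into place when wget exits successfully, so a partially
# downloaded file never ends up at the final path.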
def download_file(from, to)
  tempfile = "#{to}.tmp"
  `wget "#{from}" -O "#{tempfile}"`
  FileUtils.mv(tempfile, to) if $?.success?
end
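# A dependency-free alternative (sketch, not what the script uses): stream the
# body with Net::HTTP instead of shelling out to wget, e.g.
#   uri = URI.parse(from)
#   Net::HTTP.start(uri.host, uri.port) do |http|
#     http.request_get(uri.request_uri) do |resp|
#       File.open("#{to}.tmp", 'wb') { |f| resp.read_body { |chunk| f << chunk } }
#     end
#   end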

def main
  fetch_site
end

main
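
# Re-running the script is safe: posts with an existing manifest.json are
# skipped, and the manifest is only written once every file for a post has
# downloaded, so partially failed posts are retried on the next run.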