Last active
December 23, 2015 08:09
-
-
Save googya/6605852 to your computer and use it in GitHub Desktop.
Download the videos and PDFs linked from http://www.oracle.com/technetwork/java/javase/community/jvmls2013-2013900.html, using underscore, request, jquery, async, exec, etc.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| var _u = require('underscore') | |
| var request = require('request') | |
| var $ = require('jquery') | |
| var fs = require('fs'); | |
| var async = require('async'); | |
| var _url = require('url'); | |
| var http = require('http') | |
| var exec = require('child_process').exec; | |
| var spawn = require('child_process').spawn; | |
| var url = "http://www.oracle.com/technetwork/java/javase/community/jvmls2013-2013900.html" | |
| var outter_host = _url.parse(url).host | |
/**
 * Fetches the page at the module-level `url` and hands its body to `callback`.
 *
 * @param {function(?Error, string=)} callback - Node-style callback. Receives
 *   the request error (or an error describing a non-2xx response) as its first
 *   argument, otherwise `null` followed by the response body.
 */
function get_html(callback) {
  request.get(url, function (err, res, body) {
    // Surface transport failures instead of passing an undefined body along.
    if (err) {
      return callback(err);
    }
    // An error page would otherwise be parsed as if it were the real listing.
    if (!res || res.statusCode < 200 || res.statusCode >= 300) {
      return callback(new Error('GET ' + url + ' failed with HTTP status ' + (res && res.statusCode)));
    }
    callback(null, body);
  });
}
/**
 * Collects the links inside `.dataTable` whose href contains `.<type>` and
 * turns each one into an absolute URL.
 *
 * Links that already carry a host are returned unchanged. Host-less links are
 * resolved against the module-level page `url`, so root-relative,
 * document-relative and protocol-relative paths all produce a valid address.
 *
 * @param {string} body - HTML markup to search.
 * @param {string} type - File extension to look for, without the dot (e.g. "mov").
 * @param {function(?Error, string[]=)} callback - Node-style callback that
 *   receives the list of URLs, or the error raised while parsing.
 */
function parse_html(body, type, callback) {
  var _urls;
  try {
    var anchors = $(body).find(".dataTable a[href*='." + type + "']");
    _urls = _u.map(anchors, function (e) {
      var parsed = _url.parse(e.href);
      if (parsed.host) {
        return e.href;
      }
      // Resolve rather than concatenate: "http://" + host + path breaks for
      // paths that do not start with a single "/".
      return _url.resolve(url, parsed.path || '');
    });
  } catch (err) {
    // Report parse failures through the callback like every other error.
    return callback(err);
  }
  callback(null, _urls);
}
| var DOWNLOAD_DIR = './' | |
/**
 * Final step of the pipeline: downloads every URL that was collected.
 *
 * @param {?Error} err - Error raised by an earlier pipeline step, if any.
 * @param {string[]} urls - Absolute URLs of the files to download.
 */
function download_all_files(err, urls) {
  // An earlier step failed; report it instead of silently doing nothing.
  if (err) {
    console.error(" download aborted: " + (err.message || err));
    return;
  }
  console.log(" urls is " + urls)
  // Forward only the URL; the iterator also supplies an index and the list.
  _u.each(urls, function (file_url) {
    download_file_raw(file_url);
  });
}
/**
 * Downloads `file_url` into DOWNLOAD_DIR by running `wget -c -P <dir> <url>`.
 *
 * The URL is passed as a separate argument and no shell is involved, so shell
 * metacharacters in a scraped link cannot run extra commands. wget's output is
 * discarded rather than buffered, so no output buffer limit applies.
 *
 * @param {string} file_url - Absolute URL of the file to download.
 * @throws {Error} From the child-process event handlers when wget cannot be
 *   started or exits with a non-zero status (same fail-fast behaviour as before).
 */
function download_file_wget(file_url) {
  var child = spawn('wget', ['-c', '-P', DOWNLOAD_DIR, file_url], { stdio: 'ignore' });
  child.on('error', function (err) {
    throw err;
  });
  child.on('close', function (code) {
    if (code !== 0) {
      throw new Error('wget exited with code ' + code + ' for ' + file_url);
    }
    console.log(file_url + ' download to ' + DOWNLOAD_DIR);
  });
}
/**
 * Downloads `file_url` over plain HTTP into DOWNLOAD_DIR, naming the file
 * after the last segment of the URL path.
 *
 * Redirects are not followed: any response other than 200 is reported on
 * stderr and nothing is written to disk.
 *
 * @param {string} file_url - Absolute http:// URL of the file to download.
 */
function download_file_raw(file_url) {
  var parsed = _url.parse(file_url);
  var file_name = (parsed.pathname || '').split('/').pop();
  if (!file_name) {
    console.error(file_url + ' download failed: no file name in URL');
    return;
  }
  var options = {
    // `hostname` excludes the port; `host` would carry it along twice.
    hostname: parsed.hostname,
    port: parsed.port || 80,
    // `path` keeps the query string, which `pathname` drops.
    path: parsed.path
  };
  http.get(options, function (res) {
    if (res.statusCode !== 200) {
      // Drain the response so the socket is released.
      res.resume();
      console.error(file_name + ' download failed: HTTP ' + res.statusCode);
      return;
    }
    // Create the file only once there is a body to store, so failed requests
    // do not leave empty files behind.
    var file = fs.createWriteStream(DOWNLOAD_DIR + file_name);
    file.on('error', function (err) {
      console.error(file_name + ' download failed: ' + err.message);
    });
    file.on('finish', function () {
      console.log(file_name + ' download to ' + DOWNLOAD_DIR)
    });
    // pipe() applies backpressure and ends the file when the response ends.
    res.pipe(file);
  }).on('error', function (err) {
    // Without this handler a connection failure would crash the process.
    console.error(file_name + ' download failed: ' + err.message);
  });
}
// Pipeline: fetch the listing page, extract the ".mov" links from it, then
// hand the resulting URLs (or the first error) to download_all_files.
async.waterfall(
  [
    get_html,
    function (page, next) {
      parse_html(page, "mov", next);
    }
  ],
  download_all_files
);
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
When using exec, you should set maxBuffer if necessary.