Skip to content

Instantly share code, notes, and snippets.

@suisho
Created February 24, 2013 06:58
Show Gist options
  • Save suisho/5022948 to your computer and use it in GitHub Desktop.
Save suisho/5022948 to your computer and use it in GitHub Desktop.
wgetもどき的なもの
var path = require("path")
var cheerio = require('cheerio')
var request = require("request")
var fs = require("fs")
var mkdirp = require("mkdirp")
require('events').EventEmitter; // TODO:イベントハンドリングするにはこれだったけど。
module.exports = function(url, options){
/**
 * Build the "protocol//host" prefix of a URL.
 *
 * @param {string} url - URL to inspect.
 * @returns {string} The parsed protocol and host joined by "//",
 *   e.g. "http://example.com".
 */
var getBaseUrl = function(url){
  var parsed = require('url').parse(url);
  var origin = parsed.protocol;
  origin += "//";
  origin += parsed.host;
  return origin;
};
/**
 * Collect the URLs referenced by an HTML document.
 *
 * Gathers the `href` of every <a> element, then the `src` of every <img>
 * element. Elements that lack the attribute are skipped, so the result
 * contains strings only.
 *
 * @param {string} html - HTML markup to scan.
 * @returns {string[]} Raw attribute values; they may be relative URLs or
 *   fragment-only links and are not resolved here.
 */
var getNextUrl = function(html){
  var $ = cheerio.load(html);
  var nextLinks = [];
  var collect = function(selector, attrName){
    $(selector).each(function(){
      // `this` is the bare element; it has to be wrapped with $() before
      // .attr() can be called on it.
      var value = $(this).attr(attrName);
      if(typeof value === "string"){
        nextLinks.push(value);
      }
    });
  };
  collect("a", "href");
  collect("img", "src");
  // TODO: wget follows many more tags/attributes than this (see html-url.c).
  return nextLinks;
};
/**
 * Map a URL to the local file path it should be saved under.
 *
 * The layout is <options.directoryPrefix>/<host>/<url path segments>.
 * A URL path that ends in "/" is stored as "index.html" in that directory.
 *
 * @param {string} url - URL being downloaded.
 * @returns {string} Absolute file path below the resolved directory prefix.
 */
var toSavePath = function(url){
  var urlObj = require('url').parse(url);
  var rootDir = path.resolve(options.directoryPrefix);
  var segments = (urlObj.pathname || "/").split("/");
  // A trailing "/" leaves an empty last segment: treat it as a directory index.
  if(segments[segments.length - 1] === ""){
    segments[segments.length - 1] = "index.html";
  }
  // Drop empty segments (leading "/" or doubled slashes) and dot segments so
  // the result can never climb out of the host directory.
  segments = segments.filter(function(segment){
    return segment !== "" && segment !== "." && segment !== "..";
  });
  if(segments.length === 0){
    segments.push("index.html");
  }
  return path.join.apply(path, [rootDir, urlObj.host || ""].concat(segments));
};
/**
 * Write `body` to `fullPath`, creating any missing parent directories first.
 * The parent directory is logged to the console before it is created.
 *
 * @param {string} fullPath - Destination file path.
 * @param {string|Buffer} body - Content handed to fs.writeFileSync.
 */
var save = function(fullPath,body){
  var targetDir = path.dirname(fullPath);
  console.log(targetDir);
  mkdirp.sync(targetDir);
  fs.writeFileSync(fullPath, body);
};
// URLs that have already been requested, keyed by URL string.
var gettingMap = {};
/**
 * Download `url`, save it to disk and work out which linked URLs would be
 * fetched next.
 *
 * Link extraction is skipped when the response declares a non-HTML content
 * type. Extracted links are dropped when they are fragment-only ("#..."),
 * rejected by `options.filter`, or already present in `gettingMap`.
 *
 * @param {string} url - URL to download.
 * @param {number} level - Current depth; accepted but not used yet.
 * @param {function(?Error, string[]=)} callback - Called once with an error,
 *   or with `null` and the list of candidate follow-up URLs.
 */
var getting = function(url, level, callback){
  var done = typeof callback === "function" ? callback : function(){};
  gettingMap[url] = true;
  // encoding: null asks request for a Buffer, so binary files (images etc.)
  // reach the disk without being mangled by string decoding.
  request.get({ url: url, encoding: null }, function(error, response, body){
    if(error){
      console.error(error);
      done(error);
      return;
    }
    var nextUrl = [];
    try{
      save(toSavePath(url), body);
      var contentType = (response && response.headers && response.headers["content-type"]) || "";
      // Parse only when the server says HTML or says nothing at all.
      if(contentType === "" || /html/i.test(contentType)){
        var baseUrl = getBaseUrl(url);
        var protocol = baseUrl.slice(0, baseUrl.indexOf("//"));
        getNextUrl(String(body)).forEach(function(link){
          if(typeof link !== "string" || link === ""){
            return; // element without the attribute
          }
          if(link.lastIndexOf("#",0) === 0){
            return; // in-page anchor
          }
          if(link.lastIndexOf("//",0) === 0){
            link = protocol + link; // protocol-relative
          }else if(link.lastIndexOf("/",0) === 0){
            link = baseUrl + link; // host-relative
          }
          nextUrl.push(link);
        });
      }
      // Array#filter returns a new array, so the result must be reassigned.
      if(options.filter){
        nextUrl = nextUrl.filter(options.filter);
      }
      nextUrl = nextUrl.filter(function(candidate){
        return !gettingMap[candidate];
      });
    }catch(failure){
      console.error(failure);
      done(failure);
      return;
    }
    // TODO: fetch `nextUrl` recursively (bounded by `level`); for now the
    // candidates are only reported to the caller.
    done(null, nextUrl);
  });
};
// Start the download for the requested URL at depth 0. The completion
// callback deliberately does nothing.
var noop = function(){};
getting(url, 0, noop);
}
{
"name": "wwget",
"dependencies": {
"require": "~0.4.9",
"cheerio": "~0.10.6",
"request": "~2.12.0",
"async": "~0.1.22",
"mkdirp": "~0.3.4"
}
}
// Options for the sample run: files are stored under ./save, and the filter
// rejects links that point at a site's root path.
var options = {
  directoryPrefix : "./save",
  /**
   * Decide whether a discovered URL should be kept.
   *
   * @param {string} url - Candidate URL.
   * @returns {boolean} false for the root path "/", true otherwise.
   */
  filter : function(url){
    var urlObj = require('url').parse(url);
    // Must be a comparison: the previous `pathname = "/"` was an assignment,
    // which is always truthy and therefore rejected every URL.
    return urlObj.pathname !== "/";
  }
};
//require("./index.js")("http://lovemake.biz/parodies/230", options)
require("./index.js")("http://matome-blog.jp/img/logo.png", options)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment