Skip to content

Instantly share code, notes, and snippets.

@lsongdev
Last active August 29, 2015 14:10
Show Gist options
  • Select an option

  • Save lsongdev/df79bb9eec2ef700b443 to your computer and use it in GitHub Desktop.

Select an option

Save lsongdev/df79bb9eec2ef700b443 to your computer and use it in GitHub Desktop.
{
"name": "weipai-downloader",
"version": "1.0.0",
"description": "",
"main": "weipai.js",
"author": "lsong",
"license": "MIT",
"dependencies": {
"async": "^0.9.0",
"download": "^3.1.2",
"download-status": "^2.1.0",
"mkdirp": "^0.5.0",
"progress": "^1.1.8",
"underscore": "^1.7.0"
}
}
var _ = require('underscore');
var fs = require('fs');
var path = require('path');
var http = require('http');
var request = require('request');
var mkdirp = require('mkdirp');
var async = require('async');
var Download = require('download');
var progress = require('download-status');
/**
 * Downloader for weipai.cn user archives.
 *
 * @constructor
 * @param {Object} [options] Currently unused.
 */
function WeiPai(options){
}
// Every user's files are stored beneath this folder, which sits next to the script.
var ARCHIVE_FOLDER_NAME = 'archive';
var archive_dir = path.join(__dirname, ARCHIVE_FOLDER_NAME);
/**
 * Fetches the daily top-user ranking and extracts the user ids from it.
 *
 * @param {Function} callback Called as callback(userIds, error). `userIds` is
 *   always an array; when the request fails or the response is not valid JSON
 *   it is empty and `error` holds the reason.
 */
WeiPai.prototype.get_user_list = function(callback){
  request('http://w1.weipai.cn/top_user?&count=100000&type=top_day', function(err, res, body){
    var failure = err;
    var data;
    if(!failure){
      // The parse is isolated so that a malformed body cannot crash the process.
      try{
        data = JSON.parse(body);
      }catch(parseError){
        failure = parseError;
      }
    }
    if(failure){
      console.error('Failed to load the user list: %s', failure.message || failure);
      return callback([], failure);
    }
    // _.map yields [] when user_list is absent, so no extra guard is needed.
    callback(_.map(data && data.user_list, function(post){
      return post.user_id;
    }));
  });
};
/**
 * Fetches the home-page record of a single user.
 *
 * @param {string} userId Id of the user to look up.
 * @param {Function} callback Called as callback(info, error). On success
 *   `info` is the parsed JSON response; when the request fails or the body
 *   is not valid JSON, `info` is null and `error` holds the reason.
 */
WeiPai.prototype.get_user_info = function(userId, callback){
  // The id is encoded so that unusual characters cannot corrupt the query string.
  var url = 'http://w1.weipai.cn/home_user?&user_id=' + encodeURIComponent(userId);
  request(url, function(err, res, body){
    var failure = err;
    var info = null;
    if(!failure){
      // The parse is isolated so that a malformed body cannot crash the process.
      try{
        info = JSON.parse(body);
      }catch(parseError){
        failure = parseError;
      }
    }
    if(failure){
      console.error('Failed to load user %s: %s', userId, failure.message || failure);
      return callback(null, failure);
    }
    callback(info);
  });
};
/**
 * Expands a base media URL into its four screenshot URLs:
 * "<url>.jpg", "<url>.1.jpg", "<url>.2.jpg" and "<url>.3.jpg".
 *
 * @param {string} url Base URL without the picture suffix.
 * @returns {string[]} The four picture URLs, in that order.
 */
WeiPai.prototype.generate_picture_urls = function(url){
  var suffixes = [ '', '.1', '.2', '.3' ];
  return suffixes.map(function(suffix){
    return url + suffix + '.jpg';
  });
};
/**
 * Downloads an m3u8 playlist and collects the absolute segment URLs it lists.
 *
 * @param {string} url Address of the playlist.
 * @param {Function} callback Called as callback(urls) with the lines that
 *   start with "http". The list is empty when the playlist is hosted on
 *   http://v.weipai.cn, when the request fails, or when no body is returned.
 */
WeiPai.prototype.parse_m3u8 = function(url, callback){
  if(url.indexOf('http://v.weipai.cn') !== -1){//broken domain ?
    return callback([]);
  }
  request(url, function(err, res, body){
    if(err || typeof body !== 'string'){
      console.error('Failed to load playlist %s: %s', url, err ? (err.message || err) : 'no response body');
      return callback([]);
    }
    var list = [];
    // Accept both LF and CRLF playlists; a stray "\r" would corrupt the URL.
    body.split(/\r?\n/).forEach(function(line){
      var entry = line.trim();
      if(/^http/.test(entry)){
        list.push(entry);
      }
    });
    callback(list);
  });
};
/**
 * Downloads `url` into the directory `dest`, creating the directory first.
 * A file whose name (the URL's basename) is already present in `dest` is
 * not downloaded again.
 *
 * @param {string} url Address of the file to download.
 * @param {string} dest Directory that receives the file.
 * @param {Function} callback Called with an error when the directory cannot
 *   be created, with no arguments when the file already exists, and
 *   otherwise with whatever the downloader reports.
 */
WeiPai.prototype.download = function(url, dest, callback){
  mkdirp(dest, function(mkdirError){
    if(mkdirError){
      return callback(mkdirError);
    }
    var filename = path.join(dest, path.basename(url));
    // fs.exists is deprecated; a successful stat means the file is present,
    // and any stat failure is treated as "missing", as fs.exists did.
    fs.stat(filename, function(statError){
      if(!statError){
        console.log('%s already exists .', filename);
        return callback();
      }
      console.log('OK %s', url);
      new Download()
        .use(progress())
        .get(url)
        .dest(dest)
        .run(callback);
    });
  });
};
/**
 * Archives every video post referenced by one user record.
 *
 * For each video in each diary the playlist derived from the screenshot URL
 * is read; when it lists segments, the segments, the screenshots and the
 * playlist itself are downloaded, one after another, into
 * <archive_dir>/<user_id>/<blog_id>/{videos,pictures}.
 *
 * @param {Object} user User record with `user_id` and `diary_list`; each
 *   diary carries a `video_list` whose items have `blog_id` and
 *   `video_screenshot`.
 * @param {Function} [done] Called when all work finished or with the first
 *   error reported by a download.
 */
WeiPai.prototype.parse = function(user, done){
  var weipai = this;
  if(!user || user.user_id == null){
    // Without an id there is no folder to archive into; skip the record.
    console.error('Skipping a user record without a user_id.');
    return typeof done === 'function' ? done() : undefined;
  }
  // Ids are coerced because path.join rejects non-string segments.
  var userDir = path.join(archive_dir, String(user.user_id));

  // Returns the series task that archives a single video post.
  function archiveVideo(video){
    return function(callback){
      if(!video || video.blog_id == null || typeof video.video_screenshot !== 'string'){
        // Incomplete entry: nothing to derive the media URLs from.
        return callback();
      }
      var url = weipai.resolve(video.video_screenshot);
      var pictures = weipai.generate_picture_urls(url);
      var postDir = path.join(userDir, String(video.blog_id));
      var m3u8_url = url + '.m3u8';

      // Returns the series task that downloads one file into a sub-folder of the post.
      function downloadInto(fileUrl, folder){
        return function(next){
          weipai.download(fileUrl, path.join(postDir, folder), next);
        };
      }

      weipai.parse_m3u8(m3u8_url, function(videos){
        if(!videos.length){
          // No segments listed: the post is skipped entirely.
          return callback();
        }
        var downloadTask = videos.map(function(video_url){
          return downloadInto(video_url, 'videos');
        }).concat(pictures.map(function(pic_url){
          return downloadInto(pic_url, 'pictures');
        }), [ downloadInto(m3u8_url, 'videos') ]);
        async.series(downloadTask, callback);
      });
    };
  }

  // Returns the series task that archives all videos of one diary.
  function archiveDiary(diary){
    return function(callback){
      async.series(_.map(diary && diary.video_list, archiveVideo), callback);
    };
  }

  async.series(_.map(user.diary_list, archiveDiary), done);
};
/**
 * Strips the trailing picture suffix (".jpg", optionally preceded by a dot
 * and one digit such as ".1") from a screenshot URL, leaving the base URL.
 *
 * @param {string} url Screenshot URL.
 * @returns {string} The URL without the suffix; unchanged when it does not end in ".jpg".
 */
WeiPai.prototype.resolve = function(url){
  var pictureSuffix = /(\.\d)?\.jpg$/;
  var baseUrl = url.replace(pictureSuffix, '');
  return baseUrl;
};
/**
 * Writes the user record as pretty-printed JSON to
 * <archive_dir>/<user_id>/manifest.json, creating the folder when needed.
 *
 * @param {Object} user User record; `user_id` names the folder.
 * @param {Function} callback Called with the error when the folder cannot be
 *   created, otherwise with the result of the file write.
 */
WeiPai.prototype.write_manifest = function(user, callback){
  // The id is coerced because path.join rejects non-string segments.
  var userDir = path.join(archive_dir, String(user.user_id));
  mkdirp(userDir, function(mkdirError){
    if(mkdirError){
      // Writing into a folder that could not be created would only fail again.
      return callback(mkdirError);
    }
    fs.writeFile(path.join(userDir, 'manifest.json'), JSON.stringify(user, null, 2), callback);
  });
};
/**
 * Runs the whole job: loads the user list, then, one user at a time,
 * loads that user's record and hands it to parse().
 *
 * @param {Function} [done] Passed to async.series as the final callback.
 */
WeiPai.prototype.fetch = function(done){
  var self = this;
  self.get_user_list(function(users){
    // One task per user; the series runs them strictly in list order.
    var tasks = users.map(function(userId){
      return function(next){
        self.get_user_info(userId, function(info){
          self.parse(info, next);
        });
      };
    });
    async.series(tasks, done);
  });
};
// Script entry point: start the full download run. A completion callback is
// supplied so that an error which aborts the run is reported instead of lost.
new WeiPai().fetch(function(err){
  if(err){
    console.error('Download run aborted: %s', err.stack || err);
    // Signal the failure to the calling shell without cutting pending output short.
    process.exitCode = 1;
  }
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment