Skip to content

Instantly share code, notes, and snippets.

@sftblw
Created July 21, 2015 13:48
Show Gist options
  • Save sftblw/9aa5fe6bf5207f1ba6dc to your computer and use it in GitHub Desktop.
Save sftblw/9aa5fe6bf5207f1ba6dc to your computer and use it in GitHub Desktop.
hitomiapp2.js
var cheerio = require('cheerio');
var request = require('request');
var url = require('url');
var path = require('path');
//var fs = require('fs');
var fs = require('fs-extra');
//var mkdirp = require('mkdirp');
var async = require('async');
// NOTE(review): indexQueue and downloadQueue are declared here but never
// referenced anywhere else in this file.
var indexQueue = [];
var downloadQueue = [];
// Base URL of the target site; every request URL is built with
// url.resolve(hitomi, ...). The literal below is a placeholder (Korean for
// "website address (not telling)") and has to be replaced with a real URL.
var hitomi = '웹 사이트 주소 (안알랴줌)';
// Name of the root download directory; makeItemPath()/makeItemTempPath()
// join it onto './', i.e. it is relative to the current working directory.
var targetDirName = 'dl2';
/**
 * Serial queue (concurrency 1) used to fetch text resources: index pages and
 * gallery image-list scripts.
 *
 * Task:     a URL string handed straight to request().
 * Callback: the push() callback receives the response body as its ONLY
 *           argument. This queue does not use the error-first convention,
 *           and callers in this file rely on that.
 *
 * A failed request (transport error or non-200 status) is retried by the same
 * worker after RETRY_DELAY_MS, up to MAX_ATTEMPTS attempts in total. The old
 * code re-pushed the URL without ever calling `callback`; with concurrency 1
 * that held the only slot forever (so the re-pushed task could never start)
 * and dropped the caller's callback.
 *
 * When every attempt fails the task is abandoned WITHOUT calling `callback`,
 * because there is no body to hand over. The slot then stays occupied, which
 * is what the original code effectively did on the first failure.
 */
var reqQueue = async.queue(function (urlItem, callback) {
    var MAX_ATTEMPTS = 5;
    var RETRY_DELAY_MS = 1000;
    var attemptsMade = 0;

    function attempt() {
        attemptsMade++;
        request(urlItem, function (error, response, body) {
            // `response` is only inspected when there was no transport error.
            if (!error && response.statusCode === 200) {
                callback(body);
                return;
            }
            if (attemptsMade < MAX_ATTEMPTS) {
                // Keep the slot and the caller's callback; just try again.
                setTimeout(attempt, RETRY_DELAY_MS);
                return;
            }
            console.log("request failed after " + attemptsMade + " attempts : " + urlItem);
        });
    }

    attempt();
}, 1);
/**
 * Image download queue; at most 5 images are fetched concurrently.
 *
 * Task:     { imgUrl: string, directory: string, name: string }
 *           The image at `imgUrl` is written to `directory + "/" + name`.
 * Callback: passed directly to fs.writeFile, so it fires once the file has
 *           been written (its first argument is the write error, if any).
 *
 * A failed download (transport error or non-200 status) is retried by the
 * same worker after RETRY_DELAY_MS, up to MAX_ATTEMPTS attempts in total.
 * The old code pushed the task object into reqQueue instead, which expects
 * URL strings, and never called `callback`, so each failure leaked one of the
 * five worker slots.
 *
 * When every attempt fails the task is abandoned WITHOUT calling `callback`,
 * so the image is not reported as downloaded. As in the original failure
 * path, the worker slot stays occupied in that case.
 */
var imgDlQueue = async.queue(function (urlItem, callback) {
    var MAX_ATTEMPTS = 5;
    var RETRY_DELAY_MS = 1000;
    var attemptsMade = 0;
    var options_req = {
        url: urlItem.imgUrl,
        encoding: null // binary data: request then delivers `body` as a Buffer
    };

    function attempt() {
        attemptsMade++;
        request(options_req, function (error, response, body) {
            // `response` is only inspected when there was no transport error.
            if (!error && response.statusCode === 200) {
                fs.writeFile(urlItem.directory + "/" + urlItem.name, body, 'binary', callback);
                return;
            }
            if (attemptsMade < MAX_ATTEMPTS) {
                // Keep the slot and the caller's callback; just try again.
                setTimeout(attempt, RETRY_DELAY_MS);
                return;
            }
            console.log("image download failed after " + attemptsMade + " attempts : " + urlItem.imgUrl);
        });
    }

    attempt();
}, 5);
// var reqQueue = {
// qLimit : 5,
// q: [],
// req: function (urlItem, callbackFn) { // 외부 접근용 함수
// console.log("req start, q.Length : " + this.q.length);
// this.q.push({url: urlItem, callback: callbackFn});
// while (this.q.length > this.qLimit) {
// console.log("req wait, q.Length : " + this.q.length);
// process.nextTick();
// }
// this.procReq();
// console.log("req end");
// },
// procReq: function () { // 실제로 리퀘스트를 처리
// console.log("procReq start");
//
// var r = this.q[0];
//
// var reqEnd = false;
//
// request(r.url, function (error, response, body) {
// if (!error && response.statusCode == 200) {
// r.callback(body);
// reqEnd = true;
// } else {
// this.q.push(r);
// reqEnd = true;
// }
// this.q.shift(); // remove front
// });
//
// //while(!reqEnd) { process.nextTick(); };
// }
// }
/**
 * Request index pages one at a time.
 *
 * Fetches index page `i` through reqQueue and hands the HTML to checkBody().
 * Once checkBody() reports that every gallery on the page has been handled,
 * the next page (i + 1) is requested, so pages are processed sequentially.
 *
 * The former `while (imgDlQueue.length() != 0) process.nextTick();` loop was
 * removed. A synchronous loop can never observe queue progress because the
 * event loop cannot run while it spins, and process.nextTick() requires a
 * callback argument. Sequencing is already guaranteed by the checkBody()
 * completion callback.
 *
 * @param {number} i  1-based index page number.
 */
function requestIndex(i) {
    console.log("page " + i + " loading...");
    var indexUrl = url.resolve(hitomi, '/index-korean-' + i.toString() + '.html');
    reqQueue.push(indexUrl, function (body) {
        checkBody(body, function () {
            console.log("requesting next index...");
            requestIndex(i + 1); // next request
        });
    });
}
// Entry point: start crawling at index page 1. The functions used further
// down are function declarations, which are hoisted, so calling this before
// their textual definitions is fine.
requestIndex(1);
/**
 * Process the HTML of one index page. Called as the completion callback of
 * the index page request.
 *
 * Every `.gallery-content > div` element is turned into an itemInfo object
 * ({id, name, series, type, artist}). Galleries for which shoudDownload()
 * returns false are skipped; the others are passed to downloadGallery().
 * `callback` is invoked once, after every gallery on the page has been either
 * skipped or downloaded.
 *
 * If the page contains no gallery elements, `callback` is never invoked.
 * That is the original behaviour and is kept on purpose.
 *
 * `$` is now a local variable. It used to be assigned without a declaration,
 * which creates an implicit global and is a ReferenceError in strict mode.
 *
 * @param {string} body        HTML source of the index page.
 * @param {function} callback  Called with no arguments when the page is done.
 */
function checkBody(body, callback) {
    var $ = cheerio.load(body);
    var $galleriesElem = $('.gallery-content > div');
    var galleriesElemDone = 0;

    // Count one gallery as finished, log it, and fire `callback` after the last one.
    function markGalleryDone(prefix, itemName, suffix) {
        galleriesElemDone++;
        console.log(prefix + "[" + galleriesElemDone + "/" + $galleriesElem.length + "] " + itemName + suffix);
        if (galleriesElemDone === $galleriesElem.length) {
            callback();
        }
    }

    $galleriesElem.each(function (index, elem) {
        // Build the info record for this gallery entry.
        var targetItem = {
            // href looks like "/<dir>/<id>.html"; keep only "<id>".
            id : $('div > a', elem).attr('href').split('/')[2].split('.')[0],
            name : $('div h1 a', elem).text().trim(),
            series : $('.dj-desc tr:nth-child(1) td:nth-child(2) a', elem).html(),
            type : $('.dj-desc tr:nth-child(2) td:nth-child(2) a', elem).html(),
            // Line breaks in the artist list are replaced with " _ ".
            artist : $('div .artist-list li', elem).text().trim().replace(/(?:\r\n|\r|\n)/g, ' _ ')
        };

        // Skip galleries that do not need to be downloaded.
        if (!shoudDownload(targetItem)) {
            markGalleryDone("!!!", targetItem.name, " --- already downloaded");
            return;
        }

        // Downloads every image of the gallery; also responsible for moving
        // the finished directory into place.
        downloadGallery($, targetItem, function () {
            markGalleryDone("", targetItem.name, " --- done ");
        });
    });
}
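/** itemInfo schema
{
id : string (gallery id taken from the link href; only used via toString()),
name : string,
artist : string,
series : string | null | undefined (null is replaced with "all" when paths are built),
type : string
}
*/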
/**
 * Decide whether a gallery still has to be downloaded.
 *
 * A leftover temp directory means an earlier download did not finish; in that
 * case the temp directory (and the target directory, if one exists) is
 * deleted and the gallery is downloaded again.
 *
 * @param {object} itemInfo  Gallery info record (see the itemInfo schema).
 * @returns {boolean} false only when the target directory exists and no temp
 *                    directory is left over; true otherwise.
 */
function shoudDownload(itemInfo) {
    var finalPath = makeItemPath(itemInfo);
    var stagingPath = makeItemTempPath(itemInfo);
    var hasFinal = isDirExist(finalPath);
    var hasStaging = isDirExist(stagingPath);

    // No interrupted download: skip exactly when the gallery is already there.
    if (!hasStaging) {
        return !hasFinal;
    }

    // Interrupted download: wipe what is left and start over.
    fs.removeSync(stagingPath);
    if (hasFinal) {
        fs.removeSync(finalPath);
    }
    return true;
}
/**
 * Download a single gallery.
 *
 * Steps:
 *   1. create the gallery's temp directory,
 *   2. fetch "/galleries/<id>.js" through reqQueue and parse the image list,
 *   3. push every image onto imgDlQueue, saving into the temp directory,
 *   4. after the last image, move the temp directory to the target directory
 *      and call `callback`.
 *
 * Fixes over the previous version:
 *   - An empty image list now completes immediately. Before, `callback` was
 *     never called, so the page containing the gallery never finished.
 *   - A list that cannot be parsed is logged and the gallery is reported as
 *     finished instead of throwing from inside the queue callback. The temp
 *     directory is left in place, so shoudDownload() retries it next run.
 *   - If moving the temp directory fails, the error is logged and the temp
 *     directory is kept instead of being deleted.
 *
 * @param {*} $                Not used by this function.
 * @param {object} itemInfo    Gallery info record (see the itemInfo schema).
 * @param {function} callback  Called with no arguments when the gallery is done.
 */
function downloadGallery($, itemInfo, callback) {
    var targetDir = makeItemPath(itemInfo);
    var tempDir = makeItemTempPath(itemInfo);

    // Create the temporary target directory.
    fs.mkdirsSync(tempDir);

    // Move the finished temp directory into place and report completion.
    function finalizeGallery() {
        fs.move(tempDir, targetDir, function (err) {
            if (err) {
                console.log("gallery " + itemInfo.id + " : cannot move " + tempDir + " : " + err);
            } else {
                fs.removeSync(tempDir);
            }
            callback();
        });
    }

    // Download the image list.
    var listUrl = url.resolve(hitomi, '/galleries/' + itemInfo.id.toString() + '.js');
    reqQueue.push(listUrl, function (body) {
        // `body` is the gallery .js file: a variable assignment whose value is
        // an array of image objects. Instead of eval(), everything from the
        // first "[" onwards is cut out and parsed as JSON.
        var imageItems;
        try {
            imageItems = JSON.parse(body.substring(body.search(/\[/), body.length));
            if (!Array.isArray(imageItems)) {
                throw new Error("image list is not an array");
            }
        } catch (e) {
            console.log("gallery " + itemInfo.id + " : cannot parse image list : " + e.message);
            callback();
            return;
        }

        if (imageItems.length === 0) {
            finalizeGallery();
            return;
        }

        var imageItemsDownloaded = 0;
        imageItems.forEach(function (elem) {
            var imgUrll = url.resolve(hitomi, '/galleries/' + itemInfo.id.toString() + "/" + elem.name);
            imgDlQueue.push({imgUrl: imgUrll, directory: tempDir, name: elem.name}, function () {
                imageItemsDownloaded++;
                if (imageItems.length === imageItemsDownloaded) {
                    finalizeGallery();
                }
            });
        });
    });
}
/**
 * Check whether `dirPath` exists and is a directory.
 *
 * Uses lstat, which does not follow symbolic links, so a symlink that points
 * at a directory is reported as "not a directory".
 *
 * The previous version assigned to an undeclared `stats` variable. That leaks
 * a global in sloppy mode; in strict mode it throws a ReferenceError, which
 * the catch below swallowed, so the function always returned false.
 *
 * @param {string} dirPath  Path to test.
 * @returns {boolean} true if the path is a directory; false if it is anything
 *                    else, does not exist, or cannot be queried.
 */
function isDirExist(dirPath) {
    try {
        return fs.lstatSync(dirPath).isDirectory();
    } catch (e) {
        // Any lstat failure (typically a missing path) counts as "no directory".
        return false;
    }
}
/**
 * Build the final directory path of a gallery:
 *   ./<targetDirName>/<type>/<series>/<id>__<name, max 100 chars>[__ by <artist>]
 *
 * A missing series is stored as "all". The itemInfo schema allows `series` to
 * be undefined, but only null used to be handled, so an undefined series made
 * path.join() throw. Both null and undefined are normalised now.
 *
 * Side effect (kept from the original): the normalised series is written back
 * to `itemInfo.series`.
 *
 * @param {object} itemInfo  Gallery info record (see the itemInfo schema).
 * @returns {string} Relative path of the gallery directory.
 */
function makeItemPath(itemInfo) {
    // `== null` matches both null and undefined.
    if (itemInfo.series == null) itemInfo.series = "all";

    var dirName = itemInfo.id.toString() + "__" + itemInfo.name.substr(0, 100);
    if (itemInfo.artist !== "") {
        dirName += "__ by " + itemInfo.artist;
    }
    return path.join('./', targetDirName, itemInfo.type, itemInfo.series, dirName);
}
/**
 * Build the temp (staging) directory path of a gallery:
 *   ./<targetDirName>/temp/<id>
 *
 * Side effect: a null `itemInfo.series` is replaced with "all", although the
 * series is not part of the temp path.
 *
 * @param {object} itemInfo  Gallery info record (see the itemInfo schema).
 * @returns {string} Relative path of the gallery's temp directory.
 */
function makeItemTempPath(itemInfo) {
    if (null === itemInfo.series) {
        itemInfo.series = "all";
    }
    var idSegment = itemInfo.id.toString();
    var segments = ['./', targetDirName, "/temp", idSegment];
    return path.join.apply(path, segments);
}
{
"name": "hitomiapp"
, "version": "0.0.1"
, "private": true
, "dependencies": {
"cheerio": ">= 0.19.0"
, "request": ">= 2.59.0"
, "fs-extra": ">= 0.22.1"
, "async": ">= 1.4.0"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment