Created
October 3, 2011 10:37
-
-
Save ishiduca/1258870 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Sample usage for https://gist.github.com/1258811
var HTTPClient = require('./lib/http/client.js').HTTPClient;

// URL-encoded login form; main() sends it as the body of the POST to login.php.
var body = require('querystring').stringify({
    mode : 'login', pixiv_id : 'your pixiv id', pass : 'your pixiv pass'
});

// This catch only sees errors thrown synchronously while main() is running.
// Errors thrown later from asynchronous response/stream handlers will not
// reach it and surface as uncaught exceptions instead.
try {
    main ();
} catch (e) {
    console.log([ e.name, e.message ].join(': '));
}
/**
 * Logs in to pixiv, then walks from an illustration's "medium" page to its
 * "big" page to the image file itself, saving the image into the current
 * working directory under the basename of its URL.
 *
 * The four requests are queued on a single HTTPClient. Each response handler
 * calls client.release(...) when it is finished with its response.
 * NOTE(review): presumably release() is what lets the next queued request
 * start, and a function passed to request() is evaluated only at that point —
 * confirm against lib/http/client.js.
 *
 * Errors raised inside the response handlers are thrown from asynchronous
 * callbacks, so a try/catch around the call to main() does not observe them.
 */
function main () { // follow redirects automatically
    var client = new HTTPClient({ redirect : true });

    var wwwPixivNet = 'http://www.pixiv.net',
        loginPhp    = [ wwwPixivNet, '/login.php' ].join(''),
        illustTop   = [ wwwPixivNet, '/member_illust.php?mode=medium&illust_id=22021584' ].join('');

    // Shared 'error' listener: surface stream errors instead of ignoring them.
    function rethrow (e) { throw e; }

    // Buffers a text response and hands the whole body, with newlines removed
    // so the patterns below can match across line breaks, to `done`.
    function readBody (res, done) {
        var data = '';
        res.setEncoding('utf-8');
        res.on('error', rethrow);
        res.on('data', function (chunk) { data += chunk; });
        res.on('end', function () { done(data.replace(/\n/g, '')); });
    }

    // Runs `pattern` against `html` and returns the match array. Fails with a
    // descriptive Error rather than a TypeError on `null[1]` when the page
    // does not contain the expected markup (e.g. login failed, layout changed).
    function capture (pattern, html, what) {
        var result = pattern.exec(html);
        if (! result) {
            throw new Error('could not find ' + what + ' in the response body');
        }
        return result;
    }

    client
        // 1. Log in; the response body is not read.
        .request('POST', loginPhp, {}, body, function (res, req) {
            res.on('error', rethrow);
            client.release(function () { report(res, req); });
        })
        // 2. Fetch the "medium" illustration page and scrape the link to the next page.
        .request('GET', illustTop, function (res, req) {
            readBody(res, function (html) {
                // Groups 2 and 3 (image src and title) are matched but not used.
                var pattern = /<a href="(member_illust\.php\?mode=[^"]+?)"[^>]+?><img src="([^"]+?)".+?title="([^"]+?)"/;
                var result  = capture(pattern, html, 'the link to the big illustration page');
                var nextURL = unescapeHTML([ wwwPixivNet, result[1] ].join('/'));

                client.release(function () {
                    client.set('nextURL', nextURL);
                    report(res, req);
                    console.log("nextURL(illust big) >> " + client.get('nextURL'));
                });
            });
        })
        // 3. Use a callback so the URL scraped by the previous request can be
        //    read from the client when this request is built.
        .request(function () {
            var callback = function (res, req) {
                readBody(res, function (html) {
                    var pattern = /<img src="(http:\/\/img\d{2,3}[^"]+?)"/;
                    var result  = capture(pattern, html, 'the image URL');
                    var nextURL = result[1];

                    client.release(function () {
                        client.set('nextURL', nextURL);
                        report(res, req);
                        console.log("nextURL(imgSRC) >> " + client.get('nextURL'));
                    });
                });
            };
            return [ 'GET', client.get('nextURL'), callback ];
        })
        // 4. Download the image itself and write it to disk.
        .request(function () {
            var callback = function (res, req) {
                // req[0] is the request URL; its basename becomes the file name.
                var target      = require('path').basename(req[0]);
                var writeStream = require('fs').createWriteStream(target);

                writeStream.on('error', rethrow);
                // Release only once the file has been closed.
                writeStream.on('close', function () {
                    client.release(function () { report(res, req); });
                });

                res.on('error', rethrow);
                res.on('data', function (chunk) {
                    writeStream.write(chunk);
                });
                res.on('end', function () {
                    writeStream.end();
                });
            };
            return [ 'GET', client.get('nextURL'), callback ];
        })
        .end(function () { console.log("! !! !!! Finish !!! !! !"); })
    ;
}
/**
 * Replaces the HTML entities &quot;, &lt;, &gt; and &amp; in `str` with the
 * characters they stand for.
 *
 * @param {string} str - text that may contain the four entities above
 * @returns {string} a new string with those entities decoded
 */
function unescapeHTML (str) {
    // "&amp;" must stay last: decoding it first would turn "&amp;lt;" into
    // "&lt;" and then into "<", unescaping the text twice.
    var entities = [
        [ "&quot;", '"' ],
        [ "&lt;",   "<" ],
        [ "&gt;",   ">" ],
        [ "&amp;",  "&" ]
    ];
    // split/join replaces every literal occurrence without building a RegExp.
    return entities.reduce(function (text, pair) {
        return text.split(pair[0]).join(pair[1]);
    }, str);
}
/**
 * Prints a summary of one request/response pair to stdout, followed by a
 * blank line.
 *
 * @param {object} res - response; `statusCode` and `headers` are read
 * @param {Array}  req - request description: [0] URI, [1] headers, [2] body
 */
function report (res, req) {
    // The "Ruquest" spelling is kept as-is so output matches existing logs.
    console.log("Ruquest URI: " + req[0]);
    console.log("Ruquest Headers: " + JSON.stringify(req[1]));
    console.log("Ruquest Body: " + req[2]);
    console.log("Response StatusCode: " + res.statusCode);
    console.log("Response Headers: " + JSON.stringify(res.headers));
    console.log('');
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
実行した際のログ
Ruquest URI: http://www.pixiv.net/login.php
Ruquest Headers: {"content-length":40,"content-type":"application/x-www-form-urlencoded"}
Ruquest Body: mode=login&pixiv_id=xxxxxxxx&pass=ohohohoho
Response StatusCode: 302
Response Headers: {"server":"nginx/1.0.2","date":"Mon, 03 Oct 2011 09:43:52 GMT","content-type":"text/html; charset=UTF-8","connection":"close","set-cookie":["PHPSESSID=9b07f59a9d2a2ca0210dad06495870c4; expires=Mon, 03-Oct-2011 10:43:52 GMT; path=/; domain=.pixiv.net"],"expires":"Thu, 19 Nov 1981 08:52:00 GMT","cache-control":"no-store, no-cache, must-revalidate, post-check=0, pre-check=0","pragma":"no-cache","location":"http://www.pixiv.net/mypage.php","content-length":"0"}
Ruquest URI: http://www.pixiv.net/member_illust.php?mode=medium&illust_id=22021584
Ruquest Headers: {"referer":"http://www.pixiv.net/mypage.php","cookie":"PHPSESSID=9b07f59a9d2a2ca0210dad06495870c4; expires=Mon, 03-Oct-2011 10:43:52 GMT; path=/; domain=.pixiv.net"}
Ruquest Body: null
Response StatusCode: 200
Response Headers: {"server":"nginx/1.0.5","date":"Mon, 03 Oct 2011 09:43:52 GMT","content-type":"text/html; charset=UTF-8","transfer-encoding":"chunked","connection":"close","vary":"Accept-Encoding","expires":"Thu, 19 Nov 1981 08:52:00 GMT","cache-control":"no-store, no-cache, must-revalidate, post-check=0, pre-check=0","pragma":"no-cache"}
nextURL(illust big) >> http://www.pixiv.net/member_illust.php?mode=big&illust_id=22021584
Ruquest URI: http://www.pixiv.net/member_illust.php?mode=big&illust_id=22021584
Ruquest Headers: {"referer":"http://www.pixiv.net/member_illust.php?mode=medium&illust_id=22021584","cookie":"PHPSESSID=9b07f59a9d2a2ca0210dad06495870c4; expires=Mon, 03-Oct-2011 10:43:52 GMT; path=/; domain=.pixiv.net"}
Ruquest Body: null
Response StatusCode: 200
Response Headers: {"server":"nginx/1.0.5","date":"Mon, 03 Oct 2011 09:43:53 GMT","content-type":"text/html; charset=UTF-8","connection":"close","vary":"Accept-Encoding","expires":"Thu, 19 Nov 1981 08:52:00 GMT","cache-control":"no-store, no-cache, must-revalidate, post-check=0, pre-check=0","pragma":"no-cache","content-length":"1667"}
nextURL(imgSRC) >> http://img24.pixiv.net/img/konoma/22021584.jpg
Ruquest URI: http://img24.pixiv.net/img/konoma/22021584.jpg
Ruquest Headers: {"referer":"http://www.pixiv.net/member_illust.php?mode=big&illust_id=22021584","cookie":"PHPSESSID=9b07f59a9d2a2ca0210dad06495870c4; expires=Mon, 03-Oct-2011 10:43:52 GMT; path=/; domain=.pixiv.net"}
Ruquest Body: null
Response StatusCode: 200
Response Headers: {"server":"nginx/1.0.5","date":"Mon, 03 Oct 2011 09:43:53 GMT","content-type":"image/jpeg","connection":"close","last-modified":"Mon, 26 Sep 2011 15:50:07 GMT","etag":"\"974b074b-23823-4adda1cf9b7fa\"","accept-ranges":"bytes","content-length":"145443","cache-control":"max-age=1209600","expires":"Sun, 16 Oct 2011 02:52:10 GMT","age":"111103"}
! !! !!! Finish !!! !! !