Skip to content

Instantly share code, notes, and snippets.

@ishiduca
Created September 2, 2015 11:12
Show Gist options
  • Save ishiduca/e67ba9711a60623a7e06 to your computer and use it in GitHub Desktop.
Save ishiduca/e67ba9711a60623a7e06 to your computer and use it in GitHub Desktop.
セマフォで同時接続数を調整する
'use strict'
var fs = require('fs')
var path = require('path')
var url = require('url')
var hyperquest = require('hyperquest')
var through = require('through2')
var trumpet = require('trumpet')
var semaphore = require('semaphore')
// Page that is scraped for image links.
var uri = 'http://matome.naver.jp/odai/2142172053104925201'

// CSS selector for the <img> tags of interest: images nested in the page's
// widget list whose `src` starts with the rr.img.naver.jp host. The pieces
// are joined without a separator, so the leading '>' / ' ' of each piece is
// significant.
var selector = [
  '.LyWrap>.lyWrapInner>div>div>.LyMain>.ArMain01',
  '>.MdMTMWidgetList01>.MdMTMWidget01>.mdMTMWidget01Content01',
  ' a>img[src^="http://rr.img.naver.jp"]'
].join('')

// Running number used to name the saved image files (1.jpg, 2.jpg, ...).
var count = 0

// Semaphore created with a capacity of 3; the download pipeline takes a slot
// per image so that only a few downloads are in flight at once.
var sem = semaphore(3)

// Object-mode pass-through stream that carries the extracted image URLs from
// the HTML scraper to the download pipeline.
var multi = through.obj()

// HTML scraper: for every element matching `selector`, read its `src`
// attribute and queue the value on `multi`.
var scraper = trumpet()
scraper.selectAll(selector, function (imgElement) {
  imgElement.getAttribute('src', function (imageUrl) {
    multi.write(imageUrl)
  })
})

// Fetch the page and feed its body to the scraper. Request errors, including
// the one `onResponse` raises for a non-200 status, are only logged by the
// module-level `onError`.
var pageRequest = hyperquest(uri)
pageRequest.once('error', onError)
pageRequest.once('response', onResponse)
pageRequest.pipe(scraper)
// Download pipeline. Every image URL written to `multi` passes through two
// object-mode stages; the path of each saved file is printed to stdout, one
// per line.
multi
  // Stage 1: hold the URL back until the semaphore grants a slot, which caps
  // the number of downloads running at the same time.
  .pipe(through.obj(function (src, enc, done) {
    sem.take(function () {
      done(null, src)
    })
  }))
  // Stage 2: download `src` into image/<n>.jpg next to this script and emit
  // the file path followed by a newline.
  .pipe(through.obj(function (src, enc, done) {
    var file = path.join(__dirname, 'image', (count += 1) + '.jpg')
    // Several events can end one download (request 'error', file 'error',
    // file 'finish'), and more than one of them may fire: for example a
    // non-200 response raises 'error' while its body is still being written
    // and later produces 'finish'. Only the first one may settle the chunk.
    var settled = false

    hyperquest(src)
      .once('response', onResponse)
      .once('error', onError)
      .pipe(fs.createWriteStream(file))
      .once('error', onError)
      .once('finish', function () {
        settle(null, file + '\n')
      })

    // Finishes this chunk exactly once: schedules the release of the
    // semaphore slot and reports the outcome to the stream. The slot is
    // handed back one second later, on failure as well as on success, so a
    // failed download cannot keep a slot forever.
    function settle (err, line) {
      if (settled) return
      settled = true
      setTimeout(sem.leave.bind(sem), 1000)
      done(err, line)
    }

    // Error listener for both the request and the file stream. Shadows the
    // module-level `onError`: besides logging, it fails the current chunk.
    function onError (err) {
      console.error(err)
      settle(err)
    }
  }))
  .pipe(process.stdout)
/**
 * 'response' listener shared by the page request and the image requests.
 *
 * It is invoked with the emitting request stream as `this`. Any status other
 * than 200 is converted into an 'error' event on that same stream, so the
 * error listener registered on the request handles it.
 *
 * @param {Object} res - Response object; only `statusCode` is read.
 */
function onResponse (res) {
  if (res.statusCode === 200) return

  // Name the URL of the request that actually failed rather than always the
  // page URL. NOTE(review): relies on hyperquest exposing the request as
  // `stream.request` with a `uri` property - confirm against the installed
  // version. Falls back to the module-level page `uri` when unavailable.
  var failedUrl = (this.request && this.request.uri) || uri
  var err = new Error('statusCode: ' + res.statusCode + ' url: ' + failedUrl)
  this.emit('error', err)
}
/**
 * Module-level 'error' listener, registered on the page request.
 *
 * Writes the error to stderr via `console.error` and does nothing else: it
 * neither rethrows nor returns a value. The download stage declares its own
 * inner `onError`, so this one is not the handler used for image requests.
 *
 * @param {*} err - The value emitted with the 'error' event.
 */
function onError (err) {
  console.error(err)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment