Skip to content

Instantly share code, notes, and snippets.

@ishiduca
Created May 20, 2015 05:31
Show Gist options
  • Save ishiduca/5c728078a32fb597698c to your computer and use it in GitHub Desktop.
Save ishiduca/5c728078a32fb597698c to your computer and use it in GitHub Desktop.
trumpetでスクレイピングの試作。複雑な構造をパースするには骨が折れる
{"code":"aaa","product":"AAA","price":1000}
{"code":"abc","product":"ABC","price":1100}
{"code":"xyz","product":"XYZ","price":10000}
var fs = require('fs')
var trumpet = require('trumpet')
var es = require('event-stream')
var duplexer = require('duplexer')
var merge = require('deepmerge')
// Stream that receives one merged record object per completed table row.
var rsAll = es.through()
// Parser that is fed the whole HTML document.
var trp = trumpet()

// For every <tr> directly under a <table>, parse the row with its own
// trumpet instance and assemble the code / product / price cells into one
// record object.
trp.selectAll('table>tr', function (elTr) {
  var rowParser = trumpet()
  var record = {}

  // Merges each incoming cell fragment into the record. The collector is
  // ended only once the record holds three keys, and its end handler forwards
  // the record to rsAll; a row that never yields three keys is never written.
  var collector = es.through(function ondata (fragment) {
    record = merge(record, fragment)
    if (Object.keys(record).length === 3) {
      collector.end()
    }
  }, function () {
    rsAll.write(record)
  })
  // Several cell streams are piped into the same collector; 0 removes the
  // listener-count limit on it.
  collector.setMaxListeners(0)

  // Pipes the content of the cell matching `selector` through `toFragment`
  // and on into the collector. `end: false` keeps a finished cell stream from
  // ending the collector before the remaining cells have arrived.
  function pipeCell (selector, toFragment) {
    rowParser.select(selector).createReadStream()
      .pipe(es.through(function (chunk) {
        this.queue(toFragment(chunk))
      }))
      .pipe(collector, {end: false})
  }

  pipeCell('td.code', function (code) {
    return {code: String(code)}
  })
  pipeCell('td.product', function (product) {
    return {product: String(product)}
  })
  pipeCell('td.price', function (price) {
    return {price: Number(price)}
  })

  // Register all selectors above before feeding the row markup in.
  elTr.createReadStream().pipe(rowParser)
})

// table.html -> (write side: trp, read side: rsAll) -> JSON lines -> stdout
var source = fs.createReadStream(__dirname + '/table.html')
var scraper = duplexer(trp, rsAll)
source
  .pipe(scraper)
  .pipe(es.stringify())
  .pipe(process.stdout)
<!--
  Input fixture read by the scraper script as table.html.
  The first row holds th header cells; each following row holds td cells
  classed code / product / price. The tr elements are direct children of
  table (no tbody wrapper), which is what the script's 'table>tr' selector
  matches on.
-->
<body>
<table>
<tr>
<th class="code">code</th>
<th class="product">product</th>
<th class="price">price</th>
</tr>
<tr>
<td class="code">aaa</td>
<td class="product">AAA</td>
<td class="price">1000</td>
</tr>
<tr>
<td class="code">abc</td>
<td class="product">ABC</td>
<td class="price">1100</td>
</tr>
<tr>
<td class="code">xyz</td>
<td class="product">XYZ</td>
<td class="price">10000</td>
</tr>
</table>
</body>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment