Created
April 6, 2017 15:40
-
-
Save MichalCz/435bc2085264556c2f746efe4e02b066 to your computer and use it in GitHub Desktop.
How to crawl a REST API and save the responses to a database with Scramjet.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const scramjet = require("scramjet");
const stream = require("stream");
const request = require("request-promise");
// The configuration module path comes from the CONFIG environment variable;
// `rest_api_url` and `range` are read from it below.
const config = require(process.env.CONFIG);

// Crawl pipeline: generate indexes, fetch an API response per index, flatten
// the responses into items, filter and decorate the items, then hand every
// item to the database writer.

// build a stream of indexes (0, 1, 2, ...); read() never pushes null, so the
// stream does not end on its own
new scramjet.DataStream({
    read() {
        this.offset = this.offset || 0;
        this.push(this.offset);
        this.offset += 1;
    }
})
    // map indexes to API responses
    // NOTE(review): the index and `i + config.range` are passed as extra
    // positional arguments; request's documented signature is
    // (uri, options, callback), so confirm these values actually reach the API
    // (they may need to go into the URL or a `qs` option instead).
    .map((i) => request.get(config.rest_api_url, i, i + config.range))
    // flatten every response into its individual items
    // (the same as `flatten` in Highland)
    .flatMap((items) => items)
    // keep only the items whose id contains "3"
    .filter((item) => String(item.id).includes("3"))
    // add a timestamp to the items whose id contains "9"
    .map((item) => (
        String(item.id).includes("9")
            ? Object.assign(item, {timestamp: Date.now()})
            : item
    ))
    // write to database
    // NOTE(review): `insert` is not defined in this file; it must be provided
    // (a function taking one item) before this script can run.
    .accumulate((write, item) => write(item), insert)
    // surface pipeline failures instead of leaving the rejection unhandled
    .catch((err) => {
        console.error(err);
        process.exitCode = 1;
    });
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment