Created
July 13, 2018 09:21
-
-
Save josephrocca/83d7aff59a5ea8a8e3ea3ce267b6d6be to your computer and use it in GitHub Desktop.
Streaming bz2 decompression in browser (newline delimited JSON)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<script src="https://unpkg.com/unbzip2-stream/dist/unbzip2-stream.min.js"></script>
<script> | |
/**
 * Streams a bzip2-compressed, newline-delimited JSON file and hands each
 * parsed record to `cb` as soon as its line has been decompressed.
 *
 * @param {string} url - Location of the `.bz2` file; requested with `fetch`.
 * @param {(record: any) => void} cb - Called once per non-blank line with the
 *   result of `JSON.parse` on that line.
 * @returns {Promise<void>} Resolves after the decompressor has emitted `end`
 *   and the last record (even one without a trailing newline) was delivered.
 * @throws {Error} Rejects when the HTTP response is not OK, when reading the
 *   body fails, when the decompressor emits `error`, or when a line is not
 *   valid JSON.
 */
async function loadBzipJson(url, cb) {
  // One decoder for the whole stream: with `stream: true` it carries an
  // incomplete multi-byte UTF-8 sequence over to the next chunk instead of
  // decoding each half as U+FFFD.
  const decoder = new TextDecoder('utf-8');
  const bz2 = window.unbzip2Stream();
  let pending = '';

  // Blank lines carry no record; skipping them avoids JSON.parse('') throwing.
  const emitLine = (line) => {
    if (line.trim() !== '') {
      cb(JSON.parse(line));
    }
  };

  // Walk the buffer once with a moving offset rather than re-slicing the
  // whole string for every line.
  const emitCompleteLines = () => {
    let lineStart = 0;
    let newlineAt = pending.indexOf('\n');
    while (newlineAt !== -1) {
      emitLine(pending.slice(lineStart, newlineAt));
      lineStart = newlineAt + 1;
      newlineAt = pending.indexOf('\n', lineStart);
    }
    pending = pending.slice(lineStart);
  };

  bz2.on('data', (chunk) => {
    pending += decoder.decode(chunk, { stream: true });
    emitCompleteLines();
  });

  // Settles when the decompressor reports that it is finished or has failed.
  const decompressed = new Promise((resolve, reject) => {
    bz2.on('end', resolve);
    bz2.on('error', reject);
  });

  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: HTTP ${response.status}`);
  }
  const reader = response.body.getReader();

  // Feed every network chunk to the decompressor, then signal end-of-input so
  // it can flush whatever it is still buffering.
  const pump = async () => {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) {
        break;
      }
      bz2.write(value);
    }
    bz2.end();
  };

  try {
    await Promise.all([pump(), decompressed]);
  } catch (err) {
    // Stop downloading once the pipeline has failed. Cancelling is
    // best-effort (the body may already be errored); the original failure is
    // what gets reported.
    await reader.cancel().catch(() => {});
    throw err;
  }

  // Flush any bytes the decoder was still holding and deliver a final record
  // that is not terminated by a newline.
  pending += decoder.decode();
  emitLine(pending);
}
// Demo: stream the Pushshift RC_2017-11 Reddit comments dump and log every
// decoded record. The returned promise gets a rejection handler so download,
// decompression, and parse failures are reported instead of becoming
// unhandled rejections.
loadBzipJson("https://files.pushshift.io/reddit/comments/RC_2017-11.bz2", (obj) => {
  console.log(obj);
}).catch((err) => {
  console.error(err);
});
</script> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment