Skip to content

Instantly share code, notes, and snippets.

@yeungon
Last active November 16, 2022 03:32
Show Gist options
  • Save yeungon/643f94bf5ee2e4bbe2564e4d72ad212f to your computer and use it in GitHub Desktop.
Save yeungon/643f94bf5ee2e4bbe2564e4d72ad212f to your computer and use it in GitHub Desktop.
Read huge JSON with Node
const StreamArray = require('stream-json/streamers/StreamArray');
const fs = require('fs');
// Input files; relative paths are resolved against the current working directory.
const combine = 'Working_Vietnamese_English_Combined.json';
const vnedict = 'vnedict__vietnamese-english.json';

// The vnedict file is fed through stream-json's array streamer, so its
// records arrive one at a time via 'data' events on `jsonStream`.
const jsonStream = StreamArray.withParser();
const vnedictFile = fs.createReadStream(vnedict);
vnedictFile.pipe(jsonStream.input);

// The combined dictionary is read synchronously and parsed in one piece;
// the 'data' handler looks every streamed word up in this array.
const combinedictjson = fs.readFileSync(combine);
const combinedictArray = JSON.parse(combinedictjson);

// Output accumulator: one merged entry is appended per streamed record.
const combinedDictionary = [];
// Counter read by the 'end' handler; nothing in this script increments it.
let indexTracking = 0;
/**
 * Normalizes a headword for comparison: lower-cased, with leading and
 * trailing whitespace removed.
 *
 * @param {string} word - Headword as stored in either dictionary.
 * @returns {string} The normalized headword.
 */
const normalizeWord = (word) => word.toLowerCase().trim();

// Index of the combined dictionary keyed by normalized headword, built once
// so each streamed record costs a single lookup instead of a scan of the
// whole array. When several entries normalize to the same headword the first
// one wins, which matches the earlier scan that stopped at its first match.
// Assumes `combinedictArray` is an array of objects with a string `word`.
const combinedEntryByWord = new Map();
for (const entry of combinedictArray) {
  const normalized = normalizeWord(entry.word);
  if (!combinedEntryByWord.has(normalized)) {
    combinedEntryByWord.set(normalized, entry);
  }
}

/**
 * Merges one streamed vnedict record into `combinedDictionary`.
 *
 * - If the headword exists in the combined dictionary, the combined entry's
 *   `meaning` and `meaningvnedict` are copied under the normalized headword.
 * - Otherwise a new entry is created with an empty `meaning` and the vnedict
 *   definition stored in `meaningvnedict`; the headword keeps its original
 *   casing and whitespace.
 *
 * NOTE(review): on a match, `meaningvnedict` comes from the combined entry,
 * not from the streamed record's `meaning` — confirm this is intended.
 */
jsonStream.on('data', ({ value }) => {
  const lookupKey = normalizeWord(value.word);
  const match = combinedEntryByWord.get(lookupKey);

  if (match !== undefined) {
    combinedDictionary.push({
      // The key equals the matched entry's own normalized headword.
      word: lookupKey,
      meaning: match.meaning,
      meaningvnedict: match.meaningvnedict,
    });
    return;
  }

  combinedDictionary.push({
    word: value.word,
    meaning: '',
    meaningvnedict: value.meaning,
  });
});
/**
 * Runs once the vnedict stream has been fully consumed: reports how many
 * entries were merged and writes them to `Vietnamese_English_Combined.json`
 * as a single JSON array.
 */
jsonStream.on('end', () => {
  // Report the real number of merged entries; the counter logged here before
  // was never incremented, so it always printed 0.
  console.log(`Merged entries: ${combinedDictionary.length}`);

  const data = JSON.stringify(combinedDictionary);
  fs.writeFile('Vietnamese_English_Combined.json', data, (err) => {
    // A failed write is fatal for this script: let it surface as an
    // uncaught exception instead of reporting success.
    if (err) throw err;
    console.log('Data written to file');
    // Announce completion only after the file has actually been written.
    console.log('All Done');
  });
});
//source: http://www.denisowski.org/Vietnamese/vnedict.txt
//https://www.informatik.uni-leipzig.de/~duc/TD/td/index.php?bpos=0&db=ev
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment