Skip to content

Instantly share code, notes, and snippets.

@bacalj
Created November 29, 2017 15:39
Show Gist options
  • Save bacalj/239d373fe9be34b533cabaf58423c1c3 to your computer and use it in GitHub Desktop.
Save bacalj/239d373fe9be34b533cabaf58423c1c3 to your computer and use it in GitHub Desktop.
const fs = require('fs');
const xml2js = require('xml2js');
const util = require('util');
const eyes = require('eyes');
const converter = require('json-2-csv');
const parser = new xml2js.Parser();
//const xml = fs.readFileSync('just_notes.xml', 'utf8');
const inspect = require('eyes').inspector({maxLength: false});
// parseString(xml, function (err, result) {
// console.log(util.inspect(result, false, null));
// });
/**
 * Extracts the title-derived fields from an item title.
 *
 * The title is split on single spaces: token 0 becomes `metaInitials`,
 * token 1 becomes `cTitle`, and token 2 (with any parentheses removed)
 * becomes `metaCharsets`. Tokens that are absent are simply left out of the
 * result so the caller can substitute its placeholders.
 *
 * @param {*} title - Raw title; anything that is not a string yields `{}`.
 * @returns {Object} Object holding whichever of the three fields were present.
 */
function parseTitle(title) {
  const tokens = typeof title === 'string' ? title.split(' ') : [];
  const fields = {};
  if (tokens[1] !== undefined) {
    fields.cTitle = tokens[1];
  }
  if (tokens[0] !== undefined) {
    fields.metaInitials = tokens[0];
  }
  if (tokens[2] !== undefined) {
    fields.metaCharsets = tokens[2].replace(/[()]/g, '');
  }
  return fields;
}

/**
 * Turns the raw content markup of an item into [heading, value] pairs.
 *
 * The content is split at every "<strong>"; each non-empty segment is cut
 * off at its last "</" and then divided at the FIRST ": " only, so a value
 * that itself contains ": " is kept whole, e.g. [ 'Radical', '木' ].
 * Segments without a ": " separator carry no value and are skipped.
 *
 * @param {*} content - Raw content markup; non-strings yield `[]`.
 * @returns {Array<Array<string>>} List of [heading, value] pairs, in document order.
 */
function parseContent(content) {
  if (typeof content !== 'string') {
    return [];
  }
  const pairs = [];
  for (const segment of content.split('<strong>')) {
    if (segment.length === 0) {
      continue;
    }
    // A segment with no "</" at all collapses to '' here (lastIndexOf gives -1)
    // and is then dropped below because it has no separator.
    const cleaned = segment.substring(0, segment.lastIndexOf('</'));
    const separatorAt = cleaned.indexOf(': ');
    if (separatorAt === -1) {
      continue;
    }
    pairs.push([
      cleaned.substring(0, separatorAt),
      cleaned.substring(separatorAt + 2),
    ]);
  }
  return pairs;
}

/**
 * Builds one flat output record from a parsed feed item.
 *
 * Every record carries the same seven keys in the same order, so all rows
 * share one schema; a field that could not be found keeps its `no_*_found`
 * placeholder.
 *
 * @param {Object} element - Item object; `title[0]` and
 *   `['content:encoded'][0]` are read from it when present.
 * @returns {Object} Record with cTitle, metaInitials, metaCharsets, radical,
 *   definition, strokeNum and radLen.
 */
function buildRecord(element) {
  const title = element.title && element.title[0];
  const content = element['content:encoded'] && element['content:encoded'][0];

  // Start from the placeholders, then overlay whatever the title provided.
  const record = Object.assign(
    {
      cTitle: 'no_cTitle_found',
      metaInitials: 'no_metaInitials_found',
      metaCharsets: 'no_metaCharsets_found',
      radical: 'no_radical_found',
      definition: 'no_definition_found',
      strokeNum: 'no_strokeNum_found',
      radLen: 'no_radLen_found',
    },
    parseTitle(title)
  );

  for (const [key, value] of parseContent(content)) {
    if (key === 'Definition') {
      record.definition = `"${value}"`;
    } else if (key === 'Stroke number') {
      record.strokeNum = value;
    } else if (key === 'Radical' || key === 'Radical:') {
      const radical = value.trim();
      // radLen always records the length of the raw parsed value, even when
      // the value is rejected below for being longer than two characters.
      record.radLen = radical.length;
      record.radical = radical.length > 2 ? 'no_radical_parsed' : radical;
    }
  }
  return record;
}

// Read the export, convert every item to a flat record, save the records as
// JSON and print them to the console as CSV.
fs.readFile('just_notes.xml', function (readErr, data) {
  if (readErr) {
    return console.error(readErr);
  }
  parser.parseString(data, function (parseErr, result) {
    if (parseErr) {
      return console.error(parseErr);
    }
    const channel = result && result.rss && result.rss.channel && result.rss.channel[0];
    if (!channel) {
      return console.error('Unexpected XML structure: rss.channel not found');
    }
    const bestarr = (channel.item || []).map((item) => buildRecord(item));
    const sha = JSON.stringify(bestarr);
    //convert object to json file
    fs.writeFile("sha.json", sha, 'utf8', function (err) {
      if (err) {
        return console.log(err);
      }
      console.log("The file was saved!");
    });
    //log it to console as a csv
    const json2csvCallback = function (err, csv) {
      if (err) throw err;
      console.log(csv);
    };
    converter.json2csv(bestarr, json2csvCallback);
  });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment