Created
November 29, 2017 15:39
-
-
Save bacalj/239d373fe9be34b533cabaf58423c1c3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs'); | |
const xml2js = require('xml2js'); | |
const util = require('util'); | |
const eyes = require('eyes'); | |
const converter = require('json-2-csv'); | |
const parser = new xml2js.Parser(); | |
//const xml = fs.readFileSync('just_notes.xml', 'utf8'); | |
const inspect = require('eyes').inspector({maxLength: false}); | |
// parseString(xml, function (err, result) { | |
// console.log(util.inspect(result, false, null)); | |
// }); | |
/**
 * Placeholder values for fields that could not be found in an item.
 * Every record starts as a copy of this object, so all records share the
 * same keys in the same order (which keeps the CSV columns stable).
 */
const RECORD_DEFAULTS = {
  cTitle: 'no_cTitle_found',
  metaInitials: 'no_metaInitials_found',
  metaCharsets: 'no_metaCharsets_found',
  radical: 'no_radical_found',
  definition: 'no_definition_found',
  strokeNum: 'no_strokeNum_found',
  radLen: 'no_radLen_found',
};

// Longest radical text (in UTF-16 code units) accepted as a real radical.
// NOTE(review): 2 presumably allows one surrogate-pair character - confirm.
const MAX_RADICAL_LENGTH = 2;

/**
 * Return the first entry of an xml2js child-element array when it is a string.
 *
 * @param {*} values - Value found on the parsed item (expected: array of strings).
 * @returns {string} The first entry, or '' when it is missing or not a string.
 */
function firstString(values) {
  if (Array.isArray(values) && typeof values[0] === 'string') {
    return values[0];
  }
  return '';
}

/**
 * Pull the item list out of the parsed document (result.rss.channel[0].item).
 *
 * @param {object} result - Object produced by parser.parseString.
 * @returns {Array} The items, or an empty array when the path does not exist.
 */
function extractItems(result) {
  const channel = result && result.rss && result.rss.channel && result.rss.channel[0];
  if (channel && Array.isArray(channel.item)) {
    return channel.item;
  }
  return [];
}

/**
 * Split the raw content markup into [key, value] pairs.
 *
 * The markup is cut at every "<strong>"; each non-empty piece is truncated at
 * its last "</" and then divided at the FIRST ": " so that values which
 * themselves contain ": " are kept whole.
 *
 * @param {string} content - Raw content:encoded text of one item.
 * @returns {Array<Array>} Pairs such as ['Radical', '木']; value is undefined
 *   when the piece has no ": " separator.
 */
function parseContentFields(content) {
  const pairs = [];
  for (const segment of content.split('<strong>')) {
    if (segment.length === 0) {
      continue;
    }
    // A piece without any closing tag yields an empty text, as before.
    const closeAt = segment.lastIndexOf('</');
    const text = closeAt === -1 ? '' : segment.substring(0, closeAt);
    const sepAt = text.indexOf(': ');
    if (sepAt === -1) {
      pairs.push([text, undefined]);
    } else {
      pairs.push([text.slice(0, sepAt), text.slice(sepAt + 2)]);
    }
  }
  return pairs;
}

/**
 * Convert one parsed item into a flat record with a fixed set of keys.
 *
 * Title layout: "<initials> <character> (<charsets>)", separated by single
 * spaces. Content fields recognised: "Definition", "Stroke number",
 * "Radical" / "Radical:". When a key occurs more than once the last one wins.
 *
 * @param {object} item - One entry of the channel's item array.
 * @returns {object} Record with the keys of RECORD_DEFAULTS.
 */
function buildRecord(item) {
  const record = Object.assign({}, RECORD_DEFAULTS);

  // Missing title tokens keep their placeholder instead of throwing.
  const tokens = firstString(item.title).split(' ');
  if (tokens[0]) {
    record.metaInitials = tokens[0];
  }
  if (tokens[1]) {
    record.cTitle = tokens[1];
  }
  if (tokens[2]) {
    record.metaCharsets = tokens[2].replace(/[()]/g, '');
  }

  const fields = parseContentFields(firstString(item['content:encoded']));
  for (const pair of fields) {
    const key = pair[0];
    const value = pair[1];

    if (key === 'Definition' && value !== undefined) {
      // Wrapped in literal double quotes, as in the original output.
      record.definition = '"' + value + '"';
    } else if (key === 'Stroke number' && value !== undefined) {
      record.strokeNum = value;
    } else if (key === 'Radical' || key === 'Radical:') {
      const radical = value === undefined ? '' : value.trim();
      // radLen always reports the length of the text that was actually
      // parsed, even when that text is rejected as a radical.
      record.radLen = radical.length;
      if (radical.length === 0 || radical.length > MAX_RADICAL_LENGTH) {
        record.radical = 'no_radical_parsed';
      } else {
        record.radical = radical;
      }
    }
  }

  return record;
}

// Read the export, convert every item to a record, save the records as JSON
// and print them to the console as CSV.
fs.readFile('just_notes.xml', function (readErr, data) {
  if (readErr) {
    return console.error(readErr);
  }

  parser.parseString(data, function (parseErr, result) {
    if (parseErr) {
      return console.error(parseErr);
    }

    const bestarr = extractItems(result).map(function (element) {
      return buildRecord(element);
    });
    const sha = JSON.stringify(bestarr);

    // Save the records as a JSON file.
    fs.writeFile('sha.json', sha, 'utf8', function (writeErr) {
      if (writeErr) {
        return console.log(writeErr);
      }
      console.log('The file was saved!');
    });

    // Log the records to the console as CSV.
    const json2csvCallback = function (csvErr, csv) {
      if (csvErr) {
        throw csvErr;
      }
      console.log(csv);
    };
    converter.json2csv(bestarr, json2csvCallback);
  });
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment