Created
April 15, 2015 11:11
-
-
Save sftblw/77644b5e20b2759f80d8 to your computer and use it in GitHub Desktop.
ECMAScript 표준문서 부록에서 문법만 추출하는 node 앱
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// I got criticized after making this... apparently this is not how you build an AST.
//var jsdom = require('jsdom'); | |
var fs = require('fs'); | |
var sprintf = require('sprintf-js').sprintf; | |
var jquery = fs.readFileSync("./js/jquery-2.1.3.js", "utf-8"); | |
var cheerio = require('cheerio'), $ = cheerio.load(fs.readFileSync("./ecmafile/ecmaspec.htm", "utf-8")); | |
console.log("start!"); | |
/**
 * Abbreviates long camelCase identifiers and remembers every mapping so the
 * same input always yields the same abbreviation and two different inputs
 * never share one.
 */
var shortener = {
    // original name -> abbreviation
    origShort: new Map(),
    // abbreviation -> original name (used to detect collisions)
    shortOrig: new Map(),

    /**
     * Splits a string at every lowercase-to-uppercase boundary.
     *
     * @param {string} str - e.g. "AssignmentExpressionNoIn"
     * @returns {string[]} e.g. ["Assignment", "Expression", "No", "In"];
     *     an input without boundaries yields a one-element array.
     */
    splitCamelCase: function (str) {
        const pieces = [];
        let start = 0;
        for (let i = 1; i < str.length; i++) {
            if (/[a-z]/.test(str[i - 1]) && /[A-Z]/.test(str[i])) {
                pieces.push(str.slice(start, i));
                start = i;
            }
        }
        pieces.push(str.slice(start));
        return pieces;
    },

    /**
     * Returns an abbreviation of `str` that is at most `len` characters long
     * whenever the pieces can be shrunk that far, and that is not already
     * used for a different original.
     *
     * Phase 1 drops lowercase vowels piece by piece, starting from the last
     * piece. Phase 2 trims trailing characters round-robin (again from the
     * last piece) until the result fits and is unique. Every piece keeps at
     * least its first character; if that prevents a unique result, a numeric
     * suffix is appended, which may push the result past `len`.
     *
     * @param {string} str - name to abbreviate
     * @param {number} len - target maximum length
     * @returns {string} the (cached) abbreviation
     */
    shorten: function (str, len) {
        const cached = this.origShort.get(str);
        if (cached !== undefined) {
            return cached;
        }

        const camels = this.splitCamelCase(str);
        const totalLength = function () {
            return camels.reduce(function (sum, piece) {
                return sum + piece.length;
            }, 0);
        };

        // phase 1 : remove vowels, last piece first
        for (let i = camels.length - 1; i >= 0 && totalLength() > len; i--) {
            camels[i] = camels[i].replace(/[aeiou]/g, '');
        }

        // phase 2 : trim trailing letters round-robin
        let index = camels.length - 1;
        let trimmedThisPass = false;
        while (totalLength() > len || this.shortOrig.has(camels.join(''))) {
            if (camels[index].length > 1) {
                camels[index] = camels[index].slice(0, -1);
                trimmedThisPass = true;
            }
            index--;
            if (index < 0) {
                // A whole pass without any trim means nothing can shrink any
                // further; stop instead of spinning forever.
                if (!trimmedThisPass) {
                    break;
                }
                index = camels.length - 1;
                trimmedThisPass = false;
            }
        }

        let ret = camels.join('');
        if (this.shortOrig.has(ret)) {
            // Still colliding with another original: disambiguate by suffix.
            const base = ret;
            let suffix = 2;
            do {
                ret = base + suffix;
                suffix++;
            } while (this.shortOrig.has(ret));
        }

        this.shortOrig.set(ret, str);
        this.origShort.set(str, ret);
        return ret;
    }
};
exc($); | |
/**
 * Walks every `div.gp` element under `#sec-A` of the loaded document and
 * turns each production into "LHS -> RHS;" lines, which are then written to
 * "ECMAScript.gr". An (currently empty) state string is written to
 * "ECMAScript.gr.state".
 *
 * Names longer than LHS_MAX_LEN are abbreviated through `shortener.shorten`.
 * Terminals (class "t", and every table cell of a "one of" table) are wrapped
 * in single quotes. Only the first emitted line of a production carries the
 * left-hand side; follow-up alternatives are emitted with a blank one.
 *
 * @param {Function} $ - cheerio (jQuery-like) selector function bound to the
 *     loaded specification document.
 */
function exc($) {
    const LHS_MAX_LEN = 16;
    // NOTE(review): never read anywhere in this function; possibly meant as
    // the padding width of the right-hand side - confirm before removing.
    const RHS_MAX_LEN = 39;
    const tokenMap = new Map();

    // Abbreviates names that would not fit in the left-hand column.
    const shortenIfLong = function (text) {
        return text.length > LHS_MAX_LEN ? shortener.shorten(text, LHS_MAX_LEN) : text;
    };
    // Decodes HTML entities by round-tripping through a scratch element.
    const decodeHtml = function (html) {
        return $('<div/>').html(html).text();
    };
    // Formats one grammar line, CRLF-terminated.
    const formatRule = function (lhs, rhs) {
        return sprintf("%-" + LHS_MAX_LEN + "s -> %-" + "s;", lhs, rhs) + "\r\n";
    };

    console.log("response:");
    let str = "";

    $("#sec-A div.gp").each(function (idx, gpElem) {
        // lhs
        const lhsElem = $(".lhs", gpElem);
        let lhs = $(".nt", lhsElem).html();
        if (lhs == null) {
            // No non-terminal on the left-hand side: nothing to name the
            // production with, so skip it instead of crashing on `.length`.
            console.warn("skipping production without lhs non-terminal (index " + idx + ")");
            return;
        }
        lhs = shortenIfLong(lhs);

        // count how many productions share this left-hand side
        tokenMap.set(lhs, (tokenMap.get(lhs) || 0) + 1);

        // The modifier does not change while this production is processed,
        // so it is read once.
        const isOneOf = $(".grhsmod", lhsElem).html() === "one of";

        // normally there is a rhs part
        const rhsElems = $(".rhs", gpElem);
        if (rhsElems.length > 0) {
            rhsElems.each(function (idx2, rhsElem) {
                let rhs = "";
                const grhsmodElemAtRhs = $(".grhsmod", rhsElem);

                // for each rhs terminal / non-terminal
                $(".rhs *", rhsElem).each(function (idx3, rhsSubElem) {
                    const sub = $(rhsSubElem);
                    const isTerminal = sub.hasClass("t");
                    if (sub.hasClass("nt") || isTerminal) {
                        // shorten first, then decode html (original order kept)
                        let rhsText = decodeHtml(shortenIfLong(sub.html()));
                        if (isTerminal) {
                            rhsText = "'" + rhsText + "'";
                        }
                        rhs += " " + rhsText;

                        // inline "one of": every symbol is its own alternative
                        if (isOneOf) {
                            str += formatRule(lhs, rhs);
                            lhs = "";
                            rhs = "";
                        }
                    }
                    else if (String(grhsmodElemAtRhs.html()).match(/but not.*/)) {
                        console.log("matches " + grhsmodElemAtRhs.html());
                        return false; // jQuery .each() break;
                    }
                });

                // if rhs is valid (normal route)
                if (rhs.length > 0) {
                    str += formatRule(lhs, rhs);
                    lhs = "";
                }
            });
        }
        // it is one-of type, by table
        else if (isOneOf) {
            console.log("one of");
            $("td", $(gpElem).next()).each(function (idx2, oneOfElem) {
                const cellHtml = $(oneOfElem).html();
                if (cellHtml !== "") {
                    // table cells are always treated as terminals
                    const rhs = " '" + decodeHtml(shortenIfLong(cellHtml)) + "'";
                    str += formatRule(lhs, rhs);
                    lhs = "";
                }
            });
        }
    });

    const state = "";
    // tokenMap.forEach(function (val, key) {
    //     var curState = "key " + key + " : " + val;
    //     state += curState;
    //     console.log(state);
    //     state += "\r\n";
    // });

    fs.writeFile("ECMAScript.gr", str, "UTF-8", function (err) {
        if (err) {
            console.error("failed to write grammar file: " + err.message);
            return;
        }
        console.log("grammar written to file.");
    });
    fs.writeFile("ECMAScript.gr.state", state, "UTF-8", function (err) {
        if (err) {
            console.error("failed to write state file: " + err.message);
            return;
        }
        console.log("state written to file.");
    });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment