Skip to content

Instantly share code, notes, and snippets.

@sftblw
Created April 15, 2015 11:11
Show Gist options
  • Save sftblw/77644b5e20b2759f80d8 to your computer and use it in GitHub Desktop.
Save sftblw/77644b5e20b2759f80d8 to your computer and use it in GitHub Desktop.
ECMAScript 표준문서 부록에서 문법만 추출하는 node 앱
// Author's note: I got roasted after writing this... apparently this is not how an AST should be built.
//var jsdom = require('jsdom');
var fs = require('fs');
var sprintf = require('sprintf-js').sprintf;
// NOTE(review): `jquery` is read from disk here but is not referenced anywhere else in the visible code — confirm before removing.
var jquery = fs.readFileSync("./js/jquery-2.1.3.js", "utf-8");
// Load ./ecmafile/ecmaspec.htm into cheerio; `$` is the jQuery-like query function later passed to exc().
var cheerio = require('cheerio'), $ = cheerio.load(fs.readFileSync("./ecmafile/ecmaspec.htm", "utf-8"));
console.log("start!");
/**
 * Abbreviates long camel-case identifiers and remembers the mapping in both
 * directions so the same original always yields the same short name and two
 * different originals never share one.
 */
var shortener = {
    // original name -> shortened name
    origShort: new Map(),
    // shortened name -> original name
    shortOrig: new Map(),

    /**
     * Shortens `str` so that it is at most `len` characters and unique among
     * all names produced so far.
     *
     * Strategy: split at every lowercase->uppercase boundary, strip lowercase
     * vowels word by word starting from the last word, then trim trailing
     * letters one at a time, cycling from the last word to the first, keeping
     * at least one character per word.
     *
     * Results are cached by `str` only: a repeated call returns the cached
     * name even if `len` differs.
     *
     * When the words cannot be trimmed any further, uniqueness takes priority
     * over the length limit: a numeric suffix is appended on collision, and
     * the result may then exceed `len`.
     *
     * @param {string} str - identifier to shorten.
     * @param {number} len - desired maximum length.
     * @returns {string} the shortened, unique identifier.
     */
    shorten: function (str, len) {
        var cached = this.origShort.get(str);
        if (cached !== undefined) {
            return cached;
        }

        // Split at EVERY lower->upper boundary, without a separator sentinel,
        // so no character of the input can be swallowed by the split.
        var pieces = [];
        var start = 0;
        for (var i = 1; i < str.length; i++) {
            if (/[a-z]/.test(str[i - 1]) && /[A-Z]/.test(str[i])) {
                pieces.push(str.slice(start, i));
                start = i;
            }
        }
        pieces.push(str.slice(start));

        var totalLength = function () {
            return pieces.reduce(function (sum, piece) {
                return sum + piece.length;
            }, 0);
        };
        var canShrink = function () {
            return pieces.some(function (piece) {
                return piece.length > 1;
            });
        };

        // phase 1 : remove lowercase vowels, last word first, only while too long
        var index = pieces.length - 1;
        while (totalLength() > len && index >= 0) {
            pieces[index] = pieces[index].replace(/[aeiou]/g, '');
            index--;
        }

        // phase 2 : trim trailing letters round-robin (last word -> first word)
        // until the name fits and is unused. Stop as soon as no word can lose
        // another letter; otherwise this loop would spin forever.
        index = pieces.length - 1;
        while ((totalLength() > len || this.shortOrig.has(pieces.join(''))) && canShrink()) {
            if (pieces[index].length > 1) {
                pieces[index] = pieces[index].slice(0, -1);
            }
            index--;
            if (index < 0) {
                index = pieces.length - 1;
            }
        }

        // Fallback for the stuck case: disambiguate with a numeric suffix.
        var base = pieces.join('');
        var ret = base;
        for (var suffix = 1; this.shortOrig.has(ret); suffix++) {
            ret = base + suffix;
        }

        this.shortOrig.set(ret, str);
        this.origShort.set(str, ret);
        return ret;
    }
};
// Run the extraction on the loaded document; `exc` is a function declaration, so it is hoisted and callable here.
exc($);
/**
 * Extracts grammar productions from the "#sec-A" section of the loaded
 * document and writes them as text rules to "ECMAScript.gr". It also writes
 * "ECMAScript.gr.state", which is currently always empty.
 *
 * Each rule is one line of the form `<lhs padded to 16> -> <rhs>;` terminated
 * by CRLF. Only the first alternative of a production carries the LHS name;
 * the following alternatives are emitted with a blank LHS. Names longer than
 * 16 characters are abbreviated through `shortener`, and terminals are
 * wrapped in single quotes.
 *
 * @param {Function} $ - cheerio (jQuery-like) query function bound to the
 *     parsed document.
 * @returns {undefined} Results are delivered through file writes and logs.
 */
function exc($) {
    const LHS_MAX_LEN = 16;
    // Number of productions seen per (possibly shortened) LHS name.
    // NOTE(review): nothing reads this map yet; the state file is written empty.
    var tokenMap = new Map();
    console.log("response:");
    var str = "";

    // Appends one formatted rule line to the output buffer.
    function emitRule(lhsText, rhsText) {
        str += sprintf("%-" + LHS_MAX_LEN + "s -> %-" + "s;", lhsText, rhsText) + "\r\n";
    }

    // Turns an element's inner HTML into a grammar symbol: shorten when too
    // long, decode HTML entities, and quote it when it is a terminal.
    function toSymbol(html, isTerminal) {
        var text = html;
        if (text.length > LHS_MAX_LEN) {
            text = shortener.shorten(text, LHS_MAX_LEN);
        }
        // round-trip through a scratch element to decode HTML entities
        text = $('<div/>').html(text).text();
        return isTerminal ? "'" + text + "'" : text;
    }

    $("#sec-A div.gp").each(function (idx, gpElem) {
        // lhs
        var lhsElem = $(".lhs", gpElem);
        var lhs = $(".nt", lhsElem).html();
        if (lhs == null) {
            // No non-terminal on the left-hand side: nothing to emit for this
            // block, so move on instead of crashing on `.length`.
            return;
        }
        if (lhs.length > LHS_MAX_LEN) {
            lhs = shortener.shorten(lhs, LHS_MAX_LEN);
        }
        // count occurrences of this LHS (first sighting -> 1, then +1 each time)
        tokenMap.set(lhs, (tokenMap.get(lhs) || 0) + 1);

        var grhsmodElem = $(".grhsmod", lhsElem);
        var isOneOf = $(grhsmodElem).html() === "one of";

        // normally there is an rhs part
        var rhsElems = $(".rhs", gpElem);
        if ($(rhsElems).length > 0) {
            $(rhsElems).each(function (idx2, rhsElem) {
                var rhs = "";
                var grhsmodElemAtRhs = $(".grhsmod", rhsElem);

                // for each rhs terminal / non-terminal
                $(".rhs *", rhsElem).each(function (idx3, rhsSubElem) {
                    var isTerminal = $(rhsSubElem).hasClass("t");
                    if ($(rhsSubElem).hasClass("nt") || isTerminal) {
                        rhs += " " + toSymbol($(rhsSubElem).html(), isTerminal);
                        // inline "one of": every symbol is its own alternative
                        if (isOneOf) {
                            emitRule(lhs, rhs);
                            lhs = "";
                            rhs = "";
                        }
                    }
                    else if (String($(grhsmodElemAtRhs).html()).match(/but not.*/)) {
                        // stop collecting at the "but not ..." modifier
                        console.log("matches " + $(grhsmodElemAtRhs).html());
                        return false; // jQuery .each() break;
                    }
                });

                // if rhs is valid (normal route)
                if (rhs.length > 0) {
                    emitRule(lhs, rhs);
                    lhs = "";
                }
            });
        }
        // "one of" laid out as a table in the element following this block
        else if (isOneOf) {
            console.log("one of");
            $("td", $(gpElem).next()).each(function (idx2, oneOfElem) {
                var cellHtml = $(oneOfElem).html();
                if (cellHtml !== "") {
                    // table cells are always treated as terminals
                    emitRule(lhs, " " + toSymbol(cellHtml, true));
                    lhs = "";
                }
            });
        }
    });

    var state = "";

    fs.writeFile("ECMAScript.gr", str, "UTF-8", function (err) {
        if (err) {
            console.error("failed to write ECMAScript.gr:", err);
            return;
        }
        console.log("grammar written to file.");
    });
    fs.writeFile("ECMAScript.gr.state", state, "UTF-8", function (err) {
        if (err) {
            console.error("failed to write ECMAScript.gr.state:", err);
            return;
        }
        console.log("state written to file.");
    });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment