Skip to content

Instantly share code, notes, and snippets.

@lobodin
Created January 25, 2011 14:29
Show Gist options
  • Save lobodin/794979 to your computer and use it in GitHub Desktop.
Save lobodin/794979 to your computer and use it in GitHub Desktop.
/*
Pascal scanner
*/
/**
 * A single lexeme record.
 *
 * @constructor
 * @param {?string} name - Text of the lexeme.
 * @param {?string} type - Category label of the lexeme.
 * @param {number} nextLexemePos - Position value stored alongside the lexeme.
 */
function Lexeme(name, type, nextLexemePos) {
  this.name = name;
  this.type = type;
  this.nextLexemePos = nextLexemePos;
}
/**
 * Pascal scanner.
 *
 * Usage: call `Scanner.init(programText)` once, then call
 * `Scanner.next(pos)` repeatedly, feeding each returned lexeme's
 * `nextLexemePos` back in as `pos`. A lexeme whose `name` is `null` means
 * nothing could be recognised at that position (end of input or an
 * unsupported character).
 */
var Scanner = (function() {
  'use strict';

  // Reserved words. They are compiled into one case-insensitive pattern with
  // a trailing negative lookahead so that a keyword is only recognised as a
  // whole word ("or" must not match the start of "order").
  var KEYWORD_WORDS = [
    'real', 'char', 'string', 'const', 'var', 'integer', 'array', 'of',
    'begin', 'for', 'while', 'repeat', 'if', 'then', 'else', 'do', 'to',
    'end', 'until', 'record', 'div', 'and', 'or', 'not'
  ];

  // All patterns are anchored with ^ and carry neither the g nor the m flag,
  // so exec() can only match at the very start of the remaining code and
  // keeps no lastIndex state between calls.
  var keywords = [
    new RegExp('^(?:' + KEYWORD_WORDS.join('|') + ')(?![a-z0-9_])', 'i')
  ];

  // Integer, optional fraction, optional exponent, matched as one literal.
  // The fraction requires at least one digit after the dot so that the
  // range "1..10" still lexes as Number, "..", Number.
  // NOTE: a '-' directly followed by digits is always taken as part of the
  // number (Number is tried before Operation), so "a-1" yields "a", "-1".
  var numbers = [
    /^-?\d+(?:\.\d+)?(?:e[+-]?\d+)?/i
  ];

  // Two-character separators are listed before the single-character class so
  // that ":=" and ".." win over ":" and ".".
  var separators = [
    /^(?::=|\.\.|[:,.;()])/
  ];

  // Two-character operators first, for the same reason.
  var operations = [
    /^(?:>=|<=|<>|[+\-*\/><=])/
  ];

  // Case-insensitive, like the keywords.
  var identifiers = [
    /^[a-z_][a-z0-9_]*/i
  ];

  // Lexeme categories in the order they are tried; the first match wins.
  var precedence = [
    { name: 'Keyword', regexps: keywords },
    { name: 'Number', regexps: numbers },
    { name: 'Separator', regexps: separators },
    { name: 'Operation', regexps: operations },
    { name: 'Identifier', regexps: identifiers }
  ];

  // Any run of whitespace, { block comments } and // line comments.
  // The block comment stops at the FIRST closing brace, and the line comment
  // is anchored to the current position only (no multiline flag).
  var ignorable = /^(?:\s+|\{[^}]*\}|\/\/[^\n]*)+/;

  var position = 0; // absolute offset of `code` inside `program`
  var code = null; // unread tail of `program`, starting at `position`
  var program = null; // full source text supplied through init()

  /**
   * Advances the cursor by `pos` characters and refreshes the unread tail.
   * @param {number} pos - Number of characters to move forward.
   */
  var updateCodeAndPosition = function(pos) {
    position += pos;
    code = program.slice(position);
  };

  /**
   * Moves the cursor past any whitespace and comments at the current
   * position. Does nothing when the code starts with something else.
   */
  var skipIgnorable = function() {
    var skipped = ignorable.exec(code);
    if (skipped !== null) {
      updateCodeAndPosition(skipped[0].length);
    }
  };

  /**
   * Recognises one lexeme at the current position, consumes it together
   * with any whitespace/comments that follow, and returns it.
   * @returns {Lexeme} Lexeme with `name` and `type` set to null when nothing
   *     matched; `nextLexemePos` is the cursor position after consumption.
   */
  var match = function() {
    var lexemeName = null;
    var lexemeType = null;

    precedence.some(function(type) {
      return type.regexps.some(function(regexp) {
        var found = regexp.exec(code);
        if (found === null) {
          return false;
        }
        lexemeName = found[0];
        lexemeType = type.name;
        return true;
      });
    });

    updateCodeAndPosition(lexemeName === null ? 0 : lexemeName.length);
    skipIgnorable();
    return new Lexeme(lexemeName, lexemeType, position);
  };

  return {
    /**
     * Sets the program text to scan and rewinds the cursor.
     * @param {string} prog - Pascal source text.
     */
    init: function(prog) {
      program = prog;
      position = 0;
      code = prog;
    },

    /**
     * Scans the lexeme that starts at offset `pos` (after skipping leading
     * whitespace and comments).
     * @param {number} [pos=0] - Absolute offset into the program text.
     * @returns {Lexeme} The recognised lexeme; `name` is null when nothing
     *     could be recognised at that offset.
     * @throws {Error} If init() has not been called yet.
     */
    next: function(pos) {
      if (program === null || program === undefined) {
        throw new Error('Scanner.init(program) must be called before Scanner.next()');
      }
      position = 0;
      updateCodeAndPosition(pos || 0);
      skipIgnorable();
      return match();
    }
  };
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment