Created
January 25, 2011 14:29
-
-
Save lobodin/794979 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
Pascal scanner
*/
/**
 * A single lexeme produced by the scanner.
 *
 * @constructor
 * @param {?string} name - Text of the lexeme exactly as it appears in the
 *     program.
 * @param {?string} type - Category label for the lexeme (for example
 *     'Keyword' or 'Number').
 * @param {number} nextLexemePos - Offset in the program text from which
 *     scanning should continue to obtain the following lexeme.
 */
var Lexeme = function(name, type, nextLexemePos) {
    this.name = name;
    this.type = type;
    this.nextLexemePos = nextLexemePos;
};
/**
 * Pascal scanner (lexer) singleton.
 *
 * Usage: call `Scanner.init(programText)` once, then call
 * `Scanner.next(pos)` repeatedly, feeding each returned lexeme's
 * `nextLexemePos` back in as `pos`.
 *
 * Lexeme categories are tried in a fixed order (Keyword, Number, Separator,
 * Operation, Identifier) and the first pattern that matches at the current
 * position wins. Whitespace, `{ ... }` comments and `// ...` line comments
 * are skipped before and after every lexeme.
 *
 * When nothing matches at the current position (end of input, or a
 * character no pattern recognises) the returned lexeme has `name === null`
 * and `type === null`, and the position does not advance past it.
 */
var Scanner = (function() {
    var keywordNames = [
        'real', 'char', 'string', 'const', 'var', 'integer', 'array', 'of',
        'begin', 'for', 'while', 'repeat', 'if', 'then', 'else', 'do', 'to',
        'end', 'until', 'record', 'div', 'and', 'or', 'not'
    ];

    // None of the patterns below carry the `g` flag: they are all anchored at
    // the start of the remaining text, and the match length is read from the
    // match itself, so there is no `lastIndex` state to reset between calls.
    var keywords = [
            // The trailing \b stops a keyword from matching as a mere prefix
            // of a longer identifier (e.g. `for` inside `format`).
            new RegExp('^(?:' + keywordNames.join('|') + ')\\b', 'i')
        ],
        numbers = [
            // Integer, optional fraction, optional exponent. The fraction
            // needs at least one digit after the dot so that `1..10` still
            // scans as `1`, `..`, `10`.
            /^-?\d+(?:\.\d+)?(?:E[+-]?\d+)?/i
        ],
        separators = [
            /^:=/,
            /^\.\./,
            /^[:,.;()]/
        ],
        operations = [
            /^>=/,
            /^<=/,
            /^<>/,
            /^[+\-*\/><=]/
        ],
        identifiers = [
            // Case-insensitive, consistent with the keyword patterns.
            /^[a-z][a-z0-9_]*/i
        ],
        ignorable = [
            /^\{[^}]*\}/,  // brace comment, ends at the FIRST closing brace
            /^\/\/.*/,     // line comment, up to (not including) the line end
            /^\s+/         // whitespace
        ],
        precedence = [
            { name: 'Keyword', regexps: keywords },
            { name: 'Number', regexps: numbers },
            { name: 'Separator', regexps: separators },
            { name: 'Operation', regexps: operations },
            { name: 'Identifier', regexps: identifiers }
        ],
        position = 0,   // absolute offset of `code` within `program`
        code = null,    // not-yet-scanned tail of `program`
        program = null; // full program text supplied through init()

    // Moves the scan position forward by `length` characters.
    var advance = function(length) {
        position += length;
        code = program.slice(position);
    };

    // Skips any run of whitespace and comments at the current position.
    var skipIgnorable = function() {
        var skipped;
        do {
            skipped = false;
            ignorable.forEach(function(regexp) {
                var found = regexp.exec(code);
                if (found !== null) {
                    advance(found[0].length);
                    skipped = true;
                }
            });
        } while (skipped);
    };

    // Reads one lexeme at the current position, then skips what follows it
    // so that `position` points at the start of the next lexeme.
    var match = function() {
        var lexemeType = null,
            lexemeName = null;

        precedence.some(function(type) {
            return type.regexps.some(function(regexp) {
                var found = regexp.exec(code);
                if (found === null) {
                    return false;
                }
                lexemeType = type.name;
                lexemeName = found[0];
                return true;
            });
        });

        if (lexemeName !== null) {
            advance(lexemeName.length);
        }
        skipIgnorable();
        return new Lexeme(lexemeName, lexemeType, position);
    };

    return {
        /**
         * Sets the program text to scan.
         *
         * @param {string} prog - Pascal source text.
         */
        init: function(prog) {
            program = prog;
        },

        /**
         * Scans the lexeme that starts at (or after ignorable text
         * following) offset `pos`.
         *
         * @param {number} [pos=0] - Offset in the program text to scan from.
         * @returns {Lexeme} The lexeme found; its `nextLexemePos` is the
         *     offset to pass to the following call.
         * @throws {TypeError} If init() has not been given a program.
         */
        next: function(pos) {
            if (program === null || program === undefined) {
                throw new TypeError('Scanner.init(program) must be called before Scanner.next()');
            }
            position = pos || 0;
            code = program.slice(position);
            skipIgnorable();
            return match();
        }
    };
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment