Last active
August 29, 2015 14:02
-
-
Save korniltsev/80a35f76a65c49c5a2df to your computer and use it in GitHub Desktop.
cool lexer definition
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| * The scanner definition for COOL. | |
| */ | |
| import java_cup.runtime.Symbol; | |
| %% | |
| %{ | |
| /* Stuff enclosed in %{ %} is copied verbatim to the lexer class | |
| * definition, all the extra variables/functions you want to use in the | |
| * lexer actions should go here. Don't remove or modify anything that | |
| * was there initially. */ | |
| // Max size of string constants; presumably the longest accepted string is one character shorter (TODO confirm against the string rules) | |
| static int MAX_STR_CONST = 1025; | |
| // Accumulates the characters of the string constant currently being scanned; the string rules clear it when a string ends | |
| StringBuffer string_buf = new StringBuffer(); | |
| private int curr_lineno = 1; /* Line counter, starting at 1; incremented by lexer rules that consume a line break. */ | |
| int get_curr_lineno() { /* Returns the current value of the line counter. */ | |
| return curr_lineno; | |
| } | |
| private AbstractSymbol filename; /* Symbol for the file being scanned; unset until set_filename is called. */ | |
| void set_filename(String fname) { /* Stores the symbol returned by AbstractTable.stringtable.addString(fname). */ | |
| filename = AbstractTable.stringtable.addString(fname); | |
| } | |
| AbstractSymbol curr_filename() { /* Returns the symbol stored by set_filename. */ | |
| return filename; | |
| } | |
| private int commentDepth = 0; /* Number of block comments currently open; 0 when outside any block comment. */ | |
| %} | |
| %init{ | |
| /* Stuff enclosed in %init{ %init} is copied verbatim to the lexer | |
| * class constructor, all the extra initialization you want to do should | |
| * go here. Don't remove or modify anything that was there initially. */ | |
| // Intentionally empty: the lexer fields declared in the user-code section either have inline initializers or start unset | |
| %init} | |
| %eofval{ | |
| /* Code in this section runs when end-of-file is reached. An EOF that | |
| * arrives inside a block comment or a string constant is reported as an | |
| * ERROR token; the lexical state is switched back to YYINITIAL first, so | |
| * a later pass through this section falls through to the plain EOF | |
| * symbol. Every path must end by returning a symbol. */ | |
| if (yy_lexical_state == COMMENT) { | |
| yybegin(YYINITIAL); | |
| return new Symbol(TokenConstants.ERROR, "EOF in comment"); | |
| } | |
| if (yy_lexical_state == STRING) { | |
| yybegin(YYINITIAL); | |
| return new Symbol(TokenConstants.ERROR, "EOF in string constant"); | |
| } | |
| /* YYINITIAL needs no special handling. */ | |
| return new Symbol(TokenConstants.EOF); | |
| %eofval} | |
| %class CoolLexer | |
| %cup | |
| %state COMMENT | |
| %state STRING | |
| Digit = [0-9] | |
| UpperLetter = [A-Z] | |
| LowerLetter = [a-z] | |
| Letter = [A-Za-z_] | |
| StrEscSymbol = [0-9A-Za-z] | |
| WhiteSpace = [ \t\f] | |
| NewLine = [\n\r] | |
| EscStr = (\b|\n|\t|\f) | |
| Integer = {Digit}+ | |
| TypeId = {UpperLetter}({Digit}|{Letter})* | |
| ObjectId = {LowerLetter}({Digit}|{Letter})* | |
| %% | |
| %% | |
| <YYINITIAL>"<-" { | |
| /* Two-character operators; longest match keeps them from being split | |
| * into their single-character prefixes. */ | |
| return new Symbol(TokenConstants.ASSIGN); } | |
| <YYINITIAL>"=>" { | |
| return new Symbol(TokenConstants.DARROW); } | |
| <YYINITIAL>"~" { | |
| /* Tilde maps to the NEG token. */ | |
| return new Symbol(TokenConstants.NEG); } | |
| <YYINITIAL> [Cc][Aa][Ss][Ee] { | |
| /* Keywords, listed alphabetically. Each letter is spelled as a two-case | |
| * character class, so keywords match in any mix of upper and lower case. | |
| * These rules have to stay ahead of the identifier rules: on a tie in | |
| * match length the earlier rule is the one that fires. */ | |
| return new Symbol(TokenConstants.CASE); } | |
| <YYINITIAL> [Cc][Ll][Aa][Ss][Ss] { return new Symbol(TokenConstants.CLASS); } | |
| <YYINITIAL> [Ee][Ll][Ss][Ee] { return new Symbol(TokenConstants.ELSE); } | |
| <YYINITIAL> [Ee][Ss][Aa][Cc] { return new Symbol(TokenConstants.ESAC); } | |
| <YYINITIAL> [Ff][Ii] { return new Symbol(TokenConstants.FI); } | |
| <YYINITIAL> [Ii][Ff] { return new Symbol(TokenConstants.IF); } | |
| <YYINITIAL> [Ii][Nn] { return new Symbol(TokenConstants.IN); } | |
| <YYINITIAL> [Ii][Nn][Hh][Ee][Rr][Ii][Tt][Ss] { return new Symbol(TokenConstants.INHERITS); } | |
| <YYINITIAL> [Ii][Ss][Vv][Oo][Ii][Dd] { return new Symbol(TokenConstants.ISVOID); } | |
| <YYINITIAL> [Ll][Ee][Tt] { return new Symbol(TokenConstants.LET); } | |
| <YYINITIAL> [Ll][Oo][Oo][Pp] { return new Symbol(TokenConstants.LOOP); } | |
| <YYINITIAL> [Nn][Ee][Ww] { return new Symbol(TokenConstants.NEW); } | |
| <YYINITIAL> [Nn][Oo][Tt] { return new Symbol(TokenConstants.NOT); } | |
| <YYINITIAL> [Oo][Ff] { return new Symbol(TokenConstants.OF); } | |
| <YYINITIAL> [Pp][Oo][Oo][Ll] { return new Symbol(TokenConstants.POOL); } | |
| <YYINITIAL> [Tt][Hh][Ee][Nn] { return new Symbol(TokenConstants.THEN); } | |
| <YYINITIAL> [Ww][Hh][Ii][Ll][Ee] { return new Symbol(TokenConstants.WHILE); } | |
| <YYINITIAL> f[Aa][Ll][Ss][Ee] { | |
| /* The boolean constants require a lower-case first letter; only the | |
| * remaining letters are case-insensitive. */ | |
| return new Symbol(TokenConstants.BOOL_CONST, Boolean.FALSE); } | |
| <YYINITIAL> t[Rr][Uu][Ee] { return new Symbol(TokenConstants.BOOL_CONST, Boolean.TRUE); } | |
| <YYINITIAL>"{" { | |
| /* Brackets and separators. */ | |
| return new Symbol(TokenConstants.LBRACE); } | |
| <YYINITIAL>"}" { return new Symbol(TokenConstants.RBRACE); } | |
| <YYINITIAL>"(" { return new Symbol(TokenConstants.LPAREN); } | |
| <YYINITIAL>")" { return new Symbol(TokenConstants.RPAREN); } | |
| <YYINITIAL>";" { return new Symbol(TokenConstants.SEMI); } | |
| <YYINITIAL>":" { return new Symbol(TokenConstants.COLON); } | |
| <YYINITIAL>"," { return new Symbol(TokenConstants.COMMA); } | |
| <YYINITIAL>"." { return new Symbol(TokenConstants.DOT); } | |
| <YYINITIAL>"@" { return new Symbol(TokenConstants.AT); } | |
| <YYINITIAL>"+" { | |
| /* Arithmetic operators. */ | |
| return new Symbol(TokenConstants.PLUS); } | |
| <YYINITIAL>"-" { return new Symbol(TokenConstants.MINUS); } | |
| <YYINITIAL>"*" { return new Symbol(TokenConstants.MULT); } | |
| <YYINITIAL>"/" { return new Symbol(TokenConstants.DIV); } | |
| <YYINITIAL>"<=" { | |
| /* Comparison operators. */ | |
| return new Symbol(TokenConstants.LE); } | |
| <YYINITIAL>"<" { return new Symbol(TokenConstants.LT); } | |
| <YYINITIAL>"=" { return new Symbol(TokenConstants.EQ); } | |
| <YYINITIAL> --.* { /* Line comment: skip everything up to, but not including, the line break. */ } | |
| <YYINITIAL,COMMENT> \n { | |
| /* Only the line feed advances the line counter. Counting the carriage | |
| * return as well made every CRLF line ending count as two lines. */ | |
| curr_lineno++; } | |
| <YYINITIAL,COMMENT> \r { /* Carriage return is skipped; the line feed that follows does the counting. */ } | |
| <YYINITIAL> "*)" { | |
| /* A comment closer with no comment open is an error. */ | |
| return new Symbol(TokenConstants.ERROR, "Unmatched *)");} | |
| <YYINITIAL,COMMENT> "(*" { | |
| /* Block comments nest: every opener adds one level, whether it starts | |
| * a new comment or appears inside an open one. */ | |
| commentDepth++; | |
| yybegin(COMMENT); } | |
| <COMMENT> "*)" { | |
| /* Close one level; return to normal scanning only when the outermost | |
| * comment has been closed. */ | |
| commentDepth--; | |
| if (commentDepth == 0) { | |
| yybegin(YYINITIAL); | |
| } | |
| } | |
| <COMMENT> . { /* Any other character inside a block comment is ignored. */ } | |
| <YYINITIAL> "\"" { | |
| /* Opening quote. Start from an empty buffer so that text left over | |
| * from a string that never reached its closing quote cannot leak in. */ | |
| string_buf.setLength(0); | |
| yybegin(STRING); } | |
| <STRING> "\"" { | |
| /* Closing quote. Take the collected text, reset the buffer, and then | |
| * either report an error or produce the string token. Errors are | |
| * reported here, at the end of the string, so scanning resumes after | |
| * the closing quote. */ | |
| yybegin(YYINITIAL); | |
| String strRes = string_buf.toString(); | |
| string_buf.setLength(0); | |
| if (strRes.contains("\0")) { | |
| return new Symbol(TokenConstants.ERROR, "String contains escaped null character."); | |
| } | |
| /* MAX_STR_CONST was declared but never checked; strings of that length | |
| * or more are rejected instead of being accepted silently. */ | |
| if (strRes.length() >= MAX_STR_CONST) { | |
| return new Symbol(TokenConstants.ERROR, "String constant too long"); | |
| } | |
| return new Symbol(TokenConstants.STR_CONST, AbstractTable.stringtable.addString(strRes)); | |
| } | |
| <STRING> \\n { string_buf.append("\n"); } | |
| <STRING> \\t { string_buf.append("\t"); } | |
| <STRING> \\b { string_buf.append("\b"); } | |
| <STRING> \\f { string_buf.append("\f"); } | |
| <STRING> \\\n { | |
| /* Backslash followed by a line break: the string continues and gains | |
| * a newline. The source line still has to be counted, otherwise every | |
| * later line number is off by one. */ | |
| curr_lineno++; | |
| string_buf.append("\n"); } | |
| <STRING> \n { | |
| /* Unescaped line break: the string is unterminated. Count the line, | |
| * drop the partial text and resume normal scanning on the next line. */ | |
| curr_lineno++; | |
| yybegin(YYINITIAL); | |
| string_buf.setLength(0); | |
| return new Symbol(TokenConstants.ERROR, "Unterminated string constant"); | |
| } | |
| <STRING> \\. { | |
| /* Any other escaped character stands for itself, including the quote, | |
| * the backslash and punctuation. This rule must come after the rules | |
| * for n, t, b and f, which match the same length and need to win. | |
| * Previously only letters and digits were unescaped, so other escapes | |
| * kept their backslash. */ | |
| string_buf.append(yytext().substring(1)); } | |
| <STRING> . { string_buf.append(yytext()); } | |
| <YYINITIAL> {Integer} { | |
| /* Integer literal: the matched digits go to AbstractTable.inttable. */ | |
| final String digits = yytext(); | |
| return new Symbol(TokenConstants.INT_CONST, AbstractTable.inttable.addString(digits)); } | |
| <YYINITIAL> {TypeId} { | |
| /* Identifier starting with an upper-case letter. */ | |
| final String typeName = yytext(); | |
| return new Symbol(TokenConstants.TYPEID, AbstractTable.stringtable.addString(typeName)); } | |
| <YYINITIAL> {ObjectId} { | |
| /* Identifier starting with a lower-case letter. */ | |
| final String objectName = yytext(); | |
| return new Symbol(TokenConstants.OBJECTID, AbstractTable.stringtable.addString(objectName)); } | |
| <YYINITIAL> \xB { /* Vertical tab is skipped like other whitespace. */ } | |
| <YYINITIAL> {WhiteSpace}+ { /* Runs of blanks, tabs and form feeds are skipped. */ } | |
| <YYINITIAL> . { | |
| /* Catch-all; must remain the last rule. A character that no earlier | |
| * rule accepts is returned as an ERROR token carrying that character. | |
| * Previously only underscore and exclamation mark were reported this | |
| * way; every other illegal character was printed to stderr and dropped, | |
| * so the parser never saw the error. */ | |
| return new Symbol(TokenConstants.ERROR, yytext()); } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment