Skip to content

Instantly share code, notes, and snippets.

@korniltsev
Last active August 29, 2015 14:02
Show Gist options
  • Select an option

  • Save korniltsev/80a35f76a65c49c5a2df to your computer and use it in GitHub Desktop.

Select an option

Save korniltsev/80a35f76a65c49c5a2df to your computer and use it in GitHub Desktop.
cool lexer definition
/*
* The scanner definition for COOL.
*/
import java_cup.runtime.Symbol;
%%
%{
    /*
     * Everything between the percent-brace markers is pasted verbatim into
     * the generated lexer class, so helper fields and methods needed by the
     * rule actions are declared here. Do not remove or modify anything that
     * was there initially.
     */

    // Max size of string constants
    static int MAX_STR_CONST = 1025;

    // Collects the characters of the string constant currently being scanned
    StringBuffer string_buf = new StringBuffer();

    // Number of (* ... *) comments currently open; the comment rules
    // increment and decrement it
    private int commentDepth = 0;

    // Line counter; starts at 1 and is advanced by the newline rule
    private int curr_lineno = 1;

    // Symbol for the name of the file being scanned; stays null until
    // set_filename is called
    private AbstractSymbol filename;

    /** Returns the current value of the line counter. */
    int get_curr_lineno() {
        return this.curr_lineno;
    }

    /** Adds fname to the shared string table and remembers the resulting symbol. */
    void set_filename(String fname) {
        AbstractSymbol interned = AbstractTable.stringtable.addString(fname);
        this.filename = interned;
    }

    /** Returns the symbol stored by the most recent set_filename call. */
    AbstractSymbol curr_filename() {
        return this.filename;
    }
%}
%init{
/* Stuff enclosed in %init{ %init} is copied verbatim to the lexer
* class constructor. Any extra initialization you want to do should
* go here. Don't remove or modify anything that was there initially. */
// Intentionally empty: no extra constructor code is supplied.
%init}
%eofval{
    /*
     * Code placed in this section runs when the scanner reaches end of
     * file. Hitting EOF while a comment or a string constant is still open
     * is reported once as an ERROR token; the state is switched back to
     * YYINITIAL first, so the following call falls through to the plain
     * EOF symbol below. The EOF symbol must always be returned eventually,
     * or the lexer will not work.
     */
    if (yy_lexical_state == COMMENT) {
        yybegin(YYINITIAL);
        return new Symbol(TokenConstants.ERROR, "EOF in comment");
    }
    if (yy_lexical_state == STRING) {
        yybegin(YYINITIAL);
        return new Symbol(TokenConstants.ERROR, "EOF in string constant");
    }
    /* YYINITIAL needs no special treatment. */
    return new Symbol(TokenConstants.EOF);
%eofval}
%class CoolLexer
%cup
%state COMMENT
%state STRING
Digit = [0-9]
Letter = [a-zA-Z_]
UpperLetter = [A-Z]
LowerLetter = [a-z]
StrEscSymbol = [a-zA-Z0-9]
WhiteSpace = [ \f\t]
TypeId = {UpperLetter}({Letter}|{Digit})*
ObjectId = {LowerLetter}({Letter}|{Digit})*
Integer = {Digit}+
%%
<YYINITIAL>"=>" { /* Two-character arrow, reported as DARROW. The scanner prefers the longest match, so this wins over the one-character "=" rule. */ return new Symbol(TokenConstants.DARROW); }
<YYINITIAL>"<-" { /* Reported as ASSIGN. Longest match keeps it from being split into "<" followed by "-". */ return new Symbol(TokenConstants.ASSIGN); }
<YYINITIAL>"~" { /* Reported as NEG. */ return new Symbol(TokenConstants.NEG); }
<YYINITIAL> [cC][lL][aA][sS][sS] { /* Keywords: each letter is written as a two-letter character
                                      class, so every upper/lower-case mix is accepted. These
                                      rules are listed before the identifier rules, so when a
                                      keyword and an identifier match the same text the keyword
                                      rule is the one that fires. */
                                   return new Symbol(TokenConstants.CLASS); }
<YYINITIAL> [eE][lL][sS][eE] { return new Symbol(TokenConstants.ELSE); }
<YYINITIAL> f[aA][lL][sS][eE] { /* The leading f must be lower case, the remaining letters may be
                                   in either case. A capitalised spelling does not match here and
                                   is picked up by the TypeId rule instead. The token carries
                                   Boolean.FALSE as its value. */
                                return new Symbol(TokenConstants.BOOL_CONST, Boolean.FALSE); }
<YYINITIAL> [fF][iI] { return new Symbol(TokenConstants.FI); }
<YYINITIAL> [iI][fF] { return new Symbol(TokenConstants.IF); }
<YYINITIAL> [iI][nN] { return new Symbol(TokenConstants.IN); }
<YYINITIAL> [iI][nN][hH][eE][rR][iI][tT][sS] { return new Symbol(TokenConstants.INHERITS); }
<YYINITIAL> [iI][sS][vV][oO][iI][dD] { return new Symbol(TokenConstants.ISVOID); }
<YYINITIAL> [lL][eE][tT] { return new Symbol(TokenConstants.LET); }
<YYINITIAL> [lL][oO][oO][pP] { return new Symbol(TokenConstants.LOOP); }
<YYINITIAL> [pP][oO][oO][lL] { return new Symbol(TokenConstants.POOL); }
<YYINITIAL> [tT][hH][eE][nN] { return new Symbol(TokenConstants.THEN); }
<YYINITIAL> [wW][hH][iI][lL][eE] { return new Symbol(TokenConstants.WHILE); }
<YYINITIAL> [cC][aA][sS][eE] { return new Symbol(TokenConstants.CASE); }
<YYINITIAL> [eE][sS][aA][cC] { return new Symbol(TokenConstants.ESAC); }
<YYINITIAL> [nN][eE][wW] { return new Symbol(TokenConstants.NEW); }
<YYINITIAL> [oO][fF] { return new Symbol(TokenConstants.OF); }
<YYINITIAL> [nN][oO][tT] { return new Symbol(TokenConstants.NOT); }
<YYINITIAL> t[rR][uU][eE] { /* Same convention as the false rule: lower-case first letter,
                               any case afterwards. The token carries Boolean.TRUE. */
                            return new Symbol(TokenConstants.BOOL_CONST, Boolean.TRUE); }
<YYINITIAL>";" { /* Punctuation and operators: each quoted literal maps directly to the TokenConstants code named in its action. */ return new Symbol(TokenConstants.SEMI); }
<YYINITIAL>":" { return new Symbol(TokenConstants.COLON); }
<YYINITIAL>"{" { return new Symbol(TokenConstants.LBRACE); }
<YYINITIAL>"}" { return new Symbol(TokenConstants.RBRACE); }
<YYINITIAL>"(" { return new Symbol(TokenConstants.LPAREN); }
<YYINITIAL>")" { return new Symbol(TokenConstants.RPAREN); }
<YYINITIAL>"," { return new Symbol(TokenConstants.COMMA); }
<YYINITIAL>"<" { return new Symbol(TokenConstants.LT); }
<YYINITIAL>"<=" { /* Listed after "<" but still wins on the text "<=" because the longer match is preferred. */ return new Symbol(TokenConstants.LE); }
<YYINITIAL>"=" { return new Symbol(TokenConstants.EQ); }
<YYINITIAL>"+" { return new Symbol(TokenConstants.PLUS); }
<YYINITIAL>"-" { /* A doubled minus never reaches this rule: the line-comment rule gives a longer match. */ return new Symbol(TokenConstants.MINUS); }
<YYINITIAL>"/" { return new Symbol(TokenConstants.DIV); }
<YYINITIAL>"*" { /* A star directly followed by a closing parenthesis is taken by the longer unmatched-comment-closer rule instead. */ return new Symbol(TokenConstants.MULT); }
<YYINITIAL>"." { return new Symbol(TokenConstants.DOT); }
<YYINITIAL>"@" { return new Symbol(TokenConstants.AT); }
<YYINITIAL> --.* { /* Line comment: the two dashes and the rest of the line are
                      discarded. The dot does not match a line feed, so the line
                      terminator is left for the rule below. */ }
<YYINITIAL,COMMENT> \n { /* Only the line feed advances the line counter. Counting the
                            carriage return as well made every CR LF terminated line
                            count twice. */
                         curr_lineno++; }
<YYINITIAL,COMMENT> \r { /* A carriage return is skipped like other white space, both in
                            ordinary code and inside block comments, and leaves the line
                            counter alone. */ }
<YYINITIAL> "*)" {
    /* A comment closer seen while no comment is open is a lexical error. */
    return new Symbol(TokenConstants.ERROR, "Unmatched *)");
}
<YYINITIAL,COMMENT> "(*" {
    /* Opens a comment, or nests one level deeper when already inside one. */
    yybegin(COMMENT);
    commentDepth += 1;
}
<COMMENT> "*)" {
    /* Closes one nesting level; ordinary scanning resumes only once the
       outermost comment has been closed. */
    if (--commentDepth == 0) {
        yybegin(YYINITIAL);
    }
}
<COMMENT> . { /* Comment text is discarded. */ }
<YYINITIAL> "\"" {
    /* Opening quote: start from an empty buffer and switch to the STRING
       state, where the rules below assemble the constant. */
    string_buf.setLength(0);
    yybegin(STRING);
}
<STRING> "\"" {
    /* Closing quote: leave the STRING state, take the assembled text and
       empty the buffer for the next constant. Errors are reported here, at
       the end of the string, so scanning resumes after the closing quote. */
    yybegin(YYINITIAL);
    String strRes = string_buf.toString();
    string_buf.setLength(0);
    if (strRes.contains("\0")) {
        return new Symbol(TokenConstants.ERROR, "String contains escaped null character.");
    }
    /* MAX_STR_CONST is treated as an exclusive bound, that is, at most
       MAX_STR_CONST - 1 characters are accepted.
       NOTE(review): this assumes the constant reserves one slot for a
       terminator, as in the C version of the skeleton; confirm. */
    if (strRes.length() >= MAX_STR_CONST) {
        return new Symbol(TokenConstants.ERROR, "String constant too long");
    }
    return new Symbol(TokenConstants.STR_CONST, AbstractTable.stringtable.addString(strRes));
}
<STRING> \\n { /* backslash n: line feed */ string_buf.append('\n'); }
<STRING> \\t { /* backslash t: tab */ string_buf.append('\t'); }
<STRING> \\b { /* backslash b: backspace */ string_buf.append('\b'); }
<STRING> \\f { /* backslash f: form feed */ string_buf.append('\f'); }
<STRING> \\\n {
    /* Backslash followed by a real line feed: the string continues on the
       next source line, so the line counter has to advance as well. */
    curr_lineno++;
    string_buf.append('\n');
}
<STRING> \n {
    /* Unescaped line feed: the string is unterminated. The line feed is
       consumed here, so it is counted here; scanning resumes at the start
       of the next line in the initial state. */
    curr_lineno++;
    yybegin(YYINITIAL);
    string_buf.setLength(0);
    return new Symbol(TokenConstants.ERROR, "Unterminated string constant");
}
<STRING> \\[^\n] {
    /* Any other escaped character stands for itself: the backslash is
       dropped and the character after it is kept. This covers an escaped
       backslash, an escaped double quote, letters, digits and punctuation
       alike. The four special escapes above are listed first and therefore
       take precedence for their two-character texts. */
    string_buf.append(yytext().substring(1));
}
<STRING> [^\n] {
    /* Ordinary character: copied as is. The class is written as
       everything-but-line-feed so that a raw carriage return inside a
       string is accepted as well.
       NOTE(review): presumably the JLex dot excludes carriage return;
       if it does not, this class is simply equivalent to the dot. */
    string_buf.append(yytext());
}
<YYINITIAL> {TypeId} { /* Identifier starting with an upper-case letter; its text is entered into the shared string table. */
                       return new Symbol(TokenConstants.TYPEID, AbstractTable.stringtable.addString(yytext())); }
<YYINITIAL> {ObjectId} { /* Identifier starting with a lower-case letter; its text is entered into the shared string table. */
                         return new Symbol(TokenConstants.OBJECTID, AbstractTable.stringtable.addString(yytext())); }
<YYINITIAL> {Integer} { /* Run of decimal digits; the digit text is entered into the integer table. */
                        return new Symbol(TokenConstants.INT_CONST, AbstractTable.inttable.addString(yytext()));}
<YYINITIAL> \xB { /* Skipped without producing a token.
                     NOTE(review): presumably hex 0B, the vertical tab; confirm how JLex reads a one-digit hex escape. */ }
<YYINITIAL> {WhiteSpace}+ { /* Blanks, form feeds and tabs are skipped. */ }
<YYINITIAL> . { /* Any character that no rule above accepts cannot start a token. It
                   is returned as an ERROR token carrying that one character, and
                   scanning continues with the next character. Previously only the
                   underscore and the exclamation mark were reported this way; every
                   other stray character was printed to stderr and silently dropped
                   from the token stream. */
                return new Symbol(TokenConstants.ERROR, yytext()); }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment