Last active
December 18, 2025 07:48
-
-
Save yamasushi/bbfc1dd937617ca43bda9d1defda9218 to your computer and use it in GitHub Desktop.
Compilers / Chapter 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // https://gist.github.com/yamasushi/bbfc1dd937617ca43bda9d1defda9218 | |
| // Compilers Chapter 2 | |
| // | |
| // Java17 | |
| import java.io.*; | |
| import java.util.Map; | |
| import java.util.HashMap; | |
| import java.util.Collections; | |
| public class Main{ | |
| public static void main(String[] args) throws IOException { | |
| Lexer.line=1; | |
| var l = new Lexer(); | |
| System.out.println("-----------"); | |
| printWords(l); | |
| System.out.println("-----------"); | |
| int i = 1; | |
| System.out.printf("[%04d]",i); | |
| for( ; ; ) { | |
| var t = l.scan(); | |
| assert i <= Lexer.line; | |
| if ( t instanceof Eof e ) { | |
| System.out.printf("<End of File>%n"); | |
| break; | |
| } else { | |
| if ( i < Lexer.line ){ | |
| i = Lexer.line; | |
| System.out.printf("%n[%04d]",i); | |
| } | |
| if (t instanceof Char c) { | |
| System.out.printf("<%s>",t); | |
| } else { | |
| System.out.printf("<%s,%s>",t.tag,t); | |
| } | |
| } | |
| } | |
| System.out.println("-----------"); | |
| printWords(l); | |
| } | |
| static void printWords(Lexer l) { | |
| l.getWords() | |
| .forEach((k,v)-> | |
| System.out.printf("key=%-10s , value=(tag=%-10s,lexeme=%s)%n",k,v.tag,v.lexeme)); | |
| } | |
| } | |
| /* | |
| public class Tag { | |
| public static final int AND = 256 ; | |
| public static final int BASIC = 257 ; | |
| public static final int BREAK = 258 ; | |
| public static final int DO = 259 ; | |
| public static final int ELSE = 260 ; | |
| public static final int EQ = 261 ; | |
| public static final int FALSE = 262 ; | |
| public static final int GE = 263 ; | |
| public static final int ID = 264 ; | |
| public static final int IF = 265 ; | |
| public static final int INDEX = 266 ; | |
| public static final int LE = 267 ; | |
| public static final int MINUS = 268 ; | |
| public static final int NE = 269 ; | |
| public static final int NUM = 270 ; | |
| public static final int OR = 271 ; | |
| public static final int REAL = 272 ; | |
| public static final int TEMP = 273 ; | |
| public static final int TRUE = 274 ; | |
| public static final int WHILE = 275 ; } | |
| */ | |
| public enum Tag { | |
| EOF ( -1), | |
| CHAR ( 0), | |
| // | |
| AND (256), | |
| BASIC(257), | |
| BREAK(258), | |
| DO (259), | |
| ELSE (260), | |
| EQ (261), | |
| FALSE(262), | |
| GE (263), | |
| ID (264), | |
| IF (265), | |
| INDEX(266), | |
| LE (267), | |
| MINUS(268), | |
| NE (269), | |
| NUM (270), | |
| OR (271), | |
| REAL (272), | |
| TEMP (273), | |
| TRUE (274), | |
| WHILE(275) ; | |
| private final int value; | |
| Tag(int value) { this.value = value; } | |
| public int getValue() { return this.value; } | |
| } | |
| public class Token { | |
| public final Tag tag; | |
| protected Token (Tag t) { this.tag = t;} | |
| } | |
| public class Char extends Token { | |
| public final char value; | |
| public Char (char v) {super(Tag.CHAR); this.value = v;} | |
| @Override | |
| public String toString() { | |
| return "" + this.value;} | |
| } | |
| public class Eof extends Token { | |
| public Eof () {super(Tag.EOF);} | |
| @Override | |
| public String toString() { return "<EOF>";} | |
| } | |
| public class Num extends Token { | |
| public final int value; | |
| public Num(int v){super(Tag.NUM); this.value = v;} | |
| @Override | |
| public String toString() { | |
| return "" + this.value;} | |
| } | |
| public class Real extends Token { | |
| public final double value; | |
| public Real(double v){super(Tag.REAL); this.value = v;} | |
| @Override | |
| public String toString() { | |
| return "" + this.value;} | |
| } | |
| public class Word extends Token { | |
| public final String lexeme; | |
| public Word( String s, Tag tag) { super(tag); this.lexeme=s; } | |
| @Override | |
| public String toString() {return this.lexeme;} | |
| public static final Word and = new Word("&&",Tag.AND); | |
| public static final Word or = new Word("||",Tag.OR ); | |
| public static final Word eq = new Word("==",Tag.EQ); | |
| public static final Word ne = new Word("!=",Tag.NE); | |
| public static final Word le = new Word("<=",Tag.LE); | |
| public static final Word ge = new Word(">=",Tag.GE); | |
| public static final Word minus = new Word("minus" ,Tag.MINUS); | |
| public static final Word True = new Word("true" ,Tag.TRUE); | |
| public static final Word False = new Word("false" ,Tag.FALSE); | |
| public static final Word temp = new Word("t" ,Tag.TEMP); | |
| } | |
| public class Type extends Word{ | |
| public final int width ; | |
| public Type (String s,Tag tag,int w){super(s,tag); this.width=w;} | |
| public static final Type Int = new Type("int" , Tag.BASIC ,4); | |
| public static final Type Float = new Type("float" , Tag.BASIC ,8); | |
| public static final Type Char = new Type("char" , Tag.BASIC ,1); | |
| public static final Type Bool = new Type("bool" , Tag.BASIC ,1); | |
| } | |
| public class Lexer { | |
| public static int line = 1; | |
| int peek = ' '; | |
| Map<String,Word> words = new HashMap<>(); | |
| public Map<String,Word> getWords(){ | |
| return Collections.unmodifiableMap(this.words); } | |
| void reserve(Word w){words.put(w.lexeme,w);} | |
| public Lexer(){ | |
| this.reserve(new Word("if" ,Tag.IF)); | |
| this.reserve(new Word("else" ,Tag.ELSE)); | |
| this.reserve(new Word("while",Tag.WHILE)); | |
| this.reserve(new Word("do" ,Tag.DO)); | |
| this.reserve(new Word("break",Tag.BREAK)); | |
| this.reserve(Word.True); | |
| this.reserve(Word.False); | |
| this.reserve(Type.Int); | |
| this.reserve(Type.Char); | |
| this.reserve(Type.Bool); | |
| this.reserve(Type.Float); | |
| } | |
| private int nextChar() throws IOException { | |
| return System.in.read(); } | |
| private void readch() throws IOException { | |
| this.peek = this.nextChar(); } | |
| private boolean readch(char c) throws IOException { | |
| readch(); | |
| if(this.peek != c ) return false; | |
| this.peek = ' '; | |
| return true; | |
| } | |
| public Token scan() throws IOException { | |
| // Remove blank & comments (Exercise 2.6.1) | |
| blankLoop : for ( ; ; this.readch() ){ | |
| switch (this.peek) { | |
| case ' ': | |
| case '\t': | |
| continue; | |
| case '\n': | |
| Lexer.line ++; continue; | |
| case -1: | |
| return new Eof(); | |
| case '/': | |
| this.readch(); | |
| switch(this.peek){ | |
| case '/': | |
| this.skipLineComments(); | |
| if(this.peek==-1) return new Eof(); | |
| Lexer.line++; continue; | |
| case '*': | |
| this.skipComments(); | |
| if(this.peek==-1) return new Eof(); | |
| continue; | |
| default: | |
| return new Char('/'); | |
| } | |
| default: | |
| break blankLoop; | |
| } | |
| } | |
| // Operators (Exercise 2.6.2)] | |
| switch (this.peek){ | |
| case '&': | |
| if(this.readch('&')) return Word.and; else return new Char('&'); | |
| case '|': | |
| if(this.readch('|')) return Word.or; else return new Char('|'); | |
| case '=': | |
| if(this.readch('=')) return Word.eq; else return new Char('='); | |
| case '!': | |
| if(this.readch('=')) return Word.ne; else return new Char('!'); | |
| case '<': | |
| if(this.readch('=')) return Word.le; else return new Char('<'); | |
| case '>': | |
| if(this.readch('=')) return Word.ge; else return new Char('>'); | |
| } | |
| // Numerals (Exercise 2.6.3) | |
| if ( Character.isDigit (this.peek)) { | |
| int i = 0; | |
| do { | |
| i = 10*i + Character.digit(this.peek,10); | |
| this.readch(); | |
| } while(Character.isDigit(this.peek)); | |
| if (this.peek != '.') { | |
| return new Num(i); | |
| } else { | |
| this.readch(); | |
| if(Character.isDigit(this.peek)){ | |
| return new Real(i + this.scanDecimal()); | |
| } else { | |
| return new Real (i); | |
| } | |
| } | |
| } else if ( this.peek == '.' ){ | |
| this.readch(); | |
| if (Character.isDigit(this.peek)){ | |
| return new Real (this.scanDecimal()); | |
| } else { | |
| return new Char('.'); | |
| } | |
| } | |
| // this.peek must be neither a digit nor a '.' | |
| // Id | |
| if (Character.isLetter(this.peek)){ | |
| var b = new StringBuilder(); | |
| do{ | |
| b.append((char)this.peek); this.readch(); | |
| } while (Character.isLetterOrDigit(this.peek)); | |
| String s = b.toString(); | |
| var w = this.words.get(s); | |
| if(w != null) return w; | |
| w = new Word(s,Tag.ID); | |
| this.words.put(s,w); | |
| return w; | |
| } | |
| // Others | |
| var tok = new Char((char)this.peek); | |
| this.peek = ' '; | |
| return tok; | |
| } | |
| private void skipLineComments() throws IOException { | |
| for(;;){ | |
| this.readch(); | |
| switch(this.peek) { | |
| case '\n': return; | |
| case -1 : return; | |
| default : continue; | |
| } | |
| } | |
| } | |
| private void skipComments() throws IOException { | |
| for(;;){ | |
| this.readch(); | |
| switch(this.peek){ | |
| case '\n' : Lexer.line++; continue; | |
| case '*': | |
| this.readch(); | |
| switch(this.peek){ | |
| case '/' : return; | |
| case '\n' : Lexer.line++; continue; | |
| case -1 : return; | |
| default : continue; | |
| } | |
| case -1 : return; | |
| default: continue; | |
| } | |
| } | |
| } | |
| private double scanDecimal() throws IOException { | |
| // peek must be a digit | |
| int i = 0; | |
| int k = 1; | |
| do { | |
| i = 10*i + Character.digit(this.peek,10); | |
| k = 10*k; | |
| this.readch(); | |
| } while (Character.isDigit(this.peek)) ; | |
| return ((double)i) / ((double)k); | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment