Created
November 14, 2015 12:52
-
-
Save beta/4a1a98774ed3c31f4e91 to your computer and use it in GitHub Desktop.
A simple lexer for C programs.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package lexer; | |
import java.io.IOException; | |
import java.nio.file.Files; | |
import java.nio.file.Paths; | |
import java.util.ArrayList; | |
import java.util.List; | |
/**
 * A simple, line-oriented lexer for C source files.
 *
 * <p>{@link #run()} reads the input file, skips preprocessor directives and
 * comments, and prints three things to standard output: size statistics
 * (characters, words, lines), the symbol table of identifiers, and the token
 * stream. Tokens are rendered as {@code (kind, value)} pairs where kind is one
 * of {@code key}, {@code id}, {@code const}, {@code op} or {@code delim}.
 */
public class Lexer {

    /** C keywords. {@code sizeof} is listed but is reported as an operator. */
    private static final String[] KEYWORDS = {
        "auto", "break", "case", "char", "const", "continue", "default", "do",
        "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
        "long", "register", "return", "short", "signed", "sizeof", "static",
        "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while"
    };

    /** Operators recognised by the lexer; matched longest-first. */
    private static final String[] OPERATORS = {
        "<", ">", "<=", ">=", "==", "!=", "+", "-", "*", "/", "%", "++", "--",
        "<<", ">>", "&", "|", "^", "~", "&&", "||", "!", "?", ":", "=", "+=", "-=",
        "*=", "/=", "%=", "<<=", ">>=", "&=", "^=", "|=", "->", "."
    };

    /** Length of the longest entry in {@link #OPERATORS}. */
    private static final int MAX_OPERATOR_LENGTH = 3;

    private static final String[] DELIMITERS = {
        ",", ";", "(", ")", "[", "]", "{", "}"
    };

    private final String mInputFile;
    private List<String> mSymbols;
    /** Size of the input file in bytes, reported as the character count. */
    private long mCharCount = 0;
    private long mWordCount = 0;
    private long mLineCount = 0;
    /** True while scanning inside a block comment that may span several lines. */
    private boolean mIsInComment = false;

    /**
     * Creates a lexer for the given file.
     *
     * @param inputFile path of the C source file to analyse
     */
    public Lexer(String inputFile) {
        mInputFile = inputFile;
    }

    /**
     * Reads the input file, tokenizes it and prints the statistics, the symbol
     * table and the token stream to standard output.
     */
    public void run() {
        // Reset per-run state so that calling run() twice gives the same output.
        mSymbols = new ArrayList<>();
        mWordCount = 0;
        mIsInComment = false;

        StringBuilder tokens = new StringBuilder();
        for (String line : readFile()) {
            // Ignore preprocessor directives, but only outside a block comment:
            // a '#' line inside a comment may contain the comment terminator.
            if (!mIsInComment && line.trim().startsWith("#")) {
                continue;
            }
            tokens.append(processLine(line));
        }

        System.out.println("字符数:" + mCharCount);
        System.out.println("单词数:" + mWordCount);
        System.out.println("行数:" + mLineCount);
        System.out.println();
        System.out.println("符号表:");
        for (int i = 0; i < mSymbols.size(); i++) {
            System.out.printf("%d\t%s\n", i, mSymbols.get(i));
        }
        System.out.println();
        System.out.println("记号流:");
        System.out.println(tokens);
    }

    /**
     * Reads all lines of the input file and records its size and line count.
     * On an I/O error a message is printed to standard error and the process
     * exits with status 2.
     *
     * @return the lines of the input file
     */
    private List<String> readFile() {
        try {
            // Files.size is a byte count; it equals the character count for ASCII input.
            mCharCount = Files.size(Paths.get(mInputFile));
            List<String> lines = Files.readAllLines(Paths.get(mInputFile));
            mLineCount = lines.size();
            return lines;
        } catch (IOException ex) {
            System.err.println("无法打开文件 " + mInputFile);
            ex.printStackTrace();
            System.exit(2);
        }
        // Only reached if System.exit returns; keeps the compiler satisfied.
        return new ArrayList<>();
    }

    /**
     * Tokenizes one line of code.
     *
     * <p>Block-comment state is carried across calls in {@code mIsInComment};
     * identifiers are registered in the symbol table and the word count is
     * updated as a side effect.
     *
     * @param line the line to process
     * @return the token stream produced for this line (possibly empty)
     */
    private String processLine(String line) {
        StringBuilder tokens = new StringBuilder();
        int length = line.length();
        int i = 0;
        while (i < length) {
            if (mIsInComment) {
                // Skip ahead to the end of the block comment, if it ends on this line.
                int close = line.indexOf("*/", i);
                if (close < 0) {
                    break;
                }
                mIsInComment = false;
                i = close + 2;
                continue;
            }

            char c = line.charAt(i);
            if (c == '/' && i + 1 < length) {
                char next = line.charAt(i + 1);
                if (next == '/') {
                    // Line comment: the rest of the line is ignored.
                    break;
                }
                if (next == '*') {
                    // Start of a block comment.
                    mIsInComment = true;
                    i += 2;
                    continue;
                }
            }

            if (c == '_' || isAlphabet(c)) {
                // Keyword, sizeof operator or identifier.
                int end = scanIdentifier(line, i);
                String name = line.substring(i, end);
                if (name.equals("sizeof")) {
                    // Must be tested before isKeyword, which also contains sizeof.
                    tokens.append("(op, sizeof)");
                } else if (isKeyword(name)) {
                    tokens.append("(key, ").append(name).append(")");
                } else {
                    tokens.append("(id, ").append(addSymbol(name)).append(")");
                }
                mWordCount++;
                i = end;
            } else if (isDigit(c)
                    || (c == '.' && i + 1 < length && isDigit(line.charAt(i + 1)))) {
                // Numeric constant (decimal, hex, floating point, with suffixes).
                int end = scanNumber(line, i);
                tokens.append("(const, ").append(line, i, end).append(")");
                mWordCount++;
                i = end;
            } else if (c == '\'' || c == '"') {
                // Character or string constant; tolerate a missing closing quote.
                int close = findClosingQuote(line, i);
                int contentEnd = close >= 0 ? close : length;
                int end = close >= 0 ? close + 1 : length;
                tokens.append("(const, ").append(line, i, end).append(")");
                // Each space-separated piece of the literal counts as one word.
                mWordCount += line.substring(i + 1, contentEnd).split(" ").length;
                i = end;
            } else {
                String op = matchOperator(line, i);
                if (op != null) {
                    tokens.append("(op, ").append(op).append(")");
                    i += op.length();
                } else {
                    if (isDelimiter(String.valueOf(c))) {
                        tokens.append("(delim, ").append(c).append(")");
                    }
                    // Whitespace and unrecognised characters are skipped.
                    i++;
                }
            }
        }
        return tokens.toString();
    }

    /** Returns the index just past the identifier that starts at {@code start}. */
    private static int scanIdentifier(String line, int start) {
        int j = start;
        while (j < line.length()) {
            char ch = line.charAt(j);
            if (ch != '_' && !isAlphabet(ch) && !isDigit(ch)) {
                break;
            }
            j++;
        }
        return j;
    }

    /**
     * Returns the index just past the numeric constant that starts at
     * {@code start}. Digits, letters (hex digits, suffixes, exponent markers)
     * and '.' are consumed, as is a sign that directly follows an exponent
     * marker (e/E for decimal constants, p/P for hexadecimal ones).
     */
    private static int scanNumber(String line, int start) {
        int length = line.length();
        boolean hex = start + 1 < length
                && line.charAt(start) == '0'
                && (line.charAt(start + 1) == 'x' || line.charAt(start + 1) == 'X');
        int j = start;
        while (j < length) {
            char ch = line.charAt(j);
            if (isDigit(ch) || isAlphabet(ch) || ch == '.') {
                j++;
            } else if ((ch == '+' || ch == '-') && j > start
                    && isExponentMarker(line.charAt(j - 1), hex)) {
                j++;
            } else {
                break;
            }
        }
        return j;
    }

    /** Tells whether {@code ch} introduces an exponent in a (hex) constant. */
    private static boolean isExponentMarker(char ch, boolean hex) {
        return hex ? (ch == 'p' || ch == 'P') : (ch == 'e' || ch == 'E');
    }

    /**
     * Finds the quote that closes the literal opened at {@code open}, skipping
     * backslash-escaped characters.
     *
     * @return index of the closing quote, or -1 if the literal is unterminated
     */
    private static int findClosingQuote(String line, int open) {
        char quote = line.charAt(open);
        for (int j = open + 1; j < line.length(); j++) {
            char ch = line.charAt(j);
            if (ch == '\\') {
                j++; // the escaped character can never close the literal
            } else if (ch == quote) {
                return j;
            }
        }
        return -1;
    }

    /**
     * Returns the longest operator that starts at {@code start}, or
     * {@code null} if no operator starts there.
     */
    private String matchOperator(String line, int start) {
        int maxLen = Math.min(MAX_OPERATOR_LENGTH, line.length() - start);
        for (int len = maxLen; len >= 1; len--) {
            String candidate = line.substring(start, start + len);
            if (isOperator(candidate)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Adds a symbol to the symbol table unless it is already present.
     *
     * @param symbol the symbol to add
     * @return the index of the symbol in the symbol table
     */
    private int addSymbol(String symbol) {
        int index = mSymbols.indexOf(symbol);
        if (index < 0) {
            mSymbols.add(symbol);
            index = mSymbols.size() - 1;
        }
        return index;
    }

    private static boolean isDigit(char c) {
        return c >= '0' && c <= '9';
    }

    private static boolean isAlphabet(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    private static boolean contains(String[] table, String s) {
        for (String entry : table) {
            if (entry.equals(s)) {
                return true;
            }
        }
        return false;
    }

    private boolean isKeyword(String s) {
        return contains(KEYWORDS, s);
    }

    private boolean isOperator(String s) {
        return contains(OPERATORS, s);
    }

    private boolean isDelimiter(String s) {
        return contains(DELIMITERS, s);
    }

    /**
     * Command-line entry point. Expects the path of the file to analyse as the
     * first argument; exits with status 1 when it is missing.
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.err.println("缺少文件名");
            System.exit(1);
        }
        new Lexer(args[0]).run();
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment