Last active
August 29, 2015 13:56
-
-
Save Taluu/9232626 to your computer and use it in GitHub Desktop.
Following nohar's tutorial :) http://calmettes.arnaud.free.fr/compilation.pdf (FR)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| module Compiler | |
| module Lexeme | |
| CAT = '' | |
| UNION = '|' | |
| OPTION = '?' | |
| REPEAT = '+' | |
| CLOSURE = '*' | |
| class Symbol | |
| attr_reader :prec | |
| def initialize(prec) | |
| @prec = prec | |
| end | |
| end | |
| class Char < Symbol | |
| attr_reader :value | |
| def initialize(value) | |
| super(4) | |
| @value = value | |
| end | |
| def to_s | |
| value = @value | |
| if '*|()+?'.include? value | |
| value = '\\' + value | |
| end | |
| return value | |
| end | |
| end | |
| class Operator < Symbol | |
| @@list = {UNION => 1, | |
| CAT => 2, | |
| OPTION => 3, | |
| REPEAT => 3, | |
| CLOSURE => 3} | |
| attr_reader :operator | |
| def initialize(operator) | |
| super(@@list[operator]) | |
| @operator = operator | |
| end | |
| end | |
| class Unop < Operator | |
| attr_reader :argument | |
| def initialize(operator, argument) | |
| super(operator) | |
| @argument = argument | |
| end | |
| def to_s | |
| arg = @argument.to_s | |
| if (@argument.prec < @prec) | |
| arg = "(%s)" % arg | |
| end | |
| return arg + @operator | |
| end | |
| end | |
| class Binop < Operator | |
| attr_reader :left, :right | |
| def initialize(operator, left, right) | |
| super(operator) | |
| @left = left | |
| @right = right | |
| end | |
| def to_s | |
| left, right = @left.to_s, @right.to_s | |
| if (@left.prec < @prec) | |
| left = "(%s)" % left | |
| end | |
| if (@right.prec < @prec) | |
| right = "(%s)" % right | |
| end | |
| return left + @operator + right | |
| end | |
| end | |
| class << self | |
| def option(arg) | |
| Unop.new(OPTION, arg) | |
| end | |
| def repeat(arg) | |
| Unop.new(REPEAT, arg) | |
| end | |
| def closure(arg) | |
| Unop.new(CLOSURE, arg) | |
| end | |
| def cat(left, right) | |
| Binop.new(CAT, left, right) | |
| end | |
| def union(left, right) | |
| Binop.new(UNION, left, right) | |
| end | |
| def char(arg) | |
| Char.new(arg) | |
| end | |
| end | |
| end | |
| end |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| $LOAD_PATH << './' | |
| require 'lexemes.rb' | |
| require 'ostruct' | |
| module Kernel | |
| def typehint(argument, klass) | |
| raise ArgumentError, "Invalid argument. Expected %s, had %s" % [klass, argument.class.name] unless argument.kind_of? klass | |
| end | |
| end | |
| module Compiler | |
| class Lexer | |
| attr_reader :input | |
| def initialize(input) | |
| typehint(input, String) | |
| @buffer = nil | |
| @input = input | |
| @iterator = input.each_char.with_index | |
| end | |
| def consume | |
| if not @buffer.nil? | |
| token, @buffer = @buffer, nil | |
| return token | |
| end | |
| token = nil | |
| begin | |
| token = @iterator.next | |
| if '*|()+?'.include? token[0] | |
| token = OpenStruct.new(:name => token[0], :value => token[0], :index => token[1]) | |
| elsif token == '\\' | |
| token = OpenStruct.new(:name => 'CHAR', :value => @iterator.next[0], :index => token[1]) | |
| else | |
| token = OpenStruct.new(:name => 'CHAR', :value => token[0], :index => token[1]) | |
| end | |
| rescue StopIteration | |
| end | |
| return token | |
| end | |
| def peek | |
| if @buffer.nil? | |
| @buffer = consume | |
| end | |
| return @buffer | |
| end | |
| end | |
| class ParseError < StandardError | |
| end | |
| class Parser | |
| @@first = {'operand' => ['(', 'CHAR'].freeze, | |
| 'unop' => ['(', 'CHAR'].freeze, | |
| 'cat' => ['(', 'CHAR'].freeze, | |
| 'expr' => ['(', 'CHAR'].freeze} | |
| @@following = {'expr' => [')', nil].freeze, | |
| 'cat' => ['|', ')', nil].freeze, | |
| 'unop' => ['(', 'CHAR', '|', ')', nil].freeze, | |
| 'operand' => ['?', '*', '+', '(', 'CHAR', ')', '|', nil].freeze} | |
| def initialize(lexer) | |
| typehint(lexer, Lexer) | |
| @lexer = lexer | |
| end | |
| def parse | |
| ast = expr | |
| raise ParseError, "Unbalanced )" if not @lexer.peek.nil? | |
| return ast | |
| end | |
| # Checks that the node can be used by the next token | |
| private | |
| def begins(node) | |
| term = @lexer.peek | |
| if not term.nil? | |
| term = term.name | |
| end | |
| return @@first[node].include? term | |
| end | |
| private | |
| def error(current, suggestions) | |
| typehint(suggestions, Array) | |
| s = 'end of input' | |
| pos = '' | |
| if not current.nil? | |
| s = '"%s" on position %d' % [current.value, current.index + 1] | |
| pos = @lexer.input + "\n" + (' ' * (current.index)) + "^" | |
| end | |
| suggested = []; | |
| suggestions.each do |suggestion| | |
| case suggestion | |
| when 'CHAR' | |
| suggestion = 'character' | |
| when nil | |
| suggestion = 'end of input' | |
| else | |
| suggestion = '"%s"' % suggestion | |
| end | |
| suggested.push(suggestion) | |
| end | |
| suggestions = suggestions.replace(suggested) | |
| suggested = suggestions[0...-1].join(',') | |
| if (1 < suggestions.length) | |
| suggested = suggested + ' or ' | |
| end | |
| suggested = suggested + suggestions[-1] | |
| raise ParseError, "Found %s, expected %s\n%s" % [s, suggested, pos] | |
| end | |
| # Validates a node, as it should end with an expected token | |
| private | |
| def check_ends(node) | |
| term = @lexer.peek | |
| if not term.nil? | |
| term = term.name | |
| end | |
| error(@lexer.peek, @@following[node].dup) unless @@following[node].include? term | |
| end | |
| # Represents a expr rule | |
| # | |
| # expr ::= cat '|' expr | |
| # | cat | |
| # | |
| private | |
| def expr | |
| ast = cat | |
| token = @lexer.peek | |
| if not token.nil? and token.name == '|' | |
| @lexer.consume # consume | | |
| ast = Lexeme::union(ast, expr) | |
| end | |
| check_ends('expr') | |
| return ast | |
| end | |
| # Represents a cat rule | |
| # | |
| # cat ::= unop cat | |
| # | unop | |
| # | |
| private | |
| def cat | |
| ast = unop | |
| if begins('cat') | |
| ast = Lexeme::cat(ast, cat) | |
| end | |
| check_ends('cat') | |
| return ast | |
| end | |
| # Represents a unop rule | |
| # | |
| # unop ::= operand '?' | |
| # | operand '*' | |
| # | operand '+' | |
| # | operand | |
| # | |
| private | |
| def unop | |
| ast = operand | |
| token = @lexer.peek | |
| if not token.nil? | |
| if token.name == '?' | |
| @lexer.consume # consume ? | |
| ast = Lexeme::option(ast) | |
| elsif token.name == '*' | |
| @lexer.consume # consume * | |
| ast = Lexeme::closure(ast) | |
| elsif token.name == '+' | |
| @lexer.consume # consume + | |
| ast = Lexeme::repeat(ast) | |
| end | |
| end | |
| check_ends('unop') | |
| return ast | |
| end | |
| # Represents a operand rule | |
| # | |
| # operand ::= '(' expr ')' | |
| # | CHAR | |
| # | |
| private | |
| def operand | |
| token = @lexer.peek | |
| error(token, @@first['operand'].dup) unless token | |
| ast = nil | |
| if token.name == '(' | |
| @lexer.consume # consume ( | |
| ast = expr | |
| raise ParseError, "Unbalanced (" if @lexer.consume.nil? # consume ) | |
| elsif token.name == 'CHAR' | |
| @lexer.consume # consume CHAR | |
| ast = Lexeme::char(token.value) | |
| else | |
| error(token, @@first['operand'].dup) | |
| end | |
| check_ends('operand') | |
| return ast | |
| end | |
| end | |
| end |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| $LOAD_PATH << './' | |
| require 'lexemes.rb' | |
| def from_postfix(input) | |
| unops = {'*' => lambda {|a| return Compiler::Lexeme::closure(a)}, | |
| '+' => lambda {|a| return Compiler::Lexeme::repeat(a)}, | |
| '?' => lambda {|a| return Compiler::Lexeme::option(a)}} | |
| binops = {'.' => lambda {|a, b| return Compiler::Lexeme::cat(a, b)}, | |
| '|' => lambda {|a, b| return Compiler::Lexeme::union(a, b)}} | |
| stack = [] | |
| input.each_char do |c| | |
| if binops.include? c | |
| right, left = stack.pop, stack.pop | |
| raise IndexError if left.nil? | |
| raise IndexError if right.nil? | |
| stack << binops[c].call(left, right) | |
| elsif unops.include? c | |
| arg = stack.pop | |
| raise IndexError if arg.nil? | |
| stack << unops[c].call(arg) | |
| else | |
| stack << Compiler::Lexeme::char(c) | |
| end | |
| end | |
| stack.pop | |
| end | |
| trap("SIGINT") { raise Interrupt } | |
| while true do | |
| begin | |
| puts ">>> " | |
| puts from_postfix(gets.chomp!) | |
| rescue IndexError | |
| puts "[ERROR] malformed expression" | |
| rescue Interrupt | |
| break | |
| end | |
| end | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| $LOAD_PATH << './' | |
| require 'lexemes' | |
| module Compiler | |
| a = Lexeme.union(Lexeme.repeat(Lexeme.union(Lexeme.char('a'), Lexeme.char('b'))), Lexeme.option(Lexeme.char('c'))) | |
| puts a # prints ((a|b)+)|c? | |
| end |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| $LOAD_PATH << './' | |
| require 'parser.rb' | |
| module Compiler | |
| trap("SIGINT") { raise Interrupt } | |
| while true do | |
| begin | |
| puts ">>> " | |
| puts Parser.new(Lexer.new(gets.chomp!)).parse | |
| rescue ParseError => e | |
| puts "[ERROR] " + e.message | |
| rescue Interrupt | |
| break | |
| end | |
| end | |
| end | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment