Skip to content

Instantly share code, notes, and snippets.

@Taluu
Last active August 29, 2015 13:56
Show Gist options
  • Select an option

  • Save Taluu/9232626 to your computer and use it in GitHub Desktop.

Select an option

Save Taluu/9232626 to your computer and use it in GitHub Desktop.
Following nohar's tutorial :) http://calmettes.arnaud.free.fr/compilation.pdf (FR)
#!/usr/bin/env ruby
module Compiler
module Lexeme
CAT = ''
UNION = '|'
OPTION = '?'
REPEAT = '+'
CLOSURE = '*'
class Symbol
attr_reader :prec
def initialize(prec)
@prec = prec
end
end
class Char < Symbol
attr_reader :value
def initialize(value)
super(4)
@value = value
end
def to_s
value = @value
if '*|()+?'.include? value
value = '\\' + value
end
return value
end
end
class Operator < Symbol
@@list = {UNION => 1,
CAT => 2,
OPTION => 3,
REPEAT => 3,
CLOSURE => 3}
attr_reader :operator
def initialize(operator)
super(@@list[operator])
@operator = operator
end
end
class Unop < Operator
attr_reader :argument
def initialize(operator, argument)
super(operator)
@argument = argument
end
def to_s
arg = @argument.to_s
if (@argument.prec < @prec)
arg = "(%s)" % arg
end
return arg + @operator
end
end
class Binop < Operator
attr_reader :left, :right
def initialize(operator, left, right)
super(operator)
@left = left
@right = right
end
def to_s
left, right = @left.to_s, @right.to_s
if (@left.prec < @prec)
left = "(%s)" % left
end
if (@right.prec < @prec)
right = "(%s)" % right
end
return left + @operator + right
end
end
class << self
def option(arg)
Unop.new(OPTION, arg)
end
def repeat(arg)
Unop.new(REPEAT, arg)
end
def closure(arg)
Unop.new(CLOSURE, arg)
end
def cat(left, right)
Binop.new(CAT, left, right)
end
def union(left, right)
Binop.new(UNION, left, right)
end
def char(arg)
Char.new(arg)
end
end
end
end
#!/usr/bin/env ruby
$LOAD_PATH << './'
require 'lexemes.rb'
require 'ostruct'
module Kernel
def typehint(argument, klass)
raise ArgumentError, "Invalid argument. Expected %s, had %s" % [klass, argument.class.name] unless argument.kind_of? klass
end
end
module Compiler
class Lexer
attr_reader :input
def initialize(input)
typehint(input, String)
@buffer = nil
@input = input
@iterator = input.each_char.with_index
end
def consume
if not @buffer.nil?
token, @buffer = @buffer, nil
return token
end
token = nil
begin
token = @iterator.next
if '*|()+?'.include? token[0]
token = OpenStruct.new(:name => token[0], :value => token[0], :index => token[1])
elsif token == '\\'
token = OpenStruct.new(:name => 'CHAR', :value => @iterator.next[0], :index => token[1])
else
token = OpenStruct.new(:name => 'CHAR', :value => token[0], :index => token[1])
end
rescue StopIteration
end
return token
end
def peek
if @buffer.nil?
@buffer = consume
end
return @buffer
end
end
class ParseError < StandardError
end
class Parser
@@first = {'operand' => ['(', 'CHAR'].freeze,
'unop' => ['(', 'CHAR'].freeze,
'cat' => ['(', 'CHAR'].freeze,
'expr' => ['(', 'CHAR'].freeze}
@@following = {'expr' => [')', nil].freeze,
'cat' => ['|', ')', nil].freeze,
'unop' => ['(', 'CHAR', '|', ')', nil].freeze,
'operand' => ['?', '*', '+', '(', 'CHAR', ')', '|', nil].freeze}
def initialize(lexer)
typehint(lexer, Lexer)
@lexer = lexer
end
def parse
ast = expr
raise ParseError, "Unbalanced )" if not @lexer.peek.nil?
return ast
end
# Checks that the node can be used by the next token
private
def begins(node)
term = @lexer.peek
if not term.nil?
term = term.name
end
return @@first[node].include? term
end
private
def error(current, suggestions)
typehint(suggestions, Array)
s = 'end of input'
pos = ''
if not current.nil?
s = '"%s" on position %d' % [current.value, current.index + 1]
pos = @lexer.input + "\n" + (' ' * (current.index)) + "^"
end
suggested = [];
suggestions.each do |suggestion|
case suggestion
when 'CHAR'
suggestion = 'character'
when nil
suggestion = 'end of input'
else
suggestion = '"%s"' % suggestion
end
suggested.push(suggestion)
end
suggestions = suggestions.replace(suggested)
suggested = suggestions[0...-1].join(',')
if (1 < suggestions.length)
suggested = suggested + ' or '
end
suggested = suggested + suggestions[-1]
raise ParseError, "Found %s, expected %s\n%s" % [s, suggested, pos]
end
# Validates a node, as it should end with an expected token
private
def check_ends(node)
term = @lexer.peek
if not term.nil?
term = term.name
end
error(@lexer.peek, @@following[node].dup) unless @@following[node].include? term
end
# Represents a expr rule
#
# expr ::= cat '|' expr
# | cat
#
private
def expr
ast = cat
token = @lexer.peek
if not token.nil? and token.name == '|'
@lexer.consume # consume |
ast = Lexeme::union(ast, expr)
end
check_ends('expr')
return ast
end
# Represents a cat rule
#
# cat ::= unop cat
# | unop
#
private
def cat
ast = unop
if begins('cat')
ast = Lexeme::cat(ast, cat)
end
check_ends('cat')
return ast
end
# Represents a unop rule
#
# unop ::= operand '?'
# | operand '*'
# | operand '+'
# | operand
#
private
def unop
ast = operand
token = @lexer.peek
if not token.nil?
if token.name == '?'
@lexer.consume # consume ?
ast = Lexeme::option(ast)
elsif token.name == '*'
@lexer.consume # consume *
ast = Lexeme::closure(ast)
elsif token.name == '+'
@lexer.consume # consume +
ast = Lexeme::repeat(ast)
end
end
check_ends('unop')
return ast
end
# Represents a operand rule
#
# operand ::= '(' expr ')'
# | CHAR
#
private
def operand
token = @lexer.peek
error(token, @@first['operand'].dup) unless token
ast = nil
if token.name == '('
@lexer.consume # consume (
ast = expr
raise ParseError, "Unbalanced (" if @lexer.consume.nil? # consume )
elsif token.name == 'CHAR'
@lexer.consume # consume CHAR
ast = Lexeme::char(token.value)
else
error(token, @@first['operand'].dup)
end
check_ends('operand')
return ast
end
end
end
#!/usr/bin/env ruby
$LOAD_PATH << './'
require 'lexemes.rb'
def from_postfix(input)
unops = {'*' => lambda {|a| return Compiler::Lexeme::closure(a)},
'+' => lambda {|a| return Compiler::Lexeme::repeat(a)},
'?' => lambda {|a| return Compiler::Lexeme::option(a)}}
binops = {'.' => lambda {|a, b| return Compiler::Lexeme::cat(a, b)},
'|' => lambda {|a, b| return Compiler::Lexeme::union(a, b)}}
stack = []
input.each_char do |c|
if binops.include? c
right, left = stack.pop, stack.pop
raise IndexError if left.nil?
raise IndexError if right.nil?
stack << binops[c].call(left, right)
elsif unops.include? c
arg = stack.pop
raise IndexError if arg.nil?
stack << unops[c].call(arg)
else
stack << Compiler::Lexeme::char(c)
end
end
stack.pop
end
trap("SIGINT") { raise Interrupt }
while true do
begin
puts ">>> "
puts from_postfix(gets.chomp!)
rescue IndexError
puts "[ERROR] malformed expression"
rescue Interrupt
break
end
end
#!/usr/bin/env ruby
$LOAD_PATH << './'
require 'lexemes'
module Compiler
a = Lexeme.union(Lexeme.repeat(Lexeme.union(Lexeme.char('a'), Lexeme.char('b'))), Lexeme.option(Lexeme.char('c')))
puts a # prints ((a|b)+)|c?
end
#!/usr/bin/env ruby
$LOAD_PATH << './'
require 'parser.rb'
module Compiler
trap("SIGINT") { raise Interrupt }
while true do
begin
puts ">>> "
puts Parser.new(Lexer.new(gets.chomp!)).parse
rescue ParseError => e
puts "[ERROR] " + e.message
rescue Interrupt
break
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment