Last active
August 29, 2015 14:04
-
-
Save jimsynz/debd983235429bc5885f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Gems needed to generate the lexer and run its specs.
source 'https://rubygems.org'

# Lexer generator (lexer.rex.rb is produced by it) and the task runner.
gem 'oedipus_lex'
gem 'rake'

# RSpec is pulled in as its individual component gems.
%w[rspec-core rspec-mocks rspec-expectations].each do |component|
  gem component
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
GEM | |
remote: https://rubygems.org/ | |
specs: | |
diff-lcs (1.2.5) | |
oedipus_lex (2.3.1) | |
rake (10.3.2) | |
rspec-core (3.0.2) | |
rspec-support (~> 3.0.0) | |
rspec-expectations (3.0.2) | |
diff-lcs (>= 1.2.0, < 2.0) | |
rspec-support (~> 3.0.0) | |
rspec-mocks (3.0.2) | |
rspec-support (~> 3.0.0) | |
rspec-support (3.0.2) | |
PLATFORMS | |
ruby | |
DEPENDENCIES | |
oedipus_lex | |
rake | |
rspec-core | |
rspec-expectations | |
rspec-mocks |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require './lexer.rex' | |
# Hand-written half of the lexer. It reopens the scanner class generated from
# lexer.rex and adds token enumeration plus indentation tracking.
class Lexer
  # Number of columns that make up one indentation level. A tab is expanded
  # to this many columns, so one tab equals one level.
  INDENT_WIDTH = 2

  # Current indentation depth, counted in levels (not columns).
  attr_accessor :indent_level

  # Creates a lexer over +str+ and primes the generated scanner with it.
  #
  # @param str [String] source text to tokenize
  def initialize str
    super()
    @indent_level = 0
    @pending_tokens = []
    parse str
  end

  # Enumerates every token in the source.
  #
  # INDENT/OUTDENT tokens queued by #in_or_out_dent are emitted before the
  # token whose scan triggered them, and any still queued when the input
  # runs out are emitted last.
  #
  # @return [Enumerator] yields two-element token arrays
  def each
    Enumerator.new do |y|
      loop do
        token = next_token
        y << pending_tokens.shift until pending_tokens.empty?
        break unless token
        y << token
      end
    end
  end

  # @return [Array<Array>] all tokens, eagerly collected
  def tokens
    each.to_a
  end

  # Scanner action for a newline followed by leading whitespace.
  #
  # Queues one [:INDENT, ' '] or [:OUTDENT, ' '] token per level of change
  # relative to the current depth and records the new depth. It returns nil
  # rather than the tokens themselves: the generated next_token accepts only a
  # single token per action and treats nil as "keep scanning", so the queued
  # tokens are delivered by #each instead.
  #
  # @param text [String] the matched newline(s) and indentation
  # @return [nil]
  # @raise [RuntimeError] when the indentation is not a whole number of levels
  def in_or_out_dent text
    columns = text.delete("\n").gsub("\t", ' ' * INDENT_WIDTH).size
    raise "Invalid indent level of #{columns} spaces" unless (columns % INDENT_WIDTH).zero?

    depth = columns / INDENT_WIDTH
    change = depth - @indent_level
    kind = change > 0 ? :INDENT : :OUTDENT
    change.abs.times { pending_tokens << [kind, ' '] }
    @indent_level = depth
    nil
  end

  # Called by the generated #parse; intentionally a no-op because tokens are
  # pulled lazily through #each.
  def do_parse; end

  private

  # Queue of indentation tokens waiting to be emitted. Created lazily so it
  # also works on instances that did not go through #initialize.
  def pending_tokens
    @pending_tokens ||= []
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# oedipus_lex grammar for the lexer. Rules are tried top to bottom, so more
# specific patterns must come before the general ones they overlap with.
class Lexer

macros
  IDENTIFIER            /[a-zA-Z_][a-zA-Z_0-9]*/
  INT                   /(0|[1-9][0-9]*)/

rules
# Quoted strings: the opening quote enters a string state, whose rules emit
# the body as STRING and leave the state again on the closing quote.
                        /'/                 :SINGLE_TICK_STRING
  :SINGLE_TICK_STRING   /[^']+/             { [ :STRING, text ] }
  :SINGLE_TICK_STRING   /'/                 nil
                        /"/                 :DOUBLE_TICK_STRING
  :DOUBLE_TICK_STRING   /[^"]+/             { [ :STRING, text ] }
  :DOUBLE_TICK_STRING   /"/                 nil

# Keyword. The word boundary keeps identifiers that merely start with "def"
# (e.g. "default") from being split into DEF plus a trailing identifier.
                        /def\b/             { [ :DEF, text ] }

# Numbers: float, hex and binary come before the plain integer rule because
# that rule would otherwise consume their leading digits.
                        /#{INT}\.[0-9]+/    { [ :FLOAT, text ] }
                        /0x[0-9a-fA-F]+/    { [ :INTEGER, text.to_i(16) ] }
                        /0b[01]+/           { [ :INTEGER, text.to_i(2) ] }
                        /#{INT}/            { [ :INTEGER, text.to_i ] }

# Comments; any whitespace directly in front is part of the token text.
                        /\s*(\#.*)/         { [ :COMMENT, text ] }

# Names: symbol (:foo) and signature (foo:) come before the bare identifier.
                        /:#{IDENTIFIER}/    { [ :SYMBOL, text ] }
                        /#{IDENTIFIER}\:/   { [ :SIGNATURE, text ] }
                        /#{IDENTIFIER}/     { [ :IDENTIFIER, text ] }

# Punctuation and operators; "**" comes before "*".
                        /\./                { [ :DOT, text ] }
                        /\:/                { [ :COLON, text ] }
                        /\=/                { [ :EQUAL, text ] }
                        /\+/                { [ :PLUS, text ] }
                        /\-/                { [ :MINUS, text ] }
                        /\*\*/              { [ :EXPO, text ] }
                        /\*/                { [ :ASTERISK, text ] }
                        /\//                { [ :FWD_SLASH, text ] }
                        /%/                 { [ :PERCENT, text ] }
                        /\(/                { [ :OPAREN, text ] }
                        /\)/                { [ :CPAREN, text ] }

# Indentation is handled by Lexer#in_or_out_dent. This rule only fires when
# at least one space or tab follows the newline; all other whitespace is
# skipped by the final rule.
                        /\n+[\ \t]+/        in_or_out_dent
                        /\s+/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding: UTF-8 | |
#-- | |
# This file is automatically generated. Do not modify it. | |
# Generated by: oedipus_lex version 2.3.1. | |
# Source: lexer.rex | |
#++ | |
# Scanner half of the lexer, generated from lexer.rex.
#
# NOTE: two lines differ from the raw generator output and need to be kept in
# step with lexer.rex / re-applied after regenerating: the keyword rule is
# /def\b/ (so "default" is not split into DEF + IDENTIFIER), and #parse_file
# uses File.open instead of Kernel#open.
class Lexer
  require 'strscan'

  IDENTIFIER = /[a-zA-Z_][a-zA-Z_0-9]*/
  INT        = /(0|[1-9][0-9]*)/

  # Raised when no rule matches the remaining input or the state is unknown.
  class ScanError < StandardError ; end

  attr_accessor :lineno   # set to 1 by #parse
  attr_accessor :filename # set by #parse_file
  attr_accessor :ss       # the scanner instance created by #parse
  attr_accessor :state    # current lexer state; nil is the default state

  alias :match :ss

  # @return [Array<String, nil>] capture groups 1..9 of the last match with
  #   trailing nils removed
  def matches
    m = (1..9).map { |i| ss[i] }
    m.pop until m[-1] || m.empty?
    m
  end

  # Runs a rule's action block and returns its value.
  def action
    yield
  end

  # Scanner implementation used by #parse. Only defined when the class does
  # not already provide its own scanner_class.
  def scanner_class
    StringScanner
  end unless instance_methods(false).map(&:to_s).include?("scanner_class")

  # Points the lexer at +str+, resets the line number and calls do_parse
  # (which this class expects to be defined elsewhere).
  #
  # @param str [String] source text
  def parse str
    self.ss     = scanner_class.new str
    self.lineno = 1
    self.state  ||= nil

    do_parse
  end

  # Reads +path+ and parses its contents.
  #
  # File.open is used rather than Kernel#open so that a path beginning with
  # "|" is treated as a file name and never run as a command.
  #
  # @param path [String] file to read
  def parse_file path
    self.filename = path
    File.open path do |f|
      parse f.read
    end
  end

  # Scans forward until one rule produces a token.
  #
  # Rules are tried in order for the current state. A rule whose action yields
  # nil (for example skipped whitespace) makes the loop continue. A
  # [:state, name] token switches the lexer state before being returned.
  #
  # @return [Array, nil] the next token, or nil at end of input
  # @raise [ScanError] when nothing matches or the state is undefined
  # @raise [RuntimeError] when an action returns something that is neither nil
  #   nor an array of at least two elements
  def next_token
    token = nil

    until ss.eos? || token do
      token =
        case state
        when nil then
          case
          when text = ss.scan(/'/) then
            [:state, :SINGLE_TICK_STRING]
          when text = ss.scan(/"/) then
            [:state, :DOUBLE_TICK_STRING]
          when text = ss.scan(/def\b/) then
            action { [ :DEF, text ] }
          when text = ss.scan(/#{INT}\.[0-9]+/) then
            action { [ :FLOAT, text ] }
          when text = ss.scan(/0x[0-9a-fA-F]+/) then
            action { [ :INTEGER, text.to_i(16) ] }
          when text = ss.scan(/0b[01]+/) then
            action { [ :INTEGER, text.to_i(2) ] }
          when text = ss.scan(/#{INT}/) then
            action { [ :INTEGER, text.to_i ] }
          when text = ss.scan(/\s*(\#.*)/) then
            action { [ :COMMENT, text ] }
          when text = ss.scan(/:#{IDENTIFIER}/) then
            action { [ :SYMBOL, text ] }
          when text = ss.scan(/#{IDENTIFIER}\:/) then
            action { [ :SIGNATURE, text ] }
          when text = ss.scan(/#{IDENTIFIER}/) then
            action { [ :IDENTIFIER, text ] }
          when text = ss.scan(/\./) then
            action { [ :DOT, text ] }
          when text = ss.scan(/\:/) then
            action { [ :COLON, text ] }
          when text = ss.scan(/\=/) then
            action { [ :EQUAL, text ] }
          when text = ss.scan(/\+/) then
            action { [ :PLUS, text ] }
          when text = ss.scan(/\-/) then
            action { [ :MINUS, text ] }
          when text = ss.scan(/\*\*/) then
            action { [ :EXPO, text ] }
          when text = ss.scan(/\*/) then
            action { [ :ASTERISK, text ] }
          when text = ss.scan(/\//) then
            action { [ :FWD_SLASH, text ] }
          when text = ss.scan(/%/) then
            action { [ :PERCENT, text ] }
          when text = ss.scan(/\(/) then
            action { [ :OPAREN, text ] }
          when text = ss.scan(/\)/) then
            action { [ :CPAREN, text ] }
          when text = ss.scan(/\n+[\ \t]+/) then
            in_or_out_dent text
          when text = ss.scan(/\s+/) then
            # do nothing
          else
            text = ss.string[ss.pos .. -1]
            raise ScanError, "can not match (#{state.inspect}): '#{text}'"
          end
        when :SINGLE_TICK_STRING then
          case
          when text = ss.scan(/[^']+/) then
            action { [ :STRING, text ] }
          when text = ss.scan(/'/) then
            [:state, nil]
          else
            text = ss.string[ss.pos .. -1]
            raise ScanError, "can not match (#{state.inspect}): '#{text}'"
          end
        when :DOUBLE_TICK_STRING then
          case
          when text = ss.scan(/[^"]+/) then
            action { [ :STRING, text ] }
          when text = ss.scan(/"/) then
            [:state, nil]
          else
            text = ss.string[ss.pos .. -1]
            raise ScanError, "can not match (#{state.inspect}): '#{text}'"
          end
        else
          raise ScanError, "undefined state: '#{state}'"
        end # token = case state

      next unless token # allow functions to trigger redo w/ nil
    end # while

    raise "bad lexical result: #{token.inspect}" unless
      token.nil? || (Array === token && token.size >= 2)

    # auto-switch state
    self.state = token.last if token && token.first == :state

    token
  end # def next_token
end # class
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require './lexer' | |
# Specs for Lexer#tokens: each example lexes +source+ and inspects the
# resulting token kinds (+tokens+) and token values (+values+).
RSpec.describe Lexer do
  let(:results) { Lexer.new(source).tokens }
  let(:tokens)  { results.map(&:first) }
  let(:values)  { results.map(&:last) }

  describe 'integers' do
    %w[0 1 123].each do |i|
      describe i do
        let(:source) { i }

        it 'is an INTEGER' do
          expect(tokens).to include :INTEGER
        end
      end
    end
  end

  describe 'indent' do
    # Every fixture is meant to be two indentation levels deep. The lexer
    # counts two columns per level and rejects odd widths, so the space-based
    # fixtures use four columns.
    [ "\n    ", "\n\n    ", "\n\t\t" ].each do |s|
      # inspect keeps the whitespace-only source readable in the spec output
      describe s.inspect do
        let(:source) { s }

        it 'indents twice' do
          expect(tokens).to eq [:INDENT, :INDENT]
        end
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load oedipus_lex's rake support (presumably what supplies the rule that
# builds lexer.rex.rb from lexer.rex -- confirm against the gem).
Rake.application.rake_require 'oedipus_lex'

# Build the generated lexer file.
task :lexer => 'lexer.rex.rb'

# Run the lexer specs.
task :spec do
  sh 'rspec ./lexer_spec.rb'
end

# By default, build the lexer and then run its specs.
task :default => [:lexer, :spec]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment