Created
September 22, 2024 01:06
-
-
Save tenderlove/74655290c7eadc6a9ccee712bede00d2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "strscan" | |
# Byte-oriented tokenizer for RTF source text.
#
# Call #next_token repeatedly. It returns one of :LBRACE, :RBRACE, :SEMI,
# :CTRL, :NUM, :LIT or :ESCAPED_BYTE, or nil once the input is exhausted.
# After each call, #value, #ctrl_name and #byte describe the token that was
# just scanned.
#
# Only String#getbyte and StringScanner#pos are used for single-byte access,
# so the class does not depend on StringScanner#peek_byte / #scan_byte, which
# older strscan releases do not provide.
class Scanner
  BACKSLASH = "\\".ord
  APOSTROPHE = "'".ord

  # Single-byte tokens indexed by byte value; every other slot is nil.
  TOK = [].tap do |table|
    table["{".ord] = :LBRACE
    table["}".ord] = :RBRACE
    table[";".ord] = :SEMI
  end.freeze

  # data - String holding the RTF source. All positions are byte offsets.
  def initialize(data)
    @scan = StringScanner.new(data)
    @prev_pos = @scan.pos
  end

  # Scans one token and returns its type as a Symbol, or nil at end of input.
  #
  # Raises RuntimeError for a backslash at the very end of the input, for a
  # \'hh hex escape (not supported), and if no token can be matched.
  def next_token
    return if @scan.eos?

    @prev_pos = @scan.pos
    current = peek_byte

    if (tok = TOK[current])
      advance
      tok
    elsif current == BACKSLASH
      scan_backslash
    else
      scan_text
    end
  end

  # Name of the control word just scanned, without the leading backslash.
  # Only meaningful right after #next_token returned :CTRL.
  def ctrl_name
    @scan.string.byteslice(@prev_pos + 1, (@scan.pos - @prev_pos) - 1)
  end

  # Raw bytes of the token just scanned. For :CTRL this includes the
  # backslash; for :ESCAPED_BYTE it is the escaped byte alone.
  def value
    @scan.string.byteslice(@prev_pos, @scan.pos - @prev_pos)
  end

  # Integer value of the first byte of the token just scanned. After
  # :ESCAPED_BYTE this is the byte that followed the backslash.
  def byte
    @scan.string.getbyte(@prev_pos)
  end

  private

  # Handles everything introduced by a backslash: a control word (letters
  # only) or a single escaped byte.
  def scan_backslash
    advance # consume the backslash; @prev_pos still points at it
    return :CTRL if @scan.skip(/[A-Za-z]+/)

    escaped = peek_byte
    # Without this check a nil byte would leak out and only fail much later.
    raise "dangling backslash at end of input (byte offset #{@prev_pos})" if escaped.nil?
    raise "\\'hh hex escapes are not supported (byte offset #{@prev_pos})" if escaped == APOSTROPHE

    @prev_pos = @scan.pos # make #value / #byte refer to the escaped byte itself
    advance
    :ESCAPED_BYTE
  end

  # Handles a run that starts with neither a single-byte token nor a
  # backslash: an optionally negative integer, or literal text.
  def scan_text
    return :NUM if @scan.skip(/-?\d+/)
    # "+" rather than "*": a zero-length match would return a token without
    # consuming input, so require at least one byte to guarantee progress.
    return :LIT if @scan.skip(/[^{}\\]+/)

    raise "unable to scan a token at byte offset #{@scan.pos}"
  end

  # Byte at the scan pointer, or nil at end of input.
  def peek_byte
    @scan.string.getbyte(@scan.pos)
  end

  # Moves the scan pointer forward by one byte.
  def advance
    @scan.pos += 1
  end
end
# A brace-delimited group: an ordered list of child nodes.
#
# Children are expected to respond to #each (yielding themselves and any
# descendants) and #to_rtf. Enumerable methods therefore walk the whole
# subtree in document order, starting with this group itself.
class Group
  include Enumerable

  # children - Array of child nodes, in document order.
  def initialize(children)
    @children = children
  end

  # Yields this group, then every descendant depth-first.
  #
  # Returns an Enumerator when called without a block, instead of failing
  # with LocalJumpError.
  def each(&blk)
    return to_enum(:each) unless blk

    yield self
    @children.each { |child| child.each(&blk) }
  end

  # Serialises the group and all of its children back to RTF text.
  def to_rtf
    "{" + @children.map(&:to_rtf).join + "}"
  end
end
# Leaf node wrapping a run of literal text.
class Lit
  attr_reader :value

  # value - String with the literal text; it is stored as given, not copied.
  def initialize(value)
    @value = value
  end

  # Yields only this node, since a literal has no children.
  def each
    yield(self)
  end

  # Returns the stored text unchanged.
  def to_rtf
    value
  end
end
# Leaf node for a control word: a name, an optional numeric parameter and an
# optional trailing semicolon.
class CTRL
  attr_reader :name, :value, :has_semi

  # name     - control word name without the backslash
  # value    - parameter text, or nil when there is none
  # has_semi - truthy when a ";" should follow the control word in the output
  def initialize(name, value, has_semi)
    @name = name
    @value = value
    @has_semi = has_semi
  end

  # Yields only this node, since a control word has no children.
  def each
    yield(self)
  end

  # Rebuilds the control word as backslash + name + parameter + optional ";".
  def to_rtf
    terminator = has_semi ? ";" : ""
    "\\#{name}#{value}#{terminator}"
  end
end
# Leaf node for a single backslash-escaped byte.
class ESC
  # Integer byte value; writable so callers can substitute another byte.
  attr_accessor :value

  # value - Integer value of the escaped byte
  def initialize(value)
    @value = value
  end

  # Yields only this node, since an escape has no children.
  def each
    yield(self)
  end

  # Rebuilds the escape as a backslash followed by the byte as a character.
  def to_rtf
    escaped = value.chr
    "\\#{escaped}"
  end
end
# Recursive-descent parser that turns a token stream into a tree of Group,
# CTRL, Lit and ESC nodes.
#
# The parser keeps one token of lookahead in @next_tok. The scanner's #value,
# #ctrl_name and #byte always describe that lookahead token, so they must be
# read before #accept advances to the next one.
class Parser
  # scan - token source responding to #next_token, #value, #ctrl_name, #byte
  def initialize(scan)
    @scan = scan
    @next_tok = scan.next_token
  end

  # Parses the outermost group and returns it. Tokens after its closing brace
  # are not consumed.
  def parse
    parse_group
  end

  # Parses "{ item* }" and returns a Group holding the items.
  #
  # Raises RuntimeError if the input does not start with "{", ends before the
  # matching "}", or contains an unknown token.
  def parse_group
    expect :LBRACE
    items = []
    items << parse_item until @next_tok == :RBRACE
    # Consume the closing brace unconditionally. The next token may be nil at
    # end of input, so the result of #accept must not decide whether to stop.
    accept
    Group.new(items)
  end

  # Consumes the lookahead token if it is +val+; raises RuntimeError otherwise.
  def expect(val)
    raise "expected #{val.inspect} but found #{@next_tok.inspect}" unless @next_tok == val

    accept
  end

  # Advances the lookahead by one token and returns the new lookahead.
  def accept
    @next_tok = @scan.next_token
  end

  private

  # Parses a single item inside a group, dispatching on the lookahead token.
  def parse_item
    case @next_tok
    when :LBRACE
      parse_group
    when :CTRL
      parse_ctrl
    when :LIT, :NUM, :SEMI
      # A number or semicolon that does not directly follow a control word is
      # ordinary content; keeping its raw text makes the output round-trip.
      text = @scan.value
      accept
      Lit.new(text)
    when :ESCAPED_BYTE
      escaped = @scan.byte
      accept
      ESC.new(escaped)
    when nil
      raise "unterminated group: input ended before the closing \"}\""
    else
      raise "unexpected token #{@next_tok.inspect}"
    end
  end

  # Parses a control word with its optional numeric parameter and optional
  # trailing semicolon.
  def parse_ctrl
    name = @scan.ctrl_name
    accept

    num = nil
    if @next_tok == :NUM
      num = @scan.value
      accept
    end

    has_semi = @next_tok == :SEMI
    accept if has_semi

    CTRL.new(name, num, has_semi)
  end
end
# Command-line driver: parse the RTF file named by ARGV[0], blank out its
# text while keeping the markup, and print the resulting RTF.
abort "usage: ruby #{$PROGRAM_NAME} FILE.rtf" if ARGV.empty?

scanner = Scanner.new(File.binread(ARGV[0]))
parser = Parser.new(scanner)
group = parser.parse
#p group.grep(Lit).map(&:value)

# The first literal mentioning "contents" is kept verbatim. Every other
# literal has each non-newline character replaced by a space, unless it is
# whitespace-only or contains a ";".
first = true
group.grep(Lit).each do |node|
  if first && node.value.match?(/contents/)
    first = false
    next
  end

  # \A and \z anchor the whole string. With ^ and $ a literal that merely
  # contains a blank line would count as "whitespace only" and be left as is.
  next if node.value.match?(/\A[ \t\n]*\z/) || node.value.include?(";")

  node.value.gsub!(/./, " ")
end

# Replace every escaped byte above 32 (space) with a space.
# NOTE(review): this also turns control symbols such as "\*" and "\~" into
# "\ ", which may change how a reader interprets the document — confirm that
# this is intended.
group.grep(ESC).each do |node|
  node.value = 32 if node.value > 32
end

puts group.to_rtf
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment