pocke · March 1, 2020 12:38
diff --git a/rurema-converter2.rb b/rurema-converter2.rb
 require 'ast'
 require 'strscan'

 # Parses a document containing `#@` directive lines into a tree of
 # RuremaNode objects.
 #
 # A line starting with `#@` is a directive token (`#@since`, `#@until`,
 # `#@if`, `#@else`, `#@end`, `#@samplecode`, `#@include`, `#@todo`, `#@#`);
 # every maximal run of other lines becomes one text token.
 class RuremaParser
   # text - String with the document source.
   # path - optional name used only as the prefix of error messages.
   def initialize(text, path: nil)
     @text = text
     @path = path
     @index = 0
   end

   # Tokenizes the text and returns the root :group node.
   # Raises RuntimeError ("path:line: message") on malformed input.
   def parse
     tokenize
     parse_document
   end

   # Fills @tokens with one entry per directive line and one entry per run of
   # consecutive non-directive lines.
   private def tokenize
     @tokens = []
     s = StringScanner.new(@text)
     until s.eos?
       # `.*` rather than `.+`: a bare `#@` line must be consumed here too,
       # otherwise neither branch advances the scanner and the loop never ends.
       if s.scan(/^\#\@.*\n?/)
         @tokens << s.matched
       else
         text = +''
         while !s.eos? && s.scan(/.*\n?/)
           if s.matched.start_with?('#@')
             # Leave the directive line for the next iteration of the outer loop.
             s.unscan
             break
           end
           text << s.matched
         end
         @tokens << text
       end
     end
   end

   # Parses statements until the token list is exhausted.
   private def parse_document
     children = []
     until eof?
       children << parse_statement
     end

     RuremaNode.new(:group, children)
   end

   # Consumes one token (plus the body, for block directives) and returns the
   # node for it. Unknown `#@` directives are a parse error.
   private def parse_statement
     case token = next_token
     when /^\#\@(since|until|if)(.+)$/
       parse_conditional(Regexp.last_match)
     when /^\#\@\#(.*)$/
       RuremaNode.new(:comment, [Regexp.last_match[1]])
     when /^\#\@samplecode(.*)$/
       parse_samplecode(Regexp.last_match)
     when /^\#\@include(\s*\(.+\) *)$/
       RuremaNode.new(:include, [Regexp.last_match[1]])
     when /^\#\@todo(.*)$/
       RuremaNode.new(:todo, [Regexp.last_match[1]])
     when /^\#\@/
       parse_error! "unexpected token #{token}"
     else
       RuremaNode.new(:text, [token])
     end
   end

   # Parses the body of `#@since` / `#@until` / `#@if` up to the matching
   # `#@end`, with an optional single `#@else` branch.
   #
   # match - MatchData of the opening line: [1] is the keyword, [2] the condition.
   private def parse_conditional(match)
     type = match[1]
     condition = match[2]
     if_body = []
     else_body = nil
     children = if_body

     loop do
       token = next_token
       # Without this check a missing `#@end` would loop forever on nil tokens.
       parse_error! "unexpected EOF (missing \#@end for \#@#{type})" if token.nil?

       # Both checks are anchored at the start of the token so that a text
       # token which merely mentions `#@end` or `#@else` stays ordinary text.
       if token.start_with?('#@end')
         break
       elsif token.match?(/\A\#\@else\s*\z/)
         parse_error! 'double else' if else_body
         children = else_body = []
       else
         back_token
         children << parse_statement
       end
     end
     RuremaNode.new(type, [
       condition,
       RuremaNode.new(:group, if_body),
       else_body && RuremaNode.new(:group, else_body)
     ])
   end

   # Parses the body of `#@samplecode` up to the matching `#@end`.
   #
   # match - MatchData of the opening line: [1] is the text after the keyword.
   private def parse_samplecode(match)
     annotation = match[1]
     children = []

     loop do
       token = next_token
       parse_error! 'unexpected EOF (missing #@end for #@samplecode)' if token.nil?
       break if token.start_with?('#@end')

       back_token
       children << parse_statement
     end
     RuremaNode.new(:samplecode, [
       annotation, RuremaNode.new(:group, children)
     ])
   end

   # Un-reads the most recently consumed token.
   private def back_token
     @index -= 1
   end

   # Returns the current token (nil past the end) and advances the cursor.
   private def next_token
     @tokens[@index].tap do
       @index += 1
     end
   end

   private def eof?
     @tokens.size <= @index
   end

   # 1-based line number of the most recently consumed token.
   private def lnum
     # Count only the tokens before the current one. A range such as
     # `0..@index-2` becomes `0..-1` (the whole array) when the very first
     # token is at fault, so take an explicit, non-negative count instead.
     preceding = [@index - 1, 0].max
     @tokens.first(preceding).sum { |token| token.lines.size } + 1
   end

   # Raises RuntimeError with a "path:line: message" text.
   private def parse_error!(message)
     raise "#{@path || '-'}:#{lnum}: #{message}"
   end
 end

 # Tree node produced by RuremaParser; #to_source turns a node back into text.
 class RuremaNode < AST::Node
   # Returns the source text for this node, rendering child nodes recursively.
   # Raises RuntimeError when the node type is not one it knows how to render.
   def to_source
     case type
     when :group      then children.map { |child| child.to_source }.join
     when :text       then children[0]
     when :comment    then directive_line('#@#')
     when :samplecode then directive_line('#@samplecode') + children[1].to_source + "\#@end\n"
     when :if         then conditional_source('#@if')
     when :include    then directive_line('#@include')
     when :todo       then directive_line('#@todo')
     when :until      then conditional_source('#@until')
     when :since      then conditional_source('#@since')
     else
       raise "unknown type: #{type}"
     end
   end

   # The directive keyword followed by the first child and a newline.
   private def directive_line(keyword)
     keyword + children[0] + "\n"
   end

   # Opening line, body, optional else branch (third child) and closing line.
   private def conditional_source(keyword)
     rendered = directive_line(keyword) + children[1].to_source
     rendered += "\#@else\n" + children[2].to_source if children[2]
     rendered + "\#@end\n"
   end
 end

 # Rebuilds the tree under +node+ from the leaves up: every RuremaNode child is
 # rewritten first, then a fresh node with the rewritten children is handed to
 # the block. Returns whatever the block returns for the root.
 def modify(node, &block)
   rebuilt = node.children.map do |item|
     item.is_a?(RuremaNode) ? modify(item, &block) : item
   end

   block.call(RuremaNode.new(node.type, rebuilt))
 end

 # Visits +node+ and then, depth-first, every RuremaNode below it, passing each
 # to the block. Called without a block it returns an Enumerator over the same
 # sequence.
 def traverse(node, &block)
   if block
     block.call(node)
     node.children.each do |item|
       traverse(item, &block) if item.is_a?(RuremaNode)
     end
   else
     to_enum(:traverse, node)
   end
 end

 # For every path in ARGV: parse the file, and inside each :samplecode node
 # strip from the start of every text line as many spaces as lead the first
 # text node found in that sample, then write the result back to the same path.
 # Samples without a text node, or whose first text has no leading spaces, are
 # left untouched.
 def remove_useless_whitespaces_in_samplecode
   ARGV.each do |file|
     document = RuremaParser.new(File.read(file), path: file).parse

     rewritten = modify(document) do |node|
       if node.type == :samplecode
         leading = traverse(node).find { |n| n.type == :text }
         spaces = leading && leading.children.first[/\A( +)/, 1]

         if spaces
           pattern = /^ {#{spaces.size}}/
           modify(node) do |inner|
             if inner.type == :text
               RuremaNode.new(inner.type, [inner.children.first.gsub(pattern, '')])
             else
               inner
             end
           end
         else
           node
         end
       else
         node
       end
     end

     File.write(file, rewritten.to_source)
   end
 end

 # Script entry point: process every file named on the command line (ARGV).
 remove_useless_whitespaces_in_samplecode
	require 'ast'
	require 'strscan'

	# Parses a document containing `#@` directive lines into a tree of
	# RuremaNode objects.
	#
	# A line starting with `#@` is a directive token (`#@since`, `#@until`,
	# `#@if`, `#@else`, `#@end`, `#@samplecode`, `#@include`, `#@todo`, `#@#`);
	# every maximal run of other lines becomes one text token.
	class RuremaParser
	  # text - String with the document source.
	  # path - optional name used only as the prefix of error messages.
	  def initialize(text, path: nil)
	    @text = text
	    @path = path
	    @index = 0
	  end

	  # Tokenizes the text and returns the root :group node.
	  # Raises RuntimeError ("path:line: message") on malformed input.
	  def parse
	    tokenize
	    parse_document
	  end

	  # Fills @tokens with one entry per directive line and one entry per run of
	  # consecutive non-directive lines.
	  private def tokenize
	    @tokens = []
	    s = StringScanner.new(@text)
	    until s.eos?
	      # `.*` rather than `.+`: a bare `#@` line must be consumed here too,
	      # otherwise neither branch advances the scanner and the loop never ends.
	      if s.scan(/^\#\@.*\n?/)
	        @tokens << s.matched
	      else
	        text = +''
	        while !s.eos? && s.scan(/.*\n?/)
	          if s.matched.start_with?('#@')
	            # Leave the directive line for the next iteration of the outer loop.
	            s.unscan
	            break
	          end
	          text << s.matched
	        end
	        @tokens << text
	      end
	    end
	  end

	  # Parses statements until the token list is exhausted.
	  private def parse_document
	    children = []
	    until eof?
	      children << parse_statement
	    end

	    RuremaNode.new(:group, children)
	  end

	  # Consumes one token (plus the body, for block directives) and returns the
	  # node for it. Unknown `#@` directives are a parse error.
	  private def parse_statement
	    case token = next_token
	    when /^\#\@(since|until|if)(.+)$/
	      parse_conditional(Regexp.last_match)
	    when /^\#\@\#(.*)$/
	      RuremaNode.new(:comment, [Regexp.last_match[1]])
	    when /^\#\@samplecode(.*)$/
	      parse_samplecode(Regexp.last_match)
	    when /^\#\@include(\s*\(.+\) *)$/
	      RuremaNode.new(:include, [Regexp.last_match[1]])
	    when /^\#\@todo(.*)$/
	      RuremaNode.new(:todo, [Regexp.last_match[1]])
	    when /^\#\@/
	      parse_error! "unexpected token #{token}"
	    else
	      RuremaNode.new(:text, [token])
	    end
	  end

	  # Parses the body of `#@since` / `#@until` / `#@if` up to the matching
	  # `#@end`, with an optional single `#@else` branch.
	  #
	  # match - MatchData of the opening line: [1] is the keyword, [2] the condition.
	  private def parse_conditional(match)
	    type = match[1]
	    condition = match[2]
	    if_body = []
	    else_body = nil
	    children = if_body

	    loop do
	      token = next_token
	      # Without this check a missing `#@end` would loop forever on nil tokens.
	      parse_error! "unexpected EOF (missing \#@end for \#@#{type})" if token.nil?

	      # Both checks are anchored at the start of the token so that a text
	      # token which merely mentions `#@end` or `#@else` stays ordinary text.
	      if token.start_with?('#@end')
	        break
	      elsif token.match?(/\A\#\@else\s*\z/)
	        parse_error! 'double else' if else_body
	        children = else_body = []
	      else
	        back_token
	        children << parse_statement
	      end
	    end
	    RuremaNode.new(type, [
	      condition,
	      RuremaNode.new(:group, if_body),
	      else_body && RuremaNode.new(:group, else_body)
	    ])
	  end

	  # Parses the body of `#@samplecode` up to the matching `#@end`.
	  #
	  # match - MatchData of the opening line: [1] is the text after the keyword.
	  private def parse_samplecode(match)
	    annotation = match[1]
	    children = []

	    loop do
	      token = next_token
	      parse_error! 'unexpected EOF (missing #@end for #@samplecode)' if token.nil?
	      break if token.start_with?('#@end')

	      back_token
	      children << parse_statement
	    end
	    RuremaNode.new(:samplecode, [
	      annotation, RuremaNode.new(:group, children)
	    ])
	  end

	  # Un-reads the most recently consumed token.
	  private def back_token
	    @index -= 1
	  end

	  # Returns the current token (nil past the end) and advances the cursor.
	  private def next_token
	    @tokens[@index].tap do
	      @index += 1
	    end
	  end

	  private def eof?
	    @tokens.size <= @index
	  end

	  # 1-based line number of the most recently consumed token.
	  private def lnum
	    # Count only the tokens before the current one. A range such as
	    # `0..@index-2` becomes `0..-1` (the whole array) when the very first
	    # token is at fault, so take an explicit, non-negative count instead.
	    preceding = [@index - 1, 0].max
	    @tokens.first(preceding).sum { |token| token.lines.size } + 1
	  end

	  # Raises RuntimeError with a "path:line: message" text.
	  private def parse_error!(message)
	    raise "#{@path || '-'}:#{lnum}: #{message}"
	  end
	end

	# Tree node produced by RuremaParser; #to_source turns a node back into text.
	class RuremaNode < AST::Node
	  # Returns the source text for this node, rendering child nodes recursively.
	  # Raises RuntimeError when the node type is not one it knows how to render.
	  def to_source
	    case type
	    when :group      then children.map { |child| child.to_source }.join
	    when :text       then children[0]
	    when :comment    then directive_line('#@#')
	    when :samplecode then directive_line('#@samplecode') + children[1].to_source + "\#@end\n"
	    when :if         then conditional_source('#@if')
	    when :include    then directive_line('#@include')
	    when :todo       then directive_line('#@todo')
	    when :until      then conditional_source('#@until')
	    when :since      then conditional_source('#@since')
	    else
	      raise "unknown type: #{type}"
	    end
	  end

	  # The directive keyword followed by the first child and a newline.
	  private def directive_line(keyword)
	    keyword + children[0] + "\n"
	  end

	  # Opening line, body, optional else branch (third child) and closing line.
	  private def conditional_source(keyword)
	    rendered = directive_line(keyword) + children[1].to_source
	    rendered += "\#@else\n" + children[2].to_source if children[2]
	    rendered + "\#@end\n"
	  end
	end

	# Rebuilds the tree under +node+ from the leaves up: every RuremaNode child is
	# rewritten first, then a fresh node with the rewritten children is handed to
	# the block. Returns whatever the block returns for the root.
	def modify(node, &block)
	  # Block parameters use plain `|...|`; an escaped `\|` is not valid Ruby.
	  rebuilt = node.children.map do |item|
	    item.is_a?(RuremaNode) ? modify(item, &block) : item
	  end

	  block.call(RuremaNode.new(node.type, rebuilt))
	end

	# Visits +node+ and then, depth-first, every RuremaNode below it, passing each
	# to the block. Called without a block it returns an Enumerator over the same
	# sequence.
	def traverse(node, &block)
	  return to_enum(__method__, node) unless block_given?

	  block.call(node)
	  # Block parameters use plain `|...|`; an escaped `\|` is not valid Ruby.
	  node.children.each do |item|
	    traverse(item, &block) if item.is_a?(RuremaNode)
	  end
	end

	# For every path in ARGV: parse the file, and inside each :samplecode node
	# strip from the start of every text line as many spaces as lead the first
	# text node found in that sample, then write the result back to the same path.
	# Samples without a text node, or whose first text has no leading spaces, are
	# left untouched.
	def remove_useless_whitespaces_in_samplecode
	  # All block parameters below use plain `|...|`; the escaped `\|` form is
	  # not valid Ruby.
	  ARGV.each do |path|
	    ast = RuremaParser.new(File.read(path), path: path).parse
	    modified = modify(ast) do |node|
	      next node unless node.type == :samplecode

	      first_text = traverse(node).find { |c| c.type == :text }
	      next node unless first_text

	      indent = first_text.children.first[/\A( +)/, 1]
	      next node unless indent

	      # Built once per sample: matches exactly `indent.size` spaces at the
	      # start of any line.
	      leading = /^ {#{indent.size}}/
	      modify(node) do |c|
	        next c unless c.type == :text

	        RuremaNode.new(c.type, [c.children.first.gsub(leading, '')])
	      end
	    end

	    File.write(path, modified.to_source)
	  end
	end

	# Script entry point: process every file named on the command line (ARGV).
	remove_useless_whitespaces_in_samplecode