Created
March 1, 2020 12:38
-
-
Save pocke/9f56916314a5d1fc1092d9629a1a4f4f to your computer and use it in GitHub Desktop.
For removing unnecessary indents
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'ast'
require 'strscan'
# Parses a Rurema source text containing `#@` directives into a tree of
# RuremaNode objects.
#
# The text is first split into tokens: every line starting with `#@` is a
# directive token of its own, and every run of other lines is one text token.
# The tokens are then parsed with a small recursive-descent parser.
class RuremaParser
  # Prefix that marks a line as a directive.
  DIRECTIVE_PREFIX = '#@'
  # Prefix of the directive that closes a conditional or samplecode block.
  END_DIRECTIVE = '#@end'
  # Message used when the input ends inside an unclosed block.
  MISSING_END_MESSAGE = 'unexpected end of file: missing #@end'

  # @param text [String] the document source
  # @param path [String, nil] file name used in error messages ('-' if nil)
  def initialize(text, path: nil)
    @text = text
    @path = path
    @index = 0
  end

  # Tokenizes and parses the text.
  #
  # @return [RuremaNode] a :group node holding the top-level statements
  # @raise [RuntimeError] on an unknown directive, a second `#@else`, or a
  #   block that is not closed by `#@end`
  def parse
    tokenize
    parse_document
  end

  # Splits @text into directive tokens (one line each) and text tokens
  # (consecutive non-directive lines joined together).
  private def tokenize
    @tokens = []
    scanner = StringScanner.new(@text)
    until scanner.eos?
      # `.*` (not `.+`) so that a bare "#@" line still becomes a directive
      # token; otherwise neither branch would consume it and the loop would
      # never terminate.
      @tokens << (scanner.scan(/^\#@.*\n?/) ? scanner.matched : scan_text(scanner))
    end
  end

  # Consumes lines up to (not including) the next directive line and returns
  # them as a single string.
  private def scan_text(scanner)
    text = +''
    while !scanner.eos? && scanner.scan(/.*\n?/)
      if scanner.matched.start_with?(DIRECTIVE_PREFIX)
        scanner.unscan
        break
      end
      text << scanner.matched
    end
    text
  end

  private def parse_document
    children = []
    children << parse_statement until eof?
    RuremaNode.new(:group, children)
  end

  # Parses one statement: a directive (possibly a whole block) or plain text.
  private def parse_statement
    case token = next_token
    when /^\#@(since|until|if)(.+)$/
      parse_conditional(Regexp.last_match)
    when /^\#@\#(.*)$/
      RuremaNode.new(:comment, [Regexp.last_match(1)])
    when /^\#@samplecode(.*)$/
      parse_samplecode(Regexp.last_match)
    when /^\#@include(\s*\(.+\) *)$/
      RuremaNode.new(:include, [Regexp.last_match(1)])
    when /^\#@todo(.*)$/
      RuremaNode.new(:todo, [Regexp.last_match(1)])
    when /^\#@/
      parse_error! "unexpected token #{token}"
    else
      RuremaNode.new(:text, [token])
    end
  end

  # Parses the body of `#@since` / `#@until` / `#@if` up to the matching
  # `#@end`, with an optional `#@else` branch.
  private def parse_conditional(match)
    type = match[1]
    condition = match[2]
    if_body = []
    else_body = nil
    children = if_body
    loop do
      token = next_token
      # Without this check a missing `#@end` would loop forever on nil tokens.
      parse_error! MISSING_END_MESSAGE if token.nil?

      # Anchored at the start of the token so that text or comments that
      # merely mention the directive mid-line do not close the block.
      if token.start_with?(END_DIRECTIVE)
        break
      elsif token.match?(/\A\#@else\s*\z/)
        parse_error! 'double else' if else_body
        children = else_body = []
      else
        back_token
        children << parse_statement
      end
    end
    RuremaNode.new(type, [
      condition,
      RuremaNode.new(:group, if_body),
      else_body && RuremaNode.new(:group, else_body)
    ])
  end

  # Parses the body of `#@samplecode` up to the matching `#@end`.
  private def parse_samplecode(match)
    annotation = match[1]
    children = []
    loop do
      token = next_token
      parse_error! MISSING_END_MESSAGE if token.nil?
      break if token.start_with?(END_DIRECTIVE)

      back_token
      children << parse_statement
    end
    RuremaNode.new(:samplecode, [
      annotation, RuremaNode.new(:group, children)
    ])
  end

  # Un-reads the most recently read token.
  private def back_token
    @index -= 1
  end

  # Returns the current token (nil past the end) and advances the cursor.
  private def next_token
    @tokens[@index].tap { @index += 1 }
  end

  private def eof?
    @tokens.size <= @index
  end

  # 1-based line number at which the most recently read token starts.
  private def lnum
    # The token just read sits at @index - 1, so @index - 1 tokens precede it.
    # Clamp at zero: a range ending at -1 would select *every* token.
    preceding = [@index - 1, 0].max
    @tokens.first(preceding).sum { |token| token.lines.size } + 1
  end

  # @raise [RuntimeError] always, with "path:line: message"
  private def parse_error!(message)
    raise "#{@path || '-'}:#{lnum}: #{message}"
  end
end
# AST node for a parsed Rurema document that can serialize itself back to
# source text.
#
# Child layout by node type, as consumed by #to_source:
#   :group                -> [node, ...]
#   :text                 -> [string]
#   :comment/:include/:todo -> [rest-of-line string]
#   :samplecode           -> [annotation string, :group node]
#   :if/:until/:since     -> [condition string, :group node, :group node or nil]
class RuremaNode < AST::Node
  # Prefix that starts every directive line.
  DIRECTIVE_PREFIX = '#@'
  # Closing line emitted after samplecode and conditional blocks.
  END_LINE = "\#@end\n"
  # Separator line emitted before the else-branch of a conditional.
  ELSE_LINE = "\#@else\n"
  # Node types that share the "condition / if-body / optional else-body" form.
  CONDITIONAL_TYPES = %i[if until since].freeze

  # Renders this node (and its subtree) as Rurema source text.
  #
  # @return [String]
  # @raise [RuntimeError] if the node type is not one listed above
  def to_source
    case type
    when :group
      children.map(&:to_source).join
    when :text
      children[0]
    when :comment
      # Comments are spelled "#@#", i.e. the directive name is "#".
      directive_line('#', children[0])
    when :include, :todo
      directive_line(type, children[0])
    when :samplecode
      directive_line(type, children[0]) + children[1].to_source + END_LINE
    when *CONDITIONAL_TYPES
      conditional_source
    else
      raise "unknown type: #{type}"
    end
  end

  # Builds a single directive line: prefix, directive name, raw argument text.
  private def directive_line(name, argument)
    "#{DIRECTIVE_PREFIX}#{name}#{argument}\n"
  end

  # Renders an :if/:until/:since node, including its else-branch when present.
  private def conditional_source
    condition, if_body, else_body = children
    source = directive_line(type, condition) + if_body.to_source
    source += ELSE_LINE + else_body.to_source if else_body
    source + END_LINE
  end
end
# Rebuilds the tree rooted at +node+ bottom-up, passing every rebuilt
# RuremaNode through the block and using the block's return value in its place.
#
# Children that are not RuremaNode instances (strings, nil) are copied as-is.
#
# @param node [RuremaNode] root of the subtree to rewrite
# @yieldparam node [RuremaNode] a copy of the node whose children have
#   already been rewritten
# @yieldreturn [Object] the replacement for that node
# @return [Object] whatever the block returns for the root
def modify(node, &block)
  children = node.children.map do |child|
    child.is_a?(RuremaNode) ? modify(child, &block) : child
  end
  block.call(RuremaNode.new(node.type, children))
end
# Visits +node+ and then, depth-first, every RuremaNode below it, yielding
# each one to the block. Children that are not RuremaNode instances are
# skipped.
#
# @param node [RuremaNode] root of the subtree to walk
# @yieldparam node [RuremaNode] the node being visited (parent before children)
# @return [Enumerator] over the visited nodes when no block is given
def traverse(node, &block)
  return to_enum(__method__, node) unless block_given?

  yield node
  node.children.each do |child|
    traverse(child, &block) if child.is_a?(RuremaNode)
  end
end
# Rewrites each file in place, removing the common leading indentation from
# the text inside every `#@samplecode` block.
#
# The amount removed is the number of leading spaces at the very start of the
# first text node found in the block; that many spaces are then stripped from
# the start of every line in the block's text nodes. Blocks without text, or
# whose first text does not start with a space, are left unchanged.
#
# @param paths [Array<String>] files to rewrite; defaults to the command-line
#   arguments
# @return [Array<String>] the given paths
# @raise [RuntimeError] propagated from RuremaParser#parse
def remove_useless_whitespaces_in_samplecode(paths = ARGV)
  paths.each do |path|
    ast = RuremaParser.new(File.read(path), path: path).parse
    modified = modify(ast) do |node|
      next node unless node.type == :samplecode

      first_text = traverse(node).find { |c| c.type == :text }
      next node unless first_text

      indent = first_text.children.first[/\A( +)/, 1]
      next node unless indent

      # Built once per samplecode block rather than once per text node.
      indent_pattern = /^ {#{indent.size}}/
      modify(node) do |c|
        next c unless c.type == :text

        RuremaNode.new(c.type, [c.children.first.gsub(indent_pattern, '')])
      end
    end
    File.write(path, modified.to_source)
  end
end
# Process the files named on the command line, but only when this file is run
# as a script, so that it can also be loaded without side effects.
remove_useless_whitespaces_in_samplecode if $PROGRAM_NAME == __FILE__
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment