Created
June 20, 2018 19:23
-
-
Save stewartpark/4d16d082983960e1dcc13d5d24a6cca4 to your computer and use it in GitHub Desktop.
Quick and dirty Ruby lexer/parser/rewriter to make the Rails 5 migration easier
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
##
# Quick and dirty Ruby lexer.
#
# Splits source text into +[type, text]+ pairs by trying every pattern in
# TOKENS, in declaration order, against the start of the remaining input.
# Each input character ends up in exactly one token, so joining the token
# texts (see RubyLexer.to_source) reproduces the input.
class RubyLexer
  # Token patterns. The first pattern that matches wins, so order matters:
  # +ops+ is listed before +assign_ops+ and +lambda_args+ and therefore
  # shadows both of them.
  TOKENS = {
    identifier: /\A[@a-zA-Z_][a-zA-Z_0-9]*/m,
    symbol: /\A:[a-zA-Z0-9_]+/m,
    # Backslashes are excluded from the plain-character class so an escaped
    # quote (\" or \') does not terminate the literal early.
    string: /\A"([^"\\]|\\.)*"|\A'([^'\\]|\\.)*'/m,
    shellout_literal: /\A`([^`\\]|\\.)*`/m,
    ruby_literal: /\A\%[a-zA-Z]?(\(\{[\s\S]*?\}\)|\[[\s\S]*?\]|\{[\s\S]*?\}|\([\s\S]*?\))/m,
    number: /\A[+\-]?[0-9]+(\.[0-9]+)?/m,
    kv_sep_old: /\A=>/m,
    ops: /\A[+\-\/*.=!~<>?&|][|&<>=.]?/m,
    assign_ops: /\A[+\-\/*]?[=]/m,
    namespace_sep: /\A::/m,
    lambda_args: /\A\|.*?\|/m,
    comma: /\A,/m,
    kv_sep: /\A:/m,
    lparen: /\A\(/m,
    rparen: /\A\)/m,
    lcurl: /\A\{/m,
    rcurl: /\A\}/m,
    lsquare: /\A\[/m,
    rsquare: /\A\]/m,
    concat: /\A\\/m,
    # Whitespace/semicolon runs, or a comment up to (not including) the end
    # of its line. The comment may be empty and may end at end of input.
    ignore: /\A[ \t\r\n;]+|\A#[^\n]*/m
  }.freeze

  # source:: the Ruby source text to tokenize.
  def initialize(source)
    @source = source
  end

  ##
  # Tokenizes the whole source.
  #
  # Returns an array of +[type, text]+ pairs. Input that no pattern in TOKENS
  # recognizes is emitted one character at a time as +:unknown+, which
  # guarantees progress on every iteration.
  #
  # On Interrupt (Ctrl-C) the tokens collected so far are dumped and the
  # process exits with status 1.
  def lex
    tokens = []
    rest = @source
    until rest.empty?
      token = next_token(rest)
      tokens << token
      rest = rest[token[1].size..-1]
    end
    tokens
  rescue Interrupt
    p tokens
    exit 1
  end

  ##
  # Joins the text of every token back into a source string.
  def self.to_source(tokens)
    tokens.map { |token| token[1] }.join
  end

  private

  # Returns the +[type, text]+ pair for the token at the head of +input+
  # (which must be non-empty).
  def next_token(input)
    TOKENS.each do |type, pattern|
      matched = pattern.match(input)
      return [type, matched[0]] if matched && !matched[0].empty?
    end
    # Nothing matched: consume a single character so the caller always advances.
    [:unknown, input[0]]
  end
end
##
# RubyInterestParser only parses interest points and rewrites,
# i.e. things to fix.
#
# It slides over the token list, and wherever one of INTEREST_RULES matches
# the upcoming non-ignored tokens it hands those tokens to each rewriter.
class RubyInterestParser
  # Each rule is a sequence of +[type, text]+ matchers. A matcher part is
  # either a Regexp (tested against the part's string form) or a literal
  # compared with ==. An empty Regexp (//) matches anything.
  INTEREST_RULES = {
    rule_rspec: [
      # Anchored so identifiers that merely contain a verb (target, input,
      # delete_all, ...) are not mistaken for request helpers.
      [:identifier, /\A(?:get|post|put|delete)\z/],
      [/\A(?:symbol|string)\z/, //],
      [:comma, //]
    ],
    rule_rspec_2: [
      [:identifier, /\A(?:get|post|put|delete)\z/],
      [:lparen, //],
      [/\A(?:symbol|string)\z/, //],
      [:comma, //]
    ],
    rule_wrong_select_usage: [
      [:identifier, 'select'],
      [:lcurl, //],
      [:number, '1'],
      [:rcurl, //]
    ],
    rule_wrong_kv_usage: [
      [:string, //],
      [:kv_sep, //]
    ]
    # Disabled
    # rule_old_kv_usage_1: [
    #   [:symbol, //],
    #   [:kv_sep_old, //]
    # ],
    # rule_old_kv_usage_2: [
    #   [:string, //],
    #   [:kv_sep_old, //]
    # ]
  }.freeze

  # tokens::    array of +[type, text]+ pairs; the pairs are mutated in place
  #             by the rewriters.
  # rewriters:: objects responding to +rewrite!(rule_name, tokens)+; applied
  #             one after another, in the given order.
  def initialize(tokens, rewriters)
    @tokens = tokens
    @rewriters = rewriters
  end

  ##
  # True when +term_part+ (a Regexp or a literal) accepts +token_part+.
  def match_part?(term_part, token_part)
    return true if term_part.is_a?(Regexp) && term_part =~ token_part.to_s

    term_part == token_part
  end

  ##
  # True when +rule+ matches the non-ignored tokens of +tokens+ starting at
  # index +offset+. Returns false (rather than failing) when fewer tokens
  # remain than the rule needs.
  def match?(rule, tokens, offset = 0)
    window = significant_window(tokens, offset, rule.size)
    return false if window.size < rule.size

    rule.each_with_index.all? do |(type, text), i|
      match_part?(type, window[i][0]) && match_part?(text, window[i][1])
    end
  end

  ##
  # Runs every rewriter over the whole token list and returns the rewritten
  # source text. Progress and a per-rewriter summary are printed to stdout.
  def parse_and_rewrite
    total = @tokens.size
    @rewriters.each do |rw|
      rewritten = 0
      @tokens.each_index do |pos|
        remaining = total - pos
        # print, not printf: the text ends in a bare '%', which is not a
        # valid format string.
        print "\r#{rw.class.name}: #{(100 - (remaining.to_f / total * 100)).to_i}%" if (remaining % 1000).zero?
        # Checked at visit time: an earlier rewrite may have blanked this token.
        next if @tokens[pos].first == :ignore

        INTEREST_RULES.each do |name, rule|
          next unless match?(rule, @tokens, pos)

          # Rewriters get a fresh array (they may delete from it) holding the
          # very same token pairs, so in-place edits reach @tokens.
          significant = @tokens[pos..-1].reject { |t| t.first == :ignore }
          rewritten += 1 if rw.rewrite!(name, significant)
        end
      end
      puts "\r#{rw.class.name}: #{rewritten} rewritten."
    end
    RubyLexer.to_source(@tokens)
  end

  private

  # Collects up to +count+ non-ignored tokens of +tokens+ starting at +offset+
  # without scanning the rest of the list.
  def significant_window(tokens, offset, count)
    window = []
    idx = offset
    while window.size < count && idx < tokens.size
      token = tokens[idx]
      window << token unless token.first == :ignore
      idx += 1
    end
    window
  end
end
##
# RubyFixer rewrites some syntax
class RubyFixer
  # A quoted string whose content could be written as a bare hash key.
  IDENTIFIER_STRING = /\A["'][a-zA-Z_][a-zA-Z0-9_]*["']\z/

  ##
  # Applies the fix associated with rule +name+ to +tokens+, an array of
  # +[type, text]+ pairs beginning at the matched position. The pairs are
  # edited in place.
  #
  # Returns true when the rule is handled here, false otherwise.
  def rewrite!(name, tokens)
    case name
    when :rule_wrong_select_usage
      ##
      # Fix #1
      # We convert ranges with `select{1}` sometimes, which can just be `to_a`.
      tokens[0][1] = 'to_a'
      # Blank out the `{`, `1` and `}` that followed `select`.
      tokens[1..3].each do |token|
        token[0] = :ignore
        token[1] = ''
      end
      true
    when :rule_wrong_kv_usage
      ##
      # Fix #2
      # Just assume this is an identifier (only the type changes, not the text).
      tokens[0][0] = :identifier
      true
    when :rule_old_kv_usage_1
      ##
      # Fix #3
      # Change old style hash to the new one: `:key =>` becomes `key:`.
      tokens[0][0] = :identifier
      tokens[0][1] = tokens[0][1][1..-1]
      tokens[1][0] = :kv_sep
      tokens[1][1] = ':'
      true
    when :rule_old_kv_usage_2
      ##
      # Fix #4
      # Change old style hash to the new one: `"key" =>` becomes `key:`.
      # The quotes are dropped only when the content is a plain identifier;
      # anything else (spaces, punctuation, empty string, ...) stays quoted,
      # giving `"some key":`, because unquoting it would not be valid Ruby.
      if tokens[0][1] =~ IDENTIFIER_STRING
        tokens[0][0] = :identifier
        tokens[0][1] = tokens[0][1][1..-2]
      end
      tokens[1][0] = :kv_sep
      tokens[1][1] = ':'
      true
    else
      false
    end
  end
end
##
# RspecFixer rewrites the syntax difference between rails 4 and rails 5 in rspec
class RspecFixer
  # Token types accepted between a hash key and its value.
  KV_SEPARATORS = %i[kv_sep kv_sep_old].freeze
  # String content that can be written as a plain `:symbol`.
  SYMBOL_NAME = /\A[a-zA-Z_][a-zA-Z0-9_]*\z/

  ##
  # Returns the index just past the value expression that starts at
  # +tokens[i]+.
  #
  # Understands parenthesised values, array and hash literals, and a trailing
  # call argument list, index access and binary-operator/namespace chain.
  # Raises RuntimeError when an expected closing token or key/value separator
  # is missing, including when the tokens run out first.
  def skip_value(i, tokens)
    case tokens[i]&.first
    when :lparen
      i = skip_value(i + 1, tokens)
      raise "rparen expected" unless tokens[i]&.first == :rparen

      i += 1
    when :lsquare
      i = skip_list(i + 1, tokens, :rsquare)
    when :lcurl
      i = skip_pairs(i + 1, tokens)
    else
      i += 1
    end
    # Function call
    i = skip_list(i + 1, tokens, :rparen) if tokens[i]&.first == :lparen
    # Index
    if tokens[i]&.first == :lsquare
      i = skip_value(i + 1, tokens)
      raise "rsquare expected" unless tokens[i]&.first == :rsquare

      i += 1
    end
    # Binary operators
    i = skip_value(i + 1, tokens) if %i[ops namespace_sep].include?(tokens[i]&.first)
    i
  end

  ##
  # Rewrites a request-helper call (rules +:rule_rspec+ / +:rule_rspec_2+) to
  # the Rails 5 form. +tokens+ holds the non-ignored +[type, text]+ pairs
  # starting at the helper name; the pairs are edited in place.
  #
  # Returns true when something was changed, false otherwise (including for
  # every other rule). Raises RuntimeError when the argument list cannot be
  # walked; in that case no token has been modified.
  def rewrite!(name, tokens)
    if name == :rule_rspec_2
      # Process the data as if there is no lparen.
      tokens.delete_at(1)
      name = :rule_rspec
    end
    return false unless name == :rule_rspec

    ##
    # Fix #1
    # If the endpoint is written in string, let's use symbol.
    # e.g. (old) get "update", something
    #      (new) get :update, something
    # Only done when the content is a plain name: a path such as "/users/1"
    # cannot be spelled as a bare symbol.
    endpoint = tokens[1][1][1..-2] if tokens[1]&.first == :string
    symbolize = !endpoint.nil? && !(endpoint =~ SYMBOL_NAME).nil?

    ##
    # Fix #2
    # If the request does not have `params:`, add it.
    # This is not easy with sed because we need to understand where the line
    # ends.
    # e.g. (old) get :update, something
    #      (new) get :update, params: something
    #      (old) get :update, a: 'test',
    #                         c: { a: 1 }
    #      (new) get :update, params: { a: 'test',
    #                         c: { a: 1 } }
    add_params = !tokens[3].nil? && !params_keyword?(tokens)
    # Everything that can fail happens before the first mutation.
    closing = last_keyword_argument(tokens) if add_params && keyword_arguments?(tokens)

    if symbolize
      tokens[1][0] = :symbol
      tokens[1][1] = ":#{endpoint}"
    end
    if add_params
      tokens[2][1] += closing ? ' params: {' : ' params:'
      tokens[closing][1] += ' }' if closing
    end
    symbolize || add_params
  end

  private

  # True when the arguments already start with `, params:`.
  def params_keyword?(tokens)
    tokens[2]&.first == :comma &&
      tokens[3] == [:identifier, 'params'] &&
      tokens[4]&.first == :kv_sep
  end

  # Difficult key argument case.
  # If it is already in a hash form, we wouldn't have a problem. But there are
  # many instances where it's a key argument, which is actually just a hash at
  # the end: `:key => value` or `key: value` right after the endpoint.
  def keyword_arguments?(tokens)
    (tokens[3]&.first == :symbol && tokens[4]&.first == :kv_sep_old) ||
      (tokens[3]&.first == :identifier && tokens[4]&.first == :kv_sep)
  end

  # Walks the comma separated key/value pairs starting at index 3 and returns
  # the index of the last token of the last value.
  def last_keyword_argument(tokens)
    i = 3
    loop do
      i = skip_pair(i, tokens)
      break unless tokens[i]&.first == :comma

      i += 1
    end
    raise "value expected" if tokens[i - 1].nil?

    i - 1
  end

  # Skips one `key <separator> value` group; returns the index after the value.
  def skip_pair(i, tokens)
    i = skip_value(i, tokens)
    raise "kv_sep expected" unless KV_SEPARATORS.include?(tokens[i]&.first)

    skip_value(i + 1, tokens)
  end

  # Skips comma separated values up to and including the +closer+ token.
  def skip_list(i, tokens, closer)
    until tokens[i]&.first == closer
      # Without this check a missing closer would be searched for forever.
      raise "#{closer} expected" if tokens[i].nil?

      i = skip_value(i, tokens)
      i += 1 if tokens[i]&.first == :comma
    end
    i + 1
  end

  # Skips comma separated key/value pairs up to and including the `}`.
  def skip_pairs(i, tokens)
    until tokens[i]&.first == :rcurl
      raise "rcurl expected" if tokens[i].nil?

      i = skip_pair(i, tokens)
      i += 1 if tokens[i]&.first == :comma
    end
    i + 1
  end
end
# Rewrite every *.rb file named on the command line in place. Other arguments
# are skipped silently. A file that cannot be opened or cannot be rewritten is
# reported and skipped so that the rest of the batch still runs.
fixed = 0
ARGV.each do |f|
  begin
    next unless f.end_with?('.rb')

    puts "Fixing #{f}..."
    # The order of rewriters will be honored.
    rewriters = [
      RubyFixer.new,
      RspecFixer.new
    ]
    source = File.read(f)
    tokens = RubyLexer.new(source).lex
    new_source = RubyInterestParser.new(tokens, rewriters).parse_and_rewrite
    # Leave the file untouched when nothing changed.
    File.write(f, new_source) unless new_source == source
    fixed += 1
  rescue Errno::ENOENT
    puts "!!! Cannot open file: #{f}"
  rescue RuntimeError => e
    # Raised by a rewriter that could not make sense of a call's arguments.
    puts "!!! Cannot rewrite file: #{f} (#{e.message})"
  end
end
puts "Done! #{fixed} file(s) fixed"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment