Created
June 29, 2012 11:36
-
-
Save prathamesh-sonpatki/3017493 to your computer and use it in GitHub Desktop.
Program to convert yacc files to grammar files (.y to to .grammar)(Incomplete)(Section#1 and Section#2 are completed, Working on Section# 3)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def match_package(str) | |
str.match /package\s.*;/ | |
end | |
def match_import(str) | |
str.match /import\s.*;/ | |
end | |
def match_class(str) | |
str.match /class\s*[A-Z]\w*/ | |
end | |
def match_java_code(str) | |
str.match /[^%^\/].*/ | |
end | |
def match_token(str) | |
str.match /%token\s*<.*>\s*\w*/ | |
end | |
def match_type(str) | |
str.match /%type\s*<.*>\s*(\w*,?\s*)*/ | |
end | |
def create_token_list(tokens) | |
token_list = ""; | |
tokens.each do |token| | |
token_list += token +", "; | |
end | |
token_list | |
end | |
src = File.new("/home/chaitanya/projects/jruby/src/org/jruby/parser/DefaultRubyParser.y", 'r') | |
dest = File.new("jruby.grammar", 'w') | |
input = src.read | |
#split the input into 3 sections as per convention of yacc file | |
section1 = (input.match /%{(\n|.)*%}/).to_s | |
section2 = (input.match /%}(\n|.)*%%(\n|.)*%%/).to_s.gsub! "%}", "" | |
section3 = (section2.match /%%(\n|.)*%%/).to_s.gsub! "%%", "" | |
section2.gsub! section3, "" | |
section2.gsub! "%%", "" | |
#comments from section1 | |
comments = section1.match /\/\*(.|\n)*\*\// | |
section1.gsub! comments.to_s, "" | |
section1.gsub! "%{", "" | |
section1.gsub! "%}", "" | |
section1 = section1.split "\n" | |
section2 = section2.split "\n" | |
#section3 = (section3.split "\n").join | |
section3_comments = [] | |
count = 0 | |
section3 = section3.split | |
section3.delete "" | |
embed = "" | |
tokens = [] | |
types = {} | |
#process section1 | |
section1.each do |line| | |
if result = match_package(line) | |
result = result.to_s.split | |
result[1].gsub! ";" , '";' | |
dest.write ("%" + result[0] + ' "' + result[1]) | |
elsif result = match_import(line) | |
result = result.to_s.split | |
result[1].gsub! ";" , '";' | |
dest.write("\n%" + result[0] + ' "' + result[1] ) | |
elsif result = match_java_code(line) | |
embed += result.to_s + "\n" | |
end | |
if result = match_class(line) | |
result = result.to_s.split | |
dest.write("\n\n%" + result[0] + ' "' + result[1] + '";') | |
end | |
end | |
dest.write "\n\n%embed{:\n" + embed + "\n:};" | |
#process section2 | |
section2.each do |line| | |
if result = match_token(line) | |
result = result.to_s | |
result.gsub! "<", "" | |
result.gsub! ">", "" | |
result = result.split | |
tokens << result[2] | |
if types[result[1].to_sym] | |
types[result[1].to_sym] += [ result[2].to_s ] | |
else | |
types[result[1].to_sym] = [ result[2].to_s ] | |
end | |
elsif result = match_type(line) | |
result = result.to_s | |
result.gsub! "<", "" | |
result.gsub! "<", "" | |
result.gsub! ">", "" | |
result.gsub! ",", "" | |
result = result.split | |
if types[result[1].to_sym] | |
types[result[1].to_sym] += [result[2].to_s] | |
else | |
types[result[1].to_sym] = [result[2] ] | |
end | |
end | |
end | |
dest.write "\n\n%terminals " | |
dest.write " " + create_token_list(tokens)[0..-3]+";" | |
types.each_key do |type| | |
dest.write "\n\n%typeof " + create_token_list(types[type])[0..-3] + " = " + '"' + type.to_s + '"' + ";" | |
end | |
#process section3 | |
@state = 'lhs' | |
@count = 1 | |
@loop = 0 | |
@index = 0 | |
@paren = 0 | |
@return = false | |
section3.each do |token| | |
if token.include? "comment#" | |
dest.write "\n" + section3_comments[token[-1].to_i] + "\n" | |
elsif @state == 'lhs' | |
if token == ":" | |
dest.write " = " | |
@state = 'rhs_initial' | |
elsif token =~ /\w*/ | |
dest.write "\n"+token | |
end | |
elsif @state == 'rhs_initial' | |
if token == "{" | |
@state ="javacode_start" | |
@count = 1 | |
dest.write "\n"+ token + ":\n" | |
elsif token == "|" | |
dest.write token + "\n " | |
elsif (token.scan /\w+/) != [] and (section3[@index + 1].scan /\w+/) != [] and section3[@index + 2] == ":" | |
#dest.write token + "\n ; \n" | |
@count = 1 | |
elsif token =~ /\w*/ and section3[ @index + 1] == ":" | |
dest.write ";\n" | |
dest.write token+" " | |
@state = 'lhs' | |
elsif (token.scan /\w+/) != [] | |
unless token.include? '\'' and token.include? '"' | |
p token | |
dest.write token+".arg#{@count} " | |
end | |
@count = @count + 1 | |
end | |
elsif @state == "javacode_start" | |
if token == "{" | |
dest.write "\n" + token +"\n" | |
@loop = @loop + 1 | |
elsif token == "}" and @loop !=0 | |
dest.write "\n" + token + "\n" | |
@loop = @loop - 1 | |
elsif token == "}" | |
if @return == false | |
dest.write "\n return new Symbol(arg1);" | |
else | |
@return = false | |
end | |
if section3[@index + 2] == ":" | |
dest.write "\n:" + token + "\n" | |
else | |
dest.write "\n" + token + "\n" | |
end | |
@state = "rhs_initial" | |
@count = 1 | |
elsif token == "$$" | |
@return = true | |
dest.write "\n return new Symbol(" | |
elsif (token.scan /.*\$\d.*/) != [] | |
argument = (token.scan /\$\d/).to_s | |
token[argument[2] + argument[3]] = "arg" + argument[3] | |
if token[-1] == ";" and @return_state == 'return_rhs' | |
token[-1] = ');' | |
@return_state = '' | |
end | |
dest.write token + " " | |
elsif (token.scan /.*\$<.*>\d/) != [] | |
token.gsub! (token.match /<.*>/)[0] , "" | |
argument = (token.scan /\$\d/).to_s | |
token[argument[2] + argument[3]] = "arg" + argument[3] | |
if token[-1] == ";" and @return_state == 'return_rhs' | |
token[-1] = ');' | |
@return_state = '' | |
end | |
dest.write token + " " | |
elsif token == "if" || token == "else" | |
dest.write "\n" + token | |
elsif token == "=" and @return == true | |
dest.write "" | |
@return_state = 'return_rhs' | |
else | |
if token[-1] == ";" and @return_state == 'return_rhs' | |
token[-1] = ');' | |
@return_state = '' | |
end | |
p token | |
dest.write token + " " | |
end | |
end | |
@index = @index + 1 | |
end | |
#wrap up the process | |
src.close | |
dest.close | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment