Created
April 7, 2021 14:52
-
-
Save julbouln/28e26e1b99b0b991c151d9c09debb8e5 to your computer and use it in GitHub Desktop.
convert C globals to structures using clang ast json dump
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'set' | |
require 'json' | |
require 'tree' | |
require 'fileutils' | |
# Convert C globals to structures using a clang AST JSON dump.
# Algorithm description:
# - get the AST with clang
# - convert the AST into a Ruby tree
# - get all static globals and the functions that use them
# - get all extern globals and the functions that use them
# - for each function, get all the static and extern globals it uses
# - refactor all functions with the newly needed struct declaration + call
# - rename variables with this struct: struct_name->variable
# FIXME: replacement problem when two replacements are at the same position
# FIXME: function pointers do not work
# FIXME: conflict when a variable with the same name as a struct variable exists
# Tested on a 6000-line C program (http://www.davidashen.net/rnv.html) with some success; manual refactoring is still needed.
# Some sources:
# https://www.scirp.org/pdf/JSEA_2013052311191508.pdf
# Tree node holding one entry of a clang JSON AST dump.
#
# The node content is a Hash with the keys :name, :kind, :type, :storage,
# :parsed_file, :file, :line, :range, :included_from, :has_inner and
# :reference; the readers below expose those entries.
class ASTNode < Tree::TreeNode
  # Matches a type spelling that ends in array dimensions, e.g. "int [10]"
  # or "char *[3][4]": group 1 is the element type, group 2 the dimensions.
  ARRAY_TYPE = /\A(.*?)\s*((?:\[[^\]]*\])+)\z/.freeze

  # @return [String, nil] the declared or referenced identifier
  def ast_name
    content[:name]
  end

  # Name that is unique across the parsed files: static symbols are prefixed
  # with the file they were parsed from, other symbols use their plain name.
  #
  # @return [String, nil]
  def global_name
    storage == "static" ? "#{parsed_file}##{ast_name}" : ast_name
  end

  # Name of the struct that collects the static globals of +file+.
  #
  # @param file [String] a file name ending in ".c" or ".h"
  # @return [String] the file name without that extension, suffixed with "_st"
  def self.struct(file)
    file.sub(/\.(c|h)$/, "") + "_st"
  end

  # @return [String] struct name for the file this node was parsed from
  def struct
    ASTNode.struct(parsed_file)
  end

  # C declaration text (without trailing semicolon) for this variable.
  #
  # The name has to be placed inside the type for "(*)" pointer declarators
  # and before the dimensions for arrays; a plain "<type> <name>" would not be
  # valid C in those cases.
  #
  # @return [String]
  def definition
    if type.include?("(*)")
      type.sub("(*)", "(*#{ast_name})")
    elsif (array = ARRAY_TYPE.match(type))
      "#{array[1]} #{ast_name}#{array[2]}"
    else
      "#{type} #{ast_name}"
    end
  end

  # @return [String, nil] AST node kind stored for this node
  def kind
    content[:kind]
  end

  # @return [String, nil] type spelling stored for this node
  def type
    content[:type]
  end

  # @return [String, nil] storage class ("static", ...) or nil when none is stored
  def storage
    content[:storage]
  end

  # @return [String, nil] file whose AST dump produced this node
  def parsed_file
    content[:parsed_file]
  end

  # @return [String, nil] file recorded in the node location
  def file
    content[:file]
  end

  # @return [Integer, nil] line recorded in the node location
  def line
    # The content hash stores the line under :line, not under :file.
    content[:line]
  end

  # @return [Range, nil] begin..end offsets of the node in its source file
  def range
    content[:range]
  end

  # @return [Boolean, nil] whether the node location carries an "includedFrom" entry
  def included_from
    content[:included_from]
  end

  # @return [Boolean, nil] whether the AST entry had inner nodes
  def has_inner
    content[:has_inner]
  end

  # @return [String, nil] id of the referenced declaration, if any
  def reference
    content[:reference]
  end
end
# Rewrites C sources so that global variables become members of structs that
# are passed explicitly to every function needing them.
#
# Typical use: GlobalsConverter.new(files, struct_name), then #init_tree, then
# #process. The rewritten sources and the generated "type.h" are written to
# the "out" directory.
class GlobalsConverter
  # clang invocation that dumps the AST as JSON without compiling.
  CLANG_AST_COMMAND = %w[clang-11 -Xclang -ast-dump=json -fsyntax-only].freeze

  # Parses every file with clang and keeps both the AST and the source text.
  #
  # @param files [Array<String>] C source/header files to convert
  # @param struct [String] name of the struct receiving the non-static globals
  # @raise [JSON::ParserError] when clang produced no usable JSON
  def initialize(files, struct)
    @struct = struct
    @types_h = ""
    @files_data = {}
    @json_ast = {}
    files.each do |file|
      # The argv form bypasses the shell, so file names containing spaces or
      # shell metacharacters are passed through untouched.
      ast = IO.popen([*CLANG_AST_COMMAND, file], err: File::NULL, &:read)
      @json_ast[file] = JSON.parse(ast)
      # clang reports byte offsets; a binary string keeps string indexes and
      # those offsets aligned even when the source contains non-ASCII bytes.
      @files_data[file] = File.binread(file)
    end
  end

  # Builds the ASTNode tree plus the id and name indexes from the JSON dumps.
  #
  # @return [void]
  def init_tree
    @node_by_id = {}
    @node_by_name = {}
    @tree = ASTNode.new("0x00000000", {})
    @node_by_id["0x00000000"] = @tree

    @json_ast.each do |file, json_ast|
      traverse_json(json_ast) do |c, parent_json, _level|
        # The JSON root has no parent and is represented by @tree itself.
        next unless parent_json

        if c["referencedDecl"]
          # A reference takes the name of the declaration it points to and a
          # "<DeclKind>RefExpr" kind, so uses can be looked up by name.
          c["name"] = c["referencedDecl"]["name"]
          c["kind"] = c["referencedDecl"]["kind"] + "RefExpr"
          c["refId"] = c["referencedDecl"]["id"]
        end

        # ignore referencedDecl nodes
        next unless c["id"] && (!parent_json["referencedDecl"] || parent_json["referencedDecl"]["id"] != c["id"])

        parent_node = @node_by_id[parent_json["id"]] || @tree
        next if parent_node[c["id"]]

        child_node = ASTNode.new(c["id"],
                                 { name: c["name"],
                                   kind: c["kind"],
                                   type: c["type"] ? c["type"]["qualType"] : nil,
                                   storage: c["storageClass"],
                                   parsed_file: file,
                                   file: c["loc"] ? c["loc"]["file"] : nil,
                                   line: c["loc"] ? c["loc"]["line"] : nil,
                                   range: c["range"] ? Range.new(c["range"]["begin"]["offset"].to_i, c["range"]["end"]["offset"].to_i) : nil,
                                   included_from: c["loc"] && c["loc"]["includedFrom"] ? true : false,
                                   has_inner: c["inner"] ? true : false,
                                   reference: c["refId"] })
        (@node_by_name[child_node.ast_name] ||= []) << child_node
        @node_by_id[c["id"]] = child_node
        parent_node << child_node
      end
    end

    # References inherit the storage class of the top-level declaration they
    # point to, so that #global_name agrees for a declaration and its uses.
    traverse_tree do |node|
      next unless node.reference

      ref_node = @tree[node.reference]
      node.content[:storage] = ref_node.storage if ref_node
    end
  end

  # @param name [String] identifier to look up
  # @return [Array<ASTNode>, nil] every node carrying that name, nil if unknown
  def get_by_name(name)
    @node_by_name[name]
  end

  # Top-level variable declarations with the given storage class; variables
  # whose type contains "(*" are left out.
  #
  # @param storage [String, nil] "static", or nil for no storage class
  # @return [Array<ASTNode>]
  def get_globals_variables(storage = nil)
    nodes = []
    traverse_tree do |n|
      next unless n.node_depth == 1 && n.kind == "VarDecl" && n.storage == storage
      next if n.type.to_s.include?("(*")

      nodes << n
    end
    nodes
  end

  # Function declarations that have a body, keyed by #global_name (memoized).
  #
  # @return [Hash{String => ASTNode}]
  def get_functions
    return @functions if @functions

    nodes = {}
    traverse_tree do |n|
      nodes[n.global_name] = n if n.kind == "FunctionDecl" && function_definition?(n)
    end
    @functions = nodes
  end

  # Same content as #get_functions, kept as a separately memoized index.
  #
  # @return [Hash{String => ASTNode}]
  def get_functions_by_name
    return @functions_by_name if @functions_by_name

    @functions_by_name = get_functions.values.each_with_object({}) do |f, nodes|
      nodes[f.global_name] = f
    end
  end

  # Declarations of +name+ that either are not flagged as included from
  # another file or carry a body.
  #
  # @param name [String]
  # @return [Array<ASTNode>] empty when the name is unknown
  def get_function_decls(name)
    nodes_named(name).select do |n|
      n.kind == "FunctionDecl" && (!n.included_from || function_definition?(n))
    end
  end

  # @param name [String]
  # @return [Array<ASTNode>] references to function +name+, empty when unknown
  def get_function_calls(name)
    nodes_named(name).select { |node| node.kind == "FunctionDeclRefExpr" }
  end

  # For every defined function, the set of function names it references
  # directly (memoized).
  #
  # @return [Hash{String => Set<String>}]
  def functions_calls
    return @functions_calls if @functions_calls

    @functions_calls = {}
    get_functions.values.each do |parent|
      callees = Set.new
      parent.each do |child|
        callees << child.global_name if child.kind == "FunctionDeclRefExpr"
      end
      @functions_calls[parent.global_name] = callees
    end
    @functions_calls
  end

  # Names of all functions reachable from +parent+ through calls, directly or
  # transitively. Recursive call chains are followed once per function, so the
  # result is complete however many callees a function has.
  #
  # @param parent [#global_name] the function to start from
  # @param l [Integer] ignored; accepted for backward compatibility only
  # @return [Set<String>]
  def get_called_functions(parent, l = 0)
    collect_called_functions(parent.global_name, Set.new)
  end

  # Functions that contain a node named +name+.
  #
  # @param name [String] variable name
  # @param file [String, nil] restrict to nodes parsed from this file
  # @return [Array<ASTNode>] outermost enclosing FunctionDecl of each match
  def get_var_calls(name, file)
    nodes = []
    nodes_named(name).each do |n|
      next unless n.ast_name == name && (!file || n.parsed_file == file)

      enclosing = nil
      ancestor = n.parent
      while ancestor
        # Keep walking up: the last FunctionDecl seen is the outermost one.
        enclosing = ancestor if ancestor.kind == "FunctionDecl"
        ancestor = ancestor.parent
      end
      nodes << enclosing if enclosing
    end
    nodes.uniq
  end

  # Yields every node of the tree.
  def traverse_tree(&block)
    @tree.each(&block)
  end

  # Replacement descriptions turning each use of global +name+ into
  # "<struct>->name".
  #
  # @param name [String] global variable name
  # @param file [String, nil] restrict to uses parsed from this file
  # @param nm [String, nil] struct variable name; defaults to the struct of
  #   the file each use was parsed from
  # @return [Array<Hash>]
  def replace_global_vars(name, file, nm = nil)
    replacements = []
    nodes_named(name).each do |node|
      next unless !file || node.parsed_file == file
      next unless node.kind == "VarDeclRefExpr" && node.range

      range_start = node.range.first
      # Offset 0 is what a missing offset in the dump turns into.
      next unless range_start > 0

      struct_name = nm || node.struct
      range = Range.new(range_start, range_start + node.ast_name.length - 1)
      replacements << { type: "replace_global_var", name: node.ast_name, file: node.parsed_file, range: range,
                        replace: @files_data[node.parsed_file][range],
                        insert: "#{struct_name}->",
                        replace_by: "#{struct_name}->#{node.ast_name}" }
    end
    replacements
  end

  # Replacement descriptions adding the +needs+ parameters to every
  # declaration of function +name+.
  #
  # With existing parameters the new ones are inserted in front of them. An
  # empty or "void" parameter list is replaced as a whole, which leaves the
  # closing parenthesis untouched.
  #
  # @param name [String] function name
  # @param needs [Array<String>] parameter declarations to add
  # @param file [String, nil] restrict to declarations parsed from this file
  # @return [Array<Hash>]
  def replace_function_signatures(name, needs, file)
    joined = needs.join(", ")
    replacements = []
    get_function_decls(name).each do |node|
      next unless !file || node.parsed_file == file

      data = @files_data[node.parsed_file]
      params = node.children.select { |c| c.kind == "ParmVarDecl" }
      insert = nil
      if params.empty?
        range = empty_parameter_list_range(node, name, data)
        next unless range

        replace_by = joined
      else
        ranges = params.map(&:range).compact
        next if ranges.empty?

        range = Range.new(ranges.map(&:first).min, ranges.map(&:last).max)
        next unless range.last > 0

        insert = joined + ", "
        replace_by = insert + data[range]
      end
      replacements << { type: "replace_function_signature", name: node.ast_name, file: node.parsed_file, range: range,
                        replace: data[range],
                        insert: insert,
                        replace_by: replace_by }
    end
    replacements
  end

  # Replacement descriptions adding the +needs+ arguments to every call of
  # function +name+.
  #
  # Only references that are the callee of a CallExpr are rewritten; a
  # function merely passed around as a pointer has no argument list to extend.
  #
  # @param name [String] function name
  # @param needs [Array<String>] argument expressions to add
  # @param file [String, nil] restrict to calls parsed from this file
  # @return [Array<Hash>]
  def replace_function_calls(name, needs, file)
    joined = needs.join(", ")
    replacements = []
    get_function_calls(name).each do |node|
      next unless !file || node.parsed_file == file
      next unless node.range && node.range.first > 0

      # The reference sits below a cast node, which in turn is the first child
      # (the callee) of the call expression.
      call = node.parent && node.parent.parent
      next unless call && call.kind == "CallExpr" && call.children.first == node.parent

      data = @files_data[node.parsed_file]
      args = call.children.select { |c| c != node.parent && c.range && c.range.first > 0 }
      if args.empty?
        # Zero-width range right after "name(".
        range_start = node.range.first + name.length + 1
        range_end = range_start - 1
        insert = data[range_start] != ')'
      else
        range_start = args.map { |c| c.range.first }.min
        range_end = args.map { |c| c.range.last }.max
        insert = true
      end
      next unless range_end > 0

      range = Range.new(range_start, range_end)
      replacements << { type: "replace_function_call", name: node.ast_name, file: node.parsed_file, range: range,
                        replace: data[range],
                        insert: insert ? joined + ", " : nil,
                        replace_by: insert ? joined + ", " + data[range] : joined }
    end
    replacements
  end

  # Runs the whole conversion and writes the result to the "out" directory.
  #
  # @param [Array<String>] ignores ignores these globals
  # @return [void]
  def process(ignores = [])
    static_needs = {}
    export_needs = {}
    static_per_file = {}

    statics = get_globals_variables("static")
    puts "# static globals (#{statics.length})"
    statics.each do |global|
      next if global.has_inner

      (static_per_file[global.parsed_file] ||= []) << global
      get_var_calls(global.ast_name, global.parsed_file).each do |parent|
        (static_needs[parent.global_name] ||= []) << global
      end
    end
    static_per_file.each do |file, globals|
      append_struct_typedef(ASTNode.struct(file), globals)
    end

    exports = get_globals_variables(nil)
    exports.select! { |global| !global.type.start_with?("const") } # reject constants
    exports.select! { |global| !ignores.include?(global.ast_name) } # reject ignores
    puts "# extern globals (#{exports.length})"
    exports.each do |global|
      get_var_calls(global.ast_name, nil).each do |parent|
        (export_needs[parent.global_name] ||= []) << global
      end
    end
    append_struct_typedef(@struct, exports)

    puts "# function needs"
    needs_struct = {}
    get_functions.values.each do |parent|
      puts " #{parent.parsed_file}(#{parent.line}) '#{parent.storage} #{parent.type} #{parent.ast_name}'"
      static_needs_all = []
      export_needs_all = []
      # A function needs what it uses itself plus what everything it calls uses.
      [parent.global_name, *get_called_functions(parent)].each do |function_name|
        static_needs_all += (static_needs[function_name] || [])
        export_needs_all += (export_needs[function_name] || [])
      end
      static_needs_all.uniq!
      export_needs_all.uniq!
      next unless static_needs_all.length > 0 || export_needs_all.length > 0

      static_needs_struct = static_needs_all.map { |n| n.struct }.uniq
      export_needs_struct = export_needs_all.length > 0 ? [@struct] : []
      needs_struct[parent.global_name] = export_needs_struct + static_needs_struct
      puts " needs struct #{static_needs_struct.map { |s| "'#{s}'" }.join(", ")}"
      (static_needs_all + export_needs_all).each do |global|
        puts " needs #{global.parsed_file} '#{global.storage} #{global.type} #{global.ast_name}'"
      end
    end

    # at this point, we generate all replacements
    all_replacements = {}
    collect = lambda do |replacements|
      replacements.each { |r| (all_replacements[r[:file]] ||= []) << r }
    end
    get_functions.values.each do |parent|
      needs = needs_struct[parent.global_name]
      next unless needs && needs.length > 0

      # Static functions are only visible in the file that defines them.
      scope = parent.storage == "static" ? parent.parsed_file : nil
      collect.call(replace_function_signatures(parent.ast_name, needs.map { |var| "#{var}_t *#{var}" }, scope))
      collect.call(replace_function_calls(parent.ast_name, needs, scope))
    end
    statics.each do |global|
      collect.call(replace_global_vars(global.ast_name, global.parsed_file)) unless global.has_inner
    end
    exports.each do |global|
      collect.call(replace_global_vars(global.ast_name, nil, @struct))
    end

    # now really replace
    FileUtils.mkdir_p("out")
    # Every file is first copied unchanged; files with replacements are
    # written again further down.
    @files_data.each do |file, out_data|
      File.open("out/#{file}", "w") do |f|
        f.write out_data
      end
    end
    File.open("out/type.h", "w") do |f|
      f.write "#ifndef TYPE_H\n"
      f.write "#define TYPE_H\n\n"
      f.write @types_h
      f.write "#endif // TYPE_H\n"
    end
    all_replacements.each do |file, replacements|
      # Apply from the end of the file backwards so earlier offsets stay valid.
      replacements.sort! { |a, b| b[:range].first <=> a[:range].first }
      puts " replace in #{file}"
      out_data = @files_data[file]
      pos = 0
      replacements.each do |r|
        if r[:insert]
          if pos == r[:range].first
            puts "ERROR #{r[:file]} #{r[:name]} pos #{pos} already set"
          else
            out_data.insert(r[:range].first, r[:insert])
          end
          pos = r[:range].first
        else
          out_data[r[:range]] = r[:replace_by]
        end
        puts " #{r}"
      end
      File.open("out/#{file}", "w") do |f|
        f.write "#include \"type.h\"\n\n"
        f.write out_data
      end
    end
  end

  private

  # Walks a JSON AST, yielding (node, parent, level) for every entry; the root
  # is yielded with a nil parent and level -1.
  def traverse_json(json_ast, &block)
    _traverse_json([json_ast], nil, -1, &block)
  end

  # Recursive worker of #traverse_json: visits "inner" children first, then
  # the "referencedDecl" entry, both with the current node as parent.
  def _traverse_json(n, p, l = 0, &block)
    n.each do |c|
      block.call c, p, l if block
      _traverse_json(c["inner"], c, l + 1, &block) if c["inner"]
      _traverse_json([c["referencedDecl"]], c, l + 1, &block) if c["referencedDecl"]
    end
  end

  # Nodes carrying +name+, or an empty list when the name is unknown.
  def nodes_named(name)
    get_by_name(name) || []
  end

  # True when the function node has a body (a CompoundStmt child).
  def function_definition?(node)
    node.children.any? { |c| c.kind == "CompoundStmt" }
  end

  # Adds to +called+ every function reachable from +name+; the set doubles as
  # the visited list, so each function is expanded at most once.
  def collect_called_functions(name, called)
    (functions_calls[name] || []).each do |callee|
      next unless called.add?(callee)

      collect_called_functions(callee, called) if get_functions_by_name.key?(callee)
    end
    called
  end

  # Range of the text between the parentheses of a parameterless declaration:
  # the whole text when it is blank or "void", otherwise a zero-width range at
  # its start. Returns nil when the header cannot be located.
  def empty_parameter_list_range(node, name, data)
    return nil unless node.range

    header = data[node.range]
    match = header && /\b#{Regexp.escape(name)}\s*\(([^\)]*)\)/.match(header)
    return nil unless match

    args = match[1]
    args_start = node.range.first + match.begin(1)
    if args.strip.empty? || args.strip == "void"
      Range.new(args_start, args_start + args.length - 1)
    else
      Range.new(args_start, args_start - 1)
    end
  end

  # Appends a "typedef struct" holding +globals+ to the generated header text.
  def append_struct_typedef(nm, globals)
    @types_h += " typedef struct #{nm} {\n"
    globals.each do |global|
      @types_h += " #{global.definition};\n"
    end
    @types_h += " } #{nm}_t;\n\n"
  end
end
# Driver: convert every C source and header of the current directory, putting
# the non-static globals into the "rnv" struct. The listed globals are left
# out of the conversion.
ignored_globals = %w[
  er_printf er_vprintf
  drv_verror_handler rnl_verror_handler rnv_verror_handler rnd_verror_handler
  xsd_verror_handler rx_verror_handler rnc_verror_handler
]
source_files = Dir.glob("*.{h,c}")
converter = GlobalsConverter.new(source_files, "rnv")
converter.init_tree
converter.process(ignored_globals)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment