Skip to content

Instantly share code, notes, and snippets.

@julbouln
Created April 7, 2021 14:52
Show Gist options
  • Save julbouln/28e26e1b99b0b991c151d9c09debb8e5 to your computer and use it in GitHub Desktop.
Save julbouln/28e26e1b99b0b991c151d9c09debb8e5 to your computer and use it in GitHub Desktop.
convert C globals to structures using clang ast json dump
require 'set'
require 'json'
require 'tree'
require 'fileutils'
# convert C globals to structures using clang ast json dump
# algorithm description
# - get AST with clang
# - convert AST into ruby tree
# - get all static globals and the functions that use them
# - get all extern globals and the functions that use them
# - for each function, get all the static and extern used
# - refactor all functions with a new needed struct declaration + call
# - rename variables with this struct : struct_name->variable
# FIXME: replacements that start at the same position conflict with each other
# FIXME: function pointers do not work
# FIXME: conflicts arise when a variable with the same name as the struct variable already exists
# Tested on a 6000-line C program (http://www.davidashen.net/rnv.html) with some success; manual refactoring is still needed.
# some sources
# https://www.scirp.org/pdf/JSEA_2013052311191508.pdf
# Tree node wrapping one clang AST entry. The node's content is a Hash with the
# keys :name, :kind, :type, :storage, :parsed_file, :file, :line, :range,
# :included_from, :has_inner and :reference; the readers below expose them.
class ASTNode < Tree::TreeNode
  # @return [String, nil] identifier stored under :name
  def ast_name
    content[:name]
  end

  # Name used to key a symbol across files: static symbols are prefixed with
  # the parsed file ("file#name"), everything else uses the bare name.
  def global_name
    storage == "static" ? "#{parsed_file}##{ast_name}" : ast_name
  end

  # Struct name derived from a file name: a trailing ".c"/".h" is dropped and
  # "_st" appended.
  def self.struct(file)
    file.sub(/\.(c|h)$/, "") + "_st"
  end

  # Struct name for the file this node was parsed from.
  def struct
    ASTNode.struct(parsed_file)
  end

  # C declaration text ("type name") for this node, used as a struct member.
  # Declarator syntax is honoured for the two shapes where the name does not
  # simply follow the type:
  # - "T (*)(...)" / "T (*)[n]"  -> the name goes inside the "(*)"
  # - "T [n]" / "T [n][m]"       -> the name goes before the array dimensions
  def definition
    if type.include?("(*)")
      type.sub("(*)", "(*#{ast_name})")
    elsif (array = type.match(/\A(.*?)\s*((?:\[[^\]]*\])+)\z/))
      "#{array[1]} #{ast_name}#{array[2]}"
    else
      "#{type} #{ast_name}"
    end
  end

  def kind
    content[:kind]
  end

  def type
    content[:type]
  end

  def storage
    content[:storage]
  end

  def parsed_file
    content[:parsed_file]
  end

  def file
    content[:file]
  end

  # Source line stored under :line (previously this returned :file by mistake).
  def line
    content[:line]
  end

  def range
    content[:range]
  end

  def included_from
    content[:included_from]
  end

  def has_inner
    content[:has_inner]
  end

  def reference
    content[:reference]
  end
end
class GlobalsConverter
# Run clang on every input file and cache both its JSON AST and its raw text.
#
# @param files [Array<String>] paths of the C sources/headers to convert
# @param struct [String] name stored in @struct (used later for the struct
#   that collects the globals without a storage class)
def initialize(files, struct)
  @struct = struct
  @types_h = ""
  @files_data = {}
  @json_ast = {}
  files.each do |file|
    #puts "parse #{file}"
    # Argument-vector form: no shell is involved, so file names containing
    # spaces or shell metacharacters reach clang verbatim. stderr is discarded.
    ast = IO.popen(["clang-11", "-Xclang", "-ast-dump=json", "-fsyntax-only", file],
                   err: File::NULL, &:read)
    @json_ast[file] = JSON.parse(ast)
    @files_data[file] = File.read(file)
  end
end
# Build the ASTNode tree (plus the by-id and by-name indexes) from the parsed
# JSON ASTs, then copy the storage class of each referenced top-level
# declaration onto the nodes that reference it.
def init_tree
  @node_by_id = {}
  @node_by_name = {}
  @tree = ASTNode.new("0x00000000", {})
  @node_by_id["0x00000000"] = @tree

  @json_ast.each do |file, json_ast|
    traverse_json(json_ast) do |c, p, _level|
      next unless p

      # A node that references a declaration takes over that declaration's
      # name and id, and its kind becomes "<DeclKind>RefExpr".
      referenced = c["referencedDecl"]
      if referenced
        c["name"] = referenced["name"]
        c["kind"] = referenced["kind"] + "RefExpr"
        c["refId"] = referenced["id"]
      end

      next unless c["id"]

      # ignore referencedDecl nodes
      parent_ref = p["referencedDecl"]
      next if parent_ref && parent_ref["id"] == c["id"]

      parent_node = @node_by_id[p["id"]] || @tree
      loc = c["loc"]
      span = c["range"]
      child_node = ASTNode.new(
        c["id"],
        {
          name: c["name"],
          kind: c["kind"],
          type: c["type"] ? c["type"]["qualType"] : nil,
          storage: c["storageClass"],
          parsed_file: file,
          file: loc ? loc["file"] : nil,
          line: loc ? loc["line"] : nil,
          range: span ? (span["begin"]["offset"].to_i..span["end"]["offset"].to_i) : nil,
          included_from: (loc && loc["includedFrom"]) ? true : false,
          has_inner: c["inner"] ? true : false,
          reference: c["refId"]
        }
      )

      # Skip ids the parent already holds as a child.
      next if parent_node[c["id"]]

      (@node_by_name[child_node.ast_name] ||= []) << child_node
      @node_by_id[c["id"]] = child_node
      parent_node << child_node
    end
  end

  traverse_tree do |node|
    next unless node.reference

    # Looked up among the root's children only.
    ref_node = @tree[node.reference]
    node.content[:storage] = ref_node.storage if ref_node
  end
end
# Look up every node registered under +name+.
#
# @param name [String, nil] identifier to look up
# @return [Array] the registered nodes, or an empty array when the name is
#   unknown (callers chain Enumerable methods directly on the result)
def get_by_name(name)
  @node_by_name.fetch(name, [])
end
# Collect the top-level variable declarations that have the given storage class.
# Declarations whose type contains "(*" (function-pointer style types) are
# left out.
#
# @param storage [String, nil] storage class to match ("static", or nil for
#   declarations without one)
# @return [Array] matching nodes in traversal order
def get_globals_variables(storage = nil)
  nodes = []
  traverse_tree do |n|
    next unless n.node_depth == 1 && n.kind == "VarDecl" && n.storage == storage
    next if n.type.include?("(*")

    nodes << n
  end
  nodes
end
# Function definitions (FunctionDecl nodes that have a CompoundStmt child),
# keyed by their global name. The result is computed once and cached.
#
# @return [Hash] global name => node
def get_functions
  @functions ||= begin
    defined = {}
    traverse_tree do |node|
      next unless node.kind == "FunctionDecl"
      next unless node.children.any? { |child| child.kind == "CompoundStmt" }

      defined[node.global_name] = node
    end
    defined
  end
end
# Index of the nodes returned by get_functions, keyed by each node's global
# name. Built on first use and cached afterwards.
#
# @return [Hash] global name => node
def get_functions_by_name
  @functions_by_name ||= get_functions.values.each_with_object({}) do |fn, index|
    index[fn.global_name] = fn
  end
end
# FunctionDecl nodes registered under +name+, keeping a node when it is not
# flagged as included_from or when it has a body (a CompoundStmt child).
#
# @param name [String] function name
# @return [Array] matching declaration nodes
def get_function_decls(name)
  get_by_name(name).select do |node|
    next false unless node.kind == "FunctionDecl"

    !node.included_from || node.children.any? { |child| child.kind == "CompoundStmt" }
  end
end
# Nodes registered under +name+ that are references to a function declaration.
#
# @param name [String] function name
# @return [Array] nodes whose kind is "FunctionDeclRefExpr"
def get_function_calls(name)
  wanted_kind = "FunctionDeclRefExpr"
  get_by_name(name).select do |candidate|
    candidate.kind == wanted_kind
  end
end
# For every function definition, the set of global names of the functions it
# references directly. Computed once and cached.
#
# @return [Hash] global name => Set of referenced function global names
def functions_calls
  return @functions_calls if @functions_calls

  @functions_calls = {}
  get_functions.each_value do |fn|
    callees = Set.new
    fn.each do |descendant|
      callees << descendant.global_name if descendant.kind == "FunctionDeclRefExpr"
    end
    @functions_calls[fn.global_name] = callees
  end
  @functions_calls
end
# Global names of every function reachable from +parent+ through calls,
# directly or transitively. Names without a known definition are included
# but not expanded.
#
# Each function is expanded at most once, so recursive and mutually recursive
# call graphs terminate and deep call chains are followed to the end. Names
# come out in depth-first, first-visit order.
#
# @param parent [#global_name] function node to start from
# @param l [Integer] ignored; kept only so existing call sites that pass a
#   depth still work
# @return [Set<String>] global names of the reachable functions
def get_called_functions(parent, l = 0)
  called = Set.new
  # Stack of names still to visit; reversed so that pop yields them in call order.
  pending = functions_calls[parent.global_name].to_a.reverse
  until pending.empty?
    name = pending.pop
    next unless called.add?(name) # already visited

    fn = get_functions_by_name[name]
    pending.concat(functions_calls[fn.global_name].to_a.reverse) if fn
  end
  called
end
# Functions that contain a node named +name+. For each such node the ancestor
# chain is walked up to the root and the last FunctionDecl met (the outermost
# one) is kept.
#
# @param name [String] identifier to look for
# @param file [String, nil] restrict to nodes parsed from this file; nil means any file
# @return [Array] distinct enclosing FunctionDecl nodes
def get_var_calls(name, file)
  owners = []
  get_by_name(name).each do |node|
    next unless node.ast_name == name
    next if file && node.parsed_file != file

    enclosing = nil
    ancestor = node.parent
    while ancestor
      enclosing = ancestor if ancestor.kind == "FunctionDecl"
      ancestor = ancestor.parent
    end
    owners << enclosing if enclosing
  end
  owners.uniq
end
# Yield every node of @tree to the given block, using the tree's own +each+.
def traverse_tree(&block)
  @tree.each(&block)
end
# Build the replacements that prefix every use of the variable +name+ with
# "<struct>->".
#
# @param name [String] variable name
# @param file [String, nil] only consider nodes parsed from this file; nil means any file
# @param nm [String, nil] struct variable name; when nil, the struct name of
#   the first qualifying node is used for all replacements
# @return [Array<Hash>] replacement descriptors
def replace_global_vars(name, file, nm = nil)
  matching = get_by_name(name).select { |n| !file || n.parsed_file == file }
  matching.each_with_object([]) do |node, edits|
    next unless node.kind == "VarDeclRefExpr" && node.range

    offset = node.range.first
    next unless offset > 0

    nm ||= node.struct
    # Only the identifier itself is covered: start offset plus the name length.
    span = offset..(offset + node.ast_name.length - 1)
    edits << { type: "replace_global_var", name: node.ast_name, file: node.parsed_file, range: span,
               replace: @files_data[node.parsed_file][span],
               insert: "#{nm}->",
               replace_by: "#{nm}->#{node.ast_name}" }
  end
end
# Build the replacements that add the +needs+ parameters to every declaration
# of the function +name+.
#
# - With existing parameters, +needs+ is inserted in front of the first one.
# - With "(void)", the "void" is replaced by +needs+.
# - With an empty "()", +needs+ is inserted between the parentheses. The
#   range is empty in that case (end == start - 1), so applying it with
#   String#[]= inserts the text without overwriting the closing ")".
#
# Declarations whose text does not contain "name(...)" are skipped.
#
# @param name [String] function name
# @param needs [Array<String>] parameter declarations to add
# @param file [String, nil] only touch declarations parsed from this file; nil means any file
# @return [Array<Hash>] replacement descriptors
def replace_function_signatures(name, needs, file)
  replacements = []
  added = needs.join(", ")
  get_function_decls(name).select { |n| !file || n.parsed_file == file }.each do |node|
    source = @files_data[node.parsed_file]
    params = node.children.select { |c| c.kind == "ParmVarDecl" }
    if params.length > 0
      insert = true
      range_start = params.map { |param| param.range.first }.min
      range_end = params.map { |param| param.range.last }.max
    else
      next unless node.range

      insert = false
      text = source[node.range]
      match = text && text.match(/\b#{Regexp.escape(name)}\s*\(([^\)]*)\)/)
      next unless match

      inner = match[1]
      range_start = node.range.first + match.begin(1)
      if inner.strip == "void"
        range_end = range_start + inner.length - 1
      else
        # Empty range: pure insertion right after the opening parenthesis.
        range_end = range_start - 1
      end
    end
    next unless range_end > 0

    range = Range.new(range_start, range_end)
    replace_by = insert ? added + ", " + source[range] : added
    replacements << { type: "replace_function_signature", name: node.ast_name, file: node.parsed_file, range: range,
                      replace: source[range],
                      insert: insert ? added + ", " : nil,
                      replace_by: replace_by }
  end
  replacements
end
# Build the replacements that pass the +needs+ arguments at every reference
# to the function +name+.
#
# The grandparent of each reference node is treated as the call node; its
# other children with a positive start offset are taken as the arguments.
# With arguments, +needs+ is inserted in front of the first one. Without,
# an empty range located name.length + 1 characters after the reference
# start is used (NOTE(review): this presumably assumes "name(" with no
# whitespace before the parenthesis — confirm).
#
# @param name [String] function name
# @param needs [Array<String>] argument expressions to add
# @param file [String, nil] only touch references parsed from this file; nil means any file
# @return [Array<Hash>] replacement descriptors
def replace_function_calls(name, needs, file)
  replacements = []
  get_function_calls(name).each do |node|
    next unless !file || node.parsed_file == file
    next unless node.range.first > 0

    callee = node.parent
    call = callee.parent
    args = call.children.select { |c| c != callee && c.range.first > 0 }
    source = @files_data[node.parsed_file]

    if args.empty?
      range_start = node.range.first + name.length + 1
      range_end = range_start - 1
      # Anything other than ")" at the insertion point gets a ", " separator.
      insert = source[range_start] != ')'
    else
      insert = true
      range_start = [2 ** 32, *args.map { |c| c.range.first }].min
      range_end = [0, *args.map { |c| c.range.last }].max
    end
    next unless range_end > 0

    range = range_start..range_end
    prefix = needs.join(", ")
    replace_by = insert ? prefix + ", " + source[range] : prefix
    replacements << { type: "replace_function_call", name: node.ast_name, file: node.parsed_file, range: range,
                      replace: source[range],
                      insert: insert ? prefix + ", " : nil,
                      replace_by: replace_by }
  end
  replacements
end
# Run the whole conversion: find which functions use which globals, generate
# the struct typedefs, compute every text replacement and write the results
# into the "out" directory (out/<file> for each parsed file, plus out/type.h).
# Progress and every applied replacement are printed with puts.
#
# @param [Array<String>] ignores names of globals without a storage class that must be left untouched
def process(ignores = [])
# function global name => globals it uses directly
static_needs = {}
export_needs = {}
# parsed file => static globals declared there
static_per_file = {}
statics = self.get_globals_variables("static")
puts "# static globals (#{statics.length})"
statics.each do |global|
# globals whose AST node has inner nodes are skipped
unless global.has_inner
static_per_file[global.parsed_file] ||= []
static_per_file[global.parsed_file] << global
#puts " #{global.parsed_file} '#{global.storage} #{global.type}'"
# statics are only searched for in the file that declares them
parents = self.get_var_calls(global.ast_name, global.parsed_file)
parents.each do |parent|
#puts " called from #{parent.parsed_file}(#{parent.line}) '#{parent.storage} #{parent.type} #{parent.ast_name}'"
static_needs[parent.global_name] ||= []
static_needs[parent.global_name] << global
end
end
end
# one typedef'd struct per file, holding that file's static globals
static_per_file.each do |file, globals|
#puts " -> new struct"
nm = ASTNode.struct(file)
@types_h += " typedef struct #{nm} {\n"
globals.each do |global|
@types_h += " #{global.definition};\n"
end
@types_h += " } #{nm}_t;\n\n"
end
# globals declared without a storage class
exports = self.get_globals_variables(nil)
exports.select! { |global| !global.type.start_with?("const") } # reject constants
exports.select! { |global| !ignores.include?(global.ast_name) } # reject ignores
extern = []
puts "# extern globals (#{exports.length})"
exports.each do |global|
extern << global
#puts " '#{global.storage} #{global.type} #{global.ast_name}' (#{global.has_inner})"
p_hash = {}
# file is nil here, so uses are looked up across all parsed files
parents = self.get_var_calls(global.ast_name, nil)
parents.each do |parent|
p_hash[parent.parsed_file] ||= []
p_hash[parent.parsed_file] << parent
export_needs[parent.global_name] ||= []
export_needs[parent.global_name] << global
end
# NOTE: this loop currently has no effect; its only statements are commented-out debug prints
p_hash.each do |file, parents|
#puts " called from #{file}"
parents.each do |parent|
#puts " #{parent.parsed_file}(#{parent.line}) '#{parent.storage} #{parent.type} #{parent.ast_name}'"
end
end
end
#puts " -> new struct"
# a single struct, named after @struct, holds all of these globals
nm = @struct
@types_h += " typedef struct #{nm} {\n"
extern.each do |global|
@types_h += " #{global.definition};\n"
end
@types_h += " } #{nm}_t;\n\n"
puts "# function needs"
# function global name => names of the structs it must receive
needs_struct = {}
self.get_functions.values.each do |parent|
puts " #{parent.parsed_file}(#{parent.line}) '#{parent.storage} #{parent.type} #{parent.ast_name}'"
#puts "LOOKUP #{parent.global_name} #{parent.children.length}"
# a function needs its own globals plus those of every function it calls
called_children = self.get_called_functions(parent)
static_needs_all = []
export_needs_all = []
static_needs_all += (static_needs[parent.global_name] || [])
export_needs_all += (export_needs[parent.global_name] || [])
called_children.each do |nm|
#puts " call #{nm}"
static_needs_all += (static_needs[nm] || [])
export_needs_all += (export_needs[nm] || [])
end
static_needs_all.uniq!
export_needs_all.uniq!
if static_needs_all.length > 0 or export_needs_all.length > 0
static_needs_struct = static_needs_all.map { |n| n.struct }.uniq
export_needs_struct = export_needs_all.length > 0 ? [@struct] : []
# the @struct entry (if any) comes first, followed by the per-file structs
needs_struct[parent.global_name] = export_needs_struct + static_needs_struct
puts " needs struct #{static_needs_struct.map { |s| "'#{s}'" }.join(", ")}"
static_needs_all.each do |global|
puts " needs #{global.parsed_file} '#{global.storage} #{global.type} #{global.ast_name}'"
end
export_needs_all.each do |global|
puts " needs #{global.parsed_file} '#{global.storage} #{global.type} #{global.ast_name}'"
end
end
end
# at this point, we generate all replacements
# file => list of replacement descriptors
all_replacements = {}
self.get_functions.values.each do |parent|
if needs_struct[parent.global_name] and needs_struct[parent.global_name].length > 0
#puts " replace function declaration #{parent.ast_name} #{parent.storage}"
# static functions are only rewritten in their own file; others in every file
replace_function_signatures(parent.ast_name, needs_struct[parent.global_name].map { |var| "#{var}_t *#{var}" },
parent.storage == "static" ? parent.parsed_file : nil).each do |r|
all_replacements[r[:file]] ||= []
all_replacements[r[:file]] << r
end
#puts " replace function calls #{parent.ast_name} #{parent.storage}"
replace_function_calls(parent.ast_name, needs_struct[parent.global_name],
parent.storage == "static" ? parent.parsed_file : nil).each do |r|
all_replacements[r[:file]] ||= []
all_replacements[r[:file]] << r
end
end
end
statics.each do |global|
unless global.has_inner
#puts " replace static #{global.ast_name}"
replace_global_vars(global.ast_name, global.parsed_file).each do |r|
all_replacements[r[:file]] ||= []
all_replacements[r[:file]] << r
end
end
end
exports.each do |global|
#puts " replace export #{global.ast_name}"
replace_global_vars(global.ast_name, nil, @struct).each do |r|
all_replacements[r[:file]] ||= []
all_replacements[r[:file]] << r
end
end
# now really replace
FileUtils.mkdir_p("out")
# first write every file unchanged; files with replacements are rewritten below
@files_data.each do |file, out_data|
File.open("out/#{file}", "w") do |f|
f.write out_data
end
end
File.open("out/type.h", "w") do |f|
f.write "#ifndef TYPE_H\n"
f.write "#define TYPE_H\n\n"
f.write @types_h
f.write "#endif // TYPE_H\n"
end
all_replacements.each do |file, replacements|
# highest start offset first, so each edit leaves the offsets of the edits still to come untouched
replacements.sort! { |a, b| b[:range].first <=> a[:range].first }
puts " replace in #{file}"
# same String object as @files_data[file]: the edits below modify the cached text in place
out_data = @files_data[file]
# start offset of the most recent insert-type replacement
pos = 0
replacements.each do |r|
if r[:insert]
# a second insert at the same offset is reported and skipped
if pos == r[:range].first
puts "ERROR #{r[:file]} #{r[:name]} pos #{pos} already set"
else
out_data.insert(r[:range].first, r[:insert])
end
pos = r[:range].first
else
# no :insert text: the range is overwritten with :replace_by
out_data[r[:range]] = r[:replace_by]
end
puts " #{r}"
end
File.open("out/#{file}", "w") do |f|
f.write "#include \"type.h\"\n\n"
f.write out_data
end
end
end
private
# Walk a parsed clang JSON AST, calling the block for every node with
# (node, parent, level). The root is visited with a nil parent at level -1.
def traverse_json(json_ast, &block)
  roots = [json_ast]
  _traverse_json(roots, nil, -1, &block)
end
# Recursive worker for traverse_json. Each node in +n+ is passed to the block
# first (so the block may modify it), then its "inner" children are visited,
# then its "referencedDecl" (as a single child), both one level deeper.
#
# @param n [Array<Hash>] sibling nodes to visit
# @param p [Hash, nil] their parent node
# @param l [Integer] level passed to the block for these nodes
def _traverse_json(n, p, l = 0, &block)
  n.each do |c|
    block.call(c, p, l) if block

    deeper = l + 1
    kids = c["inner"]
    _traverse_json(kids, c, deeper, &block) if kids

    referenced = c["referencedDecl"]
    _traverse_json([referenced], c, deeper, &block) if referenced
  end
end
end
# Convert every C header and source file in the current directory, collecting
# the globals without a storage class into a struct named "rnv". The listed
# names are passed to process as globals to leave alone.
sources = Dir.glob("*.{h,c}")
ignored_globals = [
  "er_printf", "er_vprintf",
  "drv_verror_handler", "rnl_verror_handler", "rnv_verror_handler", "rnd_verror_handler",
  "xsd_verror_handler", "rx_verror_handler", "rnc_verror_handler"
]
conv = GlobalsConverter.new(sources, "rnv")
conv.init_tree
conv.process(ignored_globals)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment