Created
August 10, 2022 15:40
-
-
Save alexander-hanel/0d9e6fc91702fa124e5a60f552b49265 to your computer and use it in GitHub Desktop.
Extract Go source code function comments and add them to an IDB
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import os
import re
import subprocess
import sys

import idautils
import idc
# Root of the Go installation whose "src" tree is scanned for function
# comments. The GOROOT environment variable is used when it is set and
# non-empty; otherwise the default Windows install location is assumed.
GOBIN = os.environ.get("GOROOT") or r"C:\Program Files\Go"
def get_comments(index, lines, threshold=20):
    """Collect the ``//`` comment block sitting directly above ``lines[index]``.

    Walks upward from the line before ``index`` and gathers consecutive lines
    that start with ``//``. The walk stops at the first non-comment line, at
    the top of the file, or when the look-back bound is reached.

    Args:
        index: Position in ``lines`` of the line the comment belongs to.
        lines: Source lines; each one is used as-is, so any trailing newline
            it carries ends up in the result.
        threshold: Look-back bound. At most ``threshold - 1`` lines above
            ``index`` are inspected.

    Returns:
        The comment lines concatenated in file (top-down) order, or ``""``
        when the line directly above ``index`` is not a ``//`` comment.
    """
    collected = []
    for back in range(1, threshold):
        pos = index - back
        # A negative position would wrap around to the end of the list and
        # pull in unrelated lines from the bottom of the file.
        if pos < 0:
            break
        line = lines[pos]
        if not line.startswith("//"):
            break
        collected.append(line)
    # Lines were gathered bottom-up; restore the order they have in the file.
    collected.reverse()
    return "".join(collected)
def parse_go_lines(file_path):
    """Extract function declarations and their leading comments from a Go file.

    Every line that starts with ``func`` is treated as a declaration. The
    declaration text is the part of that line before the first ``{`` with
    trailing whitespace removed; the comment is whatever ``get_comments``
    finds directly above the line.

    Args:
        file_path: Path of the Go source file, read as UTF-8.

    Returns:
        A list of ``(declaration, comment)`` tuples in file order.
    """
    parsed = []
    with open(file_path, "r", encoding="utf8") as go_code:
        lines = go_code.readlines()
    for index, line in enumerate(lines):
        if not line.startswith("func"):
            continue
        comment = get_comments(index, lines)
        # partition() yields the whole line when there is no "{" (signature
        # split over several lines, or a body-less declaration). Slicing with
        # find()'s -1 would instead chop off the last character.
        declaration = line.partition("{")[0].rstrip()
        parsed.append((declaration, comment))
    return parsed
def extract_func_name(func):
    """Return the function or method name from a Go ``func`` declaration.

    Handles plain functions (``func Foo(...)``), methods with a receiver
    (``func (r *T) Foo(...)``) and generic functions whose name is followed
    by a type-parameter list (``func Foo[T any](...)``).

    Args:
        func: Declaration text, e.g. the first element of a tuple produced
            by ``parse_go_lines``.

    Returns:
        The identifier as a string, or ``None`` when no name can be found.
    """
    # Anchored form: "func", an optional "(receiver)", then the identifier
    # followed by either the parameter list "(" or a type-parameter list "[".
    strict = re.match(r"func\b\s*(?:\([^)]*\)\s*)?(\w+)\s*[\[(]", func)
    if strict:
        return strict.group(1)
    # Legacy loose search, kept so inputs that do not begin with "func"
    # still resolve the way they used to: first whitespace + word + "(".
    loose = re.search(r"\s\w+\(", func)
    if loose:
        # Drop the single leading whitespace character and the trailing "(".
        return loose.group(0)[1:-1]
    return None
def extract_comments(file_path):
    """Build a package -> function -> comment mapping from a Go source tree.

    Walks ``<file_path>/src``, parses every ``.go`` file that is not a
    ``_test.go`` file, and records each function found by ``parse_go_lines``.
    Any directory whose path contains ``testdata`` is skipped.

    Args:
        file_path: Directory that contains the ``src`` tree to scan.

    Returns:
        A dict keyed by the directory path relative to ``src`` (joined with
        ``/``). Each value is a dict keyed by function name whose entries
        hold ``name``, ``func_dec``, ``comment`` and ``file_name``. When a
        name occurs more than once in a directory, only the first is kept.
    """
    packages = {}
    go_path = os.path.join(file_path, "src")
    for root, dirs, files in os.walk(go_path):
        if "testdata" in root:
            # Every descendant path also contains "testdata", so prune the
            # walk here instead of re-checking the condition per file.
            dirs[:] = []
            continue
        # Derive the package path from the walk root itself. Looking for the
        # first "src" component breaks when the install path contains one.
        rel = os.path.relpath(root, go_path)
        package = "" if rel == os.curdir else rel.replace(os.path.sep, "/")
        for file in files:
            if not file.endswith(".go") or file.endswith("_test.go"):
                continue
            func_matches = parse_go_lines(os.path.join(root, file))
            if not func_matches:
                continue
            entries = packages.setdefault(package, {})
            for func_dec, comment in func_matches:
                name = extract_func_name(func_dec)
                # An unparseable declaration would otherwise be stored under
                # the key None, which json.dump writes out as "null".
                if name is None:
                    continue
                if name in entries:
                    # skipping dups for now. TODO
                    continue
                entries[name] = {
                    "name": name,
                    "func_dec": func_dec,
                    "comment": comment,
                    "file_name": file,
                }
    return packages
def _split_go_symbol(name):
    """Split a dotted function name into ``(package, api_name)``.

    Two-part names are taken as ``package.function``. Three-part names are
    accepted only when the middle part contains ``_`` (these appear to carry
    a method's type in the middle -- not confirmed here), in which case the
    first and last parts are used. Anything else yields ``("", "")``.
    """
    parts = name.split(".")
    if len(parts) == 2:
        return parts[0], parts[1]
    if len(parts) == 3 and "_" in parts[1]:
        return parts[0], parts[2]
    return "", ""


def main():
    """Annotate functions in the open IDB with comments from the Go sources.

    Loads the comment database from ``gopher.json`` in the current working
    directory, building it from the ``GOBIN`` tree first if the file does not
    exist. Every function whose name maps to a known package/function pair
    then gets the Go doc comment and declaration prepended to its function
    comment. Functions whose name contains ``main.`` are skipped.
    """
    if not os.path.exists(GOBIN):
        print("Error: Godir could not be found")
        return
    if not os.path.exists("gopher.json"):
        data = extract_comments(GOBIN)
        with open("gopher.json", "w") as outfile:
            json.dump(data, outfile, indent=4)
    else:
        with open("gopher.json", "r") as infile:
            data = json.load(infile)
    # Only a warning: processing continues even when the symbol is missing.
    if idc.get_name_ea_simple("go.buildid") == idc.BADADDR:
        print("go_buildid function is not present. Possibly not a Go binary...")
    for func in idautils.Functions():
        name = idc.get_name(func)
        if "main." in name:
            continue
        package, api_name = _split_go_symbol(name)
        if not (package and api_name):
            continue
        entry = data.get(package, {}).get(api_name)
        if entry is None:
            continue
        comment = (entry["comment"] or "") + (entry["func_dec"] or "")
        if not comment:
            continue
        existing = idc.get_func_cmt(func, 0) or ""
        # Re-running the script must not stack the same text again.
        if comment in existing:
            continue
        # Keep whatever comment was already there, below the Go text; do not
        # leave a dangling newline when there was none.
        if existing:
            comment += "\n" + existing
        idc.set_func_cmt(func, comment, 0)
# Run only when executed as a script, so that importing this module does not
# start scanning sources or modifying the database.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Before

After