Skip to content

Instantly share code, notes, and snippets.

@mu-mu-mu
Last active August 21, 2021 11:30
Show Gist options
  • Save mu-mu-mu/61b78fb561810df623b799298e7c242e to your computer and use it in GitHub Desktop.
Save mu-mu-mu/61b78fb561810df623b799298e7c242e to your computer and use it in GitHub Desktop.
[GSoC 2021] Examining the Coccinelle’s Handleability of All Function Headers Found in the Linux Kernel
import glob,pathlib,os,sys,subprocess
# Verification driver: run every generated semantic patch found under
# check/ against the source file it was generated from and tally how many
# of them fail.
files = glob.glob("check/**/*.cocci", recursive=True)
fnum = len(files)
failed = 0
import random
# Visit the generated patches in random order.
random.shuffle(files)
for f in files:
    print(f)
    # "check/<path>occi" -> "<path>": drop the output directory prefix and
    # the "occi" suffix to recover the path of the file to check.
    fname_in_linux = f[len("check/"):-len("occi")]
    print(fname_in_linux)
    try:
        res = subprocess.run(
            ["path/to/spatch.opt", "--very-quiet", "-sp-file", str(f), str(fname_in_linux)],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except (subprocess.CalledProcessError, OSError):
        # spatch exited with a non-zero status or could not be started.
        # Only these are counted as failures so that Ctrl-C (and other
        # non-Exception exits) still stop the run.
        failed += 1
        continue
    # stdout is bytes because no text mode was requested.
    if b"Success" not in res.stdout:
        failed += 1
        continue
    print("Success")
print(failed)
print(fnum)
print(fnum - failed)
# Failure ratio; report 0.0 instead of dividing by zero when nothing was found.
print(failed / fnum if fnum else 0.0)

The official project page is here.

Project Summary

Coccinelle is a C-program matching and transformation tool. To describe matching and transformation rules in the source code with a patch-like appearance, this tool employs a domain specific language called the Semantic Patch Language (SmPL), and attempts to manipulate the given C code based on the given SmPL rule.

I investigated Coccinelle's handleability of all function headers found in the Linux kernel and fixed most of the issues that I detected. For 98% of the files, Coccinelle was able to parse all of the headers for their functions and it was possible to write a semantic patch rule that would successfully match the function header.

Commit Lists

TODO

  • Support for function attributes such as __acquires(x) and __releases(x).
  • Support for multiple attributes
  • Support for __attribute__ with multiple arguments
@initialize:python@
@@
import re,os,random
# Regular expressions used while cutting a function header out of raw
# source text.
# "{ ... }" followed only by whitespace at the end of the text.
braces = re.compile(r"{.*}\s*$")
# /* ... */ comment ending at the end of a line (MULTILINE: "$" matches per line).
lcomment1 = re.compile(r"/\*[\s\S]*?\*/$",re.DOTALL|re.MULTILINE)
# // comment running up to the end of the text.
lcomment2 = re.compile(r"//[^\n]*$")
# /* ... */ comment starting at the beginning of a line (MULTILINE: "^" matches per line).
hcomment1 = re.compile(r"^/\*[\s\S]*?\*/",re.DOTALL|re.MULTILINE)
# // comment line at the very beginning of the text, including its newline.
hcomment2 = re.compile(r"^//[^\n]*?\n")
# Any /* ... */ comment.
comment1 = re.compile(r"/\*[\s\S]*?\*/",re.DOTALL|re.MULTILINE)
# Any // comment up to and including its newline.
comment2 = re.compile(r"//[^\n]*?\n")
# Trailing whitespace at the end of the text.
last = re.compile(r"\s*$")
# One bracketed array suffix such as "[N]" (non-greedy, so "[a][b]" gives two matches).
array = re.compile(r"\[.*?\]")
# Tokens treated as built-in C type words; check_fragment() never emits a
# "typedef" declaration for a name found in this list.
def_types = ["void", "int", "short", "long", "char", "size_t", "unsigned", "ssize_t", "const", "struct", "enum", "union", "double", "float", "volatile","*", "signed"]
# pr() skips every header whose text contains one of these macro names.
special_defs = ["SYSCALL_DEFINE","BPF_PROG","BUILD_TRAP_HANDLER","DEFINE_IDTENTRY_SYSVEC"]
# Maps a source file path to the list of
# (header, name, line, column, return type, parameter list) tuples recorded by pr().
functions = dict()
def extract_from_ident_line(lines):
    """Collect the header text from the line holding the function name onwards.

    ``lines`` is the list of source lines starting at the line on which the
    function identifier was found.  Lines are accumulated until the
    parentheses are balanced and the text either ends in ")" or contains "{".

    Returns the accumulated text, or "" when ``lines`` is empty or starts
    with a preprocessor line.  Scanning stops early (returning whatever was
    accumulated so far) at a line ending in a backslash continuation or a
    line starting with "{".
    """
    res = ""
    par = 0
    # Guard against an empty slice / empty first line instead of raising
    # IndexError; a leading "#" means the match sits on a preprocessor line.
    if not lines or lines[0].startswith("#"):
        return res
    for l in lines:
        # Backslash right before the newline: macro continuation line.
        if len(l) > 1 and '\\' == l[-2]:
            return res
        if l.startswith("{"):
            return res
        # Track the parenthesis nesting depth across lines.
        par += l.count("(") - l.count(")")
        res += l
        # Header complete: closing ")" is the last character before the newline.
        if len(l) > 1 and ")" == res[-2] and par == 0:
            return res
        # Header complete: the opening brace is on the same line.
        if "{" in res and par == 0:
            return res
    return res
def extract_bef_ident_line(lines):
    """Collect the header lines that precede the line holding the function name.

    ``lines`` holds every source line before the identifier's line.  They are
    walked from the last one backwards and kept until a line is met that
    cannot belong to the header: a blank line, a line ending in "}", a
    preprocessor line, a line containing ";" or "*/", or a line that is empty
    once comments are removed.  When a line ending in a backslash is met, the
    first collected line is dropped from the result.
    """
    collected = []
    for text in lines[::-1]:
        # Blank (or newline-only) line ends the header.
        if len(text) <= 1:
            break
        tail = text[-2]
        if tail == '\\':
            # Macro continuation: discard the line that continues it.
            joined = "".join(collected)
            return joined[joined.find("\n") + 1:]
        if (
            tail == '}'
            or text[0] == "#"
            or ";" in text
            or len(comment2.sub("", comment1.sub("", text))) <= 1
            or "*/" in text
        ):
            break
        collected.insert(0, text)
    return "".join(collected)
def make_valid_header(l):
    """Strip comments and trailing text from raw header text.

    Removes trailing whitespace, then comments at line ends and at line
    starts, and finally cuts the text right after its last ")".
    Raises ValueError (from ``str.rindex``) when no ")" remains.
    """
    cleaned = last.sub("", l)
    # Same order as before: trailing comments first, then leading comments.
    for pattern in (lcomment1, lcomment2, hcomment1, hcomment2):
        cleaned = pattern.sub("", cleaned)
    end = cleaned.rindex(")")
    return cleaned[:end + 1]
def pr(s,pos,t,param):
    """Record the textual header of one matched function definition.

    ``s`` is the function name, ``pos`` a one-element sequence holding the
    match position (its ``file``, ``line`` and ``column`` are read), ``t`` the
    return type and ``param`` the parameter list.  The header text is cut out
    of the source file and appended to ``functions[file]``; nothing is
    recorded when no header text could be extracted or when the header
    contains one of ``special_defs``.
    """
    (where,) = pos
    with open(where.file) as src:
        content = src.readlines()
    first = int(where.line) - 1
    frm = extract_from_ident_line(content[first:])
    if not frm:
        return
    header = make_valid_header(extract_bef_ident_line(content[:first]) + frm)
    # Headers produced by the special definition macros are not checked.
    if any(macro in header for macro in special_defs):
        return
    entry = (header, s, where.line, where.column, t, param)
    functions.setdefault(where.file, []).append(entry)
@r@
// Matches every function definition and binds its return type (T), name (f),
// parameter list (P) and the position of the name (p).
type T;
function f;
position p;
parameter list P;
@@
T f@p(P) { ... }
@script:python@
fname << r.f;
tname << r.T;
param << r.P;
pos << r.p;
@@
# Runs once per match of rule r: hand the bound name, position, return type
# and parameter list to pr().
pr(fname,pos,tname,param)
@finalize:python@
@@
# TODO
# Template of one generated SmPL rule followed by the python rule that
# reports its match.  It is filled in with str.format():
#   {id}     - suffix that makes the rule name and position variable unique
#   {T}      - extra metavariable declarations ("typedef ...;" / "constant ...;")
#   {header} - the function header with "@pos<id>" attached to the function name
# "{{ ... }}" is an escaped literal "{ ... }" (the function body pattern).
script = r"""
@smpl{id}@
attribute name __iomem, __init, __user,__unused,asmlinkage,__deprecated;
attribute name __always_inline,notrace,noinline,__weak,__inline__,__exception_irq_entry;
attribute name __visible, __always_unused,__maybe_unused,__rcu,__cpuidle;
attribute name __net_exit, __exit,noinstr,noinline_for_stack,__noinline;
attribute name __latent_entropy,__kprobes,__must_check,__net_init,__percpu;
attribute name __counted,__rcu,__notrace_funcgraph,__exception_irq_entry;
attribute name XZ_EXTERN,STATIC,FORCE_INLINE,STATIC_NOPV,INLINING,INIT;
attribute name JNICALL,JNEXPORT,ACPI_SYSTEM_XFACE;
attribute name SEC,__printf,__acquires;
typedef __u8,__u16,__u32,__u64,__s8,__s16,__s32,__s64;
typedef __be16,__be32,__be64,__le16,__le32,__le64;
typedef __wsum,__sum16,__virtio16;
position pos{id};
{T}
@@
{header}
{{ ... }}
@script:python@
pos << smpl{id}.pos{id};
@@
chk(pos)
"""
# Next rule id to hand out.  A counter (kept in a one-element list so the
# function can update it without a ``global`` statement) guarantees that no
# two generated rules share a name, which random ids could not.
_rule_id_seq = [0]

def check_fragment(header,func_name,line,column,t,param):
    """Build the SmPL rule text that should match one function header.

    header    -- textual function header cut out of the source file
    func_name -- name of the function; gets a position variable attached
    line, column -- position of the function (unused here, kept for callers)
    t         -- return type as text
    param     -- parameter list (converted with ``str()``)

    Returns the ``script`` template filled in with a unique rule id, the
    header and the "typedef"/"constant" declarations that the header needs.
    """
    rule_id = _rule_id_seq[0]
    _rule_id_seq[0] += 1

    def is_annotation(tok):
        # "__xxx" tokens are treated as annotations unless they end in "_t".
        return len(tok) > 2 and tok[:2] == "__" and tok[-2:] != "_t"

    def check(pl):
        # Return a typedef declaration for the last meaningful type token of
        # ``pl``, or None when that token is a built-in or a tagged type.
        if len(pl) == 0:
            return None
        if pl[-1] in ["const", "*"]:
            return check(pl[:-1])
        if pl[-1] in def_types:
            return None
        if len(pl) >= 2 and (pl[-2] in ["struct", "enum", "union"]):
            return None
        return "typedef " + pl[-1] + ";\n"

    decls = []
    ret_type = [s for s in t.split() if not is_annotation(s)]
    if ret_type:
        tp = ret_type[1] if len(ret_type) >= 2 and ret_type[0] == "const" else ret_type[0]
        if tp not in def_types:
            decls.append("typedef " + tp + ";\n")

    # for function pointer types
    # int (*f)(char p, long q), => int *f, char p, long q,
    for p in str(param).replace(") (",",").replace(")","").replace("(","").split(","):
        # array[a][b]
        constants = []
        for ars in array.findall(p):
            # Strip the surrounding brackets first, then split the bound on
            # "+"; splitting first would chop characters off each operand.
            for ar in ars[1:-1].split("+"):
                const = ar.replace(" ","")
                # Keep first-seen order so the output is deterministic.
                if const != "" and not const.isdigit() and const not in constants:
                    constants.append(const)
        for const in constants:
            decls.append("constant " + const + ";\n")
        p = array.sub("",p)
        params = [s for s in p.split() if not (is_annotation(s) or s[0] == "#")]
        if not params:
            continue
        # The last token is the parameter name; only the type part is checked.
        res = check(params[:-1])
        if res is None:
            continue
        decls.append(res)

    # Attach the position variable to the function name.  The word boundary
    # keeps a longer identifier that merely ends in the name (e.g. an
    # attribute macro) from being marked as well.
    marked = re.sub(
        r"(\s*)\b" + re.escape(func_name) + r"(\s*\()",
        lambda m: m.group(1) + func_name + "@pos" + str(rule_id) + m.group(2),
        header,
    )
    return script.format(id=rule_id, header=marked.replace("\n","\n "), T="".join(decls))
# Template of the @initialize:python@ prologue written at the top of every
# generated file.  "{0}" receives the comma-separated line numbers of the
# functions that are expected to be matched.  The generated chk() removes a
# line number from ``lines`` when it is matched and reports a match on a line
# that was never expected.  The body is indented and uses print() with a
# single argument so that it is valid under both Python 2 and Python 3.
init_sc = r"""
@initialize:python@
@@
lines = [{0}]
orig = [{0}]
def chk(pos):
    x, = pos
    if not int(x.line) in lines:
        if not int(x.line) in orig:
            print("ERROR: ll." + x.line)
        else:
            # This function is defined twice (maybe #ifdef)
            pass
    else:
        lines.remove(int(x.line))
"""
def py_init(lines):
    """Return the initialize prologue for the given expected line numbers.

    ``lines`` is an iterable of line numbers; each item is converted with
    ``str()``, so both strings and integers are accepted.
    """
    return init_sc.format(",".join(str(n) for n in lines))
# Template of the @finalize:python@ epilogue written at the end of every
# generated file: prints "Success" when every expected line was matched
# (``lines`` is empty), otherwise "Failed" followed by the unmatched lines.
# The if/else bodies are indented so that the generated script is valid Python.
fin_sc = r"""
@finalize:python@
@@
if lines == []:
    print("Success")
else:
    print("Failed")
    print(lines)
"""
# Write one generated semantic patch per source file that had matches:
# prologue with the expected line numbers, one rule per recorded function,
# then the epilogue that reports the result.
for file,fns in functions.items():
    # "<path>.c" becomes "check/<path>.cocci" by appending "occi".
    fname = "check/" + os.path.relpath(file) + "occi"
    lines = []
    fragments = []
    for header,name,line,column,t,param in fns:
        lines.append(line)
        fragments.append(check_fragment(header,name,line,column,t,param))
    smpls = "".join(fragments)
    out_dir = os.path.dirname(fname)
    try:
        os.makedirs(out_dir)
    except OSError:
        # An already existing directory (possibly created by a concurrent
        # run between a check and the creation) is fine; re-raise otherwise.
        if not os.path.isdir(out_dir):
            raise
    with open(fname,"w") as f:
        f.write(py_init(lines))
        f.write(smpls)
        f.write(fin_sc)
print("Fin")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment