Skip to content

Instantly share code, notes, and snippets.

@3ki5tj
Created November 18, 2014 23:34
Show Gist options
  • Select an option

  • Save 3ki5tj/84a16d35e52332eb5aff to your computer and use it in GitHub Desktop.

Select an option

Save 3ki5tj/84a16d35e52332eb5aff to your computer and use it in GitHub Desktop.
Add spaces to C-like source code (a simple formatter)
#!/usr/bin/env python
'''
add spaces for C source code
the main function is addspacef()
Example 1:
if(a>b&&a>3)c=d;
--> if (a > b && a > 3) c = d;
Example 2:
int foo(int a,int b){
return (a>b)?a:b;
}
---> int foo(int a, int b)
{
return (a > b) ? a : b;
}
'''
import os, sys, shutil, getopt, re
# module attributes
use_rule_add = 0
use_rule_comma = 1
use_rule_assign = 1
use_rule_knr = 1
use_rule_paren0 = 1
use_rule_paren1 = 1
use_rule_paren2 = 0
use_rule_cmp = 1
use_rule_ter = 1
use_rule_bitws = 1
use_rule_else = 1
use_rule_scolon = 1
use_rule_spb4quo = 1
use_rule_nocppcmt = 0
assign = r"([\+\-\*\/\|\%\&\^]?=)" # =, +=, -=, *=, ...
assign2 = r"(\>\>|\<\<)=" # >>= or <<=
cmps = r"([<>])"
cmp2 = r"([\!<=>]=)"
op1 = r"([\w0-9\'\"\)\]])"
op1b = r"([\w0-9\)\]])"
op2 = r"([\w0-9\-\'\"\(])"
op2b = r"([\w0-9\-\(\{])"
op2c = r"([\*\&\!\+\-][\w0-9\-\(])"
spa = r"(\s)"
bitws = r"(\|\||\&\&)" # || or &&
# Rules
# pattern, replacement, description
rules_basic = [];
rule_else = [
(r"\Else{", "else {", r"\belse{"),
(r"}else\b", "} else", r"}else\b"),
]
rule_scolon = [
(";}", "; }", ";}"),
(r"(;)" + op2b, r"\1 \2", ";a"), # i=0;i<n --> i=0; i<n
];
# the additional comma rule
rule_comma = [
(r"(,)" + op2b, r"\1 \2", ",a"), # a,i --> a, i
(r"(,)" + op2c, r"\1 \2", ",*a"), # a,*p --> a, *p
]
rule_assign = [
(op1b + assign + op2, r"\1 \2 \3", "a=b"), # a=b --> a = b
(op1b + assign + op2c, r"\1 \2 \3", "a=*b"), # a=*b --> a = *b
(op1b + assign2 + op2, r"\1 \2= \3", "a>>=b"), # a>>=b --> a >>= b
(op1b + assign2 + op2c, r"\1 \2= \3", "a>>=*b"), # a>>=*b --> a >>= *b
]
rule_cmp = [
(op1 + cmps + op2c, r"\1 \2 \3", "a<*b"), # a<*b --> a < *b
(op1 + cmps + op2, r"\1 \2 \3", "a<b"), # a<b --> a < b
(spa + cmps + op2, r"\1\2 \3", "a <b"), # a <b --> a < b
(op1 + cmps + spa, r"\1 \2\3", "a> b"), # a> b --> a > b
(op1 + cmp2 + op2c, r"\1 \2 \3", "a==*b"), # a==*b --> a == *b
(op1 + cmp2 + op2, r"\1 \2 \3", "a==b"), # a==b --> a == b
(spa + cmp2 + op2, r"\1\2 \3", "a ==b"), # a <=b --> a <= b
(op1 + cmp2 + spa, r"\1 \2\3", "a== b"), # a>= b --> a >= b
]
rule_ter = [
(op1 +"\?"+ op2, r"\1 ? \2", "a?b"), # a?b --> a ? b
(op1 +"\:"+ op2, r"\1 : \2", "b:c"), # b:c --> b : c
("\)([\?\:])\(", r") \1 (", ")?("), # (a)?(b) --> (a) ? (b)
("\)([\?\:])([\w$])", r") \1 \2", ")?a"), # (a)?b --> (a) ?b
("([\w])([\?\:])\(", r"\1 \2 (", "a?("), # a?(b) --> a ? (b)
]
rule_bitws = [
(op1b + bitws + op2b, r"\1 \2 \3", "a||b"), # a||b --> a || b
(spa + bitws + op2b, r"\1 \2 \3", "a ||b"), # a ||b --> a || b
(op1b + bitws + spa, r"\1 \2 \3", "a|| b"), # a|| b --> a || b
]
rule_paren0 = [
(r"\bif\(", "if (", r"\bif("),
(r"\bfor\(", "for (", r"\bfor("),
(r"\bswitch\(", "switch (", r"\bswitch("),
]
rule_paren1 = [
(r"\)(\w)", r") \1", ")a"), # )a --> ) a
("\)\{", ") {", "){"),
(r"(\)\s*{)(\w)", r"\1 \2", "){a"), # ){a --> ){ a
]
rule_paren2 = [
(r"\bif\( ", "if (", r"\bif( "), # if( a) --> if (a)
(" \)\{", ") {", " ){"), # a ){ --> a) {
]
# an additional rule
# a+b, a-b, a&b, a|b
op1d = r"([\w\)\]])";
op1e = r"([a-df-zA-DF-Z0-9_\)\]])"; # exclude e for 1e-5
op2d = r"([\w\(])";
plsmin = r"([\+\-])";
bandor = r"([\|\&])"
bor = r"(\|)"
# serveral special cases
# 1e-5
# -1.0
# +3.0
# &abc
rule_add = [
(op1d + bandor + op2d, r"\1 \2 \3", "a&b"), # a&b --> a & b
(op1e + plsmin + op2d, r"\1 \2 \3", "a+b"), # a+b --> a + b
(spa + bor + op2d, r"\1\2 \3", "a |b"), # a |b --> a | b
(op1d + bandor + spa, r"\1 \2\3", "a& b"), # a+ b --> a + b
(op1d + plsmin + spa, r"\1 \2\3", "a+ b"), # a+ b --> a + b
]
comments = [
("/*", "*/"),
("'''", "'''"), # python __doc__ string
('"""', '"""'),
("//", ""),
]
strings = [
('"', '"'),
("'", "'"),
]
# comment or literals
cmtstr = [("BEGINTEXT", "ENDTEXT"), # dummy pair, unused
] + comments + strings
def c_cs_start(s):
''' find the beginning of the first comment/string starter '''
mtype = 0 # normal code
minpos = 1000000
n = len(cmtstr)
# loop over comment starters
for tp in range(1, n):
(sym0, sym1) = cmtstr[tp]
pos = s.find(sym0, 0)
if pos >= 0 and pos < minpos: # if it starts first
mtype = tp
minpos = pos
return (mtype, minpos)
def c_cs_end(s, cstype):
''' find the end of a comment/string started as cstype '''
if cstype <= 0:
raise Exception
sym0, sym1 = cmtstr[cstype]
pos0 = len(sym0) if s.startswith(sym0) else 0
if (sym0 == '/*' or # block comments
sym0 == "'''" or sym0 == '"""'): # docstrings
pos1 = s.find(sym1, pos0) # search the end from the line
elif sym0 == '//':
pos1 = len(s) # till the end of the line
elif sym0 == '"' or sym0 == "'": # tricky
pos1 = pos0
while True:
pos1 = s.find(sym1, pos1)
if pos1 >= 0 and s[pos1 - 1] == '\\': # fake, \", keep going
pos1 += 1 # skip over it
else:
break
else:
print "unknown sym0: [%s] cstype=%d" % (sym0, cstype)
raise Exception
if pos1 >= 0: # return to normal code
return (0, pos1 + len(sym1))
else: # remain in this type
return (cstype, -1)
def c_parse_line(s, cstype = 0):
'''
parse a line like
foo(a, "haha /* cmt */", /* this is a number */ 0, b); // line end
to an array of code and comments (or strings):
`foo(a, '
`"haha /* cmt */"'
`, '
`/* this is a number */'
` 0, b); '
`// line end'
return a list of (text, comment_type)
the `comment_type' of regular code is 0
`cstype' is the current cstype
'''
lst = [(s, cstype)] # construct an initial list
id = 0
while id < len(lst):
s, cstype = lst[id]
if cstype == 0: # is normal code
cstype, pos = c_cs_start(s)
if cstype <= 0: break # no more cmtstr
# split it into code + comment
sp = s[:pos]
s = s[pos:]
lst[id] = (sp, 0) # change the current item to normal code
lst += [(s, cstype)] # push the current symbol
else: # current in comment/string of type `cstype', look for the end
cstypep = cstype
cstype, pos = c_cs_end(s, cstype)
if cstype > 0: # current comment is not finished yet
break # no need to add anything else
sp = s[:pos]
s = s[pos:]
lst[id] = (sp, cstypep)
if len(s) > 0: # the leftover of `// ...'
lst += [(s, 0)]
id += 1
# return a list of parsed items
return lst, cstype
def addspace(ilines, verbose = 0):
''' add spaces to `ilines', an array of lines,
return the output file '''
nlines = len(ilines)
olines = []
nchanges = 0
inpp = False
cstype = 0 # it is currently code
# I. install rules
rules = rules_basic
if use_rule_paren0:
rules += rule_paren0
if use_rule_paren1:
rules += rule_paren1
if use_rule_paren2:
rules += rule_paren2
if use_rule_else:
rules += rule_else
if use_rule_cmp:
rules += rule_cmp
if use_rule_ter:
rules += rule_ter
if use_rule_assign:
rules += rule_assign
if use_rule_bitws:
rules += rule_bitws
if use_rule_comma:
rules += rule_comma
if use_rule_scolon:
rules += rule_scolon
if use_rule_add:
rules += rule_add
if verbose >= 4:
i = 1
for r in rules:
print "%3d: '%s'" % (i, r[2])
i += 1
raw_input()
# II. add spaces line by line
for i in range(nlines):
iline = ilines[i]
oline = iline.rstrip()
# apply various rules
changed = []
# check if we are inside a preprocessor line
if cstype == 0 and (
inpp or iline.lstrip().startswith("#")): # preprocessor
if verbose >= 5:
print "line %6d: %s\npreprocessor" % (i+1, iline.rstrip())
raw_input()
ilin = iline.rstrip()
if len(ilin) > 0 and ilin[-1] == '\\': # line of preprocessor
inpp = True # continue the preprocessor line
else:
inpp = False # start code again
else:
# split a line to code and comment and strings
lst, cstype = c_parse_line(oline, cstype)
nlast = len(lst) - 1
item, cstype1 = lst[nlast]
sym0, sym1 = cmtstr[cstype1]
if verbose >= 5: # show the parser
print "line %6d: %s" % (i+1, iline.rstrip())
print "cstype %6d: %s" % (cstype, lst)
raw_input()
''' function starting brace to K & R style
int foo(){
-->int foo()
{
'''
if (use_rule_knr and 0 == cstype and nlast == 0
and not item[0:1].isspace()):
pat = r"(\)\s*\{$)";
repl = ")\n{"
if re.search(pat, item):
item = re.sub(pat, repl, item)
lst[-1] = (item, 0)
changed += ["K&R function"]
if verbose > 0:
print "converting to K & R function"
# convert `// comments', to `/* comments */'
if (use_rule_nocppcmt and sym0 == "//"):
item = "/* " + item[2:].strip() + " */"
lst[-1] = (item, lst[-1][1])
if verbose > 0:
print "convert C++ comment"
if use_rule_spb4quo:
for k in range(1, len(lst)):
cstp = lst[k][1]
if (cmtstr[cstp][0] in "\'\""
and len(lst[k-1][0]) > 0
and lst[k-1][0][-1] in ",;"):
lst[k-1] = (lst[k-1][0] + " ", lst[k-1][1])
changed += ['[space before "]']
# apply rules
oline = "" # "%d: " % cstype
for k in range(len(lst)):
(item, cstp) = lst[k]
if cstp == 0: # only apply to code
for pat,repl,desc in rules:
# we apply each rule multiple times
# until it no longer applies
# Note some pattern may emerge after replacement
while re.search(pat, item):
item = re.sub(pat, repl, item)
changed += ['[' + desc + ']']
oline += item
# change cstype for the next line
sym0, sym1 = cmtstr[ lst[-1][1] ]
if sym0 in ("'", "//", '"'):
cstype = 0 # return to code mode
# print out the change
if oline.rstrip() != iline.rstrip():
nchanges += 1
if verbose >= 2:
print "%6d INPUT : %s" % (i+1, iline),
print "%6d OUTPUT: %s" % (i+1, oline.rstrip())
print "Rules:", ', '.join(changed)
if verbose >= 3:
raw_input()
# strip away trailing spaces before outputting
olines += [ oline + '\n', ]
# re split lines
olines = ''.join(olines).splitlines(True)
olines = [s.rstrip() + '\n' for s in olines]
return olines, nchanges
def addspacef(fninp, fnout = "", overwrite = False, verbose = 0):
""" add space to file 'fninp' """
try:
ilines = open(fninp, 'r').readlines();
except IOError:
print "cannot open", fninp
return
if verbose >= 2: print "processing", fninp
olines, nchanges = addspace(ilines, verbose)
# print the number of changes
if not nchanges:
if verbose:
print "keep", fninp
return
else:
nlines = len(ilines)
print ("about to make %d changes (%.2f%%) to %s"
% (nchanges, 100.0*nchanges/(nlines + 1e-6), fninp) )
if (nchanges >= 20 or nchanges >= 0.1*nlines) and verbose >= 1:
print "the code looks nasty, did you write it?"
raw_input()
if overwrite: # overwrite mode
try: # backup
import zcom
zcom.safebackup(fninp, ".orig")
fnout = fninp # write on the original input
except ImportError: pass
else: #
if not fnout:
fnout = fninp + ".spr"
if verbose > 0:
print "assume the output is", fnout
open(fnout, 'w').writelines(olines)
def usage():
''' print usage and die '''
print sys.argv[0], "[OPTIONS] input"
print """
add spaces to C source code
OPTIONS:
-R, --recursive recursively apply to subdirectories
if `input' is a wildcard pattern like *.c
the pattern must be quoted as '*.c'
-L, --nolinks skip symbolic links
-w, --overwrite overwrite the original file
-a, --add add space around +, -, &, |
--paren2 convert if(_ to if_(, and _){ to )_{
--noknr allow { to hang after ) for functions
--noparen0 don't convert if( to if (
--noparen1 don't convert ){ to ) {
--noelse don't convert }else{ to } else {
--noassign don't add space around =
--nocmp don't add spaces around == or <
--nobitws don't add space around ||
--noter don't add spaces around ? :
--nocomma don't add space after ,
--noscolon don't add space after ;
--nospb4quo allow no space before the leading quote
-c, --conservative be conservative
--cppcmt convert C++ style comments // to /* */
-v be verbose
--verbose=[0-9] specify verbose level
"""
exit(1)
def doargs():
''' Handle common parameters from command line options
results saved to module attributes '''
try:
opts, args = getopt.gnu_getopt(sys.argv[1:], "hvbwacRL",
["help", "verbose=", "backup", "overwrite",
"add", "conservative", "nocomma", "noscolon",
"noassign", "nocmp", "noter",
"noparen0", "noparen1", "paren2",
"noelse", "noknr", "nobitws",
"nospb4quo",
"cppcmt",
"--recursive", "--nolinks",
])
except getopt.GetoptError, err:
# print help information and exit:
print str(err) # will print something like "option -a not recognized"
usage()
global use_rule_nocppcmt
global use_rule_paren0
global use_rule_paren1
global use_rule_paren2
global use_rule_add
global use_rule_cmp
global use_rule_bitws
global use_rule_ter
global use_rule_assign
global use_rule_comma
global use_rule_scolon
global use_rule_else
global use_rule_knr
global use_rule_spb4quo
overwrite = False
verbose = 0
recur = False
links = True
for o, a in opts:
if o in ("-R", "--recursive",):
recur = True
elif o in ("-L", "--nolinks",):
links = False
elif o in ("-b", "-w", "--backup", "--overwrite"):
overwrite = True
elif o in ("--cppcmt",):
use_rule_nocppcmt = 1
print "will convert C++ comments to C ones"
elif o in ("-a", "--add",):
use_rule_add = 1
print "enable the additional add rule"
elif o in ("--paren2",):
use_rule_paren2 = 1
print "enable the additional parentheses rule"
elif o in ("-c", "--conservative",):
use_rule_add = 0
use_rule_paren2 = 0
#
use_rule_paren0 = 0
use_rule_comma = 0
use_rule_bitws = 0
use_rule_ter = 0
use_rule_cmp = 0
use_rule_assign = 0
use_rule_knr = 0
use_rule_spb4quo = 0
'''
use_rule_scolon = 0
use_rule_paren1 = 0
use_rule_else = 0
'''
print "use basic rules only"
elif o in ("--noparen0",):
use_rule_paren0 = 0
print "disable the parentheses start rule"
elif o in ("--noparen1",):
use_rule_paren1 = 0
print "disable the parentheses end rule"
elif o in ("--noelse",):
use_rule_else = 0
print "disable the else rule"
elif o in ("--nocmp",):
use_rule_cmp = 0
print "disable the comparison rule"
elif o in ("--noter",):
use_rule_ter = 0
print "disable the ternary rule"
elif o in ("--nobitws",):
use_rule_bitws = 0
print "disable the bitws rule"
elif o in ("--noassign",):
use_rule_assign = 0
print "disable the assignment rule"
elif o in ("--nocomma",):
use_rule_comma = 0
print "disable the comma rule"
elif o in ("--noscolon",):
use_rule_scolon = 0
print "disable the semicolon rule"
elif o in ("--nospb4quo",):
print "allow space before quotes"
elif o in ("--noknr",):
use_rule_knr = 0
print "disable the K&R rule"
elif o in ("-v",):
verbose += 1
elif o in ("--verbose",):
verbose = int(a)
elif o in ("-h", "--help",):
usage()
ls = args
try: # limit the dependence on argsglob
import zcom
ls = zcom.argsglob(args, "*.c *.cpp *.h *.hpp *.java", recur = recur, links = links)
except ImportError: pass
return ls, overwrite, verbose
if __name__ == "__main__":
ls, overwrite, verbose = doargs()
for fn in ls:
addspacef(fn, None, overwrite, verbose)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment