Skip to content

Instantly share code, notes, and snippets.

@ChunMinChang
Last active August 6, 2024 09:04
Show Gist options
  • Save ChunMinChang/88bfa5842396c1fbbc5b to your computer and use it in GitHub Desktop.
Save ChunMinChang/88bfa5842396c1fbbc5b to your computer and use it in GitHub Desktop.
Python: Remove C/C++ style comments #parser
#!/usr/bin/python
import re
import sys
# Verbose-mode regex that splits C/C++ text into comments and non-comments.
# The comment alternatives (// ... and /* ... */) leave group 2 unset, while
# every non-comment piece (double-quoted string, single-quoted string, or a
# run of ordinary characters) is captured by group 2.
_C_TOKEN_PATTERN = r"""
## --------- COMMENT ---------
//.*?$ ## Start of // .... comment
| ##
/\* ## Start of /* ... */ comment
[^*]*\*+ ## Non-* followed by 1-or-more *'s
( ##
[^/*][^*]*\*+ ##
)* ## 0-or-more things which don't start with /
## but do end with '*'
/ ## End of /* ... */ comment
| ## -OR- various things which aren't comments:
( ##
## ------ " ... " STRING ------
" ## Start of " ... " string
( ##
\\. ## Escaped char
| ## -OR-
[^"\\] ## Non "\ characters
)* ##
" ## End of " ... " string
| ## -OR-
##
## ------ ' ... ' STRING ------
' ## Start of ' ... ' string
( ##
\\. ## Escaped char
| ## -OR-
[^'\\] ## Non '\ characters
)* ##
' ## End of ' ... ' string
| ## -OR-
##
## ------ ANYTHING ELSE -------
. ## Anything other char
[^/"'\\]* ## Chars which doesn't start a comment, string
) ## or escape
"""
_C_TOKEN_RE = re.compile(
    _C_TOKEN_PATTERN, re.VERBOSE | re.MULTILINE | re.DOTALL
)


def removeComments(text):
    """Return *text* with C-style comments dropped.

    The input is scanned token by token. ``//`` comments (up to the end of
    their line) and ``/* ... */`` comments are discarded outright -- nothing
    is inserted in their place. Quoted strings are matched as whole tokens,
    so comment markers inside them are kept.

    text: blob of text with comments (can include newlines)
    returns: the concatenation of all non-comment tokens
    """
    kept = []
    for token in _C_TOKEN_RE.finditer(text):
        chunk = token.group(2)
        # Group 2 is unset (None) when the token was a comment.
        if chunk:
            kept.append(chunk)
    return "".join(kept)
def commentRemover(text, keepLines=False):
    """Replace C/C++ comments in *text* with whitespace.

    Both ``//`` comments (taken to end at the end of their line; backslash
    line continuations are not followed) and ``/* ... */`` comments are
    recognised. Single- and double-quoted literals are matched as well, and
    returned unchanged, so comment markers inside them are left alone.

    text: source text, may span multiple lines
    keepLines: when false (the default) every comment becomes one space.
        When true, a comment that spans several lines is replaced by the
        same number of newlines it contained, so the line numbers of the
        remaining code match the input; comments without a newline still
        become one space.
    returns: the text with comments replaced
    """
    def replacer(match):
        s = match.group(0)
        if not s.startswith('/'):
            # A quoted literal: keep it verbatim.
            return s
        newlines = s.count("\n") if keepLines else 0
        if newlines:
            return "\n" * newlines
        return " "  # note: a space and not an empty string

    pattern = re.compile(
        r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
        re.DOTALL | re.MULTILINE
    )
    return pattern.sub(replacer, text)
if __name__ == "__main__":
    # The optional first command-line argument names the file to strip;
    # 'test.h' remains the default input.
    filename = sys.argv[1] if len(sys.argv) > 1 else 'test.h'
    with open(filename) as f:
        # removeComments(f.read()) is the alternative implementation.
        uncmtFile = commentRemover(f.read())
    # Parenthesised form works as a print statement on Python 2 and as a
    # function call on Python 3.
    print(uncmtFile)
/* This is a C-style comment. */
This is not a comment.
/* This is another
* C-style comment.
*/
"This is /* also not a comment */"
// This is also a comment
This is still // a comment
This is still /* a comment */
This is still /* a comment */ again
This is the final line
@number623
Copy link

At line number 57, using
return "\n" * s.count( "\n" )
will keep the line positions of the original input file.

@Red-Sage
Copy link

Hello,

First, thank you for this; it was super helpful. Second, can you state what the license is? I would like to use your code but can't without a license.

Thanks,

Casey

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment