Syncrossus · December 6, 2019 16:13 · Syncrossus · Oct 21, 2019
diff --git a/comment_proportions.py b/comment_proportions.py
 from pygments import highlight
 from pygments.lexers import get_lexer_by_name
 from pygments.formatters import BBCodeFormatter

 import re
 import sys


 def get_comments(code):
    """ Extracts comments and docstrings from python code.
        The code is tokenised with the pygments python lexer and the
        tokens are selected by their type, so the result does not
        depend on the colours a particular pygments style assigns
        to comments and docstrings.
        Args:
            - code (str) : the code to extract comments from
        Return:
            - comments (list<str>) : the text of every comment token
                followed by the text of every docstring token
                found in the code
    """
    # Imported locally so this function brings its own dependency and
    # the module-level import block does not have to change.
    from pygments.token import Comment, String

    lexer = get_lexer_by_name("python", stripall=True)

    comments = []
    docstrings = []
    for token_type, text in lexer.get_tokens(code):
        if token_type in String.Doc:
            docstrings.append(text)
        elif token_type in Comment:
            # `in` also accepts sub-types such as Comment.Single.
            comments.append(text)

    # Keep the established ordering: all comments, then all docstrings.
    return comments + docstrings


 def compute_comment_stats(file_list):
    """ Finds the total length, the length of the comments, and
        computes the ratio of the two, for each file and in total.
        Lengths are obtained with len() on the text read from each
        file, i.e. they count characters, and are reported under
        the "... bytes" keys.
        A ratio is reported as 0.0 when the matching total length is
        zero (an empty file, or an empty file list).
        Args:
            - file_list (list<str>): the list of files to compute stats for
        Return:
            - stats (dict): a dict with filenames as keys and dicts of
                statistics as values, plus a "total" key holding the
                statistics aggregated over all files.
    """
    stats = {}
    total_len = 0
    total_comment_len = 0

    for path in file_list:
        # Only the read needs the file handle; release it right after.
        with open(path, 'r') as f:
            source_code = f.read()

        source_len = len(source_code)
        comment_len = sum(
            len(comment) for comment in get_comments(source_code))

        total_len += source_len
        total_comment_len += comment_len

        stats[path] = {
            "comment bytes": comment_len,
            "total bytes": source_len,
            # An empty file would otherwise divide by zero.
            "ratio": comment_len / source_len if source_len else 0.0}

    stats["total"] = {
        "comment bytes": total_comment_len,
        "total bytes": total_len,
        # No files (or only empty files) would otherwise divide by zero.
        "ratio": total_comment_len / total_len if total_len else 0.0}

    return stats


 if __name__ == '__main__':
    # Script entry point: every command-line argument after the script
    # name is passed on as a file to analyse, and the resulting
    # statistics dict is printed unformatted.
    file_args = sys.argv[1:]
    print(compute_comment_stats(file_args))
	from pygments import highlight
	from pygments.lexers import get_lexer_by_name
	from pygments.formatters import BBCodeFormatter

	import re
	import sys


	def get_comments(code):
	    """ Extracts comments and docstrings from python code.
	        The code is tokenised with the pygments python lexer and the
	        tokens are selected by their type, so the result does not
	        depend on the colours a particular pygments style assigns
	        to comments and docstrings.
	        Args:
	            - code (str) : the code to extract comments from
	        Return:
	            - comments (list<str>) : the text of every comment token
	                followed by the text of every docstring token
	                found in the code
	    """
	    # Imported locally so this function brings its own dependency and
	    # the module-level import block does not have to change.
	    from pygments.token import Comment, String

	    lexer = get_lexer_by_name("python", stripall=True)

	    comments = []
	    docstrings = []
	    for token_type, text in lexer.get_tokens(code):
	        if token_type in String.Doc:
	            docstrings.append(text)
	        elif token_type in Comment:
	            # `in` also accepts sub-types such as Comment.Single.
	            comments.append(text)

	    # Keep the established ordering: all comments, then all docstrings.
	    return comments + docstrings


	def compute_comment_stats(file_list):
	    """ Finds the total length, the length of the comments, and
	        computes the ratio of the two, for each file and in total.
	        Lengths are obtained with len() on the text read from each
	        file, i.e. they count characters, and are reported under
	        the "... bytes" keys.
	        A ratio is reported as 0.0 when the matching total length is
	        zero (an empty file, or an empty file list).
	        Args:
	            - file_list (list<str>): the list of files to compute stats for
	        Return:
	            - stats (dict): a dict with filenames as keys and dicts of
	                statistics as values, plus a "total" key holding the
	                statistics aggregated over all files.
	    """
	    stats = {}
	    total_len = 0
	    total_comment_len = 0

	    for path in file_list:
	        # Only the read needs the file handle; release it right after.
	        with open(path, 'r') as f:
	            source_code = f.read()

	        source_len = len(source_code)
	        comment_len = sum(
	            len(comment) for comment in get_comments(source_code))

	        total_len += source_len
	        total_comment_len += comment_len

	        stats[path] = {
	            "comment bytes": comment_len,
	            "total bytes": source_len,
	            # An empty file would otherwise divide by zero.
	            "ratio": comment_len / source_len if source_len else 0.0}

	    stats["total"] = {
	        "comment bytes": total_comment_len,
	        "total bytes": total_len,
	        # No files (or only empty files) would otherwise divide by zero.
	        "ratio": total_comment_len / total_len if total_len else 0.0}

	    return stats


	if __name__ == '__main__':
	    # Script entry point: every command-line argument after the script
	    # name is passed on as a file to analyse, and the resulting
	    # statistics dict is printed unformatted.
	    stats = compute_comment_stats(sys.argv[1:])
	    print(stats)