jayvdb · July 25, 2016 16:12
diff --git a/check_dubious_concat.py b/check_dubious_concat.py
 def check_dubious_concat(src):
    """Check for suspicious string literal concatenation.

    We will warn about instances of string literal concatenation in list-like
    contexts - that is, where concatenated string literals are adjacent to
    commas or inside square brackets.

    Such a heuristic will catch instances like this::

        foo = [
            "item1"
            "item2"
        ]

    or::

        foo = (
            "item1",
            "item2"
            "item3",
        )

    but will not warn about instances like this::

        msg = (
            "This is a long message "
            "that has been broken across lines."
        )

    Args:
        src: The source code to inspect, as a single string.

    Returns:
        None.  One line is printed to standard output per suspicious run of
        string literals, giving the 1-based line and 0-based column of the
        first literal and the number of characters from the start of the
        first literal to the end of the last one (whole intermediate lines,
        including their line endings, are counted).

    Exceptions raised by ``tokenize`` while scanning ``src`` are not caught.
    """

    BEFORE_BAD = ',', '['
    AFTER_BAD = ',', ']'

    lines = src.splitlines(True)
    line_iter = iter(lines)

    def readline():
        # ``next`` with a default works on both Python 2 and Python 3 (the
        # ``.next`` method of iterators exists on Python 2 only); returning
        # an empty string tells tokenize that the input is exhausted.
        return next(line_iter, '')

    # (start, end) positions of the run of adjacent string literals seen
    # since the last significant non-string token.
    strings = []
    # Text of the last significant non-string token.
    prev = None
    for tok_type, tok, start, end, _ in tokenize.generate_tokens(readline):
        # Line breaks inside brackets and comments do not interrupt implicit
        # concatenation, so they must not end the current run.
        if tok_type in (tokenize.NL, tokenize.COMMENT):
            continue
        if tok_type == tokenize.STRING:
            strings.append((start, end))
            continue

        if len(strings) > 1 and (prev in BEFORE_BAD or tok in AFTER_BAD):
            startl, startoff = strings[0][0]
            endl, endoff = strings[-1][1]
            if startl == endl:
                size = endoff - startoff
            else:
                # Rest of the first line, every full line in between, then
                # the used part of the last line.
                size = len(lines[startl - 1]) - startoff
                size += sum(len(text) for text in lines[startl:endl - 1])
                size += endoff

            print((
                "[line {startl}, col {startoff}, {size} chars] "
                "Suspicious string literal concatenation - did you miss a comma?"
            ).format(startl=startl, startoff=startoff, size=size))

        # Always remember the current token, even right after a warning;
        # otherwise a following run such as the second pair in
        # ``f("a" "b", "c" "d")`` would be compared against a stale token
        # and go unreported.
        prev = tok
        del strings[:]
	def check_dubious_concat(src):
	    """Check for suspicious string literal concatenation.

	    We will warn about instances of string literal concatenation in list-like
	    contexts - that is, where concatenated string literals are adjacent to
	    commas or inside square brackets.

	    Such a heuristic will catch instances like this::

	        foo = [
	            "item1"
	            "item2"
	        ]

	    or::

	        foo = (
	            "item1",
	            "item2"
	            "item3",
	        )

	    but will not warn about instances like this::

	        msg = (
	            "This is a long message "
	            "that has been broken across lines."
	        )

	    Args:
	        src: The source code to inspect, as a single string.

	    Returns:
	        None.  One line is printed to standard output per suspicious run of
	        string literals, giving the 1-based line and 0-based column of the
	        first literal and the number of characters from the start of the
	        first literal to the end of the last one (whole intermediate lines,
	        including their line endings, are counted).

	    Exceptions raised by ``tokenize`` while scanning ``src`` are not caught.
	    """

	    BEFORE_BAD = ',', '['
	    AFTER_BAD = ',', ']'

	    lines = src.splitlines(True)
	    line_iter = iter(lines)

	    def readline():
	        # ``next`` with a default works on both Python 2 and Python 3 (the
	        # ``.next`` method of iterators exists on Python 2 only); returning
	        # an empty string tells tokenize that the input is exhausted.
	        return next(line_iter, '')

	    # (start, end) positions of the run of adjacent string literals seen
	    # since the last significant non-string token.
	    strings = []
	    # Text of the last significant non-string token.
	    prev = None
	    for tok_type, tok, start, end, _ in tokenize.generate_tokens(readline):
	        # Line breaks inside brackets and comments do not interrupt implicit
	        # concatenation, so they must not end the current run.
	        if tok_type in (tokenize.NL, tokenize.COMMENT):
	            continue
	        if tok_type == tokenize.STRING:
	            strings.append((start, end))
	            continue

	        if len(strings) > 1 and (prev in BEFORE_BAD or tok in AFTER_BAD):
	            startl, startoff = strings[0][0]
	            endl, endoff = strings[-1][1]
	            if startl == endl:
	                size = endoff - startoff
	            else:
	                # Rest of the first line, every full line in between, then
	                # the used part of the last line.
	                size = len(lines[startl - 1]) - startoff
	                size += sum(len(text) for text in lines[startl:endl - 1])
	                size += endoff

	            print((
	                "[line {startl}, col {startoff}, {size} chars] "
	                "Suspicious string literal concatenation - did you miss a comma?"
	            ).format(startl=startl, startoff=startoff, size=size))

	        # Always remember the current token, even right after a warning;
	        # otherwise a following run such as the second pair in
	        # ``f("a" "b", "c" "d")`` would be compared against a stale token
	        # and go unreported.
	        prev = tok
	        del strings[:]