-
-
Save jayvdb/5499e63acaf46ec44d716a74d341706e to your computer and use it in GitHub Desktop.
Linter for implicit string literal concatenation in list-like contexts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def check_dubious_concat(src):
    """Check for suspicious string literal concatenation.

    We will warn about instances of string literal concatenation in list-like
    contexts - that is, where concatenated string literals are adjacent to
    commas or inside square brackets.

    Such a heuristic will catch instances like this::

        foo = [
            "item1"
            "item2"
        ]

    or::

        foo = (
            "item1",
            "item2"
            "item3",
        )

    but will not warn about instances like this::

        msg = (
            "This is a long message "
            "that has been broken across lines."
        )

    :param src: Python source code to check, as a single string.
    :returns: None. For every suspicious run of adjacent string literals one
        line is printed to standard output, giving the 1-based line and the
        column offset of the first literal, and the number of characters
        from the start of the first literal to the end of the last one.
    """
    # Imported here so the function is usable even when the enclosing module
    # does not import tokenize at the top of the file.
    import tokenize

    # A run of literals is suspicious when the token just before it is one of
    # BEFORE_BAD, or the token just after it is one of AFTER_BAD.
    BEFORE_BAD = ',', '['
    AFTER_BAD = ',', ']'

    # Keep line endings: the span size below is counted in raw characters.
    lines = src.splitlines(True)
    line_iter = iter(lines)

    def readline():
        # tokenize expects a readline-style callable; '' signals end of input.
        # (The bound ``.next`` method of an iterator only exists on Python 2.)
        return next(line_iter, '')

    pending = []  # (start, end) positions of the current run of string tokens
    prev = None   # text of the most recent token that was not a string

    for tok_type, tok, start, end, _ in tokenize.generate_tokens(readline):
        # Line breaks inside brackets and comments do not interrupt a run.
        if tok_type in (tokenize.NL, tokenize.COMMENT):
            continue

        if tok_type == tokenize.STRING:
            pending.append((start, end))
            continue

        # Any other token terminates the current run of string literals.
        if (prev in BEFORE_BAD or tok in AFTER_BAD) and len(pending) > 1:
            startl, startoff = pending[0][0]
            endl, endoff = pending[-1][1]
            if startl == endl:
                size = endoff - startoff
            else:
                # Tail of the first line, every full line in between, then
                # the head of the last line up to the end of the literal.
                size = len(lines[startl - 1]) - startoff
                size += sum(len(mid) for mid in lines[startl:endl - 1])
                size += endoff
            print((
                "[line {startl}, col {startoff}, {size} chars] "
                "Suspicious string literal concatenation - did you miss a comma?"
            ).format(startl=startl, startoff=startoff, size=size))

        # Always remember this token, even after a warning, so that a run
        # which immediately follows a flagged one is compared against the
        # token that really precedes it.
        prev = tok
        del pending[:]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment