Created
June 12, 2014 19:48
-
-
Save brettcannon/40af38646cdc959dffbf to your computer and use it in GitHub Desktop.
Python 2/3 syntax checker
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
import symbol | |
import token | |
import astroid | |
from pylint import checkers, interfaces | |
from pylint.checkers import utils | |
# http://python3porting.com/differences.html
## Straightforward
### No exec with arguments
### No __metaclass__
### No dict.iter*()
### No parameter unpacking
### round() different
### No list.sort(cmp=)
## Scoping
### No sorted(cmp=)
### io.open() over open()
### list(filter()) or future_builtins.filter()
### No exception object escaping `except` scope
### No listcomp variable escaping
## Don't know
### indexing bytes
# Python 2.6
## from __future__ import absolute_import (only needed 2.6 or 2.5)
# Python 2.5
## codecs.open over open()/file()
class SixChecker(checkers.BaseChecker):
    """AST-based checker for Python 2 constructs that are a problem in Python 3.

    Reports print statements, definitions of methods that Python 3 no longer
    uses (or renamed), references to built-ins that were removed, renamed or
    changed semantics, and ``/`` division performed without
    ``from __future__ import division`` or a float literal operand.
    """

    __implements__ = interfaces.IAstroidChecker

    name = 'six'
    msgs = {
        'E6001': ('Use of a print statement',
                  'print-statement',
                  'Used when a print statement is found (invalid syntax in Python 3)',
                  {'maxversion': (3, 0)}),
        'W6001': ('__getslice__ defined',
                  'getslice-method',
                  'Used when a __getslice__ method is defined (unused in Python 3)',
                  {'maxversion': (3, 0)}),
        'W6002': ('__setslice__ defined',
                  'setslice-method',
                  'Used when a __setslice__ method is defined (unused in Python 3)',
                  {'maxversion': (3, 0)}),
        'W6003': ('__cmp__ defined',
                  'cmp-method',
                  'Used when a __cmp__ method is defined (unused in Python 3)',
                  {'maxversion': (3, 0)}),
        'W6004': ('__coerce__ defined',
                  'coerce-method',
                  'Used when a __coerce__ method is defined (unused in Python 3)',
                  {'maxversion': (3, 0)}),
        'W6005': ('__unicode__ defined',
                  'unicode-method',
                  'Used when a __unicode__ method is defined (renamed __str__ in Python 3)',
                  {'maxversion': (3, 0)}),
        'W6006': ('next defined',
                  'next-method',
                  "Used when a 'next' method is defined (renamed __next__ in Python 3)",
                  {'maxversion': (3, 0)}),
        'W6007': ('buffer built-in referenced',
                  'buffer-builtin',
                  'Used when the buffer() built-in function is referenced (removed in Python 3)',
                  {'maxversion': (3, 0)}),
        'W6008': ('apply built-in referenced',
                  'apply-builtin',
                  'Used when the apply() built-in function is referenced (removed in Python 3)',
                  {'maxversion': (3, 0)}),
        'W6009': ('cmp built-in referenced',
                  'cmp-builtin',
                  'Used when the cmp() built-in function is referenced (removed in Python 3)',
                  {'maxversion': (3, 0)}),
        'W6010': ('file built-in referenced',
                  'file-builtin',
                  'Used when the file() built-in function is referenced (removed in Python 3)',
                  {'maxversion': (3, 0)}),
        'W6011': ('raw_input built-in referenced',
                  'raw_input-builtin',
                  "Used when the raw_input() built-in function is referenced (renamed 'input' in Python 3)",
                  {'maxversion': (3, 0)}),
        'W6012': ('long built-in referenced',
                  'long-builtin',
                  'Used when the long() built-in function is referenced (removed in Python 3)',
                  {'maxversion': (3, 0)}),
        'W6013': ('coerce built-in referenced',
                  'coerce-builtin',
                  'Used when the coerce() built-in function is referenced (removed in Python 3)',
                  {'maxversion': (3, 0)}),
        'W6014': ('execfile built-in referenced',
                  'execfile-builtin',
                  'Used when the execfile() built-in function is referenced (removed in Python 3)',
                  {'maxversion': (3, 0)}),
        'W6015': ('xrange built-in referenced',
                  'xrange-builtin',
                  "Used when the xrange() built-in function is referenced (renamed 'range' in Python 3)",
                  {'maxversion': (3, 0)}),
        'W6016': ('unicode built-in referenced',
                  'unicode-builtin',
                  "Used when the unicode() built-in function is referenced (renamed 'str' in Python 3)",
                  {'maxversion': (3, 0)}),
        'W6017': ('StandardError built-in referenced',
                  'standarderror-builtin',
                  'Used when the StandardError built-in exception is referenced (removed in Python 3)',
                  {'maxversion': (3, 0)}),
        'W6018': ('map built-in referenced',
                  'map-builtin',
                  'Used when the map built-in function is referenced (semantics different in Python 3; '
                  'use future_builtins.map)',
                  {'maxversion': (3, 0)}),
        'W6019': ('zip built-in referenced',
                  'zip-builtin',
                  'Used when the zip built-in function is referenced (semantics different in Python 3; '
                  'use future_builtins.zip)',
                  {'maxversion': (3, 0)}),
        'W6020': ('division w/o __future__ statement',
                  'division',
                  'Used for non-floor division w/o a float literal or '
                  '``from __future__ import division`` '
                  '(Python 3 returns a float for int division unconditionally)',
                  {'maxversion': (3, 0)}),
    }

    # Method name -> message symbol reported when a method of that name is
    # defined. Built once at class creation instead of on every node visit.
    _BAD_METHODS = {
        '__getslice__': 'getslice-method',
        '__setslice__': 'setslice-method',
        '__cmp__': 'cmp-method',
        '__coerce__': 'coerce-method',
        '__unicode__': 'unicode-method',
        'next': 'next-method',
    }

    # Built-in name -> message symbol reported when that built-in is referenced.
    _BAD_BUILTINS = {
        'buffer': 'buffer-builtin',
        'apply': 'apply-builtin',
        'cmp': 'cmp-builtin',
        'file': 'file-builtin',
        'raw_input': 'raw_input-builtin',
        'long': 'long-builtin',
        'coerce': 'coerce-builtin',
        'execfile': 'execfile-builtin',
        'xrange': 'xrange-builtin',
        'unicode': 'unicode-builtin',
        'StandardError': 'standarderror-builtin',
        'map': 'map-builtin',  # Technically only care when used.
        'zip': 'zip-builtin',  # Technically only care when used.
    }

    def __init__(self, *args, **kwargs):
        """Initialise per-module state, then defer to the base checker."""
        # True once ``from __future__ import division`` has been seen in the
        # module currently being visited.
        self._future_division = False
        super(SixChecker, self).__init__(*args, **kwargs)

    def visit_module(self, node):
        """Reset per-module state at the start of each module.

        Without this, a ``from __future__ import division`` seen in one module
        would keep suppressing 'division' messages in every module visited
        afterwards by the same checker instance.
        """
        self._future_division = False

    @utils.check_messages('print-statement')
    def visit_print(self, node):
        """Report every print statement node."""
        self.add_message('print-statement', node=node)

    def visit_from(self, node):
        """Record whether this ``from`` import enables true division."""
        if node.modname != u'__future__':
            return
        if any(name == u'division' for name, _ in node.names):
            self._future_division = True

    @utils.check_messages('division')
    def visit_binop(self, node):
        """Report ``/`` when no __future__ import or float literal is involved."""
        if self._future_division or node.op != u'/':
            return
        # A float literal on either side gives float division on both major
        # versions, so the expression is not reported.
        has_float_literal = any(
            isinstance(operand, astroid.Const) and isinstance(operand.value, float)
            for operand in (node.left, node.right))
        if not has_float_literal:
            self.add_message('division', node=node)

    def visit_function(self, node):
        """Report methods whose names appear in ``_BAD_METHODS``."""
        if not node.is_method():
            return
        msg_id = self._BAD_METHODS.get(node.name)
        if msg_id is not None:
            self.add_message(msg_id, node=node)

    def visit_name(self, node):
        """Report names that resolve to a built-in listed in ``_BAD_BUILTINS``."""
        defining_scope = node.lookup(node.name)[0]
        # getattr guards against scope nodes that do not define ``name``
        # (NOTE(review): e.g. comprehension scopes - confirm against the
        # astroid version in use).
        if getattr(defining_scope, 'name', None) != '__builtin__':
            return
        msg_id = self._BAD_BUILTINS.get(node.name)
        if msg_id is not None:
            self.add_message(msg_id, node=node)
class UnicodeChecker(checkers.BaseTokenChecker):
    """Token checker reporting string literals that carry no ``b``/``u`` prefix.

    Checking stops for the whole token stream as soon as a line containing
    both ``__future__`` and ``unicode_literals`` is seen near the module start.
    """

    # The checker works through process_tokens(), so it declares the token
    # interface rather than the AST one.
    __implements__ = interfaces.ITokenChecker

    name = 'unicode'
    msgs = {
        'W6100': ('native string literal',
                  'native-string',
                  'Used when a string has no b/u prefix and '
                  '``from __future__ import unicode_literals`` not found '
                  '(strings w/ no prefix in Python 3 are Unicode)',
                  {'maxversion': (3, 0)}),
    }

    def process_tokens(self, tokens):
        """Scan ``tokens`` and report unprefixed string literals.

        ``tokens`` is iterated as 5-tuples of
        ``(type, string, start, end, line)``; ``start[0]`` is used as the line
        number of the reported message.
        """
        # Module docstring can be a native string.
        # Also used as a flag to notice when __future__ statements are no
        # longer valid, to avoid wasting time checking every NAME token
        # (which is < STRING).
        module_start = True
        for type_, val, start, _, line in tokens:
            # Anything else means we are past the first string in the module,
            # any comments (e.g. shebang), and no more __future__ statements
            # are possible.
            # NOTE(review): token.NEWLINE (4) is greater than token.STRING (3),
            # so the NEWLINE that ends a docstring (or a first __future__
            # import) also clears the flag; a later
            # ``from __future__ import unicode_literals`` is then not
            # detected. Confirm whether that is intended.
            if token.STRING < type_ < token.N_TOKENS:
                module_start = False
            elif type_ == token.STRING:
                # Prefixes are case-insensitive: U'...' and B'...' are as
                # explicit as u'...' and b'...'.
                if not module_start and val[:1].lower() not in ('u', 'b'):
                    # add_message() takes a line number, not the line's text.
                    self.add_message('native-string', line=start[0])
            elif module_start and type_ == token.NAME:
                # Fast-fail: the shortest matching statement is 39 characters.
                if (len(line) >= 39 and u'__future__' in line
                        and u'unicode_literals' in line):
                    return
class SyntaxChecker(checkers.BaseTokenChecker):
    """Token checker reporting numeric literal syntax that Python 3 rejects."""

    # The checker works through process_tokens(), so it declares the token
    # interface rather than the AST one.
    __implements__ = interfaces.ITokenChecker

    name = 'six'
    msgs = {
        'W6101': ('octal literal',
                  'octal-literal',
                  'Used when a octal literal w/ ``0`` prefix is defined '
                  '(Python 3 uses ``0o``)',
                  {'maxversion': (3, 0)}),
        'W6102': ('long literal',
                  'long-literal',
                  'Used when a long literal is defined '
                  '(Python 3 unified int and long)',
                  {'maxversion': (3, 0)}),
    }

    def process_tokens(self, tokens):
        """Scan ``tokens`` and report old-style octal and long literals.

        ``tokens`` is iterated as 5-tuples of
        ``(type, string, start, end, line)``; ``start[0]`` is used as the line
        number of the reported message. A literal such as ``0777L`` gets both
        messages because it has both problems.
        """
        for type_, val, start, _, _ in tokens:
            if type_ != token.NUMBER or len(val) < 2:
                continue
            # Checked independently of the octal test so that literals which
            # start with ``0`` (e.g. ``0xFFL``) are still reported.
            if val.endswith(('L', 'l')):
                # add_message() takes a line number, not the line's text.
                self.add_message('long-literal', line=start[0])
            # Old-style octal is a leading 0 followed only by decimal digits.
            # This leaves out 0x../0b../0o../0O.. literals, floats and complex
            # numbers, which merely start with the character '0'. All-zero
            # literals such as ``00`` are still valid in Python 3 and skipped.
            digits = val.rstrip('lL')
            if (len(digits) > 1 and digits.startswith('0')
                    and digits.isdigit() and digits.strip('0')):
                self.add_message('octal-literal', line=start[0])
def register(linter):
    """Register every checker defined in this module with ``linter``."""
    linter.register_checker(SixChecker(linter))
    linter.register_checker(UnicodeChecker(linter))
    # SyntaxChecker was defined but never registered, so its octal/long
    # literal messages could not be emitted.
    linter.register_checker(SyntaxChecker(linter))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Tests at https://gist.github.com/brettcannon/834b73c40055c249eb9f