Created
September 5, 2016 13:39
-
-
Save dmmfll/b6e1b89fc203ad9ee4132c183a0a75ed to your computer and use it in GitHub Desktop.
tokenize method for testing if a string is a valid identifier in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""http://stackoverflow.com/questions/12700893/how-to-check-if-a-string-is-a-valid-python-identifier-including-keyword-check""" | |
import keyword | |
import tokenize | |
def isidentifier(ident):
    """Determine whether a string is a valid, non-keyword Python identifier.

    The string is fed through :mod:`tokenize` and accepted only when the
    tokenizer produces a single NAME token that spans the whole string.

    Args:
        ident: The candidate identifier.

    Returns:
        True if ``ident`` tokenizes as exactly one NAME and is not a reserved
        keyword; False otherwise (including input the tokenizer rejects).

    Raises:
        TypeError: If ``ident`` is not a ``str``.
    """
    # Smoke test — a non-string cannot be an identifier, but silently
    # returning False would hide caller bugs. It's better to fail fast.
    if not isinstance(ident, str):
        raise TypeError('expected str, but got {!r}'.format(type(ident)))

    # Quick test — if string is in keyword list, it's definitely not an ident.
    if keyword.iskeyword(ident):
        return False

    # The UTF-8 BOM pins the encoding so tokenize never has to guess it.
    try:
        source = ident.encode('utf-8-sig')
    except UnicodeEncodeError:
        # Lone surrogates cannot be encoded, hence cannot be source text.
        return False

    # tokenize wants a readline callable: hand over the whole string once,
    # then signal end of input via StopIteration.
    readline = iter([source]).__next__
    try:
        tokens = list(tokenize.tokenize(readline))
    except (tokenize.TokenError, SyntaxError):
        # Malformed input (unbalanced brackets, bad literals, bogus coding
        # cookies, ...) is simply "not an identifier", not a caller error.
        return False

    # Recent tokenizers emit an implicit, empty NEWLINE token when the input
    # lacks a trailing newline. It carries no text, so ignore it; a real
    # newline in ``ident`` yields a non-empty NEWLINE and is still rejected.
    tokens = [
        tok for tok in tokens
        if not (tok.type == tokenize.NEWLINE and not tok.string)
    ]

    # What remains must be exactly ENCODING, NAME, ENDMARKER.
    if len(tokens) != 3:
        return False
    encoding_tok, name_tok, end_tok = tokens

    # First one is ENCODING; it is always present for bytes input.
    if encoding_tok.type != tokenize.ENCODING:
        return False
    # Second is NAME, the identifier itself.
    if name_tok.type != tokenize.NAME:
        return False
    # The name must span the whole string, so no stray whitespace is allowed.
    if ident != name_tok.string:
        return False
    # Third is ENDMARKER, ending the stream.
    return end_tok.type == tokenize.ENDMARKER
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment