Skip to content

Instantly share code, notes, and snippets.

@yeiichi
Last active November 6, 2024 23:11
Show Gist options
  • Save yeiichi/3ee4eeebf9f57c52f532436e5ac7213c to your computer and use it in GitHub Desktop.
Save yeiichi/3ee4eeebf9f57c52f532436e5ac7213c to your computer and use it in GitHub Desktop.
Check if the word is an SQL keyword or reserved word.
#!/usr/bin/env python3
import csv
def is_sql_kwd(word_in):
"""Check if the word is an SQL keyword or reserved word.
Args:
word_in (str):
Returns:
bool: If word_in is a SQL keyword/reserved word (True) or not (False).
References:
https://www.postgresql.org/docs/current/sql-keywords-appendix.html
https://superuser.com/questions/887161/remove-200b-character-from-text-file
"""
reserved_words = [
'ABORT', 'ABS', 'ABSENT', 'ABSOLUTE', 'ACCESS', 'ACCORDING', 'ACOS', 'ACTION', 'ADA', 'ADD', 'ADMIN', 'AFTER',
'AGGREGATE', 'ALL', 'ALLOCATE', 'ALSO', 'ALTER', 'ALWAYS', 'ANALYSE', 'ANALYZE', 'AND', 'ANY', 'ANY_VALUE',
'ARE', 'ARRAY', 'ARRAY_AGG', 'ARRAY_MAX_CARDINALITY', 'AS', 'ASC', 'ASIN', 'ASSERTION', 'ASSIGNMENT',
'ASYMMETRIC', 'AT', 'ATAN', 'ATOMIC', 'ATTACH', 'ATTRIBUTE', 'ATTRIBUTES', 'AUTHORIZATION', 'AVG', 'BACKWARD',
'BASE64', 'BEFORE', 'BEGIN', 'BEGIN_FRAME', 'BEGIN_PARTITION', 'BERNOULLI', 'BETWEEN', 'BIGINT', 'BINARY',
'BIT', 'BIT_LENGTH', 'BLOB', 'BLOCKED', 'BOM', 'BOOLEAN', 'BOTH', 'BREADTH', 'BTRIM', 'BY', 'CACHE', 'CALL',
'CALLED', 'CARDINALITY', 'CASCADE', 'CASCADED', 'CASE', 'CAST', 'CATALOG', 'CATALOG_NAME', 'CEIL', 'CEILING',
'CHAIN', 'CHAINING', 'CHAR', 'CHARACTER', 'CHARACTERISTICS', 'CHARACTERS', 'CHARACTER_LENGTH',
'CHARACTER_SET_NAME', 'CHARACTER_SET_SCHEMA', 'CHARACTER_SET_CATALOG', 'CHAR_LENGTH', 'CHECK', 'CHECKPOINT',
'CLASS', 'CLASSIFIER', 'CLASS_ORIGIN', 'CLOB', 'CLOSE', 'CLUSTER', 'COALESCE', 'COBOL', 'COLLATE', 'COLLATION',
'COLLATION_CATALOG', 'COLLATION_NAME', 'COLLATION_SCHEMA', 'COLLECT', 'COLUMN', 'COLUMNS', 'COLUMN_NAME',
'COMMAND_FUNCTION', 'COMMAND_FUNCTION_CODE', 'COMMENT', 'COMMENTS', 'COMMIT', 'COMMITTED', 'COMPRESSION',
'CONCURRENTLY', 'CONDITION', 'CONDITIONAL', 'CONDITION_NUMBER', 'CONFIGURATION', 'CONFLICT', 'CONNECT',
'CONNECTION', 'CONNECTION_NAME', 'CONSTRAINT', 'CONSTRAINTS', 'CONSTRAINT_CATALOG', 'CONSTRAINT_NAME',
'CONSTRAINT_SCHEMA', 'CONSTRUCTOR', 'CONTAINS', 'CONTENT', 'CONTINUE', 'CONTROL', 'CONVERSION', 'CONVERT',
'COPARTITION', 'COPY', 'CORR', 'CORRESPONDING', 'COS', 'COSH', 'COST', 'COUNT', 'COVAR_POP', 'COVAR_SAMP',
'CREATE', 'CROSS', 'CSV', 'CUBE', 'CUME_DIST', 'CURRENT', 'CURRENT_CATALOG', 'CURRENT_DATE', 'CURRENT_PATH',
'CURRENT_ROLE', 'CURRENT_ROW', 'CURRENT_SCHEMA', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
'CURRENT_DEFAULT_TRANSFORM_GROUP', 'CURRENT_TRANSFORM_GROUP_FOR_TYPE', 'CURSOR', 'CURSOR_NAME', 'CYCLE', 'DATA',
'DATABASE', 'DATALINK', 'DATE', 'DATETIME_INTERVAL_CODE', 'DATETIME_INTERVAL_PRECISION', 'DAY', 'DB',
'DEALLOCATE', 'DEC', 'DECFLOAT', 'DECIMAL', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE', 'DEFERRED',
'DEFINE', 'DEFINED', 'DEFINER', 'DEGREE', 'DELETE', 'DELIMITER', 'DELIMITERS', 'DENSE_RANK', 'DEPENDS', 'DEPTH',
'DEREF', 'DERIVED', 'DESC', 'DESCRIBE', 'DESCRIPTOR', 'DETACH', 'DETERMINISTIC', 'DIAGNOSTICS', 'DICTIONARY',
'DISABLE', 'DISCARD', 'DISCONNECT', 'DISPATCH', 'DISTINCT', 'DLNEWCOPY', 'DLPREVIOUSCOPY', 'DLURLCOMPLETE',
'DLURLCOMPLETEONLY', 'DLURLCOMPLETEWRITE', 'DLURLPATH', 'DLURLPATHONLY', 'DLURLPATHWRITE', 'DLURLSCHEME',
'DLURLSERVER', 'DLVALUE', 'DO', 'DOCUMENT', 'DOMAIN', 'DOUBLE', 'DROP', 'DYNAMIC', 'DYNAMIC_FUNCTION',
'DYNAMIC_FUNCTION_CODE', 'EACH', 'ELEMENT', 'ELSE', 'EMPTY', 'ENABLE', 'ENCODING', 'ENCRYPTED', 'END',
'END-EXEC', 'END_FRAME', 'END_PARTITION', 'ENFORCED', 'ENUM', 'EQUALS', 'ERROR', 'ESCAPE', 'EVENT', 'EVERY',
'EXCEPT', 'EXCEPTION', 'EXCLUDE', 'EXCLUDING', 'EXCLUSIVE', 'EXEC', 'EXECUTE', 'EXISTS', 'EXP', 'EXPLAIN',
'EXPRESSION', 'EXTENSION', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FAMILY', 'FETCH', 'FILE', 'FILTER', 'FINAL',
'FINALIZE', 'FINISH', 'FIRST', 'FIRST_VALUE', 'FLAG', 'FLOAT', 'FLOOR', 'FOLLOWING', 'FOR', 'FORCE', 'FOREIGN',
'FORMAT', 'FORTRAN', 'FORWARD', 'FOUND', 'FRAME_ROW', 'FREE', 'FREEZE', 'FROM', 'FS', 'FULFILL', 'FULL',
'FUNCTION', 'FUNCTIONS', 'FUSION', 'GENERAL', 'GENERATED', 'GET', 'GLOBAL', 'GO', 'GOTO', 'GRANT', 'GRANTED',
'GREATEST', 'GROUP', 'GROUPING', 'GROUPS', 'HANDLER', 'HAVING', 'HEADER', 'HEX', 'HIERARCHY', 'HOLD', 'HOUR',
'ID', 'IDENTITY', 'IF', 'IGNORE', 'ILIKE', 'IMMEDIATE', 'IMMEDIATELY', 'IMMUTABLE', 'IMPLEMENTATION',
'IMPLICIT', 'IMPORT', 'IN', 'INCLUDE', 'INCLUDING', 'INCREMENT', 'INDENT', 'INDEX', 'INDEXES', 'INDICATOR',
'INHERIT', 'INHERITS', 'INITIAL', 'INITIALLY', 'INLINE', 'INNER', 'INOUT', 'INPUT', 'INSENSITIVE', 'INSERT',
'INSTANCE', 'INSTANTIABLE', 'INSTEAD', 'INT', 'INTEGER', 'INTEGRITY', 'INTERSECT', 'INTERSECTION', 'INTERVAL',
'INTO', 'INVOKER', 'IS', 'ISNULL', 'ISOLATION', 'JOIN', 'JSON', 'JSON_ARRAY', 'JSON_ARRAYAGG', 'JSON_EXISTS',
'JSON_OBJECT', 'JSON_OBJECTAGG', 'JSON_QUERY', 'JSON_SCALAR', 'JSON_SERIALIZE', 'JSON_TABLE',
'JSON_TABLE_PRIMITIVE', 'JSON_VALUE', 'KEEP', 'KEY', 'KEYS', 'KEY_MEMBER', 'KEY_TYPE', 'LABEL', 'LAG',
'LANGUAGE', 'LARGE', 'LAST', 'LAST_VALUE', 'LATERAL', 'LEAD', 'LEADING', 'LEAKPROOF', 'LEAST', 'LEFT', 'LENGTH',
'LEVEL', 'LIBRARY', 'LIKE', 'LIKE_REGEX', 'LIMIT', 'LINK', 'LISTAGG', 'LISTEN', 'LN', 'LOAD', 'LOCAL',
'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATION', 'LOCATOR', 'LOCK', 'LOCKED', 'LOG', 'LOG10', 'LOGGED', 'LOWER',
'LPAD', 'LTRIM', 'MAP', 'MAPPING', 'MATCH', 'MATCHED', 'MATCHES', 'MATCH_NUMBER', 'MATCH_RECOGNIZE',
'MATERIALIZED', 'MAX', 'MAXVALUE', 'MEASURES', 'MEMBER', 'MERGE', 'MERGE_ACTION', 'MESSAGE_LENGTH',
'MESSAGE_OCTET_LENGTH', 'MESSAGE_TEXT', 'METHOD', 'MIN', 'MINUTE', 'MINVALUE', 'MOD', 'MODE', 'MODIFIES',
'MODULE', 'MONTH', 'MORE', 'MOVE', 'MULTISET', 'MUMPS', 'NAME', 'NAMES', 'NAMESPACE', 'NATIONAL', 'NATURAL',
'NCHAR', 'NCLOB', 'NESTED', 'NESTING', 'NEW', 'NEXT', 'NFC', 'NFD', 'NFKC', 'NFKD', 'NIL', 'NO', 'NONE',
'NORMALIZE', 'NORMALIZED', 'NOT', 'NOTHING', 'NOTIFY', 'NOTNULL', 'NOWAIT', 'NTH_VALUE', 'NTILE', 'NULLABLE',
'NULLIF', 'NULLS', 'NULL_ORDERING', 'NUMBER', 'NUMERIC', 'OBJECT', 'OCCURRENCE', 'OCCURRENCES_REGEX', 'OCTETS',
'OCTET_LENGTH', 'OF', 'OFF', 'OFFSET', 'OIDS', 'OLD', 'OMIT', 'ON', 'ONE', 'ONLY', 'OPEN', 'OPERATOR', 'OPTION',
'OPTIONS', 'OR', 'ORDER', 'ORDERING', 'ORDINALITY', 'OTHERS', 'OUT', 'OUTER', 'OUTPUT', 'OVER', 'OVERFLOW',
'OVERLAPS', 'OVERLAY', 'OVERRIDING', 'OWNED', 'OWNER', 'PAD', 'PARALLEL', 'PARAMETER', 'PARAMETER_MODE',
'PARAMETER_NAME', 'PARAMETER_ORDINAL_POSITION', 'PARAMETER_SPECIFIC_CATALOG', 'PARAMETER_SPECIFIC_NAME',
'PARAMETER_SPECIFIC_SCHEMA', 'PARSER', 'PARTIAL', 'PARTITION', 'PASCAL', 'PASS', 'PASSING', 'PASSTHROUGH',
'PASSWORD', 'PAST', 'PATH', 'PATTERN', 'PER', 'PERCENT', 'PERCENTILE_CONT', 'PERCENTILE_DISC', 'PERCENT_RANK',
'PERIOD', 'PERMISSION', 'PERMUTE', 'PIPE', 'PLACING', 'PLAN', 'PLANS', 'PLI', 'POLICY', 'PORTION', 'POSITION',
'POSITION_REGEX', 'POWER', 'PRECEDES', 'PRECEDING', 'PRECISION', 'PREPARE', 'PREPARED', 'PRESERVE', 'PREV',
'PRIMARY', 'PRIOR', 'PRIVATE', 'PRIVILEGES', 'PROCEDURAL', 'PROCEDURE', 'PROCEDURES', 'PROGRAM', 'PRUNE', 'PTF',
'PUBLIC', 'PUBLICATION', 'Prev', 'QUOTE', 'QUOTES', 'RANGE', 'RANK', 'READ', 'READS', 'REAL', 'REASSIGN',
'RECHECK', 'RECOVERY', 'RECURSIVE', 'REF', 'REFERENCES', 'REFERENCING', 'REFRESH', 'REGR_AVGX', 'REGR_AVGY',
'REGR_COUNT', 'REGR_INTERCEPT', 'REGR_R2', 'REGR_SLOPE', 'REGR_SXX', 'REGR_SXY', 'REGR_SYY', 'REINDEX',
'RELATIVE', 'RELEASE', 'RENAME', 'REPEATABLE', 'REPLACE', 'REPLICA', 'REQUIRING', 'RESET', 'RESPECT', 'RESTART',
'RESTORE', 'RESTRICT', 'RESULT', 'RETURN', 'RETURNED_CARDINALITY', 'RETURNED_LENGTH', 'RETURNED_SQLSTATE',
'RETURNED_OCTET_LENGTH', 'RETURNING', 'RETURNS', 'REVOKE', 'RIGHT', 'ROLE', 'ROLLBACK', 'ROLLUP', 'ROUTINE',
'ROUTINES', 'ROUTINE_CATALOG', 'ROUTINE_NAME', 'ROUTINE_SCHEMA', 'ROW', 'ROWS', 'ROW_COUNT', 'ROW_NUMBER',
'RPAD', 'RTRIM', 'RULE', 'RUNNING', 'SAVEPOINT', 'SCALAR', 'SCALE', 'SCHEMA', 'SCHEMAS', 'SCHEMA_NAME', 'SCOPE',
'SCOPE_CATALOG', 'SCOPE_NAME', 'SCOPE_SCHEMA', 'SCROLL', 'SEARCH', 'SECOND', 'SECTION', 'SECURITY', 'SEEK',
'SELECT', 'SELECTIVE', 'SELF', 'SEMANTICS', 'SENSITIVE', 'SEQUENCE', 'SEQUENCES', 'SERIALIZABLE', 'SERVER',
'SERVER_NAME', 'SESSION', 'SESSION_USER', 'SET', 'SETOF', 'SETS', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE', 'SIN',
'SINH', 'SIZE', 'SKIP', 'SMALLINT', 'SNAPSHOT', 'SOME', 'SORT_DIRECTION', 'SOURCE', 'SPACE', 'SPECIFIC',
'SPECIFICTYPE', 'SPECIFIC_NAME', 'SQL', 'SQLCODE', 'SQLERROR', 'SQLEXCEPTION', 'SQLSTATE', 'SQLWARNING', 'SQRT',
'STABLE', 'STANDALONE', 'START', 'STATE', 'STATEMENT', 'STATIC', 'STATISTICS', 'STDDEV_POP', 'STDDEV_SAMP',
'STDIN', 'STDOUT', 'STORAGE', 'STORED', 'STRICT', 'STRING', 'STRIP', 'STRUCTURE', 'STYLE', 'SUBCLASS_ORIGIN',
'SUBMULTISET', 'SUBSCRIPTION', 'SUBSET', 'SUBSTRING', 'SUBSTRING_REGEX', 'SUCCEEDS', 'SUM', 'SUPPORT',
'SYMMETRIC', 'SYSID', 'SYSTEM', 'SYSTEM_TIME', 'SYSTEM_USER', 'TABLE', 'TABLES', 'TABLESAMPLE', 'TABLESPACE',
'TABLE_NAME', 'TAN', 'TANH', 'TARGET', 'TEMP', 'TEMPLATE', 'TEMPORARY', 'TEXT', 'THEN', 'THROUGH', 'TIES',
'TIME', 'TIMESTAMP', 'TIMEZONE_HOUR', 'TIMEZONE_MINUTE', 'TO', 'TOKEN', 'TOP_LEVEL_COUNT', 'TRAILING',
'TRANSACTION', 'TRANSACTIONS_COMMITTED', 'TRANSACTIONS_ROLLED_BACK', 'TRANSACTION_ACTIVE', 'TRANSFORM',
'TRANSFORMS', 'TRANSLATE', 'TRANSLATE_REGEX', 'TRANSLATION', 'TREAT', 'TRIGGER', 'TRIGGER_CATALOG',
'TRIGGER_NAME', 'TRIGGER_SCHEMA', 'TRIM', 'TRIM_ARRAY', 'TRUE', 'TRUNCATE', 'TRUSTED', 'TYPE', 'TYPES',
'UESCAPE', 'UNBOUNDED', 'UNCOMMITTED', 'UNCONDITIONAL', 'UNDER', 'UNENCRYPTED', 'UNION', 'UNIQUE', 'UNKNOWN',
'UNLINK', 'UNLISTEN', 'UNLOGGED', 'UNMATCHED', 'UNNAMED', 'UNNEST', 'UNTIL', 'UNTYPED', 'UPDATE', 'UPPER',
'URI', 'USAGE', 'USER', 'USER_DEFINED_TYPE_CATALOG', 'USER_DEFINED_TYPE_CODE', 'USER_DEFINED_TYPE_NAME',
'USER_DEFINED_TYPE_SCHEMA', 'USING', 'UTF16', 'UTF32', 'UTF8', 'VACUUM', 'VALID', 'VALIDATE', 'VALIDATOR',
'VALUE', 'VALUES', 'VALUE_OF', 'VARBINARY', 'VARCHAR', 'VARIADIC', 'VARYING', 'VAR_POP', 'VAR_SAMP', 'VERBOSE',
'VERSION', 'VERSIONING', 'VIEW', 'VIEWS', 'VOLATILE', 'WHEN', 'WHENEVER', 'WHERE', 'WHITESPACE', 'WIDTH_BUCKET',
'WINDOW', 'WITH', 'WITHIN', 'WITHOUT', 'WORK', 'WRAPPER', 'WRITE', 'XML', 'XMLAGG', 'XMLATTRIBUTES',
'XMLBINARY', 'XMLCAST', 'XMLCOMMENT', 'XMLCONCAT', 'XMLDECLARATION', 'XMLDOCUMENT', 'XMLELEMENT', 'XMLEXISTS',
'XMLFOREST', 'XMLITERATE', 'XMLNAMESPACES', 'XMLPARSE', 'XMLPI', 'XMLQUERY', 'XMLROOT', 'XMLSCHEMA',
'XMLSERIALIZE', 'XMLTABLE', 'XMLTEXT', 'XMLVALIDATE', 'YEAR', 'YES', 'ZONE']
return word_in.upper() in reserved_words
def check_csv_column_names(csv_file):
with open(csv_file) as f:
csv_reader = csv.reader(f)
header = next(csv_reader)
for col_name in header:
# noinspection PyCompatibility
match is_sql_kwd(col_name):
case True:
print(f'\033[93mNG: {col_name}\033[0m')
case False:
print(f'OK: {col_name}')
if __name__ == '__main__':
check_csv_column_names(input('CSV file? >> '))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment