Created
March 1, 2022 00:02
-
-
Save tigerhawkvok/7c718afb34876d5c59b1e73f7f190f02 to your computer and use it in GitHub Desktop.
Take in a dirty SQL file formatted who knows how and get it mostly formatted
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!python3 | |
""" | |
Quick and dirty SQL beautifier | |
""" | |
FILEPATH = "buildWDTables.sql" | |
TABSIZE = 4 | |
####################################################### | |
import re | |
import os | |
import shutil | |
with open(FILEPATH) as fh: | |
fileText = fh.read() | |
outFile = ".".join(FILEPATH.split(".")[:-1]) + "_orig." + FILEPATH.split(".").pop() | |
i = 0 | |
while os.path.isfile(outFile): | |
i += 1 | |
outFile = ".".join(FILEPATH.split(".")[:-1]) + f"_orig{i}." + FILEPATH.split(".").pop() | |
shutil.copy(FILEPATH, outFile) | |
fileText = re.sub(r"(from|join) +(#?[\[\]a-z\.]+) ([a-z0-9]+)\s*($|(with)?\s*\(nolock\))", r'\1 \2 AS "\3" \4', fileText, 0, re.IGNORECASE | re.MULTILINE) | |
fileText = re.sub(r"\s*(with)?\s*\(nolock\)", " WITH (NOLOCK)", fileText, 0, re.IGNORECASE | re.MULTILINE) | |
fileText = re.sub(r"as +([a-z_0-9]+)((?: +--.*?|(?: +with)? *\(nolock\))? *[,;]?)$", r'AS "\1"\2', fileText, 0, re.IGNORECASE | re.MULTILINE) | |
fileText = re.sub(r'as +"?([a-z_0-9]+)"?((?: +--.*?|(?: +with)? *\(nolock\))? *[,;]?)(on .*)$', 'AS "\\1"\\2\n\t\\3', fileText, 0, re.IGNORECASE | re.MULTILINE) | |
fileText = re.sub(",([^ ])", r", \1", fileText, 0, re.IGNORECASE | re.MULTILINE) | |
fileText = fileText.replace("<>", "!=") | |
# Quoting aliases | |
fileText = re.sub(r",\s*((?:[a-z\_0-9\\-]+\.)?\[?[a-z\_0-9\\-]+\]?)[ \t]+\[?([a-z\_0-9\\-]+)\]?(?: \t)*$", r', \1 AS "\2"', fileText, 0, re.IGNORECASE | re.MULTILINE) | |
fileText = re.sub(r"(\s*)((?:[a-z\_0-9\\-]+\.)?\[?[a-z\_0-9\\-]+\]?)[ \t]+\[?([a-z\_0-9\\-]+)\]?\s*,(?: \t)*$", r'\1\2 AS "\3",', fileText, 0, re.IGNORECASE | re.MULTILINE) | |
fileText = re.sub(r"(from|join)\s+((?:#?\[?[a-z\_0-9\-]+\]?\.?){1,3})[ \t]+\[?([a-z\_0-9\-]+)\]?[ \t]*$", r'\1 \2 AS "\3"', fileText, 0, re.IGNORECASE | re.MULTILINE) | |
# This may break poorly-formatted files | |
fileText = re.sub("from (#[a-z0-9_]+) +([a-z0-9_]+)[ \t]*$", 'FROM $1 AS "$2"', fileText, 0, re.IGNORECASE | re.MULTILINE) | |
while True: | |
fileTextNew = re.sub(r"^(?!20[0-9]{2})(.*?)([a-z0-9\(\)])(\+|-|/|\*|>=|<=|>|<|=|!=)([^ \-])", r"\1\2 \3 \4", fileText, 0, re.IGNORECASE | re.MULTILINE) | |
if fileTextNew == fileText: | |
break | |
fileText = fileTextNew | |
fileText = re.sub(r"'(\d+) / (\d+) / (\d+)'", r"'\1/\2/\3'", fileText, 0, re.IGNORECASE | re.MULTILINE) | |
def computeReplacement(matchObj): | |
"""Handle the case replacements""" | |
start, _ = matchObj.span() | |
lineLength = 0 | |
isCommented = False | |
matchString = matchObj.group() | |
for line in matchObj.string.splitlines(): | |
if lineLength + len(line) > start: | |
# Match is on this line | |
if line.find("--") > -1 and line.find("--") + lineLength < start: | |
# Line is commented | |
isCommented = True | |
if re.search(f"""['"][^'"]*?({re.escape(matchString)})[^'"]*?['"]""", line, re.IGNORECASE): | |
# Match is in a string | |
isCommented = True # Not exactly but kinda | |
break | |
lineLength += len(line) + 1 | |
return matchString.upper() if not isCommented else matchString | |
fileText = re.sub(r"(?!--)((?:\s+|^)(?:over|partition|order|left|join|union|select|from|isnull|cast|float|int|varchar|getdate|datetime|is|null|case|when|else|then|as|with|nolock|outer|where|group|by|having|top|into|drop|table|sum|and|end|in|not|on|or|nullif|exists|exists|dateadd|count|distinct|log|is not null|left join|inner join)(?:\s+|\(|\)|$))", computeReplacement, fileText, 0, re.IGNORECASE | re.MULTILINE) | |
fileText = fileText.replace("\t", " " * TABSIZE) | |
fileText = re.sub("'([0-9]{4}) - ([0-9]{2}) - ([0-9]{2})", r"'\1-\2-\3", fileText, 0, re.IGNORECASE | re.MULTILINE) | |
with open(FILEPATH, "w", encoding= "utf-8", newline= "\n") as fh: | |
# Write it back out, fixing newlines, encoding, trailing spaces, | |
# and adding a terminal newline | |
fh.write("\n".join([line.rstrip() for line in fileText.splitlines()]) + "\n") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment