Created
November 11, 2018 21:03
-
-
Save hay/5f0f184cf36dc2e96246b72f01275f0a to your computer and use it in GitHub Desktop.
A pretty horrible Python script to fix the SQL errors in the Rijksmonumenten dump mentioned here: https://github.com/clytras/AccessConverter/issues/5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from os.path import getsize, splitext
from sys import argv, exit

from tqdm import tqdm
def fix_token(token): | |
if token == ",": | |
return "''," | |
elif token == "(,": | |
return "(''," | |
elif ",)," in token: | |
return token.replace(",),", ",''),") | |
elif token == "),": | |
return "'')," | |
elif token == ");": | |
return "'');" | |
elif ",);" in token: | |
return token.replace(",);", ",'');") | |
else: | |
return token | |
def iter_tokens(path): | |
with open(path) as f: | |
data = "" | |
statement = "" | |
for line in f: | |
for token in line.split(" "): | |
yield token.strip() | |
yield "\n" | |
def main(): | |
if len(argv) != 2: | |
exit("Invalid arguments") | |
path = argv[1] | |
target = path.replace(".sql", "-fixed.sql") | |
tokens = iter_tokens(path) | |
tokens = tqdm(tokens, total = getsize(path) / 10) | |
with open(target, "w") as f: | |
statement = [] | |
values_open = False | |
for token in tokens: | |
if token == "INSERT": | |
values_open = True | |
if token == "CREATE" and values_open: | |
# This should not happen, drop the whole current statement | |
values_open = False | |
statement = [] | |
if values_open: | |
statement.append(token) | |
else: | |
f.write(token + " ") | |
if ");" in token: | |
statement = [fix_token(token) for token in statement] | |
statement = " ".join(statement) | |
f.write(f"\n{statement}\n") | |
values_open = False | |
statement = [] | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment