Skip to content

Instantly share code, notes, and snippets.

@themorgantown
Last active September 17, 2024 17:15
Show Gist options
  • Save themorgantown/f32813e1efedcfeb005fc3f8e0c9ad24 to your computer and use it in GitHub Desktop.
Save themorgantown/f32813e1efedcfeb005fc3f8e0c9ad24 to your computer and use it in GitHub Desktop.
given a HUGE sql export, this script will split it up into individual tables and generate import statements for each table, like: mysql -u dbusername -p'dbpassword' database_name < table1
# usage:
# python3 splitter.py database_name dbusername dbpassword
import sys
import os
import re
import logging
def main():
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
if len(sys.argv) != 4:
logging.error("Usage: python script.py database_name username password")
sys.exit(1)
database_name = sys.argv[1]
username = sys.argv[2]
password = sys.argv[3]
sql_filename = get_sql_filename() # Function to get the .sql filename in the current directory
logging.info(f"Processing SQL file: {sql_filename}")
# Try to detect encoding or use a common encoding
encodings = ['utf-8', 'latin-1', 'iso-8859-1', 'cp1252']
for enc in encodings:
try:
process_sql_file(sql_filename, enc, database_name, username, password)
break # Break if successful
except UnicodeDecodeError as e:
logging.warning(f"Failed to decode with encoding {enc}: {e}")
else:
logging.error("Failed to process the SQL file with known encodings.")
sys.exit(1)
def process_sql_file(sql_filename, encoding, database_name, username, password):
# Initialize variables
initial_statements = []
table_names = []
current_table_name = None
table_file = None
line_number = 0
logging.info(f"Attempting to process the SQL file with encoding: {encoding}")
with open(sql_filename, 'r', encoding=encoding, errors='replace') as sql_file:
for line in sql_file:
line_number += 1
# Check for the table structure comment
match = re.match(r'^--\s*Table structure for table `(.+?)`', line)
if match:
current_table_name = match.group(1)
table_names.append(current_table_name)
# Close previous table file if open
if table_file:
table_file.close()
logging.debug(f"Closed file for table: {current_table_name}")
table_filename = f"{current_table_name}.sql"
table_file = open(table_filename, 'w', encoding='utf-8')
logging.info(f"Creating file: {table_filename}")
# Write initial statements to the table's file
table_file.writelines(initial_statements)
# Write the current line
table_file.write(line)
elif current_table_name is None:
# Collect initial statements
initial_statements.append(line)
elif table_file:
table_file.write(line)
# Close the last table file
if table_file:
table_file.close()
logging.debug(f"Closed file for table: {current_table_name}")
# Generate function.txt
generate_function_txt(table_names, database_name, username, password)
logging.info("Successfully processed the SQL file.")
def get_sql_filename():
# Find the .sql file in the current directory
files = [f for f in os.listdir('.') if os.path.isfile(f) and f.endswith('.sql')]
if len(files) == 0:
logging.error("No .sql file found in the current directory.")
sys.exit(1)
elif len(files) > 1:
logging.error("Multiple .sql files found in the current directory.")
logging.error("Please specify the .sql file to process.")
sys.exit(1)
else:
return files[0]
def generate_function_txt(table_names, database_name, username, password):
table_names.sort()
with open('function.txt', 'w', encoding='utf-8') as func_file:
for table_name in table_names:
table_filename = f"{table_name}.sql"
cmd = f"mysql -u {username} -p'{password}' {database_name} < {table_filename}\n"
func_file.write(cmd)
logging.info(f"Added import command for table: {table_name}")
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment