r0yfire · October 12, 2015 23:09 · goors · Jun 2, 2016 · Turbine1991 · May 3, 2021
diff --git a/README.md b/README.md
diff --git a/mysqldump2json.py b/mysqldump2json.py
 #!/usr/bin/env python
 import fileinput
 import csv
 import json
 import sys

 # This prevents prematurely closed pipes from raising
 # an exception in Python
 from signal import signal, SIGPIPE, SIG_DFL

 signal(SIGPIPE, SIG_DFL)

 # The csv file might contain very huge fields,
 # therefore increase the field_size_limit
 csv.field_size_limit(sys.maxsize)


 def is_insert(line):
    """
    Returns true if the line begins a SQL insert statement.
    """
    return line.startswith('INSERT INTO') or False


 def get_values(line):
    """
    Returns the portion of an INSERT statement containing values
    """
    return line.partition('` VALUES ')[2]


 def values_sanity_check(values):
    """
    Ensures that values from the INSERT statement meet basic checks.
    """
    assert values
    assert values[0] == '('
    # Assertions have not been raised
    return True


 def parse_row(row):
    return {
        'id': row[2],
        'created': row[5],
        'text': row[8]
    }


 def parse_values(values, outfile):
    """
    Given a file handle and the raw values from a MySQL INSERT
    statement, write the equivalent CSV to the file
    """
    latest_row = []

    reader = csv.reader([values], delimiter=',',
                        doublequote=False,
                        escapechar='\\',
                        quotechar="'",
                        strict=True
                        )

    transactions = []
    for reader_row in reader:
        for column in reader_row:
            # If our current string is empty...
            if len(column) == 0:
                latest_row.append('""')
                continue
            # If our string starts with an open paren
            if column[0] == "(":
                # Assume that this column does not begin
                # a new row.
                new_row = False
                # If we've been filling out a row
                if len(latest_row) > 0:
                    # Check if the previous entry ended in
                    # a close paren. If so, the row we've
                    # been filling out has been COMPLETED
                    # as:
                    #    1) the previous entry ended in a )
                    #    2) the current entry starts with a (
                    if latest_row[-1][-1] == ")":
                        # Remove the close paren.
                        latest_row[-1] = latest_row[-1][:-1]
                        new_row = True
                # If we've found a new row, write it out
                # and begin our new one
                if new_row:
                    json_row = parse_row(latest_row)
                    if json_row.get('text', '') != '':
                        transactions.append(json_row)
                    latest_row = []
                # If we're beginning a new row, eliminate the
                # opening parentheses.
                if len(latest_row) == 0:
                    column = column[1:]
            # Add our column to the row we're working on.
            latest_row.append(column)
        # At the end of an INSERT statement, we'll
        # have the semicolon.
        # Make sure to remove the semicolon and
        # the close paren.
        if latest_row[-1][-2:] == ");":
            latest_row[-1] = latest_row[-1][:-2]
            json_row = parse_row(latest_row)
            if json_row.get('text', '') != '':
                transactions.append(json_row)

    outfile.write(json.dumps(transactions))


 def main():
    """
    Parse arguments and start the program
    """
    # Iterate over all lines in all files
    # listed in sys.argv[1:]
    # or stdin if no args given.
    try:
        for line in fileinput.input():
            # Look for an INSERT statement and parse it.
            if is_insert(line):
                values = get_values(line)
                if values_sanity_check(values):
                    parse_values(values, sys.stdout)
    except KeyboardInterrupt:
        sys.exit(0)


 if __name__ == "__main__":
    main()
	#!/usr/bin/env python
	import fileinput
	import csv
	import json
	import sys

	# This prevents prematurely closed pipes from raising
	# an exception in Python
	from signal import signal, SIGPIPE, SIG_DFL

	signal(SIGPIPE, SIG_DFL)

	# The csv file might contain very huge fields,
	# therefore increase the field_size_limit
	csv.field_size_limit(sys.maxsize)


	def is_insert(line):
	"""
	Returns true if the line begins a SQL insert statement.
	"""
	return line.startswith('INSERT INTO') or False


	def get_values(line):
	"""
	Returns the portion of an INSERT statement containing values
	"""
	return line.partition('` VALUES ')[2]


	def values_sanity_check(values):
	"""
	Ensures that values from the INSERT statement meet basic checks.
	"""
	assert values
	assert values[0] == '('
	# Assertions have not been raised
	return True


	def parse_row(row):
	return {
	'id': row[2],
	'created': row[5],
	'text': row[8]
	}


	def parse_values(values, outfile):
	"""
	Given a file handle and the raw values from a MySQL INSERT
	statement, write the equivalent CSV to the file
	"""
	latest_row = []

	reader = csv.reader([values], delimiter=',',
	doublequote=False,
	escapechar='\\',
	quotechar="'",
	strict=True
	)

	transactions = []
	for reader_row in reader:
	for column in reader_row:
	# If our current string is empty...
	if len(column) == 0:
	latest_row.append('""')
	continue
	# If our string starts with an open paren
	if column[0] == "(":
	# Assume that this column does not begin
	# a new row.
	new_row = False
	# If we've been filling out a row
	if len(latest_row) > 0:
	# Check if the previous entry ended in
	# a close paren. If so, the row we've
	# been filling out has been COMPLETED
	# as:
	# 1) the previous entry ended in a )
	# 2) the current entry starts with a (
	if latest_row[-1][-1] == ")":
	# Remove the close paren.
	latest_row[-1] = latest_row[-1][:-1]
	new_row = True
	# If we've found a new row, write it out
	# and begin our new one
	if new_row:
	json_row = parse_row(latest_row)
	if json_row.get('text', '') != '':
	transactions.append(json_row)
	latest_row = []
	# If we're beginning a new row, eliminate the
	# opening parentheses.
	if len(latest_row) == 0:
	column = column[1:]
	# Add our column to the row we're working on.
	latest_row.append(column)
	# At the end of an INSERT statement, we'll
	# have the semicolon.
	# Make sure to remove the semicolon and
	# the close paren.
	if latest_row[-1][-2:] == ");":
	latest_row[-1] = latest_row[-1][:-2]
	json_row = parse_row(latest_row)
	if json_row.get('text', '') != '':
	transactions.append(json_row)

	outfile.write(json.dumps(transactions))


	def main():
	"""
	Parse arguments and start the program
	"""
	# Iterate over all lines in all files
	# listed in sys.argv[1:]
	# or stdin if no args given.
	try:
	for line in fileinput.input():
	# Look for an INSERT statement and parse it.
	if is_insert(line):
	values = get_values(line)
	if values_sanity_check(values):
	parse_values(values, sys.stdout)
	except KeyboardInterrupt:
	sys.exit(0)


	if __name__ == "__main__":
	main()