notdodo · March 17, 2020 18:10
diff --git a/parse_dump.py b/parse_dump.py
 #!/usr/bin/env python3
 # -*- encoding: ascii -*-
 #
 # AUTHOR: Edoardo Rosa dodo https://github.com/notdodo
 #
 # DESCRIPTION: Parse `sqlmap` dumps from data breaches or leaks into JSON files
 #
 # Some input files contain broken encodings or non-printable characters and must be cleaned first:
 # sed -i 's/[^[:print:]\t]//g; s/\\r//g' *.txt
 import click
 import json
 import mmap
 import re
 from collections import OrderedDict

 # A table border row such as +------+----+
 SQL_SEPARATOR = re.compile(r"\+.*\+")
 # One table cell: a pipe, at least one whitespace character, then everything up
 # to (but excluding) the next pipe. Capturing "anything but a pipe" instead of
 # a whitelist of characters keeps values such as `ro!@$#%b` from being cut at
 # the first unlisted character; callers strip the surrounding padding.
 SQL_COLUMN_CONTENT = re.compile(r"\|\s+([^|]+)")
 # Column-name rules: `find` lists the source column names that are recognised,
 # `replace` is the canonical name associated with them.
 # UPDATE THE `find` ARRAY WHENEVER THE OUTPUT IS WRONG
 REPLACEMENT = {
    "email": {"find": ["mail", "email"], "replace": "email"},
    "username": {"find": ["nickname", "username", "user"], "replace": "username"},
    "password": {"find": ["password", "pwd", "hash", "passwd"], "replace": "password"},
    "nome": {"find": ["nome", "fullname", "name"], "replace": "nome"},
    "cognome": {"find": ["cognome", "surname", "second name"], "replace": "cognome"},
 }


 def get_table_headers(line):
    """Return the stripped, non-empty column names found in a header row.

    The names are later used as the keys of the JSON objects.
    """
    stripped_cells = (cell.strip() for cell in SQL_COLUMN_CONTENT.findall(line))
    return [name for name in stripped_cells if name]


 def get_row_content(line, cols):
    """Map the cells of one table row onto the given column names.

    Args:
        line: a single row of the dump, e.g. ``| Bianchi | 27 | Marco |``.
        cols: ordered list of column names for the table.

    Returns:
        A dict with one entry per name in `cols`; columns with no matching
        cell keep the empty string.
    """
    # Start from an "empty" object so every column is always present
    parsed_row = {name: "" for name in cols}
    # zip() stops at the shorter sequence: a row carrying more cells than
    # there are headers no longer raises IndexError, the extras are ignored.
    for name, cell in zip(cols, SQL_COLUMN_CONTENT.findall(line)):
        parsed_row[name] = cell.strip()
    return parsed_row


 def normalize_json_auto(table, output_table=[]):
    """Keep only the recognised columns of each row, renamed to their canonical key.

    A column is recognised when its name appears in the `find` list of a
    REPLACEMENT entry; it is stored under that entry's `replace` name. All
    other columns are dropped, and rows left without any column are skipped.

    Args:
        table: iterable of row dicts mapping column name -> value.
        output_table: list the normalised rows are appended to.

    Returns:
        `output_table` when it holds at least one row, otherwise None.

    NOTE: the default list is created once and shared by every call that
    omits `output_table`, so such calls accumulate rows across invocations.
    The default is kept for backward compatibility with callers that depend
    on that accumulation; pass an explicit list to get an isolated result.
    """
    for row in table:
        renamed = {}
        for column, value in row.items():
            for rule in REPLACEMENT.values():
                if column not in rule["find"]:
                    continue
                # Store the value under the canonical name (the original
                # computed that name but kept the source column name).
                target = rule["replace"]
                # When several source columns map to the same canonical
                # name, keep the first non-empty value.
                if not renamed.get(target):
                    renamed[target] = value
                break
        if renamed:
            output_table.append(renamed)
    if output_table:
        return output_table


 def normalize_json(table, output_table={}):
    """Interactively keep, drop or rename every column of a parsed table.

    For each key of `table` the user is prompted once:
      * Enter        -> keep the column under its (stripped) name
      * ``d`` / ``D`` -> drop the column
      * anything else -> store the column under that name, lower-cased

    Values are appended when the target name already exists in
    `output_table`, so several tables can be merged into one mapping.

    Args:
        table: dict mapping column name -> list of values.
        output_table: dict the kept columns are merged into.

    Returns:
        `output_table` when it is non-empty, otherwise None.

    NOTE: the default dict is created once and shared by every call that
    omits `output_table`; it is kept for backward compatibility. Pass an
    explicit dict to get an isolated result.
    """
    for raw_key, values in table.items():
        # Look values up with the original key; only the displayed/stored
        # name is stripped (stripping first raised KeyError on padded keys).
        key = raw_key.strip()
        # Preview the second value as before, but fall back gracefully when
        # the column holds fewer than two values instead of raising IndexError.
        if len(values) > 1:
            sample = values[1]
        elif values:
            sample = values[0]
        else:
            sample = ""
        print("Key: ", key, "Value: ", sample)
        choice = input("Keep: [Enter], Drop [D], Rename [new name]").strip()
        if choice.lower() == "d":
            # Drop the key from the table
            continue
        # Empty answer keeps the name; any other answer (one-letter names
        # included) renames. The same lower-cased name is used for both the
        # membership test and the store.
        target = choice.lower() if choice else key
        # extend() into a fresh list so the caller's lists are never aliased
        output_table.setdefault(target, []).extend(values)
    if output_table:
        return output_table


 def _read_stripped_line(dump):
    """Read the next line of the mapped dump as stripped text ('' at EOF)."""
    # Undecodable bytes are replaced rather than aborting the whole run.
    return dump.readline().decode("utf-8", errors="replace").strip()


 def _collect_tables(dump, rows):
    """Append the normalised rows of every table found in `dump` to `rows`."""
    for raw_line in iter(dump.readline, b""):
        # Check if the line is the separator +------+----+
        first = raw_line.decode("utf-8", errors="replace").strip()
        if not SQL_SEPARATOR.match(first):
            continue
        # If the line is a separator, the next one contains the headers
        headers = _read_stripped_line(dump)
        # A header row must be followed by a second separator; the second
        # line is only consumed when the header row itself matched.
        if not (
            SQL_COLUMN_CONTENT.match(headers)
            and SQL_SEPARATOR.match(_read_stripped_line(dump))
        ):
            continue
        table_cols = get_table_headers(headers)
        table = []
        # This line contains the first table row
        line = _read_stripped_line(dump)
        while SQL_COLUMN_CONTENT.match(line):
            table.append(get_row_content(line, table_cols))
            line = _read_stripped_line(dump)
        # Pass the accumulator explicitly so the rows of every table are
        # collected regardless of how normalize_json_auto treats its default.
        normalize_json_auto(table, rows)


 def parse_sqlmap(ifile, ofile):
    """Convert the tables of a sqlmap dump file into a JSON file.

    Args:
        ifile: path of the dump to read.
        ofile: path of the JSON file to write; defaults to `ifile` + ".json"
            when None.

    The output file is always created; it is left empty when no row with a
    recognised column was found.
    """
    if ofile is None:
        ofile = ifile + ".json"
    global_output = []
    # Read-only binary access is enough: the dump is never modified.
    with open(ifile, "rb") as f:
        # mmap cannot map a zero-length file, so treat it as "no tables".
        if f.seek(0, 2) > 0:
            # Use mmap to read big files; the mapping is closed on exit.
            with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as dump:
                _collect_tables(dump, global_output)
    # Write the converted JSON table; UTF-8 is forced because
    # ensure_ascii=False emits non-ASCII characters verbatim.
    with open(ofile, "w", encoding="utf-8") as fp:
        if global_output:
            json.dump(global_output, fp, ensure_ascii=False, indent=4)


 # CLI entry point. Documented with comments rather than a docstring because
 # click would surface a docstring as the command's --help text.
 @click.command(context_settings=dict(help_option_names=["-h", "--help"]))
 @click.option(
    "--ifile",
    "-i",
    nargs=1,
    type=click.Path(),
    help="File to parse",
 )
 @click.option(
    "--ofile",
    "-o",
    nargs=1,
    type=click.Path(),
    help="JSON parsed file to save",
 )
 def cmd(ifile, ofile):
    # Parse the selected dump; without an input file just report it.
    if ifile is not None:
        parse_sqlmap(ifile, ofile)
        return
    print("No input file selected")


 # Run the command-line interface only when executed as a script,
 # not when the module is imported.
 if __name__ == "__main__":
    cmd()
diff --git a/test.txt b/test.txt
 +-----------------+-----+------------------+-----------+------------+------------------------+
 | cognome.email   | id  | nome             | password  | privilegio | username               |
 +-----------------+-----+------------------+-----------+------------+------------------------+
 | Bianchi         | 27  | Marco            | prova     | 1          | marco.marco            |
 |                 | 109 | Sara             | sara      | 1          | sara_Saretta           |
 |                 | 110 | Giovanni         | giogi     | 4          | giovannimichele.bianco |
 |                 | 111 | Roberta          | ro!@$#%b  | 1          | roberta.dindini        |
 |                 | 116 | Giovanni         | giò       | 1          | giò                    |
 |                 | 133 | Paolo            | pmagazine | 1          | paolo.rossi            |
 +-----------------+-----+------------------+-----------+------------+------------------------+
	#!/usr/bin/env python3
	# -*- encoding: ascii -*-
	#
	# AUTHOR: Edoardo Rosa dodo https://github.com/notdodo
	#
	# DESCRIPTION: Parse `sqlmap` dumps from data breaches or leaks into JSON files
	#
	# Some input files contain broken encodings or non-printable characters and must be cleaned first:
	# sed -i 's/[^[:print:]\t]//g; s/\\r//g' *.txt
	import click
	import json
	import mmap
	import re
	from collections import OrderedDict

	# A table border row such as +------+----+
	SQL_SEPARATOR = re.compile(r"\+.*\+")
	# One table cell: a pipe, at least one whitespace character, then everything
	# up to (but excluding) the next pipe. The previous pattern started with an
	# escaped backslash (`\\|`), which turned it into "a backslash OR the rest",
	# and its character whitelist cut values such as `ro!@$#%b` short.
	SQL_COLUMN_CONTENT = re.compile(r"\|\s+([^|]+)")
	# Column-name rules: `find` lists the source column names that are recognised,
	# `replace` is the canonical name associated with them.
	# UPDATE THE `find` ARRAY WHENEVER THE OUTPUT IS WRONG
	REPLACEMENT = {
	    "email": {"find": ["mail", "email"], "replace": "email"},
	    "username": {"find": ["nickname", "username", "user"], "replace": "username"},
	    "password": {"find": ["password", "pwd", "hash", "passwd"], "replace": "password"},
	    "nome": {"find": ["nome", "fullname", "name"], "replace": "nome"},
	    "cognome": {"find": ["cognome", "surname", "second name"], "replace": "cognome"},
	}


	def get_table_headers(line):
	    """Return the stripped, non-empty column names found in a header row.

	    The names are later used as the keys of the JSON objects.
	    """
	    # Indentation restored: the flattened original was not valid Python.
	    stripped_cells = (cell.strip() for cell in SQL_COLUMN_CONTENT.findall(line))
	    return [name for name in stripped_cells if name]


	def get_row_content(line, cols):
	    """Map the cells of one table row onto the given column names.

	    Args:
	        line: a single row of the dump, e.g. ``| Bianchi | 27 | Marco |``.
	        cols: ordered list of column names for the table.

	    Returns:
	        A dict with one entry per name in `cols`; columns with no matching
	        cell keep the empty string.
	    """
	    # Start from an "empty" object so every column is always present
	    parsed_row = {name: "" for name in cols}
	    # zip() stops at the shorter sequence: a row carrying more cells than
	    # there are headers no longer raises IndexError, the extras are ignored.
	    for name, cell in zip(cols, SQL_COLUMN_CONTENT.findall(line)):
	        parsed_row[name] = cell.strip()
	    return parsed_row


	def normalize_json_auto(table, output_table=[]):
	    """Keep only the recognised columns of each row, renamed to their canonical key.

	    A column is recognised when its name appears in the `find` list of a
	    REPLACEMENT entry; it is stored under that entry's `replace` name. All
	    other columns are dropped, and rows left without any column are skipped.

	    Args:
	        table: iterable of row dicts mapping column name -> value.
	        output_table: list the normalised rows are appended to.

	    Returns:
	        `output_table` when it holds at least one row, otherwise None.

	    NOTE: the default list is created once and shared by every call that
	    omits `output_table`, so such calls accumulate rows across invocations.
	    The default is kept for backward compatibility with callers that depend
	    on that accumulation; pass an explicit list to get an isolated result.
	    """
	    for row in table:
	        renamed = {}
	        for column, value in row.items():
	            for rule in REPLACEMENT.values():
	                if column not in rule["find"]:
	                    continue
	                # Store the value under the canonical name (the original
	                # computed that name but kept the source column name).
	                target = rule["replace"]
	                # When several source columns map to the same canonical
	                # name, keep the first non-empty value.
	                if not renamed.get(target):
	                    renamed[target] = value
	                break
	        if renamed:
	            output_table.append(renamed)
	    if output_table:
	        return output_table


	def normalize_json(table, output_table={}):
	    """Interactively keep, drop or rename every column of a parsed table.

	    For each key of `table` the user is prompted once:
	      * Enter        -> keep the column under its (stripped) name
	      * ``d`` / ``D`` -> drop the column
	      * anything else -> store the column under that name, lower-cased

	    Values are appended when the target name already exists in
	    `output_table`, so several tables can be merged into one mapping.

	    Args:
	        table: dict mapping column name -> list of values.
	        output_table: dict the kept columns are merged into.

	    Returns:
	        `output_table` when it is non-empty, otherwise None.

	    NOTE: the default dict is created once and shared by every call that
	    omits `output_table`; it is kept for backward compatibility. Pass an
	    explicit dict to get an isolated result.
	    """
	    for raw_key, values in table.items():
	        # Look values up with the original key; only the displayed/stored
	        # name is stripped (stripping first raised KeyError on padded keys).
	        key = raw_key.strip()
	        # Preview the second value as before, but fall back gracefully when
	        # the column holds fewer than two values instead of raising IndexError.
	        if len(values) > 1:
	            sample = values[1]
	        elif values:
	            sample = values[0]
	        else:
	            sample = ""
	        print("Key: ", key, "Value: ", sample)
	        choice = input("Keep: [Enter], Drop [D], Rename [new name]").strip()
	        if choice.lower() == "d":
	            # Drop the key from the table
	            continue
	        # Empty answer keeps the name; any other answer (one-letter names
	        # included) renames. The same lower-cased name is used for both the
	        # membership test and the store.
	        target = choice.lower() if choice else key
	        # extend() into a fresh list so the caller's lists are never aliased
	        output_table.setdefault(target, []).extend(values)
	    if output_table:
	        return output_table


	def _read_stripped_line(dump):
	    """Read the next line of the mapped dump as stripped text ('' at EOF)."""
	    # Undecodable bytes are replaced rather than aborting the whole run.
	    return dump.readline().decode("utf-8", errors="replace").strip()


	def _collect_tables(dump, rows):
	    """Append the normalised rows of every table found in `dump` to `rows`."""
	    for raw_line in iter(dump.readline, b""):
	        # Check if the line is the separator +------+----+
	        first = raw_line.decode("utf-8", errors="replace").strip()
	        if not SQL_SEPARATOR.match(first):
	            continue
	        # If the line is a separator, the next one contains the headers
	        headers = _read_stripped_line(dump)
	        # A header row must be followed by a second separator; the second
	        # line is only consumed when the header row itself matched.
	        if not (
	            SQL_COLUMN_CONTENT.match(headers)
	            and SQL_SEPARATOR.match(_read_stripped_line(dump))
	        ):
	            continue
	        table_cols = get_table_headers(headers)
	        table = []
	        # This line contains the first table row
	        line = _read_stripped_line(dump)
	        while SQL_COLUMN_CONTENT.match(line):
	            table.append(get_row_content(line, table_cols))
	            line = _read_stripped_line(dump)
	        # Pass the accumulator explicitly so the rows of every table are
	        # collected regardless of how normalize_json_auto treats its default.
	        normalize_json_auto(table, rows)


	def parse_sqlmap(ifile, ofile):
	    """Convert the tables of a sqlmap dump file into a JSON file.

	    Args:
	        ifile: path of the dump to read.
	        ofile: path of the JSON file to write; defaults to `ifile` + ".json"
	            when None.

	    The output file is always created; it is left empty when no row with a
	    recognised column was found.
	    """
	    if ofile is None:
	        ofile = ifile + ".json"
	    global_output = []
	    # Read-only binary access is enough: the dump is never modified.
	    with open(ifile, "rb") as f:
	        # mmap cannot map a zero-length file, so treat it as "no tables".
	        if f.seek(0, 2) > 0:
	            # Use mmap to read big files; the mapping is closed on exit.
	            with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as dump:
	                _collect_tables(dump, global_output)
	    # Write the converted JSON table; UTF-8 is forced because
	    # ensure_ascii=False emits non-ASCII characters verbatim.
	    with open(ofile, "w", encoding="utf-8") as fp:
	        if global_output:
	            json.dump(global_output, fp, ensure_ascii=False, indent=4)


	# CLI entry point. Documented with comments rather than a docstring because
	# click would surface a docstring as the command's --help text.
	# Indentation restored: the flattened original was not valid Python.
	@click.command(context_settings={"help_option_names": ["-h", "--help"]})
	@click.option("--ifile", "-i", nargs=1, help="File to parse", type=click.Path())
	@click.option(
	    "--ofile", "-o", nargs=1, help="JSON parsed file to save", type=click.Path()
	)
	def cmd(ifile, ofile):
	    # Parse the selected dump; without an input file just report it.
	    if ifile is None:
	        print("No input file selected")
	    else:
	        parse_sqlmap(ifile, ofile)


	# Run the command-line interface only when executed as a script,
	# not when the module is imported. (Indentation of the call restored.)
	if __name__ == "__main__":
	    cmd()
	+-----------------+-----+------------------+-----------+------------+------------------------+
	| cognome.email   | id  | nome             | password  | privilegio | username               |
	+-----------------+-----+------------------+-----------+------------+------------------------+
	| Bianchi         | 27  | Marco            | prova     | 1          | marco.marco            |
	|                 | 109 | Sara             | sara      | 1          | sara_Saretta           |
	|                 | 110 | Giovanni         | giogi     | 4          | giovannimichele.bianco |
	|                 | 111 | Roberta          | ro!@$#%b  | 1          | roberta.dindini        |
	|                 | 116 | Giovanni         | giò       | 1          | giò                    |
	|                 | 133 | Paolo            | pmagazine | 1          | paolo.rossi            |
	+-----------------+-----+------------------+-----------+------------+------------------------+