Skip to content

Instantly share code, notes, and snippets.

@jmaddington
Last active January 5, 2024 23:24
Show Gist options
  • Save jmaddington/f701e97630140a006d40954c9b376eda to your computer and use it in GitHub Desktop.
Import raw ControlOne JSON SIEM data into a Sqlite database

ControlOne provides a SIEM-type JSON export. This script assumes you have saved those exports to a directory; it then walks through all files and copies the data into a SQLite database.

Super quick and dirty.

import argparse
import json
import sqlite3
import os
import hashlib
def map_keys(obj):
    """Normalize reserved key names and flatten the 'src_gateway' sub-object.

    Renames 'date' -> 'the_date' and 'method' -> 'the_method' (to avoid
    awkward SQL column names), lifts a non-empty nested 'src_gateway' dict
    into flat 'src_gateway_*' keys, and drops the 'eid' field.  Mutates
    *obj* in place and returns it.
    """
    for old_key, new_key in (('date', 'the_date'), ('method', 'the_method')):
        if old_key in obj:
            obj[new_key] = obj.pop(old_key)

    gateway = obj.get('src_gateway', {})
    if gateway:
        for field in ('company_id', 'id', 'name'):
            obj['src_gateway_' + field] = gateway.get(field)
        # Drop the nested object once its fields have been lifted out.
        # (An empty 'src_gateway' dict is intentionally left untouched,
        # matching the original behavior; downstream flattening removes it.)
        del obj['src_gateway']

    obj.pop('eid', None)  # 'eid' is deliberately ignored
    return obj
def remove_lists_and_nestings(obj):
    """Return a shallow copy of *obj* keeping only scalar values.

    Entries whose value is a list or a dict are dropped; everything else
    (strings, numbers, booleans, None) is carried over unchanged.
    """
    flat = {}
    for key, value in obj.items():
        if isinstance(value, (list, dict)):
            continue
        flat[key] = value
    return flat
def md5_hash_string(s):
    """Return the hex MD5 digest of *s*, hashed as UTF-8 bytes."""
    digest = hashlib.md5()
    digest.update(s.encode('utf-8'))
    return digest.hexdigest()
def create_table(cursor):
    """Create the flat 'data' table if it does not already exist.

    One TEXT column per flattened SIEM field, plus an autoincrementing
    surrogate key and an 'md5' column holding the hash of the raw input
    line.  'md5' is declared UNIQUE so that re-importing the same files is
    idempotent: duplicate lines raise sqlite3.IntegrityError, which the
    import loop already catches and reports.  (Without the constraint that
    handler could never fire and re-runs silently duplicated every row.)
    'date' and 'method' are stored as 'the_date' / 'the_method' because
    map_keys() renames them before insert.

    NOTE: CREATE TABLE IF NOT EXISTS does not alter a pre-existing table,
    so databases created before this fix keep their old, unconstrained
    schema.
    """
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS data (
            auto_id INTEGER PRIMARY KEY AUTOINCREMENT,
            _id TEXT,
            app TEXT,
            appcat TEXT,
            appid TEXT,
            applist TEXT,
            apprisk TEXT,
            cat TEXT,
            catdesc TEXT,
            cert_issuer TEXT,
            cert_name TEXT,
            company_id TEXT,
            company_name TEXT,
            the_date TEXT,
            dstcountry TEXT,
            dstip TEXT,
            dstport TEXT,
            dstuuid TEXT,
            eventtype TEXT,
            hostname TEXT,
            the_method TEXT,
            partner_id TEXT,
            poluuid TEXT,
            proto TEXT,
            rcvdbyte TEXT,
            rcvdpkt TEXT,
            region TEXT,
            sec_alert TEXT,
            sentbyte TEXT,
            sentpkt TEXT,
            service TEXT,
            session_id TEXT,
            src_agent_company_id TEXT,
            src_agent_geolocation TEXT,
            src_agent_geolocation_latlong TEXT,
            src_agent_hostname TEXT,
            src_agent_latlong TEXT,
            src_agent_model TEXT,
            src_agent_os_platform TEXT,
            src_agent_os_string TEXT,
            src_agent_user_email TEXT,
            src_agent_user_id TEXT,
            src_agent_uuid TEXT,
            src_appliance TEXT,
            src_gateway_company_id TEXT,
            src_gateway_id TEXT,
            src_gateway_name TEXT,
            src_user TEXT,
            src_zone_company_id TEXT,
            src_zone_id TEXT,
            src_zone_name TEXT,
            srccountry TEXT,
            srcip TEXT,
            srcport TEXT,
            srcuuid TEXT,
            timestamp_end TEXT,
            transport TEXT,
            url TEXT,
            md5 TEXT UNIQUE
        )''')
# CLI: both arguments are required; -d/-db are the short forms.
parser = argparse.ArgumentParser(description='Insert JSON data into an SQLite database from files with one JSON object per line, ignoring lists and handling nested objects.')
parser.add_argument('--directory', '-d', required=True, help='Directory containing JSON files with one JSON object per line')
parser.add_argument('--database', '-db', required=True, help='Path to the SQLite database file')
# Parse sys.argv; exits with usage message if required args are missing.
args = parser.parse_args()
# Unpack into module-level names used by process_directory() below.
directory = args.directory
database_path = args.database
# Open (or create) the SQLite database file.  NOTE: conn and cursor are
# module globals that process_directory() relies on.
conn = sqlite3.connect(database_path)
cursor = conn.cursor()
# Ensure the destination table exists before any inserts.
create_table(cursor)
def process_directory(dir_path):
    """Recursively walk *dir_path* and import every line of every file.

    Each line is expected to be one standalone JSON object.  The object is
    key-normalized (map_keys), flattened to scalars
    (remove_lists_and_nestings), tagged with the MD5 of the raw line, and
    inserted into the 'data' table via the module-level cursor.  Malformed
    lines and failed inserts are reported and skipped; changes are
    committed once per file.

    Uses module globals: conn, cursor.
    """
    for root, _dirs, files in os.walk(dir_path):
        for filename in files:
            file_path = os.path.join(root, filename)
            with open(file_path, 'r') as file:
                # One JSON object per line.
                for line in file:
                    # Hash the raw line (newline included) so identical
                    # input lines always map to the same md5 value.
                    md5_hash = md5_hash_string(line)
                    try:
                        json_obj = remove_lists_and_nestings(map_keys(json.loads(line)))
                        json_obj['md5'] = md5_hash
                        # Column names come from untrusted JSON keys, so
                        # quote them as SQLite identifiers (doubling any
                        # embedded double quote) to prevent SQL injection;
                        # values go through ? placeholders as before.
                        columns = ', '.join(
                            '"{}"'.format(k.replace('"', '""')) for k in json_obj
                        )
                        placeholders = ', '.join('?' * len(json_obj))
                        sql = f'INSERT INTO data ({columns}) VALUES ({placeholders})'
                        cursor.execute(sql, tuple(json_obj.values()))
                    except json.JSONDecodeError as e:
                        # Report the actual file path, not "(unknown)".
                        print(f"Error parsing JSON in file {file_path}, line: {line.strip()}: {e}")
                    except sqlite3.IntegrityError as e:
                        print(f"Error inserting data from file {file_path}, line: {line.strip()}: {e}")
                    except Exception as e:
                        print(f"Unexpected error in file {file_path}: {e}")
            # Commit after each file so a crash loses at most one file's rows.
            conn.commit()
# Import everything under the directory given on the command line.
process_directory(directory)
# process_directory() commits per file, so nothing is left to flush here;
# just release the database connection.
conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment