Skip to content

Instantly share code, notes, and snippets.

@mattetti
Created November 23, 2024 07:22
Show Gist options
  • Save mattetti/faf66d975a022692f199b5085c2876d9 to your computer and use it in GitHub Desktop.
Save mattetti/faf66d975a022692f199b5085c2876d9 to your computer and use it in GitHub Desktop.
.db parser script to get audio/misc content out
import xml.etree.ElementTree as ET
import re
import argparse
import os
class LegacyFileParser:
    """Parse a legacy ``.db`` container and extract the files embedded in it.

    The file is expected to start with an XML header that ends with
    ``</FileSystem>``. Every ``FILE`` element in that header carries
    ``name``, ``offset`` and ``size`` attributes; ``offset`` is counted from
    the first byte after the header (and after one optional newline).
    """

    # Byte sequence that terminates the XML header.
    _HEADER_END = b'</FileSystem>'

    def __init__(self, filepath, output_directory):
        """Remember the input path and make sure the output directory exists.

        Args:
            filepath: Path of the ``.db`` file to read.
            output_directory: Directory that receives the extracted files;
                created if it does not exist yet.
        """
        self.filepath = filepath
        self.output_directory = output_directory
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(self.output_directory, exist_ok=True)
        self.structure = []
        self.xml_end_index = 0

    def parse(self):
        """Read the XML header, extract every ``FILE`` entry and return the tree.

        Returns:
            A list of entry dicts (see ``_parse_node``), or an empty list when
            the header terminator is missing or the header is not valid XML.
        """
        # Read the file content
        with open(self.filepath, 'rb') as file:
            data = file.read()

        # Locate the end of the XML header. The "not found" test has to be
        # made on the raw find() result: once the marker length is added the
        # value can never be -1 again.
        marker_index = data.find(self._HEADER_END)
        if marker_index == -1:
            print("Error: Unable to locate XML header end.")
            return []
        self.xml_end_index = marker_index + len(self._HEADER_END)

        # Skip the newline character after the XML end
        if data[self.xml_end_index:self.xml_end_index + 1] == b'\n':
            self.xml_end_index += 1
        xml_data = data[:self.xml_end_index]

        # Try decoding XML part with 'utf-8', fallback to 'latin-1' if decoding fails
        try:
            decoded_xml = xml_data.decode('utf-8')
        except UnicodeDecodeError:
            decoded_xml = xml_data.decode('latin-1')

        # Wrap the header in a synthetic root and drop the FileSystem tags so
        # the entries become direct children of <root>.
        clean_data = f"<root>{decoded_xml}</root>"
        clean_data = clean_data.replace('</FileSystem>', '').replace('<FileSystem>', '')

        # Parse XML data
        try:
            root = ET.fromstring(clean_data)
        except ET.ParseError as e:
            print(f"Error parsing XML: {e}")
            return []

        self.structure = self._parse_node(root)
        return self.structure

    def _parse_node(self, node):
        """Convert the children of *node* into entry dicts, recursively.

        Each entry has the keys ``name``, ``offset``, ``size``, ``type`` (the
        element tag) and ``children`` (a list of entries, or ``None`` for a
        leaf). ``FILE`` elements that have both an offset and a size are
        written to the output directory while the tree is being walked.
        """
        result = []
        for element in node:
            raw_offset = element.get('offset')
            raw_size = element.get('size')
            entry = {
                'name': element.get('name'),
                # Missing or empty attributes are recorded as None.
                'offset': int(raw_offset) if raw_offset else None,
                'size': int(raw_size) if raw_size else None,
                'type': element.tag,
                'children': self._parse_node(element) if len(element) > 0 else None,
            }
            if element.tag == 'FILE' and entry['offset'] is not None and entry['size'] is not None:
                self._extract_file(entry)
            result.append(entry)
        return result

    def _extract_file(self, file_entry):
        """Copy the bytes described by *file_entry* into the output directory.

        The entry name comes from the input file, so only its final path
        component is used: names such as ``../x`` or ``/abs/x`` cannot place
        the output outside ``output_directory``. Entries without a usable
        name are skipped with a message instead of raising.
        """
        name = file_entry['name']
        safe_name = os.path.basename(name) if name else ''
        if safe_name in ('', '.', '..'):
            print(f"Skipped: unsafe or missing file name {name!r}")
            return

        # Adjust offset to start after the XML header
        offset = self.xml_end_index + file_entry['offset']
        size = file_entry['size']

        # Extract the data directly from the file to ensure correct offset handling
        with open(self.filepath, 'rb') as file:
            file.seek(offset)
            data = file.read(size)

        output_path = os.path.join(self.output_directory, safe_name)
        with open(output_path, 'wb') as output_file:
            output_file.write(data)
        print(f"Extracted: {file_entry['name']} to {output_path}")

    def print_structure(self, structure=None, indent=0):
        """Print the entry tree, one line per entry, one space per nesting level.

        Args:
            structure: Entries to print; defaults to the result of the last
                successful ``parse`` call.
            indent: Current nesting depth.
        """
        if structure is None:
            structure = self.structure
        for element in structure:
            print(f"{' ' * indent}{element['type'].upper()}: {element['name']}")
            if element['children']:
                self.print_structure(element['children'], indent + 1)
# Command-line entry point: decode a .db file, extract its contents and
# print the directory tree found in its header.
if __name__ == "__main__":
    cli = argparse.ArgumentParser(
        description="Parse a .db file and extract embedded audio files."
    )
    cli.add_argument('filepath', type=str, help="Path to the db file")
    cli.add_argument(
        'output_directory',
        type=str,
        nargs='?',
        default='extracted_files',
        help="Directory to store extracted files (default: 'extracted_files')",
    )
    options = cli.parse_args()

    # parse() extracts the files as a side effect; the tree it builds is kept
    # on the instance and printed afterwards.
    extractor = LegacyFileParser(options.filepath, options.output_directory)
    extractor.parse()
    extractor.print_structure()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment