Created
November 23, 2024 07:22
-
-
Save mattetti/faf66d975a022692f199b5085c2876d9 to your computer and use it in GitHub Desktop.
Parser script for .db files that extracts the embedded audio and miscellaneous content.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import os
import re
import xml.etree.ElementTree as ET
class LegacyFileParser:
    """Parse a legacy ``.db`` container and extract the files it embeds.

    As read by this class, the container starts with an XML header that ends
    with ``</FileSystem>`` (optionally followed by a single newline) and is
    followed by the raw payload. Header elements may carry ``name``,
    ``offset`` and ``size`` attributes; every ``FILE`` element that has both
    an offset and a size is written into ``output_directory`` while the
    header is being parsed.
    """

    # Closing tag that marks the end of the XML header.
    _HEADER_END = b'</FileSystem>'

    def __init__(self, filepath, output_directory):
        """Remember the input path and make sure the output directory exists.

        Args:
            filepath: Path of the container file to read.
            output_directory: Directory that receives the extracted files;
                it is created when missing.
        """
        self.filepath = filepath
        self.output_directory = output_directory
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.output_directory, exist_ok=True)
        self.structure = []
        self.xml_end_index = 0

    def parse(self):
        """Parse the XML header and extract every described file.

        Returns:
            A list of entry dicts with the keys ``name``, ``offset``,
            ``size``, ``type`` and ``children``. An empty list is returned
            when the header end cannot be found or the XML is malformed.
        """
        with open(self.filepath, 'rb') as file:
            data = file.read()

        # Test the result of find() *before* adding the marker length:
        # "-1 + len(marker)" is never -1, so a combined check cannot fail.
        marker_index = data.find(self._HEADER_END)
        if marker_index == -1:
            self.xml_end_index = 0
            print("Error: Unable to locate XML header end.")
            return []
        self.xml_end_index = marker_index + len(self._HEADER_END)

        # Skip the newline character after the XML end
        if data[self.xml_end_index:self.xml_end_index + 1] == b'\n':
            self.xml_end_index += 1

        xml_data = data[:self.xml_end_index]
        # Try decoding XML part with 'utf-8', fallback to 'latin-1'
        try:
            decoded_xml = xml_data.decode('utf-8')
        except UnicodeDecodeError:
            decoded_xml = xml_data.decode('latin-1')

        # Swap the FileSystem tags for a synthetic <root> wrapper so the
        # entries become direct children of the parsed root element.
        clean_data = f"<root>{decoded_xml}</root>"
        clean_data = clean_data.replace('</FileSystem>', '').replace('<FileSystem>', '')

        try:
            root = ET.fromstring(clean_data)
        except ET.ParseError as e:
            print(f"Error parsing XML: {e}")
            return []

        self.structure = self._parse_node(root)
        return self.structure

    @staticmethod
    def _to_int(value):
        """Return ``value`` as an int, or None when it is missing or invalid."""
        if not value:
            return None
        try:
            return int(value)
        except ValueError:
            # One malformed attribute should not abort the whole extraction.
            print(f"Warning: ignoring non-integer attribute value {value!r}")
            return None

    def _parse_node(self, node):
        """Convert the children of ``node`` into entry dicts, recursively.

        FILE elements that have both an offset and a size are extracted as a
        side effect.

        Args:
            node: An ``xml.etree.ElementTree.Element`` whose children are read.

        Returns:
            A list with one dict per child element.
        """
        result = []
        for element in node:
            entry = {
                'name': element.get('name'),
                'offset': self._to_int(element.get('offset')),
                'size': self._to_int(element.get('size')),
                'type': element.tag,
                'children': self._parse_node(element) if len(element) > 0 else None,
            }
            if (
                element.tag == 'FILE'
                and entry['offset'] is not None
                and entry['size'] is not None
            ):
                self._extract_file(entry)
            result.append(entry)
        return result

    def _extract_file(self, file_entry):
        """Copy one embedded file from the container into the output directory.

        Entries without a name, with a negative offset or size, or whose name
        would resolve outside the output directory are skipped with a message
        instead of being written.

        Args:
            file_entry: Entry dict providing ``name``, ``offset`` and ``size``.
        """
        name = file_entry['name']
        size = file_entry['size']
        if not name:
            print("Skipped: FILE entry without a name")
            return
        if file_entry['offset'] < 0 or size < 0:
            # A negative size would make read() return the rest of the file.
            print(f"Skipped: {name} has a negative offset or size")
            return

        # The name comes from the parsed file, so refuse anything that would
        # escape the output directory ("../x", absolute paths, ...).
        output_path = os.path.join(self.output_directory, name)
        base_dir = os.path.abspath(self.output_directory)
        target = os.path.abspath(output_path)
        try:
            inside = os.path.commonpath([base_dir, target]) == base_dir
        except ValueError:
            # Raised e.g. when the two paths are on different drives.
            inside = False
        if not inside or target == base_dir:
            print(f"Skipped: {name} resolves outside the output directory")
            return

        # Adjust offset to start after the XML header
        offset = self.xml_end_index + file_entry['offset']
        with open(self.filepath, 'rb') as file:
            file.seek(offset)
            data = file.read(size)

        # Names may contain sub-directories; create them on demand.
        os.makedirs(os.path.dirname(target), exist_ok=True)
        with open(output_path, 'wb') as output_file:
            output_file.write(data)
        print(f"Extracted: {name} to {output_path}")

    def print_structure(self, structure=None, indent=0):
        """Print the parsed tree, one entry per line, indented by depth.

        Args:
            structure: Entries to print; defaults to the last parse result.
            indent: Current nesting depth, used for the leading padding.
        """
        if structure is None:
            structure = self.structure
        for element in structure:
            print(f"{' ' * indent}{element['type'].upper()}: {element['name']}")
            if element['children']:
                self.print_structure(element['children'], indent + 1)
# Command-line entry point: decode a .db file and print its structure.
if __name__ == "__main__":
    cli = argparse.ArgumentParser(
        description="Parse a .db file and extract embedded audio files.",
    )
    cli.add_argument('filepath', type=str, help="Path to the db file")
    cli.add_argument(
        'output_directory',
        type=str,
        nargs='?',
        default='extracted_files',
        help="Directory to store extracted files (default: 'extracted_files')",
    )
    options = cli.parse_args()

    # Build the parser from the two positional arguments, run it, then dump
    # the resulting tree to stdout.
    db_parser = LegacyFileParser(options.filepath, options.output_directory)
    db_parser.parse()
    db_parser.print_structure()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment