Last active
September 23, 2024 09:59
-
-
Save eruffaldi/4dc2c9d51cd4107997f0e5241edbc866 to your computer and use it in GitHub Desktop.
Dump MP4 Atoms Structure
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Emanuele Ruffaldi 2024
import mmap | |
import json | |
import os | |
import struct | |
import sys | |
def extract_boxes(mp4_file, offset=0, size=None):
    """
    Iterate over the boxes (atoms) stored back to back in a byte region.

    Args:
        mp4_file: Bytes-like, sliceable buffer holding the MP4 data
            (for example ``bytes`` or an ``mmap`` object).
        offset: Absolute position in ``mp4_file`` where the first box
            header starts.
        size: Number of bytes in the region to scan.  ``None`` means
            "from ``offset`` to the end of the buffer".

    Yields:
        ``(payload_offset, payload_size, box_type)`` tuples, where
        ``payload_offset`` is the absolute position just after the box
        header, ``payload_size`` is the declared box size minus the header
        length, and ``box_type`` is the four-character code decoded as
        latin-1.

    Iteration stops quietly at the first header that is truncated or that
    declares a size smaller than its own header, since no further box
    boundary can be trusted after that point.
    """
    if size is None:
        size = len(mp4_file) - offset
    # Never read past the real end of the buffer, even if the caller (or a
    # parent box) claims a larger region.
    end = min(offset + size, len(mp4_file))

    while end - offset >= 8:
        box_size, box_type = struct.unpack_from('>I4s', mp4_file, offset)
        header_size = 8
        if box_size == 1:
            # A 32-bit size of 1 means the real size is the 64-bit
            # "largesize" field that follows the type code.
            if end - offset < 16:
                break
            (box_size,) = struct.unpack_from('>Q', mp4_file, offset + 8)
            header_size = 16
        elif box_size == 0:
            # A size of 0 means the box runs to the end of the region.
            box_size = end - offset
        if box_size < header_size:
            # Corrupt size: advancing by it would loop forever or go
            # backwards, so stop here.
            break
        yield offset + header_size, box_size - header_size, box_type.decode("latin1")
        offset += box_size
def dump_mp4(mmapped_file, offset, size, prefix):
    """
    Print the leaf boxes found in a region of the file, one per line.

    Boxes whose type is a known container are not printed themselves;
    instead their payload is scanned recursively and their name (tagged
    with ``@<payload offset>``) is appended to the path prefix of their
    children.  Every other box is printed as
    ``<prefix>/<type> <payload offset> <payload size>``, using the values
    reported by ``extract_boxes``.

    Args:
        mmapped_file: Buffer holding the MP4 data.
        offset: Absolute start of the region to scan.
        size: Length in bytes of the region to scan.
        prefix: Path of the enclosing containers ("" at the top level).
    """
    # Add more containers here if needed
    container_types = (
        "moov", "udta", "trak", "edts", "mdia", "minf",
        "stbl", "dinf", "gmhd", "clip", "matt",
    )
    for payload_start, payload_len, kind in extract_boxes(mmapped_file, offset, size):
        path = prefix + "/" + kind
        if kind not in container_types:
            print(path, payload_start, payload_len)
            continue
        # Container: descend into its payload, recording where it starts.
        dump_mp4(mmapped_file, payload_start, payload_len, path + "@" + str(payload_start))
# Open an MP4 file and print its box (atom) structure to stdout.
def process_mp4(input_file):
    """
    Dump the box structure of the MP4 file at ``input_file``.

    The file is memory-mapped read-only so that large files are not loaded
    into memory, then handed to ``dump_mp4`` starting at offset 0 with an
    empty path prefix.

    Args:
        input_file: Path of the file to inspect.
    """
    with open(input_file, 'rb') as f:
        # mmap refuses zero-length files; an empty file has no boxes to
        # report, so there is nothing to do.
        if os.fstat(f.fileno()).st_size == 0:
            return
        # The context manager unmaps the file once dumping is finished,
        # instead of leaving the mapping open until garbage collection.
        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmapped_file:
            dump_mp4(mmapped_file, 0, len(mmapped_file), "")
# Run the dump only when executed as a script, so importing this module
# has no side effects.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Report a missing argument as a usage error rather than an IndexError.
        sys.exit("usage: " + sys.argv[0] + " <file.mp4>")
    process_mp4(sys.argv[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment