vdcrim · December 12, 2015 09:18
diff --git a/mkv_suid_duration.py b/mkv_suid_duration.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """Parse SegmentUID and Duration on Matroska files, for ordered chapters

 Usage (Python 2 or 3):
  $python mkv_suid_duration.py file1 [file2...]
  $python mkv_suid_duration.py directory


 Copyright (C) 2013  Diego Fernández Gosende (dfgosende [at] gmail [dot] com)
 GPL v3 or later <http://www.gnu.org/licenses/gpl-3.0.html>
 """

 from __future__ import print_function
 import os.path
 import binascii
 import struct

 def _read_at(stream, offset, size):
    """Read up to `size` bytes at absolute `offset`, keeping the position

    The stream position is restored afterwards, so payload reads do not
    disturb the sequential chunked scan done by parse_files().
    """
    resume = stream.tell()
    stream.seek(offset)
    try:
        return stream.read(size)
    finally:
        stream.seek(resume)

 def parse_files(paths):
    """Parse SegmentUID and Duration on Matroska files

    `paths` is an iterable of file paths.  Entries that are not regular
    files, or that do not start with the EBML magic number, are skipped.

    Returns a dict keyed by file basename.  Each value is a dict with:
      'suid'     -> SegmentUID as a hex string, or None if not found
      'duration' -> 'hh:mm:ss.mmm' string, or None when either the
                    duration or the timecode scale could not be read

    Every file is scanned in 100 kB chunks for the raw element IDs.  The
    scan stops when the three values are known, when a cluster ID shows
    up, or at the end of the file.  For each value the first usable match
    wins.  Values whose payload is cut short by the end of the file are
    ignored instead of raising.
    """
    chunk_size = 100000 # 100 kB
    cluster_id = b'\x1F\x43\xB6\x75'
    # Carrying the last bytes of each window into the next one lets an
    # element ID that straddles a chunk boundary still be matched.
    overlap = len(cluster_id) - 1
    mkv_dict = {}
    for path in paths:
        if not os.path.isfile(path):
            continue
        with open(path, 'rb') as stream:
            if stream.read(4) != b'\x1A\x45\xDF\xA3': # not a Matroska file
                continue
            basename = os.path.basename(path)
            mkv_dict[basename] = {'suid': None, 'duration': None}
            suid = tcscale = duration = None
            carry = b''
            while not (suid and tcscale and duration):
                chunk = stream.read(chunk_size)
                if not chunk:
                    break
                window = carry + chunk
                # absolute file offset of window[0]
                base = stream.tell() - len(window)
                if not suid:
                    pos = window.find(b'\x73\xA4\x90') # \x90 -> 16 bytes
                    if pos != -1:
                        raw = _read_at(stream, base + pos + 3, 16)
                        if len(raw) == 16:
                            suid = binascii.hexlify(raw).decode()
                            mkv_dict[basename]['suid'] = suid
                if not tcscale:
                    pos = window.find(b'\x2A\xD7\xB1')
                    if pos != -1:
                        size_byte = _read_at(stream, base + pos + 3, 1)
                        # An empty read means a truncated file, and a zero
                        # byte has no length marker bit: skip both.
                        if size_byte not in (b'', b'\x00'):
                            length = get_data_len(size_byte)
                            raw = _read_at(stream, base + pos + 4, length)
                            if length and len(raw) == length:
                                tcscale = int(binascii.hexlify(raw), 16)
                if not duration:
                    # float (4 bytes) is tried first, then double (8 bytes)
                    for marker, fmt in ((b'\x44\x89\x84', '>f'),
                                        (b'\x44\x89\x88', '>d')):
                        pos = window.find(marker)
                        if pos == -1:
                            continue
                        size = struct.calcsize(fmt)
                        raw = _read_at(stream, base + pos + 3, size)
                        if len(raw) == size:
                            duration = struct.unpack(fmt, raw)[0]
                        if duration:
                            break
                if cluster_id in window:
                    # segment info should be before the clusters
                    break
                carry = window[-overlap:]
        if tcscale and duration:
            mkv_dict[basename]['duration'] = ms2str(
                                                duration * tcscale / 1000000)
    return mkv_dict

 def get_data_len(byte):
    """Get the length (bytes) of the element data

    `byte` is a single byte: the first byte of an EBML size field.  The
    highest set bit is the length marker; it is cleared and the remaining
    low bits are returned.  For a size field that is one byte long this
    is the length of the element data.

    Raises ValueError when the byte is zero: no marker bit is set, so
    there is nothing to clear (scanning for it used to loop forever).
    """
    n = ord(byte)
    if not n:
        raise ValueError('EBML size byte has no length marker bit set')
    mask = 0b10000000
    while not n & mask:
        mask >>= 1
    return n & ~mask

 def ms2str(ms):
    """Format a millisecond count as a 'hh:mm:ss.mmm' string

    Each field is truncated to an integer; hours are not wrapped and use
    more than two digits when needed.
    """
    total_seconds, millis = divmod(ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    fields = map(int, (hours, minutes, seconds, millis))
    return '{0:02d}:{1:02d}:{2:02d}.{3:03d}'.format(*fields)

 if __name__ == '__main__':
    # Command-line entry point: the arguments are either one directory
    # (every '*.mkv' directly inside it is parsed) or a list of files.
    import os
    import sys
    import glob

    def decode_arg(arg):
        """Return a command-line argument as text

        On Python 3 sys.argv already holds str, so the value is returned
        untouched; re-encoding and decoding it could corrupt or reject
        valid names.  On Python 2 the byte string is decoded with the
        file system encoding.  sys.stdin.encoding is not used because it
        is None when stdin is redirected.
        """
        if sys.version_info[0] >= 3:
            return arg
        return arg.decode(sys.getfilesystemencoding()
                          or sys.getdefaultencoding())

    if len(sys.argv) > 1:
        arg1 = decode_arg(sys.argv[1])
        if os.path.isdir(arg1):
            paths = glob.iglob(os.path.join(arg1, '*.mkv'))
        else:
            paths = (decode_arg(path) for path in sys.argv[1:])
        mkv_dict = parse_files(paths)
        for name in mkv_dict:
            # the file name is cut to 30 characters to keep lines short
            print(mkv_dict[name]['suid'], mkv_dict[name]['duration'], name[:30])
    else:
        print('A file(s) or directory is needed!')
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	"""Parse SegmentUID and Duration on Matroska files, for ordered chapters

	Usage (Python 2 or 3):
	$python mkv_suid_duration.py file1 [file2...]
	$python mkv_suid_duration.py directory


	Copyright (C) 2013 Diego Fernández Gosende (dfgosende [at] gmail [dot] com)
	GPL v3 or later <http://www.gnu.org/licenses/gpl-3.0.html>
	"""

	from __future__ import print_function
	import os.path
	import binascii
	import struct

	def _read_at(stream, offset, size):
	    """Read up to `size` bytes at absolute `offset`, keeping the position

	    The stream position is restored afterwards, so payload reads do not
	    disturb the sequential chunked scan done by parse_files().
	    """
	    resume = stream.tell()
	    stream.seek(offset)
	    try:
	        return stream.read(size)
	    finally:
	        stream.seek(resume)

	def parse_files(paths):
	    """Parse SegmentUID and Duration on Matroska files

	    `paths` is an iterable of file paths.  Entries that are not regular
	    files, or that do not start with the EBML magic number, are skipped.

	    Returns a dict keyed by file basename.  Each value is a dict with:
	      'suid'     -> SegmentUID as a hex string, or None if not found
	      'duration' -> 'hh:mm:ss.mmm' string, or None when either the
	                    duration or the timecode scale could not be read

	    Every file is scanned in 100 kB chunks for the raw element IDs.  The
	    scan stops when the three values are known, when a cluster ID shows
	    up, or at the end of the file.  For each value the first usable match
	    wins.  Values whose payload is cut short by the end of the file are
	    ignored instead of raising.
	    """
	    chunk_size = 100000 # 100 kB
	    cluster_id = b'\x1F\x43\xB6\x75'
	    # Carrying the last bytes of each window into the next one lets an
	    # element ID that straddles a chunk boundary still be matched.
	    overlap = len(cluster_id) - 1
	    mkv_dict = {}
	    for path in paths:
	        if not os.path.isfile(path):
	            continue
	        with open(path, 'rb') as stream:
	            if stream.read(4) != b'\x1A\x45\xDF\xA3': # not a Matroska file
	                continue
	            basename = os.path.basename(path)
	            mkv_dict[basename] = {'suid': None, 'duration': None}
	            suid = tcscale = duration = None
	            carry = b''
	            while not (suid and tcscale and duration):
	                chunk = stream.read(chunk_size)
	                if not chunk:
	                    break
	                window = carry + chunk
	                # absolute file offset of window[0]
	                base = stream.tell() - len(window)
	                if not suid:
	                    pos = window.find(b'\x73\xA4\x90') # \x90 -> 16 bytes
	                    if pos != -1:
	                        raw = _read_at(stream, base + pos + 3, 16)
	                        if len(raw) == 16:
	                            suid = binascii.hexlify(raw).decode()
	                            mkv_dict[basename]['suid'] = suid
	                if not tcscale:
	                    pos = window.find(b'\x2A\xD7\xB1')
	                    if pos != -1:
	                        size_byte = _read_at(stream, base + pos + 3, 1)
	                        # An empty read means a truncated file, and a zero
	                        # byte has no length marker bit: skip both.
	                        if size_byte not in (b'', b'\x00'):
	                            length = get_data_len(size_byte)
	                            raw = _read_at(stream, base + pos + 4, length)
	                            if length and len(raw) == length:
	                                tcscale = int(binascii.hexlify(raw), 16)
	                if not duration:
	                    # float (4 bytes) is tried first, then double (8 bytes)
	                    for marker, fmt in ((b'\x44\x89\x84', '>f'),
	                                        (b'\x44\x89\x88', '>d')):
	                        pos = window.find(marker)
	                        if pos == -1:
	                            continue
	                        size = struct.calcsize(fmt)
	                        raw = _read_at(stream, base + pos + 3, size)
	                        if len(raw) == size:
	                            duration = struct.unpack(fmt, raw)[0]
	                        if duration:
	                            break
	                if cluster_id in window:
	                    # segment info should be before the clusters
	                    break
	                carry = window[-overlap:]
	        if tcscale and duration:
	            mkv_dict[basename]['duration'] = ms2str(
	                duration * tcscale / 1000000)
	    return mkv_dict

	def get_data_len(byte):
	    """Get the length (bytes) of the element data

	    `byte` is a single byte: the first byte of an EBML size field.  The
	    highest set bit is the length marker; it is cleared and the remaining
	    low bits are returned.  For a size field that is one byte long this
	    is the length of the element data.

	    Raises ValueError when the byte is zero: no marker bit is set, so
	    there is nothing to clear (scanning for it used to loop forever).
	    """
	    n = ord(byte)
	    if not n:
	        raise ValueError('EBML size byte has no length marker bit set')
	    mask = 0b10000000
	    while not n & mask:
	        mask >>= 1
	    return n & ~mask

	def ms2str(ms):
	    """Convert ms to a 'hh:mm:ss.mmm' string

	    Each field is truncated to an integer; hours are not wrapped and use
	    more than two digits when needed.
	    """
	    s, ms = divmod(ms, 1000)
	    m, s = divmod(s, 60)
	    h, m = divmod(m, 60)
	    return '{0:02d}:{1:02d}:{2:02d}.{3:03d}'.format(
	        *[int(i) for i in (h, m, s, ms)])

	if __name__ == '__main__':
	    # Command-line entry point: the arguments are either one directory
	    # (every '*.mkv' directly inside it is parsed) or a list of files.
	    import os
	    import sys
	    import glob

	    def decode_arg(arg):
	        """Return a command-line argument as text

	        On Python 3 sys.argv already holds str, so the value is returned
	        untouched; re-encoding and decoding it could corrupt or reject
	        valid names.  On Python 2 the byte string is decoded with the
	        file system encoding.  sys.stdin.encoding is not used because it
	        is None when stdin is redirected.
	        """
	        if sys.version_info[0] >= 3:
	            return arg
	        return arg.decode(sys.getfilesystemencoding()
	                          or sys.getdefaultencoding())

	    if len(sys.argv) > 1:
	        arg1 = decode_arg(sys.argv[1])
	        if os.path.isdir(arg1):
	            paths = glob.iglob(os.path.join(arg1, '*.mkv'))
	        else:
	            paths = (decode_arg(path) for path in sys.argv[1:])
	        mkv_dict = parse_files(paths)
	        for name in mkv_dict:
	            # the file name is cut to 30 characters to keep lines short
	            print(mkv_dict[name]['suid'], mkv_dict[name]['duration'], name[:30])
	    else:
	        print('A file(s) or directory is needed!')