Created
January 5, 2018 14:00
-
-
Save milothiesen/8147ed1e339aaef6b75cb857e8957fc0 to your computer and use it in GitHub Desktop.
A Python script that recursively searches a given directory and extracts technical metadata about each video file's codec and wrapper (container) using MediaInfo.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import subprocess | |
import lxml.etree as ET | |
import pprint | |
# Directory to scan, taken from the first command-line argument.
if len(sys.argv) < 2:
    # Exit with a usage hint rather than an IndexError traceback when the
    # script is started without a directory argument.
    sys.exit('usage: {} <directory>'.format(sys.argv[0]))
rootdir = sys.argv[1]

# File extensions (without the leading dot) that are treated as video files.
accepted_file_types = ['mov', 'mp4', 'm4v', 'wmv']

# Paths of the matching files; filled in place by dirwalker().
files_list = []

# One metadata dict per successfully analysed file; filled by
# generate_mediainfo().
formats_list = []
def dirwalker(rootdir, accepted_file_types, files_list):
    """Collect video files under ``rootdir`` and hand them to MediaInfo.

    Walks ``rootdir`` recursively, appends the full path of every file whose
    extension is listed in ``accepted_file_types`` to ``files_list``, and then
    calls ``generate_mediainfo(files_list)`` once the walk is complete.

    Args:
        rootdir: Directory to search recursively.
        accepted_file_types: Extensions to accept, written without the
            leading dot (e.g. ``'mov'``). Matching is case-insensitive.
        files_list: List that is extended in place with the matching paths.

    Returns:
        None. Results are delivered through ``files_list`` and through the
        side effects of ``generate_mediainfo``.
    """
    # Normalise once so each file needs only a set lookup and so that
    # "CLIP.MOV" is matched the same way as "clip.mov".
    wanted = {ext.lower() for ext in accepted_file_types}

    for root, _dirs, files in os.walk(rootdir):
        for file_ in files:
            # splitext() yields '' for a name without an extension, so a file
            # that is literally called "mov" is no longer mistaken for a video
            # (str.rpartition('.') returned the whole name in that case).
            extension = os.path.splitext(file_)[1][1:].lower()
            if extension in wanted:
                files_list.append(os.path.join(root, file_))

    generate_mediainfo(files_list)
def generate_mediainfo(files_list):
    """Run ``mediainfo`` on each path and record its General-track metadata.

    For every path in ``files_list`` the path is printed, ``mediainfo
    --Output=XML`` is executed, and selected child elements of the
    ``track[@type="General"]`` node are copied into a dict keyed by element
    name. Each dict is appended to the module-level ``formats_list``. Fields
    that are absent from the XML are simply left out of the dict.

    A file whose ``mediainfo`` call fails, or whose output cannot be parsed
    as XML, is reported with ``print`` and skipped so that one bad file does
    not abort the whole scan.

    Args:
        files_list: Iterable of file paths to analyse.

    Returns:
        None. Results are appended to ``formats_list``.
    """
    # Element names read from the General track; each is also used as the key
    # in the resulting dict. ('Commercial_name' was previously stored under
    # the misspelled key 'Commerical_name'.)
    # NOTE(review): these names and the un-namespaced XPath below look like
    # the legacy MediaInfo XML layout -- confirm against the installed
    # mediainfo version.
    general_fields = (
        'Complete_name',
        'Format',
        'Commercial_name',
        'Format_profile',
        'Codec_ID',
        'Overall_bit_rate_mode',
        'Writing_library',
    )

    for filepath in files_list:
        print(filepath)

        try:
            mediainfo_output = subprocess.check_output(
                ['mediainfo', '--Output=XML', filepath])
        except (OSError, subprocess.SubprocessError, ValueError) as e:
            # OSError covers a missing or non-executable mediainfo binary,
            # SubprocessError a non-zero exit status, ValueError an invalid
            # argument such as a path containing a NUL byte.
            print(e)
            continue

        try:
            root = ET.fromstring(mediainfo_output)
        except ET.XMLSyntaxError as e:
            # Empty or malformed output: report it and move on to the next
            # file instead of aborting the scan.
            print(e)
            continue

        asset_dict = {}
        for field in general_fields:
            matches = root.xpath(
                './/track[@type="General"]/{}/text()'.format(field))
            if matches:
                # Keep the last match, as the original per-field loops did.
                asset_dict[field] = matches[-1]

        formats_list.append(asset_dict)
# Script entry: walk the requested directory (which also runs MediaInfo on
# every match), then pretty-print the collected metadata dicts.
dirwalker(rootdir, accepted_file_types, files_list)
pprint.pprint(formats_list, indent=4)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment