Created
August 28, 2015 04:04
-
-
Save zerolagtime/b0602d4f6f436369b207 to your computer and use it in GitHub Desktop.
Convert K3B audio projects into M4A files suitable for use on Apple devices that cannot handle numerous audio file formats.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Copyright 2015 by Charlie Todd except where noted below. | |
# You are granted permission to adapt this gist for your own project, | |
# but not to claim the work as entirely your own. Attribution must | |
# be provided in any derivative works in such a fashion that the | |
# final user of the software can see and understand the attribution. | |
import sys | |
import zipfile | |
import xml | |
import xml.dom.minidom | |
import re | |
import os.path | |
import subprocess | |
import logging | |
from mutagenx.easymp4 import EasyMP4 | |
logging.basicConfig(level=logging.INFO) | |
# logs below warning get shown immediately
# http://stackoverflow.com/questions/107705/disable-output-buffering | |
# courtesy http://mail.python.org/pipermail/tutor/2003-November/026645.html | |
class Unbuffered(object):
    """Stream proxy that flushes the wrapped stream after every write.

    Only ``write`` is overridden; every other attribute is looked up on
    the wrapped stream.
    """

    def __init__(self, stream):
        # The real stream that all operations are forwarded to.
        self.stream = stream

    def write(self, data):
        """Write *data* to the wrapped stream and flush it right away."""
        target = self.stream
        target.write(data)
        target.flush()

    def __getattr__(self, attr):
        # Invoked only for names not found on the proxy itself.
        return getattr(self.stream, attr)
# Route stdout through the flushing proxy so output is written immediately.
sys.stdout = Unbuffered(sys.stdout)

# Command line: the K3B project file is required, the output directory is
# optional and defaults to "prepped".
# Example: inFile = "/data/hland/2015/09/23/12sep/audiocd_20091003-24.k3b"
try:
    inFile = sys.argv[1]
except IndexError:
    logging.error("Usage: " + sys.argv[0] + " [k3b_file] {out_directory}")
    # sys.exit rather than the site-provided exit(), which is not
    # guaranteed to exist (e.g. when Python runs with -S).
    sys.exit(1)
try:
    outPath = sys.argv[2]
except IndexError:
    outPath = "prepped"

# Tag values applied to every converted track (read by tagAudio).
globalTags = {'artist': 'HLand', 'genre': 'Harvestland', 'room': '2nd-3rd'}
######## methods ########## | |
def handleContent(dom):
    """Main method of parsing the XML tree from K3B. Drives conversion
    and tagging.

    Every <track> element of *dom* is inspected. A track is skipped when
    its source file does not exist, when its path matches one of the
    exclusion patterns, or when the same source was already handled.
    All other tracks are converted with convertAudio() and, if that
    succeeds, tagged with tagAudio().

    Reads the module-level ``inFile`` (the date is parsed from its path)
    and ``outPath`` (directory the converted files are written to).
    """
    # The date comes from the project file's path, so it is the same for
    # every track: compute it once instead of once per track.
    dateInfo = extractDate(inFile)
    alreadySeen = set()
    n = 1
    for trk in dom.getElementsByTagName("track"):
        trackInfo = handleTrack(trk)
        trackInfo["number"] = "%02d" % n
        trackInfo.update(dateInfo)
        source = trackInfo["source"]

        if not os.path.exists(source):
            logging.warning("Source file " + source +
                            " does not exist.  Skipping.")
            continue
        # exclude tracks that meet specific patterns
        if re.search(r'(/master/full|/master/trax|activity)', source):
            logging.info("Skipping excluded track " + source)
            continue
        # exclude tracks that are listed multiple times
        if source in alreadySeen:
            logging.info("Skipping duplicate track " + source)
            continue

        trackInfo["destination"] = os.path.join(
            outPath,
            trackInfo["number"] + "-" +
            os.path.basename(trackInfo["destination"]))
        if convertAudio(trackInfo):
            tagAudio(trackInfo)
        # NOTE(review): the nesting of the next three statements could not
        # be recovered from the flattened source. They are placed on the
        # "track was processed" path, so only processed tracks consume a
        # track number -- confirm against the original file.
        logging.debug(trackInfo)
        alreadySeen.add(source)
        n += 1
def handleTrack(track):
    """Extract the main elements out of the XML tree

    Returns a dict with the keys 'source' (url of the first <file>
    element), 'destination' (base name of the source with its extension
    changed to m4a) and 'title' (title from the first <cd-text> element).

    A track without a <file> or <cd-text> element no longer raises
    IndexError; the same placeholder strings that handleFile() and
    handleCdText() return for missing data are used instead.
    """
    fileNodes = track.getElementsByTagName("file")
    cdTextNodes = track.getElementsByTagName("cd-text")
    # An empty NodeList is falsy, so indexing is only done when safe.
    sourceFile = handleFile(fileNodes[0]) if fileNodes else 'no file'
    trackTitle = handleCdText(cdTextNodes[0]) if cdTextNodes else 'no url'
    destFile = os.path.basename(filenameExtensionChange(sourceFile, "m4a"))
    return {'source': sourceFile,
            'destination': destFile,
            'title': trackTitle}
def handleFile(file):
    """Return the "url" attribute of *file*, or 'no file' if it has none."""
    if not file.hasAttribute("url"):
        return 'no file'
    return file.getAttribute("url")
def handleCdText(cdtext):
    """Return the text of the first <title> element below *cdtext*.

    When there is no <title> element (or *cdtext* is not an element at
    all) the placeholder string 'no url' is returned.
    """
    # NOTE(review): 'no url' is an odd placeholder for a missing title;
    # it is kept unchanged because it ends up in the written tags.
    try:
        titleNode = cdtext.getElementsByTagName("title")[0]
    except (IndexError, AttributeError):
        # Only the lookup failure is treated as "no title"; a bare except
        # would also hide KeyboardInterrupt and SystemExit.
        return 'no url'
    return handleTitle(titleNode)
def handleTitle(title):
    """Return the text held by the child nodes of the *title* element."""
    return getText(title.childNodes)
def getText(nodelist):
    """Join the data of every text node in *nodelist* into one string.

    Nodes of any other type are ignored; an empty list gives ''.
    """
    return ''.join(item.data
                   for item in nodelist
                   if item.nodeType == item.TEXT_NODE)
def filenameExtensionChange(path,ext):
    """Replace the trailing extension of *path* with *ext*.

    An extension is a dot followed by one to five letters or digits at
    the end of the string. A path without such an extension is returned
    unchanged.
    """
    trailingExt = re.compile(r'\.([A-Za-z0-9]{1,5}$)')
    return trailingExt.sub("." + ext, path)
def replacePath(path, newpath):
    """Swap the directory part of *path* for *newpath*.

    Everything up to and including the last "/" matched by the greedy
    pattern is replaced with newpath + "/". A path without "/" is
    returned unchanged.
    """
    leadingDirs = re.compile(r'^.*/')
    replacement = newpath + "/"
    return leadingDirs.sub(replacement, path)
def extractDate(path):
    """
    Find the date in the path.  A "room" number may be part of the path.
    Format is YYYY/MM/room/DDmon or YYYY/MM/DDmon.  A dict is returned with
    the elements year, month, room, and day (room is None when the path
    has no room component).  An empty dict is returned when the path does
    not contain a date.
    Example: 2015/09/23/12sep
    """
    found = re.search(r'(?P<year>\d\d\d\d)/(?P<month>\d\d)(?:/(?P<room>23|k1|24))?/(?P<day>\d\d)\w\w\w',path)
    return found.groupdict() if found else {}
def convertAudio(trackInfo, timeout=45):
    """
    convert audio to MPEG-4 (m4a container), using the avconv program

    trackInfo -- dict with the keys "source" (input file) and
                 "destination" (output file). An existing destination
                 file is deleted before the conversion starts.
    timeout   -- seconds avconv may run before it is aborted.

    Returns True when avconv exits with status 0, False when it fails,
    times out, or cannot be started.
    """
    source = trackInfo["source"]
    destination = trackInfo["destination"]
    logging.info("Converting " + source + " to " +
                 destination + " with avconv")
    if os.path.exists(destination):
        # logging.warn was a deprecated alias and is gone in Python 3.13.
        logging.warning("Deleting previous copy of " + destination)
        os.unlink(destination)
    # "-b:a" takes bits per second, so the "k" suffix is required to get
    # 128 kbit/s; a bare "128" asks for 128 bit/s.
    command = ['avconv', '-loglevel', 'error',
               '-hide_banner', '-nostdin', '-i',
               source, '-b:a', '128k',
               destination]
    try:
        subprocess.check_call(command, timeout=timeout,
                              stdout=subprocess.DEVNULL)
        return True
    except subprocess.CalledProcessError as e:
        logging.error("Audio conversion of " + source + " to " +
                      destination + " failed (exit code " +
                      str(e.returncode) + ")")
        # e.cmd is a list; let logging format it instead of using "+",
        # which raised TypeError inside this handler.
        logging.debug("Failed command was %s", e.cmd)
        return False
    except subprocess.TimeoutExpired:
        # The timeout is a number and must be converted before joining.
        logging.error("Audio conversion of " + source + " to " +
                      destination + " failed to finish in " +
                      str(timeout) + " seconds.")
        return False
    except Exception as err:
        # E.g. avconv is not installed. Exception (not a bare except) so
        # that Ctrl-C and SystemExit still stop the program.
        logging.error("An unexpected problem happened while converting " +
                      source + " to " + destination + ": " + str(err))
        return False
def tagAudio(trackInfo):
    """
    Using the mutagenx package, tag the media with information from
    the trackInfo dict and the globalTags dict.

    trackInfo must hold "destination", "title" and "number". The album
    and date tags additionally need "year", "month" and "day"; when these
    are missing or not numeric the two tags are skipped with a warning
    instead of aborting the run with an exception.

    Always returns ''.
    """
    destination = trackInfo["destination"]
    logging.info("Writing tag information to " + destination)
    try:
        album = "%4d-%02d-%02d %s" % (int(trackInfo["year"]),
                                      int(trackInfo["month"]),
                                      int(trackInfo["day"]),
                                      globalTags["room"])
    except (KeyError, TypeError, ValueError):
        # extractDate() yields an empty dict when the path has no date.
        album = None
        logging.warning("No usable date for " + destination +
                        "; album and date tags not written.")

    audioTags = EasyMP4(destination)
    audioTags['title'] = trackInfo["title"]
    if album is not None:
        audioTags['album'] = album
    audioTags['artist'] = globalTags["artist"]
    audioTags['genre'] = globalTags["genre"]
    if album is not None:
        audioTags['date'] = trackInfo["year"]
    audioTags['tracknumber'] = trackInfo["number"]
    audioTags.save()
    return ''
##### Main program #####
# Exit status: 1 = input is not a zip archive, 2 = archive cannot be opened
# or has no maindata.xml, 3 = not an audio project or the output directory
# cannot be used. The success message is only logged when nothing failed.
if not zipfile.is_zipfile(inFile):
    logging.error("%s is NOT a K3B file.  Skipping.\n" % inFile)
    sys.exit(1)
try:
    zipObj = zipfile.ZipFile(inFile, 'r')
except (OSError, zipfile.BadZipFile):
    logging.error("Error opening %s as a zip file.  Check file type." % inFile)
    sys.exit(2)

# The with-blocks close the archive and the member on every path,
# including the early exits.
with zipObj:
    try:
        xmlFilePointer = zipObj.open("maindata.xml", "r")
    except (KeyError, OSError, zipfile.BadZipFile):
        # ZipFile.open raises KeyError when the member does not exist.
        logging.error("Couldn't find maindata.xml in %s.  Is it a K3B file?" % inFile)
        sys.exit(2)
    with xmlFilePointer:
        dom = xml.dom.minidom.parse(xmlFilePointer)

if dom.getElementsByTagName("k3b_audio_project").length == 0:
    logging.error("This utility only works on K3B Audio projects")
    sys.exit(3)

if not os.path.exists(outPath):
    # makedirs so that a nested output path also works.
    os.makedirs(outPath)
if not os.path.isdir(outPath):
    logging.error("Cannot write output to the directory " + outPath)
    sys.exit(3)

handleContent(dom)
logging.info("Exiting successfully")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment