|
#!/usr/bin/php |
|
<?php |
|
/** |
|
* TTML (XML) to SRT subtitle converter. |
|
* Only tested with subs from Akamai player really, but hopefully works on other stuff as well. |
|
* Author: Alexander Karlstad |
|
*/ |
|
|
|
/** |
|
*Modified to work on TTML files that use the end attribute instead of dur |
|
*/ |
|
|
|
// TTML file to parse, given as the first command-line argument.
if (!isset($argv[1]) || $argv[1] === '') {
    fwrite(STDERR, "Usage: {$argv[0]} <file.ttml>\n");
    exit(1);
}

$in = $argv[1];

// The path is handed to the filesystem untouched. Running it through
// utf8_encode() (as earlier versions did) double-encodes names that are
// already UTF-8, and that function is deprecated as of PHP 8.2.
$cont = file_get_contents($in);

if ($cont === false) {
    fwrite(STDERR, "Unable to read '{$in}'\n");
    exit(1);
}

// Replace <br/>'s with real newlines so line breaks survive tag stripping.
$cont = str_replace(['<br/>', '<br />'], "\n", $cont);

// Escape bare ampersands (ones that do not start an entity or character
// reference); they are not well-formed XML and would make parsing fail.
$cont = preg_replace('/&(?!(?:[A-Za-z_][\w.-]*|#[0-9]+|#x[0-9A-Fa-f]+);)/', '&amp;', $cont);

$xml = simplexml_load_string($cont);

if ($xml === false) {
    fwrite(STDERR, "Unable to parse '{$in}' as XML\n");
    exit(1);
}

if (!isset($xml->body->div)) {
    fwrite(STDERR, "No <body><div> element with subtitles found in '{$in}'\n");
    exit(1);
}

$num = 1;

// Walk every <div> in the body, not just the first one.
foreach ($xml->body->div as $div) {
    foreach ($div->p as $sub) {
        $attrs = $sub->attributes();
        $begin = isset($attrs['begin']) ? (string) $attrs['begin'] : '';
        $end = isset($attrs['end']) ? (string) $attrs['end'] : '';

        // A cue needs both timestamps to produce a timing line.
        if ($begin === '' || $end === '') {
            fwrite(STDERR, "Skipping a cue without both 'begin' and 'end' attributes\n");
            continue;
        }

        // Do we have spans within? Typically for adding bold/italic text.
        if ($sub->count() > 0) {
            // Work on the serialized markup so text outside the spans is kept.
            $text = $sub->asXML();

            foreach ($sub->children() as $child) {
                $child_attrs = $child->attributes();
                $child_style = isset($child_attrs['style']) ? (string) $child_attrs['style'] : '';
                $child_xml = $child->asXML();

                // The string cast yields decoded text. Re-escape it so the whole
                // cue is uniformly XML-escaped and can be decoded exactly once
                // after the tags have been stripped.
                $child_text = htmlspecialchars(trim((string) $child), ENT_QUOTES | ENT_XML1, 'UTF-8');

                if ('italic' === $child_style) {
                    $t = "<i>{$child_text}</i>\n";
                }
                else if ('bold' === $child_style) {
                    $t = "<b>{$child_text}</b>\n";
                }
                else {
                    $t = "{$child_text}\n";
                }

                $text = str_replace($child_xml, $t, $text);
            }

            // Only allow <b> and <i> for SRT compatibility. Don't mind <u>.
            $text = strip_tags($text, '<b><i>');

            // asXML() leaves entities and character references escaped
            // (&amp;, &#xE5;, ...); turn them back into plain characters.
            $text = html_entity_decode($text, ENT_QUOTES | ENT_XML1, 'UTF-8');
        }
        else {
            $text = (string) $sub;
        }

        $text = trim($text);

        // Remove weird spacings around newlines (indentation from the XML
        // formatting, carriage returns) and collapse runs of newlines.
        $text = preg_replace('/[ \t\r]*\n\s*/', "\n", $text);

        $timecode = calc_timecode($begin, $end);

        // Output in Subrip format
        echo "{$num}\n";
        echo "{$timecode}\n";
        echo "{$text}\n\n";

        $num++;
    }
}
|
|
|
/**
 * Normalise one clock time into SubRip's HH:MM:SS,mmm form.
 *
 * The value is split on ':', '.' and ','. Hours, minutes and seconds are
 * left-padded to two digits. The fraction is right-padded or truncated to
 * exactly three digits, because SRT readers take that field as milliseconds
 * ("01.5" means 500 ms, not 5 ms). A missing fraction becomes "000".
 *
 * NOTE(review): frame-based ("HH:MM:SS:FF") and offset ("12.5s") TTML time
 * expressions are not interpreted here; their fields are passed through
 * positionally, as before.
 *
 * @param string $time Clock time such as "00:01:02.5" or "00:01:02,500".
 * @return string The time formatted as HH:MM:SS,mmm.
 */
function srt_timestamp($time) {
    $parts = preg_split('/[:.,]+/', trim((string) $time));

    // Guarantee all four fields exist so no undefined offset is read.
    $parts = array_pad($parts, 4, '0');
    list($h, $m, $s, $fraction) = $parts;

    $h = str_pad($h, 2, '0', STR_PAD_LEFT);
    $m = str_pad($m, 2, '0', STR_PAD_LEFT);
    $s = str_pad($s, 2, '0', STR_PAD_LEFT);
    $ms = substr(str_pad($fraction, 3, '0'), 0, 3);

    return "{$h}:{$m}:{$s},{$ms}";
}

/**
 * Subrip time code handling.
 *
 * Builds the "start --> end" timing line of an SRT cue. Both arguments are
 * absolute times; nothing is added together (older versions treated the
 * second argument as a duration, hence its name).
 *
 * @param string $orig Cue start time (HH:MM:SS.XXX).
 * @param string $add  Cue end time (HH:MM:SS.XXX).
 * @return string Timing line, e.g. "00:00:01,500 --> 00:00:03,250".
 */
function calc_timecode($orig, $add) {
    return srt_timestamp($orig) . ' --> ' . srt_timestamp($add);
}