Created
August 16, 2011 20:16
-
-
Save gliese1337/1150054 to your computer and use it in GitHub Desktop.
Non-incremental parser for WebVTT files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
WebVTT specification (parsing algorithm followed by this file):
http://www.whatwg.org/specs/web-apps/current-work/webvtt.html
*/
/**
 * Non-incremental parser for WebVTT files.
 *
 * Walks the whole input string once and returns the cues it finds. Cues
 * whose timing line cannot be parsed are logged and skipped; they do not
 * abort the parse.
 *
 * @param {string} input Complete text of a WebVTT file (a leading BOM is allowed).
 * @returns {Array<Object>} Cue objects with the fields id, text, start, stop,
 *   pause_on_exit, wdir, snap, line, position, size and align. start/stop are
 *   whatever parse_timestamp returns for the two timestamps.
 * @throws {Error} "Not WebVTT Data" when the first line is not a WEBVTT signature.
 */
function parseWebVTT(input){
	"use strict";
	var length = input.length,
		//If the first character is a BYTE ORDER MARK (BOM) character, start after it.
		pos = +(input[0] === '\uFEFF'),
		cue_list = [],
		line, id, fields, cue, cue_text,
		//Anchored at the start of the line, with an optional "hours:" prefix,
		//so that digits glued in front of the minutes (e.g. "12300:00.000")
		//or leading garbage are rejected instead of silently accepted.
		time_pat = /^\s*((?:\d+:)?[0-5]\d:[0-5]\d\.\d\d\d)\s*-->\s*((?:\d+:)?[0-5]\d:[0-5]\d\.\d\d\d)\s*(.*)/;

	//Skip a single line terminator: an optional CR followed by an optional LF.
	function skip_line_terminator(){
		if(input[pos] === '\r'){pos++;}
		if(input[pos] === '\n'){pos++;}
	}

	//Collect a sequence of characters that are not CR or LF characters.
	//Stops at the end of input, so a final line without a terminator is kept.
	function collect_line(){
		var start = pos;
		while(pos<length && input[pos] !== '\r' && input[pos] !== '\n'){pos++;}
		return input.substring(start,pos);
	}

	//Signature: the first line must be exactly "WEBVTT", or "WEBVTT" followed by
	//a U+0020 SPACE or U+0009 TAB and anything else. Otherwise this is not a WebVTT file.
	line = collect_line();
	if(!/^WEBVTT([\u0020\u0009].*|$)/.test(line)){throw new Error("Not WebVTT Data");}

	//Header: discard lines until the first empty line (or the end of input).
	do{
		skip_line_terminator();
		line = collect_line();
	}while(line !== '');

	//Cue loop:
	while(pos<length){
		//Skip CR & LF characters (blank lines between cues).
		while(input[pos] === '\r' || input[pos] === '\n'){pos++;}
		if(pos>=length){break;}

		line = collect_line();
		id = '';
		//If line does not contain "-->", treat it as an id & get a new line.
		if(line.indexOf('-->') === -1){
			id = line;
			skip_line_terminator();
			line = collect_line();
			//An id followed by an empty line (or the end of input) is not a cue.
			if(line === ''){continue;}
		}

		cue = { //set default cue parameters
			id:id, text:'',
			pause_on_exit:false,
			wdir:'horizontal', snap:true,
			line:'auto', position:50,
			size:100, align:'middle'
		};

		//Timings:
		try{ //Collect WebVTT cue timings and settings from line, using cue for the results.
			fields = time_pat.exec(line);
			if(!fields){throw new Error("Invalid Timestamp Data");}
			cue.start = parse_timestamp(fields[1]);
			cue.stop = parse_timestamp(fields[2]);
			parse_settings(cue,fields[3]);
		}catch(e){
			console.log(e.stack);
			//Bad cue loop: discard lines up to the next empty line, then resume.
			do{
				skip_line_terminator();
				line = collect_line();
			}while(line !== '');
			continue;
		}

		//Cue text loop: every line up to the next empty line belongs to this cue.
		//A timing line that is the very last line of input yields a cue with empty text.
		cue_text = [];
		while(pos<length){
			skip_line_terminator();
			line = collect_line();
			if(line === ''){break;}
			//Replace all U+0000 NULL characters by U+FFFD REPLACEMENT CHARACTERs.
			//A global regex is required: a string pattern replaces only the first match.
			cue_text.push(line.replace(/\0/g,'\uFFFD'));
		}

		//Cue text processing:
		cue.text = cue_text.join('\n');
		//This is where we ought to construct the cue-text DOM.
		cue_list.push(cue); //Add cue to the text track list of cues output.
	}
	//End: The file has ended. The WebVTT parser has finished.
	return cue_list;
}
/**
 * Convert a WebVTT timestamp of the form [hours:]minutes:seconds.fraction
 * into a number of seconds.
 *
 * The fraction digits are read as an integer count of milliseconds, so the
 * three-digit form ("…​.250") is the one that gives a meaningful result.
 *
 * @param {string} input Timestamp text, e.g. "01:02.250" or "1:01:02.250".
 * @returns {number} Time in seconds.
 * @throws {SyntaxError} "Unexpected Colon" when input starts with ':';
 *   "Invalid Timestamp" when input is not digits in the shape described above
 *   (previously such input silently produced NaN or a meaningless number).
 */
function parse_timestamp(input){
	//The directive must be the first statement of the body to take effect.
	"use strict";
	var fields,hours,seconds;
	if(input[0]===':'){throw new SyntaxError("Unexpected Colon");}
	fields = /^(?:(\d+):)?(\d+):(\d+)\.(\d+)$/.exec(input);
	if(!fields){throw new SyntaxError("Invalid Timestamp");}
	//The hours group is undefined when the optional "hours:" prefix is absent.
	hours = fields[1] === undefined ? 0 : parseInt(fields[1],10);
	//Hours and milliseconds are combined first, then minutes, then seconds.
	//Floating-point addition is order-sensitive; this order is kept deliberately.
	seconds = hours*3600 + parseInt(fields[4],10)/1000;
	return seconds + parseInt(fields[2],10)*60 + parseInt(fields[3],10);
}
/**
 * Apply WebVTT cue settings found in a settings string to a cue object.
 *
 * Recognised settings are single-letter keys followed by a colon and a value:
 *   A:start|middle|end         -> cue.align
 *   D:vertical|vertical-lr     -> cue.wdir
 *   L:<int> or L:<0-100>%      -> cue.line (and cue.snap)
 *   S:<0-100>%                 -> cue.size
 *   T:<0-100>%                 -> cue.position
 * Unknown keys and invalid or out-of-range values are ignored and leave the
 * cue untouched. When a key appears several times, the last valid one wins.
 *
 * @param {Object} cue Cue object to update in place.
 * @param {string} input Text following the timestamps on a cue timing line.
 * @returns {undefined}
 */
function parse_settings(cue,input){
	"use strict";
	var match,key,value,number,is_percent,
		//A setting must begin a whitespace-delimited token; without the
		//(?:^|\s) guard a token such as "BAD:vertical" would be read as "D:vertical".
		set_pat = /(?:^|\s)([ADLST]):(\S+)/g;
	while((match = set_pat.exec(input)) !== null){
		key = match[1];
		value = match[2];
		switch(key){
		case 'A': //Alignment
			if(value==='start' || value==='middle' || value==='end'){cue.align = value;}
			break;
		case 'D': //Text direction
			if(value==='vertical' || value==='vertical-lr'){cue.wdir = value;}
			break;
		case 'L': //Line position
			if(/^-?\d+%?$/.test(value)){
				number = parseInt(value,10);
				is_percent = value[value.length-1] === '%';
				//A percentage must lie within 0..100, otherwise the setting is ignored.
				if(is_percent && (number<0 || number>100)){break;}
				//A percentage turns line snapping off; a plain line number turns it
				//(back) on, so a later "L:3" correctly overrides an earlier "L:50%".
				cue.snap = !is_percent;
				cue.line = number;
			}
			break;
		case 'S': //Text Size
			if(/^\d+%$/.test(value)){
				number = parseInt(value,10);
				if(number>=0 && number<=100){cue.size = number;}
			}
			break;
		case 'T': //Text Position
			if(/^\d+%$/.test(value)){
				number = parseInt(value,10);
				if(number>=0 && number<=100){cue.position = number;}
			}
			break;
		}
	}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment