// ==UserScript==
// @name Sonix parser
// @namespace
// @version 0.2
// @description Parse SRT from sonix transcription
// @author Cabbage Storm
// @match https://sonix.ai/resources/*
// @grant none
// @copyright 2019, Newt300 (https://openuserjs.org//users/Newt300)
// @license MIT
// ==/UserScript==

/** Namespace object that collects the parser's public methods. */
const sonixParser = {};

/**
 * Scrapes the transcript currently rendered in the page into plain objects.
 *
 * One entry is produced per `.sonix--transcript-exchange` element:
 *   - time:        `data-ts` attribute of the exchange timestamp element
 *                  (null when the element or the attribute is absent)
 *   - speakerName: innerText of the speaker-name element ('' when absent)
 *   - text:        innerText of the exchange text element ('' when absent)
 *   - speach:      one `{ time, text }` object per `<span>` found inside the
 *                  exchange text element (the key spelling is kept because
 *                  other code in this file reads `speach`)
 *
 * The resulting array is stored on `sonixParser.parsedJSON` and logged to the
 * console. The function itself returns nothing.
 */
sonixParser.parse = function () {
  // querySelector returns null when nothing matches; guard so that a single
  // exchange without a speaker or timestamp node cannot abort the whole parse.
  const innerTextOf = function (root, selector) {
    const node = root.querySelector(selector);
    return node ? node.innerText : '';
  };

  const exchanges = [];

  document.querySelectorAll('.sonix--transcript-exchange').forEach(function (exchange) {
    const stamp = exchange.querySelector('.sonix--transcript-exchange-timestamp');
    const entry = {
      time: stamp ? stamp.getAttribute('data-ts') : null,
      speakerName: innerTextOf(exchange, '.sonix--transcript-exchange-speaker-name'),
      text: innerTextOf(exchange, '.sonix--transcript-exchange-text'),
      speach: [],
    };

    exchange.querySelectorAll('.sonix--transcript-exchange-text span').forEach(function (word) {
      entry.speach.push({
        time: word.getAttribute('data-ts'),
        text: word.innerText,
      });
    });

    exchanges.push(entry);
  });

  sonixParser.parsedJSON = exchanges;
  console.log(exchanges);
};
/**
 * Groups the words of `sonixParser.parsedJSON` into subtitle cues.
 *
 * Words of one exchange are appended to a buffer. After each word the current
 * line of the buffer is checked: when the word contains `.`, `?` or `!` and
 * the line is longer than 10 characters, or when the line is longer than
 * `lineLength`, the line is closed. Closing a line either starts a new line in
 * the same cue (while the cue has fewer than `lines` lines) or emits the cue.
 * Cues never span two exchanges.
 *
 * @param {Object}  [options]
 * @param {number}  [options.lines=1]        Maximum number of lines per cue.
 * @param {number}  [options.lineLength=36]  Line length after which a line is closed.
 * @param {boolean} [options.named]          Prefix every cue with the upper-cased speaker name.
 * @param {boolean} [options.nameOnAnotherLine]  Put the speaker name on its own line.
 * @returns {Array<{from: string, lines: number, to: string, text: string}>}
 *          Cues in transcript order; `from`/`to` are produced by `secondsToHms`.
 */
function prepare(options) {
  // Read defaults into locals instead of writing them back onto the caller's object.
  const opts = options || {};
  const maxLines = opts.lines || 1;
  const maxLineLength = opts.lineLength || 36;
  // parsedJSON only exists after sonixParser.parse() has run.
  const exchanges = sonixParser.parsedJSON || [];
  const cues = [];

  exchanges.forEach(function (exchange) {
    let buffer = '';
    let cue = {};
    let lastWordTime;

    // Emits the buffered text as one cue ending at `endTime`, then resets the buffer.
    const flush = function (endTime) {
      const text = buffer
        .replace(/ +/g, ' ')
        // Each word is appended with a leading space, so a line break is
        // followed by a stray space; drop it so lines start at column 0.
        .replace(/ ?\n ?/g, '\n')
        .trim();

      // Skip cues that contain no visible text.
      if (text !== '') {
        cue.to = secondsToHms(endTime);
        cue.text = opts.named
          ? exchange.speakerName.toUpperCase() + (opts.nameOnAnotherLine ? ':\n' : ': ') + text
          : text;
        cues.push(cue);
      }

      buffer = '';
      cue = {};
    };

    exchange.speach.forEach(function (word) {
      if (!cue.from) {
        cue.from = secondsToHms(word.time);
      }
      buffer += ' ' + word.text;
      cue.lines = cue.lines || 1;
      lastWordTime = word.time;

      const currentLine = buffer.split('\n')[cue.lines - 1];
      const endsSentence = /[.?!]/.test(word.text) && currentLine.length > 10;
      const lineIsFull = currentLine.length > maxLineLength;
      if (!endsSentence && !lineIsFull) {
        return;
      }

      if (cue.lines >= maxLines) {
        flush(word.time);
      } else {
        buffer += '\n';
        cue.lines++;
      }
    });

    // Words left over when the exchange ends (no closing punctuation, line not
    // full yet) would otherwise be lost; emit them as a final cue.
    if (buffer !== '') {
      flush(lastWordTime);
    }
  });

  return cues;
}
/**
 * Parses the page and renders the transcript as SRT text, one cue per
 * sentence/line group as produced by `prepare`.
 *
 * Every cue is shown until the next cue starts; only the last cue uses its
 * own `to` time.
 *
 * @param {Object} [options]  Passed through to `prepare` unchanged.
 * @returns {string} SRT document; each cue is a 1-based index line, a
 *          `from --> to` line and the text, followed by a blank line.
 */
sonixParser.getSRT = function (options) {
  sonixParser.parse();
  const cues = prepare(options);

  return cues
    .map(function (cue, index) {
      const next = cues[index + 1];
      const end = next ? next.from : cue.to;
      return `${index + 1}\n${cue.from} --> ${end}\n${cue.text}\n\n`;
    })
    .join('');
};
/**
 * Parses the page and renders the transcript as SRT text with one cue per
 * exchange (speaker turn).
 *
 * A cue starts at the exchange's timestamp and ends where the next exchange
 * starts. The last exchange has no successor, so it ends a fixed 10 seconds
 * after it starts.
 *
 * @param {Object}  [options]
 * @param {boolean} [options.named]             Prefix every cue with the upper-cased speaker name.
 * @param {boolean} [options.nameOnAnotherLine] Put the speaker name on its own line.
 * @returns {string} SRT document; each cue is a 1-based index line, a
 *          `from --> to` line and the text, followed by a blank line.
 */
sonixParser.getSRTByReplies = function (options) {
  // `options` is optional here just as it is for getSRT.
  const opts = options || {};
  const LAST_CUE_SECONDS = 10;

  sonixParser.parse();
  const exchanges = sonixParser.parsedJSON;

  return exchanges
    .map(function (exchange, index) {
      const next = exchanges[index + 1];
      // `time` is an attribute string, so it must be converted before adding;
      // `time + 10` would append the characters "10" instead. toFixed(3)
      // yields a millisecond-precision string without float noise.
      const endTime = next
        ? next.time
        : (Number(exchange.time) + LAST_CUE_SECONDS).toFixed(3);
      const speaker = opts.named
        ? exchange.speakerName.toUpperCase() + (opts.nameOnAnotherLine ? ':\n' : ': ')
        : '';

      return `${index + 1}\n${secondsToHms(exchange.time)} --> ${secondsToHms(endTime)}\n${speaker}${exchange.text}\n\n`;
    })
    .join('');
};
/**
 * Formats a number of seconds as an SRT timestamp `HH:MM:SS,mmm`.
 *
 * The millisecond part is taken from the decimal digits of the input's string
 * form (padded or truncated to three digits, never rounded), so "12.3456"
 * becomes "00:00:12,345".
 *
 * @param {string|number|null|undefined} d  Seconds, e.g. "83.42" or 83.42.
 * @returns {string} The timestamp. Input that is missing, not a finite number
 *          or negative yields "00:00:00,000". Hours are not truncated, so
 *          values of 100 hours or more produce a three-digit hour field.
 */
function secondsToHms(d) {
  // Accept numbers and missing values as well as strings.
  const raw = d == null ? '' : String(d).trim();
  const total = Number(raw);
  if (!Number.isFinite(total) || total < 0) {
    return '00:00:00,000';
  }

  // Use the literal decimal digits rather than float arithmetic so that
  // e.g. "1.001" cannot turn into 000 through rounding error.
  const fraction = raw.split('.')[1];
  const digits = fraction !== undefined && /^\d+$/.test(fraction) ? fraction : '';
  const millis = (digits + '000').slice(0, 3);

  const whole = Math.floor(total);
  const hours = Math.floor(whole / 3600);
  const minutes = Math.floor((whole % 3600) / 60);
  const seconds = whole % 60;

  const pad2 = function (n) {
    return (n < 10 ? '0' : '') + n;
  };

  return pad2(hours) + ':' + pad2(minutes) + ':' + pad2(seconds) + ',' + millis;
}
// Expose the parser object as a property of `window`.
window.sonixParser = sonixParser;