Skip to content

Instantly share code, notes, and snippets.

@Timur00Kh
Last active January 4, 2019 23:19
Show Gist options
  • Save Timur00Kh/179de9424bc3dc3329854f512eae093b to your computer and use it in GitHub Desktop.
Save Timur00Kh/179de9424bc3dc3329854f512eae093b to your computer and use it in GitHub Desktop.
Parse SRT from sonix transcription

How does it work?

I was too lazy to create an interface so all interaction with the script takes place through the console.

  1. Inject the script into the page (for example, using Tampermonkey).
  2. Open the browser console (F12).
  3. Type sonixParser.getSRT(); and press Enter.
  4. Copy the text from the console and save it as an .srt file using a text editor.

Options

sonixParser.getSRT({
	lines: 3, // max number of text lines per subtitle. DEFAULT: 1
	lineLength: 50, // length (in characters) after which a subtitle line is ended. DEFAULT: 36
	named: true, // show speaker name. DEFAULT: false
	nameOnAnotherLine: true // DEFAULT: false
})

https://sonix.ai/

// ==UserScript==
// @name Sonix parser
// @namespace
// @version 0.2
// @description Parse SRT from sonix transcription
// @author Cabbage Storm
// @match https://sonix.ai/resources/*
// @grant none
// @copyright 2019, Newt300 (https://openuserjs.org//users/Newt300)
// @license MIT
// ==/UserScript==
let sonixParser = {};

/**
 * Reads the transcript out of the current page's DOM and stores it on
 * `sonixParser.parsedJSON` (the result is also logged to the console).
 *
 * Every `.sonix--transcript-exchange` element becomes one entry:
 *   {
 *     time:        data-ts of the exchange timestamp element (string), falling
 *                  back to the first word's data-ts, or null if neither exists,
 *     speakerName: innerText of the speaker-name element ('' when absent),
 *     text:        innerText of the exchange text element ('' when absent),
 *     speach:      [{ time, text }] - one item per <span> inside the text element
 *   }
 *
 * Missing child elements are tolerated so that a single incomplete exchange
 * does not abort parsing of the whole transcript.
 */
sonixParser.parse = function() {
    // innerText of the first descendant matching `selector`, or '' when there is none.
    const textOf = function(root, selector) {
        const node = root.querySelector(selector);
        return node ? node.innerText : '';
    };

    const exchanges = [];
    document.querySelectorAll('.sonix--transcript-exchange').forEach(function(exchangeEl) {
        const speach = [];
        exchangeEl.querySelectorAll('.sonix--transcript-exchange-text span').forEach(function(wordEl) {
            speach.push({
                time: wordEl.getAttribute('data-ts'),
                text: wordEl.innerText
            });
        });

        const stampEl = exchangeEl.querySelector('.sonix--transcript-exchange-timestamp');
        let time = stampEl ? stampEl.getAttribute('data-ts') : null;
        if (time === null && speach.length > 0) {
            // No usable exchange-level timestamp: use the moment of the first word instead.
            time = speach[0].time;
        }

        exchanges.push({
            time: time,
            speakerName: textOf(exchangeEl, '.sonix--transcript-exchange-speaker-name'),
            text: textOf(exchangeEl, '.sonix--transcript-exchange-text'),
            speach: speach
        });
    });

    sonixParser.parsedJSON = exchanges;
    console.log(exchanges);
};
/**
 * Splits the parsed transcript (`sonixParser.parsedJSON`) into subtitle cues.
 *
 * Words of each exchange are appended to a buffer. After every word the current
 * line is checked: if the word contains '.', '?' or '!' and the line is longer
 * than 10 characters, or the line is longer than `lineLength`, the line is
 * ended. Ending a line either starts a new line inside the same cue or, once
 * the cue already has `lines` lines, emits the cue. Whatever is still buffered
 * when an exchange ends is emitted as a final cue, so no words are lost and
 * cues never span two speakers.
 *
 * @param {Object}  [options]
 * @param {number}  [options.lines=1]        Max number of text lines per cue.
 * @param {number}  [options.lineLength=36]  Length after which a line is ended.
 * @param {boolean} [options.named]          Prefix each cue with the upper-cased speaker name.
 * @param {boolean} [options.nameOnAnotherLine]  Put the speaker name on its own line.
 * @returns {Array<{from: string, to: string, text: string, lines: number}>}
 *          Cues in transcript order; `from`/`to` are produced by secondsToHms.
 *          `to` is the start time of the cue's last word.
 */
function prepare(options) {
    // Read the options without writing defaults back into the caller's object.
    const opts = options || {};
    const maxLines = opts.lines || 1;
    const maxLineLength = opts.lineLength || 36;
    const exchanges = sonixParser.parsedJSON || [];
    const sentences = [];

    exchanges.forEach(function (exchange) {
        let buffer = '';
        let cue = {};
        let lastWordTime;

        // Finalizes the cue being built, ending it at `endTime`, and starts a new one.
        const flush = function (endTime) {
            cue.to = secondsToHms(endTime);
            cue.text = buffer
                .replace(/ +/g, ' ')
                // Every word is appended with a leading space, which would otherwise
                // leave wrapped lines starting with a blank.
                .replace(/ ?\n ?/g, '\n')
                .trim();
            if (opts.named) {
                cue.text = String(exchange.speakerName || '').toUpperCase()
                    + (opts.nameOnAnotherLine ? ':\n' : ': ')
                    + cue.text;
            }
            sentences.push(cue);
            buffer = '';
            cue = {};
        };

        exchange.speach.forEach(function (word) {
            if (!cue.from) {
                cue.from = secondsToHms(word.time);
            }
            buffer += ' ' + word.text;
            cue.lines = cue.lines || 1;
            lastWordTime = word.time;

            const currentLine = buffer.split('\n')[cue.lines - 1];
            const endsSentence = /[.?!]/.test(word.text) && currentLine.length > 10;
            if (!endsSentence && !(currentLine.length > maxLineLength)) {
                return;
            }

            if (cue.lines >= maxLines) {
                flush(word.time);
            } else {
                buffer += '\n';
                cue.lines++;
            }
        });

        // Emit the tail of the exchange (e.g. a short closing sentence) instead of dropping it.
        if (buffer.trim() !== '') {
            flush(lastWordTime);
        }
    });

    return sentences;
}
/**
 * Re-parses the page and renders the transcript as SRT text, one cue per
 * sentence/line group produced by prepare().
 *
 * Each cue is shown until the next cue starts; the last cue ends at its own
 * `to` time.
 *
 * @param {Object} [options] Passed through unchanged to prepare().
 * @returns {string} The SRT document (empty string when there are no cues).
 */
sonixParser.getSRT = function(options) {
    sonixParser.parse();
    const cues = prepare(options);

    const blocks = cues.map(function (cue, index) {
        const following = cues[index + 1];
        const endsAt = following ? following.from : cue.to;
        return `${index + 1}\n${cue.from} --> ${endsAt}\n${cue.text}\n\n`;
    });

    return blocks.join('');
}
/**
 * Re-parses the page and renders the transcript as SRT text with exactly one
 * cue per exchange (speaker reply).
 *
 * Each cue lasts until the next exchange starts. The last cue has no successor,
 * so it is given a fixed duration of 10 seconds.
 *
 * @param {Object}  [options]
 * @param {boolean} [options.named]             Prefix each cue with the upper-cased speaker name.
 * @param {boolean} [options.nameOnAnotherLine] Put the speaker name on its own line.
 * @returns {string} The SRT document (empty string when there are no exchanges).
 */
sonixParser.getSRTByReplies = function(options) {
    // Options are optional: calling without arguments must not throw.
    const opts = options || {};
    const LAST_CUE_SECONDS = 10;

    sonixParser.parse();
    const replies = sonixParser.parsedJSON;
    let SRTstring = '';

    replies.forEach(function (reply, index) {
        const next = replies[index + 1];
        // `time` is an attribute value, i.e. a string: convert before adding, otherwise
        // "12.34" + 10 would concatenate to "12.3410". The result is turned back into a
        // string because secondsToHms parses the fractional part textually.
        const endTime = next
            ? next.time
            : (Number(reply.time) + LAST_CUE_SECONDS).toFixed(3);

        SRTstring += (index + 1) + '\n';
        SRTstring += secondsToHms(reply.time) + ' --> ' + secondsToHms(endTime) + '\n';
        if (opts.named) {
            SRTstring += reply.speakerName.toUpperCase()
                + (opts.nameOnAnotherLine ? ':\n' : ': ');
        }
        SRTstring += reply.text + '\n\n';
    });

    return SRTstring;
}
/**
 * Formats a time offset in seconds as an SRT timestamp, `HH:MM:SS,mmm`.
 *
 * Milliseconds are truncated (not rounded) to three digits for string input;
 * numeric input is first rendered with three decimals. Hours use at least two
 * digits and grow as needed (e.g. `100:00:00,000`).
 *
 * @param {string|number} d Non-negative number of seconds, e.g. "75.25" or 75.25.
 * @returns {string} The formatted timestamp.
 * @throws {TypeError}  If `d` is neither a string nor a number.
 * @throws {RangeError} If `d` is not a finite, non-negative number.
 */
function secondsToHms(d) {
    let text;
    if (typeof d === 'number') {
        // toFixed avoids exponent notation and gives a stable fractional part.
        text = d.toFixed(3);
    } else if (typeof d === 'string') {
        text = d.trim();
    } else {
        throw new TypeError('secondsToHms expects a number or a numeric string, got ' + d);
    }

    const total = Number(text);
    if (!Number.isFinite(total) || total < 0) {
        throw new RangeError('secondsToHms got an invalid time: "' + text + '"');
    }

    const pad2 = function (n) {
        return n < 10 ? '0' + n : String(n);
    };

    const hours = Math.floor(total / 3600);
    const minutes = Math.floor(total % 3600 / 60);
    const seconds = Math.floor(total % 60);
    // Take the fractional digits textually so "1.005" yields 005, free of float error.
    const fraction = text.split('.')[1] || '';
    const millis = (fraction + '000').slice(0, 3);

    return pad2(hours) + ':' + pad2(minutes) + ':' + pad2(seconds) + ',' + millis;
}
// Publish the parser on the page's global object so that sonixParser.getSRT()
// and the other methods can be called from the browser console.
window.sonixParser = sonixParser;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment