Last active
December 20, 2021 12:06
-
-
Save hrishikeshrt/d38536b640d79dccb6d52fd8baeaac1a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ==UserScript==
// @name Transliterate DCS
// @namespace http://www.gist.github.com/
// @version 1.1
// @description Transliterate IAST text from DCS into Devanagari
// @author Hrishikesh Terdalkar
// @match http://www.sanskrit-linguistics.org/dcs/index.php?contents=texte&IDTextDisplay=*
// @require https://raw.githubusercontent.com/sanskrit/sanscript.js/master/sanscript.js
// ==/UserScript==
// Entry point, run once when the userscript is injected:
//   1. rewrites the labels of the "text_id" drop-down from IAST to Devanagari;
//   2. adds a "Transliterate" button right after that drop-down which, when
//      clicked, transliterates the displayed sentences and their lemma links.
(function() {
    'use strict';

    const corpusSelector = document.getElementById("text_id");
    if (!corpusSelector) {
        // Without the drop-down there is nothing to relabel and no anchor
        // for the button, so leave the page untouched instead of throwing.
        return;
    }

    // textContent (rather than innerHTML) keeps HTML entities such as "&amp;"
    // from being handed to the transliterator as if they were IAST letters.
    for (const option of corpusSelector.getElementsByTagName("option")) {
        option.textContent = Sanscript.t(option.textContent.toLowerCase(), 'iast', 'devanagari');
    }

    const button = document.createElement('button');
    // A <button> without an explicit type acts as a submit button when it
    // ends up inside a <form>; make sure a click only runs the handler below.
    button.type = "button";
    button.textContent = "Transliterate";
    corpusSelector.parentNode.insertBefore(button, corpusSelector.nextSibling);

    button.addEventListener("click", function () {
        transliterate_sentences();
        transliterate_analysis();
    });
})();
/**
 * Transliterates the sentences shown on the page from IAST to Devanagari.
 *
 * For every element with class "sentence_div" inside the element with id
 * "sentences", the markup is split with split_text_html(), the plain-text
 * pieces are transliterated and the pieces are joined back into the element.
 * Processing of a sentence stops at the first piece that is exactly one
 * space; that piece and everything after it are written back unchanged.
 *
 * Does nothing when the "sentences" element is not present.
 */
function transliterate_sentences() {
    const container = document.getElementById("sentences");
    if (!container) {
        return;
    }

    // A piece is plain text only if it contains none of the characters that
    // start an entity ("&"), a bracketed group ("[") or a tag ("<").
    const plainText = /^[^&\[<]*$/;

    for (const sentence of container.getElementsByClassName("sentence_div")) {
        const parts = split_text_html(sentence.innerHTML);
        for (let j = 0; j < parts.length; ++j) {
            if (parts[j] === " ") {
                // A lone space ends the part of the sentence that is transliterated.
                break;
            }
            if (plainText.test(parts[j])) {
                parts[j] = Sanscript.t(parts[j], 'iast', 'devanagari');
            }
        }
        sentence.innerHTML = parts.join("");
    }
}
/**
 * Transliterates the lemma links of the analysis from IAST to Devanagari.
 *
 * Every element with class "text-lemma-link" inside the element with id
 * "sentences" gets its innerHTML replaced by the transliterated innerHTML.
 * Does nothing when the "sentences" element is not present.
 */
function transliterate_analysis() {
    const container = document.getElementById("sentences");
    if (!container) {
        return;
    }

    // NOTE(review): the whole innerHTML is transliterated, so this assumes the
    // lemma links hold plain text without tags or entities - confirm on the page.
    for (const lemma of container.getElementsByClassName("text-lemma-link")) {
        lemma.innerHTML = Sanscript.t(lemma.innerHTML, 'iast', 'devanagari');
    }
}
/**
 * Splits an HTML fragment into consecutive pieces: HTML entities, tags,
 * bracketed groups and runs of plain text.
 *
 * The split is lossless: joining the returned pieces with "" yields `s`
 * again. A "&", "[" or "<" that does not start a complete entity, bracketed
 * group or tag is returned as a one-character piece of its own instead of
 * being dropped.
 *
 * @param {string} s - HTML fragment to split.
 * @returns {string[]} The pieces in document order; empty for an empty string.
 */
function split_text_html(s) {
    // Order matters: alternatives are tried left to right at each position.
    const patterns = [
        "&[^; ]*;",       // HTML entity, e.g. "&nbsp;"
        "<[^>]*>",        // HTML tag
        "\\[[^\\]]*\\]",  // text in brackets
        "[^&\\[<]+",      // plain text up to the start of the next entity, bracket or tag
        "[&\\[<]"         // stray "&", "[" or "<" that none of the above could consume
    ];
    const re = new RegExp(patterns.join("|"), "g");
    // match() returns null when nothing matches, which here only happens for "".
    return s.match(re) || [];
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment