Skip to content

Instantly share code, notes, and snippets.

@abarrak
Last active September 29, 2016 23:38
Show Gist options
  • Select an option

  • Save abarrak/12cc40eeb423598fe00b32e07ebb9f92 to your computer and use it in GitHub Desktop.

Select an option

Save abarrak/12cc40eeb423598fe00b32e07ebb9f92 to your computer and use it in GitHub Desktop.
Scripting Wikipedia to get RTL languages list

This shows how to obtain a list of the known languages that are written from right to left.

The scripting is done on the ISO 15924 page on Wikipedia.

Get Data Container:

// Locate the first table on the page carrying the "wikitable" class.
var t = document.querySelector('table.wikitable');
// Collect the table's <tbody> sections; only the first one is used below.
var tb = t.getElementsByTagName('tbody');
// `ch` is the collection of child rows of that first <tbody>.
// It must stay a collection: the loops that consume it rely on `ch.length`
// and `ch[i]`, neither of which exists on a single row element.
var ch = tb[0].children;

Define the lists:

// Result lists, filled with one entry per right-to-left row of the table.
// Each list receives the content of a different cell of the row.
var rtl_iso_code = [];  // cell 0 of each matching row
var rtl_iso_num = [];   // cell 1 of each matching row
var rtl_unicode = [];   // cell 3 of each matching row
var rtl_full = [];      // first child element of cell 2 of each matching row

Populate the lists:

// Walk the rows in `ch` once and, for every row whose direction cell
// (cell index 4) reads "R-to-L", copy that row's cells into the four lists.
// A single pass fills all lists in the same row order as separate passes would.
for (var i = 0; i < ch.length; i++) {
  var cells = ch[i].children;

  // Skip rows that have no cell at index 4; reading `.innerHTML` from a
  // missing cell would throw a TypeError and abort the whole scan.
  if (cells.length < 5) {
    continue;
  }

  // Trim before comparing so whitespace around the cell content does not
  // defeat the match.
  if (cells[4].innerHTML.trim() !== "R-to-L") {
    continue;
  }

  rtl_iso_code.push(cells[0].innerHTML);
  rtl_iso_num.push(cells[1].innerHTML);
  rtl_unicode.push(cells[3].innerHTML);

  // The full name is taken from the first child element of cell 2.
  // Fall back to the cell's own content when it has no child element.
  var nameEl = cells[2].children[0];
  rtl_full.push((nameEl || cells[2]).innerHTML);
}

The lists extracted:

// Print the collected codes as one space-separated line.
// `str` has to be declared before it is used: appending to an undeclared
// variable with `+=` throws a ReferenceError.
// `join` places the separator only between entries, so the line carries no
// trailing space.
var str = rtl_iso_code.join(" ");
console.log(str);

// Output:
// Adlm Arab Aran Armi Avst Cprt Egyd Egyh Hatr Hebr Hung Inds Khar Lydi Mand Mani Mend Merc Mero Narb Nbat Nkoo Orkh Palm Phli Phlp Phlv Phnx Prti Samr Sarb Syrc Syre Syrj Syrn Thaa Wole
// Print the collected numeric codes as one space-separated line.
// The result is assigned to the `str` variable shared by the printing
// snippets. `join` places the separator only between entries, so the line
// carries no trailing space.
str = rtl_iso_num.join(" ");
console.log(str);

// Output:
// 166 160 161 124 134 403 070 060 127 125 176 610 305 116 140 139 438 101 100 106 159 165 175 126 131 132 133 115 130 123 105 135 138 137 136 170 480
// Print the collected full names as a comma-separated list of
// double-quoted strings. The result is assigned to the `str` variable shared
// by the printing snippets. Quoting each entry first and then joining with
// ", " avoids a dangling separator after the last entry.
str = rtl_full
  .map(function (name) {
    return '"' + name + '"';
  })
  .join(", ");
console.log(str);

// Output:
// "Adlam", "Arabic", "Arabic (Nastaliq variant)", "Imperial Aramaic", "Avestan", "Cypriot", "Egyptian demotic", "Egyptian hieratic", "Hatran", "Hebrew", "Old Hungarian (Hungarian Runic)", "Indus (Harappan)", "Kharoshthi", "Lydian", "Mandaic, Mandaean", "Manichaean", "Mende Kikakui", "Meroitic Cursive", "Meroitic Hieroglyphs", "Old North Arabian (Ancient North Arabian)", "Nabataean", "N’Ko", "Old Turkic, Orkhon Runic", "Palmyrene", "Inscriptional Pahlavi", "Psalter Pahlavi", "Book Pahlavi", "Phoenician", "Inscriptional Parthian", "Samaritan", "Old South Arabian", "Syriac", "Syriac (Estrangelo variant)", "Syriac (Western variant)", "Syriac (Eastern variant)", "Thaana", "Woleai"
// Print the collected Unicode names as a comma-separated list of
// double-quoted strings; entries may be empty and then print as "".
// The result is assigned to the `str` variable shared by the printing
// snippets. Quoting each entry first and then joining with ", " avoids a
// dangling separator after the last entry.
str = rtl_unicode
  .map(function (alias) {
    return '"' + alias + '"';
  })
  .join(", ");
console.log(str);

// Output:
// "Adlam", "Arabic", "", "Imperial Aramaic", "Avestan", "Cypriot", "", "", "Hatran", "Hebrew", "Old Hungarian", "", "Kharoshthi", "Lydian", "Mandaic", "Manichaean", "Mende Kikakui", "Meroitic Cursive", "Meroitic Hieroglyphs", "Old North Arabian", "Nabataean", "NKo", "Old Turkic", "Palmyrene", "Inscriptional Pahlavi", "Psalter Pahlavi", "", "Phoenician", "Inscriptional Parthian", "Samaritan", "Old South Arabian", "Syriac", "", "", "", "Thaana", ""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment