Last active
February 2, 2022 10:07
-
-
Save IlanFrumer/3ee28256f0e340f924f3a82d97725835 to your computer and use it in GitHub Desktop.
Transform the Peshitta into Hebrew letters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// npm install fast-glob
// wget https://github.com/ETCBC/peshitta/archive/refs/tags/v0.5.zip && unzip v0.5.zip
const fg = require("fast-glob");
const path = require("path");
const fs = require("fs");
/**
 * Syriac consonant -> Hebrew consonant (always the non-final Hebrew form).
 *
 * Each trailing comment names the Hebrew letter that is produced. Both Syriac
 * Semkath shapes (the regular one and the final one) map to the single
 * Hebrew Samekh.
 */
const SyriacMap = Object.freeze({
  "ܐ": "א", // Aleph
  "ܒ": "ב", // Beth
  "ܓ": "ג", // Gimel
  "ܕ": "ד", // Daleth
  "ܗ": "ה", // He
  "ܘ": "ו", // Vav
  "ܙ": "ז", // Zayin
  "ܚ": "ח", // Het
  "ܛ": "ט", // Tet
  "ܝ": "י", // Yod
  "ܟ": "כ", // Kaf
  "ܠ": "ל", // Lamed
  "ܡ": "מ", // Mem
  "ܢ": "נ", // Nun
  "ܣ": "ס", // Samekh
  "ܤ": "ס", // Samekh (Syriac final Semkath form)
  "ܥ": "ע", // Ayin
  "ܦ": "פ", // Pe
  "ܨ": "צ", // Tsadi
  "ܩ": "ק", // Qof
  "ܪ": "ר", // Resh
  "ܫ": "ש", // Shin
  "ܬ": "ת", // Tav
});
/**
 * Hebrew letter -> the final (sofit) form it takes at the end of a word.
 * Only the five letters that have a distinct final form are listed.
 */
const finalFormMap = Object.freeze({
  "מ": "ם", // Mem
  "נ": "ן", // Nun
  "צ": "ץ", // Tsadi
  "פ": "ף", // Pe
  "כ": "ך", // Kaf
});
// Directory that is searched recursively for the Syriac *.txt input files
// (the "plain" folder of the unzipped v0.5 archive fetched in the setup step).
const source_dir = "./peshitta-0.5/plain";
// Directory that receives the converted files; created on demand.
const target_dir = "./peshitta";
/**
 * Converts one text from Syriac script to Hebrew script.
 *
 * - Characters present in `letterMap` are replaced by their mapped value.
 * - Unmapped characters with a code point of 255 or below (ASCII / Latin-1:
 *   digits, punctuation, whitespace, ...) are kept as they are.
 * - Every other unmapped character is dropped from the output and tallied in
 *   `skipped`.
 * - Finally, each letter that has a final form and is not followed by another
 *   Hebrew letter (U+05D0..U+05EA) is replaced by that final form.
 *
 * @param {string} text - Input text.
 * @param {Object<string, string>} letterMap - Source character -> Hebrew letter.
 * @param {Object<string, string>} finalMap - Hebrew letter -> its final form.
 * @param {Map<string, {hex: string, count: number}>} skipped - Mutated in
 *   place: one entry per dropped character, holding its code point in hex and
 *   the number of times it was dropped.
 * @returns {string} The converted text.
 */
function transliterate(text, letterMap, finalMap, skipped) {
  let hebrew = "";
  // Iterating the string walks code points, so a character outside the BMP is
  // seen (and tallied) once instead of as two lone surrogate halves.
  for (const char of text) {
    const mapped = letterMap[char];
    if (mapped !== undefined) {
      hebrew += mapped;
      continue;
    }
    const code = char.codePointAt(0);
    if (code <= 255) {
      hebrew += char;
      continue;
    }
    const entry = skipped.get(char) ?? { hex: code.toString(16), count: 0 };
    entry.count++;
    skipped.set(char, entry);
  }
  // Class: kaf, mem, nun, pe, tsadi. The negative lookahead also succeeds at
  // the very end of the text, so a last word without a trailing newline still
  // receives its final form.
  return hebrew.replace(
    /[\u05DB\u05DE\u05E0\u05E4\u05E6](?![\u05D0-\u05EA])/g,
    (letter) => finalMap[letter] ?? letter
  );
}

/**
 * Converts every *.txt file found under `source_dir` (recursively) and writes
 * the result to `target_dir` under the same base name, then logs the tally of
 * characters that were dropped during conversion.
 *
 * @returns {Promise<void>}
 */
async function main() {
  await fs.promises.mkdir(target_dir, { recursive: true });
  // Glob patterns must use forward slashes; handing the directory over as
  // `cwd` keeps platform-specific separators out of the pattern itself.
  const files = await fg("**/*.txt", {
    cwd: path.resolve(source_dir),
    absolute: true,
  });
  const specialcharsMap = new Map();
  for (const origin of files) {
    const text = await fs.promises.readFile(origin, "utf8");
    const target = path.join(target_dir, path.basename(origin));
    const body = transliterate(text, SyriacMap, finalFormMap, specialcharsMap);
    await fs.promises.writeFile(target, body);
  }
  console.log("specialcharsMap: ", specialcharsMap);
}
// Run the conversion; on any failure print the error to stderr and exit with
// a non-zero status so callers (shell scripts, CI) can detect it.
main().catch((error) => {
  console.error(error);
  process.exit(1);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment