Skip to content

Instantly share code, notes, and snippets.

@fusetim
Created April 12, 2021 08:54
Show Gist options
  • Save fusetim/5145054414582fd64b3a4750815afef5 to your computer and use it in GitHub Desktop.
Save fusetim/5145054414582fd64b3a4750815afef5 to your computer and use it in GitHub Desktop.
Converting Greek source text to LaTeX source text (using babel mapping)
use std::collections::HashMap;
/// Builds any collection implementing `FromIterator` from a literal list.
///
/// Two forms are accepted, both with an optional trailing comma:
///
/// * map-like — `collection! { k1 => v1, k2 => v2 }` yields `(key, value)`
///   tuples, suitable for `HashMap`, `BTreeMap`, `Vec<(K, V)>`, ...
/// * set-like — `collection! { a, b, c }` yields the bare values, suitable
///   for `HashSet`, `Vec`, ...
///
/// The target collection type is not chosen by the macro; it must be
/// inferable at the call site (for example through a `let` type annotation).
macro_rules! collection {
    // map-like
    ($($k:expr => $v:expr),* $(,)?) => {
        // The array is consumed by value through its `IntoIterator` impl; this
        // replaces the deprecated `std::array::IntoIter::new` constructor.
        std::iter::Iterator::collect(std::iter::IntoIterator::into_iter([$(($k, $v),)*]))
    };
    // set-like
    ($($v:expr),* $(,)?) => {
        std::iter::Iterator::collect(std::iter::IntoIterator::into_iter([$($v,)*]))
    };
}
/// Demo entry point: transliterates a fixed passage of polytonic Greek with
/// `match_char` and prints the resulting LaTeX source on standard output.
fn main() {
    // Sample input; kept verbatim because it is the program's only data.
    let passage = String::from("Εἰπέ µοι, ἔφη, ὦ Εὐθύδηµε, τῷ ὄντι, ὥσπερ ἐγὼ ἀκούω, πολλὰ γράµµατα συνῆχας τῶν λεγοµένων σοφῶν ἀνδρῶν γεγονέναι; » Καὶ ὁ Εὐθύδηµος, « Νὴ τὸν Δί᾽, ἔφη, ὦ Σώκρατες: καὶ ἔτι γε συνάγω, ἕως ἂν κτήσωµαι ὡς ἂν δύνωµαι πλεῖστα. [9] — Νὴ τὴν Ἥραν, ἔφη ὁ Σωκράτης, ἄγαµαί γέ σου, διότι οὐκ ἀργυρίου καὶ χρυσίου προείλου θησαυροὺς κεκτῆσθαι µᾶλλον ἢ σοφίας: δῆλον γὰρ ὅτι νοµίζεις ἀργύριον καὶ χρυσίον οὐδὲν βελτίους ποιεῖν τοὺς ἀνθρώπους, τὰς δὲ τῶν σοφῶν ἀνδρῶν γνώµας ἀρετῇ πλουτίζειν τοὺς κεκτηµένους. » Καὶ ὁ Εὐθύδηµος ἔχαιρεν ἀκούων ταῦτα, νοµίζων δοκεῖν τῷ Σωκράτει ὀρθῶς µετιέναι τὴν σοφίαν.");
    let latex_source = match_char(passage);
    println!("{}", latex_source);
}
/// Looks up the babel (LGR-style) transliteration of one *lowercase* Greek
/// code point.
///
/// Returns `(marks, body)` where `marks` is the breathing/accent prefix (empty
/// for table entries whose accent is already part of `body`) and `body` is the
/// ASCII letter, including any accent macro and the trailing `|` that marks an
/// iota subscript. Both parts contain only ASCII. Returns `None` when the code
/// point has no entry.
fn lgr_parts(lower: char) -> Option<(&'static str, &'static str)> {
    // In the breathing rows of the Greek Extended block the low three bits
    // select the diacritic combination: smooth, rough, then each of those
    // combined with grave, acute and circumflex.
    const BREATHING_MARKS: [&str; 8] = [
        ">", "<", "\\`>", "\\`<", "\\'>", "\\'<", "\\~>", "\\~<",
    ];

    let codepoint = lower as u32;

    // Only the lowercase half of each row is listed (the caller lowercases
    // first); epsilon and omicron have no circumflex forms, hence the shorter
    // ranges. Rows U+1F80–U+1FAF additionally carry an iota subscript.
    let breathing_row = match codepoint {
        0x1F00..=0x1F07 => Some("a"),
        0x1F10..=0x1F15 => Some("e"),
        0x1F20..=0x1F27 => Some("h"),
        0x1F30..=0x1F37 => Some("i"),
        0x1F40..=0x1F45 => Some("o"),
        0x1F50..=0x1F57 => Some("u"),
        0x1F60..=0x1F67 => Some("w"),
        0x1F80..=0x1F87 => Some("a|"),
        0x1F90..=0x1F97 => Some("h|"),
        0x1FA0..=0x1FA7 => Some("w|"),
        _ => None,
    };
    if let Some(letter) = breathing_row {
        // `codepoint & 0x7` is always in 0..=7, so the index cannot go out of bounds.
        return Some((BREATHING_MARKS[(codepoint & 0x7) as usize], letter));
    }

    let body = match codepoint {
        // Punctuation.
        0x037E => "?",
        0x0387 => ";",
        // Monotonic accented vowels and the plain alphabet.
        0x0390 => "\\'\"i",
        0x03AC => "\\'a",
        0x03AD => "\\'e",
        0x03AE => "\\'h",
        0x03AF => "\\'i",
        0x03B0 => "\\'\"u",
        0x03B1 => "a",
        0x03B2 => "b",
        0x03B3 => "g",
        0x03B4 => "d",
        0x03B5 => "e",
        0x03B6 => "z",
        0x03B7 => "h",
        0x03B8 => "j",
        0x03B9 => "i",
        0x03BA => "k",
        0x03BB => "l",
        0x03BC => "m",
        0x03BD => "n",
        0x03BE => "x",
        0x03BF => "o",
        0x03C0 => "p",
        0x03C1 => "r",
        0x03C2 => "c",
        0x03C3 => "s",
        0x03C4 => "t",
        0x03C5 => "u",
        0x03C6 => "f",
        0x03C7 => "q",
        0x03C8 => "y",
        0x03C9 => "w",
        0x03CA => "\"i",
        0x03CB => "\"u",
        0x03CC => "\\'o",
        0x03CD => "\\'u",
        0x03CE => "\\'w",
        // Vowels with grave (varia) / acute (oxia): U+1F70–U+1F7D.
        0x1F70 => "\\`a",
        0x1F71 => "\\'a",
        0x1F72 => "\\`e",
        0x1F73 => "\\'e",
        0x1F74 => "\\`h",
        0x1F75 => "\\'h",
        0x1F76 => "\\`i",
        0x1F77 => "\\'i",
        0x1F78 => "\\`o",
        0x1F79 => "\\'o",
        0x1F7A => "\\`u",
        0x1F7B => "\\'u",
        0x1F7C => "\\`w",
        0x1F7D => "\\'w",
        // Alpha: vrachy/macron are not representable and fall back to the bare letter.
        0x1FB0 => "a",
        0x1FB1 => "a",
        0x1FB2 => "\\`a|",
        0x1FB3 => "a|",
        0x1FB4 => "\\'a|",
        0x1FB6 => "\\~a",
        0x1FB7 => "\\~a|",
        // Eta.
        0x1FC2 => "\\`h|",
        0x1FC3 => "h|",
        0x1FC4 => "\\'h|",
        0x1FC6 => "\\~h",
        0x1FC7 => "\\~h|",
        // Iota.
        0x1FD0 => "i",
        0x1FD1 => "i",
        0x1FD2 => "\\`\"i",
        0x1FD3 => "\\'\"i",
        0x1FD6 => "\\~i",
        0x1FD7 => "\\~\"i",
        // Upsilon and rho.
        0x1FE0 => "u",
        0x1FE1 => "u",
        0x1FE2 => "\\`\"u",
        0x1FE3 => "\\'\"u",
        0x1FE4 => ">r",
        0x1FE5 => "<r",
        0x1FE6 => "\\~u",
        0x1FE7 => "\\~\"u",
        // Omega.
        0x1FF2 => "\\`w|",
        0x1FF3 => "w|",
        0x1FF4 => "\\'w|",
        0x1FF6 => "\\~w",
        0x1FF7 => "\\~w|",
        _ => return None,
    };
    Some(("", body))
}

/// Converts Greek (monotonic and polytonic) text into LaTeX source using the
/// ASCII transliteration understood by babel's Greek support.
///
/// Every character of `caracter` is handled independently:
///
/// * a Greek letter with a table entry is replaced by its transliteration;
///   capitals (any character whose lowercase form differs from itself, which
///   also covers the titlecase iota-adscript capitals) get the same
///   transliteration with the letter uppercased;
/// * every other character — Latin text, digits, punctuation, and Greek code
///   points without an entry — is copied to the output unchanged.
///
/// Returns the transliterated text as a new `String`; an empty input yields an
/// empty output.
fn match_char(caracter: String) -> String {
    // Most replacements are no longer (in bytes) than the UTF-8 they replace,
    // so the input length is a reasonable starting capacity.
    let mut latex_encoded = String::with_capacity(caracter.len());

    for original in caracter.chars() {
        // The tables are keyed by lowercase code points. A lowercase mapping
        // that expands to several characters is never Greek, so such
        // characters are passed through untouched.
        let mut lowered = original.to_lowercase();
        let lower = match (lowered.next(), lowered.next()) {
            (Some(single), None) => single,
            _ => {
                latex_encoded.push(original);
                continue;
            }
        };

        match lgr_parts(lower) {
            Some((marks, body)) => {
                latex_encoded.push_str(marks);
                if lower != original {
                    // Capital letter: only the ASCII letter changes case, the
                    // accent macros and `|` are unaffected by uppercasing.
                    latex_encoded.extend(body.chars().map(|c| c.to_ascii_uppercase()));
                } else {
                    latex_encoded.push_str(body);
                }
            }
            // No mapping: keep the character rather than dropping it.
            None => latex_encoded.push(original),
        }
    }

    latex_encoded
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment