Created
April 12, 2021 08:54
-
-
Save fusetim/5145054414582fd64b3a4750815afef5 to your computer and use it in GitHub Desktop.
Converting Greek source text to LaTeX source text (using babel mapping)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::collections::HashMap; | |
/// Builds any collection implementing `FromIterator` from a literal list.
///
/// Two forms are accepted, each with an optional trailing comma:
///
/// * map-like: `collection! { k1 => v1, k2 => v2 }` collects `(key, value)` tuples;
/// * set-like: `collection! { v1, v2, v3 }` collects the bare values.
///
/// The target collection type is inferred from the call site, so the result
/// usually needs a type annotation (e.g. `let m: HashMap<_, _> = collection! { .. };`).
macro_rules! collection {
    // map-like
    ($($k:expr => $v:expr),* $(,)?) => {
        // The by-value `IntoIterator` impl for arrays replaces the deprecated
        // `std::array::IntoIter::new` constructor.
        ::std::iter::Iterator::collect(::std::iter::IntoIterator::into_iter([$(($k, $v),)*]))
    };
    // set-like
    ($($v:expr),* $(,)?) => {
        ::std::iter::Iterator::collect(::std::iter::IntoIterator::into_iter([$($v,)*]))
    };
}
fn main() { | |
println!("{}", match_char("Εἰπέ µοι, ἔφη, ὦ Εὐθύδηµε, τῷ ὄντι, ὥσπερ ἐγὼ ἀκούω, πολλὰ γράµµατα συνῆχας τῶν λεγοµένων σοφῶν ἀνδρῶν γεγονέναι; » Καὶ ὁ Εὐθύδηµος, « Νὴ τὸν Δί᾽, ἔφη, ὦ Σώκρατες: καὶ ἔτι γε συνάγω, ἕως ἂν κτήσωµαι ὡς ἂν δύνωµαι πλεῖστα. [9] — Νὴ τὴν Ἥραν, ἔφη ὁ Σωκράτης, ἄγαµαί γέ σου, διότι οὐκ ἀργυρίου καὶ χρυσίου προείλου θησαυροὺς κεκτῆσθαι µᾶλλον ἢ σοφίας: δῆλον γὰρ ὅτι νοµίζεις ἀργύριον καὶ χρυσίον οὐδὲν βελτίους ποιεῖν τοὺς ἀνθρώπους, τὰς δὲ τῶν σοφῶν ἀνδρῶν γνώµας ἀρετῇ πλουτίζειν τοὺς κεκτηµένους. » Καὶ ὁ Εὐθύδηµος ἔχαιρεν ἀκούων ταῦτα, νοµίζων δοκεῖν τῷ Σωκράτει ὀρθῶς µετιέναι τὴν σοφίαν.".to_string())); | |
} | |
/// Looks up the transliteration of a lowercase Greek code point that maps to
/// one fixed string (plain letters, punctuation, and precomposed letters
/// carrying an accent, diaeresis and/or iota subscript but no breathing mark).
///
/// Returns `None` when the code point has no entry in this table.
fn lgr_fixed(codepoint: u32) -> Option<&'static str> {
    Some(match codepoint {
        // Greek and Coptic block: punctuation, then the monotonic alphabet.
        0x037E => "?",
        0x0387 => ";",
        0x0390 => "\\'\"i",
        0x03AC => "\\'a",
        0x03AD => "\\'e",
        0x03AE => "\\'h",
        0x03AF => "\\'i",
        0x03B0 => "\\'\"u",
        0x03B1 => "a",
        0x03B2 => "b",
        0x03B3 => "g",
        0x03B4 => "d",
        0x03B5 => "e",
        0x03B6 => "z",
        0x03B7 => "h",
        0x03B8 => "j",
        0x03B9 => "i",
        0x03BA => "k",
        0x03BB => "l",
        0x03BC => "m",
        0x03BD => "n",
        0x03BE => "x",
        0x03BF => "o",
        0x03C0 => "p",
        0x03C1 => "r",
        0x03C2 => "c",
        0x03C3 => "s",
        0x03C4 => "t",
        0x03C5 => "u",
        0x03C6 => "f",
        0x03C7 => "q",
        0x03C8 => "y",
        0x03C9 => "w",
        0x03CA => "\"i",
        0x03CB => "\"u",
        0x03CC => "\\'o",
        0x03CD => "\\'u",
        0x03CE => "\\'w",
        // U+1F70..U+1F7D: vowels with grave (even) or acute (odd) accent, in
        // the order alpha, epsilon, eta, iota, omicron, upsilon, omega.
        0x1F70 => "\\`a",
        0x1F71 => "\\'a",
        0x1F72 => "\\`e",
        0x1F73 => "\\'e",
        0x1F74 => "\\`h",
        0x1F75 => "\\'h",
        0x1F76 => "\\`i",
        0x1F77 => "\\'i",
        0x1F78 => "\\`o",
        0x1F79 => "\\'o",
        0x1F7A => "\\`u",
        0x1F7B => "\\'u",
        0x1F7C => "\\`w",
        0x1F7D => "\\'w",
        // Alpha: the length marks (U+1FB0/U+1FB1) are not represented.
        0x1FB0 => "a",
        0x1FB1 => "a",
        0x1FB2 => "\\`a|",
        0x1FB3 => "a|",
        0x1FB4 => "\\'a|",
        0x1FB6 => "\\~a",
        0x1FB7 => "\\~a|",
        // Eta.
        0x1FC2 => "\\`h|",
        0x1FC3 => "h|",
        0x1FC4 => "\\'h|",
        0x1FC6 => "\\~h",
        0x1FC7 => "\\~h|",
        // Iota.
        0x1FD0 => "i",
        0x1FD1 => "i",
        0x1FD2 => "\\`\"i",
        0x1FD3 => "\\'\"i",
        0x1FD6 => "\\~i",
        0x1FD7 => "\\~\"i",
        // Upsilon and rho.
        0x1FE0 => "u",
        0x1FE1 => "u",
        0x1FE2 => "\\`\"u",
        0x1FE3 => "\\'\"u",
        0x1FE4 => ">r",
        0x1FE5 => "<r",
        0x1FE6 => "\\~u",
        0x1FE7 => "\\~\"u",
        // Omega.
        0x1FF2 => "\\`w|",
        0x1FF3 => "w|",
        0x1FF4 => "\\'w|",
        0x1FF6 => "\\~w",
        0x1FF7 => "\\~w|",
        _ => return None,
    })
}

/// Decomposes a lowercase vowel carrying a breathing mark (U+1F00..=U+1F6F and
/// U+1F80..=U+1FAF) into `(marks, base letter, iota subscript)`.
///
/// The high bits of the code point select the base letter and the low nibble
/// (0..=7) selects the breathing/accent combination. Returns `None` for code
/// points outside those ranges or whose low nibble is above 7.
fn lgr_breathing(codepoint: u32) -> Option<(&'static str, &'static str, &'static str)> {
    let (letter, subscript) = match codepoint {
        0x1F00..=0x1F0F => ("a", ""),
        0x1F10..=0x1F1F => ("e", ""),
        0x1F20..=0x1F2F => ("h", ""),
        0x1F30..=0x1F3F => ("i", ""),
        0x1F40..=0x1F4F => ("o", ""),
        0x1F50..=0x1F5F => ("u", ""),
        0x1F60..=0x1F6F => ("w", ""),
        // Every letter in U+1F80..=U+1FAF also carries an iota subscript,
        // written `|` after the letter as in the fixed table.
        0x1F80..=0x1F8F => ("a", "|"),
        0x1F90..=0x1F9F => ("h", "|"),
        0x1FA0..=0x1FAF => ("w", "|"),
        _ => return None,
    };
    let marks = match codepoint & 0x000F {
        0 => ">",
        1 => "<",
        2 => "\\`>",
        3 => "\\`<",
        4 => "\\'>",
        5 => "\\'<",
        6 => "\\~>",
        7 => "\\~<",
        _ => return None,
    };
    Some((marks, letter, subscript))
}

/// Converts Greek source text into LaTeX source text using the ASCII
/// transliteration expected by babel's Greek support.
///
/// Each input character is lowercased, looked up, and the resulting
/// transliteration is uppercased again when the input character was uppercase.
/// Characters without a transliteration (ASCII, punctuation, unsupported Greek
/// code points, ...) are copied to the output unchanged instead of being
/// dropped.
///
/// # Parameters
/// * `caracter` - the text to convert.
///
/// # Returns
/// The transliterated text.
fn match_char(caracter: String) -> String {
    // The output is at least roughly as long as the input; appending in place
    // avoids re-allocating the whole result for every character.
    let mut latex_encoded = String::with_capacity(caracter.len());
    for original in caracter.chars() {
        let uppercase = original.is_uppercase();
        for lowered in original.to_lowercase() {
            let codepoint = u32::from(lowered);
            let start = latex_encoded.len();
            if let Some(fixed) = lgr_fixed(codepoint) {
                latex_encoded.push_str(fixed);
            } else if let Some((marks, letter, subscript)) = lgr_breathing(codepoint) {
                latex_encoded.push_str(marks);
                latex_encoded.push_str(letter);
                latex_encoded.push_str(subscript);
            } else if uppercase {
                // No transliteration: restore the case of the lowered character.
                latex_encoded.extend(lowered.to_uppercase());
                continue;
            } else {
                latex_encoded.push(lowered);
                continue;
            }
            if uppercase {
                // Every transliteration is pure ASCII, so ASCII uppercasing of
                // the freshly appended tail only affects its letters.
                latex_encoded[start..].make_ascii_uppercase();
            }
        }
    }
    latex_encoded
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment