Created
June 16, 2011 22:18
-
-
Save np/1030431 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE PatternGuards #-} | |
import Yi.Char.Unicode | |
import Data.Char | |
import Data.List | |
import qualified System.IO.UTF8 as U8 | |
genLine :: String -> String -> String | |
genLine name s = "<Multi_key> " ++ keysFromString name ++ " : \"" ++ s ++ "\"" | |
genLines :: [(String,String)] -> String | |
genLines = unlines . map (uncurry genLine) | |
keysFromString :: String -> String | |
keysFromString = intercalate " " . map keyFromChar | |
keyFromCharData :: [(Char,String)] | |
keyFromCharData = | |
[('\'', "apostrophe") | |
,('`', "grave") | |
,('<', "less") | |
,('>', "greater") | |
,('|', "bar") | |
,('(', "parenleft") | |
,(')', "parenright") | |
,('[', "bracketleft") | |
,(']', "bracketright") | |
,('{', "braceleft") | |
,('}', "braceright") | |
,('+', "plus") | |
,('-', "minus") | |
,('^', "asciicircum") | |
,('.', "period") | |
,('=', "equal") | |
,('~', "asciitilde") | |
,('/', "slash") | |
,('\\', "backslash") | |
,('?', "question") | |
,('!', "exclam") | |
,('_', "underscore") | |
,(':', "colon") | |
,('*', "asterisk") | |
,('"', "quotedbl") | |
,('#', "numbersign") | |
,(',', "comma") | |
,(' ', "space") | |
{- | |
,('', "") | |
-} | |
] | |
keyFromChar :: Char -> String | |
keyFromChar c | isAscii c && isAlphaNum c = ['<',c,'>'] | |
| Just s <- lookup c keyFromCharData = '<' : s ++ ">" | |
| otherwise = error $ "keyFromChar: `" ++ c : "'" | |
disambMore = filter ((/= "fake") . snd) . disamb . ((".>","fake"):) | |
main :: IO () | |
main = | |
U8.writeFile ".XCompose" . genLines . disambMore | |
$ greek ++ symbols ++ subscripts ++ superscripts |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Yi.Char.Unicode (greek, symbols, subscripts, superscripts, checkAmbs, disamb) where | |
import Data.List (isPrefixOf) | |
import Control.Applicative | |
greek :: [(String, String)] | |
greek = [(name, unicode) | (_,name,unicode) <- greekData] ++ | |
[ ([leading,shorthand],unicode) | |
| (Just shorthand,_,unicode) <- greekData | |
, leading <- ['\'', 'g'] ] | |
-- | Triples: (shorthand, name, unicode) | |
greekData :: [(Maybe Char, String, String)] | |
greekData = [(Just 'a', "alpha", "α") | |
,(Just 'b', "beta", "β") | |
,(Just 'g', "gamma", "γ") | |
,(Just 'G', "Gamma", "Γ") | |
,(Just 'd', "delta", "δ") | |
,(Just 'D', "Delta", "Δ") | |
,(Just 'e' , "epsilon", "ε") | |
,(Just 'z', "zeta", "ζ") | |
,(Just 'N' , "eta", "η") -- N is close to n which is graphically close | |
,(Just 'E' , "eta", "η") -- E is close to e which is the start of eta | |
,(Nothing , "theta", "θ") | |
,(Nothing , "Theta", "Θ") | |
,(Just 'i', "iota", "ι") | |
,(Just 'k', "kapa", "κ") | |
,(Just 'l', "lambda", "λ") | |
,(Just 'L', "Lambda", "Λ") | |
,(Just 'm', "mu", "μ") | |
,(Just 'n', "nu", "ν") | |
,(Just 'x', "xi", "ξ") | |
,(Just 'o', "omicron", "ο") | |
,(Just 'p' , "pi", "π") | |
,(Just 'P' , "Pi", "Π") | |
,(Just 'r', "rho", "ρ") | |
,(Just 's', "sigma", "σ") | |
,(Just 'S', "Sigma", "Σ") | |
,(Just 't', "tau", "τ") | |
,(Just 'f' , "phi", "φ") | |
,(Just 'F' , "Phi", "Φ") | |
,(Just 'c', "chi", "χ") | |
,(Just 'C', "Chi", "Χ") | |
,(Nothing , "psi", "ψ") | |
,(Nothing , "Psi", "Ψ") | |
,(Just 'w', "omega", "ω") | |
,(Just 'O', "Omega", "Ω") | |
] | |
accents :: [(String,String)] | |
accents = concat | |
[a 'a' 'á' 'à' 'â' 'ä' 'ã' 'å' ' ' | |
,a 'c' 'ć' ' ' 'ĉ' ' ' ' ' ' ' 'ç' | |
,a 'e' 'é' 'è' 'ê' 'ë' ' ' ' ' ' ' | |
,a 'i' 'í' 'ì' 'î' 'ï' 'ĩ' ' ' ' ' | |
,a 'n' 'ń' ' ' ' ' ' ' 'ñ' ' ' 'ņ' | |
,a 'o' 'ó' 'ò' 'ô' 'ö' 'õ' ' ' ' ' | |
,a 'u' 'ú' 'ù' 'û' 'ü' ' ' ' ' ' ' | |
,a 'y' 'ý' ' ' 'ŷ' 'ÿ' ' ' ' ' ' ' | |
] | |
where a k acute agrave acircum adiar atild aring aced = | |
filter ((/=" ").snd) | |
[b 'e' acute | |
,b '`' agrave | |
,b 'i' acircum | |
,b 'u' adiar | |
,b '~' atild | |
,b 'o' aring | |
,b ',' aced] where b p l = ([p,k],[l]) | |
parens :: [((String,String),(String,String))] | |
parens = | |
-- parens | |
[a "<" "⟨" ">" "⟩" | |
,a "<<" "⟪" ">>" "⟫" | |
-- These two confuse gnome-terminal. | |
,a "|(" "〖" ")|" "〗" | |
,a "{|" "⦃" "|}" "⦄" | |
,a "{{" "⦃" "}}" "⦄" | |
,a "[[" "⟦" "]]" "⟧" | |
,a "|_" "⌊" "_|" "⌋" | |
,a "r|_" "⌈" "r_|" "⌉" | |
] | |
where a x y z t = ((x,y),(z,t)) | |
symParen :: ((String,String),(String,String)) -> (String,String) | |
symParen ((x,y),(_,z)) = ('s':x, y ++ z) | |
symbols :: [(String, String)] | |
symbols = | |
map fst parens ++ map snd parens ++ map symParen parens ++ | |
accents ++ | |
[("99", "«") | |
,("00", "»") | |
,("90", "«»") | |
-- quantifiers | |
,("forall", "∀") | |
,("exists", "∃") | |
,("rA", "∀") -- reversed A | |
,("rE", "∃") -- reversed E | |
,("/rE", "∄") | |
-- operators | |
,("<|","◃") | |
-- ,("<|","◁") alternative | |
,("|>","▹") | |
-- ,("|>", "▷") | |
,("><","⋈") | |
,("<)", "◅") | |
,("(>", "▻") | |
,("v","∨") | |
,("u","∪") | |
,("V","⋁") | |
,("+u","⊎") | |
,("u+","⊎") | |
,("u[]","⊔") | |
,("n[]","⊓") | |
,("^","∧") | |
,("/\\", "∧") | |
,("\\/", "∨") | |
,("o","∘") | |
,(".","·") | |
,("...", "…") | |
,("c...", "⋯") -- 'c' for centered | |
,("x","×") | |
,("neg","¬") | |
,("-.","∸") | |
--- arrows | |
,("<-","←") | |
,("->","→") | |
,("|->","↦") | |
,("<-|","↤") | |
,("<--","⟵") | |
,("-->","⟶") | |
,("|-->","⟼") | |
,("|^", "↑") | |
,("==>","⟹") | |
,("=>","⇒") | |
,("<=","⇐") | |
,("<=>","⇔") | |
,("~>","↝") | |
,("<~","↜") | |
,("~->","⇝") | |
,("<-~","⇜") | |
,("<-<", "↢") | |
,(">->", "↣") | |
,("<->", "↔") | |
,("|<-", "⇤") | |
,("->|", "⇥") | |
,(">>=","↠") | |
,("->>","↠") | |
,("/-", "↼") | |
,("\\-", "↽") | |
,("-/", "⇁") | |
,("-\\", "⇀") | |
,("-|->", "⇸") | |
--- relations | |
,("c=","⊆") | |
,("/c=","⊈") | |
,("c","⊂") | |
,("/c","⊄") | |
,("c-","∈") | |
,("/c-","∉") | |
,("c/=","⊊") | |
,("rc=","⊇") -- r for reversed | |
,("rc","⊃") -- r for reversed | |
,("rc-","∋") -- r for reversed | |
,("r/c-","∌") -- r for reversed | |
,("rc/=","⊋") -- r for reversed | |
,(">=","≥") | |
,("=<","≤") | |
,("/>=","≱") | |
,("/=<","≰") | |
,("c[]","⊏") | |
,("rc[]","⊐") | |
,("c[]=","⊑") | |
,("rc[]=","⊒") | |
,("/c[]=","⋢") | |
,("/rc[]=","⋣") | |
,("c[]/=","⋤") | |
,("rc[]/=","⋥") | |
---- equal signs | |
,("=def","≝") | |
,("=?","≟") | |
,("=o","≗") | |
,("==","≡") | |
,("~~","≈") | |
,("~-","≃") | |
,("~=","≅") | |
,("~","∼") | |
,("/=","≠") | |
,("/==","≢") | |
,(":=","≔") | |
,("=:","≕") | |
-- misc | |
,("_|_","⊥") | |
,("Top","⊤") | |
,("l","ℓ") -- same as cl | |
,("::","∷") | |
,(":", "∶") | |
,("0", "∅") | |
,("r8","∞") | |
,("*", "★") -- or "⋆" | |
,("/'l","ƛ") | |
,("d","∂") | |
,("#b","♭") -- music bemol | |
,("#f","♮") -- music flat | |
,("##","♯") -- music # | |
,("Hot","♨") | |
,("Cut","✂") | |
,("Pen","✎") | |
,("Tick","✓") | |
-- Currency Symbols | |
,("B|","฿") | |
,("ob","ⓑ") | |
,("e=","€") -- alternatives =C =c E= =E =e | |
,("L-","£") -- alternatives -L | |
,("Y=","¥") -- alternatives =Y | |
,("ox","¤") -- currency sign | |
,("xo","¤") -- currency sign | |
{- | |
CE "₠" # EURO-CURRENCY SIGN | |
C/ "₡" # COLON SIGN | |
/C "₡" # COLON SIGN | |
Cr "₢" # CRUZEIRO SIGN | |
Fr "₣" # FRENCH FRANC SIGN | |
L= "₤" # LIRA SIGN | |
=L "₤" # LIRA SIGN | |
m/ "₥" # MILL SIGN | |
/m "₥" # MILL SIGN | |
N= "₦" # NAIRA SIGN | |
=N "₦" # NAIRA SIGN | |
Pt "₧" # PESETA SIGN | |
Rs "₨" # RUPEE SIGN | |
W= "₩" # WON SIGN | |
=W "₩" # WON SIGN | |
"₪" # NEW SHEQEL SIGN | |
d- "₫" # DONG SIGN | |
"₭" # KIP SIGN | |
"₮" # TUGRIK SIGN | |
"₯" # DRACHMA SIGN | |
"₰" # GERMAN PENNY SIGN | |
"₱" # PESO SIGN | |
"₲" # GUARANI SIGN | |
"₳" # AUSTRAL SIGN | |
"₴" # HRYVNIA SIGN | |
"₵" # CEDI SIGN | |
|c "¢" # CENT SIGN | |
c| "¢" # CENT SIGN | |
c/ "¢" # CENT SIGN | |
/c "¢" # CENT SIGN | |
-} | |
-- dashes | |
,("-","−") | |
-- quotes | |
,("\"","“”") | |
,("r`","′") | |
-- turnstyles | |
,("|-", "⊢") | |
,("|/-", "⊬") | |
,("-|", "⊣") | |
,("|=", "⊨") | |
,("|/=", "⊭") | |
,("||-", "⊩") | |
-- circled/squared operators | |
-- ⊝ ⍟ ⎊ ⎉ | |
] ++ | |
-- maybe this makes too much combinations and is wasteful? | |
[ (xy,[z]) | |
| x <- ['o','O'] | |
, (y,z) <- [('+','⊕') | |
,('-','⊖') | |
,('x','⊗') | |
,('/','⊘') | |
,('*','⊛') | |
,('=','⊜') | |
,('.','⊙') | |
] | |
, not (x=='o' && y=='=') | |
, xy <- [[x,y],[y,x]] | |
] | |
++ | |
[ -- I'm not yet happy with those | |
("Oo","⊚") | |
,("oo","°") | |
,("o^","°") | |
] | |
++ | |
[ | |
("[+]","⊞") | |
,("[-]","⊟") | |
,("[x]","⊠") | |
,("[.]","⊡") | |
,("[]","∎") | |
,("[ ]","☐") | |
,("[Tick]","☑") | |
-- circles "◎◍◐◑◒◓◔◕◖◗◠◡◴◵◶◷⚆⚇⚈⚉" | |
--,("ob","●") | |
--,("ow","○") | |
,("o..","◌") | |
,("oO","◯") | |
,("!!","‼") | |
,("??","⁇") | |
,("?!","⁈") | |
,("?b!","‽") -- 'b' like backspace | |
,("!?","⁉") | |
,("r?", "¿") -- 'r' for reversed | |
,("r!", "¡") -- 'r' for reversed | |
,("eth","ð") | |
] ++ [ (leading:l, [u]) | leading <- ['|','b'], (l,u) <- | |
[("N",'ℕ') | |
,("H",'ℍ') | |
,("P",'ℙ') | |
,("R",'ℝ') | |
,("C",'ℂ') | |
,("D",'ⅅ') | |
,("Q",'ℚ') | |
,("Z",'ℤ') | |
,("gg",'ℽ') | |
,("gG",'ℾ') | |
,("gP",'ℿ') | |
,("gS",'⅀') | |
] | |
] ++ [ | |
-- c for cal | |
("cP","℘") | |
,("cL","ℒ") | |
,("cR","ℛ") | |
,("cN","𝒩") | |
,("cE","ℰ") | |
,("cF","ℱ") | |
,("cH","ℋ") | |
,("cI","ℐ") | |
,("cM","ℳ") | |
,("ce","ℯ") | |
,("cg","ℊ") | |
,("co","ℴ") | |
,("cl","ℓ") | |
] | |
checkAmbs :: [(String, String)] -> [(String, String)] | |
checkAmbs table = check | |
where ambs = [ (x, y) | |
| v@(x, _) <- table | |
, w@(y, _) <- table | |
, v /= w | |
, x `isPrefixOf` y ] | |
check | null ambs = table | |
| otherwise = error $ "checkAmbs: ambiguous declarations for " ++ show ambs | |
disamb :: [(String, String)] -> [(String, String)] | |
disamb table = map f table | |
where f v@(x, vx) = | |
let ambs = [ w | |
| w@(y, _) <- table | |
, v /= w | |
, x `isPrefixOf` y ] | |
in if null ambs then v else (x ++ " ", vx) | |
-- More: | |
-- arrows: ⇆ | |
-- turnstyles: ⊦ ⊧ | |
-- subscript: ₔ | |
zipscripts :: Char -> String -> String -> [(String, String)] | |
zipscripts c ascii unicode | |
= zip (fmap ((c:) . pure) ascii) (fmap pure unicode) | |
subscripts, superscripts :: [(String, String)] | |
subscripts = zipscripts '_' "0123456789+-=()aeioruvx" | |
"₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎ₐₑᵢₒᵣᵤᵥₓ" | |
superscripts = zipscripts '^' -- NOTE that qCFQSVXYZ are missing | |
"0123456789+-=()abcdefghijklmnoprstuvwxyzABDEGHIJKLMNOPRTUW" | |
"⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ᵃᵇᶜᵈᵉᶠᵍʰⁱʲᵏˡᵐⁿᵒᵖʳˢᵗᵘᵛʷˣʸᶻᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂ" | |
{- | |
∣ ⋆ ⁺ ∙ ∥ ⁻ • ✶ ↯ ≺ ⋎ ″ ≳ ≲ ◁ ∗ ≊ ◇ ⌝ ⌜ ≉ ≰ ‿ ≋ ⨀ ⊴ ≮ □ ⇛ ⊸ ≯ | |
⇿ ⇨ ↓ ↡ ↛ ⇓ ⇑ | |
⋐ ⁆ ⁅ ϕ ⊪ ◂ ≴ ≁ ÷ ◈ ⑵ ⑴ ∩ ̂ ≻ ⇾ ↟ ► ≇ ∔ ▶ ≛ ⇦ ⦈ ⦇ ⑶ ⋃ ⋂ ≵ ½ ’ — | |
ö ɐ ∁ Μ ʇ ɥ ǝ ı ó í å à é | |
-} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment