Skip to content

Instantly share code, notes, and snippets.

@andrewshadura
Forked from beniwohli/unicode_to_latex.py
Last active April 7, 2016 11:00
Show Gist options
  • Save andrewshadura/2c8815ccc5967833680cd58f7143c434 to your computer and use it in GitHub Desktop.
Save andrewshadura/2c8815ccc5967833680cd58f7143c434 to your computer and use it in GitHub Desktop.
Script to convert Cyrillic Unicode characters to their respective LaTeX representations
#!/usr/bin/python3
# original XML at http://www.w3.org/Math/characters/unicode.xml
# XSL for conversion: https://gist.github.com/798546
# Only Cyrillic characters are included.
# Translation table in the format accepted by str.translate(): each key is a
# Unicode code point given as an integer (0x0401 through 0x04E9) and each
# value is the LaTeX control sequence substituted for that character.  The
# backslash is escaped in the Python literal, so "\\cyra" is the text \cyra.
#
# Not every code point in that range has an entry (for example 0x0463 and
# 0x046B are absent); str.translate() leaves unmapped characters unchanged.
#
# NOTE(review): a few distinct code points share one command -- 0x0403 and
# 0x0413 both give \CYRG, 0x040C and 0x041A both give \CYRK, and likewise
# 0x0453/0x0433 (\cyrg) and 0x045C/0x043A (\cyrk).  Confirm this is intended.
unicode_to_latex = {
0x0401: "\\CYRYO",
0x0402: "\\CYRDJE",
0x0403: "\\CYRG",
0x0404: "\\CYRIE",
0x0405: "\\CYRDZE",
0x0406: "\\CYRII",
0x0407: "\\CYRYI",
0x0408: "\\CYRJE",
0x0409: "\\CYRLJE",
0x040A: "\\CYRNJE",
0x040B: "\\CYRTSHE",
0x040C: "\\CYRK",
0x040E: "\\CYRUSHRT",
0x040F: "\\CYRDZHE",
0x0410: "\\CYRA",
0x0411: "\\CYRB",
0x0412: "\\CYRV",
0x0413: "\\CYRG",
0x0414: "\\CYRD",
0x0415: "\\CYRE",
0x0416: "\\CYRZH",
0x0417: "\\CYRZ",
0x0418: "\\CYRI",
0x0419: "\\CYRISHRT",
0x041A: "\\CYRK",
0x041B: "\\CYRL",
0x041C: "\\CYRM",
0x041D: "\\CYRN",
0x041E: "\\CYRO",
0x041F: "\\CYRP",
0x0420: "\\CYRR",
0x0421: "\\CYRS",
0x0422: "\\CYRT",
0x0423: "\\CYRU",
0x0424: "\\CYRF",
0x0425: "\\CYRH",
0x0426: "\\CYRC",
0x0427: "\\CYRCH",
0x0428: "\\CYRSH",
0x0429: "\\CYRSHCH",
0x042A: "\\CYRHRDSN",
0x042B: "\\CYRERY",
0x042C: "\\CYRSFTSN",
0x042D: "\\CYREREV",
0x042E: "\\CYRYU",
0x042F: "\\CYRYA",
0x0430: "\\cyra",
0x0431: "\\cyrb",
0x0432: "\\cyrv",
0x0433: "\\cyrg",
0x0434: "\\cyrd",
0x0435: "\\cyre",
0x0436: "\\cyrzh",
0x0437: "\\cyrz",
0x0438: "\\cyri",
0x0439: "\\cyrishrt",
0x043A: "\\cyrk",
0x043B: "\\cyrl",
0x043C: "\\cyrm",
0x043D: "\\cyrn",
0x043E: "\\cyro",
0x043F: "\\cyrp",
0x0440: "\\cyrr",
0x0441: "\\cyrs",
0x0442: "\\cyrt",
0x0443: "\\cyru",
0x0444: "\\cyrf",
0x0445: "\\cyrh",
0x0446: "\\cyrc",
0x0447: "\\cyrch",
0x0448: "\\cyrsh",
0x0449: "\\cyrshch",
0x044A: "\\cyrhrdsn",
0x044B: "\\cyrery",
0x044C: "\\cyrsftsn",
0x044D: "\\cyrerev",
0x044E: "\\cyryu",
0x044F: "\\cyrya",
0x0451: "\\cyryo",
0x0452: "\\cyrdje",
0x0453: "\\cyrg",
0x0454: "\\cyrie",
0x0455: "\\cyrdze",
0x0456: "\\cyrii",
0x0457: "\\cyryi",
0x0458: "\\cyrje",
0x0459: "\\cyrlje",
0x045A: "\\cyrnje",
0x045B: "\\cyrtshe",
0x045C: "\\cyrk",
0x045E: "\\cyrushrt",
0x045F: "\\cyrdzhe",
0x0460: "\\CYROMEGA",
0x0461: "\\cyromega",
0x0462: "\\CYRYAT",
0x0464: "\\CYRIOTE",
0x0465: "\\cyriote",
0x0466: "\\CYRLYUS",
0x0467: "\\cyrlyus",
0x0468: "\\CYRIOTLYUS",
0x0469: "\\cyriotlyus",
0x046A: "\\CYRBYUS",
0x046C: "\\CYRIOTBYUS",
0x046D: "\\cyriotbyus",
0x046E: "\\CYRKSI",
0x046F: "\\cyrksi",
0x0470: "\\CYRPSI",
0x0471: "\\cyrpsi",
0x0472: "\\CYRFITA",
0x0474: "\\CYRIZH",
0x0478: "\\CYRUK",
0x0479: "\\cyruk",
0x047A: "\\CYROMEGARND",
0x047B: "\\cyromegarnd",
0x047C: "\\CYROMEGATITLO",
0x047D: "\\cyromegatitlo",
0x047E: "\\CYROT",
0x047F: "\\cyrot",
0x0480: "\\CYRKOPPA",
0x0481: "\\cyrkoppa",
0x0482: "\\cyrthousands",
0x0488: "\\cyrhundredthousands",
0x0489: "\\cyrmillions",
0x048C: "\\CYRSEMISFTSN",
0x048D: "\\cyrsemisftsn",
0x048E: "\\CYRRTICK",
0x048F: "\\cyrrtick",
0x0490: "\\CYRGUP",
0x0491: "\\cyrgup",
0x0492: "\\CYRGHCRS",
0x0493: "\\cyrghcrs",
0x0494: "\\CYRGHK",
0x0495: "\\cyrghk",
0x0496: "\\CYRZHDSC",
0x0497: "\\cyrzhdsc",
0x0498: "\\CYRZDSC",
0x0499: "\\cyrzdsc",
0x049A: "\\CYRKDSC",
0x049B: "\\cyrkdsc",
0x049C: "\\CYRKVCRS",
0x049D: "\\cyrkvcrs",
0x049E: "\\CYRKHCRS",
0x049F: "\\cyrkhcrs",
0x04A0: "\\CYRKBEAK",
0x04A1: "\\cyrkbeak",
0x04A2: "\\CYRNDSC",
0x04A3: "\\cyrndsc",
0x04A4: "\\CYRNG",
0x04A5: "\\cyrng",
0x04A6: "\\CYRPHK",
0x04A7: "\\cyrphk",
0x04A8: "\\CYRABHHA",
0x04A9: "\\cyrabhha",
0x04AA: "\\CYRSDSC",
0x04AB: "\\cyrsdsc",
0x04AC: "\\CYRTDSC",
0x04AD: "\\cyrtdsc",
0x04AE: "\\CYRY",
0x04AF: "\\cyry",
0x04B0: "\\CYRYHCRS",
0x04B1: "\\cyryhcrs",
0x04B2: "\\CYRHDSC",
0x04B3: "\\cyrhdsc",
0x04B4: "\\CYRTETSE",
0x04B5: "\\cyrtetse",
0x04B6: "\\CYRCHRDSC",
0x04B7: "\\cyrchrdsc",
0x04B8: "\\CYRCHVCRS",
0x04B9: "\\cyrchvcrs",
0x04BA: "\\CYRSHHA",
0x04BB: "\\cyrshha",
0x04BC: "\\CYRABHCH",
0x04BD: "\\cyrabhch",
0x04BE: "\\CYRABHCHDSC",
0x04BF: "\\cyrabhchdsc",
0x04C0: "\\CYRpalochka",
0x04C3: "\\CYRKHK",
0x04C4: "\\cyrkhk",
0x04C7: "\\CYRNHK",
0x04C8: "\\cyrnhk",
0x04CB: "\\CYRCHLDSC",
0x04CC: "\\cyrchldsc",
0x04D4: "\\CYRAE",
0x04D5: "\\cyrae",
0x04D8: "\\CYRSCHWA",
0x04D9: "\\cyrschwa",
0x04E0: "\\CYRABHDZE",
0x04E1: "\\cyrabhdze",
0x04E8: "\\CYROTLD",
0x04E9: "\\cyrotld"
}
import fileinput
# Filter mode: read every line from the files named on the command line (or
# from standard input when none are given), replace each mapped Cyrillic
# character with its LaTeX command, and write the result to standard output.
for line in fileinput.input():
    # `line` still carries its own trailing newline, so suppress the one
    # print() would append; otherwise every output line is followed by a
    # blank line, which LaTeX interprets as a paragraph break.
    print(line.translate(unicode_to_latex), end="")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment