Skip to content

Instantly share code, notes, and snippets.

@twilight-sparkle-irl
Last active February 26, 2022 21:22
Show Gist options
  • Save twilight-sparkle-irl/537c76e95e87dc714f13bea1b54c9b94 to your computer and use it in GitHub Desktop.
Save twilight-sparkle-irl/537c76e95e87dc714f13bea1b54c9b94 to your computer and use it in GitHub Desktop.
.XCompose unicode shenanigans pack
# the unicode shenanigans pack for XCompose, by Twilight (synthetic.garden)
# for when you need obfuscation
# Please use these wisely, using them for everything everywhere is dumb and causes accessibility issues.
# If that for some reason isn't a good enough reason for you: using these excessively will cause them to lose their power.
# Don't be a dumbass.
# breakers
# used to break up text when lookalikes can't be used
<Multi_key> <a> <l> <m> : "؜" # arabic letter mark (U+061C) (never is a space, but can cause RTL)
<Multi_key> <h> <f> : "ㅤ" # hangul filler (U+3164) (sometimes is a space and causes translation issues, but can't cause RTL)
<Multi_key> <h> <h> <f> : "ᅠ" # half-width hangul filler (U+FFA0) (dubious, untested)
# lookalikes
# lookalikeness is not guaranteed, though:
# anything labeled "Cyrillic lookalike" is guaranteed to show up on basically anything that isn't WinXP, and usually looks like the imitated character
# Armenian and Cherokee lookalikes are more dubious
# Latin lookalikes probably work but may look weird
# Full-width lookalikes will always look weird, and are usually a last-resort
<Multi_key> <Down> <A> : "А" # Cyrillic lookalike U+0410
<Multi_key> <Down> <B> : "В" # Cyrillic lookalike U+0412
<Multi_key> <Down> <C> : "С" # Cyrillic lookalike U+0421
<Multi_key> <Down> <D> : "Ꭰ" # Cherokee lookalike U+13a0
<Multi_key> <Down> <E> : "Е" # Cyrillic lookalike U+0415
<Multi_key> <Down> <F> : "Ϝ" # Greek lookalike U+03dc
<Multi_key> <Down> <G> : "Ԍ" # Cyrillic (supplement) lookalike U+050c
<Multi_key> <Down> <H> : "Н" # Cyrillic lookalike U+041d
<Multi_key> <Down> <I> : "І" # Cyrillic lookalike U+0406
<Multi_key> <Down> <J> : "Ј" # Cyrillic lookalike U+0408
<Multi_key> <Down> <K> : "К" # Cyrillic lookalike U+041a
<Multi_key> <Down> <L> : "Ꮮ" # Cherokee lookalike U+13de
<Multi_key> <Down> <M> : "М" # Cyrillic lookalike U+041c
<Multi_key> <Down> <O> : "О" # Cyrillic lookalike U+041e
<Multi_key> <Down> <P> : "Р" # Cyrillic lookalike U+0420
<Multi_key> <Down> <Q> : "Q" # Full-width lookalike U+ff31 (nothing else has this)
<Multi_key> <Down> <R> : "Ꭱ" # Cherokee lookalike U+13a1
<Multi_key> <Down> <S> : "Ѕ" # Cyrillic lookalike U+0405
<Multi_key> <Down> <T> : "Т" # Cyrillic lookalike U+0422
<Multi_key> <Down> <U> : "Ս" # Armenian lookalike U+054d
<Multi_key> <Down> <V> : "Ѵ" # Cyrillic lookalike U+0474
<Multi_key> <Down> <W> : "Ԝ" # Cyrillic (supplement) lookalike U+051c
<Multi_key> <Down> <X> : "Х" # Cyrillic lookalike U+0425
<Multi_key> <Down> <Y> : "Ү" # Cyrillic lookalike U+04ae
<Multi_key> <Down> <Z> : "Ζ" # Greek lookalike U+0396
<Multi_key> <Down> <a> : "а" # Cyrillic lookalike U+0430
<Multi_key> <Down> <b> : "ᖯ" # Canadian syllabic lookalike U+15af
<Multi_key> <Down> <c> : "с" # Cyrillic lookalike U+0441
<Multi_key> <Down> <d> : "ԁ" # Cyrillic (supplement) lookalike U+0501
<Multi_key> <Down> <e> : "е" # Cyrillic lookalike U+0435
<Multi_key> <Down> <f> : "ſ" # Latin lookalike U+017f
<Multi_key> <Down> <g> : "ɡ" # Latin smallcaps lookalike U+0261
<Multi_key> <Down> <h> : "հ" # Armenian lookalike U+0570
<Multi_key> <Down> <i> : "і" # Cyrillic lookalike U+0456
<Multi_key> <Down> <j> : "ј" # Cyrillic lookalike U+0458
<Multi_key> <Down> <k> : "k" # Full-width lookalike U+ff4b (everything else looked like a capital letter)
<Multi_key> <Down> <l> : "Ɩ" # Latin lookalike U+0196 (only one that looks like an l in fonts that show the bars on capital I)
<Multi_key> <Down> <m> : "m" # Full-width lookalike U+ff4d
<Multi_key> <Down> <n> : "ո" # Armenian lookalike U+0578
<Multi_key> <Down> <o> : "о" # Cyrillic lookalike U+043e
<Multi_key> <Down> <p> : "р" # Cyrillic lookalike U+0440
<Multi_key> <Down> <q> : "ԛ" # Cyrillic (supplement) lookalike U+051b
<Multi_key> <Down> <r> : "г" # Cyrillic lookalike U+0433 (only one that looks rounded is full-width)
<Multi_key> <Down> <s> : "ѕ" # Cyrillic lookalike U+0455
<Multi_key> <Down> <t> : "t" # Full-width lookalike U+ff54 (everything else looked like a capital letter)
<Multi_key> <Down> <u> : "ս" # Armenian lookalike U+057d
<Multi_key> <Down> <v> : "ѵ" # Cyrillic lookalike U+0475
<Multi_key> <Down> <w> : "ᴡ" # Latin smallcaps lookalike U+1d21
<Multi_key> <Down> <x> : "х" # Cyrillic lookalike U+0445
<Multi_key> <Down> <y> : "у" # Cyrillic lookalike U+0443
<Multi_key> <Down> <z> : "ᴢ" # Latin smallcaps lookalike U+1d22

You probably don't need to know this, because the list above won't be updated at any point, but here is how it was generated in case you want to make your own confusables list.

(if you want to change the "multi_key down arrow" default, just use find and replace)

First, go on https://lookout.net/test/confusablesjs/ (https://web.archive.org/web/20191220151912im_/https://lookout.net/test/confusablesjs/ if it's down)

Then, run this in the JS console:

// Replace the whole page with a small generator UI: a key-combo field with a
// "Change" button, a textarea for the Cyrillic base list, and a one-character
// field with a "Gen" button plus a second textarea for its results.
// The element ids used here (keyinput, output1, charinput, output2) are read
// further down as bare global names, which relies on the browser exposing
// elements that have an id as properties of window.
// The buttons call the global functions changekey() and gac().
document.body.innerHTML=`
Key combo: <input id=keyinput value="<Multi_key> <Down>"> <button onclick=changekey()>Change</button>
<h2>Cyrillic base list</h2>
<textarea id=output1 style="width:600px;height:400px;font-family:sans-serif;"></textarea>
<br>
<h3>Generate all for character (select one to put in .XCompose)</h3>
Character: <input id=charinput maxlength=1 style="width:1em" /> <button onclick=gac()>Gen</button><br>
<textarea id=output2 style="width:600px;height:400px;font-family:sans-serif;"></textarea>
`
// Key-sequence prefix placed at the start of every generated line; matches the
// initial value of the keyinput field above. It is assigned without a
// declaration, so it becomes a property of the global object (this only works
// in non-strict code, such as a snippet pasted into the console), where
// changekey() overwrites it.
keycombo = "<Multi_key> <Down>"

/**
 * Code point ranges and the label used for each in generated comments.
 * Each entry is `[[first, last], label]`; both bounds are inclusive.
 * Declared explicitly: the previous bare assignment created an implicit
 * global and throws a ReferenceError in strict mode / ES modules.
 */
const ranges = [
  [[0x0, 0x7F], "Basic Latin"],
  [[0x80, 0x2AF], "Latin supplement"],
  [[0x370, 0x3FF], "Greek"],
  [[0x400, 0x4FF], "Cyrillic"],
  [[0x500, 0x52F], "Cyrillic (supplement)"],
  [[0x530, 0x58F], "Armenian"],
  [[0xFF00, 0xFFEF], "Full-width"],
];

/**
 * Returns the label of the first range in `ranges` that contains a code point.
 *
 * @param {number} i - Code point to classify.
 * @returns {string} The range label, or "Unknown" when no range matches
 *   (including the gaps between the listed ranges).
 */
function rangeTest(i) {
  const match = ranges.find(([[first, last]]) => i >= first && i <= last);
  return match ? match[1] : "Unknown";
}

/**
 * Looks up the confusable group for a code point and returns its members as
 * one-character strings.
 *
 * Reads the page-provided global `confusables`: `data.index[codepoint]` is
 * used as an index into `data.characters`, and the entry found there is
 * treated as an array of code points (assumed numeric — confirm against the
 * page's data if it changes).
 *
 * @param {number} codepoint - Code point whose lookalikes are wanted.
 * @param {(cp: number) => boolean} filter - Keeps a group member when truthy.
 * @returns {string[]} Matching characters; empty when the code point has no
 *   entry in the index.
 */
function getCon(codepoint, filter) {
  const group = confusables.data.characters[confusables.data.index[codepoint]] || [];
  // fromCodePoint, not fromCharCode: fromCharCode keeps only the low 16 bits,
  // so any confusable above U+FFFF would come out as an unrelated character.
  return group.filter(filter).map((cp) => String.fromCodePoint(cp));
}

/**
 * Builds one XCompose line per lookalike of `char`.
 *
 * Each line has the form
 *   `<keycombo> <char> : "<lookalike>" # <range label> lookalike U+xxxx`
 * using the global `keycombo` prefix, `getCon` for the lookup and `rangeTest`
 * for the label. `char` is inserted verbatim between the angle brackets, so
 * characters whose keysym name differs from the character itself (for example
 * `-` or `>`) do not yield a valid key name.
 *
 * @param {string} char - Character to generate lookalikes for; only its first
 *   code point is looked up.
 * @param {(cp: number) => boolean} [filter] - Keeps a candidate code point
 *   when truthy. Defaults to "anything except `char` itself".
 * @returns {string[]} Generated lines; empty when nothing passes the filter.
 */
function gen(char, filter = (x) => x !== char.codePointAt(0)) {
  // Everything is local: the previous version assigned list/codepoint/con
  // without declaring them (implicit globals, a ReferenceError in strict
  // mode), and its default filter only worked through that leaked global.
  const codepoint = char.codePointAt(0);
  return getCon(codepoint, filter).map((c) => {
    const lookalike = c.codePointAt(0);
    const hex = lookalike.toString(16).padStart(4, '0');
    return `${keycombo} <${char}> : "${c}" # ${rangeTest(lookalike)} lookalike U+${hex}`;
  });
}

/**
 * Regenerates the base list: for every ASCII letter A-Z and a-z, takes the
 * first lookalike that `rangeTest` labels "Cyrillic" and writes the resulting
 * lines (one per letter, newline-terminated) into the `output1` textarea.
 * Letters without such a lookalike are skipped. The list is also logged to
 * the console.
 */
function updateCyr() {
  const isCyrillic = (c) => rangeTest(c) === 'Cyrillic';
  let cyrillicList = "";
  for (let i = 0x41; i < 0x7b; i++) {
    // 0x5b-0x60 are the punctuation characters between 'Z' and 'a'.
    if (i > 0x5a && i < 0x61) continue;
    const [cyr] = gen(String.fromCharCode(i), isCyrillic);
    if (cyr) cyrillicList += `${cyr}\n`;
  }
  output1.value = cyrillicList;
  console.log(cyrillicList);
}
// On every key release in the character field: log the field's current value
// and regenerate the per-character list.
charinput.onkeyup = (event) => {
  console.log(event.target.value);
  gac();
  return true;
};
/**
 * "Generate all for character": writes every generated line for the character
 * currently in `charinput` into the `output2` textarea, one per line, or a
 * `# none found for …` comment when there are none. The text is also logged.
 */
function gac() {
  const lines = gen(charinput.value);
  const text = lines.join('\n') || `# none found for ${charinput.value}`;
  output2.value = text;
  console.log(text);
}

// Pressing Enter (keyCode 13) in the key-combo field applies the new combo,
// the same as clicking the "Change" button.
keyinput.onkeydown = (event) => {
  if (event.keyCode === 13) {
    changekey();
  }
  return true;
};
/**
 * Applies the key combo typed into `keyinput`: stores its trimmed value in the
 * global `keycombo`, rebuilds the base list, and rebuilds the per-character
 * list too when the character field is not empty.
 */
function changekey() {
  const typedCombo = keyinput.value;
  keycombo = typedCombo.trim();
  updateCyr();
  if (!charinput.value) {
    return;
  }
  gac();
}

// Fill the output1 textarea once, using the initial key combo, as soon as the
// snippet has been run.
updateCyr()

This isn't exactly how I did it, but it's everything I did, compiled into a form that's easy to run yourself.

Note that this doesn't properly replace "<->" with "<minus>" or "<>>" with "<greater>" in the "generate all for char" field, so for characters like these you have to substitute the keysym name by hand. Sorry if that's a dealbreaker, but that's a ton of work for not much of a payoff.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment