Last active
October 22, 2022 09:42
-
-
Save nownabe/d0db9e96ed2b58406ef56b2a26d343cc to your computer and use it in GitHub Desktop.
Tiny Hiragana encoding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Snippet to encode strings of Hiragana (Japanese syllabary) into a numeric value.
 *
 * This is written as an example for a Solidity application.
 */
// Maximum number of characters in a word that can be encoded.
const maxLength = 5;

/*
 * 7 bits for each character.
 *
 * NOTE: maxLength * bitSize = 35 bits, which is wider than the 32 bits that
 * JavaScript bitwise operators work on.
 */
const bitSize = 7;

// Lookup table mapping a character code (array index) to its Hiragana character.
// The first element must be an empty string so that code 0 decodes to "nothing".
// Frozen so the index of a character can never change at runtime.
const hiraganaList = Object.freeze([
  "",
  ..."あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわをん",
  ..."がぎぐげござじずぜぞだぢづでどばびぶべぼぱぴぷぺぽぁぃぅぇぉゃゅょー",
]);
/*
 * 6 bits for each character.
 *
 * This configuration accepts only monographs.
 * If you need to handle diacritics like がぎぐげご and digraphs like きゃきゅきょ,
 * you have to convert them to monographs like かきくけこ or きやきゆきよ in advance.
 *
 * const bitSize = 6;
 * const hiraganaList = [
 *   "",
 *   ..."あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわをん".split(""),
 * ];
 */
// If bitSize = 7, the size of hiraganaList must be less than 129, including the first empty string.
// If bitSize = 6, the size of hiraganaList must be less than 65, including the first empty string.
console.log(`The size of hiraganaList = ${hiraganaList.length}`);
// Bit mask with one bit set for every position below `bitSize`,
// built once at load time by OR-ing the individual bits together.
const mask = (() => {
  let bits = 0;
  let position = 0;
  while (position < bitSize) {
    bits |= 1 << position;
    position += 1;
  }
  return bits;
})();
/**
 * Encode a Hiragana word into a single non-negative integer.
 *
 * Each character is replaced by its index in `hiraganaList` and the indices
 * are packed as base-2^bitSize digits, first character most significant.
 *
 * @param {string} word - Word to encode; at most `maxLength` characters.
 * @returns {number} The packed code, or -1 when the word is too long or
 *   contains a character that is not in `hiraganaList`.
 */
const encode = (word) => {
  // Validate word length. Exception would be better.
  if (word.length > maxLength) { return -1; }

  // Plain arithmetic is used instead of `<<` / `|` because bitwise operators
  // truncate to 32 bits, while maxLength * bitSize can be wider than that.
  const radix = Math.pow(2, bitSize);
  let encoded = 0;
  for (const c of word) {
    const hiraganaIdx = hiraganaList.indexOf(c);
    // An unknown character has no code; reject the word rather than
    // folding -1 into the result and corrupting it.
    if (hiraganaIdx < 0) { return -1; }
    encoded = encoded * radix + hiraganaIdx;
  }
  return encoded;
};
/**
 * Decode a packed integer code back into its Hiragana word.
 *
 * The code is read as `maxLength` base-2^bitSize digits, least significant
 * digit first, and each digit is looked up in `hiraganaList`. Digit 0 maps to
 * the empty string, so words shorter than `maxLength` decode without padding.
 *
 * @param {number} encoded - Packed code, or -1 for "invalid word".
 * @returns {string} The decoded word, or "" when the code is -1 or contains a
 *   digit that has no entry in `hiraganaList`.
 */
const decode = (encoded) => {
  if (encoded === -1) { return ""; }

  // A code built with 32-bit bitwise operators can come out negative;
  // reinterpret it as the unsigned value it stands for.
  let remaining = encoded < 0 ? encoded >>> 0 : encoded;

  // Plain arithmetic is used instead of `&` / `>>>` because bitwise operators
  // truncate to 32 bits, while maxLength * bitSize can be wider than that.
  const radix = Math.pow(2, bitSize);
  let word = "";
  for (let i = 0; i < maxLength; i++) {
    const hiraganaIdx = remaining % radix;
    remaining = Math.floor(remaining / radix);
    const character = hiraganaList[hiraganaIdx];
    // A digit without a table entry means the code is not valid; return ""
    // instead of concatenating the string "undefined".
    if (character === undefined) { return ""; }
    word = character + word;
  }
  return word;
};
/* ------- Test code -------- */
// Round-trip helper: prints a word, its encoded value and the decoded result.
// The closure keeps a running number so each report gets its own header.
const test = (() => {
  let runNumber = 0;

  function report(word) {
    console.log(`\n======== Test ${runNumber} ========\n`);
    console.log(`Original word = '${word}'`);

    const code = encode(word);
    console.log(`Encoded code = ${code}`);

    const roundTripped = decode(code);
    console.log(`Decoded word = '${roundTripped}'`);

    runNumber += 1;
  }

  return (word) => {
    report(word);
  };
})();
// Sample words to round-trip. The last one is longer than maxLength and
// contains a character that is not in hiraganaList.
const sampleWords = [
  "あいうえお",
  "おかき",
  "くりえいと",
  "じゃんぷ",
  "なが〜〜〜〜い",
];
for (const sampleWord of sampleWords) {
  test(sampleWord);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment