Last active
October 28, 2024 14:01
-
-
Save pch/585f0c949acf2e10454f702c23cd63d2 to your computer and use it in GitHub Desktop.
Base32 encoding in Ruby (RFC 4648)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Base32 (RFC 4648) implemented in Ruby
#
# Source: https://ptrchm.com/posts/base32-explained/
# Base32 encoder/decoder using the standard RFC 4648 alphabet (A-Z, 2-7)
# with "=" padding. Both operations work on 40-bit chunks: 5 input bytes
# map to 8 output characters.
class Base32
  ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567".chars.freeze
  PADDING_CHAR = "=".freeze
  BITS_PER_BYTE = 8 # 1 byte = 8 bits
  # 5: the alphabet has 32 = 2^5 symbols, so each character carries 5 bits.
  BITS_PER_CHAR = Math.log2(ALPHABET.length).round
  # 40: least common multiple of 5 and 8, the smallest bit count that is
  # both a whole number of bytes and a whole number of characters.
  BITS_PER_CHUNK = BITS_PER_CHAR.lcm(BITS_PER_BYTE)
  CHARS_PER_CHUNK = BITS_PER_CHUNK / BITS_PER_CHAR # 8 characters per chunk
  CHUNK_LENGTH = BITS_PER_CHUNK / BITS_PER_BYTE # 5 bytes per chunk
  ENCODING_MASK = ALPHABET.length - 1 # 0x1f, selects the low 5 bits
  DECODING_MASK = 0xff # selects the low 8 bits

  # Symbol => 5-bit value lookup, so decoding does not scan ALPHABET
  # linearly for every input character.
  DECODING_TABLE = ALPHABET.each_with_index.to_h.freeze
  private_constant :DECODING_TABLE

  # Encodes the bytes of +str+ as padded Base32 text.
  #
  # @param str [String] data to encode; its raw bytes are used
  # @return [String] Base32 text whose length is a multiple of 8
  #   (empty for empty input)
  def self.encode(str)
    str.bytes.each_slice(CHUNK_LENGTH).map do |chunk|
      bits_in_chunk = chunk.length * BITS_PER_BYTE
      # Integer ceiling division: characters needed to hold every bit.
      number_of_characters = (bits_in_chunk + BITS_PER_CHAR - 1) / BITS_PER_CHAR
      # Zero bits appended on the right so the total is a multiple of 5.
      padding = number_of_characters * BITS_PER_CHAR - bits_in_chunk

      buf = chunk.reduce(0) { |acc, byte| (acc << BITS_PER_BYTE) | byte }
      buf <<= padding

      # Read 5-bit groups from the most significant end of the buffer.
      symbols = Array.new(number_of_characters) do |i|
        shift = BITS_PER_CHAR * (number_of_characters - 1 - i)
        ALPHABET[(buf >> shift) & ENCODING_MASK]
      end

      symbols.join + PADDING_CHAR * (CHARS_PER_CHUNK - number_of_characters)
    end.join
  end

  # Decodes Base32 text back into the original bytes.
  #
  # Every "=" is removed before decoding, so padding is optional. A final
  # chunk whose symbol count cannot come from whole bytes (1, 3 or 6
  # symbols) is decoded leniently: the leading complete bytes are returned
  # and the leftover bits are dropped.
  #
  # @param str [String] Base32 text (uppercase RFC 4648 alphabet)
  # @return [String] the decoded bytes
  # @raise [ArgumentError] if +str+ contains a character that is neither
  #   in the alphabet nor the padding character
  def self.decode(str)
    values = str.delete(PADDING_CHAR).each_char.map do |char|
      DECODING_TABLE.fetch(char) do
        raise ArgumentError, "invalid Base32 character: #{char.inspect}"
      end
    end

    values.each_slice(CHARS_PER_CHUNK).map do |chunk|
      bits_in_chunk = chunk.length * BITS_PER_CHAR
      number_of_original_bytes = bits_in_chunk / BITS_PER_BYTE
      # Trailing bits that do not form a whole byte were padding added by
      # the encoder; shift them out.
      padding = bits_in_chunk - number_of_original_bytes * BITS_PER_BYTE

      buf = chunk.reduce(0) { |acc, value| (acc << BITS_PER_CHAR) | value }
      buf >>= padding

      # Read bytes from the most significant end of the buffer.
      Array.new(number_of_original_bytes) do |i|
        shift = BITS_PER_BYTE * (number_of_original_bytes - 1 - i)
        ((buf >> shift) & DECODING_MASK).chr
      end
    end.join
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
great write-up. i am finally starting to understand the padding "magic". thanks. fyi: i tried a gem with base32 alphabets some time ago, see here https://github.com/rubycocos/blockchain/tree/master/base32-alphabets greetings from austria.