Created
October 30, 2024 22:01
-
-
Save astromechza/51d49b0a6f5e900eb423328f2332c152 to your computer and use it in GitHub Desktop.
A single file Crockford Base32 implementation with generic bitPump that can be easily converted to other bit sizes. Unit tested along with fuzzing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package uid | |
import ( | |
"bytes" | |
"errors" | |
"fmt" | |
"io" | |
"strings" | |
) | |
// bitPump is the core of the bit encoder and decoder. It reads bytes from src, converts each one into an integer
// chunk of srcChunkSize bits and appends it to a bit buffer. Whenever the buffer holds at least dstChunkSize bits,
// the most significant dstChunkSize bits are removed, converted to an output byte and written to dst.
//
// If srcDecoder is nil, each input byte is used as its own chunk value; otherwise the byte is looked up in
// srcDecoder and an error is returned if it is missing. Likewise, if dstEncoder is nil the output chunk is cast
// directly to a byte; otherwise the chunk is looked up in dstEncoder and an error is returned if it is missing.
//
// When src reports io.EOF, any bits still in the buffer are left-aligned into a final chunk (padded with zero
// bits) and written. If dropExtra is true, this final chunk is skipped when the leftover bits are all zero.
//
// It returns the number of bytes written to dst and the first error encountered.
func bitPump(src io.ByteReader, srcDecoder map[byte]int, srcChunkSize int, dropExtra bool, dstChunkSize int, dstEncoder map[int]byte, dst io.ByteWriter) (int64, error) {
	var written int64

	// emit converts a single output chunk into a byte and writes it to dst.
	emit := func(chunk int) error {
		out := byte(chunk)
		if dstEncoder != nil {
			var ok bool
			if out, ok = dstEncoder[chunk]; !ok {
				// Report the chunk itself: the byte returned by a failed lookup is always zero.
				return fmt.Errorf("unknown dst alphabet: %v", chunk)
			}
		}
		if err := dst.WriteByte(out); err != nil {
			return err
		}
		written++
		return nil
	}

	// rem holds the buffered bits that have not been emitted yet; remBits is how many of them are valid.
	rem, remBits := 0, 0
	for {
		// If we have enough bits for an output chunk, produce one before reading more input.
		if remBits >= dstChunkSize {
			remBits -= dstChunkSize
			chunk := rem >> remBits
			// Keep only the bits that were not part of the extracted chunk.
			rem &= (1 << remBits) - 1
			if err := emit(chunk); err != nil {
				return written, err
			}
			continue
		}

		// Not enough buffered bits: read the next input byte.
		b, err := src.ReadByte()
		if err != nil {
			// EOF ends the main loop; fewer than dstChunkSize bits remain at this point.
			if errors.Is(err, io.EOF) {
				break
			}
			return written, err
		}

		// Convert the byte into its integer chunk, or just cast it if no decoder is specified.
		chunk := int(b)
		if srcDecoder != nil {
			var ok bool
			if chunk, ok = srcDecoder[b]; !ok {
				return written, fmt.Errorf("unknown src alphabet: %v", b)
			}
		}

		// Append the chunk to the low end of the buffer.
		rem = rem<<srcChunkSize | chunk
		remBits += srcChunkSize
	}

	// Leftover bits are padded with zeros to form a final chunk, unless dropExtra is set and they carry no data
	// (usually the case when decoding).
	if remBits > 0 && (!dropExtra || rem > 0) {
		if err := emit(rem << (dstChunkSize - remBits)); err != nil {
			return written, err
		}
	}
	return written, nil
}
// b32Encoding maps each 5-bit value (0-31) to its Crockford Base32 symbol. The letters I, L, O and U are
// deliberately absent from the alphabet.
var b32Encoding = map[int]byte{
	0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7',
	8: '8', 9: '9', 10: 'A', 11: 'B', 12: 'C', 13: 'D', 14: 'E', 15: 'F',
	16: 'G', 17: 'H', 18: 'J', 19: 'K', 20: 'M', 21: 'N', 22: 'P', 23: 'Q',
	24: 'R', 25: 'S', 26: 'T', 27: 'V', 28: 'W', 29: 'X', 30: 'Y', 31: 'Z',
}

// b32Decoding maps every accepted input symbol to its 5-bit value. It is populated by init from b32Encoding.
var b32Decoding map[byte]int

// init builds b32Decoding: every symbol of b32Encoding, the lowercase form of each letter, and the look-alike
// aliases O/o -> 0 and I/i/L/l -> 1.
func init() {
	b32Decoding = make(map[byte]int, 2*len(b32Encoding))
	for value, symbol := range b32Encoding {
		b32Decoding[symbol] = value
		if symbol >= 'A' && symbol <= 'Z' {
			b32Decoding[symbol+('a'-'A')] = value
		}
	}
	// The alias letters are not part of the encoding alphabet, so the loop above never adds their lowercase
	// forms; register both cases explicitly so decoding is consistently case-insensitive.
	for _, alias := range []byte("Oo") {
		b32Decoding[alias] = 0
	}
	for _, alias := range []byte("IiLl") {
		b32Decoding[alias] = 1
	}
}
func EncodeB32(dst io.ByteWriter, src io.ByteReader) (written int64, err error) { | |
return bitPump(src, nil, 8, false, 5, b32Encoding, dst) | |
} | |
func DecodeB32(dst io.ByteWriter, src io.ByteReader) (written int64, err error) { | |
return bitPump(src, b32Decoding, 5, true, 8, nil, dst) | |
} | |
func EncodeB32String(in []byte) (string, error) { | |
sb := bytes.NewBuffer(make([]byte, 0, len(in)*2)) | |
if _, err := EncodeB32(sb, bytes.NewReader(in)); err != nil { | |
return "", err | |
} | |
return sb.String(), nil | |
} | |
func DecodeB32String(in string) ([]byte, error) { | |
sb := bytes.NewBuffer(make([]byte, 0, len(in))) | |
if _, err := DecodeB32(sb, strings.NewReader(in)); err != nil { | |
return nil, err | |
} | |
return sb.Bytes(), nil | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package uid | |
import ( | |
"bytes" | |
"strings" | |
"testing" | |
"github.com/astromechza/memory-mouse/internal/testsupport" | |
) | |
func roundTripTest(t *testing.T, name, in, out string) { | |
t.Run(name, func(t *testing.T) { | |
sb := new(bytes.Buffer) | |
t.Run("encode", func(t *testing.T) { | |
n, err := EncodeB32(sb, strings.NewReader(in)) | |
if testsupport.AssertEqual(t, err, nil) { | |
testsupport.AssertEqual(t, n, int64(sb.Len())) | |
testsupport.AssertEqual(t, sb.String(), out) | |
} | |
o2, err := EncodeB32String([]byte(in)) | |
if testsupport.AssertEqual(t, err, nil) { | |
testsupport.AssertEqual(t, o2, out) | |
} | |
}) | |
t.Run("decode", func(t *testing.T) { | |
sb.Reset() | |
n, err := DecodeB32(sb, strings.NewReader(out)) | |
if testsupport.AssertEqual(t, err, nil) { | |
testsupport.AssertEqual(t, n, int64(sb.Len())) | |
testsupport.AssertEqual(t, sb.String(), in) | |
} | |
o2, err := DecodeB32String(out) | |
if testsupport.AssertEqual(t, err, nil) { | |
testsupport.AssertEqual(t, string(o2), in) | |
} | |
}) | |
}) | |
} | |
func TestEncodeDecodeB32(t *testing.T) { | |
roundTripTest(t, "empty", "", "") | |
roundTripTest(t, "1byte", "a", "C4") | |
roundTripTest(t, "2byte", "ab", "C5H0") | |
roundTripTest(t, "3byte", "abc", "C5H66") | |
roundTripTest(t, "4byte", "abcd", "C5H66S0") | |
} | |
func FuzzEncodeDecodeB32(f *testing.F) { | |
for _, s := range []string{"", "a", "zzzzz", ".", " ", "\n", "🎃", "\x00\xff", strings.Repeat("x", 100)} { | |
f.Add(s) | |
} | |
f.Fuzz(func(t *testing.T, in string) { | |
sb := new(bytes.Buffer) | |
n, err := EncodeB32(sb, strings.NewReader(in)) | |
if testsupport.AssertEqual(t, err, nil) { | |
testsupport.AssertEqual(t, n, int64(sb.Len())) | |
} | |
out := sb.String() | |
sb.Reset() | |
n, err = DecodeB32(sb, strings.NewReader(out)) | |
if testsupport.AssertEqual(t, err, nil) { | |
testsupport.AssertEqual(t, n, int64(sb.Len())) | |
testsupport.AssertEqual(t, sb.String(), in) | |
} | |
}) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment