Skip to content

Instantly share code, notes, and snippets.

@EmilHernvall
Last active February 24, 2021 06:24
Show Gist options
  • Save EmilHernvall/953733 to your computer and use it in GitHub Desktop.
Save EmilHernvall/953733 to your computer and use it in GitHub Desktop.
Simple base64-encoder for java
/**
 * Minimal, dependency-free Base64 codec using the standard alphabet
 * ({@code A-Z a-z 0-9 + /}) with {@code =} padding.
 */
public class Base64
{
    /** Maps a 6-bit value (0-63) to its Base64 character. */
    private static final char[] ALPHABET =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

    /** Maps an ASCII code point to its 6-bit value, or -1 when it is not in the alphabet. */
    private static final int[] DECODE_TABLE = new int[128];

    static {
        for (int i = 0; i < DECODE_TABLE.length; i++) {
            DECODE_TABLE[i] = -1;
        }
        for (int i = 0; i < ALPHABET.length; i++) {
            DECODE_TABLE[ALPHABET[i]] = i;
        }
    }

    /**
     * Encodes bytes as padded Base64 text.
     *
     * @param data the bytes to encode; must not be {@code null}
     * @return the Base64 text, whose length is always a multiple of four
     *         (the empty string for empty input)
     */
    public static String encode(byte[] data)
    {
        StringBuilder buffer = new StringBuilder(((data.length + 2) / 3) * 4);

        for (int i = 0; i < data.length; i += 3) {
            int remaining = data.length - i;

            // Pack up to three input bytes into the low 24 bits; missing bytes stay zero.
            int b = (data[i] & 0xFF) << 16;
            if (remaining > 1) {
                b |= (data[i + 1] & 0xFF) << 8;
            }
            if (remaining > 2) {
                b |= data[i + 2] & 0xFF;
            }

            buffer.append(ALPHABET[(b >> 18) & 0x3F]);
            buffer.append(ALPHABET[(b >> 12) & 0x3F]);
            // Sextets that carry no input bits are replaced by padding.
            buffer.append(remaining > 1 ? ALPHABET[(b >> 6) & 0x3F] : '=');
            buffer.append(remaining > 2 ? ALPHABET[b & 0x3F] : '=');
        }

        return buffer.toString();
    }

    /**
     * Decodes Base64 text.
     *
     * Characters outside the Base64 alphabet (whitespace, line breaks, a BOM,
     * any non-ASCII character) are skipped wherever they occur. A {@code =}
     * terminates the current group, so concatenated padded streams decode
     * group by group. A dangling single sextet carries fewer than eight bits
     * and is dropped.
     *
     * @param data the Base64 text; must not be {@code null}
     * @return the decoded bytes (empty for empty input)
     */
    public static byte[] decode(String data)
    {
        int length = data.length();
        // Every 4 input characters yield at most 3 bytes, so this is an upper bound.
        byte[] out = new byte[(length / 4 + 1) * 3];
        int outLen = 0;
        int acc = 0;    // sextets collected for the current group
        int count = 0;  // number of sextets currently held in acc

        for (int i = 0; i < length; i++) {
            char ch = data.charAt(i);

            if (ch == '=') {
                outLen = flushPartial(out, outLen, acc, count);
                acc = 0;
                count = 0;
                continue;
            }

            // Working on chars (not signed bytes) keeps the index non-negative;
            // anything beyond ASCII cannot be part of the alphabet.
            int value = ch < DECODE_TABLE.length ? DECODE_TABLE[ch] : -1;
            if (value < 0) {
                continue;
            }

            acc = (acc << 6) | value;
            count++;
            if (count == 4) {
                out[outLen++] = (byte) (acc >> 16);
                out[outLen++] = (byte) (acc >> 8);
                out[outLen++] = (byte) acc;
                acc = 0;
                count = 0;
            }
        }
        outLen = flushPartial(out, outLen, acc, count);

        byte[] result = new byte[outLen];
        System.arraycopy(out, 0, result, 0, outLen);
        return result;
    }

    /**
     * Writes the whole bytes contained in an incomplete group of sextets.
     *
     * @return the new write position in {@code out}
     */
    private static int flushPartial(byte[] out, int pos, int acc, int count)
    {
        if (count == 2) {
            // 12 bits collected: one full byte plus 4 discarded bits.
            out[pos++] = (byte) (acc >> 4);
        } else if (count == 3) {
            // 18 bits collected: two full bytes plus 2 discarded bits.
            out[pos++] = (byte) (acc >> 10);
            out[pos++] = (byte) (acc >> 2);
        }
        return pos;
    }
}
import java.io.ByteArrayOutputStream;
import java.util.Random;
import java.util.Arrays;
import javax.xml.bind.DatatypeConverter;
/**
 * Randomized cross-check of {@code Base64} against the JDK's
 * {@code DatatypeConverter}: round trips through each encoder/decoder
 * combination and compares the encoded text directly. Mismatches are
 * printed to standard output; nothing is thrown.
 */
public class Base64Test
{
    /** Upper bound (inclusive) on the length of each random sample. */
    private static final int MAX_LEN = 100;

    /**
     * Prints the bytes as space-separated two-digit hex values on one line.
     *
     * @param bytes the bytes to print
     */
    public static void print(byte[] bytes)
    {
        for (byte b : bytes) {
            System.out.printf("%02X ", b);
        }
        System.out.println();
    }

    /**
     * Returns 1..MAX_LEN random bytes. {@code nextBytes} covers all 256 byte
     * values; the previous {@code nextInt(0xFF)} could never produce 0xFF.
     */
    private static byte[] randomBytes(Random rand)
    {
        byte[] data = new byte[rand.nextInt(MAX_LEN) + 1];
        rand.nextBytes(data);
        return data;
    }

    /** Prints both arrays when a round trip did not reproduce the input. */
    private static void checkRoundTrip(byte[] original, byte[] decoded)
    {
        if (!Arrays.equals(original, decoded)) {
            print(original);
            print(decoded);
            System.out.println();
        }
    }

    /**
     * Runs the four randomized comparison passes.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        int steps = 1000000;
        Random rand = new Random(System.currentTimeMillis());

        System.out.println("Encode, decode");
        for (int count = 0; count < steps; count++) {
            byte[] original = randomBytes(rand);
            String encoded = Base64.encode(original);
            checkRoundTrip(original, Base64.decode(encoded));
        }

        System.out.println("Encode, decode with other");
        for (int count = 0; count < steps; count++) {
            byte[] original = randomBytes(rand);
            String encoded = Base64.encode(original);
            checkRoundTrip(original, DatatypeConverter.parseBase64Binary(encoded));
        }

        System.out.println("Encode with other, decode");
        for (int count = 0; count < steps; count++) {
            byte[] original = randomBytes(rand);
            String encoded = DatatypeConverter.printBase64Binary(original);
            checkRoundTrip(original, Base64.decode(encoded));
        }

        System.out.println("Comparison test");
        for (int count = 0; count < 100000; count++) {
            byte[] original = randomBytes(rand);
            String encoded = DatatypeConverter.printBase64Binary(original);
            String encoded2 = Base64.encode(original);
            if (!encoded.equals(encoded2)) {
                System.out.println("mismatch");
                System.out.println(encoded);
                System.out.println(encoded2);
                System.out.println();
            }
        }
    }
}
# Script to generate the reverse lookup table (character code -> 6-bit value)
# for the Base64 decoder. Codes that are not in the alphabet map to -1.
# Emits one entry per possible byte value, i.e. 256 entries (0..255); the
# earlier xrange(0, 255) stopped one short and produced a 255-entry table.
s = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
res = [(ord(c), i) for i, c in enumerate(s)]
lookup = dict(res)
f = []
for i in range(256):
    # Right-align every entry in a 3-character field so the rows line up.
    f.append("%3d" % lookup.get(i, -1))
print(len(f))
print(",".join(f))
@EmilHernvall
Copy link
Author

Some of the loops could easily be unrolled, but I reckon that the JIT will do that automatically if needed.

@albx79
Copy link

albx79 commented Mar 6, 2013

Not working for me:

    byte[] original = new byte[]{0, 1, 2, 3, 64, 99, 127, -128, -77};
    String encoded = Base64.encode(original);
    assertThat(Base64.decode(encoded), equalTo(original));

java.lang.AssertionError:
Expected: [<0>, <1>, <2>, <3>, <64>, <99>, <127>, <-128>, <-77>]
got: [<0>, <1>, <2>, <3>, <64>, <99>, <127>, <-62>, <-128>, <-62>, <-77>]

@scizzr
Copy link

scizzr commented Jun 2, 2013

Your code is not encoding correctly for strings ending with a '0'. It's omitting the ending '0'. I noticed this when using your code to encode a string, and using PHP to decode it.

//Java
String input = "testing0";
System.out.println(String.format("str: '%s'", input));
System.out.println(String.format("enc: '%s'", encode(input.getBytes())));

//output
str: 'testing0'
enc: 'dGVzdGluZz='
//PHP
$input = 'dGVzdGluZz=';
echo(sprintf("enc: '%s'", $input));
echo(sprintf("dec: '%s'", base64_decode($input)));

//output
enc: 'dGVzdGluZz='
dec: 'testing'

Give it a shot at one of these sites:
http://ostermiller.org/calc/encode.html
http://www.base64online.com/
http://www.opinionatedgeek.com/dotnet/tools/base64decode/ (gives an error when trying to decode)

Here's some working code, with source provided in the annotations.
https://gist.github.com/scizzr/5695546

The only thing that I couldn't get working was negative bytes (-128, for example) but honestly, I don't think you even need them since String.getBytes(); returns positive values.

Testing:

String orig = new String(new byte[] { 1, 2, 4, 8, 16, 32, 64, 0 });
byte[] enc = encode(orig);
byte[] dec = decode(enc);
System.out.println(String.format("orig: %s %s\n" + "encr: %s %s\n" + "decr: %s %s", orig, Arrays.toString(orig.getBytes()), new String(enc), Arrays.toString(enc), new String(dec), Arrays.toString(dec)));

try {
    assertThat(orig, equalTo(new String(dec)));
    System.out.println("Equals");
} catch (AssertionError err) {
    System.out.println("Different");
}

@EmilHernvall
Copy link
Author

There was a rather bad bug in it, and it was a bit of a miracle that it ever worked at all. :) It's fixed now.

@andrewchambers
Copy link

There is still a bug in it, try and decode this string:
AAAAAAAAAAABAg==
python says:
base64.b64decode("AAAAAAAAAAABAg==")
'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02'
this one fails.

@EmilHernvall
Copy link
Author

@andrewchambers: Confirmed. :/ Seems to be more subtleties to this than I imagined. I've updated the code to work with your example.

@EmilHernvall
Copy link
Author

I've added a test which generates random bytes and uses the built-in java6 methods for comparison, which hopefully rules out further bugs. This whole thing is obviously of much less utility now that java ships with base64 support, but it might be of some use for legacy code.

@kimwooglae
Copy link

kimwooglae commented Jan 18, 2018

There are some bugs in decode method.

  1. The tbl array has only 255 entries; add one more so that it has 256.
  2. A Java byte is a signed value (-128 to 127). When an invalid character is fed in, an exception occurs. Some text editors add a BOM character, which can cause that problem.
for (int i = 0; i < bytes.length;) {
   int b = 0;
   if (bytes[i] >= 0 && tbl[bytes[i]] != -1) {
      b = (tbl[bytes[i]] & 0xFF) << 18;
   }

@hrules6872
Copy link

@ZhouGongZaiShi
Copy link

ZhouGongZaiShi commented Aug 29, 2019

There are some bugs in decode method.

  1. The tbl array has only 255 entries; add one more so that it has 256.
  2. A Java byte is a signed value (-128 to 127). When an invalid character is fed in, an exception occurs. Some text editors add a BOM character, which can cause that problem.
for (int i = 0; i < bytes.length;) {
   int b = 0;
   if (bytes[i] >= 0 && tbl[bytes[i]] != -1) {
      b = (tbl[bytes[i]] & 0xFF) << 18;
   }

@kimwooglae
May I ask you a question?
Why should the length of the tbl array be 256 instead of 255?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment