Skip to content

Instantly share code, notes, and snippets.

@sergeych
Last active December 27, 2015 08:09
Show Gist options
  • Save sergeych/7294341 to your computer and use it in GitHub Desktop.
Save sergeych/7294341 to your computer and use it in GitHub Desktop.
Effectively encodes/decodes binary data to the text suitable to use as urls and file names, unlike Base64 which is case-sensitive
package net.sergeych.utils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
/**
* Binary to text encoder suitable to use encoded strings as any parts of urls
* and parts of file names safely even on case-insensitive systems. Human
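* friendly, easy to retype from print or from voice, treats confusing
* characters (like <code>l</code>, <code>I</code> and <code>1</code>, or
* <code>o</code>, <code>O</code> and <code>0</code>) as the same. No filling
* characters, byte granularity, minimal overhead for base 32 family. Ideal
* for keys, ids and so on.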
*
* <p>
* Based on the Crockford's base32 alphabet "
* <code>0123456789ABCDEFGHJKMNPQRSTVWXYZ</code>" and provides more aliases to
* reduce errors when the encoded data are entered by a human:
* <code>"lLiI1", "oO0",
* "uUvV"</code> are all the same while decoding. <b>Hyphens are ignored</b>
* while decoding and can be inserted to make it more readable.
*
* <p>
* See also <a href='http://www.crockford.com/wrmg/base32.html'>Douglas
* Crockford base32</a> algorithm.
*
* @author sergeych ([email protected])
* @license MIT license.
*/
public class NameCode32 {

    /** Encoding alphabet; the index of a character is its 5-bit value. */
    static private final String abcString = "0123456789ABCDEFGHJKMNPQRSTVWXYZ";

    /** The alphabet as an array, indexed by 5-bit value. */
    static private final char[] abc = abcString.toCharArray();

    /** Decoding table: every accepted character (including aliases) to its 5-bit value. */
    static private final HashMap<Character, Integer> inAbc = new HashMap<>();

    static {
        // Canonical alphabet, accepted in both upper and lower case.
        for (int i = 0; i < abc.length; i++) {
            char upper = abc[i];
            inAbc.put(upper, i);
            char lower = Character.toLowerCase(upper);
            if (upper != lower) {
                inAbc.put(lower, i);
            }
        }
        // Aliases for characters that are easily confused when retyped.
        registerAliases("iIlL", 1);
        // Latin o/O plus the look-alike Cyrillic letters U+043E and U+041E.
        // Written as escapes so the source compiles the same under any file encoding.
        registerAliases("oO\u043E\u041E", 0);
        // Cyrillic letters U+0443 and U+0423 look like Latin y/Y (value 30).
        registerAliases("\u0443\u0423", 30);
        // u/U are read as V (value 27).
        registerAliases("uU", 27);
    }

    /** Maps every character of {@code aliases} to {@code value} in the decoding table. */
    private static void registerAliases(String aliases, int value) {
        for (char c : aliases.toCharArray()) {
            inAbc.put(c, value);
        }
    }

    /**
     * Decode encoded string back into byte[].
     *
     * <p>
     * Hyphens are skipped. Each remaining character contributes 5 bits, least
     * significant bits first; a byte is produced every time 8 bits are
     * available. Trailing bits that do not complete a byte are discarded.
     *
     * @param src
     *            encoded string
     * @return byte[] result
     * @throws IllegalArgumentException
     *             if src is null or contains a character that is neither a
     *             hyphen nor a known alphabet character/alias
     */
    public static byte[] decode(String src) {
        if (src == null) {
            throw new IllegalArgumentException("Invalid encoded string: " + src);
        }
        // n characters carry at most 5n bits, i.e. at most floor(5n / 8) bytes.
        // Computed in long so very long inputs cannot overflow the multiplication.
        byte[] buffer = new byte[(int) (src.length() * 5L / 8)];
        int size = 0;
        int acc = 0;
        int bits = 0;
        for (int i = 0; i < src.length(); i++) {
            char c = src.charAt(i);
            if (c == '-') {
                continue;
            }
            Integer value = inAbc.get(c);
            if (value == null) {
                throw new IllegalArgumentException("Invalid encoded string: " + src);
            }
            acc |= value << bits;
            bits += 5;
            if (bits >= 8) {
                buffer[size++] = (byte) acc;
                acc >>>= 8;
                bits -= 8;
            }
        }
        if (size == buffer.length) {
            return buffer;
        }
        // Hyphens made the estimate too large: trim to the real size.
        byte[] result = new byte[size];
        System.arraycopy(buffer, 0, result, 0, size);
        return result;
    }

    /**
     * Decode encoded string back as a List<Byte>. Convenience wrapper around
     * {@link #decode(String)} that boxes the decoded bytes into a new mutable
     * list.
     *
     * @param src
     *            encoded string
     * @return List<Byte> result
     * @throws IllegalArgumentException
     *             if src is null or contains an invalid character
     */
    public static List<Byte> decodeList(String src) {
        byte[] bytes = decode(src);
        ArrayList<Byte> res = new ArrayList<>(bytes.length);
        for (byte b : bytes) {
            res.add(b);
        }
        return res;
    }

    /**
     * Encode binary data into sergeych's base32 string.
     *
     * <p>
     * Bytes are consumed in order and emitted 5 bits at a time, least
     * significant bits first. If bits remain after the last byte, one more
     * character is emitted for them (upper bits zero). No padding characters
     * are added.
     *
     * @param data
     *            source data. Can be empty but can not be null.
     * @return encoded string
     */
    static public String encode(byte[] data) {
        // ceil(8n / 5) characters are produced for n bytes.
        StringBuilder b = new StringBuilder((int) ((data.length * 8L + 4) / 5));
        int acc = 0;
        int bits = 0;
        for (byte value : data) {
            // Mask to treat the byte as unsigned.
            acc |= (value & 0xFF) << bits;
            bits += 8;
            while (bits >= 5) {
                b.append(abc[acc & 0x1F]);
                bits -= 5;
                acc >>>= 5;
            }
        }
        if (bits > 0) {
            b.append(abc[acc & 0x1F]);
        }
        return b.toString();
    }

    /**
     * Return the encoding alphabet (no aliases included).
     *
     * @return alphabet as string, e.g. first character has weight of 0, second 1
     *         and so on.
     */
    public static String getAlphabet() {
        return abcString;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment