Last active
December 27, 2015 08:09
-
-
Save sergeych/7294341 to your computer and use it in GitHub Desktop.
Efficiently encodes/decodes binary data to text that is safe to use in URLs and file names, even on case-insensitive systems (unlike Base64, which is case-sensitive)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package net.sergeych.utils; | |
import java.util.ArrayList; | |
import java.util.HashMap; | |
import java.util.List; | |
/**
 * Binary to text encoder suitable to use encoded strings as any parts of urls
 * and parts of file names safely even on case-insensitive systems. Human
 * friendly, easy to retype from print or from voice: look-alike characters
 * (like 'I', 'l' and '1') are treated as the same symbol when decoding. No
 * filling (padding) characters, byte granularity, minimal overhead for the
 * base 32 family. Ideal for keys, ids and so on.
 *
 * <p>
 * Based on the Crockford's base32 alphabet
 * "<code>0123456789ABCDEFGHJKMNPQRSTVWXYZ</code>" and provides more aliases to
 * reduce errors when the encoded data are entered by a human:
 * <code>"lLiI1", "oO0", "uUvV"</code> are all the same while decoding, and so
 * are the Cyrillic look-alikes of 'o'/'O' (U+043E, U+041E, decoded as '0') and
 * of 'y'/'Y' (U+0443, U+0423, decoded as 'Y'). Decoding is case-insensitive.
 * <b>Hyphens are ignored</b> while decoding and can be inserted to make the
 * text more readable.
 *
 * <p>
 * Bits are packed least-significant first: the first encoded character holds
 * the lowest 5 bits of the first byte. Because of that the output is not
 * interchangeable with RFC 4648 base32 or with Crockford's own bit order.
 *
 * <p>
 * All methods are static and only read lookup tables that are filled once in
 * the static initializer.
 *
 * <p>
 * See also <a href='http://www.crockford.com/wrmg/base32.html'>Douglas
 * Crockford base32</a> algorithm.
 *
 * <p>
 * License: MIT.
 *
 * @author sergeych
 */
public class NameCode32 {

    /** Encoding alphabet; the position of a character is its 5-bit value. */
    private static final String ALPHABET = "0123456789ABCDEFGHJKMNPQRSTVWXYZ";

    /** {@link #ALPHABET} as an array, for indexed access while encoding. */
    private static final char[] ALPHABET_CHARS = ALPHABET.toCharArray();

    /** Maps every accepted input character (alphabet, lower case, aliases) to its 5-bit value. */
    private static final HashMap<Character, Integer> DECODE_MAP = new HashMap<>();

    static {
        for (int value = 0; value < ALPHABET_CHARS.length; value++) {
            char symbol = ALPHABET_CHARS[value];
            DECODE_MAP.put(symbol, value);
            // For digits toLowerCase() is the identity, so this just re-puts the same entry.
            DECODE_MAP.put(Character.toLowerCase(symbol), value);
        }
        // Look-alikes that are not part of the alphabet are folded into the
        // character they are most easily confused with.
        alias("iIlL", '1');
        // Latin o/O plus Cyrillic U+043E/U+041E; written as escapes so the
        // homoglyphs are visible in the source.
        alias("oO\u043E\u041E", '0');
        // Cyrillic U+0443/U+0423, which look like Latin y/Y.
        alias("\u0443\u0423", 'Y');
        alias("uU", 'V');
    }

    /** Registers every character of {@code lookalikes} as a decoding alias of {@code canonical}. */
    private static void alias(String lookalikes, char canonical) {
        int value = ALPHABET.indexOf(canonical);
        for (char c : lookalikes.toCharArray()) {
            DECODE_MAP.put(c, value);
        }
    }

    /**
     * Decode encoded string back into byte[].
     *
     * <p>
     * Hyphens are skipped. Trailing bits that do not form a whole byte are
     * dropped without validation.
     *
     * @param src
     *            encoded string
     * @return byte[] result; empty when {@code src} holds fewer than two
     *         symbols
     * @throws IllegalArgumentException
     *             if {@code src} is null or contains a character that is
     *             neither a hyphen, an alphabet character nor a known alias
     */
    public static byte[] decode(String src) {
        if (src == null) {
            throw new IllegalArgumentException("Invalid encoded string: " + src);
        }
        char[] chars = src.toCharArray();

        // Every non-hyphen symbol carries 5 bits; only whole bytes are produced.
        int symbols = 0;
        for (char c : chars) {
            if (c != '-') {
                symbols++;
            }
        }
        byte[] out = new byte[(int) (symbols * 5L / 8)];

        int acc = 0;
        int bits = 0;
        int pos = 0;
        for (char c : chars) {
            if (c == '-') {
                continue;
            }
            Integer value = DECODE_MAP.get(c);
            if (value == null) {
                throw new IllegalArgumentException("Invalid encoded string: " + src);
            }
            acc |= value << bits;
            bits += 5;
            if (bits >= 8) {
                out[pos++] = (byte) acc;
                acc >>>= 8;
                bits -= 8;
            }
        }
        return out;
    }

    /**
     * Decode encoded string back as a {@code List<Byte>}. Convenience wrapper
     * around {@link #decode(String)}; it boxes every byte, so prefer
     * {@link #decode(String)} when a plain array is sufficient.
     *
     * @param src
     *            encoded string
     * @return a new mutable {@code List<Byte>} with the decoded bytes
     * @throws IllegalArgumentException
     *             if {@code src} is null or contains a character that is
     *             neither a hyphen, an alphabet character nor a known alias
     */
    public static List<Byte> decodeList(String src) {
        byte[] raw = decode(src);
        ArrayList<Byte> res = new ArrayList<>(raw.length);
        for (byte b : raw) {
            res.add(b);
        }
        return res;
    }

    /**
     * Encode binary data into sergeych's base32 string.
     *
     * @param data
     *            source data. Can be empty but can not be null.
     * @return encoded string made only of characters from
     *         {@link #getAlphabet()}; empty for empty input
     * @throws NullPointerException
     *             if {@code data} is null
     */
    static public String encode(byte[] data) {
        // ceil(8 * n / 5) characters are produced for n bytes.
        long encodedLength = (data.length * 8L + 4) / 5;
        StringBuilder b = new StringBuilder((int) Math.min(Integer.MAX_VALUE, encodedLength));
        int acc = 0;
        int bits = 0;
        for (byte octet : data) {
            // Mask to treat the byte as unsigned before shifting it into the accumulator.
            acc |= (octet & 0xFF) << bits;
            bits += 8;
            while (bits >= 5) {
                b.append(ALPHABET_CHARS[acc & 0x1F]);
                acc >>>= 5;
                bits -= 5;
            }
        }
        // Flush the remaining (fewer than 5) bits as one final character.
        if (bits > 0) {
            b.append(ALPHABET_CHARS[acc & 0x1F]);
        }
        return b.toString();
    }

    /**
     * Return the encoding alphabet (no aliases included).
     *
     * @return alphabet as string, e.g. first character has weight of 0, second 1
     *         and so on.
     */
    public static String getAlphabet() {
        return ALPHABET;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment