Last active
January 8, 2025 00:43
-
-
Save satsen/5e7bcc38565ad193cf7d906a856f804e to your computer and use it in GitHub Desktop.
VLQ Reader and Writer for Java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.IOException; | |
import java.io.InputStream; | |
/** | |
* @see <a href="https://en.wikipedia.org/wiki/Variable-length_quantity">Variable-length quantity on Wikipedia</a> | |
* @see <a href="https://github.com/protocolbuffers/protobuf/blob/main/java/core/src/main/java/com/google/protobuf/CodedInputStream.java">protobuf/CodedInputStream.java</a> | |
*/ | |
public class VLQReader { | |
private VLQReader() {} | |
/** | |
* @apiNote Uses VLQ then ZigZag decoding. | |
*/ | |
public static short readShort(InputStream in) throws IOException { | |
return (short) decodeZigZagInt((int) readULong(in)); | |
} | |
public static int readUShort(InputStream in) throws IOException { | |
int value = (int) readULong(in); | |
if (value < 0 || value > 0xFFFF) | |
throw new IllegalArgumentException(value + " is out of unsigned short range"); | |
return value; | |
} | |
/** | |
* @apiNote Uses VLQ then ZigZag decoding. | |
*/ | |
public static int readInt(InputStream in) throws IOException { | |
return decodeZigZagInt((int) readULong(in)); | |
} | |
public static long readUInt(InputStream in) throws IOException { | |
long value = readULong(in); | |
if (value < 0 || value > 0xFFFFFFFFL) | |
throw new IllegalArgumentException(value + " is out of unsigned int range"); | |
return value; | |
} | |
/** | |
* @apiNote Uses VLQ then ZigZag decoding. | |
*/ | |
public static long readLong(InputStream in) throws IOException { | |
return decodeZigZagLong(readULong(in)); | |
} | |
/** | |
* Reads up to (but can be less than) 64 bits of VLQ-encoded value. | |
*/ | |
public static long readULong(InputStream in) throws IOException { | |
long result = 0; | |
for (int shift = 0; shift < 64; shift += 7) { | |
final byte b = (byte) in.read(); | |
result |= (long) (b & 0x7F) << shift; | |
if ((b & 0x80) == 0) { | |
return result; | |
} | |
} | |
throw new IllegalStateException("More bytes than needed found"); | |
} | |
/** | |
* Decodes a ZigZag-encoded integer (32 bits) | |
*/ | |
public static int decodeZigZagInt(int n) { | |
return (n >>> 1) ^ -(n & 1); | |
} | |
/** | |
* Decodes a ZigZag-encoded long (64 bits) | |
*/ | |
public static long decodeZigZagLong(long n) { | |
return (n >>> 1) ^ -(n & 1); | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.IOException; | |
import java.io.OutputStream; | |
/** | |
* @see <a href="https://en.wikipedia.org/wiki/Variable-length_quantity">Variable-length quantity on Wikipedia</a> | |
* @see <a href="https://github.com/protocolbuffers/protobuf/blob/a85bbad37905e882f89973e9fa7836ff79d02024/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java">protobuf/CodedOutputStream.java</a> | |
*/ | |
public class VLQWriter { | |
private VLQWriter() {} | |
/** | |
* Writes a signed short encoded with ZigZag and VLQ. | |
* Both negative and positive values are supported, but due to ZigZag, encoding positive | |
* values is done less efficiently than by {@link #writeUShort}. | |
* Use {@link #writeUShort} to encode values that are always positive. | |
* | |
* @apiNote The resulting varint uses ZigZag encoding as well, which is more efficient at | |
* encoding negative values than pure VLQ. | |
* @param x signed int | |
*/ | |
public static void writeShort(OutputStream out, short x) throws IOException { | |
writeULong(out, encodeZigZagInt(x)); | |
} | |
/** | |
* @param x Unsigned short (0-0xFFFF) represented as int | |
* @throws IllegalArgumentException for values not in unsigned short range | |
*/ | |
public static void writeUShort(OutputStream out, int x) throws IOException { | |
if (x < 0 || x > 0xFFFF) | |
throw new IllegalArgumentException(x + " is out of unsigned short range"); | |
writeUInt(out, x); | |
} | |
/** | |
* Writes a signed int encoded with ZigZag and VLQ. | |
* Both negative and positive values are supported, but due to ZigZag, encoding positive | |
* values is done less efficiently than by {@link #writeUInt}. | |
* Use {@link #writeUInt} to encode values that are positive. | |
* | |
* @apiNote The resulting varint uses ZigZag encoding as well, which is more efficient at | |
* encoding negative values than pure VLQ. | |
*/ | |
public static void writeInt(OutputStream out, int x) throws IOException { | |
writeULong(out, encodeZigZagInt(x)); | |
} | |
/** | |
* Writes an unsigned int encoded with VLQ. | |
* Only positive values are supported. Use {@link #writeInt} | |
* to encode negative and positive values. | |
* | |
* @param x Unsigned int (0-0xFFFFFFFF) represented as long | |
* @throws IllegalArgumentException for values not in unsigned int range | |
*/ | |
public static void writeUInt(OutputStream out, long x) throws IOException { | |
if (x < 0 || x > 0xFFFFFFFFL) | |
throw new IllegalArgumentException(x + " is out of unsigned int range"); | |
writeULong(out, x); | |
} | |
/** | |
* Writes a signed long encoded with VLQ and ZigZag. | |
* Both negative and positive values are supported, but due to ZigZag, encoding positive | |
* values is done less efficiently than by {@link #writeULong}. | |
* Use {@link #writeULong} to encode values that are positive. | |
* | |
* @apiNote The resulting varint uses ZigZag encoding as well, which is more efficient at | |
* encoding negative values than pure VLQ. | |
*/ | |
public static void writeLong(OutputStream out, long x) throws IOException { | |
writeULong(out, encodeZigZagLong(x)); | |
} | |
/** | |
* Writes an unsigned long value encoded with VLQ. | |
* Both negative and positive values are supported, but only positive values are encoded | |
* efficiently, negative values are taking a toll and use six bytes. Use {@link #writeLong} | |
* to encode negative and positive values. | |
* | |
* @apiNote Don't use it for negative values, the resulting varint is always ten | |
* bytes long – it is, effectively, treated like a very large unsigned integer. | |
* If you use {@link #writeLong}, the resulting varint uses ZigZag encoding, | |
* which is much more efficient. | |
*/ | |
public static void writeULong(OutputStream out, long x) throws IOException { | |
byte[] buffer = new byte[10]; | |
int position = 0; | |
long value = x; | |
while (true) { | |
if ((value & ~0x7FL) == 0) { | |
buffer[position++] = (byte) value; | |
out.write(buffer, 0, position); | |
return; | |
} else { | |
buffer[position++] = (byte) (((int) value & 0x7F) | 0x80); | |
value >>>= 7; | |
} | |
} | |
} | |
public static int encodeZigZagInt(final int n) { | |
// Note: the right-shift must be arithmetic | |
return (n << 1) ^ (n >> 31); | |
} | |
public static long encodeZigZagLong(final long n) { | |
// Note: the right-shift must be arithmetic | |
return (n << 1) ^ (n >> 63); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment