Skip to content

Instantly share code, notes, and snippets.

@nitsanw
Last active December 11, 2015 10:38
Show Gist options
  • Save nitsanw/4587927 to your computer and use it in GitHub Desktop.
Save nitsanw/4587927 to your computer and use it in GitHub Desktop.
Hand-rolled benchmark for UTF-8 encoding
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.util.ArrayList;
/**
 * Hand-rolled benchmark that times how long a {@code UTF8Encoder} implementation takes to
 * encode every line of an input file, repeated many times.
 *
 * <p>Command line: {@code <encoder> <once|reuse> <buffer|array|bytebuffer> <input file>}
 * <ul>
 *   <li>{@code encoder}: one of {@code bytebuffer}, {@code string}, {@code string2},
 *       {@code chars}, {@code custom}; selects the encoder implementation.</li>
 *   <li>{@code once|reuse}: {@code once} asks the encoder for a new instance before every
 *       string; {@code reuse} uses the same instance throughout.</li>
 *   <li>{@code buffer|array|bytebuffer}: which encoder entry point is exercised.</li>
 *   <li>{@code input file}: text file read as UTF-8; each line is one string to encode.</li>
 * </ul>
 *
 * <p>Round-trip checks are written as {@code assert} statements, so they only execute when
 * the JVM is started with {@code -ea}; without it only the encoding itself is timed.
 */
public class StringEncodingTest {
    /** Size in bytes of the destination array handed to every encoder. */
    private static final int DESTINATION_SIZE = 4096;

    /** Number of timed rounds; one line of output is printed per round. */
    private static final int ROUNDS = 50;

    /** Passes over the whole input per timed round (raise for longer, steadier runs). */
    private static final int ITERATIONS_PER_ROUND = 1000;

    /** Prints the usage line to stderr and terminates the JVM with exit status 1. */
    private static void error() {
        System.err.println("(bytebuffer|string|string2|chars|custom) (once|reuse) (buffer|array|bytebuffer) (input strings)");
        System.exit(1);
    }

    /** Which encoder entry point the benchmark calls for each string. */
    private enum OutputMode {
        /** Calls {@code encodeToArray}, which returns a {@code byte[]}. */
        ARRAY,
        /** Calls {@code encode}, which returns a byte count; checked against the shared array. */
        REUSE_BUFFER,
        /** Calls {@code encodeToNewBuffer}, which returns a {@code ByteBuffer}. */
        NEW_BYTEBUFFER,
    }

    /**
     * Parses the command line, loads the input strings and runs the timed rounds.
     *
     * @param args encoder name, {@code once|reuse}, output mode, input file path
     * @throws IOException if the input file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 4) {
            error();
            return;
        }

        byte[] destination = new byte[DESTINATION_SIZE];

        // error() never returns (System.exit), but the explicit returns below are still
        // required so the compiler can prove the locals are definitely assigned.
        UTF8Encoder encoder;
        if (args[0].equals("bytebuffer")) {
            encoder = new DirectEncoder(destination);
        } else if (args[0].equals("string")) {
            encoder = new StringEncoder(destination);
        } else if (args[0].equals("string2")) {
            encoder = new StringEncoder2(destination);
        } else if (args[0].equals("chars")) {
            encoder = new CharBufferCopyEncoder(destination);
        } else if (args[0].equals("custom")) {
            encoder = new CustomEncoder(destination);
        } else {
            error();
            return;
        }

        boolean reuseEncoder = true;
        if (args[1].equals("once")) {
            reuseEncoder = false;
        } else if (!args[1].equals("reuse")) {
            error();
            return;
        }

        OutputMode outputMode;
        if (args[2].equals("array")) {
            outputMode = OutputMode.ARRAY;
        } else if (args[2].equals("buffer")) {
            outputMode = OutputMode.REUSE_BUFFER;
        } else if (args[2].equals("bytebuffer")) {
            outputMode = OutputMode.NEW_BYTEBUFFER;
        } else {
            error();
            return;
        }

        ArrayList<String> strings = readLines(args[3]);

        for (int round = 0; round < ROUNDS; ++round) {
            long start = System.nanoTime();
            testLoop(destination, encoder, reuseEncoder, outputMode, strings,
                    ITERATIONS_PER_ROUND);
            long end = System.nanoTime();
            // Subtract as longs first: nanoTime values can be large enough that casting a
            // single reading to double before subtracting would drop low-order bits.
            System.out.println((end - start) / 1000000. + " millis");
            // Collect between rounds so garbage from one round is less likely to be
            // charged to the next one.
            System.gc();
        }
    }

    /**
     * Reads every line of the file at {@code path}, decoding it as UTF-8.
     *
     * @param path path of the input file
     * @return the lines in file order, without line terminators; empty if the file is empty
     * @throws IOException if the file cannot be opened or read
     */
    private static ArrayList<String> readLines(String path) throws IOException {
        ArrayList<String> lines = new ArrayList<String>();
        FileInputStream in = new FileInputStream(path);
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        } finally {
            // Closing the underlying stream releases the file handle even when reading fails.
            in.close();
        }
        return lines;
    }

    /**
     * Runs {@link #encodeLoop} over the whole input {@code iterations} times.
     *
     * @param destination array the encoders were constructed with
     * @param encoder encoder under test
     * @param reuseEncoder {@code false} to request a new encoder instance per string
     * @param outputMode which encoder entry point to call
     * @param strings input strings
     * @param iterations number of passes over {@code strings}
     * @throws UnsupportedEncodingException declared by the {@code String} constructors used in
     *         the assertions
     */
    private static void testLoop(byte[] destination, UTF8Encoder encoder,
            boolean reuseEncoder, OutputMode outputMode,
            ArrayList<String> strings, final int iterations)
            throws UnsupportedEncodingException {
        for (int i = 0; i < iterations; ++i) {
            encodeLoop(destination, encoder, reuseEncoder, outputMode, strings);
        }
    }

    /**
     * Encodes each input string once through the selected entry point and, when assertions
     * are enabled, checks that decoding the produced bytes as UTF-8 yields the input again.
     *
     * @param destination array the encoders were constructed with; read back in
     *        {@code REUSE_BUFFER} mode (presumably where {@code encode} writes; confirm
     *        against the encoder implementations)
     * @param encoder encoder under test
     * @param reuseEncoder {@code false} to call {@code encoder.newInstance()} per string
     * @param outputMode which encoder entry point to call
     * @param strings input strings
     * @throws UnsupportedEncodingException declared by the {@code String} constructors used in
     *         the assertions
     */
    private static void encodeLoop(byte[] destination, UTF8Encoder encoder,
            boolean reuseEncoder, OutputMode outputMode, ArrayList<String> strings)
            throws UnsupportedEncodingException {
        for (String value : strings) {
            UTF8Encoder current = reuseEncoder ? encoder : encoder.newInstance();

            if (outputMode == OutputMode.REUSE_BUFFER) {
                int bytes = current.encode(value);
                assert new String(destination, 0, bytes, "UTF-8").equals(value);
            } else if (outputMode == OutputMode.ARRAY) {
                byte[] out = current.encodeToArray(value);
                assert new String(out, "UTF-8").equals(value);
            } else {
                assert outputMode == OutputMode.NEW_BYTEBUFFER;
                ByteBuffer out = current.encodeToNewBuffer(value);
                // The readable bytes start at arrayOffset() + position() in the backing
                // array, which is not necessarily index 0.
                assert new String(out.array(), out.arrayOffset() + out.position(),
                        out.remaining(), "UTF-8").equals(value);
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment