Skip to content

Instantly share code, notes, and snippets.

@quux00
Created October 19, 2014 15:52
Show Gist options
  • Save quux00/8882422fcf40c7adbf1b to your computer and use it in GitHub Desktop.
Save quux00/8882422fcf40c7adbf1b to your computer and use it in GitHub Desktop.
Microbenchmarks of four ways to tokenize a Java string
package mechsympathy;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;
import java.util.concurrent.TimeUnit;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import com.google.common.base.Splitter;
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@Warmup(iterations = 4, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Fork(2)
public class JavaStringSplits {
@State(Scope.Benchmark)
public static class StringState {
String[] lines = new String[]{
"0000 12 34 5 666 77 88888 99",
"aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss tt uu vv ww xx yy zz",
"Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod"
};
}
@Benchmark
public static void javaSplit(StringState ss, Blackhole bh) {
for (String ln: ss.lines) {
String[] toks = ln.split(" ");
bh.consume(toks);
}
}
@Benchmark
public static void indexOf(StringState ss, Blackhole bh) {
for (String ln: ss.lines) {
List<String> ls = new ArrayList<String>();
int pos = 0;
int idx = 0;
while (true) {
idx = ln.indexOf(' ', pos);
if (idx < 0) {
ls.add(ln.substring(pos, ln.length()));
break;
}
ls.add(ln.substring(pos, idx));
pos = idx + 1;
}
bh.consume(ls);
}
}
@Benchmark
public static void stringTokenizer(StringState ss, Blackhole bh) {
for (String ln: ss.lines) {
List<String> ls = new ArrayList<String>();
StringTokenizer st = new StringTokenizer(ln, " ");
while (st.hasMoreTokens()) {
ls.add(st.nextToken());
}
bh.consume(ls);
}
}
@Benchmark
public static void guavaSplit(StringState ss, Blackhole bh) {
for (String ln: ss.lines) {
List<String> ls = new ArrayList<String>();
Splitter split = Splitter.on(" ");
Iterator<String> it = split.split(ln).iterator();
while (it.hasNext()) {
ls.add(it.next());
}
bh.consume(ls);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment