Created
October 19, 2014 15:52
-
-
Save quux00/8882422fcf40c7adbf1b to your computer and use it in GitHub Desktop.
Microbenchmarks of four ways to tokenize a Java string
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package mechsympathy; | |
import java.util.ArrayList; | |
import java.util.Iterator; | |
import java.util.List; | |
import java.util.StringTokenizer; | |
import java.util.concurrent.TimeUnit; | |
import org.openjdk.jmh.annotations.Benchmark; | |
import org.openjdk.jmh.annotations.Fork; | |
import org.openjdk.jmh.annotations.Measurement; | |
import org.openjdk.jmh.annotations.OutputTimeUnit; | |
import org.openjdk.jmh.annotations.Scope; | |
import org.openjdk.jmh.annotations.State; | |
import org.openjdk.jmh.annotations.Warmup; | |
import org.openjdk.jmh.infra.Blackhole; | |
import com.google.common.base.Splitter; | |
@OutputTimeUnit(TimeUnit.MILLISECONDS) | |
@Warmup(iterations = 4, time = 1, timeUnit = TimeUnit.SECONDS) | |
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) | |
@Fork(2) | |
public class JavaStringSplits { | |
@State(Scope.Benchmark) | |
public static class StringState { | |
String[] lines = new String[]{ | |
"0000 12 34 5 666 77 88888 99", | |
"aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss tt uu vv ww xx yy zz", | |
"Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod" | |
}; | |
} | |
@Benchmark | |
public static void javaSplit(StringState ss, Blackhole bh) { | |
for (String ln: ss.lines) { | |
String[] toks = ln.split(" "); | |
bh.consume(toks); | |
} | |
} | |
@Benchmark | |
public static void indexOf(StringState ss, Blackhole bh) { | |
for (String ln: ss.lines) { | |
List<String> ls = new ArrayList<String>(); | |
int pos = 0; | |
int idx = 0; | |
while (true) { | |
idx = ln.indexOf(' ', pos); | |
if (idx < 0) { | |
ls.add(ln.substring(pos, ln.length())); | |
break; | |
} | |
ls.add(ln.substring(pos, idx)); | |
pos = idx + 1; | |
} | |
bh.consume(ls); | |
} | |
} | |
@Benchmark | |
public static void stringTokenizer(StringState ss, Blackhole bh) { | |
for (String ln: ss.lines) { | |
List<String> ls = new ArrayList<String>(); | |
StringTokenizer st = new StringTokenizer(ln, " "); | |
while (st.hasMoreTokens()) { | |
ls.add(st.nextToken()); | |
} | |
bh.consume(ls); | |
} | |
} | |
@Benchmark | |
public static void guavaSplit(StringState ss, Blackhole bh) { | |
for (String ln: ss.lines) { | |
List<String> ls = new ArrayList<String>(); | |
Splitter split = Splitter.on(" "); | |
Iterator<String> it = split.split(ln).iterator(); | |
while (it.hasNext()) { | |
ls.add(it.next()); | |
} | |
bh.consume(ls); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment