Last active
August 29, 2015 14:17
-
-
Save twillouer/e4180390dc503fe886d0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) | |
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) | |
@Fork(3) | |
@BenchmarkMode(Mode.Throughput) | |
@OutputTimeUnit(TimeUnit.NANOSECONDS) | |
@State(Scope.Benchmark) | |
public class ReplaceAllBench { | |
@Param({ "10", "100", "1000" }) | |
private int size; | |
String string; | |
@Setup | |
public void setup() throws Throwable | |
{ | |
string = randomAlphanumericString(size / 3) + "\n\r " + randomAlphanumericString(size / 3) + "\n " | |
+ randomAlphanumericString(size / 3); | |
} | |
static class Replacer { | |
private static final Pattern COMPILE = Pattern.compile("[\n\r]+ "); | |
static String unfold_original(String s) | |
{ | |
s = s.replaceAll("\n\r ", ""); | |
s = s.replaceAll("\r\n ", ""); | |
s = s.replaceAll("\n ", ""); | |
s = s.replaceAll("\r ", ""); | |
return s; | |
} | |
static String unfold_regexp(String s) | |
{ | |
s = s.replaceAll("\n\r |\r\n |\n |\r ", ""); | |
return s; | |
} | |
static String unfold_regexpcompiled(String s) | |
{ | |
s = COMPILE.matcher(s).replaceAll(""); | |
return s; | |
} | |
private enum NLStatus { | |
NONE, RC, NL, RC_NL, NL_RC; | |
} | |
/** | |
* Remove all the '\n' and ’\r' followed by a ' ' from a LDIF String. | |
* | |
* @param s The String to unfold | |
* @return The resulting String | |
*/ | |
protected static String unfold_optim_emmanuel_1(String s) | |
{ | |
int pos = 0; | |
char[] unfold = new char[s.length()]; | |
NLStatus newLine = NLStatus.NONE; | |
for (char c : s.toCharArray()) { | |
switch (c) { | |
case '\n': | |
switch (newLine) { | |
case NONE: | |
newLine = NLStatus.NL; | |
break; | |
case RC: | |
newLine = NLStatus.RC_NL; | |
break; | |
case NL: | |
unfold[pos++] = '\n'; | |
break; | |
case RC_NL: | |
unfold[pos++] = '\r'; | |
unfold[pos++] = '\n'; | |
newLine = NLStatus.NL; | |
break; | |
case NL_RC: | |
unfold[pos++] = '\n'; | |
unfold[pos++] = '\r'; | |
newLine = NLStatus.NL; | |
break; | |
} | |
break; | |
case '\r': | |
switch (newLine) { | |
case NONE: | |
newLine = NLStatus.RC; | |
break; | |
case NL: | |
newLine = NLStatus.NL_RC; | |
break; | |
case RC: | |
unfold[pos++] = '\r'; | |
break; | |
case RC_NL: | |
unfold[pos++] = '\r'; | |
unfold[pos++] = '\n'; | |
newLine = NLStatus.RC; | |
break; | |
case NL_RC: | |
unfold[pos++] = '\n'; | |
unfold[pos++] = '\r'; | |
newLine = NLStatus.RC; | |
break; | |
} | |
break; | |
case ' ': | |
if (newLine == NLStatus.NONE) { | |
unfold[pos++] = c; | |
} else { | |
newLine = NLStatus.NONE; | |
} | |
break; | |
default: | |
switch (newLine) { | |
case NONE: | |
break; | |
case NL: | |
unfold[pos++] = '\n'; | |
newLine = NLStatus.NONE; | |
break; | |
case RC: | |
unfold[pos++] = '\r'; | |
newLine = NLStatus.NONE; | |
break; | |
case NL_RC: | |
unfold[pos++] = '\n'; | |
unfold[pos++] = '\r'; | |
newLine = NLStatus.NONE; | |
break; | |
case RC_NL: | |
unfold[pos++] = '\r'; | |
unfold[pos++] = '\n'; | |
newLine = NLStatus.NONE; | |
break; | |
} | |
unfold[pos++] = c; | |
} | |
} | |
switch (newLine) { | |
case NONE: | |
break; | |
case NL: | |
unfold[pos++] = '\n'; | |
break; | |
case RC: | |
unfold[pos++] = '\r'; | |
break; | |
case NL_RC: | |
unfold[pos++] = '\n'; | |
unfold[pos++] = '\r'; | |
break; | |
case RC_NL: | |
unfold[pos++] = '\r'; | |
unfold[pos++] = '\n'; | |
break; | |
} | |
return new String(unfold, 0, pos); | |
} | |
private static final String[] TODO = { "\n\r ", "\r\n ", "\r ", "\n " }; | |
private static final String[] TO = { "", "", "", "" }; | |
protected static String unfold_with_stringutils_on_apache_common(final String string) | |
{ | |
return StringUtils.replaceEach(string, TODO, TO); | |
} | |
public static String unfold_olivier(String test) | |
{ | |
// Null -> null | |
if (test == null) { | |
return null; | |
} | |
// 0 or 1 char | |
if (test.length() < 2) | |
return test; | |
// 2 chars | |
if (test.length() == 2) { | |
if (test.charAt(1) == ' ') { | |
char c0 = test.charAt(0); | |
if (c0 == '\r' || c0 == '\n') { | |
return ""; | |
} | |
} | |
return test; | |
} | |
// More than 2 chars | |
char[] chars = test.toCharArray(); | |
char[] dest = new char[chars.length]; | |
int p = chars.length - 1; | |
int d = p; | |
while (p >= 2) { | |
char c = chars[p]; | |
// Not a space : keep as is | |
if (c != ' ') { | |
dest[d] = c; | |
p--; | |
d--; | |
} | |
// Space | |
else { | |
char c1 = chars[p - 1]; | |
// Previous char is special : investigate deeper | |
if (c1 == '\r' || c1 == '\n') { | |
p--; | |
char c2 = chars[p - 1]; | |
if ((c2 == '\r' || c2 == '\n') && c2 != c1) { | |
p--; | |
} | |
p--; | |
} | |
// It was just a space : keep it | |
else { | |
dest[d] = c; | |
p--; | |
d--; | |
} | |
} | |
} | |
// Keep the remaining chars as it (special cases already covered) | |
while (p >= 0) { | |
dest[d--] = chars[p--]; | |
} | |
return new String(dest, d + 1, chars.length - d - 1); | |
} | |
public static String unfold_olivier_optimise_par_emmanuel(String test) | |
{ | |
// Null -> null | |
if ( test == null ) | |
{ | |
return null; | |
} | |
char[] chars = test.toCharArray(); | |
// 0 or 1 char | |
if ( chars.length < 2 ) | |
{ | |
return test; | |
} | |
// 2 chars | |
if ( chars.length == 2 ) | |
{ | |
if ( chars[1] == ' ' ) | |
{ | |
if ( chars[0] == '\r' || chars[0] == '\n' ) | |
{ | |
return ""; | |
} | |
} | |
return test; | |
} | |
// More than 2 chars | |
int p = chars.length - 1; | |
int d = p; | |
while ( p >= 0 ) | |
{ | |
char c = chars[p]; | |
// Not a space : keep as is | |
if ( c != ' ' ) | |
{ | |
chars[d] = c; | |
p--; | |
d--; | |
} | |
// Space | |
else | |
{ | |
char c1 = chars[p - 1]; | |
// Previous char is special : investigate deeper | |
if ( c1 == '\r' ) | |
{ | |
if ( ( chars[p - 2] == '\n' ) ) | |
{ | |
p -= 3; | |
} | |
else | |
{ | |
p -= 2; | |
} | |
} | |
else if ( c1 == '\n' ) | |
{ | |
if ( ( chars[p - 2] == '\r' ) ) | |
{ | |
p -= 3; | |
} | |
else | |
{ | |
p -= 2; | |
} | |
} | |
// It was just a space : keep it | |
else | |
{ | |
chars[d] = c; | |
p--; | |
d--; | |
} | |
} | |
} | |
// Keep the remaining chars as it (special cases already covered) | |
while ( p >= 0 ) | |
{ | |
chars[d--] = chars[p--]; | |
} | |
return new String( chars, d + 1, chars.length - d - 1 ); | |
} | |
} | |
@Benchmark | |
public String unfold_original() | |
{ | |
return Replacer.unfold_original(string); | |
} | |
@Benchmark | |
public String unfold_regexp() | |
{ | |
return Replacer.unfold_regexp(string); | |
} | |
@Benchmark | |
public String unfold_regexpcompiled() | |
{ | |
return Replacer.unfold_regexpcompiled(string); | |
} | |
@Benchmark | |
public String unfold_optim_emmanuel_1() | |
{ | |
return Replacer.unfold_optim_emmanuel_1(string); | |
} | |
@Benchmark | |
public String unfold_with_stringutils_on_apache_common() | |
{ | |
return Replacer.unfold_with_stringutils_on_apache_common(string); | |
} | |
@Benchmark | |
public String unfold_unfold_olivier() | |
{ | |
return Replacer.unfold_olivier(string); | |
} | |
@Benchmark | |
public String unfold_olivier_optimise_par_emmanuel() | |
{ | |
return Replacer.unfold_olivier_optimise_par_emmanuel(string); | |
} | |
public static void main(String[] args) throws RunnerException, IOException | |
{ | |
Options opt = new OptionsBuilder().include(".*" + ReplaceAllBench.class.getSimpleName() + ".*") | |
.warmupIterations(20) | |
.warmupTime(TimeValue.seconds(1)) | |
.measurementIterations(20) | |
.timeUnit(TimeUnit.NANOSECONDS) | |
.forks(1) | |
// .addProfiler(LinuxPerfAsmProfiler.class) | |
.build(); | |
new Runner(opt).run(); | |
} | |
} |
En ne gardant que les algos corrects:
# Run complete. Total time: 00:14:30
Benchmark (size) Mode Samples Score Error Units
r.ReplaceAllBenchmark.unfold_all_regexp 10 thrpt 20 1467.862 ± 28.884 ops/ms
r.ReplaceAllBenchmark.unfold_all_regexp 100 thrpt 20 342.911 ± 4.772 ops/ms
r.ReplaceAllBenchmark.unfold_all_regexp 1000 thrpt 20 39.269 ± 0.936 ops/ms
r.ReplaceAllBenchmark.unfold_cedric 10 thrpt 20 24729.376 ± 935.766 ops/ms
r.ReplaceAllBenchmark.unfold_cedric 100 thrpt 20 7074.242 ± 53.409 ops/ms
r.ReplaceAllBenchmark.unfold_cedric 1000 thrpt 20 809.072 ± 16.939 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_improved 10 thrpt 20 36381.822 ± 263.571 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_improved 100 thrpt 20 9391.023 ± 60.022 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_improved 1000 thrpt 20 720.513 ± 5.490 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate 10 thrpt 20 36553.386 ± 367.102 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate 100 thrpt 20 9565.954 ± 142.565 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate 1000 thrpt 20 877.977 ± 11.356 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate2 10 thrpt 20 32364.978 ± 706.792 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate2 100 thrpt 20 9668.940 ± 72.017 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate2 1000 thrpt 20 932.645 ± 5.214 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_common 10 thrpt 20 6869.541 ± 133.914 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_common 100 thrpt 20 1739.391 ± 18.654 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_common 1000 thrpt 20 133.172 ± 2.303 ops/ms
Hey, mon algo était correct normalement ^^;
Quel est le problème ? Quels sont les cas qui ne passent pas ?
C'est pas moi qui ait dit qu'il était faux, c'est la machine. Sur des cas concrets :) Tu peux regarder ce que fait le main
pour trouver des exemples qui plantent. De mémoire le tien se plantait sur des séquences du genre \r\r (mais j'ai un doute, ils ont tous des pbs différents :)).
Hmmm, effectivement, un cas n'était pas géré. J'en ai profité pour essayer d'améliorer un peu l'algo. Tu peux le tester ?
public static String unfold_olivier2(String test) {
// Throws NPE if null, like the original method
if (test.length() < 2) return test;
char[] chars = test.toCharArray();
int p = chars.length - 1;
int d = p;
char c, c1, c2;
while (p > 0) {
c = chars[p];
c1 = chars[p - 1];
if (c == ' ' && (c1 == '\n' || c1 == '\r')) {
p--;
if (p > 0) {
c2 = chars[p - 1];
if ((c2 == '\n' || c2 == '\r') && c2 != c1) {
p--;
}
}
}
else {
chars[d] = c;
d--;
}
p--;
}
while (p >= 0) {
chars[d--] = chars[p--];
}
return new String(chars, d + 1, chars.length - d - 1);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
After running the full benchmark:
The following methods are deemed to be correct:
Reflection.unfold_cedric_ultimate2
Reflection.unfold_cedric
Reflection.unfold_cedric_improved
Reflection.unfold_regexp
Reflection.unfold_cedric_ultimate
Reflection.unfold_common
The following methods are proved to be incorrect:
Reflection.unfold_regexpcompiled
Reflection.unfold
Reflection.unfold3
Reflection.unfold2
Reflection.unfold_olivier