|
using System; |
|
using System.Collections.Generic; |
|
using System.Linq; |
|
using System.Text.RegularExpressions; |
|
using BenchmarkDotNet.Attributes; |
|
using BenchmarkDotNet.Running; |
|
|
|
namespace BenchITSplit |
|
{ |
|
/// <summary>
/// Console entry point: verifies that both splitting strategies of
/// <see cref="StringSplitter"/> agree, then hands the class to BenchmarkDotNet.
/// </summary>
class Program
{
    /// <summary>
    /// Runs the equivalence check and then the benchmarks.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        // Timing two algorithms that disagree would be meaningless, so verify first.
        AssertBothAlgorithmsProduceSameResult();

        BenchmarkRunner.Run<StringSplitter>();
    }

    /// <summary>
    /// Splits every generated input with both <see cref="StringSplitter.StringRegex"/> and
    /// <see cref="StringSplitter.StringScan"/> and checks that the resulting arrays match
    /// element by element.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The two methods returned a different number of parts, or a different part,
    /// for the same input. The message names the offending input (and part) index.
    /// </exception>
    private static void AssertBothAlgorithmsProduceSameResult()
    {
        var splitter = new StringSplitter();
        int inputCount = StringSplitter.stringsToSplit.Count;

        for (int i = 0; i < inputCount; i++)
        {
            // Both benchmark methods read the input at splitter.index and then advance it,
            // so pin the cursor to i before each call to feed them the very same string.
            splitter.index = i;
            string[] viaRegex = splitter.StringRegex();

            splitter.index = i;
            string[] viaScan = splitter.StringScan();

            if (viaRegex.Length != viaScan.Length)
            {
                throw new InvalidOperationException(
                    "non equal length for input #" + i
                    + ": regex produced " + viaRegex.Length
                    + " parts, scan produced " + viaScan.Length + " parts");
            }

            for (int j = 0; j < viaRegex.Length; j++)
            {
                if (!string.Equals(viaRegex[j], viaScan[j], StringComparison.Ordinal))
                {
                    throw new InvalidOperationException(
                        "non equal part #" + j + " for input #" + i);
                }
            }
        }
    }
}
|
|
|
/// <summary>
/// Benchmark subject comparing two ways of splitting a string on line feeds that are
/// not part of a carriage-return/line-feed pair: a compiled regular expression and a
/// hand-written scan. Each benchmark call consumes the next pre-generated input.
/// </summary>
[Config("jobs=AllJits")]
public class StringSplitter
{
    // Number of generated inputs; the cursor wraps back to 0 once it reaches this value.
    private const int InputCount = 1000;

    /// <summary>Inputs shared by both benchmarks; filled once by the static constructor.</summary>
    public static List<string> stringsToSplit = null;

    /// <summary>Position in <see cref="stringsToSplit"/> of the input the next call reads.</summary>
    public int index = 0;

    // Matches a line feed that is not immediately preceded by a carriage return.
    private Regex loneLineFeed = new Regex("(?<!\r)\n", RegexOptions.Compiled);

    /// <summary>
    /// Builds the shared input list from a fixed-seed random source, so every run is fed
    /// the same sequence of random draws.
    /// </summary>
    static StringSplitter()
    {
        var seeded = new Random(1000);
        stringsToSplit = new List<string>();

        while (stringsToSplit.Count < InputCount)
        {
            // Draw the fragment count first, then pull that many fragments from the
            // endless generator and glue them together without a separator.
            int fragmentCount = seeded.Next(70, 100);
            string joined = string.Join("", GenerateRandom(seeded).Take(fragmentCount));
            stringsToSplit.Add(joined);
        }
    }

    /// <summary>
    /// Produces an endless sequence of fragments: a generated name (sometimes replaced by
    /// an empty string) followed by nothing, "\n" or "\r\n", all chosen through
    /// <paramref name="rnd"/>.
    /// </summary>
    /// <param name="rnd">Random source used for every choice made here.</param>
    /// <returns>A lazily evaluated sequence that never ends; callers must bound it.</returns>
    private static IEnumerable<string> GenerateRandom(Random rnd)
    {
        var names = new Utilities.Random.NameGenerators.MaleNameGenerator(true, true, true, true);
        string[] terminators = { "\n", "\r\n" };

        for (;;)
        {
            // Rolls 0..30 get no terminator, 31..80 a bare line feed, 81..99 a CR/LF pair.
            int roll = rnd.Next(0, 100);
            string terminator;
            if (roll > 80)
            {
                terminator = terminators[1];
            }
            else if (roll > 30)
            {
                terminator = terminators[0];
            }
            else
            {
                terminator = "";
            }

            // The name is always drawn, even when the following roll discards it.
            var name = names.Next(rnd);
            if (rnd.Next(100) > 90)
            {
                name = "";
            }

            yield return name + terminator;
        }
    }

    /// <summary>
    /// Splits the current input with the hand-written scan and advances the cursor.
    /// </summary>
    /// <returns>The parts of the input, in order.</returns>
    [Benchmark]
    public string[] StringScan()
    {
        string input = stringsToSplit[index];

        index++;
        if (index == InputCount)
        {
            index = 0;
        }

        return SplitByN(input).ToArray();
    }

    /// <summary>
    /// Splits the current input with the compiled regular expression and advances the cursor.
    /// </summary>
    /// <returns>The parts of the input, in order.</returns>
    [Benchmark]
    public string[] StringRegex()
    {
        string input = stringsToSplit[index];

        index++;
        if (index == InputCount)
        {
            index = 0;
        }

        return loneLineFeed.Split(input);
    }

    /// <summary>
    /// Lazily yields the pieces of <paramref name="value"/> delimited by every '\n' that
    /// does not directly follow a '\r'; "\r\n" pairs stay inside the pieces.
    /// </summary>
    /// <param name="value">Text to split.</param>
    /// <returns>The pieces in order; the text after the last delimiter is always yielded.</returns>
    private IEnumerable<string> SplitByN(string value)
    {
        int pieceStart = 0;

        // Hop from one line feed to the next; IndexOf reports -1 when none is left.
        for (int lf = value.IndexOf('\n'); lf >= 0; lf = value.IndexOf('\n', lf + 1))
        {
            bool followsCarriageReturn = lf > 0 && value[lf - 1] == '\r';
            if (followsCarriageReturn)
            {
                continue;
            }

            yield return value.Substring(pieceStart, lf - pieceStart);
            pieceStart = lf + 1;
        }

        yield return value.Substring(pieceStart);
    }
}
|
} |