Created
October 17, 2017 14:14
-
-
Save shana/200e4719d4f571caab9dbf5921fa5276 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.IO; | |
using System.Linq; | |
using System.Text; | |
using System.Text.RegularExpressions; | |
using System.Threading.Tasks; | |
namespace Implementation1
{
    /// <summary>
    /// Reads a string one line at a time using a single forward scan.
    /// A line is terminated by "\n" or "\r\n". A '\r' that is not followed by
    /// '\n' stays inside the line and is counted in
    /// <see cref="LineInformation.CarriageReturns"/>.
    /// </summary>
    public class LineReader
    {
        // Returned once the text is exhausted: Line is null.
        static readonly LineInformation Default = new LineInformation(null, 0);
        // Returned exactly once when the whole text is the empty string.
        static readonly LineInformation Empty = new LineInformation(String.Empty, 0);
        static readonly char[] LineEndings = { '\r', '\n' };

        readonly string text;
        readonly int length;
        // Start of the next line; -1 once the reader is finished.
        int index;

        /// <summary>Creates a reader positioned at the start of <paramref name="text"/>.</summary>
        /// <param name="text">The text to split into lines.</param>
        public LineReader(string text)
        {
            this.text = text;
            length = text.Length;
            index = 0;
        }

        /// <summary>
        /// Returns the next line without its terminator, together with the number
        /// of lone carriage returns found in it.
        /// </summary>
        /// <returns>
        /// The next line; a value whose <c>Line</c> is null when no lines remain.
        /// Empty text produces a single empty line before the null result.
        /// </returns>
        public LineInformation ReadLine()
        {
            if (index == -1 || index == length)
            {
                if (index != 0)
                {
                    return Default;
                }

                // index == 0 == length: the text is empty, hand out one empty line.
                index = -1;
                return Empty;
            }

            var lineStart = index;
            var cursor = index;
            var loneCarriageReturns = 0;
            string line;

            for (;;)
            {
                var hit = text.IndexOfAny(LineEndings, cursor);

                if (hit == -1)
                {
                    // No terminator left: the rest of the text is the last line.
                    line = text.Substring(lineStart);
                    index = -1;
                    break;
                }

                if (text[hit] == '\n')
                {
                    line = text.Substring(lineStart, hit - lineStart);
                    index = hit + 1;
                    break;
                }

                // From here on text[hit] is '\r'.
                if (hit >= length - 1)
                {
                    // '\r' is the very last character: it is kept in the line and counted.
                    loneCarriageReturns++;
                    line = text.Substring(lineStart);
                    index = -1;
                    break;
                }

                if (text[hit + 1] == '\n')
                {
                    // "\r\n": drop both characters from the line.
                    line = text.Substring(lineStart, hit - lineStart);
                    var next = hit + 2;
                    index = next == length ? -1 : next;
                    break;
                }

                // Lone '\r' in the middle of a line: count it and keep scanning.
                loneCarriageReturns++;
                cursor = hit + 1;
            }

            return new LineInformation(line, loneCarriageReturns);
        }

        /// <summary>One line of text plus the number of lone carriage returns in it.</summary>
        public struct LineInformation
        {
            /// <summary>The line content without its terminator; null when no line was read.</summary>
            public string Line;
            /// <summary>Number of '\r' characters in the line that were not part of "\r\n".</summary>
            public int CarriageReturns;

            public LineInformation(string line, int carriageReturns)
            {
                Line = line;
                CarriageReturns = carriageReturns;
            }
        }
    }
}
namespace Implementation3
{
    /// <summary>
    /// Regex-based line reader. A line is terminated by "\n" or "\r\n"; a '\r'
    /// that is not followed by '\n' stays inside the line and is counted in
    /// <see cref="LineInformation.CarriageReturns"/>.
    /// </summary>
    public class LineReader
    {
        // Returned once the text is exhausted: Line is null.
        static readonly LineInformation Default = new LineInformation(null, 0);
        // Returned exactly once when the whole text is the empty string.
        static readonly LineInformation Empty = new LineInformation(String.Empty, 0);

        // \G chains each match to the end of the previous one. Group 1 is the line
        // content (lazy, so the '\r' of a "\r\n" terminator is left out of it); the
        // non-capturing tail consumes the terminator or accepts the end of the text.
        static readonly Regex Eol = new Regex(@"\G([^\n]*?)(?:\r?\n|\z)");
        static readonly Regex Cr = new Regex("\r");

        readonly string text;
        readonly int length = 0;
        // Position just past the last consumed terminator; -1 once finished.
        int index = 0;
        Match match;

        /// <summary>Creates a reader positioned at the start of <paramref name="text"/>.</summary>
        /// <param name="text">The text to split into lines.</param>
        public LineReader(string text)
        {
            this.text = text;
            this.length = text.Length;
        }

        /// <summary>
        /// Returns the next line without its terminator, together with the number
        /// of lone carriage returns found in it.
        /// </summary>
        /// <returns>
        /// The next line; a value whose <c>Line</c> is null when no lines remain.
        /// Empty text produces a single empty line before the null result.
        /// </returns>
        public LineInformation ReadLine()
        {
            if (EndOfText)
            {
                if (StartOfText)
                {
                    index = -1;
                    return Empty;
                }
                return Default;
            }

            match = match == null ? Eol.Match(text) : match.NextMatch();
            if (!match.Success)
            {
                // A failed Match reports Index 0 and Value "", so it must never be
                // turned into a line: that would make callers loop forever.
                index = -1;
                return Default;
            }

            // Advance past the line AND its terminator so that text ending in a
            // newline does not produce a spurious trailing empty line.
            index = match.Index + match.Length;

            var ret = Default;
            ret.Line = match.Groups[1].Value;
            ret.CarriageReturns = Cr.Matches(ret.Line).Count;
            return ret;
        }

        bool StartOfText => index == 0;
        bool EndOfText => index == -1 || index == length;

        /// <summary>One line of text plus the number of lone carriage returns in it.</summary>
        public struct LineInformation
        {
            /// <summary>The line content without its terminator; null when no line was read.</summary>
            public string Line;
            /// <summary>Number of '\r' characters contained in <see cref="Line"/>.</summary>
            public int CarriageReturns;

            public LineInformation(string line, int carriageReturns)
            {
                this.Line = line;
                this.CarriageReturns = carriageReturns;
            }
        }
    }
}
namespace Implementation2
{
    /// <summary>
    /// Reads a string one line at a time by searching for '\n' only. A '\r'
    /// directly before the '\n' is dropped from the returned line; any other
    /// '\r' is left in the line for the caller to count with
    /// <see cref="CountCarriageReturns"/>.
    /// </summary>
    public class LineReader
    {
        readonly string text;
        // Start of the next line; -1 once the reader is finished.
        int index;

        /// <summary>Creates a reader positioned at the start of <paramref name="text"/>.</summary>
        /// <param name="text">The text to split into lines.</param>
        public LineReader(string text)
        {
            this.text = text;
            index = 0;
        }

        /// <summary>Returns the next line without its terminator.</summary>
        /// <returns>
        /// The next line, or null when no lines remain. Empty text produces a
        /// single empty line before the null result.
        /// </returns>
        public string ReadLine()
        {
            if (EndOfText)
            {
                if (!StartOfText)
                {
                    return null;
                }

                // Empty text: hand out one empty line, then finish.
                index = -1;
                return string.Empty;
            }

            var lineStart = index;
            var newline = text.IndexOf('\n', lineStart);

            if (newline == -1)
            {
                // No terminator left: the remainder of the text is the last line.
                index = -1;
                return text.Substring(lineStart);
            }

            index = newline + 1;

            var lineEnd = newline;
            if (newline > 0 && text[newline - 1] == '\r')
            {
                // "\r\n": leave the '\r' out of the returned line.
                lineEnd--;
            }

            return text.Substring(lineStart, lineEnd - lineStart);
        }

        bool StartOfText
        {
            get { return index == 0; }
        }

        bool EndOfText
        {
            get { return index == -1 || index == text.Length; }
        }

        /// <summary>Counts the '\r' characters in <paramref name="text"/>.</summary>
        /// <param name="text">The string to inspect.</param>
        /// <returns>The number of '\r' characters found.</returns>
        public static int CountCarriageReturns(string text)
        {
            var total = 0;
            for (var at = text.IndexOf('\r'); at != -1; at = text.IndexOf('\r', at + 1))
            {
                total++;
            }
            return total;
        }
    }
}
namespace Implementation4
{
    /// <summary>
    /// Reads a string one line at a time by searching for '\n', then counts the
    /// '\r' characters left inside the returned line. A '\r' directly before the
    /// '\n' is treated as part of the terminator and is not counted.
    /// </summary>
    public class LineReader
    {
        // Returned once the text is exhausted: Line is null.
        static readonly LineInformation Default = new LineInformation(null, 0);
        // Used for empty text and for empty lines.
        static readonly LineInformation Empty = new LineInformation(String.Empty, 0);

        readonly string text;
        readonly int length = 0;
        // Start of the next line; -1 or length once the reader is finished.
        int index = 0;

        /// <summary>Creates a reader positioned at the start of <paramref name="text"/>.</summary>
        /// <param name="text">The text to split into lines.</param>
        public LineReader(string text)
        {
            this.text = text;
            this.length = text.Length;
        }

        /// <summary>
        /// Returns the next line without its terminator, together with the number
        /// of carriage returns that remain inside it.
        /// </summary>
        /// <returns>
        /// The next line; a value whose <c>Line</c> is null when no lines remain.
        /// Empty text produces a single empty line before the null result.
        /// </returns>
        public LineInformation ReadLine()
        {
            if (index == -1 || index == length)
            {
                if (index == 0)
                {
                    index = -1;
                    return Empty;
                }
                return Default;
            }

            var previousIndex = index;
            var ret = Default;

            index = text.IndexOf('\n', index);
            if (index < 0)
            {
                // No terminator left: the remainder is the last line. Park the index
                // so that the increment below lands exactly on the end of the text.
                ret.Line = text.Substring(previousIndex);
                index = length - 1;
            }
            else if (index == previousIndex)
            {
                ret = Empty;
            }
            else if (text[index - 1] == '\r')
            {
                // "\r\n": leave the '\r' out of the returned line.
                ret.Line = text.Substring(previousIndex, index - previousIndex - 1);
            }
            else
            {
                ret.Line = text.Substring(previousIndex, index - previousIndex);
            }
            index++;

            // The count must come from the line itself; it was previously overwritten
            // with a local that was never incremented, so it was always reported as 0.
            ret.CarriageReturns = CountCarriageReturns(ret.Line);
            return ret;
        }

        // Counts the '\r' characters contained in a single line.
        static int CountCarriageReturns(string line)
        {
            var count = 0;
            for (var cr = line.IndexOf('\r'); cr != -1; cr = line.IndexOf('\r', cr + 1))
            {
                count++;
            }
            return count;
        }

        /// <summary>One line of text plus the number of carriage returns in it.</summary>
        public struct LineInformation
        {
            /// <summary>The line content without its terminator; null when no line was read.</summary>
            public string Line;
            /// <summary>Number of '\r' characters contained in <see cref="Line"/>.</summary>
            public int CarriageReturns;

            public LineInformation(string line, int carriageReturns)
            {
                this.Line = line;
                this.CarriageReturns = carriageReturns;
            }
        }
    }
}
namespace ConsoleApp | |
{ | |
class Program | |
{ | |
/// <summary>
/// Entry point: times one line-reader implementation over a prepared directory.
/// </summary>
/// <param name="args">
/// args[0] is the directory that contains the "log" file and the listed files;
/// args[1] selects the implementation ("1" to "4").
/// </param>
static void Main(string[] args)
{
    // Both arguments are required; report a usage error instead of crashing
    // with an IndexOutOfRangeException.
    if (args == null || args.Length < 2)
    {
        Console.Error.WriteLine("Usage: <directory> <implementation: 1|2|3|4>");
        Environment.ExitCode = 1;
        return;
    }

    // Test data is produced by a separate, manual step:
    //GenerateFiles(args[2]);
    ParseFiles(args[0], args[1]);
}
/// <summary>
/// One entry of the benchmark log: a file's content plus the carriage-return
/// figures recorded for it and the figure found while reading it.
/// </summary>
struct FileInfo
{
    // File name as listed in the first log column.
    public string File;

    // Full text of the file.
    public string Content;

    // Values parsed from the third log column (comma-separated integers).
    public int[] Positions;

    // Value parsed from the second log column.
    public int HowMany;

    // Running total of carriage returns reported by a line reader.
    public int HowManyFound;
}
/// <summary>
/// Loads every file listed in the "log" file of <paramref name="dir"/>, runs the
/// selected line-reader implementation over all of them and prints the elapsed
/// time in milliseconds.
/// </summary>
/// <param name="dir">Directory that contains the "log" file and the listed files.</param>
/// <param name="implementation">
/// "1", "2", "3" or "4". Any other value runs nothing; the elapsed time is still printed.
/// </param>
static void ParseFiles(string dir, string implementation)
{
    var entries = new List<FileInfo>();

    // Each log line is tab-separated: file name, count, comma-separated positions.
    foreach (var row in File.ReadLines(Path.Combine(dir, "log")))
    {
        var columns = row.Split('\t');

        var entry = new FileInfo();
        entry.File = columns[0];
        entry.Content = File.ReadAllText(Path.Combine(dir, columns[0]));
        entry.HowMany = int.Parse(columns[1]);
        entry.Positions = columns[2]
            .Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries)
            .Select(value => int.Parse(value))
            .ToArray();
        entry.HowManyFound = 0;

        entries.Add(entry);
    }

    // Only the line reading is timed; loading the files happens above.
    var timer = Stopwatch.StartNew();
    switch (implementation)
    {
        case "1":
            RunImplementation1(entries);
            break;
        case "2":
            RunImplementation2(entries);
            break;
        case "3":
            RunImplementation3(entries);
            break;
        case "4":
            RunImplementation4(entries);
            break;
    }
    timer.Stop();

    Console.WriteLine(timer.ElapsedMilliseconds);
}
/// <summary>
/// Reads every file's content with Implementation1.LineReader and accumulates
/// the reported carriage returns into each entry's HowManyFound.
/// </summary>
/// <param name="files">Entries to process; updated in place.</param>
static void RunImplementation1(List<FileInfo> files)
{
    var position = 0;
    while (position < files.Count)
    {
        var entry = files[position];
        var lineReader = new Implementation1.LineReader(entry.Content);

        for (var info = lineReader.ReadLine(); info.Line != null; info = lineReader.ReadLine())
        {
            entry.HowManyFound += info.CarriageReturns;
            // FileInfo is a struct: the modified copy has to be stored back into the list.
            files[position] = entry;
        }

        position++;
    }
}
/// <summary>
/// Reads every file's content with Implementation2.LineReader, counts the
/// carriage returns of each returned line and accumulates them into each
/// entry's HowManyFound.
/// </summary>
/// <param name="files">Entries to process; updated in place.</param>
static void RunImplementation2(List<FileInfo> files)
{
    var position = 0;
    while (position < files.Count)
    {
        var entry = files[position];
        var lineReader = new Implementation2.LineReader(entry.Content);

        for (var current = lineReader.ReadLine(); current != null; current = lineReader.ReadLine())
        {
            entry.HowManyFound += Implementation2.LineReader.CountCarriageReturns(current);
            // FileInfo is a struct: the modified copy has to be stored back into the list.
            files[position] = entry;
        }

        position++;
    }
}
/// <summary>
/// Reads every file's content with Implementation3.LineReader and accumulates
/// the reported carriage returns into each entry's HowManyFound.
/// </summary>
/// <param name="files">Entries to process; updated in place.</param>
static void RunImplementation3(List<FileInfo> files)
{
    var position = 0;
    while (position < files.Count)
    {
        var entry = files[position];
        var lineReader = new Implementation3.LineReader(entry.Content);

        for (var info = lineReader.ReadLine(); info.Line != null; info = lineReader.ReadLine())
        {
            entry.HowManyFound += info.CarriageReturns;
            // FileInfo is a struct: the modified copy has to be stored back into the list.
            files[position] = entry;
        }

        position++;
    }
}
/// <summary>
/// Reads every file's content with Implementation4.LineReader and accumulates
/// the reported carriage returns into each entry's HowManyFound.
/// </summary>
/// <param name="files">Entries to process; updated in place.</param>
static void RunImplementation4(List<FileInfo> files)
{
    var position = 0;
    while (position < files.Count)
    {
        var entry = files[position];
        var lineReader = new Implementation4.LineReader(entry.Content);

        for (var info = lineReader.ReadLine(); info.Line != null; info = lineReader.ReadLine())
        {
            entry.HowManyFound += info.CarriageReturns;
            // FileInfo is a struct: the modified copy has to be stored back into the list.
            files[position] = entry;
        }

        position++;
    }
}
/// <summary>
/// Copies every file of <paramref name="dir"/> into an "out" sub-directory,
/// overwrites random bytes of each copy with carriage returns (sometimes
/// followed by a line feed) and records what was done in "out/log".
/// </summary>
/// <remarks>
/// Each log line is tab-separated: file name, number of insertions, and the
/// comma-separated byte offsets at which a CR was written without a following LF.
/// Insertions overwrite existing bytes, so one may land on or next to an earlier
/// one or next to an existing line ending.
/// </remarks>
/// <param name="dir">Directory whose top-level files are used as input.</param>
static void GenerateFiles(string dir)
{
    Random rnd = new Random();
    var outdir = Path.Combine(dir, "out");
    if (!Directory.Exists(outdir))
    {
        Directory.CreateDirectory(outdir);
    }

    var logfile = Path.Combine(outdir, "log");
    if (File.Exists(logfile))
    {
        File.Delete(logfile);
    }

    using (var logsw = new StreamWriter(File.OpenWrite(logfile)))
    {
        foreach (var file in Directory.EnumerateFiles(dir))
        {
            var name = Path.GetFileName(file);
            var outfile = Path.Combine(outdir, name);
            File.Copy(file, outfile, true);

            var lonePositions = new List<int>();
            int howmany;
            using (var sw = File.OpenWrite(outfile))
            {
                // Use the size of the file itself; `file.Length` would be the length
                // of the path string. Random.Next takes an int, so clamp huge files.
                var len = (int)Math.Min(sw.Length, int.MaxValue);

                // Nothing can be overwritten in an empty file.
                howmany = len == 0 ? 0 : rnd.Next(0, 10);

                for (var i = 0; i < howmany; i++)
                {
                    var pos = rnd.Next(len);
                    sw.Seek(pos, SeekOrigin.Begin);
                    sw.WriteByte(13);

                    // Next(2) yields 0 or 1; Next(1) can only yield 0. The LF is only
                    // written when it still fits inside the file, and it is written
                    // directly after the CR (WriteByte already advanced the position).
                    if (pos + 1 < len && rnd.Next(2) == 1)
                    {
                        sw.WriteByte(10);
                    }
                    else
                    {
                        lonePositions.Add(pos);
                    }
                }
            }

            logsw.Write(name);
            logsw.Write("\t");
            logsw.Write(howmany);
            logsw.Write("\t");
            logsw.Write(string.Join(",", lonePositions));
            logsw.WriteLine();
        }
    }
}
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment