Skip to content

Instantly share code, notes, and snippets.

@shana
Created October 17, 2017 14:14
Show Gist options
  • Save shana/200e4719d4f571caab9dbf5921fa5276 to your computer and use it in GitHub Desktop.
Save shana/200e4719d4f571caab9dbf5921fa5276 to your computer and use it in GitHub Desktop.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace Implementation1
{
    /// <summary>
    /// Splits a string into lines with a single forward scan over the text.
    /// "\n" and "\r\n" terminate a line and are stripped from it; a '\r' that is
    /// not followed by '\n' stays inside the line and is counted.
    /// </summary>
    public class LineReader
    {
        // Returned once the text is exhausted (Line == null).
        static readonly LineInformation Default = new LineInformation(null, 0);
        // Returned exactly once for an empty input text.
        static readonly LineInformation Empty = new LineInformation(String.Empty, 0);
        static readonly char[] LineEndings = { '\r', '\n' };

        readonly string text;
        readonly int length;
        // Position where the next line starts; -1 once everything has been consumed.
        int index;

        /// <summary>Creates a reader over <paramref name="text"/>.</summary>
        /// <param name="text">Text to split; its Length is read immediately.</param>
        public LineReader(string text)
        {
            this.text = text;
            this.length = text.Length;
        }

        /// <summary>Reads the next line.</summary>
        /// <returns>
        /// The line and the number of lone carriage returns it contains, or a value
        /// whose <c>Line</c> is null when there is nothing left to read.
        /// </returns>
        public LineInformation ReadLine()
        {
            bool exhausted = index == -1 || index == length;
            if (exhausted)
            {
                if (index != 0)
                {
                    return Default;
                }

                // index == 0 == length: empty text, hand out one empty line.
                index = -1;
                return Empty;
            }

            int lineStart = index;
            int loneReturns = 0;
            string line;

            for (;;)
            {
                index = text.IndexOfAny(LineEndings, index);
                if (index < 0)
                {
                    // No terminator left: the rest of the text is the last line.
                    line = text.Substring(lineStart);
                    break;
                }

                if (text[index] == '\n')
                {
                    line = text.Substring(lineStart, index - lineStart);
                    index += 1;
                    break;
                }

                // From here on text[index] is '\r'.
                bool isLastChar = index >= length - 1;
                if (isLastChar)
                {
                    // Trailing lone '\r': it stays in the line and is counted.
                    loneReturns++;
                    line = text.Substring(lineStart);
                    index = -1;
                    break;
                }

                if (text[index + 1] != '\n')
                {
                    // Lone '\r' in the middle of a line: count it and keep scanning.
                    loneReturns++;
                    index++;
                    continue;
                }

                // "\r\n": drop both characters and finish the line.
                line = text.Substring(lineStart, index - lineStart);
                index += 2;
                if (index == length)
                {
                    index = -1;
                }
                break;
            }

            return new LineInformation(line, loneReturns);
        }

        /// <summary>A line of text plus the number of lone '\r' characters in it.</summary>
        public struct LineInformation
        {
            public string Line;
            public int CarriageReturns;

            public LineInformation(string line, int carriageReturns)
            {
                this.Line = line;
                this.CarriageReturns = carriageReturns;
            }
        }
    }
}
namespace Implementation3
{
    /// <summary>
    /// Regex-based line reader. "\n" and "\r\n" terminate a line and are stripped
    /// from it; any other '\r' stays inside the line and is counted.
    /// </summary>
    public class LineReader
    {
        // Returned once the text is exhausted (Line == null).
        static readonly LineInformation Default = new LineInformation(null, 0);
        // Returned exactly once for an empty input text.
        static readonly LineInformation Empty = new LineInformation(String.Empty, 0);

        // Group 1 is the line content ('.' never matches '\n', and the lazy
        // quantifier stops at the first terminator). The terminator is "\r\n",
        // "\n" or the end of the text, and is kept out of group 1.
        // Shared by all readers so the patterns are parsed only once.
        static readonly Regex Eol = new Regex("(.*?)(?:\r?\n|\\z)");
        static readonly Regex Cr = new Regex("\r");

        readonly string text;
        readonly int length;
        // Position just past the last consumed terminator; -1 when finished.
        int index;
        Match match;

        /// <summary>Creates a reader over <paramref name="text"/>.</summary>
        /// <param name="text">Text to split; its Length is read immediately.</param>
        public LineReader(string text)
        {
            this.text = text;
            this.length = text.Length;
        }

        /// <summary>Reads the next line.</summary>
        /// <returns>
        /// The line and the number of carriage returns left inside it, or a value
        /// whose <c>Line</c> is null when there is nothing left to read.
        /// </returns>
        public LineInformation ReadLine()
        {
            if (EndOfText)
            {
                if (StartOfText)
                {
                    // Empty text: hand out a single empty line, then stop.
                    index = -1;
                    return Empty;
                }
                return Default;
            }

            match = match == null ? Eol.Match(text) : match.NextMatch();
            if (!match.Success)
            {
                // Defensive: a failed Match reports Index 0 and an empty Value,
                // which must never be mistaken for another line.
                index = -1;
                return Default;
            }

            // Advance to the END of the match (line + terminator) so EndOfText
            // becomes true as soon as the whole text has been consumed.
            index = match.Index + match.Length;

            var line = match.Groups[1].Value;
            return new LineInformation(line, Cr.Matches(line).Count);
        }

        bool StartOfText => index == 0;
        bool EndOfText => index == -1 || index == length;

        /// <summary>A line of text plus the number of '\r' characters in it.</summary>
        public struct LineInformation
        {
            public string Line;
            public int CarriageReturns;

            public LineInformation(string line, int carriageReturns)
            {
                this.Line = line;
                this.CarriageReturns = carriageReturns;
            }
        }
    }
}
namespace Implementation2
{
    /// <summary>
    /// Line reader that splits on '\n' only and drops a '\r' that sits directly
    /// before the '\n'. Carriage returns are counted separately by the caller
    /// through <see cref="CountCarriageReturns"/>.
    /// </summary>
    public class LineReader
    {
        readonly string text;
        // Start of the next line; -1 once the final line has been returned.
        int index;

        /// <summary>Creates a reader over <paramref name="text"/>.</summary>
        public LineReader(string text)
        {
            this.text = text;
        }

        /// <summary>Reads the next line.</summary>
        /// <returns>The line without its terminator, or null when no text is left.</returns>
        public string ReadLine()
        {
            if (EndOfText)
            {
                if (!StartOfText)
                {
                    return null;
                }

                // Empty text: hand out a single empty line, then stop.
                index = -1;
                return string.Empty;
            }

            int lineStart = index;
            int newline = text.IndexOf('\n', lineStart);
            if (newline == -1)
            {
                // No '\n' left: everything that remains is the last line.
                index = -1;
                return text.Substring(lineStart, text.Length - lineStart);
            }

            int lineEnd = newline;
            if (newline > 0 && text[newline - 1] == '\r')
            {
                // "\r\n": leave the '\r' out of the returned line.
                lineEnd--;
            }

            index = newline + 1;
            return text.Substring(lineStart, lineEnd - lineStart);
        }

        bool StartOfText => index == 0;
        bool EndOfText => index == -1 || index == text.Length;

        /// <summary>Counts the '\r' characters in <paramref name="text"/>.</summary>
        public static int CountCarriageReturns(string text)
        {
            int total = 0;
            foreach (char c in text)
            {
                if (c == '\r')
                {
                    total++;
                }
            }
            return total;
        }
    }
}
namespace Implementation4
{
    /// <summary>
    /// Line reader that finds each '\n', drops a '\r' directly before it, and then
    /// counts the '\r' characters remaining inside the extracted line.
    /// </summary>
    public class LineReader
    {
        // Returned once the text is exhausted (Line == null).
        static readonly LineInformation Default = new LineInformation(null, 0);
        // Returned exactly once for an empty input text.
        static readonly LineInformation Empty = new LineInformation(String.Empty, 0);

        readonly string text;
        readonly int length;
        // Start of the next line; equals length (or -1) once everything is consumed.
        int index;

        /// <summary>Creates a reader over <paramref name="text"/>.</summary>
        /// <param name="text">Text to split; its Length is read immediately.</param>
        public LineReader(string text)
        {
            this.text = text;
            this.length = text.Length;
        }

        /// <summary>Reads the next line.</summary>
        /// <returns>
        /// The line and the number of carriage returns left inside it, or a value
        /// whose <c>Line</c> is null when there is nothing left to read.
        /// </returns>
        public LineInformation ReadLine()
        {
            if (index == -1 || index == length)
            {
                if (index == 0)
                {
                    // Empty text: hand out a single empty line, then stop.
                    index = -1;
                    return Empty;
                }
                return Default;
            }

            var previousIndex = index;
            var ret = Default;

            index = text.IndexOf('\n', index);
            if (index < 0)
            {
                // No '\n' left: the remainder is the last line. Park index on the
                // last character so the increment below lands exactly on length.
                ret.Line = text.Substring(previousIndex);
                index = length - 1;
            }
            else
            {
                var end = index;
                if (end > previousIndex && text[end - 1] == '\r')
                {
                    // "\r\n": leave the '\r' out of the returned line.
                    end--;
                }
                ret.Line = text.Substring(previousIndex, end - previousIndex);
            }
            index++;

            // Count the carriage returns that remain inside the line. The result is
            // kept in ret; it must not be overwritten afterwards.
            for (var cr = ret.Line.IndexOf('\r'); cr != -1; cr = ret.Line.IndexOf('\r', cr + 1))
            {
                ret.CarriageReturns++;
            }

            return ret;
        }

        /// <summary>A line of text plus the number of '\r' characters in it.</summary>
        public struct LineInformation
        {
            public string Line;
            public int CarriageReturns;

            public LineInformation(string line, int carriageReturns)
            {
                this.Line = line;
                this.CarriageReturns = carriageReturns;
            }
        }
    }
}
namespace ConsoleApp
{
class Program
{
/// <summary>
/// Entry point. Expects two arguments: the directory that holds the generated
/// files and their "log", and the implementation to benchmark ("1".."4").
/// </summary>
/// <param name="args">Command-line arguments: args[0] = directory, args[1] = implementation.</param>
static void Main(string[] args)
{
    if (args == null || args.Length < 2)
    {
        // Report the problem instead of failing with an index-out-of-range error.
        Console.Error.WriteLine("usage: <directory> <implementation: 1|2|3|4>");
        Environment.ExitCode = 1;
        return;
    }

    // Enable to (re)generate the test files from the directory in args[2]:
    //GenerateFiles(args[2]);
    ParseFiles(args[0], args[1]);
}
// One benchmark input, built from a single tab-separated line of the "log" file.
// Being nested in Program, this name takes precedence over System.IO.FileInfo
// inside the class.
struct FileInfo
{
// File name, first column of the log line (relative to the parsed directory).
public string File;
// Entire text of the file, loaded up front with File.ReadAllText.
public string Content;
// Second column of the log line, parsed as an integer.
public int HowMany;
// Third column of the log line: comma-separated integers.
public int[] Positions;
// Running sum of the carriage-return counts reported by the line reader.
public int HowManyFound;
}
/// <summary>
/// Loads every file listed in "<paramref name="dir"/>/log" into memory, runs the
/// selected line-reader implementation over all of them and prints the elapsed
/// time in milliseconds. Only the reader run is timed, not the file loading.
/// </summary>
/// <param name="dir">Directory containing the "log" file and the files it lists.</param>
/// <param name="implementation">Which implementation to run: "1", "2", "3" or "4".</param>
/// <exception cref="ArgumentException">
/// <paramref name="implementation"/> is not one of the supported values.
/// </exception>
static void ParseFiles(string dir, string implementation)
{
    // Resolve the implementation first so a typo fails fast instead of
    // silently timing nothing.
    Action<List<FileInfo>> run;
    switch (implementation)
    {
        case "1":
            run = RunImplementation1;
            break;
        case "2":
            run = RunImplementation2;
            break;
        case "3":
            run = RunImplementation3;
            break;
        case "4":
            run = RunImplementation4;
            break;
        default:
            throw new ArgumentException(
                "Unknown implementation '" + implementation + "'; expected 1, 2, 3 or 4.",
                nameof(implementation));
    }

    var logfile = Path.Combine(dir, "log");
    List<FileInfo> files = new List<FileInfo>();
    foreach (var line in File.ReadLines(logfile))
    {
        if (line.Length == 0)
        {
            // Tolerate blank lines in the log.
            continue;
        }

        // Log line format: <file name> TAB <count> TAB <comma-separated positions>
        var parts = line.Split('\t');
        var positions = parts.Length > 2 ? parts[2] : string.Empty;
        files.Add(new FileInfo
        {
            File = parts[0],
            Content = File.ReadAllText(Path.Combine(dir, parts[0])),
            HowMany = int.Parse(parts[1]),
            Positions = positions.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries).Select(x => int.Parse(x)).ToArray(),
            HowManyFound = 0
        });
    }

    var watch = Stopwatch.StartNew();
    run(files);
    watch.Stop();
    Console.WriteLine(watch.ElapsedMilliseconds);
}
/// <summary>
/// Reads every file's content line by line with Implementation1.LineReader and
/// accumulates the reported carriage-return counts into HowManyFound.
/// </summary>
/// <param name="files">Files to process; entries are updated in place.</param>
static void RunImplementation1(List<FileInfo> files)
{
    var position = 0;
    while (position < files.Count)
    {
        // FileInfo is a struct: work on a copy and store it back after each line.
        var current = files[position];
        var lines = new Implementation1.LineReader(current.Content);
        for (var info = lines.ReadLine(); info.Line != null; info = lines.ReadLine())
        {
            current.HowManyFound += info.CarriageReturns;
            files[position] = current;
        }
        position++;
    }
}
/// <summary>
/// Reads every file's content line by line with Implementation2.LineReader and
/// accumulates the carriage returns counted in each line into HowManyFound.
/// </summary>
/// <param name="files">Files to process; entries are updated in place.</param>
static void RunImplementation2(List<FileInfo> files)
{
    var position = 0;
    while (position < files.Count)
    {
        // FileInfo is a struct: work on a copy and store it back after each line.
        var current = files[position];
        var lines = new Implementation2.LineReader(current.Content);
        for (var text = lines.ReadLine(); text != null; text = lines.ReadLine())
        {
            current.HowManyFound += Implementation2.LineReader.CountCarriageReturns(text);
            files[position] = current;
        }
        position++;
    }
}
/// <summary>
/// Reads every file's content line by line with Implementation3.LineReader and
/// accumulates the reported carriage-return counts into HowManyFound.
/// </summary>
/// <param name="files">Files to process; entries are updated in place.</param>
static void RunImplementation3(List<FileInfo> files)
{
    var position = 0;
    while (position < files.Count)
    {
        // FileInfo is a struct: work on a copy and store it back after each line.
        var current = files[position];
        var lines = new Implementation3.LineReader(current.Content);
        for (var info = lines.ReadLine(); info.Line != null; info = lines.ReadLine())
        {
            current.HowManyFound += info.CarriageReturns;
            files[position] = current;
        }
        position++;
    }
}
/// <summary>
/// Reads every file's content line by line with Implementation4.LineReader and
/// accumulates the reported carriage-return counts into HowManyFound.
/// </summary>
/// <param name="files">Files to process; entries are updated in place.</param>
static void RunImplementation4(List<FileInfo> files)
{
    var position = 0;
    while (position < files.Count)
    {
        // FileInfo is a struct: work on a copy and store it back after each line.
        var current = files[position];
        var lines = new Implementation4.LineReader(current.Content);
        for (var info = lines.ReadLine(); info.Line != null; info = lines.ReadLine())
        {
            current.HowManyFound += info.CarriageReturns;
            files[position] = current;
        }
        position++;
    }
}
/// <summary>
/// Copies every file in <paramref name="dir"/> to "<paramref name="dir"/>/out",
/// overwrites random bytes of each copy with either a lone '\r' or a "\r\n"
/// pair, and records what was done in "out/log".
/// Log line format: file name, TAB, number of insertions, TAB, comma-separated
/// byte offsets of the lone '\r' insertions.
/// </summary>
/// <param name="dir">Directory whose files are used as templates.</param>
static void GenerateFiles(string dir)
{
    Random rnd = new Random();
    var outdir = Path.Combine(dir, "out");
    if (!Directory.Exists(outdir))
    {
        Directory.CreateDirectory(outdir);
    }

    var logfile = Path.Combine(outdir, "log");
    if (File.Exists(logfile))
    {
        File.Delete(logfile);
    }

    using (var logsw = new StreamWriter(File.OpenWrite(logfile)))
    {
        foreach (var file in Directory.EnumerateFiles(dir))
        {
            var outfile = Path.Combine(outdir, Path.GetFileName(file));
            File.Copy(file, outfile, true);

            var howmany = rnd.Next(0, 10);
            var lonePositions = new List<int>();

            using (var sw = File.OpenWrite(outfile))
            {
                // Use the size of the file on disk; `file` is a path string, so
                // file.Length would be the length of the path, not of the content.
                var len = (int)Math.Min(sw.Length, int.MaxValue);
                if (len == 0)
                {
                    // Nothing to overwrite in an empty file; do not grow it.
                    howmany = 0;
                }

                for (var i = 0; i < howmany; i++)
                {
                    var pos = rnd.Next(len);
                    sw.Seek(pos, SeekOrigin.Begin);
                    sw.WriteByte(13);

                    // Next(2) yields 0 or 1; the upper bound is exclusive, so
                    // Next(1) could only ever return 0.
                    if (rnd.Next(2) == 1)
                    {
                        // WriteByte already advanced the position by one, so the
                        // '\n' goes directly after the '\r' without seeking.
                        sw.WriteByte(10);
                    }
                    else
                    {
                        lonePositions.Add(pos);
                    }
                }
            }

            // NOTE(review): the count column is the total number of insertions,
            // while the positions column lists only the lone '\r' ones, so it
            // can have fewer entries than the count - confirm this is intended.
            logsw.Write(Path.GetFileName(outfile));
            logsw.Write("\t");
            logsw.Write(howmany);
            logsw.Write("\t");
            logsw.Write(string.Join(",", lonePositions));
            logsw.WriteLine();
        }
    }
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment