Created
October 15, 2013 12:41
-
-
Save alfeg/6990995 to your computer and use it in GitHub Desktop.
This tool was used to convert files from tabs to spaces. This file can be compiled with Mono.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.IO; | |
using System.Linq; | |
using System.Text; | |
using System.Threading; | |
using System.Threading.Tasks; | |
namespace expand | |
{ | |
public class Program | |
{ | |
// ---- Command-line switches (set by Main) ----

/// <summary>When true, only tabs in leading indentation are expanded; cleared by the 'alltabs' option.</summary>
private static bool identOnly = true;

/// <summary>When true, carriage returns are dropped from the output; set by the 'crlf' option.</summary>
private static bool doCRLFfix;

/// <summary>Suppresses Log output and the progress line.</summary>
private static bool quiet;

/// <summary>Enables messages that are logged with verbose = true.</summary>
private static bool isVerbose;

// ---- Work list and argument bookkeeping ----

/// <summary>Paths of the files to process.</summary>
public static List<string> toInvoke = new List<string>();

/// <summary>One past the position of the last recognised option argument; maintained by Has.</summary>
private static int idxLastArg;

// ---- Progress counters ----

/// <summary>Printed in the progress line; not modified anywhere in this file.</summary>
private static int _threadsRunning;

private static int readCounter;
private static int writeCounter;
private static int totalFiles;
private static long totalReadBytes;
private static long totalWriteBytes;

// ---- Encodings and per-thread scratch buffers ----

private static readonly UTF8Encoding utfWithBom = new UTF8Encoding(true);
private static readonly UTF8Encoding utfWithNoBom = new UTF8Encoding(false);

/// <summary>Per-thread character buffer used when reading (0x1000 chars).</summary>
private static readonly ThreadLocal<char[]> inputBuffer = new ThreadLocal<char[]>(() => new char[0x1000]);

/// <summary>Per-thread character buffer that receives expanded text (0x10000 chars).</summary>
private static readonly ThreadLocal<char[]> outputBuffer = new ThreadLocal<char[]>(() => new char[0x10000]);

/// <summary>Per-thread 3-byte buffer used to sniff the start of a file.</summary>
private static readonly ThreadLocal<byte[]> preambulaBuffer = new ThreadLocal<byte[]>(() => new byte[3]);

/// <summary>The UTF-8 byte order mark bytes.</summary>
private static readonly byte[] utf8 = new UTF8Encoding(true).GetPreamble();
/// <summary>
/// Reports whether any entry of <paramref name="args"/> equals (after lower-casing)
/// one of <paramref name="values"/>. For every matching entry, idxLastArg is raised
/// to one past the position at which that entry is first found in the list.
/// </summary>
/// <param name="args">Candidate option arguments.</param>
/// <param name="values">Lower-case option spellings to look for.</param>
/// <returns>True when at least one argument matched.</returns>
private static bool Has(List<string> args, params string[] values)
{
    bool found = false;
    foreach (string candidate in args)
    {
        if (!values.Contains(candidate.ToLower()))
        {
            continue;
        }

        found = true;

        // IndexOf returns the first occurrence of this exact string.
        int next = args.IndexOf(candidate) + 1;
        if (next > idxLastArg)
        {
            idxLastArg = next;
        }
    }
    return found;
}
/// <summary>
/// Writes <paramref name="message"/> to the console unless quiet mode is on, or the
/// message is marked verbose while verbose mode is off.
/// </summary>
/// <param name="message">Text to print.</param>
/// <param name="verbose">True when the message should appear only in verbose mode.</param>
private static void Log(string message, bool verbose = false)
{
    bool suppressed = quiet || (verbose && !isVerbose);
    if (!suppressed)
    {
        Console.WriteLine(message);
    }
}
/// <summary>
/// Program entry point. Recognises option arguments among the first four arguments,
/// then processes, in order of preference: the files matched by 'demo' mode, the file
/// names given after the last option, or file names read line by line from standard input.
/// </summary>
/// <param name="args">Command-line arguments.</param>
/// <returns>0 on success or after printing help; 1 when 'demo' is given without an extension list.</returns>
public static int Main(string[] args)
{
    var sw = new Stopwatch();
    sw.Start();

    // Only the first four arguments are inspected for options; Has() records in
    // idxLastArg where the last recognised option sits.
    List<string> arguments = args.Take(4).ToList();
    if (arguments.Any())
    {
        if (Has(arguments, "q", "-q"))
        {
            quiet = true;
        }
        if (Has(arguments, "-v"))
        {
            Console.WriteLine("Verbose mode");
            isVerbose = true;
        }
        if (Has(arguments, "crlf"))
        {
            Log("Will do CRLF => LF conversion");
            doCRLFfix = true;
        }
        if (Has(arguments, "alltabs"))
        {
            Log("Will process all tabs, not only ident");
            identOnly = false;
        }
        if (Has(arguments, "help", "-h", "-?", "--help"))
        {
            // The option parsed above is spelled 'crlf'; the help text must match it.
            Log("Use 'q' for quiet");
            Log("Use 'crlf' argument for CRLF => LF conversion");
            Log("Use 'alltabs' argument for all tabs conversion, not only ident one");
            return 0;
        }
        if (Has(arguments, "demo"))
        {
            arguments = args.Skip(idxLastArg).ToList();
            if (arguments.Count == 0)
            {
                // Previously this case crashed with InvalidOperationException from First().
                Log("'demo' requires a space-separated list of file extensions");
                return 1;
            }

            // Drop empty entries: an empty suffix would match every file name.
            string[] ext = arguments[0].Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            // ".gitrewrite" contains ".git", so a single check excludes both.
            toInvoke =
                Directory.EnumerateFiles(Directory.GetCurrentDirectory(), "*.*", SearchOption.AllDirectories)
                    .Where(file => !file.Contains(".git")
                                   && ext.Any(suffix => file.EndsWith(suffix, StringComparison.Ordinal)))
                    .ToList();
            ProcessFiles();
            return 0;
        }
    }

    // Everything after the last recognised option is treated as a file name.
    arguments = args.Skip(idxLastArg).ToList();
    Log("Args parsed in " + sw.ElapsedMilliseconds);
    if (isVerbose)
    {
        Log(string.Join(" ", arguments), verbose: true);
    }
    if (arguments.Any())
    {
        Log("Processing files from arguments");
        toInvoke = arguments.ToList();
        ProcessFiles();
        return 0;
    }

    Log("Waiting for console input");
    while (Console.In.Peek() != -1)
    {
        string input = Console.In.ReadLine();
        if (input == null)
        {
            break;
        }
        if (string.IsNullOrWhiteSpace(input))
        {
            // A blank line is not a file name; skipping it avoids a spurious "File not found".
            continue;
        }
        toInvoke.Add(input);
    }
    ProcessFiles();
    return 0;
}
/// <summary>
/// Processes every entry of toInvoke. The file indices are dealt round-robin into
/// at most twelve slices, and the slices are handed to Parallel.ForEach; a summary
/// line is logged once all slices have finished.
/// </summary>
public static void ProcessFiles()
{
    int fileCount = toInvoke.Count;
    Log("Reading/Writing files. Total: " + fileCount);
    totalFiles = fileCount;
    if (fileCount == 0)
    {
        return;
    }

    const int sliceCount = 12;
    IEnumerable<IEnumerable<int>> slices = Enumerable.Range(0, fileCount).ToList().Split(sliceCount);

    Log("Starting threads");
    Parallel.ForEach(slices, slice => slice.ToList().ForEach(ProcessFile));

    string summary = string.Format("Total files: {0}, Read/WriteBytes: {1}/{2}",
        toInvoke.Count, totalReadBytes, totalWriteBytes);
    Log(summary);
}
/// <summary>Atomically bumps the count of files read and refreshes the progress line.</summary>
private static void UpdateReadCounter()
{
    Interlocked.Add(ref readCounter, 1);
    UpdateText();
}
/// <summary>
/// Rewrites the single-line progress indicator on the console. Nothing is printed in
/// quiet mode, or when neither counter is currently a multiple of 100 (zero included).
/// </summary>
private static void UpdateText()
{
    if (quiet)
    {
        return;
    }

    // A counter equal to zero is also a multiple of 100, so one test per counter suffices.
    bool atMilestone = readCounter % 100 == 0 || writeCounter % 100 == 0;
    if (!atMilestone)
    {
        return;
    }

    // The leading '\r' returns to the start of the line so the text overwrites itself.
    Console.Write("\rRead: {0} of {1}, Write: {2} of {3}, using {4} threads",
        readCounter, totalFiles, writeCounter, totalFiles, _threadsRunning);
}
/// <summary>Atomically bumps the count of files written and refreshes the progress line.</summary>
private static void UpdateWriteCounter()
{
    Interlocked.Add(ref writeCounter, 1);
    UpdateText();
}
/// <summary>
/// Checks whether the next bytes of <paramref name="stream"/> are the UTF-8 byte order
/// mark (0xEF, 0xBB, 0xBF). The stream position is restored before returning.
/// </summary>
/// <param name="stream">Seekable file stream positioned where the BOM would start.</param>
/// <returns>True only when three bytes could be read and they equal the UTF-8 preamble.</returns>
private static bool HasBom(FileStream stream)
{
    byte[] bits = preambulaBuffer.Value;
    int wanted = utf8.Length;

    // Stream.Read may return fewer bytes than requested, so keep reading until the
    // preamble length is reached or the stream ends.
    int read = 0;
    while (read < wanted)
    {
        int chunk = stream.Read(bits, read, wanted - read);
        if (chunk == 0)
        {
            break;
        }
        read += chunk;
    }

    // Put the stream back where it was so the caller can read the file from the start.
    stream.Seek(-read, SeekOrigin.Current);

    // The buffer is reused per thread: when the file is shorter than the preamble the
    // remaining slots still hold bytes from a previous file and must not be compared.
    if (read < wanted)
    {
        return false;
    }

    return bits[0] == utf8[0] && bits[1] == utf8[1] && bits[2] == utf8[2];
}
/// <summary>
/// Expands tabs (and optionally strips carriage returns) in the file at index
/// <paramref name="id"/> of toInvoke. The converted text is staged in memory and the
/// file is rewritten in place only when ExpandAndFix reports a change.
/// </summary>
/// <param name="id">Index into toInvoke; an out-of-range value is logged (verbose) and ignored.</param>
private static void ProcessFile(int id)
{
    if (id < 0 || id >= toInvoke.Count)
    {
        Log("File index out of range " + id, true);
        // Without this return the indexer below throws ArgumentOutOfRangeException.
        return;
    }

    string filePath = Path.Combine(Directory.GetCurrentDirectory(), toInvoke[id]);
    if (!File.Exists(filePath))
    {
        Log("File not found: " + toInvoke[id]);
        return;
    }

    // .xml and .vm files use 2-column tab stops, everything else 4.
    string extension = Path.GetExtension(filePath) ?? "";
    bool narrowTabs = string.Equals(extension, ".xml", StringComparison.OrdinalIgnoreCase)
                      || string.Equals(extension, ".vm", StringComparison.OrdinalIgnoreCase);
    int tabStops = narrowTabs ? 2 : 4;

    using (var outputMemoryBuffer = new MemoryStream())
    using (var outputBufferWriter = new StreamWriter(outputMemoryBuffer))
    {
        bool isDirty;
        Encoding enc;
        bool hasBom;

        using (var inputFile = new FileStream(filePath, FileMode.Open, FileAccess.ReadWrite))
        {
            hasBom = HasBom(inputFile);
            using (var inputReader = new StreamReader(inputFile, true))
            {
                isDirty = ExpandAndFix(inputReader, outputBufferWriter, tabStops);
                enc = inputReader.CurrentEncoding;
                UpdateReadCounter();
            }
        }

        if (!isDirty)
        {
            return;
        }

        outputMemoryBuffer.Seek(0, SeekOrigin.Begin);

        // Empty output is unusual but legitimate (for example a file holding only CRs
        // with the 'crlf' option), so only stop when a debugger is actually attached.
        if (outputMemoryBuffer.Length == 0 && Debugger.IsAttached)
        {
            Debugger.Break();
        }

        // A BOM is written back only when the input was UTF-8 and started with one.
        // NOTE(review): input detected as another encoding is rewritten as UTF-8
        // without a BOM - confirm that this conversion is intended.
        bool includeBom = enc.EncodingName == Encoding.UTF8.EncodingName && hasBom;
        char[] buffer = inputBuffer.Value;

        using (var outFile = new FileStream(filePath, FileMode.Truncate))
        using (var outputWriter = new StreamWriter(outFile, includeBom ? utfWithBom : utfWithNoBom))
        using (var outputBufferReader = new StreamReader(outputMemoryBuffer))
        {
            int read;
            while ((read = outputBufferReader.Read(buffer, 0, buffer.Length)) > 0)
            {
                outputWriter.Write(buffer, 0, read);
                Interlocked.Add(ref totalWriteBytes, read);
            }
            outputWriter.Flush();
        }

        UpdateWriteCounter();
    }
}
/// <summary>
/// Copies text from <paramref name="fileContent"/> to <paramref name="resultFileContent"/>,
/// replacing tabs with spaces up to the next multiple of <paramref name="tabLength"/> and,
/// when doCRLFfix is set, dropping every carriage return.
/// </summary>
/// <param name="fileContent">Reader over the original text.</param>
/// <param name="resultFileContent">Writer that receives the converted text; flushed before returning.</param>
/// <param name="tabLength">Distance between tab stops, in characters.</param>
/// <returns>True when at least one tab was expanded or one carriage return was dropped.</returns>
private static bool ExpandAndFix(StreamReader fileContent, StreamWriter resultFileContent, int tabLength)
{
    char[] source = inputBuffer.Value;
    char[] target = outputBuffer.Value;

    bool inIndent = true;   // true until the first non-space character of the current line
    bool changed = false;
    int column = 0;         // characters emitted on the current line so far

    for (int count = fileContent.Read(source, 0, source.Length);
         count > 0;
         count = fileContent.Read(source, 0, source.Length))
    {
        Interlocked.Add(ref totalReadBytes, count);

        int used = 0;
        for (int pos = 0; pos < count; pos++)
        {
            char c = source[pos];

            if (c == '\t')
            {
                // With identOnly set, tabs are expanded only inside leading indentation;
                // with it cleared, every tab is expanded.
                if (!identOnly || inIndent)
                {
                    int spaces = tabLength - (column % tabLength);
                    for (int k = 0; k < spaces; k++)
                    {
                        target[used++] = ' ';
                    }
                    column += spaces;
                    changed = true;
                }
                else
                {
                    target[used++] = c;
                }
            }
            else if (c == '\r')
            {
                // Start of a Windows line ending: dropped when the CRLF fix is on, copied otherwise.
                if (doCRLFfix)
                {
                    changed = true;
                }
                else
                {
                    target[used++] = c;
                }
            }
            else if (c == '\n')
            {
                target[used++] = c;
                column = 0;
                inIndent = true;
            }
            else
            {
                if (c != ' ')
                {
                    inIndent = false;
                }
                target[used++] = c;
                column++;
            }
        }

        resultFileContent.Write(target, 0, used);
    }

    resultFileContent.Flush();
    return changed;
}
} | |
/// <summary>Helper extensions for partitioning sequences.</summary>
internal static class LinqExtensions
{
    /// <summary>
    /// Deals the items of <paramref name="list"/> round-robin into groups: the item at
    /// position p goes to the group keyed p % <paramref name="parts"/>. Groups are
    /// yielded in order of first appearance and keep the original item order.
    /// Evaluation is deferred until the result is enumerated.
    /// </summary>
    /// <param name="list">Sequence to partition.</param>
    /// <param name="parts">Divisor used to assign items to groups.</param>
    /// <returns>The groups, each exposed as a plain sequence.</returns>
    public static IEnumerable<IEnumerable<T>> Split<T>(this IEnumerable<T> list, int parts)
    {
        return list
            .Select((item, position) => new { Item = item, Bucket = position % parts })
            .GroupBy(entry => entry.Bucket, entry => entry.Item)
            .Select(bucket => bucket.AsEnumerable());
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment