Created
September 8, 2012 05:13
-
-
Save pH200/3672019 to your computer and use it in GitHub Desktop.
Encode .js files as UTF-8 with a BOM (optionally normalizing line endings to CRLF)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Linq; | |
using System.Text; | |
using System.Text.RegularExpressions; | |
namespace UTF8BOM
{
    /// <summary>
    /// Command-line tool that rewrites every <c>*.js</c> file under a directory as UTF-8 with a
    /// byte order mark and, on request, converts lone LF / lone CR line breaks to CRLF.
    /// </summary>
    /// <remarks>
    /// Arguments:
    /// the first argument that does not start with <c>-</c> is the root directory (default <c>.</c>);
    /// <c>-f</c> skips the confirmation prompt; <c>--crlf</c> enables line-ending normalization.
    /// </remarks>
    class Program
    {
        /// <summary>Search pattern selecting the files this tool processes.</summary>
        private const string JsFilePattern = "*.js";

        /// <summary>Matches a line break that is not already CRLF: an LF not preceded by CR, or a CR not followed by LF.</summary>
        private const string LoneLineBreakPattern = @"((?<!\r)\n|\r(?!\n))";

        /// <summary>
        /// Matches an affirmative answer. Anchored on both ends so that only "y" or "yes"
        /// (surrounding whitespace allowed) counts, not any text that merely contains a "y".
        /// </summary>
        private const string ConfirmPattern = @"^\s*(y|yes)\s*$";

        /// <summary>The UTF-8 byte order mark, see http://unicode.org/faq/utf_bom.html#bom4 </summary>
        private static readonly byte[] Utf8Bom = { 0xef, 0xbb, 0xbf };

        /// <summary>Entry point: parses the switches, then encodes and optionally normalizes the files.</summary>
        /// <param name="args">Command-line arguments as described on the class.</param>
        static void Main(string[] args)
        {
            var isForce = args.Any(arg => arg == "-f");
            var replaceCRLF = args.Any(arg => arg == "--crlf");

            // Switches are plain ASCII tokens, so compare ordinally rather than by current culture.
            var dir = args.FirstOrDefault(arg => !arg.StartsWith("-", StringComparison.Ordinal)) ?? ".";
            var path = Path.GetFullPath(dir);
            var pathUri = new Uri(path, UriKind.Absolute);

            EncodeFiles(isForce, path, pathUri);
            if (replaceCRLF)
            {
                ReplaceCRLF(path, pathUri);
            }
        }

        /// <summary>
        /// Lists every <c>*.js</c> file under <paramref name="path"/> that does not start with the
        /// UTF-8 BOM and, after confirmation, rewrites each one as UTF-8 with a BOM.
        /// </summary>
        /// <param name="isForce">When true, the files are rewritten without prompting.</param>
        /// <param name="path">Absolute path of the directory to scan recursively.</param>
        /// <param name="pathUri"><paramref name="path"/> as a URI; used only to print relative names.</param>
        private static void EncodeFiles(bool isForce, string path, Uri pathUri)
        {
            var candidates = Directory.EnumerateFiles(path, JsFilePattern, SearchOption.AllDirectories);
            var files = DetectNoBom(candidates).ToList();
            if (files.Count == 0)
            {
                Console.WriteLine("All js files were encoded with BOM already.");
                return;
            }

            foreach (var file in files)
            {
                Console.WriteLine(file);
            }

            if (!isForce)
            {
                Console.Write("Encode these files? [y/N]: ");
                if (!IsAffirmative(Console.ReadLine()))
                {
                    return;
                }
            }

            foreach (var file in files)
            {
                Console.WriteLine("Encoding " + RelativeTo(pathUri, file));

                // NOTE(review): ReadAllText detects the encoding from a BOM and otherwise decodes as
                // UTF-8, so a file in a legacy code page would presumably lose its non-ASCII
                // characters here; confirm the inputs are UTF-8/UTF-16 before running with -f.
                var text = File.ReadAllText(file);
                File.WriteAllText(file, text, new UTF8Encoding(true));
            }
        }

        /// <summary>
        /// Rewrites every <c>*.js</c> file under <paramref name="path"/> that contains a lone LF or
        /// lone CR so that all of its line breaks are CRLF. The file is saved as UTF-8 with a BOM.
        /// </summary>
        /// <param name="path">Absolute path of the directory to scan recursively.</param>
        /// <param name="pathUri"><paramref name="path"/> as a URI; used only to print relative names.</param>
        private static void ReplaceCRLF(string path, Uri pathUri)
        {
            foreach (var file in Directory.EnumerateFiles(path, JsFilePattern, SearchOption.AllDirectories))
            {
                var text = File.ReadAllText(file);
                if (!Regex.IsMatch(text, LoneLineBreakPattern))
                {
                    // Already CRLF throughout; leave the file untouched.
                    continue;
                }

                Console.WriteLine("Normalizing Line " + RelativeTo(pathUri, file));
                text = Regex.Replace(text, LoneLineBreakPattern, "\r\n"); // CRLF
                File.WriteAllText(file, text, new UTF8Encoding(true));
            }
        }

        /// <summary>Filters <paramref name="files"/> down to those that do not begin with the UTF-8 BOM.</summary>
        /// <param name="files">Paths of the files to inspect.</param>
        /// <returns>A lazily evaluated sequence of the paths whose first three bytes are not EF BB BF.</returns>
        private static IEnumerable<string> DetectNoBom(IEnumerable<string> files)
        {
            foreach (var file in files)
            {
                // The file handle is closed inside the helper, before control is yielded to the caller.
                if (!StartsWithUtf8Bom(file))
                {
                    yield return file;
                }
            }
        }

        /// <summary>Returns true when <paramref name="file"/> begins with the three UTF-8 BOM bytes.</summary>
        private static bool StartsWithUtf8Bom(string file)
        {
            var head = new byte[Utf8Bom.Length];
            var filled = 0;
            using (var stream = File.OpenRead(file))
            {
                // Stream.Read may return fewer bytes than requested, so keep reading until the
                // buffer is full or the end of the file is reached.
                int read;
                while (filled < head.Length
                       && (read = stream.Read(head, filled, head.Length - filled)) > 0)
                {
                    filled += read;
                }
            }

            // A file shorter than the BOM cannot start with it.
            return filled == head.Length && head.SequenceEqual(Utf8Bom);
        }

        /// <summary>Returns true only for an explicit "y" or "yes" answer (case-insensitive).</summary>
        /// <param name="answer">The line typed by the user, or null when standard input has ended.</param>
        private static bool IsAffirmative(string answer)
        {
            // Console.ReadLine returns null at end of input; treat that as the default answer "N".
            return answer != null
                && Regex.IsMatch(answer, ConfirmPattern, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
        }

        /// <summary>Builds the display name of <paramref name="file"/> relative to <paramref name="baseUri"/>.</summary>
        private static Uri RelativeTo(Uri baseUri, string file)
        {
            return baseUri.MakeRelativeUri(new Uri(file, UriKind.Absolute));
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment