Skip to content

Instantly share code, notes, and snippets.

@RyuaNerin
Last active February 17, 2016 23:36
Show Gist options
  • Save RyuaNerin/2522466e3621340b5bba to your computer and use it in GitHub Desktop.
Save RyuaNerin/2522466e3621340b5bba to your computer and use it in GitHub Desktop.
파일이나 배열의 문자 인코딩을 찾아주는 클래스 http://blog.ryuanerin.kr/31
// RyuaNerin
// 2013-03-08
// 2016-02-18
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace RyuaNerin
{
/// <summary>
/// Detects the character encoding of a file or byte array from its leading
/// byte-order mark (the encoding's "preamble").
/// </summary>
/// <remarks>
/// Only encodings returned by <see cref="Encoding.GetEncodings"/> whose
/// <see cref="Encoding.GetPreamble"/> is non-empty can be detected. Data that
/// does not start with one of those preambles yields <c>null</c>.
/// </remarks>
public static class EncodingFinder
{
    /// <summary>Pairs an encoding with the preamble bytes that identify it.</summary>
    private struct PreambleInfo
    {
        public Encoding Encoding;
        public byte[] Preamble;
    }

    // Known preambles, ordered longest first (the static constructor explains why).
    private static readonly PreambleInfo[] m_preambles;

    // Length in bytes of the longest known preamble; bounds how much of a file is read.
    private static readonly int m_maxPreambleLength;

    /// <summary>
    /// Collects every available encoding that defines a preamble and orders
    /// them so that longer preambles are tested before shorter ones.
    /// </summary>
    static EncodingFinder()
    {
        var lst = new List<PreambleInfo>();
        int maxLength = 0;

        foreach (EncodingInfo oEncodingInfo in Encoding.GetEncodings())
        {
            Encoding encoding;
            try
            {
                encoding = oEncodingInfo.GetEncoding();
            }
            // NOTE(review): assumes an encoding that is listed but not available on
            // this runtime fails with one of these two exception types, as
            // Encoding.GetEncoding(int) does. Skipping it keeps the type
            // initializer from throwing, which would make the whole class unusable.
            catch (System.NotSupportedException)
            {
                continue;
            }
            catch (System.ArgumentException)
            {
                continue;
            }

            byte[] preamble = encoding.GetPreamble();
            if (preamble == null || preamble.Length == 0)
            {
                continue;
            }

            lst.Add(new PreambleInfo { Encoding = encoding, Preamble = preamble });
            if (preamble.Length > maxLength)
            {
                maxLength = preamble.Length;
            }
        }

        // Longest first: the UTF-16 LE preamble (FF FE) is a prefix of the
        // UTF-32 LE preamble (FF FE 00 00), so testing short preambles first
        // would report UTF-32 LE data as UTF-16 LE.
        lst.Sort((x, y) => y.Preamble.Length.CompareTo(x.Preamble.Length));

        m_preambles = lst.ToArray();
        m_maxPreambleLength = maxLength;
    }

    /// <summary>
    /// Detects the encoding of a file by inspecting its first bytes.
    /// </summary>
    /// <param name="FileName">Path of the file to inspect.</param>
    /// <returns>
    /// The encoding whose preamble the file starts with, or <c>null</c> when the
    /// file is empty or starts with no known preamble.
    /// </returns>
    public static Encoding DetectEncoding(string FileName)
    {
        using (var file = File.OpenRead(FileName))
        {
            long length = file.Length;
            if (length == 0) return null;

            // Clamp while still a long so files larger than int.MaxValue
            // cannot overflow the cast.
            int wanted = length > m_maxPreambleLength ? m_maxPreambleLength : (int)length;
            var bytes = new byte[wanted];

            // Stream.Read may return fewer bytes than requested; keep reading
            // until the buffer is full or the stream ends.
            int total = 0;
            while (total < wanted)
            {
                int got = file.Read(bytes, total, wanted - total);
                if (got <= 0) break;
                total += got;
            }

            return MatchPreamble(bytes, total);
        }
    }

    /// <summary>
    /// Detects the encoding of a byte array by inspecting its first bytes.
    /// </summary>
    /// <param name="bytes">The data to inspect.</param>
    /// <returns>
    /// The encoding whose preamble <paramref name="bytes"/> starts with, or
    /// <c>null</c> when it starts with no known preamble.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="bytes"/> is <c>null</c>.
    /// </exception>
    public static Encoding DetectEncoding(byte[] bytes)
    {
        if (bytes == null) throw new System.ArgumentNullException("bytes");
        return MatchPreamble(bytes, bytes.Length);
    }

    /// <summary>
    /// Returns the first known encoding whose preamble is a prefix of the first
    /// <paramref name="count"/> bytes of <paramref name="bytes"/>, or <c>null</c>.
    /// </summary>
    private static Encoding MatchPreamble(byte[] bytes, int count)
    {
        foreach (var candidate in m_preambles)
        {
            byte[] preamble = candidate.Preamble;
            if (count < preamble.Length) continue;

            bool match = true;
            for (int i = 0; i < preamble.Length; i++)
            {
                if (bytes[i] != preamble[i])
                {
                    match = false;
                    break;
                }
            }

            if (match) return candidate.Encoding;
        }

        return null;
    }

    /// <summary>
    /// Reads a whole file as text, using the detected encoding or
    /// <see cref="Encoding.Default"/> when none is detected.
    /// </summary>
    /// <param name="fileName">Path of the file to read.</param>
    /// <returns>The decoded text, without the preamble.</returns>
    public static string ReadAllText(string fileName)
    {
        Encoding usedEncoding;
        return ReadAllText(fileName, Encoding.Default, out usedEncoding);
    }

    /// <summary>
    /// Reads a whole file as text, using the detected encoding or
    /// <paramref name="defaultEncoding"/> when none is detected.
    /// </summary>
    /// <param name="fileName">Path of the file to read.</param>
    /// <param name="defaultEncoding">Encoding used when the file has no known preamble.</param>
    /// <returns>The decoded text, without the preamble.</returns>
    public static string ReadAllText(string fileName, Encoding defaultEncoding)
    {
        Encoding usedEncoding;
        return ReadAllText(fileName, defaultEncoding, out usedEncoding);
    }

    /// <summary>
    /// Reads a whole file as text and reports which encoding was used.
    /// </summary>
    /// <param name="fileName">Path of the file to read.</param>
    /// <param name="defaultEncoding">
    /// Encoding used when the file has no known preamble. When <c>null</c>,
    /// <see cref="Encoding.Default"/> is used instead.
    /// </param>
    /// <param name="usedEncoding">Receives the encoding the text was decoded with.</param>
    /// <returns>The decoded text, without the preamble.</returns>
    public static string ReadAllText(string fileName, Encoding defaultEncoding, out Encoding usedEncoding)
    {
        byte[] bytes = File.ReadAllBytes(fileName);
        int offset = 0;

        usedEncoding = DetectEncoding(bytes);
        if (usedEncoding == null)
        {
            // No preamble found: decode from the first byte with the fallback.
            usedEncoding = defaultEncoding ?? Encoding.Default;
        }
        else
        {
            // Skip the preamble so it does not show up in the returned text.
            offset = usedEncoding.GetPreamble().Length;
        }

        return usedEncoding.GetString(bytes, offset, bytes.Length - offset);
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment