Last active
May 22, 2016 20:07
-
-
Save yume-chan/adfce2921b364e6e6bbb310d4ee4fd4a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// A decode-only <see cref="Encoding"/> whose byte-to-Unicode tables are loaded from a
/// binary code page table (the layout parsed by <see cref="NlsEncoding(Stream, bool)"/>).
/// Single-byte tables (<see cref="MaxCharSize"/> == 1) and double-byte tables with
/// lead-byte ranges (<see cref="MaxCharSize"/> == 2) are supported. The char-to-byte
/// direction (<see cref="GetByteCount(char[], int, int)"/> and
/// <see cref="GetBytes(char[], int, int, byte[], int)"/>) is not implemented.
/// </summary>
public class NlsEncoding : Encoding
{
    /// <summary>An inclusive range of lead-byte values (<see cref="Start"/>..<see cref="End"/>).</summary>
    public struct Range
    {
        public readonly byte Start;
        public readonly byte End;

        public Range(byte start, byte end)
        {
            Start = start;
            End = end;
        }
    }

    /// <summary>
    /// Stateful decoder for <see cref="NlsEncoding"/>. When a buffer ends on a lead byte,
    /// the byte is remembered and combined with the first byte of the next call.
    /// </summary>
    public class NlsDecoder : Decoder
    {
        /// <summary>The encoding whose tables this decoder uses.</summary>
        public NlsEncoding Encoding { get; }

        public NlsDecoder(NlsEncoding encoding)
        {
            Encoding = encoding;
        }

        // Saved state: a lead byte seen at the very end of the previous buffer.
        private bool hasUnknownByte;
        private byte leadByte;    // Start of the lead-byte range the pending byte belongs to (map key).
        private byte unknownByte; // The pending lead byte itself.

        /// <summary>Discards any pending lead byte in addition to the base reset.</summary>
        public override void Reset()
        {
            base.Reset();
            hasUnknownByte = false;
            leadByte = 0;
            unknownByte = 0;
        }

        /// <summary>
        /// Returns the number of chars a matching <see cref="GetChars"/> call would produce,
        /// without changing the decoder state.
        /// </summary>
        public override int GetCharCount(byte[] bytes, int index, int count)
        {
            var oldHasUnknownByte = hasUnknownByte;
            var oldLeadByte = leadByte;
            var oldUnknownByte = unknownByte;
            try
            {
                // Decode into a scratch buffer; at most one char is produced per input byte.
                return GetChars(bytes, index, count, new char[count], 0);
            }
            finally
            {
                // Restore the state even when decoding throws, so counting never
                // consumes a pending lead byte.
                hasUnknownByte = oldHasUnknownByte;
                leadByte = oldLeadByte;
                unknownByte = oldUnknownByte;
            }
        }

        /// <summary>
        /// Decodes <paramref name="byteCount"/> bytes starting at <paramref name="byteIndex"/>
        /// into <paramref name="chars"/> starting at <paramref name="charIndex"/>.
        /// </summary>
        /// <returns>The number of chars written.</returns>
        /// <exception cref="DecoderFallbackException">
        /// The buffer ends on a lead byte and the encoding uses an exception fallback.
        /// </exception>
        public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
        {
            if (Encoding.MaxCharSize == 1)
            {
                var singleByteMap = Encoding.ByteToUnicodeMap[0];
                for (var i = byteIndex; i < byteIndex + byteCount; i++)
                {
                    chars[charIndex++] = singleByteMap[bytes[i]];
                }
                return byteCount;
            }

            // MaxCharSize == 2
            var start = charIndex;

            // Complete the pair left over from the previous call. Only do so when this call
            // actually supplies a byte; an empty buffer must leave the state pending.
            if (hasUnknownByte && byteCount > 0)
            {
                var b1 = unknownByte;
                var b2 = bytes[byteIndex];
                chars[charIndex++] = Encoding.ByteToUnicodeMap[leadByte][(b1 - leadByte) << 8 | b2];
                byteIndex++;
                byteCount--;
                hasUnknownByte = false;
            }

            var end = byteIndex + byteCount;
            for (var i = byteIndex; i < end; i++)
            {
                var b1 = bytes[i];
                Range range;
                if (!Encoding.TryFindLeadRange(b1, out range))
                {
                    // Not a lead byte: plain single-byte lookup.
                    chars[charIndex++] = Encoding.ByteToUnicodeMap[0][b1];
                    continue;
                }

                i++; // Move to the trail byte.
                if (i < end)
                {
                    var b2 = bytes[i];
                    chars[charIndex++] = Encoding.ByteToUnicodeMap[range.Start][(b1 - range.Start) << 8 | b2];
                }
                else if (Encoding.ThrowsOnInvalidBytes)
                {
                    // NOTE(review): this also throws when a valid pair is merely split across
                    // two buffers; kept as-is because callers may rely on it.
                    throw new DecoderFallbackException();
                }
                else
                {
                    // Remember the lead byte so the next call can finish the pair.
                    hasUnknownByte = true;
                    leadByte = range.Start;
                    unknownByte = b1;
                }
            }
            return charIndex - start;
        }
    }

    /// <summary>Creates a stateful decoder that can handle pairs split across buffers.</summary>
    public override Decoder GetDecoder() => new NlsDecoder(this);

    /// <summary>Code page identifier read from the table header.</summary>
    public override int CodePage { get; }

    /// <summary>Maximum bytes per character read from the header (1 or 2).</summary>
    public readonly ushort MaxCharSize;

    // The four 16-bit header words that follow MaxCharSize, in file order.
    // UnicodeFallbackChar is what GetChars emits for a dangling lead byte.
    public readonly char UnicodeFallbackChar;
    public readonly char ByteFallbackChar;
    public readonly char UnicodeFallbackChar2;
    public readonly char ByteFallbackChar2;

    /// <summary>Lead-byte ranges read from the header (empty for single-byte tables).</summary>
    public readonly Range[] LeadBytes;

    /// <summary>
    /// Decode tables. Key 0 holds the 256-entry single-byte table; each lead-byte range is
    /// stored under its <see cref="Range.Start"/> and indexed by
    /// <c>(lead - Start) &lt;&lt; 8 | trail</c>.
    /// </summary>
    public readonly Dictionary<byte, char[]> ByteToUnicodeMap;

    /// <summary>Loads the table from the file at <paramref name="path"/>.</summary>
    public NlsEncoding(string path)
        : this(File.OpenRead(path))
    { }

    /// <summary>
    /// Loads the table from <paramref name="stream"/>. The stream must be seekable and
    /// start at the beginning of the table, because absolute positions are used.
    /// </summary>
    /// <param name="stream">Source stream.</param>
    /// <param name="leaveOpen">When false (default) the stream is disposed after loading.</param>
    /// <exception cref="FormatException">The leading magic number is not 0x000D.</exception>
    /// <exception cref="NotSupportedException">The table declares more than 2 bytes per char.</exception>
    public NlsEncoding(Stream stream, bool leaveOpen = false)
    {
        using (var reader = new BinaryReader(stream, Default, leaveOpen))
        {
            // Magic number
            if (reader.ReadInt16() != 0x000D)
                throw new FormatException();

            CodePage = reader.ReadUInt16();

            MaxCharSize = reader.ReadUInt16();
            if (MaxCharSize > 2)
                throw new NotSupportedException();

            UnicodeFallbackChar = (char)reader.ReadUInt16();
            ByteFallbackChar = (char)reader.ReadUInt16();
            UnicodeFallbackChar2 = (char)reader.ReadUInt16();
            ByteFallbackChar2 = (char)reader.ReadUInt16();

            // Up to 6 (start, end) pairs; a (0, 0) pair terminates the list early.
            var leadBytes = new List<Range>();
            for (var i = 0; i < 6; i++)
            {
                var start = reader.ReadByte();
                var end = reader.ReadByte();
                if (start == 0 && end == 0)
                    break;
                leadBytes.Add(new Range(start, end));
            }
            LeadBytes = leadBytes.ToArray();

            if (MaxCharSize == 2 && LeadBytes.Length == 0)
            {
                // No explicit ranges: treat every byte as a lead byte.
                LeadBytes = new Range[] { new Range(0, 0xFF) };
            }

            stream.Position = 26; // End of Lead Byte

            ByteToUnicodeMap = new Dictionary<byte, char[]>(LeadBytes.Length + 1);

            stream.Position += 2; // 0x8003, UNKNOWN

            // ASCII Zone
            var map = new char[256];
            for (var i = 0; i < map.Length; i++)
                map[i] = (char)reader.ReadUInt16();
            ByteToUnicodeMap[0] = map;

            stream.Position += 4; // 0x00010000
            stream.Position += 256; // All 0x00
            stream.Position += 256; // UTF16-BE

            // One 256-entry row per lead byte value, stored back to back for each range.
            foreach (var leadByte in LeadBytes)
            {
                map = new char[(leadByte.End - leadByte.Start + 1) * 256];
                for (var j = 0; j < map.Length; j++)
                    map[j] = (char)reader.ReadUInt16();
                ByteToUnicodeMap[leadByte.Start] = map;
            }
        }
    }

    /// <summary>True when invalid input must raise <see cref="DecoderFallbackException"/>.</summary>
    // Type test instead of reference equality, so any DecoderExceptionFallback instance counts,
    // not only the DecoderFallback.ExceptionFallback singleton.
    private bool ThrowsOnInvalidBytes => DecoderFallback is DecoderExceptionFallback;

    /// <summary>
    /// Finds the first lead-byte range containing <paramref name="value"/>.
    /// </summary>
    /// <returns>True and the range when found; otherwise false.</returns>
    private bool TryFindLeadRange(byte value, out Range range)
    {
        foreach (var candidate in LeadBytes)
        {
            if (candidate.Start <= value && value <= candidate.End)
            {
                range = candidate;
                return true;
            }
        }
        range = default(Range);
        return false;
    }

    /// <summary>Not implemented: this encoding only decodes.</summary>
    public override int GetByteCount(char[] chars, int index, int count)
    {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented: this encoding only decodes.</summary>
    public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Returns the number of chars produced by decoding the given byte range.
    /// </summary>
    public override int GetCharCount(byte[] bytes, int index, int count)
    {
        // Do NOT call GetChars(byte[], int, int) here: the base implementation of that
        // overload sizes its result with GetCharCount, which would recurse without bound.
        // Decode into a scratch buffer instead (at most one char per input byte).
        return GetChars(bytes, index, count, new char[count], 0);
    }

    /// <summary>
    /// Decodes <paramref name="byteCount"/> bytes starting at <paramref name="byteIndex"/>
    /// into <paramref name="chars"/> starting at <paramref name="charIndex"/>.
    /// A lead byte with no trail byte yields <see cref="UnicodeFallbackChar"/>, or throws
    /// when an exception fallback is configured.
    /// </summary>
    /// <returns>The number of chars written.</returns>
    /// <exception cref="DecoderFallbackException">
    /// The input ends on a lead byte and an exception fallback is configured.
    /// </exception>
    public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
    {
        var end = byteIndex + byteCount;

        if (MaxCharSize == 1)
        {
            var singleByteMap = ByteToUnicodeMap[0];
            for (var i = byteIndex; i < end; i++)
            {
                chars[charIndex++] = singleByteMap[bytes[i]];
            }
            return byteCount;
        }

        // MaxCharSize == 2
        var start = charIndex;
        for (var i = byteIndex; i < end; i++)
        {
            var b1 = bytes[i];
            Range range;
            if (!TryFindLeadRange(b1, out range))
            {
                // Not a lead byte: plain single-byte lookup.
                chars[charIndex++] = ByteToUnicodeMap[0][b1];
                continue;
            }

            i++; // Move to the trail byte.
            if (i < end)
            {
                var b2 = bytes[i];
                chars[charIndex++] = ByteToUnicodeMap[range.Start][(b1 - range.Start) << 8 | b2];
            }
            else if (ThrowsOnInvalidBytes)
            {
                throw new DecoderFallbackException();
            }
            else
            {
                // Dangling lead byte at the end of the input.
                chars[charIndex++] = UnicodeFallbackChar;
            }
        }
        return charIndex - start;
    }

    public override int GetMaxByteCount(int charCount) => charCount * 2;

    public override int GetMaxCharCount(int byteCount) => byteCount;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment