Created
November 28, 2024 18:42
-
-
Save donnaken15/7ca4ba030ae844a415a93dc478dd3bd7 to your computer and use it in GitHub Desktop.
store compacted text and binary files in UCS-2 strings using Encoding functions with T4 (m4 much) - RESX AND NEW BYTE[] IS BLOATED!!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<#@template language="C#" hostspecific="true"#> | |
<#@output extension=".cs"#> | |
<#@import namespace="System.IO"#> | |
<#@import namespace="System.Text"#> | |
<#@import namespace="System.Text.RegularExpressions"#> | |
<#@import namespace="System.Collections.Generic"#> | |
<#
	// ---------------------------------------------------------------------
	// Generator configuration. Everything declared here is consumed by the
	// control block that emits the fields of the generated class.
	// ---------------------------------------------------------------------
	Encoding A = Encoding.ASCII, U = Encoding.Unicode, UTF8 = Encoding.UTF8;

	// Flag bits carried in Item3 of every rawstrings entry.
	const byte textfile = 0;      // Item1 is a path to a text file (no bit set)
	const byte directstring = 1;  // Item1 is the content itself, not a path
	const byte binary = 2;        // emit a byte[] instead of a string
	const byte escapes = 4;       // run Regex.Unescape over the text
	const byte raw = 8;           // use when unicode is present in string

	// Candidate stand-in separators, tried in this order; the first one that
	// does not occur in the text replaces the entry's splitter character.
	char[] throwawaySeps = new char[] { '&', '~', '^', '\0' };

	// Pairs of (first, one-past-last) code units that are rewritten as \uXXXX
	// escapes in the emitted literal.
	// Original note: errors if any raw characters match these ranges in source files.
	char[] corrections = new char[] {
		'\u2027', '\u202a',
		'\u0000', '\u0020',
		'\u0080', '\u00a0',
	};

	// Literal escapes applied before the \uXXXX pass. The backslash entry is
	// added first so that it is also the first one applied.
	var escs = new Dictionary<string, string>();
	escs.Add("\\", "\\\\");
	escs.Add("\"", "\\\"");
	escs.Add("\n", "\\n");
	escs.Add("\r", "\\r");
	escs.Add("\t", "\\t");

	// One entry per generated field:
	//   Item1 = file name, or the content itself when directstring is set
	//   Item2 = name of the generated field
	//   Item3 = flag bits from the constants above
	//   Item4 = splitter character, '\0' for none (never consulted for binary entries)
	Tuple<string, string, byte, char>[] rawstrings = {
		//Tuple.Create<string, string, byte, char>("idc.txt", "test", textfile, '\n'),
		Tuple.Create<string, string, byte, char>("lt.txt", "test", textfile | escapes, '\n'),
		Tuple.Create<string, string, byte, char>("kl.bin", "kl", binary, '\0'),
		Tuple.Create<string, string, byte, char>("sctn.txt", "sctn", textfile, '\n'),
		Tuple.Create<string, string, byte, char>("gsn.txt", "gsn", textfile, '\n'),
		//Tuple.Create<string, string, byte, char>("test", "test2", directstring | raw, '\0'),
	};
#>
using System.Text; | |
partial class Program
{
	static Encoding A = Encoding.ASCII, U = Encoding.Unicode, UTF8 = Encoding.UTF8;
<# foreach (var why in rawstrings) {
	// Emits one "public static <type> <name> = <expr>;" field per rawstrings entry.
	//
	// Pipeline for text entries:
	//   1. load the text (from file, or take Item1 verbatim for direct strings)
	//   2. swap the splitter character for a stand-in separator absent from the text
	//   3. optionally run Regex.Unescape
	//   4. if packable, reinterpret the encoded bytes as UTF-16 code units so that
	//      two bytes of payload occupy one character of the emitted literal
	//   5. escape the literal and wrap it in the matching decode expression
	// Binary entries skip steps 2-3 and always pack the raw file bytes.
	//
	// Everything stays inline in this control block: the original author noted
	// T4's "Blocks are not permitted after helpers" error when trying helpers.
	//
	// NOTE(review): Encoding.Unicode appears to substitute U+FFFD for unpaired
	// surrogates, so packed byte pairs that land in 0xD800-0xDFFF may not survive
	// the round trip - confirm against the inputs you embed.
	bool direct = (why.Item3 & directstring) == directstring;
	bool escaped = (why.Item3 & escapes) == escapes;
	bool rawstr = (why.Item3 & raw) == raw;
	bool compactable = true;
	string varname = why.Item2;
	string target = direct ? why.Item1 : Host.ResolvePath(why.Item1);
	// Bit 1 of the flags (the "binary" bit) selects byte[] over string.
	string type = (new string[] {"string", "byte[]"})[(why.Item3 >> 1 & 1)];
	// Name of the generated-class encoding field used to decode the packed bytes.
	string convback = "UTF8";
	char splitter = why.Item4;
	char sep = '\0';
	if (type == "string")
	{
		// Bytes per character implied by the source; anything above 2 is never packed.
		byte bpc = 1;
		if (!direct)
		{
			// Sniff the byte-order mark. Bytes are compared individually so the
			// result does not depend on host endianness, and the 4-byte UTF-32
			// marks are tested before the 2-byte UTF-16 marks because
			// FF FE 00 00 begins with the UTF-16LE mark FF FE.
			// See https://stackoverflow.com/questions/3825390
			var bom = new byte[4];
			int got;
			using (var file = new FileStream(target, FileMode.Open, FileAccess.Read))
			{
				got = file.Read(bom, 0, 4);
			}
			bool markFFFE = got >= 2 && bom[0] == 0xff && bom[1] == 0xfe;
			bool markFEFF = got >= 2 && bom[0] == 0xfe && bom[1] == 0xff;
			bool utf32le = got == 4 && markFFFE && bom[2] == 0 && bom[3] == 0;
			bool utf32be = got == 4 && bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff;
			if (utf32le || utf32be)
			{
				bpc = 4;
			}
			else if (markFFFE || markFEFF)
			{
				bpc = 2;
			}
			// UTF-8 / UTF-7 marks (and no mark at all) leave bpc at 1.
			// Decoding itself is left to File.ReadAllText's own detection.
			target = File.ReadAllText(target);
		}
		else
		{
			bpc = (byte)(rawstr ? 2 : 1);
		}
		if (rawstr || bpc > 2)
		{
			compactable = false;
		}
		if (splitter != 0)
		{
			// The result becomes a string[]; the splitter is replaced by a
			// stand-in that the generated code splits on at run time.
			type = "string[]";
			bool found = false;
			foreach (char candidate in throwawaySeps)
			{
				if (target.IndexOf(candidate) < 0)
				{
					found = true;
					sep = candidate;
					break;
				}
			}
			if (!found)
			{
				throw new InvalidOperationException("Couldn't find a throwaway separator");
			}
			target = target.Replace(splitter, sep);
		}
		if (escaped)
		{
			// Runs after the separator swap, so escapes that expand to the
			// splitter character stay inside their element.
			target = Regex.Unescape(target);
		}
		if (compactable)
		{
			byte[] comp;
			if (bpc == 1)
			{
				comp = UTF8.GetBytes(target);
				// Pad the BYTE count (not the character count) to even: UTF-8
				// text with multi-byte characters can have an odd byte length
				// even when its character count is even. The pad byte decodes
				// to a trailing '\0' in the generated string.
				if ((comp.Length & 1) == 1)
				{
					Array.Resize(ref comp, comp.Length + 1);
				}
			}
			else
			{
				// UTF-16 sourced text: the bytes are UTF-16, so the generated
				// code must decode them with U rather than UTF8.
				comp = U.GetBytes(target);
				convback = "U";
			}
			target = U.GetString(comp);
		}
	}
	else
	{
		// Binary entry: pack the file bytes two per character; an odd-length
		// file gains one trailing zero byte in the generated array.
		byte[] uh = File.ReadAllBytes(target);
		if ((uh.Length & 1) == 1)
		{
			Array.Resize(ref uh, uh.Length + 1);
		}
		target = U.GetString(uh);
		compactable = true;
	}
	// Turn the payload into a C# string literal: named escapes first
	// (backslash before the rest), then \uXXXX for every listed range.
	foreach (KeyValuePair<string, string> esc in escs)
	{
		target = target.Replace(esc.Key, esc.Value);
	}
	// corrections holds (first, one-past-last) pairs.
	for (int r = 0; r + 1 < corrections.Length; r += 2)
	{
		for (char c = corrections[r]; c < corrections[r + 1]; c++)
		{
			target = target.Replace(new string(c, 1), "\\u" + ((ushort)c).ToString("x4"));
		}
	}
	target = "\"" + target + "\"";
	if (compactable)
	{
		target = "U.GetBytes(" + target + ")";
	}
	if (type != "byte[]")
	{
		if (compactable)
		{
			target = convback + ".GetString(" + target + ")";
		}
		if (type == "string[]")
		{
			// Control characters, quote and backslash are written as \uXXXX so
			// the char literal stays valid (the '\0' candidate would otherwise
			// put a raw NUL into the generated source).
			string sepLit = (sep < ' ' || sep == '\'' || sep == '\\')
				? "\\u" + ((ushort)sep).ToString("x4")
				: sep.ToString();
			target += ".Split(new char[1]{'" + sepLit + "'})";
		}
	}
	Write("\tpublic static "+type+' '+varname+" = "+target+";\n");
}
#>
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment