Skip to content

Instantly share code, notes, and snippets.

@andrewboudreau
Last active January 20, 2021 00:43
Show Gist options
  • Save andrewboudreau/995ff4f9bdd2a2d018be230b9a792bc0 to your computer and use it in GitHub Desktop.
Save andrewboudreau/995ff4f9bdd2a2d018be230b9a792bc0 to your computer and use it in GitHub Desktop.
A way to make file formats more interesting
/// <summary>
/// Describes the type of content being stored in a file format.
/// Every content category below derives from this record.
/// </summary>
/// <param name="Extension">The file extension associated with the format.</param>
/// <param name="Name">The name of the format.</param>
/// <param name="Description">A description of the format.</param>
public abstract record ContentType(
    FileExtension Extension,
    string Name,
    string Description)
    : FileFormat(Extension, Name, Description);

/// <summary>
/// Content category for e-mail formats.
/// </summary>
public abstract record Email(
    FileExtension Extension,
    string Name,
    string Description)
    : ContentType(Extension, Name, Description);

/// <summary>
/// Content category for word-processing formats.
/// </summary>
public abstract record WordProcessing(
    FileExtension Extension,
    string Name,
    string Description)
    : ContentType(Extension, Name, Description);

/// <summary>
/// Content category for spreadsheet formats.
/// </summary>
public abstract record Spreadsheet(
    FileExtension Extension,
    string Name,
    string Description)
    : ContentType(Extension, Name, Description);

/// <summary>
/// Content category for presentation formats.
/// </summary>
public abstract record Presentation(
    FileExtension Extension,
    string Name,
    string Description)
    : ContentType(Extension, Name, Description);
/// <summary>
/// An Excel spreadsheet format. The format name is always <c>nameof(Excel)</c>,
/// including for the versioned records that derive from this one.
/// </summary>
/// <param name="Extension">The file extension associated with the format.</param>
/// <param name="Description">A description of the format.</param>
public record Excel(FileExtension Extension, string Description)
    : Spreadsheet(Extension, nameof(Excel), Description);

/// <summary>Versioned Excel record: Excel 95.</summary>
public record Excel95(FileExtension Extension, string Description)
    : Excel(Extension, Description);

/// <summary>Versioned Excel record: Excel 97.</summary>
public record Excel97(FileExtension Extension, string Description)
    : Excel(Extension, Description);

/// <summary>Versioned Excel record: Excel 2010.</summary>
public record Excel2010(FileExtension Extension, string Description)
    : Excel(Extension, Description);

/// <summary>
/// A Word document format. The format name is always <c>nameof(Word)</c>,
/// including for the versioned records that derive from this one.
/// </summary>
/// <param name="Extension">The file extension associated with the format.</param>
/// <param name="Description">A description of the format.</param>
public record Word(FileExtension Extension, string Description)
    : WordProcessing(Extension, nameof(Word), Description);

/// <summary>Versioned Word record: Word 97.</summary>
public record Word97(FileExtension Extension, string Description)
    : Word(Extension, Description);

/// <summary>Versioned Word record: Word 2010.</summary>
public record Word2010(FileExtension Extension, string Description)
    : Word(Extension, Description);

/// <summary>
/// A PowerPoint presentation format. The format name is always <c>nameof(PowerPoint)</c>,
/// including for the versioned records that derive from this one.
/// </summary>
/// <param name="Extension">The file extension associated with the format.</param>
/// <param name="Description">A description of the format.</param>
public record PowerPoint(FileExtension Extension, string Description)
    : Presentation(Extension, nameof(PowerPoint), Description);

/// <summary>Versioned PowerPoint record: PowerPoint 97.</summary>
public record PowerPoint97(FileExtension Extension, string Description)
    : PowerPoint(Extension, Description);

/// <summary>Versioned PowerPoint record: PowerPoint 2010.</summary>
public record PowerPoint2010(FileExtension Extension, string Description)
    : PowerPoint(Extension, Description);
/// <summary>
/// The PDF format, bound to <see cref="FileExtension.Pdf"/> and classified as word processing.
/// </summary>
public record Pdf()
    : WordProcessing(FileExtension.Pdf, "PDF", "Adobe PDF");

/// <summary>
/// The plain-text format, bound to <see cref="FileExtension.PlainText"/> and classified as word processing.
/// </summary>
public record PlainText()
    : WordProcessing(FileExtension.PlainText, "Text", "A plain text file");

/// <summary>
/// The e-mail message format, bound to <see cref="FileExtension.Eml"/>.
/// </summary>
public record Eml()
    : Email(FileExtension.Eml, "Email", "ASCII-based syntax required by SMTP for all email message");
/// <summary>
/// The known file extensions and media types.
/// </summary>
/// <remarks>
/// Instances can only be created through the static properties declared on this record.
/// <see cref="FileFormatFactory"/> discovers those properties via reflection and indexes them by
/// <see cref="Value"/>, so every static property must carry a distinct extension string.
/// </remarks>
public record FileExtension
{
    /// <summary>
    /// Validates an extension string and pairs it with its media type.
    /// </summary>
    /// <param name="extension">The extension including its leading period; stored lower-cased.</param>
    /// <param name="mediaType">The media type associated with the extension.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="extension"/> or <paramref name="mediaType"/> is null.
    /// </exception>
    /// <exception cref="InvalidFileExtensionException">
    /// The extension contains whitespace or invalid file-name characters, does not start with a
    /// period, or has nothing after the period.
    /// </exception>
    private FileExtension(string extension, MediaType mediaType)
    {
        const char dot = '.';

        // Guard first so a null argument surfaces as an argument error rather than a
        // NullReferenceException from ToLowerInvariant().
        if (extension is null)
        {
            throw new ArgumentNullException(nameof(extension));
        }

        extension = extension.ToLowerInvariant();

        // Reject every whitespace character (tabs, newlines, ...), not only ' ':
        // Path.GetInvalidFileNameChars() is platform dependent and cannot be relied on for those.
        if (extension.Any(char.IsWhiteSpace))
        {
            throw new InvalidFileExtensionException(extension, "Contains invalid whitespace characters.");
        }

        if (extension.Intersect(Path.GetInvalidFileNameChars()).Any())
        {
            throw new InvalidFileExtensionException(extension, "Contains invalid path characters.");
        }

        if (!extension.StartsWith(dot))
        {
            throw new InvalidFileExtensionException(extension, "Must start with a period.");
        }

        // A lone "." passes the checks above; require at least one character after the period.
        if (extension.Length < 2)
        {
            throw new InvalidFileExtensionException(extension, "Value must be at least 1 character.");
        }

        if (mediaType is null)
        {
            throw new ArgumentNullException(nameof(mediaType));
        }

        Value = extension;
        MediaType = mediaType;
    }

    /// <summary>
    /// Gets the string value of the file extension. Value does contain a leading period.
    /// </summary>
    public string Value { get; }

    /// <summary>
    /// Gets the media type for the file extension.
    /// </summary>
    public MediaType MediaType { get; }

    public static FileExtension Pdf { get; } = new FileExtension(".pdf", new MediaType("pdf"));

    public static FileExtension Excel { get; } = new FileExtension(".xlsx", new MediaType("vnd.openxmlformats-officedocument.spreadsheetml.sheet"));

    public static FileExtension ExcelMacro { get; } = new FileExtension(".xlsm", new MediaType("vnd.ms-excel.sheet.macroenabled.12"));

    public static FileExtension ExcelTemplate { get; } = new FileExtension(".xltx", new MediaType("vnd.openxmlformats-officedocument.spreadsheetml.template"));

    public static FileExtension ExcelTemplateMacro { get; } = new FileExtension(".xltm", new MediaType("vnd.ms-excel.template.macroenabled.12"));

    public static FileExtension ExcelBinary { get; } = new FileExtension(".xlsb", new MediaType("vnd.ms-excel.sheet.binary.macroenabled.12"));

    public static FileExtension Excel97 { get; } = new FileExtension(".xls", new MediaType("vnd.ms-excel"));

    public static FileExtension Excel97Macro { get; } = new FileExtension(".xlm", new MediaType("vnd.ms-excel"));

    public static FileExtension Excel97Template { get; } = new FileExtension(".xlt", new MediaType("vnd.ms-excel"));

    public static FileExtension PowerPoint { get; } = new FileExtension(".pptx", new MediaType("vnd.openxmlformats-officedocument.presentationml.presentation"));

    public static FileExtension PowerPoint97 { get; } = new FileExtension(".ppt", new MediaType("vnd.ms-powerpoint"));

    public static FileExtension Word { get; } = new FileExtension(".docx", new MediaType("vnd.openxmlformats-officedocument.wordprocessingml.document"));

    public static FileExtension Word97 { get; } = new FileExtension(".doc", new MediaType("msword"));

    public static FileExtension Eml { get; } = new FileExtension(".eml", new MediaType("message", "rfc822"));

    public static FileExtension PlainText { get; } = new FileExtension(".txt", new MediaType("text", "plain"));

    // NOTE: "Seperated" is a misspelling, but the name is public API and is kept as-is for compatibility.
    public static FileExtension CommaSeperatedValues { get; } = new FileExtension(".csv", new MediaType("text", "csv"));
}
/// <summary>
/// The factory responsible for mapping a file extension to a file format.
/// </summary>
public static class FileFormatFactory
{
    /// <summary>
    /// The known file extensions, keyed by <see cref="FileExtension.Value"/>.
    /// </summary>
    public static readonly Dictionary<string, FileExtension> Supported;

    /// <summary>
    /// Builds up the static list of known file extensions by reading the public static
    /// properties of <see cref="FileExtension"/>.
    /// </summary>
    static FileFormatFactory()
    {
        Supported = typeof(FileExtension)
            .GetProperties(BindingFlags.Public | BindingFlags.Static)
            .Where(p => p.PropertyType == typeof(FileExtension))
            .Select(p => (p.GetValue(null) as FileExtension)
                ?? throw new InvalidOperationException(
                    $"Static property '{nameof(FileExtension)}.{p.Name}' returned null."))
            .ToDictionary(x => x.Value);
    }

    /// <summary>
    /// Resolves the file format for the extension of <paramref name="path"/>.
    /// </summary>
    /// <param name="path">A path or file name whose extension selects the format.</param>
    /// <returns>The matching <see cref="FileFormat"/>.</returns>
    public static FileFormat FromPath(string path) => FromExtension(Path.GetExtension(path));

    /// <summary>
    /// Resolves the file format for an extension. Matching is case-insensitive.
    /// </summary>
    /// <param name="fileExtension">The extension, including the leading period (for example ".pdf").</param>
    /// <returns>A new <see cref="FileFormat"/> instance describing the extension.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="fileExtension"/> is null.</exception>
    /// <exception cref="InvalidFileExtensionException">
    /// The extension is not in <see cref="Supported"/>, or it is known but has no file format
    /// mapped to it.
    /// </exception>
    public static FileFormat FromExtension(string fileExtension)
    {
        if (fileExtension is null)
        {
            throw new ArgumentNullException(nameof(fileExtension));
        }

        // Keys in Supported are lower-cased by FileExtension, so normalise before the lookup.
        fileExtension = fileExtension.ToLowerInvariant();

        if (!Supported.TryGetValue(fileExtension, out var extension))
        {
            throw new InvalidFileExtensionException(fileExtension);
        }

        return extension switch
        {
            FileExtension e when (FileExtension.Pdf == e) => new Pdf(),
            FileExtension e when (FileExtension.Eml == e) => new Eml(),
            FileExtension e when (FileExtension.PlainText == e) => new PlainText(),

            FileExtension e when (FileExtension.Excel == e) => new Excel2010(e, "The default XML-based file format for Excel 2007 and above"),
            FileExtension e when (FileExtension.ExcelMacro == e) => new Excel2010(e, "The XML-based and macro-enabled file format for Excel 2007 and above"),
            FileExtension e when (FileExtension.ExcelTemplate == e) => new Excel2010(e, "The default file format for an Excel template for Excel 2007 and above"),
            FileExtension e when (FileExtension.ExcelTemplateMacro == e) => new Excel2010(e, "The macro-enabled file format for Excel 2007 and above"),
            FileExtension e when (FileExtension.ExcelBinary == e) => new Excel2010(e, "The binary file format(BIFF12) for Excel 2007 and above"),

            // An earlier draft also mapped .xls to Excel95 ("The Excel 5.0 / 95 Binary file format(BIFF5).")
            // based on an extra `variant` value that this method does not receive, so .xls always
            // resolves to Excel97 here.
            FileExtension e when (FileExtension.Excel97 == e) => new Excel97(e, "The Excel 97 - Excel 2003 Binary file format(BIFF8)"),
            FileExtension e when (FileExtension.Excel97Macro == e) => new Excel97(e, "The Excel 97 - Excel 2003 macro-enabled file format"),
            FileExtension e when (FileExtension.Excel97Template == e) => new Excel97(e, "The Excel 97 - Excel 2003 Binary file format (BIFF8) for an Excel template"),

            FileExtension e when (FileExtension.PowerPoint == e) => new PowerPoint2010(e, "The default XML-based file format for PowerPoint 2007 and above"),
            FileExtension e when (FileExtension.PowerPoint97 == e) => new PowerPoint97(e, "The PowerPoint 97 - PowerPoint 2003 Binary file format"),

            FileExtension e when (FileExtension.Word == e) => new Word2010(e, "The default XML-based file format for Word 2007 and above"),
            FileExtension e when (FileExtension.Word97 == e) => new Word97(e, "The Word 97 - Word 2003 Binary file format"),

            // Reached by extensions that are in Supported but have no arm above
            // (currently FileExtension.CommaSeperatedValues).
            _ => throw new InvalidFileExtensionException(extension.Value)
        };
    }
}
/// <summary>
/// Format and type information for a document.
/// </summary>
/// <param name="Extension">The file extension associated with the format.</param>
/// <param name="Name">The name of the format.</param>
/// <param name="Description">A description of the format.</param>
public abstract record FileFormat(
    FileExtension Extension,
    string Name,
    string Description)
{
    /// <summary>
    /// Resolves the format from the extension of a path.
    /// </summary>
    /// <param name="path">The path whose extension selects the format.</param>
    public static FileFormat FromPath(string path)
    {
        var extension = Path.GetExtension(path);
        return FromExtension(extension);
    }

    /// <summary>
    /// Resolves the format from the extension of a file name.
    /// </summary>
    /// <param name="fileName">The file name whose extension selects the format.</param>
    public static FileFormat FromName(string fileName)
    {
        var extension = Path.GetExtension(fileName);
        return FromExtension(extension);
    }

    /// <summary>
    /// Resolves the format for an extension by delegating to <see cref="FileFormatFactory.FromExtension"/>.
    /// </summary>
    /// <param name="fileExtension">The extension to resolve.</param>
    public static FileFormat FromExtension(string fileExtension)
        => FileFormatFactory.FromExtension(fileExtension);
}
/// <summary>
/// MediaType is the browser's way of working with files.
/// </summary>
/// <remarks>
/// MediaType does have a
/// <see href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types">specification</see>
/// but is only partially implemented here for type and subtype.
/// </remarks>
/// <param name="Type">The main type, the part before the slash.</param>
/// <param name="SubType">The subtype, the part after the slash.</param>
public record MediaType(string Type, string SubType)
{
    /// <summary>
    /// Creates a new MediaType whose main type defaults to "application".
    /// </summary>
    /// <param name="subType">The subtype which can include vendor info.</param>
    public MediaType(string subType)
        : this("application", subType)
    {
    }

    /// <summary>
    /// Formats the media type as the type and subtype joined by a slash.
    /// </summary>
    public override string ToString() => string.Concat(Type, "/", SubType);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment