Last active
April 27, 2024 04:08
-
-
Save jchandra74/81d52bfd73ee444093e4 to your computer and use it in GitHub Desktop.
Detect the MIME type and file extension from the filename, falling back to signature detection on the file stream (only for files with a .binary extension).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//mimetypes: http://www.sitepoint.com/web-foundations/mime-types-complete-list/
//https://technet.microsoft.com/en-us/library/ee309278(office.12).aspx
/// <summary>
/// Helpers for working out a file's MIME type and extension from its name,
/// falling back to sniffing the content for ".txt" and ".binary" files.
/// </summary>
/// <remarks>
/// None of the helpers close or dispose the stream handed in by the caller.
/// Content sniffing seeks to the start of the stream, so the stream must be seekable.
/// </remarks>
public static class FileUtil
{
    private const string DefaultMimeType = "application/octet-stream";
    private const string ZipMimeType = "application/zip";

    private const string DocxMimeType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    private const string DocmMimeType = "application/vnd.ms-word.document.macroEnabled.12";
    private const string XlsxMimeType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
    private const string XlsmMimeType = "application/vnd.ms-excel.sheet.macroEnabled.12";

    //see: https://en.wikipedia.org/wiki/List_of_file_signatures
    private static readonly byte[] OldOfficeSignature = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
    // Only the first three bytes are common to every JPEG; the fourth byte is the
    // first application marker (0xe0 JFIF, 0xe1 EXIF, ...) and varies per file.
    private static readonly byte[] JpegSignature = { 0xff, 0xd8, 0xff };
    private static readonly byte[] ZipSignature = { 0x50, 0x4b, 0x03, 0x04 };
    private static readonly byte[] PdfSignature = { 0x25, 0x50, 0x44, 0x46 };

    /// <summary>
    /// Determines the MIME type of a file.
    /// </summary>
    /// <param name="filename">File name (or path) whose extension drives the detection.</param>
    /// <param name="fileStream">
    /// Seekable stream with the file content. Only read for ".txt" and ".binary"
    /// files; it is left open, at an unspecified position.
    /// </param>
    /// <returns>
    /// "application/octet-stream" when the name has no extension or a ".binary" file is not
    /// recognised; a sniffed type for ".txt" / ".binary"; otherwise whatever
    /// <c>MimeMapping.GetMimeMapping</c> returns for the name.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="fileStream"/> is null and the content has to be inspected.
    /// </exception>
    public static string DetectFileType(string filename, Stream fileStream)
    {
        var ext = Path.GetExtension(filename);
        if (string.IsNullOrEmpty(ext))
        {
            return DefaultMimeType;
        }

        switch (ext.ToUpperInvariant())
        {
            case ".TXT":
                RequireStream(fileStream);
                //check if file contains <html>
                return IsHtml(fileStream) ? "text/html" : "text/plain";

            case ".BINARY":
                RequireStream(fileStream);
                return SniffBinary(fileStream);

            default:
                return MimeMapping.GetMimeMapping(filename);
        }
    }

    /// <summary>
    /// Determines the extension (including the leading dot) that matches the detected file type.
    /// </summary>
    /// <param name="fileName">File name (or path) of the file.</param>
    /// <param name="s">Seekable stream with the file content. It is left open and rewound to the start.</param>
    /// <returns>
    /// The extension for the detected MIME type, or the extension already present on
    /// <paramref name="fileName"/> (possibly empty) when the type has no known mapping.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="s"/> is null.</exception>
    public static string DetectExtension(string fileName, Stream s)
    {
        if (s == null)
        {
            throw new System.ArgumentNullException("s");
        }

        string fileType;
        s.Seek(0, SeekOrigin.Begin);
        try
        {
            // The helpers no longer close the stream, so it can be inspected
            // directly instead of being copied into memory first.
            fileType = DetectFileType(fileName, s);
        }
        finally
        {
            // Hand the stream back rewound so the caller can read it from the start.
            s.Seek(0, SeekOrigin.Begin);
        }

        switch (fileType)
        {
            case "application/pdf":
                return ".pdf";
            case ZipMimeType:
                return ".zip";
            case "text/html":
                return ".html";
            case "image/jpeg":
                return ".jpg";
            case "application/msword":
                return ".doc";
            case DocxMimeType:
                return ".docx";
            case DocmMimeType:
                return ".docm";
            case XlsxMimeType:
                return ".xlsx";
            case XlsmMimeType:
                return ".xlsm";
            default:
                return Path.GetExtension(fileName) ?? "";
        }
    }

    // Throws when content sniffing is required but no stream was supplied.
    private static void RequireStream(Stream fileStream)
    {
        if (fileStream == null)
        {
            throw new System.ArgumentNullException("fileStream");
        }
    }

    // Identifies the content of a ".binary" file from its leading magic bytes.
    private static string SniffBinary(Stream fileStream)
    {
        if (IsJpeg(fileStream))
        {
            return "image/jpeg";
        }
        if (IsPdf(fileStream))
        {
            return "application/pdf";
        }
        if (IsOldOfficeDoc(fileStream))
        {
            //Assume it is Word
            return "application/msword";
        }
        if (IsZip(fileStream))
        {
            var openXmlType = GetOpenXmlType(fileStream);
            return string.IsNullOrWhiteSpace(openXmlType) ? ZipMimeType : openXmlType;
        }
        //Don't know what this is, so just return default.
        return DefaultMimeType;
    }

    // Inspects [Content_Types].xml of a zip package to tell Word/Excel OpenXML documents apart.
    // Returns "" when the stream is not a readable zip or has no content-types part,
    // and "application/zip" when the package is neither a Word nor an Excel document.
    private static string GetOpenXmlType(Stream fileStream)
    {
        string content;
        fileStream.Seek(0, SeekOrigin.Begin);
        try
        {
            // leaveOpen: true - the archive must be disposed, but the stream belongs to the caller.
            using (var archive = new ZipArchive(fileStream, ZipArchiveMode.Read, true))
            {
                var entry = archive.Entries.FirstOrDefault(e => e.Name == "[Content_Types].xml");
                if (entry == null)
                {
                    return "";
                }
                using (var reader = new StreamReader(entry.Open()))
                {
                    content = reader.ReadToEnd();
                }
            }
        }
        catch (InvalidDataException)
        {
            // Starts with the zip magic bytes but is truncated/corrupt: treat it as a plain zip.
            return "";
        }

        var macroEnabled = content.Contains("macroEnabled");

        // Spreadsheet is checked first: when both markers are present it takes precedence.
        if (content.Contains("application/vnd.openxmlformats-officedocument.spreadsheetml"))
        {
            return macroEnabled ? XlsmMimeType : XlsxMimeType;
        }
        if (content.Contains("application/vnd.openxmlformats-officedocument.wordprocessingml"))
        {
            return macroEnabled ? DocmMimeType : DocxMimeType;
        }

        //Don't know what type of openxml doc this is (only detecting doc and xls)
        return ZipMimeType;
    }

    // Returns true when the stream starts with exactly the given bytes.
    private static bool CompareSignature(Stream stream, byte[] signature)
    {
        var buffer = new byte[signature.Length];
        stream.Seek(0, SeekOrigin.Begin);

        // Stream.Read may return fewer bytes than requested, so keep reading
        // until the buffer is full or the stream ends.
        var total = 0;
        while (total < buffer.Length)
        {
            var read = stream.Read(buffer, total, buffer.Length - total);
            if (read <= 0)
            {
                return false;
            }
            total += read;
        }

        return buffer.SequenceEqual(signature);
    }

    private static bool IsPdf(Stream fileStream)
    {
        return CompareSignature(fileStream, PdfSignature);
    }

    private static bool IsZip(Stream fileStream)
    {
        return CompareSignature(fileStream, ZipSignature);
    }

    private static bool IsOldOfficeDoc(Stream fileStream)
    {
        return CompareSignature(fileStream, OldOfficeSignature);
    }

    private static bool IsJpeg(Stream fileStream)
    {
        return CompareSignature(fileStream, JpegSignature);
    }

    // Returns true when the text content contains an "<html" tag (case-insensitive).
    private static bool IsHtml(Stream fileStream)
    {
        var text = GetTextFileContent(fileStream);
        return !string.IsNullOrWhiteSpace(text)
            && text.IndexOf("<HTML", System.StringComparison.OrdinalIgnoreCase) >= 0;
    }

    // Reads the whole stream as text from the beginning without closing it.
    private static string GetTextFileContent(Stream fileStream)
    {
        fileStream.Seek(0, SeekOrigin.Begin);
        // Same encoding/BOM-detection/buffer settings as new StreamReader(stream),
        // plus leaveOpen so disposing the reader does not close the caller's stream.
        using (var reader = new StreamReader(fileStream, System.Text.Encoding.UTF8, true, 1024, true))
        {
            return reader.ReadToEnd();
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment