Skip to content

Instantly share code, notes, and snippets.

@jchandra74
Last active April 27, 2024 04:08
Show Gist options
  • Save jchandra74/81d52bfd73ee444093e4 to your computer and use it in GitHub Desktop.
Save jchandra74/81d52bfd73ee444093e4 to your computer and use it in GitHub Desktop.
Detects a file's MIME type and extension from its file name, falling back to inspecting the file stream when the name is not enough (HTML sniffing for .txt files, signature detection for .binary files).
//mimetypes: http://www.sitepoint.com/web-foundations/mime-types-complete-list/
//https://technet.microsoft.com/en-us/library/ee309278(office.12).aspx
/// <summary>
/// Works out a file's MIME type and extension from its file name, falling back to
/// inspecting the stream content for ".txt" files (HTML sniffing) and ".binary"
/// files (signature detection). Streams passed in must be seekable; they are never
/// disposed by these helpers.
/// </summary>
public static class FileUtil
{
    private const string DefaultMimeType = "application/octet-stream";
    private const string ZipMimeType = "application/zip";
    private const string DocxMimeType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    private const string DocmMimeType = "application/vnd.ms-word.document.macroEnabled.12";
    private const string XlsxMimeType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
    private const string XlsmMimeType = "application/vnd.ms-excel.sheet.macroEnabled.12";

    //see: https://en.wikipedia.org/wiki/List_of_file_signatures
    private static readonly byte[] oldOfficeSignature = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
    // Only the common 3-byte prefix is compared: the 4th byte differs between
    // JFIF (E0), EXIF (E1) and other JPEG flavours, which would otherwise be missed.
    private static readonly byte[] jpegSignature = { 0xff, 0xd8, 0xff };
    private static readonly byte[] zipSignature = { 0x50, 0x4b, 0x03, 0x04 };
    private static readonly byte[] pdfSignature = { 0x25, 0x50, 0x44, 0x46 };

    /// <summary>
    /// Returns the MIME type for a file.
    /// </summary>
    /// <param name="filename">File name (or path); its extension drives the detection.</param>
    /// <param name="fileStream">
    /// Seekable stream with the file content. Only read for ".txt" and ".binary" files;
    /// it is left open, at an unspecified position.
    /// </param>
    /// <returns>
    /// "application/octet-stream" when the name has no extension or a ".binary" file is not
    /// recognised; "text/html" or "text/plain" for ".txt"; otherwise whatever
    /// <c>MimeMapping.GetMimeMapping</c> returns for the file name.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="fileStream"/> is null and the extension requires content inspection.
    /// </exception>
    public static string DetectFileType(string filename, Stream fileStream)
    {
        var ext = Path.GetExtension(filename);
        if (string.IsNullOrEmpty(ext))
        {
            return DefaultMimeType;
        }

        switch (ext.ToUpperInvariant())
        {
            case ".TXT":
                RequireStream(fileStream);
                //check if file contains <html>
                return IsHtml(fileStream) ? "text/html" : "text/plain";
            case ".BINARY":
                RequireStream(fileStream);
                return DetectBinaryType(fileStream);
            default:
                return MimeMapping.GetMimeMapping(filename);
        }
    }

    /// <summary>
    /// Returns the extension (including the leading dot) that matches the detected file type.
    /// </summary>
    /// <param name="fileName">File name (or path) of the file.</param>
    /// <param name="s">Seekable stream with the file content; rewound to the start before returning.</param>
    /// <returns>
    /// The extension for the detected MIME type, or the file name's own extension
    /// (empty string when it has none) if the type is not one of the mapped ones.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="s"/> is null.</exception>
    public static string DetectExtension(string fileName, Stream s)
    {
        if (s == null)
        {
            throw new System.ArgumentNullException("s");
        }

        string fileType;
        s.Seek(0, SeekOrigin.Begin);
        try
        {
            // Detection no longer disposes the stream, so it can run directly on the
            // caller's stream instead of on a full in-memory copy.
            fileType = DetectFileType(fileName, s);
        }
        finally
        {
            // Hand the stream back positioned at the start, whatever detection did.
            s.Seek(0, SeekOrigin.Begin);
        }

        switch (fileType)
        {
            case "application/pdf":
                return ".pdf";
            case ZipMimeType:
                return ".zip";
            case "text/html":
                return ".html";
            case "image/jpeg":
                return ".jpg";
            case "application/msword":
                return ".doc";
            case DocxMimeType:
                return ".docx";
            case DocmMimeType:
                return ".docm";
            case XlsxMimeType:
                return ".xlsx";
            case XlsmMimeType:
                return ".xlsm";
            default:
                return Path.GetExtension(fileName) ?? "";
        }
    }

    // Throws when a code path that has to read the content was given no stream.
    private static void RequireStream(Stream fileStream)
    {
        if (fileStream == null)
        {
            throw new System.ArgumentNullException("fileStream");
        }
    }

    // Identifies a ".binary" file from its leading signature bytes.
    private static string DetectBinaryType(Stream fileStream)
    {
        if (IsJpeg(fileStream))
        {
            return "image/jpeg";
        }
        if (IsPdf(fileStream))
        {
            return "application/pdf";
        }
        if (IsOldOfficeDoc(fileStream))
        {
            //Assume it is Word
            return "application/msword";
        }
        if (IsZip(fileStream))
        {
            var openXmlType = GetOpenXmlType(fileStream);
            return string.IsNullOrWhiteSpace(openXmlType) ? ZipMimeType : openXmlType;
        }
        //Don't know what this is, so just return default.
        return DefaultMimeType;
    }

    // Reads [Content_Types].xml from a ZIP package and maps it to an Office Open XML
    // MIME type. Returns "" when the entry is missing or the archive cannot be read,
    // and "application/zip" when the package is neither a Word nor an Excel document.
    private static string GetOpenXmlType(Stream fileStream)
    {
        fileStream.Seek(0, SeekOrigin.Begin);
        string content;
        try
        {
            // leaveOpen: true - disposing the archive must not close the caller's stream.
            using (var archive = new ZipArchive(fileStream, ZipArchiveMode.Read, true))
            {
                // GetEntry matches the full entry path, so only the root-level
                // [Content_Types].xml is considered, not one nested in a sub-folder.
                var entry = archive.GetEntry("[Content_Types].xml");
                if (entry == null)
                {
                    return "";
                }
                using (var reader = new StreamReader(entry.Open()))
                {
                    content = reader.ReadToEnd();
                }
            }
        }
        catch (InvalidDataException)
        {
            // Has a ZIP signature but is not a readable archive; let the caller
            // fall back to the generic ZIP type.
            return "";
        }

        return ClassifyContentTypes(content);
    }

    // Maps the text of [Content_Types].xml to a MIME type (only Word and Excel are detected).
    private static string ClassifyContentTypes(string content)
    {
        // Prefer the content type of the main document part: it is unambiguous even when
        // the package embeds a document of the other kind (e.g. a workbook inside a .docx).
        if (content.Contains("application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"))
        {
            return DocxMimeType;
        }
        if (content.Contains("application/vnd.ms-word.document.macroEnabled.main+xml"))
        {
            return DocmMimeType;
        }
        if (content.Contains("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"))
        {
            return XlsxMimeType;
        }
        if (content.Contains("application/vnd.ms-excel.sheet.macroEnabled.main+xml"))
        {
            return XlsmMimeType;
        }

        // Fallback heuristic for other main parts (templates etc.): any spreadsheetml
        // content type wins over wordprocessingml, and "macroEnabled" anywhere selects
        // the macro-enabled variant.
        var isSpreadsheet = content.Contains("application/vnd.openxmlformats-officedocument.spreadsheetml");
        var isWordprocessing = content.Contains("application/vnd.openxmlformats-officedocument.wordprocessingml");
        if (!isSpreadsheet && !isWordprocessing)
        {
            return ZipMimeType; //Don't know what type of openxml doc this is (only detecting doc and xls)
        }

        var macroEnabled = content.Contains("macroEnabled");
        if (isSpreadsheet)
        {
            return macroEnabled ? XlsmMimeType : XlsxMimeType;
        }
        return macroEnabled ? DocmMimeType : DocxMimeType;
    }

    // True when the stream starts with exactly the given signature bytes.
    private static bool CompareSignature(Stream stream, byte[] signature)
    {
        var bytes = new byte[signature.Length];
        stream.Seek(0, SeekOrigin.Begin);

        // Stream.Read may return fewer bytes than requested before the end of the
        // stream, so keep reading until the buffer is full or the stream runs out.
        var total = 0;
        while (total < bytes.Length)
        {
            var read = stream.Read(bytes, total, bytes.Length - total);
            if (read <= 0)
            {
                return false;
            }
            total += read;
        }
        return bytes.SequenceEqual(signature);
    }

    private static bool IsPdf(Stream fileStream)
    {
        return CompareSignature(fileStream, pdfSignature);
    }

    private static bool IsZip(Stream fileStream)
    {
        return CompareSignature(fileStream, zipSignature);
    }

    private static bool IsOldOfficeDoc(Stream fileStream)
    {
        return CompareSignature(fileStream, oldOfficeSignature);
    }

    private static bool IsJpeg(Stream fileStream)
    {
        return CompareSignature(fileStream, jpegSignature);
    }

    // True when the text content contains an opening "<html" tag, in any letter case.
    private static bool IsHtml(Stream fileStream)
    {
        var text = GetTextFileContent(fileStream);
        return !string.IsNullOrWhiteSpace(text)
            && text.IndexOf("<html", System.StringComparison.OrdinalIgnoreCase) >= 0;
    }

    // Reads the whole stream as text from the beginning, leaving the stream open.
    private static string GetTextFileContent(Stream fileStream)
    {
        fileStream.Seek(0, SeekOrigin.Begin);
        // Same settings as new StreamReader(stream) (UTF-8, BOM detection, 1024-char
        // buffer) plus leaveOpen: true so the caller's stream is not disposed.
        using (var reader = new StreamReader(fileStream, System.Text.Encoding.UTF8, true, 1024, true))
        {
            return reader.ReadToEnd();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment