Skip to content

Instantly share code, notes, and snippets.

@tazjin
Last active February 11, 2016 00:59
Show Gist options
  • Save tazjin/4016e16984dfbf27f1df to your computer and use it in GitHub Desktop.
Save tazjin/4016e16984dfbf27f1df to your computer and use it in GitHub Desktop.
Recognise file types in Erlang
%% Author: Vincent Ambo
%% This code is public domain.
-module(filetypes).
-export([recognise_data/1, recognise_mimetype/1]).
%% Path of a file on disk, as a character list (what mktemp/0 returns).
-type filename() :: list().
%% Description of some data, as returned inside {ok, _} by recognise_data/1.
-type filetype() :: list().
%% MIME type description, as returned inside {ok, _} by recognise_mimetype/1.
-type mimetype() :: list().
%% Make a temporary file and return the filename.
%%
%% Runs the external `mktemp` utility through the shell. os:cmd/1 returns
%% whatever the shell printed, so the result is validated before use:
%% only a trailing newline is stripped (not an arbitrary last character),
%% and the resulting path must name an existing regular file.
%%
%% Raises error({mktemp_failed, Output}) when `mktemp` did not produce a
%% usable file (e.g. the utility is missing or the temp dir is not writable).
-spec mktemp() -> filename().
mktemp() ->
    Output = os:cmd("mktemp"),
    Path = case lists:reverse(Output) of
               [$\n | Rest] -> lists:reverse(Rest);
               _ -> Output
           end,
    case filelib:is_regular(Path) of
        true -> Path;
        false -> error({mktemp_failed, Output})
    end.
%% Persist the given iodata in a freshly created temporary file.
%% Returns {ok, Path} for the file that now holds the data; a failed
%% write crashes the caller with a badmatch on the file:write_file/2 result.
-spec write_temp(iodata()) -> {ok, filename()}.
write_temp(Payload) ->
    TempPath = mktemp(),
    ok = file:write_file(TempPath, Payload),
    {ok, TempPath}.
%% What is the mimetype of this data?
%% Writes Data to a temporary file, runs `file -bi` on it and returns
%% {ok, Output} with the trailing newline removed,
%% e.g. {ok, "application/pdf; charset=binary"}.
-spec recognise_mimetype(iodata()) -> {ok, mimetype()}.
recognise_mimetype(Data) ->
    run_file("-bi", Data).

%% What kind of data is this? (verbose)
%% Same as recognise_mimetype/1 but runs `file -b`, returning the
%% human-readable description, e.g. {ok, "PDF document, version 1.3"}.
-spec recognise_data(iodata()) -> {ok, filetype()}.
recognise_data(Data) ->
    run_file("-b", Data).

%% Shared implementation: store Data in a temporary file, run the external
%% `file` utility with Flags on it and return its output without the
%% trailing newline.
-spec run_file(string(), iodata()) -> {ok, string()}.
run_file(Flags, Data) ->
    {ok, File} = write_temp(Data),
    try
        Output = os:cmd("file " ++ Flags ++ " " ++ shell_quote(File)),
        {ok, strip_trailing_newline(Output)}
    after
        %% Best-effort cleanup that also runs when the body crashes;
        %% without it every call leaves a temporary file behind.
        file:delete(File)
    end.

%% Wrap Path in single quotes for the shell, escaping embedded single
%% quotes, so a temp directory containing spaces or shell metacharacters
%% cannot alter the command line.
-spec shell_quote(string()) -> string().
shell_quote(Path) ->
    "'" ++ lists:flatmap(fun($\') -> "'\\''"; (C) -> [C] end, Path) ++ "'".

%% Remove exactly one trailing newline if present; leave other input as is.
-spec strip_trailing_newline(string()) -> string().
strip_trailing_newline(Str) ->
    case lists:reverse(Str) of
        [$\n | Rest] -> lists:reverse(Rest);
        _ -> Str
    end.
vincent@stallo /tmp/filerec % erl
Erlang/OTP 18 [erts-7.2.1] [source] [64-bit] [smp:4:4] [async-threads:10] [hipe] [kernel-poll:false]
Eshell V7.2.1 (abort with ^G)
1> c(filetypes).
{ok,filetypes}
2> {ok, Pdf} = file:read_file("test.pdf").
{ok,<<"%PDF-1.3\n%Äåòåë§ó ÐÄÆ\n4 0 obj\n<< /Length 5 0 R /Filter /FlateDecode >>\nstream\nx"...>>}
3> {ok, EvilPdf} = file:read_file("evil_doc.pdf").
{ok,<<77,90,144,0,3,0,0,0,4,0,0,0,255,255,0,0,184,0,0,0,
0,0,0,0,64,0,0,...>>}
4> filetypes:recognise_data(Pdf).
{ok,"PDF document, version 1.3"}
5> filetypes:recognise_mimetype(Pdf).
{ok,"application/pdf; charset=binary"}
6> filetypes:recognise_data(EvilPdf).
{ok,"PE32 executable (GUI) Intel 80386, for MS Windows, Nullsoft Installer self-extracting archive"}
7> filetypes:recognise_mimetype(EvilPdf).
{ok,"application/x-dosexec; charset=binary"}
8>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment