Skip to content

Instantly share code, notes, and snippets.

@jemerick
Forked from mmalone/filetype.py
Created April 22, 2010 22:47
Show Gist options
  • Save jemerick/375930 to your computer and use it in GitHub Desktop.
Save jemerick/375930 to your computer and use it in GitHub Desktop.
"""
Some simple utilities to read the magic bytes from the beginning of a
file and determine whether the file meets certain criteria (e.g., contains
JPEG image data).
"""
import array
from operator import eq
# Signature tables. Every entry is a 4-tuple:
#   (leading byte values, human-readable description, file extension, mimetype)
# The byte values are compared against the first bytes read from a file.

IMAGE_MAGIC_DATA = (
    # Only the first two bytes are matched for JPEG; the original author left
    # an open question about matching (0xff, 0xd8, 0xfe, 0xe0) instead.
    ([0xff, 0xd8], 'JPEG', 'jpg', 'image/jpeg'),
    ([0x89, 0x50, 0x4e, 0x47], 'PNG', 'png', 'image/png'),
    ([0x47, 0x49, 0x46, 0x38], 'GIF', 'gif', 'image/gif'),
)

MP3_MAGIC_DATA = (
    # The ASCII characters "ID3".
    ([0x49, 0x44, 0x33], 'MP3', 'mp3', 'audio/mpeg'),
    # Two-byte prefixes; descriptions are kept exactly as originally recorded.
    ([0xff, 0xfe], 'MPEG ADTS, layer III, v1.0 [protected]', 'mp3',
     'audio/mpeg'),
    ([0xff, 0xff], 'MPEG ADTS, layer III, v1.0', 'mp3', 'audio/mpeg'),
    ([0xff, 0xfa], 'MPEG ADTS, layer III, v1.0 [protected]', 'mp3',
     'audio/mpeg'),
    ([0xff, 0xfb], 'MPEG ADTS, layer III, v1.0', 'mp3', 'audio/mpeg'),
    ([0xff, 0xf2], 'MPEG ADTS, layer III, v2.0 [protected]', 'mp3',
     'audio/mpeg'),
    ([0xff, 0xf3], 'MPEG ADTS, layer III, v2.0', 'mp3', 'audio/mpeg'),
    ([0xff, 0xf4], 'MPEG ADTS, layer III, v2.0 [protected]', 'mp3',
     'audio/mpeg'),
    ([0xff, 0xf5], 'MPEG ADTS, layer III, v2.0', 'mp3', 'audio/mpeg'),
    ([0xff, 0xf6], 'MPEG ADTS, layer III, v2.0 [protected]', 'mp3',
     'audio/mpeg'),
    ([0xff, 0xf7], 'MPEG ADTS, layer III, v2.0', 'mp3', 'audio/mpeg'),
    ([0xff, 0xe2], 'MPEG ADTS, layer III, v2.5 [protected]', 'mp3',
     'audio/mpeg'),
    ([0xff, 0xe3], 'MPEG ADTS, layer III, v2.5', 'mp3', 'audio/mpeg'),
)

# Groups are consulted in this order, so image signatures win over MP3 ones.
MAGIC_DATA = (IMAGE_MAGIC_DATA, MP3_MAGIC_DATA)
# The builtins all() and any() only arrived in Python 2.5. On older
# interpreters the bare name lookups below raise NameError and equivalent
# fallbacks are defined; on newer interpreters the builtins are left untouched.
try:
    all
except NameError:
    def all(iterable):
        """Return True when every element of ``iterable`` is truthy.

        An empty iterable yields True, like the builtin.
        """
        for element in iterable:
            if not element:
                return False
        return True

try:
    any
except NameError:
    def any(iterable):
        """Return True when at least one element of ``iterable`` is truthy.

        An empty iterable yields False, like the builtin.
        """
        for element in iterable:
            if element:
                # Return a real boolean, as the builtin does, rather than
                # leaking the matching element to the caller.
                return True
        return False
def get_mimetype(fd):
    """Guess the type of fd's data from its leading bytes.

    Reads as many bytes from ``fd`` (starting at its current position) as the
    longest signature in MAGIC_DATA needs, then rewinds ``fd`` to offset 0.
    The signature groups are tried in MAGIC_DATA order.

    ``fd`` must support ``read(n)`` and ``seek(0)`` and should be opened in
    binary mode so that ``read`` yields raw bytes.

    Returns the mimetype string of the first matching signature, or None
    when nothing matches (including when fewer bytes are available than the
    signature is long).
    """
    longest = max([len(entry[0]) for group in MAGIC_DATA for entry in group])
    header = array.array('B', fd.read(longest)).tolist()
    # Rewind whether or not a signature matches, so the caller can go on to
    # read the file from the start without losing the sniffed bytes.
    fd.seek(0)
    for group in MAGIC_DATA:
        for signature, _description, _extension, mimetype in group:
            # Compare whole slices: a header shorter than the signature gives
            # a shorter list and therefore cannot compare equal.
            if header[:len(signature)] == list(signature):
                return mimetype
    return None
def _file_matches_magic_data(fd, magic_data):
n = max((len(b[0]) for b in magic_data))
data = fd.read(n)
bytes = array.array('B', data)
return any((all(map(eq, bytes[:len(magic_bytes)], magic_bytes))
for magic_bytes, type, ext, mimetype in magic_data))
def is_image(fd):
    """Return whether fd's next bytes match a signature in IMAGE_MAGIC_DATA."""
    signatures = IMAGE_MAGIC_DATA
    return _file_matches_magic_data(fd, signatures)


# Noun the command-line driver prints when reporting this check's result.
is_image.checks_for = 'image'
def is_mp3(fd):
    """Return whether fd's next bytes match a signature in MP3_MAGIC_DATA."""
    signatures = MP3_MAGIC_DATA
    return _file_matches_magic_data(fd, signatures)


# Noun the command-line driver prints when reporting this check's result.
is_mp3.checks_for = 'mp3'
# Command-line driver: ./file.py <function_name> <filename>
# Runs one of the module's check functions (those carrying a ``checks_for``
# attribute) against the named file and prints the verdict.
if __name__ == '__main__':
    import sys

    # Both the function name and the filename are required, i.e. argv must
    # hold at least three items including the script name.
    if len(sys.argv) < 3:
        print('usage: ./file.py <function_name> <filename>')
        sys.exit(0)

    function_name = sys.argv[1]
    filename = sys.argv[2]

    # Accept only callables that carry the ``checks_for`` label used in the
    # report below; other module globals (imports, constants, helpers) would
    # otherwise pass the lookup and crash later.
    func = globals().get(function_name)
    if not callable(func) or not hasattr(func, 'checks_for'):
        print("Function named '%s' not found" % function_name)
        sys.exit(1)

    # Signatures are raw byte values, so read the file in binary mode, and
    # close it even if the check raises.
    fd = open(filename, 'rb')
    try:
        matched = func(fd)
    finally:
        fd.close()

    if matched:
        print('%s is an %s' % (filename, func.checks_for))
    else:
        print('%s is not an %s' % (filename, func.checks_for))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment