-
-
Save jemerick/375930 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Some simple utilities to read the magic bytes from the beginning of a
file and determine whether the file meets certain criteria (e.g., contains
JPEG image data).
""" | |
import array | |
from operator import eq | |
# Signature tables.  Every entry is a 4-tuple of
# (magic_bytes, description, extension, mimetype); a file matches an entry
# when its leading bytes equal magic_bytes.
IMAGE_MAGIC_DATA = (
    ([0xff, 0xd8], 'JPEG', 'jpg', 'image/jpeg'),  # (0xff, 0xd8, 0xfe, 0xe0)?
    ([0x89, 0x50, 0x4e, 0x47], 'PNG', 'png', 'image/png'),
    ([0x47, 0x49, 0x46, 0x38], 'GIF', 'gif', 'image/gif'),
)

# NOTE(review): going by the MPEG audio frame header bit layout, the second
# bytes 0xfe/0xff and 0xf4-0xf7 look like Layer I / Layer II headers rather
# than Layer III, and 0xff 0xfe is also the UTF-16 little-endian byte-order
# mark, so these entries may misidentify files -- confirm before relying on
# the descriptions.  The values are left exactly as they were.
MP3_MAGIC_DATA = (
    # ASCII 'ID3' (ID3v2 tag header), spelled out as literal bytes so that no
    # comprehension loop variable leaks into the module namespace on Python 2.
    ([0x49, 0x44, 0x33], 'MP3', 'mp3', 'audio/mpeg'),
    ([0xff, 0xfe], 'MPEG ADTS, layer III, v1.0 [protected]', 'mp3', 'audio/mpeg'),
    ([0xff, 0xff], 'MPEG ADTS, layer III, v1.0', 'mp3', 'audio/mpeg'),
    ([0xff, 0xfa], 'MPEG ADTS, layer III, v1.0 [protected]', 'mp3', 'audio/mpeg'),
    ([0xff, 0xfb], 'MPEG ADTS, layer III, v1.0', 'mp3', 'audio/mpeg'),
    ([0xff, 0xf2], 'MPEG ADTS, layer III, v2.0 [protected]', 'mp3', 'audio/mpeg'),
    ([0xff, 0xf3], 'MPEG ADTS, layer III, v2.0', 'mp3', 'audio/mpeg'),
    ([0xff, 0xf4], 'MPEG ADTS, layer III, v2.0 [protected]', 'mp3', 'audio/mpeg'),
    ([0xff, 0xf5], 'MPEG ADTS, layer III, v2.0', 'mp3', 'audio/mpeg'),
    ([0xff, 0xf6], 'MPEG ADTS, layer III, v2.0 [protected]', 'mp3', 'audio/mpeg'),
    ([0xff, 0xf7], 'MPEG ADTS, layer III, v2.0', 'mp3', 'audio/mpeg'),
    ([0xff, 0xe2], 'MPEG ADTS, layer III, v2.5 [protected]', 'mp3', 'audio/mpeg'),
    ([0xff, 0xe3], 'MPEG ADTS, layer III, v2.5', 'mp3', 'audio/mpeg'),
)

# All signature groups, in the order get_mimetype() tries them.
MAGIC_DATA = (IMAGE_MAGIC_DATA, MP3_MAGIC_DATA)
# The all() and any() builtins only arrived in Python 2.5.  When running on an
# older interpreter, define fallbacks that behave the same way.
try:
    all
except NameError:
    def all(iterable):
        """Return True if every element of iterable is true (or it is empty)."""
        for element in iterable:
            if not element:
                return False
        return True

try:
    any
except NameError:
    def any(iterable):
        """Return True if at least one element of iterable is true."""
        for element in iterable:
            if element:
                # The builtin returns True, not the truthy element itself.
                return True
        return False
def get_mimetype(fd):
    """Try to guess the type of fd's data and return its mimetype.

    fd must be a seekable file-like object opened in binary mode.  Each
    signature group in MAGIC_DATA is tried in order against the bytes at the
    start of the file.  The file position is reset to 0 before returning,
    whether or not a signature matched.

    Returns the mimetype string of the first matching signature, or None
    when nothing matches.
    """
    for magic_data in MAGIC_DATA:
        longest = max(len(entry[0]) for entry in magic_data)
        # Always sniff from the start of the file, and rewind straight away
        # so the caller gets the stream back at offset 0 on every path.
        fd.seek(0)
        header = array.array('B', fd.read(longest)).tolist()
        fd.seek(0)
        for magic_bytes, _description, _ext, mimetype in magic_data:
            # Compare whole slices: a header shorter than the signature
            # (empty or truncated file) must not count as a match, which an
            # element-wise map() that stops at the shorter input would allow.
            if header[:len(magic_bytes)] == list(magic_bytes):
                return mimetype
    return None
def _file_matches_magic_data(fd, magic_data): | |
n = max((len(b[0]) for b in magic_data)) | |
data = fd.read(n) | |
bytes = array.array('B', data) | |
return any((all(map(eq, bytes[:len(magic_bytes)], magic_bytes)) | |
for magic_bytes, type, ext, mimetype in magic_data)) | |
def is_image(fd):
    """Report whether fd's data matches one of the IMAGE_MAGIC_DATA signatures."""
    matched = _file_matches_magic_data(fd, IMAGE_MAGIC_DATA)
    return matched


# Short label for what this checker detects; the command-line entry point
# prints it in its result message.
is_image.checks_for = 'image'
def is_mp3(fd):
    """Report whether fd's data matches one of the MP3_MAGIC_DATA signatures."""
    matched = _file_matches_magic_data(fd, MP3_MAGIC_DATA)
    return matched


# Short label for what this checker detects; the command-line entry point
# prints it in its result message.
is_mp3.checks_for = 'mp3'
# Command-line entry point: ./file.py <function_name> <filename>
# Runs the named checker (e.g. is_image, is_mp3) against the given file and
# prints whether the file matched.
if __name__ == '__main__':
    import sys

    # Both the checker name and the file name are required; sys.argv[2] is
    # read below, so fewer than three argv entries is a usage error.
    if len(sys.argv) < 3:
        print('usage: ./file.py <function_name> <filename>')
        sys.exit(0)

    function_name = sys.argv[1]
    filename = sys.argv[2]

    # Only accept module-level callables that advertise what they check for;
    # this rejects names such as 'sys' or 'get_mimetype' that would otherwise
    # be looked up successfully and then fail when called or reported on.
    func = globals().get(function_name)
    if not (callable(func) and hasattr(func, 'checks_for')):
        print("Function named '%s' not found" % function_name)
        sys.exit(1)

    # Magic bytes are raw bytes: open in binary mode, and always close.
    fd = open(filename, 'rb')
    try:
        matched = func(fd)
    finally:
        fd.close()

    if matched:
        print('%s is an %s' % (filename, func.checks_for))
    else:
        print('%s is not an %s' % (filename, func.checks_for))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment