Created
July 30, 2017 22:15
-
-
Save andy-thomason/994db9582d816eb0c9a65006753e96f4 to your computer and use it in GitHub Desktop.
Example of extracting a JPEG thumbnail from a camera Exif JPEG image.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//////////////////////////////////////////////////////////////////////////////////////////
//
// Example Exif thumbnail extractor.
//
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <vector>
// Parse the payload of a single APP1 segment (the bytes following the two
// length bytes) and return the embedded JPEG thumbnail, or an empty vector
// when the payload is not Exif or holds no usable thumbnail.
//
// Only payloads that start with "Exif\0\0" followed by a TIFF header whose
// first IFD sits at TIFF offset 8 are recognised. Every offset read from the
// data is validated against the payload size before it is dereferenced.
static std::vector<char> exifThumbnailFromApp1(const std::vector<char> &app1) {
  // "Exif\0\0" + TIFF header, little-endian ("II") and big-endian ("MM").
  static const char kHdrLe[] = { 0x45, 0x78, 0x69, 0x66, 0x00, 0x00, 0x49, 0x49, 0x2A, 0x00, 0x08, 0x00, 0x00, 0x00 };
  static const char kHdrBe[] = { 0x45, 0x78, 0x69, 0x66, 0x00, 0x00, 0x4D, 0x4D, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x08 };
  static constexpr std::size_t kHdrSize = sizeof(kHdrLe);
  static constexpr std::size_t kTiffBase = 6;    // TIFF offsets are relative to the byte after "Exif\0\0"
  static constexpr std::size_t kEntrySize = 12;  // tag(2) + format(2) + count(4) + value/offset(4)

  if (app1.size() <= kHdrSize + 2) return {};
  const bool littleEndian = std::equal(kHdrLe, kHdrLe + kHdrSize, app1.begin());
  const bool bigEndian = std::equal(kHdrBe, kHdrBe + kHdrSize, app1.begin());
  if (!littleEndian && !bigEndian) return {};

  const std::size_t tiffSize = app1.size() - kTiffBase;

  // Unsigned readers addressed by TIFF offset; callers guarantee the range.
  const auto byteAt = [&app1](std::size_t pos) -> std::uint32_t {
    return static_cast<std::uint8_t>(app1[kTiffBase + pos]);
  };
  const auto u16 = [&](std::size_t pos) -> std::uint32_t {
    const std::uint32_t b0 = byteAt(pos);
    const std::uint32_t b1 = byteAt(pos + 1);
    return bigEndian ? (b0 << 8) | b1 : (b1 << 8) | b0;
  };
  const auto u32 = [&](std::size_t pos) -> std::uint32_t {
    const std::uint32_t w0 = u16(pos);
    const std::uint32_t w1 = u16(pos + 2);
    return bigEndian ? (w0 << 16) | w1 : (w1 << 16) | w0;
  };

  // Walk the IFD chain looking for the JPEG thumbnail offset/length tags.
  std::uint32_t jpegOffset = 0;
  std::uint32_t jpegSize = 0;
  std::size_t ifd = 8;  // guaranteed by the header comparison above

  // An IFD occupies at least 6 bytes, so a chain of distinct IFDs cannot be
  // longer than tiffSize / 6; bounding the walk stops cyclic "next" links.
  for (std::size_t remaining = tiffSize / 6 + 1; remaining != 0; --remaining) {
    if (ifd > tiffSize || tiffSize - ifd < 2) break;
    const std::size_t numEntries = u16(ifd);
    const std::size_t bodySize = numEntries * kEntrySize + 4;  // entries + next-IFD link
    if (tiffSize - ifd - 2 < bodySize) break;

    std::size_t pos = ifd + 2;
    for (std::size_t i = 0; i != numEntries; ++i, pos += kEntrySize) {
      const std::uint32_t tag = u16(pos);
      const std::uint32_t value = u32(pos + 8);
      if (tag == 0x0201) jpegOffset = value;  // JPEGInterchangeFormat
      if (tag == 0x0202) jpegSize = value;    // JPEGInterchangeFormatLength
    }

    const std::uint32_t next = u32(pos);
    if (next == 0) break;
    ifd = next;
  }

  // Both the start and the end of the thumbnail must lie inside the payload.
  if (jpegOffset == 0 || jpegSize == 0) return {};
  if (jpegOffset > tiffSize || jpegSize > tiffSize - jpegOffset) return {};

  const auto first = app1.begin() + static_cast<std::ptrdiff_t>(kTiffBase + jpegOffset);
  return std::vector<char>(first, first + static_cast<std::ptrdiff_t>(jpegSize));
}

// Return a vector containing the JPEG thumbnail embedded in an Exif file.
//
// Reads JPEG markers from `f` until an APP1 (ff e1) segment yields a
// thumbnail, the start-of-scan marker (ff da) is reached, or the stream
// ends/fails. Returns an empty vector when no thumbnail is found or the
// input is malformed.
std::vector<char> findExifJpeg(std::istream &f) {
  // Read one byte as 0..255, or -1 when the stream cannot deliver one.
  // Testing the result (rather than eof()) also terminates on streams that
  // are in a failed state without ever reaching end-of-file.
  const auto readByte = [&f]() -> int {
    const auto c = f.get();
    return c == std::istream::traits_type::eof() ? -1 : static_cast<int>(c);
  };

  // Loop over JPEG markers
  for (;;) {
    if (readByte() != 0xff) break;
    const int marker = readByte();
    if (marker < 0) break;

    if (marker == 0xd8 || marker == 0xd9) continue;  // start/end of image: no payload
    if (marker == 0xda) break;                       // start of JPEG data

    // General tag: size is encoded in two bytes and includes those bytes.
    const int hibyte = readByte();
    const int lobyte = readByte();
    if (hibyte < 0 || lobyte < 0) break;
    const int size = hibyte * 0x100 + lobyte;
    if (size < 2) break;  // malformed length; would make the payload size negative
    const int payload = size - 2;

    if (marker != 0xe1) {
      f.seekg(payload, std::ios::cur);
      if (!f) break;
      continue;
    }

    // ff e1 tag is APP1
    std::vector<char> exif(static_cast<std::size_t>(payload));
    f.read(exif.data(), payload);
    // Keep only what was actually read so a truncated segment is not
    // parsed as if it were complete.
    exif.resize(static_cast<std::size_t>(f.gcount()));

    std::vector<char> thumbnail = exifThumbnailFromApp1(exif);
    if (!thumbnail.empty()) return thumbnail;
  }
  return std::vector<char>{};
}
// Exif format reference: https://www.media.mit.edu/pia/Research/deepview/exif.html
int main() { | |
auto f = std::ifstream("big.jpg", std::ios::binary); | |
auto jpeg = findExifJpeg(f); | |
std::ofstream("big.thumb.jpg", std::ios::binary).write(jpeg.data(), jpeg.size()); | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment