Skip to content

Instantly share code, notes, and snippets.

@pgolding
Last active May 2, 2018 20:39
Show Gist options
  • Save pgolding/4d05e400d0c62f22e6f8de3488aa1a2f to your computer and use it in GitHub Desktop.
Save pgolding/4d05e400d0c62f22e6f8de3488aa1a2f to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"deletable": true,
"editable": true
},
"cell_type": "markdown",
"source": "# Check Image Metadata (e.g. from EXIF)\n\nThis script pulls the EXIF data from the JPG images in a folder and saves it to a CSV file. For now, we skip PDF/PNG images, although they do contain metadata.\n\nRequires:\n\n>`$ pip install exifread`\n\nSet `mount` to point to the mount point.\n\nSet `path_to_checks` to point at the sub-folder with the checks.\n\nThe final path is the join of these two, i.e. `mount/path_to_checks`.\n\nBy default we keep only the most useful EXIF data fields (those listed in `cols`). To grab all EXIF data, set `limit_to_cols=False` when calling the `stream_exif` routine."
},
{
"metadata": {
"trusted": true,
"deletable": true,
"collapsed": false,
"editable": true
},
"cell_type": "code",
"source": "import os\nimport json\nimport sys\nimport exifread\nimport pandas\n\n# Control the checkpoint size - number of images to process and save at a time\ncheck_point = 100\n\n# EXIF tags that we want to extract and use as cols in our csv output\ncols = [\"EXIF ISOSpeedRatings\", \"EXIF Flash\", \"Image YResolution\", \"EXIF FNumber\", \"Image Orientation\",\n        \"Image Model\", \"Image XResolution\", \"EXIF BrightnessValue\", \"EXIF ExifImageWidth\",\n        \"EXIF ExifImageLength\", \"EXIF LensModel\", \"EXIF SubjectArea\", \"Image Make\"]\n\n# Exif data streamed to here:\n# NOTE: nothing in this cell writes to this frame; save() builds its own from the records it is given.\ndf = pandas.DataFrame()\n\n\ndef save(data, csv_file):\n    \"\"\"Write the collected EXIF records to csv_file, indexed by file name.\n\n    data: list of dicts, one per image; each dict must contain a 'file' key.\n    csv_file: path of the CSV to write (overwritten on every call).\n    \"\"\"\n    frame = pandas.DataFrame(data).set_index('file')\n    frame.to_csv(csv_file)\n\n\ndef stream_exif(path, limit_to_cols=True, csv_file='Exif_data_checks.csv'):\n    \"\"\"Read the EXIF tags of every JPEG in path and save them to csv_file.\n\n    path: folder to scan (top level only) for .jpg/.jpeg files, in any letter case.\n    limit_to_cols: keep only the tags listed in cols. Set to False if you want to\n        collect ALL the EXIF fields (will slow things down).\n    csv_file: CSV to write; saved after every check_point images and again at the end.\n\n    Errors are printed, not raised.\n    \"\"\"\n    print(\"Looking for jpg images in {}\".format(path))\n    extensions = (\"jpg\", \"jpeg\")\n    exif_stream = []\n    try:\n        # Compare the lower-cased extension so .JPG and .jpeg files are found too\n        filelist = sorted(name for name in os.listdir(path)\n                          if os.path.splitext(name)[1].lower().lstrip('.') in extensions)\n        if not filelist:\n            print(\"Unable to find any jpg images - check folder or path is correct\")\n            return\n        print(\"Found {} jpg images\".format(len(filelist)))\n        file_count = 0\n        for file in filelist:\n            # Every file in filelist already passed the extension filter, so process them all\n            with open(os.path.join(path, file), 'rb') as f:\n                tags = exifread.process_file(f, details=False)\n            if limit_to_cols:\n                vals = {tag: field for (tag, field) in tags.items() if tag in cols}\n            else:\n                vals = dict(tags)\n            vals[\"file\"] = file\n            exif_stream.append(vals)\n            file_count += 1\n            # Checkpoint: rewrite the CSV with everything collected so far\n            if file_count % check_point == 0:\n                save(exif_stream, csv_file)\n        if exif_stream:\n            save(exif_stream, csv_file)\n        print(\"Checks processed: {}\".format(file_count))\n    except OSError as err:\n        print(\"OS error: {0}\".format(err))\n    except KeyError as err:\n        print(\"KeyError: {} - check that the folder is correct\".format(err))\n    except Exception as err:\n        print(err)\n\n\n# change this to the folder mount point\nmount = os.getcwd()\n# change this to the checks folder\npath_to_checks = 'sample_checks_prosper'\nfolder = os.path.join(mount, path_to_checks)\n# iterate over the files and stream the exif data\nstream_exif(folder, csv_file='all_checks.csv')\n",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true,
"deletable": true,
"collapsed": true,
"editable": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"language_info": {
"name": "python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.4",
"file_extension": ".py",
"mimetype": "text/x-python"
},
"gist_id": "4d05e400d0c62f22e6f8de3488aa1a2f",
"kernelspec": {
"name": "conda-env-ocr-py",
"display_name": "Python [conda env:ocr]",
"language": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment