Skip to content

Instantly share code, notes, and snippets.

@fedeisas
Created April 21, 2017 16:56
Show Gist options
  • Save fedeisas/31fb64f5a82ab52054e479224c31f728 to your computer and use it in GitHub Desktop.
Save fedeisas/31fb64f5a82ab52054e479224c31f728 to your computer and use it in GitHub Desktop.
import os
from glob import glob
import piexif
import pandas as pd
from PIL import Image
import datetime, time
import numpy as np
from multiprocessing import Pool, Process, Value, Lock, current_process
import logging
# Reset the logging module before it is configured below.
# NOTE(review): presumably this is here so that basicConfig() takes effect
# when the script is re-run in an interactive session -- confirm.
# `reload` is a builtin only on Python 2; on Python 3 it lives in importlib.
try:
    reload
except NameError:
    from importlib import reload
reload(logging)
class Counter(object):
    """Integer counter guarded by an explicit multiprocessing lock.

    The count is stored in a ``multiprocessing.Value`` with type code ``'i'``
    and every read or update is performed while holding ``self.lock``.
    """

    def __init__(self, initval=0):
        """Create the counter, starting at ``initval`` (0 by default)."""
        self.lock = Lock()
        self.val = Value('i', initval)

    def increment(self):
        """Add one to the stored count while holding the lock."""
        self.lock.acquire()
        try:
            self.val.value += 1
        finally:
            self.lock.release()

    def value(self):
        """Return the current count, read while holding the lock."""
        self.lock.acquire()
        try:
            return self.val.value
        finally:
            self.lock.release()
logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)

# Collect the path of every *.jpg file below the current directory.
# The file names reported by os.walk are filtered directly instead of calling
# glob() per directory: glob() would interpret characters such as '[' or '*'
# in a directory name as pattern syntax and silently miss the files inside.
# The filter mirrors what the '*.jpg' glob pattern matched: the case handling
# follows os.path.normcase, and names starting with a dot are skipped.
result = [
    os.path.join(dirpath, name)
    for dirpath, _dirnames, filenames in os.walk('.')
    for name in filenames
    if os.path.normcase(name).endswith('.jpg') and not name.startswith('.')
]

# Total number of files; get_image_data() uses it to report progress.
length = len(result)

# Counter of processed files, read and incremented by get_image_data().
counter = Counter()

# One row per image file; the remaining columns are added later.
data = pd.DataFrame({'filename': result})
logging.info('Done with loading dataset...')
def get_brigtness(im):
    """Compute four average brightness measures for a 3-channel image.

    Args:
        im: anything ``np.array`` can turn into a (height, width, 3) integer
            array; get_image_data() passes an opened PIL image.

    Returns:
        A tuple ``(brightness, luminance_a, luminance_b, luminance_c)``:
        the mean of the per-pixel channel average, two weighted channel sums
        averaged over all pixels, and the mean Euclidean norm of the three
        channels. ``(None, None, None, None)`` is returned when the pixel
        data cannot be read (IOError) or cannot be arranged as rows of three
        channel values (e.g. grayscale or RGBA input).
    """
    no_data = (None, None, None, None)

    try:
        # Builtin int rather than the np.int alias, which was removed from
        # NumPy in 1.24; both select the same default integer dtype.
        pixels = np.array(im, dtype=int)
    except IOError:
        return no_data

    try:
        # Flatten to one row per pixel with three channel columns.
        pixels = pixels.reshape(pixels.shape[0] * pixels.shape[1], 3)
    except (ValueError, IndexError):
        # ValueError: wrong number of channels. IndexError: fewer than two
        # dimensions, so there is no width to multiply by.
        return no_data

    first, second, third = pixels[:, 0], pixels[:, 1], pixels[:, 2]

    # Divide by 3.0 so the per-pixel mean is not truncated by integer
    # division on Python 2.
    brightness = np.average(pixels.sum(axis=1) / 3.0)
    # NOTE(review): these weights look like the Rec. 709 and Rec. 601 luma
    # coefficients applied to R, G, B -- confirm the channel order.
    luminance_a = np.average(0.2126 * first + 0.7152 * second + 0.0722 * third)
    luminance_b = np.average(0.299 * first + 0.587 * second + 0.114 * third)
    luminance_c = np.average(
        np.sqrt(np.power(first, 2) + np.power(second, 2) + np.power(third, 2))
    )
    return (brightness, luminance_a, luminance_b, luminance_c)
def get_image_data(f):
    """Extract basic metadata and brightness measures from one image file.

    Args:
        f: path of the image file to open.

    Returns:
        A tuple ``(dpi, width, height, brightness, luminance_a, luminance_b,
        luminance_c)``. Any value that could not be determined is ``None``;
        errors are logged and never propagate to the caller.

    Side effects:
        Increments the module-level ``counter`` once per call and logs a
        progress line whenever the count read before the increment is a
        multiple of 1000.
    """
    dpi = None
    width = None
    height = None
    brightness = None
    luminance_a = None
    luminance_b = None
    luminance_c = None

    try:
        # The context manager closes the underlying file even when the
        # analysis fails, so handles do not pile up over a large batch.
        with Image.open(f) as im:
            if 'dpi' in im.info:
                dpi = im.info['dpi'][0]
            width, height = im.size
            brightness, luminance_a, luminance_b, luminance_c = get_brigtness(im)
    except Exception as e:
        # Best effort: a broken file must not abort the whole batch.
        logging.error("Can't process {0} | {1}".format(f, current_process()))
        logging.error(e)

    # Progress is tracked for failed files too, otherwise the reported
    # percentage could never reach 100%. The counter is read once so the
    # figures within a log line agree with each other.
    # NOTE: read-then-increment is not atomic across processes, so a progress
    # line may occasionally be repeated or skipped; this affects logging only.
    done = counter.value()
    if done % 1000 == 0:
        logging.info(
            "Progress {0}% ({1} / {2}) | {3}".format(
                float(done) / float(length) * 100,
                done,
                length,
                current_process()
            )
        )
    counter.increment()

    return (
        dpi,
        width,
        height,
        brightness,
        luminance_a,
        luminance_b,
        luminance_c
    )
def parallelize_dataframe(df, func):
    """Apply ``func`` to row-wise chunks of ``df`` in a process pool.

    The number of chunks comes from the module-level ``num_partitions`` and
    the pool size from the module-level ``num_cores``.

    Args:
        df: DataFrame to split by rows.
        func: picklable callable that takes one chunk and returns a
            DataFrame.

    Returns:
        The concatenation of the per-chunk results, in chunk order.
    """
    total_rows = len(df)
    # Normalise the partition count: it must be a whole number, at least 1,
    # and no larger than the number of rows (so no worker gets an empty chunk
    # unless df itself is empty).
    parts = max(1, min(int(num_partitions), total_rows))

    # Same split sizes as np.array_split (the first `extra` chunks get one
    # additional row), but done with iloc so that every chunk stays a
    # DataFrame without relying on the deprecated DataFrame.swapaxes.
    base, extra = divmod(total_rows, parts)
    chunks = []
    start = 0
    for index in range(parts):
        stop = start + base + (1 if index < extra else 0)
        chunks.append(df.iloc[start:stop])
        start = stop

    pool = Pool(num_cores)
    try:
        results = pool.map(func, chunks)
    finally:
        # Always shut the workers down, even if a chunk raised.
        pool.close()
        pool.join()
    return pd.concat(results)
def more_columns(data):
    """Add the per-image columns produced by get_image_data() to ``data``.

    Args:
        data: DataFrame with a ``filename`` column; it is modified in place.

    Returns:
        The same DataFrame with the columns ``dpi``, ``width``, ``height``,
        ``brightness``, ``luminance_a``, ``luminance_b`` and ``luminance_c``
        added, one value per row.
    """
    logging.info("Batch started | {0}".format(current_process()))

    # Order matches the tuple returned by get_image_data().
    columns = (
        'dpi',
        'width',
        'height',
        'brightness',
        'luminance_a',
        'luminance_b',
        'luminance_c',
    )
    records = [get_image_data(name) for name in data.filename]
    # Assign column by column instead of unpacking zip(*records): that
    # unpacking raises ValueError for an empty batch, while this simply adds
    # empty columns.
    for position, column in enumerate(columns):
        data[column] = [record[position] for record in records]

    logging.info("Batch completed | {0}".format(current_process()))
    return data
# Roughly one partition per 1000 files. Floor division keeps the value an
# integer on Python 3, and max() guarantees at least one partition when there
# are fewer than 1000 files (zero partitions makes the split fail).
num_partitions = max(1, len(data) // 1000)
# Size of the worker pool used by parallelize_dataframe().
num_cores = 16

# NOTE(review): this runs at import time without an
# `if __name__ == "__main__":` guard, which multiprocessing requires on
# platforms that spawn rather than fork worker processes.
data_full = parallelize_dataframe(data, more_columns)

# 'created' is filled from the file's modification time (os.path.getmtime).
data_full['created'] = data_full.filename.map(
    lambda f: datetime.datetime.fromtimestamp(os.path.getmtime(f))
)
# File size in bytes.
data_full['size'] = data_full.filename.map(lambda f: os.stat(f).st_size)
data_full.to_csv('output.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment