import numpy as np
import cv2
import os
import random
import pandas as pd
import sys
class ImageDataLoader(object):
    """Iterable loader of grayscale images with their density-map ground truth.

    Every file found in ``data_path`` is paired with a CSV of the same base
    name in ``gt_path``.  Iterating over the loader yields one dict ("blob")
    per image with the keys ``'data'``, ``'gt_density'``, ``'fname'``,
    ``'gt_count'`` and ``'gt_class_label'`` (plus ``'idx'`` in pre-load mode).
    """

    def __init__(self, data_path, gt_path, shuffle=False, gt_downsample=False, pre_load=False, num_classes=10):
        """Index the dataset and gather crowd-count statistics.

        data_path:     directory containing the input images.
        gt_path:       directory containing one ``<image base name>.csv`` per image.
        shuffle:       if true, the sample order is re-shuffled on every iteration.
        gt_downsample: if true, density maps are resized to 1/4 of the
                       (multiple-of-4) image width and height.
        pre_load:      if true, all images are loaded into CPU RAM up front for
                       faster processing.  This avoids frequent file reads.
                       Use this only for small datasets.
        num_classes:   total number of classes into which the crowd count is
                       divided (default: 10).
        """
        self.data_path = data_path
        self.gt_path = gt_path
        self.gt_downsample = gt_downsample
        self.pre_load = pre_load
        # os.listdir() order is arbitrary; sorting keeps indices reproducible.
        self.data_files = sorted(
            filename for filename in os.listdir(data_path)
            if os.path.isfile(os.path.join(data_path, filename))
        )
        self.shuffle = shuffle
        self.num_samples = len(self.data_files)
        self.blob_list = {}
        # A real list (not a range object) so random.shuffle can permute it in place.
        self.id_list = list(range(self.num_samples))
        self.min_gt_count = sys.maxsize
        self.max_gt_count = 0
        self.num_classes = num_classes
        self.count_class_hist = np.zeros(self.num_classes)
        if self.pre_load:
            self.preload_data()  # load input images and ground truth into memory
            self.assign_gt_class_labels()  # assign crowd group/class labels to each image
        else:
            # get min - max crowd count present in the dataset;
            # used later for assigning the crowd group/class
            self.get_stats_in_dataset()

    def _read_density(self, fname):
        """Return the float32 density map stored in the CSV that matches ``fname``."""
        csv_path = os.path.join(self.gt_path, os.path.splitext(fname)[0] + '.csv')
        # .values replaces DataFrame.as_matrix(), which newer pandas no longer has.
        den = pd.read_csv(csv_path, sep=',', header=None).values
        return den.astype(np.float32, copy=False)

    def _class_index(self, gt_count):
        """Map a crowd count to its class index in ``[0, num_classes - 1]``."""
        bin_val = (self.max_gt_count - self.min_gt_count) / float(self.num_classes)
        if bin_val <= 0:
            # Degenerate range (e.g. every image has the same count): there is
            # nothing to bin, so use class 0 instead of dividing by zero.
            return 0
        return int(min(np.round(gt_count / bin_val), self.num_classes - 1))

    def _one_hot_label(self, class_idx):
        """Return a ``(1, num_classes)`` integer one-hot row for ``class_idx``."""
        label = np.zeros(self.num_classes, dtype=int)
        label[class_idx] = 1
        return label.reshape(1, label.shape[0])

    def get_classifier_weights(self):
        """Return per-class weights derived from the class histogram.

        Since the dataset is imbalanced, classifier weights are used to ensure
        balance: classes with more samples get a smaller weight, and the
        weights are normalised to sum to 1.
        """
        wts = self.count_class_hist
        wts = 1 - wts / wts.sum()
        wts = wts / wts.sum()
        return wts

    def preload_data(self):
        """Read every image and density map into ``blob_list``, tracking min/max counts."""
        print('Pre-loading the data. This may take a while...')
        loaded = 0
        for idx, fname in enumerate(self.data_files):
            img, den, gt_count = self.read_image_and_gt(fname)
            self.min_gt_count = min(self.min_gt_count, gt_count)
            self.max_gt_count = max(self.max_gt_count, gt_count)
            self.blob_list[idx] = {
                'data': img,
                'gt_density': den,
                'fname': fname,
                'gt_count': gt_count,
            }
            loaded = idx + 1
            if loaded % 100 == 0:
                print('Loaded ', loaded, '/', self.num_samples)
        print('Completed laoding ', loaded, 'files')

    def assign_gt_class_labels(self):
        """Attach a one-hot class label to each pre-loaded blob and fill the class histogram."""
        for i in range(self.num_samples):
            blob = self.blob_list[i]
            class_idx = self._class_index(blob['gt_count'])
            blob['gt_class_label'] = self._one_hot_label(class_idx)
            self.count_class_hist[class_idx] += 1

    def __iter__(self):
        """Yield one blob per sample, reading from disk unless data was pre-loaded."""
        if self.shuffle:
            if self.pre_load:
                # Blobs are keyed by index, so permute the index order.
                random.shuffle(self.id_list)
            else:
                # Files are read on demand, so permuting the file list is enough.
                random.shuffle(self.data_files)
        files = self.data_files
        id_list = self.id_list
        for idx in id_list:
            if self.pre_load:
                blob = self.blob_list[idx]
                blob['idx'] = idx
            else:
                fname = files[idx]
                img, den, gt_count = self.read_image_and_gt(fname)
                blob = {
                    'data': img,
                    'gt_density': den,
                    'fname': fname,
                    'gt_count': gt_count,
                    'gt_class_label': self._one_hot_label(self._class_index(gt_count)),
                }
            yield blob

    def get_stats_in_dataset(self):
        """Scan all ground-truth CSVs to set min/max counts and the class histogram.

        Only the CSV files are read; no image is decoded.
        """
        min_count = sys.maxsize
        max_count = 0
        gt_count_array = np.zeros(self.num_samples)
        for i, fname in enumerate(self.data_files):
            gt_count = np.sum(self._read_density(fname))
            min_count = min(min_count, gt_count)
            max_count = max(max_count, gt_count)
            gt_count_array[i] = gt_count
        # The bin width depends on the final min/max, so classes are assigned
        # only after every file has been seen.
        self.min_gt_count = min_count
        self.max_gt_count = max_count
        for gt_count in gt_count_array:
            self.count_class_hist[self._class_index(gt_count)] += 1

    def get_num_samples(self):
        """Return the number of files found in ``data_path``."""
        return self.num_samples

    def read_image_and_gt(self, fname):
        """Load one image and its density map.

        Returns ``(img, den, gt_count)`` where ``img`` and ``den`` are float32
        arrays reshaped to ``(1, 1, H, W)`` and ``gt_count`` is the sum of the
        resized, rescaled density map.

        Raises IOError if OpenCV cannot decode the image file.
        """
        img_path = os.path.join(self.data_path, fname)
        img = cv2.imread(img_path, 0)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Could not read image: ' + img_path)
        img = img.astype(np.float32, copy=False)
        ht = img.shape[0]
        wd = img.shape[1]
        # Round both dimensions down to a multiple of 4; floor division is
        # required here because "/" is true division on Python 3.
        ht_1 = (ht // 4) * 4
        wd_1 = (wd // 4) * 4
        img = cv2.resize(img, (wd_1, ht_1))
        img = img.reshape((1, 1, img.shape[0], img.shape[1]))
        den = self._read_density(fname)
        if self.gt_downsample:
            wd_1 = wd_1 // 4
            ht_1 = ht_1 // 4
        den = cv2.resize(den, (wd_1, ht_1))
        # Rescale by the area ratio so the total count is not changed by the
        # resize.  NOTE(review): the ratio uses the image size, i.e. it assumes
        # the CSV density map has the same dimensions as the image -- confirm.
        den = den * ((wd * ht) / float(wd_1 * ht_1))
        den = den.reshape((1, 1, den.shape[0], den.shape[1]))
        gt_count = np.sum(den)
        return img, den, gt_count
