Skip to content

Instantly share code, notes, and snippets.

@aimuhire
Created October 7, 2019 15:45
Show Gist options
  • Save aimuhire/443a7db77ebc0dbfbd79382aeb7576f9 to your computer and use it in GitHub Desktop.
Save aimuhire/443a7db77ebc0dbfbd79382aeb7576f9 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# */AIPND/intropylab-classifying-images/check_images.py
#
# TODO: 0. Fill in your information in the programming header below
# PROGRAMMER: Arsene I. Muhire
# DATE CREATED: 24/9/2019
# REVISED DATE: 26/9/2019 defined get_input_args() get_pet_labels() - started the project <=(Date Revised - if any)
# REVISED DATE: 05/14/2018 - added import statement that imports the print
# functions that can be used to check the lab
# PURPOSE: Check images & report results: read them in, predict their
# content (classifier), compare prediction to actual value labels
# and output results
#
# Use argparse Expected Call with <> indicating expected user input:
# python check_images.py --dir <directory with images> --arch <model>
# --dogfile <file that contains dognames>
# Example call:
# python check_images.py --dir pet_images/ --arch vgg --dogfile dognames.txt
##
# Imports python modules
import argparse, re, os
from time import time, sleep,strftime,gmtime
from os import listdir
#get full path modules
from inspect import currentframe, getframeinfo
from pathlib import Path
# Imports classifier function for using CNN to classify images
from classifier import classifier
# Imports print functions that check the lab
from print_functions_for_lab_checks import *
# Main program function defined below
def main():
    """
    Runs the complete image-checking pipeline from start to finish.

    The stages run in a fixed order: parse the command line, derive the pet
    labels from the image filenames, classify every image, flag each label as
    dog / not-dog, compute the summary statistics and print the report
    (always including both the misclassified-dog and misclassified-breed
    listings). The wall-clock duration of the run is printed last.
    Parameters:
     None - all inputs come from the command line via get_input_args()
    Returns:
     None - results are printed to stdout
    """
    # Timestamp taken before any work so the reported runtime covers all stages.
    began_at = time()
    print("started")

    cli_args = get_input_args()

    # key = image filename, value = label derived from that filename
    pet_labels = get_pet_labels(cli_args.dir)

    # key = image filename, value = [pet label, classifier label, match flag]
    results = classify_images(cli_args.dir, pet_labels, cli_args.arch)

    # Mutates `results` in place (adds the two is-a-dog flags to every entry).
    adjust_results4_isadog(results, cli_args.dogfile)

    stats = calculates_results_stats(results)
    print_results(results, stats, cli_args.arch, True, True)

    elapsed_seconds = time() - began_at
    # NOTE: the duration is rendered through gmtime(), i.e. as a time of day,
    # so the hour field wraps for runs lasting 24 hours or longer.
    print("\n** Total Elapsed Runtime:",
          strftime('%H:%M:%S', gmtime(elapsed_seconds)))
# TODO: 2.-to-7. Define all the functions below. Notice that the input
# parameters and return values have been left in the functions' docstrings.
# This is to provide guidance for achieving a solution similar to the
# instructor-provided solution. Feel free to ignore this guidance as long as
# you are able to achieve the desired outcomes with this lab.
def get_input_args():
    """
    Retrieves and parses the command line arguments using the argparse module.

    3 command line arguments are created, each optional and each with a
    default so the program can be started without any flags:
     dir - Path to the pet image files (default- 'pet_images/')
     arch - CNN model architecture to use for image classification; one of
            vgg, alexnet, resnet (default- 'vgg')
     dogfile - Text file that contains all labels associated to dogs
               (default- 'dognames.txt')
    Parameters:
     None - simply using argparse module to create & store command line arguments
    Returns:
     parse_args() - data structure that stores the command line arguments;
                    the values are read as .dir, .arch and .dogfile
    """
    parser = argparse.ArgumentParser()
    # Without explicit defaults an omitted flag is parsed as None, which the
    # later pipeline stages cannot handle.
    parser.add_argument("--dir", type=str, default="pet_images/",
                        help="path to the folder of pet images")
    parser.add_argument("--arch", type=str, default="vgg",
                        help="CNN model architecture: vgg, alexnet or resnet")
    parser.add_argument("--dogfile", type=str, default="dognames.txt",
                        help="text file listing one dog name per line")
    return parser.parse_args()
def get_pet_labels(image_dir):
    """
    Creates a dictionary of pet labels based upon the filenames of the image
    files. The label of a file is the part of its name that precedes the last
    underscore of the leading run of word characters, lowercased and with the
    remaining underscores turned into spaces
    (e.g. 'Boston_terrier_02259.jpg' -> 'boston terrier').

    Directory entries whose name does not fit that pattern - hidden files
    such as '.DS_Store', or names without an underscore - carry no label and
    are left out of the dictionary instead of aborting the run.
    Parameters:
     image_dir - The (full) path to the folder of images that are to be
                 classified by pretrained CNN models (string)
    Returns:
     petlabels_dic - Dictionary storing image filename (as key) and Pet Image
                     Labels (as value)
    """
    # Greedy \w+ followed by '_' backtracks to the last underscore, which
    # separates the breed words from the trailing image number.
    label_pattern = re.compile(r"(\w+)_")
    petlabel_dic = {}
    # Sorted so the dictionary (and every report built from it) has the same
    # order on each run; os.listdir() returns entries in arbitrary order.
    for image in sorted(os.listdir(image_dir)):
        found = label_pattern.match(image)
        if found is None:
            # Not a '<label>_<number>' style name: nothing to label.
            continue
        petlabel_dic[image] = found.group(1).replace("_", " ").lower()
    return petlabel_dic
def classify_images(images_dir, petlabel_dic, model):
    """
    Creates classifier labels with the classifier function, compares them to
    the pet labels, and returns a dictionary holding both labels and the
    outcome of the comparison.

    Every entry of images_dir that has a label in petlabel_dic is classified;
    entries without a label are skipped. The image path handed to the
    classifier is built from images_dir itself, so any folder given on the
    command line works. A match means the pet label equals one of the
    comma-separated terms of the classifier label (compared lowercased and
    stripped of surrounding whitespace).
    PLEASE NOTE: This function uses the classifier() function defined in
    classifier.py.
    Parameters:
      images_dir - The (full) path to the folder of images that are to be
                   classified by pretrained CNN models (string)
      petlabel_dic - Dictionary that contains the pet image(true) labels
                     that classify what's in the image, where its key is the
                     pet image filename & its value is pet image label where
                     label is lowercase with space between each word in label
      model - pretrained CNN whose architecture is indicated by this parameter,
              values must be: resnet alexnet vgg (string)
    Returns:
      results_dic - Dictionary with key as image filename and value as a List
             (index)idx 0 = pet image label (string)
                    idx 1 = classifier label, exactly as returned by
                            classifier() (string)
                    idx 2 = 1/0 (int)   where 1 = match between pet image and
                            classifer labels and 0 = no match between labels
    """
    results_dic = {}
    for image in os.listdir(images_dir):
        if image not in petlabel_dic:
            # No true label to compare against (e.g. a hidden file).
            continue
        pet_label = petlabel_dic[image]
        classifier_label = classifier(os.path.join(images_dir, image), model)
        # The classifier may return several comma-separated synonyms; the pet
        # label has to equal one whole term, not merely a substring of it.
        label_terms = [term.strip().lower()
                       for term in classifier_label.split(",")]
        is_match = 1 if pet_label in label_terms else 0
        results_dic[image] = [pet_label, classifier_label, is_match]
    return results_dic
def adjust_results4_isadog(results_dic, dogsfile):
    """
    Adjusts the results dictionary to determine if classifier correctly
    classified images 'as a dog' or 'not a dog' especially when not a match.
    Demonstrates if model architecture correctly classifies dog images even if
    it gets dog breed wrong (not a match).

    A label counts as a dog when it equals one complete line of dogsfile
    (lines are stripped and lowercased; blank lines are ignored). The pet
    label is compared as stored, the classifier label after lowercasing.
    Parameters:
      results_dic - Dictionary with key as image filename and value as a List
             (index)idx 0 = pet image label (string)
                    idx 1 = classifier label (string)
                    idx 2 = 1/0 (int)  where 1 = match between pet image and
                            classifer labels and 0 = no match between labels
                    --- where idx 3 & idx 4 are added by this function ---
                    idx 3 = 1/0 (int)  where 1 = pet image 'is-a' dog and
                            0 = pet Image 'is-NOT-a' dog.
                    idx 4 = 1/0 (int)  where 1 = Classifier classifies image
                            'as-a' dog and 0 = Classifier classifies image
                            'as-NOT-a' dog.
      dogsfile - A text file that contains names of all dogs from ImageNet
                 1000 labels (used by classifier model) and dog names from
                 the pet image files. This file has one dog name per line.
                 (string - indicates text file's name)
    Returns:
           None - each list in results_dic is extended in place.
    """
    # The context manager closes the file even if reading fails part-way.
    with open(dogsfile, "r") as infile:
        # A set gives constant-time membership tests for the loop below.
        dog_names = {line.strip().lower() for line in infile}
    # A blank line must not turn the empty string into a valid dog name.
    dog_names.discard("")

    for labels in results_dic.values():
        pet_is_dog = 1 if labels[0] in dog_names else 0
        classified_as_dog = 1 if labels[1].lower() in dog_names else 0
        labels.extend((pet_is_dog, classified_as_dog))
def calculates_results_stats(results_dic):
    """
    Calculates statistics of the results of the run using classifier's model
    architecture on classifying images. Then puts the results statistics in a
    dictionary (results_stats) so that it's returned for printing as to help
    the user to determine the 'best' model for classifying images. Note that
    the statistics calculated as the results are either percentages or counts.

    An image counts as a dog image when its pet label is a dog (idx 3).
    Entries holding fewer than 5 values (the is-a-dog flags are missing) are
    left out of the tallies but still counted in 'n_images'. A percentage
    whose denominator is zero is reported as 0.0.
    Parameters:
      results_dic - Dictionary with key as image filename and value as a List
             (index)idx 0 = pet image label (string)
                    idx 1 = classifier label (string)
                    idx 2 = 1/0 (int)  where 1 = match between pet image and
                            classifer labels and 0 = no match between labels
                    idx 3 = 1/0 (int)  where 1 = pet image 'is-a' dog and
                            0 = pet Image 'is-NOT-a' dog.
                    idx 4 = 1/0 (int)  where 1 = Classifier classifies image
                            'as-a' dog and 0 = Classifier classifies image
                            'as-NOT-a' dog.
    Returns:
     results_stats - Dictionary that contains the results statistics (either a
                     percentage or a count) where the key is the statistic's
                     name (starting with 'pct' for percentage or 'n' for count)
                     and the value is the statistic's value. Keys:
                     n_dogs_img, n_match, n_correct_dogs, n_correct_notdogs,
                     n_correct_breed, n_images, n_notdogs_img, pct_match,
                     pct_correct_dogs, pct_correct_breed, pct_correct_notdogs
    """
    def percentage(count, total):
        # An empty category has no rate; report 0.0 instead of dividing by 0.
        return (count / total) * 100.0 if total > 0 else 0.0

    results_stats = dict()
    results_stats['n_dogs_img'] = 0
    results_stats['n_match'] = 0
    results_stats['n_correct_dogs'] = 0
    results_stats['n_correct_notdogs'] = 0
    results_stats['n_correct_breed'] = 0

    for entry in results_dic.values():
        if len(entry) < 5:
            # Skip just this entry; stopping here would make the totals
            # depend on where the short entry sits in the dictionary.
            continue
        is_match, pet_is_dog, classified_as_dog = entry[2], entry[3], entry[4]

        if is_match == 1:
            results_stats['n_match'] += 1

        if pet_is_dog == 1:
            results_stats['n_dogs_img'] += 1
            if classified_as_dog == 1:
                results_stats['n_correct_dogs'] += 1
            # The breed is only right when the labels match on a dog image.
            if is_match == 1:
                results_stats['n_correct_breed'] += 1
        elif classified_as_dog == 0:
            # Pet image is NOT a dog and the classifier agrees.
            results_stats['n_correct_notdogs'] += 1

    results_stats['n_images'] = len(results_dic)
    results_stats['n_notdogs_img'] = (results_stats['n_images'] -
                                      results_stats['n_dogs_img'])

    results_stats['pct_match'] = percentage(
        results_stats['n_match'], results_stats['n_images'])
    results_stats['pct_correct_dogs'] = percentage(
        results_stats['n_correct_dogs'], results_stats['n_dogs_img'])
    results_stats['pct_correct_breed'] = percentage(
        results_stats['n_correct_breed'], results_stats['n_dogs_img'])
    results_stats['pct_correct_notdogs'] = percentage(
        results_stats['n_correct_notdogs'], results_stats['n_notdogs_img'])
    return results_stats
def print_results(results_dic, results_stats, model, print_incorrect_dogs=False, print_incorrect_breed=False):
    """
    Prints summary results on the classification and then prints incorrectly
    classified dogs and incorrectly classified dog breeds if user indicates
    they want those printouts (use non-default values).

    Both optional listings show the pet label and the classifier label of
    each offending image, in the same 'Real: ... Classifier: ...' layout.
    Parameters:
      results_dic - Dictionary with key as image filename and value as a List
             (index)idx 0 = pet image label (string)
                    idx 1 = classifier label (string)
                    idx 2 = 1/0 (int)  where 1 = match between pet image and
                            classifer labels and 0 = no match between labels
                    idx 3 = 1/0 (int)  where 1 = pet image 'is-a' dog and
                            0 = pet Image 'is-NOT-a' dog.
                    idx 4 = 1/0 (int)  where 1 = Classifier classifies image
                            'as-a' dog and 0 = Classifier classifies image
                            'as-NOT-a' dog.
      results_stats - Dictionary that contains the results statistics (either a
                     percentage or a count) where the key is the statistic's
                     name (starting with 'pct' for percentage or 'n' for count)
                     and the value is the statistic's value
      model - pretrained CNN whose architecture is indicated by this parameter,
              values must be: resnet alexnet vgg (string)
      print_incorrect_dogs - True prints incorrectly classified dog images and
                             False doesn't print anything(default) (bool)
      print_incorrect_breed - True prints incorrectly classified dog breeds and
                              False doesn't print anything(default) (bool)
    Returns:
           None - simply printing results.
    """
    # Summary counts for the run
    print("\n\n*** Results Summary for CNN Model Architecture", model.upper(),
          "***")
    print("%20s: %3d" % ('N Images', results_stats['n_images']))
    print("%20s: %3d" % ('N Dog Images', results_stats['n_dogs_img']))
    print("%20s: %3d" % ('N Not-Dog Images', results_stats['n_notdogs_img']))

    # Summary percentages: every statistic whose name starts with 'p'
    print(" ")
    for key in results_stats:
        if key.startswith("p"):
            print("{} : {} %".format(key, results_stats[key]))

    # Dog / not-dog mistakes exist when the two "correct" counts do not add
    # up to the number of images.
    n_correct_isadog = (results_stats['n_correct_dogs'] +
                        results_stats['n_correct_notdogs'])
    if print_incorrect_dogs and n_correct_isadog != results_stats['n_images']:
        print("\nINCORRECT Dog/NOT Dog Assignments:")
        for labels in results_dic.values():
            # The two 1/0 flags disagree: a dog classified as not-a-dog, or
            # a non-dog classified as a dog.
            if labels[3] != labels[4]:
                print("Real: %-26s Classifier: %-30s" % (labels[0],
                                                          labels[1]))

    # Breed mistakes exist when some correctly recognised dogs did not also
    # get the right breed.
    if (print_incorrect_breed and
            results_stats['n_correct_dogs'] != results_stats['n_correct_breed']):
        print("\nINCORRECT Dog Breed Assignment:")
        for labels in results_dic.values():
            # Pet is a dog, classified as a dog, but the labels do not match.
            if sum(labels[3:]) == 2 and labels[2] == 0:
                print("Real: %-26s Classifier: %-30s" % (labels[0],
                                                          labels[1]))
# Run the pipeline only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment