Created
October 7, 2019 15:45
-
-
Save aimuhire/443a7db77ebc0dbfbd79382aeb7576f9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# */AIPND/intropylab-classifying-images/check_images.py
#
# TODO: 0. Fill in your information in the programming header below
# PROGRAMMER: Arsene I. Muhire
# DATE CREATED: 24/9/2019
# REVISED DATE: 26/9/2019 - defined get_input_args() and get_pet_labels();
#               started the project
# REVISED DATE: 05/14/2018 - added import statement that imports the print
#               functions that can be used to check the lab
# PURPOSE: Check images & report results: read them in, predict their
#          content (classifier), compare prediction to actual value labels
#          and output results
#
# Use argparse. Expected call, with <> indicating expected user input:
#      python check_images.py --dir <directory with images> --arch <model>
#             --dogfile <file that contains dognames>
#   Example call:
#      python check_images.py --dir pet_images/ --arch vgg --dogfile dognames.txt
##
# Imports python modules
import argparse
import os
import re
# get full path modules
from inspect import currentframe, getframeinfo
from os import listdir
from pathlib import Path
from time import gmtime, sleep, strftime, time

# Imports classifier function for using CNN to classify images
from classifier import classifier

# Imports print functions that check the lab
from print_functions_for_lab_checks import *
# Main program function defined below | |
def main():
    """Run the whole image-checking pipeline once and report how long it took.

    The steps follow the lab's TODO numbering:
      1. time the run,
      2. read the command line arguments,
      3. derive the pet labels from the image filenames,
      4. classify every image and compare against its pet label,
      5. flag each label as dog / not-a-dog using the dog names file,
      6. compute the summary statistics,
      7. print the summary plus both lists of misclassifications.
    """
    # TODO 1: remember the start so the total runtime can be reported.
    began_at = time()
    print("started")

    # TODO 2: command line arguments (dir, arch, dogfile).
    cli_args = get_input_args()

    # TODO 3: filename -> pet label, the "truth" the classifier is checked
    # against.
    pet_labels = get_pet_labels(cli_args.dir)

    # TODO 4: filename -> [pet label, classifier label, match flag].
    results = classify_images(cli_args.dir, pet_labels, cli_args.arch)

    # TODO 5: extends every results entry in place with the two is-a-dog flags.
    adjust_results4_isadog(results, cli_args.dogfile)

    # TODO 6: counts and percentages over the whole run.
    stats = calculates_results_stats(results)

    # TODO 7: always request both misclassification listings.
    print_results(results, stats, cli_args.arch, True, True)

    # TODO 1: elapsed seconds rendered as hh:mm:ss. gmtime() is used purely as
    # a seconds -> (h, m, s) splitter, so the display rolls over after 24h.
    elapsed_seconds = time() - began_at
    print("\n** Total Elapsed Runtime:",
          strftime('%H:%M:%S', gmtime(elapsed_seconds)))
# TODO: 2.-to-7. Define all the functions below. Notice that the input
#       parameters and return values have been left in the functions'
#       docstrings. This is to provide guidance for achieving a solution
#       similar to the instructor-provided solution. Feel free to ignore this
#       guidance as long as you are able to achieve the desired outcomes with
#       this lab.
def get_input_args():
    """
    Retrieves and parses the command line arguments using the argparse module.

    3 command line arguments are created, all optional:
      dir - Path to the pet image files (default- 'pet_images/')
      arch - CNN model architecture to use for image classification; one of
             vgg, alexnet, resnet (default- 'vgg')
      dogfile - Text file that contains all labels associated to dogs
                (default- 'dognames.txt')

    Every argument has a default so that a flag left off the command line
    yields a usable string instead of None.

    Parameters:
      None - simply using argparse module to create & store command line
             arguments
    Returns:
      parse_args() - argparse.Namespace whose attributes `dir`, `arch` and
                     `dogfile` hold the parsed values
    """
    parser = argparse.ArgumentParser(
        description="Classify pet images and report how well the model did.")
    parser.add_argument("--dir", type=str, default="pet_images/",
                        help="path to the folder of pet images")
    parser.add_argument("--arch", type=str, default="vgg",
                        help="CNN model architecture: vgg, alexnet or resnet")
    parser.add_argument("--dogfile", type=str, default="dognames.txt",
                        help="text file that contains the dog names")
    return parser.parse_args()
def get_pet_labels(image_dir):
    """
    Creates a dictionary of pet labels based upon the filenames of the image
    files. Reads in pet filenames and extracts the pet image labels from the
    filenames and returns these labels as petlabel_dic. This is used to check
    the accuracy of the image classifier model.

    The label is the leading run of word characters up to (not including) its
    last underscore, with the remaining underscores turned into spaces and the
    text lowercased, e.g. 'Boston_terrier_02259.jpg' -> 'boston terrier'.
    Hidden entries (names starting with '.') are skipped. A filename with no
    such underscore falls back to its name without the extension.

    Parameters:
     image_dir - The (full) path to the folder of images that are to be
                 classified by pretrained CNN models (string)
    Returns:
     petlabels_dic - Dictionary storing image filename (as key) and Pet Image
                     Labels (as value)
    """
    # Raw string so the regex escapes are not treated as (invalid) string
    # escapes; compiled once because it is reused for every file.
    label_pattern = re.compile(r"(\w+)_")

    petlabel_dic = {}
    for image in os.listdir(image_dir):
        # Hidden files such as '.DS_Store' are not pet images.
        if image.startswith("."):
            continue

        found = label_pattern.match(image)
        if found:
            raw_label = found.group(1)
        else:
            # No '<label>_<suffix>' shape: use the bare filename stem rather
            # than failing on a None match object.
            raw_label = os.path.splitext(image)[0]

        petlabel_dic[image] = raw_label.replace("_", " ").strip().lower()
    return petlabel_dic
def classify_images(images_dir, petlabel_dic, model):
    """
    Creates classifier labels with classifier function, compares labels, and
    creates a dictionary containing both labels and comparison of them to be
    returned.
     PLEASE NOTE: This function uses the classifier() function defined in
     classifier.py within this function. The proper use of this function is
     in test_classifier.py Please refer to this program prior to using the
     classifier() function to classify images in this function.

    Each image path is built from images_dir, so any folder passed on the
    command line is honoured. Only the files present in petlabel_dic are
    classified, which guarantees every result has a pet label to compare to.
    The classifier label is stripped and lowercased once before it is stored
    and compared. A match means the pet label equals one of the
    comma-separated terms of the classifier label.

    Parameters:
      images_dir - The (full) path to the folder of images that are to be
                   classified by pretrained CNN models (string)
      petlabel_dic - Dictionary that contains the pet image(true) labels
                     that classify what's in the image, where its key is the
                     pet image filename & its value is pet image label where
                     label is lowercase with space between each word in label
      model - pretrained CNN whose architecture is indicated by this parameter,
              values must be: resnet alexnet vgg (string)
     Returns:
      results_dic - Dictionary with key as image filename and value as a List
             (index)idx 0 = pet image label (string)
                    idx 1 = classifier label (string)
                    idx 2 = 1/0 (int)   where 1 = match between pet image and
                    classifer labels and 0 = no match between labels
    """
    results_dic = {}
    for image, pet_label in petlabel_dic.items():
        image_path = os.path.join(images_dir, image)

        # Normalise once so the stored label and the comparison agree.
        classifier_label = classifier(image_path, model).strip().lower()

        # The classifier may return several alternative names separated by
        # commas; the pet label must equal one of them exactly.
        label_terms = [term.strip() for term in classifier_label.split(",")]
        is_match = 1 if pet_label in label_terms else 0

        results_dic[image] = [pet_label, classifier_label, is_match]
    return results_dic
def adjust_results4_isadog(results_dic, dogsfile):
    """
    Adjusts the results dictionary to determine if classifier correctly
    classified images 'as a dog' or 'not a dog' especially when not a match.
    Demonstrates if model architecture correctly classifies dog images even if
    it gets dog breed wrong (not a match).

    Each line of dogsfile is stripped and lowercased and treated as one dog
    name; blank lines are ignored. The pet label is looked up as-is, the
    classifier label is stripped and lowercased before the lookup.

    Parameters:
      results_dic - Dictionary with key as image filename and value as a List
             (index)idx 0 = pet image label (string)
                    idx 1 = classifier label (string)
                    idx 2 = 1/0 (int)  where 1 = match between pet image and
                            classifer labels and 0 = no match between labels
                    --- where idx 3 & idx 4 are added by this function ---
                    idx 3 = 1/0 (int)  where 1 = pet image 'is-a' dog and
                            0 = pet Image 'is-NOT-a' dog.
                    idx 4 = 1/0 (int)  where 1 = Classifier classifies image
                            'as-a' dog and 0 = Classifier classifies image
                            'as-NOT-a' dog.
     dogsfile - A text file that contains names of all dogs from ImageNet
                1000 labels (used by classifier model) and dog names from
                the pet image files. This file has one dog name per line.
                Dog names are all in lowercase with spaces separating the
                distinct words of the dogname. This file should have been
                passed in as a command line argument. (string - indicates
                text file's name)
    Returns:
           None - results_dic is mutable data type so no return needed.
    """
    # `with` closes the file even if reading fails; a set gives constant-time
    # membership tests for the lookups below.
    with open(dogsfile, "r") as infile:
        dog_names = {line.strip().lower() for line in infile if line.strip()}

    # Only existing keys are re-assigned, so the dict can be updated while it
    # is being iterated.
    for image, labels in results_dic.items():
        pet_is_dog = 1 if labels[0] in dog_names else 0
        classified_as_dog = 1 if labels[1].strip().lower() in dog_names else 0
        results_dic[image] = [*labels, pet_is_dog, classified_as_dog]
def _percentage(part, whole):
    """Return part/whole as a percentage, or 0.0 when whole is zero."""
    if whole == 0:
        return 0.0
    return (part / whole) * 100.0


def calculates_results_stats(results_dic):
    """
    Calculates statistics of the results of the run using classifier's model
    architecture on classifying images. Then puts the results statistics in a
    dictionary (results_stats) so that it's returned for printing as to help
    the user to determine the 'best' model for classifying images. Note that
    the statistics calculated as the results are either percentages or counts.

    Entries with fewer than 5 items (no is-a-dog flags yet) are skipped when
    counting but still contribute to 'n_images'. Any percentage whose
    denominator is zero is reported as 0.0.

    Parameters:
      results_dic - Dictionary with key as image filename and value as a List
             (index)idx 0 = pet image label (string)
                    idx 1 = classifier label (string)
                    idx 2 = 1/0 (int)  where 1 = match between pet image and
                            classifer labels and 0 = no match between labels
                    idx 3 = 1/0 (int)  where 1 = pet image 'is-a' dog and
                            0 = pet Image 'is-NOT-a' dog.
                    idx 4 = 1/0 (int)  where 1 = Classifier classifies image
                            'as-a' dog and 0 = Classifier classifies image
                            'as-NOT-a' dog.
    Returns:
     results_stats - Dictionary that contains the results statistics (either a
                     percentage or a count) where the key is the statistic's
                     name (starting with 'pct' for percentage or 'n' for count)
                     and the value is the statistic's value
    """
    results_stats = {
        'n_dogs_img': 0,
        'n_match': 0,
        'n_correct_dogs': 0,
        'n_correct_notdogs': 0,
        'n_correct_breed': 0,
    }

    for labels in results_dic.values():
        # Skip (rather than stop at) an incomplete entry so that one bad
        # entry cannot hide every entry that follows it.
        if len(labels) < 5:
            continue
        labels_match, pet_is_dog, classified_as_dog = labels[2:5]

        if labels_match == 1:
            results_stats['n_match'] += 1
            # A matching label on a dog image is a correct breed.
            if pet_is_dog == 1:
                results_stats['n_correct_breed'] += 1

        # Dog / not-a-dog counts are driven by idx 3 (the pet label is a dog),
        # not by idx 2 (the labels match).
        if pet_is_dog == 1:
            results_stats['n_dogs_img'] += 1
            if classified_as_dog == 1:
                results_stats['n_correct_dogs'] += 1
        elif classified_as_dog == 0:
            # Pet label is NOT a dog and the classifier agrees.
            results_stats['n_correct_notdogs'] += 1

    results_stats['n_images'] = len(results_dic)
    results_stats['n_notdogs_img'] = (results_stats['n_images'] -
                                      results_stats['n_dogs_img'])

    results_stats['pct_match'] = _percentage(
        results_stats['n_match'], results_stats['n_images'])
    results_stats['pct_correct_dogs'] = _percentage(
        results_stats['n_correct_dogs'], results_stats['n_dogs_img'])
    results_stats['pct_correct_breed'] = _percentage(
        results_stats['n_correct_breed'], results_stats['n_dogs_img'])
    results_stats['pct_correct_notdogs'] = _percentage(
        results_stats['n_correct_notdogs'], results_stats['n_notdogs_img'])
    return results_stats
def print_results(results_dic, results_stats, model, print_incorrect_dogs = False, print_incorrect_breed = False):
    """
    Prints summary results on the classification and then prints incorrectly
    classified dogs and incorrectly classified dog breeds if user indicates
    they want those printouts (use non-default values)

    Both misclassification listings print the pet label and the classifier
    label of each offending image, one image per line.

    Parameters:
      results_dic - Dictionary with key as image filename and value as a List
             (index)idx 0 = pet image label (string)
                    idx 1 = classifier label (string)
                    idx 2 = 1/0 (int)  where 1 = match between pet image and
                            classifer labels and 0 = no match between labels
                    idx 3 = 1/0 (int)  where 1 = pet image 'is-a' dog and
                            0 = pet Image 'is-NOT-a' dog.
                    idx 4 = 1/0 (int)  where 1 = Classifier classifies image
                            'as-a' dog and 0 = Classifier classifies image
                            'as-NOT-a' dog.
      results_stats - Dictionary that contains the results statistics (either a
                     percentage or a count) where the key is the statistic's
                     name (starting with 'pct' for percentage or 'n' for count)
                     and the value is the statistic's value
      model - pretrained CNN whose architecture is indicated by this parameter,
              values must be: resnet alexnet vgg (string)
      print_incorrect_dogs - True prints incorrectly classified dog images and
                             False doesn't print anything(default) (bool)
      print_incorrect_breed - True prints incorrectly classified dog breeds and
                              False doesn't print anything(default) (bool)
    Returns:
           None - simply printing results.
    """
    # Summary counts for the run
    print("\n\n*** Results Summary for CNN Model Architecture", model.upper(),
          "***")
    print("%20s: %3d" % ('N Images', results_stats['n_images']))
    print("%20s: %3d" % ('N Dog Images', results_stats['n_dogs_img']))
    print("%20s: %3d" % ('N Not-Dog Images', results_stats['n_notdogs_img']))

    # Summary percentages: every statistic whose name starts with 'pct'
    print(" ")
    for name, value in results_stats.items():
        if name.startswith("pct"):
            print("{} : {} %".format(name, value))

    # Dog / not-a-dog mistakes: only listed when requested AND at least one
    # image was not counted as correctly classified.
    n_correct_isadog = (results_stats['n_correct_dogs'] +
                        results_stats['n_correct_notdogs'])
    if print_incorrect_dogs and n_correct_isadog != results_stats['n_images']:
        print("\nINCORRECT Dog/NOT Dog Assignments:")
        for labels in results_dic.values():
            pet_is_dog, classified_as_dog = labels[3], labels[4]
            # Pet label is-a-dog but classified as NOT-a-dog, or
            # pet label is NOT-a-dog but classified as a dog.
            if ((pet_is_dog == 1 and classified_as_dog == 0) or
                    (pet_is_dog == 0 and classified_as_dog == 1)):
                print("Real: %-26s   Classifier: %-30s" % (labels[0],
                                                           labels[1]))

    # Breed mistakes: only listed when requested AND some correctly detected
    # dog did not also get the right breed.
    if (print_incorrect_breed and
            results_stats['n_correct_dogs'] != results_stats['n_correct_breed']):
        print("\nINCORRECT Dog Breed Assignment:")
        for labels in results_dic.values():
            # Pet label is-a-dog, classified as-a-dog, but the labels differ.
            if sum(labels[3:]) == 2 and labels[2] == 0:
                print("Real: %-26s   Classifier: %-30s" % (labels[0],
                                                           labels[1]))
# Run the program only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment