Last active
May 17, 2017 15:02
-
-
Save keithweaver/bf460c5ab749fa829ac00ac9c0a6ec92 to your computer and use it in GitHub Desktop.
Upload a whole folder of images to the Haar Cascade Market via API
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This file will find keywords in the file name to assign tags for the | |
# Cascade Market. Add keywords to the image file name in the following | |
# format "..._keyword_keyword2.jpg". This is explained more below. | |
# | |
# | |
# Tips: | |
# - Keep it consistent. I made the mistake of labeling a mix of longboard and | |
# longboarder to represent the same thing. As a result, when I train I have | |
# to make sure the other is not in the negative set or it will affect my | |
# cascade. | |
# - Typos - Look at your file names with fresh eyes | |
# - Don't use my exact naming convention. I should have used _ to split keywords
# and - to stand for a space, so a file could be named light-blue-car_green-car,
# which would have made things easier.
# - Tag as much as possible - worst case scenario you do not train based on that
# tag. It gives you better results and a better understanding of what's in the
# images.
# Imports for getting the list of files. | |
from os import listdir | |
from os.path import isfile, join | |
# Imports for getting file info | |
import os | |
# Server Call Related | |
import json | |
import requests | |
# ---------------------------------------------------------------------------
# Upload settings
# ---------------------------------------------------------------------------

# Your Cascade Market API key.
API_KEY = 'YOUR_API_KEY'

# Name of this script. When the script sits in the same directory as the
# images it must not be uploaded; this name check is made in addition to the
# image-extension check.
THIS_PYTHON_FILE_NAME = 'upload-files.py'

# File extensions that are treated as images.
IMAGE_EXTENSIONS = ['.jpg', '.png']

# Where the images currently live. The script was simply placed inside the
# images folder, hence the current directory.
DIRECTORY_WITH_IMAGES = './'

# Tags attached to every uploaded image. '05-17-2017' is the upload date, so
# the images can be linked back to this upload; add a time as well if you
# upload often.
KEYWORDS_FOR_ALL = [
    'driving-cascade',
    'rover-images',
    '05-17-2017',
]

# Every tag that may be encoded in a file name. Files are named "file#_"
# followed by the things visible in the image, for example
# "file12_stop_sign_black_car_parked_car.jpg".
#
# Order matters: when tags overlap, list the more specific one first.
# With [..., 'light_blue_car', 'blue_car', ...] the file
# 'file_light_blue_car.jpg' is tagged with light_blue_car only.
KEYWORDS = [
    'stop_sign',
    'black_car',
    'person',
    'parked_car',
    'longboarder',
    'longboard',
    'crosswalk',
    'white_car',
    'light_blue_car',
    'blue_car',
    'speed_sign',
    'no_parking',
    'green_car',
    'beige_car',
    'red_car',
    'silver_car',
    'trafficlight',
    'pot_holes',
    'car',
]

# Endpoint the images are posted to.
URL = 'https://keithweaver.ca/rest/cascades/v1/upload/image'
# Names of the regular files found directly inside DIRECTORY_WITH_IMAGES.
# Entries that are not files (such as sub-directories) are left out. The
# result is a list of file-name strings.
allFiles = list(filter(
    lambda entry: isfile(join(DIRECTORY_WITH_IMAGES, entry)),
    listdir(DIRECTORY_WITH_IMAGES)
))
# General pseudocode for what's happening:
# Loop through each | |
# Grab name of file | |
# Determine Tags | |
# Remove extension | |
# Look for keywords | |
# Remove keyword from file name | |
# Add KEYWORDS_FOR_ALL | |
# Stringify the list of tags | |
# Upload to Cascade market | |
# Uploads that failed are collected here as {'fileName': ..., 'reason': ...}
# dicts so they can be reported once the whole batch has been processed.
filesWithServerIssues = []


def _tags_for_file_name(base_name):
    """Return the list of tags for one image, derived from its file name.

    base_name is the file name with its extension already removed. Every
    entry of KEYWORDS found in it becomes a tag, in KEYWORDS order, followed
    by each KEYWORDS_FOR_ALL entry that is not already in the list.
    """
    tags = []
    remaining = base_name
    for keyword in KEYWORDS:
        if keyword in remaining and keyword not in tags:
            tags.append(keyword)
            # Remove every occurrence (no count limit) so that a shorter
            # keyword listed later, e.g. blue_car, is not found inside a
            # longer one that already matched, e.g. light_blue_car.
            remaining = remaining.replace(keyword, '')
    for keyword in KEYWORDS_FOR_ALL:
        if keyword not in tags:
            tags.append(keyword)
    return tags


# For each file: skip this script and non-image files, derive the tags from
# the file name, then post the image and its tags to the Cascade Market.
for fileName in allFiles:
    print('Preparing ... ' + fileName)

    if fileName == THIS_PYTHON_FILE_NAME:
        print('Error: [' + fileName + '] is the current script file.')
        continue

    baseName, file_extension = os.path.splitext(fileName)
    if file_extension not in IMAGE_EXTENSIONS:
        print('Error: [' + fileName + '] does not have a supported file extension.')
        continue

    tagsForUpload = _tags_for_file_name(baseName)
    # Compact separators produce the same wire format the tags were sent in
    # before (["a","b"]), while json.dumps takes care of quoting/escaping.
    tagsStr = json.dumps(tagsForUpload, separators=(',', ':'))
    values = {'apikey': API_KEY, 'tags': tagsStr}
    filePath = join(DIRECTORY_WITH_IMAGES, fileName)

    # The `with` block closes the image as soon as the request has finished,
    # so a large folder does not accumulate open file handles. A failure to
    # read the file or reach the server is recorded instead of aborting the
    # remaining uploads.
    try:
        with open(filePath, 'rb') as imageFile:
            result = requests.post(URL, files={'file': imageFile}, data=values)
    except (IOError, OSError, requests.exceptions.RequestException) as err:
        print('Error with server call.')
        filesWithServerIssues.append({'fileName': fileName, 'reason': str(err)})
        continue

    # An error page is not necessarily JSON; treat an unparsable or
    # non-object body as an empty response rather than crashing.
    try:
        response = result.json()
    except ValueError:
        response = None
    if not isinstance(response, dict):
        response = {}

    if result.status_code == 200 and response.get('success') == True:
        print('Added ' + fileName)
        continue

    if result.status_code == 200:
        print('Error with params on server call.')
    else:
        print('Error with server call.')
    reason = response.get(
        'message',
        'HTTP ' + str(result.status_code) + ': no message in server response'
    )
    filesWithServerIssues.append({'fileName': fileName, 'reason': reason})
# If at least one upload failed, save the collected failures as JSON to
# ./error-upload.json under the 'issues' key. Nothing is written when every
# upload succeeded.
if filesWithServerIssues:
    errorReport = {'issues': filesWithServerIssues}
    with open('./error-upload.json', 'w') as reportFile:
        json.dump(errorReport, reportFile)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment