This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Use Gists to store code you would like to remember later on | |
console.log(window); // log the "window" object to the console |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from urllib import request | |
# Now let's grab some text from Great Expectations | |
url = 'http://www.gutenberg.org/files/1400/1400-0.txt' | |
response = request.urlopen(url) | |
raw = response.read().decode('utf8') | |
# Here is some text we'll start with | |
text = raw[886:1091] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def dict_sort(dictionary, descending=False):
    """Return the dictionary's entries as a list of (value, key) tuples sorted by value.

    Args:
        dictionary: Mapping to sort. Its values (and, for tied values, its keys)
            must be mutually orderable.
        descending: When True, order from largest to smallest; default is ascending.

    Returns:
        A new list of ``(value, key)`` tuples. The input mapping is not modified.
    """
    # Put the value first in each tuple so that tuple comparison sorts by value
    # and falls back to the key only to break ties.
    value_key_pairs = [(value, key) for key, value in dictionary.items()]
    # list.sort's keyword is ``reverse``; the caller-facing flag is ``descending``.
    value_key_pairs.sort(reverse=descending)
    return value_key_pairs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Let's make a single function to determine the parts of speech | |
import re | |
import nltk | |
import os | |
#from collections import Counter # Is this used? | |
# First we break the text into tokens | |
def tokinze_text(raw_text): | |
tokens = nltk.word_tokenize(raw_text) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def grayscale(image, algorithm='luminosity'): | |
""" | |
The algorithms are based on those used in GIMP. They are 'lightness', 'average', and 'luminosity'. Luminosity is the default | |
""" | |
l, w, color_space = image.shape | |
num_pxls = l*w | |
assert (color_space == 3), "The image must have three dimensions of color: red, green, and blue" | |
# Extract the individual colors | |
red = image[:,:,0] | |
green = image[:,:,1] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
test_cases = ['04/30/2009', '06/20/95', '8/2/69', '1/25/2011', '9/3/2002', '4-13-82', 'Mar-02-2009', 'Jan 20, 1974', | |
'March 20, 1990', 'Dec. 21, 2001', 'May 25 2009', '01 Mar 2002', '2 April 2003', '20 Aug. 2004', | |
'20 November, 1993', 'Aug 10th, 1994', 'Sept 1st, 2005', 'Feb. 22nd, 1988', 'Sept 2002', 'Sep 2002', | |
'December, 1998', 'Oct. 2000', '6/2008', '12/2001', '1998', '2002'] | |
# Create a dictionary to convert from month names to numbers (e.g. Jan = 01) | |
month_dict = dict(jan='01', feb='02', mar='03', apr='04', may='05', jun='06', jul='07', aug='08', sep='09', | |
oct='10', nov='11', dec='12') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Find the internal structure of a list or numpy array | |
def shape(lst): | |
ob_type = type(lst) | |
if ob_type == np.ndarray: | |
print("The size is " + str(lst.shape)) | |
length = len(lst) | |
shp = tuple(shape(sub) if isinstance(sub, list) else 0 for sub in lst) | |
if any(x != 0 for x in shp): | |
return length, shp | |
else: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# if you want to change the path | |
# you'll need to: | |
# export DETECTRON2_DATASETS=/home/jsimonelli/data/detectron2 | |
# or | |
# import os | |
# os.environ['DETECTRON2_DATASETS'] = '/home/jsimonelli/data/detectron2' | |
# detectron2 expects it to be in a folder called coco | |
mkdir coco | |
cd coco |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import mimetypes | |
# Collect every file extension (e.g. '.png') that the mimetypes table maps to an image type.
# Matching on the full 'image/' prefix keeps out any type whose name merely begins with "image".
image_extensions = {
    ext for ext, mime_type in mimetypes.types_map.items() if mime_type.startswith('image/')
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sometimes I write functions that accept either a single object or a list of objects.
# To do this, I check whether the input is a single object and, if so, wrap it in a list.
# Here's a simple way to do that.
def _isArrayLike(obj): | |
return hasattr(obj, '__iter__') and hasattr(obj, '__len__') | |
objs = objs if _isArrayLike(objs) else [objs] | |
#Another option is to do the opposite check: |