Skip to content

Instantly share code, notes, and snippets.

View sengstacken's full-sized avatar

Aaron Sengstacken sengstacken

View GitHub Profile
@sengstacken
sengstacken / read data from s3
Created September 25, 2020 01:37
read data from s3
# Loop over every object in the bucket and parse each '.txt' object as JSON.
# NOTE(review): `bucket_name` is not defined in this excerpt; it must be set
# before this snippet runs.
s3 = boto3.resource('s3')
bucket = s3.Bucket(bucket_name)
for obj in bucket.objects.all():
    # Inspect the final path segment (the file name). The first segment is a
    # folder name for any nested key, so checking it would skip nested files.
    if obj.key.split('/')[-1].split('.')[-1] == 'txt':
        # s3.Object takes the bucket *name* and the object key, matching the
        # s3.Object(bucket, f) usage elsewhere in this file.
        obj_from_s3 = s3.Object(bucket_name, obj.key)
        file_content = obj_from_s3.get()['Body'].read().decode('utf-8')
        # Each iteration overwrites json_content; only the last parsed
        # document remains bound after the loop.
        json_content = json.loads(file_content)
@sengstacken
sengstacken / assume_role.py
Created August 20, 2020 22:01 — forked from DaisukeMiyamoto/assume_role.py
AWS Boto3 Assume Role example
import boto3
from boto3.session import Session
def assume_role(arn, session_name):
"""aws sts assume-role --role-arn arn:aws:iam::00000000000000:role/example-role --role-session-name example-role"""
client = boto3.client('sts')
account_id = client.get_caller_identity()["Account"]
print(account_id)
@sengstacken
sengstacken / parse_args.py
Last active December 21, 2020 18:33
How to parse arguments in python
import argparse
def parse_args():
parser = argparse.ArgumentParser()
# hyperparameters sent by the client are passed as command-line arguments to the script
parser.add_argument('--epochs', type=int, default=1)
parser.add_argument('--batch_size', type=int, default=64)
@sengstacken
sengstacken / one_hot.py
Created July 20, 2020 16:58
one hot encoding - pandas
import pandas as pd
# Example frame with two columns: a name and the categorical "country".
df = pd.DataFrame(
    dict(
        name=['josef', 'michael', 'john', 'bawool', 'klaus'],
        country=['russia', 'germany', 'australia', 'korea', 'germany'],
    )
)
# Append one indicator column per distinct country (named "country_<value>")
# next to the original columns.
df = pd.concat(
    [df, pd.get_dummies(df['country'], prefix='country')],
    axis='columns',
)
@sengstacken
sengstacken / confusion_matrix.py
Created June 16, 2020 14:27
confusion matrix python
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
def plot_confusion_matrix(y_true, y_pred, classes,
normalize=False,
title=None,
cmap=None):
"""
This function prints and plots the confusion matrix.
@sengstacken
sengstacken / video2frames.py
Created June 3, 2020 13:23
python code to save video frames to images
import cv2
# Dump every frame of the video to numbered JPEG files (frame0.jpg, ...)
# in the current working directory.
fname = 'eye_detection.avi'
vidcap = cv2.VideoCapture(fname)
try:
    # read() yields a success flag plus the frame; the flag going falsy
    # ends the loop.
    success, image = vidcap.read()
    count = 0
    while success:
        cv2.imwrite("frame%d.jpg" % count, image)  # save frame as JPEG file
        success, image = vidcap.read()
        print('Read a new frame: ', success)
        count += 1
finally:
    # Release the capture handle even if writing a frame raises.
    vidcap.release()
import json
# Parse the JSON file into `data`. The encoding is given explicitly so the
# result does not depend on the platform's default text encoding.
with open('./data/generated/1127_22.json', encoding='utf-8') as f:
    data = json.load(f)
@sengstacken
sengstacken / scale_normalize.py
Created March 24, 2020 14:19
scale dataframes after train test split
# Fit a [0, 1] min-max scaler on the training frame only, then apply that
# same fitted scaler to the train, validation and test frames.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(train_df.values)
train_scaled, val_scaled, test_scaled = (
    scaler.transform(split.values) for split in (train_df, val_df, test_df)
)
@sengstacken
sengstacken / train_test_val_split_seq.py
Last active September 23, 2021 19:27
code to perform the train test validation split on a pandas dataframe for a time sequence
# Split `df` by row position, without shuffling: the first 70% of rows go to
# training, the next 20% to validation, and whatever remains to test.
trainpct = 0.7
valpct = 0.2

# Row positions where the training and validation slices end.
trainidx = int(np.round(len(df) * trainpct))
validx = int(np.round(len(df) * (trainpct + valpct)))

train_df = df.iloc[:trainidx]
val_df = df.iloc[trainidx:validx]
test_df = df.iloc[validx:]
@sengstacken
sengstacken / s3read.py
Created March 17, 2020 18:55
Read S3 File Into Python
import boto3
import json
# Bucket name and object key of the file to read.
bucket, f = 'blah', 'fileblah'

# Fetch the object from S3 and decode its body as UTF-8 text.
# NOTE(review): no JSON parsing step is visible in this excerpt, although
# the json import suggests one follows.
s3 = boto3.resource('s3')
content_object = s3.Object(bucket, f)
file_content = str(content_object.get()['Body'].read(), 'utf-8')