Skip to content

Instantly share code, notes, and snippets.

View RoaldSchuring's full-sized avatar

Roald Schuring RoaldSchuring

  • Amsterdam
View GitHub Profile
@RoaldSchuring
RoaldSchuring / lambda_function.py
Created July 13, 2019 14:07
retrieve_additional_wine_info
# Load the wine lookup CSV from S3 and look up the 'Name' of every row whose
# index appears in result[1].
# Fetch the CSV object; its 'Body' entry is handed to pandas below.
wine_name_lookup = client.get_object(Bucket='data-science-wine-reviews',
Key='nearest_neighbors/data/wine_reviews_select_cols.csv')
# Rebind the name from the raw S3 response to the parsed DataFrame.
wine_name_lookup = pd.read_csv(wine_name_lookup['Body'])
# NOTE(review): result[1] presumably holds the neighbour row indices returned
# by the model endpoint -- confirm against the endpoint's response format.
recommendation_indices = list(result[1])
# Coerce every index to a plain int before using it as a row label.
recommendation_indices = [int(n) for n in recommendation_indices]
recommendations = []
for i in recommendation_indices:
# Row label i, column 'Name'.
suggested_wine = wine_name_lookup.at[i, 'Name']
@RoaldSchuring
RoaldSchuring / lambda_function.py
Created July 13, 2019 13:59
call_nearest_neighbors
# Call the named endpoint through `runtime`, sending wine_vector_output as the
# request body with a JSON content type.
response = runtime.invoke_endpoint(EndpointName='sagemaker-scikit-learn-2019-07-04-13-00-07-919',
ContentType='application/json',
Body=wine_vector_output)
def decode(s, encoding="ascii", errors="ignore"):
    """Return *s* as text.

    Args:
        s: Object exposing ``.decode`` (e.g. ``bytes``). A value that is
            already text is returned unchanged.
        encoding: Codec used to decode *s*. Defaults to ``"ascii"``.
        errors: Codec error policy. The default ``"ignore"`` silently drops
            bytes that are not valid in *encoding*.

    Returns:
        The decoded text.
    """
    # type(u"") is the text type on both Python 2 and 3; text has nothing left
    # to decode, so pass it through instead of failing on a missing .decode().
    if isinstance(s, type(u"")):
        return s
    return s.decode(encoding=encoding, errors=errors)
# Read the response body, decode it with decode()'s defaults and parse the
# resulting text as JSON.
# NOTE(review): decode() defaults to ASCII with errors="ignore", so any
# non-ASCII bytes in the response are dropped silently before parsing.
result = json.loads(decode(response['Body'].read()))
@RoaldSchuring
RoaldSchuring / lambda_function.py
Created July 13, 2019 13:53
create_wine_embedding
# Average the collected word vectors into one wine vector, then serialize that
# vector as a JSON list.
vector_total = sum(word_vectors)
vector_count = len(word_vectors)
wine_vector = vector_total / vector_count
wine_vector_output = json.dumps(wine_vector.tolist())
@RoaldSchuring
RoaldSchuring / lambda_function.py
Created July 13, 2019 13:46
retrieve_idf_weighted_word_embeddings
# Read word_vectors_idf.csv from S3, index it by word, and parse the stored
# vector string of every word in `payload` into a float array.
obj = client.get_object(Bucket='data-science-wine-reviews', Key='word_vectors_idf.csv')
wine_df = pd.read_csv(obj['Body']).set_index(['word'])


def _parse_word_vector(raw_text):
    """Drop the bracket and backslash-n markers, then parse space-separated floats."""
    # NOTE(review): r'\n' matches a literal backslash followed by 'n', not a
    # newline character -- confirm which of the two the CSV actually contains.
    for marker in ('[', r'\n', ']'):
        raw_text = raw_text.replace(marker, '')
    return np.fromstring(raw_text, dtype=float, sep=' ')


word_vectors = [_parse_word_vector(wine_df.at[p, 'word_vec_idf']) for p in payload]
@RoaldSchuring
RoaldSchuring / lambda_function.py
Last active July 13, 2019 13:34
import_functions
import json
import boto3
import pandas as pd
import numpy as np
from six import BytesIO
# Handler taking (event, context); presumably the AWS Lambda entry point, given
# the name -- confirm against the deployment configuration.
# The visible part only creates two boto3 clients: `client` for 's3' and
# `runtime` for 'runtime.sagemaker'.
def lambda_handler(event, context):
client = boto3.client('s3')
runtime = boto3.client('runtime.sagemaker')
@RoaldSchuring
RoaldSchuring / sagemaker_functions.py
Created July 9, 2019 02:08
extracting_info_from_vectorstxt
from sklearn.preprocessing import normalize
# open the vectors.txt file containing all the trained word embeddings, extracting the descriptors & embeddings
# Count the '\n'-separated pieces of vectors.txt. The context manager closes
# the file handle right away instead of leaving it to garbage collection.
with open('vectors.txt', 'r') as vectors_file:
    num_points = len(vectors_file.read().split('\n'))
first_line = True
index_to_word = []
with open("vectors.txt","r") as f:
for line_num, line in enumerate(f):
if first_line:
@RoaldSchuring
RoaldSchuring / sagemaker_functions.py
Last active July 8, 2019 03:08
download_model_objects
s3 = boto3.resource('s3')
key = bt_model.model_data[bt_model.model_data.find("/", 5)+1:]
s3.Bucket(bucket).download_file(key, 'model.tar.gz')
!tar -xvzf model.tar.gz
@RoaldSchuring
RoaldSchuring / sagemaker_functions.py
Last active July 8, 2019 02:50
set_training_data_input_channel
# Wrap the training data location in an s3_input description, register it as
# the 'train' channel and start fitting bt_model with logging enabled.
# `train_data` is rebound from its previous value to the s3_input object.
train_data = sagemaker.session.s3_input(train_data, distribution='FullyReplicated',
content_type='text/plain', s3_data_type='S3Prefix')
data_channels = {'train': train_data}
# NOTE(review): bt_model is used here although its Estimator assignment is
# listed further down; this looks like cells shown out of order -- confirm.
bt_model.fit(inputs=data_channels, logs=True)
sess = sagemaker.Session()
# define the specifications of the sagemaker training instance
bt_model = sagemaker.estimator.Estimator(container,
role,
train_instance_count=2,
train_instance_type='ml.c4.2xlarge',
train_volume_size = 5,
train_max_run = 360000,
input_mode= 'File',
@RoaldSchuring
RoaldSchuring / sagemaker_functions.py
Last active July 8, 2019 02:08
setup_blazingtext_container2
# Build the S3 locations for the training corpus and the output under
# `bucket`, then resolve the "blazingtext" image URI for the session's region.
train_data = 's3://{}/wine-corpus.txt'.format(bucket)
s3_output_location = 's3://{}/output'.format(bucket)
# Region comes from the default boto3 session.
region_name = boto3.Session().region_name
# Requests the "latest" version of the "blazingtext" image for that region.
container = sagemaker.amazon.amazon_estimator.get_image_uri(region_name, "blazingtext", "latest")
print('Using SageMaker BlazingText container: {} ({})'.format(container, region_name))