This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch the wine-name lookup table (a CSV object) from S3 and load it into a
# DataFrame.
lookup_object = client.get_object(
    Bucket='data-science-wine-reviews',
    Key='nearest_neighbors/data/wine_reviews_select_cols.csv',
)
wine_name_lookup = pd.read_csv(lookup_object['Body'])

# result[1] is used as the row labels of the recommended wines
# (presumably the neighbour indices returned by the endpoint -- confirm against
# the model's output format). Cast each to a plain int for the .at lookups.
recommendation_indices = [int(n) for n in result[1]]
recommendations = []
for i in recommendation_indices: | |
suggested_wine = wine_name_lookup.at[i, 'Name'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Send the JSON-encoded wine vector to the deployed endpoint.
response = runtime.invoke_endpoint(
    EndpointName='sagemaker-scikit-learn-2019-07-04-13-00-07-919',
    ContentType='application/json',
    Body=wine_vector_output,
)
def decode(s, encoding="ascii", errors="ignore"):
    """Decode the bytes-like object *s* into a ``str``.

    With the default arguments, bytes that are not valid ASCII are silently
    dropped instead of raising ``UnicodeDecodeError``.
    """
    return s.decode(encoding=encoding, errors=errors)
# Read the response body, decode it to text and parse it as JSON.
result = json.loads(decode(response['Body'].read()))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Average the word vectors to obtain a single vector for the wine.
# NOTE: an empty word_vectors list raises ZeroDivisionError here.
wine_vector = sum(word_vectors) / len(word_vectors)
# .tolist() converts the vector to plain Python values so json can serialise it.
wine_vector_output = json.dumps(wine_vector.tolist())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the word-vector table from S3 and index it by word so that each payload
# entry can be looked up directly.
obj = client.get_object(Bucket='data-science-wine-reviews', Key='word_vectors_idf.csv')
wine_df = pd.read_csv(obj['Body'])
wine_df.set_index(['word'], inplace=True)

word_vectors = []
for p in payload:
    # The vector is stored as text; strip the brackets and any literal "\n"
    # sequences so that only space-separated numbers remain for parsing.
    word_vector_string = wine_df.at[p, 'word_vec_idf']
    word_vector_string = word_vector_string.replace('[', '').replace(r'\n', '').replace(']', '')
    word_vector = np.fromstring(word_vector_string, dtype=float, sep=' ')
    word_vectors.append(word_vector)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import boto3 | |
import pandas as pd | |
import numpy as np | |
from six import BytesIO | |
def lambda_handler(event, context): | |
client = boto3.client('s3') | |
runtime = boto3.client('runtime.sagemaker') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.preprocessing import normalize | |
# open the vectors.txt file containing all the trained word embeddings, extracting the descriptors & embeddings
# Count the newline-separated entries. The context manager closes the file
# handle deterministically instead of leaving it open until garbage collection.
with open('vectors.txt', 'r') as vectors_file:
    num_points = len(vectors_file.read().split('\n'))
first_line = True
index_to_word = []
with open("vectors.txt","r") as f: | |
for line_num, line in enumerate(f): | |
if first_line: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
s3 = boto3.resource('s3') | |
key = bt_model.model_data[bt_model.model_data.find("/", 5)+1:] | |
s3.Bucket(bucket).download_file(key, 'model.tar.gz') | |
!tar -xvzf model.tar.gz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Wrap the training-data location in an S3 input description and register it
# as the 'train' channel.
train_data = sagemaker.session.s3_input(
    train_data,
    distribution='FullyReplicated',
    content_type='text/plain',
    s3_data_type='S3Prefix',
)
data_channels = {'train': train_data}
# Start training with log output enabled.
bt_model.fit(inputs=data_channels, logs=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sess = sagemaker.Session() | |
# define the specifications of the sagemaker training instance | |
bt_model = sagemaker.estimator.Estimator(container, | |
role, | |
train_instance_count=2, | |
train_instance_type='ml.c4.2xlarge', | |
train_volume_size = 5, | |
train_max_run = 360000, | |
input_mode= 'File', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# S3 locations of the training corpus and of the training output.
train_data = 's3://{}/wine-corpus.txt'.format(bucket)
s3_output_location = 's3://{}/output'.format(bucket)

# Look up the BlazingText container image for the current session's region.
region_name = boto3.Session().region_name
container = sagemaker.amazon.amazon_estimator.get_image_uri(region_name, "blazingtext", "latest")
print('Using SageMaker BlazingText container: {} ({})'.format(container, region_name))
Newer | Older