l1m2p3 · July 31, 2018 10:04
diff --git a/dynamo_access.py b/dynamo_access.py
 import boto3
 import numpy
 import pickle
 import spacy

 # Name of the DynamoDB table that put_words / get_words write to and read from.
 table_name = 'wordvec' # table name on DynamoDB

 # batch size specified by DynamoDB. See DynamoDB's doc for more details
 # write_batch_size caps the items sent per batch_write_item call in put_words;
 # read_batch_size caps the keys sent per batch_get_item call in get_words.
 write_batch_size = 25
 read_batch_size = 100


 # DynamoDB client
 # Created once at import time; no region or credentials are passed explicitly.
 client = boto3.client('dynamodb')
 # helps turn words into tokens, from which we can find word vector
 # (word_to_put_req reads the .vector of the first token it produces)
 tokenizer = spacy.load('en')

 # helper function to divide list into sublists
 def sublist(l, batch_size):
     """Split ``l`` into consecutive slices of at most ``batch_size`` elements.

     Args:
         l: any sliceable sequence (list, string, ...).
         batch_size: maximum length of each slice; the last slice may be
             shorter.

     Returns:
         A list of slices of ``l`` in their original order; empty when ``l``
         is empty.
     """
     chunks = []
     for start in range(0, len(l), batch_size):
         stop = start + batch_size
         chunks.append(l[start:stop])
     return chunks

 # helper function to convert a word to a put request
 def word_to_put_req(word):
     """Build the DynamoDB ``PutRequest`` entry for a single word.

     The word is passed through the module-level ``tokenizer`` and the vector
     of the first resulting token is stored as a DynamoDB list of numbers.

     Args:
         word: the word to upload, either UTF-8 encoded bytes or text.

     Returns:
         A dict of the form ``{'PutRequest': {'Item': {...}}}`` with a string
         attribute ``word`` and a list attribute ``vector``.

     NOTE(review): an input that tokenises to nothing (e.g. an empty string)
     presumably fails on the ``[0]`` lookup -- confirm against the caller.
     """
     # ``unicode(word, encoding='utf-8')`` exists only on Python 2 and raises
     # TypeError when ``word`` is already text. Decoding only when we were
     # handed bytes works on both Python 2 and 3 and for both input kinds.
     text = word.decode('utf-8') if isinstance(word, bytes) else word
     vector = tokenizer(text)[0].vector

     return {
         'PutRequest': {
             'Item': {
                 'word': {
                     'S': text
                 },
                 'vector': {
                     # DynamoDB's wire format carries numbers as strings.
                     'L': [{'N': str(n)} for n in vector]
                 }
             }
         }
     }


 #
 # upload the word vector of every word in `words` to DynamoDB
 #
 def put_words(words):
     """Upload the vector of every word in ``words`` to the DynamoDB table.

     Words are de-duplicated, split into batches of ``write_batch_size`` and
     written with ``batch_write_item``. Items that DynamoDB reports back as
     unprocessed are re-sent until the whole batch has been accepted.

     Args:
         words: iterable of words to upload; duplicates are ignored.

     Returns:
         None.
     """
     import time  # local import: only needed for the retry back-off

     # request cannot contain duplicate keys. remove duplicates
     unique_words = list(set(words))

     for batch in sublist(unique_words, write_batch_size):
         pending = {table_name: [word_to_put_req(word) for word in batch]}
         delay = 0.05
         # batch_write_item may accept only part of a batch (e.g. when the
         # table is throttled) and returns the rest under 'UnprocessedItems'.
         # Ignoring that field silently drops words, so keep re-sending.
         while pending:
             response = client.batch_write_item(RequestItems=pending)
             pending = response.get('UnprocessedItems')
             if pending:
                 # Back off exponentially (capped) before retrying.
                 time.sleep(delay)
                 delay = min(delay * 2, 5.0)

 #
 # returns a dict mapping each word in `words` to its vector as stored on DynamoDB
 # words that are absent on DynamoDB are left out of the returned dict
 #
 def get_words(words):
     """Fetch the stored vectors for ``words`` from the DynamoDB table.

     Words are de-duplicated, split into batches of ``read_batch_size`` and
     read with ``batch_get_item``. Keys that DynamoDB reports back as
     unprocessed are requested again until every key has been answered.

     Args:
         words: iterable of words to look up; duplicates are ignored.

     Returns:
         A dict mapping each word found in the table to the raw DynamoDB list
         value of its ``vector`` attribute (``{'N': '<number>'}`` entries, as
         written by ``word_to_put_req``). Words that are not in the table are
         absent from the dict.
     """
     import time  # local import: only needed for the retry back-off

     lookup = {}
     # request cannot contain duplicate keys. remove duplicates
     unique_words = list(set(words))

     for batch in sublist(unique_words, read_batch_size):
         pending = {
             table_name: {
                 'Keys': [{'word': {'S': word}} for word in batch]
             }
         }
         delay = 0.05
         # batch_get_item may answer only part of a batch (throttling or the
         # response size cap) and returns the rest under 'UnprocessedKeys'.
         # Ignoring that field makes stored words look absent, so re-request.
         while pending:
             response = client.batch_get_item(RequestItems=pending)
             for item in response.get('Responses', {}).get(table_name, []):
                 lookup[item['word']['S']] = item['vector']['L']
             pending = response.get('UnprocessedKeys')
             if pending:
                 # Back off exponentially (capped) before retrying.
                 time.sleep(delay)
                 delay = min(delay * 2, 5.0)

     return lookup
	import boto3
	import numpy
	import pickle
	import spacy

	# Name of the DynamoDB table that put_words / get_words write to and read from.
	table_name = 'wordvec' # table name on DynamoDB

	# batch size specified by DynamoDB. See DynamoDB's doc for more details
	# write_batch_size caps the items sent per batch_write_item call in put_words;
	# read_batch_size caps the keys sent per batch_get_item call in get_words.
	write_batch_size = 25
	read_batch_size = 100


	# DynamoDB client
	# Created once at import time; no region or credentials are passed explicitly.
	client = boto3.client('dynamodb')
	# helps turn words into tokens, from which we can find word vector
	# (word_to_put_req reads the .vector of the first token it produces)
	tokenizer = spacy.load('en')

	# helper function to divide list into sublists
	def sublist(l, batch_size):
	    """Split ``l`` into consecutive slices of at most ``batch_size`` elements.

	    Args:
	        l: any sliceable sequence (list, string, ...).
	        batch_size: maximum length of each slice; the last slice may be
	            shorter.

	    Returns:
	        A list of slices of ``l`` in their original order; empty when ``l``
	        is empty.
	    """
	    # The body had lost its indentation under ``def``; restored here.
	    return [l[i:i + batch_size] for i in range(0, len(l), batch_size)]

	# helper function to convert a word to a put request
	def word_to_put_req(word):
	    """Build the DynamoDB ``PutRequest`` entry for a single word.

	    The word is passed through the module-level ``tokenizer`` and the vector
	    of the first resulting token is stored as a DynamoDB list of numbers.

	    Args:
	        word: the word to upload, either UTF-8 encoded bytes or text.

	    Returns:
	        A dict of the form ``{'PutRequest': {'Item': {...}}}`` with a string
	        attribute ``word`` and a list attribute ``vector``.

	    NOTE(review): an input that tokenises to nothing (e.g. an empty string)
	    presumably fails on the ``[0]`` lookup -- confirm against the caller.
	    """
	    # ``unicode(word, encoding='utf-8')`` exists only on Python 2 and raises
	    # TypeError when ``word`` is already text. Decoding only when we were
	    # handed bytes works on both Python 2 and 3 and for both input kinds.
	    text = word.decode('utf-8') if isinstance(word, bytes) else word
	    vector = tokenizer(text)[0].vector

	    return {
	        'PutRequest': {
	            'Item': {
	                'word': {
	                    'S': text
	                },
	                'vector': {
	                    # DynamoDB's wire format carries numbers as strings.
	                    'L': [{'N': str(n)} for n in vector]
	                }
	            }
	        }
	    }


	#
	# upload the word vector of every word in `words` to DynamoDB
	#
	def put_words(words):
	    """Upload the vector of every word in ``words`` to the DynamoDB table.

	    Words are de-duplicated, split into batches of ``write_batch_size`` and
	    written with ``batch_write_item``. Items that DynamoDB reports back as
	    unprocessed are re-sent until the whole batch has been accepted.

	    Args:
	        words: iterable of words to upload; duplicates are ignored.

	    Returns:
	        None.
	    """
	    import time  # local import: only needed for the retry back-off

	    # request cannot contain duplicate keys. remove duplicates
	    unique_words = list(set(words))

	    for batch in sublist(unique_words, write_batch_size):
	        pending = {table_name: [word_to_put_req(word) for word in batch]}
	        delay = 0.05
	        # batch_write_item may accept only part of a batch (e.g. when the
	        # table is throttled) and returns the rest under 'UnprocessedItems'.
	        # Ignoring that field silently drops words, so keep re-sending.
	        while pending:
	            response = client.batch_write_item(RequestItems=pending)
	            pending = response.get('UnprocessedItems')
	            if pending:
	                # Back off exponentially (capped) before retrying.
	                time.sleep(delay)
	                delay = min(delay * 2, 5.0)

	#
	# returns a dict mapping each word in `words` to its vector as stored on DynamoDB
	# words that are absent on DynamoDB are left out of the returned dict
	#
	def get_words(words):
	    """Fetch the stored vectors for ``words`` from the DynamoDB table.

	    Words are de-duplicated, split into batches of ``read_batch_size`` and
	    read with ``batch_get_item``. Keys that DynamoDB reports back as
	    unprocessed are requested again until every key has been answered.

	    Args:
	        words: iterable of words to look up; duplicates are ignored.

	    Returns:
	        A dict mapping each word found in the table to the raw DynamoDB list
	        value of its ``vector`` attribute (``{'N': '<number>'}`` entries, as
	        written by ``word_to_put_req``). Words that are not in the table are
	        absent from the dict.
	    """
	    import time  # local import: only needed for the retry back-off

	    lookup = {}
	    # request cannot contain duplicate keys. remove duplicates
	    unique_words = list(set(words))

	    for batch in sublist(unique_words, read_batch_size):
	        pending = {
	            table_name: {
	                'Keys': [{'word': {'S': word}} for word in batch]
	            }
	        }
	        delay = 0.05
	        # batch_get_item may answer only part of a batch (throttling or the
	        # response size cap) and returns the rest under 'UnprocessedKeys'.
	        # Ignoring that field makes stored words look absent, so re-request.
	        while pending:
	            response = client.batch_get_item(RequestItems=pending)
	            for item in response.get('Responses', {}).get(table_name, []):
	                lookup[item['word']['S']] = item['vector']['L']
	            pending = response.get('UnprocessedKeys')
	            if pending:
	                # Back off exponentially (capped) before retrying.
	                time.sleep(delay)
	                delay = min(delay * 2, 5.0)

	    return lookup