Test code of `_ElmoSoftmax`.
""" | |
To use it inside ELMo script | |
To get the embeddings: | |
allennlp elmo sample_sents.txt out1.hdf5 --top | |
python -c "import h5py; f = h5py.File('out1.hdf5'); print(f['0'][:], f['0'].shape)" | |
To get probabilities: | |
allennlp elmo sample_sents.txt out2.hdf5 --top \ | |
--softmax-weight-file https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_softmax_weights.hdf5 \ | |
--softmax-vocab-file https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/vocab-2016-09-10.txt | |
python -c "import h5py; f = h5py.File('out2.hdf5'); print(f['0'][:], f['0'].shape)" | |

To save a new example softmax weight file:

    import h5py
    import numpy as np

    e = h5py.File('./allennlp/tests/fixtures/elmo/elmo_token_embeddings.hdf5', 'r')
    W = e['embedding'][:]
    b = np.zeros_like(W[:, 0])

    with h5py.File('elmo_softmax_weights.hdf5', 'w') as f:
        grp = f.create_group('softmax')
        grp.create_dataset('W', W.shape, dtype='float32', data=W)
        grp.create_dataset('b', b.shape, dtype='float32', data=b)
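
A minimal sanity check on the file just written (assumes it was saved
exactly as above, with `W` and `b` under the `softmax` group):

    import h5py

    with h5py.File('elmo_softmax_weights.hdf5', 'r') as f:
        W = f['softmax/W'][:]
        b = f['softmax/b'][:]
        # b was created as zeros over W's first axis, so the sizes must match.
        assert b.shape[0] == W.shape[0]
        print(W.shape, b.shape)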

To run the tests:

    pytest allennlp/tests/modules/elmo_test.py
"""
from allennlp.modules.elmo import _ElmoCharacterEncoder, _ElmoBiLm, _ElmoSoftmax, Elmo, batch_to_ids
from allennlp.commands.elmo import DEFAULT_OPTIONS_FILE, DEFAULT_WEIGHT_FILE, DEFAULT_SOFTMAX_FILE, DEFAULT_VOCAB_FILE


def _tokenize(text):
    """Whitespace tokenizer for the sample sentences."""
    return text.split()


if __name__ == '__main__':
    # Load the pretrained biLM and the softmax layer over its vocabulary.
    elmo_bilm = _ElmoBiLm(DEFAULT_OPTIONS_FILE, DEFAULT_WEIGHT_FILE)
    elmo_softmax = _ElmoSoftmax(DEFAULT_SOFTMAX_FILE, DEFAULT_VOCAB_FILE)

    sentences = [
        'How are you ?',
        'how are you ?',
        'How are you .',
        'You are how ?',
    ]
    sentences = [_tokenize(i) for i in sentences]

    char_ids, word_ids = batch_to_ids(sentences, elmo_softmax.vocab)
    bilm_outputs = elmo_bilm(char_ids)

    # aggregation_fun='mean' averages the backward and forward log probs.
    softmax_log_probs, softmax_mask = elmo_softmax(
        bilm_outputs, word_ids, aggregation_fun='mean')

    print(softmax_log_probs.shape)
    print(softmax_mask.shape)
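
    # A follow-up sketch: turn the masked log probabilities into a
    # per-sentence perplexity. This assumes (an assumption, not stated
    # above) that `softmax_log_probs` has shape (batch_size, num_timesteps)
    # and that `softmax_mask` is a 0/1 tensor of the same shape.
    import torch

    mask = softmax_mask.float()
    # Masked mean of the per-token log probs, then exponentiate its negation.
    avg_log_prob = (softmax_log_probs * mask).sum(dim=-1) / mask.sum(dim=-1)
    perplexity = torch.exp(-avg_log_prob)
    print(perplexity)  # one value per sentence; lower means more probable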