mbednarski · May 28, 2022 15:43 · unedited-despair · May 28, 2022
diff --git a/vocab.py b/vocab.py
 # Build the vocabulary: every distinct token of `tokenized_corpus`, kept in
 # order of first appearance.  `word2idx` is filled in the same pass so the
 # "already seen?" check is a dict lookup rather than a scan of the
 # ever-growing `vocabulary` list (which made the loop quadratic).
 vocabulary = []
 word2idx = {}
 for sentence in tokenized_corpus:
     for token in sentence:
         if token not in word2idx:
             # The next free index equals the number of tokens stored so far.
             word2idx[token] = len(vocabulary)
             vocabulary.append(token)

 # Reverse lookup table: index -> token.
 idx2word = dict(enumerate(vocabulary))

 vocabulary_size = len(vocabulary)
	# Build the vocabulary: every distinct token of `tokenized_corpus`, kept in
	# order of first appearance.  The loop nesting is written out explicitly
	# (sentence -> token -> membership test), and `word2idx` is filled in the
	# same pass so the "already seen?" check is a dict lookup rather than a
	# scan of the ever-growing `vocabulary` list.
	vocabulary = []
	word2idx = {}
	for sentence in tokenized_corpus:
	    for token in sentence:
	        if token not in word2idx:
	            # The next free index equals the number of tokens stored so far.
	            word2idx[token] = len(vocabulary)
	            vocabulary.append(token)

	# Reverse lookup table: index -> token.
	idx2word = dict(enumerate(vocabulary))

	vocabulary_size = len(vocabulary)