This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Remove duplicate descriptions, keeping the first occurrence of each in order.
# NOTE(review): indentation was lost in the original listing; the progress
# report is assumed to run once per row, not only for newly seen rows.
start_time = time.time()
unique_data = []
# Set used only for O(1) membership tests (the list lookup was O(n) per row).
# Assumes descriptions are hashable values such as str -- TODO confirm.
seen_descriptions = set()
# Iterate the column values directly instead of data['description'][i]: on a
# pandas Series that is a label lookup and fails when the index is not 0..n-1.
# Presumably `data` is a pandas DataFrame -- verify against the loading code.
for i, description in enumerate(data['description']):
    if description not in seen_descriptions:
        seen_descriptions.add(description)
        unique_data.append(description)
    if i % 5000 == 0:
        print('{0}'.format(i)+' lines have been processed')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def read_file(filepath, encoding="utf-8"):
    """Return the full contents of a text file as a single string.

    Args:
        filepath: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        The decoded contents of the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the contents are not valid in ``encoding``.
    """
    with open(filepath, encoding=encoding) as f:
        return f.read()
# Load the training corpus and break it into whitespace-separated tokens.
text = read_file('NameofYourFile.txt')
# split() with no argument splits on any run of whitespace, so newlines and
# tabs do not stay glued to neighbouring words and repeated spaces do not
# produce '' tokens (both happen with split(" ")).
tokens = text.split()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Window length: 3 words plus 1 more (later code uses the last column of each
# encoded window as the prediction target).
train_len = 3+1
# Slide a window of train_len tokens over the corpus. The stop bound is
# len(tokens) + 1 so that the final window, the one ending on the last token,
# is included; stopping at len(tokens) silently dropped it.
text_sequences = [
    tokens[end - train_len:end]
    for end in range(train_len, len(tokens) + 1)
]
| sequences = {} | |
| count = 1 | |
| for i in range(len(tokens)): | |
| if tokens[i] not in sequences: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit the tokenizer on the token windows, then encode every window with it.
# Presumably this is the Keras text Tokenizer, so each window becomes a list
# of integer ids -- confirm against the file's imports.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_sequences)
sequences = tokenizer.texts_to_sequences(text_sequences)

# Collecting some information: number of entries in the tokenizer's word_counts.
vocabulary_size = len(tokenizer.word_counts)

# Copy the encoded windows row by row into an int32 matrix with one row per
# window and train_len columns.
n_sequences = np.empty([len(sequences), train_len], dtype='int32')
for i, encoded in enumerate(sequences):
    n_sequences[i] = encoded
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Every column except the last is the model input for a window.
train_inputs = n_sequences[:, :-1]
# The last column is the target. It is passed through to_categorical with
# vocabulary_size + 1 classes (presumably one-hot encoding, with the extra
# class because tokenizer ids start at 1 -- TODO confirm).
train_targets = to_categorical(n_sequences[:, -1], num_classes=vocabulary_size + 1)
# Number of input columns per sample.
seq_len = train_inputs.shape[1]
train_inputs.shape  # bare expression; only has a visible effect as notebook cell output
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Every column except the last is the model input for a window.
train_inputs = n_sequences[:, :-1]
# The last column is the target. It is passed through to_categorical with
# vocabulary_size + 1 classes (presumably one-hot encoding, with the extra
# class because tokenizer ids start at 1 -- TODO confirm).
train_targets = to_categorical(n_sequences[:, -1], num_classes=vocabulary_size + 1)
# Number of input columns per sample.
seq_len = train_inputs.shape[1]
train_inputs.shape  # bare expression; only has a visible effect as notebook cell output
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def create_model(vocabulary_size, seq_len): | |
| model = Sequential() | |
| model.add(Embedding(vocabulary_size, seq_len,input_length=seq_len)) | |
| model.add(LSTM(50,return_sequences=True)) | |
| model.add(LSTM(50)) | |
| model.add(Dense(50,activation='relu')) | |
| model.add(Dense(vocabulary_size,activation='softmax')) | |
| opt_adam = optimizers.adam(lr=0.001) | |
| #You can simply pass 'adam' to optimizer in compile method. Default learning rate 0.001 | |
| #But here we are using adam optimizer from optimizer class to change the LR. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

# Build the network with vocabulary_size + 1 output classes, matching the
# num_classes used for the training targets.
model = create_model(vocabulary_size+1, seq_len)
path = './checkpoints/word_pred_Model4.h5'
# Create the checkpoint directory up front so the first save cannot fail
# because './checkpoints' does not exist yet.
os.makedirs(os.path.dirname(path), exist_ok=True)
# Keep only the weights with the lowest training loss seen so far.
checkpoint = ModelCheckpoint(path, monitor='loss', verbose=1, save_best_only=True, mode='min')
model.fit(train_inputs, train_targets, batch_size=128, epochs=500, verbose=1, callbacks=[checkpoint])
# Persist the fitted tokenizer next to the script. The with-block closes the
# file deterministically; the bare open() call left the handle open.
with open('tokenizer_Model4', 'wb') as tokenizer_file:
    dump(tokenizer, tokenizer_file)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reload the trained model from the same path the ModelCheckpoint callback
# writes to ('./checkpoints/word_pred_Model4.h5'); the bare file name only
# resolved if the file had been copied into the working directory by hand.
model = load_model('./checkpoints/word_pred_Model4.h5')
# NOTE(review): `load` is presumably pickle.load -- only unpickle files you
# produced yourself, never untrusted input.
# The with-block closes the file; the bare open() call left the handle open.
with open('tokenizer_Model4', 'rb') as tokenizer_file:
    tokenizer = load(tokenizer_file)
# Number of input words fed to the model per prediction.
seq_len = 3
| def gen_text(model, tokenizer, seq_len, seed_text, num_gen_words): | |
| output_text = [] | |
| input_text = seed_text | |
| for i in range(num_gen_words): | |
| encoded_text = tokenizer.texts_to_sequences([input_text])[0] | |
| pad_encoded = pad_sequences([encoded_text], maxlen=seq_len,truncating='pre') | |
| pred_word_ind = model.predict_classes(pad_encoded,verbose=0)[0] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Placeholders and sessions are TF1 graph-mode APIs, so eager execution
# needs to be disabled first in TF2.x.
tf.compat.v1.disable_eager_execution()

# Graph definition: y is the matrix product of a 1024 x 1024 float32
# placeholder with itself.
x = tf.compat.v1.placeholder(tf.float32, shape=(1024, 1024))
y = tf.matmul(x, x)

with tf.compat.v1.Session() as sess:
    # Feed a random matrix whose shape matches the placeholder.
    rand_array = np.random.rand(1024, 1024)
    product = sess.run(y, feed_dict={x: rand_array})
    print(product)  # Will succeed.
OlderNewer