This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import pdfminer.high_level | |
| import datetime | |
| import requests | |
| import sys | |
| import os | |
| import re | |
| import unidecode | |
| import collections | |
| def split(delimiters, string, maxsplit=0): | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | def text_to_index(sentence): | |
| # Remove punctuation characters except for the apostrophe | |
| translator = str.maketrans('', '', string.punctuation.replace("'", '')) | |
| tokens = sentence.translate(translator).lower().split() | |
| return np.array([1] + [word_index[t] if t in word_index else 2 for t in tokens]) | |
| def print_predictions(sentences, classifier): | |
| indexes = [text_to_index(sentence) for sentence in sentences] | |
| x = sequence.pad_sequences(indexes, | |
| maxlen=sentence_size, | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | def my_initializer(shape=None, dtype=tf.float32, partition_info=None): | |
| assert dtype is tf.float32 | |
| return embedding_matrix | |
# Build an Estimator around ``cnn_model_fn`` whose params carry
# ``my_initializer`` under the 'embedding_initializer' key, using
# <model_dir>/cnn_pretrained as its model directory, then hand it to
# ``train_and_evaluate``.
params = {'embedding_initializer': my_initializer}
cnn_pretrained_classifier = tf.estimator.Estimator(
    model_fn=cnn_model_fn,
    model_dir=os.path.join(model_dir, 'cnn_pretrained'),
    params=params)
train_and_evaluate(cnn_pretrained_classifier)
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | embedding_matrix = np.random.uniform(-1, 1, size=(vocab_size, embedding_size)) | |
| for w, i in word_index.items(): | |
| v = embeddings.get(w) | |
| if v is not None and i < vocab_size: | |
| embedding_matrix[i] = v | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | embeddings = {} | |
| with open('glove.6B.50d.txt', 'r', encoding='utf-8') as f: | |
| for line in f: | |
| values = line.strip().split() | |
| w = values[0] | |
| vectors = np.asarray(values[1:], dtype='float32') | |
| embeddings[w] = vectors | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(100) | |
| _, final_states = tf.nn.dynamic_rnn( | |
| lstm_cell, inputs, sequence_length=features['len'], dtype=tf.float32) | |
| logits = tf.layers.dense(inputs=final_states.h, units=1) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | initializer = tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0)) | |
| params = {'embedding_initializer': initializer} | |
| cnn_classifier = tf.estimator.Estimator(model_fn=model_fn, | |
| model_dir=os.path.join(model_dir, 'cnn'), | |
| params=params) | |
| train_and_evaluate(cnn_classifier) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | head = tf.contrib.estimator.binary_classification_head() | |
| optimizer = tf.train.AdamOptimizer() | |
| def _train_op_fn(loss): | |
| tf.summary.scalar('loss', loss) | |
| return optimizer.minimize( | |
| loss=loss, | |
| global_step=tf.train.get_global_step()) | |
| return head.create_estimator_spec( | |
| features=features, | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | training = (mode == tf.estimator.ModeKeys.TRAIN) | |
| dropout_emb = tf.layers.dropout(inputs=input_layer, | |
| rate=0.2, | |
| training=training) | |
| conv = tf.layers.conv1d( | |
| inputs=dropout_emb, | |
| filters=32, | |
| kernel_size=3, | |
| padding="same", | |
| activation=tf.nn.relu) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | input_layer = tf.contrib.layers.embed_sequence( | |
| features['x'], | |
| vocab_size, | |
| embedding_size, | |
| initializer=params['embedding_initializer']) | 
NewerOlder