import json
import keras
import keras.preprocessing.text as kpt
from keras.preprocessing.text import Tokenizer
import numpy as np

# extract data from a csv
# notice the cool options to skip lines at the beginning
# and to only take data from certain columns
training = np.genfromtxt('/path/to/your/data.csv', delimiter=',', skip_header=1, usecols=(1, 3), dtype=None)
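# note: on Python 3 with a recent NumPy, dtype=None reads the text column in
# as byte strings; passing encoding='utf-8' to np.genfromtxt keeps the tweets
# as ordinary str objects (assumption: the CSV is UTF-8 encoded)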

# create our training data from the tweets
train_x = [x[1] for x in training]
# index all the sentiment labels
train_y = np.asarray([x[0] for x in training])

# only work with the 3000 most popular words found in our dataset
max_words = 3000

# create a new Tokenizer
tokenizer = Tokenizer(num_words=max_words)
# feed our tweets to the Tokenizer
tokenizer.fit_on_texts(train_x)
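# note: num_words only caps the vocabulary used when sequences are turned into
# matrices later; word_index itself still contains every word seen during fitting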

# Tokenizers come with a convenient list of words and IDs
dictionary = tokenizer.word_index
# Let's save this out so we can use it later
with open('dictionary.json', 'w') as dictionary_file:
    json.dump(dictionary, dictionary_file)

def convert_text_to_index_array(text):
    # `text_to_word_sequence` lowercases the text, strips punctuation,
    # and splits it into a list of words (it does not pad texts to a
    # common length); each word is then mapped to its ID in word_index
    return [dictionary[word] for word in kpt.text_to_word_sequence(text)]
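
# The lookup above assumes every word is already in `dictionary`, which holds
# for the tweets the Tokenizer was fitted on. A hedged variant (hypothetical,
# not part of the original script) that can safely be reused on unseen text
# skips out-of-vocabulary words instead of raising a KeyError:
def convert_text_to_index_array_safe(text):
    # keep only words the Tokenizer has actually seen
    return [dictionary[word]
            for word in kpt.text_to_word_sequence(text)
            if word in dictionary]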

allWordIndices = []
# for each tweet, change each token to its ID in the Tokenizer's word_index
for text in train_x:
    wordIndices = convert_text_to_index_array(text)
    allWordIndices.append(wordIndices)

# now we have a list of all tweets converted to index arrays.
# cast as an array for future usage.
allWordIndices = np.asarray(allWordIndices)
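# note: the rows have different lengths, so newer NumPy versions warn about or
# reject this ragged cast unless you pass dtype=object; alternatively,
# sequences_to_matrix below also accepts the plain Python list of lists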

# create one-hot matrices out of the indexed tweets
train_x = tokenizer.sequences_to_matrix(allWordIndices, mode='binary')
# treat the labels as categories
train_y = keras.utils.to_categorical(train_y, 2)
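# train_x is now a dense (num_tweets, max_words) matrix of 0s and 1s and
# train_y has shape (num_tweets, 2); the bag-of-words matrix grows with the
# number of tweets, so very large datasets can use a lot of memory here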

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
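
# the network: a max_words-dimensional bag-of-words input feeding a 512-unit
# relu layer and a 256-unit sigmoid layer (each followed by dropout), ending
# in a 2-way softmax over the sentiment classes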
model = Sequential()
model.add(Dense(512, input_shape=(max_words,), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='sigmoid'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))

model.compile(loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])

model.fit(train_x, train_y,
    batch_size=32,
    epochs=5,
    verbose=1,
    validation_split=0.1,
    shuffle=True)
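# note: validation_split always takes the last 10% of the samples before any
# shuffling happens, so if the CSV is sorted by sentiment, shuffle the rows
# first or the validation set may contain only one class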

model_json = model.to_json()
with open('model.json', 'w') as json_file:
    json_file.write(model_json)

model.save_weights('model.h5')

print('saved model!')
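
# A minimal loading sketch (not part of the original script) showing how the
# files written above can be read back later to make predictions:
from keras.models import model_from_json

with open('model.json', 'r') as json_file:
    loaded_model = model_from_json(json_file.read())
loaded_model.load_weights('model.h5')
# compiling again is only needed if you want to evaluate or keep training
loaded_model.compile(loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])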