Why does training loss not go down at all?
# Imports below are assumed from the Keras 0.x Graph API used in this snippet.
import random

import numpy as np

from keras.models import Graph
from keras.layers.core import Dense, Dropout, Lambda
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.preprocessing.sequence import pad_sequences
from keras.utils.visualize_util import plot


def get_H_n(X):
    ans = X[:, -1, :]  # take the last element along the time dimension
    return ans
def build_model(options, verbose=False):
    model = Graph()
    k = 2 * options.lstm_units  # forward + backward LSTM outputs are concatenated
    L = options.xmaxlen
    N = options.xmaxlen + options.ymaxlen + 1  # +1 for the delimiter token
    print("x len", L, "total len", N)
    model.add_input(name='input', input_shape=(N,), dtype=int)
    model.add_node(Embedding(options.max_features, options.wx_emb, input_length=N),
                   name='emb', input='input')
    model.add_node(LSTM(options.lstm_units, return_sequences=True), name='forward', input='emb')
    model.add_node(LSTM(options.lstm_units, return_sequences=True, go_backwards=True),
                   name='backward', input='emb')
    model.add_node(Dropout(0.5), name='dropout', inputs=['forward', 'backward'])
    model.add_node(Lambda(get_H_n, output_shape=(k,)), name='h_n', input='dropout')
    model.add_node(Dense(1, activation='softmax'), name='out', input='h_n')
    model.add_output(name='output', input='out')
    if verbose:
        model.summary()
        plot(model, 'model.png')
    model.compile(loss={'output': 'binary_crossentropy'}, optimizer=Adam())
    return model
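# A hypothetical instantiation, assuming `options` is any object exposing the
# attributes referenced above (the values below are illustrative, not from the gist):
#
#   from argparse import Namespace
#   opts = Namespace(lstm_units=64, wx_emb=100, max_features=20000,
#                    xmaxlen=20, ymaxlen=5, batch_size=32, epochs=10)
#   m = build_model(opts, verbose=False)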
def create_train_examples(X, Y, yspace, num=-1, neglabel=0.0):
    '''
    :param X: X sequences
    :param Y: Y sequences
    :param yspace: label space from which negatives are sampled
    :param num: number of negatives per example; -1 means use all of yspace
    :param neglabel: label assigned to negative pairs (0 or -1 per your taste)
    :return: X_inp, Y_inp, outp such that outp is 1.0 if (x, y) is a true pair, else neglabel
    '''
    X_inp = []
    Y_inp = []
    outp = []
    for x, y in zip(X, Y):
        # the true pair gets a positive label
        X_inp.append(x)
        Y_inp.append(y)
        outp.append(1.0)
        if num == -1:
            neg_samples = yspace[:]  # copy, then drop the true y
            neg_samples.remove(y)
        else:
            neg_samples = [i for i in random.sample(yspace, num) if i != y]
        for yn in neg_samples:
            X_inp.append(x)
            Y_inp.append(yn)
            outp.append(neglabel)
    return X_inp, Y_inp, outp
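# Worked example (hypothetical values): with X=[x1], Y=[y1], yspace=[y1, y2, y3]
# and num=-1, the call returns
#   X_inp = [x1, x1, x1]
#   Y_inp = [y1, y2, y3]
#   outp  = [1.0, 0.0, 0.0]
# i.e. one positive pair followed by one negative pair per remaining label.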
def concat_in_out(X, y, vocab):
    numex = X.shape[0]  # number of examples
    inp_train = np.concatenate((X, vocab["DELIMITER"] * np.ones(numex).reshape(numex, 1), y), axis=1)
    return inp_train
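# Shape sketch (assuming padded inputs): if X is (numex, xmaxlen) and y is
# (numex, ymaxlen), the result is (numex, xmaxlen + 1 + ymaxlen), with the
# DELIMITER id in the middle column, matching N = xmaxlen + ymaxlen + 1
# expected by build_model.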
X_train, y_train, z_train = create_train_examples(X_train, y_train, yspace, 10)
X_dev, y_dev, z_dev = create_train_examples(X_dev, y_dev, yspace, 10)

xpadval = vocab["## UNK ##"]
X_train = pad_sequences(X_train, maxlen=options.xmaxlen, value=xpadval, truncating="post")
X_dev = pad_sequences(X_dev, maxlen=options.xmaxlen, value=xpadval, truncating="post")
ypadval = vocab["## UNK ##"]
y_train = pad_sequences(y_train, maxlen=options.ymaxlen, value=ypadval, padding="post")
y_dev = pad_sequences(y_dev, maxlen=options.ymaxlen, value=ypadval, padding="post")

y_train = np.array(y_train)
y_dev = np.array(y_dev)
z_train = np.array(z_train)
z_dev = np.array(z_dev)

inp_train = concat_in_out(X_train, y_train, vocab)
inp_dev = concat_in_out(X_dev, y_dev, vocab)

model = build_model(options, True)
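# train_dict and dev_dict are not defined in this snippet; with the Graph API
# they would be dicts keyed by the node names declared in build_model, e.g.
# (assumed, not part of the original gist):
#
#   train_dict = {'input': inp_train, 'output': z_train}
#   dev_dict = {'input': inp_dev, 'output': z_dev}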
history = model.fit(train_dict,
                    batch_size=options.batch_size,
                    nb_epoch=options.epochs,
                    validation_data=dev_dict,
                    callbacks=[EarlyStopping(monitor='val_loss',
                                             patience=20,
                                             verbose=0,
                                             mode='auto')])