This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import cPickle as pickle | |
import gym | |
import torch | |
import torch.nn as nn | |
from torch import optim | |
from torch.autograd import Variable | |
from torch import optim | |
import torch.nn.functional as F |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Basic text data cleaning script | |
Tokenization, remove punctuation | |
''' | |
import sys | |
import re | |
import string | |
from nltk.tokenize import word_tokenize |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Example: print flags as key/value pairs so that they can be saved as JSON. | |
tf version '1.10.0' | |
''' | |
import tensorflow as tf | |
tf.app.flags.DEFINE_string('source_vocabulary', 'data/europarl-v7.1.4M.de.json', 'Path to source vocabulary') | |
tf.app.flags.DEFINE_string('target_vocabulary', 'data/europarl-v7.1.4M.fr.json', 'Path to target vocabulary') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def log_softmax(x):
    """Return the log of the softmax of *x*, computed stably in log space.

    The softmax is taken over all elements of *x* (a single global
    normalizer), matching the original implementation.

    Evaluating ``log(exp(x - max) / sum(exp(x - max)))`` literally yields
    ``-inf`` for entries whose exponential underflows to zero. Using the
    identity ``log_softmax(x) = (x - max) - log(sum(exp(x - max)))`` keeps
    those entries finite.

    Args:
        x: array-like of scores (logits).

    Returns:
        numpy array with the same shape as *x* holding log-probabilities.

    Raises:
        ValueError: if *x* is empty (raised by ``np.max``).
    """
    x = np.asarray(x)
    # Subtract the maximum so the largest exponent is exp(0) == 1 (no overflow).
    shifted = x - np.max(x)
    # Stay in log space: never take the log of an underflowed probability.
    return shifted - np.log(np.sum(np.exp(shifted)))
def lognormalize(x, axis=0):
    """Convert unnormalized log-values into normalized probabilities.

    Computes ``exp(x - logsumexp(x))`` where the log-sum-exp is obtained
    with ``np.logaddexp.reduce``, so no intermediate exponential is taken
    before the normalizer has been subtracted.

    Args:
        x: array-like of log-domain values.
        axis: axis along which to normalize. The default ``0`` matches the
            default axis of ``np.logaddexp.reduce`` used by the original
            code; pass ``None`` to normalize over all elements.

    Returns:
        numpy array with the same shape as *x* whose entries sum to 1
        along *axis*.
    """
    x = np.asarray(x)
    # keepdims lets the normalizer broadcast back against x for any axis.
    log_total = np.logaddexp.reduce(x, axis=axis, keepdims=True)
    return np.exp(x - log_total)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Example: generate basic documentation for arguments. :) | |
python generate_doc.py -md > doc.md | |
""" | |
import argparse | |
def add_md_help_argument(parser): | |
""" md help parser """ | |
parser.add_argument('-md', action=MarkdownHelpAction, | |
help='print Markdown-formatted help text and exit.') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Download bert from command line | |
wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1f_LEWVgrtZLRuoiExJa5fNzTS8-WcAX9' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1f_LEWVgrtZLRuoiExJa5fNzTS8-WcAX9" -O pytorch_model_uncased_L-12_H-768_A-12.bin && rm -rf /tmp/cookies.txt |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
https://www.clips.uantwerpen.be/conll2001/clauses/ | |
Clauses are word sequences which contain a subject and a predicate. Here is an example of a sentence and its clauses obtained from Wall Street Journal section 15 of the Penn Treebank [MSM93]: | |
(S The deregulation of railroads and trucking companies | |
(SBAR that | |
(S began in 1980) | |
) | |
enabled | |
(S shippers to bargain for transportation) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from flair.data import Sentence | |
from flair.models import SequenceTagger | |
import sys | |
class FlairChunker(): | |
def __init__(self): | |
self.chunker = SequenceTagger.load('chunk') | |
def get_chunk_spans(self, s): | |
sentence = Sentence(s) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class GlobalMaxPooling1D(nn.Module): | |
''' | |
https://keras.io/api/layers/pooling_layers/global_max_pooling1d/ | |
Code: https://discuss.pytorch.org/t/equivalent-of-keras-globalmaxpooling1d/45770/5 | |
Input: | |
* If data_format='channels_last': 3D tensor with shape: (batch_size, steps, features) | |
* If data_format='channels_first': 3D tensor with shape: (batch_size, features, steps) | |
Output: | |
* 2D tensor with shape (batch_size, features). | |
''' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk | |
#nltk.download('omw-1.4') | |
import tqdm | |
from nltk.corpus import wordnet as wn | |
all_nouns = [word for synset in wn.all_synsets('n') for word in synset.lemma_names()] | |
inputphrase= '' | |
wordlens = [len(w) for w in inputphrase.split()] | |
t=0 |
OlderNewer