This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from dgl.nn.tensorflow import GATConv | |
class GAT(tf.keras.Model): | |
def __init__(self, feat_dim, hidden_dim, class_num, num_heads=8, feat_drop=0.5):
    """Create the two graph-attention layers of the model.

    Args:
        feat_dim: Size of each input node feature vector.
        hidden_dim: Per-head output size of the first attention layer.
        class_num: Output size of the second (single-head) layer.
        num_heads: Number of attention heads in the first layer.
        feat_drop: Feature dropout rate passed to both layers.
    """
    super(GAT, self).__init__()
    self.num_heads = num_heads
    self.h1 = GATConv(
        in_feats=feat_dim,
        out_feats=hidden_dim,
        feat_drop=feat_drop,
        num_heads=self.num_heads,
        allow_zero_in_degree=True,
    )
    # The second layer's input width is hidden_dim * num_heads.
    # NOTE(review): presumably call() flattens the first layer's per-head
    # outputs before feeding them here -- confirm against call().
    self.h2 = GATConv(
        in_feats=hidden_dim * self.num_heads,
        out_feats=class_num,
        feat_drop=feat_drop,
        num_heads=1,
        allow_zero_in_degree=True,
    )
def call(self, g, features): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gensim
import gensim.downloader

# Pretrained 'glove-twitter-50' word vectors, loaded through gensim's
# downloader API; `model` is what the augmentation helpers query.
model = gensim.downloader.load('glove-twitter-50')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def nn_sentence_aug(sentence, factor=2, model=None):
    """Augment a sentence by swapping each word for its nearest neighbours.

    For every whitespace-separated word, the ``factor`` most similar words
    reported by ``model.most_similar`` are substituted one at a time, giving
    one new sentence per (position, neighbour) pair.

    Args:
        sentence: Text to augment; tokenised with ``str.split()``.
        factor: Number of neighbours to substitute for each word.
        model: Object exposing ``most_similar(word, topn=...)`` whose results
            carry the replacement word at index 0 (e.g. gensim
            ``KeyedVectors``). It has a ``None`` default only because it
            follows the defaulted ``factor`` parameter; it is required.

    Returns:
        List of augmented sentences, ordered by word position and then by
        the order in which ``most_similar`` returned the neighbours. Words
        for which ``most_similar`` raises ``KeyError`` produce no variants.

    Raises:
        ValueError: If ``model`` is not supplied.
    """
    if model is None:
        raise ValueError("nn_sentence_aug requires a model with most_similar()")

    words = sentence.split()
    sentences = []
    for i, word in enumerate(words):
        try:
            neighbours = model.most_similar(word, topn=factor)
        except KeyError:
            # Word unknown to the model: nothing to substitute here.
            continue
        sentences.extend(
            ' '.join(words[:i] + [neighbour[0]] + words[i + 1:])
            for neighbour in neighbours
        )
    return sentences
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def ngram_paraphrase(words, phrases, ngram, factor=2): | |
sentences = [] | |
for i in range(0, len(words)-ngram): | |
phrase = '' | |
for j in range(0, ngram): | |
phrase += words[i+j] + ' ' | |
phrase = phrase.strip() | |
if not phrase in phrases: | |
continue | |
for p in list(phrases[phrase])[0:factor]: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.decomposition import PCA | |
def project(words, phrases, model): | |
embeddings = [] | |
associated_words = [] | |
pca = PCA(n_components=2) | |
for word in words: | |
associated_words.append(word) | |
embeddings.append(model[word]) | |
for pphrase in phrases[word]: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow_datasets as tfds
import tensorflow as tf

# Load the IMDB reviews dataset together with its metadata, then pull out
# the train and test splits by key.
dataset, info = tfds.load('imdb_reviews', with_info=True,
                          as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']
def experiment(train, test): | |
VOCAB_SIZE = 1000 | |
encoder = tf.keras.layers.experimental.preprocessing.TextVectorization(max_tokens=VOCAB_SIZE, standardize=None) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def preprocess(dataset, fn):
    """Apply a text transform to every example and rebuild a tf.data dataset.

    Args:
        dataset: Iterable of ``(example, label)`` pairs whose items expose
            ``.numpy()``; ``example.numpy()`` must be UTF-8 encoded bytes.
        fn: Callable that takes the decoded string and returns the
            transformed feature.

    Returns:
        A ``tf.data.Dataset`` of ``(feature, label)`` pairs in input order.
    """
    features = []
    labels = []
    # The whole dataset is materialised in Python lists so that fn can be
    # an ordinary Python function working on str.
    for example, label in dataset:
        features.append(fn(example.numpy().decode('utf-8')))
        labels.append(label.numpy())
    features_dataset = tf.data.Dataset.from_tensor_slices(features)
    labels_dataset = tf.data.Dataset.from_tensor_slices(labels)
    return tf.data.Dataset.zip((features_dataset, labels_dataset))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def lower(txt):
    """Return *txt* converted to lower case."""
    return txt.lower()
# Lower-cased versions of the train and test splits.
train = preprocess(train_dataset, lower)
test = preprocess(test_dataset, lower)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
def lower_remove_punctuation(txt):
    """Lower-case *txt* and delete punctuation.

    Every character that is neither a word character (``\\w``, which
    includes digits and the underscore) nor whitespace is removed.
    Whitespace itself is left untouched.
    """
    txt = txt.lower()
    return re.sub(r'[^\w\s]', '', txt)
# Lower-cased, punctuation-free versions of the train and test splits.
train = preprocess(train_dataset, lower_remove_punctuation)
test = preprocess(test_dataset, lower_remove_punctuation)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk.stem import PorterStemmer | |
from nltk.stem import WordNetLemmatizer | |
def lower_remove_punctuation_stem(txt):
    """Lower-case *txt*, strip punctuation, then Porter-stem every word.

    The cleaned text is split on whitespace and the stems are re-joined
    with single spaces, so runs of whitespace in the input are collapsed.
    """
    txt = lower_remove_punctuation(txt)
    ps = PorterStemmer()
    return ' '.join(ps.stem(w) for w in txt.split())
def lower_remove_punctuation_lemmatize(txt): |