eileen-code4fun / gan_def.py
Created January 7, 2022 20:05
Graph Attention Network Definition
import tensorflow as tf
from dgl.nn.tensorflow import GATConv

class GAT(tf.keras.Model):
  def __init__(self, feat_dim, hidden_dim, class_num):
    super(GAT, self).__init__()
    self.num_heads = 8
    # Two GAT layers: 8-head attention, then a single head over the concatenated head outputs.
    self.h1 = GATConv(in_feats=feat_dim, out_feats=hidden_dim, feat_drop=0.5, num_heads=self.num_heads, allow_zero_in_degree=True)
    self.h2 = GATConv(in_feats=hidden_dim*self.num_heads, out_feats=class_num, feat_drop=0.5, num_heads=1, allow_zero_in_degree=True)

  def call(self, g, features):
    # Body reconstructed (the gist is truncated here): the standard two-layer GAT forward pass.
    h = self.h1(g, features)
    h = tf.nn.elu(tf.reshape(h, (features.shape[0], -1)))  # concatenate the 8 heads
    return tf.squeeze(self.h2(g, h), axis=1)  # per-node class scores
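A hypothetical smoke test, not part of the gist: it assumes DGL's TensorFlow backend is active (DGLBACKEND=tensorflow) and uses a made-up 3-node graph with 10-dimensional features.
import dgl
import numpy as np

g = dgl.graph(([0, 1, 2], [1, 2, 0]))  # tiny directed 3-cycle
feats = tf.constant(np.random.rand(3, 10), dtype=tf.float32)
net = GAT(feat_dim=10, hidden_dim=8, class_num=2)
logits = net(g, feats)  # per-node class scores, shape (3, 2)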
eileen-code4fun / load_wm.py
Last active January 15, 2022 14:59
Load Word Embeddings
import gensim.downloader

# Download on first use, then load 50-dimensional GloVe vectors trained on tweets.
model = gensim.downloader.load('glove-twitter-50')
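The returned model is a gensim KeyedVectors object, so nearest-neighbour queries work directly; a quick sanity check (the query word is just an illustration):
print(model.most_similar('happy', topn=3))  # three closest words by cosine similarity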
eileen-code4fun / we_aug.py
Created January 15, 2022 14:26
Word Embedding Augmentation
def nn_sentence_aug(sentence, model, factor=2):
  # Replace each word with its `factor` nearest embedding-space
  # neighbours, producing one augmented sentence per substitution.
  # (The original signature put the required `model` after the
  # defaulted `factor`, which is a syntax error; reordered here.)
  sentences = []
  words = sentence.split()
  for i in range(len(words)):
    if words[i] not in model:
      continue  # skip out-of-vocabulary words
    ps = model.most_similar(words[i], topn=factor)
    for p in ps:
      new_words = words[0:i] + [p[0]] + words[i+1:]  # p is a (word, similarity) pair
      sentences.append(' '.join(new_words))
  return sentences
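An illustrative call (the variants actually produced depend on the loaded embeddings):
print(nn_sentence_aug('the movie was great', model, factor=1))  # one variant per word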
eileen-code4fun / ppdb_aug.py
Last active January 15, 2022 15:13
Paraphrase Augmentation
def ngram_paraphrase(words, phrases, ngram, factor=2):
  # Slide an n-gram window over the sentence; whenever the window
  # matches an entry in the paraphrase table, emit one new sentence
  # per paraphrase (up to `factor` of them).
  sentences = []
  for i in range(0, len(words) - ngram + 1):  # +1 so the final window is included
    phrase = ' '.join(words[i:i+ngram])
    if phrase not in phrases:
      continue
    for p in list(phrases[phrase])[0:factor]:
      # Loop body reconstructed (the gist is truncated here): splice
      # the paraphrase in place of the matched n-gram.
      sentences.append(' '.join(words[0:i] + p.split() + words[i+ngram:]))
  return sentences
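A hypothetical paraphrase table shows the expected shapes; the gists don't include how phrases is built, though the filename suggests PPDB:
phrases = {'very good': {'excellent', 'really good'}}
print(ngram_paraphrase('the movie was very good'.split(), phrases, ngram=2))
# Two sentences, one per paraphrase of 'very good' (set order may vary).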
eileen-code4fun / project_we.py
Created January 15, 2022 15:09
Project Paraphrases Embedding
from sklearn.decomposition import PCA

def project(words, phrases, model):
  # Gather the embedding of each word and of its paraphrases, then
  # project them all into 2-D with PCA (e.g. for plotting).
  embeddings = []
  associated_words = []
  pca = PCA(n_components=2)
  for word in words:
    associated_words.append(word)
    embeddings.append(model[word])
    for pphrase in phrases[word]:
      # Remainder reconstructed (the gist is truncated here): collect
      # each paraphrase that has an embedding of its own.
      if pphrase not in model:
        continue
      associated_words.append(pphrase)
      embeddings.append(model[pphrase])
  return associated_words, pca.fit_transform(embeddings)
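A sketch of how the projection might be visualised; the matplotlib plotting, word list, and paraphrase table here are assumptions, not part of the gist:
import matplotlib.pyplot as plt

labels, points = project(['good'], {'good': {'great', 'nice'}}, model)
plt.scatter(points[:, 0], points[:, 1])
for label, (x, y) in zip(labels, points):
  plt.annotate(label, (x, y))
plt.show()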
eileen-code4fun / nlp_classification.py
Created January 17, 2022 20:40
NLP Classification
import tensorflow_datasets as tfds
import tensorflow as tf

dataset, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']

def experiment(train, test):
  VOCAB_SIZE = 1000
  # Learn a 1000-token vocabulary; standardize=None so the preprocessing
  # applied beforehand is the only text normalisation in play.
  encoder = tf.keras.layers.experimental.preprocessing.TextVectorization(max_tokens=VOCAB_SIZE, standardize=None)
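  # The gist is truncated above. What follows is a sketch of a plausible
  # experiment body in the style of the TensorFlow text-classification
  # tutorial; layer sizes, batch size, and epoch count are assumptions.
  encoder.adapt(train.batch(256).map(lambda text, label: text))
  model = tf.keras.Sequential([
    encoder,
    tf.keras.layers.Embedding(VOCAB_SIZE, 64, mask_zero=True),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(1)  # single logit for binary sentiment
  ])
  model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), optimizer='adam', metrics=['accuracy'])
  model.fit(train.shuffle(10000).batch(64), epochs=3)
  return model.evaluate(test.batch(64))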
eileen-code4fun / no_op.py
Last active January 17, 2022 20:44
NO_OP
import tensorflow as tf

def preprocess(dataset, fn):
  # Eagerly apply the string transformation `fn` to every example,
  # then rebuild a tf.data.Dataset from the transformed pairs.
  features = []
  labels = []
  for example, label in dataset:
    features.append(fn(example.numpy().decode('utf-8')))
    labels.append(label.numpy())
  features_dataset = tf.data.Dataset.from_tensor_slices(features)
  labels_dataset = tf.data.Dataset.from_tensor_slices(labels)
  return tf.data.Dataset.zip((features_dataset, labels_dataset))
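Given the NO_OP title, this presumably serves as the no-preprocessing baseline; a hypothetical run with an identity function:
def no_op(txt):
  return txt

experiment(preprocess(train_dataset, no_op), preprocess(test_dataset, no_op))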
eileen-code4fun / nlp_lower.py
Last active January 17, 2022 20:50
nlp_lower
def lower(txt):
  return txt.lower()

train = preprocess(train_dataset, lower)
test = preprocess(test_dataset, lower)

import re

def lower_remove_punctuation(txt):
  txt = txt.lower()
  return re.sub(r'[^\w\s]', '', txt)  # strip everything but word characters and whitespace

train = preprocess(train_dataset, lower_remove_punctuation)
test = preprocess(test_dataset, lower_remove_punctuation)

from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer

def lower_remove_punctuation_stem(txt):
  txt = lower_remove_punctuation(txt)
  ps = PorterStemmer()
  words = [ps.stem(w) for w in txt.split()]
  return ' '.join(words)

def lower_remove_punctuation_lemmatize(txt):
  # Body reconstructed (the gist is truncated here), mirroring the
  # stemming variant above; requires nltk.download('wordnet') first.
  txt = lower_remove_punctuation(txt)
  wnl = WordNetLemmatizer()
  words = [wnl.lemmatize(w) for w in txt.split()]
  return ' '.join(words)
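Presumably the stemming and lemmatizing variants feed the same experiment as the simpler ones above; a hypothetical comparison run:
for fn in (lower_remove_punctuation_stem, lower_remove_punctuation_lemmatize):
  experiment(preprocess(train_dataset, fn), preprocess(test_dataset, fn))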