This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
USAGE | |
model = build_model() | |
attributor = Attributor(model, target_class=1, tokenizer=tokenizer) | |
... | |
# viz = interactive visualization that you can dump into a file and look at in a web browser | |
# t2a = map of token to its attribution score | |
viz, t2a, attrs, y_prob, y_hat = attributor.attr_and_visualize( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.decomposition import TruncatedSVD | |
def compute_pc(X,npc=1): | |
""" | |
Compute the principal components. | |
X: numpy array [data, features] | |
npc: num principal components | |
""" | |
svd = TruncatedSVD(n_components=npc, n_iter=7, random_state=0) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def rm_refs(x): | |
REF_RE = '<ref([-\w=" <>]+)?>.*?<([ ]+)?\/([ ]+)?ref>' | |
x = re.sub(REF_RE, ' ', x) | |
# leading </ref> | |
if '</ref>' in x: | |
x = re.sub(REF_RE, ' ', '<ref>' + x) | |
# trailing <ref> | |
if '<ref' in x: | |
x = re.sub(REF_RE, ' ', x + '</ref>') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Usage (for our feedforward context): | |
make sure you initialize the layer with | |
score_fn='bahdanau' | |
and then when you use the module in your forward() | |
method, you can feed it a vector of zeros for your query: | |
query = torch.zeros(rnn_outputs[:, 0, :].shape) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# working version of https://gist.github.com/jayrambhia/1678382 | |
import urllib2 | |
import json | |
def search(query):
    """Search the movie database for ``query`` and return the decoded JSON reply.

    The keyword is percent-encoded before it is placed in the URL, so
    spaces, ``&``, ``#`` and non-ASCII characters are transmitted as part
    of the keyword instead of corrupting the request. Callers should pass
    the raw keyword, not a pre-encoded one.

    query: search keyword; non-string values are formatted with ``%s``.
    Returns whatever ``json.loads`` produces for the response body.
    Errors raised by ``urllib2.urlopen`` or ``json.loads`` propagate unchanged.
    """
    # Imported locally so the block stays self-contained; the helper lives
    # in ``urllib`` on Python 2 and in ``urllib.parse`` on Python 3.
    try:
        from urllib import quote_plus
    except ImportError:
        from urllib.parse import quote_plus

    # Encode text to UTF-8 bytes first: Python 2's quote_plus cannot handle
    # non-ASCII unicode objects directly.
    if not isinstance(query, bytes):
        query = (u'%s' % query).encode('utf-8')
    get_url = 'http://theapache64.xyz:8080/movie_db/search?keyword=%s' % quote_plus(query)

    connection = urllib2.urlopen(get_url)
    try:
        response = connection.read().decode('utf-8')
    finally:
        # Release the socket even if reading or decoding fails.
        connection.close()
    return json.loads(response)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env bash | |
# Processes the v2 subtitles/aspec corpora into one combined corpus | |
# Requirements | |
# - Processed subtitlesv2 corpus | |
# - ASPEC | |
CORPUS1=$1 | |
CORPUS2=$2 | |
TARGET=$3 # target language = [ja, zh, fr] | |
LOC="/scr/rpryzant/chinese_english_corpora/" |