This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from transformers import BertTokenizer, BertModel, BertForMaskedLM | |
import logging | |
logging.basicConfig(level=logging.INFO)# OPTIONAL | |
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') | |
model = BertForMaskedLM.from_pretrained('bert-base-uncased') | |
model.eval() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from transformers import OpenAIGPTTokenizer, OpenAIGPTLMHeadModel | |
from transformers import GPT2Tokenizer, GPT2LMHeadModel | |
import numpy as np | |
from scipy.special import softmax | |
def model_init(model_string, cuda): | |
if model_string.startswith("gpt2"): | |
tokenizer = GPT2Tokenizer.from_pretrained(model_string) | |
model = GPT2LMHeadModel.from_pretrained(model_string) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
``` | |
wget https://s3.amazonaws.com/conceptnet/downloads/2017/edges/conceptnet-assertions-5.5.5.csv.gz | |
gunzip -k conceptnet-assertions-5.5.5.csv.gz | |
``` | |
import json | |
def del_pos(s): | |
""" | |
Deletes part-of-speech encoding from an entity string, if present. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import numpy as np | |
from tqdm import tqdm | |
from fairseq.models.roberta import RobertaModel | |
from fairseq.data.data_utils import collate_tokens | |
from torch.utils.data import DataLoader, SequentialSampler | |
roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli') | |
roberta.eval() | |
roberta.cuda() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sentence_transformers import SentenceTransformer # pip install -U sentence-transformers | |
from sklearn.cluster import KMeans | |
from collections import defaultdict | |
INPUT_FILE = "/tmp/test_input.txt" | |
with open(INPUT_FILE, "r") as f: | |
lines = f.read().splitlines() | |
print(len(lines)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Usage: bash mv.sh /path/to/src/ /path/to/target/ | |
sourcedir=$1 | |
targetdir=$2 | |
filecount=$(find $sourcedir | wc -l) | |
echo $filecount # Print the number of the total files. | |
mkdir $targetdir | |
mv -v $sourcedir $targetdir | pv -l -s $filecount > /tmp/mv_log.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from argparse import Namespace | |
from numpy.core.defchararray import index | |
from semanticdebugger.debug_algs.cl_simple_alg import ContinualFinetuning | |
from tqdm import tqdm | |
import torch | |
from semanticdebugger.models.utils import trim_batch | |
import json | |
from semanticdebugger.debug_algs import run_lifelong_finetune |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
# stanza.download() | |
# http://nlp.stanford.edu/software/stanza/1.0.0/en/default.zip | |
Example usage: | |
CUDA_VISIBLE_DEVICES=1 \ | |
python parsing.py \ | |
--input_corpus_path ./corpora/gkb_best_sent.txt \ | |
--output_json_path ./parses/gkb_best.parses.jsonl \ | |
--prefix gkb_best --num_shards 10000 --shard_id 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
echo "HW3 Report" > $vocareumReportFile | |
echo "Programming language..." >> $vocareumReportFile | |
filename=$(ls|grep NeuralNetwork) | |
if [[ $filename =~ (^|[[:space:]])"NeuralNetwork.py"($|[[:space:]]) ]]; then | |
cmd="python NeuralNetwork.py train_image.csv train_label.csv test_image.csv" | |
mnist_cmd="python NeuralNetwork.py grading_train_image.csv grading_train_label.csv grading_test_image.csv" | |
ta_cmd="python NeuralNetwork.py grading_train_image.csv grading_train_label.csv additional_test_image.csv" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
acc = float(sys.argv[1]) # mnist acc | |
ta_acc = float(sys.argv[2]) # ta acc | |
t1=50.00 | |
t2=90.00 | |
tt1=30.0 | |
tt2=60.0 |
OlderNewer