This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Provisioning notes for a GPU box: download Miniconda, then install CUDA 10.1
# from NVIDIA's local .deb repository for Ubuntu 16.04 (x86_64).
# NOTE(review): the original lines carried " | |" table-scrape artifacts that
# made them invalid shell; removed here, commands otherwise unchanged.

# Download the Miniconda installer (run afterwards with:
#   bash Miniconda3-latest-Linux-x86_64.sh).
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
# CUDA installation
vim cuda.sh
# The pin file gives NVIDIA's repository priority over distro packages.
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-ubuntu1604.pin
sudo mv cuda-ubuntu1604.pin /etc/apt/preferences.d/cuda-repository-pin-600
# Local installer .deb for CUDA 10.1.243 (bundled driver 418.87.00).
wget https://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda-repo-ubuntu1604-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb
sudo dpkg -i cuda-repo-ubuntu1604-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb
# Register the repo's signing key so apt trusts the locally installed packages.
sudo apt-key add /var/cuda-repo-10-1-local-10.1.243-418.87.00/7fa2af80.pub
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib.request | |
import csv | |
from collections import defaultdict | |
import nltk | |
class GeoExtractor(object): | |
def __init__(self): | |
self.zipcode_to_state = {} | |
self.statenames_to_state = {} | |
self.countynames_to_states = defaultdict(set) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from PIL import Image | |
import os | |
import glob | |
import numpy as np | |
def crop(im, height, width): | |
# im = Image.open(infile) | |
imgwidth, imgheight = im.size | |
rows = np.int(imgheight/height) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################################### | |
# serialization of indexes to byte arrays | |
########################################### | |
def serialize_index(index):
    """Serialize a faiss index to a flat numpy uint8 array.

    Writes the index into an in-memory ``faiss.VectorIOWriter`` and returns
    the writer's byte buffer as a ``np.uint8`` array, so the index can be
    pickled or shipped between processes. The inverse operation reads the
    array back through a ``VectorIOReader`` / ``faiss.read_index``.

    NOTE(review): original lines carried " | |" table-scrape artifacts that
    made the block invalid Python; removed here, logic unchanged.
    """
    writer = faiss.VectorIOWriter()
    faiss.write_index(index, writer)
    return faiss.vector_to_array(writer.data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
We write a way to convert LaTex to CSV | |
""" | |
import csv | |
import re | |
def to_csv(latex_text, file_name): | |
"""We learn to parse the text. | |
We assume the very first line tells the format of the table! | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
utterances = ["blue", "green", "square", "circle"] | |
objects = ['blue square', 'blue circle', 'green square'] | |
def meaning(utt, obj):
    """Literal (Boolean) semantics: 1 if utterance `utt` is true of object
    `obj`, else 0.

    Truth is a substring test (e.g. "blue" in "blue square"), not word-level
    matching — adequate for the single-word utterances used here.

    NOTE(review): original line carried a " | |" table-scrape artifact that
    made it invalid Python; removed here, logic unchanged.
    """
    return int(utt in obj)
def normalize(space): | |
denom = sum(space.values()) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PQI V5.0 does not really have lower-level codes... | |
# PQI 05 and PQI 15 are actually different (contain different codes), but must use AGE as a differentiator | |
codes = { | |
"PQI 01 Diabetes Short-term Complications Admission Rate": { | |
"Diabetes short-term complications diagnosis codes": ["25010","25022","25011","25023","25012","25030","25013","25031","25020","25032","25021","25033"] | |
}, | |
"PQI 02 Perforated Appendix Admission Rate": { | |
"Perforations or abscesses of appendix diagnosis codes": ["5400", "5401"], | |
"Appendicitis diagnosis codes": ["5400", "5401", "5409", "541"] | |
}, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PQI 05 and PQI 15 is age-based. PQI 15 has age 18 through 39 years. PQI 05 has age 40 years and older. | |
codes = { | |
"PQI 01 Diabetes Short-term Complications Admission Rate": { | |
"Diabetes short-term complications diagnosis codes: (ACDIASD)": | |
["E1010", "E1011", "E10641", "E1100", "E1101", "E11641", "E1110", "E1111"] | |
}, | |
"PQI 02 Perforated Appendix Admission Rate": { | |
"Perforations or abscesses of appendix diagnosis codes: (ACSAPPD)": | |
["K3580", "K3589", "K37"], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Traditional python regression packages like sklearn and statsmodel can't handle number of examples as large as >1M | |
# or when the feature space | |
# Currently this method uses mini-batch gradient optimization method (Adam) | |
# We also have a NullLogit model that only has intercept (used to compute pseudo R-squred for Logit model) | |
import torch | |
from torch.utils.data import TensorDataset, DataLoader, RandomSampler | |
import torch.nn as nn | |
from scipy.spatial.distance import cosine |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
""" | |
python report_bleu.py --target_file /mnt/fs5/anie/OpenNMT-py/data/twitter/twitter-seq2seq-2019mar3-clean-tgt-test.txt \ | |
--generated_file /mnt/fs5/anie/OpenNMT-py/save/twitter_transformer_clean_char/twitter_test_mar4_step50000_greedy_word_level.txt \ | |
--base_dir /home/anie/OpenNMT-py | |
""" | |
parser = argparse.ArgumentParser(description='Clean Seq2Seq data') | |
parser.add_argument('--target_file', type=str, help="target evaluation file") |
Newer | Older