For example, to kill every process whose command line contains `python3 -u experiment_main.py`:
kill $(ps aux | grep '[p]ython3 -u experiment_main.py' | awk '{print $2}')
hdfs dfs -ls / | sort -k6,7
{"name": "HumanEval_79_decimal_to_binary", "language": "py", "prompt": "def decimal_to_binary(decimal: int) -> str:\n \"\"\"You will be given a number in decimal form and your task is to convert it to\n binary format. The function should return a string, with each character representing a binary\n number. Each character in the string will be '0' or '1'.\n\n There will be an extra couple of characters 'db' at the beginning and at the end of the string.\n The extra characters are there to help with the format.\n\n Examples:\n >>> decimal_to_binary(15)\n 'db1111db'\n >>> decimal_to_binary(32)\n 'db100000db'\n \"\"\"\n", "doctests": "transform", "original": "/home/arjun/repos/nuprl/MultiPL-E/datasets/../datasets/originals-with-cleaned-doctests/HumanEval_79_decimal_to_binary.py", "prompt_terminology": "reworded", "stop_tokens": ["\ndef", "\n#", "\nif", "\nclass"], "entry_point": "decimal_to_binary", "test": "def check(candidate):\n assert candidate(0) == 'db0db'\n assert cand |
""" | |
This is a simple example to show how to calculate the p_value of two models' accuracy | |
Bootstrapping t-test | |
""" | |
import random | |
random.seed(42) | |
# assume we have test set 1000 samples | |
# we just create dummy results to demo | |
groundtruth = [random.choice(['A', 'B', 'C']) for _ in range(1000)] |
from torch.utils.data import DataLoader | |
from transformers import AutoTokenizer, PreTrainedTokenizerFast, set_seed, AutoModelForCausalLM, AutoConfig | |
from tqdm import tqdm | |
import argparse | |
import torch | |
import torch.nn as nn | |
import logging | |
from typing import Dict, Tuple | |
from accelerate import Accelerator, DistributedDataParallelKwargs | |
from accelerate.logging import get_logger |
import torch | |
from fairseq.models.bart import BARTModel | |
bart = BARTModel.from_pretrained( | |
'model_files/bart-large-model', | |
checkpoint_file='checkpoint_best.pt', | |
data_name_or_path='data/cloze_replace_all-bin' | |
) | |
bart.cuda() |
<!DOCTYPE html> | |
<head> | |
<!--Little CSS fade in --> | |
<style> | |
.fade-in{ | |
-webkit-animation: fade-in 2s ease; | |
-moz-animation: fade-in ease-in-out 2s both; | |
-ms-animation: fade-in ease-in-out 2s both; | |
-o-animation: fade-in ease-in-out 2s both; |
pip install streamlit | |
pip install spacy | |
python -m spacy download en_core_web_sm | |
python -m spacy download en_core_web_md | |
python -m spacy download de_core_news_sm |
local bert_model = "bert-base-uncased"; | |
local train_path = "./datasets/coref/train.english.v4_gold_conll"; | |
local dev_path = "./datasets/coref/dev.english.v4_gold_conll"; | |
local test_path = "./datasets/coref/test.english.v4_gold_conll"; | |
{ | |
"dataset_reader": { | |
"type": "coref", | |
"token_indexers": { | |
"bert": { |
from typing import List, TypeVar, Callable | |
import numpy as np | |
T = TypeVar('T') | |
def bootstrap_paired_ttest(results_a: List[T], | |
results_b: List[T], | |
evaluate_func: Callable[[List[T]], float], | |
sample_times: int = 10000, |
import streamlit as st | |
# To make things easier later, we're also importing numpy and pandas for working with sample data. | |
import numpy | |
import pandas | |
# Don't worry, we'll explain this method in the next section. We need to make at least one | |
# call to Streamlit in order to generate a report. | |
st.title("Demo Test") | |
# streamlit.header("I'm a large heading") | |
# streamlit.subheader("I'm not a large heading") |