This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from flask import Flask, render_template_string | |
app = Flask(__name__) | |
@app.route('/') | |
def dashboard(): | |
return render_template_string(r''' | |
<!DOCTYPE html> | |
<html lang="en"> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import multiprocessing | |
manager = multiprocessing.Manager() | |
all_hashes_set = manager.dict() | |
def deduplicate(examples, all_hashes_set): | |
print(len(all_hashes_set)) | |
input_ids = examples['input_ids'] | |
hashes = [ | |
hash(tuple(input_ids[i])) | |
for i in range(len(input_ids)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def verify_ddp_weights_equal(model: torch.nn.Module, atol: float = 1e-5) -> None:
    """Assert that every parameter of ``model`` matches across all ranks.

    Each parameter is collected with ``gather`` and reshaped to
    ``(world_size, -1)``; every row is then compared element-wise against
    row 0.

    NOTE(review): ``gather`` and ``get_world_size`` are defined outside this
    snippet. The reshape presumes ``gather`` returns the per-rank copies of
    the parameter stacked/concatenated in rank order -- confirm against the
    helper's implementation.

    Args:
        model: The module to check. If it exposes a ``.module`` attribute
            (as wrapper modules do), the wrapped module is checked instead.
        atol: Absolute tolerance; a parameter passes only when every
            element differs from row 0's value by strictly less than this.

    Raises:
        AssertionError: If any parameter differs by ``atol`` or more. The
            message names the parameter and the maximum absolute difference.
    """
    # Unwrap so parameter names are reported without the wrapper prefix.
    if hasattr(model, "module"):
        model = model.module

    world_size = get_world_size()
    for name, param in model.named_parameters():
        # One flattened row per rank.
        gathered_param = gather(param).reshape((world_size, -1))
        # Broadcast row 0 (shape (1, N)) against all rows (shape (world_size, N)).
        absolute_diffs = (gathered_param[None, 0, :] - gathered_param).abs()
        rank_params_eq = (absolute_diffs < atol).all()
        assert rank_params_eq, f"❌ param [{name}] not equal - got max_absolute_diff={absolute_diffs.max()}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
def slice_sparse_tensor_rows(t: torch.sparse.Tensor, min_row: int, max_row: int) -> torch.sparse.Tensor: | |
row_idxs = t.indices()[0] | |
index_mask = (min_row <= row_idxs) & (row_idxs < max_row) | |
num_rows = (max_row - min_row) | |
num_cols = t.shape[1] | |
idxs = t.indices()[:, index_mask] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Iterable
import concurrent
import glob
import json
import multiprocessing
import os

import datasets
def load_dataset_tables( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import glob | |
import datasets | |
import pandas as pd | |
def load_datasets(data_folder): | |
train_file = glob.glob(f"{data_folder}/train*.jsonl")[0] | |
test_file = f"{data_folder}/test.jsonl" | |
dev_file = glob.glob(f"{data_folder}/dev*.jsonl")[0] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Dict, Tuple | |
import logging | |
import os | |
import pathlib | |
import requests | |
import zipfile | |
import beir | |
import beir.datasets |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Python
map a function over a list: map(f, list) — function first, iterable second, NOT the other way around
set a breakpoint: import pdb; pdb.set_trace()
—> ACTUALLY, starting in Python 3.7 you can just call breakpoint()!
best way to profile any Python code: pip install pyinstrument; python -m pyinstrument ./myprog.py
run a pytest test by pattern: pytest -k <pattern>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# jm8wx 11/2/20 | |
import subprocess | |
import re | |
airpods_name = "Jack’s AirPods Pro" | |
def _color(s): | |
return "\033[94m" + s + "\033[0m" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
git rev-list --all --objects | \ | |
sed -n $(git rev-list --objects --all | \ | |
cut -f1 -d' ' | \ | |
git cat-file --batch-check | \ | |
grep blob | \ | |
sort -n -k 3 | \ | |
tail -n40 | \ | |
while read hash type size; do | |
echo -n "-e s/$hash/$size/p "; | |
done) | \ |
NewerOlder