Change the paths, then run keep_evaling.sh.
It will keep evaluating openclip during training.
To send the results to wandb, you can also run: while [ 1 ]; do python3 eval_to_wandb.py; sleep 300; done
#!/bin/bash
# Slurm batch header for the "gputest" job. The #SBATCH lines below are read
# by sbatch at submission time; bash itself treats them as comments. They must
# stay ahead of the first executable command, and each directive must contain
# nothing but its own options (no trailing characters).
#
# Queue and job name.
#SBATCH --partition=gpu
#SBATCH --job-name=gputest
# One node, 8 tasks on it, 8 GPUs with 6 CPUs allotted per GPU.
#SBATCH --nodes 1
#SBATCH --ntasks-per-node 8
#SBATCH --cpus-per-gpu=6
#SBATCH --gres=gpu:8
# Pinned to one specific host; change or drop this line to run elsewhere.
#SBATCH --nodelist gpu-st-p4d-24xlarge-42
# Log file is <job-name>_<job-id>.out, opened in append mode.
#SBATCH --output=%x_%j.out
#SBATCH --open-mode=append
python3.8 -m venv .env
source .env/bin/activate
pip install -U pip
pip install "jax[cuda11_cudnn82]" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
python
import jax
jax.default_backend()
jax.devices()
import json | |
import pandas as pd | |
import subprocess | |
import sys | |
def get_msg( | |
backticks=True # whether to add backticks for Discord formatting or not | |
): | |
"gets a list of cluster usage from squeue and creates a text message from it" | |
a = json.loads(subprocess.check_output(['squeue','--json']).decode("utf8")) |
Change the paths, then run keep_evaling.sh.
It will keep evaluating openclip during training.
To send the results to wandb, you can also run: while [ 1 ]; do python3 eval_to_wandb.py; sleep 300; done
Create a virtual environment and install PyTorch:
python3.8 -m venv .env
source .env/bin/activate
pip install -U pip
pip3 install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113
Then fix the paths in simple.sh.
class WDSDataset(data.IterableDataset): | |
def __init__(self, min_size, transform=None, target_transform=None): | |
self.min_size = min_size | |
self.transform = transform if transform is not None else nn.Identity() | |
self.target_transform = target_transform if target_transform is not None else nn.Identity() | |
self.kv = OnDiskKV(file='/home/ubuntu/laion5B-watermark-safety-ordered', key_format='q', value_format='ee') | |
self.kv_aesthetic = OnDiskKV(file='/home/ubuntu/laion5B-aesthetic-tags-kv', key_format='q', value_format='e') | |
self.pwatermark_threshold = 0.8 | |
self.punsafe_threshold = 0.5 | |
self.aesthetic_threshold = 5. |
https://wandb.ai/rom1504/dalle2_train_decoder/runs/mic5buox/files/decoder_config.json
Get dalle2.
Get the config file (linked above).
Get these two .sh scripts.
Run: sbatch start_big.sh
import pandas as pd | |
df = pd.read_parquet("aethetic_multi/0000.parquet") | |
buckets = [(i, i+1) for i in range(10)] | |
html= "<h1>Aesthetic subsets in Laion2B-multi</h1>" | |
for [a,b] in buckets: | |
total_part = df[(df["prediction"] >= a) & (df["prediction"] <= b)] | |
count_part = len(total_part) / len(df) * 100 |
A red colored car.
A black colored car.
A pink colored car.
A black colored dog.
A red colored dog.
A blue colored dog.
A green colored banana.
A red colored banana.
A black colored banana.
A white colored sandwich.