python main.py data=in32_pickle sg.params.condition_method=cluster dynamic=unet_fast dynamic.params.model_channels=128 data.params.batch_size=128 sg.params.cond_dim=10000 data.h5_file=sg_data/cluster/v3_in32p_cluster10000_iter30minp200_nns-1_dino_vitb16_2022-08-17T21_7b919c8.h5 sg.params.cond_drop_prob=0.1 sg.params.cond_scale=2 data.trainer.max_epochs=800 data.fid_every_n_epoch=10 name=aaaa_ep800_4gpu pl.trainer.strategy=ddp devices=4 debug=0
python main.py data=in32_pickle dynamic=unet_fast sg.params.condition_method=label sg.params.cond_drop_prob=0.1 sg.params.cond_scale=2 dynamic.params.model_channels=128 sg.params.cond_dim=1000 data.params.batch_size=128 data.trainer.max_epochs=8800 data.fid_every_n_epoch=10 name=aaaa_v1.6.2_label_in32p_unet_fast_ep8800 pl.trainer.strategy=ddp devices=4 debug=0
python main.py data=in32_pickle dynamic=unet_fast sg.params.condition_method=label sg.params.cond_drop_prob=0.1 sg.params.cond_scale=2 dynamic.params.model_channels=128 sg.params.cond_dim=1000 data.params.batch_size=128 data.trainer.max_epochs=8888 data.fid_every_n_epoch=10 name=aaaa_v1.6.2_label_in32p_unet_fast_ep8888 debug=0
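A minimal sbatch wrapper for the 4-GPU ddp run above, as a sketch: the #SBATCH values mirror the interactive 4-GPU allocation further down, and the job name, log path, and script filename are placeholders to adapt.
#!/bin/bash
#SBATCH --job-name=aaaa_ep800_4gpu
#SBATCH --gres=gpu:4
#SBATCH --cpus-per-task=20
#SBATCH --mem=120G
#SBATCH --time=6-23
#SBATCH --output=slurm-%j.out
python main.py data=in32_pickle sg.params.condition_method=cluster dynamic=unet_fast \
  dynamic.params.model_channels=128 data.params.batch_size=128 sg.params.cond_dim=10000 \
  data.h5_file=sg_data/cluster/v3_in32p_cluster10000_iter30minp200_nns-1_dino_vitb16_2022-08-17T21_7b919c8.h5 \
  sg.params.cond_drop_prob=0.1 sg.params.cond_scale=2 data.trainer.max_epochs=800 \
  data.fid_every_n_epoch=10 name=aaaa_ep800_4gpu pl.trainer.strategy=ddp devices=4 debug=0
Submit with `sbatch train_in32_cluster.sh` (filename is arbitrary).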
- /ssdstore for ivi-cn022
- /local for das6
- /var/scratch for das5
- /local-ssd for das5
name | local | ssd |
---|---|---|
das5 | /var/scratch | /local-ssd |
das6 | /local/thu | |
ivi | . | /ssdstore |
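A small shell sketch for picking the scratch dir per machine; the hostname patterns are assumptions, adjust them to the real node names:
case "$(hostname)" in
  *das5*)     SCRATCH=/var/scratch ;;   # node-local SSD would be /local-ssd
  *das6*)     SCRATCH=/local/thu ;;
  ivi-cn022*) SCRATCH=/ssdstore ;;
  *)          SCRATCH=$HOME ;;          # fallback
esac
echo "using scratch: $SCRATCH"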
srun -u --pty --gres=gpu:4 --time=6-23 bash -i
srun -u --pty -p fatq --gres=gpu:4 --time=6-23 bash -i
1GPU:
srun -u --pty --gres=gpu:1 --mem=60G --cpus-per-task=10 --time=6-23 bash -i
4GPU:
srun -u --pty --gres=gpu:4 --mem=120G --cpus-per-task=20 --time=6-23 bash -i
srun -u --pty --gres=gpu:4 --account=ceesusers --nodelist=ivi-cn022 --time=6-23 --mem=250G --cpus-per-task=110 -p cees bash -i
8GPU:
srun -u --pty --gres=gpu:8 --account=ceesusers --nodelist=ivi-cn022 --time=6-23 --mem=250G --cpus-per-task=110 -p cees bash -i
srun -u --pty --nodelist=ivi-cn019 --gres=gpu:8 --mem=200G --cpus-per-task=42 --time=6-23 -p biggpu bash -i
srun -u --pty --gres=gpu:8 --account=quvausers --nodelist=ivi-cn023 --time=6-23 --mem=250G --cpus-per-task=110 -p quva bash -i
srun -u --pty --gres=gpu:4 --account=quvausers --nodelist=ivi-cn023 --time=6-23 --mem=110G --cpus-per-task=55 -p quva bash -i
srun -p gpu -t 4-23 --mail-user=[email protected] --mail-type=ALL --cpus-per-task=32 --gres=gpu:2 -u --pty bash -i
srun -p gpu -t 4-23 --mail-user=[email protected] --mail-type=ALL --cpus-per-task=72 --gres=gpu:4 -u --pty bash -i
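Once the interactive shell starts, a quick sanity check of what was actually granted (uses standard SLURM env vars and nvidia-smi):
echo "job $SLURM_JOB_ID on $(hostname), cpus=$SLURM_CPUS_PER_TASK"
nvidia-smi --query-gpu=index,name,memory.total --format=csv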
accinfo to check the budget
"squeue -u thu32" to check your current jobs
use
/scratch-local/
/scratch-shared/
You have several TB of quota in those paths,
but data is deleted after 6 days (/scratch-local) or 14 days (/scratch-shared).
scp -r lsun thu32@snellius:/scratch-shared/thu/data/
scp -r ffhq thu32@snellius:/scratch-shared/thu/data/
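rsync is a resumable alternative to scp for these large copies (same remote paths as above; -avP keeps partial transfers and shows progress):
rsync -avP lsun thu32@snellius:/scratch-shared/thu/data/
rsync -avP ffhq thu32@snellius:/scratch-shared/thu/data/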
ivi:
df -h|grep thu
das6:
quota -sv
das5: go to the jumphost, then
quota -sv
check current dir disk usage
du -hd 1
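to rank subdirectories by size (assumes GNU sort with -h):
du -hd 1 | sort -h | tail -n 10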
module avail|grep cuda1
check your loaded modules
module list
https://curc.readthedocs.io/en/latest/compute/modules.html
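typical module flow, as a sketch; the cuda module name is a placeholder, take the exact one from `module avail`:
module purge                  # start from a clean environment
module load cuda11.3/toolkit  # placeholder name
module list                   # confirm what is loaded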
- das5: /home/koelma/pytorch_work/ilsvrc2012_{train,val}.zip
- ivi:
- das6:
how to resize-pane:
https://michaelsoolee.com/resize-tmux-panes/
Ctrl+b, then :, then resize-pane -R 10
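optional: repeatable resize bindings, a sketch using the default Ctrl+b prefix; run these once in a session, or put the same lines (without the leading `tmux`) in ~/.tmux.conf:
tmux bind -r H resize-pane -L 10
tmux bind -r J resize-pane -D 10
tmux bind -r K resize-pane -U 10
tmux bind -r L resize-pane -R 10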
go to the beginning of the line:
Ctrl+A