class MyModule(LightningModule):
    def __init__(self):
        super().__init__()
        self.encoder = RNN(...)
        self.decoder = RNN(...)

    def forward(self, x):
        # models won't be moved after the first forward because
        # they are already on the correct GPUs
        self.encoder.cuda(0)
        self.decoder.cuda(1)

        out = self.encoder(x.cuda(0))
        out = self.decoder(out.cuda(1))  # hand the activations over to gpu 1
        return out
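
# for comparison, the same submodule-per-gpu pattern in plain pytorch --
# a minimal sketch assuming two visible gpus; the layer sizes and the
# TwoGPUNet name are made up for illustration
import torch
from torch import nn

class TwoGPUNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = nn.LSTM(10, 20, batch_first=True).cuda(0)
        self.decoder = nn.LSTM(20, 10, batch_first=True).cuda(1)

    def forward(self, x):
        out, _ = self.encoder(x.cuda(0))    # runs on gpu 0
        out, _ = self.decoder(out.cuda(1))  # activations moved to gpu 1
        return out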
# change these lines
self.encoder = RNN(...)
self.decoder = RNN(...)

# to these
# now each RNN runs on a different gpu set
self.encoder = DataParallel(self.encoder, device_ids=[0, 1, 2, 3])
self.decoder = DataParallel(self.decoder, device_ids=[4, 5, 6, 7])

# in forward...
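
# standalone sketch of what DataParallel does (assumes a toy linear model
# and at least two gpus): the batch is split along dim 0 across device_ids
# and the outputs are gathered back on device_ids[0]
import torch
from torch import nn

model = nn.DataParallel(nn.Linear(10, 5), device_ids=[0, 1]).cuda(0)
x = torch.randn(32, 10).cuda(0)  # batch of 32 -> 16 examples per gpu
out = model(x)                   # output gathered on gpu 0
print(out.shape)                 # torch.Size([32, 5])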
def tng_dataloader(self):
    d = MNIST()
    # 4: add distributed sampler
    # sampler sends a portion of tng data to each machine
    dist_sampler = DistributedSampler(d)
    dataloader = DataLoader(d, shuffle=False, sampler=dist_sampler)
    return dataloader
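
# what the sampler does in isolation -- a sketch with num_replicas/rank
# passed explicitly (normally they are read from the process group):
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler

dataset = TensorDataset(torch.arange(100).float())
sampler = DistributedSampler(dataset, num_replicas=4, rank=0, shuffle=False)
loader = DataLoader(dataset, batch_size=5, sampler=sampler)
print(len(sampler))  # 25 -> each of the 4 replicas sees a quarter of the data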
def main_process_entrypoint(gpu_nb):
    # 2: set up connections between all gpus across all machines
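    # the body was truncated when this gist was captured; a minimal sketch of
    # the usual connection setup with torch.distributed follows (nccl backend;
    # nb_gpus_per_node, nb_nodes and node_rank are assumed to be known here)
    import torch
    import torch.distributed as dist

    world_size = nb_gpus_per_node * nb_nodes
    rank = node_rank * nb_gpus_per_node + gpu_nb  # global rank of this process
    dist.init_process_group('nccl', rank=rank, world_size=world_size)
    torch.cuda.set_device(gpu_nb)
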
# train on 1024 gpus across 128 nodes
trainer = Trainer(nb_gpu_nodes=128, gpus=[0, 1, 2, 3, 4, 5, 6, 7])

# train on 4 gpus on the same machine (much faster than DataParallel)
trainer = Trainer(distributed_backend='ddp', gpus=[0, 1, 2, 3])
import os
import torch
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
import pytorch_lightning as ptl
class CoolModel(ptl.LightningModule):
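    # the class body was cut off when this gist was captured; below is a
    # minimal sketch (layer size, lr and batch size are assumptions; the hook
    # names follow the old lightning API used elsewhere in these snippets)
    def __init__(self):
        super(CoolModel, self).__init__()
        self.l1 = torch.nn.Linear(28 * 28, 10)

    def forward(self, x):
        return torch.relu(self.l1(x.view(x.size(0), -1)))

    def training_step(self, batch, batch_nb):
        x, y = batch
        y_hat = self.forward(x)
        return {'loss': F.cross_entropy(y_hat, y)}

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.02)

    def tng_dataloader(self):
        return DataLoader(MNIST(os.getcwd(), train=True, download=True,
                                transform=transforms.ToTensor()),
                          batch_size=32)
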
from pytorch_lightning import Trainer
from test_tube import Experiment

model = CoolModel()
exp = Experiment(save_dir=os.getcwd())

# train on cpu using only 10% of the data and limit to 1 epoch (for demo purposes)
trainer = Trainer(experiment=exp, max_nb_epochs=1, train_percent_check=0.1)
trainer.fit(model)
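
# train on a single gpu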
trainer = Trainer(experiment=exp, max_nb_epochs=1, gpus=[0])
trainer.fit(model)
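
# train on 4 gpus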
trainer = Trainer(experiment=exp, max_nb_epochs=1, gpus=[0, 1, 2, 3])
trainer.fit(model)
from pytorch_lightning import Trainer
from test_tube import Experiment

# RUN THIS SCRIPT ON A MACHINE WHERE THE GPU IDs LISTED BELOW ARE AVAILABLE
def main():
    model = CoolModel()
    exp = Experiment(save_dir=os.getcwd())
    trainer = Trainer(experiment=exp, max_nb_epochs=1, gpus=[0, 1, 2, 3])
    trainer.fit(model)


if __name__ == '__main__':
    main()