Created
August 1, 2024 07:43
-
-
Save tuulos/34a4144a11f4977c451250800b75f3e8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reusable shape classes for this diagram.
classes: {
  # Fully transparent shape; applied to the fooN cells inside the grid boxes.
  invisible: {
    label: a
    style: {
      opacity: 0
    }
  }
}
frame: { | |
*: { | |
style.font-size: 40 | |
} | |
label: _ | |
style.font-color: white | |
grid-rows: 2 | |
*: { | |
style.stroke-width: 0 | |
style.fill: white | |
} | |
style.fill: white | |
style.stroke-width: 0 | |
# Image shape showing the Metaflow logo, white on white apart from the icon.
ic: {
  shape: image
  icon: https://metaflow.org/images/metaflow.svg
  icon.near: outside-top-center
  style: {
    fill: white
    font-color: white
  }
}
Cheat Sheet: { | |
label: Metaflow Cheat Sheet 0.1 | |
grid-rows: 2 | |
grid-columns: 2 | |
*: { | |
style.font-size: 30 | |
} | |
Flow Structures: { | |
*: { | |
style.font-size: 30 | |
} | |
grid-rows: 3 | |
# Card: one step branching out to two different steps.
Task Parallelism: {
  style.fill: white

  code: |python
    # Process two or more functions
    # concurrently
    self.next(self.a, self.b)
  |
  code.style.stroke-width: 0
  code.style.font-size: 20

  # Transparent 2x3 grid; declaration order decides the cell each shape gets.
  box: {
    style: {
      opacity: 0
    }
    grid-rows: 2
    grid-columns: 3

    foo1: {class: invisible}
    step
    foo2: {class: invisible}
    step a
    foo3: {class: invisible}
    step b

    step -> step a: {
      style.animated: true
    }
    step -> step b: {
      style.animated: true
    }
  }
}
# Card: a foreach fan-out, drawn as one step feeding a stacked ("multiple") step.
Data Parallelism: {
  style.fill: white

  code: |python
    # Process the elements of a list
    # concurrently
    self.mylist = ['A', 'B']
    self.next(self.a, foreach='mylist')
  |
  code.style.stroke-width: 0
  code.style.font-size: 20

  # Transparent two-row grid holding the mini flow graph.
  box: {
    style: {
      opacity: 0
    }
    grid-rows: 2

    step
    step a.style.multiple: true

    step -> step a: {style.animated: true}
  }
}
# Card: num_parallel fan-out where the two parallel tasks are also linked to each other.
Distributed Computing: {
  style.fill: white

  code: |python
    # Set up an ephemeral cluster
    # for distributed computing
    self.next(self.a, num_parallel=N)
    @pytorch, @ray, @mpi, ...
  |
  code.style.stroke-width: 0
  code.style.font-size: 20

  # Transparent 2x3 grid; declaration order decides the cell each shape gets.
  box: {
    style: {
      opacity: 0
    }
    grid-rows: 2
    grid-columns: 3

    foo1: {class: invisible}
    step
    foo2: {class: invisible}
    step a/1
    foo3: {class: invisible}
    step a/2

    step -> step a/1: {
      style.animated: true
    }
    step -> step a/2: {
      style.animated: true
    }
    # Bidirectional edge between the two parallel tasks.
    step a/1 <-> step a/2: {
      style.animated: true
    }
  }
}
} | |
Decorators: { | |
grid-rows: 5 | |
*: { | |
style.font-size: 20 | |
} | |
# Card: decorators for running steps on remote compute.
Cloud Compute: {
  # White card with a 30pt title.
  style.fill: white
  style.font-size: 30

  code1: |python
    @batch # Run the step on AWS Batch
    @kubernetes # Run the step on Kubernetes
    @resources # Specify resource requirements for the step
  |

  # Every child: no border, 20pt text.
  *.style.stroke-width: 0
  *.style.font-size: 20
}
# Card: decorators for declaring Conda / PyPI dependencies.
# The *_base variants are flow-level; the plain ones are step-level.
Dependency Management: {
  # White card with a 30pt title.
  style: {
    fill: white
    font-size: 30
  }

  code1: |python
    @conda # Specify dependencies for the step with Conda
    @conda_base # Specify dependencies for the flow with Conda
    @pypi # Specify dependencies for the step with Pip
    @pypi_base # Specify dependencies for the flow with Pip
  |

  # Every child: no border, 20pt text.
  *.style: {
    stroke-width: 0
    font-size: 20
  }
}
# Card: decorators for failure handling.
Making Flows Reliable: {
  # White card with a 30pt title.
  style.fill: white
  style.font-size: 30

  code1: |python
    @retry # Retry the step after a failure
    @catch # Let the flow continue even if the step fails
    @timeout # Interrupt the step after the specified time
  |

  # Every child: no border, 20pt text.
  *.style.stroke-width: 0
  *.style.font-size: 20
}
# Card: decorators for environment variables, secrets and cards.
Configuring Step Environment: {
  # White card with a 30pt title.
  style.fill: white
  style.font-size: 30

  code1: |python
    @environment # Specify environment variables for the step
    @secrets # Fetch secrets from a secrets manager for the step
    @card # Visualize the step results
  |

  # Every child: no border, 20pt text.
  *.style.stroke-width: 0
  *.style.font-size: 20
}
# Card: decorators for namespaces, schedules and triggers.
Scheduling a production deployment: {
  # White card with a 30pt title.
  style.fill: white
  style.font-size: 30

  code1: |python
    @project # Enable branched namespaces
    @schedule # Schedule the flow to run at a specified time
    @trigger # Schedule the flow to run when an event is received
    @trigger_on_finish # Schedule the flow to run when another flow completes
  |

  # Every child: no border, 20pt text.
  *.style.stroke-width: 0
  *.style.font-size: 20
}
} | |
Notebook Tips: { | |
grid-rows: 5 | |
grid-columns: 1 | |
*: { | |
style.font-size: 20 | |
} | |
# Card: Client API lookups for the current user's runs.
Find my past runs: {
  # White card with a 30pt title.
  style.fill: white
  style.font-size: 30

  code: |python
    # Fetch the results of my latest HelloFlow run
    run = Flow('HelloFlow').latest_run
    # Fetch the second newest run of HelloFlow by me
    run = list(Flow('HelloFlow'))[1]
    # Fetch the latest run by me tagged as 'goodmodel'
    run = list(Flow('HelloFlow').runs('goodmodel'))[0]
  |

  # Every child: no border, 20pt text.
  *.style.stroke-width: 0
  *.style.font-size: 20
}
# Card: switching namespaces to reach runs produced by other users.
Find runs by others: {
  # White card with a 30pt title.
  style.fill: white
  style.font-size: 30

  code: |python
    # Fetch the results of Alice's latest HelloFlow run
    namespace('user:alice')
    run = Flow('HelloFlow').latest_run
    # Fetch the results of a specific run, produced by anyone
    namespace(None)
    run = Run('HelloFlow/323')
  |

  # Every child: no border, 20pt text.
  *.style.stroke-width: 0
  *.style.font-size: 20
}
# Card: reading artifacts, logs and cards from a run.
Fetching Results: {
  # White card with a 30pt title.
  style: {
    fill: white
    font-size: 30
  }

  code: |python
    # Fetch an artifact from a specific step (fast!)
    model = run['train'].task['model'].data
    # Fetch all artifacts from a run (loads everything)
    model = run.data.model
    # Inspect logs of a specific step
    run['train'].task.stdout
    # Inspect cards of a specific step
    from metaflow.cards import get_cards
    get_cards(run['train'].task)
  |

  # Every child: no border, 20pt text.
  *.style: {
    stroke-width: 0
    font-size: 20
  }
}
# Card: defining a flow in a notebook cell and running it with NBRunner.
Running a flow in a notebook: {
  # White card with a 30pt title.
  style: {
    fill: white
    font-size: 30
  }

  # The class body is indented so the displayed snippet is valid Python.
  code: |python
    # in a cell define a flow
    from metaflow import FlowSpec, step, NBRunner
    class HelloFlow(FlowSpec):
        ...
    # and run it!
    run = NBRunner(HelloFlow).nbrun()
  |

  # Every child: no border, 20pt text.
  *.style: {
    stroke-width: 0
    font-size: 20
  }
}
# Card: publishing an Argo event with a parameter payload.
Trigger a production flow via an event: {
  # White card with a 30pt title.
  style.fill: white
  style.font-size: 30

  code: |python
    from metaflow.integrations import ArgoEvent
    # Trigger all flows waiting for my_event, set parameter alpha=16
    ArgoEvent('my_event').publish({'alpha': 16})
  |

  # Every child: no border, 20pt text.
  *.style.stroke-width: 0
  *.style.font-size: 20
}
} | |
Running and deploying: { | |
grid-rows: 5 | |
grid-columns: 1 | |
*: { | |
style.font-size: 20 | |
} | |
# Card: command-line invocations for running, resuming and viewing cards.
Local development on the CLI: {
  # White card with a 30pt title.
  style.fill: white
  style.font-size: 30

  code: |bash
    # Run a flow locally, setting parameters
    python myflow.py run --alpha 16 --country 'South Korea'
    # Run a flow on Kubernetes, scheduling at most 64 pods in parallel.
    python myflow.py run --with kubernetes --max-workers 64
    # Resume the latest run, skipping over successful steps
    python myflow.py resume
    # Resume a specific step of a specific run
    python myflow.py resume train --origin-run-id 1234
    # See the latest card in the train step
    python myflow.py card view train
  |

  # Every child: no border, 20pt text.
  *.style.stroke-width: 0
  *.style.font-size: 20
}
# Card: launching a flow from Python with the Runner API.
Run a flow programmatically: {
  # White card with a 30pt title.
  style: {
    fill: white
    font-size: 30
  }

  # Snippet fixes: removed the unbalanced ')' after run(alpha=16), indented
  # the with-body, and read the model through running.run (no bare `run`
  # name is defined in this snippet).
  code: |python
    # Set a parameter, run a flow, and wait for its completion
    from metaflow import Runner
    with Runner('helloflow.py').run(alpha=16) as running:
        print(f'{running.run} finished')
        model = running.run.data.model
  |

  # Every child: no border, 20pt text.
  *.style: {
    stroke-width: 0
    font-size: 20
  }
}
# Card: CLI commands that deploy a flow to each supported orchestrator.
Deploy to a production orchestrator: {
  # White card with a 30pt title.
  style.fill: white
  style.font-size: 30

  code: |bash
    # Deploy to Argo Workflows
    python helloflow.py argo-workflows create
    # Deploy to Step Functions
    python helloflow.py step-functions create
    # Deploy to Airflow
    python helloflow.py airflow create
  |

  # Every child: no border, 20pt text.
  *.style.stroke-width: 0
  *.style.font-size: 20
}
# Card: @project-based branch and production deployments from the CLI.
Deploy an isolated branch deployment: {
  # White card with a 30pt title.
  style.fill: white
  style.font-size: 30

  code: |bash
    # Add @project, then deploy a branch
    python helloflow.py --branch new_model argo-workflows create
    # Add @project, then deploy to main production
    python helloflow.py --production argo-workflows create
  |

  # Every child: no border, 20pt text.
  *.style.stroke-width: 0
  *.style.font-size: 20
}
# Card: deploying and triggering a flow from Python with the Deployer API.
Deploy a flow programmatically: {
  # White card with a 30pt title.
  style: {
    fill: white
    font-size: 30
  }

  # The snippet's heading promises deploy + trigger + run access, so the
  # trigger() call and the .run lookup are shown after create().
  # NOTE(review): confirm trigger()/.run against the Metaflow Deployer docs.
  code: |python
    # Deploy a flow, trigger it, and access the run object
    from metaflow import Deployer
    deployer = Deployer('helloflow.py')
    deployed_flow = deployer.argo_workflows().create()
    triggered_run = deployed_flow.trigger()
    run = triggered_run.run
  |

  # Every child: no border, 20pt text.
  *.style: {
    stroke-width: 0
    font-size: 20
  }
}
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment