Skip to content

Instantly share code, notes, and snippets.

@tzwm
Created June 24, 2025 12:00
Show Gist options
  • Save tzwm/93dfa9789e9572e1da59e7781249bcdd to your computer and use it in GitHub Desktop.
Save tzwm/93dfa9789e9572e1da59e7781249bcdd to your computer and use it in GitHub Desktop.
autodl_redeploy.py
import argparse
import time
# Root URL that every AutoDL API path in this script is appended to.
AUTODL_BASE_URL = 'https://api.autodl.com'

# Headers sent with each AutoDL request. The Authorization value is read from
# the AUTODL_TOKEN environment variable and is None when that variable is unset.
AUTODL_HEADERS = dict([
    ("Authorization", os.getenv('AUTODL_TOKEN')),
    ("Content-Type", "application/json"),
])
def stop_container(deployment_container_uuid, decrease_one_replica_num=True, cmd_before_shutdown=None):
    """Ask the AutoDL API to stop a single deployment container.

    Sends a PUT request to ``/api/v1/dev/deployment/container/stop`` using the
    module-level ``AUTODL_BASE_URL`` and ``AUTODL_HEADERS``.

    Args:
        deployment_container_uuid: Sent as the ``deployment_container_uuid``
            payload field.
        decrease_one_replica_num: Sent as-is as the
            ``decrease_one_replica_num`` payload field.
        cmd_before_shutdown: Optional value; when truthy it is added to the
            payload as ``cmd_before_shutdown``, otherwise the field is omitted.

    Returns:
        True when the HTTP response is OK and the JSON body's ``code`` field
        equals ``'Success'``; False otherwise (the failure is also printed).
    """
    PATH = '/api/v1/dev/deployment/container/stop'
    payload = {
        'deployment_container_uuid': deployment_container_uuid,
        'decrease_one_replica_num': decrease_one_replica_num,
    }
    if cmd_before_shutdown:
        payload['cmd_before_shutdown'] = cmd_before_shutdown

    res = requests.put(
        AUTODL_BASE_URL + PATH,
        headers=AUTODL_HEADERS,
        json=payload,
    )

    # Bail out on an HTTP-level failure instead of trying to parse the body
    # and then reporting success anyway.
    if not res.ok:
        print(f"[autodl] stop container error: {res.text}")
        return False

    try:
        data = res.json()
    except ValueError:
        # The body was not valid JSON, so the outcome cannot be confirmed.
        print(f"[autodl] stop container error: {res.text}")
        return False

    # .get() keeps an unexpected response shape from raising KeyError.
    if data.get('code') != 'Success':
        print(f"[autodl] stop container error: {data.get('msg')}")
        return False

    return True
def redeploy(deployment_uuid, batch_size, interval_sec, dry_run, max_num):
    """Stop a deployment's containers in batches, pausing between batches.

    The container list comes from ``fetch_container_list(deployment_uuid)``,
    is reversed, optionally truncated to the first ``max_num`` entries, and
    then processed ``batch_size`` containers at a time. Each container is
    stopped via ``stop_container`` with ``decrease_one_replica_num=False``.

    Args:
        deployment_uuid: Passed to ``fetch_container_list``.
        batch_size: Number of containers to stop per batch; must be >= 1.
        interval_sec: Seconds to sleep between consecutive batches.
        dry_run: When truthy, only print what would be stopped.
        max_num: If not None, process at most ``int(max_num)`` containers.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    # Validate before touching the API: a negative step makes range() empty
    # (silently stopping nothing) and a zero step raises an obscure error.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    running_containers = fetch_container_list(deployment_uuid)
    running_containers.reverse()
    if max_num is not None:
        running_containers = running_containers[:int(max_num)]
    container_uuids = [c['uuid'] for c in running_containers]
    total = len(container_uuids)

    t0 = time.time()
    for start in range(0, total, batch_size):
        batch = container_uuids[start:start + batch_size]
        for container_uuid in batch:
            print(f"stopping {container_uuid}")
            if not dry_run:
                stop_container(
                    deployment_container_uuid=container_uuid,
                    decrease_one_replica_num=False,
                    cmd_before_shutdown="bash /root/autodl-fs/prod/background/scripts/kill_main.sh",
                )
        # Pause only when another batch follows; waiting after the final
        # batch would just inflate the reported total time.
        if start + batch_size < total:
            print(f"sleep {interval_sec} seconds")
            time.sleep(interval_sec)

    print(f"total time for {total} containers: {time.time() - t0:.2f} seconds")
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to redeploy().
    cli = argparse.ArgumentParser()
    cli.add_argument("deployment_uuid", type=str)
    cli.add_argument("--dry-run", action="store_true", default=False)
    cli.add_argument("--batch-size", type=int, default=2)
    cli.add_argument("--interval-sec", type=int, default=60)
    cli.add_argument("--max-num", type=int, default=None)
    opts = cli.parse_args()

    redeploy(
        deployment_uuid=opts.deployment_uuid,
        batch_size=opts.batch_size,
        interval_sec=opts.interval_sec,
        dry_run=opts.dry_run,
        max_num=opts.max_num,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment