Skip to content

Instantly share code, notes, and snippets.

@luckylionheart
Last active April 9, 2026 15:41
Show Gist options
  • Select an option

  • Save luckylionheart/5fc0f6baee9a445a71f364d0266db800 to your computer and use it in GitHub Desktop.

Select an option

Save luckylionheart/5fc0f6baee9a445a71f364d0266db800 to your computer and use it in GitHub Desktop.
Create a Vast.ai instance using an API key
"""Rent a Vast.ai GPU instance via the REST API and bootstrap a DeepSeek-VL2 server on it."""
import requests
import json
import sys
import socket
import time
# Vast.ai API key (redacted in this copy). Replace with a real key before running.
API_KEY = "***"
# Instance requirements
MIN_VRAM_MB = 80 * 1024 # 81,920 MB -> covers full 80 GB cards
DISK_GB = 80 # local disk for model weights + outputs
NUM_GPUS = 1
MAX_PRICE_PER_HR = 1.50 # USD – raise if no results found
DOCKER_IMAGE = "pytorch/pytorch:2.2.2-cuda12.1-cudnn8-devel"
# Bash script executed once on the instance at first boot: installs system and
# Python deps, clones DeepSeek-VL2, downloads the server script, and keeps it
# running in a restart loop. This is a RUNTIME string sent to the instance via
# the "onstart" field of the create-instance payload — edit with care.
ONSTART_SCRIPT = """#!/usr/bin/env bash
set -euo pipefail
LOG_FILE="/root/onstart.log"
exec > >(tee -a $LOG_FILE) 2>&1
echo "[BOOT] Starting onstart script at $(date)"
cd /root
retry() {
local n=0
local max=5
local delay=5
until "$@"; do
n=$((n+1))
if [ $n -ge $max ]; then
echo "[ERROR] Command failed after $n attempts: $*"
return 1
fi
echo "[WARN] Retry $n/$max: $*"
sleep $delay
done
}
echo "[SETUP] System packages..."
retry apt-get update
retry apt-get install -y git wget curl vim python3-venv
retry apt-get install lsof nano -y
echo "[SETUP] Creating venv..."
pip install uv
uv venv -p 3.11 venv
source venv/bin/activate
echo "[SETUP] Python deps..."
retry pip install --upgrade pip
python -m ensurepip
retry pip install flask numba netius "numpy<2" Pillow
echo "[SETUP] Cloning repo..."
git clone https://github.com/deepseek-ai/DeepSeek-VL2
cd DeepSeek-VL2
echo "[SETUP] Installing project..."
retry pip install --timeout 1000 .
echo "[SETUP] Downloading server..."
retry wget -q https://tilde.pro/deepseek_server.py -O deepseek_server.py
echo "[SERVER] Starting..."
export HOST=0.0.0.0
export PORT=8384
pkill -f deepseek_server.py || true
(
while true; do
echo "[SERVER] Launching..."
python deepseek_server.py || echo "[CRASH] Restarting..."
sleep 3
done
) &
echo "[HEALTH] Waiting..."
for i in {1..60}; do
if curl -s http://127.0.0.1:8384 >/dev/null; then
echo "[SUCCESS] Server is up!"
break
fi
sleep 2
done
echo "[DONE]"
"""
# Root of the Vast.ai REST API.
BASE_URL = "https://console.vast.ai/api/v0"
def headers():
    """Build the HTTP headers required on every Vast.ai API request."""
    bearer = f"Bearer {API_KEY}"
    return {"Authorization": bearer, "Content-Type": "application/json"}
def wait_for_ssh(host, port, timeout=120):
    """Poll TCP connectivity to ``host:port`` until it accepts a connection.

    Returns True as soon as a connect succeeds, False once *timeout* seconds
    have elapsed without one.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            # A successful connect is proof enough; the context manager
            # closes the socket immediately.
            with socket.create_connection((host, int(port)), timeout=5):
                return True
        except (OSError, ValueError):
            # OSError: refused/unreachable/DNS failure; ValueError: bad port
            # string. Previously a bare `except:` also swallowed
            # KeyboardInterrupt — narrowed so Ctrl-C works again.
            time.sleep(3)
    return False
def search_offers() -> list[dict]:
    """Query Vast.ai for rentable on-demand GPU offers matching the filters.

    Filters by VRAM, GPU count, full-GPU rentals only, and hourly price cap;
    returns the raw offer dicts sorted cheapest-first (server-side order).
    """
    query = {
        "gpu_ram": {"gte": MIN_VRAM_MB},
        "num_gpus": {"eq": NUM_GPUS},
        "gpu_frac": {"eq": 1.0},  # full-GPU offers only — critical filter
        "rentable": {"eq": True},
        "rented": {"eq": False},
        "dph_total": {"lte": MAX_PRICE_PER_HR},
        "type": "on-demand",
        "order": [["dph_total", "asc"]],
        "limit": 20,
    }
    resp = requests.get(
        f"{BASE_URL}/bundles/",
        headers=headers(),
        params={"q": json.dumps(query)},
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json().get("offers", [])
def display_offers(offers: list[dict]) -> None:
    """Print a numbered table of offers: ID, GPU model, VRAM, $/hr, DLPerf."""
    rule = "─" * 72
    print(f"\n{rule}")
    print(f" {'#':<4} {'ID':<12} {'GPU':<22} {'VRAM (GB)':<12} {'$/hr':<8} {'DLPerf':<8}")
    print(rule)
    for number, offer in enumerate(offers, start=1):
        print(
            f" {number:<4} {offer.get('id', '?'):<12} "
            f"{offer.get('gpu_name', 'unknown'):<22} "
            f"{offer.get('gpu_ram', 0) / 1024:<12.0f} "
            f"{offer.get('dph_total', 0):<8.3f} "
            f"{offer.get('dlperf', 0):<8.1f}"
        )
    print(f"{rule}\n")
def create_instance(offer_id: int) -> dict:
    """Accept the ask identified by *offer_id*, creating a new instance.

    Sends the Docker image, disk size, onstart script, and port mapping.
    Returns the API response dict; on success it carries 'new_contract'
    (the new instance id).
    """
    body = {
        "client_id": "me",
        "image": DOCKER_IMAGE,
        "disk": DISK_GB,
        "onstart": ONSTART_SCRIPT,
        "runtype": "ssh_direct",
        "docker_options": "-p 8384:8384",  # expose the LLM server port
        "env": {"HOST": "0.0.0.0", "PORT": "8384"},
        "force": False,
    }
    resp = requests.put(
        f"{BASE_URL}/asks/{offer_id}/", headers=headers(), json=body, timeout=30
    )
    resp.raise_for_status()
    return resp.json()
def get_instance_info(instance_id: int) -> dict:
    """Fetch the current record for *instance_id*, unwrapping the envelope.

    The API nests the record under 'instances' either as a dict or a
    one-element list; fall back to the raw payload if neither shape matches.
    """
    resp = requests.get(
        f"{BASE_URL}/instances/{instance_id}/", headers=headers(), timeout=30
    )
    resp.raise_for_status()
    payload = resp.json()
    record = payload.get("instances")
    if isinstance(record, dict):
        return record
    if isinstance(record, list) and record:
        return record[0]
    return payload
def wait_for_running(instance_id: int, timeout_sec: int = 600) -> dict:
    """Poll until the instance reports 'running', printing status transitions.

    Dumps the raw response keys once (diagnostic for response-shape drift).
    Raises TimeoutError if 'running' is not reached within *timeout_sec*.
    """
    print("⏳ Waiting for instance to start…")
    stop_at = time.time() + timeout_sec
    previous = None
    dumped_keys = False
    while time.time() < stop_at:
        info = get_instance_info(instance_id)
        if not dumped_keys:
            # One-time dump so response-shape issues are easy to diagnose.
            print(f" [debug] instance keys: {list(info.keys())}")
            dumped_keys = True
        current = info.get("actual_status") or info.get("status") or "unknown"
        if current != previous:
            ts = time.strftime("%H:%M:%S")
            print(f" [{ts}] status β†’ {current}")
            previous = current
        if current == "running":
            return info
        time.sleep(8)
    raise TimeoutError(f"Instance {instance_id} did not reach 'running' within {timeout_sec}s")
def fetch_logs(instance_id: int) -> str:
    """Return the latest onstart/container log text, or '' if unavailable.

    Tries the known log endpoints in order (skipping 404s and request
    failures), then falls back to the instance's status message.
    """
    endpoints = (
        f"{BASE_URL}/instances/request_logs/{instance_id}/",
        f"{BASE_URL}/instances/{instance_id}/logs/",
    )
    for endpoint in endpoints:
        try:
            resp = requests.get(endpoint, headers=headers(), params={"tail": 200}, timeout=30)
            if resp.status_code == 404:
                continue  # endpoint not offered by this API version
            resp.raise_for_status()
            payload = resp.json()
            # vast.ai returns {"result": "..."} or {"logs_tail": "..."} or {"logs": "..."}
            text = payload.get("result") or payload.get("logs_tail") or payload.get("logs") or ""
            if text:
                return text
        except requests.RequestException:
            continue
    # Last resort: surface whatever status text the instance record carries.
    try:
        info = get_instance_info(instance_id)
        return info.get("status_msg") or info.get("extra") or ""
    except Exception:
        return ""
def stream_onstart_logs(instance_id: int, timeout_sec: int = 900) -> None:
    """Tail the instance logs, printing each line the first time it appears.

    Stops early when a server-started marker is seen; otherwise runs until
    *timeout_sec* elapses or the user presses Ctrl-C.
    """
    print("\nπŸ“‹ Streaming onstart script output (Ctrl-C to skip)…")
    print("─" * 72)
    printed: set[str] = set()
    stop_at = time.time() + timeout_sec
    try:
        while time.time() < stop_at:
            chunk = fetch_logs(instance_id)
            if chunk:
                for entry in chunk.splitlines():
                    if entry in printed:
                        continue
                    printed.add(entry)
                    ts = time.strftime("%H:%M:%S")
                    print(f" [{ts}] {entry}")
                    lowered = entry.lower()
                    # Either the explicit marker, or a "pid ... started" line.
                    # (Parentheses mirror Python's and/or precedence.)
                    if "deepseek server started" in lowered or ("pid" in lowered and "started" in lowered):
                        print("─" * 72)
                        print("βœ… DeepSeek server started!")
                        return
            time.sleep(10)
    except KeyboardInterrupt:
        print("\n (log streaming interrupted by user)")
    print("─" * 72)
def wait_for_http(url, timeout=120):
    """Poll *url* with GET until it returns HTTP 200 or *timeout* seconds pass.

    Returns True on the first 200 response, False on timeout. Any request
    error (connection refused, DNS, timeout) is treated as "not ready yet".
    """
    # The redundant function-local `import requests, time` was removed: both
    # modules are already imported at module level, and the local import made
    # every call pay the import lookup (and fail at import, not use).
    start = time.time()
    while time.time() - start < timeout:
        try:
            r = requests.get(url, timeout=5)
            if r.status_code == 200:
                return True
        except Exception:
            pass  # best-effort poll — any failure just means "retry"
        time.sleep(3)
    return False
def main():
    """Interactive flow: search offers, confirm, rent, wait for boot, print access info."""
    # BUG FIX: the original compared against "YOUR_VAST_API_KEY_HERE" while the
    # shipped placeholder is "***", so the guard could never fire. Reject any
    # obviously-unset value.
    if API_KEY in ("", "***", "YOUR_VAST_API_KEY_HERE"):
        sys.exit("❌ Please set your API_KEY at the top of the script.")
    # 1. Search for matching offers.
    print(f"πŸ” Searching for offers with β‰₯{MIN_VRAM_MB // 1024} GB VRAM "
          f"at ≀ ${MAX_PRICE_PER_HR}/hr …")
    offers = search_offers()
    if not offers:
        sys.exit("❌ No matching offers found. Try raising MAX_PRICE_PER_HR or relaxing filters.")
    display_offers(offers)
    # 2. Pick an offer (auto-select when there is exactly one).
    if len(offers) == 1:
        chosen = offers[0]
        print(f"βœ… Auto-selecting the only match: ID {chosen['id']}")
    else:
        raw = input(f"Enter offer number to rent [1-{len(offers)}] (Enter = cheapest): ").strip()
        idx = (int(raw) - 1) if raw.isdigit() else 0
        chosen = offers[max(0, min(idx, len(offers) - 1))]  # clamp out-of-range picks
    offer_id = chosen["id"]
    gpu_name = chosen.get("gpu_name", "?")
    vram_gb = chosen.get("gpu_ram", 0) / 1024
    price = chosen.get("dph_total", 0)
    print(f"\nπŸ–₯️ Renting {gpu_name} ({vram_gb:.0f} GB VRAM) @ ${price:.3f}/hr [offer {offer_id}]")
    confirm = input("Confirm? [y/N]: ").strip().lower()
    if confirm != "y":
        sys.exit("Aborted.")
    # 3. Create instance
    print("πŸš€ Creating instance …")
    result = create_instance(offer_id)
    if not result.get("success"):
        sys.exit(f"❌ Failed to create instance:\n{json.dumps(result, indent=2)}")
    instance_id = result.get("new_contract")
    if instance_id is None:
        # Success flag without a contract id — bail out with the raw payload
        # rather than passing None into every later API call.
        sys.exit(f"❌ Failed to create instance:\n{json.dumps(result, indent=2)}")
    print(f"βœ… Instance created β†’ ID: {instance_id}")
    # 4. Wait for the container to reach 'running'.
    try:
        info = wait_for_running(instance_id)
    except TimeoutError as e:
        print(f"⚠️ {e}")
        print(" Check https://cloud.vast.ai/instances/ manually.")
        return
    # 5. Tail the onstart log until the server reports it is up.
    stream_onstart_logs(instance_id)
    # 6. Resolve connection details (SSH endpoint and mapped LLM port).
    info = get_instance_info(instance_id)
    ssh_host = info.get("ssh_host", "")
    ssh_port = info.get("ssh_port", "")
    public_ip = info.get("public_ipaddr")
    # 'ports' may be present-but-null in the API response; `or {}` keeps the
    # membership test below from raising TypeError.
    port_info = info.get("ports") or {}
    external_port = None
    if "8384/tcp" in port_info and port_info["8384/tcp"]:
        external_port = port_info["8384/tcp"][0].get("HostPort")
    if public_ip and external_port:
        llm_url = f"http://{public_ip}:{external_port}"
    else:
        llm_url = None
    if llm_url:
        print("⏳ Waiting for server to be ready...")
        if wait_for_http(llm_url):
            print("βœ… Server is reachable")
        else:
            print("⚠️ Server not responding yet")
    print("⏳ Waiting for SSH...")
    if wait_for_ssh(ssh_host, ssh_port):
        print("βœ… SSH is ready")
    else:
        print("⚠️ SSH not reachable yet")
    # 7. Summary banner.
    print("\n" + "═"*60)
    print(" πŸŽ‰ Instance is RUNNING!")
    print("═"*60)
    print(f" Instance ID : {instance_id}")
    print(f" GPU : {info.get('gpu_name')} Γ—{info.get('num_gpus', 1)}")
    print(f" VRAM : {info.get('gpu_ram', 0)/1024:.0f} GB")
    print(f" Price : ${info.get('dph_total', 0):.3f} / hr")
    print(f" SSH : ssh -p {ssh_port} root@{ssh_host}")
    print(f" LLM server : {llm_url}")
    print("═"*60)
    print("\n Run 'python vast_destroy.py' (or use the dashboard) to stop billing.\n")


if __name__ == "__main__":
    main()

Comments are disabled for this gist.