Last active
April 9, 2026 15:41
-
-
Save luckylionheart/5fc0f6baee9a445a71f364d0266db800 to your computer and use it in GitHub Desktop.
Create a Vast.ai instance using an API key
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import requests | |
| import json | |
| import sys | |
| import socket | |
| import time | |
# Vast.ai API key used by every authenticated request — set this before running.
API_KEY = "***"
# Instance requirements
MIN_VRAM_MB = 80 * 1024  # 81,920 MB — covers full 80 GB cards
DISK_GB = 80  # local disk for model weights + outputs
NUM_GPUS = 1
MAX_PRICE_PER_HR = 1.50  # USD — raise if no results found
# Base Docker image the rented instance boots into.
DOCKER_IMAGE = "pytorch/pytorch:2.2.2-cuda12.1-cudnn8-devel"
# Bash script run on the instance at first boot: installs packages, creates a
# venv, clones DeepSeek-VL2, downloads deepseek_server.py, keeps it running in
# a restart loop, then health-checks it on 127.0.0.1:8384. Passed verbatim in
# the "onstart" field of the create-instance request.
ONSTART_SCRIPT = """#!/usr/bin/env bash
set -euo pipefail
LOG_FILE="/root/onstart.log"
exec > >(tee -a $LOG_FILE) 2>&1
echo "[BOOT] Starting onstart script at $(date)"
cd /root
retry() {
local n=0
local max=5
local delay=5
until "$@"; do
n=$((n+1))
if [ $n -ge $max ]; then
echo "[ERROR] Command failed after $n attempts: $*"
return 1
fi
echo "[WARN] Retry $n/$max: $*"
sleep $delay
done
}
echo "[SETUP] System packages..."
retry apt-get update
retry apt-get install -y git wget curl vim python3-venv
retry apt-get install lsof nano -y
echo "[SETUP] Creating venv..."
pip install uv
uv venv -p 3.11 venv
source venv/bin/activate
echo "[SETUP] Python deps..."
retry pip install --upgrade pip
python -m ensurepip
retry pip install flask numba netius "numpy<2" Pillow
echo "[SETUP] Cloning repo..."
git clone https://github.com/deepseek-ai/DeepSeek-VL2
cd DeepSeek-VL2
echo "[SETUP] Installing project..."
retry pip install --timeout 1000 .
echo "[SETUP] Downloading server..."
retry wget -q https://tilde.pro/deepseek_server.py -O deepseek_server.py
echo "[SERVER] Starting..."
export HOST=0.0.0.0
export PORT=8384
pkill -f deepseek_server.py || true
(
while true; do
echo "[SERVER] Launching..."
python deepseek_server.py || echo "[CRASH] Restarting..."
sleep 3
done
) &
echo "[HEALTH] Waiting..."
for i in {1..60}; do
if curl -s http://127.0.0.1:8384 >/dev/null; then
echo "[SUCCESS] Server is up!"
break
fi
sleep 2
done
echo "[DONE]"
"""
# Root URL of the Vast.ai REST API.
BASE_URL = "https://console.vast.ai/api/v0"
def headers():
    """Build the auth + JSON content-type headers for every Vast.ai API call."""
    token = f"Bearer {API_KEY}"
    return {"Authorization": token, "Content-Type": "application/json"}
def wait_for_ssh(host, port, timeout=120):
    """Poll until a TCP connection to ``host:port`` succeeds.

    Args:
        host: hostname or IP of the SSH endpoint.
        port: TCP port (int or numeric string).
        timeout: overall budget in seconds; 0 means a single immediate failure.

    Returns:
        True once a connection is accepted, False if the deadline passes.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            # Context manager guarantees the socket is closed even if an
            # error occurs between connect and close (original leaked it).
            with socket.create_connection((host, int(port)), timeout=5):
                return True
        except (OSError, ValueError, TypeError):
            # BUG FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit. ValueError/TypeError keep the
            # original tolerance for a malformed/empty port value.
            time.sleep(3)
    return False
def search_offers() -> list[dict]:
    """
    Search for available on-demand GPU instances with >= 80 GB VRAM.
    Returns a list of offers sorted by price (cheapest first).
    """
    query = {
        "gpu_ram": {"gte": MIN_VRAM_MB},
        "num_gpus": {"eq": NUM_GPUS},
        "gpu_frac": {"eq": 1.0},  # whole-GPU offers only — critical filter
        "rentable": {"eq": True},
        "rented": {"eq": False},
        "dph_total": {"lte": MAX_PRICE_PER_HR},
        "type": "on-demand",
        "order": [["dph_total", "asc"]],
        "limit": 20,
    }
    resp = requests.get(
        f"{BASE_URL}/bundles/",
        headers=headers(),
        params={"q": json.dumps(query)},
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json().get("offers", [])
def display_offers(offers: list[dict]) -> None:
    """Print a numbered table of offers: ID, GPU model, VRAM, price, DLPerf."""
    rule = "β" * 72
    print(f"\n{rule}")
    print(f" {'#':<4} {'ID':<12} {'GPU':<22} {'VRAM (GB)':<12} {'$/hr':<8} {'DLPerf':<8}")
    print(rule)
    for number, offer in enumerate(offers, 1):
        oid = offer.get("id", "?")
        model = offer.get("gpu_name", "unknown")
        vram = offer.get("gpu_ram", 0) / 1024  # API reports MB; show GB
        cost = offer.get("dph_total", 0)
        perf = offer.get("dlperf", 0)
        print(f" {number:<4} {oid:<12} {model:<22} {vram:<12.0f} {cost:<8.3f} {perf:<8.1f}")
    print(f"{rule}\n")
def create_instance(offer_id: int) -> dict:
    """Rent offer ``offer_id`` via PUT /asks/<id>/ and return the API response."""
    body = {
        "client_id": "me",
        "image": DOCKER_IMAGE,
        "disk": DISK_GB,
        "onstart": ONSTART_SCRIPT,
        "runtype": "ssh_direct",
        "docker_options": "-p 8384:8384",  # expose the LLM server port
        "env": {"HOST": "0.0.0.0", "PORT": "8384"},
        "force": False,
    }
    url = f"{BASE_URL}/asks/{offer_id}/"
    resp = requests.put(url, headers=headers(), json=body, timeout=30)
    resp.raise_for_status()
    return resp.json()
def get_instance_info(instance_id: int) -> dict:
    """Fetch current status of a running instance."""
    url = f"{BASE_URL}/instances/{instance_id}/"
    resp = requests.get(url, headers=headers(), timeout=30)
    resp.raise_for_status()
    payload = resp.json()
    # The API wraps the record either as a dict or a one-element list under
    # "instances"; fall back to the raw payload if neither shape matches.
    record = payload.get("instances")
    if isinstance(record, dict):
        return record
    if isinstance(record, list) and record:
        return record[0]
    return payload
def wait_for_running(instance_id: int, timeout_sec: int = 600) -> dict:
    """Poll until instance status == 'running' or timeout, printing live status."""
    print("β³ Waiting for instance to startβ¦")
    deadline = time.time() + timeout_sec
    previous = None
    dumped_keys = False
    while time.time() < deadline:
        info = get_instance_info(instance_id)
        if not dumped_keys:
            # Dump raw keys once so response-shape issues are easy to diagnose.
            print(f" [debug] instance keys: {list(info.keys())}")
            dumped_keys = True
        current = info.get("actual_status") or info.get("status") or "unknown"
        if current != previous:
            stamp = time.strftime("%H:%M:%S")
            print(f" [{stamp}] status β {current}")
            previous = current
        if current == "running":
            return info
        time.sleep(8)
    raise TimeoutError(f"Instance {instance_id} did not reach 'running' within {timeout_sec}s")
def fetch_logs(instance_id: int) -> str:
    """Fetch the current onstart/container log tail for an instance."""
    endpoints = (
        f"{BASE_URL}/instances/request_logs/{instance_id}/",
        f"{BASE_URL}/instances/{instance_id}/logs/",
    )
    for endpoint in endpoints:
        try:
            resp = requests.get(endpoint, headers=headers(), params={"tail": 200}, timeout=30)
            if resp.status_code == 404:
                continue  # this deployment doesn't expose that endpoint
            resp.raise_for_status()
            payload = resp.json()
            # vast.ai returns {"result": "..."} or {"logs_tail": "..."} or {"logs": "..."}
            tail = payload.get("result") or payload.get("logs_tail") or payload.get("logs") or ""
            if tail:
                return tail
        except requests.RequestException:
            continue  # best-effort: try the next endpoint
    # Fall back to whatever status text the instance record itself carries.
    try:
        record = get_instance_info(instance_id)
        return record.get("status_msg") or record.get("extra") or ""
    except Exception:
        return ""
def stream_onstart_logs(instance_id: int, timeout_sec: int = 900) -> None:
    """
    Poll the instance log endpoint and print new lines as they arrive.
    Stops when the server startup line is detected or timeout is reached.
    """
    print("\nπ Streaming onstart script output (Ctrl-C to skip)β¦")
    print("β" * 72)
    printed: set[str] = set()
    deadline = time.time() + timeout_sec
    try:
        while time.time() < deadline:
            tail = fetch_logs(instance_id)
            for entry in (tail.splitlines() if tail else []):
                if entry in printed:
                    continue
                printed.add(entry)
                stamp = time.strftime("%H:%M:%S")
                print(f" [{stamp}] {entry}")
                lowered = entry.lower()
                # Parentheses make the original operator precedence explicit:
                # "deepseek server started"  OR  ("pid" AND "started").
                if "deepseek server started" in lowered or ("pid" in lowered and "started" in lowered):
                    print("β" * 72)
                    print("β DeepSeek server started!")
                    return
            time.sleep(10)
    except KeyboardInterrupt:
        print("\n (log streaming interrupted by user)")
    print("β" * 72)
def wait_for_http(url, timeout=120):
    """Poll ``url`` with GET until it answers 200 or ``timeout`` seconds pass.

    Args:
        url: full http(s) URL to probe.
        timeout: overall budget in seconds.

    Returns:
        True on the first 200 response, False if the deadline passes.
    """
    # FIX: dropped the redundant function-local `import requests, time` —
    # both modules are already imported at module level; the locals only
    # shadowed them.
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            if requests.get(url, timeout=5).status_code == 200:
                return True
        except Exception:
            # Best-effort probe: connection errors just mean "not up yet".
            pass
        time.sleep(3)
    return False
def main():
    """Interactive driver: search offers, rent one, wait for boot, print access info."""
    # BUG FIX: the shipped placeholder key is "***", but the original guard
    # only checked "YOUR_VAST_API_KEY_HERE", so it could never fire. Check
    # every known placeholder (and empty) value.
    if API_KEY in ("", "***", "YOUR_VAST_API_KEY_HERE"):
        sys.exit("β Please set your API_KEY at the top of the script.")
    # 1. Find candidate offers
    print(f"π Searching for offers with β₯{MIN_VRAM_MB // 1024} GB VRAM "
          f"at β€ ${MAX_PRICE_PER_HR}/hr β¦")
    offers = search_offers()
    if not offers:
        sys.exit("β No matching offers found. Try raising MAX_PRICE_PER_HR or relaxing filters.")
    display_offers(offers)
    # 2. Pick one (Enter or invalid input falls back to the cheapest)
    if len(offers) == 1:
        chosen = offers[0]
        print(f"β Auto-selecting the only match: ID {chosen['id']}")
    else:
        raw = input(f"Enter offer number to rent [1-{len(offers)}] (Enter = cheapest): ").strip()
        idx = (int(raw) - 1) if raw.isdigit() else 0
        chosen = offers[max(0, min(idx, len(offers) - 1))]  # clamp to valid range
    offer_id = chosen["id"]
    gpu_name = chosen.get("gpu_name", "?")
    vram_gb = chosen.get("gpu_ram", 0) / 1024
    price = chosen.get("dph_total", 0)
    print(f"\nπ₯οΈ Renting {gpu_name} ({vram_gb:.0f} GB VRAM) @ ${price:.3f}/hr [offer {offer_id}]")
    confirm = input("Confirm? [y/N]: ").strip().lower()
    if confirm != "y":
        sys.exit("Aborted.")
    # 3. Create instance
    print("π Creating instance β¦")
    result = create_instance(offer_id)
    if not result.get("success"):
        sys.exit(f"β Failed to create instance:\n{json.dumps(result, indent=2)}")
    instance_id = result.get("new_contract")
    print(f"β Instance created β ID: {instance_id}")
    # 4. Wait for the container to reach 'running'
    try:
        info = wait_for_running(instance_id)
    except TimeoutError as e:
        print(f"β οΈ {e}")
        print(" Check https://cloud.vast.ai/instances/ manually.")
        return
    # 5. Follow the onstart script, then gather connection details
    stream_onstart_logs(instance_id)
    info = get_instance_info(instance_id)
    ssh_host = info.get("ssh_host", "")
    ssh_port = info.get("ssh_port", "")
    public_ip = info.get("public_ipaddr")
    # ROBUSTNESS FIX: "ports" may be present but None; `or {}` covers both
    # the missing-key and explicit-null cases (original only handled missing).
    port_info = info.get("ports") or {}
    external_port = None
    if "8384/tcp" in port_info and port_info["8384/tcp"]:
        external_port = port_info["8384/tcp"][0].get("HostPort")
    llm_url = f"http://{public_ip}:{external_port}" if public_ip and external_port else None
    if llm_url:
        print("β³ Waiting for server to be ready...")
        if wait_for_http(llm_url):
            print("β Server is reachable")
        else:
            print("β οΈ Server not responding yet")
    print("β³ Waiting for SSH...")
    if wait_for_ssh(ssh_host, ssh_port):
        print("β SSH is ready")
    else:
        print("β οΈ SSH not reachable yet")
    # 6. Summary
    print("\n" + "β"*60)
    print(" π Instance is RUNNING!")
    print("β"*60)
    print(f" Instance ID : {instance_id}")
    print(f" GPU : {info.get('gpu_name')} Γ{info.get('num_gpus', 1)}")
    print(f" VRAM : {info.get('gpu_ram', 0)/1024:.0f} GB")
    print(f" Price : ${info.get('dph_total', 0):.3f} / hr")
    print(f" SSH : ssh -p {ssh_port} root@{ssh_host}")
    print(f" LLM server : {llm_url}")
    print("β"*60)
    print("\n Run 'python vast_destroy.py' (or use the dashboard) to stop billing.\n")
# Entry point: only run the interactive flow when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
Comments are disabled for this gist.