Skip to content

Instantly share code, notes, and snippets.

@luckylionheart
Last active April 9, 2026 15:41
Show Gist options
  • Select an option

  • Save luckylionheart/5fc0f6baee9a445a71f364d0266db800 to your computer and use it in GitHub Desktop.

Select an option

Save luckylionheart/5fc0f6baee9a445a71f364d0266db800 to your computer and use it in GitHub Desktop.
Create a Vast.ai instance using an API key
"""Rent a Vast.ai GPU instance via the REST API and bootstrap a DeepSeek-VL2 server on it."""
import requests
import json
import sys
import socket
import time
# Vast.ai API key (redacted in this copy). Replace with a real key before running.
API_KEY = "***"
# Instance requirements
MIN_VRAM_MB = 80 * 1024 # 81,920 MB -> covers full 80 GB cards
DISK_GB = 80 # local disk for model weights + outputs
NUM_GPUS = 1
MAX_PRICE_PER_HR = 1.50 # USD – raise if no results found
DOCKER_IMAGE = "pytorch/pytorch:2.2.2-cuda12.1-cudnn8-devel"
# Bash script executed once on the instance at first boot: installs system and
# Python deps, clones DeepSeek-VL2, downloads the server script, and keeps it
# running in a restart loop. This is a RUNTIME string sent to the instance via
# the "onstart" field of the create-instance payload — edit with care.
ONSTART_SCRIPT = """#!/usr/bin/env bash
set -euo pipefail
LOG_FILE="/root/onstart.log"
exec > >(tee -a $LOG_FILE) 2>&1
echo "[BOOT] Starting onstart script at $(date)"
cd /root
retry() {
local n=0
local max=5
local delay=5
until "$@"; do
n=$((n+1))
if [ $n -ge $max ]; then
echo "[ERROR] Command failed after $n attempts: $*"
return 1
fi
echo "[WARN] Retry $n/$max: $*"
sleep $delay
done
}
echo "[SETUP] System packages..."
retry apt-get update
retry apt-get install -y git wget curl vim python3-venv
retry apt-get install lsof nano -y
echo "[SETUP] Creating venv..."
pip install uv
uv venv -p 3.11 venv
source venv/bin/activate
echo "[SETUP] Python deps..."
retry pip install --upgrade pip
python -m ensurepip
retry pip install flask numba netius "numpy<2" Pillow
echo "[SETUP] Cloning repo..."
git clone https://github.com/deepseek-ai/DeepSeek-VL2
cd DeepSeek-VL2
echo "[SETUP] Installing project..."
retry pip install --timeout 1000 .
echo "[SETUP] Downloading server..."
retry wget -q https://tilde.pro/deepseek_server.py -O deepseek_server.py
echo "[SERVER] Starting..."
export HOST=0.0.0.0
export PORT=8384
pkill -f deepseek_server.py || true
(
while true; do
echo "[SERVER] Launching..."
python deepseek_server.py || echo "[CRASH] Restarting..."
sleep 3
done
) &
echo "[HEALTH] Waiting..."
for i in {1..60}; do
if curl -s http://127.0.0.1:8384 >/dev/null; then
echo "[SUCCESS] Server is up!"
break
fi
sleep 2
done
echo "[DONE]"
"""
# Root of the Vast.ai REST API.
BASE_URL = "https://console.vast.ai/api/v0"
def headers():
    """Build the HTTP headers required on every Vast.ai API request."""
    bearer = f"Bearer {API_KEY}"
    return {"Authorization": bearer, "Content-Type": "application/json"}
def wait_for_ssh(host, port, timeout=120):
    """Poll TCP connectivity to ``host:port`` until it accepts a connection.

    Returns True as soon as a connect succeeds, False once *timeout* seconds
    have elapsed without one.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            # A successful connect is proof enough; the context manager
            # closes the socket immediately.
            with socket.create_connection((host, int(port)), timeout=5):
                return True
        except (OSError, ValueError):
            # OSError: refused/unreachable/DNS failure; ValueError: bad port
            # string. Previously a bare `except:` also swallowed
            # KeyboardInterrupt — narrowed so Ctrl-C works again.
            time.sleep(3)
    return False
def search_offers() -> list[dict]:
    """Query Vast.ai for rentable on-demand GPU offers matching the filters.

    Filters by VRAM, GPU count, full-GPU rentals only, and hourly price cap;
    returns the raw offer dicts sorted cheapest-first (server-side order).
    """
    query = {
        "gpu_ram": {"gte": MIN_VRAM_MB},
        "num_gpus": {"eq": NUM_GPUS},
        "gpu_frac": {"eq": 1.0},  # full-GPU offers only — critical filter
        "rentable": {"eq": True},
        "rented": {"eq": False},
        "dph_total": {"lte": MAX_PRICE_PER_HR},
        "type": "on-demand",
        "order": [["dph_total", "asc"]],
        "limit": 20,
    }
    resp = requests.get(
        f"{BASE_URL}/bundles/",
        headers=headers(),
        params={"q": json.dumps(query)},
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json().get("offers", [])
def display_offers(offers: list[dict]) -> None:
    """Print a numbered table of offers: ID, GPU model, VRAM, $/hr, DLPerf."""
    rule = "─" * 72
    print(f"\n{rule}")
    print(f" {'#':<4} {'ID':<12} {'GPU':<22} {'VRAM (GB)':<12} {'$/hr':<8} {'DLPerf':<8}")
    print(rule)
    for number, offer in enumerate(offers, start=1):
        print(
            f" {number:<4} {offer.get('id', '?'):<12} "
            f"{offer.get('gpu_name', 'unknown'):<22} "
            f"{offer.get('gpu_ram', 0) / 1024:<12.0f} "
            f"{offer.get('dph_total', 0):<8.3f} "
            f"{offer.get('dlperf', 0):<8.1f}"
        )
    print(f"{rule}\n")
def create_instance(offer_id: int) -> dict:
    """Accept the ask identified by *offer_id*, creating a new instance.

    Sends the Docker image, disk size, onstart script, and port mapping.
    Returns the API response dict; on success it carries 'new_contract'
    (the new instance id).
    """
    body = {
        "client_id": "me",
        "image": DOCKER_IMAGE,
        "disk": DISK_GB,
        "onstart": ONSTART_SCRIPT,
        "runtype": "ssh_direct",
        "docker_options": "-p 8384:8384",  # expose the LLM server port
        "env": {"HOST": "0.0.0.0", "PORT": "8384"},
        "force": False,
    }
    resp = requests.put(
        f"{BASE_URL}/asks/{offer_id}/", headers=headers(), json=body, timeout=30
    )
    resp.raise_for_status()
    return resp.json()
def get_instance_info(instance_id: int) -> dict:
    """Fetch the current record for *instance_id*, unwrapping the envelope.

    The API nests the record under 'instances' either as a dict or a
    one-element list; fall back to the raw payload if neither shape matches.
    """
    resp = requests.get(
        f"{BASE_URL}/instances/{instance_id}/", headers=headers(), timeout=30
    )
    resp.raise_for_status()
    payload = resp.json()
    record = payload.get("instances")
    if isinstance(record, dict):
        return record
    if isinstance(record, list) and record:
        return record[0]
    return payload
def wait_for_running(instance_id: int, timeout_sec: int = 600) -> dict:
    """Poll until the instance reports 'running', printing status transitions.

    Dumps the raw response keys once (diagnostic for response-shape drift).
    Raises TimeoutError if 'running' is not reached within *timeout_sec*.
    """
    print("⏳ Waiting for instance to start…")
    stop_at = time.time() + timeout_sec
    previous = None
    dumped_keys = False
    while time.time() < stop_at:
        info = get_instance_info(instance_id)
        if not dumped_keys:
            # One-time dump so response-shape issues are easy to diagnose.
            print(f" [debug] instance keys: {list(info.keys())}")
            dumped_keys = True
        current = info.get("actual_status") or info.get("status") or "unknown"
        if current != previous:
            ts = time.strftime("%H:%M:%S")
            print(f" [{ts}] status β†’ {current}")
            previous = current
        if current == "running":
            return info
        time.sleep(8)
    raise TimeoutError(f"Instance {instance_id} did not reach 'running' within {timeout_sec}s")
def fetch_logs(instance_id: int) -> str:
    """Return the latest onstart/container log text, or '' if unavailable.

    Tries the known log endpoints in order (skipping 404s and request
    failures), then falls back to the instance's status message.
    """
    endpoints = (
        f"{BASE_URL}/instances/request_logs/{instance_id}/",
        f"{BASE_URL}/instances/{instance_id}/logs/",
    )
    for endpoint in endpoints:
        try:
            resp = requests.get(endpoint, headers=headers(), params={"tail": 200}, timeout=30)
            if resp.status_code == 404:
                continue  # endpoint not offered by this API version
            resp.raise_for_status()
            payload = resp.json()
            # vast.ai returns {"result": "..."} or {"logs_tail": "..."} or {"logs": "..."}
            text = payload.get("result") or payload.get("logs_tail") or payload.get("logs") or ""
            if text:
                return text
        except requests.RequestException:
            continue
    # Last resort: surface whatever status text the instance record carries.
    try:
        info = get_instance_info(instance_id)
        return info.get("status_msg") or info.get("extra") or ""
    except Exception:
        return ""
def stream_onstart_logs(instance_id: int, timeout_sec: int = 900) -> None:
    """Tail the instance logs, printing each line the first time it appears.

    Stops early when a server-started marker is seen; otherwise runs until
    *timeout_sec* elapses or the user presses Ctrl-C.
    """
    print("\nπŸ“‹ Streaming onstart script output (Ctrl-C to skip)…")
    print("─" * 72)
    printed: set[str] = set()
    stop_at = time.time() + timeout_sec
    try:
        while time.time() < stop_at:
            chunk = fetch_logs(instance_id)
            if chunk:
                for entry in chunk.splitlines():
                    if entry in printed:
                        continue
                    printed.add(entry)
                    ts = time.strftime("%H:%M:%S")
                    print(f" [{ts}] {entry}")
                    lowered = entry.lower()
                    # Either the explicit marker, or a "pid ... started" line.
                    # (Parentheses mirror Python's and/or precedence.)
                    if "deepseek server started" in lowered or ("pid" in lowered and "started" in lowered):
                        print("─" * 72)
                        print("βœ… DeepSeek server started!")
                        return
            time.sleep(10)
    except KeyboardInterrupt:
        print("\n (log streaming interrupted by user)")
    print("─" * 72)
def wait_for_http(url, timeout=120):
    """Poll *url* with GET until it returns HTTP 200 or *timeout* seconds pass.

    Returns True on the first 200 response, False on timeout. Any request
    error (connection refused, DNS, timeout) is treated as "not ready yet".
    """
    # The redundant function-local `import requests, time` was removed: both
    # modules are already imported at module level, and the local import made
    # every call pay the import lookup (and fail at import, not use).
    start = time.time()
    while time.time() - start < timeout:
        try:
            r = requests.get(url, timeout=5)
            if r.status_code == 200:
                return True
        except Exception:
            pass  # best-effort poll — any failure just means "retry"
        time.sleep(3)
    return False
def main():
    """Interactive flow: search offers, confirm, rent, wait for boot, print access info."""
    # BUG FIX: the original compared against "YOUR_VAST_API_KEY_HERE" while the
    # shipped placeholder is "***", so the guard could never fire. Reject any
    # obviously-unset value.
    if API_KEY in ("", "***", "YOUR_VAST_API_KEY_HERE"):
        sys.exit("❌ Please set your API_KEY at the top of the script.")
    # 1. Search for matching offers.
    print(f"πŸ” Searching for offers with β‰₯{MIN_VRAM_MB // 1024} GB VRAM "
          f"at ≀ ${MAX_PRICE_PER_HR}/hr …")
    offers = search_offers()
    if not offers:
        sys.exit("❌ No matching offers found. Try raising MAX_PRICE_PER_HR or relaxing filters.")
    display_offers(offers)
    # 2. Pick an offer (auto-select when there is exactly one).
    if len(offers) == 1:
        chosen = offers[0]
        print(f"βœ… Auto-selecting the only match: ID {chosen['id']}")
    else:
        raw = input(f"Enter offer number to rent [1-{len(offers)}] (Enter = cheapest): ").strip()
        idx = (int(raw) - 1) if raw.isdigit() else 0
        chosen = offers[max(0, min(idx, len(offers) - 1))]  # clamp out-of-range picks
    offer_id = chosen["id"]
    gpu_name = chosen.get("gpu_name", "?")
    vram_gb = chosen.get("gpu_ram", 0) / 1024
    price = chosen.get("dph_total", 0)
    print(f"\nπŸ–₯️ Renting {gpu_name} ({vram_gb:.0f} GB VRAM) @ ${price:.3f}/hr [offer {offer_id}]")
    confirm = input("Confirm? [y/N]: ").strip().lower()
    if confirm != "y":
        sys.exit("Aborted.")
    # 3. Create instance
    print("πŸš€ Creating instance …")
    result = create_instance(offer_id)
    if not result.get("success"):
        sys.exit(f"❌ Failed to create instance:\n{json.dumps(result, indent=2)}")
    instance_id = result.get("new_contract")
    if instance_id is None:
        # Success flag without a contract id — bail out with the raw payload
        # rather than passing None into every later API call.
        sys.exit(f"❌ Failed to create instance:\n{json.dumps(result, indent=2)}")
    print(f"βœ… Instance created β†’ ID: {instance_id}")
    # 4. Wait for the container to reach 'running'.
    try:
        info = wait_for_running(instance_id)
    except TimeoutError as e:
        print(f"⚠️ {e}")
        print(" Check https://cloud.vast.ai/instances/ manually.")
        return
    # 5. Tail the onstart log until the server reports it is up.
    stream_onstart_logs(instance_id)
    # 6. Resolve connection details (SSH endpoint and mapped LLM port).
    info = get_instance_info(instance_id)
    ssh_host = info.get("ssh_host", "")
    ssh_port = info.get("ssh_port", "")
    public_ip = info.get("public_ipaddr")
    # 'ports' may be present-but-null in the API response; `or {}` keeps the
    # membership test below from raising TypeError.
    port_info = info.get("ports") or {}
    external_port = None
    if "8384/tcp" in port_info and port_info["8384/tcp"]:
        external_port = port_info["8384/tcp"][0].get("HostPort")
    if public_ip and external_port:
        llm_url = f"http://{public_ip}:{external_port}"
    else:
        llm_url = None
    if llm_url:
        print("⏳ Waiting for server to be ready...")
        if wait_for_http(llm_url):
            print("βœ… Server is reachable")
        else:
            print("⚠️ Server not responding yet")
    print("⏳ Waiting for SSH...")
    if wait_for_ssh(ssh_host, ssh_port):
        print("βœ… SSH is ready")
    else:
        print("⚠️ SSH not reachable yet")
    # 7. Summary banner.
    print("\n" + "═"*60)
    print(" πŸŽ‰ Instance is RUNNING!")
    print("═"*60)
    print(f" Instance ID : {instance_id}")
    print(f" GPU : {info.get('gpu_name')} Γ—{info.get('num_gpus', 1)}")
    print(f" VRAM : {info.get('gpu_ram', 0)/1024:.0f} GB")
    print(f" Price : ${info.get('dph_total', 0):.3f} / hr")
    print(f" SSH : ssh -p {ssh_port} root@{ssh_host}")
    print(f" LLM server : {llm_url}")
    print("═"*60)
    print("\n Run 'python vast_destroy.py' (or use the dashboard) to stop billing.\n")


if __name__ == "__main__":
    main()

Comments are disabled for this gist.