logs for debugging https://github.com/ray-project/ray/issues/22122 (2)
======== Autoscaler status: 2022-02-08 09:02:10.177916 ======== | |
Node status | |
--------------------------------------------------------------- | |
Healthy: | |
1 head | |
1 wkr-15cpu30g-ondemand | |
Pending: | |
(no pending nodes) | |
Recent failures: | |
(no failures) | |
Resources | |
--------------------------------------------------------------- | |
Usage: | |
0.0/15.0 CPU | |
0.00/25.900 GiB memory | |
0.00/10.270 GiB object_store_memory | |
Demands: | |
(no resource demands) | |
ray-py38-cu112,karpenter:2022-02-08 09:02:10,232 DEBUG gcs_utils.py:245 -- internal_kv_put b'__autoscaling_status_legacy' b"Cluster status: 1 nodes\n - MostDelayedHeartbeats: {'10.16.86.133': 0.5881059169769287, '10.16.102.85': 0.5880551338195801}\n - NodeIdleSeconds: Min=71278 Mean=71285 Max=71292\n - ResourceUsage: 0.0/15.0 CPU, 0.0 GiB/25.9 GiB memory, 0.0 GiB/10.27 GiB object_store_memory\n - TimeSinceLastHeartbeat: Min=0 Mean=0 Max=0\nWorker node types:\n - wkr-15cpu30g-ondemand: 1" True None | |
ray-py38-cu112,karpenter:2022-02-08 09:02:10,236 DEBUG legacy_info_string.py:26 -- Cluster status: 1 nodes | |
- MostDelayedHeartbeats: {'10.16.86.133': 0.5881059169769287, '10.16.102.85': 0.5880551338195801} | |
- NodeIdleSeconds: Min=71278 Mean=71285 Max=71292 | |
- ResourceUsage: 0.0/15.0 CPU, 0.0 GiB/25.9 GiB memory, 0.0 GiB/10.27 GiB object_store_memory | |
- TimeSinceLastHeartbeat: Min=0 Mean=0 Max=0 | |
Worker node types: | |
- wkr-15cpu30g-ondemand: 1 | |
ray-py38-cu112,karpenter:2022-02-08 09:02:10,384 DEBUG autoscaler.py:1210 -- ray-py38-cu112-wkr-15cpu30g--ondemand-4pvk5 is not being updated and passes config check (can_update=True). | |
ray-py38-cu112,karpenter:2022-02-08 09:02:10,454 DEBUG autoscaler.py:1210 -- ray-py38-cu112-wkr-15cpu30g--ondemand-4pvk5 is not being updated and passes config check (can_update=True). | |
ray-py38-cu112,karpenter:2022-02-08 09:02:10,672 DEBUG resource_demand_scheduler.py:201 -- Cluster resources: [{'node:10.16.102.85': 1.0, 'object_store_memory': 2053491916.0, 'memory': 5261334937.0}, {'node:10.16.86.133': 1.0, 'memory': 22548578304.0, 'object_store_memory': 8973495091.0, 'CPU': 15.0}] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:10,672 DEBUG resource_demand_scheduler.py:202 -- Node counts: defaultdict(<class 'int'>, {'head': 1, 'wkr-15cpu30g-ondemand': 1}) | |
ray-py38-cu112,karpenter:2022-02-08 09:02:10,672 DEBUG resource_demand_scheduler.py:219 -- Placement group demands: [] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:10,672 DEBUG resource_demand_scheduler.py:283 -- Resource demands: [] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:10,672 DEBUG resource_demand_scheduler.py:284 -- Unfulfilled demands: [] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:10,672 DEBUG resource_demand_scheduler.py:292 -- Final unfulfilled: [] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:10,815 DEBUG resource_demand_scheduler.py:317 -- Node requests: {} | |
ray-py38-cu112,karpenter:2022-02-08 09:02:10,876 DEBUG gcs_utils.py:245 -- internal_kv_put b'__autoscaling_status' b'{"load_metrics_report": {"usage": {"node:10.16.102.85": [0.0, 1.0], "object_store_memory": [0.0, 11026987007.0], "memory": [0.0, 27809913241.0], "CPU": [0.0, 15.0], "node:10.16.86.133": [0.0, 1.0]}, "resource_demand": [], "pg_demand": [], "request_demand": [], "node_types": [[{"object_store_memory": 2053491916.0, "memory": 5261334937.0, "node:10.16.102.85": 1.0}, 1], [{"object_store_memory": 8973495091.0, "node:10.16.86.133": 1.0, "memory": 22548578304.0, "CPU": 15.0}, 1]], "head_ip": null}, "time": 1644339729.5924182, "monitor_pid": 9165, "autoscaler_report": {"active_nodes": {"head": 1, "wkr-15cpu30g-ondemand": 1}, "pending_nodes": [], "pending_launches": {}, "failed_nodes": []}}' True None | |
ray-py38-cu112,karpenter:2022-02-08 09:02:15,883 INFO monitor.py:521 -- Logging raw resource message pulled from GCS. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:15,883 INFO monitor.py:522 -- batch { | |
node_id: "\033\270?-a\253\n\211X>`\223\\Y5\371g\325\240\023\254\343\335\022\305\360\251X" | |
resources_available { | |
key: "CPU" | |
value: 8.0 | |
} | |
resources_available { | |
key: "memory" | |
value: 22548578304.0 | |
} | |
resources_available { | |
key: "node:10.16.86.133" | |
value: 1.0 | |
} | |
resources_available { | |
key: "object_store_memory" | |
value: 8973495091.0 | |
} | |
resources_available_changed: true | |
resources_total { | |
key: "CPU" | |
value: 15.0 | |
} | |
resources_total { | |
key: "memory" | |
value: 22548578304.0 | |
} | |
resources_total { | |
key: "node:10.16.86.133" | |
value: 1.0 | |
} | |
resources_total { | |
key: "object_store_memory" | |
value: 8973495091.0 | |
} | |
resource_load { | |
key: "CPU" | |
value: 1.0 | |
} | |
resource_load_by_shape { | |
resource_demands { | |
shape { | |
key: "CPU" | |
value: 1.0 | |
} | |
num_ready_requests_queued: 1 | |
backlog_size: 193 | |
} | |
} | |
node_manager_address: "10.16.86.133" | |
} | |
batch { | |
node_id: "\245\250\333\361\356\245\266\370\273\023\007\265I_\030\344_\354\215\354Rv\342epX\0039" | |
resources_available { | |
key: "memory" | |
value: 5261334937.0 | |
} | |
resources_available { | |
key: "node:10.16.102.85" | |
value: 1.0 | |
} | |
resources_available { | |
key: "object_store_memory" | |
value: 2053489974.0 | |
} | |
resources_available_changed: true | |
resources_total { | |
key: "memory" | |
value: 5261334937.0 | |
} | |
resources_total { | |
key: "node:10.16.102.85" | |
value: 1.0 | |
} | |
resources_total { | |
key: "object_store_memory" | |
value: 2053491916.0 | |
} | |
resource_load_by_shape { | |
} | |
node_manager_address: "10.16.102.85" | |
} | |
resource_load_by_shape { | |
resource_demands { | |
shape { | |
key: "CPU" | |
value: 1.0 | |
} | |
num_ready_requests_queued: 1 | |
backlog_size: 193 | |
} | |
} | |
placement_group_load { | |
} | |
ray-py38-cu112,karpenter:2022-02-08 09:02:15,883 INFO monitor.py:523 -- Done logging raw resource message. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:15,883 DEBUG gcs_utils.py:228 -- internal_kv_get b'autoscaler_resource_request' None | |
ray-py38-cu112,karpenter:2022-02-08 09:02:16,464 INFO autoscaler.py:327 -- | |
======== Autoscaler status: 2022-02-08 09:02:16.464651 ======== | |
Node status | |
--------------------------------------------------------------- | |
Healthy: | |
1 head | |
1 wkr-15cpu30g-ondemand | |
Pending: | |
(no pending nodes) | |
Recent failures: | |
(no failures) | |
Resources | |
--------------------------------------------------------------- | |
Usage: | |
7.0/15.0 CPU | |
0.00/25.900 GiB memory | |
0.00/10.270 GiB object_store_memory | |
Demands: | |
{'CPU': 1.0}: 194+ pending tasks/actors | |
ray-py38-cu112,karpenter:2022-02-08 09:02:16,527 DEBUG gcs_utils.py:245 -- internal_kv_put b'__autoscaling_status_legacy' b"Cluster status: 1 nodes\n - MostDelayedHeartbeats: {'10.16.86.133': 0.5811362266540527, '10.16.102.85': 0.5810155868530273}\n - NodeIdleSeconds: Min=0 Mean=0 Max=0\n - ResourceUsage: 7.0/15.0 CPU, 0.0 GiB/25.9 GiB memory, 0.0 GiB/10.27 GiB object_store_memory\n - TimeSinceLastHeartbeat: Min=0 Mean=0 Max=0\nWorker node types:\n - wkr-15cpu30g-ondemand: 1" True None | |
ray-py38-cu112,karpenter:2022-02-08 09:02:16,532 DEBUG legacy_info_string.py:26 -- Cluster status: 1 nodes | |
- MostDelayedHeartbeats: {'10.16.86.133': 0.5811362266540527, '10.16.102.85': 0.5810155868530273} | |
- NodeIdleSeconds: Min=0 Mean=0 Max=0 | |
- ResourceUsage: 7.0/15.0 CPU, 0.0 GiB/25.9 GiB memory, 0.0 GiB/10.27 GiB object_store_memory | |
- TimeSinceLastHeartbeat: Min=0 Mean=0 Max=0 | |
Worker node types: | |
- wkr-15cpu30g-ondemand: 1 | |
ray-py38-cu112,karpenter:2022-02-08 09:02:16,645 DEBUG autoscaler.py:1210 -- ray-py38-cu112-wkr-15cpu30g--ondemand-4pvk5 is not being updated and passes config check (can_update=True). | |
ray-py38-cu112,karpenter:2022-02-08 09:02:16,700 DEBUG autoscaler.py:1210 -- ray-py38-cu112-wkr-15cpu30g--ondemand-4pvk5 is not being updated and passes config check (can_update=True). | |
ray-py38-cu112,karpenter:2022-02-08 09:02:16,883 DEBUG resource_demand_scheduler.py:201 -- Cluster resources: [{'memory': 5261334937.0, 'object_store_memory': 2053489974.0, 'node:10.16.102.85': 1.0}, {'node:10.16.86.133': 1.0, 'CPU': 8.0, 'memory': 22548578304.0, 'object_store_memory': 8973495091.0}] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:16,884 DEBUG resource_demand_scheduler.py:202 -- Node counts: defaultdict(<class 'int'>, {'head': 1, 'wkr-15cpu30g-ondemand': 1}) | |
ray-py38-cu112,karpenter:2022-02-08 09:02:16,884 DEBUG resource_demand_scheduler.py:219 -- Placement group demands: [] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:16,885 DEBUG resource_demand_scheduler.py:283 -- Resource demands: [{'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:16,885 DEBUG resource_demand_scheduler.py:284 -- Unfulfilled demands: [{'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:16,895 DEBUG resource_demand_scheduler.py:292 -- Final unfulfilled: [] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:16,961 DEBUG resource_demand_scheduler.py:317 -- Node requests: {'wkr-30cpu60g-spot': 6, 'wkr-7cpu14g-spot': 1} | |
ray-py38-cu112,karpenter:2022-02-08 09:02:16,962 INFO autoscaler.py:1216 -- StandardAutoscaler: Queue 6 new nodes for launch | |
ray-py38-cu112,karpenter:2022-02-08 09:02:16,965 INFO autoscaler.py:1216 -- StandardAutoscaler: Queue 1 new nodes for launch | |
ray-py38-cu112,karpenter:2022-02-08 09:02:16,965 INFO node_launcher.py:123 -- NodeLauncher0: Got 6 nodes to launch. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:16,970 INFO node_launcher.py:123 -- NodeLauncher0: Launching 6 nodes, type wkr-30cpu60g-spot. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:16,970 INFO node_provider.py:145 -- KubernetesNodeProvider: calling create_namespaced_pod (count=6). | |
ray-py38-cu112,karpenter:2022-02-08 09:02:17,049 INFO monitor.py:386 -- :event_summary:Adding 6 nodes of type wkr-30cpu60g-spot. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:17,049 INFO monitor.py:386 -- :event_summary:Adding 1 nodes of type wkr-7cpu14g-spot. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:17,050 DEBUG gcs_utils.py:245 -- internal_kv_put b'__autoscaling_status' b'{"load_metrics_report": {"usage": {"memory": [0.0, 27809913241.0], "node:10.16.102.85": [0.0, 1.0], "object_store_memory": [1942.0, 11026987007.0], "CPU": [7.0, 15.0], "node:10.16.86.133": [0.0, 1.0]}, "resource_demand": [[{"CPU": 1.0}, 194]], "pg_demand": [], "request_demand": [], "node_types": [[{"object_store_memory": 2053491916.0, "memory": 5261334937.0, "node:10.16.102.85": 1.0}, 1], [{"CPU": 15.0, "node:10.16.86.133": 1.0, "object_store_memory": 8973495091.0, "memory": 22548578304.0}, 1]], "head_ip": null}, "time": 1644339735.8861976, "monitor_pid": 9165, "autoscaler_report": {"active_nodes": {"head": 1, "wkr-15cpu30g-ondemand": 1}, "pending_nodes": [], "pending_launches": {"wkr-30cpu60g-spot": 6, "wkr-7cpu14g-spot": 1}, "failed_nodes": []}}' True None | |
ray-py38-cu112,karpenter:2022-02-08 09:02:17,234 INFO node_launcher.py:123 -- NodeLauncher0: Got 1 nodes to launch. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:17,234 INFO node_launcher.py:123 -- NodeLauncher0: Launching 1 nodes, type wkr-7cpu14g-spot. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:17,235 INFO node_provider.py:145 -- KubernetesNodeProvider: calling create_namespaced_pod (count=1). | |
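For context: each `{'CPU': 1.0}` entry in the resource demands above is one queued task or actor requesting a single CPU, and the scheduler sizes its node request (6 x wkr-30cpu60g-spot plus 1 x wkr-7cpu14g-spot here) to cover that backlog. The driver workload is not part of this gist, so the snippet below is only a minimal sketch of the kind of script that would produce this demand shape; the function name and task body are hypothetical.

```python
# Hypothetical driver, not from the original session: roughly 200 one-CPU
# tasks against a 15-CPU cluster leaves ~185-194 tasks queued, which the
# autoscaler reports as the {'CPU': 1.0} demand/backlog seen above.
import time

import ray

ray.init(address="auto")  # connect to the existing Ray cluster


@ray.remote(num_cpus=1)
def work(i):
    time.sleep(60)  # placeholder; the real task body is unknown
    return i


refs = [work.remote(i) for i in range(200)]
ray.get(refs)
```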
ray-py38-cu112,karpenter:2022-02-08 09:02:22,067 INFO monitor.py:521 -- Logging raw resource message pulled from GCS. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:22,067 INFO monitor.py:522 -- batch { | |
node_id: "\033\270?-a\253\n\211X>`\223\\Y5\371g\325\240\023\254\343\335\022\305\360\251X" | |
resources_available { | |
key: "memory" | |
value: 22548578304.0 | |
} | |
resources_available { | |
key: "node:10.16.86.133" | |
value: 1.0 | |
} | |
resources_available { | |
key: "object_store_memory" | |
value: 8973495091.0 | |
} | |
resources_available_changed: true | |
resources_total { | |
key: "CPU" | |
value: 15.0 | |
} | |
resources_total { | |
key: "memory" | |
value: 22548578304.0 | |
} | |
resources_total { | |
key: "node:10.16.86.133" | |
value: 1.0 | |
} | |
resources_total { | |
key: "object_store_memory" | |
value: 8973495091.0 | |
} | |
resource_load { | |
key: "CPU" | |
value: 1.0 | |
} | |
resource_load_by_shape { | |
resource_demands { | |
shape { | |
key: "CPU" | |
value: 1.0 | |
} | |
num_ready_requests_queued: 1 | |
backlog_size: 184 | |
} | |
} | |
node_manager_address: "10.16.86.133" | |
} | |
batch { | |
node_id: "\245\250\333\361\356\245\266\370\273\023\007\265I_\030\344_\354\215\354Rv\342epX\0039" | |
resources_available { | |
key: "memory" | |
value: 5261334937.0 | |
} | |
resources_available { | |
key: "node:10.16.102.85" | |
value: 1.0 | |
} | |
resources_available { | |
key: "object_store_memory" | |
value: 2053489974.0 | |
} | |
resources_available_changed: true | |
resources_total { | |
key: "memory" | |
value: 5261334937.0 | |
} | |
resources_total { | |
key: "node:10.16.102.85" | |
value: 1.0 | |
} | |
resources_total { | |
key: "object_store_memory" | |
value: 2053491916.0 | |
} | |
resource_load_by_shape { | |
} | |
node_manager_address: "10.16.102.85" | |
} | |
resource_load_by_shape { | |
resource_demands { | |
shape { | |
key: "CPU" | |
value: 1.0 | |
} | |
num_ready_requests_queued: 1 | |
backlog_size: 184 | |
} | |
} | |
placement_group_load { | |
} | |
ray-py38-cu112,karpenter:2022-02-08 09:02:22,067 INFO monitor.py:523 -- Done logging raw resource message. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:22,068 DEBUG gcs_utils.py:228 -- internal_kv_get b'autoscaler_resource_request' None | |
ray-py38-cu112,karpenter:2022-02-08 09:02:23,164 INFO autoscaler.py:327 -- | |
======== Autoscaler status: 2022-02-08 09:02:23.164075 ======== | |
Node status | |
--------------------------------------------------------------- | |
Healthy: | |
1 head | |
1 wkr-15cpu30g-ondemand | |
Pending: | |
None: wkr-30cpu60g-spot, uninitialized | |
None: wkr-30cpu60g-spot, uninitialized | |
None: wkr-30cpu60g-spot, uninitialized | |
None: wkr-30cpu60g-spot, uninitialized | |
None: wkr-30cpu60g-spot, uninitialized | |
None: wkr-30cpu60g-spot, uninitialized | |
None: wkr-7cpu14g-spot, uninitialized | |
Recent failures: | |
(no failures) | |
Resources | |
--------------------------------------------------------------- | |
Usage: | |
15.0/15.0 CPU | |
0.00/25.900 GiB memory | |
0.00/10.270 GiB object_store_memory | |
Demands: | |
{'CPU': 1.0}: 185+ pending tasks/actors | |
ray-py38-cu112,karpenter:2022-02-08 09:02:23,421 DEBUG gcs_utils.py:245 -- internal_kv_put b'__autoscaling_status_legacy' b"Cluster status: 8 nodes\n - MostDelayedHeartbeats: {'10.16.86.133': 1.0961933135986328, '10.16.102.85': 1.0961103439331055}\n - NodeIdleSeconds: Min=1 Mean=1 Max=1\n - ResourceUsage: 15.0/15.0 CPU, 0.0 GiB/25.9 GiB memory, 0.0 GiB/10.27 GiB object_store_memory\n - TimeSinceLastHeartbeat: Min=1 Mean=1 Max=1\nWorker node types:\n - wkr-15cpu30g-ondemand: 1\n - wkr-30cpu60g-spot: 6\n - wkr-7cpu14g-spot: 1" True None | |
ray-py38-cu112,karpenter:2022-02-08 09:02:23,422 DEBUG legacy_info_string.py:26 -- Cluster status: 8 nodes | |
- MostDelayedHeartbeats: {'10.16.86.133': 1.0961933135986328, '10.16.102.85': 1.0961103439331055} | |
- NodeIdleSeconds: Min=1 Mean=1 Max=1 | |
- ResourceUsage: 15.0/15.0 CPU, 0.0 GiB/25.9 GiB memory, 0.0 GiB/10.27 GiB object_store_memory | |
- TimeSinceLastHeartbeat: Min=1 Mean=1 Max=1 | |
Worker node types: | |
- wkr-15cpu30g-ondemand: 1 | |
- wkr-30cpu60g-spot: 6 | |
- wkr-7cpu14g-spot: 1 | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,048 DEBUG autoscaler.py:1210 -- ray-py38-cu112-wkr-15cpu30g--ondemand-4pvk5 is not being updated and passes config check (can_update=True). | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,088 DEBUG autoscaler.py:1210 -- ray-py38-cu112-wkr-30cpu60g--spot-4k69f is not being updated and passes config check (can_update=True). | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,125 DEBUG autoscaler.py:606 -- ray-py38-cu112-wkr-30cpu60g--spot-4k69f: Starting new thread runner. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,137 DEBUG autoscaler.py:1210 -- ray-py38-cu112-wkr-30cpu60g--spot-qccxm is not being updated and passes config check (can_update=True). | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,173 DEBUG autoscaler.py:606 -- ray-py38-cu112-wkr-30cpu60g--spot-qccxm: Starting new thread runner. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,186 DEBUG autoscaler.py:1210 -- ray-py38-cu112-wkr-30cpu60g--spot-rdtm6 is not being updated and passes config check (can_update=True). | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,227 DEBUG autoscaler.py:606 -- ray-py38-cu112-wkr-30cpu60g--spot-rdtm6: Starting new thread runner. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,241 DEBUG autoscaler.py:1210 -- ray-py38-cu112-wkr-30cpu60g--spot-twg8x is not being updated and passes config check (can_update=True). | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,282 DEBUG autoscaler.py:606 -- ray-py38-cu112-wkr-30cpu60g--spot-twg8x: Starting new thread runner. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,298 DEBUG autoscaler.py:1210 -- ray-py38-cu112-wkr-30cpu60g--spot-wj5j4 is not being updated and passes config check (can_update=True). | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,335 DEBUG autoscaler.py:606 -- ray-py38-cu112-wkr-30cpu60g--spot-wj5j4: Starting new thread runner. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,348 DEBUG autoscaler.py:1210 -- ray-py38-cu112-wkr-30cpu60g--spot-xdx6d is not being updated and passes config check (can_update=True). | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,384 DEBUG autoscaler.py:606 -- ray-py38-cu112-wkr-30cpu60g--spot-xdx6d: Starting new thread runner. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,400 DEBUG autoscaler.py:1210 -- ray-py38-cu112-wkr-7cpu14g--spot-5fbjz is not being updated and passes config check (can_update=True). | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,442 DEBUG autoscaler.py:606 -- ray-py38-cu112-wkr-7cpu14g--spot-5fbjz: Starting new thread runner. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,443 INFO autoscaler.py:1165 -- Creating new (spawn_updater) updater thread for node ray-py38-cu112-wkr-30cpu60g--spot-4k69f. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,445 INFO autoscaler.py:1165 -- Creating new (spawn_updater) updater thread for node ray-py38-cu112-wkr-30cpu60g--spot-qccxm. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,446 INFO autoscaler.py:1165 -- Creating new (spawn_updater) updater thread for node ray-py38-cu112-wkr-30cpu60g--spot-rdtm6. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,447 INFO autoscaler.py:1165 -- Creating new (spawn_updater) updater thread for node ray-py38-cu112-wkr-30cpu60g--spot-twg8x. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,449 INFO autoscaler.py:1165 -- Creating new (spawn_updater) updater thread for node ray-py38-cu112-wkr-30cpu60g--spot-wj5j4. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,452 INFO autoscaler.py:1165 -- Creating new (spawn_updater) updater thread for node ray-py38-cu112-wkr-30cpu60g--spot-xdx6d. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:24,475 INFO autoscaler.py:1165 -- Creating new (spawn_updater) updater thread for node ray-py38-cu112-wkr-7cpu14g--spot-5fbjz. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:25,410 INFO command_runner.py:179 -- NodeUpdater: ray-py38-cu112-wkr-30cpu60g--spot-qccxm: Running kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-qccxm -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)' | |
ray-py38-cu112,karpenter:2022-02-08 09:02:25,559 INFO command_runner.py:179 -- NodeUpdater: ray-py38-cu112-wkr-7cpu14g--spot-5fbjz: Running kubectl -n karpenter exec -it ray-py38-cu112-wkr-7cpu14g--spot-5fbjz -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)' | |
ray-py38-cu112,karpenter:2022-02-08 09:02:25,716 INFO command_runner.py:179 -- NodeUpdater: ray-py38-cu112-wkr-30cpu60g--spot-4k69f: Running kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-4k69f -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)' | |
ray-py38-cu112,karpenter:2022-02-08 09:02:25,796 DEBUG autoscaler.py:1210 -- ray-py38-cu112-wkr-15cpu30g--ondemand-4pvk5 is not being updated and passes config check (can_update=True). | |
ray-py38-cu112,karpenter:2022-02-08 09:02:26,298 INFO command_runner.py:179 -- NodeUpdater: ray-py38-cu112-wkr-30cpu60g--spot-wj5j4: Running kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-wj5j4 -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)' | |
Unable to use a TTY - input is not a terminal or the right kind of file | |
ray-py38-cu112,karpenter:2022-02-08 09:02:26,417 INFO command_runner.py:179 -- NodeUpdater: ray-py38-cu112-wkr-30cpu60g--spot-xdx6d: Running kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-xdx6d -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)' | |
Error from server: no preferred addresses found; known addresses: [] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:26,509 INFO command_runner.py:179 -- NodeUpdater: ray-py38-cu112-wkr-30cpu60g--spot-twg8x: Running kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-twg8x -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)' | |
ray-py38-cu112,karpenter:2022-02-08 09:02:26,518 INFO command_runner.py:179 -- NodeUpdater: ray-py38-cu112-wkr-30cpu60g--spot-rdtm6: Running kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-rdtm6 -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)' | |
Unable to use a TTY - input is not a terminal or the right kind of file | |
Error from server: no preferred addresses found; known addresses: [] | |
Unable to use a TTY - input is not a terminal or the right kind of file | |
Error from server: no preferred addresses found; known addresses: [] | |
Unable to use a TTY - input is not a terminal or the right kind of file | |
Unable to use a TTY - input is not a terminal or the right kind of file | |
Error from server: no preferred addresses found; known addresses: [] | |
Error from server: no preferred addresses found; known addresses: [] | |
Unable to use a TTY - input is not a terminal or the right kind of file | |
Error from server: no preferred addresses found; known addresses: [] | |
Unable to use a TTY - input is not a terminal or the right kind of file | |
Error from server: no preferred addresses found; known addresses: [] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:27,727 DEBUG resource_demand_scheduler.py:201 -- Cluster resources: [{'memory': 5261334937.0, 'node:10.16.102.85': 1.0, 'object_store_memory': 2053489974.0}, {'memory': 22548578304.0, 'node:10.16.86.133': 1.0, 'object_store_memory': 8973495091.0, 'CPU': 0.0}, {'CPU': 30, 'GPU': 0, 'memory': 45097156608}, {'CPU': 30, 'GPU': 0, 'memory': 45097156608}, {'CPU': 30, 'GPU': 0, 'memory': 45097156608}, {'CPU': 30, 'GPU': 0, 'memory': 45097156608}, {'CPU': 30, 'GPU': 0, 'memory': 45097156608}, {'CPU': 30, 'GPU': 0, 'memory': 45097156608}, {'CPU': 7, 'GPU': 0, 'memory': 10522669875}] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:27,727 DEBUG resource_demand_scheduler.py:202 -- Node counts: defaultdict(<class 'int'>, {'head': 1, 'wkr-15cpu30g-ondemand': 1, 'wkr-30cpu60g-spot': 6, 'wkr-7cpu14g-spot': 1}) | |
ray-py38-cu112,karpenter:2022-02-08 09:02:27,728 DEBUG resource_demand_scheduler.py:219 -- Placement group demands: [] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:27,730 DEBUG resource_demand_scheduler.py:283 -- Resource demands: [{'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}, {'CPU': 1.0}] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:27,730 DEBUG resource_demand_scheduler.py:284 -- Unfulfilled demands: [] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:27,730 DEBUG resource_demand_scheduler.py:292 -- Final unfulfilled: [] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:28,016 DEBUG resource_demand_scheduler.py:317 -- Node requests: {} | |
ray-py38-cu112,karpenter:2022-02-08 09:02:28,244 DEBUG gcs_utils.py:245 -- internal_kv_put b'__autoscaling_status' b'{"load_metrics_report": {"usage": {"memory": [0.0, 27809913241.0], "object_store_memory": [1942.0, 11026987007.0], "node:10.16.102.85": [0.0, 1.0], "CPU": [15.0, 15.0], "node:10.16.86.133": [0.0, 1.0]}, "resource_demand": [[{"CPU": 1.0}, 185]], "pg_demand": [], "request_demand": [], "node_types": [[{"object_store_memory": 2053491916.0, "memory": 5261334937.0, "node:10.16.102.85": 1.0}, 1], [{"object_store_memory": 8973495091.0, "node:10.16.86.133": 1.0, "memory": 22548578304.0, "CPU": 15.0}, 1]], "head_ip": null}, "time": 1644339742.0694299, "monitor_pid": 9165, "autoscaler_report": {"active_nodes": {"head": 1, "wkr-15cpu30g-ondemand": 1}, "pending_nodes": [[null, "wkr-30cpu60g-spot", "waiting-for-ssh"], [null, "wkr-30cpu60g-spot", "waiting-for-ssh"], [null, "wkr-30cpu60g-spot", "waiting-for-ssh"], [null, "wkr-30cpu60g-spot", "waiting-for-ssh"], [null, "wkr-30cpu60g-spot", "waiting-for-ssh"], [null, "wkr-30cpu60g-spot", "waiting-for-ssh"], [null, "wkr-7cpu14g-spot", "waiting-for-ssh"]], "pending_launches": {}, "failed_nodes": []}}' True None | |
ray-py38-cu112,karpenter:2022-02-08 09:02:31,524 INFO command_runner.py:179 -- NodeUpdater: ray-py38-cu112-wkr-30cpu60g--spot-qccxm: Running kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-qccxm -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)' | |
Unable to use a TTY - input is not a terminal or the right kind of file | |
Error from server: no preferred addresses found; known addresses: [] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:31,844 INFO command_runner.py:179 -- NodeUpdater: ray-py38-cu112-wkr-7cpu14g--spot-5fbjz: Running kubectl -n karpenter exec -it ray-py38-cu112-wkr-7cpu14g--spot-5fbjz -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)' | |
ray-py38-cu112,karpenter:2022-02-08 09:02:31,935 INFO command_runner.py:179 -- NodeUpdater: ray-py38-cu112-wkr-30cpu60g--spot-wj5j4: Running kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-wj5j4 -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)' | |
Unable to use a TTY - input is not a terminal or the right kind of file | |
Error from server: no preferred addresses found; known addresses: [] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:32,026 INFO command_runner.py:179 -- NodeUpdater: ray-py38-cu112-wkr-30cpu60g--spot-4k69f: Running kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-4k69f -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)' | |
ray-py38-cu112,karpenter:2022-02-08 09:02:32,098 INFO command_runner.py:179 -- NodeUpdater: ray-py38-cu112-wkr-30cpu60g--spot-xdx6d: Running kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-xdx6d -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)' | |
Unable to use a TTY - input is not a terminal or the right kind of file | |
Error from server: no preferred addresses found; known addresses: [] | |
ray-py38-cu112,karpenter:2022-02-08 09:02:32,320 INFO command_runner.py:179 -- NodeUpdater: ray-py38-cu112-wkr-30cpu60g--spot-twg8x: Running kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-twg8x -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)' | |
ray-py38-cu112,karpenter:2022-02-08 09:02:32,322 INFO command_runner.py:179 -- NodeUpdater: ray-py38-cu112-wkr-30cpu60g--spot-rdtm6: Running kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-rdtm6 -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)' | |
Unable to use a TTY - input is not a terminal or the right kind of file | |
Error from server: no preferred addresses found; known addresses: [] | |
Unable to use a TTY - input is not a terminal or the right kind of file | |
Error from server: no preferred addresses found; known addresses: [] | |
Unable to use a TTY - input is not a terminal or the right kind of file | |
Error from server: no preferred addresses found; known addresses: [] | |
2022-02-07 13:13:28,763 INFO updater.py:314 -- SSH still not available (Exit Status 1): kubectl -n karpenter exec -it ray-py38-cu112-wkr-7cpu14g--spot-gf2hj -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)', retrying in 5 seconds. | |
2022-02-07 13:13:33,927 INFO updater.py:314 -- SSH still not available (Exit Status 1): kubectl -n karpenter exec -it ray-py38-cu112-wkr-7cpu14g--spot-gf2hj -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)', retrying in 5 seconds. | |
2022-02-07 13:13:39,740 SUCC updater.py:279 -- Success. | |
2022-02-07 13:13:39,740 INFO log_timer.py:30 -- NodeUpdater: ray-py38-cu112-wkr-7cpu14g--spot-gf2hj: Got remote shell [LogTimer=354278ms] | |
2022-02-07 13:13:39,761 INFO updater.py:374 -- Updating cluster configuration. [hash=0583f296a9e95cee648f39db3bf2330c60a73c5f] | |
2022-02-07 13:13:39,827 INFO updater.py:380 -- New status: syncing-files | |
2022-02-07 13:13:39,827 INFO updater.py:238 -- [2/7] Processing file mounts | |
2022-02-07 13:13:39,827 INFO updater.py:256 -- [3/7] No worker file mounts to sync | |
2022-02-07 13:13:39,876 INFO updater.py:391 -- New status: setting-up | |
2022-02-07 13:13:39,876 INFO updater.py:434 -- [4/7] No initialization commands to run. | |
2022-02-07 13:13:39,876 INFO updater.py:439 -- [5/7] Initalizing command runner | |
2022-02-07 13:13:39,877 INFO updater.py:485 -- [6/7] No setup commands to run. | |
2022-02-07 13:13:39,877 INFO updater.py:489 -- [7/7] Starting the Ray runtime | |
2022-02-07 13:13:44,786 INFO log_timer.py:30 -- NodeUpdater: ray-py38-cu112-wkr-7cpu14g--spot-gf2hj: Ray start commands succeeded [LogTimer=4909ms] | |
2022-02-07 13:13:44,786 INFO log_timer.py:30 -- NodeUpdater: ray-py38-cu112-wkr-7cpu14g--spot-gf2hj: Applied config 0583f296a9e95cee648f39db3bf2330c60a73c5f [LogTimer=359371ms] | |
2022-02-07 13:13:44,934 INFO updater.py:187 -- New status: up-to-date | |
2022-02-08 09:02:25,227 INFO updater.py:323 -- New status: waiting-for-ssh | |
2022-02-08 09:02:25,227 INFO updater.py:261 -- [1/7] Waiting for SSH to become available | |
2022-02-08 09:02:25,227 INFO updater.py:265 -- Running `uptime` as a test. | |
2022-02-08 09:02:25,367 INFO updater.py:323 -- New status: waiting-for-ssh | |
2022-02-08 09:02:25,368 INFO updater.py:261 -- [1/7] Waiting for SSH to become available | |
2022-02-08 09:02:25,368 INFO updater.py:265 -- Running `uptime` as a test. | |
2022-02-08 09:02:25,446 INFO updater.py:323 -- New status: waiting-for-ssh | |
2022-02-08 09:02:25,446 INFO updater.py:261 -- [1/7] Waiting for SSH to become available | |
2022-02-08 09:02:25,446 INFO updater.py:265 -- Running `uptime` as a test. | |
2022-02-08 09:02:26,007 INFO updater.py:323 -- New status: waiting-for-ssh | |
2022-02-08 09:02:26,007 INFO updater.py:261 -- [1/7] Waiting for SSH to become available | |
2022-02-08 09:02:26,007 INFO updater.py:265 -- Running `uptime` as a test. | |
2022-02-08 09:02:26,016 INFO updater.py:323 -- New status: waiting-for-ssh | |
2022-02-08 09:02:26,016 INFO updater.py:261 -- [1/7] Waiting for SSH to become available | |
2022-02-08 09:02:26,016 INFO updater.py:265 -- Running `uptime` as a test. | |
2022-02-08 09:02:26,115 INFO updater.py:323 -- New status: waiting-for-ssh | |
2022-02-08 09:02:26,116 INFO updater.py:261 -- [1/7] Waiting for SSH to become available | |
2022-02-08 09:02:26,116 INFO updater.py:265 -- Running `uptime` as a test. | |
2022-02-08 09:02:26,201 INFO updater.py:323 -- New status: waiting-for-ssh | |
2022-02-08 09:02:26,202 INFO updater.py:261 -- [1/7] Waiting for SSH to become available | |
2022-02-08 09:02:26,203 INFO updater.py:265 -- Running `uptime` as a test. | |
2022-02-08 09:02:26,501 INFO updater.py:314 -- SSH still not available (Exit Status 1): kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-qccxm -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)', retrying in 5 seconds. | |
2022-02-08 09:02:26,813 INFO updater.py:314 -- SSH still not available (Exit Status 1): kubectl -n karpenter exec -it ray-py38-cu112-wkr-7cpu14g--spot-5fbjz -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)', retrying in 5 seconds. | |
2022-02-08 09:02:26,908 INFO updater.py:314 -- SSH still not available (Exit Status 1): kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-wj5j4 -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)', retrying in 5 seconds. | |
2022-02-08 09:02:27,000 INFO updater.py:314 -- SSH still not available (Exit Status 1): kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-4k69f -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)', retrying in 5 seconds. | |
2022-02-08 09:02:27,013 INFO updater.py:314 -- SSH still not available (Exit Status 1): kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-xdx6d -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)', retrying in 5 seconds. | |
2022-02-08 09:02:27,100 INFO updater.py:314 -- SSH still not available (Exit Status 1): kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-rdtm6 -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)', retrying in 5 seconds. | |
2022-02-08 09:02:27,141 INFO updater.py:314 -- SSH still not available (Exit Status 1): kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-twg8x -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)', retrying in 5 seconds. | |
2022-02-08 09:02:31,657 INFO updater.py:314 -- SSH still not available (Exit Status 1): kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-qccxm -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)', retrying in 5 seconds. | |
2022-02-08 09:02:32,006 INFO updater.py:314 -- SSH still not available (Exit Status 1): kubectl -n karpenter exec -it ray-py38-cu112-wkr-7cpu14g--spot-5fbjz -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)', retrying in 5 seconds. | |
2022-02-08 09:02:32,298 INFO updater.py:314 -- SSH still not available (Exit Status 1): kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-wj5j4 -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)', retrying in 5 seconds. | |
2022-02-08 09:02:32,517 INFO updater.py:314 -- SSH still not available (Exit Status 1): kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-4k69f -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)', retrying in 5 seconds. | |
2022-02-08 09:02:32,615 INFO updater.py:314 -- SSH still not available (Exit Status 1): kubectl -n karpenter exec -it ray-py38-cu112-wkr-30cpu60g--spot-xdx6d -- bash --login -c -i 'true && source ~/.bashrc && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore && (uptime)', retrying in 5 seconds. | |
Unable to use a TTY - input is not a terminal or the right kind of file | |
Error from server: no preferred addresses found; known addresses: [] | |
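The repeated `Unable to use a TTY` / `Error from server: no preferred addresses found; known addresses: []` pairs come from the `kubectl exec` readiness probes against worker pods the API server cannot reach yet, typically because the freshly created pods are still Pending and have no IP, so the updater keeps logging `SSH still not available` and retrying. One way to confirm this from outside the autoscaler is to list the pods in the `karpenter` namespace; the snippet below is only an illustrative check using the Kubernetes Python client, assumes local kubeconfig access, and was not part of the original debugging session.

```python
# Illustrative only: inspect whether the new Ray worker pods have been
# scheduled and assigned an IP yet.
from kubernetes import client, config

config.load_kube_config()  # assumes a kubeconfig with access to the cluster
v1 = client.CoreV1Api()

for pod in v1.list_namespaced_pod("karpenter").items:
    if "ray-py38-cu112-wkr" in pod.metadata.name:
        # A pod stuck in "Pending" with no pod_ip is consistent with the
        # "no preferred addresses found" errors from kubectl exec above.
        print(pod.metadata.name, pod.status.phase, pod.status.pod_ip)
```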
ray-py38-cu112,karpenter:2022-02-08 09:02:33,252 INFO monitor.py:521 -- Logging raw resource message pulled from GCS. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:33,252 INFO monitor.py:522 -- batch { | |
node_id: "\033\270?-a\253\n\211X>`\223\\Y5\371g\325\240\023\254\343\335\022\305\360\251X" | |
resources_available { | |
key: "memory" | |
value: 22548578304.0 | |
} | |
resources_available { | |
key: "node:10.16.86.133" | |
value: 1.0 | |
} | |
resources_available { | |
key: "object_store_memory" | |
value: 8973495091.0 | |
} | |
resources_available_changed: true | |
resources_total { | |
key: "CPU" | |
value: 15.0 | |
} | |
resources_total { | |
key: "memory" | |
value: 22548578304.0 | |
} | |
resources_total { | |
key: "node:10.16.86.133" | |
value: 1.0 | |
} | |
resources_total { | |
key: "object_store_memory" | |
value: 8973495091.0 | |
} | |
resource_load { | |
key: "CPU" | |
value: 1.0 | |
} | |
resource_load_by_shape { | |
resource_demands { | |
shape { | |
key: "CPU" | |
value: 1.0 | |
} | |
num_ready_requests_queued: 1 | |
backlog_size: 184 | |
} | |
} | |
node_manager_address: "10.16.86.133" | |
} | |
batch { | |
node_id: "\245\250\333\361\356\245\266\370\273\023\007\265I_\030\344_\354\215\354Rv\342epX\0039" | |
resources_available { | |
key: "memory" | |
value: 5261334937.0 | |
} | |
resources_available { | |
key: "node:10.16.102.85" | |
value: 1.0 | |
} | |
resources_available { | |
key: "object_store_memory" | |
value: 2053489974.0 | |
} | |
resources_available_changed: true | |
resources_total { | |
key: "memory" | |
value: 5261334937.0 | |
} | |
resources_total { | |
key: "node:10.16.102.85" | |
value: 1.0 | |
} | |
resources_total { | |
key: "object_store_memory" | |
value: 2053491916.0 | |
} | |
resource_load_by_shape { | |
} | |
node_manager_address: "10.16.102.85" | |
} | |
resource_load_by_shape { | |
resource_demands { | |
shape { | |
key: "CPU" | |
value: 1.0 | |
} | |
num_ready_requests_queued: 1 | |
backlog_size: 184 | |
} | |
} | |
placement_group_load { | |
} | |
ray-py38-cu112,karpenter:2022-02-08 09:02:33,252 INFO monitor.py:523 -- Done logging raw resource message. | |
ray-py38-cu112,karpenter:2022-02-08 09:02:33,253 DEBUG gcs_utils.py:228 -- internal_kv_get b'autoscaler_resource_request' None | |
ray-py38-cu112,karpenter:2022-02-08 09:02:34,311 INFO autoscaler.py:327 -- |