Created December 27, 2022 20:40
linux://python/ray/train:test_base_trainer
=================================== FAILURES ===================================
__________________________ test_reserved_cpu_warnings __________________________

ray_start_4_cpus = RayContext(dashboard_url='127.0.0.1:8265', python_version='3.7.9', ray_version='3.0.0.dev0', ray_commit='{{RAY_COMMIT_...3:62813', 'dashboard_agent_listen_port': 52365, 'node_id': 'b30884dd0998047db0283b31753c2e3ceded2c0be637b6ce93a1d93d'})
mock_tuner_internal_logger = <python.ray.train.tests.test_base_trainer.mock_tuner_internal_logger.<locals>.MockLogger object at 0x7f9140bab590>

    def test_reserved_cpu_warnings(ray_start_4_cpus, mock_tuner_internal_logger):
        def train_loop(config):
            pass
        # Fraction correctly specified.
        trainer = DummyTrainer(
            train_loop,
            scaling_config=ScalingConfig(num_workers=1, _max_cpu_fraction_per_node=0.9),
            datasets={"train": ray.data.range(10)},
        )
        trainer.fit()
        assert not mock_tuner_internal_logger.warnings
        # No datasets, no fraction.
        trainer = DummyTrainer(
            train_loop,
            scaling_config=ScalingConfig(num_workers=1),
        )
        trainer.fit()
        assert not mock_tuner_internal_logger.warnings
        # Should warn.
        trainer = DummyTrainer(
            train_loop,
            scaling_config=ScalingConfig(num_workers=3),
            datasets={"train": ray.data.range(10)},
        )
        trainer.fit()
        assert (
            len(mock_tuner_internal_logger.warnings) == 1
        ), mock_tuner_internal_logger.warnings
        assert "_max_cpu_fraction_per_node" in mock_tuner_internal_logger.warnings[0]
        mock_tuner_internal_logger.clear()
        # Warn if num_samples is configured
        trainer = DummyTrainer(
            train_loop,
            scaling_config=ScalingConfig(num_workers=1),
            datasets={"train": ray.data.range(10)},
        )
        tuner = tune.Tuner(trainer, tune_config=tune.TuneConfig(num_samples=3))
        tuner.fit()
        assert (
            len(mock_tuner_internal_logger.warnings) == 1
        ), mock_tuner_internal_logger.warnings
        assert "_max_cpu_fraction_per_node" in mock_tuner_internal_logger.warnings[0]
        mock_tuner_internal_logger.clear()
        # Don't warn if resources * samples < 0.8
        trainer = DummyTrainer(
            train_loop,
            scaling_config=ScalingConfig(num_workers=1, trainer_resources={"CPU": 0}),
            datasets={"train": ray.data.range(10)},
        )
        tuner = tune.Tuner(trainer, tune_config=tune.TuneConfig(num_samples=3))
        tuner.fit()
>       assert not mock_tuner_internal_logger.warnings
E       AssertionError: assert not ['Executing `.fit()` may leave less than 20% of CPUs in this cluster for Dataset execution, which can lead to resource contention or hangs. To avoid this, reserve at least 20% of node CPUs for Dataset execution by setting `_max_cpu_fraction_per_node = 0.8` in the Trainer scaling_config. See https://docs.ray.io/en/master/data/dataset-internals.html#datasets-and-tune for more info.']
E        +  where ['Executing `.fit()` may leave less than 20% of CPUs in this cluster for Dataset execution, which can lead to resource contention or hangs. To avoid this, reserve at least 20% of node CPUs for Dataset execution by setting `_max_cpu_fraction_per_node = 0.8` in the Trainer scaling_config. See https://docs.ray.io/en/master/data/dataset-internals.html#datasets-and-tune for more info.'] = <python.ray.train.tests.test_base_trainer.mock_tuner_internal_logger.<locals>.MockLogger object at 0x7f9140bab590>.warnings

python/ray/train/tests/test_base_trainer.py:294: AssertionError
---------------------------- Captured stderr setup -----------------------------
2022-12-27 17:42:49,447 WARNING services.py:1790 -- WARNING: The object store is using /tmp instead of /dev/shm because /dev/shm has only 2684354560 bytes available. This will harm performance! You may be able to free up space by deleting files in /dev/shm. If you are inside a Docker container, you can increase /dev/shm size by passing '--shm-size=9.59gb' to 'docker run' (or add it to the run_options list in a Ray cluster config). Make sure to set this to more than 30% of available RAM.
2022-12-27 17:42:49,582 INFO worker.py:1542 -- Started a local Ray instance. View the dashboard at http://127.0.0.1:8265
----------------------------- Captured stdout call -----------------------------
== Status ==
Current time: 2022-12-27 17:42:53 (running for 00:00:02.17)
Memory usage on this node: 3.2/30.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 2.0/4 CPUs, 0/0 GPUs, 0.0/17.44 GiB heap, 0.0/8.72 GiB objects
Result logdir: /root/.cache/bazel/_bazel_root/5fe90af4e7d1ed9fcf52f59e39e126f5/execroot/com_github_ray_project_ray/_tmp/0e61e6409beffe78495221763c6e2f16/DummyTrainer_2022-12-27_17-42-50
Number of trials: 1/1 (1 RUNNING)
+--------------------------+----------+------------------+
| Trial name               | status   | loc              |
|--------------------------+----------+------------------|
| DummyTrainer_e2623_00000 | RUNNING  | 172.16.16.3:2470 |
+--------------------------+----------+------------------+
Trial DummyTrainer_e2623_00000 completed. Last result:
== Status ==
Current time: 2022-12-27 17:42:53 (running for 00:00:02.17)
Memory usage on this node: 3.2/30.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/0 GPUs, 0.0/17.44 GiB heap, 0.0/8.72 GiB objects
Result logdir: /root/.cache/bazel/_bazel_root/5fe90af4e7d1ed9fcf52f59e39e126f5/execroot/com_github_ray_project_ray/_tmp/0e61e6409beffe78495221763c6e2f16/DummyTrainer_2022-12-27_17-42-50
Number of trials: 1/1 (1 TERMINATED)
+--------------------------+------------+------------------+
| Trial name               | status     | loc              |
|--------------------------+------------+------------------|
| DummyTrainer_e2623_00000 | TERMINATED | 172.16.16.3:2470 |
+--------------------------+------------+------------------+
== Status ==
Current time: 2022-12-27 17:42:55 (running for 00:00:02.07)
Memory usage on this node: 3.2/30.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 2.0/4 CPUs, 0/0 GPUs, 0.0/17.44 GiB heap, 0.0/8.72 GiB objects
Result logdir: /root/.cache/bazel/_bazel_root/5fe90af4e7d1ed9fcf52f59e39e126f5/execroot/com_github_ray_project_ray/_tmp/0e61e6409beffe78495221763c6e2f16/DummyTrainer_2022-12-27_17-42-53
Number of trials: 1/1 (1 RUNNING)
+--------------------------+----------+------------------+
| Trial name               | status   | loc              |
|--------------------------+----------+------------------|
| DummyTrainer_e3aee_00000 | RUNNING  | 172.16.16.3:2579 |
+--------------------------+----------+------------------+
Trial DummyTrainer_e3aee_00000 completed. Last result:
== Status ==
Current time: 2022-12-27 17:42:55 (running for 00:00:02.07)
Memory usage on this node: 3.2/30.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/0 GPUs, 0.0/17.44 GiB heap, 0.0/8.72 GiB objects
Result logdir: /root/.cache/bazel/_bazel_root/5fe90af4e7d1ed9fcf52f59e39e126f5/execroot/com_github_ray_project_ray/_tmp/0e61e6409beffe78495221763c6e2f16/DummyTrainer_2022-12-27_17-42-53
Number of trials: 1/1 (1 TERMINATED)
+--------------------------+------------+------------------+
| Trial name               | status     | loc              |
|--------------------------+------------+------------------|
| DummyTrainer_e3aee_00000 | TERMINATED | 172.16.16.3:2579 |
+--------------------------+------------+------------------+
== Status ==
Current time: 2022-12-27 17:42:57 (running for 00:00:02.14)
Memory usage on this node: 3.2/30.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 4.0/4 CPUs, 0/0 GPUs, 0.0/17.44 GiB heap, 0.0/8.72 GiB objects
Result logdir: /root/.cache/bazel/_bazel_root/5fe90af4e7d1ed9fcf52f59e39e126f5/execroot/com_github_ray_project_ray/_tmp/0e61e6409beffe78495221763c6e2f16/DummyTrainer_2022-12-27_17-42-55
Number of trials: 1/1 (1 RUNNING)
+--------------------------+----------+------------------+
| Trial name               | status   | loc              |
|--------------------------+----------+------------------|
| DummyTrainer_e4eda_00000 | RUNNING  | 172.16.16.3:2649 |
+--------------------------+----------+------------------+
Trial DummyTrainer_e4eda_00000 completed. Last result:
== Status ==
Current time: 2022-12-27 17:42:57 (running for 00:00:02.14)
Memory usage on this node: 3.2/30.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/0 GPUs, 0.0/17.44 GiB heap, 0.0/8.72 GiB objects
Result logdir: /root/.cache/bazel/_bazel_root/5fe90af4e7d1ed9fcf52f59e39e126f5/execroot/com_github_ray_project_ray/_tmp/0e61e6409beffe78495221763c6e2f16/DummyTrainer_2022-12-27_17-42-55
Number of trials: 1/1 (1 TERMINATED)
+--------------------------+------------+------------------+
| Trial name               | status     | loc              |
|--------------------------+------------+------------------|
| DummyTrainer_e4eda_00000 | TERMINATED | 172.16.16.3:2649 |
+--------------------------+------------+------------------+
== Status ==
Current time: 2022-12-27 17:42:59 (running for 00:00:02.04)
Memory usage on this node: 3.2/30.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 2.0/4 CPUs, 0/0 GPUs, 0.0/17.44 GiB heap, 0.0/8.72 GiB objects
Result logdir: /root/.cache/bazel/_bazel_root/5fe90af4e7d1ed9fcf52f59e39e126f5/execroot/com_github_ray_project_ray/_tmp/0e61e6409beffe78495221763c6e2f16/DummyTrainer_2022-12-27_17-42-57
Number of trials: 3/3 (2 PENDING, 1 RUNNING)
+--------------------------+----------+------------------+
| Trial name               | status   | loc              |
|--------------------------+----------+------------------|
| DummyTrainer_e6376_00000 | RUNNING  | 172.16.16.3:2711 |
| DummyTrainer_e6376_00001 | PENDING  |                  |
| DummyTrainer_e6376_00002 | PENDING  |                  |
+--------------------------+----------+------------------+
Trial DummyTrainer_e6376_00000 completed. Last result:
Trial DummyTrainer_e6376_00001 completed. Last result:
Trial DummyTrainer_e6376_00002 completed. Last result:
== Status ==
Current time: 2022-12-27 17:43:01 (running for 00:00:04.32)
Memory usage on this node: 3.2/30.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/0 GPUs, 0.0/17.44 GiB heap, 0.0/8.72 GiB objects
Result logdir: /root/.cache/bazel/_bazel_root/5fe90af4e7d1ed9fcf52f59e39e126f5/execroot/com_github_ray_project_ray/_tmp/0e61e6409beffe78495221763c6e2f16/DummyTrainer_2022-12-27_17-42-57
Number of trials: 3/3 (3 TERMINATED)
+--------------------------+------------+------------------+
| Trial name               | status     | loc              |
|--------------------------+------------+------------------|
| DummyTrainer_e6376_00000 | TERMINATED | 172.16.16.3:2711 |
| DummyTrainer_e6376_00001 | TERMINATED | 172.16.16.3:2772 |
| DummyTrainer_e6376_00002 | TERMINATED | 172.16.16.3:2785 |
+--------------------------+------------+------------------+
== Status ==
Current time: 2022-12-27 17:43:04 (running for 00:00:02.93)
Memory usage on this node: 3.2/30.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 1.0/4 CPUs, 0/0 GPUs, 0.0/17.44 GiB heap, 0.0/8.72 GiB objects
Result logdir: /root/.cache/bazel/_bazel_root/5fe90af4e7d1ed9fcf52f59e39e126f5/execroot/com_github_ray_project_ray/_tmp/0e61e6409beffe78495221763c6e2f16/DummyTrainer_2022-12-27_17-43-01
Number of trials: 3/3 (2 PENDING, 1 RUNNING)
+--------------------------+----------+------------------+
| Trial name               | status   | loc              |
|--------------------------+----------+------------------|
| DummyTrainer_e8da8_00000 | RUNNING  | 172.16.16.3:3088 |
| DummyTrainer_e8da8_00001 | PENDING  |                  |
| DummyTrainer_e8da8_00002 | PENDING  |                  |
+--------------------------+----------+------------------+
Trial DummyTrainer_e8da8_00000 completed. Last result:
Trial DummyTrainer_e8da8_00002 completed. Last result:
Trial DummyTrainer_e8da8_00001 completed. Last result:
== Status ==
Current time: 2022-12-27 17:43:06 (running for 00:00:05.03)
Memory usage on this node: 3.1/30.9 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/0 GPUs, 0.0/17.44 GiB heap, 0.0/8.72 GiB objects
Result logdir: /root/.cache/bazel/_bazel_root/5fe90af4e7d1ed9fcf52f59e39e126f5/execroot/com_github_ray_project_ray/_tmp/0e61e6409beffe78495221763c6e2f16/DummyTrainer_2022-12-27_17-43-01
Number of trials: 3/3 (3 TERMINATED)
+--------------------------+------------+------------------+
| Trial name               | status     | loc              |
|--------------------------+------------+------------------|
| DummyTrainer_e8da8_00000 | TERMINATED | 172.16.16.3:3088 |
| DummyTrainer_e8da8_00001 | TERMINATED | 172.16.16.3:3168 |
| DummyTrainer_e8da8_00002 | TERMINATED | 172.16.16.3:3171 |
+--------------------------+------------+------------------+
----------------------------- Captured stderr call -----------------------------
2022-12-27 17:42:53,088 INFO tune.py:775 -- Total run time: 2.18 seconds (2.17 seconds for the tuning loop).
2022-12-27 17:42:55,167 INFO tune.py:775 -- Total run time: 2.08 seconds (2.07 seconds for the tuning loop).
2022-12-27 17:42:57,329 INFO tune.py:775 -- Total run time: 2.15 seconds (2.14 seconds for the tuning loop).
2022-12-27 17:43:01,663 INFO tune.py:775 -- Total run time: 4.32 seconds (4.31 seconds for the tuning loop).
2022-12-27 17:43:06,799 INFO tune.py:775 -- Total run time: 5.03 seconds (5.02 seconds for the tuning loop).
=============================== warnings summary ===============================
python/ray/train/tests/test_base_trainer.py::test_trainer_fit
  /opt/miniconda/lib/python3.7/site-packages/botocore/httpsession.py:17: DeprecationWarning: 'urllib3.contrib.pyopenssl' module is deprecated and will be removed in a future release of urllib3 2.x. Read more in this issue: https://github.com/urllib3/urllib3/issues/2680
    from urllib3.contrib.pyopenssl import orig_util_SSLContext as SSLContext

python/ray/train/tests/test_base_trainer.py: 62 warnings
  /opt/miniconda/lib/python3.7/site-packages/tensorboardX/record_writer.py:194: DeprecationWarning: crc32c.crc32 will be eventually removed, use crc32c.crc32c instead
    x = u32(crc32c(data))

python/ray/train/tests/test_base_trainer.py: 2871 warnings
  /opt/miniconda/lib/python3.7/site-packages/pip/_vendor/packaging/version.py:114: DeprecationWarning: Creating a LegacyVersion has been deprecated and will be removed in the next major release
    DeprecationWarning,

-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
=========================== short test summary info ============================
FAILED python/ray/train/tests/test_base_trainer.py::test_reserved_cpu_warnings
!!!!!!!!!!!!!!!!!!!!!!!!!! stopping after 1 failures !!!!!!!!!!!!!!!!!!!!!!!!!!!
============ 1 failed, 9 passed, 2934 warnings in 124.97s (0:02:04) ============
================================================================================
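
Note: the failed assertion above flags the warning that Tune emits when a trial could leave less than 20% of node CPUs for Dataset execution. For reference, a minimal sketch of the configuration the warning message recommends, reusing the DummyTrainer/train_loop helpers defined in test_base_trainer.py; the import paths are assumptions for this Ray dev branch, not taken from the log above.

    import ray
    from ray.air import ScalingConfig  # import path assumed for this Ray dev branch
    from ray.train.tests.test_base_trainer import DummyTrainer  # test helper; import path assumed

    def train_loop(config):
        pass

    # Sketch only: cap the fraction of CPUs a trial may consume so that Dataset
    # execution keeps at least 20% of each node's CPUs, as the warning suggests.
    trainer = DummyTrainer(
        train_loop,
        scaling_config=ScalingConfig(num_workers=1, _max_cpu_fraction_per_node=0.8),
        datasets={"train": ray.data.range(10)},
    )
    trainer.fit()  # with the cap in place, no _max_cpu_fraction_per_node warning is expected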