num_workers > 0 issue
[07/01 16:54:04 d2.data.detection_utils]: TransformGens used in training: [ResizeShortestEdge(short_edge_length=(800,), max_size=1333, sample_style='choice')]
>>> cfg stats: ('trainset',) False
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/queues.py", line 239, in _feed
  File "/usr/lib/python3.8/multiprocessing/reduction.py", line 51, in dumps
  File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/reductions.py", line 329, in reduce_storage
  File "/usr/lib/python3.8/multiprocessing/reduction.py", line 198, in DupFd
  File "/usr/lib/python3.8/multiprocessing/resource_sharer.py", line 48, in __init__
    new_fd = os.dup(fd)
OSError: [Errno 24] Too many open files
Process Process-1:
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/worker.py", line 260, in _worker_loop
    r = index_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
  File "/usr/lib/python3.8/multiprocessing/queues.py", line 116, in get
    return _ForkingPickler.loads(res)
  File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/reductions.py", line 289, in rebuild_storage_fd
    fd = df.detach()
  File "/usr/lib/python3.8/multiprocessing/resource_sharer.py", line 58, in detach
    return reduction.recv_handle(conn)
  File "/usr/lib/python3.8/multiprocessing/reduction.py", line 188, in recv_handle
    with socket.fromfd(conn.fileno(), socket.AF_UNIX, socket.SOCK_STREAM) as s:
  File "/usr/lib/python3.8/socket.py", line 543, in fromfd
    nfd = dup(fd)
OSError: [Errno 24] Too many open files
Traceback (most recent call last):
  File "main_moco.py", line 496, in <module>
    main()
  File "main_moco.py", line 180, in main
    main_worker(args.gpu, ngpus_per_node, args)
  File "main_moco.py", line 217, in main_worker
    mrcnn_tool_k = slmdptc_main(transform=transf, image_folder=args.data, arch=args.arch, args=args)
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/main.py", line 1641, in main
    mrcnn_tool.train(True, get_model_only=use_ssl, use_ssl=use_ssl)
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/main.py", line 1004, in train
    self.trainer = SolomSSLTrainer(self.cfg, mapper=mapper)
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/main.py", line 209, in __init__
    super().__init__(cfg)
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/detectron2/detectron2/engine/defaults.py", line 319, in __init__
    data_loader = self.build_train_loader(cfg)
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/main.py", line 213, in build_train_loader
    data_loader, cls.train_dataset = build_detection_train_loader(cfg, mapper=cls.mapper, get_dataset=True)
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/detectron2/detectron2/data/build.py", line 313, in build_detection_train_loader
    dataset_dicts = get_detection_dataset_dicts(
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/detectron2/detectron2/data/build.py", line 231, in get_detection_dataset_dicts
    dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in dataset_names]
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/detectron2/detectron2/data/build.py", line 231, in <listcomp>
    dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in dataset_names]
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/detectron2/detectron2/data/catalog.py", line 62, in get
    return f()
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/detectron2/detectron2/data/datasets/solom_ssl.py", line 19, in <lambda>
    DatasetCatalog.register(name, lambda: load_solom_ssl_json(json_file, image_root, name))
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/detectron2/detectron2/data/datasets/solom_ssl.py", line 38, in load_solom_ssl_json
    sample, _ = train_dataset.__getitem__(i)
  File "/usr/local/lib/python3.8/dist-packages/torchvision/datasets/folder.py", line 232, in __getitem__
    sample = self.loader(path)
  File "/usr/local/lib/python3.8/dist-packages/torchvision/datasets/folder.py", line 269, in default_loader
    return pil_loader(path)
  File "/usr/local/lib/python3.8/dist-packages/torchvision/datasets/folder.py", line 251, in pil_loader
    return img.convert('RGB')
  File "/usr/local/lib/python3.8/dist-packages/PIL/Image.py", line 904, in convert
    self.load()
  File "/usr/local/lib/python3.8/dist-packages/PIL/ImageFile.py", line 266, in load
    n, err_code = decoder.decode(b)
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/signal_handling.py", line 66, in handler
    _error_if_any_worker_fails()
RuntimeError: DataLoader worker (pid 2440898) exited unexpectedly with exit code 1. Details are lost due to multiprocessing. Rerunning with num_workers=0 may give better error trace.
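The RuntimeError above already suggests the first debugging step: rerun the loading code with num_workers=0 so the worker exception is raised in the main process with a full traceback instead of being lost. Below is a minimal sketch of that step, assuming a torchvision ImageFolder dataset like the one iterated in load_solom_ssl_json; the image path, resize, and batch size are placeholders, not values from this log.

# Debugging sketch (hypothetical paths/values): load a few batches single-process
# so the real exception from the dataset code surfaces with a complete traceback.
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])
dataset = datasets.ImageFolder("/path/to/images", transform=transform)
loader = DataLoader(dataset, batch_size=4, num_workers=0)  # num_workers=0: no worker processes

for i, (images, _) in enumerate(loader):
    print(f"batch {i}: {tuple(images.shape)}")
    if i >= 10:  # a handful of batches is usually enough to reproduce the failure
        break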
######################################################################
A longer log with num_workers=32, batch_size=256, nprocs=2
######################################################################
[07/04 17:22:15 d2.data.detection_utils]: TransformGens used in training: [ResizeShortestEdge(short_edge_length=(800,), max_size=1333, sample_style='choice')]
Import torch OK
Initail import finished.
All 32 DataLoader worker processes (Process Process-1:1 through Process Process-1:32) then terminate and print their tracebacks interleaved. De-interleaved, the dominant pattern is:

Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/worker.py", line 260, in _worker_loop
    r = index_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
  File "/usr/lib/python3.8/multiprocessing/queues.py", line 116, in get
    return _ForkingPickler.loads(res)
  File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/reductions.py", line 289, in rebuild_storage_fd
    fd = df.detach()
  File "/usr/lib/python3.8/multiprocessing/resource_sharer.py", line 57, in detach
    with _resource_sharer.get_connection(self._id) as conn:
  File "/usr/lib/python3.8/multiprocessing/resource_sharer.py", line 87, in get_connection
    c = Client(address, authkey=process.current_process().authkey)
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 502, in Client
    c = SocketClient(address)
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 630, in SocketClient
    s.connect(address)
ConnectionRefusedError: [Errno 111] Connection refused

A few workers fail at the same fd = df.detach() point but with different tails:

  File "/usr/lib/python3.8/multiprocessing/resource_sharer.py", line 58, in detach
    return reduction.recv_handle(conn)
  File "/usr/lib/python3.8/multiprocessing/reduction.py", line 189, in recv_handle
    return recvfds(s, 1)[0]
  File "/usr/lib/python3.8/multiprocessing/reduction.py", line 159, in recvfds
    raise EOFError
EOFError

  File "/usr/lib/python3.8/multiprocessing/connection.py", line 508, in Client
    answer_challenge(c, authkey)
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 752, in answer_challenge
    message = connection.recv_bytes(256)         # reject large message
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 414, in _recv_bytes
    buf = self._recv(4)
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
ConnectionResetError: [Errno 104] Connection reset by peer

(The remainder of this block is the same worker tracebacks repeated for each of the 32 workers, interleaved.)
Traceback (most recent call last):
  File "main_moco.py", line 496, in <module>
    main()
  File "main_moco.py", line 177, in main
    launch(main_worker, ngpus_per_node, machine_rank=args.rank, dist_url=args.dist_url, args=(args.gpu, ngpus_per_node, args))
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/detectron2/detectron2/engine/launch.py", line 48, in launch
    mp.spawn(
  File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/spawn.py", line 230, in spawn
    return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
  File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/spawn.py", line 188, in start_processes
    while not context.join():
  File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/spawn.py", line 150, in join
    raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:

-- Process 1 terminated with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/spawn.py", line 59, in _wrap
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/detectron2/detectron2/engine/launch.py", line 87, in _distributed_worker
  File "/home/solomon/public/Shawn/2022/Jun/moco/main_moco.py", line 217, in main_worker
    mrcnn_tool_k = slmdptc_main(transform=transf, image_folder=args.data, arch=args.arch, args=args)
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/main.py", line 1635, in main
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/main.py", line 656, in __init__
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/main.py", line 584, in __init__
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/main.py", line 380, in __init__
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/main.py", line 487, in loadConfigFromFile
  File "/usr/local/lib/python3.8/dist-packages/numpy/lib/npyio.py", line 1065, in loadtxt
  File "/usr/local/lib/python3.8/dist-packages/numpy/lib/_datasource.py", line 194, in open
  File "/usr/local/lib/python3.8/dist-packages/numpy/lib/_datasource.py", line 528, in open
OSError: [Errno 24] Too many open files: '/home/solomon/public/Shawn/2022/Feb/continual_learning/unknown_full_benchmark/blender150/Images/Annotation/class_name.txt'
/usr/lib/python3.8/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 104 leaked semaphore objects to clean up at shutdown
  warnings.warn('resource_tracker: There appear to be %d '
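Both the short and the long run end in OSError: [Errno 24] Too many open files. A commonly suggested workaround for this class of DataLoader failure (a general mitigation, not something taken from this gist) is to switch PyTorch's tensor-sharing strategy from file_descriptor to file_system, so shared tensors are passed to workers by shared-memory file name rather than by duplicated file descriptors. A sketch, to be run before any DataLoader is created:

# Workaround sketch: trade per-tensor file descriptors for /dev/shm files.
import torch.multiprocessing as mp

print("sharing strategy before:", mp.get_sharing_strategy())  # 'file_descriptor' by default on Linux
mp.set_sharing_strategy("file_system")
print("sharing strategy after:", mp.get_sharing_strategy())

The trade-off is higher /dev/shm usage and possible leaked shared-memory files if workers are killed, so this is a mitigation rather than a fix for whatever is leaking descriptors.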
################################################################
THE SAME ERROR WHILE OPENING A DIFFERENT FILE (A YAML CONFIG)
################################################################
-- Process 1 terminated with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/spawn.py", line 59, in _wrap
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/detectron2/detectron2/engine/launch.py", line 87, in _distributed_worker
  File "/home/solomon/public/Shawn/2022/Jun/moco/main_moco.py", line 217, in main_worker
    mrcnn_tool_k = slmdptc_main(transform=transf, image_folder=args.data, arch=args.arch, args=args)
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/main.py", line 1641, in main
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/main.py", line 993, in train
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/main.py", line 749, in initail_cfg
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/detectron2/detectron2/engine/../../detectron2/config/config.py", line 24, in merge_from_file
  File "/usr/local/lib/python3.8/dist-packages/fvcore/common/config.py", line 59, in load_yaml_with_base
  File "/usr/local/lib/python3.8/dist-packages/fvcore/common/config.py", line 40, in _open_cfg
  File "/usr/local/lib/python3.8/dist-packages/iopath/common/file_io.py", line 929, in open
  File "/usr/local/lib/python3.8/dist-packages/iopath/common/file_io.py", line 589, in _open
OSError: [Errno 24] Too many open files: '/home/solomon/public/Pawn/detectron2-0.6/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml'
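Here the descriptor limit is already exhausted before the Detectron2 YAML config can even be opened. Another standard mitigation is raising the per-process open-file limit, either with ulimit -n in the launching shell or from Python at startup; the sketch below assumes a Linux host and simply raises the soft limit up to the existing hard limit, which never needs extra privileges.

# Sketch: raise the soft RLIMIT_NOFILE up to the hard limit before training starts.
import resource

soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
print(f"open-file limit: soft={soft}, hard={hard}")
resource.setrlimit(resource.RLIMIT_NOFILE, (hard, hard))  # soft limit can always be raised to the hard limit

Equivalently, run ulimit -n <N> in the shell before launching main_moco.py.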
################################################################
Log after setting serialize = False in DT2 (detectron2)
################################################################
-- Process 1 terminated with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/spawn.py", line 59, in _wrap
  File "/home/solomon/public/Shawn/2022/Jun/moco/slmdptc/detectron2/detectron2/engine/launch.py", line 87, in _distributed_worker
  File "/home/solomon/public/Shawn/2022/Jun/moco/main_moco.py", line 333, in main_worker
    train(train_loader, model, criterion, optimizer, epoch, args)
  File "/home/solomon/public/Shawn/2022/Jun/moco/main_moco.py", line 363, in train
    for i, images in enumerate(train_loader):
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 359, in __iter__
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 305, in _get_iterator
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 891, in __init__
  File "/usr/lib/python3.8/multiprocessing/context.py", line 103, in Queue
  File "/usr/lib/python3.8/multiprocessing/queues.py", line 41, in __init__
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 527, in Pipe
OSError: [Errno 24] Too many open files
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/usr/lib/python3.8/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/usr/lib/python3.8/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
_pickle.UnpicklingError: pickle data was truncated
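In this last run the limit is hit even earlier, while the DataLoader is still creating the Pipe for each worker, and the spawned process then dies with a truncated pickle because the parent never finished sending its startup state. When chasing which step is leaking descriptors, a small Linux-only helper like the one below (hypothetical, not part of the project) can be called before and after suspect code such as load_solom_ssl_json or loadConfigFromFile:

# Monitoring sketch (Linux): count the file descriptors currently held by this process.
import os

def open_fd_count() -> int:
    return len(os.listdir("/proc/self/fd"))

print("open fds before:", open_fd_count())
# ... run the dataset / config loading code under suspicion ...
print("open fds after:", open_fd_count())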