Created
October 5, 2020 15:28
-
-
Save phizaz/fb190374a07fec59bf7dc67c4e42c59e to your computer and use it in GitHub Desktop.
filelock.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import errno | |
import os | |
import time | |
from collections import defaultdict | |
from contextlib import ContextDecorator, contextmanager | |
from itertools import count | |
import torch | |
# Base name handed to _FileLock by _get_alloc_right() to serialise CUDA
# device allocation; it lives in the current user's home directory.
CUDA_ALLOC_FILE = os.path.expanduser('~/mlkit.alloc')
def global_lock(n=1, delay=3.0, verbose=True, enable=True):
    """Return a context manager for the per-user global lock.

    The lock is backed by files named ``~/mlkit.lock.<i>``.

    Args:
        n: number of lock files in the pool (passed through to the lock).
        delay: seconds to sleep between acquisition attempts.
        verbose: whether the lock prints progress messages.
        enable: when false, a do-nothing context manager is returned
            instead of a real lock.
    """
    if not enable:
        return nullcontext()

    lock_base = os.path.expanduser('~/mlkit.lock')
    return _FileLock(None, path=lock_base, n=n, delay=delay, verbose=verbose)
def wait_global_lock(n=1, delay=3.0, verbose=True):
    """Block until one slot of the per-user global lock is free.

    The slot is taken only for an instant: the lock is entered and left
    straight away, so nothing is held once this function returns.

    Args:
        n: number of lock files in the pool.
        delay: seconds to sleep between acquisition attempts.
        verbose: whether the lock prints progress messages.
    """
    lock_base = os.path.expanduser('~/mlkit.lock')
    probe = _FileLock(None, path=lock_base, n=n, delay=delay, verbose=verbose)
    with probe:
        # Entering is the wait; there is nothing to do while holding it.
        pass
@contextmanager
def cuda_round_robin(devices=(0,), verbose=False, enable=True):
    """Yield the name of the least-used CUDA device and mark it as in use.

    Usage is tracked with marker files ``~/mlkit.cuda<dev>.<slot>``: one file
    is created for the chosen device before yielding and removed afterwards.

    Args:
        devices: non-empty sequence of int device indices to choose from.
        verbose: print a message when the marker file is created/removed.
        enable: when false, no files are touched and ``devices[0]`` is used.

    Yields:
        A device string of the form ``'cuda:<index>'``.

    Raises:
        AssertionError: if ``devices`` is empty.
        OSError: if the marker file cannot be created.
    """
    assert len(devices) > 0, "no device available"
    if not enable:
        yield f'cuda:{devices[0]}'
        return

    # The allocation lock is held only while choosing a device and creating
    # its marker file; it is released before yielding so that other
    # processes can allocate while this one is running.
    with _get_alloc_right(verbose=verbose):
        locks = _list_cuda_locks()
        # min() keeps the first device on ties, like a strict '<' scan.
        min_dev = min(devices, key=lambda d: len(locks[d]))
        taken = set(locks[min_dev])
        # First slot number not already used by a marker for this device.
        slot = next(i for i in count() if i not in taken)
        fd, path = _lockfile(
            os.path.join(os.path.expanduser('~'), f'mlkit.cuda{min_dev}.{slot}')
        )
        if verbose:
            print(f'locked {path}')

    try:
        yield f'cuda:{min_dev}'
    finally:
        # Close and unlink independently: a failed close must not leave a
        # stale marker behind, since that would skew every later allocation.
        try:
            os.close(fd)
        except OSError as e:
            print(f'error releasing lock file {path}:', e)
        try:
            os.unlink(path)
        except OSError as e:
            print(f'error releasing lock file {path}:', e)
        if verbose:
            print(f'released {path}')
def _lockfile(path):
    """Create ``path`` exclusively and return ``(fd, path)``.

    The file is opened with ``O_CREAT | O_EXCL``, so creation fails with an
    ``OSError`` if the file already exists; that error is propagated to the
    caller unchanged.
    """
    flags = os.O_RDWR | os.O_EXCL | os.O_CREAT
    descriptor = os.open(path, flags)
    return descriptor, path
def _get_alloc_right(verbose=False):
    """Build the lock that serialises CUDA device allocation.

    Returns an unacquired ``_FileLock`` based on ``CUDA_ALLOC_FILE`` that
    retries every 0.1 seconds; use it in a ``with`` statement.
    """
    alloc_lock = _FileLock(CUDA_ALLOC_FILE, verbose=verbose, delay=0.1)
    return alloc_lock
def _list_cuda_locks():
    """Collect the CUDA marker files present in the user's home directory.

    Marker files are named ``mlkit.cuda<dev>.<slot>`` where both ``<dev>``
    and ``<slot>`` are decimal integers. Any other file, including ones that
    merely contain ``mlkit.cuda`` somewhere in their name or carry extra
    suffixes, is ignored rather than raising.

    Returns:
        A ``defaultdict(list)`` mapping device index to the list of slot
        numbers currently marked for that device (in directory order).
    """
    prefix = 'mlkit.cuda'
    locks = defaultdict(list)
    for name in os.listdir(os.path.expanduser('~')):
        if not name.startswith(prefix):
            continue
        # What follows the prefix must be exactly "<dev>.<slot>".
        dev, sep, slot = name[len(prefix):].partition('.')
        if not (sep and dev.isdecimal() and slot.isdecimal()):
            continue
        locks[int(dev)].append(int(slot))
    return locks
class _FileLockException(Exception):
    """Exception type declared alongside ``_FileLock``."""
class _FileLock(ContextDecorator):
    """File-based lock usable in a ``with`` statement or as a decorator.

    Locking relies only on exclusive file creation (``O_CREAT | O_EXCL``),
    not on msvcrt or fcntl, so it should be relatively cross compatible.
    A pool of ``n`` candidate lock files is tried in order; holding any one
    of them counts as holding the lock.

    From: https://github.com/dmfrey/FileLock/blob/master/filelock/filelock.py
    """

    def __init__(self, file_name, n=1, delay=1.0, verbose=True, path=None):
        """Prepare the lock without acquiring it.

        Args:
            file_name: base name used when ``path`` is None; lock files are
                then ``<cwd>/<file_name>.lock.<i>``.
            n: number of lock files in the pool; must be at least 1.
            delay: seconds to sleep after a full unsuccessful pass.
            verbose: print messages on acquire/release.
            path: explicit base path; lock files are then ``<path>.<i>``.

        Raises:
            ValueError: if ``n`` is smaller than 1 (acquire could never
                succeed and would retry forever).
        """
        # Set state first so __del__ -> release() is safe even if the
        # validation below raises.
        self.is_locked = False
        self.fd = None
        self.lockfile = None
        if n < 1:
            raise ValueError(f'n must be at least 1, got {n}')
        if path is None:
            # use working directory
            base = os.path.join(os.getcwd(), f'{file_name}.lock')
        else:
            base = path
        self.all_lockfiles = [f'{base}.{i}' for i in range(n)]
        self.delay = delay
        self.verbose = verbose

    def acquire(self):
        """Block until one lock file of the pool has been created.

        Does nothing if this instance already holds a lock; acquiring again
        would otherwise leak the descriptor of the lock already held.

        Raises:
            OSError: for any creation failure other than "file exists".
        """
        if self.is_locked:
            return
        if self.verbose:
            print('Acquiring for a lockfile')
        flags = os.O_CREAT | os.O_EXCL | os.O_RDWR
        while True:
            for lockfile in self.all_lockfiles:
                try:
                    fd = os.open(lockfile, flags)
                except FileExistsError:
                    # Held by someone else; try the next candidate.
                    continue
                self.fd = fd
                self.lockfile = lockfile
                self.is_locked = True
                if self.verbose:
                    print(f'Lockfile {lockfile} acquired')
                return
            # Every candidate is taken: wait before the next pass.
            time.sleep(self.delay)

    def release(self):
        """Get rid of the lock by deleting the lockfile.

        When working in a `with` statement, this gets automatically
        called at the end. Errors are reported but not raised.
        """
        if not self.is_locked:
            return
        try:
            try:
                os.close(self.fd)
            finally:
                # Unlink even if closing failed, so no stale lock file is
                # left behind to block other processes.
                os.unlink(self.lockfile)
            if self.verbose:
                print(f'Lockfile {self.lockfile} released')
        except Exception as e:  # best effort: report and carry on
            print(f'error releasing lock file {self.lockfile}:', e)
        finally:
            self.fd = None
            self.is_locked = False

    def __enter__(self):
        """Acquire the lock on entering a ``with`` block (if not held)."""
        if not self.is_locked:
            self.acquire()
        return self

    def __exit__(self, exc_type, value, traceback):
        """Release the lock on leaving a ``with`` block, if it is held.

        Exceptions raised inside the block are not suppressed.
        """
        if self.is_locked:
            self.release()

    def __del__(self):
        """Make sure that the FileLock instance doesn't leave a lockfile
        lying around.
        """
        self.release()
@contextmanager
def nullcontext():
    """Context manager that yields a placeholder object.

    Any attribute looked up on the yielded object resolves to ``_nullfn``,
    so arbitrary method calls on it are accepted and produce no result.
    """

    class NullCls:
        """Placeholder whose missing attributes all map to the no-op."""

        def __getattr__(self, name):
            return _nullfn

    placeholder = NullCls()
    yield placeholder
def _nullfn(*args, **kwargs):
    """Accept any arguments, do nothing, and return None."""
    return None
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment