Created
June 5, 2012 13:13
-
-
Save erikvanzijst/2874887 to your computer and use it in GitHub Desktop.
lock: fixed race condition in file lock acquisition
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# HG changeset patch | |
# User Erik van Zijst <[email protected]> | |
# Date 1315554086 -36000 | |
# Branch stable | |
# Node ID ec4dafac91fc69903dea3ac3f77fdaf11de84a00 | |
# Parent 01cdfba22f0c1bfdc21feeb95b4d5d36219bafdd | |
lock: fixed race condition in file lock acquisition | |
This fixes a race condition in lock.trylock() where, if you try to acquire a | |
lock that is held by someone else just at the moment at which that other party | |
releases it, an uncaught OSError is raised. | |
Consider us calling lock.lock(). This will descend into trylock(), which | |
attempts to makelock(). If the lock is held, this will raise OSError(EEXIST). | |
We then call testlock() to trace its owner (so that we can steal the lock | |
should the owner be dead). In testlock() we call readlock(). If right at this | |
point the owner releases the lock (thereby deleting the symlink), readlock() | |
will raise OSError(ENOENT). This is uncaught and propagates to the caller. | |
This patch catches OSError(ENOENT) in testlock() and return None (indicating | |
as per the method's docstring that the lock is invalid). | |
Now back in trylock that will simply loop us back to makelock() to attempt the | |
acquisition again. | |
diff -r 01cdfba22f0c -r ec4dafac91fc mercurial/lock.py | |
--- a/mercurial/lock.py Fri Aug 26 16:07:16 2011 -0500 | |
+++ b/mercurial/lock.py Fri Sep 09 17:41:26 2011 +1000 | |
@@ -96,27 +96,33 @@ | |
The lock file is only deleted when None is returned. | |
""" | |
- locker = util.readlock(self.f) | |
try: | |
- host, pid = locker.split(":", 1) | |
- except ValueError: | |
- return locker | |
- if host != lock._host: | |
- return locker | |
- try: | |
- pid = int(pid) | |
- except ValueError: | |
- return locker | |
- if util.testpid(pid): | |
- return locker | |
- # if locker dead, break lock. must do this with another lock | |
- # held, or can race and break valid lock. | |
- try: | |
- l = lock(self.f + '.break', timeout=0) | |
- util.unlink(self.f) | |
- l.release() | |
- except error.LockError: | |
- return locker | |
+ locker = util.readlock(self.f) | |
+ except OSError, why: | |
+ # if self.f no longer exists, return None | |
+ if why.errno != errno.ENOENT: | |
+ raise | |
+ else: | |
+ try: | |
+ host, pid = locker.split(":", 1) | |
+ except ValueError: | |
+ return locker | |
+ if host != lock._host: | |
+ return locker | |
+ try: | |
+ pid = int(pid) | |
+ except ValueError: | |
+ return locker | |
+ if util.testpid(pid): | |
+ return locker | |
+ # if locker dead, break lock. must do this with another lock | |
+ # held, or can race and break valid lock. | |
+ try: | |
+ l = lock(self.f + '.break', timeout=0) | |
+ util.unlink(self.f) | |
+ l.release() | |
+ except error.LockError: | |
+ return locker | |
def release(self): | |
if self.held > 1: | |
@@ -134,4 +140,3 @@ | |
for lock in locks: | |
if lock is not None: | |
lock.release() | |
- | |
diff -r 01cdfba22f0c -r ec4dafac91fc tests/test-lock-release.py | |
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 | |
+++ b/tests/test-lock-release.py Fri Sep 09 17:41:26 2011 +1000 | |
@@ -0,0 +1,68 @@ | |
+import tempfile | |
+import threading | |
+import time | |
+import sys | |
+import errno | |
+ | |
+sys.path.insert(0, './build/lib.linux-x86_64-2.6/mercurial') | |
+sys.path.insert(0, '.') | |
+from mercurial.lock import lock | |
+ | |
+ | |
+def test_release_race_condition(fn): | |
+ """lock.release() must not have a race condition with another process or | |
+ thread trying to acquire the lock concurrently. | |
+ | |
+ Note that this test attempt to trigger a situation where concurrent | |
+ acquisition and release run at the same time. | |
+ | |
+ Unfortunately, as with all race conditions, this test can only prove the | |
+ presence of a race condition, not the absence of one. When this test | |
+ succeeds, the code may or may not be sound. However if it fails, it shows | |
+ there is a race condition. | |
+ """ | |
+ | |
+ error = [] | |
+ def run(): | |
+ try: | |
+ lck = lock(fn) | |
+ except OSError, why: | |
+ if why.errno == errno.ENOENT: | |
+ error.append(why) | |
+ else: | |
+ lck.release() | |
+ | |
+ # To test this, we first acquire the lock in the main thread. We then | |
+ # start a second thread that also tries to acquire it. It will fail | |
+ # and spin for 1 second before trying again. | |
+ l = lock(fn) | |
+ thread = threading.Thread(target=run) | |
+ thread.start() | |
+ | |
+ # Now sleep for exactly 1 second, so that by the time we release the lock | |
+ # on the next line, the background thread will wake up from its own | |
+ # 1-second sleep in mercurial/lock.py:lock(). | |
+ # | |
+ # When the timing is right, the background thread will retry to acquire | |
+ # the lock just prior to our release, will then inspect the lock to trace | |
+ # its owner (testlock()), but while it gets ready to read the symlink, we | |
+ # release the lock, removing the symlink, causing util.readlock() to raise | |
+ # OSError:ENOENT | |
+ | |
+ time.sleep(1) # wait for the background thread to retry acquisition | |
+ l.release() | |
+ | |
+ thread.join() | |
+ assert not error, 'Race condition between testlock/makelock: %s' % error | |
+ | |
+if __name__ == '__main__': | |
+ | |
+ # pick a filename that is guaranteed not to be in use on this | |
+ # system: | |
+ with tempfile.NamedTemporaryFile() as file: | |
+ fn = file.name | |
+ | |
+ # You'd normally want to run this many times, but since lock.lock() has a | |
+ # 1 second sleep, doing it more than once is impractical during a normal | |
+ # test run. | |
+ test_release_race_condition(fn) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment