Skip to content

Instantly share code, notes, and snippets.

@prashanthpai
Created December 23, 2014 06:32
Show Gist options
  • Save prashanthpai/014d91e54bad81b4f982 to your computer and use it in GitHub Desktop.
Save prashanthpai/014d91e54bad81b4f982 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# pip install benchmark
# Comparison of makedirs_count() vs swift.common.utils.mkdirs()
import benchmark
import os
import uuid
import errno
import shutil
import random
import hashlib
from swift.common.utils import mkdirs
def makedirs_count(path, count=0):
    """
    Create ``path`` and any missing parent directories, counting creations.

    Same as os.makedirs() except that this method returns the number of
    new directories that had to be created.
    https://hg.python.org/cpython/file/v2.7.3/Lib/os.py#l136

    Also, this does not raise an error if target directory already exists.
    This behaviour is similar to Python 3.x's os.makedirs() called with
    exist_ok=True
    https://hg.python.org/cpython/file/v3.4.2/Lib/os.py#l212

    :param path: directory path to create.
    :param count: running total threaded through the recursive calls;
                  external callers normally leave it at the default 0.
    :returns: ``count`` plus the number of directories created by this call
              (always an int, including for paths ending in ``.``).
    :raises OSError: if os.mkdir() fails for any reason other than the
                     target already existing as a directory.
    """
    head, tail = os.path.split(path)
    if not tail:
        # path ended with a separator ("a/b/"); split again to get "b".
        head, tail = os.path.split(head)
    if head and tail and not os.path.exists(head):
        try:
            count = makedirs_count(head, count)
        except OSError as e:
            # Be happy if someone else created the parent in the meantime.
            if e.errno != errno.EEXIST:
                raise
        if tail == os.path.curdir:
            # "xxx/newdir/." exists as soon as "xxx/newdir" exists, so there
            # is nothing left to create; still report what was created.
            return count
    try:
        os.mkdir(path)
    except OSError as e:
        # An existing directory is fine; an existing non-directory is not.
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise
    else:
        count += 1
    return count
def clean():
    """
    Reset the benchmark area so the next run starts cold.

    Removes the three object trees under MOUNT_PATH (missing trees are
    ignored), flushes filesystem buffers and then asks the kernel to drop
    its caches. Writing to /proc/sys/vm/drop_caches normally needs root.
    """
    for tree_name in ('objects', 'objects-1', 'objects-2'):
        tree_path = os.path.join(MOUNT_PATH, tree_name)
        shutil.rmtree(tree_path, ignore_errors=True)

    # flush fs buffers
    os.system('sync')

    # free pagecache, dentries and inodes
    with open('/proc/sys/vm/drop_caches', 'w') as drop_caches:
        drop_caches.write('3\n')
# Root directory under which the benchmark creates and removes its object
# directory trees (used by clean() and MkdirsComparison.setUp()).
MOUNT_PATH = '/mnt/test'
class MkdirsComparison(benchmark.Benchmark):
    """
    Benchmark swift.common.utils.mkdirs() against makedirs_count().

    Both test methods create the same shuffled list of directory paths
    under MOUNT_PATH; clean() is called before each run and at teardown.
    """

    def setUp(self):
        # Silly way to create object dir path names. Swift object path template:
        # /objects-<sp_index>/<partition-number>/<last-3-characters-of-hash>/<hash>/<timestamp>.data
        self.paths = []
        for sp in ('objects', 'objects-1', 'objects-2'):
            for partnumber in range(0, 100):
                for _ in range(0, 200):
                    # hashlib needs bytes on Python 3; encoding the ASCII
                    # uuid text is a no-op on Python 2.
                    seed = str(uuid.uuid4()).encode('utf-8')
                    md5hash = hashlib.md5(seed).hexdigest()
                    path = '/'.join([MOUNT_PATH, sp, str(partnumber), md5hash[-3:], md5hash])
                    self.paths.append(path)
        # Shuffle so directories are not created in tree order.
        random.shuffle(self.paths)
        # Single-argument print() behaves the same on Python 2 and 3.
        print("Total object paths = %d" % len(self.paths))

    def eachSetUp(self):
        # Start every run from an empty tree with caches dropped.
        clean()

    def test_mkdirs(self):
        # Baseline: swift's mkdirs().
        for path in self.paths:
            mkdirs(path)

    def test_mkdirs_count(self):
        # Candidate: makedirs_count(); the returned count is ignored here.
        for path in self.paths:
            makedirs_count(path)

    def tearDown(self):
        # Leave nothing behind under MOUNT_PATH.
        clean()
# Script entry point: hand control to the benchmark runner.
# NOTE(review): each=5 appears to request 5 runs per test method (the
# posted report lists 5 runs each) -- confirm against the benchmark docs.
if __name__ == '__main__':
    benchmark.main(each=5)
@prashanthpai
Copy link
Author

Total object paths = 60000

Benchmark Report

MkdirsComparison

name rank runs mean sd timesBaseline
mkdirs count 1 5 3.984 0.05563 1.0
mkdirs 2 5 4.344 0.03462 1.09042402982

Each of the above 10 runs were run in random, non-consecutive order by
benchmark v0.1.5 (http://jspi.es/benchmark) with Python 2.7.5
Linux-3.16.4-200.fc20.x86_64-x86_64 on 2014-12-23 06:25:55.

@prashanthpai
Copy link
Author

Total object paths = 600

Benchmark Report

MkdirsComparison

name rank runs mean sd timesBaseline
mkdirs 1 25 0.1218 0.008133 1.0
mkdirs count 2 25 0.1279 0.02166 1.05064548637

Each of the above 50 runs were run in random, non-consecutive order by
benchmark v0.1.5 (http://jspi.es/benchmark) with Python 2.7.5
Linux-3.16.4-200.fc20.x86_64-x86_64 on 2014-12-23 06:34:43.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment