Last active
January 17, 2019 13:51
-
-
Save busla/bf609cada73c620906a9c6ab9e94af97 to your computer and use it in GitHub Desktop.
Simple (and probably naive) comparison between os and pathlib.Path when recursively crawling directories
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import timeit | |
from pathlib import Path | |
from pathlib import Path | |
from datetime import datetime | |
import os | |
# Root directory that both crawlers walk recursively.
CHUNKS_DIR = '/media/data/chunks'
# File-name prefix used when selecting chunk files.
PREFIX = 'segment-'
# File extension used when selecting chunk files (stored without a leading dot).
EXT = 'mp4'
def get_timestamp(value):
    """Return the ``st_ctime_ns`` of ``value`` as a ``datetime``.

    Args:
        value: Path handed to ``os.stat``.

    Returns:
        ``datetime.fromtimestamp`` of the stat result's ``st_ctime_ns``
        field, converted from nanoseconds to (float) seconds.
    """
    ctime_ns = os.stat(value).st_ctime_ns
    ctime_seconds = ctime_ns / 1e9
    return datetime.fromtimestamp(ctime_seconds)
def populate_chunks_pathlib():
    """Collect chunk files below ``CHUNKS_DIR`` using ``pathlib``.

    Recursively globs for ``{PREFIX}*.{EXT}`` and keeps the entries for
    which ``is_file()`` is true.

    Returns:
        A tuple of ``(parent_dir_name, timestamp, path_string)`` triples,
        in the order ``rglob`` yields the matches.
    """
    pattern = f"{PREFIX}*.{EXT}"
    found = []
    for entry in Path(CHUNKS_DIR).rglob(pattern):
        if not entry.is_file():
            continue
        full_path = str(entry)
        # parts[-2] is the name of the directory that directly holds the file.
        found.append((str(entry.parts[-2]), get_timestamp(full_path), full_path))
    return tuple(found)
def populate_chunks_os():
    """Collect chunk files below ``CHUNKS_DIR`` using ``os.walk``.

    A file is selected when its name starts with ``PREFIX``, ends with
    ``.{EXT}`` (dot included, so the selection agrees with the glob pattern
    ``{PREFIX}*.{EXT}``), and ``os.path.isfile`` is true for it.

    Returns:
        A tuple of ``(parent_dir_name, timestamp, path_string)`` triples,
        in the order ``os.walk`` yields the files.
    """
    suffix = f".{EXT}"
    # Prefix and suffix must not overlap inside the name, mirroring how the
    # glob ``{PREFIX}*.{EXT}`` would match.
    min_length = len(PREFIX) + len(suffix)
    chunks = []
    for root, _dirs, files in os.walk(CHUNKS_DIR):
        for name in files:
            # Cheap string checks first; only touch the filesystem for names
            # that can actually be chunks.
            if len(name) < min_length:
                continue
            if not (name.startswith(PREFIX) and name.endswith(suffix)):
                continue
            path = os.path.join(root, name)  # built once, reused below
            if not os.path.isfile(path):
                continue
            # The second-to-last path component is the containing directory.
            chunks.append((path.split(os.sep)[-2], get_timestamp(path), path))
    return tuple(chunks)
# compute chunk search with os lib | |
def os_time(repeat=1, number=1):
    """Time ``populate_chunks_os`` with ``timeit`` and print the best run.

    The function object is handed to ``timeit.repeat`` directly instead of
    being imported from ``__main__`` in a setup string, so the benchmark
    also works when this module is imported rather than run as a script.

    Args:
        repeat: How many timing rounds to run (must be at least 1).
        number: How many calls are made per round; each reported time is
            the total for one round.
    """
    times = timeit.repeat(stmt=populate_chunks_os,
                          repeat=repeat,
                          number=number)
    # The minimum is the round least disturbed by other system activity.
    print('Chunks search with os lib: {} seconds'.format(min(times)))
def pathlib_time(repeat=1, number=1):
    """Time ``populate_chunks_pathlib`` with ``timeit`` and print the best run.

    The function object is handed to ``timeit.repeat`` directly instead of
    being imported from ``__main__`` in a setup string, so the benchmark
    also works when this module is imported rather than run as a script.

    Args:
        repeat: How many timing rounds to run (must be at least 1).
        number: How many calls are made per round; each reported time is
            the total for one round.
    """
    times = timeit.repeat(stmt=populate_chunks_pathlib,
                          repeat=repeat,
                          number=number)
    # The minimum is the round least disturbed by other system activity.
    print('Chunks search with pathlib: {} seconds'.format(min(times)))
if __name__ == '__main__':
    # Run the pathlib benchmark first, then the os-based one.
    for run_benchmark in (pathlib_time, os_time):
        run_benchmark()
# Sample output:
# >>> Chunks search with pathlib: 64.39873635303229 seconds
# >>> Chunks search with os lib: 41.62020164402202 seconds
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment