Created
October 14, 2019 05:37
-
-
Save ivirshup/436c7678d29e6985e2bdb11c44e51252 to your computer and use it in GitHub Desktop.
h5py memory cache performance
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Python 3.7.4 (default, Sep 7 2019, 18:27:02) | |
| Type 'copyright', 'credits' or 'license' for more information | |
| IPython 7.8.0 -- An enhanced Interactive Python. Type '?' for help. | |
| In [1]: import h5py | |
| In [2]: import numpy as np | |
| In [3]: indices = np.sort(np.random.choice(int(1e7), int(1e4), replace=False)) | |
| In [4]: f = h5py.File("test.h5", "r") # Using same file as before | |
| ...: dset = f["x"] | |
| # Defining some helper functions | |
| In [5]: def read_dset_by_indices(dset, indices): | |
| ...: for i in range(len(indices) - 1): | |
| ...: s = slice(indices[i], indices[i+1]) | |
| ...: dset[s] | |
| ...: | |
| In [6]: def read_dset_by_chunks(dset): | |
| ...: cs = dset.chunks[0] | |
| ...: ts = dset.shape[0] | |
| ...: slice_gen = (slice(i*cs, min((i+1)*cs, ts)) for i in range(ts // cs + 1)) | |
| ...: for s in slice_gen: | |
| ...: dset[s] | |
| ...: | |
| # Initial test: effect of purging the OS cache, with the file opened using h5py's default cache settings | |
| In [7]: %%timeit !sync && sudo purge | |
| ...: read_dset_by_indices(dset, indices) | |
| ...: | |
| ...: | |
| 1.23 s ± 12.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
| In [8]: %%timeit | |
| ...: read_dset_by_indices(dset, indices) | |
| ...: | |
| ...: | |
| 1.13 s ± 19.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
| In [9]: %%timeit !sync && sudo purge | |
| ...: read_dset_by_chunks(dset) | |
| ...: | |
| ...: | |
| 221 ms ± 14.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
| In [10]: %%timeit | |
| ...: read_dset_by_chunks(dset) | |
| ...: | |
| ...: | |
| 141 ms ± 2.49 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
| In [11]: f.close() | |
| # Testing with a large (100 MiB) chunk cache | |
| In [12]: f = h5py.File("test.h5", "r", rdcc_nbytes=100 * (1024 ** 2), rdcc_nslots=50000, rdcc_w0=.5) | |
| ...: dset = f["x"] | |
| In [13]: %%timeit !sync && sudo purge | |
| ...: read_dset_by_indices(dset, indices) | |
| ...: | |
| ...: | |
| 1.22 s ± 34.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
| In [14]: %%timeit | |
| ...: read_dset_by_indices(dset, indices) | |
| ...: | |
| ...: | |
| 1.12 s ± 18.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
| In [15]: %%timeit !sync && sudo purge | |
| ...: read_dset_by_chunks(dset) | |
| ...: | |
| ...: | |
| 131 ms ± 2.01 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
| In [16]: %%timeit | |
| ...: read_dset_by_chunks(dset) | |
| ...: | |
| ...: | |
| 123 ms ± 502 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
| In [17]: %%timeit !sync && sudo purge | |
| ...: read_dset_by_chunks(dset) | |
| ...: | |
| ...: | |
| 130 ms ± 1.38 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
| In [18]: f.close() | |
| # Using no chunk cache (rdcc_nbytes=0) | |
| In [19]: f = h5py.File("test.h5", "r", rdcc_nbytes=0) | |
| ...: dset = f["x"] | |
| In [20]: %%timeit !sync && sudo purge | |
| ...: read_dset_by_indices(dset, indices) | |
| ...: | |
| ...: | |
| 1.29 s ± 7.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
| In [21]: %%timeit | |
| ...: read_dset_by_indices(dset, indices) | |
| ...: | |
| ...: | |
| 1.2 s ± 41 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
| In [22]: %%timeit !sync && sudo purge | |
| ...: read_dset_by_chunks(dset) | |
| ...: | |
| ...: | |
| 145 ms ± 1.34 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
| In [23]: %%timeit | |
| ...: read_dset_by_chunks(dset) | |
| ...: | |
| ...: | |
| 135 ms ± 2.63 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
| In [24]: 101 % 100 | |
| Out[24]: 1 | |
| In [25]: np.remainder(101, 100) | |
| Out[25]: 1 | |
| In [26]: f.close() | |
| # Trying again with a very large (1 GiB) chunk cache | |
| In [27]: f = h5py.File("test.h5", "r", rdcc_nbytes=(1024 ** 3), rdcc_nslots=50000, rdcc_w0=.5) | |
| ...: dset = f["x"] | |
| In [28]: %%timeit !sync && sudo purge | |
| ...: read_dset_by_chunks(dset) | |
| ...: | |
| ...: | |
| 155 ms ± 6.25 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
| In [29]: %%timeit !sync && sudo purge | |
| ...: read_dset_by_chunks(dset) | |
| ...: | |
| ...: | |
| 132 ms ± 3.05 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
| In [30]: %%timeit !sync && sudo purge | |
| ...: read_dset_by_chunks(dset) | |
| ...: | |
| ...: | |
| 131 ms ± 1.89 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
| In [31]: %%timeit | |
| ...: read_dset_by_chunks(dset) | |
| ...: | |
| ...: | |
| 124 ms ± 1.94 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
| In [32]: %%timeit | |
| ...: read_dset_by_chunks(dset) | |
| ...: | |
| ...: | |
| 121 ms ± 738 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
| In [33]: f.close() | |
| # Some potential reproducibility problems: the same cell gives different timings on repeated runs | |
| In [34]: f = h5py.File("test.h5", "r", rdcc_nbytes=0) | |
| ...: dset = f["x"] | |
| In [35]: %%timeit !sync && sudo purge | |
| ...: read_dset_by_chunks(dset) | |
| ...: | |
| ...: | |
| 197 ms ± 4.44 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
| In [36]: %%timeit !sync && sudo purge | |
| ...: read_dset_by_chunks(dset) | |
| ...: | |
| ...: | |
| 143 ms ± 972 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
| In [37]: f.close() | |
| In [38]: f = h5py.File("test.h5", "r") | |
| ...: dset = f["x"] | |
| In [39]: %%timeit !sync && sudo purge | |
| ...: read_dset_by_chunks(dset) | |
| ...: | |
| ...: | |
| 216 ms ± 11.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
| In [40]: %%timeit !sync && sudo purge | |
| ...: read_dset_by_chunks(dset) | |
| ...: | |
| ...: | |
| 153 ms ± 1.83 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
| In [41]: %%timeit !sync && sudo purge | |
| ...: read_dset_by_chunks(dset) | |
| ...: | |
| ...: | |
| 213 ms ± 7.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment