Last active
January 7, 2016 17:40
-
-
Save emallson/0e9ff54c14c85ba486e3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import h5py\n", | |
| "import numpy as np" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Generate Some Data ###\n", | |
| "\n", | |
| "A 10,000 x 10,000 matrix of random floats. It occupies approximately 763 MiB when stored in `test.h5`." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "with h5py.File(\"test.h5\") as f:\n", | |
| " f['/test'] = np.random.rand(10000, 10000)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Define numpy mmap ###" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def _mmap_h5(path, h5path):\n", | |
| " with h5py.File(path) as f:\n", | |
| " ds = f[h5path]\n", | |
| " # We get the dataset address in the HDF5 file.\n", | |
| " offset = ds.id.get_offset()\n", | |
| " # We ensure we have a non-compressed contiguous array.\n", | |
| " assert ds.chunks is None\n", | |
| " assert ds.compression is None\n", | |
| " assert offset > 0\n", | |
| " dtype = ds.dtype\n", | |
| " shape = ds.shape\n", | |
| " arr = np.memmap(path, mode='r', shape=shape, offset=offset, dtype=dtype)\n", | |
| " return arr" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Experiment 1: Load Data, Slice rectangle [:2, :3]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### h5py" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "10 loops, best of 3: 2.01 s per loop\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit -n 10 -r 3\n", | |
| "f = h5py.File(\"test.h5\", 'r')\n", | |
| "x = f['/test'][...]\n", | |
| "print(x[:2, :3])\n", | |
| "f.close()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### numpy" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "10 loops, best of 3: 3.42 ms per loop\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit -n 10 -r 3\n", | |
| "x = _mmap_h5('test.h5', '/test')[...]\n", | |
| "print(x[:2, :3])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Experiment 2: Don't Load, Just Slice [:2, :3]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### h5py" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "10 loops, best of 3: 4.36 ms per loop\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit -n 10 -r 3\n", | |
| "f = h5py.File(\"test.h5\", 'r')\n", | |
| "x = f['/test']\n", | |
| "print(x[:2, :3])\n", | |
| "f.close()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### numpy" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "[[ 0.35610554 0.88638298 0.61585581]\n", | |
| " [ 0.52239096 0.26190995 0.02339244]]\n", | |
| "10 loops, best of 3: 3.67 ms per loop\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit -n 10 -r 3\n", | |
| "x = _mmap_h5('test.h5', '/test')\n", | |
| "print(x[:2, :3])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Experiment 3: Aggregate over all data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### h5py" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "50000732.3183\n", | |
| "10 loops, best of 3: 4.68 s per loop\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit -n 10 -r 3\n", | |
| "f = h5py.File(\"test.h5\", 'r')\n", | |
| "x = f['/test']\n", | |
| "# h5py.Dataset does not have a sum() function. \n", | |
| "# The options are to use [...] to load all, or use np.sum on the dataset itself\n", | |
| "print(np.sum(x))\n", | |
| "f.close()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### numpy" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "50000732.3183242\n", | |
| "10 loops, best of 3: 276 ms per loop\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit -n 10 -r 3\n", | |
| "x = _mmap_h5('test.h5', '/test')\n", | |
| "print(x.sum())" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Conclusion\n", | |
| "\n", | |
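| "`np.memmap` and `h5py` perform comparably when only a small slice is read and the full dataset is never loaded. However, when the entire dataset is requested (`[...]`) or an aggregation is performed over it, `np.memmap` significantly outperforms `h5py`. Note that in Experiment 1 `[...]` on a memmap returns a lazy view rather than copying the data into memory, so that timing mostly reflects mapping the file, not reading it." | |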
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Final Timings\n", | |
| "\n", | |
| "| Method | Experiment 1 | Experiment 2 | Experiment 3 |\n", | |
| "|-------------|--------------|--------------|--------------|\n", | |
| "| `h5py` | 2.01s | 4.36ms | 4.68s |\n", | |
| "| `np.memmap` | **3.42ms** | **3.67ms** | **276ms** |" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.4.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment