Last active
August 29, 2015 14:18
-
-
Save d2207197/1c1cda1387d97a5e24b5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# 1, 2, 3 層 dictionary 效能測試" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 125, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "1 loops, best of 3: 8.4 s per loop\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%timeit d = { k:k**2 for k in range(10000000)}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 126, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "1 loops, best of 3: 9.38 s per loop\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%timeit d = { (n1,n2): n1*n2 for n1 in range(10000) for n2 in range(1000)}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 127, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "1 loops, best of 3: 5.18 s per loop\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%timeit d = {n1:{ n2: n1*n2 for n2 in range(1000)} for n1 in range(10000)}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "切成兩層 dictionary 時,效能有顯著提升。" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 128, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "1 loops, best of 3: 6.77 s per loop\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%timeit d = {n1:{ n2: {n3: (n1*100+n2)*n3 for n3 in range(1000)} for n2 in range(100)} for n1 in range(100)}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "切到三層時稍微變慢了點,應該是 100 個 keys 跟 1000 個 keys 效率差異不大。" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# 建立多層 dictionary \n", | |
| "\n", | |
| "通常使用 defaultdict 來減少初始化的麻煩" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### defaultdict\n", | |
| "\n", | |
| "提供具有預設值的 dictionary" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 129, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "from collections import defaultdict" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 131, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0" | |
| ] | |
| }, | |
| "execution_count": 131, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "intd = defaultdict(int)\n", | |
| "intd['hello']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 132, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "1" | |
| ] | |
| }, | |
| "execution_count": 132, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "intd['hey'] += 1 # 可以直接增加空白 key 的值\n", | |
| "intd['hey']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 133, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0.0" | |
| ] | |
| }, | |
| "execution_count": 133, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "floatd = defaultdict(float)\n", | |
| "floatd['hello']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 137, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "1" | |
| ] | |
| }, | |
| "execution_count": 137, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "_1dict = defaultdict(lambda: 1) # 預設值為 1\n", | |
| "_1dict['hello']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### 建立兩層 dictionary\n", | |
| "\n", | |
| "使用 dict 作為 defaultdict 的 default factory" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 59, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "1 loops, best of 3: 5.14 s per loop\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit \n", | |
| "d = defaultdict(dict)\n", | |
| "for n1 in range(10000):\n", | |
| " for n2 in range(1000):\n", | |
| " d[n1][n2] = n1*n2" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "如需要預設值為 0,需連接兩個 defaultdict" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 134, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0" | |
| ] | |
| }, | |
| "execution_count": 134, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "int_2lvl_dict = defaultdict(lambda: defaultdict(int))\n", | |
| "int_2lvl_dict['hey']['hello']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### 三層 dictionary \n", | |
| "可以使用兩個 defaultdict\n", | |
| "\n", | |
| "簡單好懂的寫法是,但會有無法pickle 的缺點\n", | |
| "```python\n", | |
| "d = defaultdict(lambda: defaultdict(dict))\n", | |
| "```\n", | |
| "\n", | |
| "配合 partial 就可以 pickle,而且速度似乎會稍微快一點點\n", | |
| "```python\n", | |
| "d = defaultdict(partial(defaultdict, dict))\n", | |
| "```\n", | |
| "\n", | |
| "如果需要預設值為 0 就須連結三層 defaultdict\n", | |
| "\n", | |
| "```python\n", | |
| "d = defaultdict(lambda: defaultdict(lambda: int))\n", | |
| "d = defaultdict(partial(defaultdict, partial(defualtdict, int)))\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 61, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "from functools import partial" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 63, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "1 loops, best of 3: 6.53 s per loop\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit\n", | |
| "d = defaultdict(partial(defaultdict, dict))\n", | |
| "for n1 in range(100):\n", | |
| " for n2 in range(100):\n", | |
| " for n3 in range(1000):\n", | |
| " d[n1][n2][n3] = (n1*100+n2)*n3" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Counter\n", | |
| "\n", | |
| "`Counter` 本身就是 dictionary ,不過預設值都是 0,很類似 `defaultdict(int)` ,不過有幾點不同\n", | |
| "\n", | |
| "- 有 `most_common()` method,可以直接取出 count 數量前幾名的 key。\n", | |
| "- `__getitem__(key)` 後不會產生 key" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 29, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[('stop', 1000), (('hey', 'yo'), 7)]\n", | |
| "Counter({'stop': 1000, ('hey', 'yo'): 7, 'hello': 3, 'world': 1})\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "from collections import Counter\n", | |
| "word_cnt = Counter()\n", | |
| "word_cnt['hello'] +=3\n", | |
| "word_cnt['world'] +=1\n", | |
| "word_cnt['stop'] = 1000\n", | |
| "word_cnt[('hey', 'yo')] = 7\n", | |
| "print word_cnt.most_common(2) # 列出 count 前幾名的字,依數量排序\n", | |
| "print word_cnt" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 34, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "defaultdict(<type 'int'>, {'hey': 0})\n", | |
| "Counter()\n", | |
| "dint has 'hey'\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "dint = defaultdict(int)\n", | |
| "cnt = Counter()\n", | |
| "\n", | |
| "dint['hey']\n", | |
| "cnt['hey']\n", | |
| "\n", | |
| "print dint\n", | |
| "print cnt\n", | |
| "\n", | |
| "for k in dint:\n", | |
| " print 'dint has', repr(k)\n", | |
| "for k in cnt:\n", | |
| " print 'cnt has', repr(k)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# 三層以上的 dictionary + counter" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 38, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "_1levelcounter = Counter()\n", | |
| "_2levelcounter = defaultdict(Counter)\n", | |
| "_3levelcounter = defaultdict(partial(defaultdict, Counter))\n", | |
| "_4levelcounter = defaultdict(partial(defaultdict, partial(defaultdict, Counter)))\n", | |
| "_5levelcounter = defaultdict(partial(defaultdict, partial(defaultdict, partial(defaultdict, Counter))))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 39, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "_1levelcounter['hello'] += 10\n", | |
| "_2levelcounter['hello']['world'] += 10\n", | |
| "_3levelcounter['hello']['world']['hey'] += 10\n", | |
| "_4levelcounter['hello']['world']['hey'][-5] += 10\n", | |
| "_5levelcounter['hello']['world']['hey'][-5][('foo', 'bar')] += 10" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 41, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "10\n", | |
| "10\n", | |
| "10\n", | |
| "10\n", | |
| "10\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print _1levelcounter['hello']\n", | |
| "print _2levelcounter['hello']['world']\n", | |
| "print _3levelcounter['hello']['world']['hey']\n", | |
| "print _4levelcounter['hello']['world']['hey'][-5]\n", | |
| "print _5levelcounter['hello']['world']['hey'][-5][('foo', 'bar')]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# n level counter generator" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 118, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def n_lvl_Counter(n):\n", | |
| " _n_lvl_Counter = lambda n: partial(defaultdict, _n_lvl_Counter(n-1)) if n > 1 else Counter\n", | |
| " return _n_lvl_Counter(n)()\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 121, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0" | |
| ] | |
| }, | |
| "execution_count": 121, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "_1lvl = n_lvl_Counter(1)\n", | |
| "_1lvl['hey']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 122, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0" | |
| ] | |
| }, | |
| "execution_count": 122, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "_2lvl = n_lvl_Counter(2)\n", | |
| "_2lvl['hey']['hey']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 115, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0" | |
| ] | |
| }, | |
| "execution_count": 115, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "_3lvl = n_lvl_Counter(3)\n", | |
| "_3lvl['hey']['hey']['hey']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 124, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "defaultdict(<functools.partial object at 0x10bc4cc58>, {})" | |
| ] | |
| }, | |
| "execution_count": 124, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "n_lvl_Counter(3)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# 低效率的 in dict.keys() 操作" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 166, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "d = { k:k**2 for k in range(1000000)}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 167, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CPU times: user 115 ms, sys: 2.8 ms, total: 118 ms\n", | |
| "Wall time: 123 ms\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "True" | |
| ] | |
| }, | |
| "execution_count": 167, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "%time 9999 in d.keys()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 168, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CPU times: user 1.66 ms, sys: 44 µs, total: 1.7 ms\n", | |
| "Wall time: 2.39 ms\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "True" | |
| ] | |
| }, | |
| "execution_count": 168, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "%time 9999 in d.iterkeys()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 170, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CPU times: user 6 µs, sys: 1 µs, total: 7 µs\n", | |
| "Wall time: 10 µs\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "True" | |
| ] | |
| }, | |
| "execution_count": 170, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "%time 9999 in d.viewkeys()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 169, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CPU times: user 3 µs, sys: 0 ns, total: 3 µs\n", | |
| "Wall time: 7.87 µs\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "True" | |
| ] | |
| }, | |
| "execution_count": 169, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "%time 9999 in d" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "速度約是 \n", | |
| " \n", | |
| "> `in d` > `in d.viewkeys()` >>> `in d.iterkeys()` >> `in d.keys()`" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 156, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "https://gist.github.com/1c1cda1387d97a5e24b5\r\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "!gist -u 1c1cda1387d97a5e24b5 multilevel\\ dictionary.ipynb" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 2", | |
| "language": "python", | |
| "name": "python2" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 2 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython2", | |
| "version": "2.7.9" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment