Last active
February 4, 2018 12:44
-
-
Save dboyliao/1cc4b41efadaa89d69fd2a1ce26dabd1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:09.486882Z", | |
| "start_time": "2018-02-04T12:41:09.386518Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import numpy as np\n", | |
| "import funcy as fy" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:09.757005Z", | |
| "start_time": "2018-02-04T12:41:09.754161Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "names = ['Mosky', 'Peter', 'Joe', 'Mike']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:10.101281Z", | |
| "start_time": "2018-02-04T12:41:10.089583Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'Mosky'" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "np.random.choice(names, 1)[0]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:10.400755Z", | |
| "start_time": "2018-02-04T12:41:10.392006Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "raw_data = [(np.random.choice(names, 1)[0], 2*np.random.random()-1, 3.14*(2*np.random.random()-1)) \n", | |
| " for _ in range(100)]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:10.786383Z", | |
| "start_time": "2018-02-04T12:41:10.780428Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "data = np.array(raw_data, dtype={'names': ['name', 'metric_1', 'metric_2'],\n", | |
| " 'formats': ['U10', 'f4', 'f4']})" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:11.281956Z", | |
| "start_time": "2018-02-04T12:41:11.276237Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([('Joe', 0.1070719 , -2.5197952 ),\n", | |
| " ('Peter', -0.14473368, -2.0009336 ),\n", | |
| " ('Mike', -0.06643482, -0.56792307)],\n", | |
| " dtype=[('name', '<U10'), ('metric_1', '<f4'), ('metric_2', '<f4')])" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "data[:3]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:11.724819Z", | |
| "start_time": "2018-02-04T12:41:11.721639Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "v = data[['name', 'metric_2']] # column(s) selection just like pandas" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:12.326790Z", | |
| "start_time": "2018-02-04T12:41:12.322404Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "False" | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
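| "# v is a view of data rather than a copy: a view keeps a reference to its parent in .base, so the check below is False\n", | |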
| "v.base is None" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:12.789208Z", | |
| "start_time": "2018-02-04T12:41:12.784399Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "False" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# selecting a single field gives a view as well, so .base is not None here either\n", | |
| "data['name'].base is None" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:13.316065Z", | |
| "start_time": "2018-02-04T12:41:13.312527Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# group the records by their 'name' field with funcy.group_by\n", | |
| "grouped = fy.group_by(lambda rec: rec['name'], data)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:14.060878Z", | |
| "start_time": "2018-02-04T12:41:14.056314Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "dict_keys(['Joe', 'Peter', 'Mike', 'Mosky'])" | |
| ] | |
| }, | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "grouped.keys()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:14.607651Z", | |
| "start_time": "2018-02-04T12:41:14.597002Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[('Mike', -0.06643482, -0.56792307),\n", | |
| " ('Mike', 0.73707867, -1.6110512),\n", | |
| " ('Mike', -0.05223163, 1.5332286),\n", | |
| " ('Mike', -0.48683003, 0.6469435),\n", | |
| " ('Mike', -0.84819245, 2.311447),\n", | |
| " ('Mike', 0.26887587, 0.031909),\n", | |
| " ('Mike', -0.09210482, -2.1643085),\n", | |
| " ('Mike', -0.13959667, 1.5170095),\n", | |
| " ('Mike', 0.24344262, -0.759242),\n", | |
| " ('Mike', 0.7978497, 0.56815547),\n", | |
| " ('Mike', -0.25374797, 1.0467075),\n", | |
| " ('Mike', 0.67096055, 2.098985),\n", | |
| " ('Mike', 0.09782977, -2.3367562),\n", | |
| " ('Mike', -0.10517392, -0.95219094),\n", | |
| " ('Mike', 0.4829485, -0.8935754),\n", | |
| " ('Mike', 0.5658518, 0.99489796),\n", | |
| " ('Mike', -0.26230794, 1.6541808),\n", | |
| " ('Mike', -0.0158083, 2.2399232),\n", | |
| " ('Mike', -0.9850974, -2.8314314),\n", | |
| " ('Mike', 0.07627551, 1.6018937),\n", | |
| " ('Mike', 0.5487822, 2.0611145),\n", | |
| " ('Mike', 0.72884953, 3.0039177),\n", | |
| " ('Mike', 0.60551614, 1.481823),\n", | |
| " ('Mike', -0.25505015, 1.6596674),\n", | |
| " ('Mike', 0.25586078, 3.1297228),\n", | |
| " ('Mike', -0.7842752, 1.4828275),\n", | |
| " ('Mike', -0.72217345, -0.86756504)]" | |
| ] | |
| }, | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "grouped['Mike']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:15.171545Z", | |
| "start_time": "2018-02-04T12:41:15.168141Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# basic counting: number of records in each group\n", | |
| "grouped_count = fy.walk_values(len, grouped)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:15.712781Z", | |
| "start_time": "2018-02-04T12:41:15.708087Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "defaultdict(<function funcy.funcs.compose.<locals>.<lambda>.<locals>.<lambda>>,\n", | |
| " {'Joe': 23, 'Mike': 27, 'Mosky': 24, 'Peter': 26})" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "grouped_count" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:16.231399Z", | |
| "start_time": "2018-02-04T12:41:16.224520Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[(-0.63547385, -1.3424294),\n", | |
| " (0.6281584, -2.057282),\n", | |
| " (-0.4738648, -0.0808474),\n", | |
| " (-0.5345796, -0.6454197),\n", | |
| " (-0.3050625, 1.902929),\n", | |
| " (-0.6662556, 0.021470945),\n", | |
| " (-0.101290904, -1.5588135),\n", | |
| " (0.82477844, 1.0069695),\n", | |
| " (0.39589137, 0.60789853),\n", | |
| " (0.4595319, 0.80701464),\n", | |
| " (0.15273465, 0.30082098),\n", | |
| " (-0.48108417, -0.55198413),\n", | |
| " (-0.023597449, 0.44540608),\n", | |
| " (-0.6753356, -0.45688018),\n", | |
| " (0.43100178, -1.5748075),\n", | |
| " (-0.5353703, -1.4568768),\n", | |
| " (0.010891579, -2.1669335),\n", | |
| " (0.81374466, 2.9644322),\n", | |
| " (-0.46803284, 1.6221269),\n", | |
| " (0.8715897, 2.3632638),\n", | |
| " (-0.391293, 2.956264),\n", | |
| " (0.77644855, 2.2807734),\n", | |
| " (0.5871367, 1.0215662),\n", | |
| " (-0.2359981, 0.5203461)]" | |
| ] | |
| }, | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "[(rec['metric_1'], rec['metric_2']) for rec in grouped['Mosky']]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:17.013492Z", | |
| "start_time": "2018-02-04T12:41:17.007786Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "defaultdict(<function funcy.funcs.compose.<locals>.<lambda>.<locals>.<lambda>>,\n", | |
| " {'Joe': -0.18226026,\n", | |
| " 'Mike': 0.03744804,\n", | |
| " 'Mosky': 0.017694555,\n", | |
| " 'Peter': 0.01730326})" | |
| ] | |
| }, | |
| "execution_count": 16, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "fy.walk_values(lambda recs: np.mean([r[1] for r in recs]), grouped)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:17.631740Z", | |
| "start_time": "2018-02-04T12:41:17.627741Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def trans_pair(rec):\n", | |
| " metric1, metric2 = rec[1], rec[2]\n", | |
| " return [(metric1, metric2), (metric2, metric1)]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:18.127235Z", | |
| "start_time": "2018-02-04T12:41:18.123826Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "pairs = fy.walk_values(fy.partial(fy.map, trans_pair), grouped)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-04T12:41:18.565427Z", | |
| "start_time": "2018-02-04T12:41:18.560334Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[(0.1070719, -2.5197952),\n", | |
| " (-2.5197952, 0.1070719),\n", | |
| " (-0.800511, 2.105351),\n", | |
| " (2.105351, -0.800511)]" | |
| ] | |
| }, | |
| "execution_count": 19, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "fy.take(4, fy.concat(*pairs['Joe']))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.3" | |
| }, | |
| "toc": { | |
| "nav_menu": {}, | |
| "number_sections": true, | |
| "sideBar": true, | |
| "skip_h1_title": false, | |
| "toc_cell": false, | |
| "toc_position": {}, | |
| "toc_section_display": "block", | |
| "toc_window_display": false | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment