Skip to content

Instantly share code, notes, and snippets.

@dboyliao
Last active February 4, 2018 12:44
Show Gist options
  • Select an option

  • Save dboyliao/1cc4b41efadaa89d69fd2a1ce26dabd1 to your computer and use it in GitHub Desktop.

Select an option

Save dboyliao/1cc4b41efadaa89d69fd2a1ce26dabd1 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:09.486882Z",
"start_time": "2018-02-04T12:41:09.386518Z"
}
},
"outputs": [],
"source": [
"import numpy as np\n",
"import funcy as fy"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:09.757005Z",
"start_time": "2018-02-04T12:41:09.754161Z"
}
},
"outputs": [],
"source": [
"names = ['Mosky', 'Peter', 'Joe', 'Mike']"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:10.101281Z",
"start_time": "2018-02-04T12:41:10.089583Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"'Mosky'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# size=None (the default) makes choice() return a single item directly,\n",
"# so there is no one-element array to build and then unwrap with [0]\n",
"np.random.choice(names)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:10.400755Z",
"start_time": "2018-02-04T12:41:10.392006Z"
}
},
"outputs": [],
"source": [
"# Seed the global NumPy RNG right before sampling so that Restart & Run All\n",
"# rebuilds the same 100 records (and therefore the same results further down).\n",
"np.random.seed(0)\n",
"# One record per draw: (name, value in [-1, 1), value in [-3.14, 3.14)).\n",
"raw_data = [(np.random.choice(names), 2*np.random.random()-1, 3.14*(2*np.random.random()-1))\n",
"            for _ in range(100)]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:10.786383Z",
"start_time": "2018-02-04T12:41:10.780428Z"
}
},
"outputs": [],
"source": [
"# Pack the (name, float, float) tuples into a structured array with named fields.\n",
"data = np.array(\n",
"    raw_data,\n",
"    dtype=[('name', 'U10'), ('metric_1', 'f4'), ('metric_2', 'f4')],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:11.281956Z",
"start_time": "2018-02-04T12:41:11.276237Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"array([('Joe', 0.1070719 , -2.5197952 ),\n",
" ('Peter', -0.14473368, -2.0009336 ),\n",
" ('Mike', -0.06643482, -0.56792307)],\n",
" dtype=[('name', '<U10'), ('metric_1', '<f4'), ('metric_2', '<f4')])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[:3]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:11.724819Z",
"start_time": "2018-02-04T12:41:11.721639Z"
}
},
"outputs": [],
"source": [
"v = data[['name', 'metric_2']] # column(s) selection just like pandas"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:12.326790Z",
"start_time": "2018-02-04T12:41:12.322404Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# v.base is not None, so v is a view that borrows another array's memory rather than an owning copy\n",
"v.base is None"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:12.789208Z",
"start_time": "2018-02-04T12:41:12.784399Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# selecting a single field also gives a view (its .base is not None either)\n",
"data['name'].base is None"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:13.316065Z",
"start_time": "2018-02-04T12:41:13.312527Z"
}
},
"outputs": [],
"source": [
"# groupby with funcy\n",
"grouped = fy.group_by(lambda rec: rec['name'], data)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:14.060878Z",
"start_time": "2018-02-04T12:41:14.056314Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['Joe', 'Peter', 'Mike', 'Mosky'])"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grouped.keys()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:14.607651Z",
"start_time": "2018-02-04T12:41:14.597002Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[('Mike', -0.06643482, -0.56792307),\n",
" ('Mike', 0.73707867, -1.6110512),\n",
" ('Mike', -0.05223163, 1.5332286),\n",
" ('Mike', -0.48683003, 0.6469435),\n",
" ('Mike', -0.84819245, 2.311447),\n",
" ('Mike', 0.26887587, 0.031909),\n",
" ('Mike', -0.09210482, -2.1643085),\n",
" ('Mike', -0.13959667, 1.5170095),\n",
" ('Mike', 0.24344262, -0.759242),\n",
" ('Mike', 0.7978497, 0.56815547),\n",
" ('Mike', -0.25374797, 1.0467075),\n",
" ('Mike', 0.67096055, 2.098985),\n",
" ('Mike', 0.09782977, -2.3367562),\n",
" ('Mike', -0.10517392, -0.95219094),\n",
" ('Mike', 0.4829485, -0.8935754),\n",
" ('Mike', 0.5658518, 0.99489796),\n",
" ('Mike', -0.26230794, 1.6541808),\n",
" ('Mike', -0.0158083, 2.2399232),\n",
" ('Mike', -0.9850974, -2.8314314),\n",
" ('Mike', 0.07627551, 1.6018937),\n",
" ('Mike', 0.5487822, 2.0611145),\n",
" ('Mike', 0.72884953, 3.0039177),\n",
" ('Mike', 0.60551614, 1.481823),\n",
" ('Mike', -0.25505015, 1.6596674),\n",
" ('Mike', 0.25586078, 3.1297228),\n",
" ('Mike', -0.7842752, 1.4828275),\n",
" ('Mike', -0.72217345, -0.86756504)]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grouped['Mike']"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:15.171545Z",
"start_time": "2018-02-04T12:41:15.168141Z"
}
},
"outputs": [],
"source": [
"# basic counting\n",
"grouped_count = fy.walk_values(len, grouped)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:15.712781Z",
"start_time": "2018-02-04T12:41:15.708087Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"defaultdict(<function funcy.funcs.compose.<locals>.<lambda>.<locals>.<lambda>>,\n",
" {'Joe': 23, 'Mike': 27, 'Mosky': 24, 'Peter': 26})"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grouped_count"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:16.231399Z",
"start_time": "2018-02-04T12:41:16.224520Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[(-0.63547385, -1.3424294),\n",
" (0.6281584, -2.057282),\n",
" (-0.4738648, -0.0808474),\n",
" (-0.5345796, -0.6454197),\n",
" (-0.3050625, 1.902929),\n",
" (-0.6662556, 0.021470945),\n",
" (-0.101290904, -1.5588135),\n",
" (0.82477844, 1.0069695),\n",
" (0.39589137, 0.60789853),\n",
" (0.4595319, 0.80701464),\n",
" (0.15273465, 0.30082098),\n",
" (-0.48108417, -0.55198413),\n",
" (-0.023597449, 0.44540608),\n",
" (-0.6753356, -0.45688018),\n",
" (0.43100178, -1.5748075),\n",
" (-0.5353703, -1.4568768),\n",
" (0.010891579, -2.1669335),\n",
" (0.81374466, 2.9644322),\n",
" (-0.46803284, 1.6221269),\n",
" (0.8715897, 2.3632638),\n",
" (-0.391293, 2.956264),\n",
" (0.77644855, 2.2807734),\n",
" (0.5871367, 1.0215662),\n",
" (-0.2359981, 0.5203461)]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[(rec['metric_1'], rec['metric_2']) for rec in grouped['Mosky']]"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:17.013492Z",
"start_time": "2018-02-04T12:41:17.007786Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"defaultdict(<function funcy.funcs.compose.<locals>.<lambda>.<locals>.<lambda>>,\n",
" {'Joe': -0.18226026,\n",
" 'Mike': 0.03744804,\n",
" 'Mosky': 0.017694555,\n",
" 'Peter': 0.01730326})"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Per-name mean of metric_1, read by field name (as the 'Mosky' cell does) instead of by position 1\n",
"fy.walk_values(lambda recs: np.mean([rec['metric_1'] for rec in recs]), grouped)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:17.631740Z",
"start_time": "2018-02-04T12:41:17.627741Z"
}
},
"outputs": [],
"source": [
"def trans_pair(rec):\n",
"    \"\"\"Return the two values at positions 1 and 2 of ``rec`` in both orders.\n",
"\n",
"    The result is a two-element list: ``(rec[1], rec[2])`` followed by the\n",
"    same tuple reversed. Position 0 of ``rec`` is not read.\n",
"    \"\"\"\n",
"    forward = (rec[1], rec[2])\n",
"    return [forward, forward[::-1]]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:18.127235Z",
"start_time": "2018-02-04T12:41:18.123826Z"
}
},
"outputs": [],
"source": [
"# Build real lists rather than relying on fy.map, which is a lazy iterator on Python 3:\n",
"# an iterator stored in `pairs` is exhausted by its first reader, so re-running a later\n",
"# cell that consumes pairs[...] would silently see nothing.\n",
"pairs = fy.walk_values(lambda recs: [trans_pair(rec) for rec in recs], grouped)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-04T12:41:18.565427Z",
"start_time": "2018-02-04T12:41:18.560334Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[(0.1070719, -2.5197952),\n",
" (-2.5197952, 0.1070719),\n",
" (-0.800511, 2.105351),\n",
" (2.105351, -0.800511)]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fy.take(4, fy.concat(*pairs['Joe']))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
},
"toc": {
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"toc_cell": false,
"toc_position": {},
"toc_section_display": "block",
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment