Created
March 10, 2020 01:05
-
-
Save xmnlab/257070144639b5a2af1aad2d6eaa75ec to your computer and use it in GitHub Desktop.
POC - Ibis OmniSciDB DDL Update
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# https://github.com/ibis-project/ibis/pull/2112\n", | |
"from typing import Dict, Optional\n", | |
"import ibis\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['diamonds', 'batting', 'awards_players', 'functional_alltypes', 'geo']" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"db_conf = dict(\n", | |
" host='localhost', port='6274', \n", | |
" user='admin', password='HyperInteractive', \n", | |
" database='ibis_testing'\n", | |
")\n", | |
"con = ibis.omniscidb.connect(**db_conf)\n", | |
"\n", | |
"con.list_tables()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"series size expected: 10\n", | |
"name: functional_alltypes\n", | |
"cond: WHERE (\"index\" >= 5) AND\n", | |
" (\"index\" < 15)\n", | |
"================================================================================\n", | |
">>> UPDATE functional_alltypes SET int_col=(\"index\" + 1) WHERE (\"index\" >= 5) AND (\"index\" < 15)\n", | |
">>> UPDATE functional_alltypes SET \"string_col\"=('1000') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 5)\n", | |
">>> UPDATE functional_alltypes SET \"string_col\"=('1001') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 6)\n", | |
">>> UPDATE functional_alltypes SET \"string_col\"=('1002') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 7)\n", | |
">>> UPDATE functional_alltypes SET \"string_col\"=('1003') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 8)\n", | |
">>> UPDATE functional_alltypes SET \"string_col\"=('1004') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 9)\n", | |
">>> UPDATE functional_alltypes SET \"string_col\"=('1005') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 10)\n", | |
">>> UPDATE functional_alltypes SET \"string_col\"=('1006') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 11)\n", | |
">>> UPDATE functional_alltypes SET \"string_col\"=('1007') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 12)\n", | |
">>> UPDATE functional_alltypes SET \"string_col\"=('1008') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 13)\n", | |
">>> UPDATE functional_alltypes SET \"string_col\"=('1009') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 14)\n" | |
] | |
} | |
], | |
"source": [ | |
"def update(\n", | |
" con, \n", | |
" table_name: str, \n", | |
" condition: ibis.expr.types.Expr, \n", | |
" input_expr: Optional[Dict[str, ibis.expr.types.Expr]] is None,\n", | |
" input_data: pd.DataFrame is None\n", | |
"):\n", | |
" \"\"\"\n", | |
" Update method.\n", | |
"\n", | |
" Parameters\n", | |
" ----------\n", | |
" table_name : str\n", | |
" condition : ibis.expr.types.Expr\n", | |
" input_expr : dict of ibis.expr.types.Expr, optional, default None\n", | |
" input_data : pandas.DataFrame, optional, default None\n", | |
" \"\"\"\n", | |
" if input_expr is None and input_data is None:\n", | |
" raise Exception('No set clause given.')\n", | |
" \n", | |
" # setup\n", | |
" t = con.table(table_name)\n", | |
" \n", | |
" cond_str = t[condition].compile()\n", | |
" cond_str = cond_str[cond_str.index('WHERE'):]\n", | |
"\n", | |
" update_stmt = ''\n", | |
" update_tmpl = 'UPDATE {}'.format(table_name) + ' SET {} {};\\n'\n", | |
" \n", | |
" update_set = {}\n", | |
" series = {}\n", | |
" \n", | |
" size = t[cond].count().execute()\n", | |
" print('series size expected:', size)\n", | |
" \n", | |
" # create update statement for input_expr\n", | |
" if input_expr is not None:\n", | |
" for k, v in input_expr.items():\n", | |
" if not isinstance(v, ibis.expr.types.Expr):\n", | |
" raise Exception(\n", | |
" 'Data expression format expected: ibis.expr.types.Expr. '\n", | |
" 'Data format given: {}'.format(type(v))\n", | |
" )\n", | |
"\n", | |
" value = v.compile()\n", | |
" try:\n", | |
" i_end = value.index('FROM')\n", | |
" i_end -= 8\n", | |
" except ValueError:\n", | |
" i_end = -7\n", | |
" value = value[7:i_end]\n", | |
" update_set[k] = value\n", | |
"\n", | |
" update_stmt = update_tmpl.format(\n", | |
" ', '.join(['{}=({})'.format(k, v) for k, v in update_set.items()]),\n", | |
" cond_str\n", | |
" )\n", | |
" \n", | |
" # create update statement for input_data\n", | |
" if input_data is not None:\n", | |
" key_name = input_data.index.name\n", | |
" if cond_str == '':\n", | |
" cond_str = ' WHERE 1=1 AND'\n", | |
" for i, row in input_data.iterrows():\n", | |
" update_stmt += update_tmpl.format(\n", | |
" ', '.join(['\"{}\"=({})'.format(k, repr(v)) for k, v in row.items()]),\n", | |
" cond_str + ' AND (\"{}\" = {})'.format(key_name, repr(i))\n", | |
" )\n", | |
" \n", | |
" \n", | |
" print('name:', t.name)\n", | |
" print('cond:', cond_str)\n", | |
" print('=' * 80)\n", | |
" for stmt in update_stmt.split(';'):\n", | |
" stmt = stmt.strip().replace('\\n', '')\n", | |
" if not stmt:\n", | |
" continue\n", | |
" print('>>>', stmt)\n", | |
" con.con.execute(stmt)\n", | |
"\n", | |
"\n", | |
"# test data\n", | |
"table_name = 'functional_alltypes'\n", | |
"t = con.table(table_name)\n", | |
"cond = (5 <= t.index) & (t.index < 15)\n", | |
"\n", | |
"df = t[cond].execute()\n", | |
"# set index, used as primary key\n", | |
"df.set_index('index', inplace=True, drop=False)\n", | |
"\n", | |
"# prepare new data\n", | |
"string_se = pd.Series([str(x) for x in range(1000, 1010)])\n", | |
"string_se.index = df.index\n", | |
"df['string_col'] = string_se\n", | |
"\n", | |
"update(\n", | |
" con, \n", | |
" table_name=table_name,\n", | |
" condition=cond,\n", | |
" input_expr={'int_col':t.index+1},\n", | |
" input_data=df[['string_col']],\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_check = t[cond].execute()\n", | |
"# set index, used as primary key\n", | |
"df_check.set_index('index', inplace=True, drop=False)\n", | |
"assert all(df_check['string_col'] == string_se)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment