Skip to content

Instantly share code, notes, and snippets.

@xmnlab
Created March 10, 2020 01:05
Show Gist options
  • Save xmnlab/257070144639b5a2af1aad2d6eaa75ec to your computer and use it in GitHub Desktop.
Save xmnlab/257070144639b5a2af1aad2d6eaa75ec to your computer and use it in GitHub Desktop.
POC - Ibis OmniSciDB DDL Update
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# https://github.com/ibis-project/ibis/pull/2112\n",
"from typing import Dict, Optional\n",
"import ibis\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['diamonds', 'batting', 'awards_players', 'functional_alltypes', 'geo']"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"db_conf = dict(\n",
" host='localhost', port='6274', \n",
" user='admin', password='HyperInteractive', \n",
" database='ibis_testing'\n",
")\n",
"con = ibis.omniscidb.connect(**db_conf)\n",
"\n",
"con.list_tables()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"series size expected: 10\n",
"name: functional_alltypes\n",
"cond: WHERE (\"index\" >= 5) AND\n",
" (\"index\" < 15)\n",
"================================================================================\n",
">>> UPDATE functional_alltypes SET int_col=(\"index\" + 1) WHERE (\"index\" >= 5) AND (\"index\" < 15)\n",
">>> UPDATE functional_alltypes SET \"string_col\"=('1000') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 5)\n",
">>> UPDATE functional_alltypes SET \"string_col\"=('1001') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 6)\n",
">>> UPDATE functional_alltypes SET \"string_col\"=('1002') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 7)\n",
">>> UPDATE functional_alltypes SET \"string_col\"=('1003') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 8)\n",
">>> UPDATE functional_alltypes SET \"string_col\"=('1004') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 9)\n",
">>> UPDATE functional_alltypes SET \"string_col\"=('1005') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 10)\n",
">>> UPDATE functional_alltypes SET \"string_col\"=('1006') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 11)\n",
">>> UPDATE functional_alltypes SET \"string_col\"=('1007') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 12)\n",
">>> UPDATE functional_alltypes SET \"string_col\"=('1008') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 13)\n",
">>> UPDATE functional_alltypes SET \"string_col\"=('1009') WHERE (\"index\" >= 5) AND (\"index\" < 15) AND (\"index\" = 14)\n"
]
}
],
"source": [
"def update(\n",
" con, \n",
" table_name: str, \n",
" condition: ibis.expr.types.Expr, \n",
" input_expr: Optional[Dict[str, ibis.expr.types.Expr]] is None,\n",
" input_data: pd.DataFrame is None\n",
"):\n",
" \"\"\"\n",
" Update method.\n",
"\n",
" Parameters\n",
" ----------\n",
" table_name : str\n",
" condition : ibis.expr.types.Expr\n",
" input_expr : dict of ibis.expr.types.Expr, optional, default None\n",
" input_data : pandas.DataFrame, optional, default None\n",
" \"\"\"\n",
" if input_expr is None and input_data is None:\n",
" raise Exception('No set clause given.')\n",
" \n",
" # setup\n",
" t = con.table(table_name)\n",
" \n",
" cond_str = t[condition].compile()\n",
" cond_str = cond_str[cond_str.index('WHERE'):]\n",
"\n",
" update_stmt = ''\n",
" update_tmpl = 'UPDATE {}'.format(table_name) + ' SET {} {};\\n'\n",
" \n",
" update_set = {}\n",
" series = {}\n",
" \n",
" size = t[cond].count().execute()\n",
" print('series size expected:', size)\n",
" \n",
" # create update statement for input_expr\n",
" if input_expr is not None:\n",
" for k, v in input_expr.items():\n",
" if not isinstance(v, ibis.expr.types.Expr):\n",
" raise Exception(\n",
" 'Data expression format expected: ibis.expr.types.Expr. '\n",
" 'Data format given: {}'.format(type(v))\n",
" )\n",
"\n",
" value = v.compile()\n",
" try:\n",
" i_end = value.index('FROM')\n",
" i_end -= 8\n",
" except ValueError:\n",
" i_end = -7\n",
" value = value[7:i_end]\n",
" update_set[k] = value\n",
"\n",
" update_stmt = update_tmpl.format(\n",
" ', '.join(['{}=({})'.format(k, v) for k, v in update_set.items()]),\n",
" cond_str\n",
" )\n",
" \n",
" # create update statement for input_data\n",
" if input_data is not None:\n",
" key_name = input_data.index.name\n",
" if cond_str == '':\n",
" cond_str = ' WHERE 1=1 AND'\n",
" for i, row in input_data.iterrows():\n",
" update_stmt += update_tmpl.format(\n",
" ', '.join(['\"{}\"=({})'.format(k, repr(v)) for k, v in row.items()]),\n",
" cond_str + ' AND (\"{}\" = {})'.format(key_name, repr(i))\n",
" )\n",
" \n",
" \n",
" print('name:', t.name)\n",
" print('cond:', cond_str)\n",
" print('=' * 80)\n",
" for stmt in update_stmt.split(';'):\n",
" stmt = stmt.strip().replace('\\n', '')\n",
" if not stmt:\n",
" continue\n",
" print('>>>', stmt)\n",
" con.con.execute(stmt)\n",
"\n",
"\n",
"# test data\n",
"table_name = 'functional_alltypes'\n",
"t = con.table(table_name)\n",
"cond = (5 <= t.index) & (t.index < 15)\n",
"\n",
"df = t[cond].execute()\n",
"# set index, used as primary key\n",
"df.set_index('index', inplace=True, drop=False)\n",
"\n",
"# prepare new data\n",
"string_se = pd.Series([str(x) for x in range(1000, 1010)])\n",
"string_se.index = df.index\n",
"df['string_col'] = string_se\n",
"\n",
"update(\n",
" con, \n",
" table_name=table_name,\n",
" condition=cond,\n",
" input_expr={'int_col':t.index+1},\n",
" input_data=df[['string_col']],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"df_check = t[cond].execute()\n",
"# set index, used as primary key\n",
"df_check.set_index('index', inplace=True, drop=False)\n",
"assert all(df_check['string_col'] == string_se)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment