Skip to content

Instantly share code, notes, and snippets.

@stonebig
Created December 18, 2013 21:32
Show Gist options
  • Save stonebig/8030208 to your computer and use it in GitHub Desktop.
Save stonebig/8030208 to your computer and use it in GitHub Desktop.
make sql easier for bare python users
# -*- coding: utf-8 -*-
import re
import sqlite3 as sqlite
import numpy as np
import pandas as pd
from pandas.io.sql import write_frame, execute
class baresql(object):
"""
baresql allows you to query in sql any of your python datas.
in the sql :
- '$s' refers to the variable 's'
- 'l$$' refers to the table created with the list/array/dictionnary 'l'
columns of 'l$$' will be l$$.c0 ... l$$.cN (unless pre-defined)
example :
#create the object
from baresql import baresql
bsql=baresql()
user = [(i, "user N°"+str(i)) for i in range(7)]
limit = 4
sql="select * from user$$ where c0 <= $limit"
bsql.df(sql,locals())
baresql re-use or re-implement parts of the code of
. github.com/yhat/pandasql (MIT licence, Copyright 2013 Yhat, Inc)
. github.com/pydata/pandas (BSD simplified licence
Copyright 2011-2012, Lambda Foundry, Inc. and PyData Development Team)
"""
def __init__(self, connection="sqlite://"):
"""
conn = connection string
example :
"sqlite://" = sqlite in memory
"sqlite:///.baresql.db" = sqlite on disk database ".baresql.db"
"""
#identify sql engine and database
self.connection = connection
self.engine = connection.split("://")[0]
if self.engine == "sqlite" or self.engine == "mysql":
self.dbname = "/".join(((connection+"").split("/")[3:]))
if self.dbname.strip() == "":
self.dbname=":memory:"
else:
print (self.engine)
raise Exception("Only sqlite and mysql are supported yet")
#realize connexion
self.conn = sqlite.connect(self.dbname,
detect_types = sqlite.PARSE_DECLTYPES)
self.tmp_tables = []
def close(self):
"for memory scared"
self.conn.close
def remove_tmp_tables(self):
"for memory scared"
for table_sql in self.tmp_tables:
pre_q=" ;DROP TABLE IF EXISTS [%s] ;" % table_sql
cur = execute(pre_q, self.conn, params=env)
def _ensure_data_frame(self, obj, name):
"""
obj a python object to be converted to a DataFrame
take an object and make sure that it's a pandas data frame
"""
#we accept pandas Dataframe, and also dictionaries, lists, tuples
#we'll just convert them to Pandas Dataframe
if isinstance(obj, pd.DataFrame):
df = obj
elif isinstance(obj, (tuple, list)) :
#tuple and list case
if len(obj) == 0:
return pd.Dataframe()
firstrow = obj[0]
if isinstance(firstrow, (tuple, list)):
#multiple-columns
colnames = ["c%d" % i for i in range(len(firstrow))]
df = pd.DataFrame(obj, columns=colnames)
else:
#mono-column
df = pd.DataFrame(obj, columns = ["c0"])
elif isinstance(obj, dict) :
#dictionary case
df = pd.DataFrame([(k,v) for k, v in obj.items()],
columns = ["c0","c1"])
if not isinstance(df, pd.DataFrame) :
raise Exception("%s is no Dataframe/Tuple/List/Dictionary" % name)
for col in df:
if df[col].dtype == np.int64:
df[col] = df[col].astype(np.float)
return df
def _extract_table_names(self, q, env):
"""
extracts table names from a sql query whose :
- name if postfixed by $$,
- name is found in given 'env' environnement
example : "select * from a$$, b$$, a$$" may return ['a', 'b']
"""
tables = set()
next_is_table = False
for query in q.split("$$"):
table_candidate = query.split(' ')[-1]
if table_candidate in env:
tables.add(table_candidate)
self.tmp_tables=list(set(tables))
return self.tmp_tables
def _write_table(self, tablename, df, conn):
"writes a dataframe to the sqlite database"
for col in df.columns:
if re.search("[() ]", col):
msg = "please follow SQLite column naming conventions: "
msg += "http://www.sqlite.org/lang_keywords.html"
raise Exception(msg)
write_frame(df, name = tablename, con = self.conn, flavor = 'sqlite')
def cur(self, q, env):
"""
query python or sql datas, returns a cursor of last instruction
q: sql instructions, with
$x refering to a variable 'x' defined in the dictionnary
x$$ refering to a table created with the variable 'x'
env: dictionnary of variables available to the sql instructions
locals() and globals() are your python local/global variables
dict(globals(),**locals()) is the default python view of variables
"""
tables = self._extract_table_names(q, env)
for table_ref in tables:
table_sql=table_ref+"$$"
df = env[table_ref]
df = self._ensure_data_frame(df, table_ref)
#pre_destroy temporary table
pre_q=" ;DROP TABLE IF EXISTS [%s] ;" % table_sql
cur = execute(pre_q, self.conn, params=env)
self._write_table( table_sql, df, self.conn)
#multiple sql must be separated per ; and a new line
for q_single in q.split(';\n') :
cur = execute(q_single, self.conn, params=env)
return cur
def rows(self, q, env):
"same as .cur , but returns a list of rows"
result=self.cur( q, env).fetchall()
self.remove_tmp_tables
return result
def df(self, q, env):
"same as .cur , but returns a pandas dataframe"
cur=self.cur( q, env)
result = None
rows = cur.fetchall()
if not isinstance(rows, list):
rows = list(rows)
if cur.description is not None:
columns = [col_desc[0] for col_desc in cur.description]
result = pd.DataFrame(rows, columns=columns)
self.remove_tmp_tables
return result
if __name__ == '__main__':
#create the object
bsql=baresql() # in memory
user = [(i, "user N°"+str(i)) for i in range(5)]
limit = 2
sql="select * from user$$ where c0 <= $limit"
print (bsql.df(sql,locals()))
#more sophisticate
bsql=baresql("sqlite:///.baresql.db") # on disk
bsqldf = lambda q: bsql.df(q, dict(globals(),**locals()))
sql='''drop table if exists winner;
create table winner as
select c0 No, c1 Name from user$$ where c0 > $limit ;
select * from winner'''
print ( bsqldf(sql) )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment