Last active
November 21, 2018 16:45
-
-
Save luthfianto/28bf95b8ddc80010cf7852ef4fc85927 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const sqlite3 = require("sqlite3") | |
class SqliteDF { | |
table: any; | |
created: boolean; | |
con: any; | |
max_index: number; | |
_group_cols: any; | |
verbose: boolean; | |
_cache: any; | |
constructor(con = null, table = null, group_cols = null, cache = null, verbose = false) { | |
` | |
Keyword arguments: | |
con -- A SQLite connection object. | |
table -- the name of the table associated with this dataframe | |
group_cols -- for grouping operations, the set of columns we are grouping by | |
cache -- shared across instances in order to memoize select statements | |
verbose - print SQL statements. | |
` | |
if (!table) { | |
this.table = this._make_table_name() | |
this.created = false | |
} else { | |
this.table = table | |
this.created = true | |
} | |
if (!con) { | |
this.con = sqlite3.connect(':memory:') | |
this.max_index = 0 | |
} else { | |
this.con = con | |
this.max_index = this.con.cursor().execute('select max(ROWID) from {}'.format(this.table)).fetchone()[0] | |
} | |
this._group_cols = group_cols | |
this._cache = cache|| {} | |
this.verbose = verbose | |
} | |
private toString() { | |
return 'SqliteDF({} x {})'.format(this.max_index, len(this.keys())) | |
} | |
private __setitem__(this, key, values) { | |
`Primary way to get data in. Sets one column, which must be aligned with existing columns. | |
Arguments: | |
key -- the name of the column to create | |
value -- an iterable of values the column takes, must be ordered | |
` | |
if (this.created) { | |
this.con.execute('alter table {} add column {} int'.format(this.table, key)) | |
} else { | |
this.con.cursor().execute('create table {} ({} int)'.format(this.table, key)) | |
this.created = true | |
} | |
if (len(values) > this.max_index) { | |
to_insert = [(i,) for i in range(this.max_index, len(values))] | |
this.con.cursor().executemany('insert into {} (ROWID) values (?)'.format(this.table, key), to_insert) | |
this.max_index = len(values) | |
} | |
this.con.cursor().executemany('update {} set {}=? where ROWID=?'.format(this.table, key), | |
[(v, i) for i, v in enumerate(values)]) | |
} | |
private keys() { | |
`Return all columns in the data frame.` | |
const tables = this.con.cursor().execute("pragma table_info({})".format(this.table)).fetchall() | |
return [t[1] for t in tables] | |
} | |
private __getitem__(this, key) { | |
`Get an individual column from the data frame.` | |
const rows = this.con.cursor().execute('select {} from {} order by ROWID'.format(key, this.table)).fetchall() | |
return [row[0] for row in rows] | |
} | |
private print(...args: any[]) { | |
`Render the data frame to a string and print.` | |
const rows = this.con.cursor().execute('select * from {}'.format(this.table)).fetchall() | |
print(tabulate(rows, headers = this.keys())) | |
} | |
private tabulate(...args: any[]) { | |
return args; | |
} | |
private _make_table_name() { | |
`Generate a random name for a create table statement.` | |
return ''.join(random.choices(string.ascii_letters[: 26], k = 10)) | |
} | |
private _create_table(this, select, memoize=true){ | |
`Create a new table and return a data frame linked to it. | |
Arguments: | |
select -- a query that selects data from the current set of available tables. | |
memoize -- whether to cache the select query or re-run it. | |
` | |
if (memoize && select in this._cache){ | |
const table = this._cache[select] | |
return SqliteDF(this.con, table, cache=this._cache)} | |
const table = this._make_table_name() | |
const q = 'create table {} as {}'.format(table, select) | |
if (this.verbose) | |
print(select) | |
this.con.cursor().execute(q) | |
if (memoize) | |
this._cache[select] = table | |
return new SqliteDF(this.con, table, this._cache) | |
} | |
public head(n: number=5){ | |
const q = 'select * from {} order by ROWID limit {}'.format(this.table, n) | |
return this._create_table(q) | |
} | |
private tail(n: number=5){ | |
const q = 'select * from {} order by ROWID desc limit {}'.format(this.table, n) | |
return this._create_table(q) | |
} | |
// private group_by(this, *columns): | |
// `Returns the same data frame with group columns added.` | |
// return SqliteDF(this.con, this.table, group_cols=columns, cache=this._cache) | |
// private ungroup(){} | |
// `Returns the same data frame with group columns removed.` | |
// return SqliteDF(this.con, this.table, group_cols=null, cache=this._cache) | |
// private count(){} | |
// if not this._group_cols: | |
// q = 'select count(1) as n from {}'.format(this.table) | |
// else: | |
// q = 'select {}, count(1) as n from {} group by {}'.format( | |
// ', '.join(this._group_cols), | |
// this.table, | |
// ', '.join(this._group_cols), | |
// ) | |
// return this._create_table(q) | |
// private summarise(this, **exprs): | |
// summaries_list = [] | |
// for key, value in exprs.items(): | |
// summaries_list.append('{} as {}'.format(value, key)) | |
// summaries = ', '.join(summaries_list) | |
// if not this._group_cols: | |
// q = 'select {} from {}'.format(summaries, this.table) | |
// else: | |
// q = 'select {}, {} from {} group by {}'.format( | |
// ', '.join(this._group_cols), | |
// summaries, | |
// this.table, | |
// ', '.join(this._group_cols), | |
// ) | |
// return this._create_table(q) | |
// private select(this, *columns): | |
// q = 'select {} from {}'.format( | |
// ', '.join(columns), | |
// this.table, | |
// ) | |
// return this._create_table(q) | |
// private mutate(this, **exprs): | |
// mutations_list = [] | |
// for key, value in exprs.items(): | |
// mutations_list.append('{} as {}'.format(value, key)) | |
// keys = this.keys() | |
// mutations = ', '.join(mutations_list) | |
// q = 'select {}, {} from {}'.format( | |
// ', '.join(keys), | |
// mutations, | |
// this.table, | |
// ) | |
// return this._create_table(q) | |
// private inner_join(this, other_df): | |
// left_keys = this.keys() | |
// right_keys = other_df.keys() | |
// overlap = [lk for lk in left_keys if lk in right_keys] | |
// on_statement = ' and '.join('a.{0} = b.{0}'.format(col) for col in overlap) | |
// cols_to_select = ( | |
// ['a.{}'.format(o) for o in overlap] | |
// + ['a.{}'.format(lk) for lk in left_keys if lk not in overlap] | |
// + ['b.{}'.format(rk) for rk in right_keys if (rk not in overlap) and (rk not in left_keys)] | |
// ) | |
// q = 'select {} from {} a inner join {} b on {}'.format( | |
// ', '.join(cols_to_select), | |
// this.table, | |
// other_df.table, | |
// on_statement, | |
// ) | |
// return this._create_table(q) | |
// private bind_rows(this, other_df): | |
// left_keys = this.keys() | |
// right_keys = other_df.keys() | |
// overlap = [lk for lk in left_keys if lk in right_keys] | |
// cols = ', '.join(overlap) | |
// q = 'select {} FROM {} UNION ALL SELECT {} FROM {}'.format( | |
// cols, | |
// this.table, | |
// cols, | |
// other_df.table, | |
// ) | |
// return this._create_table(q) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment