Last active
August 29, 2015 14:06
-
-
Save dapangmao/b4ac940343498408c7a2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sqlite3 | |
import glob | |
import sas7bdat | |
class SASexport(sas7bdat.SAS7BDAT): | |
def head(self, n=5): | |
for i, row in enumerate(self.readData()): | |
print row | |
if i == n: | |
break | |
if n > self.header.rowcount: | |
print 'n exceeds the total number and here shows all rows!' | |
def meta(self): | |
print self.header | |
def to_sqlite(self, sqlitedb): | |
if sqlitedb is None or not isinstance(sqlitedb, str): | |
sqlitedb = ':memory' | |
print 'not valid output name and instead use in-memory database' | |
cols = self.header.cols | |
strs = [''] * len(cols) | |
for i, n in enumerate(cols): | |
if n.attr.type == "numeric": | |
strs[i] = n.name + ' real' | |
else: | |
strs[i] = n.name + ' varchar({})'.format(n.attr.length) | |
table = self.header.dataset.title() | |
cmd1 = "CREATE TABLE {} ({})".format(table, ', '.join(strs)) | |
cmd2 = 'INSERT INTO {} VALUES ( {} )'.format(table, | |
','.join(['?']*len(cols))) | |
conn = sqlite3.connect(sqlitedb) | |
c = conn.cursor() | |
for i, line in enumerate(self.readData()): | |
if i == 0: | |
c.execute('DROP TABLE IF EXISTS {}'.format(table)) | |
c.execute(cmd1) | |
else: | |
c.execute(cmd2, line) | |
conn.commit() | |
c.close() | |
def to_pandas(self): | |
try: | |
import pandas | |
except ImportError: | |
raise ImportError('No pandas moduleis available and this' | |
'method cannot used') | |
rst = [] | |
for i, x in enumerate(self.readData()): | |
if i == 0: | |
colnames = x | |
else: | |
rst.append(x) | |
df = pandas.DataFrame(data = rst, columns = colnames) | |
return df | |
class SASbatchexport: | |
def __init__(self, directory): | |
if directory is None or not isinstance(directory, str): | |
raise ValueError('SAS library path has to be specified') | |
self.directory = directory | |
def to_sqlitedb(self, dest=None): | |
"""Export all SAS data sets to a SQLite database""" | |
if dest is None or not isinstance(dest, str): | |
print 'The output SQLite db will be name as SASOUTPUT.db' | |
dest = 'SASOUTPUT.db' | |
s = self.directory + '/*.sas7bdat' | |
for sasfile in glob.glob(s): | |
_data = SASexport(sasfile) | |
_data.to_sqlite(dest) | |
print 'SAS dataset {} has been successfully exported'.format( \ | |
_data.header.dataset) | |
if __name__ == '__main__': | |
# Test the export feature | |
data = SASexport('d:/google/onedrive/class.sas7bdat') | |
data.head(30) | |
data.meta() | |
data.to_sqlite('d:/tmp/test3.db') | |
data = SASexport('d:/google/onedrive/prdsal2.sas7bdat') | |
data.head() | |
df = data.to_pandas() | |
print df | |
# Test the batch export feature | |
lib = SASbatchexport('d:/google/onedrive/') | |
lib.to_sqlitedb('d:/tmp/test.db') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey, just FYI, I forked your code and updated it to work with the current version of the sas7bdat module (which was rewritten last October)
https://gist.github.com/ottothecow/abb41d485e821b361e80