Skip to content

Instantly share code, notes, and snippets.

@dapangmao
Last active August 29, 2015 14:06
Show Gist options
  • Save dapangmao/b4ac940343498408c7a2 to your computer and use it in GitHub Desktop.
Save dapangmao/b4ac940343498408c7a2 to your computer and use it in GitHub Desktop.
import sqlite3
import glob
import sas7bdat
class SASexport(sas7bdat.SAS7BDAT):
def head(self, n=5):
for i, row in enumerate(self.readData()):
print row
if i == n:
break
if n > self.header.rowcount:
print 'n exceeds the total number and here shows all rows!'
def meta(self):
print self.header
def to_sqlite(self, sqlitedb):
if sqlitedb is None or not isinstance(sqlitedb, str):
sqlitedb = ':memory'
print 'not valid output name and instead use in-memory database'
cols = self.header.cols
strs = [''] * len(cols)
for i, n in enumerate(cols):
if n.attr.type == "numeric":
strs[i] = n.name + ' real'
else:
strs[i] = n.name + ' varchar({})'.format(n.attr.length)
table = self.header.dataset.title()
cmd1 = "CREATE TABLE {} ({})".format(table, ', '.join(strs))
cmd2 = 'INSERT INTO {} VALUES ( {} )'.format(table,
','.join(['?']*len(cols)))
conn = sqlite3.connect(sqlitedb)
c = conn.cursor()
for i, line in enumerate(self.readData()):
if i == 0:
c.execute('DROP TABLE IF EXISTS {}'.format(table))
c.execute(cmd1)
else:
c.execute(cmd2, line)
conn.commit()
c.close()
def to_pandas(self):
try:
import pandas
except ImportError:
raise ImportError('No pandas moduleis available and this'
'method cannot used')
rst = []
for i, x in enumerate(self.readData()):
if i == 0:
colnames = x
else:
rst.append(x)
df = pandas.DataFrame(data = rst, columns = colnames)
return df
class SASbatchexport:
def __init__(self, directory):
if directory is None or not isinstance(directory, str):
raise ValueError('SAS library path has to be specified')
self.directory = directory
def to_sqlitedb(self, dest=None):
"""Export all SAS data sets to a SQLite database"""
if dest is None or not isinstance(dest, str):
print 'The output SQLite db will be name as SASOUTPUT.db'
dest = 'SASOUTPUT.db'
s = self.directory + '/*.sas7bdat'
for sasfile in glob.glob(s):
_data = SASexport(sasfile)
_data.to_sqlite(dest)
print 'SAS dataset {} has been successfully exported'.format( \
_data.header.dataset)
if __name__ == '__main__':
# Test the export feature
data = SASexport('d:/google/onedrive/class.sas7bdat')
data.head(30)
data.meta()
data.to_sqlite('d:/tmp/test3.db')
data = SASexport('d:/google/onedrive/prdsal2.sas7bdat')
data.head()
df = data.to_pandas()
print df
# Test the batch export feature
lib = SASbatchexport('d:/google/onedrive/')
lib.to_sqlitedb('d:/tmp/test.db')
@ottothecow
Copy link

Hey, just FYI, I forked your code and updated it to work with the current version of the sas7bdat module (which was rewritten last October)

https://gist.github.com/ottothecow/abb41d485e821b361e80

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment