-
-
Save kmatt/601a0d97d0632066ba48263e45102cc9 to your computer and use it in GitHub Desktop.
High-performance Pandas dataframe to SQL Server - uses pyodbc executemany with fast_executemany = True. This is an alternative to out-of-the-box Pandas df_to_sql, which is slow for larger dataframes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def df_to_sql_fast(df, table_name, numeric_columns, date_columns, append_or_replace, conn): | |
""" | |
Appends or overwrites a SQL Server table | |
using data from a Pandas DataFrame. | |
Submits df records at once for faster performance | |
compared to df_to_sql. | |
Parameters: | |
df (DataFrame): df used to create/append table | |
table_name (str): Name of existing SQL Server table | |
numeric_columns (list): List of numeric columns | |
date_columns (list): List of date or datetime columns | |
append_or_replace (str): Values 'append', 'replace' accepted | |
conn (object): pyodbc/pypyodbc connection object | |
Returns: | |
Nothing | |
""" | |
try: | |
cursor = conn.cursor() | |
if append_or_replace == 'replace': | |
cursor.execute('delete from ' + table_name) | |
cursor.fast_executemany = True | |
if numeric_columns: | |
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce', axis=1) | |
if date_columns: | |
# convert date columns first to datetime, then to string -- otherwise an error is raised | |
df[date_columns] = df[date_columns].apply(pd.to_datetime) | |
df[date_columns] = df[date_columns].astype(str) | |
# with cursor.fast_executemany enabled, empty datetime can't be inserted, so replace w/ 1/1/1900 | |
df[date_columns] = df[date_columns].replace({'NaT':'1900-01-01'}) | |
df.replace({np.nan: None}, inplace=True) | |
lstcol = ['[' + c + ']' for c in df] | |
strcols = ','.join(lstcol) | |
lstvals = '?' * len(df.columns) | |
strvals = ','.join(lstvals) | |
sql = 'insert into ' + table_name + ' (' + strcols + ') values (' + strvals + ')' | |
cursor.executemany(sql, df.values.tolist()) | |
cursor.commit() | |
cursor.close() | |
except: | |
raise |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment