High-performance Pandas DataFrame to SQL Server - uses pyodbc executemany with fast_executemany = True. This is an alternative to the out-of-the-box pandas DataFrame.to_sql, which is slow for larger DataFrames.
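For comparison, the out-of-the-box path looks roughly like the sketch below, assuming a SQLAlchemy engine for the mssql+pyodbc dialect; the server, database, and table names are placeholders, not part of this gist.

# Baseline for comparison: pandas' built-in to_sql via SQLAlchemy.
# The connection string details here are placeholders.
import pandas as pd
from sqlalchemy import create_engine

engine = create_engine(
    "mssql+pyodbc://my_server/my_database"
    "?driver=ODBC+Driver+17+for+SQL+Server&trusted_connection=yes",
    fast_executemany=True,  # SQLAlchemy passes this through to pyodbc
)

df = pd.DataFrame({"id": [1, 2], "amount": [10.5, 20.0]})
# Writes the whole DataFrame to an existing (or new) table
df.to_sql("my_table", engine, if_exists="append", index=False)

The function below bypasses to_sql entirely and issues a single parameterized INSERT through pyodbc's executemany.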
import numpy as np
import pandas as pd


def df_to_sql_fast(df, table_name, numeric_columns, date_columns, append_or_replace, conn):
    """
    Appends to or overwrites a SQL Server table using data from a Pandas DataFrame.

    Submits all df records at once for faster performance compared to df_to_sql.

    Parameters:
        df (DataFrame): df used to append to or replace the table
        table_name (str): Name of an existing SQL Server table
        numeric_columns (list): List of numeric columns
        date_columns (list): List of date or datetime columns
        append_or_replace (str): Values 'append', 'replace' accepted
        conn (object): pyodbc/pypyodbc connection object (fast_executemany requires pyodbc)

    Returns:
        Nothing
    """
    try:
        cursor = conn.cursor()
        if append_or_replace == 'replace':
            # 'replace' clears the existing rows; the table itself is kept
            cursor.execute('delete from ' + table_name)
        cursor.fast_executemany = True
        if numeric_columns:
            df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce', axis=1)
        if date_columns:
            # convert date columns first to datetime, then to string -- otherwise an error is raised
            df[date_columns] = df[date_columns].apply(pd.to_datetime)
            df[date_columns] = df[date_columns].astype(str)
            # with cursor.fast_executemany enabled, an empty datetime can't be inserted, so replace with 1900-01-01
            df[date_columns] = df[date_columns].replace({'NaT': '1900-01-01'})
        # NaN is not a valid parameter value; None maps to SQL NULL (note: mutates the caller's df)
        df.replace({np.nan: None}, inplace=True)
        # build 'insert into <table> ([col1],[col2],...) values (?,?,...)'
        lstcol = ['[' + c + ']' for c in df]
        strcols = ','.join(lstcol)
        lstvals = '?' * len(df.columns)
        strvals = ','.join(lstvals)
        sql = 'insert into ' + table_name + ' (' + strcols + ') values (' + strvals + ')'
        cursor.executemany(sql, df.values.tolist())
        cursor.commit()
        cursor.close()
    except:
        raise