Last active
January 29, 2022 14:11
-
-
Save martinyung/6d9d126cec4c63d9c95a96129eee3862 to your computer and use it in GitHub Desktop.
python etl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def etl(query, source_cnx, target_cnx): | |
# extract data from source db | |
source_cursor = source_cnx.cursor() | |
source_cursor.execute(query.extract_query) | |
data = source_cursor.fetchall() | |
source_cursor.close() | |
# load data into warehouse db | |
if data: | |
target_cursor = target_cnx.cursor() | |
target_cursor.execute("USE {}".format(datawarehouse_name)) | |
target_cursor.executemany(query.load_query, data) | |
print('data loaded to warehouse db') | |
target_cursor.close() | |
else: | |
print('data is empty') | |
def etl_process(queries, target_cnx, source_db_config, db_platform): | |
# establish source db connection | |
if db_platform == 'mysql': | |
source_cnx = mysql.connector.connect(**source_db_config) | |
elif db_platform == 'sqlserver': | |
source_cnx = pyodbc.connect(**source_db_config) | |
elif db_platform == 'firebird': | |
source_cnx = fdb.connect(**source_db_config) | |
else: | |
return 'Error! unrecognised db platform' | |
# loop through sql queries | |
for query in queries: | |
etl(query, source_cnx, target_cnx) | |
# close the source db connection | |
source_cnx.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment