This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: ETL example | |
on: | |
push: | |
branches: | |
- main | |
jobs: | |
extract-transform-load: | |
runs-on: ubuntu-latest | |
env: | |
PG_CONN_STRING: ${{ secrets.PG_CONN_STRING }} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@pytest.mark.parametrize( | |
"input, expected", | |
[ | |
('01001', '01001'), # 5-character string in, 5-character string out | |
('1001', '01001'), # 4-character string in, 5-character string out | |
(1001, '01001'), # int in, 5-character string out | |
(1001.0, '01001'), # float in, 5-character string out | |
('11001', '11001'), # Similar to before, but with two digit state code | |
(11001, '11001'), # Similar to before, but with two digit state code | |
(11001.0, '11001'), # Similar to before, but with two digit state code |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fips_cleaner(code):
    """Standardize county FIPS codes as 5-digit, zero-padded strings.

    Each value is converted to text, everything from the first ``.`` or ``/``
    onward is dropped (so a float such as ``1001.0`` becomes ``'1001'``), and
    the remainder is left-padded with zeros to five characters.

    Missing values (``NaN``/``None``) stay missing in the result instead of
    being stringified and padded into bogus codes such as ``'00nan'``.

    Args:
        code: pandas Series (or object with the same ``.astype``/``.str``
            API) of FIPS codes given as strings, ints, or floats.

    Returns:
        The cleaned codes, aligned with the input; missing inputs remain
        missing.
    """
    # Keep only the text before the first '.' or '/'. The pattern can match
    # the empty string, so it never fails to match a stringified value.
    leading_part = code.astype(str).str.extract('(^[^/.]*).*', expand=False)
    # astype(str) converted missing values to text; blank those rows again.
    return leading_part.str.zfill(5).where(code.notna())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def test_data(df, tests):
    """Apply every supplied data test to ``df`` and log the outcomes.

    Args:
        df: Data to validate. Each test receives its own ``df.copy()``, so a
            test cannot alter the data seen by the next one.
        tests: Iterable of ``(test_func, failure_message)`` pairs. Each
            ``test_func`` is called with the copy and its result is treated
            as truthy (pass) or falsy (fail).

    One line is logged per test (info on pass, error with the failure
    message on fail), followed by a final "passed/total" summary line.
    """
    results = []
    for test_func, failure_message in tests:
        outcome = test_func(df.copy())
        results.append(outcome)
        if not outcome:
            logger.error(f'Data test {test_func.__name__} failed. {failure_message}')
            continue
        logger.info(f'Data test {test_func.__name__} passed.')
    logger.info(f'{sum(results)}/{len(results)} passed.')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def cases_vs_deaths(df):
    """Return whether every row has a death count no larger than its case count.

    Compares the ``deaths`` column against the ``cases`` column element-wise
    and reports whether the comparison holds for all rows.
    """
    deaths = df['deaths']
    cases = df['cases']
    within_case_count = deaths <= cases
    return within_case_count.all()
def unique_records(df):
    """Return whether each (date, FIPS) combination occurs only once.

    Counts the distinct ``date``/``fips`` pairs and compares that count with
    the total number of rows in ``df``.
    """
    key_columns = ['date', 'fips']
    distinct_keys = df[key_columns].drop_duplicates()
    return len(distinct_keys) == len(df)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import pytest | |
def fips_cleaner(code):
    """Standardize county FIPS codes as 5-digit, zero-padded strings.

    Each value is converted to text, everything from the first ``.`` or ``/``
    onward is dropped (so a float such as ``1001.0`` becomes ``'1001'``), and
    the remainder is left-padded with zeros to five characters.

    Missing values (``NaN``/``None``) stay missing in the result instead of
    being stringified and padded into bogus codes such as ``'00nan'``.

    Args:
        code: pandas Series (or object with the same ``.astype``/``.str``
            API) of FIPS codes given as strings, ints, or floats.

    Returns:
        The cleaned codes, aligned with the input; missing inputs remain
        missing.
    """
    # Keep only the text before the first '.' or '/'. The pattern can match
    # the empty string, so it never fails to match a stringified value.
    leading_part = code.astype(str).str.extract('(^[^/.]*).*', expand=False)
    # astype(str) converted missing values to text; blank those rows again.
    return leading_part.str.zfill(5).where(code.notna())
@pytest.mark.parametrize( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##### | |
# Connect to bit.io | |
import bitdotio | |
b = bitdotio.bitdotio(<YOUR_BITIO_KEY>) | |
# You can call SDK methods directly from the b object | |
b.list_repos() | |
# The b object also provides access to a psycopg2 cursor for arbitrary SQL | |
conn = bit_conn.get_connection() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create a repo | |
import bitdotio | |
# Connect to bit.io | |
b = bitdotio.bitdotio(<YOUR_BITIO_KEY>) | |
# Construct a repo object | |
r = bitdotio.model.repo.Repo(name='my_new_repo', | |
description='My new repository.', | |
is_private=True) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Connect to bit.io | |
import bitdotio | |
bit_conn = bitdotio.bitdotio(<YOUR_BITIO_KEY>) | |
# You can use the connection with the bit module, here we list repos | |
from bitdotio import bit | |
bit.list(bit_conn) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import bitdotio

# Column names, in the order the values are pulled from the record.
columns = [
    'datetime',
    'location',
    'sensor_id',
    'pm_2_5',
    'pm_10',
]

# Pick the record's values out in column order.
record_list = [record[column_name] for column_name in columns]

# Connect to bit.io using the API key.
bit = bitdotio.bitdotio(BITDOTIO_API_KEY)

# Replace the line below with your schema-qualified table name
qualified_table = '"air_quality_log_test/air_quality"."pm_measurements"'