Skip to content

Instantly share code, notes, and snippets.

View andrewdoss-bit's full-sized avatar

andrewdoss-bit

View GitHub Profile
@andrewdoss-bit
andrewdoss-bit / example-etl.yaml
Created September 28, 2021 01:30
Example workflow
name: ETL example
on:
push:
branches:
- main
jobs:
extract-transform-load:
runs-on: ubuntu-latest
env:
PG_CONN_STRING: ${{ secrets.PG_CONN_STRING }}
@andrewdoss-bit
andrewdoss-bit / fips_cleaner_pytest.py
Created September 23, 2021 18:05
Unit testing fips_cleaner
@pytest.mark.parametrize(
"input, expected",
[
('01001', '01001'), # 5-character string in, 5-character string out
('1001', '01001'), # 4-character string in, 5-character string out
(1001, '01001'), # int in, 5-character string out
(1001.0, '01001'), # float in, 5-character string out
('11001', '11001'), # Similar to before, but with two digit state code
(11001, '11001'), # Similar to before, but with two digit state code
(11001.0, '11001'), # Similar to before, but with two digit state code
@andrewdoss-bit
andrewdoss-bit / fips_cleaner.py
Created September 23, 2021 18:03
FIPS Cleaner
def fips_cleaner(code):
    """Standardize county FIPS codes as 5-character, zero-padded strings.

    Each value is converted to text, cut off at the first ``.`` or ``/``
    (which removes the ``.0`` carried by float-typed codes), and then
    left-padded with zeros to a width of five.

    Args:
        code: pandas Series of FIPS codes given as strings, ints, or floats.

    Returns:
        A Series of cleaned code strings. Missing inputs stay missing
        instead of being turned into text such as ``'00nan'``.
    """
    as_text = code.astype(str)
    # Keep only the characters that precede the first '.' or '/'.
    leading_part = as_text.str.extract(r'(^[^/.]*).*', expand=False)
    padded = leading_part.str.zfill(5)
    # astype(str) can render NaN/None as ordinary text ('nan'/'None'), which
    # would then be zero-padded into a bogus code; restore the missing-value
    # marker wherever the input itself was missing.
    return padded.where(code.notna())
@andrewdoss-bit
andrewdoss-bit / validation_test_wrapper.py
Last active September 23, 2021 01:18
validation_test_wrapper
def test_data(df, tests):
    """Execute each data test against a copy of ``df`` and log the outcomes.

    ``tests`` is iterated as ``(test_func, failure_message)`` pairs. Every
    test function is called with its own copy of ``df``; a truthy return
    value counts as a pass. One log line is written per test, followed by
    a final passed/total summary line.
    """
    outcomes = []
    for test_func, failure_message in tests:
        outcome = test_func(df.copy())
        outcomes.append(outcome)
        if not outcome:
            logger.error(f'Data test {test_func.__name__} failed. {failure_message}')
            continue
        logger.info(f'Data test {test_func.__name__} passed.')
    logger.info(f'{sum(outcomes)}/{len(outcomes)} passed.')
def cases_vs_deaths(df):
    """Return whether every row's ``deaths`` value is at most its ``cases`` value."""
    deaths = df['deaths']
    cases = df['cases']
    deaths_within_cases = deaths <= cases
    return deaths_within_cases.all()
def unique_records(df):
    """Return whether no ``date``/``fips`` pair occurs on more than one row."""
    key_columns = ['date', 'fips']
    distinct_keys = df[key_columns].drop_duplicates()
    return len(distinct_keys) == len(df)
@andrewdoss-bit
andrewdoss-bit / test_transform.py
Last active September 23, 2021 00:18
Unit testing a FIPS code cleaning function
import pandas as pd
import pytest
def fips_cleaner(code):
    """Standardize county FIPS codes as 5-character, zero-padded strings.

    Each value is converted to text, cut off at the first ``.`` or ``/``
    (which removes the ``.0`` carried by float-typed codes), and then
    left-padded with zeros to a width of five.

    Args:
        code: pandas Series of FIPS codes given as strings, ints, or floats.

    Returns:
        A Series of cleaned code strings. Missing inputs stay missing
        instead of being turned into text such as ``'00nan'``.
    """
    as_text = code.astype(str)
    # Keep only the characters that precede the first '.' or '/'.
    leading_part = as_text.str.extract(r'(^[^/.]*).*', expand=False)
    padded = leading_part.str.zfill(5)
    # astype(str) can render NaN/None as ordinary text ('nan'/'None'), which
    # would then be zero-padded into a bogus code; restore the missing-value
    # marker wherever the input itself was missing.
    return padded.where(code.notna())
@pytest.mark.parametrize(
@andrewdoss-bit
andrewdoss-bit / lp_python_snippet.py
Last active September 9, 2021 20:40
Landing page Python snippets
#####
# Connect to bit.io
import bitdotio
b = bitdotio.bitdotio(<YOUR_BITIO_KEY>)
# You can call SDK methods directly from the b object
b.list_repos()
# The b object also provides access to a psycopg2 cursor for arbitrary SQL
conn = bit_conn.get_connection()
@andrewdoss-bit
andrewdoss-bit / create_repo.py
Created September 9, 2021 19:22
create repo
# Create a repo
import bitdotio
# Connect to bit.io
# <YOUR_BITIO_KEY> is a template placeholder, not valid Python; replace it
# with your own key before running.
b = bitdotio.bitdotio(<YOUR_BITIO_KEY>)
# Construct a repo object
# Only the Repo object is built in the lines shown here (name, description,
# and the private flag set to True); no call that submits it is visible.
r = bitdotio.model.repo.Repo(name='my_new_repo',
description='My new repository.',
is_private=True)
@andrewdoss-bit
andrewdoss-bit / bit_connect.py
Created September 9, 2021 19:18
bit connect
# Connect to bit.io
import bitdotio
# <YOUR_BITIO_KEY> is a template placeholder, not valid Python; replace it
# with your own key before running.
bit_conn = bitdotio.bitdotio(<YOUR_BITIO_KEY>)
# You can use the connection with the bit module, here we list repos
from bitdotio import bit
# The connection object created above is passed explicitly to bit.list.
bit.list(bit_conn)
@andrewdoss-bit
andrewdoss-bit / insert_record.py
Last active September 2, 2021 04:20
Insert record
import bitdotio
# Column names, in the order their values are placed into record_list.
columns = ['datetime', 'location', 'sensor_id', 'pm_2_5', 'pm_10']
# Pull one value per column out of `record`, preserving the order above.
# NOTE(review): `record` is not defined in this snippet; presumably a mapping
# keyed by these column names. Confirm against the code that produces it.
record_list = [record[col] for col in columns]
# NOTE(review): BITDOTIO_API_KEY is not defined in this snippet; it must be
# set before this line runs.
bit = bitdotio.bitdotio(BITDOTIO_API_KEY)
# Replace the line below with your schema-qualified table name
qualified_table = '"air_quality_log_test/air_quality"."pm_measurements"'