Skip to content

Instantly share code, notes, and snippets.

View andrewdoss-bit's full-sized avatar

andrewdoss-bit

View GitHub Profile
"""This is an example of a simple ETL pipeline for loading data into bit.io.
This example omits many best practices (e.g. logging, error handling,
parameterization + config files, etc.) for the sake of a brief, minimal example.
"""
import os
import sys
from dotenv import load_dotenv
import extract
# Truncated for Medium, see github.com/bitdotioinc/simple-pipeline
if __name__ == '__main__':
# Parse command line options and arguments
opts = [opt[1:] for opt in sys.argv[1:] if opt.startswith("-")]
local_source = 'local_source' in opts
opts = [opt for opt in opts if opt != 'local_source']
args = [arg for arg in sys.argv[1:] if not arg.startswith("-")]
# Validation
if len(args) != len(opts) + 2:
@andrewdoss-bit
andrewdoss-bit / load.py
Last active August 12, 2021 20:38
Load
"""Load pandas DataFrames to PostgreSQL on bit.io"""
from sqlalchemy import create_engine
def to_table(df, destination, pg_conn_string):
"""
Loads a pandas DataFrame to a bit.io database.
Parameters
----------
@andrewdoss-bit
andrewdoss-bit / transform.py
Last active August 12, 2021 21:03
Transform
"""Provides optional transform functions for different data sources."""
import pandas as pd
def nyt_cases_counties(df):
"""Transforms NYT county-level COVID data"""
# Cast date as datetime
df['date'] = pd.to_datetime(df['date'])
# Store FIPS codes as standard 5 digit strings
@andrewdoss-bit
andrewdoss-bit / extract.py
Last active August 12, 2021 19:15
Extract
"""Provides extraction functions.
Currently only supports GET from URL or local file.
"""
import io
import pandas as pd
import requests
@andrewdoss-bit
andrewdoss-bit / requests_csv_upload.py
Created August 10, 2021 23:04
Upload csv to bit.io from Python w/ requests
import requests
import pandas as pd
# Write a tiny, reproducible CSV to disk so the example is self-contained.
# Skip this step if you already have a CSV of your own to upload.
df_test = pd.DataFrame({'a': [0, 3], 'b': [1, 4], 'c': [2, 5]})
# index=False keeps the row index out of the file, leaving only a, b, c.
df_test.to_csv(path_or_buf='test.csv', index=False)
@andrewdoss-bit
andrewdoss-bit / bitdotio_csv_upload.py
Last active August 21, 2022 07:07
Upload csv to bit.io from Python w/ psycopg2
import bitdotio
import pandas as pd
# Generate a small fixture CSV so this snippet can be run as-is.
# If you already have a CSV to upload, this step is unnecessary.
df_test = pd.DataFrame.from_records(
    [(0, 1, 2), (3, 4, 5)],
    columns=['a', 'b', 'c'],
)
# Omit the row index so the file holds only the three data columns.
df_test.to_csv('test.csv', index=False)