Skip to content

Instantly share code, notes, and snippets.

View andrewdoss-bit's full-sized avatar

andrewdoss-bit

View GitHub Profile
@andrewdoss-bit
andrewdoss-bit / bitdotio_csv_upload.py
Last active August 21, 2022 07:07
Upload csv to bit.io from Python w/ psycopg2
import bitdotio
import pandas as pd
# Write a tiny, reproducible csv ('test.csv') to upload in this example.
# Skip this step if you already have a csv of your own.
df_test = pd.DataFrame({'a': [0, 3], 'b': [1, 4], 'c': [2, 5]})
df_test.to_csv('test.csv', index=False)
@andrewdoss-bit
andrewdoss-bit / requests_csv_upload.py
Created August 10, 2021 23:04
Upload csv to bit.io from Python w/ requests
import requests
import pandas as pd
# Generate a small example csv so the upload below is reproducible.
# If you have your own csv, ignore this step and use that file instead.
df_test = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns=list('abc'))
df_test.to_csv(path_or_buf='test.csv', index=False)
@andrewdoss-bit
andrewdoss-bit / extract.py
Last active August 12, 2021 19:15
Extract
"""Provides extraction functions.
Currently only supports GET from URL or local file.
"""
import io
import pandas as pd
import requests
@andrewdoss-bit
andrewdoss-bit / transform.py
Last active August 12, 2021 21:03
Transform
"""Provides optional transform functions for different data sources."""
import pandas as pd
def nyt_cases_counties(df):
"""Transforms NYT county-level COVID data"""
# Cast date as datetime
df['date'] = pd.to_datetime(df['date'])
# Store FIPS codes as standard 5 digit strings
@andrewdoss-bit
andrewdoss-bit / load.py
Last active August 12, 2021 20:38
Load
"""Load pandas DataFrames to PostgreSQL on bit.io"""
from sqlalchemy import create_engine
def to_table(df, destination, pg_conn_string):
"""
Loads a pandas DataFrame to a bit.io database.
Parameters
----------
# Truncated for Medium, see github.com/bitdotioinc/simple-pipeline
if __name__ == '__main__':
# Parse command line options and arguments
opts = [opt[1:] for opt in sys.argv[1:] if opt.startswith("-")]
local_source = 'local_source' in opts
opts = [opt for opt in opts if opt != 'local_source']
args = [arg for arg in sys.argv[1:] if not arg.startswith("-")]
# Validation
if len(args) != len(opts) + 2:
"""This is an example of a simple ETL pipeline for loading data into bit.io.
This example omits many best practices (e.g. logging, error handling,
parameterization + config files, etc.) for the sake of a brief, minimal example.
"""
import os
import sys
from dotenv import load_dotenv
import extract
@andrewdoss-bit
andrewdoss-bit / scheduled_run.sh
Last active August 13, 2021 01:05
Scheduled job
#!/bin/bash
# Activate the venv and navigate to the location of main.py
source venv/bin/activate
cd simple_pipeline
# Uncomment the line below if you would like to re-run the population data pipeline
# The population data is only updated annually by the Census Bureau
# python main.py -local_source -name acs_population_counties \
# acs_5yr_population_data.csv bitdotio/simple_pipeline.population_counties
python main.py -name nyt_cases_counties \
'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv' \
45 09 * * * cd ~/Documents/simple_pipeline && ./scheduled_run.sh
@andrewdoss-bit
andrewdoss-bit / pattern.sh
Created August 18, 2021 22:52
Shell command syntax for main.py
python main.py -name <TRANSFORM_FUNCTION_NAME> '<DATA_SOURCE_URL>' '<USERNAME/REPO_NAME.DESTINATION_TABLE_NAME>'