This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import bitdotio | |
import pandas as pd | |
# This creates a small, reproducible CSV for the example; | |
# you can skip this step and use your own CSV instead. | |
df_test = pd.DataFrame( | |
data=[[0, 1, 2], [3, 4, 5]], | |
columns=['a', 'b', 'c']) | |
df_test.to_csv('test.csv', index=False) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import pandas as pd | |
# This creates a small, reproducible CSV for the example; | |
# you can skip this step and use your own CSV instead. | |
df_test = pd.DataFrame( | |
data=[[0, 1, 2], [3, 4, 5]], | |
columns=['a', 'b', 'c']) | |
df_test.to_csv('test.csv', index=False) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Provides extraction functions. | |
Currently only supports GET from URL or local file. | |
""" | |
import io | |
import pandas as pd | |
import requests |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Provides optional transform functions for different data sources.""" | |
import pandas as pd | |
def nyt_cases_counties(df): | |
"""Transforms NYT county-level COVID data""" | |
# Cast date as datetime | |
df['date'] = pd.to_datetime(df['date']) | |
# Store FIPS codes as standard 5 digit strings |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Load pandas DataFrames to PostgreSQL on bit.io""" | |
from sqlalchemy import create_engine | |
def to_table(df, destination, pg_conn_string): | |
""" | |
Loads a pandas DataFrame to a bit.io database. | |
Parameters | |
---------- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Truncated for Medium, see github.com/bitdotioinc/simple-pipeline | |
if __name__ == '__main__': | |
# Parse command line options and arguments | |
opts = [opt[1:] for opt in sys.argv[1:] if opt.startswith("-")] | |
local_source = 'local_source' in opts | |
opts = [opt for opt in opts if opt != 'local_source'] | |
args = [arg for arg in sys.argv[1:] if not arg.startswith("-")] | |
# Validation | |
if len(args) != len(opts) + 2: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""This is an example of a simple ETL pipeline for loading data into bit.io. | |
This example omits many best practices (e.g. logging, error handling, | |
parameterization + config files, etc.) for the sake of a brief, minimal example. | |
""" | |
import os | |
import sys | |
from dotenv import load_dotenv | |
import extract |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Activate the venv and navigate to the location of main.py | |
source venv/bin/activate | |
cd simple_pipeline | |
# Uncomment the two-line command below if you would like to re-run the population data pipeline. | |
# The Census Bureau only updates the population data annually. | |
# python main.py -local_source -name acs_population_counties \ | |
# acs_5yr_population_data.csv bitdotio/simple_pipeline.population_counties | |
python main.py -name nyt_cases_counties \ | |
'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv' \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
45 09 * * * cd ~/Documents/simple_pipeline && ./scheduled_run.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
python main.py -name <TRANSFORM_FUNCTION_NAME> '<DATA_SOURCE_URL>' '<USERNAME/REPO_NAME.DESTINATION_TABLE_NAME>' |
Older | Newer