Skip to content

Instantly share code, notes, and snippets.

View nickefy's full-sized avatar

Nicholas nickefy

View GitHub Profile
@nickefy
nickefy / DataSourceToCsv.py
Last active June 20, 2019 09:13
Apache Airflow WriteToCsv Operator
# airflow related
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
# other packages
from datetime import datetime, timedelta
from os import environ
import csv
class DataSourceToCsv(BaseOperator):
"""
@nickefy
nickefy / Sample Extraction Dag.py
Last active June 21, 2019 02:59
Sample Extraction Dag (Apache Airflow)
# airflow related
from airflow import models
from airflow import DAG
# other packages
from datetime import datetime, timedelta
# import operators from the 'operators' file
from operators import DataSourceToCsv
default_dag_args = {
# Setting start date as yesterday starts the DAG immediately when it is
@nickefy
nickefy / EmailToGCS.py
Created July 6, 2019 17:39
Gmail To GCS Operator
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
import os
from os import environ
from datetime import timedelta
import getpass, imaplib
import sys
import string
@nickefy
nickefy / StorageToBQ.py
Created July 7, 2019 05:45
Airflow Operator for loading data from GCS
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
from os import environ
from datetime import timedelta
from google.cloud import bigquery
import pandas as pd
import logging
import os
@nickefy
nickefy / CheckDupBQ.py
Created July 7, 2019 11:12
Airflow Operator for checking duplication in Google BQ
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
from os import environ
from google.cloud import bigquery
from datetime import datetime, timedelta
import logging
import time
class CheckBQDuplication(BaseOperator):
@nickefy
nickefy / Gmail Data Pipeline.py
Created July 7, 2019 16:03
Airflow DAG file for Gmail Data Pipeline
from airflow.exceptions import AirflowException
from airflow.operators import ShortCircuitOperator
from airflow import models
from airflow import DAG
from operators import GmailToGCS
from operators import StorageToBQ
from operators import CheckDupBQ
from operators import WriteLogs
from operators import SendEmail
from airflow.utils.email import send_email
@nickefy
nickefy / Google Drive Data Pipeline.js
Last active July 30, 2019 09:43
Code for Google Drive Data Pipeline in Google Apps Script
function scan_files() {
var folder = DriveApp.getFolderById(folderID);
var processedfolder = DriveApp.getFolderById(processedfolderId);
var files = folder.getFiles();
// Visit every file in the source folder and load the ones whose name does
// not start with either bookkeeping prefix.
while (files.hasNext()) {
  var file = files.next();
  var name = file.getName();
  // Both prefix checks must hold for the file to be loaded. The previous
  // condition joined them with ||, which is always true (no name can start
  // with 'processed_' and 'loaded_' at once), so no file was ever skipped.
  if (name.substring(0, 10) != 'processed_' && name.substring(0, 7) != 'loaded_') {
    loadCSVfromdrive(file);
  }
}
@nickefy
nickefy / airflow sensors.py
Last active May 17, 2022 15:35
example dag for sensors
from airflow.exceptions import AirflowException
from airflow import models
from airflow import DAG
from datetime import datetime, timedelta
from airflow.utils.email import send_email
from dateutil.relativedelta import relativedelta
import os
# One-day interval; presumably used as the DAG's schedule interval —
# confirm where the DAG is defined (not visible in this excerpt).
schedule_interval_dag = timedelta(days=1)
@nickefy
nickefy / A Guide to Plotly Dash Interactive Visualizations.py
Last active November 8, 2020 16:11
Python For Data Science - A Guide to Plotly Dash Interactive Visualizations
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objs as go
import pandas as pd
from dash.dependencies import Input, Output
from numpy import random
# Create the Dash application object.
app = dash.Dash()
import dash
import dash_html_components as html
import dash_core_components as dcc
from dash.dependencies import Input, Output
import plotly.graph_objs as go
from numpy import random
# Create the Dash application object.
app = dash.Dash()
# initiate dataframe