Skip to content

Instantly share code, notes, and snippets.

View nickefy's full-sized avatar

Nicholas nickefy

View GitHub Profile
@nickefy
nickefy / How I Built a Data Lakehouse with Delta Lake Architecture.py
Created September 15, 2023 08:20
How I Built a Data Lakehouse with Delta Lake Architecture
pip install delta-spark==2.4.0
pip install pyspark
import pyspark
from delta import *
# Spark session builder for the app "MyApp", configured with the Delta Lake
# SQL extension and the Delta catalog as the session catalog.
builder = (
    pyspark.sql.SparkSession.builder.appName("MyApp")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
)
from airflow import models
from airflow import DAG
from datetime import datetime, timedelta
from operators import DataSourceToCsv
from operators import CsvToBigquery
from operators import ExternalSensor
transformation_query_sample = """Select
a.user_id,
b.country,
from airflow import models
from airflow import DAG
from datetime import datetime, timedelta
from operators import DataSourceToCsv
from operators import CsvToBigquery
extract_query_source = """Select
a.user_id,
b.country,
a.revenue
from airflow import models
from airflow import DAG
from datetime import datetime, timedelta
from operators import DataSourceToCsv
from operators import CsvToBigquery
extract_query_source = """select * from transactions"""
default_dag_args = {
'start_date': datetime(2019, 5, 1, 7),
@nickefy
nickefy / SNP500_Expense_vs_Returns.py
Created July 20, 2021 10:50
How Much and How Long Should You Invest to Retire Early?
import pandas as pd
# Standard plotly imports
import plotly as py
import plotly.tools as tls
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
import cufflinks
# Using plotly + cufflinks in offline mode
@nickefy
nickefy / expense_vs_returns.py
Created June 29, 2021 09:28
Simulation of Yearly Expenses and Annual Returns
import pandas as pd
# Standard plotly imports
import plotly as py
import plotly.tools as tls
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
import cufflinks
# Using plotly + cufflinks in offline mode
@nickefy
nickefy / create_dag.py
Last active June 23, 2021 03:16
Create DAG Operator for Airflow Framework
import os,sys,glob
from pathlib import Path
import json
from datetime import datetime, timedelta
import time
from airflow import models
from airflow.utils import trigger_rule
from airflow import DAG, AirflowException
from airflow.operators.dummy_operator import DummyOperator
@nickefy
nickefy / sample_dag.py
Created June 7, 2021 06:32
sample DAG file for Airflow Framework
from airflow import DAG
from operators.create_dag_operator import CreateDagOperator
# Default arguments passed to the DAG factory; only the owner is set.
default_dag_args = dict(owner='airflow')

# Create the DAG named 'Hello_world' via CreateDagOperator.create_dag.
dag = CreateDagOperator.create_dag('Hello_world', default_dag_args)
@nickefy
nickefy / task_config.json
Last active June 23, 2021 03:17
task config for Airflow Framework
{
"destination": [
{
"task_name": "Print Hello World 1",
"destination_action" : "print",
"text_to_print": "Hello World!"
},
{
"task_name": "Print Hello World 2",
"destination_action" : "print",
@nickefy
nickefy / job_config.json
Created June 7, 2021 06:31
job config for Airflow Framework
{
"job_name": "Hello_World!",
"retries": "0",
"start_date": "2020-02-27",
"email": ["[email protected]"],
"schedule_interval": "None"
}