Skip to content

Instantly share code, notes, and snippets.

View prmoore77's full-sized avatar

Philip Moore prmoore77

View GitHub Profile
@prmoore77
prmoore77 / write_csv_with_pyarrow.py
Last active September 24, 2024 14:43
Write out a CSV file with PyArrow
import os
from dotenv import load_dotenv
import pyarrow as pa
from pyarrow.fs import S3FileSystem
from pyarrow import csv
# Load environment
load_dotenv(dotenv_path="env.txt")
@prmoore77
prmoore77 / tar_and_gzip_dir.sh
Last active August 1, 2024 16:57
Tar and zip directory
#!/bin/bash
# Create the gzipped tar file with:
pushd /path/to/dir
tar -czf filename.tar.gz .
popd
# Extract the gzipped tar file with:
tar -xvf filename.tar.gz
@prmoore77
prmoore77 / install_aws_cli.sh
Created June 6, 2024 20:20
Install AWS CLI
# Install AWS CLI
pushd /tmp
curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
unzip awscliv2.zip
./aws/install
popd
product_level_name product_node_name sum_sales_amount sum_unit_quantity distinct_customer_count count_of_fact_records
Total Products All Products 23.000 8.000 3 3
-------Category -------Candy 23.000 8.000 3 3
--------------UPC --------------Hershey Bar 18.000 6.000 2 2
--------------UPC --------------Nerds 5.000 2.000 1 1
SELECT -- Use LPAD here to indent the node information based upon its depth in the hierarchy
LPAD ('-', (products.ancestor_level_number - 1) * 7, '-')
|| products.ancestor_level_name AS product_level_name
, LPAD ('-', (products.ancestor_level_number - 1) * 7, '-')
|| products.ancestor_node_name AS product_node_name
-- Aggregates
, SUM (facts.sales_amount) AS sum_sales_amount
, SUM (facts.unit_quantity) AS sum_unit_quantity
, COUNT (DISTINCT facts.customer_id) AS distinct_customer_count
, COUNT (*) AS count_of_fact_records
product_ancestor_level_name product_ancestor_node_name product_descendant_level_name product_descendant_node_name customer_id sales_amount
Total Products All Products --------------UPC --------------Hershey Bar Lottie 15.000
Total Products All Products --------------UPC --------------Hershey Bar Phil 3.000
Total Products All Products --------------UPC --------------Nerds Kalie 5.000
-------Category -------Candy --------------UPC --------------Hershey Bar Lottie 15.000
-------Category -------Candy --------------UPC --------------Hershey Bar Phil 3.000
-------Category -------Candy --------------UPC --------------Nerds Kalie 5.000
--------------UPC --------------Hershey Bar --------------UPC --------------Hershey Bar Lottie 15.000
--------------UPC --------------Hershey Bar --------------UPC --------------Hershey Bar Phil 3.000
--------------UPC --------------Nerds --------------UPC --------------Nerds Kalie 5.000
SELECT
-- Product Ancestor Attributes
LPAD ('-', (products.ancestor_level_number - 1) * 7, '-')
|| products.ancestor_level_name AS product_ancestor_level_name
, LPAD ('-', (products.ancestor_level_number - 1) * 7, '-')
|| products.ancestor_node_name AS product_ancestor_node_name
-- Product Descendant Attributes
, LPAD ('-', (products.descendant_level_number - 1) * 7, '-')
|| products.descendant_level_name AS product_descendant_level_name
, LPAD ('-', (products.descendant_level_number - 1) * 7, '-')
SELECT
-- Product Dimension
LPAD (' ', (products.ancestor_level_number - 1) * 7, ' ')
|| products.ancestor_level_name AS product_level_name
, LPAD (' ', (products.ancestor_level_number - 1) * 7, ' ')
|| products.ancestor_node_name AS product_node_name
-- Geography Dimension
, LPAD (' ', (geographies.ancestor_level_number - 1) * 7, ' ')
|| geographies.ancestor_level_name AS geography_level_name
, LPAD (' ', (geographies.ancestor_level_number - 1) * 7, ' ')
product_level_name product_node_name sum_sales_amount sum_unit_quantity distinct_customer_count count_of_fact_records
Total Products All Products 33.000 24.000 5 8
-------Category -------Candy 23.000 8.000 3 3
--------------UPC --------------Hershey Bar 18.000 6.000 2 2
--------------UPC --------------Nerds 5.000 2.000 1 1
-------Category -------Produce 10.000 16.000 3 5
--------------UPC --------------Spinach 8.000 14.000 2 4
--------------UPC --------------Tomatoes 2.000 2.000 1 1
WITH rollup_aggregations AS (
SELECT CASE WHEN GROUPING (level_3_node_id) = 0
THEN products.level_3_node_id
WHEN GROUPING (level_2_node_id) = 0
THEN products.level_2_node_id
WHEN GROUPING (level_1_node_id) = 0
THEN products.level_1_node_id
END AS product_node_id
-- Aggregates
, SUM (facts.sales_amount) AS sum_sales_amount