First run this in snowflake:
(
src variant
)
AS SELECT parse_json(column1) as src
FROM values
('{
"topleveldate" : "2017-04-28",
| import modin.pandas as pd | |
| import snowflake.snowpark.modin.plugin | |
| from prophet import Prophet | |
| def model( dbt, session ): | |
| dbt.config( | |
| materialized="table", # the incremental materialization is also supported | |
| packages=['pandas==2.2.1','modin==0.28.1','Prophet','holidays==0.18','snowflake-snowpark-python[modin]'], # how to import python libraries in dbt's context | |
| python_version="3.11" |
| import enum | |
| import os | |
| import time | |
| # Be sure to `pip install requests` in your python environment | |
| import requests | |
| ACCOUNT_ID = 39 | |
| JOB_ID = 302 |
| {% materialization incremental_lastrun_auto, adapter='snowflake' -%} | |
| {% set original_query_tag = set_query_tag() %} | |
| {%- set unique_key = config.get('unique_key') -%} | |
| {%- set full_refresh_mode = (should_full_refresh()) -%} | |
| {% set target_relation = this %} | |
| {% set existing_relation = load_relation(this) %} | |
| {% set tmp_relation = make_temp_relation(this) %} |
| #example from https://towardsdatascience.com/reduce-warehouse-space-with-the-pareto-principle-using-python-e722a6babe0e | |
| #optimizing warehouse location based on sku frequency using pareto 80/20 principle | |
| import pandas as pd | |
| def model(dbt, session): | |
| dbt.config( | |
| materialized="table", | |
| packages=["pandas"] #https://repo.anaconda.com/pkgs/snowflake/ |
| {% macro add_search_optimization(relation,columns) %} | |
| {# check if the relation has search optimization added #} | |
| {%- call statement('search_optimization', fetch_result=True) -%} | |
| describe search optimization on {{ relation }} | |
| {%- endcall %} | |
| {% if execute %} | |
| {% set result = load_result('search_optimization') %} | |
| {%- set result_data = result['data'] -%} |
| === | |
| Compiled SQL: | |
| select | |
| EXTRACT(YEAR from ship_date), | |
| sum(case when ship_mode = 'FOB' then gross_item_sales_amount end) as FOB_A_amount, | |
| sum(case when ship_mode = 'TRUCK' then gross_item_sales_amount end) as TRUCK_N_amount, | |
| sum(case when ship_mode = 'MAIL' then gross_item_sales_amount end) as MAIL_N_amount, | |
| sum(case when ship_mode = 'SHIP' then gross_item_sales_amount end) as SHIP_R_amount, |
| /* Create a pivot table with dynamic columns based on the ship modes that are in the system */ | |
| {%- call statement('result', fetch_result=True) -%} | |
| {# this pulls the unique ship modes from the fct_order_items table #} | |
| select ship_mode from {{ ref('fct_order_items') }} group by 1 | |
| {%- endcall %} | |
| {% set ship_modes = load_result('result').table.columns[0].values() %} |
First run this in snowflake:
(
src variant
)
AS SELECT parse_json(column1) as src
FROM values
('{
"topleveldate" : "2017-04-28",
| models: | |
| - name: dim_customers | |
| description: Customer dimensions table | |
| columns: | |
| - name: customer_key | |
| description: Primary key on the customers table | |
| tests: | |
| - unique | |
| - not_null | |
| - name: region |