Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save rifkiamil/a97be0ea53de735f8a4ff984ce699485 to your computer and use it in GitHub Desktop.
Save rifkiamil/a97be0ea53de735f8a4ff984ce699485 to your computer and use it in GitHub Desktop.
Template for Azure Synapse Dedicated Pool Table Creation from Query with CLUSTERED COLUMNSTORE INDEX ORDER
-- Template for Azure Synapse Dedicated Pool Table Creation from Query with CLUSTERED COLUMNSTORE INDEX ORDER
-- Schema: testing_rif_k
-- Goal: Make easy to find the commands that only work on Azure Synapse Dedicated Pool ( SQL DW )
CREATE TABLE testing_rif_k.banking_data
WITH
(
DISTRIBUTION = HASH(AccountId), -- Recommended for even data distribution based on a key
CLUSTERED COLUMNSTORE INDEX ORDER (TransactionDate DESC) -- Good for analytical queries on date ranges
-- DISTRIBUTION = ROUND_ROBIN, -- Useful for small tables or when no clear distribution key exists
-- DISTRIBUTION = REPLICATE -- Suitable for small, frequently joined tables
)
AS
WITH CustomerBankingData AS (
SELECT
CAST(c.CustomerId AS NVARCHAR(36)) AS CustomerId,
c.CustomerName,
c.AccountOpeningDate,
-- Potentially sensitive, consider masking or encryption
c.AccountNumber,
c.AccountType,
CAST(b.AccountId AS NVARCHAR(36)) AS AccountId,
ROW_NUMBER() OVER (PARTITION BY c.CustomerId ORDER BY (SELECT NULL)) AS rn_all_same, -- Arbitrary single row per customer
ROW_NUMBER() OVER (PARTITION BY c.CustomerId ORDER BY c.AccountOpeningDate) AS rn_by_date
FROM
testing_rif_k.Customers c -- Assuming a Customers table exists
JOIN testing_rif_k.Accounts b ON c.CustomerId = b.CustomerId
-- WHERE /* Add any filtering conditions here */
),
TransactionBankingData AS (
SELECT
CAST(t.TransactionId AS NVARCHAR(36)) AS TransactionId,
CAST(t.AccountId AS NVARCHAR(36)) AS AccountId,
t.TransactionDate,
t.TransactionType,
t.TransactionAmount,
t.TransactionCurrency,
ROW_NUMBER() OVER (PARTITION BY t.AccountId ORDER BY (SELECT NULL)) AS rn_all_same, -- Arbitrary single transaction per account
ROW_NUMBER() OVER (PARTITION BY t.AccountId, t.TransactionDate, t.TransactionAmount ORDER BY t.TransactionId) AS rn_by_details
FROM
testing_rif_k.Transactions t -- Assuming a Transactions table exists
-- WHERE /* Add any filtering conditions here */
),
LoanBankingData AS (
SELECT
CAST(l.LoanId AS NVARCHAR(36)) AS LoanId,
CAST(l.AccountId AS NVARCHAR(36)) AS AccountId,
l.LoanAmount,
l.InterestRate,
l.LoanStartDate,
l.LoanEndDate,
ROW_NUMBER() OVER (PARTITION BY l.AccountId ORDER BY (SELECT NULL)) AS rn_all_same, -- Arbitrary single loan per account
ROW_NUMBER() OVER (PARTITION BY l.AccountId, l.LoanId ORDER BY l.LoanStartDate DESC) AS rn_by_date
FROM
testing_rif_k.Loans l -- Assuming a Loans table exists
-- WHERE /* Add any filtering conditions here */
)
SELECT
cbd.CustomerId,
cbd.CustomerName,
cbd.AccountOpeningDate,
cbd.AccountNumber,
cbd.AccountType,
cbd.AccountId,
NULL AS TransactionId,
NULL AS TransactionDate,
NULL AS TransactionType,
NULL AS TransactionAmount,
NULL AS TransactionCurrency,
NULL AS LoanId,
NULL AS LoanAmount,
NULL AS InterestRate,
NULL AS LoanStartDate,
NULL AS LoanEndDate
FROM CustomerBankingData cbd
WHERE cbd.rn_by_date = 1 -- Keeping the first account opening date per customer (example deduplication)
UNION ALL
SELECT
NULL AS CustomerId,
NULL AS CustomerName,
NULL AS AccountOpeningDate,
NULL AS AccountNumber,
NULL AS AccountType,
tbd.AccountId,
tbd.TransactionId,
tbd.TransactionDate,
tbd.TransactionType,
tbd.TransactionAmount,
tbd.TransactionCurrency,
NULL AS LoanId,
NULL AS LoanAmount,
NULL AS InterestRate,
NULL AS LoanStartDate,
NULL AS LoanEndDate
FROM TransactionBankingData tbd
WHERE tbd.rn_by_details = 1 -- Keeping the first transaction based on ID for unique account/date/amount
UNION ALL
SELECT
NULL AS CustomerId,
NULL AS CustomerName,
NULL AS AccountOpeningDate,
NULL AS AccountNumber,
NULL AS AccountType,
lbd.AccountId,
NULL AS TransactionId,
NULL AS TransactionDate,
NULL AS TransactionType,
NULL AS TransactionAmount,
NULL AS TransactionCurrency,
lbd.LoanId,
lbd.LoanAmount,
lbd.InterestRate,
lbd.LoanStartDate,
lbd.LoanEndDate
FROM LoanBankingData lbd
WHERE lbd.rn_by_date = 1 -- Keeping the loan with the most recent start date per account and loan ID
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment