Created
April 11, 2025 16:39
-
-
Save rifkiamil/a97be0ea53de735f8a4ff984ce699485 to your computer and use it in GitHub Desktop.
Template for Azure Synapse Dedicated Pool Table Creation from Query with CLUSTERED COLUMNSTORE INDEX ORDER
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Template for Azure Synapse Dedicated Pool Table Creation from Query with CLUSTERED COLUMNSTORE INDEX ORDER | |
-- Schema: testing_rif_k | |
-- Goal: Make easy to find the commands that only work on Azure Synapse Dedicated Pool ( SQL DW ) | |
CREATE TABLE testing_rif_k.banking_data | |
WITH | |
( | |
DISTRIBUTION = HASH(AccountId), -- Recommended for even data distribution based on a key | |
CLUSTERED COLUMNSTORE INDEX ORDER (TransactionDate DESC) -- Good for analytical queries on date ranges | |
-- DISTRIBUTION = ROUND_ROBIN, -- Useful for small tables or when no clear distribution key exists | |
-- DISTRIBUTION = REPLICATE -- Suitable for small, frequently joined tables | |
) | |
AS | |
WITH CustomerBankingData AS ( | |
SELECT | |
CAST(c.CustomerId AS NVARCHAR(36)) AS CustomerId, | |
c.CustomerName, | |
c.AccountOpeningDate, | |
-- Potentially sensitive, consider masking or encryption | |
c.AccountNumber, | |
c.AccountType, | |
CAST(b.AccountId AS NVARCHAR(36)) AS AccountId, | |
ROW_NUMBER() OVER (PARTITION BY c.CustomerId ORDER BY (SELECT NULL)) AS rn_all_same, -- Arbitrary single row per customer | |
ROW_NUMBER() OVER (PARTITION BY c.CustomerId ORDER BY c.AccountOpeningDate) AS rn_by_date | |
FROM | |
testing_rif_k.Customers c -- Assuming a Customers table exists | |
JOIN testing_rif_k.Accounts b ON c.CustomerId = b.CustomerId | |
-- WHERE /* Add any filtering conditions here */ | |
), | |
TransactionBankingData AS ( | |
SELECT | |
CAST(t.TransactionId AS NVARCHAR(36)) AS TransactionId, | |
CAST(t.AccountId AS NVARCHAR(36)) AS AccountId, | |
t.TransactionDate, | |
t.TransactionType, | |
t.TransactionAmount, | |
t.TransactionCurrency, | |
ROW_NUMBER() OVER (PARTITION BY t.AccountId ORDER BY (SELECT NULL)) AS rn_all_same, -- Arbitrary single transaction per account | |
ROW_NUMBER() OVER (PARTITION BY t.AccountId, t.TransactionDate, t.TransactionAmount ORDER BY t.TransactionId) AS rn_by_details | |
FROM | |
testing_rif_k.Transactions t -- Assuming a Transactions table exists | |
-- WHERE /* Add any filtering conditions here */ | |
), | |
LoanBankingData AS ( | |
SELECT | |
CAST(l.LoanId AS NVARCHAR(36)) AS LoanId, | |
CAST(l.AccountId AS NVARCHAR(36)) AS AccountId, | |
l.LoanAmount, | |
l.InterestRate, | |
l.LoanStartDate, | |
l.LoanEndDate, | |
ROW_NUMBER() OVER (PARTITION BY l.AccountId ORDER BY (SELECT NULL)) AS rn_all_same, -- Arbitrary single loan per account | |
ROW_NUMBER() OVER (PARTITION BY l.AccountId, l.LoanId ORDER BY l.LoanStartDate DESC) AS rn_by_date | |
FROM | |
testing_rif_k.Loans l -- Assuming a Loans table exists | |
-- WHERE /* Add any filtering conditions here */ | |
) | |
SELECT | |
cbd.CustomerId, | |
cbd.CustomerName, | |
cbd.AccountOpeningDate, | |
cbd.AccountNumber, | |
cbd.AccountType, | |
cbd.AccountId, | |
NULL AS TransactionId, | |
NULL AS TransactionDate, | |
NULL AS TransactionType, | |
NULL AS TransactionAmount, | |
NULL AS TransactionCurrency, | |
NULL AS LoanId, | |
NULL AS LoanAmount, | |
NULL AS InterestRate, | |
NULL AS LoanStartDate, | |
NULL AS LoanEndDate | |
FROM CustomerBankingData cbd | |
WHERE cbd.rn_by_date = 1 -- Keeping the first account opening date per customer (example deduplication) | |
UNION ALL | |
SELECT | |
NULL AS CustomerId, | |
NULL AS CustomerName, | |
NULL AS AccountOpeningDate, | |
NULL AS AccountNumber, | |
NULL AS AccountType, | |
tbd.AccountId, | |
tbd.TransactionId, | |
tbd.TransactionDate, | |
tbd.TransactionType, | |
tbd.TransactionAmount, | |
tbd.TransactionCurrency, | |
NULL AS LoanId, | |
NULL AS LoanAmount, | |
NULL AS InterestRate, | |
NULL AS LoanStartDate, | |
NULL AS LoanEndDate | |
FROM TransactionBankingData tbd | |
WHERE tbd.rn_by_details = 1 -- Keeping the first transaction based on ID for unique account/date/amount | |
UNION ALL | |
SELECT | |
NULL AS CustomerId, | |
NULL AS CustomerName, | |
NULL AS AccountOpeningDate, | |
NULL AS AccountNumber, | |
NULL AS AccountType, | |
lbd.AccountId, | |
NULL AS TransactionId, | |
NULL AS TransactionDate, | |
NULL AS TransactionType, | |
NULL AS TransactionAmount, | |
NULL AS TransactionCurrency, | |
lbd.LoanId, | |
lbd.LoanAmount, | |
lbd.InterestRate, | |
lbd.LoanStartDate, | |
lbd.LoanEndDate | |
FROM LoanBankingData lbd | |
WHERE lbd.rn_by_date = 1 -- Keeping the loan with the most recent start date per account and loan ID |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment