Mark Pryce-Maher (MarkPryceMaherMSFT)
@MarkPryceMaherMSFT
MarkPryceMaherMSFT / wait_for_table_to_be_updated.py
Created September 27, 2024 20:51
Test how long it takes to refresh data in the SQL Endpoint
import pandas as pd
import struct
import sqlalchemy
import pyodbc
import notebookutils
import sempy.fabric as fabric
import time
from pyspark.sql import functions as fn
from datetime import datetime
def create_engine(connection_string : str):
    # Acquire a Fabric access token and pack it the way the ODBC driver expects
    token = notebookutils.credentials.getToken('https://analysis.windows.net/powerbi/api').encode("UTF-16-LE")
    token_struct = struct.pack(f'<I{len(token)}s', len(token), token)
    SQL_COPT_SS_ACCESS_TOKEN = 1256
    return sqlalchemy.create_engine("mssql+pyodbc://", creator=lambda: pyodbc.connect(connection_string, attrs_before={SQL_COPT_SS_ACCESS_TOKEN: token_struct}))
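The preview ends before the wait loop itself. Below is a minimal sketch of how such a poll might look, reusing the create_engine helper above; the endpoint, database, table name, and expected_rows values are placeholders of mine, not the gist's actual logic.

connection_string = "Driver={ODBC Driver 18 for SQL Server};Server=<your-sql-endpoint>,1433;Database=<your-lakehouse>"
engine = create_engine(connection_string)

expected_rows = 1000  # hypothetical freshness signal: the row count the refreshed endpoint should report
start = time.time()
while True:
    # Poll the SQL Endpoint until it reflects the latest write to the lakehouse
    df = pd.read_sql("SELECT COUNT(*) AS n FROM dbo.my_table", engine)
    if df["n"][0] >= expected_rows:
        break
    time.sleep(10)
print(f"SQL Endpoint caught up after {time.time() - start:.0f} seconds")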
@MarkPryceMaherMSFT
MarkPryceMaherMSFT / Query SQL Endpoint.py
Last active September 28, 2024 09:23
Script that gets the SQL endpoint for a workspace and lakehouse
import pandas as pd
import struct
import sqlalchemy
import pyodbc
import notebookutils
import sempy.fabric as fabric
def create_engine(connection_string : str):
    # Acquire a Fabric access token and pack it the way the ODBC driver expects
    token = notebookutils.credentials.getToken('https://analysis.windows.net/powerbi/api').encode("UTF-16-LE")
    token_struct = struct.pack(f'<I{len(token)}s', len(token), token)
    SQL_COPT_SS_ACCESS_TOKEN = 1256
    return sqlalchemy.create_engine("mssql+pyodbc://", creator=lambda: pyodbc.connect(connection_string, attrs_before={SQL_COPT_SS_ACCESS_TOKEN: token_struct}))
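The rest of the gist presumably resolves the endpoint before connecting. One way to look up a lakehouse's SQL endpoint with sempy's REST client is sketched below; the REST path and property names follow the public Fabric API, but workspace_id and lakehouse_id are placeholder values, and this may not match the gist's own approach.

client = fabric.FabricRestClient()
workspace_id = fabric.resolve_workspace_id()   # defaults to the current workspace
lakehouse_id = "<your-lakehouse-item-id>"      # placeholder GUID

# The lakehouse item exposes its SQL endpoint under sqlEndpointProperties
item = client.get(f"v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}").json()
sql_endpoint = item["properties"]["sqlEndpointProperties"]["connectionString"]
print(sql_endpoint)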
# Purpose: Print out details of partitions, files per partition, and size per partition in GB.
from notebookutils import mssparkutils
# Define the ABFSS path for your delta table. You can get the ABFSS path of a delta table by right-clicking on the table name and selecting COPY PATH from the list of options.
# Remove the table name from the copied path so it points at the Tables folder.
delta_table_path = "abfss://ITEM_NAME@ONELAKE_PATH.fabric.microsoft.com/YOURLAKEHOUSE_NAME.Lakehouse/Tables"
HowManyLogsIsTooManyLogs = 50  ## 50 feels like a big enough number
HowLargeALogCanBeBeforeItsAIssue = 1  ## Value in MB
# List all partitions for the given delta table
tables_list = mssparkutils.fs.ls(delta_table_path)
# Initialize a dictionary to store partition details
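The preview cuts off at the dictionary setup. A minimal sketch of the kind of walk that could follow, using only mssparkutils.fs.ls as above; the aggregation itself is my assumption, not the gist's code.

partition_details = {}
for entry in tables_list:
    if not entry.isDir:
        continue
    # Sum the sizes of the files directly under each folder
    files = [f for f in mssparkutils.fs.ls(entry.path) if not f.isDir]
    size_gb = sum(f.size for f in files) / (1024 ** 3)
    partition_details[entry.name] = {"files": len(files), "size_gb": round(size_gb, 3)}

for name, detail in partition_details.items():
    print(f"{name}: {detail['files']} files, {detail['size_gb']} GB")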
@MarkPryceMaherMSFT
MarkPryceMaherMSFT / Query_SQL_Endpoint.py
Last active August 21, 2024 21:10
Code to query a SQL endpoint from a notebook. Cut and paste into a cell, then update the sql_endpoint and database variables.
import pandas as pd
import struct
import sqlalchemy
import pyodbc
import notebookutils
def create_engine(connection_string : str):
    # Acquire a Fabric access token and pack it the way the ODBC driver expects
    token = notebookutils.credentials.getToken('https://analysis.windows.net/powerbi/api').encode("UTF-16-LE")
    token_struct = struct.pack(f'<I{len(token)}s', len(token), token)
    SQL_COPT_SS_ACCESS_TOKEN = 1256
    return sqlalchemy.create_engine("mssql+pyodbc://", creator=lambda: pyodbc.connect(connection_string, attrs_before={SQL_COPT_SS_ACCESS_TOKEN: token_struct}))
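A usage sketch for the helper above; per the description, the endpoint and database names are placeholders you would substitute.

sql_endpoint = "<your-sql-endpoint>.datawarehouse.fabric.microsoft.com"
database = "<your-database>"
connection_string = f"Driver={{ODBC Driver 18 for SQL Server}};Server={sql_endpoint},1433;Database={database}"

engine = create_engine(connection_string)
df = pd.read_sql("SELECT TOP 10 * FROM INFORMATION_SCHEMA.TABLES", engine)
display(df)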
@MarkPryceMaherMSFT
MarkPryceMaherMSFT / Delta Lake.sql
Last active September 6, 2022 18:10
Example of reading Delta Lake with Azure Synapse Dedicated SQL Pool. This is a POC, just to see if this is possible. *NON-PRODUCTION CODE*
/*
IF OBJECT_ID('mpmtest') IS NOT NULL
BEGIN;
DROP TABLE mpmtest
END
declare @path varchar(400), @dt datetime2, @credential varchar(500),@outputable varchar(500),@display int, @debug int;
set @path = 'https://cstestwss.blob.core.windows.net/csetestws/delta/demo/'
--set @dt = convert(datetime2,'2022/07/06 18:37:00'); --getdate(); -- for time travel --
set @dt = getdate(); -- for time travel --
@MarkPryceMaherMSFT
MarkPryceMaherMSFT / Import_data.sql
Created May 6, 2022 15:58
This script uses two procedures: create_external_table.sql and export_all_tables. It uses the control table from export_all_tables to drive the imports and hold all the metadata.
create proc import_data as
begin
declare @dummy_datetime datetime2 ;
declare @sSQL varchar(8000);
if exists (
select 1 from sys.schemas s inner join sys.tables t
on s.schema_id = t.schema_id
@MarkPryceMaherMSFT
MarkPryceMaherMSFT / create_external_table.sql
Last active May 6, 2022 15:46
This script creates an external table using the schema of a normal table. Give this proc the name/schema of the table and the location/file format for the data, and it will create an external table from those details.
create PROC [dbo].[create_external_table_from_table] @tbname [varchar](200),@schema [varchar](200),@storageacc [varchar](200),
@datafolder [varchar](400),@fileformat [varchar](400),@external_Table_Name [varchar](200),@external_Table_Schema_Name [varchar](200) AS
BEGIN
SET NOCOUNT ON
/*
This proc creates an external table based on the shape of the table we want to import into.
Usage:
exec [dbo].[create_external_table_from_table] 'tablename','schema','MyAzureStorage','/path/path/file.csv','FormatCSV','external_tbl_name','ext_schema'
@MarkPryceMaherMSFT
MarkPryceMaherMSFT / sp_kill_transactions.sql
Created May 3, 2022 20:37
Proc to kill long-running transactions
CREATE PROC [dbo].[sp_kill_transactions] AS
IF OBJECT_ID('tempdb..#stats_ddl') IS NOT NULL
BEGIN;
DROP TABLE #stats_ddl;
END;
CREATE TABLE #stats_ddl
WITH