
Mark Pryce-Maher MarkPryceMaherMSFT

@MarkPryceMaherMSFT
MarkPryceMaherMSFT / generate_different_types_of_table.py
Created October 31, 2024 17:39
Script to generate different types of delta tables
def create_new_table(tablename, columnmapping, delete_vectors, partitions, columnmappingbyid,
                     liquidclustering, optimize, vacuum, writeStatsAsStruct, writeStatsAsJson):
    # Build a table name that encodes which Delta features are enabled,
    # using the default sample table, publicholidays, as the source.
    tablename = "PARQUET_2_0"
    if columnmapping and not columnmappingbyid:
        tablename = tablename + "_columnmapping_by_name"
    if delete_vectors:
        tablename = tablename + "_delete_vectors"
    if partitions:
        tablename = tablename + "_partitions"  # assumed continuation of the naming pattern; the preview truncates here
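A minimal usage sketch, not part of the gist: the calls below follow the signature above, with every flag passed explicitly and the source table name taken from the comment in the function.

# Hypothetical usage: a plain table, then one with delete vectors enabled.
create_new_table("publicholidays", columnmapping=False, delete_vectors=False, partitions=False,
                 columnmappingbyid=False, liquidclustering=False, optimize=False, vacuum=False,
                 writeStatsAsStruct=False, writeStatsAsJson=False)
create_new_table("publicholidays", columnmapping=False, delete_vectors=True, partitions=False,
                 columnmappingbyid=False, liquidclustering=False, optimize=False, vacuum=False,
                 writeStatsAsStruct=False, writeStatsAsJson=False)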
@MarkPryceMaherMSFT
MarkPryceMaherMSFT / Create a case-insensitive collation warehouse.ipynb
Created October 17, 2024 11:09
Fabric Notebook to create a case-insensitive collation warehouse
@MarkPryceMaherMSFT
MarkPryceMaherMSFT / createwarehouse.py
Last active November 30, 2024 10:53
Create Fabric warehouse with case insensitive collation
# Details from the article: https://learn.microsoft.com/en-us/fabric/data-warehouse/collation
# Default collation is Latin1_General_100_BIN2_UTF8
# New collation is Latin1_General_100_CI_AS_KS_WS_SC_UTF8
# Requires sempy version 0.4.0 or higher
!pip install semantic-link -q
import json
import sempy.fabric as fabric
from sempy.fabric.exceptions import FabricHTTPException, WorkspaceNotFoundException
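The preview cuts off before the REST call itself. A minimal sketch of how the pieces above could be combined, assuming the payload shape described in the linked collation article; the warehouse name is a placeholder, not the gist's value.

client = fabric.FabricRestClient()
workspace_id = fabric.get_workspace_id()
payload = {
    "displayName": "CI_Warehouse",  # hypothetical warehouse name
    "type": "Warehouse",
    # Collation value from the article header comments above.
    "creationPayload": {"defaultCollation": "Latin1_General_100_CI_AS_KS_WS_SC_UTF8"},
}
response = client.post(f"/v1/workspaces/{workspace_id}/items", json=payload)
print(response.status_code)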
@MarkPryceMaherMSFT
MarkPryceMaherMSFT / test_the_sync.py
Created September 27, 2024 21:11
Test the performance of the SQL endpoint sync
import pandas as pd
import struct
import sqlalchemy
import pyodbc
import notebookutils
import sempy.fabric as fabric
import time
from pyspark.sql import functions as fn
from datetime import datetime
def create_engine(connection_string: str):
    # Body as in the full Query_SQL_Endpoint.py gist below: fetch a Power BI
    # access token and hand it to pyodbc via attrs_before.
    token = notebookutils.credentials.getToken('https://analysis.windows.net/powerbi/api').encode("UTF-16-LE")
    token_struct = struct.pack(f'<I{len(token)}s', len(token), token)
    SQL_COPT_SS_ACCESS_TOKEN = 1256
    return sqlalchemy.create_engine("mssql+pyodbc://", creator=lambda: pyodbc.connect(connection_string, attrs_before={SQL_COPT_SS_ACCESS_TOKEN: token_struct}))
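The rest of this gist is truncated in the preview. A rough sketch of a timing loop consistent with the imports above; the table name, row target, and connection string are assumptions for illustration, not the gist's code.

engine = create_engine(connection_string)  # connection_string is defined in the full gist
expected_rows = 100                        # hypothetical target row count
start = datetime.now()
while True:
    df = pd.read_sql("SELECT COUNT(*) AS c FROM dbo.sync_test", engine)  # hypothetical table
    if df["c"][0] >= expected_rows:
        break
    time.sleep(5)
print(f"SQL endpoint sync took {(datetime.now() - start).total_seconds():.1f} seconds")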
@MarkPryceMaherMSFT
MarkPryceMaherMSFT / wait_for_table_to_be_updated.py
Created September 27, 2024 20:51
Test how long it takes to refresh data in the SQL endpoint
import pandas as pd
import struct
import sqlalchemy
import pyodbc
import notebookutils
import sempy.fabric as fabric
import time
from pyspark.sql import functions as fn
from datetime import datetime
def create_engine(connection_string: str):
    # Same token-based engine helper as in Query_SQL_Endpoint.py below.
    token = notebookutils.credentials.getToken('https://analysis.windows.net/powerbi/api').encode("UTF-16-LE")
    token_struct = struct.pack(f'<I{len(token)}s', len(token), token)
    SQL_COPT_SS_ACCESS_TOKEN = 1256
    return sqlalchemy.create_engine("mssql+pyodbc://", creator=lambda: pyodbc.connect(connection_string, attrs_before={SQL_COPT_SS_ACCESS_TOKEN: token_struct}))
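The preview is truncated here too. A hedged sketch of a wait loop in the spirit of the description; the marker table and column are invented for illustration, and spark is the Fabric notebook's built-in session.

marker = datetime.now().isoformat()
spark.sql(f"INSERT INTO marker_table VALUES ('{marker}')")  # hypothetical marker table
engine = create_engine(connection_string)                   # connection_string from the full gist
start = time.time()
# Poll the SQL endpoint until the freshly written marker row becomes visible.
while pd.read_sql(f"SELECT COUNT(*) AS c FROM dbo.marker_table WHERE marker = '{marker}'", engine)["c"][0] == 0:
    time.sleep(2)
print(f"Endpoint refreshed after {time.time() - start:.1f} seconds")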
@MarkPryceMaherMSFT
MarkPryceMaherMSFT / Query SQL Endpoint.py
Last active September 28, 2024 09:23
Script that gets the SQL endpoint for a workspace and lakehouse
import pandas as pd
import struct
import sqlalchemy
import pyodbc
import notebookutils
import sempy.fabric as fabric
def create_engine(connection_string: str):
    token = notebookutils.credentials.getToken('https://analysis.windows.net/powerbi/api').encode("UTF-16-LE")
    token_struct = struct.pack(f'<I{len(token)}s', len(token), token)
    SQL_COPT_SS_ACCESS_TOKEN = 1256
    return sqlalchemy.create_engine("mssql+pyodbc://", creator=lambda: pyodbc.connect(connection_string, attrs_before={SQL_COPT_SS_ACCESS_TOKEN: token_struct}))
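The truncated part of this gist presumably resolves the endpoint itself. One way that could look, sketched here with the response shape assumed from the Fabric REST Lakehouse API rather than taken from the gist:

client = fabric.FabricRestClient()
workspace_id = fabric.get_workspace_id()
lakehouse_id = fabric.get_lakehouse_id()
r = client.get(f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}")
# sqlEndpointProperties.connectionString is the server name to use with pyodbc.
sql_endpoint = r.json()["properties"]["sqlEndpointProperties"]["connectionString"]
print(sql_endpoint)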
# Purpose: print out details of partitions, files per partition, and size per partition in GB.
from notebookutils import mssparkutils

# Define the ABFSS path for your delta table. You can get the ABFSS path of a delta table
# by right-clicking the table name and selecting COPY PATH from the list of options.
delta_table_path = "abfss://ITEM_NAME@ONELAKE_PATH.fabric.microsoft.com/YOURLAKEHOUSE_NAME.Lakehouse/Tables"

HowManyLogsIsTooManyLogs = 50            # 50 feels like a big number
HowLargeALogCanBeBeforeItsAIssue = 1     # value in MB

# List all partitions for the given delta table
tables_list = mssparkutils.fs.ls(delta_table_path)

# Initialize a dictionary to store partition details
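The snippet ends at the dictionary initialization. A plausible continuation under the stated purpose, with the aggregation logic assumed; mssparkutils.fs.ls returns entries with name, path, and size in bytes.

partition_details = {}
for table in tables_list:
    files = mssparkutils.fs.ls(table.path)
    total_bytes = sum(f.size for f in files)
    partition_details[table.name] = {"files": len(files), "size_gb": total_bytes / (1024 ** 3)}
    # Flag a noisy _delta_log folder using the threshold defined above.
    if table.name == "_delta_log" and len(files) > HowManyLogsIsTooManyLogs:
        print(f"_delta_log holds {len(files)} files - may be worth a checkpoint/cleanup")
for name, detail in partition_details.items():
    print(f"{name}: {detail['files']} files, {detail['size_gb']:.3f} GB")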
@MarkPryceMaherMSFT
MarkPryceMaherMSFT / Query_SQL_Endpoint.py
Last active August 21, 2024 21:10
Code to query a SQL endpoint from a notebook. Cut and paste it into a cell, then update the sql_endpoint and database variables.
import pandas as pd
import struct
import sqlalchemy
import pyodbc
import notebookutils
def create_engine(connection_string: str):
    # Fetch a Power BI access token and pack it into the structure pyodbc expects.
    token = notebookutils.credentials.getToken('https://analysis.windows.net/powerbi/api').encode("UTF-16-LE")
    token_struct = struct.pack(f'<I{len(token)}s', len(token), token)
    SQL_COPT_SS_ACCESS_TOKEN = 1256
    # Hand the token to every new connection via attrs_before.
    return sqlalchemy.create_engine("mssql+pyodbc://", creator=lambda: pyodbc.connect(connection_string, attrs_before={SQL_COPT_SS_ACCESS_TOKEN: token_struct}))
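A usage sketch following the description above; the endpoint, database, and query are placeholders to replace with your own values.

sql_endpoint = "YOUR_ENDPOINT.datawarehouse.fabric.microsoft.com"  # placeholder
database = "YOUR_DATABASE"                                         # placeholder
connection_string = (
    f"Driver={{ODBC Driver 18 for SQL Server}};"
    f"Server={sql_endpoint},1433;Database={database};Encrypt=Yes;"
)
engine = create_engine(connection_string)
df = pd.read_sql("SELECT TOP 10 * FROM INFORMATION_SCHEMA.TABLES", engine)
print(df)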