Skip to content

Instantly share code, notes, and snippets.

@MarkPryceMaherMSFT
Last active August 28, 2024 13:50
Show Gist options
  • Save MarkPryceMaherMSFT/2af9c203fd17157a4b68509bf055e0d9 to your computer and use it in GitHub Desktop.
Save MarkPryceMaherMSFT/2af9c203fd17157a4b68509bf055e0d9 to your computer and use it in GitHub Desktop.
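# Purpose: For every lakehouse found under the given path, print each table's _delta_log total size (MB), log file count, and largest log file size (MB), with warnings for large or numerous logs.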
from notebookutils import mssparkutils
# Define the ABFSS path to scan. You can get the ABFSS path of a delta table by right-clicking the table name and selecting COPY PATH from the list of options.
# Then remove the table path and the lakehouse name, so the path points at the folder that contains the *.Lakehouse items.
# Folder whose direct children are scanned for "*.Lakehouse" directories.
# NOTE(review): the value below looks like a mangled placeholder (the host part
# reads like an obfuscated e-mail address) -- replace it with your own ABFSS
# root before running; confirm the exact format against the OneLake docs.
delta_table_path = "abfss://[email protected]/"
# Warn when a table's _delta_log holds more than this many files
# (50 is a rule-of-thumb value, not a hard limit).
HowManyLogsIsTooManyLogs = 50
# Warn when any single file in _delta_log is larger than this many MB.
HowLargeALogCanBeBeforeItsAIssue = 1


def _summarize_delta_log(entries):
    """Return ``(total_mb, file_count, largest_mb)`` for a _delta_log listing.

    ``entries`` is the listing of a ``_delta_log`` folder; each entry must
    expose ``size`` (bytes) and ``isDir``.  Directory entries are ignored for
    all three figures so that the totals and the largest-file value are
    computed over the same set of files.  An empty listing yields zeros.
    """
    sizes = [entry.size for entry in entries if not entry.isDir]
    total_mb = sum(sizes) / 1024 / 1024
    largest_mb = max(sizes, default=0) / 1024 / 1024
    return total_mb, len(sizes), largest_mb


# Walk: <root>/<name>.Lakehouse/Tables/<table>/_delta_log/
lakehouses = mssparkutils.fs.ls(delta_table_path)
for lakehouse in lakehouses:
    # Only directories named "*.Lakehouse" are inspected.
    if not (lakehouse.name.endswith('.Lakehouse') and lakehouse.isDir):
        continue

    print("****************************************************************")
    print(lakehouse.name)
    print(lakehouse.path)

    tables_root = lakehouse.path + "/Tables"
    if not mssparkutils.fs.exists(tables_root):
        continue

    for table in mssparkutils.fs.ls(tables_root):
        if not table.isDir:
            continue

        # Folders without a _delta_log directory are skipped silently.
        log_path = table.path + "/_delta_log/"
        if not mssparkutils.fs.exists(log_path):
            continue

        total_mb, file_count, largest_mb = _summarize_delta_log(
            mssparkutils.fs.ls(log_path)
        )

        # "Some file exceeds the threshold" is the same as "the largest file
        # exceeds the threshold", so no separate flag is needed.
        if largest_mb > HowLargeALogCanBeBeforeItsAIssue:
            print(" ***** Warning: Large logs.*****")
        if file_count > HowManyLogsIsTooManyLogs:
            print(" ***** Warning: many logs ***** ")
        print(f" Table:{table.name}, Log Size: {total_mb:.2f} MB, File Count: {file_count}, maxlogfilesize {largest_mb:.2f} MB")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment