Skip to content

Instantly share code, notes, and snippets.

@MarkPryceMaherMSFT
Created August 22, 2024 15:25
Show Gist options
  • Save MarkPryceMaherMSFT/026dff32103ce0d41cd205468be1bd94 to your computer and use it in GitHub Desktop.
Save MarkPryceMaherMSFT/026dff32103ce0d41cd205468be1bd94 to your computer and use it in GitHub Desktop.
# Purpose: For every lakehouse under the given path, print each table's _delta_log size (in MB) and file count, and warn when the logs are large or numerous.
from notebookutils import mssparkutils
# Root ABFSS folder to scan. The code below lists this folder and looks for
# "*.Lakehouse" directories directly inside it, so start from a table's ABFSS
# path (right-click the table name and choose COPY PATH) and strip the
# lakehouse name and everything after it.
# NOTE(review): the value below looks like it was mangled by e-mail
# obfuscation when this page was captured -- replace it with your real path.
delta_table_path = "abfss://[email protected]/"

# Warn when a table's _delta_log folder holds more than this many files.
HowManyLogsIsTooManyLogs = 50

# Warn when any single entry in a _delta_log folder is larger than this (MB).
HowLargeALogCanBeBeforeItsAIssue = 1
# Walk every lakehouse found under delta_table_path and, for each table, report
# the size and file count of its _delta_log folder, warning when a log entry or
# the number of log files exceeds HowLargeALogCanBeBeforeItsAIssue /
# HowManyLogsIsTooManyLogs.
lakehouses = mssparkutils.fs.ls(delta_table_path)
for lakehouse in lakehouses:
    # Only directories whose name ends in ".Lakehouse" are scanned.
    if not (lakehouse.name.endswith('.Lakehouse') and lakehouse.isDir):
        continue
    lakehouse_name = lakehouse.name
    lakehouse_path = lakehouse.path
    print("****************************************************************")
    print(lakehouse_name)
    print(lakehouse_path)

    # A lakehouse without a Tables folder gets its header printed and nothing else.
    if not mssparkutils.fs.exists(lakehouse_path + "/Tables"):
        continue
    tables_list = mssparkutils.fs.ls(lakehouse_path + "/Tables")

    for table in tables_list:
        if not table.isDir:
            continue
        tables_name = table.name
        tables_path = table.path
        print(f" Table:{tables_name}")

        # Folders under Tables that have no _delta_log only get the table line.
        if not mssparkutils.fs.exists(tables_path + "/_delta_log/"):
            continue
        files = mssparkutils.fs.ls(tables_path + "/_delta_log/")

        # Flag the table if any single entry, scaled down to MB, is over the limit.
        if any(
            entry.size / 1024 / 1024 > HowLargeALogCanBeBeforeItsAIssue
            for entry in files
        ):
            print(" ***** Warning: Large logs.*****")

        # Totals count files only; sub-directories of _delta_log are ignored.
        log_files = [entry for entry in files if not entry.isDir]
        total_size = sum(entry.size for entry in log_files) / 1024 / 1024
        file_count = len(log_files)
        if file_count > HowManyLogsIsTooManyLogs:
            print(" ***** Warning: many logs ***** ")
        print(f" Log Size: {total_size:.2f} MB, File Count: {file_count}")
        print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment