Created
August 22, 2024 15:25
-
-
Save MarkPryceMaherMSFT/026dff32103ce0d41cd205468be1bd94 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Purpose: For every lakehouse found under a workspace path, print each
# table's _delta_log total size (in MB) and file count, and warn when a table
# has an unusually large log file or an unusually high number of log files.
from notebookutils import mssparkutils

# ABFSS path of the location that contains the "*.Lakehouse" folders. You can
# get the ABFSS path of a delta table by right-clicking the table name and
# selecting COPY PATH, then removing the table path and the lakehouse name.
# NOTE(review): the "[email protected]" portion looks like a redacted
# placeholder - replace it with your own workspace/host before running.
delta_table_path = "abfss://[email protected]/"

# Warn when a table has more than this many files in its _delta_log folder.
HowManyLogsIsTooManyLogs = 50
# Warn when any single file in _delta_log is larger than this many MB.
HowLargeALogCanBeBeforeItsAIssue = 1

_BYTES_PER_MB = 1024 * 1024

# List everything under the root path; only "*.Lakehouse" directories are scanned.
lakehouses = mssparkutils.fs.ls(delta_table_path)
for lakehouse in lakehouses:
    if not (lakehouse.name.endswith('.Lakehouse') and lakehouse.isDir):
        continue

    lakehouse_name = lakehouse.name
    lakehouse_path = lakehouse.path
    print("****************************************************************")
    print(lakehouse_name)
    print(lakehouse_path)

    # A lakehouse without a Tables folder has nothing to report.
    tables_root = lakehouse_path + "/Tables"
    if not mssparkutils.fs.exists(tables_root):
        continue

    # Each sub-directory of Tables is treated as one table.
    tables_list = mssparkutils.fs.ls(tables_root)
    for tables in tables_list:
        if not tables.isDir:
            continue

        tables_name = tables.name
        tables_path = tables.path
        print(f" Table:{tables_name}")

        # Directories without a _delta_log folder are listed by name only.
        log_path = tables_path + "/_delta_log/"
        if not mssparkutils.fs.exists(log_path):
            continue

        files = mssparkutils.fs.ls(log_path)
        # Use the same "files only" view for the size check, the total and the
        # count so that sub-directories never influence any of the metrics.
        log_files = [entry for entry in files if not entry.isDir]

        # One warning per table, no matter how many files exceed the limit.
        largelog = any(
            entry.size / _BYTES_PER_MB > HowLargeALogCanBeBeforeItsAIssue
            for entry in log_files
        )
        if largelog:
            print(" ***** Warning: Large logs.*****")

        total_size = sum(entry.size for entry in log_files) / _BYTES_PER_MB
        file_count = len(log_files)
        if file_count > HowManyLogsIsTooManyLogs:
            print(" ***** Warning: many logs ***** ")

        print(f" Log Size: {total_size:.2f} MB, File Count: {file_count}")
        # NOTE(review): original indentation was lost; the blank separator line
        # is assumed to follow each table's summary - confirm.
        print("")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment