Created
July 12, 2024 09:08
-
-
Save t-book/85239e8edb962777d9c829ec6b4e3256 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import os
import sys
import xml.etree.ElementTree as ET
def get_published_layers(workspace_dir):
    """Collect layer and coverage names declared in XML configuration files.

    Walks ``workspace_dir`` recursively and parses every file whose name ends
    in ``.xml``. The text of each ``<name>`` element that is a direct child of
    a ``<layer>`` or ``<coverage>`` element is collected, whether that element
    is the document root or sits deeper in the document.

    Files that cannot be read or parsed are reported on stdout and skipped, so
    one broken file does not abort the scan.

    Args:
        workspace_dir: Directory to scan for XML configuration files.

    Returns:
        A set of non-empty name strings with surrounding whitespace removed.
    """
    published_layers = set()
    for root, _dirs, files in os.walk(workspace_dir):
        for file in files:
            if not file.endswith('.xml'):
                continue
            xml_path = os.path.join(root, file)
            try:
                root_elem = ET.parse(xml_path).getroot()
            except (ET.ParseError, OSError, ValueError) as e:
                print(f"Error parsing {xml_path}: {e}")
                continue

            # ".//layer/name" only matches *descendants* of the root, so a
            # document whose root element is itself <layer> or <coverage>
            # would contribute nothing; pick up the root's own <name> too.
            # NOTE(review): GeoServer's per-layer layer.xml / coverage.xml
            # files appear to have exactly that shape - confirm.
            name_elems = []
            if root_elem.tag in ('layer', 'coverage'):
                name_elems.extend(root_elem.findall('name'))
            name_elems.extend(root_elem.findall('.//layer/name'))
            name_elems.extend(root_elem.findall('.//coverage/name'))

            for elem in name_elems:
                # An empty <name/> has text None; never record that as a name.
                name = (elem.text or '').strip()
                if name:
                    published_layers.add(name)
    return published_layers
def _record_obsolete(path, obsolete_files, delete):
    """Append ``path`` to ``obsolete_files``, optionally delete it, return its size."""
    try:
        size = os.path.getsize(path)
    except OSError as e:
        # A file that cannot be stat'ed (e.g. a broken symlink) is still
        # reported, it just contributes nothing to the size total.
        print(f"Error reading size of {path}: {e}")
        size = 0
    obsolete_files.append(path)
    if delete:
        try:
            os.remove(path)
            print(f"Deleted: {path}")
        except OSError as e:
            print(f"Error deleting {path}: {e}")
    return size


def scan_data_directory(data_dir, published_layers, delete=False):
    """Find (and optionally delete) data files that match no published layer.

    Walks ``data_dir`` recursively. File extensions are matched
    case-insensitively.

    * Raster files are looked up in ``published_layers`` by their file name
      without extension.
    * Vector files (``.shp``, ``.geojson``) are looked up as
      ``"<name of containing directory>:<file name without extension>"``.
      NOTE(review): this assumes vector names in ``published_layers`` carry
      that prefix while raster names do not - confirm against the names the
      XML configuration actually yields.

    An unpublished ``.shp`` also takes its ``.shx``/``.dbf``/``.prj``/``.cpg``
    companions (same stem, same directory) with it. An unpublished
    ``.geojson`` is reported on its own.

    Args:
        data_dir: Directory tree to scan.
        published_layers: Collection of published layer names to keep.
        delete: When true, remove each obsolete file; otherwise only report.

    Returns:
        A ``(obsolete_files, total_size)`` tuple: the list of obsolete file
        paths (including any whose deletion failed) and their combined size
        in bytes, measured before deletion.
    """
    obsolete_files = []
    total_size = 0
    seen = set()  # guards against reporting or counting a path twice
    raster_extensions = ('.tif', '.tiff', '.img', '.ecw', '.jp2', '.nc', '.hdf', '.geotiff', '.geotif')
    vector_extensions = ('.shp', '.geojson')
    shapefile_sidecars = ('.shx', '.dbf', '.prj', '.cpg')

    for root, _dirs, files in os.walk(data_dir):
        # Index companion files by stem once per directory so each .shp
        # lookup is a dict access, whatever the case of the extension.
        sidecars_by_stem = {}
        for name in files:
            name_stem, name_ext = os.path.splitext(name)
            if name_ext.lower() in shapefile_sidecars:
                sidecars_by_stem.setdefault(name_stem, []).append(name)

        for file in files:
            lowered = file.lower()
            stem = os.path.splitext(file)[0]

            if lowered.endswith(raster_extensions):
                if stem in published_layers:
                    continue
                candidates = [file]
            elif lowered.endswith(vector_extensions):
                layer_name = f"{os.path.basename(root)}:{stem}"
                if layer_name in published_layers:
                    continue
                # Use the name as found on disk rather than rebuilding it, so
                # FOO.SHP and *.geojson files are actually matched.
                candidates = [file]
                if lowered.endswith('.shp'):
                    candidates.extend(sorted(
                        sidecars_by_stem.get(stem, []),
                        key=lambda n: shapefile_sidecars.index(os.path.splitext(n)[1].lower()),
                    ))
            else:
                continue

            for name in candidates:
                path = os.path.join(root, name)
                if path in seen:
                    continue
                seen.add(path)
                total_size += _record_obsolete(path, obsolete_files, delete)
    return obsolete_files, total_size
def main():
    """Command-line entry point: report, and optionally delete, obsolete files.

    Parses ``data_dir``, ``workspace_dir`` and the optional ``--delete`` flag,
    collects the published layer names from the workspace directory, scans
    the data directory against them, and prints the obsolete files together
    with their count and combined size in MB.

    Exits with status 2 if either directory does not exist, and with status 1
    if the published layers cannot be collected or if ``--delete`` is given
    while no published layer was found.
    """
    parser = argparse.ArgumentParser(description="Scan GeoServer data directory for obsolete files")
    parser.add_argument("data_dir", help="Path to the GeoServer data directory")
    parser.add_argument(
        "workspace_dir",
        help="Path to the GeoServer workspace directory containing XML configuration files",
    )
    parser.add_argument("--delete", action="store_true", help="Delete obsolete files")
    args = parser.parse_args()

    # os.walk() silently yields nothing for a missing directory, which would
    # make a mistyped path look like "nothing is published".
    for label, path in (("data_dir", args.data_dir), ("workspace_dir", args.workspace_dir)):
        if not os.path.isdir(path):
            parser.error(f"{label} is not an existing directory: {path}")

    try:
        published_layers = get_published_layers(args.workspace_dir)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    # With an empty published set every data file counts as obsolete; that is
    # far more likely a wrong workspace path than a real request to wipe the
    # data directory, so never delete on that basis.
    if args.delete and not published_layers:
        print(
            f"Error: no published layers found under {args.workspace_dir}; "
            "refusing to delete. Run without --delete to inspect the result.",
            file=sys.stderr,
        )
        sys.exit(1)

    obsolete_files, total_size = scan_data_directory(args.data_dir, published_layers, args.delete)

    if args.delete:
        print("Obsolete files deleted:")
    else:
        print("Obsolete files found (dry run):")
    for file in obsolete_files:
        print(file)

    print(f"\nTotal obsolete files: {len(obsolete_files)}")
    print(f"Total space used by obsolete files: {total_size / (1024 * 1024):.2f} MB")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
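Run a dry run first to list obsolete files without removing anything:
python script.py /path/to/data/directory /path/to/workspace/directory
Add --delete only after reviewing that list; it permanently removes the listed files:
python script.py /path/to/data/directory /path/to/workspace/directory --delete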