Skip to content

Instantly share code, notes, and snippets.

@ianhi
Created September 2, 2025 19:09
Show Gist options
  • Save ianhi/c1a793e86efd7b2266b8afe650c36ae6 to your computer and use it in GitHub Desktop.
Save ianhi/c1a793e86efd7b2266b8afe650c36ae6 to your computer and use it in GitHub Desktop.
icechunk-files.py
#!/usr/bin/env python3
"""
Arraylake repository test with configurable FD limits and concurrent requests.
Usage: ar.py [--fd-limit N] [--max-concurrent N] [--repo REPO_NAME]
"""
from arraylake import Client
import time
import resource
import icechunk
import argparse
def log_event(message: str) -> None:
    """Print a timestamped ``EVENT:`` line for fdprof parsing.

    The emitted line has the form
    ``EVENT: <time.time() with 9 decimals> <message>``.

    Args:
        message: Free-form text appended after the timestamp.
    """
    # Flush on every call: when stdout is a pipe instead of a terminal it is
    # block-buffered, and event lines would otherwise be held back until the
    # buffer fills or the process exits.
    print(f"EVENT: {time.time():.9f} {message}", flush=True)
def set_fd_limit(limit: int) -> None:
    """Set the soft file descriptor limit (RLIMIT_NOFILE) for this process.

    The requested value is capped at the current hard limit, and the hard
    limit itself is left unchanged. Failures are reported through
    ``log_event`` instead of being raised, so the caller carries on with
    whatever limit is already in effect.

    Args:
        limit: Desired soft limit on open file descriptors.
    """
    try:
        _, hard_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
        # An unlimited hard limit is reported as RLIM_INFINITY, which is -1
        # on some platforms / Python versions. min() would then select the
        # sentinel and make the soft limit unlimited, so only cap against a
        # finite hard limit.
        if hard_limit == resource.RLIM_INFINITY:
            new_limit = limit
        else:
            new_limit = min(limit, hard_limit)
        resource.setrlimit(resource.RLIMIT_NOFILE, (new_limit, hard_limit))
    except (ValueError, OSError, OverflowError) as e:
        # Best effort: record the failure and continue with the old limit.
        log_event(f"Error setting FD limit: {e}")
def parse_args():
"""Parse command line arguments."""
parser = argparse.ArgumentParser(
description="Test Arraylake repository with configurable limits"
)
parser.add_argument(
"--fd-limit",
type=int,
default=256,
help="File descriptor limit to set (default: 256)",
)
parser.add_argument(
"--max-concurrent",
type=int,
default=250,
help="Maximum concurrent requests (default: 250)",
)
parser.add_argument(
"--repo",
type=str,
default="earthmover-public/aifs-outputs",
help="Repository name to access (default: earthmover-public/aifs-outputs)",
)
parser.add_argument(
"--trace-logs", action="store_true", help="Enable icechunk trace logging"
)
return parser.parse_args()
def main():
    """Open a read-only session on the requested repository, then idle.

    Steps, in order: parse the command line, optionally switch on icechunk
    trace logging, apply the file descriptor limit, create the Arraylake
    client, fetch the repository with the requested
    ``max_concurrent_requests``, open a read-only session on ``main``, and
    sleep for five seconds. ``EVENT:`` lines are logged after the session
    opens and after the sleep.
    """
    options = parse_args()

    # Trace logging is opt-in via --trace-logs.
    if options.trace_logs:
        log_event("Enabling icechunk trace logging")
        icechunk.set_logs_filter("icechunk=trace")

    # Apply the FD limit before any client or repository objects exist.
    set_fd_limit(options.fd_limit)

    arraylake_client = Client()
    repo_config = icechunk.RepositoryConfig(
        max_concurrent_requests=options.max_concurrent
    )
    repository = arraylake_client.get_repo(options.repo, config=repo_config)

    # Bind the session to a name so it stays referenced during the sleep
    # below, keeping it alive for monitoring.
    _session = repository.readonly_session("main")
    log_event("session opened")

    time.sleep(5)
    log_event("done sleep 1")


if __name__ == "__main__":
    main()
@ianhi
Copy link
Author

ianhi commented Sep 2, 2025

I recommend running with fdprof https://github.com/ianhi/fdprof#fdprof

fdprof --interval 0.001 --plot python ar.py --repo earthmover-public/aifs-outputs

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment