Created
September 2, 2025 19:09
-
-
Save ianhi/c1a793e86efd7b2266b8afe650c36ae6 to your computer and use it in GitHub Desktop.
icechunk-files.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Arraylake repository test with configurable FD limits and concurrent requests. | |
| Usage: ar.py [--fd-limit N] [--max-concurrent N] [--repo REPO_NAME] [--trace-logs] | |
| """ | |
| from arraylake import Client | |
| import time | |
| import resource | |
| import icechunk | |
| import argparse | |
def log_event(message: str):
    """Print a timestamped ``EVENT:`` line to stdout for fdprof parsing.

    Args:
        message: Free-form text appended after the timestamp.
    """
    # Epoch seconds, rendered with nine decimal places.
    timestamp = time.time()
    line = f"EVENT: {timestamp:.9f} {message}"
    print(line)
def set_fd_limit(limit: int):
    """Set the soft file descriptor limit (RLIMIT_NOFILE) for this process.

    The requested value is capped at the current hard limit; the hard limit
    itself is left unchanged. Failures are reported through ``log_event``
    rather than raised, so the caller continues with whatever limit is
    already in effect.

    Args:
        limit: Desired soft limit on the number of open file descriptors.
    """
    try:
        _, hard_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
        # RLIM_INFINITY can be a negative sentinel (e.g. -1 on Linux), so
        # passing it to min() would select "unlimited" instead of the
        # requested value. An unlimited hard limit imposes no cap at all.
        if hard_limit == resource.RLIM_INFINITY:
            new_limit = limit
        else:
            new_limit = min(limit, hard_limit)
        resource.setrlimit(resource.RLIMIT_NOFILE, (new_limit, hard_limit))
    except (ValueError, OSError, OverflowError) as e:
        # Best effort: report the problem and keep going.
        log_event(f"Error setting FD limit: {e}")
def parse_args():
    """Build the command line parser and parse the process arguments.

    Returns:
        The ``argparse.Namespace`` with ``fd_limit``, ``max_concurrent``,
        ``repo`` and ``trace_logs`` attributes.
    """
    # (flag, add_argument keyword options), in the order shown by --help.
    option_table = (
        (
            "--fd-limit",
            {
                "type": int,
                "default": 256,
                "help": "File descriptor limit to set (default: 256)",
            },
        ),
        (
            "--max-concurrent",
            {
                "type": int,
                "default": 250,
                "help": "Maximum concurrent requests (default: 250)",
            },
        ),
        (
            "--repo",
            {
                "type": str,
                "default": "earthmover-public/aifs-outputs",
                "help": "Repository name to access (default: earthmover-public/aifs-outputs)",
            },
        ),
        (
            "--trace-logs",
            {
                "action": "store_true",
                "help": "Enable icechunk trace logging",
            },
        ),
    )

    cli = argparse.ArgumentParser(
        description="Test Arraylake repository with configurable limits"
    )
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()
def main():
    """Open a read-only session on the requested repository and hold it open.

    Steps, in order: parse the command line, optionally enable icechunk trace
    logging, apply the file descriptor limit, create the Arraylake client,
    fetch the repository with the requested ``max_concurrent_requests``, open
    a read-only session on ``"main"``, then sleep for five seconds.
    """
    cli_args = parse_args()

    # Trace logging is opt-in via --trace-logs.
    if cli_args.trace_logs:
        log_event("Enabling icechunk trace logging")
        icechunk.set_logs_filter("icechunk=trace")

    # Apply the file descriptor limit before the client is created.
    set_fd_limit(cli_args.fd_limit)

    arraylake_client = Client()
    repo_config = icechunk.RepositoryConfig(
        max_concurrent_requests=cli_args.max_concurrent
    )
    repository = arraylake_client.get_repo(cli_args.repo, config=repo_config)

    # Bind the session to a local so it stays referenced during the sleep.
    _session = repository.readonly_session("main")
    log_event("session opened")

    # Keep the session alive for monitoring
    time.sleep(5)
    log_event("done sleep 1")


# Run the test only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I recommend running with fdprof (https://github.com/ianhi/fdprof#fdprof):
fdprof --interval 0.001 --plot python ar.py --repo earthmover-public/aifs-outputs