Skip to content

Instantly share code, notes, and snippets.

@mvdbeek
Last active August 6, 2025 14:13
Show Gist options
  • Save mvdbeek/14850657b66c640cb4838ab443e610c8 to your computer and use it in GitHub Desktop.
benchmark collection state PR
#!/usr/bin/env python3
"""
Benchmarking script for Galaxy API endpoints performance testing.
This script creates a test history with a configurable list:list collection,
then benchmarks two API endpoints:
1. /api/histories/{history_id}/contents
2. /api/dataset_collections/{hdca_id}/contents/{collection_id}
"""
import json
import statistics
import time
from typing import Dict, List, Any, Optional
import requests
import argparse
import sys
class GalaxyBenchmarker:
    """Benchmark Galaxy API endpoints against a generated list:list collection.

    Workflow: create a scratch history, populate it with a configurable
    list:list collection through the fetch API, wait for the upload jobs,
    then repeatedly time two read endpoints (history contents and
    collection contents) and report summary statistics.
    """

    def __init__(self, base_url: str = "http://localhost:8081", api_key: Optional[str] = None):
        """Set up the HTTP session.

        Args:
            base_url: Galaxy server root; a trailing slash is stripped.
            api_key: optional API key, sent as the ``x-api-key`` header on
                every request when provided.
        """
        self.base_url = base_url.rstrip("/")
        self.api_key = api_key
        self.session = requests.Session()
        if api_key:
            self.session.headers.update({"x-api-key": api_key})

    def make_request(self, method: str, endpoint: str, **kwargs) -> requests.Response:
        """Issue a request to ``{base_url}/api/{endpoint}``.

        Raises:
            Exception: if the response status is not 200; the message
                includes method, URL and status code for easier debugging.
        """
        url = f"{self.base_url}/api/{endpoint}"
        response = self.session.request(method, url, **kwargs)
        if response.status_code != 200:
            # Surface the request context, not just the body text.
            raise Exception(f"{method} {url} returned {response.status_code}: {response.text}")
        return response

    def create_history(self) -> str:
        """Create a new history for testing and return its encoded id."""
        print("Creating test history...")
        response = self.make_request("POST", "histories", json={"name": "Benchmark Test History"})
        history_id = response.json()["id"]
        print(f"Created history: {history_id}")
        return history_id

    def setup_test_data(self, outer_size: int = 10, inner_size: int = 10) -> tuple[str, str, str]:
        """Create a test history with a list:list collection using the fetch API.

        Args:
            outer_size: number of inner lists in the outer list.
            inner_size: number of pasted datasets per inner list.

        Returns:
            ``(history_id, hdca_id, collection_id)`` encoded ids.

        Raises:
            Exception: if the fetch response contains no output collections.
        """
        print("Setting up test data...")
        history_id = self.create_history()

        print(f"Creating list:list collection ({outer_size} lists × {inner_size} datasets)...")
        # Build nested elements in the fetch-API format: outer entries are
        # named sub-lists, inner entries are pasted-content datasets.
        elements = []
        for outer_idx in range(outer_size):
            sub_elements = [
                {
                    "src": "pasted",
                    "paste_content": f"test data {outer_idx}_{inner_idx}\n",
                    "name": f"dataset_{inner_idx}",
                    "ext": "txt",
                }
                for inner_idx in range(inner_size)
            ]
            elements.append({"name": f"list_{outer_idx}", "elements": sub_elements})

        # HdcaDataItemsTarget structure: destination type "hdca" creates the
        # collection directly in the history.
        targets = [{
            "destination": {"type": "hdca"},
            "collection_type": "list:list",
            "name": "benchmark_collection",
            "elements": elements,
        }]
        payload = {
            "history_id": history_id,
            "targets": targets,
        }
        response = self.make_request("POST", "tools/fetch", json=payload)
        fetch_result = response.json()
        print(f"Fetch response status: {response.status_code}")
        print(f"Fetch response keys: {list(fetch_result.keys())}")

        # Get the collection info from the fetch response.
        if not fetch_result.get("output_collections"):
            print(f"Full fetch response: {json.dumps(fetch_result, indent=2)}")
            raise Exception("No output collections returned from fetch")
        hdca_data = fetch_result["output_collections"][0]
        hdca_id = hdca_data["id"]
        collection_id = hdca_data["collection_id"]
        print(f"Collection created with HDCA ID: {hdca_id}")
        print(f"Collection details: {json.dumps(hdca_data, indent=2)}")

        # Wait for the fetch jobs to complete before benchmarking reads.
        print("Waiting for fetch jobs to complete...")
        jobs = fetch_result.get("jobs", [])
        print(f"Found {len(jobs)} jobs to wait for")
        if jobs:
            for job in jobs:
                job_id = job["id"]
                print(f" Job {job_id}: {job.get('state', 'unknown')} - {job.get('tool_id', 'unknown tool')}")
                self._wait_for_job(job_id)
        else:
            print(" No jobs found - this might be why collections aren't populating")

        # Give the collection some time to populate after jobs complete.
        print("Waiting for collection to populate...")
        time.sleep(10)
        # Report collection status but don't fail if not fully populated.
        response = self.make_request("GET", f"dataset_collections/{hdca_id}")
        data = response.json()
        print(f" Collection state: {data.get('populated_state')}, populated: {data.get('populated')}, elements: {data.get('element_count', 0)}")
        print("Test data setup complete:")
        print(f" History ID: {history_id}")
        print(f" HDCA ID: {hdca_id}")
        print(f" Collection ID: {collection_id}")
        return history_id, hdca_id, collection_id

    def _create_dataset(self, history_id: str, content: str, name: str) -> dict:
        """Create a single dataset with the classic ``upload1`` tool.

        NOTE(review): not called by the current flow (the fetch API is used
        instead); kept for manual experimentation.
        """
        payload = {
            "history_id": history_id,
            "tool_id": "upload1",
            "inputs": {
                "files_0|file_data": content,
                "files_0|NAME": name,
                "files_0|file_type": "txt",
            },
        }
        response = self.make_request("POST", "tools", json=payload)
        tool_result = response.json()
        return tool_result["outputs"][0]

    def _wait_for_dataset(self, history_id: str, dataset_id: str, timeout: int = 60):
        """Poll a dataset once per second until it reaches state ``ok``.

        Raises:
            Exception: if the dataset enters ``error`` or ``discarded``.
            TimeoutError: if it does not finish within ``timeout`` seconds.
        """
        start_time = time.time()
        while time.time() - start_time < timeout:
            response = self.make_request("GET", f"histories/{history_id}/contents/{dataset_id}")
            state = response.json().get("state")
            if state == "ok":
                return
            if state in ("error", "discarded"):
                raise Exception(f"Dataset {dataset_id} failed with state: {state}")
            time.sleep(1)
        raise TimeoutError(f"Dataset {dataset_id} did not complete within {timeout} seconds")

    def _create_base64_url(self, content: str) -> str:
        """Encode *content* as a Galaxy-compatible ``base64://`` URL."""
        import base64  # local import: only needed by this rarely-used helper
        encoded = base64.b64encode(content.encode('utf-8')).decode('utf-8')
        return f"base64://{encoded}"

    def _wait_for_job(self, job_id: str, timeout: int = 120):
        """Poll a job every 2 s until state ``ok``, printing progress.

        Raises:
            Exception: if the job enters ``error`` or ``failed``.
            TimeoutError: if it does not finish within ``timeout`` seconds.
        """
        start_time = time.time()
        while time.time() - start_time < timeout:
            response = self.make_request("GET", f"jobs/{job_id}")
            state = response.json().get("state")
            print(f" Job {job_id} state: {state}")
            if state == "ok":
                return
            if state in ("error", "failed"):
                raise Exception(f"Job {job_id} failed with state: {state}")
            time.sleep(2)
        raise TimeoutError(f"Job {job_id} did not complete within {timeout} seconds")

    def _wait_for_collection(self, hdca_id: str, timeout: int = 60):
        """Poll a collection until its structure is populated.

        For deferred datasets we can proceed once the structure is populated
        even if individual datasets are still in deferred state.
        """
        start_time = time.time()
        while time.time() - start_time < timeout:
            response = self.make_request("GET", f"dataset_collections/{hdca_id}")
            data = response.json()
            state = data.get("populated_state", "new")
            populated = data.get("populated", False)
            element_count = data.get("element_count", 0)
            print(f" Collection state: {state}, populated: {populated}, elements: {element_count}")
            if populated and element_count > 0:
                return
            if state in ("error", "failed"):
                raise Exception(f"Collection {hdca_id} failed with state: {state}")
            time.sleep(3)
        raise TimeoutError(f"Collection {hdca_id} did not complete within {timeout} seconds")

    def benchmark_endpoint(self, endpoint: str, runs: int = 10) -> Dict[str, float]:
        """Time ``runs`` GET requests against *endpoint*.

        Returns:
            Summary statistics (min/max/mean/median/stdev times and
            response sizes), or ``{"error": ...}`` if every run failed.
        """
        print(f"Benchmarking {endpoint} with {runs} runs...")
        response_times = []
        response_sizes = []
        for i in range(runs):
            # perf_counter is monotonic and higher-resolution than time()
            # and therefore the right clock for interval measurement.
            start = time.perf_counter()
            try:
                response = self.make_request("GET", endpoint)
            except Exception as e:
                print(f" Run {i+1}/{runs}: ERROR - {str(e)}")
                continue
            elapsed = time.perf_counter() - start
            size = len(response.content)
            response_times.append(elapsed)
            response_sizes.append(size)
            print(f" Run {i+1}/{runs}: {elapsed:.3f}s ({size} bytes)")
        if not response_times:
            return {"error": "All requests failed"}
        return {
            "runs": len(response_times),
            "min_time": min(response_times),
            "max_time": max(response_times),
            "mean_time": statistics.mean(response_times),
            "median_time": statistics.median(response_times),
            # stdev needs at least two samples
            "std_dev_time": statistics.stdev(response_times) if len(response_times) > 1 else 0.0,
            "mean_response_size": statistics.mean(response_sizes),
            "total_data_transfer": sum(response_sizes),
        }

    def run_benchmark(self, runs: int = 10, outer_size: int = 10, inner_size: int = 10) -> Dict[str, Any]:
        """Run the complete suite: set up data, benchmark both endpoints, clean up."""
        print("Starting Galaxy API Performance Benchmark")
        print("=" * 50)
        history_id, hdca_id, collection_id = self.setup_test_data(outer_size, inner_size)
        # Endpoints to benchmark, with the query strings the UI would use.
        endpoints = {
            "history_contents": f"histories/{history_id}/contents?v=dev&order=hid&offset=0&limit=100&q=deleted&qv=false&q=visible&qv=true",
            "collection_contents": f"dataset_collections/{hdca_id}/contents/{collection_id}?instance_type=history&offset=0&limit=50",
        }
        results = {}
        print("\n" + "=" * 50)
        print("BENCHMARKING ENDPOINTS")
        print("=" * 50)
        for name, endpoint in endpoints.items():
            results[name] = self.benchmark_endpoint(endpoint, runs)
            print()
        # Best-effort cleanup: a failed delete must not lose the results.
        print("Cleaning up test data...")
        try:
            self.make_request("DELETE", f"histories/{history_id}")
            print("Test history deleted successfully")
        except Exception as e:
            print(f"Warning: Failed to cleanup history: {e}")
        return {
            "test_config": {
                "base_url": self.base_url,
                "runs_per_endpoint": runs,
                "test_data": {
                    "history_id": history_id,
                    "hdca_id": hdca_id,
                    "collection_id": collection_id,
                    "structure": f"list:list with {outer_size} lists of {inner_size} datasets each ({outer_size * inner_size} total datasets)",
                },
            },
            "endpoints": endpoints,
            "results": results,
        }

    def print_results(self, results: Dict[str, Any]):
        """Pretty-print a result dict produced by :meth:`run_benchmark`."""
        print("\n" + "=" * 60)
        print("BENCHMARK RESULTS")
        print("=" * 60)
        print(f"Base URL: {results['test_config']['base_url']}")
        print(f"Runs per endpoint: {results['test_config']['runs_per_endpoint']}")
        print(f"Test structure: {results['test_config']['test_data']['structure']}")
        print()
        for name, metrics in results["results"].items():
            endpoint_name = name.upper().replace('_', ' ')
            print(f"{endpoint_name}:")
            print(f" Endpoint: {results['endpoints'][name]}")
            if "error" in metrics:
                print(f" ERROR: {metrics['error']}")
                print()
                continue
            print(f" Runs completed: {metrics['runs']}")
            print(" Response times:")
            print(f" Min: {metrics['min_time']:.3f}s")
            print(f" Max: {metrics['max_time']:.3f}s")
            print(f" Mean: {metrics['mean_time']:.3f}s")
            print(f" Median: {metrics['median_time']:.3f}s")
            print(f" Std Dev: {metrics['std_dev_time']:.3f}s")
            print(" Data transfer:")
            print(f" Avg response size: {metrics['mean_response_size']:.0f} bytes")
            print(f" Total transferred: {metrics['total_data_transfer']:.0f} bytes")
            print()
def main():
    """CLI entry point: parse options, run the benchmark, optionally save JSON."""
    arg_parser = argparse.ArgumentParser(description="Benchmark Galaxy API endpoints")
    # Option table keeps the declarations compact; order matters for --help.
    option_specs = [
        ("--url", {"default": "http://localhost:8081",
                   "help": "Galaxy base URL (default: http://localhost:8081)"}),
        ("--api-key", {"help": "Galaxy API key (required for authenticated requests)"}),
        ("--runs", {"type": int, "default": 10,
                    "help": "Number of benchmark runs per endpoint (default: 10)"}),
        ("--output", {"help": "Output file for JSON results (optional)"}),
        ("--outer-size", {"type": int, "default": 10,
                          "help": "Number of outer lists in the collection (default: 10)"}),
        ("--inner-size", {"type": int, "default": 10,
                          "help": "Number of datasets per inner list (default: 10)"}),
    ]
    for flag, kwargs in option_specs:
        arg_parser.add_argument(flag, **kwargs)
    opts = arg_parser.parse_args()

    if not opts.api_key:
        print("Warning: No API key provided. Some operations may fail.")
        print("Use --api-key to provide authentication.")

    try:
        bench = GalaxyBenchmarker(opts.url, opts.api_key)
        summary = bench.run_benchmark(opts.runs, opts.outer_size, opts.inner_size)
        bench.print_results(summary)
        if opts.output:
            with open(opts.output, 'w') as out_file:
                json.dump(summary, out_file, indent=2)
            print(f"Results saved to: {opts.output}")
    except KeyboardInterrupt:
        print("\nBenchmark interrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
{
"test_config": {
"base_url": "http://localhost:8081",
"runs_per_endpoint": 10,
"test_data": {
"history_id": "0dc190e40260f87c",
"hdca_id": "36863d73ffc1a3cf",
"collection_id": "e3d86a6d65f2b94a",
"structure": "list:list with 10 lists of 10 datasets each (100 total datasets)"
}
},
"endpoints": {
"history_contents": "histories/0dc190e40260f87c/contents?v=dev&order=hid&offset=0&limit=100&q=deleted&qv=false&q=visible&qv=true",
"collection_contents": "dataset_collections/36863d73ffc1a3cf/contents/e3d86a6d65f2b94a?instance_type=history&offset=0&limit=50"
},
"results": {
"history_contents": {
"runs": 10,
"min_time": 0.034497976303100586,
"max_time": 0.057617902755737305,
"mean_time": 0.03988263607025146,
"median_time": 0.036520957946777344,
"std_dev_time": 0.007385816227888958,
"mean_response_size": 872,
"total_data_transfer": 8720
},
"collection_contents": {
"runs": 10,
"min_time": 0.01600813865661621,
"max_time": 0.02509784698486328,
"mean_time": 0.01764817237854004,
"median_time": 0.016832590103149414,
"std_dev_time": 0.0026668919261499556,
"mean_response_size": 2,
"total_data_transfer": 20
}
}
}
{
"test_config": {
"base_url": "http://localhost:8081",
"runs_per_endpoint": 10,
"test_data": {
"history_id": "da1446eb2983b20a",
"hdca_id": "0f9a2f763e041b62",
"collection_id": "2de69fc1cf5fbc77",
"structure": "list:list with 10 lists of 100 datasets each (1000 total datasets)"
}
},
"endpoints": {
"history_contents": "histories/da1446eb2983b20a/contents?v=dev&order=hid&offset=0&limit=100&q=deleted&qv=false&q=visible&qv=true",
"collection_contents": "dataset_collections/0f9a2f763e041b62/contents/2de69fc1cf5fbc77?instance_type=history&offset=0&limit=50"
},
"results": {
"history_contents": {
"runs": 10,
"min_time": 0.035893917083740234,
"max_time": 0.05881786346435547,
"mean_time": 0.040116524696350096,
"median_time": 0.03788149356842041,
"std_dev_time": 0.006817544491502362,
"mean_response_size": 872,
"total_data_transfer": 8720
},
"collection_contents": {
"runs": 10,
"min_time": 0.020723342895507812,
"max_time": 0.03148984909057617,
"mean_time": 0.023198962211608887,
"median_time": 0.02231156826019287,
"std_dev_time": 0.0031422447467946194,
"mean_response_size": 3961,
"total_data_transfer": 39610
}
}
}
{
"test_config": {
"base_url": "http://localhost:8081",
"runs_per_endpoint": 10,
"test_data": {
"history_id": "7d8860253027c42f",
"hdca_id": "c191c4e25064129c",
"collection_id": "72a7c1e135a392db",
"structure": "list:list with 10 lists of 10 datasets each (100 total datasets)"
}
},
"endpoints": {
"history_contents": "histories/7d8860253027c42f/contents?v=dev&order=hid&offset=0&limit=100&q=deleted&qv=false&q=visible&qv=true",
"collection_contents": "dataset_collections/c191c4e25064129c/contents/72a7c1e135a392db?instance_type=history&offset=0&limit=50"
},
"results": {
"history_contents": {
"runs": 10,
"min_time": 0.03725004196166992,
"max_time": 0.0573880672454834,
"mean_time": 0.04005122184753418,
"median_time": 0.038257598876953125,
"std_dev_time": 0.006124406096351057,
"mean_response_size": 914,
"total_data_transfer": 9140
},
"collection_contents": {
"runs": 10,
"min_time": 0.016052961349487305,
"max_time": 0.02619194984436035,
"mean_time": 0.017267346382141113,
"median_time": 0.016287922859191895,
"std_dev_time": 0.0031400079781711662,
"mean_response_size": 2,
"total_data_transfer": 20
}
}
}
{
"test_config": {
"base_url": "http://localhost:8081",
"runs_per_endpoint": 10,
"test_data": {
"history_id": "f36e69a2df3825ac",
"hdca_id": "6ad947b9d8c483f8",
"collection_id": "eb28ed0b9dcfe034",
"structure": "list:list with 10 lists of 100 datasets each (1000 total datasets)"
}
},
"endpoints": {
"history_contents": "histories/f36e69a2df3825ac/contents?v=dev&order=hid&offset=0&limit=100&q=deleted&qv=false&q=visible&qv=true",
"collection_contents": "dataset_collections/6ad947b9d8c483f8/contents/eb28ed0b9dcfe034?instance_type=history&offset=0&limit=50"
},
"results": {
"history_contents": {
"runs": 10,
"min_time": 0.038572072982788086,
"max_time": 0.04874277114868164,
"mean_time": 0.042290759086608884,
"median_time": 0.04095625877380371,
"std_dev_time": 0.0034245786332742912,
"mean_response_size": 914,
"total_data_transfer": 9140
},
"collection_contents": {
"runs": 10,
"min_time": 0.016376972198486328,
"max_time": 0.01825571060180664,
"mean_time": 0.017121243476867675,
"median_time": 0.01716005802154541,
"std_dev_time": 0.0005127236250332352,
"mean_response_size": 2,
"total_data_transfer": 20
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment