Skip to content

Instantly share code, notes, and snippets.

@fffej
Created March 2, 2025 13:20
Show Gist options
Save fffej/39516c2c7d035b2f8d867534e22bfda3 to your computer and use it in GitHub Desktop.
code for a blog post (ably assisted with AI, but mistakes are mine!)
import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
def simulate_batch(N, T, p, batch_size):
    """Simulate processing N tasks in batches, with per-batch testing.

    A batch must be redone in full (paying its processing cost plus the
    test time T on every attempt) until every task in it passes at once,
    which happens with probability p ** batch_size per attempt.

    Parameters:
        N: Total number of tasks
        T: Time for testing a batch
        p: Probability a single task passes its test
        batch_size: Maximum tasks in a batch

    Returns:
        Total time taken to complete all N tasks
    """
    elapsed = 0
    remaining = N
    while remaining > 0:
        # Last batch may be smaller than batch_size.
        size = min(batch_size, remaining)
        passed = False
        while not passed:
            # Redo the whole batch, then re-test it.
            elapsed += size + T
            passed = random.random() < p ** size
        remaining -= size
    return elapsed
def expected_time_per_task(T, p, batch_size, max_value=1000):
    """Calculate the expected time per task for a given batch size.

    A batch of b tasks passes testing with probability p**b, so the
    expected number of attempts is p**(-b) (geometric distribution);
    each attempt costs b + T, giving (b + T) * p**(-b) / b per task.

    Parameters:
        T: Testing time per batch
        p: Probability a single task passes its test
        batch_size: Number of tasks in the batch
        max_value: Cap applied to the attempt count and the result so
            plots stay readable when p**(-b) blows up

    Returns:
        Expected time per task, capped at max_value
    """
    # Near-perfect reliability: a single attempt, with the test cost
    # amortized over the whole batch.
    if p >= 0.9999:
        return 1 + T / batch_size
    try:
        # Expected attempts until success: 1 / p**batch_size, computed
        # in log space to delay overflow, then capped.
        expected_attempts = min(np.exp(-batch_size * np.log(p)), max_value)
        result = ((batch_size + T) * expected_attempts) / batch_size
        return min(result, max_value)
    except (OverflowError, ValueError, ZeroDivisionError):
        # Numerically out of range (e.g. p <= 0, huge batch): treat as
        # "effectively infinite". Was a bare `except:`, which also
        # swallowed KeyboardInterrupt/SystemExit — now narrowed.
        return max_value
def find_optimal_batch_size(T, p, max_batch_size=20):
    """Return (batch_size, expected_time) minimizing expected time per task.

    Exhaustively evaluates batch sizes 1..max_batch_size; ties go to the
    smallest batch size.
    """
    candidates = range(1, max_batch_size + 1)
    # Pair cost with size so min() breaks ties on the smaller size,
    # matching a first-strict-minimum linear scan.
    best_cost, best_size = min(
        (expected_time_per_task(T, p, size), size) for size in candidates
    )
    return best_size, best_cost
def theoretical_optimal_batch_size(T, p):
    """Calculate the theoretical optimal batch size, sqrt(T / -ln(p)).

    Returns infinity for p == 1 (with perfect reliability, larger
    batches are always better); otherwise the rounded optimum, floored
    at 1.
    """
    if p == 1:
        return float('inf')
    continuous_optimum = (T / -np.log(p)) ** 0.5
    return max(1, int(round(continuous_optimum)))
def create_batch_size_analysis_plot(T_samples, p_samples, max_batch_size=20):
    """Plot expected time per task against batch size for each (T, p) pair.

    Line style encodes T, color encodes p. Saves the figure to
    'batch_size_effect.png' in the current directory.
    """
    batch_sizes = np.arange(1, max_batch_size + 1)
    styles = ['-', '--', '-.', ':']
    palette = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']

    plt.figure(figsize=(10, 6))
    for t_idx, T in enumerate(T_samples):
        for p_idx, p in enumerate(p_samples):
            expected = [expected_time_per_task(T, p, b) for b in batch_sizes]
            plt.plot(batch_sizes, expected,
                     marker='o', markersize=4,
                     linestyle=styles[t_idx % len(styles)],
                     color=palette[p_idx % len(palette)],
                     label=f'T={T}, p={p}')

    plt.xlabel('Batch Size')
    plt.ylabel('Expected Time per Task')
    plt.title('Effect of Batch Size on Expected Time')
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.savefig('batch_size_effect.png', dpi=300)
def create_optimal_batch_size_heatmap(T_range, p_range):
    """Heatmap of the optimal batch size over a 40x40 grid of T and p.

    Rows correspond to p values, columns to T values. Saves the figure
    to 'optimal_batch_size.png' in the current directory.
    """
    T_values = np.linspace(T_range[0], T_range[1], 40)
    p_values = np.linspace(p_range[0], p_range[1], 40)
    T_grid, p_grid = np.meshgrid(T_values, p_values)

    optimal = np.zeros_like(T_grid)
    for row in range(optimal.shape[0]):
        for col in range(optimal.shape[1]):
            # Discard the associated cost; only the size is plotted.
            optimal[row, col], _ = find_optimal_batch_size(
                T_grid[row, col], p_grid[row, col])

    plt.figure(figsize=(10, 8))
    # Thin the tick labels to every 5th grid point for legibility.
    sns.heatmap(optimal,
                xticklabels=np.round(T_values[::5], 1),
                yticklabels=np.round(p_values[::5], 3),
                cmap='viridis',
                cbar_kws={'label': 'Optimal Batch Size'})
    plt.xlabel('Test Time (T)')
    plt.ylabel('Probability of Success (p)')
    plt.title('Optimal Batch Size for Minimizing Expected Time per Task')
    plt.savefig('optimal_batch_size.png', dpi=300)
def run_simulations_and_plot(N, T, p, max_batch_size, num_simulations=100):
    """Simulate total completion time across batch sizes; plot mean ± 95% CI.

    Runs num_simulations trials per batch size and saves the figure to
    'simulation_results.png' in the current directory.
    """
    sizes = range(1, min(max_batch_size, N) + 1)
    records = [
        {'Batch Size': size, 'Total Time': simulate_batch(N, T, p, size)}
        for size in sizes
        for _ in range(num_simulations)
    ]
    df = pd.DataFrame(records)

    grouped = df.groupby('Batch Size')['Total Time']
    means = grouped.mean()
    half_ci = grouped.sem() * 1.96  # half-width of the 95% confidence interval

    plt.figure(figsize=(10, 6))
    plt.errorbar(x=means.index, y=means.values, yerr=half_ci,
                 fmt='ro-', markersize=6, linewidth=1.5, capsize=4,
                 label='Mean ± 95% CI')

    # Roughly 10 x-ticks, always including the last batch size.
    step = max(1, max_batch_size // 10)
    ticks = list(range(1, max_batch_size + 1, step))
    if max_batch_size not in ticks:
        ticks.append(max_batch_size)
    plt.xticks(ticks)

    plt.xlabel('Batch Size')
    plt.ylabel('Total Time')
    plt.title(f'Distribution of Total Time vs Batch Size (N={N}, T={T}, p={p})')
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.savefig('simulation_results.png')
if __name__ == "__main__":
    import os

    # NOTE(review): a 'results' directory is created, but the plotting
    # functions save into the current directory — confirm intent.
    os.makedirs('results', exist_ok=True)

    # Example parameters: task count, testing time, success probability.
    N, T, p = 100, 10, 0.95

    # Report the closed-form optimum for these parameters.
    optimal = theoretical_optimal_batch_size(T, p)
    print(f"Theoretical optimal batch size for T={T}, p={p}: {optimal}")

    # Analytical plots over a spread of T and p values.
    create_batch_size_analysis_plot([1, 5, 10, 20], [0.8, 0.9, 0.95, 0.99])
    create_optimal_batch_size_heatmap([1, 20], [0.8, 0.999])

    # Monte Carlo check of the analytical result.
    run_simulations_and_plot(N, T, p, 20)

    print("Analysis completed. Results saved in the current directory.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment