Fashad-Ahmed · November 21, 2025 15:33
diff --git a/generate_images.py b/generate_images.py
 # ============================================================================
 # Phase 5.5: Generate Visualization Images for LaTeX Report
 # ============================================================================
 import os

 # Announce the phase before doing any work.
 print("=== Phase 5.5: Generating Visualization Images ===")

 # Make sure the output folder for the figures exists.  exist_ok=True turns the
 # call into a no-op when 'images' is already there, so re-running is safe.
 os.makedirs(name='images', exist_ok=True)
 print("✓ Created/verified 'images' directory")

 # ============================================================================
 # 1. BN Structures Plot
 # ============================================================================
 # Draws every available Bayesian-network model in one figure (at most two
 # panels per row) and saves it to images/bn_structures.png.  Models are looked
 # up by name in globals(), so a model that was never built is simply skipped.
 # Any failure is reported on stdout and does not stop the remaining sections.
 print("\n1. Generating BN structures plot...")
 try:
    # (global variable name, panel title), listed in plotting order.
    # Preferred set: the PC model plus the extended (8-feature) models.
    preferred_models = [
        ('bn_pc', 'PC BN'),
        ('bn_naive_ext', 'Naive Bayes BN (Extended)'),
        ('bn_expert_ext', 'Expert BN (Extended)'),
    ]
    # Fallback set: basic (4-feature) models, used only when none of the
    # preferred models exist.
    fallback_models = [
        ('bn_naive', 'Naive Bayes BN'),
        ('bn_expert', 'Expert BN'),
        ('bn_learned', 'Learned BN (MMHC)'),
    ]

    bn_globals = globals()
    available_bns = []
    titles = []
    for model_group in (preferred_models, fallback_models):
        for global_name, panel_title in model_group:
            candidate = bn_globals.get(global_name)
            # A missing name and a falsy (e.g. None) model are both skipped.
            if candidate:
                available_bns.append(candidate)
                titles.append(panel_title)
        if available_bns:
            break  # something was found, so the fallback group is not needed

    if available_bns:
        n_plots = len(available_bns)
        cols = min(2, n_plots)
        rows = (n_plots + cols - 1) // cols  # ceiling division
        # squeeze=False always returns a 2-D array of Axes, so a single
        # ravel() covers the one-panel, one-row and multi-row layouts alike.
        fig, axes = plt.subplots(rows, cols, figsize=(12*cols, 10*rows), squeeze=False)
        try:
            axes = axes.ravel()
            for ax, bn, title in zip(axes, available_bns, titles):
                plot_bn_structure(bn, title, ax)

            # Hide unused subplots (an odd model count leaves one empty cell)
            for unused_ax in axes[n_plots:]:
                unused_ax.axis('off')

            fig.suptitle('Bayesian Network Structures Comparison', fontsize=16, fontweight='bold', y=0.98)
            # Leave the top 4% of the canvas free for the suptitle.
            fig.tight_layout(rect=[0, 0, 1, 0.96])
            fig.savefig('images/bn_structures.png', bbox_inches='tight', dpi=300, facecolor='white')
        finally:
            # Close this exact figure even when plotting or saving fails, so a
            # failed run does not leave an open figure behind.
            plt.close(fig)
        print("  ✓ Saved images/bn_structures.png")
    else:
        print("  ⚠ No BN models available to plot")
 except Exception as e:
    print(f"  ✗ Error generating BN structures: {e}")

 # ============================================================================
 # 2. Confusion Matrix for Best Model
 # ============================================================================
 # Saves the confusion matrix of the best model to images/confusion_matrix.png.
 # Needs best_model_name, predictions_dict and y_test in globals(); each missing
 # prerequisite produces its own warning instead of an exception.
 print("\n2. Generating confusion matrix for best model...")
 try:
    if not ('best_model_name' in globals() and 'predictions_dict' in globals()):
        print("  ⚠ Best model not found. Please run Phase 5 first.")
    elif best_model_name not in predictions_dict:
        print(f"  ⚠ Predictions not available for {best_model_name}")
    elif 'y_test' not in globals():
        print("  ⚠ y_test not available. Please run Phase 5 first.")
    else:
        best_name = best_model_name
        # presumably an array of per-class scores, one row per test sample --
        # TODO confirm against Phase 5
        y_pred_proba_best = predictions_dict[best_name]
        # argmax along axis 1 turns each row into a single predicted class index
        y_pred_best = np.argmax(y_pred_proba_best, axis=1)

        fig, ax = plt.subplots(figsize=(10, 8))
        try:
            plot_confusion_matrix(y_test, y_pred_best, best_name, ax)
            fig.tight_layout()
            fig.savefig('images/confusion_matrix.png', bbox_inches='tight', dpi=300, facecolor='white')
        finally:
            # Close this exact figure even when plotting or saving fails.
            plt.close(fig)
        print(f"  ✓ Saved images/confusion_matrix.png (Best Model: {best_name})")
 except Exception as e:
    print(f"  ✗ Error generating confusion matrix: {e}")

 # ============================================================================
 # 3. Calibration Curve for Best Model
 # ============================================================================
 # Saves the calibration curve of the best model to images/calibration_curve.png.
 # Needs best_model_name, predictions_dict and y_test in globals(); each missing
 # prerequisite produces its own warning instead of an exception.
 print("\n3. Generating calibration curve for best model...")
 try:
    if not ('best_model_name' in globals() and 'predictions_dict' in globals()):
        print("  ⚠ Best model not found. Please run Phase 5 first.")
    elif best_model_name not in predictions_dict:
        print(f"  ⚠ Predictions not available for {best_model_name}")
    elif 'y_test' not in globals():
        print("  ⚠ y_test not available. Please run Phase 5 first.")
    else:
        best_name = best_model_name
        # The stored predictions are handed to the plotting helper unchanged.
        y_pred_proba_best = predictions_dict[best_name]

        fig, ax = plt.subplots(figsize=(10, 8))
        try:
            plot_calibration_curve(y_test, y_pred_proba_best, best_name, ax)
            fig.tight_layout()
            fig.savefig('images/calibration_curve.png', bbox_inches='tight', dpi=300, facecolor='white')
        finally:
            # Close this exact figure even when plotting or saving fails.
            plt.close(fig)
        print(f"  ✓ Saved images/calibration_curve.png (Best Model: {best_name})")
 except Exception as e:
    print(f"  ✗ Error generating calibration curve: {e}")

 # ============================================================================
 # 4. HMM Transition Matrix Heatmap
 # ============================================================================
 # Saves a heatmap of one HMM's transition matrix to images/hmm_matrix.png.
 # An already-trained model from team_hmms is reused when available; otherwise
 # a representative HMM is trained on the team with the most matches in df.
 # Any failure is reported with a traceback and does not raise.
 print("\n4. Generating HMM transition matrix heatmap...")
 try:
    hmm_model = None
    if 'team_hmms' in globals() and team_hmms:
        # Option 1: reuse the first HMM stored in team_hmms.
        first_team = next(iter(team_hmms))
        hmm_model = team_hmms[first_team]
        print(f"  Using HMM from team: {first_team}")
    elif 'df' in globals() and len(df) > 0:
        # Option 2: train a representative HMM on the busiest team.
        # add(fill_value=0) keeps teams that appear in only one of the two
        # columns (a plain `+` yields NaN for them), and idxmax() picks the
        # largest total because the summed Series is not ordered by count.
        team_counts = df['HomeTeam'].value_counts().add(
            df['AwayTeam'].value_counts(), fill_value=0)
        top_team = team_counts.idxmax()
        print(f"  Training representative HMM for team: {top_team}")

        from hmmlearn import hmm
        # Use CategoricalHMM when this hmmlearn build provides it, otherwise
        # fall back to MultinomialHMM.
        HMMClass = getattr(hmm, 'CategoricalHMM', None)
        if HMMClass is None:
            HMMClass = hmm.MultinomialHMM

        # All matches of this team, ordered by date when a Date column exists.
        played_home = df['HomeTeam'] == top_team
        played_away = df['AwayTeam'] == top_team
        team_matches = df[played_home | played_away].copy()
        if 'Date' in team_matches.columns:
            # NOTE(review): if Date holds strings this is a lexicographic
            # sort -- confirm the column is parsed as datetime upstream.
            team_matches = team_matches.sort_values('Date')
        else:
            # Without a Date column, fall back to index order.
            team_matches = team_matches.sort_index()

        # One observation symbol per match: 0 when FTR equals the team's own
        # side ('H' at home, 'A' away -- presumably a win), 1 for 'D', and 2
        # for anything else.
        sequences = []
        for home_team, result in zip(team_matches['HomeTeam'], team_matches['FTR']):
            own_side_code = 'H' if home_team == top_team else 'A'
            if result == own_side_code:
                sequences.append(0)
            elif result == 'D':
                sequences.append(1)
            else:
                sequences.append(2)

        if len(sequences) >= 30:
            # A single sequence, shaped (n_samples, 1) by the reshape below.
            X = np.array(sequences).reshape(-1, 1)
            lengths = [len(sequences)]

            # Adaptive n_components: fewer hidden states for shorter
            # histories.  This always decides the state count; the global
            # n_components was read before but never actually used.
            if len(sequences) < 200:
                n_comp = 2
            elif len(sequences) < 400:
                n_comp = 3
            else:
                n_comp = 5

            hmm_model = HMMClass(n_components=n_comp, n_iter=50, random_state=42, tol=1e-2)
            hmm_model.fit(X, lengths=lengths)
        else:
            print("  ⚠ Not enough matches to train representative HMM")

    if hmm_model is not None:
        # Square matrix: row = state left, column = state entered.
        transmat = hmm_model.transmat_
        n_states = transmat.shape[0]
        state_labels = [f'State {i}' for i in range(n_states)]

        fig, ax = plt.subplots(figsize=(10, 8))
        try:
            sns.heatmap(transmat, annot=True, fmt='.3f', cmap='YlOrRd',
                        xticklabels=state_labels,
                        yticklabels=state_labels,
                        cbar_kws={'label': 'Transition Probability'}, ax=ax)
            ax.set_xlabel('To State', fontsize=12, fontweight='bold')
            ax.set_ylabel('From State', fontsize=12, fontweight='bold')
            ax.set_title('HMM Transition Matrix\n(Probability of transitioning between hidden states)',
                         fontsize=14, fontweight='bold', pad=15)
            fig.tight_layout()
            fig.savefig('images/hmm_matrix.png', bbox_inches='tight', dpi=300, facecolor='white')
        finally:
            # Close this exact figure even when plotting or saving fails.
            plt.close(fig)
        print(f"  ✓ Saved images/hmm_matrix.png ({n_states}x{n_states} transition matrix)")
    else:
        print("  ⚠ Could not access or train HMM model for transition matrix")
 except Exception as e:
    print(f"  ✗ Error generating HMM transition matrix: {e}")
    import traceback
    traceback.print_exc()

 # Closing banner: a blank line, then the completion message framed by two
 # 70-character rules (one print call, lines joined with newlines).
 print("\n" + "="*70, "✓ Image generation complete!", "="*70, sep="\n")
	# ============================================================================
	# Phase 5.5: Generate Visualization Images for LaTeX Report
	# ============================================================================
	import os

	# Announce the phase before doing any work.
	print("=== Phase 5.5: Generating Visualization Images ===")

	# Make sure the output folder for the figures exists.  exist_ok=True turns the
	# call into a no-op when 'images' is already there, so re-running is safe.
	os.makedirs(name='images', exist_ok=True)
	print("✓ Created/verified 'images' directory")

	# ============================================================================
	# 1. BN Structures Plot
	# ============================================================================
	# Draws every available Bayesian-network model in one figure (at most two
	# panels per row) and saves it to images/bn_structures.png.  Models are looked
	# up by name in globals(), so a model that was never built is simply skipped.
	# Any failure is reported on stdout and does not stop the remaining sections.
	print("\n1. Generating BN structures plot...")
	try:
	    # (global variable name, panel title), listed in plotting order.
	    # Preferred set: the PC model plus the extended (8-feature) models.
	    preferred_models = [
	        ('bn_pc', 'PC BN'),
	        ('bn_naive_ext', 'Naive Bayes BN (Extended)'),
	        ('bn_expert_ext', 'Expert BN (Extended)'),
	    ]
	    # Fallback set: basic (4-feature) models, used only when none of the
	    # preferred models exist.
	    fallback_models = [
	        ('bn_naive', 'Naive Bayes BN'),
	        ('bn_expert', 'Expert BN'),
	        ('bn_learned', 'Learned BN (MMHC)'),
	    ]

	    bn_globals = globals()
	    available_bns = []
	    titles = []
	    for model_group in (preferred_models, fallback_models):
	        for global_name, panel_title in model_group:
	            candidate = bn_globals.get(global_name)
	            # A missing name and a falsy (e.g. None) model are both skipped.
	            if candidate:
	                available_bns.append(candidate)
	                titles.append(panel_title)
	        if available_bns:
	            break  # something was found, so the fallback group is not needed

	    if available_bns:
	        n_plots = len(available_bns)
	        cols = min(2, n_plots)
	        rows = (n_plots + cols - 1) // cols  # ceiling division
	        # squeeze=False always returns a 2-D array of Axes, so a single
	        # ravel() covers the one-panel, one-row and multi-row layouts alike.
	        fig, axes = plt.subplots(rows, cols, figsize=(12*cols, 10*rows), squeeze=False)
	        try:
	            axes = axes.ravel()
	            for ax, bn, title in zip(axes, available_bns, titles):
	                plot_bn_structure(bn, title, ax)

	            # Hide unused subplots (an odd model count leaves one empty cell)
	            for unused_ax in axes[n_plots:]:
	                unused_ax.axis('off')

	            fig.suptitle('Bayesian Network Structures Comparison', fontsize=16, fontweight='bold', y=0.98)
	            # Leave the top 4% of the canvas free for the suptitle.
	            fig.tight_layout(rect=[0, 0, 1, 0.96])
	            fig.savefig('images/bn_structures.png', bbox_inches='tight', dpi=300, facecolor='white')
	        finally:
	            # Close this exact figure even when plotting or saving fails, so a
	            # failed run does not leave an open figure behind.
	            plt.close(fig)
	        print(" ✓ Saved images/bn_structures.png")
	    else:
	        print(" ⚠ No BN models available to plot")
	except Exception as e:
	    print(f" ✗ Error generating BN structures: {e}")

	# ============================================================================
	# 2. Confusion Matrix for Best Model
	# ============================================================================
	# Saves the confusion matrix of the best model to images/confusion_matrix.png.
	# Needs best_model_name, predictions_dict and y_test in globals(); each missing
	# prerequisite produces its own warning instead of an exception.
	print("\n2. Generating confusion matrix for best model...")
	try:
	    if not ('best_model_name' in globals() and 'predictions_dict' in globals()):
	        print(" ⚠ Best model not found. Please run Phase 5 first.")
	    elif best_model_name not in predictions_dict:
	        print(f" ⚠ Predictions not available for {best_model_name}")
	    elif 'y_test' not in globals():
	        print(" ⚠ y_test not available. Please run Phase 5 first.")
	    else:
	        best_name = best_model_name
	        # presumably an array of per-class scores, one row per test sample --
	        # TODO confirm against Phase 5
	        y_pred_proba_best = predictions_dict[best_name]
	        # argmax along axis 1 turns each row into a single predicted class index
	        y_pred_best = np.argmax(y_pred_proba_best, axis=1)

	        fig, ax = plt.subplots(figsize=(10, 8))
	        try:
	            plot_confusion_matrix(y_test, y_pred_best, best_name, ax)
	            fig.tight_layout()
	            fig.savefig('images/confusion_matrix.png', bbox_inches='tight', dpi=300, facecolor='white')
	        finally:
	            # Close this exact figure even when plotting or saving fails.
	            plt.close(fig)
	        print(f" ✓ Saved images/confusion_matrix.png (Best Model: {best_name})")
	except Exception as e:
	    print(f" ✗ Error generating confusion matrix: {e}")

	# ============================================================================
	# 3. Calibration Curve for Best Model
	# ============================================================================
	# Saves the calibration curve of the best model to images/calibration_curve.png.
	# Needs best_model_name, predictions_dict and y_test in globals(); each missing
	# prerequisite produces its own warning instead of an exception.
	print("\n3. Generating calibration curve for best model...")
	try:
	    if not ('best_model_name' in globals() and 'predictions_dict' in globals()):
	        print(" ⚠ Best model not found. Please run Phase 5 first.")
	    elif best_model_name not in predictions_dict:
	        print(f" ⚠ Predictions not available for {best_model_name}")
	    elif 'y_test' not in globals():
	        print(" ⚠ y_test not available. Please run Phase 5 first.")
	    else:
	        best_name = best_model_name
	        # The stored predictions are handed to the plotting helper unchanged.
	        y_pred_proba_best = predictions_dict[best_name]

	        fig, ax = plt.subplots(figsize=(10, 8))
	        try:
	            plot_calibration_curve(y_test, y_pred_proba_best, best_name, ax)
	            fig.tight_layout()
	            fig.savefig('images/calibration_curve.png', bbox_inches='tight', dpi=300, facecolor='white')
	        finally:
	            # Close this exact figure even when plotting or saving fails.
	            plt.close(fig)
	        print(f" ✓ Saved images/calibration_curve.png (Best Model: {best_name})")
	except Exception as e:
	    print(f" ✗ Error generating calibration curve: {e}")

	# ============================================================================
	# 4. HMM Transition Matrix Heatmap
	# ============================================================================
	# Saves a heatmap of one HMM's transition matrix to images/hmm_matrix.png.
	# An already-trained model from team_hmms is reused when available; otherwise
	# a representative HMM is trained on the team with the most matches in df.
	# Any failure is reported with a traceback and does not raise.
	print("\n4. Generating HMM transition matrix heatmap...")
	try:
	    hmm_model = None
	    if 'team_hmms' in globals() and team_hmms:
	        # Option 1: reuse the first HMM stored in team_hmms.
	        first_team = next(iter(team_hmms))
	        hmm_model = team_hmms[first_team]
	        print(f" Using HMM from team: {first_team}")
	    elif 'df' in globals() and len(df) > 0:
	        # Option 2: train a representative HMM on the busiest team.
	        # add(fill_value=0) keeps teams that appear in only one of the two
	        # columns (a plain `+` yields NaN for them), and idxmax() picks the
	        # largest total because the summed Series is not ordered by count.
	        team_counts = df['HomeTeam'].value_counts().add(
	            df['AwayTeam'].value_counts(), fill_value=0)
	        top_team = team_counts.idxmax()
	        print(f" Training representative HMM for team: {top_team}")

	        from hmmlearn import hmm
	        # Use CategoricalHMM when this hmmlearn build provides it, otherwise
	        # fall back to MultinomialHMM.
	        HMMClass = getattr(hmm, 'CategoricalHMM', None)
	        if HMMClass is None:
	            HMMClass = hmm.MultinomialHMM

	        # All matches of this team, ordered by date when a Date column exists.
	        played_home = df['HomeTeam'] == top_team
	        played_away = df['AwayTeam'] == top_team
	        team_matches = df[played_home | played_away].copy()
	        if 'Date' in team_matches.columns:
	            # NOTE(review): if Date holds strings this is a lexicographic
	            # sort -- confirm the column is parsed as datetime upstream.
	            team_matches = team_matches.sort_values('Date')
	        else:
	            # Without a Date column, fall back to index order.
	            team_matches = team_matches.sort_index()

	        # One observation symbol per match: 0 when FTR equals the team's own
	        # side ('H' at home, 'A' away -- presumably a win), 1 for 'D', and 2
	        # for anything else.
	        sequences = []
	        for home_team, result in zip(team_matches['HomeTeam'], team_matches['FTR']):
	            own_side_code = 'H' if home_team == top_team else 'A'
	            if result == own_side_code:
	                sequences.append(0)
	            elif result == 'D':
	                sequences.append(1)
	            else:
	                sequences.append(2)

	        if len(sequences) >= 30:
	            # A single sequence, shaped (n_samples, 1) by the reshape below.
	            X = np.array(sequences).reshape(-1, 1)
	            lengths = [len(sequences)]

	            # Adaptive n_components: fewer hidden states for shorter
	            # histories.  This always decides the state count; the global
	            # n_components was read before but never actually used.
	            if len(sequences) < 200:
	                n_comp = 2
	            elif len(sequences) < 400:
	                n_comp = 3
	            else:
	                n_comp = 5

	            hmm_model = HMMClass(n_components=n_comp, n_iter=50, random_state=42, tol=1e-2)
	            hmm_model.fit(X, lengths=lengths)
	        else:
	            print(" ⚠ Not enough matches to train representative HMM")

	    if hmm_model is not None:
	        # Square matrix: row = state left, column = state entered.
	        transmat = hmm_model.transmat_
	        n_states = transmat.shape[0]
	        state_labels = [f'State {i}' for i in range(n_states)]

	        fig, ax = plt.subplots(figsize=(10, 8))
	        try:
	            sns.heatmap(transmat, annot=True, fmt='.3f', cmap='YlOrRd',
	                        xticklabels=state_labels,
	                        yticklabels=state_labels,
	                        cbar_kws={'label': 'Transition Probability'}, ax=ax)
	            ax.set_xlabel('To State', fontsize=12, fontweight='bold')
	            ax.set_ylabel('From State', fontsize=12, fontweight='bold')
	            ax.set_title('HMM Transition Matrix\n(Probability of transitioning between hidden states)',
	                         fontsize=14, fontweight='bold', pad=15)
	            fig.tight_layout()
	            fig.savefig('images/hmm_matrix.png', bbox_inches='tight', dpi=300, facecolor='white')
	        finally:
	            # Close this exact figure even when plotting or saving fails.
	            plt.close(fig)
	        print(f" ✓ Saved images/hmm_matrix.png ({n_states}x{n_states} transition matrix)")
	    else:
	        print(" ⚠ Could not access or train HMM model for transition matrix")
	except Exception as e:
	    print(f" ✗ Error generating HMM transition matrix: {e}")
	    import traceback
	    traceback.print_exc()

	# Closing banner: a blank line, then the completion message framed by two
	# 70-character rules (one print call, lines joined with newlines).
	print("\n" + "="*70, "✓ Image generation complete!", "="*70, sep="\n")
No results found