bigsnarfdude · March 15, 2025 07:17 · bigsnarfdude · Mar 15, 2025
diff --git a/meetings.py b/meetings.py
 import json
 import pandas as pd
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 import numpy as np
 import dash
 from dash import dcc, html
 from dash.dependencies import Input, Output

 # Load the data.  JSON text is UTF-8 by specification, so pin the encoding
 # instead of relying on the platform's locale default.
 with open("combined_workshop_data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

 df_workshops = pd.DataFrame(data["workshops"])
 # The transpose turns the outer keys of "yearly_stats" into the row index,
 # which is then exposed as the "year" column.
 df_yearly_stats = (
    pd.DataFrame(data["yearly_stats"])
    .transpose()
    .reset_index()
    .rename(columns={"index": "year"})
 )

 # Convert relevant columns; values that cannot be parsed become NaN / NaT
 # (errors='coerce') instead of raising.
 for column in ('year', 'avg_duration_days', 'total_lectures'):
    df_yearly_stats[column] = pd.to_numeric(df_yearly_stats[column], errors='coerce')
 for column in ('start_date', 'end_date'):
    df_workshops[column] = pd.to_datetime(df_workshops[column], errors='coerce')
 df_workshops['day_of_week'] = df_workshops['start_date'].dt.dayofweek  # 0=Monday, 6=Sunday

 # --- Image 2: Event Duration Trends (REPLACEMENT FOR PROBLEMATIC CHART) ---
 def analyze_event_duration_trends(df_workshops):
    """
    Build a two-row figure showing how workshop content changed year over year.

    Row 1 plots the yearly mean of a per-event duration proxy,
    ``duration_days * 8 * 60 / total_events`` (minutes per event for an
    8-hour day), with a shaded band of +/- one standard deviation.
    Row 2 stacks the percentage of lectures against all other events and
    overlays a least-squares trend line for the lecture percentage.

    Args:
        df_workshops: DataFrame with ``start_date``, ``duration_days``,
            ``total_events`` and ``lecture_count`` columns.  It is not
            modified; all derived columns are added to a copy.

    Returns:
        The assembled plotly figure.
    """
    # Copy first: adding 'year' / 'avg_event_duration_mins' to the caller's
    # frame would leak into every chart built from it afterwards.
    df = df_workshops.copy()
    df['start_date'] = pd.to_datetime(df['start_date'], errors='coerce')
    df['year'] = df['start_date'].dt.year

    fig = make_subplots(
        rows=2, cols=1,
        subplot_titles=("Average Event Duration by Year", "Content Distribution by Year"),
        vertical_spacing=0.15,
        shared_xaxes=True
    )

    # --- First subplot: per-event duration proxy ---
    # A workshop with zero events would divide by zero and produce inf, which
    # poisons the yearly mean/std; treat it as missing instead.
    event_counts = df['total_events'].replace(0, np.nan)
    df['avg_event_duration_mins'] = (df['duration_days'] * 8 * 60) / event_counts

    yearly_duration = (
        df.groupby('year')['avg_event_duration_mins']
        .agg(avg_duration_mins='mean', std_duration_mins='std')
        .reset_index()
    )

    fig.add_trace(
        go.Scatter(
            x=yearly_duration['year'],
            y=yearly_duration['avg_duration_mins'],
            mode='lines+markers',
            name='Avg Event Duration (mins)',
            line=dict(color='#4c78a8', width=3),
            marker=dict(size=8),
            hovertemplate='<b>Year</b>: %{x}<br><b>Avg Duration</b>: %{y:.1f} mins<extra></extra>'
        ),
        row=1, col=1
    )

    # Shaded band of +/- one standard deviation.  The upper edge is drawn
    # first so the lower edge can fill up to it with fill='tonexty'.
    band_edges = (
        (yearly_duration['avg_duration_mins'] + yearly_duration['std_duration_mins'], {}),
        (yearly_duration['avg_duration_mins'] - yearly_duration['std_duration_mins'],
         dict(fill='tonexty', fillcolor='rgba(76, 120, 168, 0.2)')),
    )
    for edge, fill_kwargs in band_edges:
        fig.add_trace(
            go.Scatter(
                x=yearly_duration['year'],
                y=edge,
                mode='lines',
                line=dict(width=0),
                showlegend=False,
                hoverinfo='skip',
                **fill_kwargs
            ),
            row=1, col=1
        )

    # --- Second subplot: content distribution by year ---
    yearly_content = df.groupby('year')[['lecture_count', 'total_events']].sum().reset_index()
    yearly_content['filler_count'] = yearly_content['total_events'] - yearly_content['lecture_count']
    # Years without any events have no meaningful percentage; keep them as NaN.
    year_totals = yearly_content['total_events'].replace(0, np.nan)
    yearly_content['lecture_percentage'] = yearly_content['lecture_count'] / year_totals * 100
    yearly_content['filler_percentage'] = yearly_content['filler_count'] / year_totals * 100

    # Stacked bars (barmode='stack' is set in the layout below).
    bar_specs = (
        ('lecture_percentage', 'Lectures', '#4c78a8', 'Lectures'),
        ('filler_percentage', 'Discussions/Breaks/Other', '#ff9d45', 'Other Activities'),
    )
    for column, trace_name, color, hover_label in bar_specs:
        fig.add_trace(
            go.Bar(
                x=yearly_content['year'],
                y=yearly_content[column],
                name=trace_name,
                marker_color=color,
                hovertemplate=f'<b>Year</b>: %{{x}}<br><b>{hover_label}</b>: %{{y:.1f}}%<extra></extra>'
            ),
            row=2, col=1
        )

    # Linear trend of the lecture percentage.  Fit only years that have a
    # value, and only when there are at least two of them.
    fit = yearly_content.dropna(subset=['lecture_percentage'])
    if len(fit) > 1:
        slope, intercept = np.polyfit(fit['year'], fit['lecture_percentage'], 1)
        fig.add_trace(
            go.Scatter(
                x=fit['year'],
                y=slope * fit['year'] + intercept,
                mode='lines',
                name=f'Lecture % Trend (slope: {slope:.2f}%/year)',
                line=dict(color='red', dash='dash'),
            ),
            row=2, col=1
        )

    fig.update_layout(
        title_text="Workshop Content Evolution Over Time",
        height=800,
        plot_bgcolor='white',
        barmode='stack',
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        )
    )

    grid = dict(showgrid=True, gridwidth=1, gridcolor='lightgrey')
    fig.update_xaxes(title_text="Year", row=2, col=1, **grid)
    fig.update_yaxes(title_text="Average Duration (minutes)", row=1, col=1, **grid)
    fig.update_yaxes(title_text="Percentage (%)", row=2, col=1, **grid)

    # Explanatory notes, one near the top of each subplot (paper coordinates).
    notes = (
        (0.95, "<b>Event Duration Trend Analysis</b><br>Shows whether events are getting shorter or longer over time"),
        (0.45, "<b>Content Quality Indicator</b><br>Higher lecture percentage may indicate more substantive content"),
    )
    for y_pos, note in notes:
        fig.add_annotation(
            xref="paper", yref="paper",
            x=0.01, y=y_pos,
            text=note,
            showarrow=False,
            font=dict(size=10),
            align="left",
            bgcolor="rgba(255, 255, 255, 0.8)",
            bordercolor="lightgrey",
            borderwidth=1,
            borderpad=4
        )

    return fig


 # --- Image 3: Day-of-Week Distribution ---
 def create_day_of_week_distribution(data):
    """
    Build a bar chart of event counts per weekday, labelled with percentages.

    Args:
        data: Parsed workshop data; reads
            ``data['visualizations']['day_of_week_counts']``, a sequence of
            counts that is paired in order with Monday..Sunday.

    Returns:
        The assembled plotly figure.
    """
    day_counts = data['visualizations']['day_of_week_counts']
    days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    total_events = sum(day_counts)
    # With no events at all every share is 0%; avoid dividing by zero.
    percentages = [
        (count / total_events) * 100 if total_events else 0.0
        for count in day_counts
    ]

    fig = go.Figure(data=[go.Bar(
        x=days,
        y=day_counts,
        marker_color='#5975A4',
        text=[f'{p:.1f}%' for p in percentages],  # one decimal place, shown above each bar
        textposition='outside',
        hovertemplate=
            '<b>Day</b>: %{x}<br>' +
            '<b>Count</b>: %{y}<br>' +
            '<b>Percent</b>: %{text}<extra></extra>'
    )])

    fig.update_layout(
        title='Events by Day of Week',
        xaxis_title='',  # no x-axis title; the day names are self-explanatory
        yaxis_title='count',
        plot_bgcolor='white',
        xaxis=dict(tickangle=-45)  # rotate the day labels for readability
    )
    grid = dict(showgrid=True, gridwidth=1, gridcolor='lightgrey')
    fig.update_yaxes(**grid)
    fig.update_xaxes(**grid)

    return fig


 # --- Image 4: Lecture Count by Year ---
 def create_lecture_count_by_year(df):
    """
    Build a bar chart of total lectures per year with a linear trendline and
    a fixed-width yearly summary underneath the plot.

    Args:
        df: Yearly statistics, one row per year, with ``year``,
            ``total_workshops``, ``total_events``, ``total_lectures`` and
            ``avg_duration_days`` columns.

    Returns:
        The assembled plotly figure.
    """
    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=df['year'], y=df['total_lectures'],
        name='Lecture Count',
        marker_color='steelblue',
        hovertemplate=
            '<b>Year</b>: %{x}<br>' +
            '<b>Lecture Count</b>: %{y}<extra></extra>'
    ))

    # Trendline: fit only rows where both values are present, and only when
    # there are at least two of them (NaN or a single point gives no usable fit).
    fit = df[['year', 'total_lectures']].dropna()
    if len(fit) > 1:
        fit_years = fit['year'].astype(float)
        trend = np.poly1d(np.polyfit(fit_years, fit['total_lectures'].astype(float), 1))
        fig.add_trace(go.Scatter(
            x=fit['year'], y=trend(fit_years), mode='lines',
            name='Trendline',
            line=dict(color='firebrick', dash='dash')
        ))

    fig.update_layout(title='Total Lecture Count by Year',
                      xaxis_title='Year',
                      yaxis_title='Lecture Count',
                      showlegend=True)

    def _whole(value):
        """Format a count as an integer, tolerating missing values."""
        return 'n/a' if pd.isna(value) else str(int(value))

    # Plotly annotation text only understands a small inline-HTML subset
    # (<br>, <b>, <i>, ...); <table>/<tr>/<td> would be shown as literal
    # markup.  Lay the summary out as fixed-width lines joined by <br>.
    lines = [
        "<b>Yearly Statistical Summary</b>",
        "",
        f"{'Year':<6}{'Workshops':>11}{'Events':>9}{'Lectures':>10}{'Avg. Duration (Days)':>22}",
    ]
    lines.extend(
        f"{_whole(row['year']):<6}{_whole(row['total_workshops']):>11}"
        f"{_whole(row['total_events']):>9}{_whole(row['total_lectures']):>10}"
        f"{row['avg_duration_days']:>22.2f}"
        for _, row in df.iterrows()
    )

    fig.add_annotation(
        text="<br>".join(lines),
        xref="paper",
        yref="paper",
        x=0, xanchor="left",
        y=-0.15, yanchor="top",  # hang below the axis so extra rows grow downward
        showarrow=False,
        font=dict(size=12, family="Courier New, monospace"),  # monospace keeps columns aligned
        align="left",
        bordercolor="black",
        borderwidth=1,
        borderpad=4,
        bgcolor="white",
        opacity=0.8
    )

    # Reserve bottom margin for every summary line (roughly 16 px each).
    fig.update_layout(
        margin=dict(l=20, r=20, t=40, b=max(160, 110 + 16 * len(lines))),
    )

    return fig


 # --- New Chart: Online Count by Year ---
 def create_online_count_by_year(df_workshops):
    """
    Build a bar chart of online events per year with a linear trendline and
    a fixed-width yearly summary underneath the plot.

    Args:
        df_workshops: DataFrame with ``start_date``, ``online_count``,
            ``total_events`` and ``code`` columns.  It is not modified; the
            function works on a copy.

    Returns:
        The assembled plotly figure.
    """
    df = df_workshops.copy()
    df['start_date'] = pd.to_datetime(df['start_date'], errors='coerce')
    df['year'] = df['start_date'].dt.year

    # Per-year totals; counting 'code' gives the number of workshops per year.
    yearly_online = (
        df.groupby('year')
        .agg({'online_count': 'sum', 'total_events': 'sum', 'code': 'count'})
        .rename(columns={'code': 'total_workshops'})
        .reset_index()
    )

    # A year without any events has no meaningful online share; keep it as
    # NaN rather than dividing by zero.
    year_totals = yearly_online['total_events'].replace(0, np.nan)
    yearly_online['online_percentage'] = (yearly_online['online_count'] / year_totals * 100).round(1)

    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=yearly_online['year'],
        y=yearly_online['online_count'],
        name='Online Event Count',
        marker_color='#4c78a8',
        hovertemplate=
            '<b>Year</b>: %{x}<br>' +
            '<b>Online Events</b>: %{y}<br>' +
            '<b>Percentage</b>: %{text}%<extra></extra>',
        text=yearly_online['online_percentage']
    ))

    # Linear-regression trendline; needs at least two points.
    x_values = yearly_online['year']
    y_values = yearly_online['online_count']
    if len(x_values) > 1:
        z = np.polyfit(x_values, y_values, 1)
        p = np.poly1d(z)
        fig.add_trace(go.Scatter(
            x=x_values,
            y=p(x_values),
            mode='lines',
            name=f'Trendline (slope: {z[0]:.1f} events/year)',
            line=dict(color='firebrick', dash='dash')
        ))

    fig.update_layout(
        title='Online Events by Year',
        xaxis_title='Year',
        yaxis_title='Online Event Count',
        showlegend=True,
        plot_bgcolor='white',
        xaxis=dict(showgrid=True, gridwidth=1, gridcolor='lightgrey'),
        yaxis=dict(showgrid=True, gridwidth=1, gridcolor='lightgrey')
    )

    # Plotly annotation text only understands a small inline-HTML subset
    # (<br>, <b>, <i>, ...); <table>/<tr>/<td> would be shown as literal
    # markup.  Lay the summary out as fixed-width lines joined by <br>.
    lines = [
        "<b>Yearly Online Events Summary</b>",
        "",
        f"{'Year':<6}{'Workshops':>11}{'Total Events':>14}{'Online Events':>15}{'Online %':>10}",
    ]
    for row in yearly_online.itertuples(index=False):
        share = 'n/a' if pd.isna(row.online_percentage) else f"{row.online_percentage:.1f}%"
        lines.append(
            f"{int(row.year):<6}{int(row.total_workshops):>11}"
            f"{int(row.total_events):>14}{int(row.online_count):>15}{share:>10}"
        )

    fig.add_annotation(
        text="<br>".join(lines),
        xref="paper",
        yref="paper",
        x=0, xanchor="left",
        y=-0.15, yanchor="top",  # hang below the axis so extra rows grow downward
        showarrow=False,
        font=dict(size=12, family="Courier New, monospace"),  # monospace keeps columns aligned
        align="left",
        bordercolor="black",
        borderwidth=1,
        borderpad=4,
        bgcolor="white",
        opacity=0.8
    )

    # Reserve bottom margin for every summary line (roughly 16 px each).
    fig.update_layout(
        margin=dict(l=20, r=20, t=40, b=max(160, 110 + 16 * len(lines))),
    )

    return fig


 # --- Image 5: Online Workshop Trends (Filtered Bubble Chart) ---
 def analyze_online_workshop_trends(df_workshops):
    """
    Build a bubble chart of workshops with more than two online events.

    Workshops with 2 or fewer online events are treated as placeholders and
    left out of the chart.  Bubble size and colour both encode the online
    event count; the workshop with the most online events is annotated and a
    summary box is placed in the top-left corner.

    Args:
        df_workshops: DataFrame with ``start_date`` and ``online_count``
            columns and, optionally, ``code``.  It is not modified; the
            function works on a copy.

    Returns:
        The assembled plotly figure.
    """
    df_workshops = df_workshops.copy()
    df_workshops['start_date'] = pd.to_datetime(df_workshops['start_date'], errors='coerce')

    # Keep only workshops with a significant number of online events, in
    # chronological order.
    df_online = df_workshops[df_workshops['online_count'] > 2].sort_values('start_date')

    fig = go.Figure()

    if not df_online.empty:
        if 'code' in df_online.columns:
            codes = df_online['code'].astype(str)
        else:
            codes = pd.Series('N/A', index=df_online.index)
        # strftime on the column yields a missing value for NaT instead of raising.
        dates = df_online['start_date'].dt.strftime('%Y-%m-%d').fillna('N/A')
        hover_text = (
            "Workshop: " + codes + "<br>"
            + "Date: " + dates + "<br>"
            + "Online Events: " + df_online['online_count'].astype(str)
        )

        # One trace for all bubbles: the colour scale then spans the whole
        # data set and a single colour bar is drawn.  (One trace per workshop
        # gives every bubble its own single-value scale and colour bar.)
        fig.add_trace(go.Scatter(
            x=df_online['start_date'],
            y=df_online['online_count'],
            mode='markers',
            marker=dict(
                size=df_online['online_count'] * 3,  # simple multiplier for visibility
                color=df_online['online_count'],
                colorscale='Viridis',
                colorbar=dict(title='Online Events'),
                showscale=True,
                line=dict(width=1, color='rgba(0,0,0,0.3)')
            ),
            text=hover_text,
            hoverinfo='text',
            showlegend=False,
        ))

        # Highlight the workshop with the most online events (skipped when
        # its date is unknown, since the arrow would have nowhere to point).
        peak = df_online.loc[df_online['online_count'].idxmax()]
        if pd.notna(peak['start_date']):
            fig.add_annotation(
                x=peak['start_date'],
                y=peak['online_count'],
                text=f"Peak: {peak['online_count']} online events",
                showarrow=True,
                arrowhead=1,
                arrowsize=1,
                arrowwidth=2,
                arrowcolor="black",
                ax=20,
                ay=-30,
                font=dict(size=12),
                bgcolor="rgba(255, 255, 255, 0.8)",
                bordercolor="black",
                borderwidth=1
            )

    fig.update_layout(
        title="Significant Online Workshop Events (>2 online events)",
        height=700,
        plot_bgcolor='white',
        xaxis=dict(
            title="Workshop Date",
            showgrid=True,
            gridcolor='lightgrey',
            tickangle=45
        ),
        yaxis=dict(
            title="Number of Online Events",
            showgrid=True,
            gridcolor='lightgrey'
        ),
        margin=dict(l=50, r=50, t=80, b=50)
    )

    # Summary statistics; the online figures cover significant workshops only.
    has_significant = not df_online.empty
    total_workshops = len(df_workshops)
    total_online_workshops = len(df_online)
    filtered_out = int((df_workshops['online_count'] > 0).sum()) - total_online_workshops
    total_online_events = df_online['online_count'].sum() if has_significant else 0
    avg_online_per_workshop = df_online['online_count'].mean() if has_significant else 0
    max_online = df_online['online_count'].max() if has_significant else 0

    stats_text = "<br>".join([
        "<b>Online Events Summary:</b>",
        f"Total Workshops: {total_workshops}",
        f"Workshops with Significant Online Events (>2): {total_online_workshops}",
        f"Workshops with 1-2 Online Events (filtered out): {filtered_out}",
        f"Total Online Events (in significant workshops): {total_online_events}",
        f"Average Online Events per Workshop: {avg_online_per_workshop:.1f}",
        f"Maximum Online Events: {max_online}",
    ])

    fig.add_annotation(
        x=0.02,
        y=0.98,
        xref="paper",
        yref="paper",
        text=stats_text,
        showarrow=False,
        font=dict(size=12),
        bgcolor="rgba(255, 255, 255, 0.8)",
        bordercolor="black",
        borderwidth=1,
        borderpad=4,
        align="left"
    )

    return fig


 # Create Dash app
 app = dash.Dash(__name__)

 # Generate all figures once at start-up; the layout serves them statically.
 fig2 = analyze_event_duration_trends(df_workshops)
 fig3 = create_day_of_week_distribution(data)
 fig4 = create_lecture_count_by_year(df_yearly_stats)
 fig5 = analyze_online_workshop_trends(df_workshops)
 fig6 = create_online_count_by_year(df_workshops)

 # One entry per tab, in display order: (label, tab value, figure, graph height).
 _DASHBOARD_TABS = [
    ('Online Workshop Events', 'tab-online-events', fig5, '750px'),
    ('Online Count by Year', 'tab-online-count', fig6, '850px'),
    ('Event Duration Trends', 'tab-duration-trends', fig2, '850px'),
    ('Day of Week Distribution', 'tab-day-distribution', fig3, '600px'),
    ('Lecture Count by Year', 'tab-lecture-count', fig4, '850px'),
 ]

 # Define app layout with tabs.  Dash style dictionaries use camelCase keys
 # (marginTop, not margin-top).
 app.layout = html.Div([
    html.H1("Workshop Analysis Dashboard",
            style={'textAlign': 'center', 'marginBottom': '20px', 'marginTop': '20px'}),

    dcc.Tabs(id='dashboard-tabs', value='tab-online-events', children=[
        dcc.Tab(label=label, value=value, children=[
            dcc.Graph(figure=figure, style={'height': height})
        ])
        for label, value, figure, height in _DASHBOARD_TABS
    ]),

    html.Div([
        html.P("Workshop Data Analysis Dashboard",
               style={'textAlign': 'center', 'marginTop': '20px', 'color': 'gray'})
    ])
 ])

 # Run the app
 if __name__ == '__main__':
    # Debug mode stays OFF to avoid the error seen with Python 3.12.
    # app.run() is the supported entry point; app.run_server() is rejected by Dash 3.
    app.run(debug=False)
	import json
	import pandas as pd
	import plotly.graph_objects as go
	from plotly.subplots import make_subplots
	import numpy as np
	import dash
	from dash import dcc, html
	from dash.dependencies import Input, Output

	# Load the data.  JSON text is UTF-8 by specification, so pin the encoding
	# instead of relying on the platform's locale default.
	with open("combined_workshop_data.json", "r", encoding="utf-8") as f:
	    data = json.load(f)

	df_workshops = pd.DataFrame(data["workshops"])
	# The transpose turns the outer keys of "yearly_stats" into the row index,
	# which is then exposed as the "year" column.
	df_yearly_stats = (
	    pd.DataFrame(data["yearly_stats"])
	    .transpose()
	    .reset_index()
	    .rename(columns={"index": "year"})
	)

	# Convert relevant columns; values that cannot be parsed become NaN / NaT
	# (errors='coerce') instead of raising.
	for column in ('year', 'avg_duration_days', 'total_lectures'):
	    df_yearly_stats[column] = pd.to_numeric(df_yearly_stats[column], errors='coerce')
	for column in ('start_date', 'end_date'):
	    df_workshops[column] = pd.to_datetime(df_workshops[column], errors='coerce')
	df_workshops['day_of_week'] = df_workshops['start_date'].dt.dayofweek  # 0=Monday, 6=Sunday

	# --- Image 2: Event Duration Trends (REPLACEMENT FOR PROBLEMATIC CHART) ---
	def analyze_event_duration_trends(df_workshops):
	    """
	    Build a two-row figure showing how workshop content changed year over year.

	    Row 1 plots the yearly mean of a per-event duration proxy,
	    ``duration_days * 8 * 60 / total_events`` (minutes per event for an
	    8-hour day), with a shaded band of +/- one standard deviation.
	    Row 2 stacks the percentage of lectures against all other events and
	    overlays a least-squares trend line for the lecture percentage.

	    Args:
	        df_workshops: DataFrame with ``start_date``, ``duration_days``,
	            ``total_events`` and ``lecture_count`` columns.  It is not
	            modified; all derived columns are added to a copy.

	    Returns:
	        The assembled plotly figure.
	    """
	    # Copy first: adding 'year' / 'avg_event_duration_mins' to the caller's
	    # frame would leak into every chart built from it afterwards.
	    df = df_workshops.copy()
	    df['start_date'] = pd.to_datetime(df['start_date'], errors='coerce')
	    df['year'] = df['start_date'].dt.year

	    fig = make_subplots(
	        rows=2, cols=1,
	        subplot_titles=("Average Event Duration by Year", "Content Distribution by Year"),
	        vertical_spacing=0.15,
	        shared_xaxes=True
	    )

	    # --- First subplot: per-event duration proxy ---
	    # A workshop with zero events would divide by zero and produce inf, which
	    # poisons the yearly mean/std; treat it as missing instead.
	    event_counts = df['total_events'].replace(0, np.nan)
	    df['avg_event_duration_mins'] = (df['duration_days'] * 8 * 60) / event_counts

	    yearly_duration = (
	        df.groupby('year')['avg_event_duration_mins']
	        .agg(avg_duration_mins='mean', std_duration_mins='std')
	        .reset_index()
	    )

	    fig.add_trace(
	        go.Scatter(
	            x=yearly_duration['year'],
	            y=yearly_duration['avg_duration_mins'],
	            mode='lines+markers',
	            name='Avg Event Duration (mins)',
	            line=dict(color='#4c78a8', width=3),
	            marker=dict(size=8),
	            hovertemplate='<b>Year</b>: %{x}<br><b>Avg Duration</b>: %{y:.1f} mins<extra></extra>'
	        ),
	        row=1, col=1
	    )

	    # Shaded band of +/- one standard deviation.  The upper edge is drawn
	    # first so the lower edge can fill up to it with fill='tonexty'.
	    band_edges = (
	        (yearly_duration['avg_duration_mins'] + yearly_duration['std_duration_mins'], {}),
	        (yearly_duration['avg_duration_mins'] - yearly_duration['std_duration_mins'],
	         dict(fill='tonexty', fillcolor='rgba(76, 120, 168, 0.2)')),
	    )
	    for edge, fill_kwargs in band_edges:
	        fig.add_trace(
	            go.Scatter(
	                x=yearly_duration['year'],
	                y=edge,
	                mode='lines',
	                line=dict(width=0),
	                showlegend=False,
	                hoverinfo='skip',
	                **fill_kwargs
	            ),
	            row=1, col=1
	        )

	    # --- Second subplot: content distribution by year ---
	    yearly_content = df.groupby('year')[['lecture_count', 'total_events']].sum().reset_index()
	    yearly_content['filler_count'] = yearly_content['total_events'] - yearly_content['lecture_count']
	    # Years without any events have no meaningful percentage; keep them as NaN.
	    year_totals = yearly_content['total_events'].replace(0, np.nan)
	    yearly_content['lecture_percentage'] = yearly_content['lecture_count'] / year_totals * 100
	    yearly_content['filler_percentage'] = yearly_content['filler_count'] / year_totals * 100

	    # Stacked bars (barmode='stack' is set in the layout below).
	    bar_specs = (
	        ('lecture_percentage', 'Lectures', '#4c78a8', 'Lectures'),
	        ('filler_percentage', 'Discussions/Breaks/Other', '#ff9d45', 'Other Activities'),
	    )
	    for column, trace_name, color, hover_label in bar_specs:
	        fig.add_trace(
	            go.Bar(
	                x=yearly_content['year'],
	                y=yearly_content[column],
	                name=trace_name,
	                marker_color=color,
	                hovertemplate=f'<b>Year</b>: %{{x}}<br><b>{hover_label}</b>: %{{y:.1f}}%<extra></extra>'
	            ),
	            row=2, col=1
	        )

	    # Linear trend of the lecture percentage.  Fit only years that have a
	    # value, and only when there are at least two of them.
	    fit = yearly_content.dropna(subset=['lecture_percentage'])
	    if len(fit) > 1:
	        slope, intercept = np.polyfit(fit['year'], fit['lecture_percentage'], 1)
	        fig.add_trace(
	            go.Scatter(
	                x=fit['year'],
	                y=slope * fit['year'] + intercept,
	                mode='lines',
	                name=f'Lecture % Trend (slope: {slope:.2f}%/year)',
	                line=dict(color='red', dash='dash'),
	            ),
	            row=2, col=1
	        )

	    fig.update_layout(
	        title_text="Workshop Content Evolution Over Time",
	        height=800,
	        plot_bgcolor='white',
	        barmode='stack',
	        legend=dict(
	            orientation="h",
	            yanchor="bottom",
	            y=1.02,
	            xanchor="right",
	            x=1
	        )
	    )

	    grid = dict(showgrid=True, gridwidth=1, gridcolor='lightgrey')
	    fig.update_xaxes(title_text="Year", row=2, col=1, **grid)
	    fig.update_yaxes(title_text="Average Duration (minutes)", row=1, col=1, **grid)
	    fig.update_yaxes(title_text="Percentage (%)", row=2, col=1, **grid)

	    # Explanatory notes, one near the top of each subplot (paper coordinates).
	    notes = (
	        (0.95, "<b>Event Duration Trend Analysis</b><br>Shows whether events are getting shorter or longer over time"),
	        (0.45, "<b>Content Quality Indicator</b><br>Higher lecture percentage may indicate more substantive content"),
	    )
	    for y_pos, note in notes:
	        fig.add_annotation(
	            xref="paper", yref="paper",
	            x=0.01, y=y_pos,
	            text=note,
	            showarrow=False,
	            font=dict(size=10),
	            align="left",
	            bgcolor="rgba(255, 255, 255, 0.8)",
	            bordercolor="lightgrey",
	            borderwidth=1,
	            borderpad=4
	        )

	    return fig


	# --- Image 3: Day-of-Week Distribution ---
	def create_day_of_week_distribution(data):
	    """
	    Build a bar chart of event counts per weekday, labelled with percentages.

	    Args:
	        data: Parsed workshop data; reads
	            ``data['visualizations']['day_of_week_counts']``, a sequence of
	            counts that is paired in order with Monday..Sunday.

	    Returns:
	        The assembled plotly figure.
	    """
	    day_counts = data['visualizations']['day_of_week_counts']
	    days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
	    total_events = sum(day_counts)
	    # With no events at all every share is 0%; avoid dividing by zero.
	    percentages = [
	        (count / total_events) * 100 if total_events else 0.0
	        for count in day_counts
	    ]

	    fig = go.Figure(data=[go.Bar(
	        x=days,
	        y=day_counts,
	        marker_color='#5975A4',
	        text=[f'{p:.1f}%' for p in percentages],  # one decimal place, shown above each bar
	        textposition='outside',
	        hovertemplate=
	            '<b>Day</b>: %{x}<br>' +
	            '<b>Count</b>: %{y}<br>' +
	            '<b>Percent</b>: %{text}<extra></extra>'
	    )])

	    fig.update_layout(
	        title='Events by Day of Week',
	        xaxis_title='',  # no x-axis title; the day names are self-explanatory
	        yaxis_title='count',
	        plot_bgcolor='white',
	        xaxis=dict(tickangle=-45)  # rotate the day labels for readability
	    )
	    grid = dict(showgrid=True, gridwidth=1, gridcolor='lightgrey')
	    fig.update_yaxes(**grid)
	    fig.update_xaxes(**grid)

	    return fig


	# --- Image 4: Lecture Count by Year ---
	def create_lecture_count_by_year(df):
	    """
	    Build a bar chart of total lectures per year with a linear trendline and
	    a fixed-width yearly summary underneath the plot.

	    Args:
	        df: Yearly statistics, one row per year, with ``year``,
	            ``total_workshops``, ``total_events``, ``total_lectures`` and
	            ``avg_duration_days`` columns.

	    Returns:
	        The assembled plotly figure.
	    """
	    fig = go.Figure()

	    fig.add_trace(go.Bar(
	        x=df['year'], y=df['total_lectures'],
	        name='Lecture Count',
	        marker_color='steelblue',
	        hovertemplate=
	            '<b>Year</b>: %{x}<br>' +
	            '<b>Lecture Count</b>: %{y}<extra></extra>'
	    ))

	    # Trendline: fit only rows where both values are present, and only when
	    # there are at least two of them (NaN or a single point gives no usable fit).
	    fit = df[['year', 'total_lectures']].dropna()
	    if len(fit) > 1:
	        fit_years = fit['year'].astype(float)
	        trend = np.poly1d(np.polyfit(fit_years, fit['total_lectures'].astype(float), 1))
	        fig.add_trace(go.Scatter(
	            x=fit['year'], y=trend(fit_years), mode='lines',
	            name='Trendline',
	            line=dict(color='firebrick', dash='dash')
	        ))

	    fig.update_layout(title='Total Lecture Count by Year',
	                      xaxis_title='Year',
	                      yaxis_title='Lecture Count',
	                      showlegend=True)

	    def _whole(value):
	        """Format a count as an integer, tolerating missing values."""
	        return 'n/a' if pd.isna(value) else str(int(value))

	    # Plotly annotation text only understands a small inline-HTML subset
	    # (<br>, <b>, <i>, ...); <table>/<tr>/<td> would be shown as literal
	    # markup.  Lay the summary out as fixed-width lines joined by <br>.
	    lines = [
	        "<b>Yearly Statistical Summary</b>",
	        "",
	        f"{'Year':<6}{'Workshops':>11}{'Events':>9}{'Lectures':>10}{'Avg. Duration (Days)':>22}",
	    ]
	    lines.extend(
	        f"{_whole(row['year']):<6}{_whole(row['total_workshops']):>11}"
	        f"{_whole(row['total_events']):>9}{_whole(row['total_lectures']):>10}"
	        f"{row['avg_duration_days']:>22.2f}"
	        for _, row in df.iterrows()
	    )

	    fig.add_annotation(
	        text="<br>".join(lines),
	        xref="paper",
	        yref="paper",
	        x=0, xanchor="left",
	        y=-0.15, yanchor="top",  # hang below the axis so extra rows grow downward
	        showarrow=False,
	        font=dict(size=12, family="Courier New, monospace"),  # monospace keeps columns aligned
	        align="left",
	        bordercolor="black",
	        borderwidth=1,
	        borderpad=4,
	        bgcolor="white",
	        opacity=0.8
	    )

	    # Reserve bottom margin for every summary line (roughly 16 px each).
	    fig.update_layout(
	        margin=dict(l=20, r=20, t=40, b=max(160, 110 + 16 * len(lines))),
	    )

	    return fig


	# --- New Chart: Online Count by Year ---
	def create_online_count_by_year(df_workshops):
	    """
	    Build a bar chart of online events per year with a linear trendline and
	    a fixed-width yearly summary underneath the plot.

	    Args:
	        df_workshops: DataFrame with ``start_date``, ``online_count``,
	            ``total_events`` and ``code`` columns.  It is not modified; the
	            function works on a copy.

	    Returns:
	        The assembled plotly figure.
	    """
	    df = df_workshops.copy()
	    df['start_date'] = pd.to_datetime(df['start_date'], errors='coerce')
	    df['year'] = df['start_date'].dt.year

	    # Per-year totals; counting 'code' gives the number of workshops per year.
	    yearly_online = (
	        df.groupby('year')
	        .agg({'online_count': 'sum', 'total_events': 'sum', 'code': 'count'})
	        .rename(columns={'code': 'total_workshops'})
	        .reset_index()
	    )

	    # A year without any events has no meaningful online share; keep it as
	    # NaN rather than dividing by zero.
	    year_totals = yearly_online['total_events'].replace(0, np.nan)
	    yearly_online['online_percentage'] = (yearly_online['online_count'] / year_totals * 100).round(1)

	    fig = go.Figure()

	    fig.add_trace(go.Bar(
	        x=yearly_online['year'],
	        y=yearly_online['online_count'],
	        name='Online Event Count',
	        marker_color='#4c78a8',
	        hovertemplate=
	            '<b>Year</b>: %{x}<br>' +
	            '<b>Online Events</b>: %{y}<br>' +
	            '<b>Percentage</b>: %{text}%<extra></extra>',
	        text=yearly_online['online_percentage']
	    ))

	    # Linear-regression trendline; needs at least two points.
	    x_values = yearly_online['year']
	    y_values = yearly_online['online_count']
	    if len(x_values) > 1:
	        z = np.polyfit(x_values, y_values, 1)
	        p = np.poly1d(z)
	        fig.add_trace(go.Scatter(
	            x=x_values,
	            y=p(x_values),
	            mode='lines',
	            name=f'Trendline (slope: {z[0]:.1f} events/year)',
	            line=dict(color='firebrick', dash='dash')
	        ))

	    fig.update_layout(
	        title='Online Events by Year',
	        xaxis_title='Year',
	        yaxis_title='Online Event Count',
	        showlegend=True,
	        plot_bgcolor='white',
	        xaxis=dict(showgrid=True, gridwidth=1, gridcolor='lightgrey'),
	        yaxis=dict(showgrid=True, gridwidth=1, gridcolor='lightgrey')
	    )

	    # Plotly annotation text only understands a small inline-HTML subset
	    # (<br>, <b>, <i>, ...); <table>/<tr>/<td> would be shown as literal
	    # markup.  Lay the summary out as fixed-width lines joined by <br>.
	    lines = [
	        "<b>Yearly Online Events Summary</b>",
	        "",
	        f"{'Year':<6}{'Workshops':>11}{'Total Events':>14}{'Online Events':>15}{'Online %':>10}",
	    ]
	    for row in yearly_online.itertuples(index=False):
	        share = 'n/a' if pd.isna(row.online_percentage) else f"{row.online_percentage:.1f}%"
	        lines.append(
	            f"{int(row.year):<6}{int(row.total_workshops):>11}"
	            f"{int(row.total_events):>14}{int(row.online_count):>15}{share:>10}"
	        )

	    fig.add_annotation(
	        text="<br>".join(lines),
	        xref="paper",
	        yref="paper",
	        x=0, xanchor="left",
	        y=-0.15, yanchor="top",  # hang below the axis so extra rows grow downward
	        showarrow=False,
	        font=dict(size=12, family="Courier New, monospace"),  # monospace keeps columns aligned
	        align="left",
	        bordercolor="black",
	        borderwidth=1,
	        borderpad=4,
	        bgcolor="white",
	        opacity=0.8
	    )

	    # Reserve bottom margin for every summary line (roughly 16 px each).
	    fig.update_layout(
	        margin=dict(l=20, r=20, t=40, b=max(160, 110 + 16 * len(lines))),
	    )

	    return fig


	# --- Image 5: Online Workshop Trends (Filtered Bubble Chart) ---
	def analyze_online_workshop_trends(df_workshops):
	    """
	    Creates a bubble chart visualization showing only workshops with significant online events.
	    Filters out workshops with 2 or fewer online events to avoid showing placeholders.

	    Parameters:
	        df_workshops: DataFrame with one row per workshop. The columns
	            'start_date' and 'online_count' are read; 'code' is used for the
	            hover label when present. The frame is copied first, so the
	            caller's data is not modified.

	    Returns:
	        A plotly Figure with one bubble per dated workshop (x = start date,
	        y = online event count, size and colour scaled by that count), an
	        arrow annotation on the peak workshop and a summary text box.
	    """
	    # Ensure date fields are datetime and create a copy to avoid modifying original
	    df_workshops = df_workshops.copy()
	    df_workshops['start_date'] = pd.to_datetime(df_workshops['start_date'], errors='coerce')

	    # Keep only workshops with more than 2 online events, in chronological order
	    df_online = df_workshops[df_workshops['online_count'] > 2].sort_values('start_date')

	    # errors='coerce' turns unparseable dates into NaT. Such rows still count
	    # in the summary statistics, but they cannot be placed on the date axis
	    # (and NaT.strftime raises), so they are left out of the plotted set.
	    df_plot = df_online.dropna(subset=['start_date'])

	    fig = go.Figure()

	    if not df_plot.empty:
	        online_counts = df_plot['online_count']
	        codes = df_plot['code'] if 'code' in df_plot.columns else ['N/A'] * len(df_plot)
	        hover_text = [
	            f"Workshop: {code}<br>"
	            f"Date: {start:%Y-%m-%d}<br>"
	            f"Online Events: {count}"
	            for code, start, count in zip(codes, df_plot['start_date'], online_counts)
	        ]

	        # A single trace with array-valued size/colour: the colorscale maps an
	        # array of numbers, so all bubbles must share one trace to get a
	        # common colour range and exactly one colorbar.
	        fig.add_trace(go.Scatter(
	            x=df_plot['start_date'],
	            y=online_counts,
	            mode='markers',
	            marker=dict(
	                size=online_counts * 3,  # Simple multiplier for visibility
	                color=online_counts,
	                colorscale='Viridis',
	                colorbar=dict(title='Online Events'),
	                showscale=True,
	                line=dict(width=1, color='rgba(0,0,0,0.3)')
	            ),
	            text=hover_text,
	            hoverinfo='text',
	            showlegend=False,
	        ))

	        # Highlight the workshop with the most online events. The lookup is
	        # positional so a duplicated index cannot return several rows.
	        max_workshop = df_plot.iloc[online_counts.to_numpy().argmax()]

	        fig.add_annotation(
	            x=max_workshop['start_date'],
	            y=max_workshop['online_count'],
	            text=f"Peak: {max_workshop['online_count']} online events",
	            showarrow=True,
	            arrowhead=1,
	            arrowsize=1,
	            arrowwidth=2,
	            arrowcolor="black",
	            ax=20,
	            ay=-30,
	            font=dict(size=12),
	            bgcolor="rgba(255, 255, 255, 0.8)",
	            bordercolor="black",
	            borderwidth=1
	        )

	    # Update layout
	    fig.update_layout(
	        title="Significant Online Workshop Events (>2 online events)",
	        height=700,
	        plot_bgcolor='white',
	        xaxis=dict(
	            title="Workshop Date",
	            showgrid=True,
	            gridcolor='lightgrey',
	            tickangle=45
	        ),
	        yaxis=dict(
	            title="Number of Online Events",
	            showgrid=True,
	            gridcolor='lightgrey'
	        ),
	        margin=dict(l=50, r=50, t=80, b=50)
	    )

	    # Summary statistics, computed over every workshop that passed the filter
	    # (including any without a usable date)
	    total_workshops = len(df_workshops)
	    total_online_workshops = len(df_online)
	    # Workshops that have some online events but not more than 2
	    filtered_out = len(df_workshops[df_workshops['online_count'] > 0]) - len(df_online)
	    # mean()/max() of an empty selection are NaN, so fall back to 0 explicitly
	    total_online_events = df_online['online_count'].sum() if not df_online.empty else 0
	    avg_online_per_workshop = df_online['online_count'].mean() if not df_online.empty else 0
	    max_online = df_online['online_count'].max() if not df_online.empty else 0

	    stats_text = (
	        f"<b>Online Events Summary:</b><br>"
	        f"Total Workshops: {total_workshops}<br>"
	        f"Workshops with Significant Online Events (>2): {total_online_workshops}<br>"
	        f"Workshops with 1-2 Online Events (filtered out): {filtered_out}<br>"
	        f"Total Online Events (in significant workshops): {total_online_events}<br>"
	        f"Average Online Events per Workshop: {avg_online_per_workshop:.1f}<br>"
	        f"Maximum Online Events: {max_online}"
	    )

	    # Text box pinned to the top-left corner of the plot area (paper coordinates)
	    fig.add_annotation(
	        x=0.02,
	        y=0.98,
	        xref="paper",
	        yref="paper",
	        text=stats_text,
	        showarrow=False,
	        font=dict(size=12),
	        bgcolor="rgba(255, 255, 255, 0.8)",
	        bordercolor="black",
	        borderwidth=1,
	        borderpad=4,
	        align="left"
	    )

	    return fig


	# Create Dash app
	app = dash.Dash(__name__)

	# Generate all figures once at import time; the layout below embeds them as
	# static figures (no callbacks recompute them).
	fig2 = analyze_event_duration_trends(df_workshops)
	fig3 = create_day_of_week_distribution(data)
	fig4 = create_lecture_count_by_year(df_yearly_stats)
	fig5 = analyze_online_workshop_trends(df_workshops)
	fig6 = create_online_count_by_year(df_workshops)  # Generate the new figure

	# Define app layout with tabs. Style keys are camelCased, which is the form
	# Dash documents for style dictionaries.
	app.layout = html.Div([
	    html.H1(
	        "Workshop Analysis Dashboard",
	        style={'textAlign': 'center', 'marginBottom': '20px', 'marginTop': '20px'},
	    ),

	    # The online-events tab is selected when the page first loads
	    dcc.Tabs(id='dashboard-tabs', value='tab-online-events', children=[
	        dcc.Tab(label='Online Workshop Events', value='tab-online-events', children=[
	            dcc.Graph(figure=fig5, style={'height': '750px'})
	        ]),
	        dcc.Tab(label='Online Count by Year', value='tab-online-count', children=[
	            dcc.Graph(figure=fig6, style={'height': '850px'})
	        ]),
	        dcc.Tab(label='Event Duration Trends', value='tab-duration-trends', children=[
	            dcc.Graph(figure=fig2, style={'height': '850px'})
	        ]),
	        dcc.Tab(label='Day of Week Distribution', value='tab-day-distribution', children=[
	            dcc.Graph(figure=fig3, style={'height': '600px'})
	        ]),
	        dcc.Tab(label='Lecture Count by Year', value='tab-lecture-count', children=[
	            dcc.Graph(figure=fig4, style={'height': '850px'})
	        ]),
	    ]),

	    html.Div([
	        html.P(
	            "Workshop Data Analysis Dashboard",
	            style={'textAlign': 'center', 'marginTop': '20px', 'color': 'gray'},
	        )
	    ])
	])

	# Run the app
	if __name__ == '__main__':
	    # Run with debug mode OFF to avoid the error with Python 3.12.
	    # Newer Dash releases replace app.run_server with app.run; prefer app.run
	    # when it exists and fall back to run_server only on older releases.
	    run_app = getattr(app, 'run', None) or app.run_server
	    run_app(debug=False)