Skip to content

Instantly share code, notes, and snippets.

@bigsnarfdude
Last active March 15, 2025 07:17
Show Gist options
  • Save bigsnarfdude/9ff9ee2b6cbee9ba600548001b2f4f53 to your computer and use it in GitHub Desktop.
Save bigsnarfdude/9ff9ee2b6cbee9ba600548001b2f4f53 to your computer and use it in GitHub Desktop.
meetings.py
import json
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
# Load the data.  JSON is UTF-8 by specification, so the encoding is pinned
# rather than left to the platform default.
with open("combined_workshop_data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Build one frame from the "workshops" entry of the JSON document.
df_workshops = pd.DataFrame(data["workshops"])

# The outer keys of "yearly_stats" become the index after the transpose and
# are then exposed as a regular "year" column.
df_yearly_stats = pd.DataFrame(data["yearly_stats"]).transpose().reset_index()
df_yearly_stats.rename(columns={"index": "year"}, inplace=True)

# Convert relevant columns to numeric; values that cannot be parsed become NaN
# instead of raising.
for column in ("year", "avg_duration_days", "total_lectures"):
    df_yearly_stats[column] = pd.to_numeric(df_yearly_stats[column], errors="coerce")

# Parse both date columns; values that cannot be parsed become NaT.
for column in ("start_date", "end_date"):
    df_workshops[column] = pd.to_datetime(df_workshops[column], errors="coerce")

df_workshops["day_of_week"] = df_workshops["start_date"].dt.dayofweek  # 0=Monday, 6=Sunday
# --- Image 2: Event Duration Trends (REPLACEMENT FOR PROBLEMATIC CHART) ---
def analyze_event_duration_trends(df_workshops):
    """
    Build a two-row figure describing how workshop content changes by year.

    Row 1 plots, per year, the mean of a per-event duration proxy together
    with a band of +/- one standard deviation.  The proxy is
    ``duration_days * 8 * 60 / total_events`` (presumably assuming an 8-hour
    working day -- confirm with the data owner).

    Row 2 is a stacked bar of the lecture share versus the remaining share of
    events per year, with a least-squares linear trend of the lecture share.

    Args:
        df_workshops: DataFrame providing the columns ``start_date``,
            ``duration_days``, ``total_events`` and ``lecture_count``.
            The frame is not modified.

    Returns:
        The assembled Plotly figure.
    """
    # Work on a copy so the caller's frame does not silently gain columns,
    # matching the other chart builders in this file.
    df = df_workshops.copy()

    # Ensure date fields are datetime, then derive the grouping year.
    df['start_date'] = pd.to_datetime(df['start_date'], errors='coerce')
    df['year'] = df['start_date'].dt.year

    # Create figure with subplots
    fig = make_subplots(
        rows=2, cols=1,
        subplot_titles=("Average Event Duration by Year", "Content Distribution by Year"),
        vertical_spacing=0.15,
        shared_xaxes=True
    )

    # A workshop with zero events would divide by zero and yield inf, which
    # then poisons the yearly mean; treat it as "no data" instead.
    events_per_workshop = df['total_events'].replace(0, np.nan)
    df['avg_event_duration_mins'] = (df['duration_days'] * 8 * 60) / events_per_workshop

    # Mean and spread per year in a single aggregation.
    yearly_duration = (
        df.groupby('year')['avg_event_duration_mins']
        .agg(['mean', 'std'])
        .reset_index()
        .rename(columns={'mean': 'avg_duration_mins', 'std': 'std_duration_mins'})
    )
    # The sample std of a single workshop is NaN; use a zero-width band for
    # such years so the shaded area is not interrupted.
    yearly_duration['std_duration_mins'] = yearly_duration['std_duration_mins'].fillna(0)

    # Add line for average event duration
    fig.add_trace(
        go.Scatter(
            x=yearly_duration['year'],
            y=yearly_duration['avg_duration_mins'],
            mode='lines+markers',
            name='Avg Event Duration (mins)',
            line=dict(color='#4c78a8', width=3),
            marker=dict(size=8),
            hovertemplate='<b>Year</b>: %{x}<br><b>Avg Duration</b>: %{y:.1f} mins<extra></extra>'
        ),
        row=1, col=1
    )

    # Add the +/- one standard deviation band.  The upper edge is drawn first
    # (invisible) so the lower edge can fill up to it with 'tonexty'.
    upper_bound = yearly_duration['avg_duration_mins'] + yearly_duration['std_duration_mins']
    lower_bound = yearly_duration['avg_duration_mins'] - yearly_duration['std_duration_mins']
    fig.add_trace(
        go.Scatter(
            x=yearly_duration['year'],
            y=upper_bound,
            mode='lines',
            line=dict(width=0),
            showlegend=False,
            hoverinfo='skip'
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=yearly_duration['year'],
            y=lower_bound,
            mode='lines',
            line=dict(width=0),
            fill='tonexty',
            fillcolor='rgba(76, 120, 168, 0.2)',
            showlegend=False,
            hoverinfo='skip'
        ),
        row=1, col=1
    )

    # --- Second subplot: Content distribution by year ---
    yearly_content = df.groupby('year').agg({
        'lecture_count': 'sum',
        'total_events': 'sum'
    }).reset_index()
    yearly_content['filler_count'] = yearly_content['total_events'] - yearly_content['lecture_count']
    # Years without any events have no meaningful percentage; keep them NaN
    # rather than dividing by zero.
    yearly_totals = yearly_content['total_events'].replace(0, np.nan)
    yearly_content['lecture_percentage'] = yearly_content['lecture_count'] / yearly_totals * 100
    yearly_content['filler_percentage'] = yearly_content['filler_count'] / yearly_totals * 100

    # Add stacked bar chart
    fig.add_trace(
        go.Bar(
            x=yearly_content['year'],
            y=yearly_content['lecture_percentage'],
            name='Lectures',
            marker_color='#4c78a8',
            hovertemplate='<b>Year</b>: %{x}<br><b>Lectures</b>: %{y:.1f}%<extra></extra>'
        ),
        row=2, col=1
    )
    fig.add_trace(
        go.Bar(
            x=yearly_content['year'],
            y=yearly_content['filler_percentage'],
            name='Discussions/Breaks/Other',
            marker_color='#ff9d45',
            hovertemplate='<b>Year</b>: %{x}<br><b>Other Activities</b>: %{y:.1f}%<extra></extra>'
        ),
        row=2, col=1
    )

    # Add a trend line for lecture percentage.  A straight line needs at
    # least two distinct years, and np.polyfit cannot digest NaN, so fit only
    # on complete points and skip the trace when there are too few.
    trend_points = yearly_content[['year', 'lecture_percentage']].dropna()
    if trend_points['year'].nunique() > 1:
        x_values = trend_points['year'].astype(float)
        y_values = trend_points['lecture_percentage'].astype(float)
        slope, intercept = np.polyfit(x_values, y_values, 1)
        fig.add_trace(
            go.Scatter(
                x=x_values,
                y=slope * x_values + intercept,
                mode='lines',
                name=f'Lecture % Trend (slope: {slope:.2f}%/year)',
                line=dict(color='red', dash='dash'),
            ),
            row=2, col=1
        )

    # Update layout
    fig.update_layout(
        title_text="Workshop Content Evolution Over Time",
        height=800,
        plot_bgcolor='white',
        barmode='stack',
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        )
    )

    # Update axes
    fig.update_xaxes(title_text="Year", showgrid=True, gridwidth=1, gridcolor='lightgrey', row=2, col=1)
    fig.update_yaxes(title_text="Average Duration (minutes)", showgrid=True, gridwidth=1, gridcolor='lightgrey', row=1, col=1)
    fig.update_yaxes(title_text="Percentage (%)", showgrid=True, gridwidth=1, gridcolor='lightgrey', row=2, col=1)

    # Add annotations explaining the insights (positioned in paper coordinates)
    fig.add_annotation(
        xref="paper", yref="paper",
        x=0.01, y=0.95,
        text="<b>Event Duration Trend Analysis</b><br>Shows whether events are getting shorter or longer over time",
        showarrow=False,
        font=dict(size=10),
        align="left",
        bgcolor="rgba(255, 255, 255, 0.8)",
        bordercolor="lightgrey",
        borderwidth=1,
        borderpad=4
    )
    fig.add_annotation(
        xref="paper", yref="paper",
        x=0.01, y=0.45,
        text="<b>Content Quality Indicator</b><br>Higher lecture percentage may indicate more substantive content",
        showarrow=False,
        font=dict(size=10),
        align="left",
        bgcolor="rgba(255, 255, 255, 0.8)",
        bordercolor="lightgrey",
        borderwidth=1,
        borderpad=4
    )
    return fig
# --- Image 3: Day-of-Week Distribution ---
def create_day_of_week_distribution(data):
    """
    Build a bar chart of event counts per weekday.

    Args:
        data: Mapping that provides ``data['visualizations']['day_of_week_counts']``,
            a sequence of counts paired positionally with the labels
            Monday..Sunday.

    Returns:
        A Plotly figure with one bar per weekday, each labelled with its
        share of the total as a percentage.
    """
    day_counts = data['visualizations']['day_of_week_counts']
    days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    total_events = sum(day_counts)
    # With no events at all the shares are undefined; report 0% for every day
    # instead of raising ZeroDivisionError.
    if total_events:
        percentages = [(count / total_events) * 100 for count in day_counts]
    else:
        percentages = [0.0 for _ in day_counts]

    fig = go.Figure(data=[go.Bar(
        x=days,
        y=day_counts,
        marker_color='#5975A4',  # Use the correct blue color
        text=[f'{p:.1f}%' for p in percentages],  # Display percentages with one decimal place
        textposition='outside',  # Place the label above each bar
        hovertemplate=(
            '<b>Day</b>: %{x}<br>'
            '<b>Count</b>: %{y}<br>'
            '<b>Percent</b>: %{text}<extra></extra>'
        )
    )])
    fig.update_layout(
        title='Events by Day of Week',
        xaxis_title='',  # Remove x-axis title
        yaxis_title='count',
        plot_bgcolor='white',  # Set background color to white
        xaxis=dict(tickangle=-45)  # Rotate x-axis labels for readability
    )
    # Light grey gridlines on both axes
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey')
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey')
    return fig
# --- Image 4: Lecture Count by Year ---
def create_lecture_count_by_year(df):
    """
    Build a bar chart of total lectures per year with a linear trendline and
    a yearly summary attached below the plot as an annotation.

    Args:
        df: DataFrame providing the columns ``year``, ``total_lectures``,
            ``total_workshops``, ``total_events`` and ``avg_duration_days``.

    Returns:
        The assembled Plotly figure.
    """
    def _int_text(value):
        # int(NaN) raises ValueError; show a placeholder for missing values.
        return 'N/A' if pd.isna(value) else str(int(value))

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=df['year'],
        y=df['total_lectures'],
        name='Lecture Count',
        marker_color='steelblue',
        hovertemplate=(
            '<b>Year</b>: %{x}<br>'
            '<b>Lecture Count</b>: %{y}<extra></extra>'
        )
    ))

    # Add trendline.  np.polyfit cannot digest NaN and a line needs at least
    # two distinct years, so fit on complete rows only and skip the trace
    # otherwise (same guard idea as the online-count chart).
    fit_points = df[['year', 'total_lectures']].dropna()
    if fit_points['year'].nunique() > 1:
        fit_years = fit_points['year'].astype(float)
        z = np.polyfit(fit_years, fit_points['total_lectures'].astype(float), 1)
        p = np.poly1d(z)
        fig.add_trace(go.Scatter(
            x=fit_points['year'],
            y=p(fit_years),
            mode='lines',
            name='Trendline',
            line=dict(color='firebrick', dash='dash')
        ))

    fig.update_layout(title='Total Lecture Count by Year',
                      xaxis_title='Year',
                      yaxis_title='Lecture Count',
                      showlegend=True)

    # Create a string for the yearly summary (formatted as a table).
    # NOTE(review): Plotly annotation text documents support for only a small
    # set of HTML tags; confirm that <table> markup renders as intended.
    table_parts = [
        "<b>Yearly Statistical Summary</b><br><br>",
        "<table style='width:100%; border-collapse: collapse;'>",  # Start table
        "<tr style='border-bottom: 1px solid black;'>"
        "<th style='text-align:left;'>Year</th>"
        "<th style='text-align:right;'>Workshops</th>"
        "<th style='text-align:right;'>Events</th>"
        "<th style='text-align:right;'>Lectures</th>"
        "<th style='text-align:right;'>Avg. Duration (Days)</th>"
        "</tr>",  # Table header
    ]
    for _, row in df.iterrows():
        table_parts.append(
            "<tr style='border-bottom: 1px solid lightgrey;'>"
            f"<td style='text-align:left;'>{_int_text(row['year'])}</td>"
            f"<td style='text-align:right;'>{_int_text(row['total_workshops'])}</td>"
            f"<td style='text-align:right;'>{_int_text(row['total_events'])}</td>"
            f"<td style='text-align:right;'>{_int_text(row['total_lectures'])}</td>"
            f"<td style='text-align:right;'>{row['avg_duration_days']:.2f}</td>"
            "</tr>"
        )
    table_parts.append("</table>")  # End table
    summary_table_str = "".join(table_parts)

    # Add the table as an annotation below the plot area
    fig.add_annotation(
        text=summary_table_str,
        xref="paper",
        yref="paper",
        x=0,  # left edge
        y=-0.3,  # below the axes (paper coordinates)
        showarrow=False,
        font=dict(size=12),
        align="left",
        bordercolor="black",
        borderwidth=1,
        borderpad=4,
        bgcolor="white",
        opacity=0.8
    )
    # Enlarge the bottom margin to leave room for the annotation
    fig.update_layout(
        margin=dict(l=20, r=20, t=40, b=160),
    )
    return fig
# --- New Chart: Online Count by Year ---
def create_online_count_by_year(df_workshops):
    """
    Chart the number of online events per year.

    Draws one bar per year (summed ``online_count``), overlays a linear
    trendline when more than one year is present, and attaches a per-year
    summary below the plot as an annotation.  The input frame is copied
    before any column is added.
    """
    # Copy first, then derive the grouping year from the parsed start date.
    frame = df_workshops.copy()
    frame['start_date'] = pd.to_datetime(frame['start_date'], errors='coerce')
    frame['year'] = frame['start_date'].dt.year

    # Per-year totals; counting 'code' gives the number of workshops per year.
    per_year = (
        frame.groupby('year')
        .agg({
            'online_count': 'sum',
            'total_events': 'sum',
            'code': 'count',
        })
        .reset_index()
    )
    per_year.rename(columns={'code': 'total_workshops'}, inplace=True)

    # Online share of all events in the year, rounded to one decimal.
    online_share = per_year['online_count'] / per_year['total_events'] * 100
    per_year['online_percentage'] = online_share.round(1)

    years = per_year['year']
    online_totals = per_year['online_count']

    fig = go.Figure()

    # Bars: online events per year, with the share carried in `text`
    # so the hover template can show it.
    online_bars = go.Bar(
        x=years,
        y=online_totals,
        name='Online Event Count',
        marker_color='#4c78a8',  # Blue color
        hovertemplate=(
            '<b>Year</b>: %{x}<br>'
            '<b>Online Events</b>: %{y}<br>'
            '<b>Percentage</b>: %{text}%<extra></extra>'
        ),
        text=per_year['online_percentage'],
    )
    fig.add_trace(online_bars)

    # Linear trendline; needs at least 2 points.
    if len(years) > 1:
        coefficients = np.polyfit(years, online_totals, 1)
        trend = np.poly1d(coefficients)
        trend_line = go.Scatter(
            x=years,
            y=trend(years),
            mode='lines',
            name=f'Trendline (slope: {coefficients[0]:.1f} events/year)',
            line=dict(color='firebrick', dash='dash'),
        )
        fig.add_trace(trend_line)

    # Titles, legend and light grey grid on a white background.
    grid_style = dict(showgrid=True, gridwidth=1, gridcolor='lightgrey')
    fig.update_layout(
        title='Online Events by Year',
        xaxis_title='Year',
        yaxis_title='Online Event Count',
        showlegend=True,
        plot_bgcolor='white',
        xaxis=dict(grid_style),
        yaxis=dict(grid_style),
    )

    # Yearly summary rendered as table markup: heading, header row, one row
    # per year, closing tag.
    body_rows = "".join(
        f"<tr style='border-bottom: 1px solid lightgrey;'><td style='text-align:left;'>{int(row['year'])}</td><td style='text-align:right;'>{int(row['total_workshops'])}</td><td style='text-align:right;'>{int(row['total_events'])}</td><td style='text-align:right;'>{int(row['online_count'])}</td><td style='text-align:right;'>{row['online_percentage']:.1f}%</td></tr>"
        for _, row in per_year.iterrows()
    )
    summary_table_str = "".join([
        "<b>Yearly Online Events Summary</b><br><br>",
        "<table style='width:100%; border-collapse: collapse;'>",
        "<tr style='border-bottom: 1px solid black;'><th style='text-align:left;'>Year</th><th style='text-align:right;'>Workshops</th><th style='text-align:right;'>Total Events</th><th style='text-align:right;'>Online Events</th><th style='text-align:right;'>Online %</th></tr>",
        body_rows,
        "</table>",
    ])

    # Pin the summary to the left edge, below the axes (paper coordinates).
    fig.add_annotation(
        xref="paper",
        yref="paper",
        x=0,
        y=-0.3,
        text=summary_table_str,
        align="left",
        showarrow=False,
        font=dict(size=12),
        bgcolor="white",
        opacity=0.8,
        bordercolor="black",
        borderwidth=1,
        borderpad=4,
    )

    # Extra bottom margin so the annotation has room.
    fig.update_layout(margin=dict(l=20, r=20, t=40, b=160))
    return fig
# --- Image 5: Online Workshop Trends (Filtered Bubble Chart) ---
def analyze_online_workshop_trends(df_workshops):
    """
    Build a bubble chart of workshops with more than 2 online events.

    Each bubble is one workshop, placed at its start date (x) and online
    event count (y); bubble size and colour both scale with the online event
    count.  All bubbles live in a single trace so they share one colour scale
    and one colorbar.  The workshop with the most online events is called out
    with an arrow, and a summary box is pinned to the top-left corner.

    Workshops whose start date could not be parsed cannot be placed on the
    time axis and are left out of the plot, but they still count towards the
    summary statistics.

    Args:
        df_workshops: DataFrame providing ``start_date`` and ``online_count``;
            a ``code`` column is used for hover labels when present.
            The frame is not modified.

    Returns:
        The assembled Plotly figure.
    """
    # Copy to avoid modifying the caller's frame, then parse the dates.
    frame = df_workshops.copy()
    frame['start_date'] = pd.to_datetime(frame['start_date'], errors='coerce')
    online_counts = frame['online_count']

    # Workshops with 2 or fewer online events are treated as placeholders.
    significant = frame[online_counts > 2]

    # Plot only rows with a usable date, in chronological order.  NaT rows
    # would otherwise fail on strftime below.
    df_online = significant.dropna(subset=['start_date']).sort_values('start_date')

    fig = go.Figure()

    if not df_online.empty:
        if 'code' in df_online.columns:
            codes = df_online['code']
        else:
            codes = ['N/A'] * len(df_online)
        hover_text = [
            f"Workshop: {code}<br>"
            f"Date: {start.strftime('%Y-%m-%d')}<br>"
            f"Online Events: {count}"
            for code, start, count in zip(
                codes, df_online['start_date'], df_online['online_count']
            )
        ]

        # One trace for all bubbles: array-valued colour lets the colorscale
        # actually map across workshops and yields a single colorbar.
        fig.add_trace(go.Scatter(
            x=df_online['start_date'],
            y=df_online['online_count'],
            mode='markers',
            marker=dict(
                size=df_online['online_count'] * 3,  # Simple multiplier for visibility
                color=df_online['online_count'],
                colorscale='Viridis',
                colorbar=dict(title='Online Events'),
                showscale=True,
                line=dict(width=1, color='rgba(0,0,0,0.3)')
            ),
            text=hover_text,
            hoverinfo='text',
            showlegend=False,
        ))

        # Highlight workshop with maximum online events
        max_workshop = df_online.loc[df_online['online_count'].idxmax()]
        fig.add_annotation(
            x=max_workshop['start_date'],
            y=max_workshop['online_count'],
            text=f"Peak: {max_workshop['online_count']} online events",
            showarrow=True,
            arrowhead=1,
            arrowsize=1,
            arrowwidth=2,
            arrowcolor="black",
            ax=20,
            ay=-30,
            font=dict(size=12),
            bgcolor="rgba(255, 255, 255, 0.8)",
            bordercolor="black",
            borderwidth=1
        )

    # Update layout
    fig.update_layout(
        title="Significant Online Workshop Events (>2 online events)",
        height=700,
        plot_bgcolor='white',
        xaxis=dict(
            title="Workshop Date",
            showgrid=True,
            gridcolor='lightgrey',
            tickangle=45
        ),
        yaxis=dict(
            title="Number of Online Events",
            showgrid=True,
            gridcolor='lightgrey'
        ),
        margin=dict(l=50, r=50, t=80, b=50)
    )

    # Summary statistics focusing only on online events.
    total_workshops = len(frame)
    total_online_workshops = len(significant)
    # Counted directly: some online events, but not more than 2.
    filtered_out = int(((online_counts > 0) & (online_counts <= 2)).sum())
    if significant.empty:
        total_online_events = 0
        avg_online_per_workshop = 0
        max_online = 0
    else:
        total_online_events = significant['online_count'].sum()
        avg_online_per_workshop = significant['online_count'].mean()
        max_online = significant['online_count'].max()

    stats_text = (
        f"<b>Online Events Summary:</b><br>"
        f"Total Workshops: {total_workshops}<br>"
        f"Workshops with Significant Online Events (>2): {total_online_workshops}<br>"
        f"Workshops with 1-2 Online Events (filtered out): {filtered_out}<br>"
        f"Total Online Events (in significant workshops): {total_online_events}<br>"
        f"Average Online Events per Workshop: {avg_online_per_workshop:.1f}<br>"
        f"Maximum Online Events: {max_online}"
    )
    # Pin the summary box near the top-left corner (paper coordinates)
    fig.add_annotation(
        x=0.02,
        y=0.98,
        xref="paper",
        yref="paper",
        text=stats_text,
        showarrow=False,
        font=dict(size=12),
        bgcolor="rgba(255, 255, 255, 0.8)",
        bordercolor="black",
        borderwidth=1,
        borderpad=4,
        align="left"
    )
    return fig
# Create Dash app
app = dash.Dash(__name__)

# Generate all figures (numbering follows the "Image N" section headers;
# fig6 is the newer online-count-by-year chart)
fig2 = analyze_event_duration_trends(df_workshops)
fig3 = create_day_of_week_distribution(data)
fig4 = create_lecture_count_by_year(df_yearly_stats)
fig5 = analyze_online_workshop_trends(df_workshops)
fig6 = create_online_count_by_year(df_workshops)

# One entry per tab, in display order: (label, tab value, figure, graph height)
_tab_specs = [
    ('Online Workshop Events', 'tab-online-events', fig5, '750px'),
    ('Online Count by Year', 'tab-online-count', fig6, '850px'),
    ('Event Duration Trends', 'tab-duration-trends', fig2, '850px'),
    ('Day of Week Distribution', 'tab-day-distribution', fig3, '600px'),
    ('Lecture Count by Year', 'tab-lecture-count', fig4, '850px'),
]

# Page = centered heading, the tab strip (first tab selected), and a footer line
_heading = html.H1(
    "Workshop Analysis Dashboard",
    style={'textAlign': 'center', 'margin-bottom': '20px', 'margin-top': '20px'},
)
_tabs = dcc.Tabs(
    id='dashboard-tabs',
    value='tab-online-events',
    children=[
        dcc.Tab(label=tab_label, value=tab_value, children=[
            dcc.Graph(figure=tab_figure, style={'height': tab_height})
        ])
        for tab_label, tab_value, tab_figure, tab_height in _tab_specs
    ],
)
_footer = html.Div([
    html.P(
        "Workshop Data Analysis Dashboard",
        style={'textAlign': 'center', 'margin-top': '20px', 'color': 'gray'},
    )
])
app.layout = html.Div([_heading, _tabs, _footer])
# Run the app
if __name__ == '__main__':
    # Debug mode stays OFF to avoid the error seen with Python 3.12.
    # Newer Dash releases provide `app.run` and have retired `run_server`;
    # prefer `run` when it exists and fall back on older installs.  The
    # lookup is done this way so `run_server` is never touched when `run`
    # is available.
    start_server = getattr(app, 'run', None)
    if start_server is None:
        start_server = app.run_server
    start_server(debug=False)
@bigsnarfdude
Copy link
Author

Still need to figure out how to:

  1. update the JSON folder with old schedules on a weekly basis
  2. consolidate the data blob with the stats metadata
  3. generate the app

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment