Last active
March 15, 2025 07:17
-
-
Save bigsnarfdude/9ff9ee2b6cbee9ba600548001b2f4f53 to your computer and use it in GitHub Desktop.
meetings.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json

import dash
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from dash import dcc, html
from dash.dependencies import Input, Output
from plotly.subplots import make_subplots
# Load the data
# The encoding is pinned so decoding does not depend on the platform's default
# locale encoding (JSON interchange text is UTF-8).
with open("combined_workshop_data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

df_workshops = pd.DataFrame(data["workshops"])

# The outer keys of data["yearly_stats"] become rows after the transpose;
# reset_index() exposes them as a column, which is then named "year".
df_yearly_stats = pd.DataFrame(data["yearly_stats"]).transpose().reset_index()
df_yearly_stats.rename(columns={"index": "year"}, inplace=True)

# Convert relevant columns to numeric; unparseable values become NaN
for _column in ("year", "avg_duration_days", "total_lectures"):
    df_yearly_stats[_column] = pd.to_numeric(df_yearly_stats[_column], errors="coerce")

# Convert both date columns; unparseable values become NaT
for _column in ("start_date", "end_date"):
    df_workshops[_column] = pd.to_datetime(df_workshops[_column], errors="coerce")

df_workshops["day_of_week"] = df_workshops["start_date"].dt.dayofweek  # 0=Monday, 6=Sunday
# --- Image 2: Event Duration Trends (REPLACEMENT FOR PROBLEMATIC CHART) ---
def analyze_event_duration_trends(df_workshops):
    """
    Build a two-row figure describing how workshop content changed by year.

    Row 1 plots the yearly mean of a per-event duration proxy together with a
    +/- one standard deviation band. Row 2 plots stacked lecture vs.
    non-lecture percentages of all events plus a linear trend line for the
    lecture percentage.

    Args:
        df_workshops: DataFrame with ``start_date``, ``duration_days``,
            ``total_events`` and ``lecture_count`` columns. The frame is
            copied, so the caller's data is not modified.

    Returns:
        The assembled ``plotly.graph_objects.Figure``.
    """
    # Work on a copy so helper columns do not leak into the caller's frame
    # (the other chart builders in this file do the same).
    df = df_workshops.copy()
    df['start_date'] = pd.to_datetime(df['start_date'], errors='coerce')
    df['year'] = df['start_date'].dt.year

    # Create figure with subplots
    fig = make_subplots(
        rows=2, cols=1,
        subplot_titles=("Average Event Duration by Year", "Content Distribution by Year"),
        vertical_spacing=0.15,
        shared_xaxes=True
    )

    # Proxy for per-event duration: workshop length (days * 8 * 60 minutes)
    # spread over its events. Zero-event workshops are treated as missing so
    # they do not inject inf into the yearly mean.
    event_totals = df['total_events'].replace(0, np.nan)
    df['avg_event_duration_mins'] = (df['duration_days'] * 8 * 60) / event_totals

    # Mean and standard deviation per year in a single pass
    yearly_duration = (
        df.groupby('year')['avg_event_duration_mins']
        .agg(['mean', 'std'])
        .reset_index()
    )
    yearly_duration.columns = ['year', 'avg_duration_mins', 'std_duration_mins']

    # Add line for average event duration
    fig.add_trace(
        go.Scatter(
            x=yearly_duration['year'],
            y=yearly_duration['avg_duration_mins'],
            mode='lines+markers',
            name='Avg Event Duration (mins)',
            line=dict(color='#4c78a8', width=3),
            marker=dict(size=8),
            hovertemplate='<b>Year</b>: %{x}<br><b>Avg Duration</b>: %{y:.1f} mins<extra></extra>'
        ),
        row=1, col=1
    )

    # Add a +/- one standard deviation band. The std is NaN for years with a
    # single workshop; use 0 there so the band collapses onto the line
    # instead of leaving a gap in the fill.
    spread = yearly_duration['std_duration_mins'].fillna(0)
    upper_bound = yearly_duration['avg_duration_mins'] + spread
    lower_bound = yearly_duration['avg_duration_mins'] - spread
    fig.add_trace(
        go.Scatter(
            x=yearly_duration['year'],
            y=upper_bound,
            mode='lines',
            line=dict(width=0),
            showlegend=False,
            hoverinfo='skip'
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=yearly_duration['year'],
            y=lower_bound,
            mode='lines',
            line=dict(width=0),
            fill='tonexty',  # fills up to the previously added (upper) trace
            fillcolor='rgba(76, 120, 168, 0.2)',
            showlegend=False,
            hoverinfo='skip'
        ),
        row=1, col=1
    )

    # --- Second subplot: Content distribution by year ---
    yearly_content = df.groupby('year').agg({
        'lecture_count': 'sum',
        'total_events': 'sum'
    }).reset_index()
    yearly_content['filler_count'] = yearly_content['total_events'] - yearly_content['lecture_count']
    # Years without any events yield NaN percentages rather than inf.
    yearly_totals = yearly_content['total_events'].replace(0, np.nan)
    yearly_content['lecture_percentage'] = yearly_content['lecture_count'] / yearly_totals * 100
    yearly_content['filler_percentage'] = yearly_content['filler_count'] / yearly_totals * 100

    # Add stacked bar chart
    fig.add_trace(
        go.Bar(
            x=yearly_content['year'],
            y=yearly_content['lecture_percentage'],
            name='Lectures',
            marker_color='#4c78a8',
            hovertemplate='<b>Year</b>: %{x}<br><b>Lectures</b>: %{y:.1f}%<extra></extra>'
        ),
        row=2, col=1
    )
    fig.add_trace(
        go.Bar(
            x=yearly_content['year'],
            y=yearly_content['filler_percentage'],
            name='Discussions/Breaks/Other',
            marker_color='#ff9d45',
            hovertemplate='<b>Year</b>: %{x}<br><b>Other Activities</b>: %{y:.1f}%<extra></extra>'
        ),
        row=2, col=1
    )

    # Add a trend line for lecture percentage. A linear fit needs at least
    # two finite points, so it is skipped otherwise.
    trend_points = yearly_content.dropna(subset=['year', 'lecture_percentage'])
    if len(trend_points) > 1:
        x_values = trend_points['year']
        y_values = trend_points['lecture_percentage']
        slope, intercept = np.polyfit(x_values, y_values, 1)
        fig.add_trace(
            go.Scatter(
                x=x_values,
                y=slope * x_values + intercept,
                mode='lines',
                name=f'Lecture % Trend (slope: {slope:.2f}%/year)',
                line=dict(color='red', dash='dash'),
            ),
            row=2, col=1
        )

    # Update layout
    fig.update_layout(
        title_text="Workshop Content Evolution Over Time",
        height=800,
        plot_bgcolor='white',
        barmode='stack',
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        )
    )

    # Update axes
    fig.update_xaxes(title_text="Year", showgrid=True, gridwidth=1, gridcolor='lightgrey', row=2, col=1)
    fig.update_yaxes(title_text="Average Duration (minutes)", showgrid=True, gridwidth=1, gridcolor='lightgrey', row=1, col=1)
    fig.update_yaxes(title_text="Percentage (%)", showgrid=True, gridwidth=1, gridcolor='lightgrey', row=2, col=1)

    # Add annotations explaining the insights
    fig.add_annotation(
        xref="paper", yref="paper",
        x=0.01, y=0.95,
        text="<b>Event Duration Trend Analysis</b><br>Shows whether events are getting shorter or longer over time",
        showarrow=False,
        font=dict(size=10),
        align="left",
        bgcolor="rgba(255, 255, 255, 0.8)",
        bordercolor="lightgrey",
        borderwidth=1,
        borderpad=4
    )
    fig.add_annotation(
        xref="paper", yref="paper",
        x=0.01, y=0.45,
        text="<b>Content Quality Indicator</b><br>Higher lecture percentage may indicate more substantive content",
        showarrow=False,
        font=dict(size=10),
        align="left",
        bgcolor="rgba(255, 255, 255, 0.8)",
        bordercolor="lightgrey",
        borderwidth=1,
        borderpad=4
    )
    return fig
# --- Image 3: Day-of-Week Distribution ---
def create_day_of_week_distribution(data):
    """
    Build a bar chart of event counts per weekday, labelled with percentages.

    Args:
        data: Mapping whose ``data['visualizations']['day_of_week_counts']``
            is a sequence of counts. The counts are paired positionally with
            Monday..Sunday, so seven entries are presumably expected
            (verify against the data producer).

    Returns:
        The assembled ``plotly.graph_objects.Figure``.
    """
    day_counts = data['visualizations']['day_of_week_counts']
    days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    total_events = sum(day_counts)
    if total_events:
        percentages = [(count / total_events) * 100 for count in day_counts]
    else:
        # No events at all: report 0% per day instead of dividing by zero.
        percentages = [0.0 for _ in day_counts]

    fig = go.Figure(data=[go.Bar(
        x=days,
        y=day_counts,
        marker_color='#5975A4',
        text=[f'{p:.1f}%' for p in percentages],  # one decimal place
        textposition='outside',
        hovertemplate=
        '<b>Day</b>: %{x}<br>' +
        '<b>Count</b>: %{y}<br>' +
        '<b>Percent</b>: %{text}<extra></extra>'
    )])
    fig.update_layout(
        title='Events by Day of Week',
        xaxis_title='',  # Remove x-axis title
        yaxis_title='count',
        plot_bgcolor='white',
        xaxis=dict(tickangle=-45)  # Rotate x-axis labels for readability
    )
    # Light grey gridlines on both axes
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey')
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey')
    return fig
# --- Image 4: Lecture Count by Year ---
def create_lecture_count_by_year(df):
    """
    Build a bar chart of total lectures per year with a linear trend line
    and a yearly summary annotation below the plot.

    Args:
        df: DataFrame with ``year``, ``total_lectures``, ``total_workshops``,
            ``total_events`` and ``avg_duration_days`` columns. Rows whose
            ``year`` or ``total_lectures`` is missing are ignored; the
            caller's frame is not modified.

    Returns:
        The assembled ``plotly.graph_objects.Figure``.
    """
    # Rows lacking a year or a lecture count cannot be plotted, and would
    # break both the polynomial fit and the int() conversions below.
    stats = df.dropna(subset=['year', 'total_lectures'])

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=stats['year'],
        y=stats['total_lectures'],
        name='Lecture Count',
        marker_color='steelblue',
        hovertemplate=
        '<b>Year</b>: %{x}<br>' +
        '<b>Lecture Count</b>: %{y}<extra></extra>'
    ))

    # Add trendline (a linear fit needs at least 2 points)
    if len(stats) > 1:
        coefficients = np.polyfit(stats['year'], stats['total_lectures'], 1)
        trend = np.poly1d(coefficients)
        fig.add_trace(go.Scatter(
            x=stats['year'],
            y=trend(stats['year']),
            mode='lines',
            name='Trendline',
            line=dict(color='firebrick', dash='dash')
        ))

    fig.update_layout(
        title='Total Lecture Count by Year',
        xaxis_title='Year',
        yaxis_title='Lecture Count',
        showlegend=True
    )

    # Create a string for the yearly summary (formatted as a table).
    # NOTE(review): Plotly annotation text is documented to support only a
    # small subset of HTML tags (<br>, <b>, <i>, ...); this <table> markup may
    # not render as an actual table -- confirm in the browser.
    table_parts = [
        "<b>Yearly Statistical Summary</b><br><br>",
        "<table style='width:100%; border-collapse: collapse;'>",  # Start table
        "<tr style='border-bottom: 1px solid black;'>"
        "<th style='text-align:left;'>Year</th>"
        "<th style='text-align:right;'>Workshops</th>"
        "<th style='text-align:right;'>Events</th>"
        "<th style='text-align:right;'>Lectures</th>"
        "<th style='text-align:right;'>Avg. Duration (Days)</th></tr>",  # Table header
    ]
    table_parts.extend(
        f"<tr style='border-bottom: 1px solid lightgrey;'>"
        f"<td style='text-align:left;'>{int(row['year'])}</td>"
        f"<td style='text-align:right;'>{int(row['total_workshops'])}</td>"
        f"<td style='text-align:right;'>{int(row['total_events'])}</td>"
        f"<td style='text-align:right;'>{int(row['total_lectures'])}</td>"
        f"<td style='text-align:right;'>{row['avg_duration_days']:.2f}</td></tr>"
        for _, row in stats.iterrows()
    )
    table_parts.append("</table>")  # End table
    summary_table_str = "".join(table_parts)

    # Add the table as an annotation
    fig.add_annotation(
        text=summary_table_str,
        xref="paper",
        yref="paper",
        x=0,  # left
        y=-0.3,  # below the plot area
        showarrow=False,
        font=dict(size=12),
        align="left",
        bordercolor="black",
        borderwidth=1,
        borderpad=4,
        bgcolor="white",
        opacity=0.8
    )
    # Extra bottom margin leaves room for the annotation placed below the axes
    fig.update_layout(
        margin=dict(l=20, r=20, t=40, b=160),
    )
    return fig
# --- New Chart: Online Count by Year ---
def create_online_count_by_year(df_workshops):
    """
    Build a bar chart of total online events per year with a linear trend
    line and a yearly summary annotation below the plot.

    Args:
        df_workshops: DataFrame with ``start_date``, ``online_count``,
            ``total_events`` and ``code`` columns. The frame is copied, so
            the caller's data is not modified.

    Returns:
        The assembled ``plotly.graph_objects.Figure``.
    """
    # Ensure date fields are datetime and work on a copy
    df_workshops = df_workshops.copy()
    df_workshops['start_date'] = pd.to_datetime(df_workshops['start_date'], errors='coerce')
    df_workshops['year'] = df_workshops['start_date'].dt.year

    # Aggregate per year. Workshops are tallied with 'size' so a workshop
    # whose code is null is still counted ('count' would skip it).
    yearly_online = df_workshops.groupby('year').agg(
        online_count=('online_count', 'sum'),
        total_events=('total_events', 'sum'),
        total_workshops=('code', 'size'),
    ).reset_index()

    # Percentage of online events per year; years without any events report
    # 0.0 instead of inf/NaN from a division by zero.
    event_totals = yearly_online['total_events'].replace(0, np.nan)
    yearly_online['online_percentage'] = (
        (yearly_online['online_count'] / event_totals * 100).round(1).fillna(0.0)
    )

    # Create figure
    fig = go.Figure()

    # Add bar chart for online count
    fig.add_trace(go.Bar(
        x=yearly_online['year'],
        y=yearly_online['online_count'],
        name='Online Event Count',
        marker_color='#4c78a8',  # Blue color
        hovertemplate=
        '<b>Year</b>: %{x}<br>' +
        '<b>Online Events</b>: %{y}<br>' +
        '<b>Percentage</b>: %{text}%<extra></extra>',
        text=yearly_online['online_percentage']
    ))

    # Add trendline using linear regression
    x_values = yearly_online['year']
    y_values = yearly_online['online_count']
    if len(x_values) > 1:  # Need at least 2 points for a trend line
        coefficients = np.polyfit(x_values, y_values, 1)
        trend = np.poly1d(coefficients)
        fig.add_trace(go.Scatter(
            x=x_values,
            y=trend(x_values),
            mode='lines',
            name=f'Trendline (slope: {coefficients[0]:.1f} events/year)',
            line=dict(color='firebrick', dash='dash')
        ))

    # Update layout
    fig.update_layout(
        title='Online Events by Year',
        xaxis_title='Year',
        yaxis_title='Online Event Count',
        showlegend=True,
        plot_bgcolor='white',
        xaxis=dict(showgrid=True, gridwidth=1, gridcolor='lightgrey'),
        yaxis=dict(showgrid=True, gridwidth=1, gridcolor='lightgrey')
    )

    # Create a string for the yearly summary (formatted as a table).
    # NOTE(review): Plotly annotation text is documented to support only a
    # small subset of HTML tags (<br>, <b>, <i>, ...); this <table> markup may
    # not render as an actual table -- confirm in the browser.
    table_parts = [
        "<b>Yearly Online Events Summary</b><br><br>",
        "<table style='width:100%; border-collapse: collapse;'>",
        "<tr style='border-bottom: 1px solid black;'>"
        "<th style='text-align:left;'>Year</th>"
        "<th style='text-align:right;'>Workshops</th>"
        "<th style='text-align:right;'>Total Events</th>"
        "<th style='text-align:right;'>Online Events</th>"
        "<th style='text-align:right;'>Online %</th></tr>",
    ]
    table_parts.extend(
        f"<tr style='border-bottom: 1px solid lightgrey;'>"
        f"<td style='text-align:left;'>{int(row['year'])}</td>"
        f"<td style='text-align:right;'>{int(row['total_workshops'])}</td>"
        f"<td style='text-align:right;'>{int(row['total_events'])}</td>"
        f"<td style='text-align:right;'>{int(row['online_count'])}</td>"
        f"<td style='text-align:right;'>{row['online_percentage']:.1f}%</td></tr>"
        for _, row in yearly_online.iterrows()
    )
    table_parts.append("</table>")
    summary_table_str = "".join(table_parts)

    # Add the table as an annotation
    fig.add_annotation(
        text=summary_table_str,
        xref="paper",
        yref="paper",
        x=0,  # left
        y=-0.3,  # below the plot area
        showarrow=False,
        font=dict(size=12),
        align="left",
        bordercolor="black",
        borderwidth=1,
        borderpad=4,
        bgcolor="white",
        opacity=0.8
    )
    # Extra bottom margin leaves room for the annotation placed below the axes
    fig.update_layout(
        margin=dict(l=20, r=20, t=40, b=160),
    )
    return fig
# --- Image 5: Online Workshop Trends (Filtered Bubble Chart) ---
def analyze_online_workshop_trends(df_workshops):
    """
    Build a bubble chart of workshops with more than 2 online events.

    Each bubble sits at (start date, online event count); its size and colour
    both follow the online event count. The busiest workshop is annotated
    and a summary box is placed in the top-left corner.

    Args:
        df_workshops: DataFrame with ``start_date`` and ``online_count``
            columns and, optionally, a ``code`` column used in hover text.
            The frame is copied, so the caller's data is not modified.

    Returns:
        The assembled ``plotly.graph_objects.Figure``.
    """
    # Ensure date fields are datetime and work on a copy
    df_workshops = df_workshops.copy()
    df_workshops['start_date'] = pd.to_datetime(df_workshops['start_date'], errors='coerce')
    online_counts = df_workshops['online_count']

    # Keep workshops with more than 2 online events (fewer are treated as
    # placeholders). Rows without a parseable start date are dropped as well:
    # they cannot be placed on the time axis and NaT cannot be formatted.
    significant = (online_counts > 2) & df_workshops['start_date'].notna()
    df_online = df_workshops[significant].sort_values('start_date')

    # Create a bubble chart figure
    fig = go.Figure()

    if not df_online.empty:
        if 'code' in df_online.columns:
            codes = df_online['code'].fillna('N/A')
        else:
            codes = pd.Series('N/A', index=df_online.index)
        hover_text = [
            f"Workshop: {code}<br>"
            f"Date: {start.strftime('%Y-%m-%d')}<br>"
            f"Online Events: {count}"
            for code, start, count in zip(codes, df_online['start_date'], df_online['online_count'])
        ]
        # A single trace with array-valued colour gives every bubble a colour
        # on one shared Viridis scale and exactly one colorbar; one trace per
        # workshop would give each bubble its own degenerate scale and stack
        # one colorbar per workshop.
        fig.add_trace(go.Scatter(
            x=df_online['start_date'],
            y=df_online['online_count'],
            mode='markers',
            marker=dict(
                size=df_online['online_count'] * 3,  # Simple multiplier for visibility
                color=df_online['online_count'],
                colorscale='Viridis',
                colorbar=dict(title='Online Events'),
                showscale=True,
                line=dict(width=1, color='rgba(0,0,0,0.3)')
            ),
            text=hover_text,
            hoverinfo='text',
            showlegend=False,
        ))

        # Highlight workshop with maximum online events
        max_workshop = df_online.loc[df_online['online_count'].idxmax()]
        fig.add_annotation(
            x=max_workshop['start_date'],
            y=max_workshop['online_count'],
            text=f"Peak: {max_workshop['online_count']} online events",
            showarrow=True,
            arrowhead=1,
            arrowsize=1,
            arrowwidth=2,
            arrowcolor="black",
            ax=20,
            ay=-30,
            font=dict(size=12),
            bgcolor="rgba(255, 255, 255, 0.8)",
            bordercolor="black",
            borderwidth=1
        )

    # Update layout
    fig.update_layout(
        title="Significant Online Workshop Events (>2 online events)",
        height=700,
        plot_bgcolor='white',
        xaxis=dict(
            title="Workshop Date",
            showgrid=True,
            gridcolor='lightgrey',
            tickangle=45
        ),
        yaxis=dict(
            title="Number of Online Events",
            showgrid=True,
            gridcolor='lightgrey'
        ),
        margin=dict(l=50, r=50, t=80, b=50)
    )

    # Summary statistics focusing only on online events
    total_workshops = len(df_workshops)
    total_online_workshops = len(df_online)
    # Workshops that have online events but not more than 2 of them
    filtered_out = int(((online_counts > 0) & (online_counts <= 2)).sum())
    total_online_events = df_online['online_count'].sum() if not df_online.empty else 0
    avg_online_per_workshop = df_online['online_count'].mean() if not df_online.empty else 0
    max_online = df_online['online_count'].max() if not df_online.empty else 0
    stats_text = (
        f"<b>Online Events Summary:</b><br>"
        f"Total Workshops: {total_workshops}<br>"
        f"Workshops with Significant Online Events (>2): {total_online_workshops}<br>"
        f"Workshops with 1-2 Online Events (filtered out): {filtered_out}<br>"
        f"Total Online Events (in significant workshops): {total_online_events}<br>"
        f"Average Online Events per Workshop: {avg_online_per_workshop:.1f}<br>"
        f"Maximum Online Events: {max_online}"
    )
    fig.add_annotation(
        x=0.02,
        y=0.98,
        xref="paper",
        yref="paper",
        text=stats_text,
        showarrow=False,
        font=dict(size=12),
        bgcolor="rgba(255, 255, 255, 0.8)",
        bordercolor="black",
        borderwidth=1,
        borderpad=4,
        align="left"
    )
    return fig
# Create Dash app
app = dash.Dash(__name__)

# Generate all figures once at start-up; the layout below embeds them as
# static graphs (no callbacks are registered).
fig2 = analyze_event_duration_trends(df_workshops)
fig3 = create_day_of_week_distribution(data)
fig4 = create_lecture_count_by_year(df_yearly_stats)
fig5 = analyze_online_workshop_trends(df_workshops)
fig6 = create_online_count_by_year(df_workshops)  # Generate the new figure

# Define app layout with tabs.
# Style dictionaries use camelCased CSS property names, as Dash documents
# (marginTop rather than margin-top), matching the existing textAlign key.
app.layout = html.Div([
    html.H1(
        "Workshop Analysis Dashboard",
        style={'textAlign': 'center', 'marginBottom': '20px', 'marginTop': '20px'},
    ),
    dcc.Tabs(id='dashboard-tabs', value='tab-online-events', children=[
        dcc.Tab(label='Online Workshop Events', value='tab-online-events', children=[
            dcc.Graph(figure=fig5, style={'height': '750px'})
        ]),
        dcc.Tab(label='Online Count by Year', value='tab-online-count', children=[
            dcc.Graph(figure=fig6, style={'height': '850px'})
        ]),
        dcc.Tab(label='Event Duration Trends', value='tab-duration-trends', children=[
            dcc.Graph(figure=fig2, style={'height': '850px'})
        ]),
        dcc.Tab(label='Day of Week Distribution', value='tab-day-distribution', children=[
            dcc.Graph(figure=fig3, style={'height': '600px'})
        ]),
        dcc.Tab(label='Lecture Count by Year', value='tab-lecture-count', children=[
            dcc.Graph(figure=fig4, style={'height': '850px'})
        ]),
    ]),
    html.Div([
        html.P(
            "Workshop Data Analysis Dashboard",
            style={'textAlign': 'center', 'marginTop': '20px', 'color': 'gray'},
        )
    ])
])

# Run the app
if __name__ == '__main__':
    # Newer Dash releases replace app.run_server with app.run; prefer run
    # when it exists and fall back to run_server on older installations.
    run_app = getattr(app, 'run', None) or app.run_server
    # Run with debug mode OFF to avoid the error with Python 3.12
    run_app(debug=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
gotta figure out how to