Created
June 9, 2025 12:43
-
-
Save JamesClarke7283/3d628add2e0ed33b4a21adda55daea44 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Copyright 2025 James David Clarke <[email protected]> | |
Licensed under the Apache License, Version 2.0 (the "License"); | |
you may not use this file except in compliance with the License. | |
You may obtain a copy of the License at | |
http://www.apache.org/licenses/LICENSE-2.0 | |
Unless required by applicable law or agreed to in writing, software | |
distributed under the License is distributed on an "AS IS" BASIS, | |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
See the License for the specific language governing permissions and | |
limitations under the License. | |
""" | |
# Idea & Chart inspired from Gergely Orosz's blog post: https://blog.pragmaticengineer.com/stack-overflow-is-almost-dead/ | |
# SQL Query used is from Theodore R. Smith's Gist: https://gist.github.com/hopeseekr/f522e380e35745bd5bdc3269a9f0b132 | |
# Generated 'QueryResults.csv' from: https://data.stackexchange.com/stackoverflow/query/edit/1903717 | |
# DISCLAIMER: This is just a reproduction of what has already been done in the sources above on the topic of Stack Overflow's declining use as a Q&A platform; it is not a definitive metric of the site's traffic or popularity. Its sole purpose is to make it easier for people to produce a similar chart with any future data.
# NOTE: I made it ignore the current month to avoid inaccurate readings. | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import matplotlib.dates as mdates | |
# Set the file path to your local CSV file.
file_path = 'QueryResults.csv'


def prepare_monthly_questions(raw, now=None):
    """Turn the raw query result into a date-sorted frame of complete months.

    Args:
        raw: DataFrame with 'Year' and 'Month' columns plus the monthly
            question count in 'NumQuestions' (a column already named
            'Questions' is accepted as well). It is not modified.
        now: Optional timezone-naive timestamp (or anything
            ``pd.Timestamp`` accepts) treated as the current moment.
            Defaults to ``pd.Timestamp.now()``. Exists so the
            "current month" cut-off can be pinned in tests.

    Returns:
        A new DataFrame in which 'NumQuestions' is renamed to 'Questions',
        a 'Date' column (first day of each month) is added, rows are sorted
        by 'Date' ascending, and every row dated on or after the first day
        of the month containing ``now`` is dropped.

    Raises:
        KeyError: If 'Year', 'Month' or the question-count column is missing.
    """
    df = raw.rename(columns={'NumQuestions': 'Questions'})

    # Validate up front: rename() silently ignores a missing 'NumQuestions',
    # which would otherwise only surface later as a confusing plot-time error.
    missing = [name for name in ('Year', 'Month') if name not in df.columns]
    if 'Questions' not in df.columns:
        missing.append('NumQuestions')
    if missing:
        raise KeyError(', '.join(missing))

    # to_datetime assembles dates from year/month/day columns; day=1 anchors
    # each row to the first day of its month.
    df['Date'] = pd.to_datetime(df[['Year', 'Month']].assign(day=1))
    df = df.sort_values('Date', ascending=True)

    # Exclude the current, incomplete month.
    reference = pd.Timestamp.now() if now is None else pd.Timestamp(now)
    first_day_of_current_month = reference.to_period('M').to_timestamp()
    return df[df['Date'] < first_day_of_current_month]


def load_monthly_questions(csv_path, now=None):
    """Read the CSV at ``csv_path`` and prepare it for plotting.

    Args:
        csv_path: Path of the CSV exported from the data.stackexchange query.
        now: Forwarded to :func:`prepare_monthly_questions`.

    Returns:
        The prepared DataFrame (see :func:`prepare_monthly_questions`).

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        KeyError: If a required column is missing from the CSV.
    """
    return prepare_monthly_questions(pd.read_csv(csv_path), now=now)


def plot_monthly_questions(df):
    """Draw the "Monthly questions asked on Stack Overflow" chart.

    Args:
        df: Prepared DataFrame with 'Date' and 'Questions' columns, sorted by
            'Date' ascending.

    Returns:
        The ``(fig, ax)`` pair holding the chart; the caller decides whether
        to show or save it.

    Raises:
        ValueError: If ``df`` has no rows (there is no last point to annotate).
    """
    if df.empty:
        raise ValueError("No complete months of data are available to plot.")

    # --- Plotting & Style Adjustments ---
    plt.style.use('default')
    fig, ax = plt.subplots(figsize=(16, 8))

    # Add the chart title.
    ax.set_title("Monthly questions asked on Stack Overflow", fontsize=18, fontweight='bold', pad=20)

    # Plot the main data line and the blue filling under it.
    ax.plot(df['Date'], df['Questions'], linestyle='-', color='#1f77b4', linewidth=2)
    ax.fill_between(df['Date'], df['Questions'], alpha=0.2, color='#1f77b4')

    # Add a dotted horizontal line at the January 2009 value, if present.
    comparison_date = pd.Timestamp('2009-01-01')
    comparison_rows = df.loc[df['Date'] == comparison_date, 'Questions']
    has_comparison_line = not comparison_rows.empty
    if has_comparison_line:
        ax.axhline(y=comparison_rows.iloc[0], color='red', linestyle='--', linewidth=1.5, label='January 1st 2009')
    else:
        print("Warning: Could not find data for January 2009 to draw the comparison line.")

    # Annotate the last data point with its date formatted as DD/MM.
    last_date = df['Date'].iloc[-1]
    last_value = df['Questions'].iloc[-1]
    ax.annotate(
        text=last_date.strftime('%d/%m'),
        xy=(last_date, last_value),
        # Position the text above the point.
        xytext=(0, 35),
        textcoords='offset points',
        fontsize=10,
        color='grey',
        # Add a simple line (arrow without a head) connecting text and point.
        arrowprops=dict(arrowstyle='-', color='grey'),
    )

    # Format the Y-axis: one tick every 50,000 questions, starting at zero.
    ax.set_ylabel('Number of Questions', fontsize=12)
    y_max = df['Questions'].max()
    ax.set_yticks(range(0, int(y_max) + 50000, 50000))
    ax.set_ylim(bottom=0)

    # Format the X-axis: labelled yearly major ticks.
    ax.set_xlabel('Year', fontsize=12)
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    # Set quarterly minor ticks (divots) WITHOUT labels.
    ax.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=[1, 4, 7, 10]))

    # Style adjustments.
    ax.yaxis.grid(True)
    ax.xaxis.grid(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    # The comparison line is the only labelled artist; calling legend()
    # without it makes matplotlib warn about an empty legend.
    if has_comparison_line:
        ax.legend()

    return fig, ax


def main():
    """Load ``file_path``, plot the chart and show it, reporting errors as text."""
    # Only the loading step can legitimately fail with a missing file or a
    # missing column, so the specific handlers are scoped to it.
    try:
        df = load_monthly_questions(file_path)
    except FileNotFoundError:
        print(f"ERROR: The file '{file_path}' was not found in the same directory as the script.")
        return
    except KeyError as e:
        print(f"ERROR: A required column was not found in the CSV: {e}")
        print("Please ensure your CSV file contains the columns 'Year', 'Month', and 'NumQuestions'.")
        return
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return

    if df.empty:
        print(f"ERROR: The file '{file_path}' contains no complete months of data to plot.")
        return

    # Top-level boundary of the script: keep the friendly message instead of
    # a traceback for anything that goes wrong while drawing.
    try:
        plot_monthly_questions(df)
        plt.tight_layout()
        plt.show()
    except Exception as e:
        print(f"An unexpected error occurred: {e}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Stack Overflow, Monthly Questions Asked, Matplotlib chart.
Idea & Chart reproduced from Gergely Orosz's blog post: https://blog.pragmaticengineer.com/stack-overflow-is-almost-dead/
SQL Query used is from Theodore R. Smith's Gist: https://gist.github.com/hopeseekr/f522e380e35745bd5bdc3269a9f0b132
Generated 'QueryResults.csv' from: https://data.stackexchange.com/stackoverflow/query/edit/1903717
DISCLAIMER: This is just a reproduction of what has already been done in the sources above on the topic of Stack Overflow's declining use as a Q&A platform; it is not a definitive metric of the site's traffic or popularity. Its sole purpose is to make it easier for people to produce a similar chart with any future data.
NOTE: I made it ignore the current month to avoid inaccurate readings.