Skip to content

Instantly share code, notes, and snippets.

@cavedave
Last active August 4, 2025 11:37
Show Gist options
  • Save cavedave/a389166c145b0390a10f8c42ce1f466d to your computer and use it in GitHub Desktop.
Save cavedave/a389166c145b0390a10f8c42ce1f466d to your computer and use it in GitHub Desktop.
how much are bls estimates out by?
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import datetime
# data from https://www.bls.gov/news.release/empsit.nr0.htm
excel_file = '/content/cesvin00.xlsx'
df_data = pd.read_excel(excel_file, sheet_name='Data', header=2)
# Reference‐months
date_cols = [c for c in df_data.columns if isinstance(c, str) and '_' in c]
# Grab first & final rows
label_col = 'Relating to Period'
first_row = df_data[df_data[label_col]
.str.contains("First Preliminary", na=False)].iloc[0]
third_row = df_data[df_data[label_col]
.str.contains("Third Preliminary", na=False)].iloc[0]
# Build the error DataFrame
df = pd.DataFrame({
'Reference Month': date_cols, # Keep as string initially for manual parsing
'First' : first_row [date_cols].astype(float).values,
'Final' : third_row [date_cols].astype(float).values
})
df['ErrorPct'] = (df['Final'] - df['First']) / df['First'] * 100
# Manually parse reference-month strings into correct datetimes
def parse_ref(s):
mon, yy = s.split('_')
year = int(yy)
# Years 00–25 → 2000–2025; 26–99 → 1926–1999
full_year = 2000 + year if year <= 25 else 1900 + year
month_num = datetime.datetime.strptime(mon, '%b').month
return datetime.date(full_year, month_num, 1)
df['Reference Month'] = df['Reference Month'].apply(parse_ref)
# — FILTER to June 2003 through last available (Dec 2024) — CHANGE THIS In FUTURE
start, end = datetime.date(2003, 1, 1), datetime.date(2024, 12, 1)
df_plot = df[(df['Reference Month'] >= start) & (df['Reference Month'] <= end)].copy()
import pandas as pd
# First, make sure Reference Month is datetime:
df_plot['Reference Month'] = pd.to_datetime(df_plot['Reference Month'])
# Build a list of the new rows you need:
new_rows = [
{'Reference Month': '2025-02-01', 'First': 159218.0, 'Final': 159155.0, 'ErrorPct': -0.039568},
{'Reference Month': '2025-03-01', 'First': 159398.0, 'Final': 159275.0, 'ErrorPct': -0.077165},
{'Reference Month': '2025-04-01', 'First': 159517.0, 'Final': 159433.0, 'ErrorPct': -0.052659},
{'Reference Month': '2025-05-01', 'First': 159561.0, 'Final': 159452.0, 'ErrorPct': -0.068312},
{'Reference Month': '2025-06-01', 'First': 159724.0, 'Final': 159466.0, 'ErrorPct': -0.161529},
]
# Turn them into a DataFrame (ensuring the date column is parsed):
df_new = pd.DataFrame(new_rows)
df_new['Reference Month'] = pd.to_datetime(df_new['Reference Month'])
# Append and then drop any duplicates (keeping the appended rows):
df_plot = pd.concat([df_plot, df_new], ignore_index=True)
df_plot = (
df_plot
.drop_duplicates(subset='Reference Month', keep='last')
.sort_values('Reference Month')
.reset_index(drop=True)
)
# Verify:
print(df_plot.tail(7))
# Presidential bands (optional)
terms = [
('1993-01-20','2001-01-20','Democratic'),
('2001-01-20','2009-01-20','Republican'),
('2009-01-20','2017-01-20','Democratic'),
('2017-01-20','2021-01-20','Republican'),
('2021-01-20','2025-01-20','Democratic'), # Biden’s term extends into 2025
('2025-01-20','2026-01-20','Republican'),
]
# — PLOT —
fig, ax = plt.subplots(figsize=(10,5))
# shade parties
for a,b,party in terms:
start_dt = pd.to_datetime(a)
end_dt = pd.to_datetime(b)
ax.axvspan(start_dt, end_dt, color=('blue' if party=='Democratic' else 'red'),
alpha=0.1, edgecolor='none')
# plot error
ax.plot(df_plot['Reference Month'], df_plot['ErrorPct'], lw=1)
ax.set_xlim(start, end) # Use datetime.date objects for xlim
ax.set_ylim(-1.2, 1.2) # Symmetric limits
#ax.set_xlabel('Reference Month')
ax.set_ylabel('Error (% difference)')
ax.set_title('Error Rates in Monthly Payroll Employment Estimates\n'
'% difference between initial (1st) and final (3rd) estimate')
# Data credit
plt.figtext(
0.98, 0.01,
"data:bls.gov by @iamreddave",
ha="right", va="bottom",
fontsize=8, alpha=0.7
)
# legend for shading
legend_handles = [
Patch(facecolor='blue', alpha=0.1, label='Democratic'),
Patch(facecolor='red', alpha=0.1, label='Republican'),
]
ax.legend(handles=legend_handles, loc='upper left')
fig.autofmt_xdate()
plt.tight_layout()
fig.savefig("payroll_error_rates_2000_2025.png", dpi=300, bbox_inches='tight')
plt.show()
@cavedave
Copy link
Author

cavedave commented Aug 4, 2025

payroll_error_rates_2000_2025 (1)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment