Last active
August 4, 2025 11:37
-
-
Save cavedave/a389166c145b0390a10f8c42ce1f466d to your computer and use it in GitHub Desktop.
how much are bls estimates out by?
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import matplotlib.pyplot as plt | |
from matplotlib.patches import Patch | |
import datetime | |
# data from https://www.bls.gov/news.release/empsit.nr0.htm | |
excel_file = '/content/cesvin00.xlsx' | |
df_data = pd.read_excel(excel_file, sheet_name='Data', header=2) | |
# Reference‐months | |
date_cols = [c for c in df_data.columns if isinstance(c, str) and '_' in c] | |
# Grab first & final rows | |
label_col = 'Relating to Period' | |
first_row = df_data[df_data[label_col] | |
.str.contains("First Preliminary", na=False)].iloc[0] | |
third_row = df_data[df_data[label_col] | |
.str.contains("Third Preliminary", na=False)].iloc[0] | |
# Build the error DataFrame | |
df = pd.DataFrame({ | |
'Reference Month': date_cols, # Keep as string initially for manual parsing | |
'First' : first_row [date_cols].astype(float).values, | |
'Final' : third_row [date_cols].astype(float).values | |
}) | |
df['ErrorPct'] = (df['Final'] - df['First']) / df['First'] * 100 | |
# Manually parse reference-month strings into correct datetimes | |
def parse_ref(s): | |
mon, yy = s.split('_') | |
year = int(yy) | |
# Years 00–25 → 2000–2025; 26–99 → 1926–1999 | |
full_year = 2000 + year if year <= 25 else 1900 + year | |
month_num = datetime.datetime.strptime(mon, '%b').month | |
return datetime.date(full_year, month_num, 1) | |
df['Reference Month'] = df['Reference Month'].apply(parse_ref) | |
# — FILTER to June 2003 through last available (Dec 2024) — CHANGE THIS In FUTURE | |
start, end = datetime.date(2003, 1, 1), datetime.date(2024, 12, 1) | |
df_plot = df[(df['Reference Month'] >= start) & (df['Reference Month'] <= end)].copy() | |
import pandas as pd | |
# First, make sure Reference Month is datetime: | |
df_plot['Reference Month'] = pd.to_datetime(df_plot['Reference Month']) | |
# Build a list of the new rows you need: | |
new_rows = [ | |
{'Reference Month': '2025-02-01', 'First': 159218.0, 'Final': 159155.0, 'ErrorPct': -0.039568}, | |
{'Reference Month': '2025-03-01', 'First': 159398.0, 'Final': 159275.0, 'ErrorPct': -0.077165}, | |
{'Reference Month': '2025-04-01', 'First': 159517.0, 'Final': 159433.0, 'ErrorPct': -0.052659}, | |
{'Reference Month': '2025-05-01', 'First': 159561.0, 'Final': 159452.0, 'ErrorPct': -0.068312}, | |
{'Reference Month': '2025-06-01', 'First': 159724.0, 'Final': 159466.0, 'ErrorPct': -0.161529}, | |
] | |
# Turn them into a DataFrame (ensuring the date column is parsed): | |
df_new = pd.DataFrame(new_rows) | |
df_new['Reference Month'] = pd.to_datetime(df_new['Reference Month']) | |
# Append and then drop any duplicates (keeping the appended rows): | |
df_plot = pd.concat([df_plot, df_new], ignore_index=True) | |
df_plot = ( | |
df_plot | |
.drop_duplicates(subset='Reference Month', keep='last') | |
.sort_values('Reference Month') | |
.reset_index(drop=True) | |
) | |
# Verify: | |
print(df_plot.tail(7)) | |
# Presidential bands (optional) | |
terms = [ | |
('1993-01-20','2001-01-20','Democratic'), | |
('2001-01-20','2009-01-20','Republican'), | |
('2009-01-20','2017-01-20','Democratic'), | |
('2017-01-20','2021-01-20','Republican'), | |
('2021-01-20','2025-01-20','Democratic'), # Biden’s term extends into 2025 | |
('2025-01-20','2026-01-20','Republican'), | |
] | |
# — PLOT — | |
fig, ax = plt.subplots(figsize=(10,5)) | |
# shade parties | |
for a,b,party in terms: | |
start_dt = pd.to_datetime(a) | |
end_dt = pd.to_datetime(b) | |
ax.axvspan(start_dt, end_dt, color=('blue' if party=='Democratic' else 'red'), | |
alpha=0.1, edgecolor='none') | |
# plot error | |
ax.plot(df_plot['Reference Month'], df_plot['ErrorPct'], lw=1) | |
ax.set_xlim(start, end) # Use datetime.date objects for xlim | |
ax.set_ylim(-1.2, 1.2) # Symmetric limits | |
#ax.set_xlabel('Reference Month') | |
ax.set_ylabel('Error (% difference)') | |
ax.set_title('Error Rates in Monthly Payroll Employment Estimates\n' | |
'% difference between initial (1st) and final (3rd) estimate') | |
# Data credit | |
plt.figtext( | |
0.98, 0.01, | |
"data:bls.gov by @iamreddave", | |
ha="right", va="bottom", | |
fontsize=8, alpha=0.7 | |
) | |
# legend for shading | |
legend_handles = [ | |
Patch(facecolor='blue', alpha=0.1, label='Democratic'), | |
Patch(facecolor='red', alpha=0.1, label='Republican'), | |
] | |
ax.legend(handles=legend_handles, loc='upper left') | |
fig.autofmt_xdate() | |
plt.tight_layout() | |
fig.savefig("payroll_error_rates_2000_2025.png", dpi=300, bbox_inches='tight') | |
plt.show() |
Author
cavedave
commented
Aug 4, 2025

Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment