cavedave · August 4, 2025 11:37 · cavedave · Aug 4, 2025
diff --git a/bls.py b/bls.py
 import pandas as pd
 import matplotlib.pyplot as plt
 from matplotlib.patches import Patch
 import datetime

 # data from https://www.bls.gov/news.release/empsit.nr0.htm
 excel_file = '/content/cesvin00.xlsx'
 df_data    = pd.read_excel(excel_file, sheet_name='Data', header=2)

 # Reference‐months
 date_cols  = [c for c in df_data.columns if isinstance(c, str) and '_' in c]

 # Grab first & final rows
 label_col  = 'Relating to Period'
 first_row  = df_data[df_data[label_col]
                     .str.contains("First Preliminary", na=False)].iloc[0]
 third_row  = df_data[df_data[label_col]
                     .str.contains("Third Preliminary", na=False)].iloc[0]

 # Build the error DataFrame
 df = pd.DataFrame({
    'Reference Month': date_cols, # Keep as string initially for manual parsing
    'First'          : first_row [date_cols].astype(float).values,
    'Final'          : third_row [date_cols].astype(float).values
 })
 df['ErrorPct'] = (df['Final'] - df['First']) / df['First'] * 100

 # Manually parse reference-month strings into correct datetimes
 def parse_ref(s):
    mon, yy = s.split('_')
    year = int(yy)
    # Years 00–25 → 2000–2025; 26–99 → 1926–1999
    full_year = 2000 + year if year <= 25 else 1900 + year
    month_num = datetime.datetime.strptime(mon, '%b').month
    return datetime.date(full_year, month_num, 1)

 df['Reference Month'] = df['Reference Month'].apply(parse_ref)



 # — FILTER to June 2003 through last available (Dec 2024) — CHANGE THIS In FUTURE
 start, end = datetime.date(2003, 1, 1), datetime.date(2024, 12, 1)
 df_plot = df[(df['Reference Month'] >= start) & (df['Reference Month'] <= end)].copy()


 import pandas as pd

 # First, make sure Reference Month is datetime:
 df_plot['Reference Month'] = pd.to_datetime(df_plot['Reference Month'])

 # Build a list of the new rows you need:
 new_rows = [
    {'Reference Month': '2025-02-01', 'First': 159218.0, 'Final': 159155.0, 'ErrorPct': -0.039568},
    {'Reference Month': '2025-03-01', 'First': 159398.0, 'Final': 159275.0, 'ErrorPct': -0.077165},
    {'Reference Month': '2025-04-01', 'First': 159517.0, 'Final': 159433.0, 'ErrorPct': -0.052659},
    {'Reference Month': '2025-05-01', 'First': 159561.0, 'Final': 159452.0, 'ErrorPct': -0.068312},
    {'Reference Month': '2025-06-01', 'First': 159724.0, 'Final': 159466.0, 'ErrorPct': -0.161529},
 ]

 # Turn them into a DataFrame (ensuring the date column is parsed):
 df_new = pd.DataFrame(new_rows)
 df_new['Reference Month'] = pd.to_datetime(df_new['Reference Month'])

 # Append and then drop any duplicates (keeping the appended rows):
 df_plot = pd.concat([df_plot, df_new], ignore_index=True)
 df_plot = (
    df_plot
    .drop_duplicates(subset='Reference Month', keep='last')
    .sort_values('Reference Month')
    .reset_index(drop=True)
 )

 # Verify:
 print(df_plot.tail(7))


 # Presidential bands (optional)
 terms = [
    ('1993-01-20','2001-01-20','Democratic'),
    ('2001-01-20','2009-01-20','Republican'),
    ('2009-01-20','2017-01-20','Democratic'),
    ('2017-01-20','2021-01-20','Republican'),
    ('2021-01-20','2025-01-20','Democratic'),  # Biden’s term extends into 2025
    ('2025-01-20','2026-01-20','Republican'), 
 ]

 # — PLOT —
 fig, ax = plt.subplots(figsize=(10,5))

 # shade parties
 for a,b,party in terms:
    start_dt = pd.to_datetime(a)
    end_dt = pd.to_datetime(b)
    ax.axvspan(start_dt, end_dt, color=('blue' if party=='Democratic' else 'red'),
               alpha=0.1, edgecolor='none')

 # plot error
 ax.plot(df_plot['Reference Month'], df_plot['ErrorPct'], lw=1)
 ax.set_xlim(start, end) # Use datetime.date objects for xlim
 ax.set_ylim(-1.2, 1.2)  # Symmetric limits
 #ax.set_xlabel('Reference Month')
 ax.set_ylabel('Error (% difference)')
 ax.set_title('Error Rates in Monthly Payroll Employment Estimates\n'
             '% difference between initial (1st) and final (3rd) estimate')

 # Data credit
 plt.figtext(
    0.98, 0.01,
    "data:bls.gov by @iamreddave",
    ha="right", va="bottom",
    fontsize=8, alpha=0.7
 )


 # legend for shading
 legend_handles = [
    Patch(facecolor='blue', alpha=0.1, label='Democratic'),
    Patch(facecolor='red', alpha=0.1, label='Republican'),
 ]
 ax.legend(handles=legend_handles, loc='upper left')

 fig.autofmt_xdate()
 plt.tight_layout()
 fig.savefig("payroll_error_rates_2000_2025.png", dpi=300, bbox_inches='tight')

 plt.show()
	import pandas as pd
	import matplotlib.pyplot as plt
	from matplotlib.patches import Patch
	import datetime

	# data from https://www.bls.gov/news.release/empsit.nr0.htm
	excel_file = '/content/cesvin00.xlsx'
	df_data = pd.read_excel(excel_file, sheet_name='Data', header=2)

	# Reference‐months
	date_cols = [c for c in df_data.columns if isinstance(c, str) and '_' in c]

	# Grab first & final rows
	label_col = 'Relating to Period'
	first_row = df_data[df_data[label_col]
	.str.contains("First Preliminary", na=False)].iloc[0]
	third_row = df_data[df_data[label_col]
	.str.contains("Third Preliminary", na=False)].iloc[0]

	# Build the error DataFrame
	df = pd.DataFrame({
	'Reference Month': date_cols, # Keep as string initially for manual parsing
	'First' : first_row [date_cols].astype(float).values,
	'Final' : third_row [date_cols].astype(float).values
	})
	df['ErrorPct'] = (df['Final'] - df['First']) / df['First'] * 100

	# Manually parse reference-month strings into correct datetimes
	def parse_ref(s):
	mon, yy = s.split('_')
	year = int(yy)
	# Years 00–25 → 2000–2025; 26–99 → 1926–1999
	full_year = 2000 + year if year <= 25 else 1900 + year
	month_num = datetime.datetime.strptime(mon, '%b').month
	return datetime.date(full_year, month_num, 1)

	df['Reference Month'] = df['Reference Month'].apply(parse_ref)



	# — FILTER to June 2003 through last available (Dec 2024) — CHANGE THIS In FUTURE
	start, end = datetime.date(2003, 1, 1), datetime.date(2024, 12, 1)
	df_plot = df[(df['Reference Month'] >= start) & (df['Reference Month'] <= end)].copy()


	import pandas as pd

	# First, make sure Reference Month is datetime:
	df_plot['Reference Month'] = pd.to_datetime(df_plot['Reference Month'])

	# Build a list of the new rows you need:
	new_rows = [
	{'Reference Month': '2025-02-01', 'First': 159218.0, 'Final': 159155.0, 'ErrorPct': -0.039568},
	{'Reference Month': '2025-03-01', 'First': 159398.0, 'Final': 159275.0, 'ErrorPct': -0.077165},
	{'Reference Month': '2025-04-01', 'First': 159517.0, 'Final': 159433.0, 'ErrorPct': -0.052659},
	{'Reference Month': '2025-05-01', 'First': 159561.0, 'Final': 159452.0, 'ErrorPct': -0.068312},
	{'Reference Month': '2025-06-01', 'First': 159724.0, 'Final': 159466.0, 'ErrorPct': -0.161529},
	]

	# Turn them into a DataFrame (ensuring the date column is parsed):
	df_new = pd.DataFrame(new_rows)
	df_new['Reference Month'] = pd.to_datetime(df_new['Reference Month'])

	# Append and then drop any duplicates (keeping the appended rows):
	df_plot = pd.concat([df_plot, df_new], ignore_index=True)
	df_plot = (
	df_plot
	.drop_duplicates(subset='Reference Month', keep='last')
	.sort_values('Reference Month')
	.reset_index(drop=True)
	)

	# Verify:
	print(df_plot.tail(7))


	# Presidential bands (optional)
	terms = [
	('1993-01-20','2001-01-20','Democratic'),
	('2001-01-20','2009-01-20','Republican'),
	('2009-01-20','2017-01-20','Democratic'),
	('2017-01-20','2021-01-20','Republican'),
	('2021-01-20','2025-01-20','Democratic'), # Biden’s term extends into 2025
	('2025-01-20','2026-01-20','Republican'),
	]

	# — PLOT —
	fig, ax = plt.subplots(figsize=(10,5))

	# shade parties
	for a,b,party in terms:
	start_dt = pd.to_datetime(a)
	end_dt = pd.to_datetime(b)
	ax.axvspan(start_dt, end_dt, color=('blue' if party=='Democratic' else 'red'),
	alpha=0.1, edgecolor='none')

	# plot error
	ax.plot(df_plot['Reference Month'], df_plot['ErrorPct'], lw=1)
	ax.set_xlim(start, end) # Use datetime.date objects for xlim
	ax.set_ylim(-1.2, 1.2) # Symmetric limits
	#ax.set_xlabel('Reference Month')
	ax.set_ylabel('Error (% difference)')
	ax.set_title('Error Rates in Monthly Payroll Employment Estimates\n'
	'% difference between initial (1st) and final (3rd) estimate')

	# Data credit
	plt.figtext(
	0.98, 0.01,
	"data:bls.gov by @iamreddave",
	ha="right", va="bottom",
	fontsize=8, alpha=0.7
	)


	# legend for shading
	legend_handles = [
	Patch(facecolor='blue', alpha=0.1, label='Democratic'),
	Patch(facecolor='red', alpha=0.1, label='Republican'),
	]
	ax.legend(handles=legend_handles, loc='upper left')

	fig.autofmt_xdate()
	plt.tight_layout()
	fig.savefig("payroll_error_rates_2000_2025.png", dpi=300, bbox_inches='tight')

	plt.show()