Created
April 12, 2025 04:03
-
-
Save zoharbabin/aa50170e49c7c9e44fa40b3ae74a92bd to your computer and use it in GitHub Desktop.
Generate a comparative visualization of global market performance (indices normalized in USD) and volatility (VIX) from the GFC era to present day. This script highlights market reactions around key historical events using Python, yfinance, and Matplotlib.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
compare_stocks_invert_fx_grid_events.py | |
Generates a chart comparing the *relative behavior* of selected global indices and VIX, | |
visualizing how different market segments reacted to major historical events. | |
Key features: | |
- Data normalized to a common start date (typically Jan 2007 = 100). | |
- All indices converted to USD for direct comparison of magnitude during events. | |
- VIX included to show market volatility/fear reactions. | |
- Major historical events annotated. | |
Purpose & Interpretation: | |
- Analyze *relative performance trajectories* and *market reactions* around events. | |
- Observe divergence/convergence between different regions/indices. | |
- Use VIX spikes to identify periods of high market stress. | |
- NOTE: Shows *price behavior* in USD, not total investment returns (dividends excluded). | |
Currency conversion effects are included. VIX scale indicates volatility *change* | |
relative to its baseline, not value comparable to price indices. | |
Data processing steps: | |
1) Download daily index and FX data from Yahoo Finance. | |
2) Handle potential download errors and missing data. | |
3) Convert daily non-USD index values to USD using daily FX rates. | |
4) Resample all USD-denominated series to monthly frequency (end of month). | |
5) Align data: Find the first date where all series have data and trim. | |
6) Normalize each series so that the first common monthly close equals 100. | |
7) Plot lines, baseline, starting values block, final USD side labels with tickers. | |
8) Annotate historical events in a grid layout & mark dates on X-axis. | |
9) Add metadata to the chart (source, run date, normalization base, FX method). | |
""" | |
import logging | |
from datetime import datetime, timedelta | |
import math | |
import yfinance as yf | |
import pandas as pd | |
import numpy as np | |
import matplotlib | |
import matplotlib.pyplot as plt | |
import matplotlib.dates as mdates | |
from matplotlib.offsetbox import AnchoredText # For the start values block | |
import matplotlib.patches as patches | |
import seaborn as sns | |
from typing import Dict, List, Tuple, Optional | |
# --- Logging Setup ---
# Timestamped, level-tagged messages at INFO and above.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# --- Configuration ---
# Chart font; swap in 'Arial' or 'Calibri' if they are installed.
matplotlib.rcParams.update({'font.family': 'DejaVu Sans'})

# Event label layout
NUM_ROWS = 7        # vertical layers used to stack event labels
DELTA_X_DAYS = 200  # horizontal offset (days) applied per event label column

# Data range: fixed start, end resolved to the day the script is run
START_DATE = '2007-01-01'
END_DATE = f"{datetime.today():%Y-%m-%d}"
# --- Data Definitions ---
# Display label -> Yahoo Finance ticker and the currency the series is quoted in.
# Entries left as comments are optional series that are currently disabled.
INDEXES: Dict[str, Dict[str, str]] = {
    'S&P 500 (US)': dict(ticker='^GSPC', currency='USD'),
    'Nasdaq Comp (US)': dict(ticker='^IXIC', currency='USD'),
    'Russell 2000 (US)': dict(ticker='^RUT', currency='USD'),  # small caps
    'DJIA (US)': dict(ticker='^DJI', currency='USD'),
    'S&P/TSX (CAN)': dict(ticker='^GSPTSE', currency='CAD'),
    # 'DAX (GER)': dict(ticker='^GDAXI', currency='EUR'),
    'EURO STOXX 50': dict(ticker='^STOXX50E', currency='EUR'),
    'Shanghai Comp (CHN)': dict(ticker='000001.SS', currency='CNY'),
    'FTSE 100 (UK)': dict(ticker='^FTSE', currency='GBP'),
    'Nikkei 225 (JPN)': dict(ticker='^N225', currency='JPY'),
    # 'Tadawul (SAU)': dict(ticker='^TASI.SR', currency='SAR'),  # Tadawul All Share Index
    'MSCI EM (ETF: EEM)': dict(ticker='EEM', currency='USD'),  # ETF proxy, emerging markets
    'FTSE All-World ex-US (ETF: VEU)': dict(ticker='VEU', currency='USD'),  # ETF proxy, developed + emerging ex-US
    # 'Vanguard Total World (ETF: VT)': dict(ticker='VT', currency='USD'),  # ETF proxy, world developed + emerging
    # 'MSCI World (ETF: URTH)': dict(ticker='URTH', currency='USD'),  # ETF proxy, world developed
    # 'MOEX Russia': dict(ticker='^IMOEX', currency='RUB'),  # data may be inconsistent
    # 'MSCI BRIC (ETF: BKF)': dict(ticker='BKF', currency='USD'),  # proxy for BRIC countries
    'VIX (Volatility)': dict(ticker='^VIX', currency='USD'),  # treated as USD: no FX conversion applied
}
# Yahoo Finance FX tickers (price of 1 USD in the foreign currency).
# Each ticker is the currency code followed by '=X'; to support another
# currency (e.g. AED, QAR for further Arab indices) append its code below.
FX_TICKERS: Dict[str, Optional[str]] = {
    code: f'{code}=X'
    for code in (
        'CAD',  # Canadian Dollar
        'EUR',  # Euro
        'CNY',  # Chinese Yuan
        'GBP',  # British Pound
        'JPY',  # Japanese Yen
        'RUB',  # Russian Ruble
        'SAR',  # Saudi Riyal
    )
}
FX_TICKERS['USD'] = None  # base currency: no conversion ticker
# --- Considerations ---
# WARNING: Data for MOEX Russia (^IMOEX) may be inconsistent or unavailable,
# especially after early 2022 due to market closures and sanctions.
# If its download fails, the script's error handling attempts to proceed without it.
# NOTE: EEM, VEU and URTH are ETFs used as proxies for the MSCI EM,
# FTSE All-World ex-US and MSCI World indices, respectively.
# They incur tracking error and expense ratios compared to the theoretical index.
# Events to Annotate
# Each entry becomes {'date': 'YYYY-MM-DD', 'label': text}, in the order listed.
EVENTS: List[Dict[str, str]] = [
    {'date': event_date, 'label': event_label}
    for event_date, event_label in (
        # 2007-2009 GFC Era
        ('2007-10-09', 'Market Peak of Oct 2007'),
        ('2008-09-15', 'Lehman Bankruptcy'),
        ('2009-03-09', 'GFC Bottom'),
        # 2010s
        ('2010-05-06', 'Flash Crash'),
        ('2011-08-05', 'US Credit Downgrade'),
        ('2014-06-20', 'Oil Price Crash Start'),
        ('2015-12-16', 'Fed 1st Rate Hike (Post-GFC)'),
        ('2016-06-23', 'Brexit Referendum'),
        ('2017-01-20', 'Trump Inauguration'),  # first term
        ('2018-07-06', 'US-China Tariffs #1'),
        ('2019-01-25', 'US Gov Shutdown Ends'),
        ('2019-05-10', 'Trade War Escalation'),
        # 2020s Pre-2024
        ('2020-03-11', 'COVID-19 Declared Pandemic'),
        ('2021-01-20', 'Biden Inauguration'),
        ('2022-02-24', 'Russia Invades Ukraine'),
        ('2023-10-07', 'Israel-Hamas War'),
        # 2024 Events
        ('2024-08-05', 'The Aug24 VIX Spike'),
        ('2024-09-18', 'Fed 1st Rate Cut'),
        ('2024-11-05', 'US Presidential Election'),
        # 2025 Events
        ('2025-01-20', 'Trump Inauguration'),  # second term
        ('2025-02-02', 'Tariffs on China'),
        ('2025-04-02', 'Global Tariffs'),
        ('2025-04-09', 'Tariffs Delay 90d'),
    )
]
# Event marker colors: the entries of matplotlib's 'tab10' colormap, reused cyclically by the plotting code.
EVENT_COLORS: List[str] = [*plt.get_cmap('tab10').colors]
# --- Data Download & Processing Functions --- | |
# The functions below download the daily series, convert them to USD, and resample/normalize them for plotting.
def download_data(ticker: str, start: str, end: str) -> pd.Series:
    """Download daily closing prices for a single Yahoo Finance ticker.

    Args:
        ticker: Yahoo Finance symbol (index, ETF or FX pair).
        start: First date to request, 'YYYY-MM-DD'.
        end: End date passed to yfinance, 'YYYY-MM-DD'.

    Returns:
        A Series of closing prices with NaN rows removed. An empty float
        Series is returned when nothing was downloaded or the download
        raised; the failure is logged rather than propagated so the caller
        can carry on with the remaining tickers.
    """
    logging.info(f"Downloading {ticker} from {start} to {end}")
    no_data = pd.Series(dtype=float)
    try:
        df = yf.download(ticker, start=start, end=end, interval='1d', progress=False, auto_adjust=True)
        if df.empty:
            logging.warning(f"No data returned for {ticker}")
            return no_data

        if 'Close' in df.columns:
            closes = df['Close']
        else:
            logging.warning(f"'Close' column not found for {ticker}. Available: {df.columns}")
            # Heuristic: fall back to the last column when 'Close' is missing
            # (seen for some FX tickers).
            if len(df.columns) == 0:
                return no_data
            closes = df.iloc[:, -1]

        # Recent yfinance releases return MultiIndex columns even for a single
        # ticker, in which case df['Close'] is a one-column DataFrame. Collapse
        # it so that callers always receive the Series the signature promises.
        if isinstance(closes, pd.DataFrame):
            if closes.shape[1] == 0:
                return no_data
            closes = closes.iloc[:, 0]

        return closes.dropna()
    except Exception as e:
        # Deliberate best effort: one failing ticker must not abort the run.
        logging.error(f"Error downloading {ticker}: {e}")
        return no_data
def get_fx_rates(currencies: List[str], start: str, end: str) -> pd.DataFrame:
    """Download daily FX rates (units of currency per 1 USD), forward-filled.

    Args:
        currencies: Currency codes to fetch. Codes without an entry in
            FX_TICKERS (including 'USD', whose ticker is None) are skipped.
        start: First date to request, 'YYYY-MM-DD'.
        end: End date passed to the downloader, 'YYYY-MM-DD'.

    Returns:
        A DataFrame with one column per requested currency that has an FX
        ticker, plus a constant 'USD' column of 1.0. The index is the union
        of all downloaded dates. Currencies whose download failed are kept
        as all-NaN columns. If nothing could be downloaded the frame has no
        rows (``.empty`` is True).
    """
    requested: List[str] = []             # currencies that have an FX ticker, in request order
    downloaded: Dict[str, pd.Series] = {}
    for curr in currencies:
        ticker = FX_TICKERS.get(curr)
        if not ticker or curr in requested:
            continue
        requested.append(curr)
        fx_series = download_data(ticker, start, end)
        if fx_series.empty:
            logging.warning(f"FX Rate for {curr} ({ticker}) could not be downloaded or is empty.")
            continue
        if isinstance(fx_series, pd.DataFrame):
            # Tolerate a one-column frame coming back from the downloader.
            fx_series = fx_series.iloc[:, 0]
        downloaded[curr] = fx_series

    if not downloaded:
        logging.error("Could not download any FX rates. FX DataFrame is empty.")
        return pd.DataFrame(columns=requested, dtype=float)

    # Outer-join on dates. Assigning the series column by column would pin the
    # frame to the first currency's calendar and silently drop every quote
    # dated outside it.
    fx_df = pd.concat(downloaded, axis=1).sort_index()
    # Restore request order and add failed downloads back as all-NaN columns.
    fx_df = fx_df.reindex(columns=requested)
    fx_df['USD'] = 1.0  # base currency
    # Forward fill missing FX rates - use with caution, implies rate stability.
    return fx_df.ffill()
def get_index_data(indexes_dict: Dict[str, Dict[str, str]], start: str, end: str) -> pd.DataFrame:
    """Download daily closing prices for every configured index.

    Args:
        indexes_dict: Mapping of display label to a dict holding at least
            a 'ticker' key.
        start: First date to request, 'YYYY-MM-DD'.
        end: End date passed to the downloader, 'YYYY-MM-DD'.

    Returns:
        A DataFrame with one column per label, in the order of
        ``indexes_dict``, indexed by the union of all downloaded dates.
        Labels whose download failed are kept as all-NaN columns. If nothing
        could be downloaded the frame has no rows.
    """
    labels = list(indexes_dict)
    downloaded: Dict[str, pd.Series] = {}
    for label, info in indexes_dict.items():
        ticker = info['ticker']
        index_series = download_data(ticker, start, end)
        if index_series.empty:
            logging.warning(f"Index data for {label} ({ticker}) could not be downloaded or is empty.")
            continue
        if isinstance(index_series, pd.DataFrame):
            # Tolerate a one-column frame coming back from the downloader.
            index_series = index_series.iloc[:, 0]
        downloaded[label] = index_series

    if not downloaded:
        return pd.DataFrame(columns=labels, dtype=float)

    # Outer-join on dates. Column-by-column assignment would align every
    # market to the first index's trading calendar and discard sessions that
    # only exist on other exchanges (e.g. days the first market was closed).
    index_df = pd.concat(downloaded, axis=1).sort_index()
    # Restore configured order; failed downloads come back as all-NaN columns.
    return index_df.reindex(columns=labels)
def convert_to_usd(index_df: pd.DataFrame, fx_df: pd.DataFrame, indexes_dict: Dict[str, Dict[str, str]]) -> pd.DataFrame:
    """Convert daily index levels to USD using daily FX rates.

    Args:
        index_df: Daily index levels, one column per label.
        fx_df: Daily FX rates quoted as units of currency per 1 USD, one
            column per currency code.
        indexes_dict: Mapping of label to a dict with a 'currency' key.

    Returns:
        A DataFrame on ``index_df``'s index holding the USD value of every
        label that could be converted. Columns that end up entirely NaN
        (no usable FX rate) are dropped.
    """
    usd_df = pd.DataFrame(index=index_df.index)

    # Align FX to the index calendar by forward-filling over the UNION of both
    # calendars first. Reindexing straight onto the index dates would throw
    # away FX quotes dated between index sessions, leaving a staler rate (or
    # no rate at all) for the following index date.
    if fx_df.empty:
        fx_aligned = fx_df.reindex(index_df.index)
    else:
        all_dates = fx_df.index.union(index_df.index)
        fx_aligned = fx_df.reindex(all_dates).ffill().reindex(index_df.index)

    for label, info in indexes_dict.items():
        if label not in index_df.columns:  # Skip if index data wasn't downloaded
            logging.warning(f"Skipping USD conversion for {label} as index data is missing.")
            continue

        currency = info['currency']
        if currency == 'USD':
            usd_df[label] = index_df[label]
            continue

        if currency not in fx_aligned.columns or fx_aligned[currency].isnull().all():
            logging.warning(f"FX rate for {currency} not available or all NaN. Cannot convert {label} to USD.")
            usd_df[label] = np.nan  # dropped below if nothing else fills it
            continue

        # The FX series is Currency per USD, so the USD value is Index / rate.
        fx_rate = fx_aligned[currency]
        if (fx_rate == 0).any():
            logging.warning(f"Zero FX rate encountered for {currency} on some dates.")
        # A zero rate divides to +/-inf; treat those points as missing.
        usd_df[label] = (index_df[label] / fx_rate).replace([np.inf, -np.inf], np.nan)

    return usd_df.dropna(axis=1, how='all')  # Drop columns that are entirely NaN
def resample_and_normalize(daily_usd_df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.Timestamp]]:
    """Resample to month-end, find the common start month and rebase to 100.

    Args:
        daily_usd_df: Daily USD-denominated series on a DatetimeIndex.

    Returns:
        A tuple ``(norm_df, monthly_df, base_date)``:
        - ``norm_df``: monthly values from the base date onward, each series
          divided by its base-date value and multiplied by 100;
        - ``monthly_df``: the un-normalized monthly values (full history),
          without all-NaN columns;
        - ``base_date``: the first month-end at which every remaining series
          has started, or None when no data is available.
        On failure ``norm_df`` is an empty DataFrame.
    """
    # 1. Resample to month-end frequency. The 'ME' alias only exists in
    #    pandas >= 2.2; older releases spell it 'M' and reject 'ME' with a
    #    ValueError, so fall back instead of crashing there.
    try:
        monthly_df = daily_usd_df.resample('ME').last()
    except ValueError:
        monthly_df = daily_usd_df.resample('M').last()

    # 2. Find the first date where *all* series have started.
    first_valid_idx_per_series = monthly_df.apply(pd.Series.first_valid_index)
    if first_valid_idx_per_series.isnull().any():
        logging.warning(f"Some series have no valid data after resampling: {first_valid_idx_per_series[first_valid_idx_per_series.isnull()].index.tolist()}")
        # Drop series that are entirely NaN
        monthly_df = monthly_df.dropna(axis=1, how='all')
        first_valid_idx_per_series = monthly_df.apply(pd.Series.first_valid_index)

    if monthly_df.empty:
        logging.error("No data available after resampling and dropping empty series.")
        return pd.DataFrame(), monthly_df, None

    start_norm_date = first_valid_idx_per_series.max()
    if pd.isna(start_norm_date):
        logging.error("Could not determine a common start date for normalization.")
        return pd.DataFrame(), monthly_df, None

    logging.info(f"Using {start_norm_date.strftime('%Y-%m-%d')} as normalization base date (first date with data for all series).")

    # 3. Trim to the base date. start_norm_date is taken from monthly_df's own
    #    index, so the trimmed frame always has at least the base row; the
    #    former "empty after trimming" / "date not in index" checks could
    #    never fire and were removed.
    monthly_df_trimmed = monthly_df[monthly_df.index >= start_norm_date].copy()

    # 4. Normalize: divide by the base row and multiply by 100. A series with
    #    a gap exactly at the base month normalizes to all-NaN and is dropped.
    first_row_values = monthly_df_trimmed.iloc[0]
    if first_row_values.isnull().any():
        logging.warning(f"NaN values found in the first row ({start_norm_date}) used for normalization. Affected series: {first_row_values[first_row_values.isnull()].index.tolist()}")
    norm_df = monthly_df_trimmed.div(first_row_values) * 100

    # Return normalized, original monthly (containing the start date), and the base date
    return norm_df.dropna(axis=1, how='all'), monthly_df.dropna(axis=1, how='all'), start_norm_date
# --- Plotting Functions ---
def get_color_map(columns: List[str]) -> Dict[str, str]:
    """Assign a line color to each series name.

    'VIX (Volatility)' is always mapped to 'purple'. Every other name takes
    the next entry of matplotlib's 'tab10' colormap, wrapping around once the
    colormap's entries are used up.
    """
    palette = plt.get_cmap('tab10')
    assigned = {}
    next_slot = 0  # advances only for non-VIX series
    for name in columns:
        if name != 'VIX (Volatility)':
            assigned[name] = palette(next_slot % palette.N)
            next_slot += 1
            continue
        assigned[name] = 'purple'
    return assigned
def annotate_events_grid(ax, events: List[Dict[str, str]], y_data_min: float, y_data_max: float, y_axis_min: float, num_rows: int = 5, delta_x_days: int = 35):
    """Annotate events in a grid of labels above the data and mark them on the X-axis.

    Args:
        ax: Matplotlib axes to draw on; its current x-limits decide which
            events are drawn.
        events: Dicts with a 'date' ('YYYY-MM-DD') and a 'label'.
        y_data_min: Lowest plotted data value (accepted for interface
            compatibility; not used by the layout).
        y_data_max: Highest plotted data value; the label rows start here.
        y_axis_min: Y position of the triangular date markers.
        num_rows: Number of stacked label rows (values below 1 are treated as 1).
        delta_x_days: Horizontal shift, in days, applied per label column.

    Events with an unparseable date are logged and skipped; events dated
    after END_DATE or outside the axes' x-range are not drawn.
    """
    num_rows = max(1, int(num_rows))  # guard the modulo / floor-division below

    try:
        end_date_dt = pd.to_datetime(END_DATE)
    except Exception as e:
        logging.warning(f"Could not filter events by END_DATE {END_DATE}: {e}. Using all events.")
        end_date_dt = None

    # Parse every date exactly once, up front. Previously the sort key parsed
    # the dates unguarded, so a malformed date raised before the per-event
    # "skip invalid date" handling could ever run.
    dated_events = []
    for ev in events:
        try:
            edate = pd.to_datetime(ev['date'])
        except (KeyError, TypeError, ValueError):
            edate = None
        if edate is None or pd.isna(edate):
            logging.warning(f"Invalid date format for event '{ev.get('label', 'N/A')}': {ev.get('date')}. Skipping.")
            continue
        if end_date_dt is not None and edate > end_date_dt:
            continue
        dated_events.append((edate, ev))

    dated_events.sort(key=lambda pair: pair[0])
    if not dated_events:
        return

    # --- Event Label Positioning ---
    vertical_buffer = 0   # Base buffer above the highest data point
    row_spread = 120      # Total vertical space the rows will occupy
    label_region_bottom_y = y_data_max + vertical_buffer
    label_region_top_y = label_region_bottom_y + row_spread
    if num_rows > 1:
        row_positions = np.linspace(label_region_top_y, label_region_bottom_y, num_rows)
    else:
        row_positions = [(label_region_top_y + label_region_bottom_y) / 2]

    xmin_num, xmax_num = ax.get_xlim()
    marked_dates = set()  # dates that already carry an X-axis marker

    for i, (edate, ev) in enumerate(dated_events):
        edate_num = mdates.date2num(edate)

        # Skip events outside the plot's date range (axis limits)
        if edate_num < xmin_num or edate_num > xmax_num:
            continue

        # Rows cycle fastest; each completed cycle shifts labels one column right.
        row_idx = i % num_rows
        col_idx = i // num_rows

        x_text_date = edate + timedelta(days=delta_x_days * col_idx)
        x_text_num = mdates.date2num(x_text_date)
        # Keep the label from drifting too far past the right edge.
        if x_text_num > xmax_num + delta_x_days:
            x_text_num = xmax_num + delta_x_days
            logging.debug(f"Adjusted x position for event label: {ev['label']}")

        color = EVENT_COLORS[i % len(EVENT_COLORS)]
        y_coord = row_positions[row_idx]

        # Vertical line for the event
        ax.axvline(edate, color=color, linestyle='--', linewidth=1, alpha=0.5, zorder=1)
        # Connector from the top of the data to the label
        ax.plot([edate_num, x_text_num], [y_data_max, y_coord], color=color, linewidth=0.8, alpha=0.8, zorder=1)
        # Text label
        ax.text(mdates.num2date(x_text_num), y_coord, f" {ev['label']} ", fontsize=8, color='white', weight='bold',
                ha='left', va='center', bbox=dict(facecolor=color, edgecolor='none', alpha=0.9, boxstyle='round,pad=0.2'),
                zorder=50)

        # --- Mark event date on X-axis (once per distinct date) ---
        if edate_num not in marked_dates:
            ax.scatter([edate_num], [y_axis_min], marker='^', color=color, s=30,
                       clip_on=False,  # keep the marker visible at the axis edge
                       zorder=10, label="_nolegend_")
            marked_dates.add(edate_num)
def plot_side_labeled_lines(ax, norm_df: pd.DataFrame, monthly_usd_df: pd.DataFrame, color_map: Dict[str, str], indexes_dict: Dict[str, Dict[str, str]]):
    """Draw every normalized series and label its right-hand end.

    Each label shows the series name, its ticker (when configured) and the
    final USD value taken from ``monthly_usd_df``. Labels are stacked from the
    highest final normalized value downward and pushed apart so that they do
    not overlap.
    """
    # --- Lines (VIX dashed, everything else solid) ---
    for series_name in norm_df.columns:
        ax.plot(
            norm_df.index,
            norm_df[series_name],
            color=color_map.get(series_name, 'black'),  # black if no color assigned
            linestyle='--' if series_name == 'VIX (Volatility)' else '-',
            linewidth=1.3,
            alpha=0.9,
            label=f"_nolegend_{series_name}",
        )

    # --- Which monthly row supplies the final USD values ---
    last_date = norm_df.index[-1]
    if last_date in monthly_usd_df.index:
        last_usd_date = last_date
    else:
        logging.warning(f"Monthly USD data does not contain the last date {last_date} from normalized data. Side labels might be inaccurate.")
        last_usd_date = monthly_usd_df.index[-1]
        logging.warning(f"Using values from {last_usd_date} for side labels.")

    # --- Collect one entry per series that has a usable final USD value ---
    final_info = []
    for series_name in norm_df.columns:
        if series_name not in monthly_usd_df.columns or last_usd_date not in monthly_usd_df.index:
            logging.warning(f"Could not get final USD value for {series_name} on {last_usd_date}. Column or date missing in monthly_usd_df. Skipping side label.")
            continue
        if pd.isna(monthly_usd_df.loc[last_usd_date, series_name]):
            logging.warning(f"Final USD value for {series_name} on {last_usd_date} is NaN. Skipping side label.")
            continue
        final_info.append({
            'label': series_name,
            'norm_val': norm_df[series_name].iloc[-1],
            'usd_val': monthly_usd_df.loc[last_usd_date, series_name],
            'color': color_map.get(series_name, 'black'),
        })

    # Highest final value first, so labels can be laid out top-down.
    final_info.sort(key=lambda entry: entry['norm_val'], reverse=True)

    # --- Place labels, keeping a minimum vertical gap (in y-axis units) ---
    label_spacing_y = 12
    previous_y = np.inf
    for entry in final_info:
        label_y = entry['norm_val']
        ceiling = previous_y - label_spacing_y
        if label_y > ceiling:
            label_y = ceiling  # would overlap the label above: push it down
        previous_y = label_y

        # Connector from the line's end point to the (possibly shifted) label
        ax.plot([last_date, last_date + pd.Timedelta(days=30)], [entry['norm_val'], label_y],
                color=entry['color'], linewidth=0.8, alpha=0.8)

        ticker = indexes_dict.get(entry['label'], {}).get('ticker', '')
        if ticker:
            label_text = f"{entry['label']} ({ticker}) ${entry['usd_val']:,.0f}"
        else:
            label_text = f"{entry['label']} ${entry['usd_val']:,.0f}"
        ax.text(last_date + pd.Timedelta(days=35), label_y, label_text,
                color=entry['color'], fontsize=8, weight='bold', va='center', ha='left')
def plot_start_values_block(ax, monthly_usd_df: pd.DataFrame, norm_base_date: pd.Timestamp, color_map: Dict[str, str], indexes_dict: Dict[str, Dict[str, str]]):
    """Draw a boxed legend listing each series' USD value at the base date.

    Nothing is drawn when the base date is missing from ``monthly_usd_df`` or
    when no series has a usable value on that date. Entries are sorted by
    label and colored like their lines.
    """
    if norm_base_date not in monthly_usd_df.index:
        logging.warning(f"Normalization base date {norm_base_date} not found in monthly data for start values block.")
        return

    # --- Gather the rows to show ---
    start_info = []
    for label, info in indexes_dict.items():
        if label not in monthly_usd_df.columns:
            logging.warning(f"Column {label} not found in monthly_usd_df for start values.")
            continue
        start_val = monthly_usd_df.loc[norm_base_date, label]
        if pd.isna(start_val):
            logging.warning(f"Start value for {label} on {norm_base_date} is NaN.")
            continue
        start_info.append({
            'label': label,
            'ticker': info.get('ticker', ''),
            'value': start_val,
            'color': color_map.get(label, 'black'),
        })
    start_info.sort(key=lambda row: row['label'])

    if not start_info:  # nothing to show
        return

    # --- Geometry (all in axes-fraction coordinates) ---
    start_x = 0.01       # text starts 1% in from the left edge
    top_y = 0.80         # top of the title line, as a fraction of axes height
    line_height = 0.028  # vertical distance between consecutive lines
    padding = 0.01       # gap between the text and the box border

    num_lines = len(start_info) + 1  # value rows plus the title row
    total_text_height = num_lines * line_height
    rect_height = total_text_height + padding * 2
    rect_width_estimate = 0.18  # rough width of the longest label; tune by eye
    rect_width = rect_width_estimate + padding * 2
    # Text is anchored at its top, so the box hangs down from top_y + padding.
    rect_bottom_y = top_y - rect_height + padding
    rect_left_x = start_x - padding

    # --- Background box (zorder 2 keeps it beneath the text at zorder 3) ---
    background = patches.Rectangle(
        (rect_left_x, rect_bottom_y),
        rect_width, rect_height,
        linewidth=0.5,
        edgecolor='grey',
        facecolor='white',
        alpha=0.75,
        transform=ax.transAxes,
        zorder=2,
    )
    ax.add_patch(background)

    # --- Title row ---
    title_text = f"Start Values ({norm_base_date.strftime('%b %Y')}, USD):"
    ax.text(start_x, top_y, title_text,
            transform=ax.transAxes,
            fontsize=8,
            weight='bold',
            ha='left',
            va='top',
            color='black',
            zorder=3)

    # --- One row per series, stepping down one line height each time ---
    for row_number, item in enumerate(start_info, start=1):
        y_pos = top_y - line_height * row_number
        label_text = f"{item['label']} ({item['ticker']}): ${item['value']:,.0f}"
        ax.text(start_x, y_pos, label_text,
                transform=ax.transAxes,
                fontsize=8,
                weight='bold',
                color=item['color'],
                ha='left',
                va='top',
                zorder=3)
def plot_chart(norm_df: pd.DataFrame, monthly_usd_df: pd.DataFrame, norm_base_date: Optional[pd.Timestamp]):
    """Create and show the final chart with all of its elements.

    Args:
        norm_df: Monthly series normalized to 100 at the base date.
        monthly_usd_df: Un-normalized monthly USD values (for the start
            values block and the side labels).
        norm_base_date: Normalization base date; None aborts plotting.

    Returns:
        None. Logs an error and returns without plotting when ``norm_df`` is
        empty or ``norm_base_date`` is None.
    """
    # Validate BEFORE creating the figure: bailing out afterwards left an
    # open, never-shown figure behind.
    if norm_df.empty:
        logging.error("Normalized data is empty. Cannot generate plot.")
        return
    if norm_base_date is None:
        logging.error("Normalization base date is missing. Cannot generate plot accurately.")
        return

    sns.set_style('whitegrid')
    fig, ax = plt.subplots(figsize=(16, 9))

    # --- Generate Color Map ---
    color_map = get_color_map(list(norm_df.columns))

    # Actual data range, used for the y-limits and the annotation layout
    y_data_min_actual = norm_df.min().min()
    y_data_max_actual = norm_df.max().max()

    # Main lines and side labels
    plot_side_labeled_lines(ax, norm_df, monthly_usd_df, color_map, INDEXES)

    # Emphasize the normalization baseline
    ax.axhline(100, color='gray', linestyle=':', linewidth=1.5, alpha=0.8, zorder=0)

    # Limits are read *after* plotting, in case the side labels expanded them
    _, current_ymax = ax.get_ylim()
    effective_ymax = max(y_data_max_actual, current_ymax)
    # Start near 0 if possible, else slightly below the data (never above 90,
    # so the baseline at 100 stays visible)
    final_ymin = max(0, min(y_data_min_actual * 0.9, 90))

    # --- Reserve head-room for the event labels BEFORE annotating ---
    vertical_buffer = 60
    row_spread = 80
    required_top_y = y_data_max_actual + vertical_buffer + row_spread + 20  # extra padding
    final_ymax = max(effective_ymax, required_top_y)
    ax.set_ylim(bottom=final_ymin, top=final_ymax)

    # Start values block
    plot_start_values_block(ax, monthly_usd_df, norm_base_date, color_map, INDEXES)

    # Event annotations and X-axis markers
    annotate_events_grid(ax, EVENTS, y_data_min_actual, y_data_max_actual, final_ymin, num_rows=NUM_ROWS, delta_x_days=DELTA_X_DAYS)

    # --- Final Touches & Verifiability Info ---
    norm_base_str = norm_base_date.strftime("%b %Y")
    title_norm_part = f"(Normalized, {norm_base_str} = 100)"

    ax.set_ylabel("Normalized Value", fontsize=11)
    ax.set_title(f"Global Indices & VIX in USD {title_norm_part}", fontsize=14, weight='bold')
    ax.xaxis.set_major_locator(mdates.YearLocator(2))  # Ticks every 2 years
    ax.xaxis.set_minor_locator(mdates.YearLocator(1))  # Minor ticks every year
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    ax.tick_params(axis='x', rotation=0, labelsize=9)
    ax.tick_params(axis='y', labelsize=9)
    # Grid lines for minor ticks as well
    ax.grid(True, which='minor', axis='x', linestyle=':', linewidth=0.5, alpha=0.7)
    ax.grid(True, which='major', axis='x', linestyle='-', linewidth=0.6, alpha=0.8)
    ax.grid(True, which='major', axis='y', linestyle='-', linewidth=0.6, alpha=0.8)

    # Overall figure title and metadata subtitle.
    # astimezone() makes the timestamp timezone-aware so that %Z actually
    # prints the local zone name (it is empty for a naive datetime).
    run_date_str = datetime.now().astimezone().strftime("%Y-%m-%d %H:%M:%S %Z")
    fig.suptitle(f"Comparing Market Reactions Since {norm_base_str} (Started at 100, in USD)", fontsize=16, weight='bold', y=0.98)
    subtitle_purpose = "Shows how markets and volatility changed around key historical world events."
    metadata_text = (
        f"{subtitle_purpose}\n"
        f"All series start at 100 for normalized comparison. Side labels show approx. start/end USD values.\n"
        f"Source: Yahoo Finance (Monthly). Generated: {run_date_str}\n"
        f"Note: Prices only (no dividends). VIX (purple dashed-line) tracks volatility change, not comparable value."
    )
    fig.text(0.5, 0.95, metadata_text, ha='center', va='top', fontsize=9, wrap=True, linespacing=1.2)

    # Reminder: VIX normalization shows % change from its own base level,
    # useful for timing/magnitude of *volatility* spikes, not direct value comparison with indices.

    # Adjust layout
    plt.margins(x=0.02)  # small horizontal margins
    # Make room for the side labels by widening the axis by a fraction of the
    # visible span. Scaling the absolute date number (xlim * 1.05) made the
    # padding depend on Matplotlib's date epoch rather than on the data.
    x_left, x_right = ax.get_xlim()
    ax.set_xlim(right=x_right + 0.15 * (x_right - x_left))
    plt.tight_layout(rect=[0.02, 0.02, 0.98, 0.92])
    plt.show()
# --- Main Execution ---
def main():
    """Run the pipeline: download, convert to USD, normalize, plot.

    Each stage logs a fatal error and returns early when it yields no usable
    data, so later stages never run on an empty frame.
    """
    logging.info("Starting script execution.")

    # 1. Download FX Data (sorted so the download order is deterministic)
    required_currencies = sorted({info['currency'] for info in INDEXES.values()})
    fx_df_daily = get_fx_rates(required_currencies, START_DATE, END_DATE)
    # FX data is only essential when at least one index is quoted in a
    # foreign currency; an all-USD configuration legitimately has none.
    needs_fx = any(currency != 'USD' for currency in required_currencies)
    if fx_df_daily.empty and needs_fx:
        logging.error("Fatal: Could not retrieve necessary FX data. Exiting.")
        return

    # 2. Download Index Data
    index_df_daily = get_index_data(INDEXES, START_DATE, END_DATE)
    if index_df_daily.empty:
        logging.error("Fatal: Could not retrieve any index data. Exiting.")
        return

    # 3. Convert to USD (Daily)
    usd_df_daily = convert_to_usd(index_df_daily, fx_df_daily, INDEXES)
    if usd_df_daily.empty:
        logging.error("Fatal: Data conversion to USD resulted in an empty DataFrame. Exiting.")
        return

    # 4. Resample & Normalize
    # Returns: normalized_df, monthly_original_usd_df, normalization_base_date
    norm_df, monthly_usd_df, norm_base_date = resample_and_normalize(usd_df_daily)
    if norm_df.empty or monthly_usd_df.empty:
        logging.error("Fatal: Normalization failed or resulted in empty data. Cannot plot. Exiting.")
        return

    # 5. Plot
    logging.info("Data processing complete. Generating plot...")
    plot_chart(norm_df, monthly_usd_df, norm_base_date)
    logging.info("Script finished.")


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment