Skip to content

Instantly share code, notes, and snippets.

@zonca
Created May 18, 2026 17:56
Show Gist options
  • Select an option

  • Save zonca/71774b5e3cd00480558dfc3825c6ed2b to your computer and use it in GitHub Desktop.

Select an option

Save zonca/71774b5e3cd00480558dfc3825c6ed2b to your computer and use it in GitHub Desktop.
"""
Test script for Issue #268: do not modify the input Astropy Table when converting it to a pandas DataFrame.
This script extracts all Python code blocks from episode 03-transform.md and executes them
sequentially to verify the episode content runs correctly end-to-end.
NOTE: The assert statements below (marked with "ASSERT: not part of the lesson") are added
for automated testing purposes only. They are NOT part of the lesson content.
"""
import os
import sys

# Make the lesson's helper modules importable. The entry is made absolute
# because this script later calls os.chdir(); a relative sys.path entry could
# otherwise end up being resolved against the new working directory.
sys.path.insert(0, os.path.abspath('student_download'))
import re
def _extract_python_blocks(markdown):
    """Return the contents of every ```python fenced block in *markdown*.

    Each returned string is the block's lines joined with newlines, fences
    excluded. Only fences whose stripped text is exactly '```python' open a
    block and only a bare '```' closes one; everything outside such a block
    (including ```output blocks) is ignored. A block that is still open at the
    end of the text is dropped.
    """
    blocks = []
    current = None  # list of lines while inside a python block, else None
    for raw_line in markdown.split('\n'):
        fence = raw_line.strip()
        if fence == '```python':
            # Start (or restart) collecting a python block.
            current = []
        elif fence == '```output':
            # Output fences are never collected, wherever they appear.
            continue
        elif fence == '```':
            # A closing fence ends whatever block we are in.
            if current is not None:
                blocks.append('\n'.join(current))
            current = None
        elif current is not None:
            current.append(raw_line)
    return blocks


# Read the episode markdown. The encoding is explicit so the result does not
# depend on the platform's locale default.
with open('episodes/03-transform.md', 'r', encoding='utf-8') as f:
    content = f.read()
# Extract all python code blocks (not output blocks)
code_blocks = _extract_python_blocks(content)
print(f"Extracted {len(code_blocks)} Python code blocks from episode 03-transform.md")
# Prepare the environment in which the extracted blocks will be executed.
# The blocks are exec'd with this module's globals(), so every name imported
# here is visible to the lesson code; np and Table are also used directly by
# the assertions further down.
import pandas as pd
from astropy.table import Table
from astropy.coordinates import SkyCoord
import astropy.units as u
# gd1 and reflex are presumably the lesson's helper modules found through the
# 'student_download' entry added to sys.path at the top of this script.
from gd1 import GD1Koposov10
from reflex import reflex_correct
import numpy as np
from matplotlib import pyplot as plt
# Turn off matplotlib's interactive mode so figures are not drawn as they are
# created.
# NOTE(review): this does not by itself stop an explicit plt.show() in a
# lesson block from opening a window under a GUI backend — confirm whether a
# non-interactive backend is needed for headless runs.
plt.ioff()
# Set up the data path - the episode expects 'gd1_results.fits'.
# After this call every relative path (in the lesson blocks and in the
# assertions below) is resolved against student_download/data.
import os
os.chdir('student_download/data')
def _run_blocks(blocks, namespace):
    """Execute each code block in *namespace* and return the failures.

    Blocks whose stripped text starts with '%' are treated as Jupyter magic
    and skipped. Every other block is exec'd in *namespace*, so names defined
    by one block are visible to the following ones. One status line is printed
    per block, numbered from 1.

    The loop state is kept in function locals on purpose: the lesson code runs
    in *namespace*, and if the counter or the current block lived there too, a
    block assigning a same-named variable would corrupt the report.

    Returns a list of (block_number, block_source, exception) tuples, one per
    block that raised.
    """
    failures = []
    for number, source in enumerate(blocks, start=1):
        # Skip Jupyter magic
        if source.strip().startswith('%'):
            print(f"Block {number}: SKIPPED (Jupyter magic)")
            continue
        try:
            # NOTE: exec runs arbitrary code; the blocks must come from
            # trusted lesson content only.
            exec(source, namespace)
        except Exception as err:
            # Record the failure and keep going so that every failing block
            # is reported, not just the first one.
            print(f"Block {number}: FAILED - {type(err).__name__}: {err}")
            failures.append((number, source, err))
        else:
            print(f"Block {number}: OK")
    return failures


# Track which blocks fail. The blocks are executed in this module's globals so
# that the names they define can be inspected afterwards.
failed = _run_blocks(code_blocks, globals())
# ============================================================================
# ASSERTS: not part of the lesson — added for automated verification of Issue #268
# ============================================================================
# Column layouts shared by the checks below.
INPUT_COLUMNS = ['source_id', 'ra', 'dec', 'pmra', 'pmdec', 'parallax']
GD1_COLUMNS = ['phi1', 'phi2', 'pm_phi1', 'pm_phi2']
EXPECTED_SHAPE = (140339, 10)

# ASSERT: not part of the lesson — all code blocks must execute
assert not failed, (
    f"Code blocks failed: {[(num, str(exc)) for num, _, exc in failed]}"
)

# ASSERT: not part of the lesson — polygon_results must NOT be modified
# (the core fix of Issue #268: GD-1 columns should NOT be added to the input table)
assert 'polygon_results' in globals(), "polygon_results not found in globals"
assert list(polygon_results.colnames) == INPUT_COLUMNS, (
    f"polygon_results was modified! Columns: {polygon_results.colnames}"
)

# ASSERT: not part of the lesson — results_df must have all 10 columns
assert 'results_df' in globals(), "results_df not found in globals"
assert results_df.shape == EXPECTED_SHAPE, (
    f"results_df shape is {results_df.shape}, expected (140339, 10)"
)
assert list(results_df.columns) == INPUT_COLUMNS + GD1_COLUMNS, (
    f"results_df columns: {list(results_df.columns)}"
)

# ASSERT: not part of the lesson — GD-1 columns in DataFrame must be plain floats, not Quantity
for name in GD1_COLUMNS:
    assert results_df[name].dtype in (np.float64, np.float32), (
        f"Column '{name}' dtype is {results_df[name].dtype}, expected float (not Quantity)"
    )

# ASSERT: not part of the lesson — make_dataframe must not modify input table
# A fresh copy of the table is read from disk so the check does not depend on
# whatever the lesson blocks did to polygon_results.
polygon_results_fresh = Table.read('gd1_results.fits')
df_from_func = make_dataframe(polygon_results_fresh)
assert list(polygon_results_fresh.colnames) == INPUT_COLUMNS, (
    f"make_dataframe modified input table! Columns: {polygon_results_fresh.colnames}"
)

# ASSERT: not part of the lesson — make_dataframe must return 10 columns
assert df_from_func.shape == EXPECTED_SHAPE, (
    f"make_dataframe returned shape {df_from_func.shape}, expected (140339, 10)"
)

# ASSERT: not part of the lesson — values from step-by-step and make_dataframe must match
for name in GD1_COLUMNS:
    step_by_step = results_df[name].values
    from_function = df_from_func[name].values
    assert np.allclose(step_by_step, from_function, rtol=1e-10), (
        f"Column '{name}' differs between step-by-step and make_dataframe()"
    )

# ASSERT: not part of the lesson — describe() must work (was broken with Quantity columns in pandas 1.3+)
desc = results_df.describe()
assert desc.shape == (8, 10), (
    f"describe() returned shape {desc.shape}, expected (8, 10)"
)
# ============================================================================
# End of asserts
# ============================================================================
# Final report, reached only when every assertion above has passed.
# The number of skipped blocks is counted with the same "starts with '%'"
# test that is applied when the blocks are executed, rather than hard-coded,
# so the message stays correct if the episode gains or loses magic cells.
skipped_count = sum(1 for src in code_blocks if src.strip().startswith('%'))
print(f"\n{'='*60}")
print(f"SUCCESS: All {len(code_blocks)} code blocks executed without errors")
print(f" ({skipped_count} skipped as Jupyter magic)")
print("All asserts passed — Issue #268 fix verified:")
print(" - Input table NOT modified")
print(" - DataFrame has correct 10 columns with float dtype")
print(" - make_dataframe() matches step-by-step results")
print(" - describe() works (no pandas Quantity bug)")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment