Skip to content

Instantly share code, notes, and snippets.

@thehappycheese
Created May 4, 2025 15:59
Show Gist options
  • Save thehappycheese/90cad8c946303d38d947261746966fcf to your computer and use it in GitHub Desktop.
Save thehappycheese/90cad8c946303d38d947261746966fcf to your computer and use it in GitHub Desktop.
pre-commit hook stops .ipynb outputs
#! ./python
import json
import sys
from pathlib import Path
import subprocess
def get_staged_files():
    """Return the paths of staged Jupyter notebooks.

    Asks git for the names of files in the index that differ from HEAD and
    keeps only those ending in ``.ipynb``.

    Returns:
        A list of path strings as reported by git. The list is empty when
        nothing matching is staged, or when git could not be run (an error
        message is printed in that case).
    """
    command = [
        'git', 'diff', '--cached', '--name-only',
        # Lowercase 'd' excludes deleted files: they no longer exist on disk,
        # so there is nothing to inspect for outputs.
        '--diff-filter=d',
        # NUL-terminated output disables git's C-style path quoting. Without
        # it, a name containing tabs, quotes or non-ASCII bytes is printed
        # wrapped in double quotes and would fail the '.ipynb' suffix test.
        '-z',
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # Typically: git is not installed, or its output could not be decoded.
        print(f"Error getting staged files: {e}")
        return []

    if result.returncode != 0:
        print(f"Error getting staged files: {result.stderr}")
        return []

    # The trailing NUL yields a final empty string; the suffix filter drops it.
    return [name for name in result.stdout.split('\0') if name.endswith('.ipynb')]
def check_notebook_outputs(notebook_path):
    """Report whether a Jupyter notebook contains outputs or execution counts.

    Args:
        notebook_path: Path (string or path-like) of the notebook file to
            read from disk.

    Returns:
        True if any code cell has a non-null ``execution_count`` or a
        non-empty ``outputs`` list. False if the notebook is clean, has no
        ``cells`` key, or could not be read or parsed (an error message is
        printed in that last case).
    """
    try:
        # Decode explicitly as UTF-8 rather than the platform's locale
        # encoding, so a notebook with non-ASCII text is not misread (and
        # then silently treated as clean) on non-UTF-8 locales.
        notebook_content = Path(notebook_path).read_text(encoding="utf-8")
        notebook = json.loads(notebook_content)
    except (OSError, ValueError) as e:
        # ValueError covers both JSON syntax errors and decoding errors.
        print(f"Error processing {notebook_path}: {e}")
        return False

    # Anything without a top-level 'cells' mapping entry is not a notebook
    # this check understands.
    if not isinstance(notebook, dict) or 'cells' not in notebook:
        return False

    cells = notebook['cells']
    if not isinstance(cells, list):
        return False

    return any(
        isinstance(cell, dict)
        and cell.get('cell_type') == 'code'
        and (cell.get('execution_count') is not None or bool(cell.get('outputs')))
        for cell in cells
    )
def main():
    """Check every staged notebook and return a process exit status.

    Returns:
        0 when no notebooks are staged or none of them contain outputs;
        1 when at least one staged notebook still has outputs, after
        printing a one-line error naming the offending files.
    """
    staged = get_staged_files()
    if not staged:
        print("No staged notebooks found for commit.")
        return 0

    # Keep only the notebooks that still carry outputs or execution counts.
    dirty = [candidate for candidate in staged if check_notebook_outputs(candidate)]

    if not dirty:
        print("All notebooks are clean (no outputs).")
        return 0

    # Report all offenders on a single line, separated by semicolons.
    offenders = '; '.join(map(str, dirty))
    print(f"ERROR: Please clear notebook(s) with outputs: {offenders}")
    return 1
# Script entry point: hand main()'s status to the shell so that a non-zero
# result is visible to whatever invoked this script.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment