Created
May 4, 2025 15:59
-
-
Save thehappycheese/90cad8c946303d38d947261746966fcf to your computer and use it in GitHub Desktop.
Pre-commit hook that blocks commits of .ipynb notebooks which still contain cell outputs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! ./python | |
import json | |
import sys | |
from pathlib import Path | |
import subprocess | |
def get_staged_files():
    """Return the staged ``.ipynb`` paths that still exist in the commit.

    Runs ``git diff --cached --name-only`` and keeps only names ending in
    ``.ipynb``. Staged deletions are excluded, since there is no file left
    to inspect for them.

    Returns:
        list[str]: Notebook paths exactly as git reports them. An empty list
        is returned when nothing matches, or when git cannot be run or exits
        with a non-zero status (a message is printed in those cases).
    """
    command = [
        'git', 'diff', '--cached', '--name-only',
        # Lower-case 'd' excludes deleted paths from the listing.
        '--diff-filter=d',
        # With -z, git emits raw NUL-terminated names. Without it, names
        # containing quotes, backslashes, control characters or non-ASCII
        # bytes are wrapped in double quotes and escaped, so they would no
        # longer end in '.ipynb' and would slip past the filter below.
        '-z',
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except (OSError, ValueError) as e:
        # OSError: git is missing or not executable.
        # ValueError: output could not be decoded (UnicodeDecodeError).
        print(f"Error getting staged files: {e}")
        return []

    if result.returncode != 0:
        print(f"Error getting staged files: {result.stderr}")
        return []

    # The trailing NUL yields one empty string, which the suffix test drops.
    return [name for name in result.stdout.split('\0') if name.endswith('.ipynb')]
def check_notebook_outputs(notebook_path):
    """Report whether a Jupyter notebook file contains executed-cell residue.

    A notebook counts as "having outputs" when any cell whose ``cell_type``
    is ``'code'`` has a non-``None`` ``execution_count`` or a non-empty
    ``outputs`` value.

    Note: this reads the file at ``notebook_path`` from disk, i.e. the
    working-tree copy, not the blob staged in the git index.

    Args:
        notebook_path: Path (``str`` or ``Path``) of the ``.ipynb`` file.

    Returns:
        bool: ``True`` if outputs or execution counts are present. ``False``
        if the notebook is clean, has no top-level ``'cells'`` key, or could
        not be read or parsed (a message is printed in that case).
    """
    try:
        # Decode explicitly as UTF-8 rather than the locale default; with a
        # non-UTF-8 locale the read could fail and the notebook would be
        # reported as clean.
        notebook = json.loads(Path(notebook_path).read_text(encoding='utf-8'))

        # Anything without a 'cells' key is not treated as a notebook.
        if 'cells' not in notebook:
            return False

        return any(
            cell.get('cell_type') == 'code'
            and (cell.get('execution_count') is not None or bool(cell.get('outputs')))
            for cell in notebook['cells']
        )
    except (OSError, ValueError, TypeError, AttributeError) as e:
        # OSError: unreadable or missing file. ValueError: bad JSON or bad
        # UTF-8. TypeError/AttributeError: JSON of an unexpected shape.
        print(f"Error processing {notebook_path}: {e}")
        return False
def main():
    """Check every staged notebook and return a process exit status.

    Returns:
        int: ``0`` when no notebooks are staged or none of them contain
        outputs; ``1`` when at least one staged notebook has outputs, after
        printing a single line that lists the offending paths.
    """
    candidates = get_staged_files()
    if not candidates:
        print("No staged notebooks found for commit.")
        return 0

    # Inspect notebooks in the order git listed them.
    offenders = [nb for nb in candidates if check_notebook_outputs(nb)]

    if not offenders:
        print("All notebooks are clean (no outputs).")
        return 0

    # One-line report; paths are separated by semicolons.
    listing = '; '.join(map(str, offenders))
    print(f"ERROR: Please clear notebook(s) with outputs: {listing}")
    return 1
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment