Skip to content

Instantly share code, notes, and snippets.

@ircwaves
Created March 13, 2026 16:55
Show Gist options
  • Select an option

  • Save ircwaves/3e96e5d6cecbc7fad03094c996add7f8 to your computer and use it in GitHub Desktop.

Select an option

Save ircwaves/3e96e5d6cecbc7fad03094c996add7f8 to your computer and use it in GitHub Desktop.
scipy mishandles scalar variables

At the bottom of this gist is a script which demonstrates that this is a scipy bug. The issue manifests when unlimited (record) variables are used in the same file as scalar variables (coordinates, which are non-record, i.e. not unlimited). In this scenario, scipy mistakenly puts all the variables into the record section of the data section, whereas the scalar variable time should end up in the non-record section. The last block of the script verifies that mixed unlimited+limited variables are handled correctly in the absence of scalar variables.

The bold assertions about where these variables should be placed in the netCDF file come from my reading of both the NetCDF Format Spec 1.2 and the draft NetCDF Format Spec 2.0, which indicate that the data section of the file is

data := non_rec rec

where non_rec is the set of limited variables, and rec is the set of unlimited variables.

(xarray:test-py311-with-typing) [icooke] ~/src/xarray (git)-[main] % python direct_scipy_scalar_example.py
======================================================================
  scipy mishandles scalars when mixed with unlimited (record) variables
======================================================================
Creating scipy_scalar_NOunlimited.nc ...
Inspect scipy_scalar_NOunlimited.nc
  +++ scipy reports:
    - Dimensions:  {'x': 5}
    - shape or <scalar>:
      * 'x':  (5,)
      * 'temperature':  (5,)
      * 'time':  <scalar>
  +++
  - netCDF4 can read it: 'time' = 123456789

Creating scipy_scalar_unlimited.nc ...
Inspect scipy_scalar_unlimited.nc
  +++ scipy reports:
    - Dimensions:  {'x': None}
    - shape or <scalar>:
      * 'x':  (5,)
      * 'temperature':  (5,)
      * 'time':  <scalar>
  +++
  - netCDF4 cannot read it: [Errno -51] NetCDF: Unknown file format: 'scipy_scalar_unlimited.nc'

Creating scipy_NOscalar_unlimited.nc ...
Inspect scipy_NOscalar_unlimited.nc
  +++ scipy reports:
    - Dimensions:  {'x': None, 'time': 5}
    - shape or <scalar>:
      * 'time':  (5,)
      * 'x':  (5,)
      * 'temperature':  (5,)
  +++
  - netCDF4 can read it: 'time' = [13 14 15 16 17]

======================================================================
Summary:
  - scipy without unlimited dims: creates valid NetCDF3 file
  - scipy WITH unlimited dims + scalars: Creates malformed file!
    (scipy can read it, but netCDF4 library rejects it)
  - scipy with mixed limited+unlimited dims w/o scalars: Creates valid NetCDF3 file
======================================================================

Script to reproduce

import contextlib
import os

import numpy as np
from scipy.io import netcdf_file
import netCDF4


def check_netcdf4(path):
    """Report on stdout whether netCDF4 can open *path* and read its 'time' variable.

    Any exception raised while opening or reading the file is caught and its
    message is printed instead of being propagated, so the calling script can
    continue with the next file.
    """
    try:
        with netCDF4.Dataset(path, "r") as dataset:
            time_values = dataset.variables["time"][:]
            print(f"  - netCDF4 can read it: 'time' = {time_values}")
    except Exception as err:
        # Deliberately broad: the point is to show whatever error is raised.
        print(f"  - netCDF4 cannot read it: {err}")


def check_scipy(path):
    """Read *path* back with scipy and print its dimensions and variable shapes.

    For every variable in the file, one line is printed showing its shape, or
    the literal ``<scalar>`` when the shape is the empty tuple.
    """
    print(f"Inspect {path}")
    with netcdf_file(path, "r") as nc:
        print("  +++ scipy reports:")
        print(f"    - Dimensions:  {nc.dimensions}")
        print("    - shape or <scalar>:")
        for name in nc.variables:
            # Keep only the shape tuple, not the variable object itself.
            shape = nc.variables[name].shape
            described = shape if shape != () else "<scalar>"
            print(f"    * '{name}':  {described}")
        print("  +++")


def create_with_scipy_direct(no_unlim_path):
    """Write a file with a scalar variable and no unlimited dimension, then inspect it.

    The file is produced with ``scipy.io.netcdf_file`` directly (``version=2``).
    It holds a fixed-size dimension ``x`` of length 5, a scalar ``time``
    variable, and the 1-D variables ``x`` and ``temperature``. Once the file
    is closed it is read back with both scipy and netCDF4.
    """
    print(f"Creating {no_unlim_path} ...")

    with netcdf_file(no_unlim_path, "w", mmap=None, version=2) as nc:
        # Fixed-size dimension shared by the two 1-D variables.
        nc.createDimension("x", 5)

        # Scalar variable: an empty dimension tuple.
        scalar_time = nc.createVariable("time", "i4", ())
        scalar_time.long_name = "Scalar time coordinate"
        scalar_time.data[()] = 123456789

        # Coordinate variable along "x".
        x_coord = nc.createVariable("x", "i4", ("x",))
        x_coord.long_name = "X coordinate"
        x_coord[:] = np.arange(5)

        # Data variable along "x", filled with random values.
        temperature = nc.createVariable("temperature", "f8", ("x",))
        temperature[:] = np.random.randn(5)
        temperature.units = "degC"
        temperature.long_name = "Temperature"

        # Global (file-level) attributes.
        nc.description = "Example file with scalar variable WITHOUT unlimited dimensions"
        nc.history = "Created with scipy.io.netcdf_file"

    # Read the closed file back with both libraries.
    check_scipy(no_unlim_path)
    check_netcdf4(no_unlim_path)


def create_with_scipy_direct_unlimited(unlim_path):
    """Write a file mixing a scalar variable with an UNLIMITED dimension, then inspect it.

    This is the case that demonstrates the scipy bug: the file holds a scalar
    ``time`` variable alongside ``x`` and ``temperature`` variables that use
    the unlimited dimension ``x``. Once the file is closed it is read back
    with both scipy and netCDF4.
    """
    print(f"\nCreating {unlim_path} ...")

    with netcdf_file(unlim_path, "w", version=2) as nc:
        # A length of None marks the dimension as unlimited.
        nc.createDimension("x", None)

        # Scalar variable: an empty dimension tuple.
        scalar_time = nc.createVariable("time", "i4", ())
        scalar_time.long_name = "Scalar time coordinate"
        scalar_time.data[()] = 123456789

        # Coordinate variable along the unlimited dimension.
        x_coord = nc.createVariable("x", "i4", ("x",))
        x_coord.long_name = "X coordinate"
        x_coord[:] = np.arange(5)

        # Data variable along the unlimited dimension, filled with random values.
        temperature = nc.createVariable("temperature", "f8", ("x",))
        temperature[:] = np.random.randn(5)
        temperature.units = "degC"
        temperature.long_name = "Temperature"

        # Global (file-level) attributes.
        nc.description = "Example file with scalar variable WITH unlimited dimension"
        nc.history = "Created with scipy.io.netcdf_file"

    # Read the closed file back with both libraries.
    check_scipy(unlim_path)
    check_netcdf4(unlim_path)


def create_with_scipy_direct_unlimited_noscalar(noscalar_unlim_path):
    """Write a file mixing limited and unlimited dimensions but no scalar, then inspect it.

    The file holds ``x`` and ``temperature`` variables on the unlimited
    dimension ``x`` plus a 1-D ``time`` variable on the fixed-size dimension
    ``time`` (length 5). Once the file is closed it is read back with both
    scipy and netCDF4.
    """
    print(f"\nCreating {noscalar_unlim_path} ...")

    with netcdf_file(noscalar_unlim_path, "w", version=2) as nc:
        # One unlimited dimension (length None) and one fixed-size dimension.
        nc.createDimension("x", None)
        nc.createDimension("time", 5)

        # Coordinate variable along the unlimited dimension.
        x_coord = nc.createVariable("x", "i4", ("x",))
        x_coord.long_name = "X coordinate"
        x_coord[:] = np.arange(5)

        # Here "time" is a 1-D variable on the fixed-size dimension, not a scalar.
        time_coord = nc.createVariable("time", "i4", ("time",))
        time_coord.long_name = "time coordinate"
        time_coord.data[:] = np.arange(13, 18)

        # Data variable along the unlimited dimension, filled with random values.
        temperature = nc.createVariable("temperature", "f8", ("x",))
        temperature[:] = np.random.randn(5)
        temperature.units = "degC"
        temperature.long_name = "Temperature"

        # Global (file-level) attributes.
        nc.description = "Example file WITHOUT scalar variable WITH unlimited dimension"
        nc.history = "Created with scipy.io.netcdf_file"

    # Read the closed file back with both libraries.
    check_scipy(noscalar_unlim_path)
    check_netcdf4(noscalar_unlim_path)


if __name__ == "__main__":
    # Script entry point: build the three example files and print a summary.
    banner = "=" * 70
    print(banner)
    print("  scipy mishandles scalars when mixed with unlimited (record) variables")
    print(banner)

    no_unlim_path = "scipy_scalar_NOunlimited.nc"
    unlim_path = "scipy_scalar_unlimited.nc"
    noscalar_unlim_path = "scipy_NOscalar_unlimited.nc"

    # Start from a clean slate; a file that does not exist yet is fine.
    for stale in (unlim_path, no_unlim_path, noscalar_unlim_path):
        try:
            os.remove(stale)
        except FileNotFoundError:
            pass

    # Example 1: scalar variable, no unlimited dimension (safe).
    create_with_scipy_direct(no_unlim_path)
    # Example 2: scalar variable plus an unlimited dimension (demonstrates the bug).
    create_with_scipy_direct_unlimited(unlim_path)
    # Example 3: limited and unlimited dimensions, but no scalar variable.
    create_with_scipy_direct_unlimited_noscalar(noscalar_unlim_path)

    print("\n" + banner)
    for summary_line in (
        "Summary:",
        "  - scipy without unlimited dims: creates valid NetCDF3 file",
        "  - scipy WITH unlimited dims + scalars: Creates malformed file!",
        "    (scipy can read it, but netCDF4 library rejects it)",
        "  - scipy with mixed limited+unlimited dims w/o scalars: Creates valid NetCDF3 file",
    ):
        print(summary_line)
    print(banner)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment