At the bottom of this gist is a script which demonstrates that this is a scipy bug. The issue manifests when unlimited (record) variables are used in the same file as scalar variables (coordinates, which are non-record, i.e. not unlimited). In this scenario, scipy mistakenly puts all the variables into the record section of the data section, whereas the scalar variable (time) should end up in the non-record section. The last block of the script verifies that mixed unlimited+limited variables are handled correctly in the absence of scalar variables.
The bold assertions about where these variables should be placed in the netCDF file come from my reading of both the NetCDF Format Spec 1.2 and the draft NetCDF Format Spec 2.0, both of which indicate that the data section of the file is
data := non_rec rec
where non_rec is the set of limited variables, and rec is the set of unlimited variables.
(xarray:test-py311-with-typing) [icooke] ~/src/xarray (git)-[main] % python direct_scipy_scalar_example.py
======================================================================
scipy mishandles scalars when mixed with unlimited (record) variables
======================================================================
Creating scipy_scalar_NOunlimited.nc ...
Inspect scipy_scalar_NOunlimited.nc
+++ scipy reports:
- Dimensions: {'x': 5}
- shape or <scalar>:
* 'x': (5,)
* 'temperature': (5,)
* 'time': <scalar>
+++
- netCDF4 can read it: 'time' = 123456789
Creating scipy_scalar_unlimited.nc ...
Inspect scipy_scalar_unlimited.nc
+++ scipy reports:
- Dimensions: {'x': None}
- shape or <scalar>:
* 'x': (5,)
* 'temperature': (5,)
* 'time': <scalar>
+++
- netCDF4 cannot read it: [Errno -51] NetCDF: Unknown file format: 'scipy_scalar_unlimited.nc'
Creating scipy_NOscalar_unlimited.nc ...
Inspect scipy_NOscalar_unlimited.nc
+++ scipy reports:
- Dimensions: {'x': None, 'time': 5}
- shape or <scalar>:
* 'time': (5,)
* 'x': (5,)
* 'temperature': (5,)
+++
- netCDF4 can read it: 'time' = [13 14 15 16 17]
======================================================================
Summary:
- scipy without unlimited dims: creates valid NetCDF3 file
- scipy WITH unlimited dims + scalars: Creates malformed file!
(scipy can read it, but netCDF4 library rejects it)
- scipy with mixed limited+unlimited dims w/o scalars: Creates valid NetCDF3 file
======================================================================
import contextlib
import os
import numpy as np
from scipy.io import netcdf_file
import netCDF4
def check_netcdf4(path):
    """Report whether the netCDF4 library can open *path* and read 'time'.

    Prints one line: the contents of the 'time' variable on success, or the
    text of the exception on failure. Nothing is returned and no exception
    propagates to the caller.
    """
    try:
        with netCDF4.Dataset(path, "r") as dataset:
            time_values = dataset.variables["time"][:]
            print(f" - netCDF4 can read it: 'time' = {time_values}")
    except Exception as err:
        # Deliberately broad: this is a diagnostic probe, and any failure to
        # open or read the file is exactly what should be reported.
        print(f" - netCDF4 cannot read it: {err}")
def check_scipy(path):
    """Print what scipy sees when it reads the NetCDF file at *path*.

    The report lists the file's dimensions and, for every variable, either
    its shape or the marker ``<scalar>`` when the shape is the empty tuple.
    """
    print(f"Inspect {path}")
    # Re-open the file read-only to verify what was written.
    with netcdf_file(path, "r") as dataset:
        print(" +++ scipy reports:")
        print(f" - Dimensions: {dataset.dimensions}")
        print(" - shape or <scalar>:")
        for name in dataset.variables:
            # Hold on to the shape tuple only, never the variable object, so
            # no reference into the file's data outlives this block.
            shape = dataset.variables[name].shape
            label = "<scalar>" if shape == () else shape
            print(f" * '{name}': {label}")
        print(" +++")
def create_with_scipy_direct(no_unlim_path):
    """Create a NetCDF file with a scalar variable using scipy.io.netcdf_file directly.

    The file has one fixed-length dimension ``x`` (length 5), a scalar
    ``time`` variable, and two 1-D variables (``x`` and ``temperature``).
    Once the file has been closed it is inspected with ``check_scipy`` and
    ``check_netcdf4``.
    """
    print(f"Creating {no_unlim_path} ...")

    with netcdf_file(no_unlim_path, "w", mmap=None, version=2) as nc:
        # Fixed-length dimension: nothing in this file is unlimited.
        nc.createDimension("x", 5)

        # Scalar variable: the empty tuple means "no dimensions".
        scalar_time = nc.createVariable("time", "i4", ())
        scalar_time.long_name = "Scalar time coordinate"
        scalar_time.data[()] = 123456789

        # 1-D coordinate variable along x.
        x_coord = nc.createVariable("x", "i4", ("x",))
        x_coord.long_name = "X coordinate"
        x_coord[:] = np.arange(5)

        # 1-D data variable along x, filled with random values.
        temperature = nc.createVariable("temperature", "f8", ("x",))
        temperature[:] = np.random.randn(5)
        temperature.units = "degC"
        temperature.long_name = "Temperature"

        # Global attributes.
        nc.description = "Example file with scalar variable WITHOUT unlimited dimensions"
        nc.history = "Created with scipy.io.netcdf_file"

    # Inspect only after the writer has been closed.
    check_scipy(no_unlim_path)
    check_netcdf4(no_unlim_path)
def create_with_scipy_direct_unlimited(unlim_path):
    """Create a NetCDF file with a scalar variable AND an unlimited dimension.

    This is the combination that demonstrates the scipy bug: a scalar
    ``time`` variable is written alongside ``x`` and ``temperature``, both of
    which use the unlimited dimension ``x``. Once the file has been closed it
    is inspected with ``check_scipy`` and ``check_netcdf4``.
    """
    print(f"\nCreating {unlim_path} ...")

    with netcdf_file(unlim_path, "w", version=2) as nc:
        # A length of None declares the dimension as unlimited.
        nc.createDimension("x", None)

        # Scalar variable: the empty tuple means "no dimensions".
        scalar_time = nc.createVariable("time", "i4", ())
        scalar_time.long_name = "Scalar time coordinate"
        scalar_time.data[()] = 123456789

        # 1-D coordinate variable along the unlimited dimension.
        x_coord = nc.createVariable("x", "i4", ("x",))
        x_coord.long_name = "X coordinate"
        x_coord[:] = np.arange(5)

        # 1-D data variable along the unlimited dimension.
        temperature = nc.createVariable("temperature", "f8", ("x",))
        temperature[:] = np.random.randn(5)
        temperature.units = "degC"
        temperature.long_name = "Temperature"

        # Global attributes.
        nc.description = "Example file with scalar variable WITH unlimited dimension"
        nc.history = "Created with scipy.io.netcdf_file"

    # Inspect only after the writer has been closed.
    check_scipy(unlim_path)
    check_netcdf4(unlim_path)
def create_with_scipy_direct_unlimited_noscalar(noscalar_unlim_path):
    """Create a NetCDF file mixing unlimited and limited dimensions, without scalars.

    ``x`` and ``temperature`` use the unlimited dimension ``x``; ``time`` is
    a 1-D variable over a fixed-length dimension of size 5 instead of a
    scalar. Once the file has been closed it is inspected with
    ``check_scipy`` and ``check_netcdf4``.
    """
    print(f"\nCreating {noscalar_unlim_path} ...")

    with netcdf_file(noscalar_unlim_path, "w", version=2) as nc:
        # A length of None declares the dimension as unlimited.
        nc.createDimension("x", None)
        # A second, fixed-length dimension.
        nc.createDimension("time", 5)

        # 1-D coordinate variable along the unlimited dimension.
        x_coord = nc.createVariable("x", "i4", ("x",))
        x_coord.long_name = "X coordinate"
        x_coord[:] = np.arange(5)

        # Time is a 1-D variable over the fixed-length dimension, not a scalar.
        time_coord = nc.createVariable("time", "i4", ("time",))
        time_coord.long_name = "time coordinate"
        time_coord.data[:] = np.arange(13, 18)

        # 1-D data variable along the unlimited dimension.
        temperature = nc.createVariable("temperature", "f8", ("x",))
        temperature[:] = np.random.randn(5)
        temperature.units = "degC"
        temperature.long_name = "Temperature"

        # Global attributes.
        nc.description = "Example file WITHOUT scalar variable WITH unlimited dimension"
        nc.history = "Created with scipy.io.netcdf_file"

    # Inspect only after the writer has been closed.
    check_scipy(noscalar_unlim_path)
    check_netcdf4(noscalar_unlim_path)
if __name__ == "__main__":
    # Horizontal rule reused for the header and the summary.
    rule = "=" * 70

    print(rule)
    print(" scipy mishandles scalars when mixed with unlimited (record) variables")
    print(rule)

    no_unlim_path = "scipy_scalar_NOunlimited.nc"
    unlim_path = "scipy_scalar_unlimited.nc"
    noscalar_unlim_path = "scipy_NOscalar_unlimited.nc"

    # Remove leftovers from a previous run; a missing file is not an error.
    for stale in (unlim_path, no_unlim_path, noscalar_unlim_path):
        with contextlib.suppress(FileNotFoundError):
            os.remove(stale)

    scenarios = (
        # Example 1: Without unlimited dimensions (safe)
        (create_with_scipy_direct, no_unlim_path),
        # Example 2: With unlimited dimensions (demonstrates bug)
        (create_with_scipy_direct_unlimited, unlim_path),
        # Example 3: With limited and unlimited, but no scalars
        (create_with_scipy_direct_unlimited_noscalar, noscalar_unlim_path),
    )
    for build, target in scenarios:
        build(target)

    print("\n" + rule)
    summary_lines = (
        "Summary:",
        " - scipy without unlimited dims: creates valid NetCDF3 file",
        " - scipy WITH unlimited dims + scalars: Creates malformed file!",
        " (scipy can read it, but netCDF4 library rejects it)",
        " - scipy with mixed limited+unlimited dims w/o scalars: Creates valid NetCDF3 file",
    )
    for line in summary_lines:
        print(line)
    print(rule)