NetCDF Class #

The NetCDF class extends Dataset for structured (regular grid) NetCDF files. It wraps GDAL's Multidimensional API to provide variable access, time dimension handling, and CF-compliant metadata.

Uses the GDAL Multidimensional API (groups, arrays, dimensions) when the file was opened with open_as_multi_dimensional=True. Falls back to the classic NETCDF_DIM_* parser (dimensions.py) when opened in classic mode (no root group available).

Cached on first access. Invalidated by add_variable/remove_variable.

Returns:

Type	Description
`NetCDFMetadata`	NetCDFMetadata

`dimension_names` `property` #

Names of all dimensions in the root group (e.g., ["x", "y", "time"]).

Returns:

Type	Description
`list[str] \| None`	Dimension names, or None if no root group is available (classic mode).

`group_names` `property` #

Names of sub-groups in the root group.

Returns:

Type	Description
`list[str]`	Sub-group names (e.g. `["forecast", "analysis"]`). Empty list if no sub-groups exist or the dataset is in classic mode.

`is_subset` `property` #

Whether this object represents a single-variable subset.

Returns:

Name	Type	Description
`bool`	`bool`	True if the dataset is a variable subset extracted via `get_variable()`.

`is_md_array` `property` #

Whether this dataset was opened in multidimensional mode.

Returns:

Name	Type	Description
`bool`	`bool`	True if the dataset was opened via `gdal.OF_MULTIDIM_RASTER` and supports groups, MDArrays, and dimensions.

`global_attributes` `property` #

Global attributes from the root group.

The attributes are re-read from GDAL on every access. In MDIM mode they are read from the root group's attributes; in classic mode they are read from GDAL's GetMetadata().

Returns:

Type	Description
`dict[str, Any]`	dict[str, Any]: Key-value mapping of global attributes.

`__init__(src, access='read_only', open_as_multi_dimensional=True)` #

Initialize a NetCDF dataset wrapper.

Parameters:

Name	Type	Description	Default
`src`	`Dataset`	A GDAL dataset handle (either classic or multidimensional).	required
`access`	`str`	Access mode, either `"read_only"` or `"write"`. Defaults to `"read_only"`.	`'read_only'`
`open_as_multi_dimensional`	`bool`	If True the dataset was opened with `gdal.OF_MULTIDIM_RASTER` and supports groups, MDArrays, and dimensions. If False it was opened in classic raster mode (subdatasets, bands). Defaults to True.	`True`

Source code in src/pyramids/netcdf/netcdf.py

def __init__(
    self,
    src: gdal.Dataset,
    access: str = "read_only",
    open_as_multi_dimensional: bool = True,
):
    """Wrap a GDAL dataset handle as a NetCDF object.

    Args:
        src: GDAL dataset handle, opened in either classic or
            multidimensional mode.
        access: ``"read_only"`` (the default) or ``"write"``; forwarded
            to the parent initializer.
        open_as_multi_dimensional: True (the default) when ``src`` was
            opened with ``gdal.OF_MULTIDIM_RASTER`` and therefore exposes
            groups, MDArrays and dimensions.  False when it was opened
            in classic raster mode (subdatasets, bands).
    """
    super().__init__(src, access=access)

    # Mode flags.  A freshly constructed wrapper is never a variable
    # subset, whichever way ``src`` was opened; this must be settled
    # before any variable is retrieved.
    self._is_md_array = bool(open_as_multi_dimensional)
    self._is_subset = False

    # Lazily populated caches (invalidated by _replace_raster,
    # add_variable and remove_variable).
    self._cached_variables: dict[str, NetCDF] | None = None
    self._cached_meta_data: NetCDFMetadata | None = None

    # Origin tracking, filled in by get_variable (RT-4).
    self._parent_nc: NetCDF | None = None
    self._source_var_name: str | None = None
    self._gdal_md_arr_ref: Any = None
    self._gdal_rg_ref: Any = None

    # Per-variable dimension and attribute bookkeeping.
    self._md_array_dims: list[str] = []
    self._band_dim_name: str | None = None
    self._band_dim_values: list[Any] | None = None
    self._variable_attrs: dict[str, Any] = {}

    # Packing parameters consumed by read_array(unpack=True).
    self._scale: float | None = None
    self._offset: float | None = None

`__str__()` #

Return a human-readable summary of the NetCDF dataset.

Source code in src/pyramids/netcdf/netcdf.py

def __str__(self):
    """Build a multi-line, human-readable summary of this dataset.

    The summary lists the cell size, raster dimensions, EPSG code,
    projection, variable names, metadata and file name.
    """
    return f"""
        Cell size: {self.cell_size}
        Dimension: {self.rows} * {self.columns}
        EPSG: {self.epsg}
        projection: {self.crs}
        Variables: {self.variable_names}
        Metadata: {self.meta_data}
        File: {self.file_name}
    """

`__repr__()` #

Return the representation provided by the parent class.

Source code in src/pyramids/netcdf/netcdf.py

def __repr__(self):
    """Return the representation provided by the parent class."""
    inherited = super().__repr__()
    return inherited

`plot(band=None, **kwargs)` #

Plot a band of the dataset.

Blocked on root MDIM containers — extract a variable first.

Raises:

Type	Description
`ValueError`	If called on a root MDIM container.

Source code in src/pyramids/netcdf/netcdf.py

def plot(self, band=None, **kwargs):
    """Plot one band of the dataset via the parent implementation.

    Root MDIM containers cannot be plotted directly; extract a
    variable first.

    Args:
        band: Band to plot, forwarded unchanged to the parent ``plot``.
        **kwargs: Extra plotting options, forwarded unchanged.

    Returns:
        Whatever the parent ``plot`` returns.

    Raises:
        ValueError: If called on a root MDIM container.
    """
    # Guard first so nothing is drawn for a container.
    self._check_not_container("plot")
    outcome = super().plot(band=band, **kwargs)
    return outcome

`read_array(band=None, window=None, unpack=False)` #

Read array from the dataset.

Parameters:

Name	Type	Description	Default
`band`	`int \| None`	Band index to read, or None for all bands.	`None`
`window`	`list[int] \| None`	Spatial window to read.	`None`
`unpack`	`bool`	If True and the variable has CF `scale_factor` and/or `add_offset`, apply the transformation `real = raw * scale + offset`. Defaults to False.	`False`

Returns:

Type	Description
`ndarray`	np.ndarray: The array data, optionally unpacked.

Raises:

Type	Description
`ValueError`	If called on a root MDIM container.

Source code in src/pyramids/netcdf/netcdf.py

def read_array(
    self,
    band: int | None = None,
    window: list[int] | None = None,
    unpack: bool = False,
) -> np.ndarray:
    """Read array from the dataset.

    Args:
        band: Band index to read, or None for all bands.
        window: Spatial window to read.
        unpack: If True and the variable has CF ``scale_factor``
            and/or ``add_offset``, apply the transformation
            ``real = raw * scale + offset``. Defaults to False.

    Returns:
        np.ndarray: The array data, optionally unpacked.

    Raises:
        ValueError: If called on a root MDIM container.
    """
    self._check_not_container("read_array")
    result = super().read_array(band=band, window=window)
    if unpack:
        scale = getattr(self, "_scale", None)
        offset = getattr(self, "_offset", None)
        if scale is not None or offset is not None:
            result = result.astype(np.float64)
            if scale is not None:
                result = result * scale
            if offset is not None:
                result = result + offset
    return result

`crop(mask, touch=True)` #

Crop the dataset using a polygon or raster mask.

On a root MDIM container this crops every variable and returns a new in-memory NetCDF container with the cropped results. On a variable subset it delegates to the parent Dataset.crop() and wraps the result as NetCDF to preserve variable metadata (_band_dim_name, _band_dim_values, sel(), etc.).

Parameters:

Name	Type	Description	Default
`mask`	`Any`	GeoDataFrame with polygon geometry, or a Dataset to use as a spatial mask.	required
`touch`	`bool`	If True, include cells that touch the mask boundary. Defaults to True.	`True`

Returns:

Name	Type	Description
`NetCDF`	`'NetCDF'`	Cropped container or variable subset.

Source code in src/pyramids/netcdf/netcdf.py

def crop(self, mask: Any, touch: bool = True) -> "NetCDF":
    """Crop the dataset with a polygon or raster mask.

    A **root MDIM container** is cropped variable by variable through
    ``_apply_to_all_variables`` and the resulting container is
    returned.  Anything else (a **variable subset**) is cropped by
    the parent ``Dataset.crop()`` and then passed through
    ``_preserve_netcdf_metadata`` so the result stays a ``NetCDF``
    carrying the variable metadata (``_band_dim_name``,
    ``_band_dim_values``, ``sel()``, etc.).

    Args:
        mask: GeoDataFrame with polygon geometry, or a Dataset used
            as a spatial mask.
        touch: If True, cells touching the mask boundary are
            included. Defaults to True.

    Returns:
        NetCDF: Cropped container or variable subset.
    """
    is_root_container = (
        self._is_md_array and not self._is_subset and self.band_count == 0
    )
    if is_root_container:
        return self._apply_to_all_variables(
            "crop", dict(mask=mask, touch=touch)
        )

    cropped = super().crop(mask=mask, touch=touch)
    return self._preserve_netcdf_metadata(cropped)

`to_crs(to_epsg, method='nearest neighbor', maintain_alignment=False)` #

Reproject the dataset to a different CRS.

On a root MDIM container this reprojects every variable and returns a new container. On a variable subset it delegates to Dataset.to_crs() and wraps the result as NetCDF to preserve variable metadata.

Parameters:

Name	Type	Description	Default
`to_epsg`	`int`	Target EPSG code (e.g., 4326, 32637).	required
`method`	`str`	Resampling method. Defaults to `"nearest neighbor"`.	`'nearest neighbor'`
`maintain_alignment`	`bool`	If True, keep the same number of rows and columns. Defaults to False.	`False`

Returns:

Name	Type	Description
`NetCDF`	`'NetCDF'`	Reprojected container or variable subset.

Source code in src/pyramids/netcdf/netcdf.py

def to_crs(
    self,
    to_epsg: int,
    method: str = "nearest neighbor",
    maintain_alignment: bool = False,
) -> "NetCDF":
    """Reproject the dataset to another coordinate reference system.

    A **root MDIM container** is reprojected variable by variable
    through ``_apply_to_all_variables``.  A **variable subset** is
    reprojected by the parent ``Dataset.to_crs()`` and then passed
    through ``_preserve_netcdf_metadata`` so the variable metadata
    is kept.

    Args:
        to_epsg: Target EPSG code (e.g., 4326, 32637).
        method: Resampling method. Defaults to ``"nearest neighbor"``.
        maintain_alignment: If True, keep the same number of rows
            and columns. Defaults to False.

    Returns:
        NetCDF: Reprojected container or variable subset.
    """
    # The same keyword set is used for both code paths.
    options = {
        "to_epsg": to_epsg,
        "method": method,
        "maintain_alignment": maintain_alignment,
    }
    is_root_container = (
        self._is_md_array and not self._is_subset and self.band_count == 0
    )
    if is_root_container:
        return self._apply_to_all_variables("to_crs", options)

    reprojected = super().to_crs(**options)
    return self._preserve_netcdf_metadata(reprojected)

`resample(cell_size, method='nearest neighbor')` #

Resample the dataset to a different cell size.

On a root MDIM container this resamples every variable and returns a new container. On a variable subset it delegates to Dataset.resample() and wraps the result as NetCDF to preserve variable metadata.

Parameters:

Name	Type	Description	Default
`cell_size`	`float`	New cell size.	required
`method`	`str`	Resampling method. Defaults to `"nearest neighbor"`.	`'nearest neighbor'`

Returns:

Name	Type	Description
`NetCDF`	`'NetCDF'`	Resampled container or variable subset.

Source code in src/pyramids/netcdf/netcdf.py

def resample(
    self,
    cell_size: float,
    method: str = "nearest neighbor",
) -> "NetCDF":
    """Resample the dataset to a new cell size.

    A **root MDIM container** is resampled variable by variable
    through ``_apply_to_all_variables``.  A **variable subset** is
    resampled by the parent ``Dataset.resample()`` and then passed
    through ``_preserve_netcdf_metadata`` so the variable metadata
    is kept.

    Args:
        cell_size: New cell size.
        method: Resampling method. Defaults to ``"nearest neighbor"``.

    Returns:
        NetCDF: Resampled container or variable subset.
    """
    # The same keyword set is used for both code paths.
    options = {"cell_size": cell_size, "method": method}
    is_root_container = (
        self._is_md_array and not self._is_subset and self.band_count == 0
    )
    if is_root_container:
        return self._apply_to_all_variables("resample", options)

    resampled = super().resample(**options)
    return self._preserve_netcdf_metadata(resampled)

`sel(**kwargs)` #

Select a subset of bands by coordinate values.

Extracts bands whose coordinate values match the given criteria. Works on variable subsets that have _band_dim_name and _band_dim_values set by get_variable().

The result is always a NetCDF instance with the same variable metadata preserved, so that sel() can be chained and NetCDF-specific methods like read_array(unpack=True) remain available.

Parameters:

Name	Type	Description	Default
`**kwargs`	`Any`	Exactly one keyword argument whose key is the dimension name and whose value is one of: (a) a single number, to select one band by exact value; (b) a list of numbers, to select multiple bands; (c) a `slice(start, stop)`, to select bands where `start <= coord <= stop`.	`{}`

Returns:

Name	Type	Description
`NetCDF`	`NetCDF`	A new NetCDF variable subset with only the selected bands and all variable metadata preserved.

Raises:

Type	Description
`ValueError`	If the dimension name doesn't match `_band_dim_name`, or no matching bands are found.

Examples:

Select a single time step::

var.sel(time=6)

Select multiple time steps::

var.sel(time=[0, 12, 24])

Select a range::

var.sel(time=slice(6, 18))

Source code in src/pyramids/netcdf/netcdf.py

def sel(self, **kwargs: Any) -> "NetCDF":
    """Select a subset of bands by coordinate values.

    Extracts bands whose coordinate values match the given
    criteria.  Works on variable subsets that have
    ``_band_dim_name`` and ``_band_dim_values`` set by
    ``get_variable()``.

    The result is passed through ``_preserve_netcdf_metadata`` so
    that ``sel()`` can be chained and NetCDF-specific methods like
    ``read_array(unpack=True)`` remain available.

    Args:
        **kwargs: Exactly one keyword argument where the key is the
            dimension name and the value is one of:

            - A single value: select the band(s) whose coordinate
              equals it exactly.
            - A list, tuple, set, frozenset or NumPy array of
              values: select every band whose coordinate is one of
              them.  Bands are returned in coordinate order, not in
              the order the values were given.
            - A ``slice(start, stop)``: select bands where
              ``start <= coord <= stop`` (both ends inclusive).  A
              bound left as ``None`` is unbounded on that side.
              ``slice.step`` is ignored.

    Returns:
        NetCDF: A new NetCDF variable subset with only the
            selected bands and all variable metadata preserved.

    Raises:
        ValueError: If not exactly one keyword is given, the
            dimension name doesn't match ``_band_dim_name``, no
            coordinate values are tracked, or no bands match.

    Examples:
        Select a single time step::

            var.sel(time=6)

        Select multiple time steps::

            var.sel(time=[0, 12, 24])

        Select a range::

            var.sel(time=slice(6, 18))
    """
    if len(kwargs) != 1:
        raise ValueError("sel() requires exactly one keyword argument.")

    dim_name, selector = next(iter(kwargs.items()))

    if self._band_dim_name is None:
        raise ValueError(
            "sel() requires a variable with a non-spatial dimension. "
            "This variable has no band dimension tracked."
        )
    if dim_name != self._band_dim_name:
        raise ValueError(
            f"Dimension '{dim_name}' does not match the band "
            f"dimension '{self._band_dim_name}'."
        )
    if self._band_dim_values is None:
        raise ValueError(
            "No coordinate values available for dimension "
            f"'{dim_name}'."
        )

    coords = self._band_dim_values

    # A NumPy array compared with ``==`` yields an array, which cannot
    # be used as a filter condition; convert it to plain Python values
    # (a 0-d array becomes a scalar and takes the single-value path).
    if isinstance(selector, np.ndarray):
        selector = selector.tolist()

    if isinstance(selector, slice):
        # A missing bound means "unbounded".  Substituting coords[0] /
        # coords[-1] would fail on empty coordinates and would select
        # nothing when the coordinates are stored in descending order.
        lower, upper = selector.start, selector.stop
        band_indices = [
            i
            for i, v in enumerate(coords)
            if (lower is None or lower <= v)
            and (upper is None or v <= upper)
        ]
    elif isinstance(selector, (list, tuple, set, frozenset)):
        wanted = set(selector)
        band_indices = [i for i, v in enumerate(coords) if v in wanted]
    else:
        band_indices = [i for i, v in enumerate(coords) if v == selector]

    if not band_indices:
        raise ValueError(
            f"No bands match {dim_name}={selector}. "
            f"Available values: {coords}"
        )

    selected_coords = [coords[i] for i in band_indices]

    # Read only the selected bands instead of loading the full array.
    # NOTE(review): positions in ``coords`` are passed straight to
    # read_array(band=...) — confirm they match its band convention.
    #
    # Trade-off: band-by-band reads avoid loading the entire variable
    # into memory, which matters for large variables with few selected
    # bands.  When *most* bands are selected, the per-band overhead may
    # be slower than one full read followed by NumPy slicing; revisit
    # if profiling shows a bottleneck for large on-disk NetCDFs.
    band_arrays = [self.read_array(band=i) for i in band_indices]
    if len(band_arrays) == 1:
        selected = band_arrays[0]
    else:
        selected = np.stack(band_arrays, axis=0)

    # create_from_array is given a single no-data value, so use the
    # first one when the dataset reports a non-empty list.
    ndv = self.no_data_value
    ndv_scalar = ndv[0] if isinstance(ndv, list) and ndv else ndv
    ds_result = Dataset.create_from_array(
        selected, geo=self.geotransform, epsg=self.epsg,
        no_data_value=ndv_scalar,
    )
    result = self._preserve_netcdf_metadata(ds_result)
    # The copied metadata still lists every coordinate; narrow it to
    # the bands actually kept so chained sel() calls stay consistent.
    result._band_dim_values = selected_coords

    return result

`read_file(path, read_only=True, open_as_multi_dimensional=True)` `classmethod` #

Open a NetCDF file from disk.

Parameters:

Name	Type	Description	Default
`path`	`str \| Path`	Path to the `.nc` file.	required
`read_only`	`bool`	If True, open in read-only mode. Set to False for write access. Defaults to True.	`True`
`open_as_multi_dimensional`	`bool`	If True, open with `gdal.OF_MULTIDIM_RASTER` to access the full group / dimension / variable hierarchy. If False, open in classic raster mode where each variable is a subdataset. Defaults to True.	`True`

Returns:

Name	Type	Description
`NetCDF`	`NetCDF`	The opened dataset.

Source code in src/pyramids/netcdf/netcdf.py

@classmethod
def read_file(  # type: ignore[override]
    cls,
    path: str | Path,
    read_only: bool = True,
    open_as_multi_dimensional: bool = True,
) -> NetCDF:
    """Open a NetCDF file from disk.

    Args:
        path: Path to the ``.nc`` file.
        read_only: If True, open in read-only mode. Set to False for
            write access. Defaults to True.
        open_as_multi_dimensional: If True, open with
            ``gdal.OF_MULTIDIM_RASTER`` to access the full group /
            dimension / variable hierarchy.  If False, open in classic
            raster mode where each variable is a subdataset.
            Defaults to True.

    Returns:
        NetCDF: The opened dataset.
    """
    src = _io.read_file(path, read_only, open_as_multi_dimensional)
    # Translate the boolean flag into the access string the constructor
    # expects, in a separate name so ``read_only`` keeps its bool type.
    access = "read_only" if read_only else "write"
    return cls(
        src, access=access, open_as_multi_dimensional=open_as_multi_dimensional
    )

`get_all_metadata(open_options=None)` #

Get full MDIM metadata (uncached).

Unlike meta_data (which is cached), this always re-traverses the GDAL multidimensional structure.

Parameters:

Name	Type	Description	Default
`open_options`	`dict \| None`	Driver-specific open options forwarded to `get_metadata()`. Defaults to None.	`None`

Returns:

Type	Description
`NetCDFMetadata`	NetCDFMetadata

Source code in src/pyramids/netcdf/netcdf.py

def get_all_metadata(self, open_options: dict | None = None) -> NetCDFMetadata:
    """Collect the full MDIM metadata without using the cache.

    Every call hands the underlying raster to ``get_metadata()``
    again, unlike the cached ``meta_data``.

    Args:
        open_options: Driver-specific open options forwarded to
            ``get_metadata()``. Defaults to None.

    Returns:
        NetCDFMetadata
    """
    return get_metadata(self._raster, open_options)

`get_time_variable(var_name='time', time_format='%Y-%m-%d')` #

Parse the time coordinate variable into formatted date strings.

Reads the units attribute (e.g., "days since 1979-01-01") from the dimension metadata and converts raw numeric values to human-readable date strings.

Parameters:

Name	Type	Description	Default
`var_name`	`str`	Name of the time dimension / variable. Defaults to `"time"`.	`'time'`
`time_format`	`str`	strftime format for the output strings. Defaults to `"%Y-%m-%d"`.	`'%Y-%m-%d'`

Returns:

Type	Description
`list[str] \| None`	Formatted time strings, or None if the time dimension is not found or lacks a `units` attribute.

Source code in src/pyramids/netcdf/netcdf.py

def get_time_variable(
    self, var_name: str = "time", time_format: str = "%Y-%m-%d"
) -> list[str] | None:
    """Parse the time coordinate variable into formatted date strings.

    Reads the ``units`` attribute (e.g., ``"days since 1979-01-01"``)
    from the dimension metadata and converts raw numeric values to
    human-readable date strings.

    Args:
        var_name: Name of the time dimension / variable.
            Defaults to ``"time"``.
        time_format: strftime format for the output strings.
            Defaults to ``"%Y-%m-%d"``.

    Returns:
        list[str] or None: Formatted time strings, or None if the
        time dimension is not found or lacks a ``units`` attribute.
    """
    time_stamp = None
    time_dim = self.meta_data.get_dimension(var_name)
    if time_dim is not None:
        units = time_dim.attrs.get("units")
        if units is not None:
            calendar = time_dim.attrs.get("calendar", "standard")
            time_vals = self._read_variable(var_name)
            if time_vals is not None:
                func = create_time_conversion_func(
                    units, time_format, calendar=calendar
                )
                time_stamp = list(map(func, time_vals.reshape(-1)))
    return time_stamp

`get_group(group_name)` #

Open a sub-group as a NetCDF container.

The returned object wraps the sub-group's GDAL dataset and exposes the sub-group's variables and dimensions via the same API as the root container.

Parameters:

Name	Type	Description	Default
`group_name`	`str`	Name of the sub-group. Supports nested paths separated by `/` (e.g. `"forecast/surface"`).	required

Returns:

Name	Type	Description
`NetCDF`	`NetCDF`	A container backed by the sub-group.

Raises:

Type	Description
`ValueError`	If the group doesn't exist or the dataset has no root group.

Source code in src/pyramids/netcdf/netcdf.py

def get_group(self, group_name: str) -> NetCDF:
    """Open a sub-group as a NetCDF container.

    The sub-group's dimensions, indexing variables and arrays are
    copied into a new in-memory (``MEM`` driver) multidimensional
    dataset, which is then wrapped as a ``NetCDF``.  Every array is
    read in full while copying.  For each array the data, the
    no-data value and the spatial reference are copied; attributes
    are not.

    Args:
        group_name: Name of the sub-group. Supports nested paths
            separated by ``/`` (e.g. ``"forecast/surface"``).

    Returns:
        NetCDF: A container backed by an in-memory copy of the
            sub-group.

    Raises:
        ValueError: If the group doesn't exist or the dataset
            has no root group.
    """
    rg = self._raster.GetRootGroup()
    if rg is None:
        raise ValueError(
            "get_group requires a multidimensional container."
        )

    # Navigate nested paths: "forecast/surface" → open each level
    group = rg
    for part in group_name.split("/"):
        try:
            group = group.OpenGroup(part)
        except Exception:
            # Deliberately broad: any failure to open a level is
            # reported below as "group not found".
            group = None
        if group is None:
            raise ValueError(
                f"Group '{group_name}' not found. "
                f"Available groups: {self.group_names}"
            )

    # Create a multidimensional dataset from the sub-group.
    # GDAL doesn't have a direct "group → dataset" conversion,
    # so we build a MEM MDIM dataset and copy the group's
    # arrays and dimensions into it.
    dst = gdal.GetDriverByName("MEM").CreateMultiDimensional("group")
    dst_rg = dst.GetRootGroup()

    # Copy dimensions from the sub-group
    dim_map = {}
    for gdal_dim in (group.GetDimensions() or []):
        dim_name = gdal_dim.GetName()
        new_dim = dst_rg.CreateDimension(
            dim_name, gdal_dim.GetType(), None, gdal_dim.GetSize()
        )
        iv = gdal_dim.GetIndexingVariable()
        if iv is not None:
            # Read the coordinate values once; they are needed both to
            # pick the destination dtype and to fill the new array.
            coord_values = iv.ReadAsArray()
            coord_arr = dst_rg.CreateMDArray(
                dim_name, [new_dim],
                gdal.ExtendedDataType.Create(
                    numpy_to_gdal_dtype(coord_values)
                ),
            )
            coord_arr.Write(coord_values)
            new_dim.SetIndexingVariable(coord_arr)
        dim_map[dim_name] = new_dim

    # Copy arrays from the sub-group
    for arr_name in (group.GetMDArrayNames() or []):
        md_arr = group.OpenMDArray(arr_name)
        if md_arr is None:
            continue
        # Map source dims to destination dims (by name)
        new_dims = []
        for d in md_arr.GetDimensions():
            d_name = d.GetName()
            if d_name not in dim_map:
                # Dimension not listed by the sub-group itself (e.g.
                # one from a parent group) — create it locally, without
                # an indexing variable.
                dim_map[d_name] = dst_rg.CreateDimension(
                    d_name, d.GetType(), None, d.GetSize()
                )
            new_dims.append(dim_map[d_name])
        arr_data = md_arr.ReadAsArray()
        arr_dtype = gdal.ExtendedDataType.Create(
            numpy_to_gdal_dtype(arr_data)
        )
        new_arr = dst_rg.CreateMDArray(arr_name, new_dims, arr_dtype)
        new_arr.Write(arr_data)
        ndv = md_arr.GetNoDataValue()
        if ndv is not None:
            new_arr.SetNoDataValueDouble(ndv)
        srs = md_arr.GetSpatialRef()
        if srs is not None:
            new_arr.SetSpatialRef(srs)

    result = NetCDF(dst)
    return result

`get_variable_names()` #

Return names of data variables, excluding dimension coordinates.

Uses CF classification when metadata is cached (fast path). Otherwise queries GetMDArrayNames() and filters out dimension arrays and 0-dimensional scalar variables (grid_mapping etc.). In classic mode, parses subdataset metadata.

Returns:

Type	Description
`list[str]`	list[str]: Variable names (e.g., `["temperature", "precipitation"]`).

Source code in src/pyramids/netcdf/netcdf.py

def get_variable_names(self) -> list[str]:
    """Return names of data variables, excluding dimension coordinates.

    Three sources are tried in order:

    1. If metadata is already cached and has a CF classification,
       its ``data_variable_names`` are returned (fast path).
    2. Otherwise, if a root group is available, the names from
       ``GetMDArrayNames()`` are filtered: arrays named after a
       dimension and arrays with no dimensions (scalar variables
       such as grid_mapping) are dropped.
    3. Otherwise (classic mode), the name is taken from each
       subdataset description as its second space-separated token.

    A ``None`` returned by GDAL for any of these listings is treated
    as an empty listing.

    Returns:
        list[str]: Variable names (e.g., ``["temperature", "precipitation"]``).
    """
    cached = self._cached_meta_data
    if cached is not None and cached.cf is not None:
        return list(cached.cf.data_variable_names)

    rg = self._raster.GetRootGroup()
    if rg is None:
        # Classic mode: entry[1] is the subdataset description.
        return [
            entry[1].split(" ")[1]
            for entry in (self._raster.GetSubDatasets() or [])
        ]

    all_names = rg.GetMDArrayNames() or []
    dim_names = {dim.GetName() for dim in (rg.GetDimensions() or [])}

    variable_names = []
    for name in all_names:
        if name in dim_names:
            continue
        md_arr = rg.OpenMDArray(name)
        # Arrays without dimensions are scalars, not data variables.
        # An array that cannot be opened is kept, as before.
        if md_arr is not None and not md_arr.GetDimensions():
            continue
        variable_names.append(name)
    return variable_names

`get_variable(variable_name)` #

Extract a single variable as a classic-raster NetCDF object.

The returned object carries origin metadata so that modified data can be written back via set_variable().

Supports group-qualified names: "forecast/temperature" first navigates to the forecast sub-group, then extracts temperature from it.

Parameters:

Name	Type	Description	Default
`variable_name`	`str`	Name of the variable to extract. Use `/` to separate group path from variable name.	required

Returns:

Name	Type	Description
`NetCDF`	`NetCDF`	A subset backed by a classic dataset where non-spatial dimensions are mapped to bands.

Raises:

Type	Description
`ValueError`	If `variable_name` is not present in the dataset.

Source code in src/pyramids/netcdf/netcdf.py

def get_variable(self, variable_name: str) -> NetCDF:
    """Return one variable of this container as a classic-raster ``NetCDF``.

    The returned subset records where it came from (parent container,
    variable name, dimension names, band-dimension coordinates,
    attributes, scale/offset) so that it can later be written back
    with ``set_variable()``.

    A name containing ``/`` is treated as group-qualified: everything
    before the last ``/`` is resolved with ``get_group()`` and the
    remainder is looked up inside that group, e.g.
    ``"forecast/temperature"``.

    Args:
        variable_name: Variable to extract, optionally prefixed by a
            ``/``-separated group path.

    Returns:
        NetCDF: Subset flagged with ``_is_subset = True``. In
            multidimensional mode the non-spatial dimensions are
            exposed as bands.

    Raises:
        ValueError: If ``variable_name`` is not listed in
            ``variable_names``, or if the classic-mode subdataset
            cannot be opened.
    """
    # Group-qualified name: hand the lookup over to the sub-group and
    # return its result unchanged.
    if "/" in variable_name:
        group_path, leaf_name = variable_name.rsplit("/", 1)
        return self.get_group(group_path).get_variable(leaf_name)

    if variable_name not in self.variable_names:
        raise ValueError(
            f"{variable_name} is not a valid variable name in {self.variable_names}"
        )

    prefix = self.driver_type.upper()
    rg = self._raster.GetRootGroup()
    md_arr_ref = None
    rg_ref = None

    if prefix != "MEMORY" and rg is None:
        # Classic mode: open the variable through its subdataset name.
        subdataset_name = f"{prefix}:{self.file_name}:{variable_name}"
        src = gdal.Open(subdataset_name)
        if src is None:
            raise ValueError(
                f"Could not open variable '{variable_name}' via "
                f"'{subdataset_name}'"
            )
        cube = NetCDF(src)
        cube._is_md_array = False
    else:
        src, md_arr_ref, rg_ref = self._read_md_array(variable_name)
        if not isinstance(src, gdal.Dataset):
            cube = src
        else:
            cube = NetCDF(src)
            cube._is_md_array = True
            # _read_md_array flips the data lazily and GDAL usually
            # corrects the geotransform as well. When the Y dimension
            # has no indexing variable (e.g. WRF "south_north") the
            # geotransform can still be south-up, so it is corrected on
            # the wrapper object only (no data copy).
            gt = cube._geotransform
            if gt[5] > 0:
                flipped_top = gt[3] + gt[5] * cube._rows
                cube._geotransform = (
                    gt[0],
                    gt[1],
                    gt[2],
                    flipped_top,
                    gt[4],
                    -gt[5],
                )
                cube._cell_size = abs(gt[1])
        # Hold on to the GDAL SWIG objects: the classic view returned by
        # AsClassicDataset is backed by the MDArray/root group, and
        # without these references it becomes a dangling pointer on
        # Windows.
        cube._gdal_md_arr_ref = md_arr_ref
        cube._gdal_rg_ref = rg_ref

    cube._is_subset = True

    # --- RT-4: remember the variable's origin for round-tripping ---
    cube._parent_nc = self
    cube._source_var_name = variable_name

    # Origin metadata defaults; overridden below only when a root group
    # and an MDArray reference are both available.
    dim_names = []
    band_dim_name = None
    band_dim_values = None
    variable_attrs = {}
    scale = None
    offset = None

    md_arr = md_arr_ref if rg is not None else None
    if md_arr is not None:
        dims = md_arr.GetDimensions()
        dim_names = [d.GetName() for d in dims]

        # Everything except the last two (spatial) dimensions maps to
        # bands; coordinates are only tracked for exactly one such
        # dimension.
        n_dims = len(dims)
        if n_dims > 2:
            band_dims = [d for i, d in enumerate(dims) if i < n_dims - 2]
            if len(band_dims) == 1:
                band_dim = band_dims[0]
                band_dim_name = band_dim.GetName()
                iv = band_dim.GetIndexingVariable()
                try:
                    if iv is not None:
                        band_dim_values = iv.ReadAsArray().tolist()
                    else:
                        band_dim_values = None
                except RuntimeError:
                    # String-typed indexing variables (e.g. WRF "Times")
                    # can't be read via ReadAsArray in the GDAL SWIG
                    # bindings, so plain indices are used instead.
                    band_dim_values = list(range(band_dim.GetSize()))

        # Variable attributes (best effort: whatever was read before a
        # failure is kept).
        try:
            for attr in md_arr.GetAttributes():
                variable_attrs[attr.GetName()] = attr.Read()
        except Exception:
            pass  # nosec B110

        # Scale/offset for CF packed data; both are reset if either
        # lookup fails.
        try:
            scale = md_arr.GetScale()
            offset = md_arr.GetOffset()
        except Exception:
            scale = None
            offset = None

    cube._md_array_dims = dim_names
    cube._band_dim_name = band_dim_name
    cube._band_dim_values = band_dim_values
    cube._variable_attrs = variable_attrs
    cube._scale = scale
    cube._offset = offset

    return cube

`to_file(path, **kwargs)` #

Save the dataset to disk.

For .nc / .nc4 files the full multidimensional structure (groups, dimensions, variables, attributes) is preserved via CreateCopy with the netCDF driver. For other extensions (e.g. .tif), the parent Dataset.to_file is used — but only on variable subsets, not on root MDIM containers.

Parameters:

Name	Type	Description	Default
`path`	`str \| Path`	Destination file path. The extension determines the output driver (`.nc` -> netCDF, `.tif` -> GeoTIFF, etc.).	required
`**kwargs`	`Any`	Forwarded to `Dataset.to_file` for non-NetCDF extensions (e.g. `tile_length`, `creation_options`).	`{}`

Raises:

Type	Description
`RuntimeError`	If the netCDF `CreateCopy` call fails.
`ValueError`	If a root MDIM container is saved to a non-NC extension (use `.nc` or extract a variable first).

Source code in src/pyramids/netcdf/netcdf.py

def to_file(  # type: ignore[override]
    self,
    path: str | Path,
    **kwargs: Any,
) -> None:
    """Write the dataset to ``path``; the extension selects the driver.

    ``.nc`` / ``.nc4`` targets are written with the netCDF driver's
    ``CreateCopy``. Any other extension is delegated to the parent
    ``Dataset.to_file``, which is refused for a root multidimensional
    container (only variable subsets may be saved that way).

    Args:
        path: Destination file path. The (case-insensitive) suffix
            determines the output driver (``.nc`` -> netCDF,
            ``.tif`` -> GeoTIFF, etc.).
        **kwargs: Passed through to ``Dataset.to_file`` for non-NetCDF
            extensions (e.g. ``tile_length``, ``creation_options``).
            Not used for ``.nc`` / ``.nc4``.

    Raises:
        RuntimeError: If the netCDF ``CreateCopy`` call returns None.
        ValueError: If a root MDIM container is saved to a non-NC
            extension (use ``.nc`` or extract a variable first).
    """
    target = Path(path)
    extension = target.suffix[1:].lower()

    if extension not in ("nc", "nc4"):
        is_root_container = self._is_md_array and not self._is_subset
        if is_root_container:
            raise ValueError(
                "Cannot save a multidimensional NetCDF container as "
                f"'{extension}'. Use .nc extension or extract a "
                "variable first with .get_variable()."
            )
        super().to_file(target, **kwargs)
        return

    netcdf_driver = gdal.GetDriverByName("netCDF")
    written = netcdf_driver.CreateCopy(str(target), self._raster, 0)
    if written is None:
        raise RuntimeError(f"Failed to save NetCDF to {target}")
    written.FlushCache()
    # Drop the reference to the freshly written dataset.
    written = None

`copy(path=None)` #

Create a deep copy of this NetCDF dataset.

Parameters:

Name	Type	Description	Default
`path`	`str \| Path \| None`	Destination file path. If None, the copy is created in memory using the MEM driver. Defaults to None.	`None`

Returns:

Name	Type	Description
`NetCDF`	`NetCDF`	A new NetCDF object with copied data.

Raises:

Type	Description
`RuntimeError`	If `CreateCopy` fails.

Source code in src/pyramids/netcdf/netcdf.py

def copy(self, path: str | Path | None = None) -> NetCDF:
    """Duplicate this dataset with GDAL's ``CreateCopy``.

    Args:
        path: Destination file path, written with the netCDF driver.
            When None (the default) the copy is made with the MEM
            driver and an empty destination name.

    Returns:
        NetCDF: A new NetCDF object wrapping the copied dataset,
            opened with ``access="write"``.

    Raises:
        RuntimeError: If ``CreateCopy`` returns None.
    """
    if path is None:
        driver_name, path = "MEM", ""
    else:
        driver_name = "netCDF"

    driver = gdal.GetDriverByName(driver_name)
    duplicate = driver.CreateCopy(str(path), self._raster)
    if duplicate is None:
        raise RuntimeError(f"Failed to copy NetCDF dataset to '{path}'")
    return NetCDF(duplicate, access="write")

`create_main_dimension(group, dim_name, dtype, values)` `staticmethod` #

Create a NetCDF dimension with an indexing variable.

The dimension type is inferred from dim_name: y/lat/latitude -> horizontal Y, x/lon/longitude -> horizontal X, bands/time -> temporal.

The dimension is registered in the group together with a matching MDArray that stores the coordinate values.

Parameters:

Name	Type	Description	Default
`group`	`Group`	Root group (or sub-group) of the multidimensional dataset.	required
`dim_name`	`str`	Name of the dimension to create.	required
`dtype`	`int`	GDAL `ExtendedDataType` for the indexing variable.	required
`values`	`ndarray`	Coordinate values for the dimension.	required

Returns:

Type	Description
`Dimension`	gdal.Dimension: The newly created dimension.

Source code in src/pyramids/netcdf/netcdf.py

@staticmethod
def create_main_dimension(
    group: gdal.Group, dim_name: str, dtype: int, values: np.ndarray
) -> gdal.Dimension:
    """Create a dimension plus the MDArray holding its coordinates.

    The GDAL dimension type is chosen from ``dim_name`` (exact,
    case-sensitive match):

    - ``y`` / ``lat`` / ``latitude`` -> ``DIM_TYPE_HORIZONTAL_Y``
    - ``x`` / ``lon`` / ``longitude`` -> ``DIM_TYPE_HORIZONTAL_X``
    - ``bands`` / ``time`` -> ``DIM_TYPE_TEMPORAL``
    - anything else -> ``None`` (no type)

    An MDArray with the same name is created in ``group``, filled with
    ``values`` and registered as the dimension's indexing variable.

    Args:
        group: Root group (or sub-group) of the multidimensional
            dataset.
        dim_name: Name of the dimension to create.
        dtype: GDAL ``ExtendedDataType`` for the indexing variable.
        values: Coordinate values; ``values.shape[0]`` is used as the
            dimension size.

    Returns:
        gdal.Dimension: The newly created dimension.
    """
    name_groups = (
        (("y", "lat", "latitude"), gdal.DIM_TYPE_HORIZONTAL_Y),
        (("x", "lon", "longitude"), gdal.DIM_TYPE_HORIZONTAL_X),
        (("bands", "time"), gdal.DIM_TYPE_TEMPORAL),
    )
    dim_type = None
    for known_names, candidate_type in name_groups:
        if dim_name in known_names:
            dim_type = candidate_type
            break

    dimension = group.CreateDimension(dim_name, dim_type, None, values.shape[0])
    coordinate_var = group.CreateMDArray(dim_name, [dimension], dtype)
    coordinate_var.Write(values)
    dimension.SetIndexingVariable(coordinate_var)
    return dimension

`create_from_array(arr, geo=None, epsg=4326, no_data_value=DEFAULT_NO_DATA_VALUE, path=None, variable_name=None, extra_dim_name='time', extra_dim_values=None, top_left_corner=None, cell_size=None, chunk_sizes=None, compression=None, compression_level=None, title=None, institution=None, source=None, history=None)` `classmethod` #

Create a NetCDF dataset from a NumPy array and geotransform.

For 3-D arrays the first axis is treated as a non-spatial dimension (time, level, depth, etc.) whose name and coordinate values are controlled by extra_dim_name and extra_dim_values.

The driver is inferred from path: if path is None the dataset is created in memory (MEM driver); if a path is provided the netCDF driver writes to disk.

Parameters:

Name	Type	Description	Default
`arr`	`ndarray`	2-D `(rows, cols)` or 3-D `(extra_dim, rows, cols)` NumPy array.	required
`geo`	`tuple[float, float, float, float, float, float] \| None`	Geotransform tuple `(x_min, pixel_size, rotation, y_max, rotation, -pixel_size)`. Required unless both `top_left_corner` and `cell_size` are provided.	`None`
`epsg`	`str \| int`	EPSG code for the spatial reference. Defaults to 4326.	`4326`
`no_data_value`	`Any \| list`	Sentinel value for cells outside the domain. Defaults to DEFAULT_NO_DATA_VALUE.	`DEFAULT_NO_DATA_VALUE`
`path`	`str \| Path \| None`	Output file path. If `None`, the dataset is created in memory. Defaults to None.	`None`
`variable_name`	`str \| None`	Name of the data variable in the NetCDF file. Defaults to `"data"`.	`None`
`extra_dim_name`	`str`	Name of the non-spatial dimension for 3-D arrays (e.g. `"time"`, `"level"`, `"depth"`). Ignored for 2-D arrays. Defaults to `"time"`.	`'time'`
`extra_dim_values`	`list \| None`	Coordinate values for the non-spatial dimension. Must have length `arr.shape[0]` for 3-D arrays. Defaults to `[0, 1, 2, ..., N-1]`.	`None`
`top_left_corner`	`tuple[float, float] \| None`	`(x, y)` of the top-left corner. Used with `cell_size` to build `geo` when `geo` is not provided. Defaults to None.	`None`
`cell_size`	`int \| float \| None`	Pixel size. Used with `top_left_corner` to build `geo`. Defaults to None.	`None`
`chunk_sizes`	`tuple \| list \| None`	Chunk sizes for the data variable as a tuple matching the array dimensions (e.g. `(1, 256, 256)` for 3-D). Only effective when writing to disk. Defaults to None (GDAL default chunking).	`None`
`compression`	`str \| None`	Compression algorithm name (`"DEFLATE"`, `"ZSTD"`, etc.). Only effective when writing to disk. Defaults to None (no compression).	`None`
`compression_level`	`int \| None`	Compression level (e.g. 1-9 for DEFLATE). Defaults to None (GDAL default).	`None`
`title`	`str \| None`	CF global attribute `title`. Short description of the dataset. Defaults to None.	`None`
`institution`	`str \| None`	CF global attribute `institution`. Where the data was produced. Defaults to None.	`None`
`source`	`str \| None`	CF global attribute `source`. How the data was produced. Defaults to None.	`None`
`history`	`str \| None`	CF global attribute `history`. Audit trail of processing steps. Defaults to None.	`None`

Returns:

Name	Type	Description
`NetCDF`	`NetCDF`	The newly created NetCDF dataset.

Source code in src/pyramids/netcdf/netcdf.py

@classmethod
def create_from_array(  # type: ignore[override]
    cls,
    arr: np.ndarray,
    geo: tuple[float, float, float, float, float, float] | None = None,
    epsg: str | int = 4326,
    no_data_value: Any | list = DEFAULT_NO_DATA_VALUE,
    path: str | Path | None = None,
    variable_name: str | None = None,
    extra_dim_name: str = "time",
    extra_dim_values: list | None = None,
    top_left_corner: tuple[float, float] | None = None,
    cell_size: int | float | None = None,
    chunk_sizes: tuple | list | None = None,
    compression: str | None = None,
    compression_level: int | None = None,
    title: str | None = None,
    institution: str | None = None,
    source: str | None = None,
    history: str | None = None,
) -> NetCDF:
    """Build a NetCDF dataset from a NumPy array and a geotransform.

    A 3-D array is interpreted as ``(extra_dim, rows, cols)``: the
    leading axis becomes a non-spatial dimension (time, level, depth,
    ...) named ``extra_dim_name`` with coordinates
    ``extra_dim_values``.

    ``path`` is forwarded to the internal builder; per the parameter
    description below, ``None`` means an in-memory dataset and a path
    means a file written with the netCDF driver.

    Args:
        arr: 2-D ``(rows, cols)`` or 3-D
            ``(extra_dim, rows, cols)`` NumPy array.
        geo: Geotransform tuple ``(x_min, pixel_size, rotation,
            y_max, rotation, -pixel_size)``. When omitted it is built
            from ``top_left_corner`` and ``cell_size``.
        epsg: EPSG code for the spatial reference.
            Defaults to 4326.
        no_data_value: Sentinel value for cells outside the
            domain. Defaults to DEFAULT_NO_DATA_VALUE.
        path: Output file path. If ``None``, the dataset is
            created in memory. Defaults to None.
        variable_name: Name of the data variable in the NetCDF
            file. Defaults to ``"data"``.
        extra_dim_name: Name of the non-spatial dimension for 3-D
            arrays (e.g. ``"time"``, ``"level"``, ``"depth"``).
            Ignored for 2-D arrays. Defaults to ``"time"``.
        extra_dim_values: Coordinate values for the non-spatial
            dimension. Must have length ``arr.shape[0]`` for 3-D
            arrays. Defaults to ``[0, 1, 2, ..., N-1]``.
        top_left_corner: ``(x, y)`` of the top-left corner. Used
            with ``cell_size`` to build ``geo`` when ``geo`` is
            not provided. Defaults to None.
        cell_size: Pixel size. Used with ``top_left_corner`` to
            build ``geo``. Defaults to None.
        chunk_sizes: Chunk sizes for the data variable as a tuple
            matching the array dimensions (e.g. ``(1, 256, 256)``
            for 3-D). Only effective when writing to disk.
            Defaults to None (GDAL default chunking).
        compression: Compression algorithm name (``"DEFLATE"``,
            ``"ZSTD"``, etc.). Only effective when writing to
            disk. Defaults to None (no compression).
        compression_level: Compression level (e.g. 1-9 for
            DEFLATE). Defaults to None (GDAL default).
        title: CF global attribute ``title``. Short
            description of the dataset. Defaults to None.
        institution: CF global attribute ``institution``.
            Where the data was produced. Defaults to None.
        source: CF global attribute ``source``. How the
            data was produced. Defaults to None.
        history: CF global attribute ``history``. Audit
            trail of processing steps. Defaults to None.

    Returns:
        NetCDF: The newly created NetCDF dataset.

    Raises:
        ValueError: If ``geo`` is None and ``top_left_corner`` /
            ``cell_size`` are not both given.
    """
    if geo is None:
        if top_left_corner is None or cell_size is None:
            raise ValueError(
                "Either 'geo' or both 'top_left_corner' and "
                "'cell_size' must be provided."
            )
        corner_x, corner_y = top_left_corner[0], top_left_corner[1]
        geo = (corner_x, cell_size, 0, corner_y, 0, -cell_size)

    # Rows/cols are the two leading axes of a 2-D array; for any other
    # rank they are taken from axes 1 and 2.
    row_axis, col_axis = (0, 1) if arr.ndim == 2 else (1, 2)
    rows = int(arr.shape[row_axis])
    cols = int(arr.shape[col_axis])

    if arr.ndim == 3:
        if extra_dim_values is None:
            extra_dim_values = list(range(arr.shape[0]))
        # NOTE(review): the DimMetaData instance is discarded; it is
        # presumably constructed only for validation side effects —
        # confirm against DimMetaData.
        DimMetaData(
            name=extra_dim_name,
            size=arr.shape[0],
            values=extra_dim_values,
        )

    data_var_name = "data" if variable_name is None else variable_name

    dst_ds = cls._create_netcdf_from_array(
        arr,
        data_var_name,
        cols,
        rows,
        extra_dim_name,
        extra_dim_values,
        geo,
        epsg,
        no_data_value,
        path=path,
        chunk_sizes=chunk_sizes,
        compression=compression,
        compression_level=compression_level,
        title=title,
        institution=institution,
        source=source,
        history=history,
    )
    return cls(dst_ds)

`set_global_attribute(name, value)` #

Set a global attribute on the root group.

Creates or updates a single attribute on the root group.

Parameters:

Name	Type	Description	Default
`name`	`str`	Attribute name (e.g. `"history"`, `"Conventions"`).	required
`value`	`Any`	Attribute value. Supports str, int, float.	required

Raises:

Type	Description
`ValueError`	If the dataset has no root group (not opened in MDIM mode).

Source code in src/pyramids/netcdf/netcdf.py

def set_global_attribute(self, name: str, value: Any):
    """Create or overwrite one global attribute on the root group.

    Any existing attribute with the same name is removed first, then
    a new one is created whose type follows the Python type of
    ``value``: ``float`` -> Float64, ``int`` -> Int32, ``str`` ->
    string. Values of any other type are converted with ``str()`` and
    stored as a string. Caches are invalidated afterwards.

    Args:
        name: Attribute name (e.g. ``"history"``,
            ``"Conventions"``).
        value: Attribute value. Supports str, int, float; anything
            else is stored as its string representation.

    Raises:
        ValueError: If the dataset has no root group
            (not opened in MDIM mode).
    """
    root = self._raster.GetRootGroup()
    if root is None:
        raise ValueError(
            "set_global_attribute requires a multidimensional "
            "container. Open the file with "
            "open_as_multi_dimensional=True."
        )

    # Remove a previous attribute of the same name (GDAL raises on
    # duplicates); a failed delete is deliberately ignored.
    try:
        root.DeleteAttribute(name)
    except Exception:
        pass

    if isinstance(value, float):
        data_type = gdal.ExtendedDataType.Create(gdal.GDT_Float64)
    elif isinstance(value, int):
        data_type = gdal.ExtendedDataType.Create(gdal.GDT_Int32)
    else:
        data_type = gdal.ExtendedDataType.CreateString()
        if not isinstance(value, str):
            value = str(value)

    attribute = root.CreateAttribute(name, [], data_type)
    attribute.Write(value)
    self._invalidate_caches()

`delete_global_attribute(name)` #

Delete a global attribute from the root group.

If the attribute does not exist, the call is silently ignored.

Parameters:

Name	Type	Description	Default
`name`	`str`	Attribute name to delete.	required

Raises:

Type	Description
`ValueError`	If the dataset has no root group.

Source code in src/pyramids/netcdf/netcdf.py

def delete_global_attribute(self, name: str):
    """Remove a global attribute from the root group, if present.

    Any exception raised by the underlying ``DeleteAttribute`` call
    (e.g. because the attribute does not exist) is swallowed, and the
    caches are invalidated either way.

    Args:
        name: Attribute name to delete.

    Raises:
        ValueError: If the dataset has no root group.
    """
    root = self._raster.GetRootGroup()
    if root is None:
        message = (
            "delete_global_attribute requires a multidimensional "
            "container."
        )
        raise ValueError(message)

    try:
        root.DeleteAttribute(name)
    except Exception:
        # Best effort: a missing attribute is not an error here.
        pass
    self._invalidate_caches()

`set_variable(variable_name, dataset, band_dim_name=None, band_dim_values=None, attrs=None)` #

Write a classic Dataset back as an MDArray variable in this container.

This is the reverse of get_variable(). After performing GIS operations (crop, reproject, etc.) on a variable subset, use this method to store the result back into the NetCDF container.

Parameters:

Name	Type	Description	Default
`variable_name`	`str`	Name for the variable in this container. If a variable with this name already exists it is replaced.	required
`dataset`	`Dataset`	A classic raster dataset, typically the result of a GIS operation on a variable obtained via `get_variable()`.	required
`band_dim_name`	`str \| None`	Name of the dimension that maps to bands (e.g. `"time"`, `"bands"`). Auto-detected from the dataset's `_band_dim_name` attribute when available. Defaults to None.	`None`
`band_dim_values`	`list \| None`	Coordinate values for the band dimension. Auto-detected from `_band_dim_values` when available. Defaults to None.	`None`
`attrs`	`dict \| None`	Variable attributes to set (e.g. `{"units": "K"}`). Auto-detected from `_variable_attrs` when available. Defaults to None.	`None`

Raises:

Type	Description
`ValueError`	If called on a dataset without a root group (not opened in multidimensional mode).

Source code in src/pyramids/netcdf/netcdf.py

def set_variable(
    self,
    variable_name: str,
    dataset: Dataset,
    band_dim_name: str | None = None,
    band_dim_values: list | None = None,
    attrs: dict | None = None,
):
    """Write a classic Dataset back as an MDArray variable in this container.

    This is the reverse of ``get_variable()``.  After performing GIS
    operations (crop, reproject, etc.) on a variable subset, use this
    method to store the result back into the NetCDF container.

    Everything needed from ``dataset`` (pixel data, geotransform, size,
    EPSG, no-data value) is read *before* an existing variable of the
    same name is deleted.  This way a failed read leaves the container
    untouched, and a ``dataset`` that is still backed by the variable
    being replaced is read while that variable exists.

    Args:
        variable_name: Name for the variable in this container.  If a
            variable with this name already exists it is replaced.
        dataset: A classic raster dataset, typically the result of a
            GIS operation on a variable obtained via ``get_variable()``.
        band_dim_name: Name of the dimension that maps to bands
            (e.g. ``"time"``, ``"bands"``).  Auto-detected from the
            dataset's ``_band_dim_name`` attribute when available;
            falls back to ``"bands"`` for 3-D data.  Defaults to None.
        band_dim_values: Coordinate values for the band dimension.
            Auto-detected from ``_band_dim_values`` when available;
            falls back to ``0..N-1`` for 3-D data.  Defaults to None.
        attrs: Variable attributes to set (e.g. ``{"units": "K"}``).
            Auto-detected from ``_variable_attrs`` when available.
            Defaults to None.

    Raises:
        ValueError: If called on a dataset without a root group
            (not opened in multidimensional mode).
    """
    rg = self._raster.GetRootGroup()
    if rg is None:
        raise ValueError(
            "set_variable requires a multidimensional container. "
            "Open the file with open_as_multi_dimensional=True."
        )

    # Auto-detect from tracked origin metadata (RT-4)
    if band_dim_name is None and hasattr(dataset, "_band_dim_name"):
        band_dim_name = dataset._band_dim_name
    if band_dim_values is None and hasattr(dataset, "_band_dim_values"):
        band_dim_values = dataset._band_dim_values
    if attrs is None and hasattr(dataset, "_variable_attrs"):
        attrs = dataset._variable_attrs

    # Pull everything out of the source dataset first, so that nothing
    # in the container is destroyed if the source turns out to be
    # unreadable or depends on the variable about to be replaced.
    arr = dataset.read_array()
    gt: tuple[float, float, float, float, float, float] = dataset.geotransform
    columns = dataset.columns
    rows = dataset.rows
    epsg = dataset.epsg
    no_data_value = dataset.no_data_value

    # Delete existing variable if present
    if variable_name in self.variable_names:
        rg.DeleteMDArray(variable_name)

    data_dtype = gdal.ExtendedDataType.Create(numpy_to_gdal_dtype(arr))
    # Coordinate dimensions must always be float64 to avoid truncation
    # when the data array is integer (e.g., classified rasters).
    coord_dtype = gdal.ExtendedDataType.Create(gdal.GDT_Float64)

    # Build spatial dimensions from the geotransform
    x_values = np.array(NetCDF.get_x_lon_dimension_array(gt[0], gt[1], columns))
    y_values = np.array(NetCDF.get_y_lat_dimension_array(gt[3], abs(gt[5]), rows))
    dim_x = self._get_or_create_dimension(
        rg, "x", x_values, coord_dtype, gdal.DIM_TYPE_HORIZONTAL_X
    )
    dim_y = self._get_or_create_dimension(
        rg, "y", y_values, coord_dtype, gdal.DIM_TYPE_HORIZONTAL_Y
    )

    # Build band dimension if the data is 3D
    if arr.ndim == 3:
        if band_dim_name is None:
            band_dim_name = "bands"
        if band_dim_values is None:
            band_dim_values = list(range(arr.shape[0]))
        dim_band = self._get_or_create_dimension(
            rg,
            band_dim_name,
            np.array(band_dim_values, dtype=np.float64),
            coord_dtype,
            gdal.DIM_TYPE_TEMPORAL,
        )
        md_arr = rg.CreateMDArray(
            variable_name, [dim_band, dim_y, dim_x], data_dtype
        )
    else:
        md_arr = rg.CreateMDArray(variable_name, [dim_y, dim_x], data_dtype)

    # Write array data
    md_arr.Write(arr)

    # Set spatial reference (RT-7: attribute copying)
    if epsg:
        srs = Dataset._create_sr_from_epsg(epsg)
        md_arr.SetSpatialRef(srs)

    # Set no-data value (best effort: only the first band's value is
    # used, and a failure to set it is ignored).
    if no_data_value and no_data_value[0] is not None:
        try:
            md_arr.SetNoDataValueDouble(float(no_data_value[0]))
        except Exception:
            pass  # nosec B110

    # Set variable attributes (RT-7)
    if attrs:
        write_attributes_to_md_array(md_arr, attrs)

    self._invalidate_caches()

`crop_variable(variable_name, mask, touch=True)` #

Crop a single variable and store the result back.

Convenience method that combines get_variable → crop → set_variable in one call.

Parameters:

Name	Type	Description	Default
`variable_name`	`str`	Name of the variable to crop.	required
`mask`	`Any`	GeoDataFrame with polygon geometry, or a Dataset to use as a spatial mask.	required
`touch`	`bool`	If True, include cells touching the mask boundary. Defaults to True.	`True`

Returns:

Name	Type	Description
`NetCDF`	`NetCDF`	This container (modified in-place).

Source code in src/pyramids/netcdf/netcdf.py

def crop_variable(
    self, variable_name: str, mask: Any, touch: bool = True
) -> NetCDF:
    """Crop a single variable and store the result back.

    Convenience method that combines ``get_variable`` → ``crop``
    → ``set_variable`` in one call.

    The origin metadata tracked on the extracted variable
    (``_band_dim_name``, ``_band_dim_values``, ``_variable_attrs``) is
    copied onto the cropped result wherever the result does not carry
    it itself, so that ``set_variable`` can restore the band dimension
    and the variable attributes (same approach as
    ``reproject_variable``).

    Args:
        variable_name: Name of the variable to crop.
        mask: GeoDataFrame with polygon geometry, or a Dataset
            to use as a spatial mask.
        touch: If True, include cells touching the mask boundary.
            Defaults to True.

    Returns:
        NetCDF: This container (modified in-place).
    """
    var = self.get_variable(variable_name)
    cropped = var.crop(mask, touch=touch)
    # Metadata already present on the result takes precedence; only
    # gaps are filled from the source variable.
    for meta_name in ("_band_dim_name", "_band_dim_values", "_variable_attrs"):
        if getattr(cropped, meta_name, None) is None:
            setattr(cropped, meta_name, getattr(var, meta_name, None))
    self.set_variable(variable_name, cropped)
    return self

`reproject_variable(variable_name, to_epsg, method='nearest neighbor')` #

Reproject a single variable and store the result back.

Convenience method that combines get_variable → to_crs → set_variable in one call.

Parameters:

Name	Type	Description	Default
`variable_name`	`str`	Name of the variable to reproject.	required
`to_epsg`	`int`	Target EPSG code (e.g. 4326, 32637).	required
`method`	`str`	Resampling method. Defaults to `"nearest neighbor"`.	`'nearest neighbor'`

Returns:

Name	Type	Description
`NetCDF`	`NetCDF`	This container (modified in-place).

Source code in src/pyramids/netcdf/netcdf.py

def reproject_variable(
    self, variable_name: str, to_epsg: int, method: str = "nearest neighbor"
) -> NetCDF:
    """Reproject one variable and write the result back into this container.

    Shortcut for ``get_variable`` → ``to_crs`` → ``set_variable``.
    The reprojected data is first copied into a fresh dataset built
    with ``Dataset.create_from_array``, and the band-dimension name,
    band-dimension values and variable attributes of the original
    variable are carried over before it is stored.

    Args:
        variable_name: Name of the variable to reproject.
        to_epsg: Target EPSG code (e.g. 4326, 32637).
        method: Resampling method. Defaults to
            ``"nearest neighbor"``.

    Returns:
        NetCDF: This container (modified in-place).
    """
    source_var = self.get_variable(variable_name)
    warped = source_var.to_crs(to_epsg, method=method)

    # to_crs returns a VRT-backed dataset, so its pixels are
    # materialized into a standalone dataset; otherwise the data would
    # not survive once the VRT source (the variable subset) is
    # garbage collected.
    data = warped.read_array()
    fill_value = warped.no_data_value
    if isinstance(fill_value, list) and fill_value:
        # Only the first band's no-data value is passed on.
        fill_value = fill_value[0]
    standalone = Dataset.create_from_array(
        data,
        geo=warped.geotransform,
        epsg=warped.epsg,
        no_data_value=fill_value,
    )

    # Carry the origin metadata over so set_variable can restore it.
    for meta_name in ("_band_dim_name", "_band_dim_values", "_variable_attrs"):
        setattr(standalone, meta_name, getattr(source_var, meta_name))

    self.set_variable(variable_name, standalone)
    return self

`resample_variable(variable_name, cell_size, method='nearest neighbor')` #

Resample a single variable and store the result back.

Convenience method that combines get_variable → resample → set_variable in one call.

Parameters:

Name	Type	Description	Default
`variable_name`	`str`	Name of the variable to resample.	required
`cell_size`	`int \| float`	New cell size.	required
`method`	`str`	Resampling method. Defaults to `"nearest neighbor"`.	`'nearest neighbor'`

Returns:

Name	Type	Description
`NetCDF`	`NetCDF`	This container (modified in-place).

Source code in src/pyramids/netcdf/netcdf.py

def resample_variable(
    self, variable_name: str, cell_size: int | float,
    method: str = "nearest neighbor"
) -> NetCDF:
    """Resample one variable and write it back under the same name.

    Shortcut for the ``get_variable`` → ``resample`` → ``set_variable``
    sequence.

    Args:
        variable_name: Name of the variable to resample.
        cell_size: New cell size.
        method: Resampling method. Defaults to
            ``"nearest neighbor"``.

    Returns:
        NetCDF: This container (modified in-place).
    """
    subset = self.get_variable(variable_name)
    self.set_variable(
        variable_name,
        subset.resample(cell_size, method=method),
    )
    return self

`add_variable(dataset, variable_name=None)` #

Copy MDArray variables from another NetCDF into this container.

Parameters:

Name	Type	Description	Default
`dataset`	`Dataset \| NetCDF`	Source NetCDF dataset whose variables will be copied. Must have a root group (opened in MDIM mode).	required
`variable_name`	`str \| None`	Name of a single variable to copy. If None and the source is a `NetCDF`, all of its variables are copied. If a variable with the same name already exists in this container, the copy is stored under the name with a `"-new"` suffix so the existing variable is not overwritten.	`None`

Source code in src/pyramids/netcdf/netcdf.py

def add_variable(self, dataset: Dataset | NetCDF, variable_name: str | None = None):
    """Copy MDArray variables from another NetCDF into this container.

    Args:
        dataset: Source dataset whose variables will be copied.
            Must have a root group (opened in MDIM mode).
        variable_name: Name of a single variable to copy. If None and
            ``dataset`` is a ``NetCDF``, every name in
            ``dataset.variable_names`` is copied; if None and
            ``dataset`` is not a ``NetCDF``, nothing is copied. When
            a name is already taken in this container the copy is
            stored under the name with a ``"-new"`` suffix.

    Raises:
        ValueError: If there is something to copy but either dataset
            has no root group, or if a requested variable cannot be
            opened in the source.
    """
    dest_group = self._raster.GetRootGroup()
    source_group = dataset._raster.GetRootGroup()

    names_to_copy: list[str]
    if variable_name is not None:
        names_to_copy = [variable_name]
    elif isinstance(dataset, NetCDF):
        names_to_copy = list(dataset.variable_names)
    else:
        names_to_copy = []

    if not names_to_copy:
        # Nothing to copy: keep the historical no-op (caches are still
        # invalidated, as before).
        self._invalidate_caches()
        return

    if dest_group is None or source_group is None:
        raise ValueError(
            "add_variable requires multidimensional containers. "
            "Open both datasets with open_as_multi_dimensional=True."
        )

    # Open every source array up front so that a bad name fails before
    # anything has been written to the destination.
    resolved = []
    for name in names_to_copy:
        md_arr = source_group.OpenMDArray(name)
        if md_arr is None:
            raise ValueError(
                f"Variable '{name}' not found in the source dataset."
            )
        resolved.append((name, md_arr))

    # Track taken names locally: names added earlier in this call must
    # also count when deciding whether a suffix is needed.
    taken = set(self.variable_names)
    try:
        for name, md_arr in resolved:
            # Use a suffixed name rather than overwrite an existing
            # variable in the destination.
            target_name = f"{name}-new" if name in taken else name
            self._add_md_array_to_group(dest_group, target_name, md_arr)
            taken.add(target_name)
    finally:
        # Invalidate even on a partial failure so cached metadata never
        # disagrees with what was actually written.
        self._invalidate_caches()

`remove_variable(variable_name)` #

Delete a variable from this container.

If the dataset is backed by a file on disk, a MEM copy is made first so that the on-disk file is not modified. The internal raster reference is replaced with the modified copy.

Parameters:

Name	Type	Description	Default
`variable_name`	`str`	Name of the variable to remove.	required

Source code in src/pyramids/netcdf/netcdf.py

def remove_variable(self, variable_name: str):
    """Delete a variable from this container.

    If the dataset is backed by a file on disk, a MEM copy is made first
    so that the on-disk file is not modified.  The internal raster
    reference is replaced with the modified copy.

    Args:
        variable_name: Name of the variable to remove.

    Raises:
        ValueError: If the dataset to modify exposes no root group
            (i.e. it is not a multidimensional container).
    """
    if self.driver_type == "memory":
        # Already in memory: edit the raster in place.
        dst = self._raster
    else:
        # Work on a MEM copy so the source file is left untouched.
        dst = gdal.GetDriverByName("MEM").CreateCopy("", self._raster, 0)

    rg = dst.GetRootGroup()
    if rg is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            "remove_variable requires a multidimensional container."
        )
    rg.DeleteMDArray(variable_name)

    self._replace_raster(dst)

`rename_variable(old_name, new_name)` #

Rename a variable in this container.

Internally extracts the variable data and metadata, creates a new variable with the new name, and removes the old one.

Parameters:

Name	Type	Description	Default
`old_name`	`str`	Current name of the variable.	required
`new_name`	`str`	Desired new name.	required

Raises:

Type	Description
`ValueError`	If `old_name` doesn't exist or `new_name` already exists.

Source code in src/pyramids/netcdf/netcdf.py

def rename_variable(self, old_name: str, new_name: str):
    """Give an existing variable a new name.

    The variable's MDArray is re-added to the root group under
    ``new_name`` and the entry stored under ``old_name`` is then
    deleted.

    Args:
        old_name: Current name of the variable.
        new_name: Desired new name.

    Raises:
        ValueError: If ``old_name`` doesn't exist, ``new_name``
            already exists, or the dataset has no root group.
    """
    known = self.variable_names
    if old_name not in known:
        raise ValueError(
            f"Variable '{old_name}' not found. Available: {known}"
        )
    if new_name in known:
        raise ValueError(f"Variable '{new_name}' already exists.")

    root = self._raster.GetRootGroup()
    if root is None:
        raise ValueError(
            "rename_variable requires a multidimensional container."
        )

    # Add under the new name first, then drop the old entry.
    self._add_md_array_to_group(root, new_name, root.OpenMDArray(old_name))
    root.DeleteMDArray(old_name)
    self._invalidate_caches()

`to_xarray()` #

Convert this NetCDF container to an xarray.Dataset.

Builds an in-memory xarray.Dataset that mirrors the variables, coordinates, dimensions, and global attributes of this pyramids NetCDF container.

For file-backed containers the conversion delegates to xr.open_dataset(self.file_name) which lets xarray use its own optimised NetCDF reader.

For in-memory containers (MEM driver, no file on disk) the method reads each variable via the MDIM API, constructs coordinate arrays from the dimension indexing variables, and assembles them into an xr.Dataset manually.

Requires the optional xarray package. Install it with::

pip install xarray

Returns:

Type	Description
`Any`	xarray.Dataset: An xarray Dataset with the same variables, coordinates, and global attributes.

Raises:

Type	Description
`OptionalPackageDoesNotExist`	If `xarray` is not installed.

Examples:

Convert a pyramids NetCDF to xarray::

nc = NetCDF.read_file("temperature.nc")
ds = nc.to_xarray()
print(ds)

Source code in src/pyramids/netcdf/netcdf.py

def to_xarray(self) -> Any:
    """Return the contents of this container as an ``xarray.Dataset``.

    Two paths are used depending on where the data lives:

    * **File-backed** (``self.file_name`` is set, is not a
      ``/vsimem/`` path, and exists on disk): the file is handed
      to ``xr.open_dataset`` and its result is returned as-is.
    * **Otherwise** (e.g. in-memory containers): coordinates are
      read from the indexing variables of the root group's
      dimensions, every name in ``self.variable_names`` is read
      through the MDIM API together with its attributes, and the
      pieces are assembled into an ``xr.Dataset`` whose ``attrs``
      are ``self.global_attributes``.

    Requires the optional ``xarray`` package.  Install it with::

        pip install xarray

    Returns:
        xarray.Dataset: An xarray Dataset with the same
            variables, coordinates, and global attributes.

    Raises:
        pyramids.base._errors.OptionalPackageDoesNotExist:
            If ``xarray`` is not installed.
        ValueError: If the container is not file-backed and has
            no root group.

    Examples:
        Convert a pyramids NetCDF to xarray::

            nc = NetCDF.read_file("temperature.nc")
            ds = nc.to_xarray()
            print(ds)
    """
    try:
        import xarray as xr
    except ImportError:
        raise OptionalPackageDoesNotExist(
            "xarray is required for to_xarray(). "
            "Install it with: pip install xarray"
        )

    source_path = self.file_name
    on_disk = (
        source_path
        and not source_path.startswith("/vsimem/")
        and Path(source_path).exists()
    )
    if on_disk:
        # A real file exists: let xarray read it with its own reader.
        return xr.open_dataset(source_path)

    root = self._raster.GetRootGroup()
    if root is None:
        raise ValueError(
            "to_xarray requires a multidimensional container. "
            "Open the file with open_as_multi_dimensional=True."
        )

    # Dimensions without an indexing variable contribute no coordinate.
    coords: dict[str, Any] = {}
    for dimension in root.GetDimensions() or []:
        label = dimension.GetName()
        index_var = dimension.GetIndexingVariable()
        if index_var is None:
            continue
        coords[label] = ([label], index_var.ReadAsArray())

    data_vars: dict[str, Any] = {}
    for name in self.variable_names:
        array = root.OpenMDArray(name)
        if array is None:
            # Listed but not openable from the root group: skip it.
            continue
        dim_labels = [dim.GetName() for dim in (array.GetDimensions() or [])]
        values = array.ReadAsArray()

        # Attribute reading is best-effort: whatever was collected
        # before a failure is kept, the rest is dropped silently.
        attributes: dict[str, Any] = {}
        try:
            for attribute in array.GetAttributes():
                attributes[attribute.GetName()] = attribute.Read()
        except Exception:
            pass

        data_vars[name] = (dim_labels, values, attributes)

    return xr.Dataset(
        data_vars=data_vars,
        coords=coords,
        attrs=self.global_attributes,
    )

`from_xarray(dataset, path=None)` `classmethod` #

Create a pyramids NetCDF from an xarray.Dataset.

Serialises the xarray Dataset to a NetCDF file (at `path`, or in a temporary file on disk when no path is given) and reads it back as a pyramids NetCDF container.

This is the inverse of to_xarray() and enables workflows that mix xarray analysis with pyramids spatial operations::

ds = xr.open_dataset("input.nc")
# ... xarray processing ...
nc = NetCDF.from_xarray(ds)
var = nc.get_variable("temperature")
cropped = var.crop(mask)

Requires the optional xarray package.

Parameters:

Name	Type	Description	Default
`dataset`	`Any`	An `xarray.Dataset` instance.	required
`path`	`str \| Path \| None`	File path where the intermediate NetCDF will be written. If `None`, a temporary `.nc` file is created on disk and removed automatically when the returned object is garbage-collected.	`None`

Returns:

Name	Type	Description
`NetCDF`	`NetCDF`	A pyramids NetCDF container backed by the data from the xarray Dataset.

Raises:

Type	Description
`OptionalPackageDoesNotExist`	If `xarray` is not installed.
`TypeError`	If dataset is not an `xarray.Dataset`.

Source code in src/pyramids/netcdf/netcdf.py

@classmethod
def from_xarray(
    cls,
    dataset: Any,
    path: str | Path | None = None,
) -> NetCDF:
    """Create a pyramids NetCDF from an ``xarray.Dataset``.

    Serialises the xarray Dataset to a NetCDF file with
    ``dataset.to_netcdf`` and reads that file back through
    ``cls.read_file`` as a pyramids ``NetCDF`` container.

    This is the inverse of ``to_xarray()`` and enables workflows
    that mix xarray analysis with pyramids spatial operations::

        ds = xr.open_dataset("input.nc")
        # ... xarray processing ...
        nc = NetCDF.from_xarray(ds)
        var = nc.get_variable("temperature")
        cropped = var.crop(mask)

    Requires the optional ``xarray`` package.

    Args:
        dataset: An ``xarray.Dataset`` instance.
        path: File path where the intermediate NetCDF will be
            written.  If ``None``, a temporary ``.nc`` file is
            created on disk; it is removed when the returned
            object is garbage-collected, or immediately if
            writing or reading it fails.

    Returns:
        NetCDF: A pyramids NetCDF container backed by the data
            from the xarray Dataset.

    Raises:
        pyramids.base._errors.OptionalPackageDoesNotExist:
            If ``xarray`` is not installed.
        TypeError: If *dataset* is not an ``xarray.Dataset``.
    """
    try:
        import xarray as xr
    except ImportError as err:
        raise OptionalPackageDoesNotExist(
            "xarray is required for from_xarray(). "
            "Install it with: pip install xarray"
        ) from err

    if not isinstance(dataset, xr.Dataset):
        raise TypeError(
            f"Expected xarray.Dataset, got {type(dataset).__name__}"
        )

    cleanup_temp = path is None
    if cleanup_temp:
        # delete=False: the file must outlive this handle so that it
        # can be written by xarray and then opened by read_file.
        with tempfile.NamedTemporaryFile(suffix=".nc", delete=False) as tmp:
            path = tmp.name
    else:
        path = str(path)

    try:
        dataset.to_netcdf(path)
        result = cls.read_file(path, read_only=True)
    except Exception:
        if cleanup_temp:
            # Do not leave an orphaned temp file behind. A failed
            # removal is ignored so the original error propagates.
            try:
                os.unlink(path)
            except OSError:
                pass
        raise

    if cleanup_temp:
        result._xarray_temp_path = path
        # Remove the temp file once the container is collected.
        weakref.finalize(result, os.unlink, path)

    return result

Type	Description
`ndarray`	np.ndarray or None: Flattened coordinate array, or None if
`ndarray`	neither `lon` nor `x` exists in the dataset.

Type	Description
`list[str]`	list[str]: Variable names. For MDIM mode these come from
`list[str]`	`GetMDArrayNames()` minus dimension names; for classic mode
`list[str]`	from `GetSubDatasets()`.