AbstractDataSource #

Base classes that define the interface for all data source implementations.

`earthlens.base.AbstractDataSource` #

Bases: ABC

Blueprint for every concrete data-source backend.

Subclasses encapsulate the request shape, authentication, and download orchestration for a single provider (CHIRPS, ERA5 on AWS S3, ECMWF CDS, Google Earth Engine). The base class wires the abstract hooks (`_initialize`, `_create_grid`, `_check_input_dates`) into a uniform `__init__` shape and exposes a single `download` entry point.

Attributes:

Name	Type	Description
`OUTPUT_KIND`	`OutputKind`	Class-level declaration of the natural output shape this backend emits. Read by :class:`earthlens.earthlens.EarthLens` at facade `download()` time to gate the `aggregate=` argument: `"raster"` accepts it (the existing pyramids-backed `aggregate_netcdf` flow); `"vector"` and `"tabular"` reject it with :class:`NotImplementedError`; `"mixed"` forwards it unchanged. Subclasses override the class attribute; the default is `"raster"` so the four backends shipped before C1 (CHIRPS, S3, ECMWF, GEE) all keep their existing behaviour with no source change.

Source code in src/earthlens/base/abstractdatasource.py

class AbstractDataSource(ABC):
    """Blueprint for every concrete data-source backend.

    Subclasses encapsulate the request shape, authentication, and
    download orchestration for a single provider (CHIRPS, ERA5 on AWS
    S3, ECMWF CDS, Google Earth Engine). The base class wires the
    abstract hooks (:meth:`_initialize`, :meth:`_create_grid`,
    :meth:`_check_input_dates`) into a uniform `__init__` shape and
    exposes a single :meth:`download` entry point.

    Attributes:
        OUTPUT_KIND: Class-level declaration of the natural output
            shape this backend emits. Read by
            :class:`earthlens.earthlens.EarthLens` at facade
            `download()` time to gate the `aggregate=` argument:
            `"raster"` accepts it (the existing pyramids-backed
            `aggregate_netcdf` flow); `"vector"` and `"tabular"`
            reject it with :class:`NotImplementedError`; `"mixed"`
            forwards it unchanged. Subclasses override the class
            attribute; the default is `"raster"` so the four
            backends shipped before C1 (CHIRPS, S3, ECMWF, GEE) all
            keep their existing behaviour with no source change.
    """

    OUTPUT_KIND: OutputKind = "raster"

    def __init__(
        self,
        start: str,
        end: str,
        variables: dict[str, list[str]] | list[str],
        lat_lim: list[float],
        lon_lim: list[float],
        temporal_resolution: str = "daily",
        fmt: str = "%Y-%m-%d",
        path: Path | str = "",
    ):
        """Initialize a data source instance.

        Runs the abstract hooks in a fixed order (:meth:`_initialize`,
        :meth:`_create_grid`, :meth:`_check_input_dates`) and captures
        their return values so subclasses do not have to wire them
        onto `self` themselves:

        * `self.client` — whatever :meth:`_initialize` returns (a CDS
          client, an S3 client, `None` for FTP). Subclasses that
          assign `self.client` inside :meth:`_initialize` (e.g.
          :class:`S3`) keep their own assignment; the parent only sets
          the attribute when :meth:`_initialize` returns a non-`None`
          value.
        * `self.space` — a :class:`SpatialExtent`. Set when
          :meth:`_create_grid` returns a `SpatialExtent` (stored as
          is) or a dict with `lat_lim` and `lon_lim` keys (converted
          with `SpatialExtent.from_pairs`). Any other return value,
          including `None`, leaves the attribute untouched, so
          subclasses that set attributes directly inside the hook
          (e.g. :class:`CHIRPS`) are unaffected.
        * `self.time` — a :class:`TemporalExtent`. Set when
          :meth:`_check_input_dates` returns a `TemporalExtent`
          (stored as is) or a dict with `start_date`, `end_date` and
          `dates` keys plus a `resolution` key (the legacy
          `time_freq` key is used when `resolution` is absent). Same
          opt-in semantics as `self.space`.
        * `self.root_dir` — the absolute :class:`pathlib.Path` of the
          output directory. `self.path` is kept as a legacy alias so
          older backends (CHIRPS, S3) continue to work.

        `self.temporal_resolution` and `self.vars` are stored after
        :meth:`_initialize` runs and before the other two hooks run.

        Args:
            start: Inclusive start date as a string. Format controlled
                by `fmt`. Required.
            end: Inclusive end date as a string, in the same format.
                Required.
            variables: Variable short codes to download, either as a
                flat list or as a dict of lists.
            lat_lim: `[lat_min, lat_max]`.
            lon_lim: `[lon_min, lon_max]`.
            temporal_resolution: `"daily"` or `"monthly"`. Defaults
                to `"daily"`.
            fmt: `strptime` format for `start` / `end`. Defaults
                to `"%Y-%m-%d"`.
            path: Output directory. Created (with parents) if it does
                not exist. Defaults to the current working directory.

        Raises:
            FileExistsError: If `path` exists but is not a directory.
        """
        client = self._initialize()
        if client is not None:
            self.client = client

        self.temporal_resolution = temporal_resolution
        self.vars = variables

        space = self._create_grid(lat_lim, lon_lim)
        if isinstance(space, SpatialExtent):
            self.space = space
        elif isinstance(space, dict):
            # Legacy hook shape: a plain dict carrying the two limit pairs.
            self.space = SpatialExtent.from_pairs(
                lat_lim=space["lat_lim"], lon_lim=space["lon_lim"]
            )

        time = self._check_input_dates(start, end, temporal_resolution, fmt)
        if isinstance(time, TemporalExtent):
            self.time = time
        elif isinstance(time, dict):
            self.time = TemporalExtent(
                start_date=time["start_date"],
                end_date=time["end_date"],
                # Older hooks report the resolution under "time_freq".
                resolution=time.get("resolution", time.get("time_freq")),
                dates=time["dates"],
            )

        self.root_dir = Path(path).absolute()
        self.path = self.root_dir
        # `exist_ok=True` replaces the former exists()/makedirs() pair, which
        # could raise FileExistsError when another process created the
        # directory between the check and the call.
        os.makedirs(self.root_dir, exist_ok=True)

    @abstractmethod
    def _check_input_dates(
        self, start: str, end: str, temporal_resolution: str, fmt: str
    ):
        """Check validity of input dates. Called by `__init__`.

        Args:
            start: Start date string as passed to `__init__`.
            end: End date string as passed to `__init__`.
            temporal_resolution: Resolution string as passed to
                `__init__`.
            fmt: `strptime` format of `start` / `end`.

        Returns:
            A `TemporalExtent` or a dict to have `__init__` populate
            `self.time`; any other value (e.g. `None`) is ignored.
        """

    @abstractmethod
    def _initialize(self, *args, **kwargs):
        """Initialize connection with the data source server (for non-FTP servers).

        Called once by :meth:`__init__` with no arguments; the return
        value is captured into `self.client` when non-`None`.
        """

    @abstractmethod
    def _create_grid(self, lat_lim: list, lon_lim: list):
        """Create a grid from the lat/lon boundaries. Called by `__init__`.

        Args:
            lat_lim: `[lat_min, lat_max]` as passed to `__init__`.
            lon_lim: `[lon_min, lon_max]` as passed to `__init__`.

        Returns:
            A `SpatialExtent` or a dict with `lat_lim` / `lon_lim`
            keys to have `__init__` populate `self.space`; any other
            value (e.g. `None`) is ignored.
        """

    @abstractmethod
    def download(self):
        """Wrapper over all the given variables.

        Implementations loop over the dates when the downloaded
        rasters/netcdf files each cover a single date out of the
        requested list of dates.
        """

    def _download_dataset(self):
        """Download a single variable/dataset (called by :meth:`download`).

        Optional hook: the base implementation does nothing and
        returns `None`.
        """

    @abstractmethod
    def _api(self, *args, **kwargs):
        """Send / receive a single request to the data source server.

        Called by :meth:`download` (or :meth:`_download_dataset`) once
        per `(dataset, variable)` pair. The signature is
        backend-specific.

        New backends (C3 onward) should implement :meth:`_search` and
        :meth:`_fetch` instead and let the default
        :meth:`_api_via_search_fetch` compose them; existing backends
        (CHIRPS, S3, ECMWF, GEE) continue to override `_api` directly.
        """

    # ------------------------------------------------------------------
    # C3 — optional search/fetch decomposition.
    #
    # The existing four backends (CHIRPS, S3, ECMWF, GEE) keep their
    # `_api` overrides unchanged: nothing below is abstract, so they do
    # not have to implement `_search` / `_fetch` to stay green.
    #
    # New backends (earthlens.stac, earthlens.earthdata, earthlens.fdsn,
    # earthlens.openaq, …) should override `_search` and `_fetch`
    # instead — `_search` returns a list of `RemoteProduct`s and
    # `_fetch` consumes them. The :meth:`_api_via_search_fetch` helper
    # is the canonical composition; backends can opt into it by
    # overriding `_api` as `return self._api_via_search_fetch()`.
    # ------------------------------------------------------------------

    def _search(self) -> list[RemoteProduct]:
        """List the remote products that satisfy this download request.

        Default raises `NotImplementedError` so backends that do not
        opt into the search/fetch split (the four shipped before C3)
        keep their `_api`-only flow unchanged. Backends that opt in
        override this to return one `RemoteProduct` per item the
        server's catalog says they should download.

        The split exists to make dry-run inspection cheap (`_search`
        does not hit the bulk-download endpoint) and to make
        per-product parallelism explicit (`_fetch` is the
        parallelisable half).

        Returns:
            list[RemoteProduct]: One item per product to download.
                The empty list is a legal result (the catalog matched
                nothing) and short-circuits `_api_via_search_fetch`
                without ever calling `_fetch`.

        Raises:
            NotImplementedError: When the subclass keeps the legacy
                `_api`-only flow. The message names the subclass
                class so the user can find the offending backend.
        """
        raise NotImplementedError(
            f"{type(self).__name__} does not implement _search; "
            f"either override _api directly (legacy) or override both "
            f"_search and _fetch (post-C3)."
        )

    def _fetch(self, products: list[RemoteProduct]) -> list[Path]:
        """Download the bytes of every product `_search` returned.

        Default raises `NotImplementedError` (see `_search`).
        Backends that opt into the search/fetch split override this
        to iterate over `products` — either sequentially or via
        `joblib.Parallel` / `concurrent.futures` — and write each
        one to disk.

        Args:
            products: The list returned by `_search` (or a
                user-filtered subset). The empty list is allowed and
                returns an empty list.

        Returns:
            list[Path]: The local file paths written, in the same
                order as `products`. Empty list when `products` is
                empty (no-op fetch is legal).

        Raises:
            NotImplementedError: When the subclass keeps the legacy
                `_api`-only flow.
        """
        raise NotImplementedError(
            f"{type(self).__name__} does not implement _fetch; "
            f"either override _api directly (legacy) or override both "
            f"_search and _fetch (post-C3)."
        )

    def _api_via_search_fetch(self) -> list[Path]:
        """Canonical `_api` body for backends using the C3 split.

        Backends that override `_search` and `_fetch` usually want
        `_api` to just compose them; this helper is that
        composition, factored once so each new backend's `_api`
        body becomes a single line:

        ```python
        def _api(self):
            return self._api_via_search_fetch()
        ```

        The helper short-circuits on an empty search result so
        `_fetch` is only called when there is something to fetch.

        Returns:
            list[Path]: Whatever `_fetch` returned. An empty list
                when `_search` returned no products.
        """
        products = self._search()
        if not products:
            return []
        return self._fetch(products)

`__init__(start, end, variables, lat_lim, lon_lim, temporal_resolution='daily', fmt='%Y-%m-%d', path='')` #

Initialize a data source instance.

Captures the return values of the abstract hooks so subclasses do not have to wire them onto self themselves:

- `self.client` — whatever `_initialize` returns (a CDS client, an S3 client, `None` for FTP). Subclasses that assign `self.client` inside `_initialize` (e.g. `S3`) keep their own assignment; the parent only sets the attribute when `_initialize` returns a non-`None` value.
- `self.space` — a `SpatialExtent`, set when `_create_grid` returns either a `SpatialExtent` or a dict containing `lat_lim` and `lon_lim`. Subclasses that override `_create_grid` to set attributes directly (e.g. `CHIRPS`) and return `None` are unaffected.
- `self.time` — a `TemporalExtent`, set when `_check_input_dates` returns either a `TemporalExtent` or a dict containing `start_date`, `end_date`, `dates` and `resolution` (the legacy key `time_freq` is used when `resolution` is absent). Same opt-in semantics as `self.space`.
- `self.root_dir` — the absolute `pathlib.Path` of the output directory. `self.path` is kept as a legacy alias so older backends (CHIRPS, S3) continue to work.

Parameters:

Name	Type	Description	Default
`start`	`str`	Inclusive start date as a string. Format controlled by `fmt`.	required
`end`	`str`	Inclusive end date as a string, in the same format as `start`.	required
`variables`	`dict[str, list[str]] \| list[str]`	Variable short codes to download, either as a flat list or as a dict of lists.	required
`temporal_resolution`	`str`	`"daily"` or `"monthly"`. Defaults to `"daily"`.	`'daily'`
`lat_lim`	`list[float]`	`[lat_min, lat_max]`.	required
`lon_lim`	`list[float]`	`[lon_min, lon_max]`.	required
`fmt`	`str`	`strptime` format for `start` / `end`. Defaults to `"%Y-%m-%d"`.	`'%Y-%m-%d'`
`path`	`Path \| str`	Output directory. Created if it does not exist. Defaults to the current working directory.	`''`

Source code in src/earthlens/base/abstractdatasource.py

def __init__(
    self,
    start: str,
    end: str,
    variables: dict[str, list[str]] | list[str],
    lat_lim: list[float],
    lon_lim: list[float],
    temporal_resolution: str = "daily",
    fmt: str = "%Y-%m-%d",
    path: Path | str = "",
):
    """Initialize a data source instance.

    Runs the abstract hooks in a fixed order (:meth:`_initialize`,
    :meth:`_create_grid`, :meth:`_check_input_dates`) and captures
    their return values so subclasses do not have to wire them
    onto `self` themselves:

    * `self.client` — whatever :meth:`_initialize` returns (a CDS
      client, an S3 client, `None` for FTP). Subclasses that
      assign `self.client` inside :meth:`_initialize` (e.g.
      :class:`S3`) keep their own assignment; the parent only sets
      the attribute when :meth:`_initialize` returns a non-`None`
      value.
    * `self.space` — a :class:`SpatialExtent`. Set when
      :meth:`_create_grid` returns a `SpatialExtent` (stored as
      is) or a dict with `lat_lim` and `lon_lim` keys (converted
      with `SpatialExtent.from_pairs`). Any other return value,
      including `None`, leaves the attribute untouched, so
      subclasses that set attributes directly inside the hook
      (e.g. :class:`CHIRPS`) are unaffected.
    * `self.time` — a :class:`TemporalExtent`. Set when
      :meth:`_check_input_dates` returns a `TemporalExtent`
      (stored as is) or a dict with `start_date`, `end_date` and
      `dates` keys plus a `resolution` key (the legacy
      `time_freq` key is used when `resolution` is absent). Same
      opt-in semantics as `self.space`.
    * `self.root_dir` — the absolute :class:`pathlib.Path` of the
      output directory. `self.path` is kept as a legacy alias so
      older backends (CHIRPS, S3) continue to work.

    `self.temporal_resolution` and `self.vars` are stored after
    :meth:`_initialize` runs and before the other two hooks run.

    Args:
        start: Inclusive start date as a string. Format controlled
            by `fmt`. Required.
        end: Inclusive end date as a string, in the same format.
            Required.
        variables: Variable short codes to download, either as a
            flat list or as a dict of lists.
        lat_lim: `[lat_min, lat_max]`.
        lon_lim: `[lon_min, lon_max]`.
        temporal_resolution: `"daily"` or `"monthly"`. Defaults
            to `"daily"`.
        fmt: `strptime` format for `start` / `end`. Defaults
            to `"%Y-%m-%d"`.
        path: Output directory. Created (with parents) if it does
            not exist. Defaults to the current working directory.

    Raises:
        FileExistsError: If `path` exists but is not a directory.
    """
    client = self._initialize()
    if client is not None:
        self.client = client

    self.temporal_resolution = temporal_resolution
    self.vars = variables

    space = self._create_grid(lat_lim, lon_lim)
    if isinstance(space, SpatialExtent):
        self.space = space
    elif isinstance(space, dict):
        # Legacy hook shape: a plain dict carrying the two limit pairs.
        self.space = SpatialExtent.from_pairs(
            lat_lim=space["lat_lim"], lon_lim=space["lon_lim"]
        )

    time = self._check_input_dates(start, end, temporal_resolution, fmt)
    if isinstance(time, TemporalExtent):
        self.time = time
    elif isinstance(time, dict):
        self.time = TemporalExtent(
            start_date=time["start_date"],
            end_date=time["end_date"],
            # Older hooks report the resolution under "time_freq".
            resolution=time.get("resolution", time.get("time_freq")),
            dates=time["dates"],
        )

    self.root_dir = Path(path).absolute()
    self.path = self.root_dir
    # `exist_ok=True` replaces the former exists()/makedirs() pair, which
    # could raise FileExistsError when another process created the
    # directory between the check and the call.
    os.makedirs(self.root_dir, exist_ok=True)

`download()` `abstractmethod` #

Wrapper over all the given variables.

Source code in src/earthlens/base/abstractdatasource.py

@abstractmethod
def download(self):
    """Run the download for every requested variable.

    Abstract entry point: each backend supplies its own body. An
    implementation loops over the dates when the downloaded
    rasters/netcdf files each cover one specific date out of the
    required list of dates.
    """
    return None

`earthlens.base.AbstractCatalog` #

Bases: BaseModel

Abstract base class for per-data-source variable catalogs.

Subclasses load a backend-specific catalog (a YAML file, an in-code dict, or a remote query) in :meth:get_catalog and expose individual entries via :meth:get_variable. The :func:model_post_init hook eagerly populates :attr:catalog after pydantic validation runs, so subclasses can treat the catalog as a dict thereafter without writing their own __init__.

Subclasses pass through pydantic's normal `BaseModel.__init__` — declare any backend-specific construction parameters as pydantic fields rather than `__init__` arguments. Override `get_catalog` (and optionally `get_variable`); the base `get_catalog` raises `NotImplementedError` to flag a missing override at first use rather than silently returning an empty mapping, while the base `get_variable` looks the name up in `catalog`.

Attributes:

Name	Type	Description
`catalog`	`dict[str, Any]`	The full catalog mapping returned by :meth:`get_catalog`. Populated post-init; defaults to an empty dict so the field is always present. Type and shape are backend-specific (a concrete subclass typically stores typed value objects, e.g. `dict[str, Variable]` for the ECMWF backend).

Source code in src/earthlens/base/abstractdatasource.py

class AbstractCatalog(BaseModel):
    """Base class for per-data-source variable catalogs.

    A concrete catalog loads its backend-specific content (a YAML
    file, an in-code dict, or a remote query) in :meth:`get_catalog`
    and serves single entries through :meth:`get_variable`. The
    :func:`model_post_init` hook fills :attr:`catalog` once pydantic
    validation has finished, so subclasses never need an `__init__`
    of their own.

    Construction goes through pydantic's regular
    `BaseModel.__init__`: backend-specific construction parameters
    belong in pydantic fields, not in `__init__` arguments.
    Subclasses override :meth:`get_catalog` (and optionally
    :meth:`get_variable`). The base :meth:`get_catalog` raises
    :class:`NotImplementedError`, so a missing override surfaces at
    first use instead of yielding an empty mapping.

    Attributes:
        catalog: Whatever :meth:`get_catalog` returned. Assigned
            post-init; the field default is an empty dict so the
            attribute always exists. Type and shape are
            backend-specific (a concrete subclass typically stores
            typed value objects, e.g. `dict[str, Variable]` for the
            ECMWF backend).
        available_datasets: List of dataset names. This base class
            never fills it and only reports its length in `repr()`.
        datasets: Mapping of dataset key to dataset record; backs
            :meth:`get_dataset` and the dict-like protocol methods.
        providers: Mapping of provider slug to provider record;
            backs :meth:`get_provider`.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    #: Label inserted into the error raised by :meth:`get_dataset`
    #: on a miss. Concrete subclasses override it (e.g.
    #: `"GEE catalog"`, `"CDS catalog"`, `"CHC catalog"`) so the
    #: message tells the user which catalog rejected the name.
    _catalog_kind: str = "catalog"

    catalog: dict[str, Any] = Field(default_factory=dict)
    available_datasets: list[str] = Field(default_factory=list)
    datasets: dict[str, Any] = Field(default_factory=dict)
    providers: dict[str, Any] = Field(default_factory=dict)

    def model_post_init(self, __context: Any) -> None:
        """Load the catalog once pydantic validation has finished.

        Pydantic invokes this hook on its own. A subclass with extra
        post-init wiring should override it and call
        `super().model_post_init(__context)` first so the catalog is
        still loaded.
        """
        loaded = self.get_catalog()
        self.catalog = loaded

    def get_catalog(self) -> Any:
        """Load the data source's catalog from disk or from the server.

        Meant to be overridden: a concrete subclass returns its own
        catalog object (e.g. a pydantic `Catalog` instance, a
        `dict`, or whatever shape the backend uses).

        Raises:
            NotImplementedError: Always, until overridden by a subclass.
        """
        raise NotImplementedError

    def get_variable(self, var_name: str) -> Any:
        """Return the catalog entry stored under `var_name`.

        Delegates to `self.catalog.get`, so the result for an unknown
        name is whatever that `get` yields (`None` for a plain dict).
        """
        entries = self.catalog
        return entries.get(var_name)

    # -- dict-like surface shared by every backend, backed by `datasets`
    #    (M1 from catalog-cross-backend-comparison)

    def get_dataset(self, name: str) -> Any:
        """Look up one dataset record, suggesting the closest name on a miss.

        Backend-generic lookup in :attr:`datasets`. A missing key is
        reported as `ValueError` (not `KeyError`); the message lists
        the known keys and, when `difflib` finds one, the nearest
        match. Subclasses may override to narrow the return type or
        to reword the error.

        Args:
            name: Catalog key (e.g. CDS dataset short name, EE asset id,
                CHC dataset key).

        Returns:
            The matching dataset record (type depends on the subclass).

        Raises:
            ValueError: If `name` is not a key of :attr:`datasets`.
        """
        try:
            record = self.datasets[name]
        except KeyError:
            suggestions = difflib.get_close_matches(name, self.datasets, n=1)
            hint = ""
            if suggestions:
                hint = f" Did you mean {suggestions[0]!r}?"
            # `from None` hides the internal KeyError from the traceback.
            raise ValueError(
                f"{name!r} is not in the {self._catalog_kind}. "
                f"Known datasets: {sorted(self.datasets)}.{hint}"
            ) from None
        return record

    def __getitem__(self, name: str) -> Any:
        """Support `cat[name]`; a miss surfaces as `KeyError`."""
        try:
            record = self.get_dataset(name)
        except ValueError as miss:
            # Keep the descriptive ValueError reachable as the cause.
            raise KeyError(name) from miss
        return record

    def __contains__(self, name: object) -> bool:
        """Support `name in cat`: True for a key of `datasets`."""
        known = self.datasets
        return name in known

    def __iter__(self):
        """Return an iterator over the keys of `datasets`."""
        known = self.datasets
        return iter(known)

    def __len__(self) -> int:
        """Return how many entries `datasets` holds."""
        known = self.datasets
        return len(known)

    def __repr__(self) -> str:
        """Return a short developer repr that shows counts only."""
        cls_name = type(self).__name__
        n_datasets = len(self.datasets)
        n_available = len(self.available_datasets)
        return (
            f"{cls_name}(datasets={n_datasets}, "
            f"available_datasets={n_available})"
        )

    def get_provider(self, slug: str) -> Any:
        """Look up one provider record, suggesting the closest slug on a miss.

        Args:
            slug: A registered provider slug (e.g. `"nasa-lp-daac"`,
                `"ucsb-chc"`, `"copernicus"`).

        Returns:
            The matching :class:`earthlens.base.Provider`.

        Raises:
            ValueError: If `slug` is not a registered provider.
        """
        try:
            provider = self.providers[slug]
        except KeyError:
            suggestions = difflib.get_close_matches(slug, self.providers, n=1)
            hint = ""
            if suggestions:
                hint = f" Did you mean {suggestions[0]!r}?"
            # `from None` hides the internal KeyError from the traceback.
            raise ValueError(
                f"{slug!r} is not a registered provider. "
                f"Known providers: {sorted(self.providers)}.{hint}"
            ) from None
        return provider

    def __str__(self) -> str:
        """Render the `datasets` map as a YAML document.

        Pydantic records are dumped with `exclude_none=True`, so
        `None`-valued fields are left out; any other value is passed
        to the YAML dumper unchanged. Keys keep their insertion
        order. Subclasses whose dataset values aren't pydantic
        `BaseModel`s (rare) must override.
        """
        import yaml

        body = {
            key: (
                dataset.model_dump(exclude_none=True)
                if isinstance(dataset, BaseModel)
                else dataset
            )
            for key, dataset in self.datasets.items()
        }
        return yaml.safe_dump(
            body, default_flow_style=False, sort_keys=False, allow_unicode=True
        )

`__contains__(name)` #

name in cat — True when name is a curated dataset.

Source code in src/earthlens/base/abstractdatasource.py

def __contains__(self, name: object) -> bool:
    """Support `name in cat`: True for a key of `datasets`."""
    known = self.datasets
    return name in known

`__getitem__(name)` #

cat[name] — dict-style lookup; raises KeyError on miss.

Source code in src/earthlens/base/abstractdatasource.py

def __getitem__(self, name: str) -> Any:
    """Support `cat[name]`; a miss surfaces as `KeyError`."""
    try:
        record = self.get_dataset(name)
    except ValueError as miss:
        # Keep the descriptive ValueError reachable as the cause.
        raise KeyError(name) from miss
    return record

`__iter__()` #

Iterate over the curated dataset keys.

Source code in src/earthlens/base/abstractdatasource.py

def __iter__(self):
    """Return an iterator over the keys of `datasets`."""
    known = self.datasets
    return iter(known)

`__len__()` #

Number of curated datasets in the catalog.

Source code in src/earthlens/base/abstractdatasource.py

def __len__(self) -> int:
    """Return how many entries `datasets` holds."""
    known = self.datasets
    return len(known)

`__repr__()` #

Compact developer repr — counts, not contents.

Source code in src/earthlens/base/abstractdatasource.py

def __repr__(self) -> str:
    """Return a short developer repr that shows counts only."""
    cls_name = type(self).__name__
    n_datasets = len(self.datasets)
    n_available = len(self.available_datasets)
    return (
        f"{cls_name}(datasets={n_datasets}, "
        f"available_datasets={n_available})"
    )

`__str__()` #

Pretty-print the curated datasets map as YAML.

None-valued fields are omitted so the output stays readable; the ordering of keys follows insertion. Concrete subclasses whose dataset values aren't pydantic BaseModels (rare) must override.

Source code in src/earthlens/base/abstractdatasource.py

def __str__(self) -> str:
    """Render the `datasets` map as a YAML document.

    Pydantic records are dumped with `exclude_none=True`, so
    `None`-valued fields are left out; any other value is passed
    to the YAML dumper unchanged. Keys keep their insertion
    order. Subclasses whose dataset values aren't pydantic
    `BaseModel`s (rare) must override.
    """
    import yaml

    body = {
        key: (
            dataset.model_dump(exclude_none=True)
            if isinstance(dataset, BaseModel)
            else dataset
        )
        for key, dataset in self.datasets.items()
    }
    return yaml.safe_dump(
        body, default_flow_style=False, sort_keys=False, allow_unicode=True
    )

`get_catalog()` #

Read the catalog of the datasource from disk or retrieve it from server.

Abstract; concrete subclasses must override and return their backend-specific catalog object (e.g. a pydantic Catalog instance, a dict, or whatever shape the backend uses).

Raises:

Type	Description
`NotImplementedError`	Always, until overridden by a subclass.

Source code in src/earthlens/base/abstractdatasource.py

def get_catalog(self) -> Any:
    """Load the data source's catalog from disk or from the server.

    Meant to be overridden: a concrete subclass returns its own
    catalog object (e.g. a pydantic `Catalog` instance, a `dict`,
    or whatever shape the backend uses).

    Raises:
        NotImplementedError: Always, until overridden by a subclass.
    """
    # No generic way to load a catalog exists at this level.
    raise NotImplementedError

`get_dataset(name)` #

Return the dataset record for name, with a did-you-mean hint on miss.

Backend-generic: looks up name in :attr:datasets and raises ValueError (not KeyError) with the closest known name when absent. Concrete subclasses can override to narrow the return type or customise the error message.

Parameters:

Name	Type	Description	Default
`name`	`str`	Catalog key (e.g. CDS dataset short name, EE asset id, CHC dataset key).	required

Returns:

Type	Description
`Any`	The matching dataset record (type depends on the subclass).

Raises:

Type	Description
`ValueError`	If `name` is not a key of :attr:`datasets`.

Source code in src/earthlens/base/abstractdatasource.py

def get_dataset(self, name: str) -> Any:
    """Look up one dataset record, suggesting the closest name on a miss.

    Backend-generic lookup in :attr:`datasets`. A missing key is
    reported as `ValueError` (not `KeyError`); the message lists the
    known keys and, when `difflib` finds one, the nearest match.
    Subclasses may override to narrow the return type or to reword
    the error.

    Args:
        name: Catalog key (e.g. CDS dataset short name, EE asset id,
            CHC dataset key).

    Returns:
        The matching dataset record (type depends on the subclass).

    Raises:
        ValueError: If `name` is not a key of :attr:`datasets`.
    """
    try:
        record = self.datasets[name]
    except KeyError:
        suggestions = difflib.get_close_matches(name, self.datasets, n=1)
        hint = ""
        if suggestions:
            hint = f" Did you mean {suggestions[0]!r}?"
        # `from None` hides the internal KeyError from the traceback.
        raise ValueError(
            f"{name!r} is not in the {self._catalog_kind}. "
            f"Known datasets: {sorted(self.datasets)}.{hint}"
        ) from None
    return record

`get_provider(slug)` #

Return the :class:Provider for slug (with a did-you-mean hint on miss).

Parameters:

Name	Type	Description	Default
`slug`	`str`	A registered provider slug (e.g. `"nasa-lp-daac"`, `"ucsb-chc"`, `"copernicus"`).	required

Returns:

Type	Description
`Any`	The matching :class:`earthlens.base.Provider`.

Raises:

Type	Description
`ValueError`	If `slug` is not a registered provider.

Source code in src/earthlens/base/abstractdatasource.py

def get_provider(self, slug: str) -> Any:
    """Look up one provider record, suggesting the closest slug on a miss.

    Args:
        slug: A registered provider slug (e.g. `"nasa-lp-daac"`,
            `"ucsb-chc"`, `"copernicus"`).

    Returns:
        The matching :class:`earthlens.base.Provider`.

    Raises:
        ValueError: If `slug` is not a registered provider.
    """
    try:
        provider = self.providers[slug]
    except KeyError:
        suggestions = difflib.get_close_matches(slug, self.providers, n=1)
        hint = ""
        if suggestions:
            hint = f" Did you mean {suggestions[0]!r}?"
        # `from None` hides the internal KeyError from the traceback.
        raise ValueError(
            f"{slug!r} is not a registered provider. "
            f"Known providers: {sorted(self.providers)}.{hint}"
        ) from None
    return provider

`get_variable(var_name)` #

Get the details of a specific variable.

Source code in src/earthlens/base/abstractdatasource.py

def get_variable(self, var_name: str) -> Any:
    """Return the catalog entry stored under `var_name`.

    Delegates to `self.catalog.get`, so the result for an unknown
    name is whatever that `get` yields (`None` for a plain dict).
    """
    entries = self.catalog
    return entries.get(var_name)

`model_post_init(__context)` #

Populate :attr:catalog after pydantic validation runs.

Pydantic calls this hook automatically; subclasses that need their own post-init wiring should override it and call super().model_post_init(__context) first to keep the catalog-loading behaviour.

Source code in src/earthlens/base/abstractdatasource.py

def model_post_init(self, __context: Any) -> None:
    """Load the catalog once pydantic validation has finished.

    Pydantic invokes this hook on its own. A subclass with extra
    post-init wiring should override it and call
    `super().model_post_init(__context)` first so the catalog is
    still loaded.
    """
    loaded = self.get_catalog()
    self.catalog = loaded