Skip to content

Commit

Permalink
created CubeMetadata class
Browse files Browse the repository at this point in the history
issue #464
Moved general methods from CollectionMetadata to CubeMetadata.
Only the collection-parsing-specific methods are left in CollectionMetadata.
This is a refactoring only; no functional changes for now.
  • Loading branch information
VictorVerhaert committed Feb 23, 2024
1 parent b7f9478 commit 80b6f6e
Showing 1 changed file with 152 additions and 133 deletions.
285 changes: 152 additions & 133 deletions openeo/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
import warnings
from typing import Any, Callable, List, NamedTuple, Optional, Tuple, Union
from typing import Any, Callable, List, NamedTuple, Optional, Tuple, Union, Self

from openeo.internal.jupyter import render_component
from openeo.util import deep_get
Expand Down Expand Up @@ -199,23 +199,12 @@ def rename_labels(self, target, source) -> Dimension:
return BandDimension(name=self.name, bands=new_bands)


class CollectionMetadata:
class CubeMetadata:
"""
Wrapper for Image Collection metadata.
Simplifies getting values from deeply nested mappings,
allows additional parsing and normalizing compatibility issues.
Metadata is expected to follow format defined by
https://openeo.org/documentation/1.0/developers/api/reference.html#operation/describe-collection
(with partial support for older versions)
Interface for metadata of a data cube.
allows interaction with the cube dimensions and their labels (if available).
"""

# TODO: "CollectionMetadata" is also used as "cube metadata" where the link to original collection
# might be lost (if any). Better separation between rich EO raster collection metadata and
# essential cube metadata? E.g.: also thing of vector cubes.

def __init__(self, metadata: dict, dimensions: List[Dimension] = None):
# Original collection metadata (actual cube metadata might be altered through processes)
self._orig_metadata = metadata
Expand All @@ -228,121 +217,37 @@ def __init__(self, metadata: dict, dimensions: List[Dimension] = None):
self._temporal_dimension = None
for dim in self._dimensions:
# TODO: here we blindly pick last bands or temporal dimension if multiple. Let user choose?
# TODO: add spatial dimension handling?
if dim.type == "bands":
# TODO: add check and/or cast to BandDimension
self._band_dimension = dim
if dim.type == "temporal":
# TODO: add check and/or cast to TemporalDimension
self._temporal_dimension = dim

def __eq__(self, o: Any) -> bool:
    """
    Compare two metadata objects by their dimensions.

    :param o: object to compare against
    :return: True when `o` is an instance of this object's own class
        (or of a subclass of it) and both hold equal `_dimensions`.
    """
    # Check against `type(self)` instead of a hard-coded class name,
    # so the comparison also works when this method is inherited by a subclass.
    if not isinstance(o, type(self)):
        return False
    return self._dimensions == o._dimensions

def _clone_and_update(
    self, metadata: dict = None, dimensions: List[Dimension] = None, **kwargs
) -> CubeMetadata:  # python >= 3.11: -> Self to be more correct for subclasses
    """
    Create a new instance (of same class) with copied/updated fields.

    :param metadata: replacement metadata dict; when falsy (`None` or empty),
        the original metadata of this instance is passed on instead
    :param dimensions: replacement list of dimensions; when `None`,
        the dimension list of this instance is passed on instead (same object, not a copy)
    :param kwargs: additional keyword arguments passed on to the constructor
    :return: new instance of `type(self)`
    """
    # TODO: do we want to keep the type the same or force it to be CubeMetadata?
    #       this method is e.g. used by reduce_dimension, which should return a CubeMetadata
    #       If adjusted, name should be changed to e.g. _create_updated
    #       Alternative is to use an optional argument to specify the class to use
    cls = type(self)
    # Identity check: `== None` would dispatch to a custom `__eq__` of the given value,
    # and an (intentionally) empty dimension list must not be replaced either.
    if dimensions is None:
        dimensions = self._dimensions
    return cls(metadata=metadata or self._orig_metadata, dimensions=dimensions, **kwargs)

@classmethod
def _parse_dimensions(
    cls, spec: dict = None, complain: Callable[[str], None] = warnings.warn, **kwargs
) -> List[Dimension]:
    """
    Hook for extracting dimension metadata from a metadata dict.

    Generic cube metadata has no parsing logic of its own: the STAC-like collection
    parsing lives in the `CollectionMetadata` override of this method.
    This base implementation ignores its arguments and reports no dimensions.

    :param spec: metadata dict (unused here)
    :param complain: handler for warnings (unused here)
    :param kwargs: accepted and ignored, for compatibility with the previous `**kwargs`-only stub
    :return list: empty list of `Dimension` objects
    """
    # NOTE(review): an empty list (instead of the implicit `None` of a bare `pass`) is returned
    # on the assumption that the constructor iterates over the parsed dimensions — confirm against `__init__`.
    return []

def get(self, *args, default=None):
    """
    Look up a value in the original metadata through `deep_get`.

    :param args: positional arguments forwarded to `deep_get` after the metadata
        (presumably the key path into the nested mapping)
    :param default: forwarded to `deep_get` as its `default`
    :return: whatever `deep_get` returns
    """
    source = self._orig_metadata
    return deep_get(source, *args, default=default)

@property
def extent(self) -> dict:
    """Value stored under the "extent" key of the original metadata (`None` when that key is absent)."""
    # TODO: is this currently used and relevant?
    # TODO: check against extent metadata in dimensions
    original = self._orig_metadata
    return original.get("extent")

def dimension_names(self) -> List[str]:
    """List the `name` of every dimension, in the order the dimensions are stored."""
    return [dim.name for dim in self._dimensions]

Expand Down Expand Up @@ -394,29 +299,27 @@ def get_band_index(self, band: Union[int, str]) -> int:
# TODO: eliminate this shortcut for smaller API surface
return self.band_dimension.band_index(band)

def filter_bands(self, band_names: List[Union[int, str]]) -> CubeMetadata:
    """
    Create new `CubeMetadata` with filtered band dimension

    :param band_names: list of band names/indices to keep
    :return: new metadata object (built with `_clone_and_update`) in which every `BandDimension`
        is replaced by the result of its own `filter_bands`; other dimensions are passed on as-is
    """
    assert self.band_dimension
    return self._clone_and_update(
        dimensions=[d.filter_bands(band_names) if isinstance(d, BandDimension) else d for d in self._dimensions]
    )

def append_band(self, band: Band) -> CubeMetadata:
    """
    Create new `CubeMetadata` with given band added to band dimension.

    :param band: band to add
    :return: new metadata object (built with `_clone_and_update`) in which every `BandDimension`
        is replaced by the result of its own `append_band`; other dimensions are passed on as-is
    """
    assert self.band_dimension
    return self._clone_and_update(
        dimensions=[d.append_band(band) if isinstance(d, BandDimension) else d for d in self._dimensions]
    )

def rename_labels(self, dimension: str, target: list, source: list = None) -> CollectionMetadata:
def rename_labels(self, dimension: str, target: list, source: list = None) -> CubeMetadata:
"""
Renames the labels of the specified dimension from source to target.
Expand All @@ -433,7 +336,7 @@ def rename_labels(self, dimension: str, target: list, source: list = None) -> Co

return self._clone_and_update(dimensions=new_dimensions)

def rename_dimension(self, source: str, target: str) -> CollectionMetadata:
def rename_dimension(self, source: str, target: str) -> CubeMetadata:
"""
Rename source dimension into target, preserving other properties
"""
Expand All @@ -444,23 +347,23 @@ def rename_dimension(self, source: str, target: str) -> CollectionMetadata:

return self._clone_and_update(dimensions=new_dimensions)

def reduce_dimension(self, dimension_name: str) -> CubeMetadata:
    """
    Create new CubeMetadata object by collapsing/reducing a dimension.

    :param dimension_name: name of the dimension to remove; it is first checked
        with `assert_valid_dimension`
    :return: new metadata object (built with `_clone_and_update`) holding all
        dimensions except the first one with the given name, in their original order
    """
    # TODO: option to keep reduced dimension (with a single value)?
    # TODO: rename argument to `name` for more internal consistency
    # TODO: merge with drop_dimension (which does the same).
    self.assert_valid_dimension(dimension_name)
    loc = self.dimension_names().index(dimension_name)
    # Slicing builds a new list, so the dimension list of this instance is left untouched.
    dimensions = self._dimensions[:loc] + self._dimensions[loc + 1 :]
    return self._clone_and_update(dimensions=dimensions)

def reduce_spatial(self) -> CubeMetadata:
    """
    Create new CubeMetadata object by reducing the spatial dimensions.

    :return: new metadata object (built with `_clone_and_update`) that keeps only
        the dimensions that are not `SpatialDimension` instances
    """
    dimensions = [d for d in self._dimensions if not isinstance(d, SpatialDimension)]
    return self._clone_and_update(dimensions=dimensions)

def add_dimension(self, name: str, label: Union[str, float], type: str = None) -> CollectionMetadata:
"""Create new metadata object with added dimension"""
def add_dimension(self, name: str, label: Union[str, float], type: str = None) -> CubeMetadata:
"""Create new CubeMetadata object with added dimension"""
if any(d.name == name for d in self._dimensions):
raise DimensionAlreadyExistsException(f"Dimension with name {name!r} already exists")
if type == "bands":
Expand All @@ -473,13 +376,129 @@ def add_dimension(self, name: str, label: Union[str, float], type: str = None) -
dim = Dimension(type=type or "other", name=name)
return self._clone_and_update(dimensions=self._dimensions + [dim])

def drop_dimension(self, name: str = None) -> CubeMetadata:
    """
    Create new CubeMetadata object without dropped dimension with given name

    :param name: name of the dimension to drop
    :return: new metadata object (built with `_clone_and_update`) without the dimension(s) of that name
    :raises ValueError: when there is no dimension with the given name
        (which includes the default `name=None`, unless a dimension is actually named `None`)
    """
    dimension_names = self.dimension_names()
    if name not in dimension_names:
        raise ValueError(f"No dimension named {name!r} (valid names: {dimension_names!r})")
    return self._clone_and_update(dimensions=[d for d in self._dimensions if d.name != name])

def __str__(self) -> str:
    """Short summary: the band names (or a placeholder when there is no band dimension) and the dimension names."""
    if self.has_band_dimension():
        bands = self.band_names
    else:
        bands = "no bands dimension"
    return f"CubeMetadata({bands} - {self.dimension_names()})"


class CollectionMetadata(CubeMetadata):
"""
Wrapper for Image Collection metadata.
Simplifies getting values from deeply nested mappings,
allows additional parsing and normalizing compatibility issues.
Metadata is expected to follow format defined by
https://openeo.org/documentation/1.0/developers/api/reference.html#operation/describe-collection
(with partial support for older versions)
"""

def __init__(self, metadata: dict, dimensions: List[Dimension] = None):
    """
    :param metadata: collection metadata dict, handed to the parent constructor
    :param dimensions: optional list of dimensions, handed to the parent constructor
    """
    super().__init__(metadata, dimensions)

@classmethod
def _parse_dimensions(cls, spec: dict, complain: Callable[[str], None] = warnings.warn) -> List[Dimension]:
    """
    Build the list of data cube dimensions from a STAC-like description of a collection.

    Two places in `spec` are combined:

    - 'cube:dimensions' (top-level, or 0.4-style under 'properties'):
      dimension names (e.g. 'x', 'y', 't'), types, extent info and band names for band dimensions
    - 'eo:bands' or 'raster:bands' under 'summaries' (or 0.4-style 'eo:bands' under 'properties'):
      more detailed band information like "common" name and wavelength info

    :param spec: STAC like collection metadata dict
    :param complain: handler for warnings; called with a message for each problem found
    :return list: list of `Dimension` objects
    """
    # Step 1: dimensions as declared in `cube:dimensions` (or 0.4-style `properties/cube:dimensions`)
    declared = deep_get(spec, "cube:dimensions", default=None)
    if not declared:
        declared = deep_get(spec, "properties", "cube:dimensions", default=None)
    if not declared:
        declared = {}
        complain("No cube:dimensions metadata")

    dimensions = []
    for dim_name, dim_info in declared.items():
        kind = dim_info.get("type")
        if kind == "spatial":
            parsed = SpatialDimension(
                name=dim_name,
                extent=dim_info.get("extent"),
                crs=dim_info.get("reference_system", SpatialDimension.DEFAULT_CRS),
                step=dim_info.get("step", None),
            )
        elif kind == "temporal":
            parsed = TemporalDimension(name=dim_name, extent=dim_info.get("extent"))
        elif kind == "bands":
            plain_bands = [Band(name=value) for value in dim_info.get("values", [])]
            if not plain_bands:
                complain(f"No band names in dimension {dim_name!r}")
            parsed = BandDimension(name=dim_name, bands=plain_bands)
        else:
            # Unknown types are reported, but still kept as a generic dimension.
            complain(f"Unknown dimension type {kind!r}")
            parsed = Dimension(name=dim_name, type=kind)
        dimensions.append(parsed)

    # Step 2: detailed band information, first non-empty hit of
    # `summaries/[eo|raster]:bands` (and 0.4 style `properties/eo:bands`)
    band_specs = None
    for path in (("summaries", "eo:bands"), ("summaries", "raster:bands"), ("properties", "eo:bands")):
        band_specs = deep_get(spec, *path, default=None)
        if band_specs:
            break
    if not band_specs:
        return dimensions

    # center_wavelength is in micrometer according to spec
    bands_detailed = [
        Band(
            name=entry["name"],
            common_name=entry.get("common_name"),
            wavelength_um=entry.get("center_wavelength"),
            aliases=entry.get("aliases"),
            gsd=entry.get("openeo:gsd"),
        )
        for entry in band_specs
    ]

    # Merge the detailed info into the band dimension (if that is possible unambiguously)
    band_dimensions = [d for d in dimensions if d.type == "bands"]
    if len(band_dimensions) > 1:
        complain("Multiple dimensions of type 'bands'")
    elif band_dimensions:
        (dim,) = band_dimensions
        # Only replace the bands from 'cube:dimensions' when the names line up exactly
        eo_band_names = [b.name for b in bands_detailed]
        cube_dimension_band_names = [b.name for b in dim.bands]
        if eo_band_names == cube_dimension_band_names:
            dim.bands = bands_detailed
        else:
            complain(f"Band name mismatch: {cube_dimension_band_names} != {eo_band_names}")
    elif dimensions:
        complain("No 'bands' dimension in 'cube:dimensions' while having 'eo:bands' or 'raster:bands'")
    else:
        # No dimensions at all: fall back on an anonymous band dimension
        complain("Assuming name 'bands' for anonymous band dimension.")
        dimensions.append(BandDimension(name="bands", bands=bands_detailed))

    return dimensions

@property
def extent(self) -> dict:
    """Value stored under the "extent" key of the original metadata (`None` when that key is absent)."""
    # TODO: is this currently used and relevant?
    # TODO: check against extent metadata in dimensions
    original = self._orig_metadata
    return original.get("extent")

def _repr_html_(self):
    """Return the result of `render_component` for a "collection" component fed with the original metadata."""
    original = self._orig_metadata
    return render_component("collection", data=original)

Expand Down

0 comments on commit 80b6f6e

Please sign in to comment.