Source code for tiatoolbox.wsicore.wsimeta

"""This module defines a dataclass which holds metadata about a WSI.

With this class, metadata is in a normalized consistent format
which is quite useful when working with many different WSI formats.
The raw metadata is also preserved and accessible via a dictionary. The
format of this dictionary may vary between WSI formats.

"""

from __future__ import annotations

from numbers import Number
from pathlib import Path
from typing import TYPE_CHECKING

import numpy as np

from tiatoolbox import logger

if TYPE_CHECKING:  # pragma: no cover
    from collections.abc import Mapping, Sequence

    from tiatoolbox.typing import Resolution, Units



[docs]
class WSIMeta:
    """Whole slide image metadata class.

    Holds WSI metadata in a normalized form. Values passed to the
    constructor are cast to Python / numpy types and then checked by
    :meth:`validate`.

    Args:
        slide_dimensions (int, int):
            Tuple containing the width and height of the WSI. These
            are for the baseline (full resolution) image if the WSI
            is a pyramid or multi-resolution.
        axes (str):
            Axes ordering of the image, e.g. "YXS". Only the
            characters "Y", "X", "S", "T" and "Z" pass validation.
        level_dimensions (list):
            A list of dimensions for each level of the pyramid or
            for each resolution in the WSI.
        objective_power (float, optional):
            The power of the objective lens used to create the
            image.
        level_count: (int, optional):
            The number of levels or resolutions in the WSI. If not
            given this is assigned len(level_dimensions). Defaults
            to None.
        level_downsamples (:obj:`list` of :obj:`float`):
            List of scale values which describe how many times
            smaller the current level is compared with the baseline.
        vendor (str, optional):
            Scanner vendor/manufacturer description.
        mpp (float, float, optional):
            Microns per pixel.
        file_path (Path, optional):
            Path to the corresponding WSI file.
        raw (dict, optional):
            Dictionary of unprocessed metadata extracted from the
            WSI format. For JPEG-2000 images this contains an xml
            object under the key "xml".

    Attributes:
        slide_dimensions (tuple(int)):
            Tuple containing the width and height of the WSI. These are
            for the baseline (full resolution) image if the WSI is a
            pyramid or multi-resolution. Required.
        axes (str):
            Axes ordering of the image. This is most relevant for
            OME-TIFF images where the axes ordering can vary. For most
            images this will be "YXS" i.e. the image is stored in the
            axis order of Y coordinates first, then X coordinates, and
            colour channels last.
        level_dimensions (list):
            A list of dimensions for each level of the pyramid or for
            each resolution in the WSI. Defaults to [slide_dimensions].
        objective_power (float):
            The magnification power of the objective lens used to scan
            the image. Not always present or accurate. Defaults to None.
        level_count: (int):
            The number of levels or resolutions in the WSI. If not given
            this is assigned len(level_dimensions). Defaults to
            len(level_dimensions).
        level_downsamples (:obj:`list` of :obj:`float`):
            List of scale values which describe how many times smaller
            the current level is compared with the baseline. Defaults to
            (1,).
        vendor (str):
            Scanner vendor/manufacturer description. The constructor
            stores ``str(vendor)``.
        mpp (float, float, optional):
            Microns per pixel. Derived from objective power and sensor
            size. Not always present or accurate. Stored as a numpy
            array when given. Defaults to None.
        file_path (Path):
            Path to the corresponding WSI file. Defaults to None.
        raw (dict):
            Dictionary of unprocessed metadata extracted from the WSI
            format. For JP2 images this contains an xml object under the
            key "xml". Defaults to None.

    """

    # Characters permitted in ``axes``; anything else fails validate().
    _valid_axes_characters = "YXSTZ"

    def __init__(  # noqa: PLR0913
        self: WSIMeta,
        slide_dimensions: tuple[int, int],
        axes: str,
        level_dimensions: Sequence[tuple[int, int]] | None = None,
        objective_power: float | None = None,
        level_count: int | None = None,
        level_downsamples: Sequence[float] | None = (1,),
        vendor: str | None = None,
        mpp: Sequence[float] | None = None,
        file_path: Path | None = None,
        raw: Mapping[str, str] | None = None,
    ) -> None:
        """Initialize WSIMeta."""
        self.axes = axes
        self.objective_power = float(objective_power) if objective_power else None
        self.slide_dimensions = tuple(int(x) for x in slide_dimensions)
        self.level_dimensions = (
            tuple((int(w), int(h)) for w, h in level_dimensions)
            if level_dimensions is not None
            else [self.slide_dimensions]
        )
        self.level_downsamples = (
            [float(x) for x in level_downsamples]
            if level_downsamples is not None
            else None
        )
        self.level_count = (
            int(level_count) if level_count is not None else len(self.level_dimensions)
        )
        self.vendor = str(vendor)
        self.mpp = np.array([float(x) for x in mpp]) if mpp is not None else None
        self.file_path = Path(file_path) if file_path is not None else None
        self.raw = raw if raw is not None else None

        self.validate()


[docs]
    def validate(self: WSIMeta) -> bool:
        """Validate passed values and cast to Python types.

        Metadata values are often given as strings and must be
        parsed/cast to the appropriate python type e.g. "3.14" to 3.14
        etc.

        Returns:
            bool:
                True is validation passed, False otherwise.

        """
        passed = True

        # Fatal conditions: Should return False if not True

        if len(set(self.axes) - set(self._valid_axes_characters)) > 0:
            logger.warning(
                "Axes contains invalid characters. Valid characters are %s.",
                self._valid_axes_characters,
            )
            passed = False

        if self.level_count < 1:
            logger.warning("Level count is not a positive integer.")
            passed = False

        if self.level_dimensions is None:
            logger.warning("'level_dimensions' is None.")
            passed = False
        elif len(self.level_dimensions) != self.level_count:
            logger.warning("Length of level dimensions != level count")
            passed = False

        if self.level_downsamples is None:
            logger.warning("Level downsamples is None.")
            passed = False
        elif len(self.level_downsamples) != self.level_count:
            logger.warning("Length of level downsamples != level count")
            passed = False

        # Non-fatal conditions: Raise warning only, do not fail validation

        if self.raw is None:
            logger.warning("Raw data is None.")

        if all(x is None for x in [self.objective_power, self.mpp]):
            logger.warning("Unknown scale (no objective_power or mpp)")

        return passed



[docs]
    def level_downsample(
        self: WSIMeta,
        level: float,
    ) -> float:
        """Get the downsample factor for a level.

        For non-integer values of `level`, the downsample factor is
        linearly interpolated between from the downsample factors of the
        level below and the level above.

        Args:
            level (float):
                Level to get downsample factor for.

        Returns:
            float:
                Downsample factor for the given level.

        """
        level_downsamples = self.level_downsamples
        if isinstance(level, int) or int(level) == level:
            # Return the downsample for the level
            return level_downsamples[int(level)]
        # Linearly interpolate between levels
        floor = int(np.floor(level))
        ceil = int(np.ceil(level))
        floor_downsample = level_downsamples[floor]
        ceil_downsample = level_downsamples[ceil]
        return np.interp(level, [floor, ceil], [floor_downsample, ceil_downsample])



[docs]
    def relative_level_scales(
        self: WSIMeta,
        resolution: Resolution,
        units: Units,
    ) -> list[np.ndarray]:
        """Calculate scale of each level in the WSI relative to given resolution.

        Find the relative scale of each image pyramid / resolution level
        of the WSI relative to the given resolution and units.

        Values > 1 indicate that the level has a larger scale than the
        target and < 1 indicates that it is smaller.

        Args:
            resolution (Resolution):
                Scale to calculate relative to units.
            units (Units):
                Units of the scale. Allowed values are: `"mpp"`,
                `"power"`, `"level"`, `"baseline"`. Baseline refers to
                the largest resolution in the WSI (level 0).

        Raises:
            ValueError:
                Missing MPP metadata.
            ValueError:
                Missing objective power metadata.
            ValueError:
                Invalid units.

        Returns:
            list:
                Scale for each level relative to the given scale and
                units.

        Examples:
            >>> from tiatoolbox.wsicore.wsireader import WSIReader
            >>> wsi = WSIReader.open(input_img="./CMU-1.ndpi")
            >>> print(wsi.info.relative_level_scales(0.5, "mpp"))
            [array([0.91282519, 0.91012514]), array([1.82565039, 1.82025028]) ...

            >>> from tiatoolbox.wsicore.wsireader import WSIReader
            >>> wsi = WSIReader.open(input_img="./CMU-1.ndpi")
            >>> print(wsi.info.relative_level_scales(0.5, "baseline"))
            [0.125, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0]

        """
        if units not in ("mpp", "power", "level", "baseline"):
            msg = "Invalid units"
            raise ValueError(msg)

        level_downsamples = self.level_downsamples

        def np_pair(x: Number | np.array) -> np.ndarray:
            """Ensure input x is a numpy array of length 2."""
            # If one number is given, the same value is used for x and y
            if isinstance(x, Number):
                return np.array([x] * 2)
            return np.array(x)

        if units == "level":
            if resolution >= len(level_downsamples):
                msg = (
                    f"Target scale level {resolution} > "
                    f"number of levels {len(level_downsamples)} in WSI"
                )
                raise ValueError(
                    msg,
                )
            base_scale, resolution = 1, self.level_downsample(resolution)

        resolution = np_pair(resolution)

        if units == "mpp":
            if self.mpp is None:
                msg = "MPP is None. Cannot determine scale in terms of MPP."
                raise ValueError(msg)
            base_scale = self.mpp

        if units == "power":
            if self.objective_power is None:
                msg = (
                    "Objective power is None. "
                    "Cannot determine scale in terms of objective power.",
                )
                raise ValueError(
                    msg,
                )
            base_scale, resolution = 1 / self.objective_power, 1 / resolution

        if units == "baseline":
            base_scale, resolution = 1, 1 / resolution

        return [
            (base_scale * downsample) / resolution for downsample in level_downsamples
        ]



[docs]
    def as_dict(self: WSIMeta) -> dict:
        """Convert WSIMeta to dictionary of Python types.

        Returns:
            dict:
                Whole slide image metadata as dictionary.

        """
        mpp = (self.mpp, self.mpp) if self.mpp is None else tuple(self.mpp)

        return {
            "objective_power": self.objective_power,
            "slide_dimensions": self.slide_dimensions,
            "level_count": self.level_count,
            "level_dimensions": self.level_dimensions,
            "level_downsamples": self.level_downsamples,
            "vendor": self.vendor,
            "mpp": mpp,
            "file_path": self.file_path,
            "axes": self.axes,
        }