# Source code for tiatoolbox.wsicore.wsimeta
"""This module defines a dataclass which holds metadata about a WSI.
With this class, metadata is in a normalized consistent format
which is quite useful when working with many different WSI formats.
The raw metadata is also preserved and accessible via a dictionary. The
format of this dictionary may vary between WSI formats.
"""
from __future__ import annotations
from numbers import Number
from pathlib import Path
from typing import TYPE_CHECKING
import numpy as np
from tiatoolbox import logger
if TYPE_CHECKING: # pragma: no cover
from collections.abc import Mapping, Sequence
from tiatoolbox.typing import Resolution, Units
[docs]
class WSIMeta:
    """Whole slide image metadata class.

    Values passed to the constructor are cast to consistent Python /
    NumPy types and then checked with :meth:`validate`. Validation
    problems are reported through the logger only; the constructor
    does not raise on them.

    Args:
        slide_dimensions (int, int):
            Tuple containing the width and height of the WSI. These
            are for the baseline (full resolution) image if the WSI
            is a pyramid or multi-resolution.
        axes (str):
            Axes ordering of the image, e.g. "YXS". Only the
            characters in "YXSTZ" are considered valid.
        level_dimensions (list):
            A list of dimensions for each level of the pyramid or
            for each resolution in the WSI.
        objective_power (float, optional):
            The power of the objective lens used to create the
            image.
        level_count: (int, optional):
            The number of levels or resolutions in the WSI. If not
            given this is assigned len(level_dimensions). Defaults
            to None.
        level_downsamples (:obj:`list` of :obj:`float`):
            List of scale values which describe how many times
            smaller the current level is compared with the baseline.
        vendor (str, optional):
            Scanner vendor/manufacturer description.
        mpp (float, float, optional):
            Microns per pixel.
        file_path (Path, optional):
            Path to the corresponding WSI file.
        raw (dict, optional):
            Dictionary of unprocessed metadata extracted from the
            WSI format. For JPEG-2000 images this contains an xml
            object under the key "xml".

    Attributes:
        slide_dimensions (tuple(int)):
            Tuple containing the width and height of the WSI. These are
            for the baseline (full resolution) image if the WSI is a
            pyramid or multi-resolution. Required.
        axes (str):
            Axes ordering of the image. This is most relevant for
            OME-TIFF images where the axes ordering can vary. For most
            images this will be "YXS" i.e. the image is stored in the
            axis order of Y coordinates first, then X coordinates, and
            colour channels last.
        level_dimensions (list):
            A list of dimensions for each level of the pyramid or for
            each resolution in the WSI. Stored as a tuple of
            (width, height) tuples when given; defaults to the list
            [slide_dimensions].
        objective_power (float):
            The magnification power of the objective lens used to scan
            the image. Not always present or accurate. Defaults to None.
        level_count: (int):
            The number of levels or resolutions in the WSI. If not given
            this is assigned len(level_dimensions). Defaults to
            len(level_dimensions).
        level_downsamples (:obj:`list` of :obj:`float`):
            List of scale values which describe how many times smaller
            the current level is compared with the baseline. Defaults to
            [1.0] (from the constructor default of (1,)).
        vendor (str):
            Scanner vendor/manufacturer description. The value is
            always passed through str(), so a vendor of None is stored
            as the string "None".
        mpp (float, float, optional):
            Microns per pixel, stored as a NumPy array. Derived from
            objective power and sensor size. Not always present or
            accurate. Defaults to None.
        file_path (Path):
            Path to the corresponding WSI file. Defaults to None.
        raw (dict):
            Dictionary of unprocessed metadata extracted from the WSI
            format. For JP2 images this contains an xml object under the
            key "xml". Defaults to None (which triggers a warning
            during validation).

    """

    # Characters accepted in the ``axes`` string.
    _valid_axes_characters = "YXSTZ"

    def __init__(  # noqa: PLR0913
        self: WSIMeta,
        slide_dimensions: tuple[int, int],
        axes: str,
        level_dimensions: Sequence[tuple[int, int]] | None = None,
        objective_power: float | None = None,
        level_count: int | None = None,
        level_downsamples: Sequence[float] | None = (1,),
        vendor: str | None = None,
        mpp: Sequence[float] | None = None,
        file_path: Path | None = None,
        raw: Mapping[str, str] | None = None,
    ) -> None:
        """Initialize WSIMeta.

        Casts every argument to its normalized type, then runs
        :meth:`validate`. The validation result is not acted upon here;
        problems are only logged.

        """
        self.axes = axes
        # Any falsy power (None, 0, ...) is treated as "unknown".
        self.objective_power = float(objective_power) if objective_power else None
        self.slide_dimensions = tuple(int(x) for x in slide_dimensions)
        # NOTE(review): the default is a list while a supplied value becomes a
        # tuple. Kept as-is because callers may compare against either form.
        self.level_dimensions = (
            tuple((int(w), int(h)) for w, h in level_dimensions)
            if level_dimensions is not None
            else [self.slide_dimensions]
        )
        self.level_downsamples = (
            [float(x) for x in level_downsamples]
            if level_downsamples is not None
            else None
        )
        # Fall back to the number of level dimensions when no count is given.
        self.level_count = (
            int(level_count) if level_count is not None else len(self.level_dimensions)
        )
        # NOTE(review): str(None) yields the string "None"; preserved for
        # backward compatibility with existing callers.
        self.vendor = str(vendor)
        self.mpp = np.array([float(x) for x in mpp]) if mpp is not None else None
        self.file_path = Path(file_path) if file_path is not None else None
        self.raw = raw
        self.validate()

    def validate(self: WSIMeta) -> bool:
        """Check the stored metadata for consistency.

        Every problem found is reported as a logger warning. Fatal
        problems (invalid axes characters, non-positive level count,
        missing or mismatched level dimensions / downsamples) make the
        method return False. A missing ``raw`` dictionary or a missing
        scale (neither objective power nor mpp) only produces a warning.

        Returns:
            bool:
                True if validation passed, False otherwise.

        """
        passed = True

        # Fatal conditions: Should return False if not True
        if set(self.axes) - set(self._valid_axes_characters):
            logger.warning(
                "Axes contains invalid characters. Valid characters are %s.",
                self._valid_axes_characters,
            )
            passed = False

        if self.level_count < 1:
            logger.warning("Level count is not a positive integer.")
            passed = False

        if self.level_dimensions is None:
            logger.warning("'level_dimensions' is None.")
            passed = False
        elif len(self.level_dimensions) != self.level_count:
            logger.warning("Length of level dimensions != level count")
            passed = False

        if self.level_downsamples is None:
            logger.warning("Level downsamples is None.")
            passed = False
        elif len(self.level_downsamples) != self.level_count:
            logger.warning("Length of level downsamples != level count")
            passed = False

        # Non-fatal conditions: Raise warning only, do not fail validation
        if self.raw is None:
            logger.warning("Raw data is None.")

        if self.objective_power is None and self.mpp is None:
            logger.warning("Unknown scale (no objective_power or mpp)")

        return passed

    def level_downsample(
        self: WSIMeta,
        level: float,
    ) -> float:
        """Get the downsample factor for a level.

        For non-integer values of `level`, the downsample factor is
        linearly interpolated between the downsample factors of the
        level below and the level above.

        Args:
            level (float):
                Level to get downsample factor for.

        Raises:
            IndexError:
                If `level` (or the level above it, for non-integer
                values) is beyond the last entry of
                `level_downsamples`.

        Returns:
            float:
                Downsample factor for the given level.

        """
        level_downsamples = self.level_downsamples

        if isinstance(level, int) or int(level) == level:
            # Whole-numbered level: direct lookup, no interpolation needed.
            return level_downsamples[int(level)]

        # Linearly interpolate between the two neighbouring levels.
        floor = int(np.floor(level))
        ceil = int(np.ceil(level))
        floor_downsample = level_downsamples[floor]
        ceil_downsample = level_downsamples[ceil]
        return np.interp(level, [floor, ceil], [floor_downsample, ceil_downsample])

    def relative_level_scales(
        self: WSIMeta,
        resolution: Resolution,
        units: Units,
    ) -> list[np.ndarray]:
        """Calculate scale of each level in the WSI relative to given resolution.

        Find the relative scale of each image pyramid / resolution level
        of the WSI relative to the given resolution and units.

        Values > 1 indicate that the level has a larger scale than the
        target and < 1 indicates that it is smaller.

        Args:
            resolution (Resolution):
                Scale to calculate relative to units. A single number
                is used for both x and y.
            units (Units):
                Units of the scale. Allowed values are: `"mpp"`,
                `"power"`, `"level"`, `"baseline"`. Baseline refers to
                the largest resolution in the WSI (level 0).

        Raises:
            ValueError:
                Missing MPP metadata.
            ValueError:
                Missing objective power metadata.
            ValueError:
                Invalid units.
            ValueError:
                Requested level is beyond the last level of the WSI.

        Returns:
            list:
                Scale for each level relative to the given scale and
                units. Each entry is a NumPy array of (x, y) scales.

        Examples:
            >>> from tiatoolbox.wsicore.wsireader import WSIReader
            >>> wsi = WSIReader.open(input_img="./CMU-1.ndpi")
            >>> print(wsi.info.relative_level_scales(0.5, "mpp"))
            [array([0.91282519, 0.91012514]), array([1.82565039, 1.82025028]) ...

            >>> meta = WSIMeta(
            ...     slide_dimensions=(4096, 4096),
            ...     axes="YXS",
            ...     level_dimensions=[(4096, 4096), (2048, 2048), (1024, 1024)],
            ...     level_downsamples=[1, 2, 4],
            ...     objective_power=40,
            ...     raw={},
            ... )
            >>> print(meta.relative_level_scales(0.5, "baseline"))
            [array([0.5, 0.5]), array([1., 1.]), array([2., 2.])]

        """
        if units not in ("mpp", "power", "level", "baseline"):
            msg = "Invalid units"
            raise ValueError(msg)

        level_downsamples = self.level_downsamples

        def np_pair(x: Number | np.ndarray) -> np.ndarray:
            """Return x as a numpy array, duplicating a single number to (x, x)."""
            # If one number is given, the same value is used for x and y
            if isinstance(x, Number):
                return np.array([x] * 2)
            return np.array(x)

        if units == "level":
            # Compare against the last valid index rather than the length so
            # that fractional levels past the top level (e.g. 2.5 of 3 levels)
            # raise the documented ValueError here instead of an IndexError
            # from the interpolation in level_downsample().
            if resolution > len(level_downsamples) - 1:
                msg = (
                    f"Target scale level {resolution} > "
                    f"number of levels {len(level_downsamples)} in WSI"
                )
                raise ValueError(
                    msg,
                )
            base_scale, resolution = 1, self.level_downsample(resolution)

        resolution = np_pair(resolution)

        if units == "mpp":
            if self.mpp is None:
                msg = "MPP is None. Cannot determine scale in terms of MPP."
                raise ValueError(msg)
            base_scale = self.mpp

        if units == "power":
            if self.objective_power is None:
                # No trailing comma here: the message must be a str, not a
                # 1-tuple, so the exception text reads correctly.
                msg = (
                    "Objective power is None. "
                    "Cannot determine scale in terms of objective power."
                )
                raise ValueError(
                    msg,
                )
            # Power is inversely proportional to scale, hence the reciprocals.
            base_scale, resolution = 1 / self.objective_power, 1 / resolution

        if units == "baseline":
            base_scale, resolution = 1, 1 / resolution

        return [
            (base_scale * downsample) / resolution for downsample in level_downsamples
        ]

    def as_dict(self: WSIMeta) -> dict:
        """Convert WSIMeta to dictionary of Python types.

        The ``raw`` metadata is not included. ``mpp`` is returned as a
        tuple of Python floats, or ``(None, None)`` when unknown.

        Returns:
            dict:
                Whole slide image metadata as dictionary.

        """
        if self.mpp is None:
            mpp = (None, None)
        else:
            # Convert NumPy scalars to plain Python floats.
            mpp = tuple(float(x) for x in self.mpp)

        return {
            "objective_power": self.objective_power,
            "slide_dimensions": self.slide_dimensions,
            "level_count": self.level_count,
            "level_dimensions": self.level_dimensions,
            "level_downsamples": self.level_downsamples,
            "vendor": self.vendor,
            "mpp": mpp,
            "file_path": self.file_path,
            "axes": self.axes,
        }