Source code for tiatoolbox.tools.scale

# ***** BEGIN GPL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# The Original Code is Copyright (C) 2021, TIA Centre, University of Warwick
# All rights reserved.
# ***** END GPL LICENSE BLOCK *****

"""Scaler for transforming input.

Included classes and methods are utilized to either pre-process input
(such as standardization) or post-process predictions (such as re-calibrating
logits to proper probabilities).

"""

import numpy as np


# Fit model output to the label range
[docs]class PlattScaling:
    """Platt scaling.

    Fitting a logistic regression model to a classifier scores such that
    the model outputs are transformed into a probability distribution over classes.

    Args:
        num_iters (int): Number of iterations for training.

    Examples:
        >>> import numpy as np
        >>> logit = np.random.rand(10)
        >>> # binary class
        >>> label = np.random.randint(0, 2, 10)
        >>> scaler = PlattScaling()
        >>> probabilities = scaler.fit_transform(label, logit)

    """

    def __init__(self, num_iters=100):
        self.a = None
        self.b = None
        self.num_iters = num_iters + 1
        self._fixer_a = 1.0
        self._fixer_b = 1.0

[docs]    def fit(self, logits, labels):
        """Fit function like sklearn.

        Fit the sigmoid to the classifier scores logits and labels
        using the Platt Method.

        Args:
            logits (array-like): Classifier output scores.
            labels (array like): Classifier labels, must be `+1` vs `-1` or `1` vs `0`.
        Returns:
            Model with fitted coefficients a and b for the sigmoid function.

        """

        def mylog(v):
            """Log with epilon."""
            return np.log(v + 1.0e-200)

        out = np.array(logits)
        labels = np.array(labels)

        if len(logits) != len(labels):
            raise ValueError(
                (
                    f"`logits` and `labels` must have same shape: "
                    f"{len(logits)} vs {len(labels)}"
                )
            )

        target = labels == 1
        prior1 = float(np.sum(target))
        prior0 = len(target) - prior1
        a_ = 0
        b_ = np.log((prior0 + 1) / (prior1 + 1))
        self.a, self.b = a_, b_

        hi_target = (prior1 + 1) / (prior1 + 2)
        lo_target = 1 / (prior0 + 2)
        labda = 1e-3
        olderr = 1e300
        pp = np.ones(out.shape) * (prior1 + 1) / (prior0 + prior1 + 2)
        idx_t = np.zeros(target.shape)
        for _ in range(1, self.num_iters):
            a = 0
            b = 0
            c = 0
            d = 0
            e = 0
            for i, _ in enumerate(out):
                if target[i]:
                    t = hi_target
                    idx_t[i] = t
                else:
                    t = lo_target
                    idx_t[i] = t
                d1 = pp[i] - t
                d2 = pp[i] * (1 - pp[i])
                a += out[i] * out[i] * d2
                b += d2
                c += out[i] * d2
                d += out[i] * d1
                e += d1

            flag = abs(d) < 1.0e-9 and abs(e) < 1.0e-9
            if flag:
                break

            old_a_ = a_
            old_b_ = b_
            count = 0
            while 1:
                det = (a + labda) * (b + labda) - c * c
                if self._fixer_a * det == 0:
                    labda *= 10
                    continue
                a_ = old_a_ + ((b + labda) * d - c * e) / det
                b_ = old_b_ + ((a + labda) * e - c * d) / det

                self.a, self.b = a_, b_
                err = 0
                for i, _ in enumerate(out):
                    p = self.transform(out[i])
                    pp[i] = p
                    t = idx_t[i]
                    err -= t * mylog(p) + (1 - t) * mylog(1 - p)

                if err < self._fixer_a * olderr * (1 + 1e-7):
                    labda *= 0.1
                    break
                labda *= 10

                if self._fixer_b * labda > 1e6:
                    break
                diff = err - olderr
                scale = 0.5 * (err + olderr + 1)

                flag = -1e-3 * scale < diff < 1e-7 * scale
                if flag:
                    count += 1
                else:
                    count = 0
                olderr = err

                if count == 3:
                    break
        self.a, self.b = a_, b_
        return self

[docs]    def transform(self, logits):
        """Tranform input to probabilities basing on trained parameters.

        Args:
            labels (array like): Classifier labels, must be `+1` vs `-1` or `1` vs `0`.
        Returns:
            Array of probabilities.

        """
        return 1 / (1 + np.exp(logits * self.a + self.b))

[docs]    def fit_transform(self, logits, labels):
        """Fit and tranform input to probabilities.

        Args:
            logits (array-like): Classifier output scores.
            labels (array like): Classifier labels, must be `+1` vs `-1` or `1` vs `0`.
        Returns:
            Array of probabilities.

        """
        return self.fit(logits, labels).transform(logits)

    def __repr__(self):
        a, b = self.a, self.b
        return "Platt Scaling: " + f"a: {a}, b: {b}"
Source code for tiatoolbox.tools.scale

TIA Toolbox

Navigation

Related Topics