# Source code for skpro.metrics

import numpy as np

def sample_loss(loss, return_std=False):
    """Average the loss of a sample, ignoring NaN entries.

    Parameters
    ----------
    loss: np.array or array-like
        Loss sample. Plain sequences (e.g. lists) are converted with
        ``np.asarray`` before the NaN mask is applied.
    return_std: boolean, default=False
        If true, the standard error of the mean (standard deviation of
        the NaN-free sample divided by the square root of its size) is
        returned alongside the mean

    Returns
    -------
    float or tuple
        Mean of the non-NaN losses; a ``(mean, standard_error)`` tuple
        if return_std is True
    """
    # Boolean-mask indexing requires an array; a plain list would raise.
    loss = np.asarray(loss)
    loss = loss[~np.isnan(loss)]

    if return_std:
        return np.mean(loss), np.std(loss) / np.sqrt(len(loss))

    return np.mean(loss)

class _Scorer:

def __init__(self, score_func, sign):
self._score_func = score_func
self._sign = sign

def __call__(self, estimator, X, y, sample=True, return_std=False):
y_pred = estimator.predict(X)
score = self._score_func(y, y_pred, sample=sample, return_std=return_std)
if sample and return_std:
return self._sign * score[0], score[1]
else:
return self._sign * score

def make_scorer(score_func, greater_is_better=True):
    """Make a scorer from a performance metric or loss function.

    This factory function wraps scoring functions for use in GridSearchCV
    and cross_val_score. It takes a score function, such as log_loss,
    and returns a callable that scores an estimator's output.

    Parameters
    ----------
    score_func : callable
        Score function (or loss function). The returned scorer calls it as
        ``score_func(y, y_pred, sample=..., return_std=...)``.

    greater_is_better : boolean, default=True
        Whether score_func is a score function (default), meaning high is
        good, or a loss function, meaning low is good. In the latter case
        the scorer object will sign-flip the outcome of the score_func.

    Returns
    -------
    scorer : callable
        Callable object that returns a score; greater is better.
    """
    if greater_is_better:
        return _Scorer(score_func, 1)

    # Loss functions are negated so that a larger value is always better.
    return _Scorer(score_func, -1)

def gneiting_loss(y_true, dist_pred, sample=True, return_std=False):
    """Gneiting loss

    Computed as ``-2 * dist_pred.pdf(y_true) + dist_pred.lp2()``.

    Parameters
    ----------
    y_true: np.array
        The true labels
    dist_pred: ProbabilisticEstimator.Distribution
        The predicted distribution; must provide ``pdf`` and ``lp2``
    sample: boolean, default=True
        If true, loss will be averaged across the sample
    return_std: boolean, default=False
        If true, the second value computed by ``sample_loss`` is returned
        alongside the averaged loss (only used when sample is true)

    Returns
    -------
    np.array
        Loss (as computed by ``sample_loss`` if sample is true)

    Raises
    ------
    Exception
        If ``dist_pred`` has no (truthy) ``lp2`` attribute
    """
    lp2 = getattr(dist_pred, 'lp2', False)
    if not lp2:
        raise Exception('The estimator does not provide an lp2 integration')

    loss = -2 * dist_pred.pdf(y_true) + lp2()

    # Truthiness check rather than ``is True``: flags such as ``1`` or
    # ``np.True_`` must average the loss, as in the other loss functions
    # of this module.
    if sample:
        return sample_loss(loss, return_std)

    return loss

def linearized_log_loss(y_true, dist_pred, range=1e-10, sample=True, return_std=False):
    """Linearized log loss

    Equals ``-log(p)`` for densities ``p`` above the threshold and the
    linear function ``(-1 / range) * p - log(range) + 1`` for densities at
    or below it.

    Parameters
    ----------
    y_true: np.array
        The true labels
    dist_pred: ProbabilisticEstimator.Distribution
        The predicted distribution
    range: float
        Threshold value of linearization. (The name shadows the builtin
        inside this function; it is kept for interface compatibility.)
    sample: boolean, default=True
        If true, loss will be averaged across the sample
    return_std: boolean, default=False
        If true, the second value computed by ``sample_loss`` is returned
        alongside the averaged loss (only used when sample is true)

    Returns
    -------
    np.array
        Loss (as computed by ``sample_loss`` if sample is true)
    """
    pdf = np.asarray(dist_pred.pdf(y_true), dtype=float)

    linear_region = pdf <= range
    linear_part = (-1 / range) * pdf - np.log(range) + 1

    # np.where evaluates both branches everywhere, so feed the logarithm a
    # harmless 1.0 in the linear region to avoid log(0) / log(negative)
    # warnings for values that are discarded anyway.
    log_part = -np.log(np.where(linear_region, 1.0, pdf))

    # Array expression instead of np.vectorize: also works on empty input.
    loss = np.where(linear_region, linear_part, log_part)

    if sample:
        return sample_loss(loss, return_std)

    return loss

def log_loss(y_true, dist_pred, sample=True, return_std=False):
    """Log loss

    The negative logarithm of the predicted density at the true labels.

    Parameters
    ----------
    y_true: np.array
        The true labels
    dist_pred: ProbabilisticEstimator.Distribution
        The predicted distribution
    sample: boolean, default=True
        If true, loss will be averaged across the sample
    return_std: boolean, default=False
        If true, the second value computed by ``sample_loss`` is returned
        alongside the averaged loss (only used when sample is true)

    Returns
    -------
    np.array
        Loss (as computed by ``sample_loss`` if sample is true)
    """
    pointwise = -np.log(dist_pred.pdf(y_true))

    if not sample:
        return pointwise

    return sample_loss(pointwise, return_std)

def rank_probability_loss(y_true, dist_pred, sample=True, return_std=False):
    r"""Rank probability loss

    .. math::
        L(F,y) = \int_{-\infty}^{y} F(x)^2 dx + \int_{y}^{+\infty} (1-F(x))^2 dx

    where :math:`F(x)` denotes the CDF of the predicted distribution.
    Both integrals are evaluated numerically with ``scipy.integrate.quad``.

    Parameters
    ----------
    y_true: np.array
        The true labels
    dist_pred: ProbabilisticEstimator.Distribution
        The predicted distribution; must support ``len()`` and integer
        indexing, with each element providing ``cdf``
    sample: boolean, default=True
        If true, loss will be averaged across the sample
    return_std: boolean, default=False
        If true, the second value computed by ``sample_loss`` is returned
        alongside the averaged loss (only used when sample is true)

    Returns
    -------
    np.array
        Loss (as computed by ``sample_loss`` if sample is true)
    """
    from scipy.integrate import quad as integrate

    def squared_cdf(dist, one_minus=False):
        # Integrand F(x)^2, or (1 - F(x))^2 when one_minus is set.
        def integrand(x):
            if one_minus:
                return (1 - dist.cdf(x)) ** 2
            return dist.cdf(x) ** 2

        return integrand

    def point_loss(index):
        # Look the point's distribution up once instead of on every
        # integrand evaluation performed by quad.
        dist = dist_pred[index]
        y = y_true[index]

        # int_{-inf}^{y} F(x)^2 dx
        lower = integrate(squared_cdf(dist), -np.inf, y)[0]
        # int_{y}^{+inf} (1 - F(x))^2 dx
        upper = integrate(squared_cdf(dist, one_minus=True), y, np.inf)[0]

        return lower + upper

    loss = np.array([point_loss(index) for index in range(len(dist_pred))])

    if sample:
        return sample_loss(loss, return_std)

    return loss