Source code for uravu.distribution

"""
The storage and manipulation of probability distributions is fundamental to the operation of ``uravu`` and Bayesian inference.
The :py:class:`~uravu.distribution.Distribution` class oversees these operations.
"""

# Copyright (c) Andrew R. McCluskey
# Distributed under the terms of the MIT License
# author: Andrew R. McCluskey

from typing import Union, List
import numpy as np
from scipy.stats import normaltest, gaussian_kde
from scipy.optimize import minimize


[docs]class Distribution:
    """
    In addition to storage of the probability distribution, this class allows for some basic analysis, such as determination of normality.

    Attributes:
        samples (:py:attr:`array_like`): Samples in the distribution.
        name (:py:attr:`str`): Distribution name.
        normal (:py:attr:`bool`): Are the samples normally distributed?
        kde (:py:class:`scipy.stats.kde.gaussian_kde`): Kernel density approximation for the distribution.

    Args:
        samples (:py:attr:`array_like`): Sample for the distribution.
        name (:py:attr:`str`, optional): A name to identify the distribution. Default is :py:attr:`'Distribution'`.
        ci_points (:py:attr:`array_like`, optional): The two percentiles at which confidence intervals should be found. Default is :py:attr:`[2.5, 97.5]` (a 95 % confidence interval).

    .. _FAQ: ./faq.html
    """

    def __init__(self, samples: Union[List[Union[float, int]], np.ndarray], name: str="Distribution", random_state: np.random._generator.Generator=None) -> 'Distribution':
        """
        Initialisation function for a :py:class:`~uravu.distribution.Distribution` object.
        """
        self.name = name
        self.samples = np.array([])
        self.normal = False
        self._random_state = random_state
        if random_state is None:
            self._random_state = np.random.default_rng(np.random.randint(1))
        self.add_samples(np.array(samples))

[docs]    def to_dict(self) -> dict:
        """
        :return: Dictionary description of the object.
        """
        my_dict = {'name': self.name,
                   'samples': self.samples
                   }
        return my_dict
    
[docs]    @classmethod
    def from_dict(cls, my_dict: dict) -> 'Distribution':
        """
        :param my_dict: Dictionary description of the distribution.
        
        :return: Distribution object form the dictionary.
        """
        return cls(my_dict['samples'], name=my_dict['name'])

    @property
    def size(self) -> int:
        """
        :return: Number of samples in the distribution.
        """
        return self.samples.size

[docs]    def check_normality(self) -> bool:
        """
        Uses a :func:`scipy.stats.normaltest()` to evaluate if samples are normally distributed and updates 
        the :py:attr:`~uravu.distribution.Distribution.normal` attribute.
        """
        alpha = 0.05
        test_samples = self.samples
        if self.size > 500:
            test_samples = self._random_state.choice(self.samples, size=500)
        p_value = normaltest(test_samples)[1]
        if p_value > alpha:
            self.normal = True
        else:
            self.normal = False

[docs]    def pdf(self, x: Union[float, List[Union[float, int]], np.ndarray]) -> Union[float, np.ndarray]:
        """
        Get the probability density function for the distribution.

        :param x: Value to return probability of.
        :return: Probability.
        """
        return self.kde.pdf(x)

[docs]    def logpdf(self, x: Union[float, List[Union[float, int]], np.ndarray]) -> Union[float, np.ndarray]:
        """
        Get the natural log probability density function for the distribution.

        :param x: Value to return natural log probability of.
        :return: Natural log probability.
        """
        return self.kde.logpdf(x)

[docs]    def negative_pdf(self, x: Union[float, List[Union[float, int]], np.ndarray]) -> Union[float, np.ndarray]:
        """
        Get the negative of the probability density function for the distribution.

        :param x: Value to return negative probability of.
        :return: Negative probability.
        """
        return -self.kde.pdf(x)

    @property
    def dist_max(self) -> float:
        """
        Get the value that maximises the distribution. If no :py:attr:`kde` has been created (for example
        if the distribution has fewer than 8 values) the median is returned.

        :return: Most likely value.
        """
        try:
            return minimize(self.negative_pdf, x0=[self.n]).x[0]
        except AttributeError:
            return self.n

    @property
    def min(self) -> float:
        """
        :return: Sample minimum.
        """
        return self.samples.min()

    @property
    def max(self) -> float:
        """
        :return: Sample maximum.
        """
        return self.samples.max()

    @property
    def n(self) -> float:
        """
        :return: Median value.
        """
        return np.percentile(self.samples, [50])[0]

    @property
    def s(self) -> Union[float, None]:
        """
        :return: Standard deviation of the distribution. For a non-normal distribution, this will return :py:attr:`None`.
        """
        if self.normal:
            return np.std(self.samples, ddof=1)
        return None

    @property
    def v(self) -> Union[float, None]:
        """
        :return: Standard deviation of the distribution. For a non-normal distribution, this will return :py:attr:`None`.
        """
        if self.normal:
            return np.var(self.samples, ddof=1)
        return None

[docs]    def con_int(self, ci_points: List[float]=[2.5, 97.5]) -> np.ndarray:
        """
        Get the extrema of the confidence intervals of the distribution.

        :param ci_points: The confidence interval points to return.
        :return: Distribution values at the confidence interval.
        """
        return self.ci(ci_points)

[docs]    def ci(self, ci_points: List[float]=[2.5, 97.5]) -> np.ndarray:
        """
        Get the extrema of the confidence intervals of the distribution.

        :param ci_points: The confidence interval points to return.
        :return: Distribution values at the confidence interval.
        """
        return np.percentile(self.samples, ci_points)

[docs]    def add_samples(self, samples: Union[List[Union[float, int]], np.ndarray]):
        """
        Add samples to the distribution.

        Args:
            samples (:py:attr:`array_like`): Samples to be added to the distribution.
        """
        self.samples = np.append(self.samples, np.array(samples).flatten())
        if (self.size > 8) and (np.std(self.samples) != 0):
            self.check_normality()
            self.kde = gaussian_kde(self.samples)

    def __repr__(self) -> np.ndarray:
        """
        :return: Representation.
        """
        return self.samples.__repr__()

    def __array__(self) -> np.ndarray:
        """
        :return: Array of samples.
        """
        return self.samples

    def __add__(self, obj2: Union[float, np.ndarray]) -> np.ndarray:
        """
        :return: Sum of samples and obj2.
        """
        return np.add(self, obj2)
 
    def __sub__(self, obj2: Union[float, np.ndarray]) -> np.ndarray:
        """
        :return: Difference between samples and obj2.
        """
        return np.subtract(self, obj2)
 
    def __mul__(self, obj2: Union[float, np.ndarray]) -> np.ndarray:
        """
        :return: Product of samples and obj2.
        """
        return np.multiply(self, obj2)
 
    def __truediv__(self, obj2: Union[float, np.ndarray]) -> np.ndarray:
        """
        :return: Ratio between samples and obj2.
        """
        return np.divide(self, obj2)
   
    def __mod__(self, obj2: Union[float, np.ndarray]) -> np.ndarray:
        """
        :return: Modulus of samples by obj2.
        """
        return np.remainder(self, obj2)

    def __str__(self) -> np.ndarray:
        """
        :return: String representation.
        """
        if self.s is not None:
            return f'{self.n:.3e} +/- {self.s:.3e}'
        else:
            c = np.abs(self.n - self.con_int())
            return f'{self.n:.3e}(+{c[1]:.3e}/-{c[0]:.3e}'