Source code for uravu.distribution

"""
The storage and manipulation of probability distributions is fundamental to the operation of ``uravu`` and Bayesian inference.
The :py:class:`~uravu.distribution.Distribution` class oversees these operations.
"""

# Copyright (c) Andrew R. McCluskey
# Distributed under the terms of the MIT License
# author: Andrew R. McCluskey

from typing import Union, List
import numpy as np
from scipy.stats import normaltest, gaussian_kde
from scipy.optimize import minimize


[docs]class Distribution: """ In addition to storage of the probability distribution, this class allows for some basic analysis, such as determination of normality. Attributes: samples (:py:attr:`array_like`): Samples in the distribution. name (:py:attr:`str`): Distribution name. normal (:py:attr:`bool`): Are the samples normally distributed? kde (:py:class:`scipy.stats.kde.gaussian_kde`): Kernel density approximation for the distribution. Args: samples (:py:attr:`array_like`): Sample for the distribution. name (:py:attr:`str`, optional): A name to identify the distribution. Default is :py:attr:`'Distribution'`. ci_points (:py:attr:`array_like`, optional): The two percentiles at which confidence intervals should be found. Default is :py:attr:`[2.5, 97.5]` (a 95 % confidence interval). .. _FAQ: ./faq.html """ def __init__(self, samples: Union[List[Union[float, int]], np.ndarray], name: str="Distribution", random_state: np.random._generator.Generator=None) -> 'Distribution': """ Initialisation function for a :py:class:`~uravu.distribution.Distribution` object. """ self.name = name self.samples = np.array([]) self.normal = False self._random_state = random_state if random_state is None: self._random_state = np.random.default_rng(np.random.randint(1)) self.add_samples(np.array(samples))
[docs] def to_dict(self) -> dict: """ :return: Dictionary description of the object. """ my_dict = {'name': self.name, 'samples': self.samples } return my_dict
[docs] @classmethod def from_dict(cls, my_dict: dict) -> 'Distribution': """ :param my_dict: Dictionary description of the distribution. :return: Distribution object form the dictionary. """ return cls(my_dict['samples'], name=my_dict['name'])
@property def size(self) -> int: """ :return: Number of samples in the distribution. """ return self.samples.size
[docs] def check_normality(self) -> bool: """ Uses a :func:`scipy.stats.normaltest()` to evaluate if samples are normally distributed and updates the :py:attr:`~uravu.distribution.Distribution.normal` attribute. """ alpha = 0.05 test_samples = self.samples if self.size > 500: test_samples = self._random_state.choice(self.samples, size=500) p_value = normaltest(test_samples)[1] if p_value > alpha: self.normal = True else: self.normal = False
[docs] def pdf(self, x: Union[float, List[Union[float, int]], np.ndarray]) -> Union[float, np.ndarray]: """ Get the probability density function for the distribution. :param x: Value to return probability of. :return: Probability. """ return self.kde.pdf(x)
[docs] def logpdf(self, x: Union[float, List[Union[float, int]], np.ndarray]) -> Union[float, np.ndarray]: """ Get the natural log probability density function for the distribution. :param x: Value to return natural log probability of. :return: Natural log probability. """ return self.kde.logpdf(x)
[docs] def negative_pdf(self, x: Union[float, List[Union[float, int]], np.ndarray]) -> Union[float, np.ndarray]: """ Get the negative of the probability density function for the distribution. :param x: Value to return negative probability of. :return: Negative probability. """ return -self.kde.pdf(x)
@property def dist_max(self) -> float: """ Get the value that maximises the distribution. If no :py:attr:`kde` has been created (for example if the distribution has fewer than 8 values) the median is returned. :return: Most likely value. """ try: return minimize(self.negative_pdf, x0=[self.n]).x[0] except AttributeError: return self.n @property def min(self) -> float: """ :return: Sample minimum. """ return self.samples.min() @property def max(self) -> float: """ :return: Sample maximum. """ return self.samples.max() @property def n(self) -> float: """ :return: Median value. """ return np.percentile(self.samples, [50])[0] @property def s(self) -> Union[float, None]: """ :return: Standard deviation of the distribution. For a non-normal distribution, this will return :py:attr:`None`. """ if self.normal: return np.std(self.samples, ddof=1) return None @property def v(self) -> Union[float, None]: """ :return: Standard deviation of the distribution. For a non-normal distribution, this will return :py:attr:`None`. """ if self.normal: return np.var(self.samples, ddof=1) return None
[docs] def con_int(self, ci_points: List[float]=[2.5, 97.5]) -> np.ndarray: """ Get the extrema of the confidence intervals of the distribution. :param ci_points: The confidence interval points to return. :return: Distribution values at the confidence interval. """ return self.ci(ci_points)
[docs] def ci(self, ci_points: List[float]=[2.5, 97.5]) -> np.ndarray: """ Get the extrema of the confidence intervals of the distribution. :param ci_points: The confidence interval points to return. :return: Distribution values at the confidence interval. """ return np.percentile(self.samples, ci_points)
[docs] def add_samples(self, samples: Union[List[Union[float, int]], np.ndarray]): """ Add samples to the distribution. Args: samples (:py:attr:`array_like`): Samples to be added to the distribution. """ self.samples = np.append(self.samples, np.array(samples).flatten()) if (self.size > 8) and (np.std(self.samples) != 0): self.check_normality() self.kde = gaussian_kde(self.samples)
def __repr__(self) -> np.ndarray: """ :return: Representation. """ return self.samples.__repr__() def __array__(self) -> np.ndarray: """ :return: Array of samples. """ return self.samples def __add__(self, obj2: Union[float, np.ndarray]) -> np.ndarray: """ :return: Sum of samples and obj2. """ return np.add(self, obj2) def __sub__(self, obj2: Union[float, np.ndarray]) -> np.ndarray: """ :return: Difference between samples and obj2. """ return np.subtract(self, obj2) def __mul__(self, obj2: Union[float, np.ndarray]) -> np.ndarray: """ :return: Product of samples and obj2. """ return np.multiply(self, obj2) def __truediv__(self, obj2: Union[float, np.ndarray]) -> np.ndarray: """ :return: Ratio between samples and obj2. """ return np.divide(self, obj2) def __mod__(self, obj2: Union[float, np.ndarray]) -> np.ndarray: """ :return: Modulus of samples by obj2. """ return np.remainder(self, obj2) def __str__(self) -> np.ndarray: """ :return: String representation. """ if self.s is not None: return f'{self.n:.3e} +/- {self.s:.3e}' else: c = np.abs(self.n - self.con_int()) return f'{self.n:.3e}(+{c[1]:.3e}/-{c[0]:.3e}'