Source code for openalea.stat_tool.estimate

#!/usr/bin/env python
#-*- coding: utf-8 -*-
"""Estimate functions

.. topic:: estimate.py summary

    A module dedicated to estimation functionalities

    :Code status: mature
    :Documentation status: to be completed
    :Author: Thomas Cokelaer <Thomas.Cokelaer@sophia.inria.fr>


.. warning:: sequence analysis package also contains an estimate module and
    function
"""

from . import error, interface
from .enums import *
from .distribution import *
from .error import *

import openalea.stat_tool._stat_tool as cst

_DiscreteParametricModel = cst._DiscreteParametricModel
_DiscreteParametric = cst._DiscreteParametric
_Compound = cst._Compound
_Convolution = cst._Convolution
_Distribution = cst._Distribution
_DiscreteMixture = cst._DiscreteMixture
_FrequencyDistribution = cst._FrequencyDistribution
_DiscreteDistributionData = cst._DiscreteDistributionData

# from enums import likelihood_penalty_type
# from enums import smoothing_penalty_type
# from enums import outside_type
# from enums import compound_type
# from enums import estimator_type
from .enums import distribution_identifier_type as dist_type

# from openalea.stat_tool._stat_tool import _DiscreteParametricModel
# from openalea.stat_tool._stat_tool import _DiscreteParametric
# from openalea.stat_tool._stat_tool import _Compound
# from openalea.stat_tool._stat_tool import _Convolution
# from openalea.stat_tool._stat_tool import _Distribution
# from openalea.stat_tool._stat_tool import _DiscreteMixture
# from openalea.stat_tool._stat_tool import _FrequencyDistribution
# from openalea.stat_tool._stat_tool import LikelihoodPenaltyType

from openalea.stat_tool._stat_tool import I_DEFAULT, D_DEFAULT


def default_parametric_estimation(histo, iident_id):
    """Dummy estimation of parametric distribution, using maximal bounds
    as compliant with histo and moment estimators"""
    if isinstance(histo, _FrequencyDistribution):
        if iident_id == dist_type["B"]:
            # BINOMIAL
            inf_bound = histo.offset
            sup_bound = histo.nb_value
            mean = histo.mean
            e = Binomial(inf_bound, sup_bound, max(1e-10, mean/sup_bound))
        elif iident_id == dist_type["U"]:
            # UNIFORM
            inf_bound = histo.offset
            sup_bound = histo.nb_value-1
            print(inf_bound, sup_bound)
            e = Uniform(inf_bound, sup_bound)
            print(e)
        elif iident_id == dist_type["P"]:
            # POISSON
            inf_bound = histo.offset
            sup_bound = histo.nb_value
            mean = histo.mean
            e = Poisson(inf_bound, max(1e-10, mean-inf_bound))
        elif iident_id == dist_type["NB"]:
            inf_bound = histo.offset
            sup_bound = histo.nb_value
            mean = histo.mean
            var = histo.variance
            if (mean - inf_bound) > 0:
                prob = min(1-1e-10, max(1e-10, var / (mean - inf_bound)))
            param = prob * abs(mean - inf_bound) / (1-prob)
            e = NegativeBinomial(inf_bound, param, prob)
        else:
            raise ValueError("Wrong distribution label:" + str(iident_id))
    else:
        raise TypeError("Wrong argument type:" + str(type(histo)))
    return(e)

__all__ = ["Estimate", "EstimateFunctions"]

[docs] class EstimateFunctions: """ Class containing histogram estimation functions This class must not be used alone, but through an histogram object """
[docs] def estimate_nonparametric(histo): """ Estimate a non parametric distribution :Parameters: * histo (histogram, mixture_data, convolution_data, compound_data) :Usage: .. doctest:: :options: +SKIP >>> Estimate(histo, "NON-PARAMETRIC") >>> estimate_nonparametric(histo) """ return _DiscreteParametricModel(histo)
[docs] def estimate_parametric(histo, ident, MinInfBound=0, InfBoundStatus="Free"): """ Estimate a parametric discrete distribution (binomial, Poisson or negative binomial distribution with an additional shift parameter) :Parameters: * histo (histogram, mixture_data, convolution_data, compound_data), * ident ("BINOMIAL", "POISSON", "NEGATIVE_BINOMIAL", "UNIFORM") * MinInfBound (int): lower bound to the range of possible values (0 - default value - or 1). * InfBoundStatus (string): shifting or not of the distribution: "Free" (default value) or "Fixed". T :Usage: .. doctest:: :options: +SKIP >>> estimate_parametric(histo, ident, MinInfBound=0, InfBoundStatus="Free") >>> Estimate(histo, "NB", MinInfBound=1, InfBoundStatus="Fixed") """ error.CheckType([ident, MinInfBound, InfBoundStatus], [str, int, str]) flag = bool(InfBoundStatus == "Free") try: ident_id = dist_type[ident] except KeyError: raise KeyError("Valid type are %s" % (str(list(dist_type.keys())))) e = histo.parametric_estimation(int(ident_id), MinInfBound, flag) if (e is not None): return(_DiscreteParametricModel(e, histo)) else: e = histo.default_parametric_estimation(int(ident_id)) if (e is None): raise FormatError("Estimation Error") else: return(_DiscreteParametricModel(e, histo))
[docs] def estimate_DiscreteMixture(histo, *args, **kargs): """ Estimate a finite mixture of discrete distributions :Parameters: * histo (histogram, mixture_data, convolution_data, compound_data), * distributions (list) : a list of distribution object or distribution label(string) : 'B', 'NB', 'U', 'P', ... * unknown (string): type of unknown distribution: "Sum" or "Elementary". :Keywords: * MinInfBound (int): lower bound to the range of possible values (0 -default- or 1). \ This optional argument cannot be used in conjunction \ with the optional argument InitialDistribution. * InfBoundStatus (string): shifting or not of the distribution: "Free" (default value) or "Fixed". * DistInfBoundStatus (string): shifting or not of the subsequent components of \ the mixture: "Free" (default value) or "Fixed". * NbComponent (string): estimation of the number of components of the mixture: \ "Fixed" (default value) or "Estimated". Le number of estimated \ components is comprised between\ 1 and a maximum number which is given by the number of specified \ parametric distributions in the mandatory arguments \ (all of these distributions are assumed to be unknown). * Penalty (string): type of Penalty function for model selection: \ "AIC" (Akaike Information Criterion), \ "AICc" (corrected Akaike Information Criterion) \ "BIC" (Bayesian Information Criterion - default value). \ "BICc" (corrected Bayesian Information Criterion). \ This optional argument can only be used if the optional argument NbComponent is set at "Estimated". :Examples: .. doctest:: :options: +SKIP >>> estimate_DiscreteMixture(histo, "MIXTURE", "B", dist,...,, MinInfBound=1, InfBoundStatus="Fixed", DistInfBoundStatus="Fixed") >>> estimate_DiscreteMixture(histo, "MIXTURE", "B", "NB",...,, MinInfBound=1, InfBoundStatus="Fixed", DistInfBoundStatus="Fixed", NbComponent="Estimated", Penalty="AIC") >>> Estimate(histo, "MIXTURE", "B", dist, MinInfBound=1, InfBoundStatus="Fixed", DistInfBoundStatus="Fixed") >>> Estimate(histo, "MIXTURE", "B", "NB", MinInfBound=1, InfBoundStatus="Fixed", DistInfBoundStatus="Fixed", NbComponent="Estimated", Penalty="AIC") """ #alias #error.CheckArgumentsLength(args, 1, 1) # get user arguments # list of distributions can be either a list or several arguments # e.g.: estimate_DiscreteMixture(["B","B"]) or estimate_DiscreteMixture("B", "B") if len(args)==1 and type(args[0])==list: distributions = args[0] else: distributions = list(args) InfBoundStatus = kargs.get("InfBoundStatus","Fixed") DistInfBoundStatus = kargs.get("DistInfBoundStatus", "Free") NbComponent = kargs.get("NbComponent", "Fixed") MinInfBound = kargs.get("MinInfBound", 0) Penalty = error.ParseKargs(kargs, "Penalty", "AIC", likelihood_penalty_type) #should be before the conversion to booleans error.CheckType([MinInfBound, InfBoundStatus, DistInfBoundStatus, NbComponent, Penalty], [int, str, str, str, LikelihoodPenaltyType]) # transform into boolean when needed InfBoundStatus = bool(InfBoundStatus == "Free") DistInfBoundStatus = bool(DistInfBoundStatus == "Free") NbComponent = bool(NbComponent == "Estimated") estimate = [] # list of bool pcomponent = [] # list of distribution ident = [] # list of distribution identifier # Parse list of distribution that could be defined by a distribution, # compound, mixture, convolution or simplya string such as "B", # "Mixture", ... # breakpoint() for dist in distributions: if isinstance(dist, str): dist_authorised = ["BINOMIAL", "B", "POISSON", "P", "NB", "NEGATIVE_BINOMIAL"] if dist not in dist_authorised: raise ValueError("""If distribution is a string, then it must be in %s. You provided %s""" % (dist_authorised, dist)) #todo: check that poisson is allowed # TODO: check wether ident is properly transferred to _DiscreteParametric(dist) d = _DiscreteDistributionData(histo).estimate_parametric(dist) assert(d.get_ident() == dist_type[dist]) pcomponent.append(d) ident.append(dist_type[dist]) estimate.append(True) elif (isinstance(dist, _DiscreteParametric) or \ isinstance(dist, _DiscreteParametricModel)): pcomponent.append(_DiscreteParametricModel(dist)) ident.append(None) estimate.append(False) elif type(dist) in [_DiscreteMixture, _Convolution, _Compound]: pcomponent.append(_Distribution(dist)) ident.append(None) estimate.append(False) else: raise ValueError("""In the case of a MIXTURE estimation, argument related to distributions must be either string, or Distribution, Mixture, Convolution, Compound. %s provided""" % dist) # check parameters if not NbComponent and Penalty: raise TypeError(""" Penalty can only be used with NbComponent set to 'Estimated'""") if not NbComponent: # "FIXED" # TODO: debug from here (imixt: are components BINOMIAL distributions, what is the mass function, etc.) imixt = _DiscreteMixture(pcomponent) ret = histo.discrete_mixture_estimation1(imixt, estimate, MinInfBound, InfBoundStatus, DistInfBoundStatus) return ret else: # "ESTIMATED" ret = histo.discrete_mixture_estimation2(ident, MinInfBound, InfBoundStatus, DistInfBoundStatus, Penalty) return ret
[docs] def estimate_compound(histo, *args, **kargs): """estimate a compound :Usage: .. doctest:: :options: +SKIP >>> Estimate(histo, "COMPOUND", dist, unknown, Parametric=False, MinInfBound=0) Estimate(histo, "COMPOUND", dist, unknown, InitialDistribution=initial_dist, Parametric=False) """ if len(args)<2: raise ValueError("expect at least three arguments") known_distribution = args[0] ##if isinstance(known_distribution, _DiscreteParametricModel): # known_distribution = _DiscreteParametric(known_distribution) #elif type(known_distribution) in [_DiscreteMixture, _Convolution, _Compound]: # known_distribution = _Distribution(known_distribution) #else: # raise TypeError(""" # argument "known_distribution" must be of type _DiscreteMixture, # _COnvolution, _Compound or _DiscreteParametricModel""") Type = args[1] error.CheckType([Type], [str]) Weight = kargs.get("Weight", -1) NbIteration = kargs.get("NbIteration", -1) InitialDistribution = kargs.get("InitialDistribution", None) MinInfBound = kargs.get("MinInfBound", 0) Estimator = error.ParseKargs(kargs, "Estimator", "Likelihood", estimator_type) Penalty = error.ParseKargs(kargs, "Penalty", "SecondDifference", smoothing_penalty_type) Outside = error.ParseKargs(kargs, "Outside", "Zero", outside_type) if MinInfBound and InitialDistribution: raise ValueError("""MinInfBound and InitialDistribution cannot be used together.""") #if Estimator != _stat_tool.PENALIZED_LIKELIHOOD: # if Penalty or Weight or Outside: # raise ValueError("""Estimator cannot be used with O # utside or Weight or Penalty option""") #The second argument is either a string (e.g.,"Sum") or an unknown #distribution. error.CheckValue([Type], [list(compound_type.keys())]) Type = compound_type[Type] # try: # if Type: # Type = compound_type[Type] # except KeyError: # raise AttributeError("Bad type. Possible types are %s" # % (str(list(compound_type.keys())))) #The second argument is either a string (e.g.,"Sum") or an unknown #distribution. unknown_distribution = None if InitialDistribution: unknown_distribution = InitialDistribution if isinstance(unknown_distribution, _Distribution): unknown_distribution = _DiscreteParametric(unknown_distribution) elif type(unknown_distribution) in \ [_DiscreteMixture, _Convolution, _Compound]: unknown_distribution = _Distribution(unknown_distribution) else: raise TypeError(""" argument "known_distribution" must be of type _DiscreteMixture, _COnvolution, _Compound or _DiscreteParametricModel""") if Type == compound_type['Sum'] : return histo.compound_estimation1( unknown_distribution, known_distribution, Type, Estimator, NbIteration, Weight, Penalty, Outside) elif Type == compound_TYPE['Elementary']: return histo.compound_estimation1( known_distribution, unknown_distribution, Type, Estimator, NbIteration, Weight, Penalty, Outside) else: raise ValueError("Bad compound type: " + str(Type) + "; not in " + str(compound_type.values())) else: return histo.compound_estimation2( known_distribution, Type, MinInfBound, Estimator, NbIteration, Weight, Penalty, Outside)
[docs] def estimate_convolution(histo, *args, **kargs): """ Estimate a convolution :Usage: .. doctest:: :options: +SKIP >>> Estimate(histo, "CONVOLUTION", dist, MinInfBound=1, Parametric=False) Estimate(histo, "CONVOLUTION", dist, InitialDistribution=initial_dist, Parametric=False) """ if len(args)==0: raise ValueError("expect at least two argument") known_distribution = args[0] Weight = kargs.get("Weight", -1) NbIteration = kargs.get("NbIteration", -1) InitialDistribution = kargs.get("InitialDistribution", None) MinInfBound = kargs.get("MinInfBound", 0) Estimator = error.ParseKargs(kargs, "Estimator", "Likelihood", estimator_type) Penalty = error.ParseKargs(kargs, "Penalty", "SecondDifference", smoothing_penalty_type) Outside = error.ParseKargs(kargs, "Outside", "Zero", outside_type) if isinstance(known_distribution, _DiscreteParametricModel): known_distribution = _DiscreteParametric(known_distribution) elif type(known_distribution) in [_DiscreteMixture, _Convolution, _Compound]: known_distribution = _Distribution(known_distribution) else: raise TypeError(""" argument "known_distribution" must be of type _DiscreteMixture, _COnvolution, _Compound or _DiscreteParametricModel""") if InitialDistribution: return histo.convolution_estimation1(known_distribution, InitialDistribution, Estimator, NbIteration, Weight, Penalty, Outside) else : return histo.convolution_estimation2(known_distribution, MinInfBound, Estimator, NbIteration, Weight, Penalty, Outside)
# Extend _Histogram class _FrequencyDistribution = interface.extend_class( _FrequencyDistribution, EstimateFunctions) _FrequencyDistribution.default_parametric_estimation = lambda h, iident_id: default_parametric_estimation(h, iident_id)
[docs] def Estimate(histo, itype, *args, **kargs): """Estimate function This function is a dispatcher to several estimate functions depending on the first argument and the type. :param obj: the input object (may be histogram, sequence, compound, ...) :param itype: string. .. seealso:: :func:`~openalea.stat_tool.estimate.EstimateFunctions.estimate_nonparametric`, :func:`~openalea.stat_tool.estimate.EstimateFunctions.estimate_parametric`, :func:`~openalea.stat_tool.estimate.EstimateFunctions.estimate_DiscreteMixture`, :func:`~openalea.stat_tool.estimate.EstimateFunctions.estimate_convolution`, :func:`~openalea.stat_tool.estimate.EstimateFunctions.estimate_compound`, """ fct_map = { "NONPARAMETRIC" : _FrequencyDistribution.estimate_nonparametric, "NP" : _FrequencyDistribution.estimate_nonparametric, "B" : _FrequencyDistribution.estimate_parametric, "BINOMIAL" : _FrequencyDistribution.estimate_parametric, "P" : _FrequencyDistribution.estimate_parametric, "POISSON" : _FrequencyDistribution.estimate_parametric, "NB" : _FrequencyDistribution.estimate_parametric, "NEGATIVE_BINOMIAL" : _FrequencyDistribution.estimate_parametric, "U" : _FrequencyDistribution.estimate_parametric, "UNIFORM" : _FrequencyDistribution.estimate_parametric, "MIXTURE" : _FrequencyDistribution.estimate_DiscreteMixture, "CONVOLUTION" : _FrequencyDistribution.estimate_convolution, "COMPOUND": _FrequencyDistribution.estimate_compound, } Type = itype.upper() # sequence analysis case if Type not in list(fct_map.keys()): try: from openalea.sequence_analysis.estimate import Estimate \ as SeqEstimate except: raise ImportError("Could not import sequence_analysis") return SeqEstimate(histo, itype, *args, **kargs) else: fct = fct_map[Type] if fct == _FrequencyDistribution.estimate_parametric: return fct(histo, Type, *args, **kargs) elif fct == _FrequencyDistribution.estimate_DiscreteMixture: return fct(histo, *args, **kargs) else: return fct(histo, *args, **kargs)