utils.py 5.55 KB
Newer Older
1 2 3
''' utility functions for bob.measure '''

import numpy
4
import scipy.stats
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
import bob.core

def remove_nan(scores):
    """remove_nan

    Remove NaN(s) in the given array

    Parameters
    ----------
    scores :
        :py:class:`numpy.ndarray` : array

    Returns
    -------
        :py:class:`numpy.ndarray` : array without NaN(s)
        :py:class:`int` : number of NaN(s) in the input array
        :py:class:`int` : length of the input array
    """
    nans = numpy.isnan(scores)
    sum_nans = sum(nans)
    total = len(scores)
    if sum_nans > 0:
        logger = bob.core.log.setup("bob.measure")
        logger.warning('Found {} NaNs in {} scores'.format(sum_nans, total))
    return scores[numpy.where(~nans)], sum_nans, total

def get_fta(scores):
    """get_fta
        calculates the Failure To Acquire (FtA) rate, i.e. proportion of NaN(s)
        in the input scores

    Parameters
    ----------
    scores :
        Tuple of (``positive``, ``negative``) :py:class:`numpy.ndarray`.

    Returns
    -------
    (:py:class:`numpy.ndarray`, :py:class:`numpy.ndarray`): scores without
    NaN(s)
    :py:class:`float` : failure to acquire rate
    """
    fta_sum, fta_total = 0, 0
    neg, sum_nans, total = remove_nan(scores[0])
    fta_sum += sum_nans
    fta_total += total
    pos, sum_nans, total = remove_nan(scores[1])
    fta_sum += sum_nans
    fta_total += total
    return ((neg, pos), fta_sum / fta_total)

56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
def get_fta_list(scores):
    """ Get FTAs for a list of scores

    Parameters
    ----------
    scores: :any:`list`
        list of scores

    Returns
    -------
    neg_list: :any:`list`
        list of negatives
    pos_list: :any:`list`
        list of positives
    fta_list: :any:`list`
        list of FTAs
    """
    neg_list = []
    pos_list = []
    fta_list = []
    for score in scores:
        neg = pos = fta = None
        if score is not None:
            (neg, pos), fta = get_fta(score)
            if neg is None:
                raise ValueError("While loading dev-score file")
        neg_list.append(neg)
        pos_list.append(pos)
        fta_list.append(fta)
    return (neg_list, pos_list, fta_list)

87
def get_thres(criter, neg, pos, far=None):
88 89 90 91 92
    """Get threshold for the given positive/negatives scores and criterion

    Parameters
    ----------
    criter :
93
        Criterion (`eer` or `hter` or `far`)
94 95 96 97 98 99 100 101 102 103 104 105 106
    neg : :py:class:`numpy.ndarray`:
        array of negative scores
        pos : :py:class:`numpy.ndarray`::
        array of positive scores

    Returns
    -------
    :py:obj:`float`
        threshold
    """
    if criter == 'eer':
        from . import eer_threshold
        return eer_threshold(neg, pos)
107
    elif criter == 'min-hter':
108 109
        from . import min_hter_threshold
        return min_hter_threshold(neg, pos)
110
    elif criter == 'far':
111
        if far is None:
112 113
            raise ValueError("FAR value must be provided through "
                             "``--far-value`` option.")
114 115
        from . import far_threshold
        return far_threshold(neg, pos, far)
116
    else:
117
        raise ValueError("Incorrect plotting criterion: ``%s``" % criter)
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137

def get_colors(n):
    """get_colors
    Get a list of matplotlib colors

    Parameters
    ----------
    n : :obj:`int`
        Number of colors to output

    Returns
    -------
    :any:`list`
        list of colors
    """
    if n > 10:
        from matplotlib import pyplot
        cmap = pyplot.cm.get_cmap(name='magma')
        return [cmap(i) for i in numpy.linspace(0, 1.0, n + 1)]

138
    return ['C0','C1','C2','C3','C4','C5','C6','C7','C8','C9']
139

140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
def get_linestyles(n, on=True):
    """Get a list of matplotlib linestyles

    Parameters
    ----------
    n : :obj:`int`
        Number of linestyles to output

    Returns
    -------
    :any:`list`
        list of linestyles
    """
    if not on:
        return [None] * n

    list_linestyles = [
        (0, ()),                    #solid
        (0, (1, 1)),                #densely dotted
        (0, (5, 5)),                #dashed
        (0, (5, 1)),                #densely dashed
        (0, (3, 1, 1, 1, 1, 1)),    #densely dashdotdotted
        (0, (3, 10, 1, 10, 1, 10)), #loosely dashdotdotted
        (0, (3, 5, 1, 5, 1, 5)),    #dashdotdotted
        (0, (3, 1, 1, 1)),          #densely dashdotted
        (0, (1, 5)),                #dotted
        (0, (3, 5, 1, 5)),          #dashdotted
        (0, (5, 10)),               #loosely dashed
        (0, (3, 10, 1, 10)),        #loosely dashdotted
        (0, (1, 10))                #loosely dotted
    ]
    while n > len(list_linestyles):
        list_linestyles += list_linestyles
    return list_linestyles

175 176 177 178 179 180 181 182 183 184 185
def confidence_for_indicator_variable(x, n, alpha=0.05):
    '''Calculates the confidence interval for proportion estimates
    The Clopper-Pearson interval method is used for estimating the confidence
    intervals.

    Parameters
    ----------
    x : int
        The number of successes.
    n : int
        The number of trials.
186
        alpha : :obj:`float`, optional
187 188 189 190 191
        The 1-confidence value that you want. For example, alpha should be 0.05
        to obtain 95% confidence intervals.

    Returns
    -------
192 193
    (:obj:`float`, :obj:`float`)
        a tuple of (lower_bound, upper_bound) which
194 195 196 197 198 199 200 201 202
        shows the limit of your success rate: lower_bound < x/n < upper_bound
    '''
    lower_bound = scipy.stats.beta.ppf(alpha / 2.0, x, n - x + 1)
    upper_bound = scipy.stats.beta.ppf(1 - alpha / 2.0, x + 1, n - x)
    if numpy.isnan(lower_bound):
        lower_bound = 0
    if numpy.isnan(upper_bound):
        upper_bound = 1
    return (lower_bound, upper_bound)