utils.py 5.43 KB
Newer Older
1 2 3
''' utility functions for bob.measure '''

import numpy
4
import scipy.stats
Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
5 6 7
import logging

LOGGER = logging.getLogger(__name__)
8

9

10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
def remove_nan(scores):
    """Remove NaN(s) from the given array.

    Parameters
    ----------
    scores : :py:class:`numpy.ndarray`
        Array of scores. Any array-like accepted by
        :py:func:`numpy.asarray` (e.g. a list of floats) is converted first,
        so boolean-mask indexing below always works.

    Returns
    -------
    :py:class:`numpy.ndarray`
        array without NaN(s)
    :py:class:`int`
        number of NaN(s) in the input array
    :py:class:`int`
        length of the input array
    """
    scores = numpy.asarray(scores)
    nans = numpy.isnan(scores)
    # Count in native code; the builtin ``sum`` would iterate the mask one
    # element at a time in Python.
    sum_nans = int(numpy.count_nonzero(nans))
    total = len(scores)
    if sum_nans > 0:
        LOGGER.warning('Found {} NaNs in {} scores'.format(sum_nans, total))
    return scores[~nans], sum_nans, total
32

33

34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
def get_fta(scores):
    """Calculate the Failure To Acquire (FtA) rate.

    The FtA rate is the proportion of NaN(s) among all the input scores
    (negatives and positives together).

    Parameters
    ----------
    scores :
        Tuple of (``negative``, ``positive``) :py:class:`numpy.ndarray`:
        the first element is treated as the negatives, the second as the
        positives.

    Returns
    -------
    (:py:class:`numpy.ndarray`, :py:class:`numpy.ndarray`)
        (negatives, positives) scores without NaN(s)
    :py:class:`float`
        failure to acquire rate; ``0.0`` when both arrays are empty
    """
    neg, neg_nans, neg_total = remove_nan(scores[0])
    pos, pos_nans, pos_total = remove_nan(scores[1])
    fta_total = float(neg_total + pos_total)
    if fta_total == 0:
        # No scores at all: report no failure instead of dividing by zero.
        return ((neg, pos), 0.0)
    return ((neg, pos), (neg_nans + pos_nans) / fta_total)

59

60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
def get_fta_list(scores):
    """Split a list of score pairs and compute the FTA of each entry.

    Parameters
    ----------
    scores: :any:`list`
        list of scores; each entry is either ``None`` or a value accepted
        by :py:func:`get_fta`

    Returns
    -------
    neg_list: :any:`list`
        list of negatives (``None`` for ``None`` entries)
    pos_list: :any:`list`
        list of positives (``None`` for ``None`` entries)
    fta_list: :any:`list`
        list of FTAs (``None`` for ``None`` entries)

    Raises
    ------
    ValueError
        If :py:func:`get_fta` yields ``None`` as the negatives of an entry.
    """
    negatives, positives, rates = [], [], []
    for entry in scores:
        if entry is None:
            # Keep a placeholder so the three lists stay aligned with input.
            entry_neg, entry_pos, entry_fta = None, None, None
        else:
            (entry_neg, entry_pos), entry_fta = get_fta(entry)
            if entry_neg is None:
                raise ValueError("While loading dev-score file")
        negatives.append(entry_neg)
        positives.append(entry_pos)
        rates.append(entry_fta)
    return (negatives, positives, rates)

91

92
def get_thres(criter, neg, pos, far=None):
    """Get threshold for the given positive/negatives scores and criterion

    Parameters
    ----------
    criter :
        Criterion (`eer` or `min-hter` or `far`)
    neg : :py:class:`numpy.ndarray`:
        array of negative scores
    pos : :py:class:`numpy.ndarray`:
        array of positive scores
    far : optional
        Value forwarded to ``far_threshold``; required (not ``None``) when
        ``criter`` is `far`, ignored otherwise.

    Returns
    -------
    :py:obj:`float`
        threshold

    Raises
    ------
    ValueError
        If ``criter`` is `far` and ``far`` is ``None``, or if ``criter`` is
        not one of the supported criteria.
    """
    # The threshold functions are imported lazily from the package, only for
    # the criterion that was actually requested.
    if criter == 'eer':
        from . import eer_threshold
        return eer_threshold(neg, pos)

    if criter == 'min-hter':
        from . import min_hter_threshold
        return min_hter_threshold(neg, pos)

    if criter == 'far':
        if far is None:
            raise ValueError("FAR value must be provided through "
                             "``--far-value`` option.")
        from . import far_threshold
        return far_threshold(neg, pos, far)

    raise ValueError("Incorrect plotting criterion: ``%s``" % criter)
123

124

125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
def get_colors(n):
    """Get a list of matplotlib colors

    Parameters
    ----------
    n : :obj:`int`
        Number of colors needed

    Returns
    -------
    :any:`list`
        list of colors. For ``n <= 10`` this is always the ten color-cycle
        names ``'C0'`` ... ``'C9'``; for larger ``n`` it is ``n + 1`` colors
        sampled evenly from the `magma` colormap.
    """
    if n > 10:
        from matplotlib import pyplot
        # ``pyplot.get_cmap`` is available on both old and recent
        # Matplotlib, whereas ``matplotlib.cm.get_cmap`` (reached through
        # ``pyplot.cm``) was removed in Matplotlib 3.9.
        cmap = pyplot.get_cmap('magma')
        return [cmap(i) for i in numpy.linspace(0, 1.0, n + 1)]

    return ['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9']

146

147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
def get_linestyles(n, on=True):
    """Get a list of matplotlib linestyles

    Parameters
    ----------
    n : :obj:`int`
        Minimum number of linestyles to output
    on : :obj:`bool`, optional
        If false, no linestyle is selected and a list of ``n`` ``None``
        values is returned instead.

    Returns
    -------
    :any:`list`
        list of linestyles. When ``on`` is true the list holds the 13 base
        dash patterns, doubled as many times as needed to reach at least
        ``n`` entries (so it may be longer than ``n``).
    """
    if not on:
        return [None] * n

    # (offset, on/off dash sequence) tuples
    base_patterns = (
        (0, ()),  # solid
        (0, (1, 1)),  # densely dotted
        (0, (5, 5)),  # dashed
        (0, (5, 1)),  # densely dashed
        (0, (3, 1, 1, 1, 1, 1)),  # densely dashdotdotted
        (0, (3, 10, 1, 10, 1, 10)),  # loosely dashdotdotted
        (0, (3, 5, 1, 5, 1, 5)),  # dashdotdotted
        (0, (3, 1, 1, 1)),  # densely dashdotted
        (0, (1, 5)),  # dotted
        (0, (3, 5, 1, 5)),  # dashdotted
        (0, (5, 10)),  # loosely dashed
        (0, (3, 10, 1, 10)),  # loosely dashdotted
        (0, (1, 10)),  # loosely dotted
    )
    styles = list(base_patterns)
    # Repeat the whole cycle until there are enough entries.
    while len(styles) < n:
        styles = styles * 2
    return styles

182

183 184 185 186 187 188 189 190 191 192 193
def confidence_for_indicator_variable(x, n, alpha=0.05):
    '''Calculates the confidence interval for proportion estimates

    The Clopper-Pearson interval method is used for estimating the confidence
    intervals.

    Parameters
    ----------
    x : int
        The number of successes.
    n : int
        The number of trials.
    alpha : :obj:`float`, optional
        The 1-confidence value that you want. For example, alpha should be 0.05
        to obtain 95% confidence intervals.

    Returns
    -------
    (:obj:`float`, :obj:`float`)
        a tuple of (lower_bound, upper_bound) which
        shows the limit of your success rate: lower_bound < x/n < upper_bound
    '''
    tail = alpha / 2.0
    beta_ppf = scipy.stats.beta.ppf
    lower = beta_ppf(tail, x, n - x + 1)
    upper = beta_ppf(1 - tail, x + 1, n - x)
    # A NaN quantile is replaced by the corresponding end of [0, 1].
    lower = 0 if numpy.isnan(lower) else lower
    upper = 1 if numpy.isnan(upper) else upper
    return (lower, upper)