Commit 14e39fb4 authored by André Anjos

[*] Remove all traces of metric -> measure

parent 8c873885
Pipeline #39701 failed
@@ -15,7 +15,7 @@ import torchvision.transforms.functional as VF
 import h5py
-from ..utils.metric import base_measures
+from ..utils.measure import base_measures
 import logging
@@ -49,9 +49,9 @@ def _posneg(pred, gt, threshold):
 return tp_tensor, fp_tensor, tn_tensor, fn_tensor
-def _sample_metrics(pred, gt, bins):
+def _sample_measures(pred, gt, bins):
 """
-Calculates metrics on one single sample and saves it to disk
+Calculates measures on one single sample and saves it to disk
 Parameters
@@ -71,7 +71,7 @@ def _sample_metrics(pred, gt, bins):
 Returns
 -------
-metrics : pandas.DataFrame
+measures : pandas.DataFrame
 A pandas dataframe with the following columns:
@@ -94,7 +94,7 @@ def _sample_metrics(pred, gt, bins):
 pred, gt, threshold
 )
-# calc metrics from scalars
+# calc measures from scalars
 tp_count = torch.sum(tp_tensor).item()
 fp_count = torch.sum(fp_tensor).item()
 tn_count = torch.sum(tn_tensor).item()
@@ -221,7 +221,7 @@ def run(
 threshold=None,
 ):
 """
-Runs inference and calculates metrics
+Runs inference and calculates measures
 Parameters
@@ -232,7 +232,7 @@ def run(
 name : str
 the local name of this dataset (e.g. ``train``, or ``test``), to be
-used when saving metrics files.
+used when saving measures files.
 predictions_folder : str
 folder where predictions for the dataset images has been previously
@@ -263,7 +263,7 @@ def run(
 """
-# Collect overall metrics
+# Collect overall measures
 bins = 1000 # number of thresholds to analyse for
 data = {}
@@ -279,7 +279,7 @@ def run(
 raise RuntimeError(
 f"{stem} entry already exists in data. Cannot overwrite."
 )
-data[stem] = _sample_metrics(pred, gt, bins)
+data[stem] = _sample_measures(pred, gt, bins)
 if overlayed_folder is not None:
 overlay_image = _sample_analysis(
@@ -291,31 +291,31 @@ def run(
 overlay_image.save(fullpath)
 # Merges all dataframes together
-df_metrics = pandas.concat(data.values())
+df_measures = pandas.concat(data.values())
 # Report and Averages
-avg_metrics = df_metrics.groupby("index").mean()
+avg_measures = df_measures.groupby("index").mean()
-std_metrics = df_metrics.groupby("index").std()
+std_measures = df_measures.groupby("index").std()
 # Uncomment below for F1-score calculation based on average precision and
-# metrics instead of F1-scores of individual images. This method is in line
+# measures instead of F1-scores of individual images. This method is in line
 # with Maninis et. al. (2016)
 #
-# avg_metrics["f1_score"] = \
+# avg_measures["f1_score"] = \
-# (2* avg_metrics["precision"]*avg_metrics["recall"])/ \
+# (2* avg_measures["precision"]*avg_measures["recall"])/ \
-# (avg_metrics["precision"]+avg_metrics["recall"])
+# (avg_measures["precision"]+avg_measures["recall"])
-avg_metrics["std_pr"] = std_metrics["precision"]
+avg_measures["std_pr"] = std_measures["precision"]
-avg_metrics["pr_upper"] = avg_metrics["precision"] + std_metrics["precision"]
+avg_measures["pr_upper"] = avg_measures["precision"] + std_measures["precision"]
-avg_metrics["pr_lower"] = avg_metrics["precision"] - std_metrics["precision"]
+avg_measures["pr_lower"] = avg_measures["precision"] - std_measures["precision"]
-avg_metrics["std_re"] = std_metrics["recall"]
+avg_measures["std_re"] = std_measures["recall"]
-avg_metrics["re_upper"] = avg_metrics["recall"] + std_metrics["recall"]
+avg_measures["re_upper"] = avg_measures["recall"] + std_measures["recall"]
-avg_metrics["re_lower"] = avg_metrics["recall"] - std_metrics["recall"]
+avg_measures["re_lower"] = avg_measures["recall"] - std_measures["recall"]
-avg_metrics["std_f1"] = std_metrics["f1_score"]
+avg_measures["std_f1"] = std_measures["f1_score"]
-maxf1 = avg_metrics["f1_score"].max()
+maxf1 = avg_measures["f1_score"].max()
-maxf1_index = avg_metrics["f1_score"].idxmax()
+maxf1_index = avg_measures["f1_score"].idxmax()
-maxf1_threshold = avg_metrics["threshold"][maxf1_index]
+maxf1_threshold = avg_measures["threshold"][maxf1_index]
 logger.info(
 f"Maximum F1-score of {maxf1:.5f}, achieved at "
@@ -326,8 +326,8 @@ def run(
 # get the closest possible threshold we have
 index = int(round(bins * threshold))
-f1_a_priori = avg_metrics["f1_score"][index]
+f1_a_priori = avg_measures["f1_score"][index]
-actual_threshold = avg_metrics["threshold"][index]
+actual_threshold = avg_measures["threshold"][index]
 logger.info(
 f"F1-score of {f1_a_priori:.5f}, at threshold "
@@ -337,11 +337,11 @@ def run(
 if output_folder is not None:
 logger.info(f"Output folder: {output_folder}")
 os.makedirs(output_folder, exist_ok=True)
-metrics_path = os.path.join(output_folder, f"{name}.csv")
+measures_path = os.path.join(output_folder, f"{name}.csv")
 logger.info(
-f"Saving averages over all input images at {metrics_path}..."
+f"Saving averages over all input images at {measures_path}..."
 )
-avg_metrics.to_csv(metrics_path)
+avg_measures.to_csv(measures_path)
 return maxf1_threshold
@@ -364,7 +364,7 @@ def compare_annotators(baseline, other, name, output_folder,
 name : str
 the local name of this dataset (e.g. ``train-second-annotator``, or
-``test-second-annotator``), to be used when saving metrics files.
+``test-second-annotator``), to be used when saving measures files.
 output_folder : str
 folder where to store results
@@ -378,7 +378,7 @@ def compare_annotators(baseline, other, name, output_folder,
 logger.info(f"Output folder: {output_folder}")
 os.makedirs(output_folder, exist_ok=True)
-# Collect overall metrics
+# Collect overall measures
 data = {}
 for baseline_sample, other_sample in tqdm(
@@ -392,7 +392,7 @@ def compare_annotators(baseline, other, name, output_folder,
 raise RuntimeError(
 f"{stem} entry already exists in data. " f"Cannot overwrite."
 )
-data[stem] = _sample_metrics(pred, gt, 2)
+data[stem] = _sample_measures(pred, gt, 2)
 if overlayed_folder is not None:
 overlay_image = _sample_analysis(
@@ -405,33 +405,33 @@ def compare_annotators(baseline, other, name, output_folder,
 overlay_image.save(fullpath)
 # Merges all dataframes together
-df_metrics = pandas.concat(data.values())
+df_measures = pandas.concat(data.values())
-df_metrics.drop(0, inplace=True)
+df_measures.drop(0, inplace=True)
 # Report and Averages
-avg_metrics = df_metrics.groupby("index").mean()
+avg_measures = df_measures.groupby("index").mean()
-std_metrics = df_metrics.groupby("index").std()
+std_measures = df_measures.groupby("index").std()
 # Uncomment below for F1-score calculation based on average precision and
 # {name} instead of F1-scores of individual images. This method is in line
 # with Maninis et. al. (2016)
 #
-# avg_metrics["f1_score"] = \
+# avg_measures["f1_score"] = \
-# (2* avg_metrics["precision"]*avg_metrics["recall"])/ \
+# (2* avg_measures["precision"]*avg_measures["recall"])/ \
-# (avg_metrics["precision"]+avg_metrics["recall"])
+# (avg_measures["precision"]+avg_measures["recall"])
-avg_metrics["std_pr"] = std_metrics["precision"]
+avg_measures["std_pr"] = std_measures["precision"]
-avg_metrics["pr_upper"] = avg_metrics["precision"] + std_metrics["precision"]
+avg_measures["pr_upper"] = avg_measures["precision"] + std_measures["precision"]
-avg_metrics["pr_lower"] = avg_metrics["precision"] - std_metrics["precision"]
+avg_measures["pr_lower"] = avg_measures["precision"] - std_measures["precision"]
-avg_metrics["std_re"] = std_metrics["recall"]
+avg_measures["std_re"] = std_measures["recall"]
-avg_metrics["re_upper"] = avg_metrics["recall"] + std_metrics["recall"]
+avg_measures["re_upper"] = avg_measures["recall"] + std_measures["recall"]
-avg_metrics["re_lower"] = avg_metrics["recall"] - std_metrics["recall"]
+avg_measures["re_lower"] = avg_measures["recall"] - std_measures["recall"]
-avg_metrics["std_f1"] = std_metrics["f1_score"]
+avg_measures["std_f1"] = std_measures["f1_score"]
-metrics_path = os.path.join(output_folder, "second-annotator", f"{name}.csv")
+measures_path = os.path.join(output_folder, "second-annotator", f"{name}.csv")
-os.makedirs(os.path.dirname(metrics_path), exist_ok=True)
+os.makedirs(os.path.dirname(measures_path), exist_ok=True)
-logger.info(f"Saving averages over all input images at {metrics_path}...")
+logger.info(f"Saving averages over all input images at {measures_path}...")
-avg_metrics.to_csv(metrics_path)
+avg_measures.to_csv(measures_path)
-maxf1 = avg_metrics["f1_score"].max()
+maxf1 = avg_measures["f1_score"].max()
 logger.info(f"F1-score of {maxf1:.5f} (second annotator; threshold=0.5)")
@@ -12,7 +12,7 @@ import pandas
 import torch
 from tqdm import tqdm
-from ..utils.metric import SmoothedValue
+from ..utils.measure import SmoothedValue
 from ..utils.plot import loss_curve
 import logging
...
@@ -11,7 +11,7 @@ import distutils.version
 import torch
 from tqdm import tqdm
-from ..utils.metric import SmoothedValue
+from ..utils.measure import SmoothedValue
 from ..utils.summary import summary
 from ..utils.resources import cpu_constants, gpu_constants, cpu_log, gpu_log
...
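Both trainer modules now import SmoothedValue from ..utils.measure instead of ..utils.metric. Its implementation is not part of this diff; purely as an illustration of the kind of helper being renamed, here is a minimal running-average tracker. The class name and the update()/avg interface are assumptions for this sketch, not the package's actual API.

import collections

class RunningAverage:
    """Illustrative stand-in for a smoothed-value tracker (not the real class)."""

    def __init__(self, window_size=20):
        self._window = collections.deque(maxlen=window_size)  # recent values
        self._total = 0.0
        self._count = 0

    def update(self, value):
        self._window.append(value)
        self._total += value
        self._count += 1

    @property
    def avg(self):
        # average over the most recent window only
        return sum(self._window) / len(self._window)

    @property
    def global_avg(self):
        # average over every value seen so far
        return self._total / self._count

losses = RunningAverage()
for batch_loss in (0.9, 0.7, 0.65, 0.6):
    losses.update(batch_loss)
print(losses.avg)  # ~0.71, a smoothed training-loss style summary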
@@ -149,7 +149,7 @@ def analyze(
 └── second-annotator/ #if set, store overlayed images for the
 #second annotator here
 └── analysis / #the outputs of the analysis of both train/test sets
-#includes second-annotator "metrics" as well, if
+#includes second-annotator "mesures" as well, if
 # configured
 N.B.: The tool is designed to prevent analysis bias and allows one to
...
@@ -55,11 +55,11 @@ def _load(data, threshold=None):
 data : dict
 A dict in which keys are the names of the systems and the values are
-paths to ``metrics.csv`` style files.
+paths to ``measures.csv`` style files.
 threshold : :py:class:`float`, :py:class:`str`, Optional
 A value indicating which threshold to choose for selecting a "F1-score"
-If set to ``None``, then use the maximum F1-score on that metrics file.
+If set to ``None``, then use the maximum F1-score on that measures file.
 If set to a floating-point value, then use the F1-score that is
 obtained on that particular threshold. If set to a string, it should
 match one of the keys in ``data``. It then first calculate the
@@ -74,7 +74,7 @@ def _load(data, threshold=None):
 A dict in which keys are the names of the systems and the values are
 dictionaries that contain two keys:
-* ``df``: A :py:class:`pandas.DataFrame` with the metrics data loaded
+* ``df``: A :py:class:`pandas.DataFrame` with the measures data loaded
 to
 * ``threshold``: A threshold to be used for summarization, depending on
 the ``threshold`` parameter set on the input
@@ -84,8 +84,8 @@ def _load(data, threshold=None):
 if isinstance(threshold, str):
 logger.info(f"Calculating threshold from maximum F1-score at "
 f"'{threshold}' dataset...")
-metrics_path = data[threshold]
+measures_path = data[threshold]
-df = pandas.read_csv(metrics_path)
+df = pandas.read_csv(measures_path)
 maxf1 = df.f1_score.max()
 use_threshold = df.threshold[df.f1_score.idxmax()]
@@ -101,10 +101,10 @@ def _load(data, threshold=None):
 # loads all data
 retval = {}
-for name, metrics_path in data.items():
+for name, measures_path in data.items():
-logger.info(f"Loading metrics from {metrics_path}...")
+logger.info(f"Loading measures from {measures_path}...")
-df = pandas.read_csv(metrics_path)
+df = pandas.read_csv(measures_path)
 if threshold is None:
 use_threshold = df.threshold[df.f1_score.idxmax()]
@@ -119,9 +119,9 @@ def _load(data, threshold=None):
 epilog="""Examples:
 \b
-1. Compares system A and B, with their own pre-computed metric files:
+1. Compares system A and B, with their own pre-computed measure files:
 \b
-$ bob binseg compare -vv A path/to/A/metrics.csv B path/to/B/metrics.csv
+$ bob binseg compare -vv A path/to/A/train.csv B path/to/B/test.csv
 """,
 )
 @click.argument(
@@ -182,7 +182,7 @@ def compare(label_path, output_figure, table_format, output_table, threshold,
 threshold = _validate_threshold(threshold, data)
-# load all data metrics
+# load all data measures
 data = _load(data, threshold=threshold)
 if output_figure is not None:
...
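As the _load() docstring above describes, each measures CSV is reduced to a single working threshold: the maximum-F1 row when threshold is None, the named system's maximum-F1 threshold when it is a string, or the row matching a given float. A condensed, self-contained sketch of that selection (the helper name is made up for illustration; the f1_score/threshold column names follow the CSVs written by the evaluator):

import pandas

def pick_threshold(measures_csv, threshold=None):
    """Return (threshold, f1_score) chosen from one measures CSV (sketch only)."""
    df = pandas.read_csv(measures_csv)
    if threshold is None:
        # pick the row where the F1-score is maximal
        idx = df.f1_score.idxmax()
    else:
        # pick the row whose threshold is closest to the requested value
        idx = (df.threshold - float(threshold)).abs().idxmin()
    return df.threshold[idx], df.f1_score[idx]

# e.g. pick_threshold("path/to/A/train.csv")      -> max-F1 threshold for A
#      pick_threshold("path/to/B/test.csv", 0.5)  -> measures row nearest 0.5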
@@ -247,7 +247,7 @@ def experiment(
 └── second-annotator/ #if set, store overlayed images for the
 #second annotator here
 └── analysis / #the outputs of the analysis of both train/test sets
-#includes second-annotator "metrics" as well, if
+#includes second-annotator "mesures" as well, if
 # configured
 Training is performed for a configurable number of epochs, and generates at
...
@@ -9,7 +9,7 @@ import torch
 import pandas
 import numpy
-from ..engine.evaluator import _sample_metrics
+from ..engine.evaluator import _sample_measures
 import logging
 logger = logging.getLogger(__name__)
@@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)
 class Tester(unittest.TestCase):
 """
-Unit test for batch metrics
+Unit test for batch measures
 """
 def setUp(self):
@@ -29,10 +29,10 @@ class Tester(unittest.TestCase):
 self.ground_truths = torch.randint(low=0, high=2, size=(2, 1, 420, 420))
 self.names = ["Bob", "Tim"]
-def test_batch_metrics(self):
+def test_batch_measures(self):
 dfs = []
 for pred, gt in zip(self.predictions, self.ground_truths):
-dfs.append(_sample_metrics(pred, gt, 100))
+dfs.append(_sample_measures(pred, gt, 100))
 bm = pandas.concat(dfs)
 self.assertEqual(len(bm), 2 * 100)
...
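The test above relies on _sample_measures() producing one row per analysed threshold, which is why two images at 100 bins yield 200 rows. A rough sketch of how such per-threshold rows follow from the TP/FP/TN/FN counts shown in the evaluator hunks (the column names match those used elsewhere in the diff; the exact binarization and column set of the real function are assumptions here):

import numpy
import pandas
import torch

def sample_measures_sketch(pred, gt, bins):
    """One precision/recall/F1 row per threshold (illustration only)."""
    rows = []
    for index, threshold in enumerate(numpy.linspace(0.0, 1.0, bins)):
        binary = pred >= threshold            # binarize the probability map
        tp = torch.sum(binary & (gt == 1)).item()
        fp = torch.sum(binary & (gt == 0)).item()
        fn = torch.sum(~binary & (gt == 1)).item()
        precision = tp / (tp + fp) if (tp + fp) else 0.0
        recall = tp / (tp + fn) if (tp + fn) else 0.0
        f1 = (2 * precision * recall / (precision + recall)
              if (precision + recall) else 0.0)
        rows.append((index, threshold, precision, recall, f1))
    return pandas.DataFrame(
        rows,
        columns=["index", "threshold", "precision", "recall", "f1_score"],
    )

Two such frames concatenated with pandas.concat() give the 2 * bins rows the test checks for.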
@@ -178,7 +178,7 @@ def _check_experiment_stare(overlay):
 r"^F1-score of.*\(second annotator; threshold=0.5\)$": 2,
 r"^Ended evaluation$": 1,
 r"^Started comparison$": 1,
-r"^Loading metrics from": 4,
+r"^Loading measures from": 4,
 r"^Creating and saving plot at": 1,
 r"^Tabulating performance summary...": 1,
 r"^Saving table at": 1,
@@ -403,7 +403,7 @@ def _check_compare(runner):
 compare,
 [
 "-vv",
-# label - path to metrics
+# label - path to measures
 "test",
 os.path.join(output_folder, "test.csv"),
 "test (2nd. human)",
@@ -418,7 +418,7 @@ def _check_compare(runner):
 assert os.path.exists("comparison.rst")
 keywords = {
-r"^Loading metrics from": 2,
+r"^Loading measures from": 2,
 r"^Creating and saving plot at": 1,
 r"^Tabulating performance summary...": 1,
 r"^Saving table at": 1,
...
@@ -3,7 +3,7 @@
 import tabulate
-from .metric import auc
+from .measure import auc
 def performance_table(data, fmt):
...
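The last hunk only redirects the auc import used by performance_table() to the renamed module. For reference, the area under a curve such as precision-recall can be approximated with the trapezoid rule; a generic sketch, not necessarily how this package's auc is implemented:

import numpy

def trapezoid_auc(x, y):
    """Area under y(x) by the trapezoid rule (generic sketch)."""
    x = numpy.asarray(x, dtype=float)
    y = numpy.asarray(y, dtype=float)
    order = numpy.argsort(x)            # integrate over increasing x
    x, y = x[order], y[order]
    widths = numpy.diff(x)              # base of each trapezoid
    heights = (y[1:] + y[:-1]) / 2.0    # average of the two sides
    return float(numpy.sum(widths * heights))

# e.g. area under a tiny precision-recall curve
print(trapezoid_auc([0.2, 0.5, 0.8], [0.9, 0.8, 0.6]))  # ~0.465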