Skip to content
Snippets Groups Projects
Commit 87bd101d authored by Daniel CARRON's avatar Daniel CARRON :b:
Browse files

[saliency] Remove unused iou code

parent b29dfd63
No related branches found
No related tags found
1 merge request: !46 "Create common library"
Pipeline #89592 failed
......@@ -9,12 +9,9 @@ import typing
import lightning.pytorch
import numpy
import numpy.typing
import skimage.measure
import torch
import torchvision.ops
from tqdm import tqdm
from ...config.data.tbx11k.datamodule import BoundingBox, BoundingBoxes
from ...config.data.tbx11k.datamodule import BoundingBoxes
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
......@@ -24,230 +21,6 @@ SaliencyMap: typing.TypeAlias = (
# Alias for a NumPy array of booleans; used below as the type of binary masks.
BinaryMask: typing.TypeAlias = numpy.typing.NDArray[numpy.bool_]
def _ordered_connected_components(
    saliency_map: SaliencyMap,
    threshold: float,
) -> list[BinaryMask]:
    """Calculate the connected components available on a saliency map and
    return those as individual masks, largest first.

    This implementation is based on [SCORECAM-2020]_:

    1. Thresholding: The pixel values at or above ``threshold`` times the
       maximum value of the saliency map are kept.  Everything else is
       discarded.  The value proposed on [SCORECAM-2020]_ is 0.2.  Use this
       value if unsure.
    2. The thresholded saliency map is transformed into a binary array (ones
       are attributed to all elements at or above the threshold).
    3. We call :py:func:`skimage.measure.label` to evaluate all connected
       components and label those with distinct integers.
    4. We build one boolean mask for each label and return them sorted by
       decreasing size (number of pixels).

    Parameters
    ----------
    saliency_map
        Input saliency map whose connected components will be calculated
        from.
    threshold
        Relative threshold to be used to zero parts of the original saliency
        map.  A value of 0.2 will zero all values in the saliency map that are
        below 20% of the maximum value observed in the said map.

    Returns
    -------
    list[BinaryMask]
        A list of boolean masks, one for each connected component, ordered by
        decreasing size.  This list is empty if the input ``saliency_map``
        is empty or all zeroes, or if no pixel survives the threshold.
    """
    saliency_array = numpy.array(saliency_map)

    # An all-zero map has a maximum of zero, so *every* pixel would satisfy
    # ``value >= threshold * 0`` and the whole image would be reported as one
    # component.  Bail out before thresholding; this also covers empty input,
    # on which ``max()`` would raise.
    if not numpy.any(saliency_array):
        return []

    # thresholds like [SCORECAM-2020]_
    thresholded_mask = (
        saliency_array >= (threshold * saliency_array.max())
    ).astype(numpy.uint8)

    # nothing survived the threshold (e.g. the maximum is negative and
    # ``threshold`` is below 1): there is no component to label
    if not numpy.any(thresholded_mask):
        return []

    labelled, n = skimage.measure.label(thresholded_mask, return_num=True)  # type: ignore

    # label 0 is the background; components are numbered 1..n
    retval = [labelled == k for k in range(1, n + 1)]

    return sorted(retval, key=lambda x: x.sum(), reverse=True)
def _extract_bounding_box(
    mask: BinaryMask,
) -> BoundingBox:
    """Build the bounding box that encloses a connected-component mask.

    Parameters
    ----------
    mask
        The connected-component mask to be enclosed by the bounding box.

    Returns
    -------
    BoundingBox
        A bounding box constructed from the first corner returned by
        :py:func:`torchvision.ops.masks_to_boxes` and a width/height computed
        as the corner difference plus one.  Its first field is set to ``-1``.
    """
    # masks_to_boxes works on a batch of masks, hence the extra leading axis
    batched_mask = torch.tensor(mask).unsqueeze(0)
    corners = torchvision.ops.masks_to_boxes(batched_mask)[0]

    left, top, right, bottom = corners
    width = int(right - left + 1)
    height = int(bottom - top + 1)

    return BoundingBox(-1, int(left), int(top), width, height)
def _compute_max_iou_and_ioda(
    detected_box: BoundingBox,
    gt_bboxes: BoundingBoxes,
) -> tuple[float, float]:
    """Compare a detected box against its best-overlapping ground-truth box.

    Each ground-truth box is intersected with the detected box separately.
    The ground-truth box with the largest intersection (the first one, in
    case of ties) is the one used to compute the returned values.

    Parameters
    ----------
    detected_box
        BoundingBox of the detected area.
    gt_bboxes
        Ground-truth bounding boxes to compare ``detected_box`` against.

    Returns
    -------
    tuple[float, float]
        The intersection over union (iou) and the intersection over the
        detected area (ioda), in this order.  Both are ``0.0`` if the detected
        box has no area or if it intersects none of the ground-truth boxes.
    """
    area_detected = detected_box.area()
    if area_detected == 0:
        return 0.0, 0.0

    best_overlap = 0
    best_gt_area = 0
    for gt_box in gt_bboxes:
        overlap = gt_box.intersection(detected_box)
        if overlap > best_overlap:
            best_overlap, best_gt_area = overlap, gt_box.area()

    # No intersection was found, even though there may be gt boxes
    if best_overlap == 0 and best_gt_area == 0:
        return 0.0, 0.0

    union = area_detected + best_gt_area - best_overlap
    return best_overlap / union, best_overlap / area_detected
def _get_largest_bounding_boxes(
    saliency_map: SaliencyMap,
    n: int,
    threshold: float = 0.2,
) -> list[BoundingBox]:
    """Convert the N largest connected components of a saliency map into
    bounding boxes.

    What is returned depends both on the ``threshold`` applied and on the
    saliency map itself; fewer than ``n`` boxes (possibly none) are returned
    if fewer connected components are found.

    Parameters
    ----------
    saliency_map
        Input saliency map whose connected components will be calculated
        from.
    n
        The maximum number of connected components to keep.  Each kept
        component is translated to bounding-box notation.
    threshold
        Relative threshold used to zero parts of the original saliency map.
        A value of 0.2 will zero all values in the saliency map that are
        below 20% of the maximum value observed in the said map.

    Returns
    -------
    list[BoundingBox]
        Bounding boxes for the (at most) N largest connected components, in
        decreasing order of component size.
    """
    components = _ordered_connected_components(saliency_map, threshold)
    return [_extract_bounding_box(component) for component in components[:n]]
def _compute_simultaneous_iou_and_ioda(
    detected_box: BoundingBox,
    gt_bboxes: BoundingBoxes,
) -> tuple[float, float]:
    """Calculate how much of detected area lies between ground truth boxes.

    This means that if there are multiple gt boxes, the detected area
    will be compared to them simultaneously (and not to each gt box
    separately).

    Intersections and areas are accumulated box by box, so regions where
    ground-truth boxes overlap each other are counted once per box.

    Parameters
    ----------
    detected_box
        BoundingBox of the detected area.
    gt_bboxes
        Collection of ground-truth bounding boxes.

    Returns
    -------
    tuple[float, float]
        The iou and ioda for the provided boxes.  Both are ``0.0`` if the
        detected box has no area.
    """
    detected_area = detected_box.area()
    if detected_area == 0:
        # floats, to honour the annotated return type
        return 0.0, 0.0

    intersection = sum(k.intersection(detected_box) for k in gt_bboxes)
    total_gt_area = sum(k.area() for k in gt_bboxes)

    iou = intersection / (detected_area + total_gt_area - intersection)
    ioda = intersection / detected_area

    return float(iou), float(ioda)
def _compute_iou_ioda_from_largest_bbox(
    gt_bboxes: BoundingBoxes,
    saliency_map: SaliencyMap,
    threshold: float = 0.2,
) -> tuple[float, float]:
    """Calculate the metrics for a single sample.

    The largest connected component of the thresholded saliency map is
    converted to a bounding box, which is then compared against the
    ground-truth boxes.

    Parameters
    ----------
    gt_bboxes
        A list of ground-truth bounding boxes.
    saliency_map
        A real-valued saliency-map that conveys regions used for
        classification in the original sample.
    threshold
        Relative threshold used to zero parts of the saliency map before
        connected components are searched for.  Defaults to 0.2, the value
        that was previously hard-coded here.

    Returns
    -------
    tuple[float, float]
        A tuple containing the iou and ioda for the largest bounding box.
    """
    largest_bbox = _get_largest_bounding_boxes(
        saliency_map, n=1, threshold=threshold
    )

    # fall back to a zero-sized box when no component was found
    detected_box = largest_bbox[0] if largest_bbox else BoundingBox(-1, 0, 0, 0, 0)

    return _compute_max_iou_and_ioda(detected_box, gt_bboxes)
def _compute_avg_saliency_focus(
saliency_map: SaliencyMap,
gt_mask: BinaryMask,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment