Skip to content
Snippets Groups Projects
Commit cd21c775 authored by André Anjos's avatar André Anjos :speech_balloon:
Browse files

[data.tbx11k] Re-structure database to accommodate bbox annotation querying

parent 816e8b80
No related branches found
No related tags found
1 merge request: !6 Making use of LightningDataModule and simplification of data loading
Pipeline #76667 failed
Showing
with 47468 additions and 12320 deletions
......@@ -7,11 +7,11 @@
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/psf/black
rev: 23.1.0
rev: 23.7.0
hooks:
- id: black
- repo: https://github.com/pycqa/docformatter
rev: v1.5.1
rev: v1.7.5
hooks:
- id: docformatter
- repo: https://github.com/pycqa/isort
......@@ -19,11 +19,11 @@ repos:
hooks:
- id: isort
- repo: https://github.com/pycqa/flake8
rev: 6.0.0
rev: 6.1.0
hooks:
- id: flake8
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.0.0
rev: v1.4.1
hooks:
- id: mypy
args: [
......@@ -33,10 +33,15 @@ repos:
--ignore-missing-imports,
]
- repo: https://github.com/asottile/pyupgrade
rev: v3.3.1
rev: v3.10.1
hooks:
- id: pyupgrade
args: [--py38-plus]
args: [--py39-plus]
# - repo: https://github.com/pre-commit/mirrors-prettier
# rev: v2.7.1
# hooks:
# - id: prettier
# types_or: [json]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
......@@ -53,6 +58,7 @@ repos:
^doc/results/img/rad_sign_drop.png
)
- id: check-toml
- id: check-json
- id: check-yaml
exclude: |
(?x)(
......@@ -65,6 +71,6 @@ repos:
- id: end-of-file-fixer
- id: debug-statements
- repo: https://github.com/fsfe/reuse-tool
rev: v1.1.2
rev: v2.1.0
hooks:
- id: reuse
......@@ -4,6 +4,7 @@
import importlib.resources
import os
import typing
import PIL.Image
......@@ -16,23 +17,32 @@ from ..typing import DatabaseSplit
from ..typing import RawDataLoader as _BaseRawDataLoader
from ..typing import Sample
DatabaseSample = (
tuple[str, int] | tuple[str, int, list[tuple[int, int, int, int, int]]]
BoundingBoxAnnotation: typing.TypeAlias = tuple[int, int, int, int, int]
"""Location of TB radiological findings (latent or active)
Objects of this type carry bounding-box information of radiological findings on
the original 512x512 pixel images of TBX11k. The radiological findings are
defined as such:
* 0/1: This sign is for latent TB (0), or active TB (1)
* xmin: horizontal position of bounding box upper-left corner, in pixels
* ymin: vertical position of bounding box upper-left corner, in pixels
* width: width of the bounding box, in pixels
* height: height of the bounding box, in pixels
"""
DatabaseSample: typing.TypeAlias = (
tuple[str, int] | tuple[str, int, list[BoundingBoxAnnotation]]
)
"""Type of objects in our JSON representation for this database.
For healthy/sick (no TB)/latent TB cases, each sample is represented by a
filename, followed by the number 0 (negative class).
For active TB cases, each sample is represented by a filename, followed by the
number 1, and then 1 or more 5-tuples with radiological finding locations. The
radiological findings are defined as such:
For healthy/sick (no TB)/latent TB cases, each sample is represented by
a filename, relative to the root of the installed database, followed by
the number 0 (negative class).
* 0/1: Signs of latent TB (0), or active TB (1)
* xmin
* ymin
* width
* height
For active TB cases, each sample is represented by a filename, followed
by the number 1, and then by 1 or more 5-tuples with radiological
finding locations, as described above.
"""
......@@ -104,6 +114,29 @@ class RawDataLoader(_BaseRawDataLoader):
"""
return sample[1]
def bbox_annotations(
self, sample: DatabaseSample
) -> list[BoundingBoxAnnotation]:
"""Returns the bounding-box annotations attached to a sample, if any.
Nothing is read from disk here: the annotations are taken directly
from the ``sample`` tuple.
Parameters
----------
sample:
A tuple containing the path suffix, within the dataset root folder,
where to find the image to be loaded, an integer, representing the
sample label, and possible radiological findings represented by
bounding boxes.
Returns
-------
annotations
The third element of ``sample`` when it has more than two
elements, or an empty list otherwise.
"""
# ``DatabaseSample`` is either a 2-tuple or a 3-tuple; only the 3-tuple
# form carries a list of bounding boxes, hence the length check.
return sample[2] if len(sample) > 2 else [] # type: ignore
def make_split(basename: str) -> DatabaseSplit:
"""Returns a database split for the Montgomery database."""
......
......@@ -76,7 +76,7 @@ def reorder(data: dict) -> list:
assert len(set(images.values())) == len(images), "Image ids are not unique"
retval: dict[str, list[typing.Any]] = {
k["file_name"]: [-1] for k in data["images"]
k["file_name"]: [-1, []] for k in data["images"]
}
# we now "consume" all annotations and assign each to an image
......@@ -85,7 +85,12 @@ def reorder(data: dict) -> list:
categories[annotation["category_id"]],
*[round(k) for k in annotation["bbox"]],
]
retval[images[annotation["image_id"]]].append(int_bbox)
retval[images[annotation["image_id"]]][1].append(int_bbox)
# remove empty bounding-box entries to save space on final JSON
for v in retval.values():
if not v[1]:
del v[1]
return sorted([["imgs/" + k, *v] for k, v in retval.items()])
......@@ -127,7 +132,7 @@ def normalize_labels(data: list) -> list:
f"but contains no tb bbox annotations"
)
return -1 # unknown diagnosis
bbx_labels: list[int] = [k[0] for k in s[2:]]
bbx_labels: list[int] = [k[0] for k in s[2]]
tb_counts = collections.Counter(bbx_labels)
assert 2 not in tb_counts, (
f"Label 2 (PulmonaryTuberculosis) was used in image {s[0]} "
......@@ -149,7 +154,10 @@ def normalize_labels(data: list) -> list:
else:
raise RuntimeError("Cannot happen - please check")
return [[k[0], _set_label(k), *k[2:]] for k in data]
for k in data:
k[1] = _set_label(k)
return data
def print_statistics(d: dict):
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment