Skip to content
Snippets Groups Projects
Commit b0fcbe1f authored by André Anjos :speech_balloon:
Browse files

[script.*] Add support for dataset dictionaries

parent 284bc576
No related branches found
No related tags found
1 merge request: !12 "Streamlining"
#!/usr/bin/env python
# coding=utf-8
import os
import click
from torch.utils.data import DataLoader
......@@ -58,7 +59,13 @@ logger = logging.getLogger(__name__)
@click.option(
"--dataset",
"-d",
help="A torch.utils.data.dataset.Dataset instance implementing a dataset to be used for evaluating predictions, possibly including all pre-processing pipelines required",
help="A bob.ip.binseg.data.utils.SampleList2TorchDataset instance "
"implementing a dataset to be used for evaluation purposes, possibly "
"including all pre-processing pipelines required or, optionally, a "
"dictionary mapping string keys to "
"bob.ip.binseg.data.utils.SampleList2TorchDataset's. In such a case, "
"all datasets will be used for evaluation. Data augmentation "
"operations are excluded automatically in this case",
required=True,
cls=ResourceOption,
)
......@@ -96,7 +103,11 @@ def evaluate(output_folder, predictions_folder, dataset, overlayed,
overlay_threshold, **kwargs):
"""Evaluates an FCN on a binary segmentation task.
"""
data_loader = DataLoader(dataset=dataset, batch_size=1, shuffle=False,
pin_memory=False)
run(dataset, predictions_folder, output_folder, overlayed,
overlay_threshold)
if isinstance(dataset, dict):
for k,v in dataset.items():
analysis_folder = os.path.join(output_folder, k)
with v.not_augmented() as d:
data_loader = DataLoader(dataset=d, batch_size=1,
shuffle=False, pin_memory=False)
run(d, predictions_folder, analysis_folder, overlayed,
overlay_threshold)
......@@ -22,11 +22,11 @@ logger = logging.getLogger(__name__)
epilog="""Examples:
\b
1. Trains a M2U-Net model (VGG-16 backbone) with STARE (vessel segmentation),
1. Trains a M2U-Net model (VGG-16 backbone) with DRIVE (vessel segmentation),
on the CPU, for only two epochs, then runs inference and evaluation on
results from its test set:
$ bob binseg experiment -vv m2unet stare --epochs=2
$ bob binseg experiment -vv m2unet drive --epochs=2
""",
)
......@@ -47,20 +47,15 @@ logger = logging.getLogger(__name__)
cls=ResourceOption,
)
@click.option(
"--train-dataset",
"--dataset",
"-d",
help="A torch.utils.data.dataset.Dataset instance implementing a dataset "
"to be used for training the model, possibly including all pre-processing"
" pipelines required, including data augmentation",
required=True,
cls=ResourceOption,
)
@click.option(
"--test-dataset",
"-d",
help="A torch.utils.data.dataset.Dataset instance implementing a dataset "
"to be used for testing the model, possibly including all pre-processing"
" pipelines required",
help="A dictionary mapping string keys to "
"bob.ip.binseg.data.utils.SampleList2TorchDataset's. At least one key "
"named 'train' must be available. This dataset will be used for training "
"the network model. All other datasets will be used for prediction and "
"evaluation. Dataset descriptions include all required pre-processing, "
"including eventual data augmentation, which may be eventually excluded "
"for prediction and evaluation purposes",
required=True,
cls=ResourceOption,
)
......@@ -209,8 +204,7 @@ def experiment(
batch_size,
drop_incomplete_batch,
criterion,
train_dataset,
test_dataset,
dataset,
checkpoint_period,
device,
seed,
......@@ -220,7 +214,7 @@ def experiment(
verbose,
**kwargs,
):
"""Runs a complete experiment, from training, prediction and evaluation
"""Runs a complete experiment, from training, to prediction and evaluation
This script is just a wrapper around the individual scripts for training,
running prediction and evaluating FCN models. It organises the output in a
......@@ -259,7 +253,7 @@ def experiment(
batch_size=batch_size,
drop_incomplete_batch=drop_incomplete_batch,
criterion=criterion,
dataset=train_dataset,
dataset=dataset,
checkpoint_period=checkpoint_period,
device=device,
seed=seed,
......@@ -282,25 +276,11 @@ def experiment(
else None
)
# train set
ctx.invoke(
predict,
output_folder=predictions_folder,
model=model,
dataset=train_dataset,
batch_size=batch_size,
device=device,
weight=model_file,
overlayed=overlayed_folder,
verbose=verbose,
)
# test set
ctx.invoke(
predict,
output_folder=predictions_folder,
model=model,
dataset=test_dataset,
dataset=dataset,
batch_size=batch_size,
device=device,
weight=model_file,
......@@ -320,41 +300,29 @@ def experiment(
else None
)
# train set
train_analysis_folder = os.path.join(output_folder, "analysis", "train")
analysis_folder = os.path.join(output_folder, "analysis")
ctx.invoke(
evaluate,
output_folder=train_analysis_folder,
output_folder=analysis_folder,
predictions_folder=predictions_folder,
dataset=train_dataset,
dataset=dataset,
overlayed=overlayed_folder,
overlay_threshold=0.5,
verbose=verbose,
)
# test set
test_analysis_folder = os.path.join(output_folder, "analysis", "test")
ctx.invoke(
evaluate,
output_folder=test_analysis_folder,
predictions_folder=predictions_folder,
dataset=test_dataset,
overlayed=overlayed_folder,
overlay_threshold=0.5,
verbose=verbose,
)
logger.info("Ended evaluation")
## Comparison
logger.info("Started comparison")
# compare train and test set performances
# compare performances on the various sets
from .compare import compare
systems = (
"train": os.path.join(train_analysis_folder, "metric.csv"),
"test": os.path.join(test_analysis_folder, "metric.csv"),
)
systems = []
for k, v in dataset.items():
systems += [k, os.path.join(output_folder, "analysis", k, "metrics.csv")]
output_pdf = os.path.join(output_folder, "comparison.pdf")
ctx.invoke(compare, label_path=systems, output=output_pdf, verbose=verbose)
logger.info("End comparison, and the experiment - bye.")
logger.info("Ended comparison, and the experiment - bye.")
......@@ -61,7 +61,13 @@ logger = logging.getLogger(__name__)
@click.option(
"--dataset",
"-d",
help="A torch.utils.data.dataset.Dataset instance implementing a dataset to be used for evaluating the model, possibly including all pre-processing pipelines required",
help="A bob.ip.binseg.data.utils.SampleList2TorchDataset instance "
"implementing a dataset to be used for running prediction, possibly "
"including all pre-processing pipelines required or, optionally, a "
"dictionary mapping string keys to "
"bob.ip.binseg.data.utils.SampleList2TorchDataset's. In such a case, "
"all datasets will be used for running prediction. Data augmentation "
"operations are excluded automatically for prediction purposes",
required=True,
cls=ResourceOption,
)
......@@ -108,13 +114,7 @@ def predict(output_folder, model, dataset, batch_size, device, weight,
overlayed, **kwargs):
"""Predicts vessel map (probabilities) on input images"""
# PyTorch dataloader
data_loader = DataLoader(
dataset=dataset,
batch_size=batch_size,
shuffle=False,
pin_memory=torch.cuda.is_available(),
)
dataset = dataset if isinstance(dataset, dict) else dict(test=dataset)
# checkpointer, loads pre-fit model
weight_fullpath = os.path.abspath(weight)
......@@ -128,4 +128,12 @@ def predict(output_folder, model, dataset, batch_size, device, weight,
if overlayed is not None:
overlayed = overlayed.strip()
run(model, data_loader, device, output_folder, overlayed)
for k,v in dataset.items():
with v.not_augmented() as d: # we remove any data augmentation
data_loader = DataLoader(
dataset=d,
batch_size=batch_size,
shuffle=False,
pin_memory=torch.cuda.is_available(),
)
run(model, data_loader, device, output_folder, overlayed)
......@@ -64,8 +64,12 @@ logger = logging.getLogger(__name__)
"--dataset",
"-d",
help="A torch.utils.data.dataset.Dataset instance implementing a dataset "
"to be used for training the model, possibly including all pre-processing"
" pipelines required",
"to be used for training the model, possibly including all pre-processing "
"pipelines required or, optionally, a dictionary mapping string keys to "
"bob.ip.binseg.data.utils.SampleList2TorchDataset's. At least one key "
"named 'train' must be available. This dataset will be used for training "
"the network model. The dataset description include all required "
"pre-processing, including eventual data augmentation",
required=True,
cls=ResourceOption,
)
......@@ -222,7 +226,7 @@ def train(
# PyTorch dataloader
data_loader = DataLoader(
dataset=dataset,
dataset=dataset["train"] if isinstance(dataset, dict) else dataset,
batch_size=batch_size,
shuffle=True,
drop_last=drop_incomplete_batch,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment