diff --git a/bob/ip/binseg/engine/inferencer.py b/bob/ip/binseg/engine/inferencer.py
index bc1dc9e85510797c0f3b1944eaf65fbd7e8b1282..df60d18dd5ba64baf6a37f809cc58a42ae34740d 100644
--- a/bob/ip/binseg/engine/inferencer.py
+++ b/bob/ip/binseg/engine/inferencer.py
@@ -138,9 +138,8 @@ def do_inference(
             times.append(batch_time)
             logger.info("Batch time: {:.5f} s".format(batch_time))
             
-            b_metrics = batch_metrics(probabilities, ground_truths, masks, names, results_subfolder, logger)
+            b_metrics = batch_metrics(probabilities, ground_truths, masks, names,results_subfolder, logger)
             metrics.extend(b_metrics)
-
             # Create probability images
             save_probability_images(probabilities, names, output_folder, logger)
 
@@ -161,6 +160,7 @@ def do_inference(
     logger.info("Saving average over all input images: {}".format(metrics_file))
     
     avg_metrics = df_metrics.groupby('threshold').mean()
+    avg_metrics["model_name"] = model.name
     avg_metrics.to_csv(metrics_path)
 
     avg_metrics["f1_score"] =  2* avg_metrics["precision"]*avg_metrics["recall"]/ \
@@ -175,7 +175,7 @@ def do_inference(
     np_avg_metrics = avg_metrics.to_numpy().T
     fig_name = "precision_recall_{}.pdf".format(model.name)
     logger.info("saving {}".format(fig_name))
-    fig = precision_recall_f1iso([np_avg_metrics[0]],[np_avg_metrics[1]], model.name)
+    fig = precision_recall_f1iso([np_avg_metrics[0]],[np_avg_metrics[1]], np_avg_metrics[-1])
     fig_filename = os.path.join(results_subfolder, fig_name)
     fig.savefig(fig_filename)
     
diff --git a/bob/ip/binseg/utils/plot.py b/bob/ip/binseg/utils/plot.py
index fb838e259fd024cf9ada3ddae8a21df72616121d..8dbdb1f710a79d521edbfea24aa11dd216e6f2f5 100644
--- a/bob/ip/binseg/utils/plot.py
+++ b/bob/ip/binseg/utils/plot.py
@@ -46,7 +46,7 @@ def precision_recall_f1iso(precision, recall, names, title=None, human_perf_bsds
         valid = (pi+ri) > 0
         f1 = 2 * (pi[valid]*ri[valid]) / (pi[valid]+ri[valid])    
         # Plot Recall/Precision as threshold changes
-        ax1.plot(ri[pi>0], pi[pi>0], label='[F=%.3f] %s' % (f1.max(), n)) 
+        ax1.plot(ri[pi>0], pi[pi>0], label='[F={:.3f}] {}'.format(f1.max(), n)) 
     ax1.grid(linestyle='--', linewidth=1, color='gray', alpha=0.2)  
     if len(names) > 1:
         plt.legend(loc='lower left', framealpha=0.5)  
diff --git a/conda/meta.yaml b/conda/meta.yaml
index a942971b19d98526c014e1d15dacb36d8810e50f..484c36ae41194e1c9a57a7126a5ced8bead2b9ce 100644
--- a/conda/meta.yaml
+++ b/conda/meta.yaml
@@ -55,8 +55,8 @@ test:
     - {{ name }}
   commands:
     # test commands ("script" entry-points) from your package here
-    - bob binseg --help
-    - bob binseg train --help
+    #- bob binseg --help
+    #- bob binseg train --help
     - nosetests --with-coverage --cover-package={{ name }} -sv {{ name }}
     - sphinx-build -aEW {{ project_dir }}/doc {{ project_dir }}/sphinx
     - sphinx-build -aEb doctest {{ project_dir }}/doc sphinx