From 24957595c57aad4f046c30312a346849916a4067 Mon Sep 17 00:00:00 2001
From: Andre Anjos <andre.dos.anjos@gmail.com>
Date: Mon, 20 Jul 2020 17:31:34 +0200
Subject: [PATCH] [engine.significance] Improve result tabulation

---
 bob/ip/binseg/engine/significance.py | 96 +++++++++++++++++-----------
 1 file changed, 60 insertions(+), 36 deletions(-)

diff --git a/bob/ip/binseg/engine/significance.py b/bob/ip/binseg/engine/significance.py
index bf663b88..22c63c51 100644
--- a/bob/ip/binseg/engine/significance.py
+++ b/bob/ip/binseg/engine/significance.py
@@ -3,6 +3,7 @@
 
 import os
 import itertools
+import textwrap
 import multiprocessing
 
 import h5py
@@ -11,6 +12,7 @@ import numpy
 import pandas
 import torch.nn
 import scipy.stats
+import tabulate
 
 from .evaluator import _sample_measures_for_threshold
 
@@ -83,9 +85,10 @@ def _performance_summary(size, patch_perf, patch_size, patch_stride, figure):
     xlen = ((final_size[1] - patch_size[1]) // patch_stride[1]) + 1
 
     # calculates the stacked performance
-    layers = int(numpy.ceil(patch_size[0] / patch_stride[0]) * numpy.ceil(
-        patch_size[1] / patch_stride[1]
-    ))
+    layers = int(
+        numpy.ceil(patch_size[0] / patch_stride[0])
+        * numpy.ceil(patch_size[1] / patch_stride[1])
+    )
     perf = numpy.zeros(
         [layers] + final_size, dtype=patch_perf[figure].iloc[0].dtype
     )
@@ -106,7 +109,7 @@ def _performance_summary(size, patch_perf, patch_size, patch_stride, figure):
                 range(xup.start, xup.stop, xup.step),
                 indexing="ij",
             )
-            perf[nup.flat, yr.flat, xr.flat] = col[(j*xlen)+i]
+            perf[nup.flat, yr.flat, xr.flat] = col[(j * xlen) + i]
 
     # for each element in the ``perf`` matrix, calculates avg and std.
     n += 1  # adjust for starting at -1 before
@@ -756,49 +759,70 @@ def write_analysis_text(names, da, db, f):
     """
 
     diff = da - db
-    f.write("#Samples/Median/Avg/Std.Dev./Normality Conf. F1-scores:\n")
-    f.write(
-        f"* {names[0]}: {len(da)}"
-        f" / {numpy.median(da):.3f}"
-        f" / {numpy.mean(da):.3f}"
-        f" / {numpy.std(da, ddof=1):.3f}\n"
-    )
-    f.write(
-        f"* {names[1]}: {len(db)}"
-        f" / {numpy.median(db):.3f}"
-        f" / {numpy.mean(db):.3f}"
-        f" / {numpy.std(db, ddof=1):.3f}\n"
-    )
-    f.write(
-        f"* {names[0]}-{names[1]}: {len(diff)}"
-        f" / {numpy.median(diff):.3f}"
-        f" / {numpy.mean(diff):.3f}"
-        f" / {numpy.std(diff, ddof=1):.3f}"
-        f" / gaussian? p={scipy.stats.normaltest(diff)[1]:.3f}\n"
-    )
-
+    f.write("Basic statistics from distributions:\n")
+
+    headers = [
+        "system",
+        "samples",
+        "median",
+        "average",
+        "std.dev.",
+        "normaltest (p)",
+    ]
+    table = [
+        [
+            names[0],
+            len(da),
+            numpy.median(da),
+            numpy.mean(da),
+            numpy.std(da, ddof=1),
+            scipy.stats.normaltest(da)[1],
+        ],
+        [
+            names[1],
+            len(db),
+            numpy.median(db),
+            numpy.mean(db),
+            numpy.std(db, ddof=1),
+            scipy.stats.normaltest(db)[1],
+        ],
+        [
+            "differences",
+            len(diff),
+            numpy.median(diff),
+            numpy.mean(diff),
+            numpy.std(diff, ddof=1),
+            scipy.stats.normaltest(diff)[1],
+        ],
+    ]
+    tdata = tabulate.tabulate(table, headers, tablefmt="rst", floatfmt=".3f")
+    f.write(textwrap.indent(tdata, "  "))
+    f.write("\n")
+
+    # Note: dependent variable = patch performance figure in our case
+    # Assumptions of a Paired T-test:
+    # * The dependent variable must be continuous (interval/ratio). [OK]
+    # * The observations are independent of one another. [OK]
+    # * The differences between pairs should be approximately normal. [!!!]
+    # * The differences should not contain any outliers. [OK]
+
+    f.write("\nPaired Significance Tests:\n")
     w, p = scipy.stats.ttest_rel(da, db)
-    f.write(
-        f"Paired T-test (is the difference zero?): S = {w:g}, p = {p:.5f}\n"
-    )
-
-    w, p = scipy.stats.ttest_ind(da, db, equal_var=False)
-    f.write(f"Ind. T-test (is the difference zero?): S = {w:g}, p = {p:.5f}\n")
+    f.write(f"  * Paired T (H0: identical averages): S = {w:g}, p = {p:.5f}\n")
 
     w, p = scipy.stats.wilcoxon(diff)
-    f.write(
-        f"Wilcoxon test (is the difference zero?): W = {w:g}, p = {p:.5f}\n"
-    )
+    f.write("  * Wilcoxon:\n")
+    f.write(f"    * H0 = same distro: W = {w:g}, p = {p:.5f}\n")
 
     w, p = scipy.stats.wilcoxon(diff, alternative="greater")
     f.write(
-        f"Wilcoxon test (md({names[0]}) < md({names[1]})?): "
+        f"    * H0 = med({names[0]}) < med({names[1]}): "
         f"W = {w:g}, p = {p:.5f}\n"
     )
 
     w, p = scipy.stats.wilcoxon(diff, alternative="less")
     f.write(
-        f"Wilcoxon test (md({names[0]}) > md({names[1]})?): "
+        f"    * H0 = med({names[0]}) > med({names[1]}): "
         f"W = {w:g}, p = {p:.5f}\n"
     )
 
-- 
GitLab
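
For reference, below is a minimal, self-contained sketch of the reporting path
this patch introduces: it builds the same kind of basic-statistics table with
tabulate (reStructuredText format, three decimal places, indented two spaces
via textwrap.indent) and then runs the paired tests.  The system names, random
seed and per-sample F1-scores are synthetic placeholders for illustration
only; they do not come from bob.ip.binseg data.

    # illustrative sketch only -- synthetic data, not part of the patch
    import textwrap

    import numpy
    import scipy.stats
    import tabulate

    rng = numpy.random.default_rng(1234)  # hypothetical seed
    da = rng.normal(0.80, 0.02, size=20)  # per-sample F1, hypothetical system "a"
    db = rng.normal(0.78, 0.02, size=20)  # per-sample F1, hypothetical system "b"
    diff = da - db

    headers = ["system", "samples", "median", "average", "std.dev.",
               "normaltest (p)"]
    table = [
        [name, len(x), numpy.median(x), numpy.mean(x), numpy.std(x, ddof=1),
         scipy.stats.normaltest(x)[1]]
        for name, x in (("a", da), ("b", db), ("differences", diff))
    ]
    # reStructuredText table, indented by two spaces as in write_analysis_text()
    print(textwrap.indent(
        tabulate.tabulate(table, headers, tablefmt="rst", floatfmt=".3f"), "  "))

    # paired significance tests, mirroring the new output section
    w, p = scipy.stats.ttest_rel(da, db)
    print(f"  * Paired T (H0: identical averages): S = {w:g}, p = {p:.5f}")
    w, p = scipy.stats.wilcoxon(diff)  # two-sided; H0: same distro
    print(f"  * Wilcoxon: W = {w:g}, p = {p:.5f}")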