Skip to content
Snippets Groups Projects
Commit 24957595 authored by André Anjos
Browse files

[engine.significance] Improve result tabulation

parent e32c1ca8
No related branches found
No related tags found
No related merge requests found
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
import os import os
import itertools import itertools
import textwrap
import multiprocessing import multiprocessing
import h5py import h5py
...@@ -11,6 +12,7 @@ import numpy ...@@ -11,6 +12,7 @@ import numpy
import pandas import pandas
import torch.nn import torch.nn
import scipy.stats import scipy.stats
import tabulate
from .evaluator import _sample_measures_for_threshold from .evaluator import _sample_measures_for_threshold
...@@ -83,9 +85,10 @@ def _performance_summary(size, patch_perf, patch_size, patch_stride, figure): ...@@ -83,9 +85,10 @@ def _performance_summary(size, patch_perf, patch_size, patch_stride, figure):
xlen = ((final_size[1] - patch_size[1]) // patch_stride[1]) + 1 xlen = ((final_size[1] - patch_size[1]) // patch_stride[1]) + 1
# calculates the stacked performance # calculates the stacked performance
layers = int(numpy.ceil(patch_size[0] / patch_stride[0]) * numpy.ceil( layers = int(
patch_size[1] / patch_stride[1] numpy.ceil(patch_size[0] / patch_stride[0])
)) * numpy.ceil(patch_size[1] / patch_stride[1])
)
perf = numpy.zeros( perf = numpy.zeros(
[layers] + final_size, dtype=patch_perf[figure].iloc[0].dtype [layers] + final_size, dtype=patch_perf[figure].iloc[0].dtype
) )
...@@ -106,7 +109,7 @@ def _performance_summary(size, patch_perf, patch_size, patch_stride, figure): ...@@ -106,7 +109,7 @@ def _performance_summary(size, patch_perf, patch_size, patch_stride, figure):
range(xup.start, xup.stop, xup.step), range(xup.start, xup.stop, xup.step),
indexing="ij", indexing="ij",
) )
perf[nup.flat, yr.flat, xr.flat] = col[(j*xlen)+i] perf[nup.flat, yr.flat, xr.flat] = col[(j * xlen) + i]
# for each element in the ``perf``matrix, calculates avg and std. # for each element in the ``perf``matrix, calculates avg and std.
n += 1 # adjust for starting at -1 before n += 1 # adjust for starting at -1 before
...@@ -756,49 +759,70 @@ def write_analysis_text(names, da, db, f): ...@@ -756,49 +759,70 @@ def write_analysis_text(names, da, db, f):
""" """
diff = da - db diff = da - db
f.write("#Samples/Median/Avg/Std.Dev./Normality Conf. F1-scores:\n") f.write("Basic statistics from distributions:\n")
f.write(
f"* {names[0]}: {len(da)}" headers = [
f" / {numpy.median(da):.3f}" "system",
f" / {numpy.mean(da):.3f}" "samples",
f" / {numpy.std(da, ddof=1):.3f}\n" "median",
) "average",
f.write( "std.dev.",
f"* {names[1]}: {len(db)}" "normaltest (p)",
f" / {numpy.median(db):.3f}" ]
f" / {numpy.mean(db):.3f}" table = [
f" / {numpy.std(db, ddof=1):.3f}\n" [
) names[0],
f.write( len(da),
f"* {names[0]}-{names[1]}: {len(diff)}" numpy.median(da),
f" / {numpy.median(diff):.3f}" numpy.mean(da),
f" / {numpy.mean(diff):.3f}" numpy.std(da, ddof=1),
f" / {numpy.std(diff, ddof=1):.3f}" scipy.stats.normaltest(da)[1],
f" / gaussian? p={scipy.stats.normaltest(diff)[1]:.3f}\n" ],
) [
names[1],
len(db),
numpy.median(db),
numpy.mean(db),
numpy.std(db, ddof=1),
scipy.stats.normaltest(db)[1],
],
[
"differences",
len(diff),
numpy.median(diff),
numpy.mean(diff),
numpy.std(diff, ddof=1),
scipy.stats.normaltest(diff)[1],
],
]
tdata = tabulate.tabulate(table, headers, tablefmt="rst", floatfmt=".3f")
f.write(textwrap.indent(tdata, " "))
f.write("\n")
# Note: dependent variable = patch performance figure in our case
# Assumptions of a Paired T-test:
# * The dependent variable must be continuous (interval/ratio). [OK]
# * The observations are independent of one another. [OK]
# * The dependent variable should be approximately normally distributed. [!!!]
# * The dependent variable should not contain any outliers. [OK]
f.write("\nPaired Significance Tests:\n")
w, p = scipy.stats.ttest_rel(da, db) w, p = scipy.stats.ttest_rel(da, db)
f.write( f.write(f" * Paired T (H0: same distro): S = {w:g}, p = {p:.5f}\n")
f"Paired T-test (is the difference zero?): S = {w:g}, p = {p:.5f}\n"
)
w, p = scipy.stats.ttest_ind(da, db, equal_var=False)
f.write(f"Ind. T-test (is the difference zero?): S = {w:g}, p = {p:.5f}\n")
w, p = scipy.stats.wilcoxon(diff) w, p = scipy.stats.wilcoxon(diff)
f.write( f.write(" * Wilcoxon:\n")
f"Wilcoxon test (is the difference zero?): W = {w:g}, p = {p:.5f}\n" f.write(f" * H0 = same distro: W = {w:g}, p = {p:.5f}\n")
)
w, p = scipy.stats.wilcoxon(diff, alternative="greater") w, p = scipy.stats.wilcoxon(diff, alternative="greater")
f.write( f.write(
f"Wilcoxon test (md({names[0]}) < md({names[1]})?): " f" * H0 = med({names[0]}) < med({names[1]}): "
f"W = {w:g}, p = {p:.5f}\n" f"W = {w:g}, p = {p:.5f}\n"
) )
w, p = scipy.stats.wilcoxon(diff, alternative="less") w, p = scipy.stats.wilcoxon(diff, alternative="less")
f.write( f.write(
f"Wilcoxon test (md({names[0]}) > md({names[1]})?): " f" * H0 = med({names[0]}) > med({names[1]}): "
f"W = {w:g}, p = {p:.5f}\n" f"W = {w:g}, p = {p:.5f}\n"
) )
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment