Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
mednet
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
medai
software
mednet
Commits
222e8515
Commit
222e8515
authored
1 year ago
by
André Anjos
Browse files
Options
Downloads
Patches
Plain Diff
[tests] Fix testing
parent
67ca29f4
No related branches found
No related tags found
1 merge request
!6
Making use of LightningDataModule and simplification of data loading
Pipeline
#77152
passed
1 year ago
Stage: qa
Stage: test
Stage: doc
Stage: dist
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
tests/test_evaluator.py
+28
-0
28 additions, 0 deletions
tests/test_evaluator.py
tests/test_measures.py
+0
-199
0 additions, 199 deletions
tests/test_measures.py
with
28 additions
and
199 deletions
tests/test_evaluator.py
0 → 100644
+
28
−
0
View file @
222e8515
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""
Tests for measure functions.
"""
import
numpy
def test_centered_maxf1():
    """Check that ``_get_centered_maxf1`` picks the centered best threshold."""
    from ptbench.engine.evaluator import _get_centered_maxf1

    # Each case: (f1 curve, threshold grid, expected max-f1, expected threshold)
    cases = [
        # plateau of maximal F1 values -> threshold at the plateau center
        ([0.8, 0.9, 1.0, 1.0, 1.0, 0.3], [0.2, 0.3, 0.4, 0.5, 0.6, 0.7], 1.0, 0.5),
        # unique maximum -> threshold at that single point
        ([0.8, 0.9, 1.0, 0.9, 0.7, 0.3], [0.2, 0.3, 0.4, 0.5, 0.6, 0.7], 1.0, 0.4),
    ]

    for curve, grid, expected_f1, expected_threshold in cases:
        best_f1, best_threshold = _get_centered_maxf1(
            numpy.array(curve), numpy.array(grid)
        )
        assert best_f1 == expected_f1
        assert best_threshold == expected_threshold
This diff is collapsed.
Click to expand it.
tests/test_measures.py
deleted
100644 → 0
+
0
−
199
View file @
67ca29f4
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""
Tests for measure functions.
"""
import
random
import
unittest
import
numpy
from
ptbench.utils.measure
import
(
base_measures
,
bayesian_measures
,
beta_credible_region
,
get_centered_maxf1
,
)
def test_centered_maxf1():
    """Check centered max-F1 selection for plateau and unique maxima."""
    # Multiple max F1: the maximum is attained over a plateau of three
    # entries; the returned threshold must be the plateau's center.
    scores = numpy.array((0.8, 0.9, 1.0, 1.0, 1.0, 0.3))
    cuts = numpy.array((0.2, 0.3, 0.4, 0.5, 0.6, 0.7))
    best, cut = get_centered_maxf1(scores, cuts)
    assert best == 1.0
    assert cut == 0.5

    # Single max F1: a unique maximum maps directly to its own threshold.
    scores = numpy.array((0.8, 0.9, 1.0, 0.9, 0.7, 0.3))
    cuts = numpy.array((0.2, 0.3, 0.4, 0.5, 0.6, 0.7))
    best, cut = get_centered_maxf1(scores, cuts)
    assert best == 1.0
    assert cut == 0.4
class
TestFrequentist
(
unittest
.
TestCase
):
"""
Unit test for frequentist base measures.
"""
def
setUp
(
self
):
self
.
tp
=
random
.
randint
(
1
,
100
)
self
.
fp
=
random
.
randint
(
1
,
100
)
self
.
tn
=
random
.
randint
(
1
,
100
)
self
.
fn
=
random
.
randint
(
1
,
100
)
def
test_precision
(
self
):
precision
=
base_measures
(
self
.
tp
,
self
.
fp
,
self
.
tn
,
self
.
fn
)[
0
]
self
.
assertEqual
((
self
.
tp
)
/
(
self
.
tp
+
self
.
fp
),
precision
)
def
test_recall
(
self
):
recall
=
base_measures
(
self
.
tp
,
self
.
fp
,
self
.
tn
,
self
.
fn
)[
1
]
self
.
assertEqual
((
self
.
tp
)
/
(
self
.
tp
+
self
.
fn
),
recall
)
def
test_specificity
(
self
):
specificity
=
base_measures
(
self
.
tp
,
self
.
fp
,
self
.
tn
,
self
.
fn
)[
2
]
self
.
assertEqual
((
self
.
tn
)
/
(
self
.
tn
+
self
.
fp
),
specificity
)
def
test_accuracy
(
self
):
accuracy
=
base_measures
(
self
.
tp
,
self
.
fp
,
self
.
tn
,
self
.
fn
)[
3
]
self
.
assertEqual
(
(
self
.
tp
+
self
.
tn
)
/
(
self
.
tp
+
self
.
tn
+
self
.
fp
+
self
.
fn
),
accuracy
,
)
def
test_jaccard
(
self
):
jaccard
=
base_measures
(
self
.
tp
,
self
.
fp
,
self
.
tn
,
self
.
fn
)[
4
]
self
.
assertEqual
(
self
.
tp
/
(
self
.
tp
+
self
.
fp
+
self
.
fn
),
jaccard
)
def
test_f1
(
self
):
p
,
r
,
s
,
a
,
j
,
f1
=
base_measures
(
self
.
tp
,
self
.
fp
,
self
.
tn
,
self
.
fn
)
self
.
assertEqual
(
(
2.0
*
self
.
tp
)
/
(
2.0
*
self
.
tp
+
self
.
fp
+
self
.
fn
),
f1
)
self
.
assertAlmostEqual
((
2
*
p
*
r
)
/
(
p
+
r
),
f1
)
# base definition
class
TestBayesian
:
"""
Unit test for bayesian base measures.
"""
def
mean
(
self
,
k
,
lk
,
lambda_
):
return
(
k
+
lambda_
)
/
(
k
+
lk
+
2
*
lambda_
)
def
mode1
(
self
,
k
,
lk
,
lambda_
):
# (k+lambda_), (l+lambda_) > 1
return
(
k
+
lambda_
-
1
)
/
(
k
+
lk
+
2
*
lambda_
-
2
)
def
test_beta_credible_region_base
(
self
):
k
=
40
lk
=
10
lambda_
=
0.5
cover
=
0.95
got
=
beta_credible_region
(
k
,
lk
,
lambda_
,
cover
)
# mean, mode, lower, upper
exp
=
(
self
.
mean
(
k
,
lk
,
lambda_
),
self
.
mode1
(
k
,
lk
,
lambda_
),
0.6741731038857685
,
0.8922659692341358
,
)
assert
numpy
.
isclose
(
got
,
exp
).
all
(),
f
"
{
got
}
<>
{
exp
}
"
def
test_beta_credible_region_small_k
(
self
):
k
=
4
lk
=
1
lambda_
=
0.5
cover
=
0.95
got
=
beta_credible_region
(
k
,
lk
,
lambda_
,
cover
)
# mean, mode, lower, upper
exp
=
(
self
.
mean
(
k
,
lk
,
lambda_
),
self
.
mode1
(
k
,
lk
,
lambda_
),
0.37137359936800574
,
0.9774872340008449
,
)
assert
numpy
.
isclose
(
got
,
exp
).
all
(),
f
"
{
got
}
<>
{
exp
}
"
def
test_beta_credible_region_precision_jeffrey
(
self
):
# simulation of situation for precision TP == FP == 0, Jeffrey's prior
k
=
0
lk
=
0
lambda_
=
0.5
cover
=
0.95
got
=
beta_credible_region
(
k
,
lk
,
lambda_
,
cover
)
# mean, mode, lower, upper
exp
=
(
self
.
mean
(
k
,
lk
,
lambda_
),
0.0
,
0.0015413331334360135
,
0.998458666866564
,
)
assert
numpy
.
isclose
(
got
,
exp
).
all
(),
f
"
{
got
}
<>
{
exp
}
"
def
test_beta_credible_region_precision_flat
(
self
):
# simulation of situation for precision TP == FP == 0, flat prior
k
=
0
lk
=
0
lambda_
=
1.0
cover
=
0.95
got
=
beta_credible_region
(
k
,
lk
,
lambda_
,
cover
)
# mean, mode, lower, upper
exp
=
(
self
.
mean
(
k
,
lk
,
lambda_
),
0.0
,
0.025000000000000022
,
0.975
)
assert
numpy
.
isclose
(
got
,
exp
).
all
(),
f
"
{
got
}
<>
{
exp
}
"
def
test_bayesian_measures
(
self
):
tp
=
random
.
randint
(
100000
,
1000000
)
fp
=
random
.
randint
(
100000
,
1000000
)
tn
=
random
.
randint
(
100000
,
1000000
)
fn
=
random
.
randint
(
100000
,
1000000
)
_prec
,
_rec
,
_spec
,
_acc
,
_jac
,
_f1
=
base_measures
(
tp
,
fp
,
tn
,
fn
)
prec
,
rec
,
spec
,
acc
,
jac
,
f1
=
bayesian_measures
(
tp
,
fp
,
tn
,
fn
,
0.5
,
0.95
)
# Notice that for very large k and l, the base frequentist measures
# should be approximately the same as the bayesian mean and mode
# extracted from the beta posterior. We test that here.
assert
numpy
.
isclose
(
_prec
,
prec
[
0
]
),
f
"
freq:
{
_prec
}
<> bays:
{
prec
[
0
]
}
"
assert
numpy
.
isclose
(
_prec
,
prec
[
1
]
),
f
"
freq:
{
_prec
}
<> bays:
{
prec
[
1
]
}
"
assert
numpy
.
isclose
(
_rec
,
rec
[
0
]),
f
"
freq:
{
_rec
}
<> bays:
{
rec
[
0
]
}
"
assert
numpy
.
isclose
(
_rec
,
rec
[
1
]),
f
"
freq:
{
_rec
}
<> bays:
{
rec
[
1
]
}
"
assert
numpy
.
isclose
(
_spec
,
spec
[
0
]
),
f
"
freq:
{
_spec
}
<> bays:
{
spec
[
0
]
}
"
assert
numpy
.
isclose
(
_spec
,
spec
[
1
]
),
f
"
freq:
{
_spec
}
<> bays:
{
spec
[
1
]
}
"
assert
numpy
.
isclose
(
_acc
,
acc
[
0
]),
f
"
freq:
{
_acc
}
<> bays:
{
acc
[
0
]
}
"
assert
numpy
.
isclose
(
_acc
,
acc
[
1
]),
f
"
freq:
{
_acc
}
<> bays:
{
acc
[
1
]
}
"
assert
numpy
.
isclose
(
_jac
,
jac
[
0
]),
f
"
freq:
{
_jac
}
<> bays:
{
jac
[
0
]
}
"
assert
numpy
.
isclose
(
_jac
,
jac
[
1
]),
f
"
freq:
{
_jac
}
<> bays:
{
jac
[
1
]
}
"
assert
numpy
.
isclose
(
_f1
,
f1
[
0
]),
f
"
freq:
{
_f1
}
<> bays:
{
f1
[
0
]
}
"
assert
numpy
.
isclose
(
_f1
,
f1
[
1
]),
f
"
freq:
{
_f1
}
<> bays:
{
f1
[
1
]
}
"
# We also test that the interval in question includes the mode and the
# mean in this case.
assert
(
prec
[
2
]
<
prec
[
1
])
and
(
prec
[
1
]
<
prec
[
3
]
),
f
"
precision is out of bounds
{
_prec
[
2
]
}
<
{
_prec
[
1
]
}
<
{
_prec
[
3
]
}
"
assert
(
rec
[
2
]
<
rec
[
1
])
and
(
rec
[
1
]
<
rec
[
3
]
),
f
"
recall is out of bounds
{
_rec
[
2
]
}
<
{
_rec
[
1
]
}
<
{
_rec
[
3
]
}
"
assert
(
spec
[
2
]
<
spec
[
1
])
and
(
spec
[
1
]
<
spec
[
3
]
),
f
"
specif. is out of bounds
{
_spec
[
2
]
}
<
{
_spec
[
1
]
}
<
{
_spec
[
3
]
}
"
assert
(
acc
[
2
]
<
acc
[
1
])
and
(
acc
[
1
]
<
acc
[
3
]
),
f
"
accuracy is out of bounds
{
_acc
[
2
]
}
<
{
_acc
[
1
]
}
<
{
_acc
[
3
]
}
"
assert
(
jac
[
2
]
<
jac
[
1
])
and
(
jac
[
1
]
<
jac
[
3
]
),
f
"
jaccard is out of bounds
{
_jac
[
2
]
}
<
{
_jac
[
1
]
}
<
{
_jac
[
3
]
}
"
assert
(
f1
[
2
]
<
f1
[
1
])
and
(
f1
[
1
]
<
f1
[
3
]
),
f
"
f1-score is out of bounds
{
_f1
[
2
]
}
<
{
_f1
[
1
]
}
<
{
_f1
[
3
]
}
"
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment