Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
bob.measure
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
2
Issues
2
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
bob
bob.measure
Commits
0c19f066
Commit
0c19f066
authored
Oct 27, 2016
by
Manuel Günther
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Rewrote split_four/five_column to use four/five_column; reimplemented four/five_column
parent
7ed0fdca
Pipeline
#5135
failed with stages
in 5 minutes and 50 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
25 additions
and
86 deletions
+25
-86
bob/measure/load.py
bob/measure/load.py
+25
-86
No files found.
bob/measure/load.py
View file @
0c19f066
...
...
@@ -79,7 +79,7 @@ def four_column(filename):
opened with :py:func:`open_file` containing the scores.
Return
s:
Yield
s:
str: The claimed identity -- the client name of the model that was used in
the comparison
...
...
@@ -93,18 +93,10 @@ def four_column(filename):
"""
for
i
,
l
in
enumerate
(
open_file
(
filename
)):
if
isinstance
(
l
,
bytes
):
l
=
l
.
decode
(
'utf-8'
)
s
=
l
.
strip
()
if
len
(
s
)
==
0
or
s
[
0
]
==
'#'
:
continue
#empty or comment
field
=
[
k
.
strip
()
for
k
in
s
.
split
()]
if
len
(
field
)
<
4
:
raise
SyntaxError
(
'Line %d of file "%s" is invalid: %s'
%
(
i
,
filename
,
l
))
try
:
score
=
float
(
field
[
3
])
except
:
raise
SyntaxError
(
'Cannot convert score to float at line %d of file "%s": %s'
%
(
i
,
filename
,
l
))
yield
(
field
[
0
],
field
[
1
],
field
[
2
],
score
)
reader
=
csv
.
reader
(
open_file
(
filename
,
mode
=
'rb'
),
delimiter
=
' '
)
for
splits
in
reader
:
splits
[
-
1
]
=
float
(
splits
[
-
1
])
yield
splits
def
split_four_column
(
filename
):
...
...
@@ -136,8 +128,8 @@ def split_four_column(filename):
"""
score_lines
=
load_score_with_generator
(
filename
,
4
)
return
get_negatives_positives_from_generator
(
score_lines
)
score_lines
=
four_column
(
filename
)
return
_split_scores
(
score_lines
,
1
)
def
cmc_four_column
(
filename
):
...
...
@@ -206,7 +198,7 @@ def five_column(filename):
opened with :py:func:`open_file` containing the scores.
Return
s:
Yield
s:
str: The claimed identity -- the client name of the model that was used in
the comparison
...
...
@@ -222,18 +214,10 @@ def five_column(filename):
"""
for
i
,
l
in
enumerate
(
open_file
(
filename
)):
if
isinstance
(
l
,
bytes
):
l
=
l
.
decode
(
'utf-8'
)
s
=
l
.
strip
()
if
len
(
s
)
==
0
or
s
[
0
]
==
'#'
:
continue
#empty or comment
field
=
[
k
.
strip
()
for
k
in
s
.
split
()]
if
len
(
field
)
<
5
:
raise
SyntaxError
(
'Line %d of file "%s" is invalid: %s'
%
(
i
,
filename
,
l
))
try
:
score
=
float
(
field
[
4
])
except
:
raise
SyntaxError
(
'Cannot convert score to float at line %d of file "%s": %s'
%
(
i
,
filename
,
l
))
yield
(
field
[
0
],
field
[
1
],
field
[
2
],
field
[
3
],
score
)
reader
=
csv
.
reader
(
open_file
(
filename
,
mode
=
'rb'
),
delimiter
=
' '
)
for
splits
in
reader
:
splits
[
-
1
]
=
float
(
splits
[
-
1
])
yield
splits
def
split_five_column
(
filename
):
...
...
@@ -265,8 +249,8 @@ def split_five_column(filename):
"""
score_lines
=
load_score_with_generator
(
filename
,
5
)
return
get_negatives_positives_from_generator
(
score_lines
)
score_lines
=
four_column
(
filename
)
return
_split_scores
(
score_lines
,
2
)
def
cmc_five_column
(
filename
):
...
...
@@ -313,62 +297,6 @@ def cmc_five_column(filename):
return
_convert_cmc_scores
(
neg_dict
,
pos_dict
)
# Field names given to the columns of a score file, keyed by the number of
# columns in that file.  ``load_score_with_generator`` passes the selected
# tuple as ``fieldnames`` to :py:class:`csv.DictReader`.
COLUMNS = {
    4: ('claimed_id', 'real_id', 'test_label', 'score'),
    5: ('claimed_id', 'model_label', 'real_id', 'test_label', 'score')
}
def load_score_with_generator(filename, ncolumns=None):
    """Load scores using :py:class:`csv.DictReader` and yield the scores line by line in a dictionary.

    Parameters:

      filename (:py:class:`str`, ``file-like``): The file object that will be
        opened with :py:func:`open_file` containing the scores.

      ncolumns (:py:class:`int`, optional): 4, 5 or None (the default),
        specifying the number of columns in the score file. If None is provided,
        the number of columns will be guessed from the first line of the file.
        If the guess fails or is neither 4 nor 5, a warning is logged and the
        4 column format is assumed.

    Yields:

      line: A dictionary which contains not only the actual ``score`` (converted
        to ``float``) but also the ``claimed_id``, ``real_id``, ``test_label``
        (and ``['model_label']``)

    Raises:

      ValueError: If ``ncolumns`` is given explicitly and is neither 4 nor 5.
        As this is a generator, the error surfaces on the first iteration.
    """
    if ncolumns is None:
        guessed = None
        f = open_file(filename)
        try:
            guessed = len(f.readline().split())
        except Exception:
            # Best effort only: fall through to the 4 column default below.
            guessed = None
        finally:
            f.close()
        if guessed not in COLUMNS:
            # Covers both a failed read and an unusable first line (empty
            # file, blank line, unexpected number of fields), which would
            # otherwise end in a KeyError on COLUMNS[ncolumns].
            logger.warning(
                'Could not guess the number of columns in file: %s. '
                'Assuming 4 column format.', filename)
            guessed = 4
        ncolumns = guessed
    elif ncolumns not in (4, 5):
        raise ValueError("ncolumns of 4 and 5 are supported only.")

    f = open_file(filename, mode='rb')
    try:
        # The csv module needs text lines on Python 3, but the file is opened
        # in binary mode; decode lazily.  On Python 2 ``bytes is str``, so the
        # lines are passed through untouched.
        text_lines = (
            raw.decode('utf-8')
            if isinstance(raw, bytes) and not isinstance(raw, str) else raw
            for raw in f
        )
        reader = csv.DictReader(
            text_lines, fieldnames=COLUMNS[ncolumns], delimiter=' ')
        for splits in reader:
            splits['score'] = float(splits['score'])
            yield splits
    finally:
        # Release the handle when the generator is exhausted, closed or
        # garbage collected.
        f.close()
def get_negatives_positives_from_generator(score_lines):
    """Split the lines of :py:func:`load_score_with_generator` into negatives and positives.

    This function aims to replace split_four_column and split_five_column but
    takes a different input. It's up to you to use which one.

    Parameters:

      score_lines: An iterable of mappings providing the keys ``claimed_id``,
        ``real_id`` and ``score``.

    Returns:

      tuple: ``(negatives, positives)`` as two :py:class:`numpy.ndarray`.  A
        score is positive when ``claimed_id`` equals ``real_id`` and negative
        otherwise.
    """
    genuine_scores = []
    impostor_scores = []
    for entry in score_lines:
        if entry['claimed_id'] == entry['real_id']:
            genuine_scores.append(entry['score'])
        else:
            impostor_scores.append(entry['score'])
    return (numpy.array(impostor_scores), numpy.array(genuine_scores))
def
load_score
(
filename
,
ncolumns
=
None
):
"""Load scores using numpy.loadtxt and return the data as a numpy array.
...
...
@@ -482,6 +410,17 @@ def dump_score(filename, score_lines):
numpy
.
savetxt
(
filename
,
score_lines
,
fmt
=
fmt
)
def _split_scores(score_lines, real_id_index, claimed_id_index=0, score_index=-1):
    """Take the output of :py:func:`four_column` or :py:func:`five_column` and return negatives and positives.

    Parameters:

      score_lines: An iterable of indexable rows (one per score line).

      real_id_index (int): Position of the real identity inside a row.

      claimed_id_index (int, optional): Position of the claimed identity inside
        a row; defaults to the first element.

      score_index (int, optional): Position of the score inside a row; defaults
        to the last element.

    Returns:

      tuple: ``(negatives, positives)`` as two :py:class:`numpy.ndarray`.  A
        row counts as positive when its claimed and real identities are equal.
    """
    genuine, impostor = [], []
    for row in score_lines:
        if row[claimed_id_index] == row[real_id_index]:
            genuine.append(row[score_index])
        else:
            impostor.append(row[score_index])
    return (numpy.array(impostor), numpy.array(genuine))
def
_convert_cmc_scores
(
neg_dict
,
pos_dict
):
"""Converts the negative and positive scores read with
:py:func:`cmc_four_column` or :py:func:`cmc_five_column` into a format that
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment