Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
bob
bob.measure
Commits
0c19f066
Commit
0c19f066
authored
Oct 27, 2016
by
Manuel Günther
Browse files
Rewrote split_four/five_column to use four/five_column; reimplemented four/five_column
parent
7ed0fdca
Pipeline
#5135
failed with stages
in 5 minutes and 50 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
bob/measure/load.py
View file @
0c19f066
...
...
@@ -79,7 +79,7 @@ def four_column(filename):
opened with :py:func:`open_file` containing the scores.
Return
s:
Yield
s:
str: The claimed identity -- the client name of the model that was used in
the comparison
...
...
@@ -93,18 +93,10 @@ def four_column(filename):
"""
for
i
,
l
in
enumerate
(
open_file
(
filename
)):
if
isinstance
(
l
,
bytes
):
l
=
l
.
decode
(
'utf-8'
)
s
=
l
.
strip
()
if
len
(
s
)
==
0
or
s
[
0
]
==
'#'
:
continue
#empty or comment
field
=
[
k
.
strip
()
for
k
in
s
.
split
()]
if
len
(
field
)
<
4
:
raise
SyntaxError
(
'Line %d of file "%s" is invalid: %s'
%
(
i
,
filename
,
l
))
try
:
score
=
float
(
field
[
3
])
except
:
raise
SyntaxError
(
'Cannot convert score to float at line %d of file "%s": %s'
%
(
i
,
filename
,
l
))
yield
(
field
[
0
],
field
[
1
],
field
[
2
],
score
)
reader
=
csv
.
reader
(
open_file
(
filename
,
mode
=
'rb'
),
delimiter
=
' '
)
for
splits
in
reader
:
splits
[
-
1
]
=
float
(
splits
[
-
1
])
yield
splits
def
split_four_column
(
filename
):
...
...
@@ -136,8 +128,8 @@ def split_four_column(filename):
"""
score_lines
=
load_score_with_generator
(
filename
,
4
)
return
get_negatives_positives_from_generator
(
score_lines
)
score_lines
=
four_column
(
filename
)
return
_split_scores
(
score_lines
,
1
)
def
cmc_four_column
(
filename
):
...
...
@@ -206,7 +198,7 @@ def five_column(filename):
opened with :py:func:`open_file` containing the scores.
Return
s:
Yield
s:
str: The claimed identity -- the client name of the model that was used in
the comparison
...
...
@@ -222,18 +214,10 @@ def five_column(filename):
"""
for
i
,
l
in
enumerate
(
open_file
(
filename
)):
if
isinstance
(
l
,
bytes
):
l
=
l
.
decode
(
'utf-8'
)
s
=
l
.
strip
()
if
len
(
s
)
==
0
or
s
[
0
]
==
'#'
:
continue
#empty or comment
field
=
[
k
.
strip
()
for
k
in
s
.
split
()]
if
len
(
field
)
<
5
:
raise
SyntaxError
(
'Line %d of file "%s" is invalid: %s'
%
(
i
,
filename
,
l
))
try
:
score
=
float
(
field
[
4
])
except
:
raise
SyntaxError
(
'Cannot convert score to float at line %d of file "%s": %s'
%
(
i
,
filename
,
l
))
yield
(
field
[
0
],
field
[
1
],
field
[
2
],
field
[
3
],
score
)
reader
=
csv
.
reader
(
open_file
(
filename
,
mode
=
'rb'
),
delimiter
=
' '
)
for
splits
in
reader
:
splits
[
-
1
]
=
float
(
splits
[
-
1
])
yield
splits
def
split_five_column
(
filename
):
...
...
@@ -265,8 +249,8 @@ def split_five_column(filename):
"""
score_lines
=
load_score_with_generator
(
filename
,
5
)
return
get_negatives_positives_from_generator
(
score_lines
)
score_lines
=
four_column
(
filename
)
return
_split_scores
(
score_lines
,
2
)
def
cmc_five_column
(
filename
):
...
...
@@ -313,62 +297,6 @@ def cmc_five_column(filename):
return
_convert_cmc_scores
(
neg_dict
,
pos_dict
)
COLUMNS
=
{
4
:
(
'claimed_id'
,
'real_id'
,
'test_label'
,
'score'
),
5
:
(
'claimed_id'
,
'model_label'
,
'real_id'
,
'test_label'
,
'score'
)
}
def
load_score_with_generator
(
filename
,
ncolumns
=
None
):
"""Load scores using :py:class:`csv.DictReader` and yield the scores line by line in a dictionary.
Parameters:
filename (:py:class:`str`, ``file-like``): The file object that will be
opened with :py:func:`open_file` containing the scores.
ncolumns (:py:class:`int`, optional): 4, 5 or None (the default),
specifying the number of columns in the score file. If None is provided,
the number of columns will be guessed.
Yields:
line: A dictionary which contains not only the actual ``score`` but also the
``claimed_id``, ``real_id``, ``test_label`` (and ``['model_label']``)
"""
if
ncolumns
is
None
:
f
=
open_file
(
filename
)
try
:
line
=
f
.
readline
()
ncolumns
=
len
(
line
.
split
())
except
Exception
:
logger
.
warn
(
'Could not guess the number of columns in file: {}. '
'Assuming 4 column format.'
.
format
(
filename
))
ncolumns
=
4
finally
:
f
.
close
()
elif
ncolumns
not
in
(
4
,
5
):
raise
ValueError
(
"ncolumns of 4 and 5 are supported only."
)
reader
=
csv
.
DictReader
(
open_file
(
filename
,
mode
=
'rb'
),
fieldnames
=
COLUMNS
[
ncolumns
],
delimiter
=
' '
)
for
splits
in
reader
:
splits
[
'score'
]
=
float
(
splits
[
'score'
])
yield
splits
def
get_negatives_positives_from_generator
(
score_lines
):
"""Take the output of :py:func:`load_score_with_generator` and return negatives and positives. This
function aims to replace split_four_column and split_five_column but takes a
different input. It's up to you to use which one.
"""
positives
,
negatives
=
[],
[]
for
line
in
score_lines
:
which
=
positives
if
line
[
'claimed_id'
]
==
line
[
'real_id'
]
else
negatives
which
.
append
(
line
[
'score'
])
return
(
numpy
.
array
(
negatives
),
numpy
.
array
(
positives
))
def
load_score
(
filename
,
ncolumns
=
None
):
"""Load scores using numpy.loadtxt and return the data as a numpy array.
...
...
@@ -482,6 +410,17 @@ def dump_score(filename, score_lines):
numpy
.
savetxt
(
filename
,
score_lines
,
fmt
=
fmt
)
def
_split_scores
(
score_lines
,
real_id_index
,
claimed_id_index
=
0
,
score_index
=
-
1
):
"""Take the output of :py:func:`four_column` or :py:func:`five_column` and return negatives and positives.
"""
positives
,
negatives
=
[],
[]
for
line
in
score_lines
:
which
=
positives
if
line
[
claimed_id_index
]
==
line
[
real_id_index
]
else
negatives
which
.
append
(
line
[
score_index
])
return
(
numpy
.
array
(
negatives
),
numpy
.
array
(
positives
))
def
_convert_cmc_scores
(
neg_dict
,
pos_dict
):
"""Converts the negative and positive scores read with
:py:func:`cmc_four_column` or :py:func:`cmc_four_column` into a format that
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment