From c60a6b1699a6bb5c437ed2846f3f718b3a38c098 Mon Sep 17 00:00:00 2001 From: Yannick DAYER <yannick.dayer@idiap.ch> Date: Wed, 1 May 2024 15:52:33 +0200 Subject: [PATCH] fix(scores): pandas to read score column as float. When loading a score file, indicate to pandas to interpret the score column as float (prevent issues when the first elements are empty). --- src/bob/bio/base/score/load.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/bob/bio/base/score/load.py b/src/bob/bio/base/score/load.py index 3ce39f3..60b9fe6 100644 --- a/src/bob/bio/base/score/load.py +++ b/src/bob/bio/base/score/load.py @@ -9,6 +9,7 @@ import logging import os import tarfile +from collections import defaultdict from pathlib import Path import dask.dataframe @@ -162,7 +163,9 @@ def get_split_dataframe(filename): :ref:`bob.bio.base.pipeline_simple_advanced_features`) """ - df = dask.dataframe.read_csv(filename) + df = dask.dataframe.read_csv( + filename, dtype=defaultdict(lambda: str, {"score": float}) + ) genuines = df[df.probe_subject_id == df.bio_ref_subject_id] impostors = df[df.probe_subject_id != df.bio_ref_subject_id] @@ -195,7 +198,9 @@ def split_csv_scores(filename, score_column: str = "score"): :ref:`bob.bio.base.pipeline_simple_advanced_features`) """ - df = dask.dataframe.read_csv(filename) + df = dask.dataframe.read_csv( + filename, dtype=defaultdict(lambda: str, {"score": float}) + ) genuines = df[df.probe_subject_id == df.bio_ref_subject_id] impostors = df[df.probe_subject_id != df.bio_ref_subject_id] -- GitLab