# Provenance (recovered from the git patch header this code was shipped in):
#   Commit:  0b88d486322391d4f0cd4b9c3129936196d8adc2
#   From:    Tiago Freitas Pereira <tiagofrepereira@gmail.com>
#   Date:    Mon, 16 Mar 2020 17:43:24 +0100
#   Subject: [PATCH] Created functions to break down a sampleset in order to
#            use the estimator.fit properly and a function that transform
#            samplesets
#   File:    bob/pipelines/sample.py (index b5da013..2fffa92, mode 100644)
#   Stat:    1 file changed, 39 insertions(+); hunk @@ -1,6 +1,45 @@
#   Signature: GitLab
"""Base definition of sample"""


def samplesets_to_samples(samplesets):
    """Flatten sample sets into the ``X`` / ``y`` pair expected by ``fit``.

    Given an iterable of :py:class:`SampleSet`, collect all of their samples
    into one flat list and collect the key of every set into a second list.

    This is meant to feed the ``fit(X, y)`` method of scikit-learn style
    estimators. See https://scikit-learn.org/stable/developers/develop.html
    for more information.

    Parameters
    ----------
    samplesets : list
        List of :py:class:`SampleSet`. Each element must expose a ``samples``
        iterable and a ``key`` attribute.

    Returns
    -------
    X : list
        The samples of every sample set, concatenated in input order.
    y : list
        The ``key`` of every sample set, one entry per sample set.

    """
    # NOTE(review): ``y`` gets one entry per *sample set* while ``X`` gets one
    # entry per *sample*, so ``len(X) != len(y)`` whenever a set does not hold
    # exactly one sample. Kept as-is because callers may rely on this shape --
    # confirm whether the key should be repeated for each sample.
    # TODO: Is there a way to make this operation more efficient? numpy.arrays?
    X = []
    y = []

    # Single pass on purpose: ``samplesets`` may be a one-shot iterator.
    for sample_set in samplesets:
        X.extend(sample_set.samples)
        y.append(sample_set.key)

    return X, y


def transform_sample_sets(transformer, sample_sets):
    """Apply ``transformer.transform`` to the samples of each sample set.

    Parameters
    ----------
    transformer : object
        Any object exposing a ``transform(samples)`` method.
    sample_sets : list
        List of :py:class:`SampleSet`; each element must expose ``samples``.

    Returns
    -------
    list
        One new :py:class:`SampleSet` per input set, built from the
        transformed samples with the original set passed as ``parent``.

    """
    # ``SampleSet`` is not defined in the visible hunk; it is presumably
    # declared further down in this module -- TODO confirm.
    return [
        SampleSet(transformer.transform(sset.samples), parent=sset)
        for sset in sample_sets
    ]


# --- end of added lines; the patch context continues in the target file with:
#     def _copy_attributes(s, d):
#         """Copies attributes from a dictionary to self """
#     (its body lies outside this hunk and is intentionally not reproduced)