From 0b88d486322391d4f0cd4b9c3129936196d8adc2 Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Mon, 16 Mar 2020 17:43:24 +0100
Subject: [PATCH] Created functions to break down a sampleset in order to use
 the estimator.fit properly and a function that transforms samplesets

---
 bob/pipelines/sample.py | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/bob/pipelines/sample.py b/bob/pipelines/sample.py
index b5da013..2fffa92 100644
--- a/bob/pipelines/sample.py
+++ b/bob/pipelines/sample.py
@@ -1,6 +1,45 @@
 """Base definition of sample"""
 
 
+def samplesets_to_samples(samplesets):
+    """Flatten sample sets into parallel ``X`` (samples) and ``y`` (keys) lists.
+
+    Every sample of every :py:class:`SampleSet` is appended to ``X`` and the key
+    of the set it came from is appended to ``y``, so both lists always have the
+    same length, as the ``fit(X, y)`` convention of scikit-learn estimators
+    expects (see https://scikit-learn.org/stable/developers/develop.html).
+
+    Parameters
+    ----------
+    samplesets : list
+        List of :py:class:`SampleSet`; ``samples`` and ``key`` are read from each.
+
+    Returns
+    -------
+    X : list
+        All samples, in the order in which the sets and their samples appear.
+    y : list
+        ``y[i]`` is the key of the sample set that ``X[i]`` was taken from.
+    """
+    # TODO: Is there a way to make this operation more efficient? numpy.arrays?
+    X, y = [], []
+    for sampleset in samplesets:
+        # Materialize once: ``samples`` may be a one-shot iterable without len().
+        samples = list(sampleset.samples)
+        X.extend(samples)
+        # Repeat the key per sample so X and y stay aligned for multi-sample sets.
+        y.extend([sampleset.key] * len(samples))
+    return X, y
+
+
+def transform_sample_sets(transformer, sample_sets):
+    """Return a new ``SampleSet`` of transformed samples for each input set."""
+    result = []
+    for sset in sample_sets:
+        result.append(SampleSet(transformer.transform(sset.samples), parent=sset))
+    return result
+
+
 def _copy_attributes(s, d):
     """Copies attributes from a dictionary to self
     """
-- 
GitLab