Commit 1b84b956 authored by Tiago de Freitas Pereira
Browse files

These operations should be delayed to avoid larger-than-memory issues

parent ffa826bb
Pipeline #52141 passed with stage
in 16 minutes and 31 seconds
......@@ -165,7 +165,7 @@ def get_split_dataframe(filename):
genuines = df[df.probe_subject_id == df.bio_ref_subject_id]
impostors = df[df.probe_subject_id != df.bio_ref_subject_id]
-return impostors.compute(), genuines.compute()
+return impostors, genuines
def split_csv_writer(filename):
