Commit eb286c33 authored by Tiago de Freitas Pereira

Merge branch 'frame-container-load-subset' into 'master'

Improve FrameContainer and FrameSelector to be more memory efficient

See merge request !40
parents fe242495 b014e4a5
Pipeline #36898 passed with stages
in 12 minutes and 43 seconds
......@@ -9,7 +9,7 @@ class Base(
frame_selector : :any:``
A frame selector class to define which frames of the video to use.
read_original_data : callable
read_original_data : ``callable``
A function with the signature of
``data = read_original_data(biofile, directory, extension)``
that will be used to load the data from biofiles. By default the
......@@ -28,7 +28,7 @@ class FailSafeVideo(Base):
The maximum number of frames that an annotation is valid for next frames.
This value should be positive. If you want to set max_age to infinite,
then you can use the :any:`` instead.
validator : callable
validator : ``callable``
A function that takes the annotations of a frame and validates it.
......@@ -10,7 +10,7 @@ def normalize_annotations(annotations, validator, max_age=-1):
strings (starting from 0). The inside dicts contain annotations for that
frame. The dictionary needs to be an ordered dict in order for this to
validator : callable
validator : ``callable``
Takes a dict (annotations) and returns True if the annotations are valid.
This can be a check based on minimal face size for example: see
......@@ -41,6 +41,12 @@ def test_frame_container():
# test as_array method
assert numpy.allclose(read.as_array(), test_data)
# check loading only a part of the hdf5
with as f:
# load only a subset of the FrameContainer
fc =, selection_style="spread", max_number_of_frames=10)
assert len(fc) == 10, len(fc)
if os.path.exists(filename):
......@@ -113,3 +119,9 @@ def test_frame_selector():
assert frames[1][0] == '6'
assert numpy.allclose(frames[1][1], video_data[6])
assert frames[1][2] is None
# test support
path ="testvideo.avi", __name__)
fs ="spread", max_number_of_frames=20)
fc = fs( # only loads 20 frames into memory
assert len(fc) == 20, len(fc)
......@@ -3,15 +3,68 @@
import numpy
import logging
logger = logging.getLogger("")
logger = logging.getLogger(__name__)
def select_frames(count, max_number_of_frames, selection_style, step_size):
"""Returns indices of the frames to be selected given the parameters.
Different selection styles are supported:
* first : The first frames are selected
* spread : Frames are selected to be taken from the whole video with equal spaces in between.
* step : Frames are selected every ``step_size`` indices, starting at
``step_size/2`` **Think twice if you want to have that when giving FrameContainer data!**
* all : All frames are selected unconditionally.
count : int
Total number of frames that are available
max_number_of_frames : int
The maximum number of frames to be selected. Ignored when selection_style is
selection_style : str
One of (``first``, ``spread``, ``step``, ``all``). See above.
step_size : int
Only useful when ``selection_style`` is ``step``.
A range of frames to be selected.
If ``selection_style`` is not one of the supported ones.
if selection_style == "first":
# get the first frames (limited by all frames)
indices = range(0, min(count, max_number_of_frames))
elif selection_style == "spread":
# get frames linearly spread over all frames
indices =, max_number_of_frames)
elif selection_style == "step":
indices = range(step_size // 2, count, step_size)[:max_number_of_frames]
elif selection_style == "all":
indices = range(0, count)
raise ValueError(f"Invalid selection style: {selection_style}")
return indices
class FrameContainer:
"""A class for reading, manipulating and saving video content.
def __init__(self, hdf5 = None, load_function =
def __init__(self, hdf5=None,, **kwargs):
self._frames = []
if hdf5 is not None:
self.load(hdf5, load_function)
......@@ -29,37 +82,87 @@ class FrameContainer:
"""Indexer (mostly used in tests)."""
return self._frames[i]
def add(self, frame_id, frame, quality = None):
def add(self, frame_id, frame, quality=None):
"""Adds the frame with the given id and the given quality."""
self._frames.append((str(frame_id), frame, quality))
def load(self, hdf5, load_function =
def load(
"""Loads a previously saved FrameContainer into the current FrameContainer.
hdf5 : :any:``
An opened HDF5 file to load the data from
load_function : ``callable``, ``optional``
the function to be used on the hdf5 object to load each frame
selection_style : str, ``optional``
See :any:`select_frames`
max_number_of_frames : int, ``optional``
See :any:`select_frames`
step_size : int, ``optional``
See :any:`select_frames`
returns itself.
If no frames can be loaded from the hdf5 file.
If the selection_style is not ``all`` and you are trying to load an old format FrameContainer.
self._frames = []
if hdf5.has_group("FrameIndexes"):"FrameIndexes")
indices = sorted(int(i) for i in hdf5.keys(relative=True))
indices = select_frames(
frame_ids = [hdf5[str(i)] for i in indices]"..")
if selection_style != "all":
raise ValueError(
"selection_style must be all when loading FrameContainers with "
"the old format. Try re-writing the FrameContainers again "
"to avoid this."
frame_ids = hdf5.sub_groups(relative=True, recursive=False)
# Read content (frames) from HDF5File
for path in frame_ids:
# extract frame_id
if path[:6] == 'Frame_':
if path[:6] == "Frame_":
frame_id = str(path[6:])
# Read data
data = load_function(hdf5)
# read quality, if present
quality ="FrameQuality") if hdf5.has_key("FrameQuality") else None
quality = hdf5["FrameQuality"] if "FrameQuality" in hdf5 else None
self.add(frame_id, data, quality)"..")
if not len(self):
raise IOError("Could not load data as a Frame Container from file %s" % hdf5.filename)
raise IOError(
"Could not load data as a Frame Container from file %s" % hdf5.filename
return self
def save(self, hdf5, save_function =
def save(self, hdf5,
""" Save the content to the given HDF5 File.
The contained data will be written using the given save_function."""
if not len(self):
......@@ -81,11 +184,15 @@ class FrameContainer:"..")
def is_similar_to(self, other):
if len(self) != len(other): return False
for a,b in zip(self, other):
if a[0] != b[0]: return False
if abs(a[2] - b[2]) > 1e-8: return False
if not numpy.allclose(a[1], b[1]): return False
if len(self) != len(other):
return False
for a, b in zip(self, other):
if a[0] != b[0]:
return False
if abs(a[2] - b[2]) > 1e-8:
return False
if not numpy.allclose(a[1], b[1]):
return False
return True
def as_array(self):
......@@ -97,21 +204,25 @@ class FrameContainer:
The frames are returned as an array with the shape of (n_frames, ...)
like a video.
def _reader(frame):
# Each frame is assumed to be an image here. We make it a single frame
# video here by expanding its dimensions. This way it can be used with
# the vstack_features function.
return frame[1][None, ...]
return, self._frames, same_size=True)
def save_compressed(frame_container, filename, save_function, create_link=True):
hdf5 =, 'w')
hdf5 =, "w"), save_function), hdf5, create_link=create_link)
del hdf5
def load_compressed(filename, load_function):
hdf5 =, 'r')
hdf5 =, "r")
fc = FrameContainer(hdf5, load_function), hdf5)
del hdf5
......@@ -10,9 +10,11 @@ import os
import six
import logging
logger = logging.getLogger("")
from .FrameContainer import FrameContainer
logger = logging.getLogger(__name__)
from .FrameContainer import FrameContainer, select_frames
class FrameSelector:
"""A class for selecting frames from videos.
......@@ -27,18 +29,17 @@ class FrameSelector:
* quality (only valid for FrameContainer data) : Select the frames based on the highest internally stored quality value
def __init__(self,
max_number_of_frames = 20,
selection_style = "spread",
step_size = 10
if selection_style not in ('first', 'spread', 'step', 'all'):
raise ValueError("Unknown selection style '%s', choose one of ('first', 'spread', 'step', 'all')" % selection_style)
def __init__(self, max_number_of_frames=20, selection_style="spread", step_size=10):
if selection_style not in ("first", "spread", "step", "all"):
raise ValueError(
"Unknown selection style '%s', choose one of ('first', 'spread', 'step', 'all')"
% selection_style
self.selection = selection_style
self.max_frames = max_number_of_frames
self.step = step_size
def __call__(self, data, load_function =
def __call__(self, data,
"""Selects frames and returns them in a FrameContainer.
Different ``data`` parameters are accepted:
......@@ -46,6 +47,7 @@ class FrameSelector:
* ``str`` : A video file to read and select frames from
* ``[str]`` : A list of image names to select from
* ``numpy.array`` (3D or 4D): A video to select frames from
* ```` : An instance of
When giving ``str`` or ``[str]`` data, the given ``load_function`` is used to read the data from file.
......@@ -55,17 +57,17 @@ class FrameSelector:
data = load_function(data)
# first, get the indices
if isinstance(data,
count = data.number_of_frames
count = len(data)
if self.selection == 'first':
# get the first frames (limited by all frames)
indices = range(0, min(count, self.max_frames))
elif self.selection == 'spread':
# get frames linearly spread over all frames
indices =, self.max_frames)
elif self.selection == 'step':
indices = range(self.step//2, count, self.step)[:self.max_frames]
elif self.selection == 'all':
indices = range(0, count)
indices = select_frames(
# now, iterate through the data
fc = FrameContainer()
......@@ -75,6 +77,10 @@ class FrameSelector:
for i, frame in enumerate(data):
if i in indices:
elif isinstance(data,
for i, frame in enumerate(data):
if i in indices:
fc.add(i, frame)
elif isinstance(data, numpy.ndarray):
# select video frames
for i in indices:
......@@ -90,4 +96,7 @@ class FrameSelector:
def __str__(self):
"""Writes the parameters of the FrameSelector as a string."""
return "FrameSelector(max_number_of_frames=%d, selection_style='%s', step_size=%d)" % (self.max_frames, self.selection, self.step)
return (
"FrameSelector(max_number_of_frames=%d, selection_style='%s', step_size=%d)"
% (self.max_frames, self.selection, self.step)
from .FrameContainer import FrameContainer, load_compressed, save_compressed
from .FrameContainer import FrameContainer, load_compressed, save_compressed, select_frames
from .FrameSelector import FrameSelector
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment