diff --git a/bob/io/base/__init__.py b/bob/io/base/__init__.py index 436943a423aeada364f7558392b7dda0ad95219e..33b2e2d769cb843866a43a3a3dc88b235d00da83 100644 --- a/bob/io/base/__init__.py +++ b/bob/io/base/__init__.py @@ -1,4 +1,4 @@ -from ._library import File, HDF5File +from ._library import File, HDF5File, extensions from . import version from .version import module as __version__ from .version import api as __api_version__ diff --git a/bob/io/base/bobskin.cpp b/bob/io/base/bobskin.cpp index c1bc9caa9b0dbea69e84c0d2f4b1320c2b9684ed..494bdafc26fe3520f67b7a5f826d8a9982ba3557 100644 --- a/bob/io/base/bobskin.cpp +++ b/bob/io/base/bobskin.cpp @@ -8,7 +8,7 @@ #include "bobskin.h" #include <stdexcept> -bobskin::bobskin(PyObject* array, bob::core::array::ElementType eltype) { +bobskin::bobskin(PyObject* array, bob::io::base::array::ElementType eltype) { if (!PyArray_CheckExact(array)) { PyErr_SetString(PyExc_TypeError, "input object to bobskin constructor is not (exactly) a numpy.ndarray"); @@ -23,7 +23,7 @@ bobskin::bobskin(PyObject* array, bob::core::array::ElementType eltype) { } -bobskin::bobskin(PyArrayObject* array, bob::core::array::ElementType eltype) { +bobskin::bobskin(PyArrayObject* array, bob::io::base::array::ElementType eltype) { m_type.set<npy_intp>(eltype, PyArray_NDIM((PyArrayObject*)array), PyArray_DIMS((PyArrayObject*)array), @@ -33,42 +33,42 @@ bobskin::bobskin(PyArrayObject* array, bob::core::array::ElementType eltype) { } -static bob::core::array::ElementType signed_integer_type(int bits) { +static bob::io::base::array::ElementType signed_integer_type(int bits) { switch(bits) { case 8: - return bob::core::array::t_int8; + return bob::io::base::array::t_int8; case 16: - return bob::core::array::t_int16; + return bob::io::base::array::t_int16; case 32: - return bob::core::array::t_int32; + return bob::io::base::array::t_int32; case 64: - return bob::core::array::t_int64; + return bob::io::base::array::t_int64; default: PyErr_Format(PyExc_TypeError, 
"unsupported signed integer element type with %d bits", bits); } - return bob::core::array::t_unknown; + return bob::io::base::array::t_unknown; } -static bob::core::array::ElementType unsigned_integer_type(int bits) { +static bob::io::base::array::ElementType unsigned_integer_type(int bits) { switch(bits) { case 8: - return bob::core::array::t_uint8; + return bob::io::base::array::t_uint8; case 16: - return bob::core::array::t_uint16; + return bob::io::base::array::t_uint16; case 32: - return bob::core::array::t_uint32; + return bob::io::base::array::t_uint32; case 64: - return bob::core::array::t_uint64; + return bob::io::base::array::t_uint64; default: PyErr_Format(PyExc_TypeError, "unsupported unsigned signed integer element type with %d bits", bits); } - return bob::core::array::t_unknown; + return bob::io::base::array::t_unknown; } -static bob::core::array::ElementType num_to_type (int num) { +static bob::io::base::array::ElementType num_to_type (int num) { switch(num) { case NPY_BOOL: - return bob::core::array::t_bool; + return bob::io::base::array::t_bool; //signed integers case NPY_BYTE: @@ -96,34 +96,34 @@ static bob::core::array::ElementType num_to_type (int num) { //floats case NPY_FLOAT32: - return bob::core::array::t_float32; + return bob::io::base::array::t_float32; case NPY_FLOAT64: - return bob::core::array::t_float64; + return bob::io::base::array::t_float64; #ifdef NPY_FLOAT128 case NPY_FLOAT128: - return bob::core::array::t_float128; + return bob::io::base::array::t_float128; #endif //complex case NPY_COMPLEX64: - return bob::core::array::t_complex64; + return bob::io::base::array::t_complex64; case NPY_COMPLEX128: - return bob::core::array::t_complex128; + return bob::io::base::array::t_complex128; #ifdef NPY_COMPLEX256 case NPY_COMPLEX256: - return bob::core::array::t_complex256; + return bob::io::base::array::t_complex256; #endif default: PyErr_Format(PyExc_TypeError, "unsupported NumPy element type (%d)", num); } - return 
bob::core::array::t_unknown; + return bob::io::base::array::t_unknown; } bobskin::bobskin(PyBlitzArrayObject* array) { - bob::core::array::ElementType eltype = num_to_type(array->type_num); - if (eltype == bob::core::array::t_unknown) { + bob::io::base::array::ElementType eltype = num_to_type(array->type_num); + if (eltype == bob::io::base::array::t_unknown) { throw std::runtime_error("error is already set"); } m_type.set<Py_ssize_t>(num_to_type(array->type_num), array->ndim, @@ -143,7 +143,7 @@ void bobskin::set(boost::shared_ptr<interface>) { throw std::runtime_error("error is already set"); } -void bobskin::set (const bob::core::array::typeinfo&) { +void bobskin::set (const bob::io::base::array::typeinfo&) { PyErr_SetString(PyExc_NotImplementedError, "setting C++ bobskin with (const typeinfo&) implemented - DEBUG ME!"); throw std::runtime_error("error is already set"); } diff --git a/bob/io/base/bobskin.h b/bob/io/base/bobskin.h index 8343daf566826e454feee632632554142a93c61e..7e69132a01e663b86dcd645f8301f8f6e3e33816 100644 --- a/bob/io/base/bobskin.h +++ b/bob/io/base/bobskin.h @@ -2,7 +2,7 @@ * @author Andre Anjos <andre.anjos@idiap.ch> * @date Tue 5 Nov 22:09:07 2013 * - * @brief A pythonic version of bob::core::array::interface, with minimal + * @brief A pythonic version of bob::io::base::array::interface, with minimal * functionality. 
*/ @@ -11,7 +11,7 @@ #include <Python.h> -#include <bob/core/array.h> +#include <bob.io.base/array.h> extern "C" { #include <bob.blitz/capi.h> @@ -21,19 +21,19 @@ extern "C" { /** * Wraps a PyArrayObject such that we can access it from bob::io */ -class bobskin: public bob::core::array::interface { +class bobskin: public bob::io::base::array::interface { public: //api /** * @brief Builds a new skin from an array like object */ - bobskin(PyObject* array, bob::core::array::ElementType eltype); + bobskin(PyObject* array, bob::io::base::array::ElementType eltype); /** * @brief Builds a new skin from a numpy array object */ - bobskin(PyArrayObject* array, bob::core::array::ElementType eltype); + bobskin(PyArrayObject* array, bob::io::base::array::ElementType eltype); /** * @brief Builds a new skin around a blitz array object @@ -60,12 +60,12 @@ class bobskin: public bob::core::array::interface { * @brief Re-allocates this interface taking into consideration new * requirements. The internal memory should be considered uninitialized. */ - virtual void set (const bob::core::array::typeinfo& req); + virtual void set (const bob::io::base::array::typeinfo& req); /** * @brief Type information for this interface. */ - virtual const bob::core::array::typeinfo& type() const { return m_type; } + virtual const bob::io::base::array::typeinfo& type() const { return m_type; } /** * @brief Borrows a reference from the underlying memory. 
This means @@ -85,7 +85,7 @@ class bobskin: public bob::core::array::interface { private: //representation - bob::core::array::typeinfo m_type; ///< type information + bob::io::base::array::typeinfo m_type; ///< type information void* m_ptr; ///< pointer to the data }; diff --git a/bob/io/base/codec.cpp b/bob/io/base/codec.cpp index fe5aad405a922f05c97bac4b9736cb4fa8e3c8bb..bfc3eb94de0f9b29371f3d4b9c6a85f45837b441 100644 --- a/bob/io/base/codec.cpp +++ b/bob/io/base/codec.cpp @@ -2,15 +2,15 @@ * @author Andre Anjos <andre.anjos@idiap.ch> * @date Wed 14 May 11:53:36 2014 CEST * - * @brief Bindings to bob::io::CodecRegistry + * @brief Bindings to bob::io::base::CodecRegistry */ #define BOB_IO_BASE_MODULE #include <bob.io.base/api.h> -int PyBobIoCodec_Register (const char* extension, const char* description, bob::io::file_factory_t factory) { - boost::shared_ptr<bob::io::CodecRegistry> instance = - bob::io::CodecRegistry::instance(); +int PyBobIoCodec_Register (const char* extension, const char* description, bob::io::base::file_factory_t factory) { + boost::shared_ptr<bob::io::base::CodecRegistry> instance = + bob::io::base::CodecRegistry::instance(); if (instance->isRegistered(extension)) { PyErr_Format(PyExc_RuntimeError, "codec for extension `%s' is already registered with description `%s' - in order to register a new codec for such an extension, first unregister the existing codec", extension, PyBobIoCodec_GetDescription(extension)); @@ -22,8 +22,8 @@ int PyBobIoCodec_Register (const char* extension, const char* description, bob:: } int PyBobIoCodec_Deregister (const char* extension) { - boost::shared_ptr<bob::io::CodecRegistry> instance = - bob::io::CodecRegistry::instance(); + boost::shared_ptr<bob::io::base::CodecRegistry> instance = + bob::io::base::CodecRegistry::instance(); if (!instance->isRegistered(extension)) { PyErr_Format(PyExc_RuntimeError, "there is no codec registered for extension `%s'", extension); @@ -35,14 +35,14 @@ int PyBobIoCodec_Deregister 
(const char* extension) { } int PyBobIoCodec_IsRegistered (const char* extension) { - boost::shared_ptr<bob::io::CodecRegistry> instance = - bob::io::CodecRegistry::instance(); + boost::shared_ptr<bob::io::base::CodecRegistry> instance = + bob::io::base::CodecRegistry::instance(); if (instance->isRegistered(extension)) return 1; return 0; } const char* PyBobIoCodec_GetDescription (const char* extension) { - boost::shared_ptr<bob::io::CodecRegistry> instance = - bob::io::CodecRegistry::instance(); + boost::shared_ptr<bob::io::base::CodecRegistry> instance = + bob::io::base::CodecRegistry::instance(); return instance->getDescription(extension); } diff --git a/bob/io/base/cpp/CSVFile.cpp b/bob/io/base/cpp/CSVFile.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1200523a09b74eee343a14686b39ba76995cc058 --- /dev/null +++ b/bob/io/base/cpp/CSVFile.cpp @@ -0,0 +1,314 @@ +/** + * @date Thu 10 May 2012 15:19:24 CEST + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief Code to read and write CSV files to/from arrays. CSV files are always + * treated as containing sequences of double precision numbers. + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include <sstream> +#include <fstream> +#include <string> +#include <boost/filesystem.hpp> +#include <boost/format.hpp> +#include <boost/filesystem.hpp> +#include <boost/make_shared.hpp> +#include <boost/tokenizer.hpp> + +#include <boost/shared_array.hpp> +#include <boost/algorithm/string.hpp> + +#include <bob.io.base/CodecRegistry.h> + +typedef boost::tokenizer<boost::escaped_list_separator<char> > Tokenizer; + +class CSVFile: public bob::io::base::File { + + public: //api + + /** + * Peeks the file contents for a type. We assume the element type to be + * always doubles. This method, effectively, only peaks for the total + * number of lines and the number of columns in the file. 
+ */ + void peek() { + + std::string line; + size_t line_number = 0; + size_t entries = 0; + std::streampos cur_pos = 0; + + m_file.seekg(0); //< returns to the begin of file and start reading... + + while (std::getline(m_file,line)) { + ++line_number; + m_pos.push_back(cur_pos); + cur_pos = m_file.tellg(); + Tokenizer tok(line); + size_t size = std::distance(tok.begin(), tok.end()); + if (!entries) entries = size; + else if (entries != size) { + boost::format m("line %d at file '%s' contains %d entries instead of %d (expected)"); + m % line_number % m_filename % size % entries; + throw std::runtime_error(m.str()); + } + } + + if (!line_number) { + m_newfile = true; + m_pos.clear(); + return; + } + + m_arrayset_type.dtype = bob::io::base::array::t_float64; + m_arrayset_type.nd = 1; + m_arrayset_type.shape[0] = entries; + m_arrayset_type.update_strides(); + + m_array_type = m_arrayset_type; + m_array_type.nd = 2; + m_array_type.shape[0] = m_pos.size(); + m_array_type.shape[1] = entries; + m_array_type.update_strides(); + } + + CSVFile(const char* path, char mode): + m_filename(path), + m_newfile(false) { + + if (mode == 'r' || (mode == 'a' && boost::filesystem::exists(path))) { //try peeking + + if (mode == 'r') + m_file.open(m_filename.c_str(), std::ios::in); + else if (mode == 'a') + m_file.open(m_filename.c_str(), std::ios::app|std::ios::in|std::ios::out); + if (!m_file.is_open()) { + boost::format m("cannot open file '%s' for reading or appending"); + m % path; + throw std::runtime_error(m.str()); + } + + peek(); ///< peek file properties + } + else { + m_file.open(m_filename.c_str(), std::ios::trunc|std::ios::in|std::ios::out); + + if (!m_file.is_open()) { + boost::format m("cannot open file '%s' for writing"); + m % path; + throw std::runtime_error(m.str()); + } + + m_newfile = true; + } + + //general precision settings, in case output is needed... 
+ m_file.precision(10); + m_file.setf(std::ios_base::scientific, std::ios_base::floatfield); + + } + + virtual ~CSVFile() { } + + virtual const char* filename() const { + return m_filename.c_str(); + } + + virtual const bob::io::base::array::typeinfo& type() const { + return m_arrayset_type; + } + + virtual const bob::io::base::array::typeinfo& type_all() const { + return m_array_type; + } + + virtual size_t size() const { + return m_pos.size(); + } + + virtual const char* name() const { + return s_codecname.c_str(); + } + + virtual void read_all(bob::io::base::array::interface& buffer) { + if (m_newfile) + throw std::runtime_error("uninitialized CSV file cannot be read"); + + if (!buffer.type().is_compatible(m_array_type)) buffer.set(m_array_type); + + //read contents + std::string line; + if (m_file.eof()) m_file.clear(); ///< clear current "end" state. + m_file.seekg(0); + double* p = static_cast<double*>(buffer.ptr()); + while (std::getline(m_file, line)) { + Tokenizer tok(line); + for(Tokenizer::iterator k=tok.begin(); k!=tok.end(); ++k) { + std::istringstream(*k) >> *(p++); + } + } + } + + virtual void read(bob::io::base::array::interface& buffer, size_t index) { + + if (m_newfile) + throw std::runtime_error("uninitialized CSV file cannot be read"); + + if (!buffer.type().is_compatible(m_arrayset_type)) + buffer.set(m_arrayset_type); + + if (index >= m_pos.size()) { + boost::format m("cannot array at position %d -- there is only %d entries at file '%s'"); + m % index % m_pos.size() % m_filename; + throw std::runtime_error(m.str()); + } + + //reads a specific line from the file. + std::string line; + if (m_file.eof()) m_file.clear(); ///< clear current "end" state. 
+ m_file.seekg(m_pos[index]); + if (!std::getline(m_file, line)) { + boost::format m("could not seek to line %u (offset %u) while reading file '%s'"); + m % index % m_pos[index] % m_filename; + throw std::runtime_error(m.str()); + } + Tokenizer tok(line); + double* p = static_cast<double*>(buffer.ptr()); + for(Tokenizer::iterator k=tok.begin(); k!=tok.end(); ++k) { + std::istringstream(*k) >> *(p++); + } + + } + + virtual size_t append (const bob::io::base::array::interface& buffer) { + + const bob::io::base::array::typeinfo& type = buffer.type(); + + if (m_newfile) { + if (type.nd != 1 || type.dtype != bob::io::base::array::t_float64) { + boost::format m("cannot append %s to file '%s' - CSV files only accept 1D double precision float arrays"); + m % type.str() % m_filename; + throw std::runtime_error(m.str()); + } + m_pos.clear(); + m_arrayset_type = m_array_type = type; + m_array_type.shape[1] = m_arrayset_type.shape[0]; + m_newfile = false; + } + + else { + + //check compatibility + if (!m_arrayset_type.is_compatible(buffer.type())) { + boost::format m("CSV file '%s' only accepts arrays of type %s"); + m % m_filename % m_arrayset_type.str(); + throw std::runtime_error(m.str()); + } + + } + + const double* p = static_cast<const double*>(buffer.ptr()); + if (m_pos.size()) m_file << std::endl; ///< adds a new line + m_pos.push_back(m_file.tellp()); ///< register start of line + for (size_t k=1; k<type.shape[0]; ++k) m_file << *(p++) << ","; + m_file << *(p++); + m_array_type.shape[0] = m_pos.size(); + m_array_type.update_strides(); + return (m_pos.size()-1); + + } + + virtual void write (const bob::io::base::array::interface& buffer) { + + const bob::io::base::array::typeinfo& type = buffer.type(); + + if (m_newfile) { + if (type.nd != 2 || type.dtype != bob::io::base::array::t_float64) { + boost::format m("cannot write %s to file '%s' - CSV files only accept a single 2D double precision float array as input"); + m % type.str() % m_filename; + throw 
std::runtime_error(m.str()); + } + const double* p = static_cast<const double*>(buffer.ptr()); + for (size_t l=1; l<type.shape[0]; ++l) { + m_pos.push_back(m_file.tellp()); + for (size_t k=1; k<type.shape[1]; ++k) m_file << *(p++) << ","; + m_file << *(p++) << std::endl; + } + for (size_t k=1; k<type.shape[1]; ++k) m_file << *(p++) << ","; + m_file << *(p++); + m_arrayset_type = type; + m_arrayset_type.nd = 1; + m_arrayset_type.shape[0] = type.shape[1]; + m_arrayset_type.update_strides(); + m_array_type = type; + m_newfile = false; + return; + } + + //TODO + throw std::runtime_error("Writing a 2D array to a CSV file that already contains entries is not implemented at the moment"); + + } + + private: //representation + std::fstream m_file; + std::string m_filename; + bool m_newfile; + bob::io::base::array::typeinfo m_array_type; + bob::io::base::array::typeinfo m_arrayset_type; + std::vector<std::streampos> m_pos; ///< dictionary of line starts + + static std::string s_codecname; + +}; + +std::string CSVFile::s_codecname = "bob.csv"; + +/** + * From this point onwards we have the registration procedure. If you are + * looking at this file for a coding example, just follow the procedure bellow, + * minus local modifications you may need to apply. + */ + +/** + * This defines the factory method F that can create codecs of this type. + * + * Here are the meanings of the mode flag that should be respected by your + * factory implementation: + * + * 'r': opens for reading only - no modifications can occur; it is an + * error to open a file that does not exist for read-only operations. + * 'w': opens for reading and writing, but truncates the file if it + * exists; it is not an error to open files that do not exist with + * this flag. + * 'a': opens for reading and writing - any type of modification can + * occur. If the file does not exist, this flag is effectively like + * 'w'. 
+ * + * Returns a newly allocated File object that can read and write data to the + * file using a specific backend. + * + * @note: This method can be static. + */ +static boost::shared_ptr<bob::io::base::File> make_file (const char* path, char mode) { + return boost::make_shared<CSVFile>(path, mode); +} + +/** + * Takes care of codec registration per se. + */ +static bool register_codec() { + + boost::shared_ptr<bob::io::base::CodecRegistry> instance = + bob::io::base::CodecRegistry::instance(); + + instance->registerExtension(".csv", "Comma-Separated Values", &make_file); + instance->registerExtension(".txt", "Comma-Separated Values", &make_file); + + return true; + +} + +static bool codec_registered = register_codec(); diff --git a/bob/io/base/cpp/CodecRegistry.cpp b/bob/io/base/cpp/CodecRegistry.cpp new file mode 100644 index 0000000000000000000000000000000000000000..229d5802f0eaf6538b83d440ebdeeda4ab862488 --- /dev/null +++ b/bob/io/base/cpp/CodecRegistry.cpp @@ -0,0 +1,98 @@ +/** + * @date Tue Oct 25 23:25:46 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief Implements the CodecRegistry class. 
+ * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include <vector> + +#include <boost/filesystem.hpp> +#include <boost/format.hpp> + +#include <bob.io.base/CodecRegistry.h> + +#include <bob/core/logging.h> + +boost::shared_ptr<bob::io::base::CodecRegistry> bob::io::base::CodecRegistry::instance() { + static boost::shared_ptr<bob::io::base::CodecRegistry> s_instance(new CodecRegistry()); + return s_instance; +} + +void bob::io::base::CodecRegistry::deregisterExtension(const char* ext) { + s_extension2codec.erase(ext); + s_extension2description.erase(ext); +} + +const char* bob::io::base::CodecRegistry::getDescription(const char* ext) { + auto it = s_extension2description.find(ext); + if (it == s_extension2description.end()) return 0; + return it->second.c_str(); +} + +void bob::io::base::CodecRegistry::deregisterFactory(bob::io::base::file_factory_t factory) { + + std::vector<std::string> to_remove; + for (auto it = s_extension2codec.begin(); it != s_extension2codec.end(); ++it) { + if (it->second == factory) to_remove.push_back(it->first); + } + + for (auto it = to_remove.begin(); it != to_remove.end(); ++it) { + s_extension2codec.erase(*it); + s_extension2description.erase(*it); + } + +} + +void bob::io::base::CodecRegistry::registerExtension(const char* extension, + const char* description, bob::io::base::file_factory_t codec) { + + auto it = s_extension2codec.find(extension); + + if (it == s_extension2codec.end()) { + s_extension2codec[extension] = codec; + s_extension2description[extension] = description; + } + else if (!s_ignore) { + boost::format m("extension already registered: %s - ignoring second registration with description `%s'"); + m % extension % description; + bob::core::error << m.str() << std::endl; + throw std::runtime_error(m.str()); + } + +} + +bool bob::io::base::CodecRegistry::isRegistered(const char* ext) { + std::string extension(ext); + std::string lower_extension = extension; + std::transform(extension.begin(), 
extension.end(), lower_extension.begin(), ::tolower); + return (s_extension2codec.find(lower_extension) != s_extension2codec.end()); +} + +bob::io::base::file_factory_t bob::io::base::CodecRegistry::findByExtension (const char* ext) { + + std::string extension(ext); + std::string lower_extension = extension; + std::transform(extension.begin(), extension.end(), lower_extension.begin(), ::tolower); + + std::map<std::string, bob::io::base::file_factory_t >::iterator it = + s_extension2codec.find(lower_extension); + + if (it == s_extension2codec.end()) { + boost::format m("unregistered extension: %s"); + m % lower_extension; + throw std::runtime_error(m.str()); + } + + return it->second; + +} + +bob::io::base::file_factory_t bob::io::base::CodecRegistry::findByFilenameExtension +(const char* filename) { + + return findByExtension(boost::filesystem::path(filename).extension().c_str()); + +} diff --git a/bob/io/base/cpp/File.cpp b/bob/io/base/cpp/File.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bbdfedfe7e2a5d3dc733e2c6360e933475c0ad4d --- /dev/null +++ b/bob/io/base/cpp/File.cpp @@ -0,0 +1,10 @@ +/** + * @date Tue Oct 25 23:25:46 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include <bob.io.base/File.h> + +bob::io::base::File::~File() { } diff --git a/bob/io/base/cpp/HDF5ArrayFile.cpp b/bob/io/base/cpp/HDF5ArrayFile.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8a9aa97f888fa38ea24ea37e85d74c883ed646b3 --- /dev/null +++ b/bob/io/base/cpp/HDF5ArrayFile.cpp @@ -0,0 +1,222 @@ +/** + * @date Tue Oct 25 23:25:46 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief Implements the HDF5 (.hdf5) array codec + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include <boost/make_shared.hpp> +#include <boost/filesystem.hpp> +#include <boost/format.hpp> + +#include 
<bob.io.base/CodecRegistry.h> +#include <bob.io.base/HDF5File.h> + +/** + * Read and write arrays in HDF5 format + */ +class HDF5ArrayFile: public bob::io::base::File { + + public: + + HDF5ArrayFile (const char* filename, bob::io::base::HDF5File::mode_t mode): + m_file(filename, mode), + m_filename(filename), + m_size_arrayset(0), + m_newfile(true) { + + //tries to update the current descriptors + std::vector<std::string> paths; + m_file.paths(paths); + + if (paths.size()) { //file contains data, read it and establish defaults + m_path = paths[0]; ///< locks on a path name from now on... + m_newfile = false; ///< blocks re-initialization + + //arrayset reading + const bob::io::base::HDF5Descriptor& desc_arrayset = m_file.describe(m_path)[0]; + desc_arrayset.type.copy_to(m_type_arrayset); + m_size_arrayset = desc_arrayset.size; + + //array reading + const bob::io::base::HDF5Descriptor& desc_array = m_file.describe(m_path)[1]; + desc_array.type.copy_to(m_type_array); + + //if m_type_all has extent == 1 on the first dimension and dimension + //0 is expandable, collapse that + if (m_type_array.shape[0] == 1 && desc_arrayset.expandable) + { + m_type_array = m_type_arrayset; + } + } + + else { + //default path in case the file is new or has been truncated + m_path = "/array"; + } + + } + + virtual ~HDF5ArrayFile() { } + + virtual const char* filename() const { + return m_filename.c_str(); + } + + virtual const bob::io::base::array::typeinfo& type_all () const { + return m_type_array; + } + + virtual const bob::io::base::array::typeinfo& type () const { + return m_type_arrayset; + } + + virtual size_t size() const { + return m_size_arrayset; + } + + virtual const char* name() const { + return s_codecname.c_str(); + } + + virtual void read_all(bob::io::base::array::interface& buffer) { + + if(m_newfile) { + boost::format f("uninitialized HDF5 file at '%s' cannot be read"); + f % m_filename; + throw std::runtime_error(f.str()); + } + + 
if(!buffer.type().is_compatible(m_type_array)) buffer.set(m_type_array); + + m_file.read_buffer(m_path, 0, buffer.type(), buffer.ptr()); + } + + virtual void read(bob::io::base::array::interface& buffer, size_t index) { + + if(m_newfile) { + boost::format f("uninitialized HDF5 file at '%s' cannot be read"); + f % m_filename; + throw std::runtime_error(f.str()); + } + + if(!buffer.type().is_compatible(m_type_arrayset)) buffer.set(m_type_arrayset); + + m_file.read_buffer(m_path, index, buffer.type(), buffer.ptr()); + } + + virtual size_t append (const bob::io::base::array::interface& buffer) { + + if (m_newfile) { + //creates non-compressible, extensible dataset on HDF5 file + m_newfile = false; + m_file.create(m_path, buffer.type(), true, 0); + m_file.describe(m_path)[0].type.copy_to(m_type_arrayset); + m_file.describe(m_path)[1].type.copy_to(m_type_array); + + //if m_type_all has extent == 1 on the first dimension, collapse that + if (m_type_array.shape[0] == 1) m_type_array = m_type_arrayset; + } + + m_file.extend_buffer(m_path, buffer.type(), buffer.ptr()); + ++m_size_arrayset; + //needs to flush the data to the file + return m_size_arrayset - 1; ///< index of this object in the file + + } + + virtual void write (const bob::io::base::array::interface& buffer) { + + if (!m_newfile) { + boost::format f("cannot perform single (array-style) write on file/dataset at '%s' that have already been initialized -- try to use a new file"); + f % m_filename; + throw std::runtime_error(f.str()); + } + + m_newfile = false; + m_file.create(m_path, buffer.type(), false, 0); + + m_file.describe(m_path)[0].type.copy_to(m_type_arrayset); + m_file.describe(m_path)[1].type.copy_to(m_type_array); + + //if m_type_all has extent == 1 on the first dimension, collapse that + if (m_type_array.shape[0] == 1) m_type_array = m_type_arrayset; + + //otherwise, all must be in place... 
+ m_file.write_buffer(m_path, 0, buffer.type(), buffer.ptr()); + } + + private: //representation + + bob::io::base::HDF5File m_file; + std::string m_filename; + bob::io::base::array::typeinfo m_type_array; ///< type for reading all data at once + bob::io::base::array::typeinfo m_type_arrayset; ///< type for reading data by sub-arrays + size_t m_size_arrayset; ///< number of arrays in arrayset mode + std::string m_path; ///< default path to use + bool m_newfile; ///< path check optimization + + static std::string s_codecname; + +}; + +std::string HDF5ArrayFile::s_codecname = "bob.hdf5"; + +/** + * From this point onwards we have the registration procedure. If you are + * looking at this file for a coding example, just follow the procedure bellow, + * minus local modifications you may need to apply. + */ + +/** + * This defines the factory method F that can create codecs of this type. + * + * Here are the meanings of the mode flag that should be respected by your + * factory implementation: + * + * 'r': opens for reading only - no modifications can occur; it is an + * error to open a file that does not exist for read-only operations. + * 'w': opens for reading and writing, but truncates the file if it + * exists; it is not an error to open files that do not exist with + * this flag. + * 'a': opens for reading and writing - any type of modification can + * occur. If the file does not exist, this flag is effectively like + * 'w'. + * + * Returns a newly allocated File object that can read and write data to the + * file using a specific backend. + * + * @note: This method can be static. 
+ */ +static boost::shared_ptr<bob::io::base::File> make_file (const char* path, char mode) { + + bob::io::base::HDF5File::mode_t h5mode; + if (mode == 'r') h5mode = bob::io::base::HDF5File::in; + else if (mode == 'w') h5mode = bob::io::base::HDF5File::trunc; + else if (mode == 'a') h5mode = bob::io::base::HDF5File::inout; + else throw std::runtime_error("unsupported file opening mode"); + + return boost::make_shared<HDF5ArrayFile>(path, h5mode); + +} + +/** + * Takes care of codec registration per se. + */ +static bool register_codec() { + static const char* description = "Hierarchical Data Format v5 (default)"; + + boost::shared_ptr<bob::io::base::CodecRegistry> instance = + bob::io::base::CodecRegistry::instance(); + + instance->registerExtension(".h5", description, &make_file); + instance->registerExtension(".hdf5", description, &make_file); + instance->registerExtension(".hdf", description, &make_file); + + return true; + +} + +static bool codec_registered = register_codec(); diff --git a/bob/io/base/cpp/HDF5Attribute.cpp b/bob/io/base/cpp/HDF5Attribute.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e701aced85c99ade1009ded61aaaf8e1e46b30cc --- /dev/null +++ b/bob/io/base/cpp/HDF5Attribute.cpp @@ -0,0 +1,243 @@ +/** + * @author Andre Anjos <andre.anjos@idiap.ch> + * @date Fri 2 Mar 08:23:47 2012 + * + * @brief Implements attribute read/write for HDF5 files + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include <boost/format.hpp> + +#include <bob/core/logging.h> + +#include <bob.io.base/HDF5Attribute.h> + +static std::runtime_error status_error(const char* f, herr_t s) { + boost::format m("call to HDF5 C-function %s() returned error %d. 
HDF5 error statck follows:\n%s"); + m % f % s % bob::io::base::format_hdf5_error(); + return std::runtime_error(m.str()); +} + +bool bob::io::base::detail::hdf5::has_attribute(const boost::shared_ptr<hid_t> location, + const std::string& name) { + return H5Aexists(*location, name.c_str()); +} + +/** + * Opens an "auto-destructible" HDF5 dataspace + */ +static void delete_h5dataspace (hid_t* p) { + if (*p >= 0) { + herr_t err = H5Sclose(*p); + if (err < 0) { + bob::core::error << "H5Sclose() exited with an error (" << err << "). The stack trace follows:" << std::endl; + bob::core::error << bob::io::base::format_hdf5_error() << std::endl; + } + } + delete p; +} + +static boost::shared_ptr<hid_t> open_memspace(const bob::io::base::HDF5Shape& s) { + boost::shared_ptr<hid_t> retval(new hid_t(-1), std::ptr_fun(delete_h5dataspace)); + *retval = H5Screate_simple(s.n(), s.get(), 0); + if (*retval < 0) throw status_error("H5Screate_simple", *retval); + return retval; +} + +/** + * Opens the memory space of attribute + */ +static boost::shared_ptr<hid_t> get_memspace(hid_t attr) { + boost::shared_ptr<hid_t> retval(new hid_t(-1), std::ptr_fun(delete_h5dataspace)); + *retval = H5Aget_space(attr); + if (*retval < 0) throw status_error("H5Aget_space", *retval); + return retval; +} + +/** + * Auto-destructing HDF5 type + */ +static void delete_h5type (hid_t* p) { + if (*p >= 0) { + herr_t err = H5Tclose(*p); + if (err < 0) { + bob::core::error << "H5Tclose() exited with an error (" << err << "). 
The stack trace follows:" << std::endl; + bob::core::error << bob::io::base::format_hdf5_error() << std::endl; + } + } + delete p; +} + +/** + * Gets datatype of attribute + */ +static boost::shared_ptr<hid_t> get_type(hid_t attr) { + boost::shared_ptr<hid_t> retval(new hid_t(-1), std::ptr_fun(delete_h5type)); + *retval = H5Aget_type(attr); + if (*retval < 0) throw status_error("H5Aget_type", *retval); + return retval; +} + +/** + * Figures out the extents of an attribute + */ +static bob::io::base::HDF5Shape get_extents(hid_t space) { + int rank = H5Sget_simple_extent_ndims(space); + if (rank < 0) throw status_error("H5Sget_simple_extent_ndims", rank); + //is at least a list of scalars, but could be a list of arrays + bob::io::base::HDF5Shape shape(rank); + herr_t status = H5Sget_simple_extent_dims(space, shape.get(), 0); + if (status < 0) throw status_error("H5Sget_simple_extent_dims",status); + return shape; +} + +/** + * Opens an "auto-destructible" HDF5 attribute + */ +static void delete_h5attribute (hid_t* p) { + if (*p >= 0) { + herr_t err = H5Aclose(*p); + if (err < 0) { + bob::core::error << "H5Aclose() exited with an error (" << err << "). 
The stack trace follows:" << std::endl; + bob::core::error << bob::io::base::format_hdf5_error() << std::endl; + } + } + delete p; +} + +static boost::shared_ptr<hid_t> open_attribute +(const boost::shared_ptr<hid_t> location, const std::string& name, + const bob::io::base::HDF5Type& t) { + + boost::shared_ptr<hid_t> retval(new hid_t(-1), + std::ptr_fun(delete_h5attribute)); + + *retval = H5Aopen(*location, name.c_str(), H5P_DEFAULT); + + if (*retval < 0) throw status_error("H5Aopen", *retval); + + //checks if the opened attribute is compatible w/ the expected type + bob::io::base::HDF5Type expected; + boost::shared_ptr<hid_t> atype = get_type(*retval); + if (H5Tget_class(*atype) == H5T_STRING) { + expected = bob::io::base::HDF5Type(atype); + } + else { + boost::shared_ptr<hid_t> aspace = get_memspace(*retval); + bob::io::base::HDF5Shape shape = get_extents(*aspace); + expected = bob::io::base::HDF5Type(atype, shape); + } + + if (expected != t) { + boost::format m("Trying to access attribute '%s' with incompatible buffer - expected `%s', but you gave me `%s'"); + m % name % expected.str() % t.str(); + throw std::runtime_error(m.str()); + } + + return retval; +} + +void bob::io::base::detail::hdf5::delete_attribute (boost::shared_ptr<hid_t> location, + const std::string& name) { + herr_t err = H5Adelete(*location, name.c_str()); + if (err < 0) throw status_error("H5Adelete", err); +} + +void bob::io::base::detail::hdf5::read_attribute (const boost::shared_ptr<hid_t> location, + const std::string& name, const bob::io::base::HDF5Type& dest, + void* buffer) { + boost::shared_ptr<hid_t> attribute = open_attribute(location, name, dest); + herr_t err = H5Aread(*attribute, *dest.htype(), buffer); + if (err < 0) throw status_error("H5Aread", err); +} + +void bob::io::base::detail::hdf5::gettype_attribute (const boost::shared_ptr<hid_t> location, + const std::string& name, bob::io::base::HDF5Type& type) { + + boost::shared_ptr<hid_t> attr(new hid_t(-1), + 
std::ptr_fun(delete_h5attribute)); + + *attr = H5Aopen(*location, name.c_str(), H5P_DEFAULT); + + if (*attr < 0) throw status_error("H5Aopen", *attr); + + boost::shared_ptr<hid_t> atype = get_type(*attr); + if (H5Tget_class(*atype) == H5T_STRING) { + type = bob::io::base::HDF5Type(atype); + } + else { + boost::shared_ptr<hid_t> aspace = get_memspace(*attr); + bob::io::base::HDF5Shape shape = get_extents(*aspace); + type = bob::io::base::HDF5Type(atype, shape); + } +} + +static boost::shared_ptr<hid_t> create_attribute(boost::shared_ptr<hid_t> loc, + const std::string& name, const bob::io::base::HDF5Type& t, + boost::shared_ptr<hid_t> space) { + + boost::shared_ptr<hid_t> retval(new hid_t(-1), + std::ptr_fun(delete_h5attribute)); + + *retval = H5Acreate2(*loc, name.c_str(), *t.htype(), *space, H5P_DEFAULT, + H5P_DEFAULT); + + if (*retval < 0) throw status_error("H5Acreate", *retval); + return retval; +} + +void bob::io::base::detail::hdf5::write_attribute (boost::shared_ptr<hid_t> location, + const std::string& name, const bob::io::base::HDF5Type& dest, const void* buffer) +{ + boost::shared_ptr<hid_t> dataspace; + //strings have to be treated slightly differently + if (dest.type() == bob::io::base::s) { + hsize_t strings = 1; + HDF5Shape shape(1, &strings); + dataspace = open_memspace(shape); + } + else { + dataspace = open_memspace(dest.shape()); + } + + if (bob::io::base::detail::hdf5::has_attribute(location, name)) bob::io::base::detail::hdf5::delete_attribute(location, name); + boost::shared_ptr<hid_t> attribute = + create_attribute(location, name, dest, dataspace); + + /* Write the attribute data. 
*/ + herr_t err = H5Awrite(*attribute, *dest.htype(), buffer); + if (err < 0) throw status_error("H5Awrite", err); +} + +static herr_t attr_iterator (hid_t obj, const char* name, const H5A_info_t*, + void* cookie) { + std::map<std::string, bob::io::base::HDF5Type>& dict = + *static_cast<std::map<std::string, bob::io::base::HDF5Type>*>(cookie); + + boost::shared_ptr<hid_t> attr(new hid_t(-1), + std::ptr_fun(delete_h5attribute)); + + *attr = H5Aopen(obj, name, H5P_DEFAULT); + + if (*attr < 0) throw status_error("H5Aopen", *attr); + + boost::shared_ptr<hid_t> atype = get_type(*attr); + if (H5Tget_class(*atype) == H5T_STRING) { + dict[name] = bob::io::base::HDF5Type(atype); + } + else { + boost::shared_ptr<hid_t> aspace = get_memspace(*attr); + bob::io::base::HDF5Shape shape = get_extents(*aspace); + dict[name] = bob::io::base::HDF5Type(atype, shape); + } + + return 0; +} + +void bob::io::base::detail::hdf5::list_attributes(boost::shared_ptr<hid_t> location, + std::map<std::string, bob::io::base::HDF5Type>& attributes) { + hsize_t offset=0; + H5Aiterate2(*location, H5_INDEX_NAME, H5_ITER_NATIVE, &offset, attr_iterator, + static_cast<void*>(&attributes)); +} diff --git a/bob/io/base/cpp/HDF5Dataset.cpp b/bob/io/base/cpp/HDF5Dataset.cpp new file mode 100644 index 0000000000000000000000000000000000000000..59b20939f77394d1ce287555bc397200d9fc5347 --- /dev/null +++ b/bob/io/base/cpp/HDF5Dataset.cpp @@ -0,0 +1,612 @@ +/** + * @author Andre Anjos <andre.anjos@idiap.ch> + * @date Wed 29 Feb 17:51:21 2012 + * + * @brief Implementation of the Dataset class + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include <boost/format.hpp> +#include <boost/make_shared.hpp> +#include <boost/shared_array.hpp> + +#include <bob/core/logging.h> + +#include <bob.io.base/HDF5Utils.h> +#include <bob.io.base/HDF5Group.h> +#include <bob.io.base/HDF5Dataset.h> + +static std::runtime_error status_error(const char* f, herr_t s) { + boost::format m("call to HDF5 
C-function %s() returned error %d. HDF5 error statck follows:\n%s"); + m % f % s % bob::io::base::format_hdf5_error(); + return std::runtime_error(m.str()); +} + +/** + * Opens an "auto-destructible" HDF5 dataset + */ +static void delete_h5dataset (hid_t* p) { + if (*p >= 0) { + herr_t err = H5Dclose(*p); + if (err < 0) { + bob::core::error << "H5Dclose() exited with an error (" << err << "). The stack trace follows:" << std::endl; + bob::core::error << bob::io::base::format_hdf5_error() << std::endl; + } + } + delete p; +} + +static boost::shared_ptr<hid_t> open_dataset +(boost::shared_ptr<bob::io::base::detail::hdf5::Group>& par, const std::string& name) { + if (!name.size() || name == "." || name == "..") { + boost::format m("Cannot open dataset with illegal name `%s' at `%s:%s'"); + m % name % par->file()->filename() % par->path(); + throw std::runtime_error(m.str()); + } + + boost::shared_ptr<hid_t> retval(new hid_t(-1), + std::ptr_fun(delete_h5dataset)); + *retval = H5Dopen2(*par->location(), name.c_str(), H5P_DEFAULT); + if (*retval < 0) { + throw status_error("H5Dopen2", *retval); + } + return retval; +} + +/** + * Opens an "auto-destructible" HDF5 datatype + */ +static void delete_h5datatype (hid_t* p) { + if (*p >= 0) { + herr_t err = H5Tclose(*p); + if (err < 0) { + bob::core::error << "H5Tclose() exited with an error (" << err << "). 
The stack trace follows:" << std::endl; + bob::core::error << bob::io::base::format_hdf5_error() << std::endl; + } + } + delete p; +} + +static boost::shared_ptr<hid_t> open_datatype +(const boost::shared_ptr<hid_t>& ds) { + boost::shared_ptr<hid_t> retval(new hid_t(-1), + std::ptr_fun(delete_h5datatype)); + *retval = H5Dget_type(*ds); + if (*retval < 0) { + throw status_error("H5Dget_type", *retval); + } + return retval; +} + +/** + * Opens an "auto-destructible" HDF5 property list + */ +static void delete_h5plist (hid_t* p) { + if (*p >= 0) { + herr_t err = H5Pclose(*p); + if (err < 0) { + bob::core::error << "H5Pclose() exited with an error (" << err << "). The stack trace follows:" << std::endl; + bob::core::error << bob::io::base::format_hdf5_error() << std::endl; + } + } + delete p; +} + +static boost::shared_ptr<hid_t> open_plist(hid_t classid) { + boost::shared_ptr<hid_t> retval(new hid_t(-1), std::ptr_fun(delete_h5plist)); + *retval = H5Pcreate(classid); + if (*retval < 0) { + throw status_error("H5Pcreate", *retval); + } + return retval; +} + +/** + * Opens an "auto-destructible" HDF5 dataspace + */ +static void delete_h5dataspace (hid_t* p) { + if (*p >= 0) { + herr_t err = H5Sclose(*p); + if (err < 0) { + bob::core::error << "H5Sclose() exited with an error (" << err << "). 
The stack trace follows:" << std::endl; + bob::core::error << bob::io::base::format_hdf5_error() << std::endl; + } + } + delete p; +} + +static boost::shared_ptr<hid_t> open_filespace +(const boost::shared_ptr<hid_t>& ds) { + boost::shared_ptr<hid_t> retval(new hid_t(-1), std::ptr_fun(delete_h5dataspace)); + *retval = H5Dget_space(*ds); + if (*retval < 0) throw status_error("H5Dget_space", *retval); + return retval; +} + +static boost::shared_ptr<hid_t> open_memspace(const bob::io::base::HDF5Shape& sh) { + boost::shared_ptr<hid_t> retval(new hid_t(-1), std::ptr_fun(delete_h5dataspace)); + *retval = H5Screate_simple(sh.n(), sh.get(), 0); + if (*retval < 0) throw status_error("H5Screate_simple", *retval); + return retval; +} + +static void set_memspace(boost::shared_ptr<hid_t> s, const bob::io::base::HDF5Shape& sh) { + herr_t status = H5Sset_extent_simple(*s, sh.n(), sh.get(), 0); + if (status < 0) throw status_error("H5Sset_extent_simple", status); +} + +/** + * Figures out if a dataset is expandible + */ +static bool is_extensible(boost::shared_ptr<hid_t>& space) { + + //has unlimited size on first dimension? 
+ int rank = H5Sget_simple_extent_ndims(*space); + if (rank < 0) throw status_error("H5Sget_simple_extent_ndims", rank); + + bob::io::base::HDF5Shape maxshape(rank); + herr_t status = H5Sget_simple_extent_dims(*space, 0, maxshape.get()); + if (status < 0) throw status_error("H5Sget_simple_extent_dims",status); + + return (maxshape[0] == H5S_UNLIMITED); +} + +/** + * Figures out the extents of a dataset + */ +static bob::io::base::HDF5Shape get_extents(boost::shared_ptr<hid_t>& space) { + int rank = H5Sget_simple_extent_ndims(*space); + if (rank < 0) throw status_error("H5Sget_simple_extent_ndims", rank); + //is at least a list of scalars, but could be a list of arrays + bob::io::base::HDF5Shape shape(rank); + herr_t status = H5Sget_simple_extent_dims(*space, shape.get(), 0); + if (status < 0) throw status_error("H5Sget_simple_extent_dims",status); + return shape; +} + +/** + * Creates the extensive list of compatible types for each of possible ways to + * read/write this dataset. + */ +static void reset_compatibility_list(boost::shared_ptr<hid_t>& space, + const bob::io::base::HDF5Type& file_base, std::vector<bob::io::base::HDF5Descriptor>& descr) { + + if (!file_base.shape()) throw std::runtime_error("empty HDF5 dataset"); + + descr.clear(); + + switch (file_base.shape().n()) { + + case 1: ///< file type has 1 dimension + descr.push_back(bob::io::base::HDF5Descriptor(file_base.type(), + file_base.shape()[0], is_extensible(space))); + break; + + case 2: + case 3: + case 4: + case 5: + { + bob::io::base::HDF5Shape alt = file_base.shape(); + alt <<= 1; ///< contract shape + descr.push_back(bob::io::base::HDF5Descriptor(bob::io::base::HDF5Type(file_base.type(), alt), + file_base.shape()[0], is_extensible(space)).subselect()); + } + break; + + default: + { + boost::format m("%d exceeds the number of supported dimensions"); + m % file_base.shape().n(); + throw std::runtime_error(m.str()); + } + } + + //can always read the data as a single, non-expandible array + 
descr.push_back(bob::io::base::HDF5Descriptor(file_base, 1, false)); +} + +bob::io::base::detail::hdf5::Dataset::Dataset(boost::shared_ptr<Group> parent, + const std::string& name) : + m_parent(parent), + m_name(name), + m_id(open_dataset(parent, name)), + m_dt(open_datatype(m_id)), + m_filespace(open_filespace(m_id)), + m_descr(), + m_memspace() +{ + bob::io::base::HDF5Type type(m_dt, get_extents(m_filespace)); + reset_compatibility_list(m_filespace, type, m_descr); + + //strings have to be treated slightly differently + if (H5Tget_class(*m_dt) == H5T_STRING) { + hsize_t strings = 1; + HDF5Shape shape(1, &strings); + m_memspace = open_memspace(shape); + } + else { + m_memspace = open_memspace(m_descr[0].type.shape()); + } +} + +/** + * Creates and writes an "empty" Dataset in an existing file. + */ +static void create_dataset (boost::shared_ptr<bob::io::base::detail::hdf5::Group> par, + const std::string& name, const bob::io::base::HDF5Type& type, bool list, + size_t compression) { + + if (!name.size() || name == "." || name == "..") { + boost::format m("Cannot create dataset with illegal name `%s' at `%s:%s'"); + m % name % par->file()->filename() % par->path(); + throw std::runtime_error(m.str()); + } + + bob::io::base::HDF5Shape xshape(type.shape()); + + if (list) { ///< if it is a list, add and extra dimension as dimension 0 + xshape = type.shape(); + xshape >>= 1; + xshape[0] = 0; ///< no elements for the time being + } + + bob::io::base::HDF5Shape maxshape(xshape); + if (list) maxshape[0] = H5S_UNLIMITED; ///< can expand forever + + //creates the data space. + boost::shared_ptr<hid_t> space(new hid_t(-1), + std::ptr_fun(delete_h5dataspace)); + *space = H5Screate_simple(xshape.n(), xshape.get(), maxshape.get()); + if (*space < 0) throw status_error("H5Screate_simple", *space); + + //creates the property list saying we need the data to be chunked if this is + //supposed to be a list -- HDF5 only supports expandability like this. 
+ boost::shared_ptr<hid_t> dcpl = open_plist(H5P_DATASET_CREATE); + + //according to the HDF5 manual, chunks have to have the same rank as the + //array shape. + bob::io::base::HDF5Shape chunking(xshape); + chunking[0] = 1; + if (list || compression) { ///< note: compression requires chunking + herr_t status = H5Pset_chunk(*dcpl, chunking.n(), chunking.get()); + if (status < 0) throw status_error("H5Pset_chunk", status); + } + + //if the user has decided to compress the dataset, do it with gzip. + if (compression) { + if (compression > 9) compression = 9; + herr_t status = H5Pset_deflate(*dcpl, compression); + if (status < 0) throw status_error("H5Pset_deflate", status); + } + + //our link creation property list for HDF5 + boost::shared_ptr<hid_t> lcpl = open_plist(H5P_LINK_CREATE); + herr_t status = H5Pset_create_intermediate_group(*lcpl, 1); //1 == true + if (status < 0) + throw status_error("H5Pset_create_intermediate_group", status); + + //please note that we don't define the fill value as in the example, but + //according to the HDF5 documentation, this value is set to zero by default. + + boost::shared_ptr<hid_t> cls = type.htype(); + + //finally create the dataset on the file. + boost::shared_ptr<hid_t> dataset(new hid_t(-1), + std::ptr_fun(delete_h5dataset)); + *dataset = H5Dcreate2(*par->location(), name.c_str(), + *cls, *space, *lcpl, *dcpl, H5P_DEFAULT); + + if (*dataset < 0) throw status_error("H5Dcreate2", *dataset); +} + +/** + * Creates and writes an "empty" std::string Dataset in an existing file. + */ +static void create_string_dataset (boost::shared_ptr<bob::io::base::detail::hdf5::Group> par, + const std::string& name, const bob::io::base::HDF5Type& type, size_t compression) { + + if (!name.size() || name == "." 
|| name == "..") { + boost::format m("Cannot create dataset with illegal name `%s' at `%s:%s'"); + m % name % par->file()->filename() % par->path(); + throw std::runtime_error(m.str()); + } + + //there can be only 1 string in a string dataset (for the time being) + hsize_t strings = 1; + bob::io::base::HDF5Shape xshape(1, &strings); + + //creates the data space. + boost::shared_ptr<hid_t> space(new hid_t(-1), + std::ptr_fun(delete_h5dataspace)); + *space = H5Screate_simple(xshape.n(), xshape.get(), xshape.get()); + if (*space < 0) throw status_error("H5Screate_simple", *space); + + //creates the property list saying we need the data to be chunked if this is + //supposed to be a list -- HDF5 only supports expandability like this. + boost::shared_ptr<hid_t> dcpl = open_plist(H5P_DATASET_CREATE); + + //if the user has decided to compress the dataset, do it with gzip. + if (compression) { + if (compression > 9) compression = 9; + herr_t status = H5Pset_deflate(*dcpl, compression); + if (status < 0) throw status_error("H5Pset_deflate", status); + } + + //our link creation property list for HDF5 + boost::shared_ptr<hid_t> lcpl = open_plist(H5P_LINK_CREATE); + herr_t status = H5Pset_create_intermediate_group(*lcpl, 1); //1 == true + if (status < 0) + throw status_error("H5Pset_create_intermediate_group", status); + + //please note that we don't define the fill value as in the example, but + //according to the HDF5 documentation, this value is set to zero by default. + + boost::shared_ptr<hid_t> cls = type.htype(); + + //finally create the dataset on the file. 
+ boost::shared_ptr<hid_t> dataset(new hid_t(-1), + std::ptr_fun(delete_h5dataset)); + *dataset = H5Dcreate2(*par->location(), name.c_str(), + *cls, *space, *lcpl, *dcpl, H5P_DEFAULT); + + if (*dataset < 0) throw status_error("H5Dcreate2", *dataset); +} + +bob::io::base::detail::hdf5::Dataset::Dataset(boost::shared_ptr<Group> parent, + const std::string& name, const bob::io::base::HDF5Type& type, + bool list, size_t compression): + m_parent(parent), + m_name(name), + m_id(), + m_dt(), + m_filespace(), + m_descr(), + m_memspace() +{ + //First, we test to see if we can find the named dataset. + bob::io::base::DefaultHDF5ErrorStack->mute(); + hid_t set_id = H5Dopen2(*parent->location(),m_name.c_str(),H5P_DEFAULT); + bob::io::base::DefaultHDF5ErrorStack->unmute(); + + if (set_id < 0) { + if (type.type() == bob::io::base::s) + create_string_dataset(parent, m_name, type, compression); + else + create_dataset(parent, m_name, type, list, compression); + } + else H5Dclose(set_id); //close it, will re-open it properly + + m_id = open_dataset(parent, m_name); + m_dt = open_datatype(m_id); + m_filespace = open_filespace(m_id); + bob::io::base::HDF5Type file_type(m_dt, get_extents(m_filespace)); + reset_compatibility_list(m_filespace, file_type, m_descr); + + //strings have to be treated slightly differently + if (H5Tget_class(*m_dt) == H5T_STRING) { + hsize_t strings = 1; + HDF5Shape shape(1, &strings); + m_memspace = open_memspace(shape); + } + else { + m_memspace = open_memspace(m_descr[0].type.shape()); + } +} + +bob::io::base::detail::hdf5::Dataset::~Dataset() { } + +size_t bob::io::base::detail::hdf5::Dataset::size () const { + return m_descr[0].size; +} + +size_t bob::io::base::detail::hdf5::Dataset::size (const bob::io::base::HDF5Type& type) const { + for (size_t k=0; k<m_descr.size(); ++k) { + if (m_descr[k].type == type) return m_descr[k].size; + } + boost::format m("trying to read or write `%s' at `%s' that only accepts `%s'"); + m % type.str() % url() % 
m_descr[0].type.str(); + throw std::runtime_error(m.str()); +} + +const boost::shared_ptr<bob::io::base::detail::hdf5::Group> bob::io::base::detail::hdf5::Dataset::parent() const { + return m_parent.lock(); +} + +boost::shared_ptr<bob::io::base::detail::hdf5::Group> bob::io::base::detail::hdf5::Dataset::parent() { + return m_parent.lock(); +} + +const std::string& bob::io::base::detail::hdf5::Dataset::filename() const { + return parent()->filename(); +} + +std::string bob::io::base::detail::hdf5::Dataset::url() const { + return filename() + ":" + path(); +} + +std::string bob::io::base::detail::hdf5::Dataset::path() const { + return parent()->path() + "/" + m_name; +} + +const boost::shared_ptr<bob::io::base::detail::hdf5::File> bob::io::base::detail::hdf5::Dataset::file() const { + return parent()->file(); +} + +boost::shared_ptr<bob::io::base::detail::hdf5::File> bob::io::base::detail::hdf5::Dataset::file() { + return parent()->file(); +} + +/** + * Locates a compatible type or returns end(). 
+ */ +static std::vector<bob::io::base::HDF5Descriptor>::iterator + find_type_index(std::vector<bob::io::base::HDF5Descriptor>& descr, + const bob::io::base::HDF5Type& user_type) { + std::vector<bob::io::base::HDF5Descriptor>::iterator it = descr.begin(); + for (; it != descr.end(); ++it) { + if (it->type == user_type) break; + } + return it; +} + +std::vector<bob::io::base::HDF5Descriptor>::iterator +bob::io::base::detail::hdf5::Dataset::select (size_t index, const bob::io::base::HDF5Type& dest) { + + //finds compatibility type + std::vector<bob::io::base::HDF5Descriptor>::iterator it = find_type_index(m_descr, dest); + + //if we cannot find a compatible type, we throw + if (it == m_descr.end()) { + boost::format m("trying to read or write `%s' at `%s' that only accepts `%s'"); + m % dest.str() % url() % m_descr[0].type.str(); + throw std::runtime_error(m.str()); + } + + //checks indexing + if (index >= it->size) { + boost::format m("trying to access element %d in Dataset '%s' that only contains %d elements"); + m % index % url() % it->size; + throw std::runtime_error(m.str()); + } + + set_memspace(m_memspace, it->type.shape()); + + it->hyperslab_start[0] = index; + + herr_t status = H5Sselect_hyperslab(*m_filespace, H5S_SELECT_SET, + it->hyperslab_start.get(), 0, it->hyperslab_count.get(), 0); + if (status < 0) throw status_error("H5Sselect_hyperslab", status); + + return it; +} + +void bob::io::base::detail::hdf5::Dataset::read_buffer (size_t index, const bob::io::base::HDF5Type& dest, void* buffer) { + + std::vector<bob::io::base::HDF5Descriptor>::iterator it = select(index, dest); + + herr_t status = H5Dread(*m_id, *it->type.htype(), + *m_memspace, *m_filespace, H5P_DEFAULT, buffer); + + if (status < 0) throw status_error("H5Dread", status); +} + +void bob::io::base::detail::hdf5::Dataset::write_buffer (size_t index, const bob::io::base::HDF5Type& dest, + const void* buffer) { + + std::vector<bob::io::base::HDF5Descriptor>::iterator it = select(index, dest); + 
+ herr_t status = H5Dwrite(*m_id, *it->type.htype(), + *m_memspace, *m_filespace, H5P_DEFAULT, buffer); + + if (status < 0) throw status_error("H5Dwrite", status); +} + +void bob::io::base::detail::hdf5::Dataset::extend_buffer (const bob::io::base::HDF5Type& dest, const void* buffer) { + + //finds compatibility type + std::vector<bob::io::base::HDF5Descriptor>::iterator it = find_type_index(m_descr, dest); + + //if we cannot find a compatible type, we throw + if (it == m_descr.end()) { + boost::format m("trying to read or write `%s' at `%s' that only accepts `%s'"); + m % dest.str() % url() % m_descr[0].type.str(); + throw std::runtime_error(m.str()); + } + + if (!it->expandable) { + boost::format m("trying to append to '%s' that is not expandible"); + m % url(); + throw std::runtime_error(m.str()); + } + + //if it is expandible, try expansion + bob::io::base::HDF5Shape tmp(it->type.shape()); + tmp >>= 1; + tmp[0] = it->size + 1; + herr_t status = H5Dset_extent(*m_id, tmp.get()); + if (status < 0) throw status_error("H5Dset_extent", status); + + //if expansion succeeded, update all compatible types + for (size_t k=0; k<m_descr.size(); ++k) { + if (m_descr[k].expandable) { //updated only the length + m_descr[k].size += 1; + } + else { //not expandable, update the shape/count for a straight read/write + m_descr[k].type.shape()[0] += 1; + m_descr[k].hyperslab_count[0] += 1; + } + } + + m_filespace = open_filespace(m_id); //update filespace + + write_buffer(tmp[0]-1, dest, buffer); +} + +void bob::io::base::detail::hdf5::Dataset::gettype_attribute(const std::string& name, + bob::io::base::HDF5Type& type) const { + bob::io::base::detail::hdf5::gettype_attribute(m_id, name, type); +} + +bool bob::io::base::detail::hdf5::Dataset::has_attribute(const std::string& name) const { + return bob::io::base::detail::hdf5::has_attribute(m_id, name); +} + +void bob::io::base::detail::hdf5::Dataset::delete_attribute (const std::string& name) { + 
bob::io::base::detail::hdf5::delete_attribute(m_id, name); +} + +void bob::io::base::detail::hdf5::Dataset::read_attribute (const std::string& name, + const bob::io::base::HDF5Type& dest_type, void* buffer) const { + bob::io::base::detail::hdf5::read_attribute(m_id, name, dest_type, buffer); +} + +void bob::io::base::detail::hdf5::Dataset::write_attribute (const std::string& name, + const bob::io::base::HDF5Type& dest_type, const void* buffer) { + bob::io::base::detail::hdf5::write_attribute(m_id, name, dest_type, buffer); +} + +void bob::io::base::detail::hdf5::Dataset::list_attributes(std::map<std::string, bob::io::base::HDF5Type>& attributes) const { + bob::io::base::detail::hdf5::list_attributes(m_id, attributes); +} + +template <> void bob::io::base::detail::hdf5::Dataset::read<std::string>(size_t index, std::string& value) { + if (index != 0) throw std::runtime_error("Bob's HDF5 bindings do not (yet) support string vectors - reading something on position > 0 is therefore not possible"); + + size_t str_size = H5Tget_size(*m_dt); ///< finds out string size + boost::shared_array<char> storage(new char[str_size+1]); + storage[str_size] = 0; ///< null termination + + herr_t status = H5Dread(*m_id, *m_dt, *m_memspace, *m_filespace, H5P_DEFAULT, storage.get()); + if (status < 0) throw status_error("H5Dread", status); + + value = storage.get(); +} + +template <> void bob::io::base::detail::hdf5::Dataset::replace<std::string>(size_t index, const std::string& value) { + if (index != 0) throw std::runtime_error("Bob's HDF5 bindings do not (yet) support string vectors - indexing something on position > 0 is therefore not possible"); + + herr_t status = H5Dwrite(*m_id, *m_dt, *m_memspace, *m_filespace, H5P_DEFAULT, value.c_str()); + if (status < 0) throw status_error("H5Dwrite", status); +} + +template <> void bob::io::base::detail::hdf5::Dataset::add<std::string>(const std::string& value) { + herr_t status = H5Dwrite(*m_id, *m_dt, *m_memspace, *m_filespace, H5P_DEFAULT, 
value.c_str()); + if (status < 0) throw status_error("H5Dwrite", status); +} + +template <> void bob::io::base::detail::hdf5::Dataset::set_attribute<std::string>(const std::string& name, const std::string& v) { + bob::io::base::HDF5Type dest_type(v); + write_attribute(name, dest_type, reinterpret_cast<const void*>(v.c_str())); +} + +template <> std::string bob::io::base::detail::hdf5::Dataset::get_attribute(const std::string& name) const { + HDF5Type type; + gettype_attribute(name, type); + boost::shared_array<char> v(new char[type.shape()[0]+1]); + v[type.shape()[0]] = 0; ///< null termination + read_attribute(name, type, reinterpret_cast<void*>(v.get())); + std::string retval(v.get()); + return retval; +} diff --git a/bob/io/base/cpp/HDF5File.cpp b/bob/io/base/cpp/HDF5File.cpp new file mode 100644 index 0000000000000000000000000000000000000000..549069f8ab821df603ef35727d0da07a4a757137 --- /dev/null +++ b/bob/io/base/cpp/HDF5File.cpp @@ -0,0 +1,294 @@ +/** + * @date Wed Jun 22 17:50:08 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief Implementation of the read/write functionality for HDF5 files + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include <boost/format.hpp> + +#include <bob.io.base/HDF5File.h> + +static unsigned int getH5Access (bob::io::base::HDF5File::mode_t v) { + switch(v) { + case 0: return H5F_ACC_RDONLY; + case 1: return H5F_ACC_RDWR; + case 2: return H5F_ACC_TRUNC; + case 4: return H5F_ACC_EXCL; + default: + { + boost::format m("Trying to use an undefined access mode '%d'"); + m % v; + throw std::runtime_error(m.str()); + } + } +} + +bob::io::base::HDF5File::HDF5File(const std::string& filename, mode_t mode): + m_file(new bob::io::base::detail::hdf5::File(filename, getH5Access(mode))), + m_cwd(m_file->root()) ///< we start by looking at the root directory +{ +} + +bob::io::base::HDF5File::HDF5File(const std::string& filename, const char mode): +m_file(), +m_cwd() +{ + 
bob::io::base::HDF5File::mode_t new_mode = bob::io::base::HDF5File::inout; + switch (mode){ + case 'r': new_mode = bob::io::base::HDF5File::in; break; + case 'a': new_mode = bob::io::base::HDF5File::inout; break; + case 'w': new_mode = bob::io::base::HDF5File::trunc; break; + case 'x': new_mode = bob::io::base::HDF5File::excl; break; + default: + throw std::runtime_error("Supported flags are 'r' (read-only), 'a' (read/write/append), 'w' (read/write/truncate) or 'x' (read/write/exclusive)"); + } + m_file.reset(new bob::io::base::detail::hdf5::File(filename, getH5Access(new_mode))); + m_cwd = m_file->root(); ///< we start by looking at the root directory + +} + +bob::io::base::HDF5File::HDF5File(const bob::io::base::HDF5File& other_file): + m_file(other_file.m_file), + m_cwd(other_file.m_cwd) +{ +} + +bob::io::base::HDF5File::~HDF5File() { +} + +bob::io::base::HDF5File& bob::io::base::HDF5File::operator =(const bob::io::base::HDF5File& other_file){ + m_file = other_file.m_file; + m_cwd = other_file.m_cwd; + return *this; +} + +void bob::io::base::HDF5File::close() { + m_file.reset(); + m_cwd.reset(); +} + +void bob::io::base::HDF5File::cd(const std::string& path) { + check_open(); + m_cwd = m_cwd->cd(path); +} + +bool bob::io::base::HDF5File::hasGroup(const std::string& path) { + check_open(); + return m_cwd->has_group(path); +} + +void bob::io::base::HDF5File::createGroup(const std::string& path) { + check_open(); + if (!m_file->writable()) { + boost::format m("cannot create group '%s' at path '%s' of file '%s' because it is not writeable"); + m % path % m_cwd->path() % m_file->filename(); + throw std::runtime_error(m.str()); + } + m_cwd->create_group(path); +} + +std::string bob::io::base::HDF5File::cwd() const { + check_open(); + return m_cwd->path(); +} + +bool bob::io::base::HDF5File::contains (const std::string& path) const { + check_open(); + return m_cwd->has_dataset(path); +} + +const std::vector<bob::io::base::HDF5Descriptor>& 
bob::io::base::HDF5File::describe +(const std::string& path) const { + check_open(); + return (*m_cwd)[path]->m_descr; +} + +void bob::io::base::HDF5File::unlink (const std::string& path) { + check_open(); + if (!m_file->writable()) { + boost::format m("cannot remove dataset at path '%s' of file '%s' because it is not writeable"); + m % path % m_cwd->path() % m_file->filename(); + throw std::runtime_error(m.str()); + } + m_cwd->remove_dataset(path); +} + +void bob::io::base::HDF5File::rename (const std::string& from, const std::string& to) { + check_open(); + if (!m_file->writable()) { + boost::format m("cannot rename dataset '%s' -> '%s' at path '%s' of file '%s' because it is not writeable"); + m % from % to % m_cwd->path() % m_file->filename(); + throw std::runtime_error(m.str()); + } + m_cwd->rename_dataset(from, to); + std::string current_path = m_cwd->path(); + m_file->reset(); //re-read the whole structure + m_cwd = m_file->root(); + m_cwd = m_cwd->cd(current_path); //go back to the path we were before +} + +void bob::io::base::HDF5File::copy (HDF5File& other) { + check_open(); + if (!m_file->writable()) { + boost::format m("cannot copy data of file '%s' to path '%s' of file '%s' because it is not writeable"); + m % other.filename() % m_cwd->path() % m_file->filename(); + throw std::runtime_error(m.str()); + } + + //groups + typedef std::map<std::string, boost::shared_ptr<bob::io::base::detail::hdf5::Group> > group_map_type; + const group_map_type& group_map = other.m_file->root()->groups(); + for (group_map_type::const_iterator it=group_map.begin(); + it != group_map.end(); ++it) { + m_cwd->copy_group(it->second, it->first); + } + + //datasets + typedef std::map<std::string, boost::shared_ptr<bob::io::base::detail::hdf5::Dataset> > dataset_map_type; + const dataset_map_type& dataset_map = other.m_file->root()->datasets(); + for (dataset_map_type::const_iterator it=dataset_map.begin(); + it != dataset_map.end(); ++it) { + m_cwd->copy_dataset(it->second, 
it->first); + } +} + +void bob::io::base::HDF5File::create (const std::string& path, const bob::io::base::HDF5Type& type, + bool list, size_t compression) { + check_open(); + if (!m_file->writable()) { + boost::format m("cannot create dataset '%s' at path '%s' of file '%s' because it is not writeable"); + m % path % m_cwd->path() % m_file->filename(); + throw std::runtime_error(m.str()); + } + if (!contains(path)) m_cwd->create_dataset(path, type, list, compression); + else (*m_cwd)[path]->size(type); +} + +void bob::io::base::HDF5File::read_buffer (const std::string& path, size_t pos, + const bob::io::base::HDF5Type& type, void* buffer) const { + check_open(); + (*m_cwd)[path]->read_buffer(pos, type, buffer); +} + +void bob::io::base::HDF5File::write_buffer (const std::string& path, + size_t pos, const bob::io::base::HDF5Type& type, const void* buffer) { + check_open(); + if (!m_file->writable()) { + boost::format m("cannot write to object '%s' at path '%s' of file '%s' because it is not writeable"); + m % path % m_cwd->path() % m_file->filename(); + throw std::runtime_error(m.str()); + } + (*m_cwd)[path]->write_buffer(pos, type, buffer); +} + +void bob::io::base::HDF5File::extend_buffer(const std::string& path, + const bob::io::base::HDF5Type& type, const void* buffer) { + check_open(); + if (!m_file->writable()) { + boost::format m("cannot extend object '%s' at path '%s' of file '%s' because the file is not writeable"); + m % path % m_cwd->path() % m_file->filename(); + throw std::runtime_error(m.str()); + } + (*m_cwd)[path]->extend_buffer(type, buffer); +} + +bool bob::io::base::HDF5File::hasAttribute(const std::string& path, + const std::string& name) const { + check_open(); + if (m_cwd->has_dataset(path)) { + return (*m_cwd)[path]->has_attribute(name); + } + else if (m_cwd->has_group(path)) { + return m_cwd->cd(path)->has_attribute(name); + } + return false; +} + +void bob::io::base::HDF5File::getAttributeType(const std::string& path, + const std::string& 
name, HDF5Type& type) const { + check_open(); + if (m_cwd->has_dataset(path)) { + (*m_cwd)[path]->gettype_attribute(name, type); + } + else if (m_cwd->has_group(path)) { + m_cwd->cd(path)->gettype_attribute(name, type); + } + else { + boost::format m("cannot read attribute '%s' type at path/dataset '%s' of file '%s' (cwd: '%s') because this path/dataset does not currently exist"); + m % name % path % m_file->filename() % m_cwd->path(); + throw std::runtime_error(m.str()); + } +} + +void bob::io::base::HDF5File::deleteAttribute(const std::string& path, + const std::string& name) { + check_open(); + if (m_cwd->has_dataset(path)) { + (*m_cwd)[path]->delete_attribute(name); + } + else if (m_cwd->has_group(path)) { + m_cwd->cd(path)->delete_attribute(name); + } + else { + boost::format m("cannot delete attribute '%s' at path/dataset '%s' of file '%s' (cwd: '%s') because this path/dataset does not currently exist"); + m % name % path % m_file->filename() % m_cwd->path(); + throw std::runtime_error(m.str()); + } +} + +void bob::io::base::HDF5File::listAttributes(const std::string& path, + std::map<std::string, bob::io::base::HDF5Type>& attributes) const { + check_open(); + if (m_cwd->has_dataset(path)) { + (*m_cwd)[path]->list_attributes(attributes); + } + else if (m_cwd->has_group(path)) { + m_cwd->cd(path)->list_attributes(attributes); + } + else { + boost::format m("cannot list attributes at path/dataset '%s' of file '%s' (cwd: '%s') because this path/dataset does not currently exist"); + m % path % m_file->filename() % m_cwd->path(); + throw std::runtime_error(m.str()); + } +} + +void bob::io::base::HDF5File::read_attribute(const std::string& path, + const std::string& name, const bob::io::base::HDF5Type& type, void* buffer) const { + check_open(); + if (m_cwd->has_dataset(path)) { + (*m_cwd)[path]->read_attribute(name, type, buffer); + } + else if (m_cwd->has_group(path)) { + m_cwd->cd(path)->read_attribute(name, type, buffer); + } + else { + boost::format m("cannot 
get attribute '%s' at path/dataset '%s' of file '%s' (cwd: '%s') because this path/dataset does not currently exist"); + m % name % path % m_file->filename() % m_cwd->path(); + throw std::runtime_error(m.str()); + } +} + +void bob::io::base::HDF5File::write_attribute(const std::string& path, + const std::string& name, const bob::io::base::HDF5Type& type, const void* buffer) { + check_open(); + if (m_cwd->has_dataset(path)) { + (*m_cwd)[path]->write_attribute(name, type, buffer); + } + else if (m_cwd->has_group(path)) { + m_cwd->cd(path)->write_attribute(name, type, buffer); + } + else { + boost::format m("cannot set attribute '%s' at path/dataset '%s' of file '%s' (cwd: '%s') because this path/dataset does not currently exist"); + m % name % path % m_file->filename() % m_cwd->path(); + throw std::runtime_error(m.str()); + } +} + +void bob::io::base::HDF5File::check_open() const{ + if (!m_cwd || ! m_file){ + throw std::runtime_error("The file is not opened yet / any more"); + } +} diff --git a/bob/io/base/cpp/HDF5Group.cpp b/bob/io/base/cpp/HDF5Group.cpp new file mode 100644 index 0000000000000000000000000000000000000000..aab916684071b0f51c260483f15c3a4dd3f0766d --- /dev/null +++ b/bob/io/base/cpp/HDF5Group.cpp @@ -0,0 +1,560 @@ +/** + * @author Andre Anjos <andre.anjos@idiap.ch> + * @date Wed 29 Feb 17:24:10 2012 + * + * @brief Implements HDF5 groups. + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include <boost/make_shared.hpp> +#include <boost/shared_array.hpp> +#include <boost/filesystem.hpp> +#include <boost/format.hpp> +#include <boost/algorithm/string.hpp> + +#include <bob/core/logging.h> + +#include <bob.io.base/HDF5Group.h> +#include <bob.io.base/HDF5Utils.h> + +/** + * Creates an "auto-destructible" HDF5 Group + */ +static void delete_h5g (hid_t* p) { + if (*p >= 0) { + herr_t err = H5Gclose(*p); + if (err < 0) { + bob::core::error << "H5Gclose() exited with an error (" << err << "). 
The stack trace follows:" << std::endl; + bob::core::error << bob::io::base::format_hdf5_error() << std::endl; + } + } + delete p; +} + +static boost::shared_ptr<hid_t> create_new_group(boost::shared_ptr<hid_t> p, + const std::string& name) { + boost::shared_ptr<hid_t> retval(new hid_t(-1), std::ptr_fun(delete_h5g)); + *retval = H5Gcreate2(*p, name.c_str(), H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (*retval < 0) { + boost::format m("call to HDF5 C-function H5Gcreate2() returned error %d. HDF5 error statck follows:\n%s"); + m % *retval % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + return retval; +} + +static boost::shared_ptr<hid_t> open_group(boost::shared_ptr<hid_t> g, + const char* name) { + boost::shared_ptr<hid_t> retval(new hid_t(-1), std::ptr_fun(delete_h5g)); + *retval = H5Gopen2(*g, name, H5P_DEFAULT); + if (*retval < 0) { + boost::format m("call to HDF5 C-function H5Gopen2() returned error %d. HDF5 error statck follows:\n%s"); + m % *retval % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + return retval; +} + +bob::io::base::detail::hdf5::Group::Group(boost::shared_ptr<Group> parent, const std::string& name): + m_name(name), + m_id(create_new_group(parent->location(), name)), + m_parent(parent) +{ +} + +/** + * Simple wrapper to call internal bob::io::base::detail::hdf5::Group::iterate_callback, that can call + * Group and Dataset constructors. Note that those are private or protected for + * design reasons. 
+ */ +static herr_t group_iterate_callback(hid_t self, const char *name, + const H5L_info_t *info, void *object) { + return static_cast<bob::io::base::detail::hdf5::Group*>(object)->iterate_callback(self, name, info); +} + +herr_t bob::io::base::detail::hdf5::Group::iterate_callback(hid_t self, const char *name, + const H5L_info_t *info) { + + // If we are not looking at a hard link to the data, just ignore + if (info->type != H5L_TYPE_HARD) { + TDEBUG1("Ignoring soft-link `" << name << "' in HDF5 file"); + return 0; + } + + // Get information about the HDF5 object + H5O_info_t obj_info; + herr_t status = H5Oget_info_by_name(self, name, &obj_info, H5P_DEFAULT); + if (status < 0) { + boost::format m("call to HDF5 C-function H5Oget_info_by_name() returned error %d. HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + + switch(obj_info.type) { + case H5O_TYPE_GROUP: + //creates with recursion + m_groups[name] = boost::make_shared<bob::io::base::detail::hdf5::Group>(shared_from_this(), + name, true); + m_groups[name]->open_recursively(); + break; + case H5O_TYPE_DATASET: + m_datasets[name] = boost::make_shared<bob::io::base::detail::hdf5::Dataset>(shared_from_this(), + std::string(name)); + break; + default: + break; + } + + return 0; +} + +bob::io::base::detail::hdf5::Group::Group(boost::shared_ptr<Group> parent, + const std::string& name, bool): + m_name(name), + m_id(open_group(parent->location(), name.c_str())), + m_parent(parent) +{ + //checks name + if (!m_name.size() || m_name == "." 
|| m_name == "..") { + boost::format m("Cannot create group with illegal name `%s' at `%s'"); + m % name % url(); + throw std::runtime_error(m.str()); + } +} + +void bob::io::base::detail::hdf5::Group::open_recursively() { + //iterates over this group only and instantiates what needs to be instantiated + herr_t status = H5Literate(*m_id, H5_INDEX_NAME, + H5_ITER_NATIVE, 0, group_iterate_callback, static_cast<void*>(this)); + if (status < 0) { + boost::format m("Call to HDF5 C-function H5Literate() returned error %d. HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } +} + +bob::io::base::detail::hdf5::Group::Group(boost::shared_ptr<File> parent): + m_name(""), + m_id(open_group(parent->location(), "/")), + m_parent() +{ +} + +bob::io::base::detail::hdf5::Group::~Group() { } + +const boost::shared_ptr<bob::io::base::detail::hdf5::Group> bob::io::base::detail::hdf5::Group::parent() const { + return m_parent.lock(); +} + +boost::shared_ptr<bob::io::base::detail::hdf5::Group> bob::io::base::detail::hdf5::Group::parent() { + return m_parent.lock(); +} + +const std::string& bob::io::base::detail::hdf5::Group::filename() const { + return parent()->filename(); +} + +std::string bob::io::base::detail::hdf5::Group::path() const { + return (m_name.size()?parent()->path():"") + "/" + m_name; +} + +std::string bob::io::base::detail::hdf5::Group::url() const { + return filename() + ":" + path(); +} + +const boost::shared_ptr<bob::io::base::detail::hdf5::File> bob::io::base::detail::hdf5::Group::file() const { + return parent()->file(); +} + +boost::shared_ptr<bob::io::base::detail::hdf5::File> bob::io::base::detail::hdf5::Group::file() { + return parent()->file(); +} + +boost::shared_ptr<bob::io::base::detail::hdf5::Group> bob::io::base::detail::hdf5::Group::cd(const std::string& dir) { + //empty dir == void action, return self + if (!dir.size()) return shared_from_this(); + + if (dir[0] == '/') { 
//absolute path given, apply to root node + return file()->root()->cd(dir.substr(1)); + } + + //relative path given, start from self + std::string::size_type pos = dir.find_first_of('/'); + if (pos == std::string::npos) { //it should be one of my children + if (dir == ".") return shared_from_this(); + if (dir == "..") { + if (!m_name.size()) { //this is the root group already + boost::format m("Cannot go beyond root directory at file `%s'"); + m % file()->filename(); + throw std::runtime_error(m.str()); + } + //else, just return its parent + return parent(); + } + if (!has_group(dir)) { + boost::format m("Cannot find group `%s' at `%s'"); + m % dir % url(); + throw std::runtime_error(m.str()); + } + //else, just return the named group + return m_groups[dir]; + } + + //if you get to this point, we are just traversing + std::string mydir = dir.substr(0, pos); + if (mydir == ".") return cd(dir.substr(pos+1)); + if (mydir == "..") return parent()->cd(dir.substr(pos+1)); + if (!has_group(mydir)) { + boost::format m("Cannot find group `%s' at `%s'"); + m % dir % url(); + throw std::runtime_error(m.str()); + } + + //else, just recurse to the next group + return m_groups[mydir]->cd(dir.substr(pos+1)); +} + +const boost::shared_ptr<bob::io::base::detail::hdf5::Group> bob::io::base::detail::hdf5::Group::cd(const std::string& dir) const { + return const_cast<bob::io::base::detail::hdf5::Group*>(this)->cd(dir); +} + +boost::shared_ptr<bob::io::base::detail::hdf5::Dataset> bob::io::base::detail::hdf5::Group::operator[] (const std::string& dir) { + std::string::size_type pos = dir.find_last_of('/'); + if (pos == std::string::npos) { //search on the current group + if (!has_dataset(dir)) { + boost::format m("Cannot find dataset `%s' at `%s'"); + m % dir % url(); + throw std::runtime_error(m.str()); + } + return m_datasets[dir]; + } + + //if you get to this point, the search routine needs to be performed on + //another group, indicated by the path. 
So, we first cd() there and then do + //the same as we do here. This will recurse through the directory structure + //until we find the place defined by the user or raise an exception. + std::string dest = dir.substr(0, pos); + if (!dest.size()) dest = "/"; + boost::shared_ptr<bob::io::base::detail::hdf5::Group> g = cd(dest); + return g->operator[](dir.substr(pos+1)); +} + +const boost::shared_ptr<bob::io::base::detail::hdf5::Dataset> bob::io::base::detail::hdf5::Group::operator[] (const std::string& dir) const { + return const_cast<bob::io::base::detail::hdf5::Group*>(this)->operator[](dir); +} + +void bob::io::base::detail::hdf5::Group::reset() { + typedef std::map<std::string, boost::shared_ptr<bob::io::base::detail::hdf5::Group> > group_map_type; + for (group_map_type::const_iterator it = m_groups.begin(); + it != m_groups.end(); ++it) { + remove_group(it->first); + } + + typedef std::map<std::string, boost::shared_ptr<bob::io::base::detail::hdf5::Dataset> > + dataset_map_type; + for (dataset_map_type::const_iterator it = m_datasets.begin(); + it != m_datasets.end(); ++it) { + remove_dataset(it->first); + } +} + +boost::shared_ptr<bob::io::base::detail::hdf5::Group> bob::io::base::detail::hdf5::Group::create_group(const std::string& dir) { + std::string::size_type pos = dir.find_last_of('/'); + if (pos == std::string::npos) { //creates on the current group + boost::shared_ptr<bob::io::base::detail::hdf5::Group> g = + boost::make_shared<bob::io::base::detail::hdf5::Group>(shared_from_this(), dir); + m_groups[dir] = g; + return g; + } + + //if you get to this point, the search routine needs to be performed on + //another group, indicated by the path. So, we first cd() there and then do + //the same as we do here. This will recurse through the directory structure + //until we find the place defined by the user or raise an exception. 
+ std::string dest = dir.substr(0, pos); + if (!dest.size()) dest = "/"; + boost::shared_ptr<bob::io::base::detail::hdf5::Group> g = cd(dest); + return g->create_group(dir.substr(pos+1)); +} + +void bob::io::base::detail::hdf5::Group::remove_group(const std::string& dir) { + std::string::size_type pos = dir.find_last_of('/'); + if (pos == std::string::npos) { //copy on the current group + herr_t status = H5Ldelete(*m_id, dir.c_str(), H5P_DEFAULT); + if (status < 0) { + boost::format m("Call to HDF5 C-function H5Ldelete() returned error %d. HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + typedef std::map<std::string, boost::shared_ptr<bob::io::base::detail::hdf5::Group> > map_type; + map_type::iterator it = m_groups.find(dir); + m_groups.erase(it); + return; + } + + //if you get to this point, the removal routine needs to be performed on + //another group, indicated by the path. So, we first cd() there and then do + //the same as we do here. This will recurse through the directory structure + //until we find the place defined by the user or raise an exception. + std::string dest = dir.substr(0, pos); + if (!dest.size()) dest = "/"; + boost::shared_ptr<bob::io::base::detail::hdf5::Group> g = cd(dest); + return g->remove_group(dir.substr(pos+1)); +} + +/** + * Opens an "auto-destructible" HDF5 property list + */ +static void delete_h5plist (hid_t* p) { + if (*p >= 0) { + herr_t err = H5Pclose(*p); + if (err < 0) { + bob::core::error << "H5Pclose() exited with an error (" << err << "). The stack trace follows:" << std::endl; + bob::core::error << bob::io::base::format_hdf5_error() << std::endl; + } + } + delete p; +} + +static boost::shared_ptr<hid_t> open_plist(hid_t classid) { + boost::shared_ptr<hid_t> retval(new hid_t(-1), std::ptr_fun(delete_h5plist)); + *retval = H5Pcreate(classid); + if (*retval < 0) { + boost::format m("call to HDF5 C-function H5Pcreate() returned error %d. 
HDF5 error statck follows:\n%s"); + m % *retval % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + return retval; +} + +void bob::io::base::detail::hdf5::Group::rename_group(const std::string& from, const std::string& to) { + boost::shared_ptr<hid_t> create_props = open_plist(H5P_LINK_CREATE); + H5Pset_create_intermediate_group(*create_props, 1); + herr_t status = H5Lmove(*m_id, from.c_str(), H5L_SAME_LOC, to.c_str(), + *create_props, H5P_DEFAULT); + if (status < 0) { + boost::format m("Call to HDF5 C-function H5Lmove() returned error %d. HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } +} + +void bob::io::base::detail::hdf5::Group::copy_group(const boost::shared_ptr<Group> other, + const std::string& dir) { + std::string::size_type pos = dir.find_last_of('/'); + if (pos == std::string::npos) { //copy on the current group + const char* use_name = dir.size()?dir.c_str():other->name().c_str(); + herr_t status = H5Ocopy(*other->parent()->location(), + other->name().c_str(), *m_id, use_name, H5P_DEFAULT, H5P_DEFAULT); + if (status < 0) { + boost::format m("call to HDF5 C-function H5Ocopy() returned error %d. HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + + //read new group contents + boost::shared_ptr<bob::io::base::detail::hdf5::Group> copied = + boost::make_shared<bob::io::base::detail::hdf5::Group>(shared_from_this(), use_name); + copied->open_recursively(); + + //index it + m_groups[use_name] = copied; + + return; + } + + //if you get to this point, the copy routine needs to be performed on + //another group, indicated by the path. So, we first cd() there and then do + //the same as we do here. This will recurse through the directory structure + //until we find the place defined by the user or return false. 
+ std::string dest = dir.substr(0, pos); + if (!dest.size()) dest = "/"; + boost::shared_ptr<bob::io::base::detail::hdf5::Group> g = cd(dest); + return g->copy_group(other, dir.substr(pos+1)); +} + +bool bob::io::base::detail::hdf5::Group::has_group(const std::string& dir) const { + std::string::size_type pos = dir.find_last_of('/'); + if (pos == std::string::npos) { //search on the current group + if (dir == "." || dir == "..") return true; //special case + typedef std::map<std::string, boost::shared_ptr<bob::io::base::detail::hdf5::Group> > map_type; + map_type::const_iterator it = m_groups.find(dir); + return (it != m_groups.end()); + } + + //if you get to this point, the search routine needs to be performed on + //another group, indicated by the path. So, we first cd() there and then do + //the same as we do here. This will recurse through the directory structure + //until we find the place defined by the user or return false. + std::string dest = dir.substr(0, pos); + if (!dest.size()) dest = "/"; + boost::shared_ptr<bob::io::base::detail::hdf5::Group> g = cd(dest); + return g->has_group(dir.substr(pos+1)); +} + +boost::shared_ptr<bob::io::base::detail::hdf5::Dataset> bob::io::base::detail::hdf5::Group::create_dataset +(const std::string& dir, const bob::io::base::HDF5Type& type, bool list, + size_t compression) { + std::string::size_type pos = dir.find_last_of('/'); + if (pos == std::string::npos) { //creates on the current group + boost::shared_ptr<bob::io::base::detail::hdf5::Dataset> d = + boost::make_shared<bob::io::base::detail::hdf5::Dataset>(shared_from_this(), dir, type, + list, compression); + m_datasets[dir] = d; + return d; + } + + //if you get to this point, the search routine needs to be performed on + //another group, indicated by the path. So, we first cd() there and then do + //the same as we do here. This will recurse through the directory structure + //until we find the place defined by the user or return false. 
+ std::string dest = dir.substr(0, pos); + boost::shared_ptr<bob::io::base::detail::hdf5::Group> g; + if (!dest.size()) g = cd("/"); + else { + //let's make sure the directory exists, or let's create it recursively + if (!has_group(dest)) g = create_group(dest); + else g = cd(dest); + } + return g->create_dataset(dir.substr(pos+1), type, list, compression); +} + +void bob::io::base::detail::hdf5::Group::remove_dataset(const std::string& dir) { + std::string::size_type pos = dir.find_last_of('/'); + if (pos == std::string::npos) { //removes on the current group + herr_t status = H5Ldelete(*m_id, dir.c_str(), H5P_DEFAULT); + if (status < 0) { + boost::format m("Call to HDF5 C-function H5Ldelete() returned error %d. HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + typedef std::map<std::string, boost::shared_ptr<bob::io::base::detail::hdf5::Dataset> > map_type; + map_type::iterator it = m_datasets.find(dir); + m_datasets.erase(it); + return; + } + + //if you get to this point, the removal routine needs to be performed on + //another group, indicated by the path. So, we first cd() there and then do + //the same as we do here. This will recurse through the directory structure + //until we find the place defined by the user or raise an exception. + std::string dest = dir.substr(0, pos); + if (!dest.size()) dest = "/"; + boost::shared_ptr<bob::io::base::detail::hdf5::Group> g = cd(dest); + return g->remove_dataset(dir.substr(pos+1)); +} + +void bob::io::base::detail::hdf5::Group::rename_dataset(const std::string& from, const std::string& to) { + boost::shared_ptr<hid_t> create_props = open_plist(H5P_LINK_CREATE); + H5Pset_create_intermediate_group(*create_props, 1); + herr_t status = H5Lmove(*m_id, from.c_str(), H5L_SAME_LOC, to.c_str(), + *create_props, H5P_DEFAULT); + if (status < 0) { + boost::format m("Call to HDF5 C-function H5Ldelete() returned error %d. 
HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } +} + +void bob::io::base::detail::hdf5::Group::copy_dataset(const boost::shared_ptr<Dataset> other, + const std::string& dir) { + + std::string::size_type pos = dir.find_last_of('/'); + if (pos == std::string::npos) { //search on the current group + const char* use_name = dir.size()?dir.c_str():other->name().c_str(); + herr_t status = H5Ocopy(*other->parent()->location(), + other->name().c_str(), *m_id, use_name, H5P_DEFAULT, H5P_DEFAULT); + if (status < 0) { + boost::format m("Call to HDF5 C-function H5Ocopy() returned error %d. HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + //read new group contents + m_datasets[use_name] = boost::make_shared<bob::io::base::detail::hdf5::Dataset>(shared_from_this(), use_name); + return; + } + + //if you get to this point, the copy routine needs to be performed on + //another group, indicated by the path. So, we first cd() there and then do + //the same as we do here. This will recurse through the directory structure + //until we find the place defined by the user. + std::string dest = dir.substr(0, pos); + if (!dest.size()) dest = "/"; + boost::shared_ptr<bob::io::base::detail::hdf5::Group> g = cd(dest); + return g->copy_dataset(other, dir.substr(pos+1)); +} + +bool bob::io::base::detail::hdf5::Group::has_dataset(const std::string& dir) const { + std::string::size_type pos = dir.find_last_of('/'); + if (pos == std::string::npos) { //search on the current group + typedef std::map<std::string, boost::shared_ptr<bob::io::base::detail::hdf5::Dataset> > map_type; + map_type::const_iterator it = m_datasets.find(dir); + return (it != m_datasets.end()); + } + + //if you get to this point, the search routine needs to be performed on + //another group, indicated by the path. 
So, we first cd() there and then do + //the same as we do here. This will recurse through the directory structure + //until we find the place defined by the user or return false. + std::string dest = dir.substr(0, pos); + if (!dest.size()) dest = "/"; + boost::shared_ptr<bob::io::base::detail::hdf5::Group> g = cd(dest); + return g->has_dataset(dir.substr(pos+1)); +} + +void bob::io::base::detail::hdf5::Group::gettype_attribute(const std::string& name, + bob::io::base::HDF5Type& type) const { + bob::io::base::detail::hdf5::gettype_attribute(m_id, name, type); +} + +bool bob::io::base::detail::hdf5::Group::has_attribute(const std::string& name) const { + return bob::io::base::detail::hdf5::has_attribute(m_id, name); +} + +void bob::io::base::detail::hdf5::Group::delete_attribute (const std::string& name) { + bob::io::base::detail::hdf5::delete_attribute(m_id, name); +} + +void bob::io::base::detail::hdf5::Group::read_attribute (const std::string& name, + const bob::io::base::HDF5Type& dest_type, void* buffer) const { + bob::io::base::detail::hdf5::read_attribute(m_id, name, dest_type, buffer); +} + +void bob::io::base::detail::hdf5::Group::write_attribute (const std::string& name, + const bob::io::base::HDF5Type& dest_type, const void* buffer) { + bob::io::base::detail::hdf5::write_attribute(m_id, name, dest_type, buffer); +} + +void bob::io::base::detail::hdf5::Group::list_attributes(std::map<std::string, bob::io::base::HDF5Type>& attributes) const { + bob::io::base::detail::hdf5::list_attributes(m_id, attributes); +} + +template <> void bob::io::base::detail::hdf5::Group::set_attribute<std::string>(const std::string& name, const std::string& v) { + bob::io::base::HDF5Type dest_type(v); + write_attribute(name, dest_type, reinterpret_cast<const void*>(v.c_str())); +} + +template <> std::string bob::io::base::detail::hdf5::Group::get_attribute(const std::string& name) const { + HDF5Type type; + gettype_attribute(name, type); + boost::shared_array<char> v(new 
char[type.shape()[0]+1]); + v[type.shape()[0]] = 0; ///< null termination + read_attribute(name, type, reinterpret_cast<void*>(v.get())); + std::string retval(v.get()); + return retval; +} + +bob::io::base::detail::hdf5::RootGroup::RootGroup(boost::shared_ptr<File> parent): + bob::io::base::detail::hdf5::Group(parent), + m_parent(parent) +{ +} + +bob::io::base::detail::hdf5::RootGroup::~RootGroup() { +} + +const std::string& bob::io::base::detail::hdf5::RootGroup::filename() const { + return m_parent.lock()->filename(); +} diff --git a/bob/io/base/cpp/HDF5Types.cpp b/bob/io/base/cpp/HDF5Types.cpp new file mode 100644 index 0000000000000000000000000000000000000000..16795bf8ba17e07326bcef4807cfe61188b15ab4 --- /dev/null +++ b/bob/io/base/cpp/HDF5Types.cpp @@ -0,0 +1,866 @@ +/** + * @date Wed Jun 22 17:50:08 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief A few helpers to handle HDF5 datasets in a more abstract way. + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include <boost/format.hpp> +#include <sstream> +#include <boost/make_shared.hpp> + +/** + * MT "lock" support was only introduced in Boost 1.35. Before copying this + * very ugly hack, make sure we are still using Boost 1.34. This will no longer + * be the case starting January 2011. + */ +#include <boost/version.hpp> +#include <boost/thread/mutex.hpp> +#if ((BOOST_VERSION / 100) % 1000) > 34 +#include <boost/thread/locks.hpp> +#else +#warning Disabling MT locks because Boost < 1.35! 
+#endif + +#include <bob/core/logging.h> + +#include <bob.io.base/HDF5Types.h> + +const char* bob::io::base::stringize (hdf5type t) { + switch (t) { + case bob::io::base::s: + return "string"; + case bob::io::base::b: + return "bool"; + case bob::io::base::i8: + return "int8"; + case bob::io::base::i16: + return "int16"; + case bob::io::base::i32: + return "int32"; + case bob::io::base::i64: + return "int64"; + case bob::io::base::u8: + return "uint8"; + case bob::io::base::u16: + return "uint16"; + case bob::io::base::u32: + return "uint32"; + case bob::io::base::u64: + return "uint64"; + case bob::io::base::f32: + return "float32"; + case bob::io::base::f64: + return "float64"; + case bob::io::base::f128: + return "float128"; + case bob::io::base::c64: + return "complex64"; + case bob::io::base::c128: + return "complex128"; + case bob::io::base::c256: + return "complex256"; + case bob::io::base::unsupported: + return "unsupported"; + } + return "unsupported"; ///< just to silence gcc +} + +static herr_t walker(unsigned n, const H5E_error2_t *desc, void *cookie) { + bob::io::base::HDF5ErrorStack& stack = *(bob::io::base::HDF5ErrorStack*)cookie; + std::vector<std::string>& sv = stack.get(); + boost::format fmt("%s() @ %s+%d: %s"); + fmt % desc->func_name % desc->file_name % desc->line % desc->desc; + sv.push_back(fmt.str()); + return 0; +} + +static herr_t err_callback(hid_t stack, void* cookie) { + bob::io::base::HDF5ErrorStack& err_stack = *(bob::io::base::HDF5ErrorStack*)cookie; + if (!err_stack.muted()) H5Ewalk2(stack, H5E_WALK_DOWNWARD, walker, cookie); + H5Eclear2(stack); + return 0; +} + +bob::io::base::HDF5ErrorStack::HDF5ErrorStack (): + m_stack(H5E_DEFAULT), + m_muted(false), + m_err(), + m_func(0), + m_client_data(0) +{ + H5Eget_auto2(m_stack, &m_func, &m_client_data); + H5Eset_auto2(m_stack, err_callback, this); +} + +bob::io::base::HDF5ErrorStack::HDF5ErrorStack (hid_t stack): + m_stack(stack), + m_muted(false), + m_err(), + m_func(0), + 
m_client_data(0) +{ + H5Eget_auto2(m_stack, &m_func, &m_client_data); + H5Eset_auto2(m_stack, err_callback, this); +} + +bob::io::base::HDF5ErrorStack::~HDF5ErrorStack () { + H5Eset_auto2(m_stack, m_func, m_client_data); +} + +//creates a pointer to the default HDF5 error stack that is global to the +//application level. +const boost::shared_ptr<bob::io::base::HDF5ErrorStack> + bob::io::base::DefaultHDF5ErrorStack(new HDF5ErrorStack()); + +bob::io::base::HDF5Shape::HDF5Shape (size_t n): + m_n(n), + m_shape() +{ + if (n > MAX_HDF5SHAPE_SIZE) { + boost::format m("cannot create shape with %u dimensions, exceeding the maximum number of dimensions supported by this API (%u)"); + m % n % MAX_HDF5SHAPE_SIZE; + throw std::runtime_error(m.str()); + } + for (size_t i=0; i<n; ++i) m_shape[i] = 0; +} + +bob::io::base::HDF5Shape::HDF5Shape (): + m_n(0), + m_shape() +{ +} + +bob::io::base::HDF5Shape::HDF5Shape (const bob::io::base::HDF5Shape& other): + m_n(other.m_n), + m_shape() +{ + for (size_t i=0; i<m_n; ++i) m_shape[i] = other.m_shape[i]; +} + +bob::io::base::HDF5Shape::~HDF5Shape() { +} + +bob::io::base::HDF5Shape& bob::io::base::HDF5Shape::operator= (const bob::io::base::HDF5Shape& other) { + m_n = other.m_n; + for (size_t i=0; i<m_n; ++i) m_shape[i] = other.m_shape[i]; + return *this; +} + +void bob::io::base::HDF5Shape::copy(const bob::io::base::HDF5Shape& other) { + if (m_n <= other.m_n) { //I'm smaller or equal + for (size_t i=0; i<m_n; ++i) m_shape[i] = other.m_shape[i]; + } + else { //The other is smaller + for (size_t i=0; i<other.m_n; ++i) m_shape[i] = other.m_shape[i]; + } +} + +void bob::io::base::HDF5Shape::reset() { + m_n = 0; +} + +bob::io::base::HDF5Shape& bob::io::base::HDF5Shape::operator <<= (size_t pos) { + if (!m_n || !pos) return *this; + for (size_t i=0; i<(m_n-pos); ++i) m_shape[i] = m_shape[i+pos]; + m_n -= pos; + return *this; +} + +bob::io::base::HDF5Shape& bob::io::base::HDF5Shape::operator >>= (size_t pos) { + if (!pos) return *this; + if ( (m_n 
+ pos) > MAX_HDF5SHAPE_SIZE) { + boost::format m("if you shift right this shape by %u positions, you will exceed the maximum number of dimensions supported by this API (%u)"); + m % pos % MAX_HDF5SHAPE_SIZE; + throw std::runtime_error(m.str()); + } + for (size_t i=(m_n+pos-1); i>(pos-1); --i) m_shape[i] = m_shape[i-1]; + for (size_t i=0; i<pos; ++i) m_shape[i] = 1; + m_n += pos; + return *this; +} + +hsize_t bob::io::base::HDF5Shape::product() const { + hsize_t retval = 1; + for (size_t i=0; i<m_n; ++i) retval *= m_shape[i]; + return retval; +} + +bool bob::io::base::HDF5Shape::operator== (const HDF5Shape& other) const { + if (m_n != other.m_n) return false; + for (size_t i=0; i<m_n; ++i) if (m_shape[i] != other[i]) return false; + return true; +} + +bool bob::io::base::HDF5Shape::operator!= (const HDF5Shape& other) const { + return !(*this == other); +} + +std::string bob::io::base::HDF5Shape::str () const { + if (m_n == 0) return ""; + std::ostringstream retval(""); + retval << m_shape[0]; + for (size_t i=1; i<m_n; ++i) retval << ", " << m_shape[i]; + return retval.str(); +} + +/** + * Deleter method for auto-destroyable HDF5 datatypes. + */ +static void delete_h5datatype (hid_t* p) { + if (*p >= 0) { + herr_t err = H5Tclose(*p); + if (err < 0) { + bob::core::error << "H5Tclose() exited with an error (" << err << "). The stack trace follows:" << std::endl; + bob::core::error << bob::io::base::format_hdf5_error() << std::endl; + } + } + delete p; +} + +/** + * Given a datatype which is a compound type, returns the std::complex<T> + * hdf5type equivalent or raises. + */ +static bob::io::base::hdf5type equivctype(const boost::shared_ptr<hid_t>& dt) { + if (H5Tget_nmembers(*dt) != 2) throw std::runtime_error("the internal HDF5 type is not supported by our HDF5 interface"); + + //members have to: + // 1. have names "real" and "imag" + // 2. have class type H5T_FLOAT + // 3. have equal size + // 4. have a size of 4, 8 or 16 bytes + + // 1. 
+ int real = H5Tget_member_index(*dt, "real"); + if (real < 0) { + throw std::runtime_error("the complex member index for `real' is not present on this HDF5 type"); + } + int imag = H5Tget_member_index(*dt, "imag"); + if (imag < 0) { + throw std::runtime_error("the complex member index for `imag' is not present on this HDF5 type"); + } + + // 2. + if (H5Tget_member_class(*dt, real) != H5T_FLOAT) + throw std::runtime_error("the raw type for member `real' on complex structure in HDF5 is not H5T_FLOAT as expected"); + if (H5Tget_member_class(*dt, imag) != H5T_FLOAT) + throw std::runtime_error("the raw type for member `imag' on complex structure in HDF5 is not H5T_FLOAT as expected"); + + // 3. + boost::shared_ptr<hid_t> realid(new hid_t(-1), std::ptr_fun(delete_h5datatype)); + *realid = H5Tget_member_type(*dt, real); + boost::shared_ptr<hid_t> imagid(new hid_t(-1), std::ptr_fun(delete_h5datatype)); + *imagid = H5Tget_member_type(*dt, imag); + size_t realsize = H5Tget_size(*realid); + size_t imagsize = H5Tget_size(*imagid); + if (realsize != imagsize) { + throw std::runtime_error("the sizes of the real and imaginary parts on HDF5 complex struct are not the same"); + } + + // 4. 
+ switch (realsize) { + case 4: //std::complex<float> + return bob::io::base::c64; + case 8: //std::complex<double> + return bob::io::base::c128; + case 16: //std::complex<double> + return bob::io::base::c256; + default: + break; + } + + throw std::runtime_error("could not find the equivalent internal type for (supposedly) complex HDF5 structure"); +} + +/** + * Checks if a given type can be read as boolean + */ +static void checkbool(const boost::shared_ptr<hid_t>& dt) { + + if (H5Tget_nmembers(*dt) != 2) { + throw std::runtime_error("the number of enumeration members for the locally installed boolean type is not 2"); + } + + int8_t value; + herr_t status = H5Tget_member_value(*dt, 0, &value); + if (status < 0) { + boost::format m("call to HDF5 C-function H5Tget_member_value() returned error %d. HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + bool next_is_false = false; + if (value != 0) next_is_false = true; + status = H5Tget_member_value(*dt, 1, &value); + if (status < 0) { + boost::format m("call to HDF5 C-function H5Tget_member_value() returned error %d. 
HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + if (next_is_false) { + if (value != 0) { + throw std::runtime_error("the attribution of false(0) or true(1) is messed up on the current data type, which is supposed to be a boolean"); + } + } + else { + if (value == 0) { + throw std::runtime_error("the attribution of false(0) or true(1) is messed up on the current data type, which is supposed to be a boolean"); + } + } +} + +/** + * Given a datatype, returns the supported type equivalent or raises + */ +static bob::io::base::hdf5type get_datatype +(const boost::shared_ptr<hid_t>& dt) { + H5T_class_t classtype = H5Tget_class(*dt); + + if (classtype == H5T_STRING) return bob::io::base::s; //no need to check further + + size_t typesize = H5Tget_size(*dt); ///< element size + H5T_sign_t signtype = H5Tget_sign(*dt); + + //we only support little-endian byte-ordering + H5T_order_t ordertype = H5Tget_order(*dt); + + //please note that checking compound types for hdf5 < 1.8.6 does not work. +# if H5_VERSION_GE(1,8,6) + if (ordertype < 0) { + boost::format m("call to HDF5 C-function H5Tget_order returned error %d. 
HDF5 error statck follows:\n%s"); + m % ordertype % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + + if (ordertype != H5T_ORDER_LE) { + throw std::runtime_error("The endianness of datatype is not little-endian"); + } +# else + if ((ordertype >= 0) && (ordertype != H5T_ORDER_LE)) { + throw std::runtime_error("The endianness of datatype is not little-endian"); + } +# endif + + switch (classtype) { + case H5T_ENUM: + checkbool(dt); + return bob::io::base::b; + case H5T_INTEGER: + switch (typesize) { + case 1: //int8 or uint8 + switch (signtype) { + case H5T_SGN_NONE: + return bob::io::base::u8; + case H5T_SGN_2: //two's complement == "is signed" ;-) + return bob::io::base::i8; + default: + throw std::runtime_error("HDF5 1-byte integer datatype (read from file) cannot be mapped into a C++ type supported by this API"); + } + break; + case 2: //int16 or uint16 + switch (signtype) { + case H5T_SGN_NONE: + return bob::io::base::u16; + case H5T_SGN_2: //two's complement == "is signed" ;-) + return bob::io::base::i16; + default: + throw std::runtime_error("HDF5 2-byte integer datatype (read from file) cannot be mapped into a C++ type supported by this API"); + } + break; + case 4: //int32 or uint32 + switch (signtype) { + case H5T_SGN_NONE: + return bob::io::base::u32; + case H5T_SGN_2: //two's complement == "is signed" ;-) + return bob::io::base::i32; + default: + throw std::runtime_error("HDF5 4-byte integer datatype (read from file) cannot be mapped into a C++ type supported by this API"); + } + break; + case 8: //int64 or uint64 + switch (signtype) { + case H5T_SGN_NONE: + return bob::io::base::u64; + case H5T_SGN_2: //two's complement == "is signed" ;-) + return bob::io::base::i64; + default: + throw std::runtime_error("HDF5 8-byte integer datatype (read from file) cannot be mapped into a C++ type supported by this API"); + } + break; + default: + break; + } + break; + case H5T_FLOAT: + switch (typesize) { + case 4: //float + return 
bob::io::base::f32; + case 8: //double + return bob::io::base::f64; + case 16: //long double + return bob::io::base::f128; + default: + break; + } + break; + case H5T_COMPOUND: //complex + return equivctype(dt); + default: + break; + } + + throw std::runtime_error("cannot handle HDF5 datatype on file using one of the native types supported by this API"); +} + +bool bob::io::base::HDF5Type::compatible (const bob::io::base::array::typeinfo& value) const +{ + return *this == HDF5Type(value); +} + +/** + * Given a datatype, returns the supported HDF5 datatype equivalent or -1 + */ +boost::shared_ptr<hid_t> bob::io::base::HDF5Type::htype() const { + switch (m_type) { + case bob::io::base::s: + { + boost::shared_ptr<hid_t> retval(new hid_t(-1), + std::ptr_fun(delete_h5datatype)); + *retval = H5Tcopy(H5T_C_S1); + if (*retval < 0) { + boost::format m("call to HDF5 C-function H5Tcopy() returned error %d. HDF5 error statck follows:\n%s"); + m % *retval % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + + //set string size + herr_t status = H5Tset_size(*retval, m_shape[0]); + if (status < 0) { + boost::format m("Call to HDF5 C-function H5Tset_size() returned error %d. HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + + return retval; + } + case bob::io::base::b: + { + //why? HDF5 is a C library and in C there is no boolean type + //bottom-line => we have to define our own... + + boost::shared_ptr<hid_t> retval(new hid_t(-1), + std::ptr_fun(delete_h5datatype)); + *retval = H5Tenum_create(H5T_NATIVE_INT8); + if (*retval < 0) { + boost::format m("call to HDF5 C-function H5Tenum_create() returned error %d. 
HDF5 error statck follows:\n%s"); + m % *retval % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + int8_t val; + herr_t status; + + //defines false + val = 0; + status = H5Tenum_insert(*retval, "false", &val); + if (status < 0) { + boost::format m("call to HDF5 C-function H5Tenum_insert() returned error %d. HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + + //defines true + val = 1; + status = H5Tenum_insert(*retval, "true", &val); + if (*retval < 0) { + boost::format m("call to HDF5 C-function H5Tenum_insert() returned error %d. HDF5 error statck follows:\n%s"); + m % *retval % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + + return retval; + } + case bob::io::base::i8: + return boost::make_shared<hid_t>(H5T_NATIVE_INT8); + case bob::io::base::i16: + return boost::make_shared<hid_t>(H5T_NATIVE_INT16); + case bob::io::base::i32: + return boost::make_shared<hid_t>(H5T_NATIVE_INT32); + case bob::io::base::i64: + return boost::make_shared<hid_t>(H5T_NATIVE_INT64); + case bob::io::base::u8: + return boost::make_shared<hid_t>(H5T_NATIVE_UINT8); + case bob::io::base::u16: + return boost::make_shared<hid_t>(H5T_NATIVE_UINT16); + case bob::io::base::u32: + return boost::make_shared<hid_t>(H5T_NATIVE_UINT32); + case bob::io::base::u64: + return boost::make_shared<hid_t>(H5T_NATIVE_UINT64); + case bob::io::base::f32: + return boost::make_shared<hid_t>(H5T_NATIVE_FLOAT); + case bob::io::base::f64: + return boost::make_shared<hid_t>(H5T_NATIVE_DOUBLE); + case bob::io::base::f128: + return boost::make_shared<hid_t>(H5T_NATIVE_LDOUBLE); + case bob::io::base::c64: + { + boost::shared_ptr<hid_t> retval(new hid_t(-1), + std::ptr_fun(delete_h5datatype)); + *retval = H5Tcreate(H5T_COMPOUND, 2*sizeof(float)); + if (*retval < 0) { + boost::format m("call to HDF5 C-function H5Tcreate() returned error %d. 
HDF5 error statck follows:\n%s"); + m % *retval % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + herr_t status = H5Tinsert(*retval, "real", 0, H5T_NATIVE_FLOAT); + if (status < 0) { + boost::format m("call to HDF5 C-function H5Tinsert() returned error %d. HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + status = H5Tinsert(*retval, "imag", sizeof(float), H5T_NATIVE_FLOAT); + if (status < 0) { + boost::format m("call to HDF5 C-function H5Tinsert() returned error %d. HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + return retval; + } + case bob::io::base::c128: + { + boost::shared_ptr<hid_t> retval(new hid_t(-1), + std::ptr_fun(delete_h5datatype)); + *retval = H5Tcreate(H5T_COMPOUND, 2*sizeof(double)); + if (*retval < 0) { + boost::format m("call to HDF5 C-function H5Tcreate() returned error %d. HDF5 error statck follows:\n%s"); + m % *retval % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + herr_t status = H5Tinsert(*retval, "real", 0, H5T_NATIVE_DOUBLE); + if (status < 0) { + boost::format m("call to HDF5 C-function H5Tinsert() returned error %d. HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + status = H5Tinsert(*retval, "imag", sizeof(double), H5T_NATIVE_DOUBLE); + if (status < 0) { + boost::format m("call to HDF5 C-function H5Tinsert() returned error %d. HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + return retval; + } + case bob::io::base::c256: + { + boost::shared_ptr<hid_t> retval(new hid_t(-1), + std::ptr_fun(delete_h5datatype)); + *retval = H5Tcreate(H5T_COMPOUND, 2*sizeof(long double)); + if (*retval < 0) { + boost::format m("call to HDF5 C-function H5Tcreate() returned error %d. 
HDF5 error statck follows:\n%s"); + m % *retval % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + herr_t status = H5Tinsert(*retval, "real", 0, H5T_NATIVE_LDOUBLE); + if (status < 0) { + boost::format m("call to HDF5 C-function H5Tinsert() returned error %d. HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + status = H5Tinsert(*retval, "imag", sizeof(long double), H5T_NATIVE_LDOUBLE); + if (status < 0) { + boost::format m("call to HDF5 C-function H5Tinsert() returned error %d. HDF5 error statck follows:\n%s"); + m % status % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + return retval; + } + default: + break; + } + throw std::runtime_error("the C++ type you are trying to convert into a native HDF5 type is not supported by this API"); +} + +#define DEFINE_SUPPORT(T,E) bob::io::base::HDF5Type::HDF5Type(const T& value): \ + m_type(E), m_shape(1) { m_shape[0] = 1; } +DEFINE_SUPPORT(bool,bob::io::base::b) +DEFINE_SUPPORT(int8_t,bob::io::base::i8) +DEFINE_SUPPORT(int16_t,bob::io::base::i16) +DEFINE_SUPPORT(int32_t,bob::io::base::i32) +DEFINE_SUPPORT(int64_t,bob::io::base::i64) +DEFINE_SUPPORT(uint8_t,bob::io::base::u8) +DEFINE_SUPPORT(uint16_t,bob::io::base::u16) +DEFINE_SUPPORT(uint32_t,bob::io::base::u32) +DEFINE_SUPPORT(uint64_t,bob::io::base::u64) +DEFINE_SUPPORT(float,bob::io::base::f32) +DEFINE_SUPPORT(double,bob::io::base::f64) +DEFINE_SUPPORT(long double,bob::io::base::f128) +DEFINE_SUPPORT(std::complex<float>,bob::io::base::c64) +DEFINE_SUPPORT(std::complex<double>,bob::io::base::c128) +DEFINE_SUPPORT(std::complex<long double>,bob::io::base::c256) +#undef DEFINE_SUPPORT + +bob::io::base::HDF5Type::HDF5Type(const char* value): + m_type(bob::io::base::s), + m_shape(1) +{ + m_shape[0] = std::strlen(value); +} + +bob::io::base::HDF5Type::HDF5Type(const std::string& value): + m_type(bob::io::base::s), + m_shape(1) +{ + m_shape[0] 
= value.size(); +} + +#define DEFINE_SUPPORT(T,E,N) bob::io::base::HDF5Type::HDF5Type \ + (const blitz::Array<T,N>& value): \ + m_type(E), \ + m_shape(value.shape()) { \ + if (N > bob::io::base::array::N_MAX_DIMENSIONS_ARRAY) {\ + boost::format m("you passed an array with %d dimensions, but this HDF5 API only supports arrays with up to %d dimensions"); \ + m % N % bob::io::base::array::N_MAX_DIMENSIONS_ARRAY; \ + throw std::runtime_error(m.str()); \ + } \ + } + +#define DEFINE_BZ_SUPPORT(T,E) \ + DEFINE_SUPPORT(T,E,1) \ + DEFINE_SUPPORT(T,E,2) \ + DEFINE_SUPPORT(T,E,3) \ + DEFINE_SUPPORT(T,E,4) + +DEFINE_BZ_SUPPORT(bool,bob::io::base::b) +DEFINE_BZ_SUPPORT(int8_t,bob::io::base::i8) +DEFINE_BZ_SUPPORT(int16_t,bob::io::base::i16) +DEFINE_BZ_SUPPORT(int32_t,bob::io::base::i32) +DEFINE_BZ_SUPPORT(int64_t,bob::io::base::i64) +DEFINE_BZ_SUPPORT(uint8_t,bob::io::base::u8) +DEFINE_BZ_SUPPORT(uint16_t,bob::io::base::u16) +DEFINE_BZ_SUPPORT(uint32_t,bob::io::base::u32) +DEFINE_BZ_SUPPORT(uint64_t,bob::io::base::u64) +DEFINE_BZ_SUPPORT(float,bob::io::base::f32) +DEFINE_BZ_SUPPORT(double,bob::io::base::f64) +DEFINE_BZ_SUPPORT(long double,bob::io::base::f128) +DEFINE_BZ_SUPPORT(std::complex<float>,bob::io::base::c64) +DEFINE_BZ_SUPPORT(std::complex<double>,bob::io::base::c128) +DEFINE_BZ_SUPPORT(std::complex<long double>,bob::io::base::c256) +#undef DEFINE_BZ_SUPPORT +#undef DEFINE_SUPPORT + +bob::io::base::HDF5Type::HDF5Type(): + m_type(bob::io::base::unsupported), + m_shape() +{ +} + +bob::io::base::HDF5Type::HDF5Type(bob::io::base::hdf5type type): + m_type(type), + m_shape(1) +{ + m_shape[0] = 1; +} + +bob::io::base::HDF5Type::HDF5Type(bob::io::base::hdf5type type, const bob::io::base::HDF5Shape& extents): + m_type(type), + m_shape(extents) +{ +} + +static bob::io::base::hdf5type array_to_hdf5 (bob::io::base::array::ElementType eltype) { + switch(eltype) { + case bob::io::base::array::t_unknown: + return bob::io::base::unsupported; + case bob::io::base::array::t_bool: + 
return bob::io::base::b; + case bob::io::base::array::t_int8: + return bob::io::base::i8; + case bob::io::base::array::t_int16: + return bob::io::base::i16; + case bob::io::base::array::t_int32: + return bob::io::base::i32; + case bob::io::base::array::t_int64: + return bob::io::base::i64; + case bob::io::base::array::t_uint8: + return bob::io::base::u8; + case bob::io::base::array::t_uint16: + return bob::io::base::u16; + case bob::io::base::array::t_uint32: + return bob::io::base::u32; + case bob::io::base::array::t_uint64: + return bob::io::base::u64; + case bob::io::base::array::t_float32: + return bob::io::base::f32; + case bob::io::base::array::t_float64: + return bob::io::base::f64; + case bob::io::base::array::t_float128: + return bob::io::base::f128; + case bob::io::base::array::t_complex64: + return bob::io::base::c64; + case bob::io::base::array::t_complex128: + return bob::io::base::c128; + case bob::io::base::array::t_complex256: + return bob::io::base::c256; + } + throw std::runtime_error("unsupported dtype <=> hdf5 type conversion -- FIXME"); +} + +bob::io::base::HDF5Type::HDF5Type(const bob::io::base::array::typeinfo& ti): + m_type(array_to_hdf5(ti.dtype)), + m_shape(ti.nd, ti.shape) +{ +} + +bob::io::base::HDF5Type::HDF5Type(bob::io::base::array::ElementType eltype, + const HDF5Shape& extents): + m_type(array_to_hdf5(eltype)), + m_shape(extents) +{ +} + +bob::io::base::HDF5Type::HDF5Type(const boost::shared_ptr<hid_t>& type, + const bob::io::base::HDF5Shape& extents): + m_type(get_datatype(type)), + m_shape(extents) +{ +} + +bob::io::base::HDF5Type::HDF5Type(const boost::shared_ptr<hid_t>& type): + m_type(get_datatype(type)), + m_shape(1) +{ + //strings have to be treated slightly differently + if (H5Tget_class(*type) == H5T_STRING) m_shape[0] = H5Tget_size(*type); + else m_shape[0] = 1; +} + +bob::io::base::HDF5Type::HDF5Type(const HDF5Type& other): + m_type(other.m_type), + m_shape(other.m_shape) +{ +} + +bob::io::base::HDF5Type::~HDF5Type() { } 
+ +bob::io::base::HDF5Type& bob::io::base::HDF5Type::operator= (const bob::io::base::HDF5Type& other) +{ + m_type = other.m_type; + m_shape = other.m_shape; + return *this; +} + +bool bob::io::base::HDF5Type::operator== (const bob::io::base::HDF5Type& other) const { + return (m_type == other.m_type) && (m_shape == other.m_shape); +} + +bool bob::io::base::HDF5Type::operator!= (const bob::io::base::HDF5Type& other) const { + return !(*this == other); +} + +std::string bob::io::base::HDF5Type::str() const { + boost::format retval("%s (%s)"); + retval % bob::io::base::stringize(m_type) % m_shape.str(); + return retval.str(); +} + +bob::io::base::array::ElementType bob::io::base::HDF5Type::element_type() const { + switch (m_type) { + case b: + return bob::io::base::array::t_bool; + case i8: + return bob::io::base::array::t_int8; + case i16: + return bob::io::base::array::t_int16; + case i32: + return bob::io::base::array::t_int32; + case i64: + return bob::io::base::array::t_int64; + case u8: + return bob::io::base::array::t_uint8; + case u16: + return bob::io::base::array::t_uint16; + case u32: + return bob::io::base::array::t_uint32; + case u64: + return bob::io::base::array::t_uint64; + case f32: + return bob::io::base::array::t_float32; + case f64: + return bob::io::base::array::t_float64; + case f128: + return bob::io::base::array::t_float128; + case c64: + return bob::io::base::array::t_complex64; + case c128: + return bob::io::base::array::t_complex128; + case c256: + return bob::io::base::array::t_complex256; + case s: + throw std::runtime_error("Cannot convert HDF5 string type to an element type to be used in blitz::Array's - FIXME: something is wrong in the logic"); + default: + break; + } + return bob::io::base::array::t_unknown; +} + +void bob::io::base::HDF5Type::copy_to (bob::io::base::array::typeinfo& ti) const { + ti.dtype = element_type(); + ti.nd = shape().n(); + if (ti.nd > (BOB_MAX_DIM+1)) { + boost::format f("HDF5 type has more (%d) than the 
allowed maximum number of dimensions (%d)"); + f % ti.nd % (BOB_MAX_DIM+1); + throw std::runtime_error(f.str()); + } + for (size_t i=0; i<ti.nd; ++i) ti.shape[i] = shape()[i]; + ti.update_strides(); +} + +bob::io::base::HDF5Descriptor::HDF5Descriptor(const HDF5Type& type, size_t size, + bool expand): + type(type), + size(size), + expandable(expand), + hyperslab_start(type.shape().n()), + hyperslab_count(type.shape()) +{ +} + +bob::io::base::HDF5Descriptor::HDF5Descriptor(const HDF5Descriptor& other): + type(other.type), + size(other.size), + expandable(other.expandable), + hyperslab_start(other.hyperslab_start), + hyperslab_count(other.hyperslab_count) +{ +} + +bob::io::base::HDF5Descriptor::~HDF5Descriptor() { } + +bob::io::base::HDF5Descriptor& bob::io::base::HDF5Descriptor::operator= +(const bob::io::base::HDF5Descriptor& other) { + type = other.type; + size = other.size; + expandable = other.expandable; + hyperslab_start = other.hyperslab_start; + hyperslab_count = other.hyperslab_count; + return *this; +} + +bob::io::base::HDF5Descriptor& bob::io::base::HDF5Descriptor::subselect() { + hyperslab_start >>= 1; + hyperslab_count >>= 1; + hyperslab_count[0] = 1; + return *this; +} + +std::string bob::io::base::format_hdf5_error() { + const std::vector<std::string>& stack = bob::io::base::DefaultHDF5ErrorStack->get(); + std::ostringstream retval; + std::string prefix(" "); + if (stack.size()) retval << prefix << stack[0]; + for (size_t i=1; i<stack.size(); ++i) + retval << std::endl << prefix << stack[i]; + bob::io::base::DefaultHDF5ErrorStack->clear(); + return retval.str(); +} diff --git a/bob/io/base/cpp/HDF5Utils.cpp b/bob/io/base/cpp/HDF5Utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b735be4a22414eee60c8d3a30c83b2246648b881 --- /dev/null +++ b/bob/io/base/cpp/HDF5Utils.cpp @@ -0,0 +1,157 @@ +/** + * @date Wed Jun 22 17:50:08 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief Implements a set of utilities to 
read HDF5 files. + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include <boost/format.hpp> +#include <boost/make_shared.hpp> + +#include <bob/core/logging.h> + +#include <bob.io.base/HDF5Utils.h> + +/** + * Opens/Creates an "auto-destructible" HDF5 file + */ +static void delete_h5file (hid_t* p) { + if (*p >= 0) { + herr_t err = H5Fclose(*p); + if (err < 0) { + bob::core::error << "H5Fclose(hid=" << *p << ") exited with an error (" << err << "). The stack trace follows:" << std::endl; + bob::core::error << bob::io::base::format_hdf5_error() << std::endl; + } + } + delete p; +} + +/** + * Opens/Creates and "auto-destructible" HDF5 file creation property list + */ +static void delete_h5p (hid_t* p) { + if (*p >= 0) { + herr_t err = H5Pclose(*p); + if (err < 0) { + bob::core::error << "H5Pclose(hid=" << *p << ") exited with an error (" << err << "). The stack trace follows:" << std::endl; + bob::core::error << bob::io::base::format_hdf5_error() << std::endl; + return; + } + } + delete p; +} + +static boost::shared_ptr<hid_t> open_file(const boost::filesystem::path& path, + unsigned int flags, boost::shared_ptr<hid_t>& fcpl) { + + boost::shared_ptr<hid_t> retval(new hid_t(-1), std::ptr_fun(delete_h5file)); + + if (!boost::filesystem::exists(path) && flags == H5F_ACC_RDONLY) { + //file was opened for reading, but does not exist... Raise + boost::format m("cannot open file `%s'"); + m % path.string(); + throw std::runtime_error(m.str()); + } + + if (boost::filesystem::exists(path) && flags != H5F_ACC_TRUNC) { //open + *retval = H5Fopen(path.string().c_str(), flags, H5P_DEFAULT); + if (*retval < 0) { + boost::format m("call to HDF5 C-function H5Fopen() returned error %d on file '%s'. 
HDF5 error statck follows:\n%s"); + m % *retval % path.string().c_str() % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + //replaces the file create list properties with the one from the file + fcpl = boost::shared_ptr<hid_t>(new hid_t(-1), std::ptr_fun(delete_h5p)); + *fcpl = H5Fget_create_plist(*retval); + if (*fcpl < 0) { + boost::format m("call to HDF5 C-function H5Fget_create_list() returned error %d on file '%s'. HDF5 error statck follows:\n%s"); + m % *fcpl % path.string().c_str() % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + } + else { //file needs to be created or truncated (can set user block) + *retval = H5Fcreate(path.string().c_str(), H5F_ACC_TRUNC, + *fcpl, H5P_DEFAULT); + if (*retval < 0) { + boost::format m("call to HDF5 C-function H5Fcreate() returned error %d on file '%s'. HDF5 error statck follows:\n%s"); + m % *retval % path.string().c_str() % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + } + return retval; +} + +static boost::shared_ptr<hid_t> create_fcpl(hsize_t userblock_size) { + if (!userblock_size) return boost::make_shared<hid_t>(H5P_DEFAULT); + //otherwise we have to go through the settings + boost::shared_ptr<hid_t> retval(new hid_t(-1), std::ptr_fun(delete_h5p)); + *retval = H5Pcreate(H5P_FILE_CREATE); + if (*retval < 0) { + boost::format m("call to HDF5 C-function H5Pcreate() returned error %d. HDF5 error statck follows:\n%s"); + m % *retval % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + herr_t err = H5Pset_userblock(*retval, userblock_size); + if (err < 0) { + boost::format m("call to HDF5 C-function H5Pset_userblock() returned error %d. 
HDF5 error statck follows:\n%s"); + m % err % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + return retval; +} + +bob::io::base::detail::hdf5::File::File(const boost::filesystem::path& path, unsigned int flags, + size_t userblock_size): + m_path(path), + m_flags(flags), + m_fcpl(create_fcpl(userblock_size)), + m_id(open_file(m_path, m_flags, m_fcpl)) +{ +} + +bob::io::base::detail::hdf5::File::~File() { +} + +boost::shared_ptr<bob::io::base::detail::hdf5::RootGroup> bob::io::base::detail::hdf5::File::root() { + if (!m_root) { + m_root = boost::make_shared<bob::io::base::detail::hdf5::RootGroup>(shared_from_this()); + m_root->open_recursively(); + } + return m_root; +} + +void bob::io::base::detail::hdf5::File::reset() { + m_root.reset(); +} + +void bob::io::base::detail::hdf5::File::flush() { + herr_t err = H5Fflush(*m_id, H5F_SCOPE_GLOBAL); + if (err < 0){ + std::runtime_error("H5Fflush returned with an error code."); + } +} + +bool bob::io::base::detail::hdf5::File::writable() const { + return (m_flags != H5F_ACC_RDONLY); +} + +size_t bob::io::base::detail::hdf5::File::userblock_size() const { + hsize_t retval; + herr_t err = H5Pget_userblock(*m_fcpl, &retval); + if (err < 0) { + boost::format m("Call to HDF5 C-function H5Pget_create_plist() returned error %d. 
HDF5 error statck follows:\n%s"); + m % err % bob::io::base::format_hdf5_error(); + throw std::runtime_error(m.str()); + } + return retval; +} + +void bob::io::base::detail::hdf5::File::get_userblock(std::string& data) const { + //TODO +} + +void bob::io::base::detail::hdf5::File::set_userblock(const std::string& data) { + //TODO +} diff --git a/bob/io/base/cpp/T3File.cpp b/bob/io/base/cpp/T3File.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8380a62fd912aa370f06f7e307c6fe614559eb81 --- /dev/null +++ b/bob/io/base/cpp/T3File.cpp @@ -0,0 +1,318 @@ +/** + * @date Wed Oct 26 17:11:16 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief Implements a torch3vision bindata reader/writer + * The format, as described in the old source code goes like this. + * 1) data is always recorded in little endian format + * 2) the first 4 bytes describe an integer that indicates the number of arrays + * to follow + * 3) the second 4 bytes describe an integer that specifies the frame width. + * 4) all arrays inserted there are single dimensional arrays. + * 5) all elements from all arrays are "normally" float (4-bytes), but could be + * double if set in the header of T3 during compilation. The file size will + * indicate the right type to use. + * Because of this restriction, this codec will only be able to work with + * single-dimension input. 
+ * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include <fstream> +#include <boost/filesystem.hpp> +#include <boost/make_shared.hpp> +#include <boost/format.hpp> + +//some infrastructure to check the file size +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include <bob.core/check.h> + +#include <bob.io.base/CodecRegistry.h> +#include <bob.io.base/blitz_array.h> + +static inline size_t get_filesize(const char* filename) { + struct stat filestatus; + stat(filename, &filestatus); + return filestatus.st_size; +} + +class T3File: public bob::io::base::File { + + public: //api + + T3File(const char* path, char mode): + m_filename(path), + m_newfile(true), + m_length(0) { + if ( mode == 'r' || (mode == 'a' && boost::filesystem::exists(path) ) ) { // try peek + size_t fsize = get_filesize(path); + fsize -= 8; // remove the first two entries + // read the first two 4-byte integers in the file, convert to unsigned + + std::fstream s(path, std::ios::binary|std::ios::in); + + if (!s) { + boost::format m("cannot open file `%s'"); + m % path; + throw std::runtime_error(m.str()); + } + + uint32_t nsamples, framesize; + nsamples = framesize = 0; + s.read((char*)&nsamples, sizeof(uint32_t)); + s.read((char*)&framesize, sizeof(uint32_t)); + + m_length = nsamples; + + // are those floats or doubles? 
+ if (fsize == (nsamples*framesize*sizeof(float))) { + m_type_array.dtype = bob::io::base::array::t_float32; + m_type_arrayset.dtype = bob::io::base::array::t_float32; + } + else if (fsize == (nsamples*framesize*sizeof(double))) { + m_type_array.dtype = bob::io::base::array::t_float64; + m_type_arrayset.dtype = bob::io::base::array::t_float64; + } + else { + boost::format s("Cannot read file '%s', mode = '%c': fsize (%d) != %d*%d*sizeof(float32) nor *sizeof(float64)"); + s % path % mode % fsize % nsamples % framesize; + throw std::runtime_error(s.str()); + } + + size_t shape[2] = {nsamples, framesize}; + m_type_array.set_shape<size_t>(2, &shape[0]); + m_type_arrayset.set_shape<size_t>(1, &shape[1]); + m_newfile = false; + + } + } + + virtual ~T3File() { } + + virtual const char* filename() const { + return m_filename.c_str(); + } + + virtual const bob::io::base::array::typeinfo& type_all () const { + return m_type_array; + } + + virtual const bob::io::base::array::typeinfo& type () const { + return m_type_arrayset; + } + + virtual size_t size() const { + return m_length; + } + + virtual const char* name() const { + return s_codecname.c_str(); + } + + virtual void read_all(bob::io::base::array::interface& buffer) { + + if (m_newfile) { + boost::format f("cannot read uninitialized t3 binary file at '%s'"); + f % m_filename; + throw std::runtime_error(f.str()); + } + + if (!buffer.type().is_compatible(m_type_array)) buffer.set(m_type_array); + + //open the file, now for reading the contents... 
+ std::ifstream ifile(m_filename.c_str(), std::ios::binary|std::ios::in); + + //skip the first 8 bytes, that contain the header that we already read + ifile.seekg(8, std::ios::beg); + ifile.read(static_cast<char*>(buffer.ptr()), buffer.type().buffer_size()); + + } + + virtual void read(bob::io::base::array::interface& buffer, size_t index) { + + if (m_newfile) { + boost::format f("cannot read uninitialized t3 binary file at '%s'"); + f % m_filename; + throw std::runtime_error(f.str()); + } + + const bob::io::base::array::typeinfo& type = buffer.type(); + + if (!buffer.type().is_compatible(m_type_arrayset)) buffer.set(m_type_arrayset); + + //open the file, now for reading the contents... + std::ifstream ifile(m_filename.c_str(), std::ios::binary|std::ios::in); + + //skip the first 8 bytes, that contain the header that we already read + ifile.seekg(8 + (index*type.buffer_size()), std::ios::beg); + ifile.read(static_cast<char*>(buffer.ptr()), type.buffer_size()); + + } + + virtual size_t append (const bob::io::base::array::interface& buffer) { + + const bob::io::base::array::typeinfo& info = buffer.type(); + + if (!m_newfile && !info.is_compatible(m_type_arrayset)) { + boost::format f("input buffer of type %s cannot be appended to already initialized torch3vision binary file of type %s"); + f % info.str() % m_type_arrayset.str(); + throw std::runtime_error(f.str()); + } + + std::ofstream ofile; + if (m_newfile) { + + //can only save uni-dimensional data, so throw if that is not the case + if (info.nd != 1) { + boost::format m("codec for torch3vision binary files can only save uni-dimensional data, but you passed: %s"); + m % info.str(); + throw std::runtime_error(m.str()); + } + + //can only save float32 or float64, otherwise, throw. 
+ if ((info.dtype != bob::io::base::array::t_float32) && + (info.dtype != bob::io::base::array::t_float64)) { + boost::format f("cannot have T3 bindata files with type %s - only float32 or float64"); + f % bob::io::base::array::stringize(info.dtype); + throw std::runtime_error(f.str()); + } + + ofile.open(m_filename.c_str(), std::ios::binary|std::ios::out|std::ios::trunc); + + //header writing... + const uint32_t nsamples = 0; + const uint32_t framesize = info.shape[0]; + ofile.write((const char*)&nsamples, sizeof(uint32_t)); + ofile.write((const char*)&framesize, sizeof(uint32_t)); + + m_type_arrayset = info; + m_type_array.dtype = info.dtype; + m_newfile = false; ///< block re-initialization + m_length = 0; + + } + else { + //only open the file, the rest is setup already + ofile.open(m_filename.c_str(), std::ios::binary|std::ios::out|std::ios::app); + } + + if (!ofile) { + boost::format f("cannot open output file '%s' for writing"); + f % m_filename; + throw std::runtime_error(f.str()); + } + + ofile.write(static_cast<const char*>(buffer.ptr()), info.buffer_size()); + ofile.close(); + + //setup new type information + ++m_length; + size_t shape[2] = {m_length, info.shape[0]}; + m_type_array.set_shape<size_t>(2, &shape[0]); + + //update the header information on the file + ofile.open(m_filename.c_str(), std::ios::binary|std::ios::in|std::ios::out); + const uint32_t nsamples = m_length; + ofile.write((const char*)&nsamples, sizeof(uint32_t)); + ofile.flush(); + return m_length-1; + + } + + /** + * Supports writing a single vector or a set of vectors represented as a + * matrix. In this last case, vectors are formed from the rows of the given + * matrix. 
+ */ + virtual void write (const bob::io::base::array::interface& buffer) { + + m_newfile = true; //force file re-setting + const bob::io::base::array::typeinfo& info = buffer.type(); + + if (info.nd == 1) {//just do a normal append + append(buffer); + } + + else if (info.nd == 2) { //append every array individually + + const uint8_t* ptr = static_cast<const uint8_t*>(buffer.ptr()); + bob::io::base::array::typeinfo slice_info(info.dtype, static_cast<size_t>(1), + &info.shape[1]); + for (size_t k=0; k<info.shape[0]; ++k) { + const void* slice_ptr=static_cast<const void*>(ptr+k*slice_info.buffer_size()); + bob::io::base::array::blitz_array slice(const_cast<void*>(slice_ptr), slice_info); + append(slice); + } + + } + + else { + boost::format f("cannot do single write of torch3vision .bindata file with array with type '%s' - only supports 1D or 2D arrays of types float32 or float64"); + f % info.str(); + throw std::runtime_error(f.str()); + } + + } + + private: //representation + + std::string m_filename; + bool m_newfile; + bob::io::base::array::typeinfo m_type_array; + bob::io::base::array::typeinfo m_type_arrayset; + size_t m_length; + + static std::string s_codecname; + +}; + +std::string T3File::s_codecname = "torch3.binary"; + +/** + * From this point onwards we have the registration procedure. If you are + * looking at this file for a coding example, just follow the procedure bellow, + * minus local modifications you may need to apply. + */ + +/** + * This defines the factory method F that can create codecs of this type. + * + * Here are the meanings of the mode flag that should be respected by your + * factory implementation: + * + * 'r': opens for reading only - no modifications can occur; it is an + * error to open a file that does not exist for read-only operations. + * 'w': opens for reading and writing, but truncates the file if it + * exists; it is not an error to open files that do not exist with + * this flag. 
+ * 'a': opens for reading and writing - any type of modification can + * occur. If the file does not exist, this flag is effectively like + * 'w'. + * + * Returns a newly allocated File object that can read and write data to the + * file using a specific backend. + * + * @note: This method can be static. + */ +static boost::shared_ptr<bob::io::base::File> make_file (const char* path, char mode) { + return boost::make_shared<T3File>(path, mode); +} + +/** + * Takes care of codec registration per se. + */ +static bool register_codec() { + + boost::shared_ptr<bob::io::base::CodecRegistry> instance = + bob::io::base::CodecRegistry::instance(); + + instance->registerExtension(".bindata", "torch3 binary data format", &make_file); + + return true; + +} + +static bool codec_registered = register_codec(); diff --git a/bob/io/base/cpp/TensorArrayFile.cpp b/bob/io/base/cpp/TensorArrayFile.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d0cb72d82c948519126ac0e4dcfe237197d017b4 --- /dev/null +++ b/bob/io/base/cpp/TensorArrayFile.cpp @@ -0,0 +1,144 @@ +/** + * @date Wed Oct 26 17:11:16 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief Implements the TensorArrayCodec type + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include "TensorFile.h" +#include <bob.io.base/CodecRegistry.h> + +class TensorArrayFile: public bob::io::base::File { + + public: //api + + TensorArrayFile(const char* path, bob::io::base::TensorFile::openmode mode): + m_file(path, mode), + m_filename(path) { + if (m_file.size()) m_file.peek(m_type); + } + + virtual ~TensorArrayFile() { } + + virtual const char* filename() const { + return m_filename.c_str(); + } + + virtual const bob::io::base::array::typeinfo& type_all () const { + return m_type; + } + + virtual const bob::io::base::array::typeinfo& type () const { + return m_type; + } + + virtual size_t size() const { + return m_file.size(); + } + + virtual const char* name() const { 
+ return s_codecname.c_str(); + } + + virtual void read_all(bob::io::base::array::interface& buffer) { + + if(!m_file) + throw std::runtime_error("uninitialized binary file cannot be read"); + + m_file.read(0, buffer); + + } + + virtual void read(bob::io::base::array::interface& buffer, size_t index) { + + if(!m_file) + throw std::runtime_error("uninitialized binary file cannot be read"); + + m_file.read(index, buffer); + + } + + virtual size_t append (const bob::io::base::array::interface& buffer) { + + m_file.write(buffer); + + if (size() == 1) m_file.peek(m_type); + + return size() - 1; + + } + + virtual void write (const bob::io::base::array::interface& buffer) { + + //we don't have a special way to treat write()'s like in HDF5. + append(buffer); + + } + + private: //representation + + bob::io::base::TensorFile m_file; + bob::io::base::array::typeinfo m_type; + std::string m_filename; + + static std::string s_codecname; + +}; + +std::string TensorArrayFile::s_codecname = "bob.tensor"; + +/** + * From this point onwards we have the registration procedure. If you are + * looking at this file for a coding example, just follow the procedure bellow, + * minus local modifications you may need to apply. + */ + +/** + * This defines the factory method F that can create codecs of this type. + * + * Here are the meanings of the mode flag that should be respected by your + * factory implementation: + * + * 'r': opens for reading only - no modifications can occur; it is an + * error to open a file that does not exist for read-only operations. + * 'w': opens for reading and writing, but truncates the file if it + * exists; it is not an error to open files that do not exist with + * this flag. + * 'a': opens for reading and writing - any type of modification can + * occur. If the file does not exist, this flag is effectively like + * 'w'. + * + * Returns a newly allocated File object that can read and write data to the + * file using a specific backend. 
+ * + * @note: This method can be static. + */ +static boost::shared_ptr<bob::io::base::File> make_file (const char* path, char mode) { + + bob::io::base::TensorFile::openmode _mode; + if (mode == 'r') _mode = bob::io::base::TensorFile::in; + else if (mode == 'w') _mode = bob::io::base::TensorFile::out; + else if (mode == 'a') _mode = bob::io::base::TensorFile::append; + else throw std::runtime_error("unsupported tensor file opening mode"); + + return boost::make_shared<TensorArrayFile>(path, _mode); + +} + +/** + * Takes care of codec registration per se. + */ +static bool register_codec() { + + boost::shared_ptr<bob::io::base::CodecRegistry> instance = + bob::io::base::CodecRegistry::instance(); + + instance->registerExtension(".tensor", "torch3vision v2.1 tensor files", &make_file); + + return true; + +} + +static bool codec_registered = register_codec(); diff --git a/bob/io/base/cpp/TensorFile.cpp b/bob/io/base/cpp/TensorFile.cpp new file mode 100644 index 0000000000000000000000000000000000000000..66b72160ed861cf97126c4ff4bf4b26fc1f18046 --- /dev/null +++ b/bob/io/base/cpp/TensorFile.cpp @@ -0,0 +1,153 @@ +/** + * @date Wed Jun 22 17:50:08 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief This class can be used to store and load multiarrays into/from files. 
+ * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include "TensorFile.h" + +#include <bob.io.base/reorder.h> +#include <bob.io.base/array_type.h> + +bob::io::base::TensorFile::TensorFile(const std::string& filename, + bob::io::base::TensorFile::openmode flag): + m_header_init(false), + m_current_array(0), + m_n_arrays_written(0), + m_openmode(flag) +{ + if((flag & bob::io::base::TensorFile::out) && (flag & bob::io::base::TensorFile::in)) { + m_stream.open(filename.c_str(), std::ios::in | std::ios::out | + std::ios::binary); + if(m_stream) + { + m_header.read(m_stream); + m_buffer.reset(new char[m_header.m_type.buffer_size()]); + m_header_init = true; + m_n_arrays_written = m_header.m_n_samples; + + if (flag & bob::io::base::TensorFile::append) { + m_stream.seekp(0, std::ios::end); + m_current_array = m_header.m_n_samples; + } + } + } + else if(flag & bob::io::base::TensorFile::out) { + if(m_stream && (flag & bob::io::base::TensorFile::append)) { + m_stream.open(filename.c_str(), std::ios::out | std::ios::in | + std::ios::binary); + m_header.read(m_stream); + m_buffer.reset(new char[m_header.m_type.buffer_size()]); + m_header_init = true; + m_n_arrays_written = m_header.m_n_samples; + m_stream.seekp(0, std::ios::end); + m_current_array = m_header.m_n_samples; + } + else + m_stream.open(filename.c_str(), std::ios::out | std::ios::binary); + } + else if(flag & bob::io::base::TensorFile::in) { + m_stream.open(filename.c_str(), std::ios::in | std::ios::binary); + if(m_stream) { + m_header.read(m_stream); + m_buffer.reset(new char[m_header.m_type.buffer_size()]); + m_header_init = true; + m_n_arrays_written = m_header.m_n_samples; + + if (flag & bob::io::base::TensorFile::append) { + throw std::runtime_error("cannot append data in read only mode"); + } + } + } + else { + throw std::runtime_error("invalid combination of flags"); + } +} + +bob::io::base::TensorFile::~TensorFile() { + close(); +} + +void 
bob::io::base::TensorFile::peek(bob::io::base::array::typeinfo& info) const { + info = m_header.m_type; +} + +void bob::io::base::TensorFile::close() { + // Rewrite the header and update the number of samples + m_header.m_n_samples = m_n_arrays_written; + if(m_openmode & bob::io::base::TensorFile::out) m_header.write(m_stream); + + m_stream.close(); +} + +void bob::io::base::TensorFile::initHeader(const bob::io::base::array::typeinfo& info) { + // Check that data have not already been written + if (m_n_arrays_written > 0 ) { + throw std::runtime_error("cannot init the header of an output stream in which data have already been written"); + } + + // Initialize header + m_header.m_type = info; + m_header.m_tensor_type = bob::io::base::arrayTypeToTensorType(info.dtype); + m_header.write(m_stream); + + // Temporary buffer to help with data transposition... + m_buffer.reset(new char[m_header.m_type.buffer_size()]); + + m_header_init = true; +} + +void bob::io::base::TensorFile::write(const bob::io::base::array::interface& data) { + + const bob::io::base::array::typeinfo& info = data.type(); + + if (!m_header_init) initHeader(info); + else { + //checks compatibility with previously written stuff + if (!m_header.m_type.is_compatible(info)) + throw std::runtime_error("buffer does not conform to expected type"); + } + + bob::io::base::row_to_col_order(data.ptr(), m_buffer.get(), info); + + m_stream.write(static_cast<const char*>(m_buffer.get()), info.buffer_size()); + + // increment m_n_arrays_written and m_current_array + ++m_current_array; + if (m_current_array>m_n_arrays_written) ++m_n_arrays_written; +} + +void bob::io::base::TensorFile::read (bob::io::base::array::interface& buf) { + + if(!m_header_init) { + throw std::runtime_error("TensorFile: header is not initialized"); + } + if(!buf.type().is_compatible(m_header.m_type)) buf.set(m_header.m_type); + + m_stream.read(reinterpret_cast<char*>(m_buffer.get()), + m_header.m_type.buffer_size()); + + 
bob::io::base::col_to_row_order(m_buffer.get(), buf.ptr(), m_header.m_type); + + ++m_current_array; +} + +void bob::io::base::TensorFile::read (size_t index, bob::io::base::array::interface& buf) { + + // Check that we are reaching an existing array + if( index > m_header.m_n_samples ) { + boost::format m("request to read list item at position %d which is outside the bounds of declared object with size %d"); + m % index % m_header.m_n_samples; + throw std::runtime_error(m.str()); + } + + // Set the stream pointer at the correct position + m_stream.seekg( m_header.getArrayIndex(index) ); + m_current_array = index; + + // Put the content of the stream in the blitz array. + read(buf); +} diff --git a/bob/io/base/cpp/TensorFile.h b/bob/io/base/cpp/TensorFile.h new file mode 100644 index 0000000000000000000000000000000000000000..35d42bfcea4d9a25084b82c28f008acb0f9f0609 --- /dev/null +++ b/bob/io/base/cpp/TensorFile.h @@ -0,0 +1,246 @@ +/** + * @date Wed Jun 22 17:50:08 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief This class can be used to load and store arrays from/to .tensor files + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#ifndef BOB_IO_TENSORFILE_H +#define BOB_IO_TENSORFILE_H + +#include <boost/format.hpp> +#include <stdexcept> + +#include <bob.io.base/blitz_array.h> + +#include "TensorFileHeader.h" + +namespace bob { namespace io { namespace base { + + /** + * Defines the flags that might be used when loading/storing a file + * containing blitz arrays. + */ + enum _TensorFileFlag { + _unset = 0, + _append = 1L << 0, + _in = 1L << 3, + _out = 1L << 4 + }; + + /** + * This class can be used for loading and storing multiarrays from/to + * tensor files + */ + class TensorFile + { + public: + /** + * Defines the bitmask type for providing information about the type of + * the stream. 
+ */ + typedef _TensorFileFlag openmode; + static const openmode append = _append; + static const openmode in = _in; + static const openmode out = _out; + + /** + * Constructor + */ + TensorFile(const std::string& filename, openmode f); + + /** + * Destructor + */ + ~TensorFile(); + + /** + * Tests if next operation will succeed. + */ + inline bool operator!() const { return !m_stream; } + + /** + * Closes the TensorFile + */ + void close(); + + /** + * Puts an Array of a given type into the output stream/file. If the + * type/shape have not yet been set, it is set according to the type + * and shape given in the blitz array, otherwise the type/shape should + * match or an exception is thrown. + * + * Please note that blitz::Array<> will be implicitly constructed as + * required and respecting those norms. + * + * @warning: Please convert your files to HDF5, this format is + * deprecated starting on 16.04.2011 - AA + */ + void write(const bob::io::base::array::interface& data); + + /** + * Reads the file data into a bob::io::base::array::interface - this variant reads the next + * variable. The bob::io::base::array::interface size will be reset if required. + */ + void read(bob::io::base::array::interface& data); + + /** + * Reads the file data into a bob::io::base::array::interface - this variant allows the + * specification of a position to read data from. The bob::io::base::array::interface size will be + * reset if required. + */ + void read (size_t index, bob::io::base::array::interface& data); + + /** + * Peeks the file and returns the currently set typeinfo + */ + void peek(bob::io::base::array::typeinfo& info) const; + + /** + * Gets the number of samples/arrays written so far + * + * @warning An exception is thrown if nothing was written so far + */ + inline size_t size() const { + return (m_header_init)? 
m_n_arrays_written : 0; + } + + /** + * Gets the number of elements per array + * + * @warning An exception is thrown if nothing was written so far + */ + inline size_t getNElements() const { + headerInitialized(); + return m_header.getNElements(); + } + + /** + * Gets the size along a particular dimension + * + * @warning An exception is thrown if nothing was written so far + */ + inline size_t getSize(size_t dim_index) const { + headerInitialized(); + return m_header.m_type.shape[dim_index]; + } + + /** + * Initializes the tensor file with the given type and shape. + */ + inline void initTensorFile(const bob::io::base::array::typeinfo& info) { + initHeader(info); + } + + private: //Some stuff I need privately + + /** + * Checks if the end of the tensor file is reached + */ + inline void endOfFile() { + if(m_current_array >= m_header.m_n_samples ) { + boost::format m("TensorFile::endOfFile(): current array index == %d is outside the bounds of declared object with size %d"); + m % m_current_array % m_header.m_n_samples; + throw std::runtime_error(m.str()); + } + } + + /** + * Checks that the header has been initialized, and raise an + * exception if not + */ + inline void headerInitialized() const { + if (!m_header_init) { + throw std::runtime_error("TensorFile: header is not initialized"); + } + } + + /** + * Initializes the header of the (output) stream with the given type + * and shape + */ + void initHeader(const bob::io::base::array::typeinfo& info); + + public: + + /******************************************************************** + * Specific blitz::Array<> operations + ********************************************************************/ + + + /** + * A shortcut to write a blitz::Array<T,D> + * + * @warning: Please convert your files to HDF5, this format is + * deprecated starting on 16.04.2011 - AA + */ + template <typename T, int D> + inline void write(blitz::Array<T,D>& bz) { + write(bob::io::base::array::blitz_array(bz)); + } + + /** + * Load one 
blitz++ multiarray from the input stream/file All the + * multiarrays saved have the same dimensions. + */ + template <typename T, int D> inline blitz::Array<T,D> read() { + bob::io::base::array::typeinfo info; + peek(info); + bob::io::base::array::blitz_array buf(info); + read(buf); + return bob::io::base::array::cast<T,D>(buf); + } + + template <typename T, int D> inline blitz::Array<T,D> read(size_t + index) { + bob::io::base::array::typeinfo info; + peek(info); + bob::io::base::array::blitz_array buf(info); + read(index, buf); + return bob::io::base::array::cast<T,D>(buf); + } + + private: //representation + + bool m_header_init; + size_t m_current_array; + size_t m_n_arrays_written; + std::fstream m_stream; + detail::TensorFileHeader m_header; + openmode m_openmode; + boost::shared_ptr<void> m_buffer; + }; + + inline _TensorFileFlag operator&(_TensorFileFlag a, _TensorFileFlag b) { + return _TensorFileFlag(static_cast<int>(a) & static_cast<int>(b)); + } + + inline _TensorFileFlag operator|(_TensorFileFlag a, _TensorFileFlag b) { + return _TensorFileFlag(static_cast<int>(a) | static_cast<int>(b)); + } + + inline _TensorFileFlag operator^(_TensorFileFlag a, _TensorFileFlag b) { + return _TensorFileFlag(static_cast<int>(a) ^ static_cast<int>(b)); + } + + inline _TensorFileFlag& operator|=(_TensorFileFlag& a, _TensorFileFlag b) { + return a = a | b; + } + + inline _TensorFileFlag& operator&=(_TensorFileFlag& a, _TensorFileFlag b) { + return a = a & b; + } + + inline _TensorFileFlag& operator^=(_TensorFileFlag& a, _TensorFileFlag b) { + return a = a ^ b; + } + + inline _TensorFileFlag operator~(_TensorFileFlag a) { + return _TensorFileFlag(~static_cast<int>(a)); + } + +}}} + +#endif /* BOB_IO_BINFILE_H */ diff --git a/bob/io/base/cpp/TensorFileHeader.cpp b/bob/io/base/cpp/TensorFileHeader.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fb96a13b5d383eebed1c59467185272ded2a2547 --- /dev/null +++ b/bob/io/base/cpp/TensorFileHeader.cpp @@ -0,0 
+1,173 @@ +/** + * @date Wed Jun 22 17:50:08 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * This class defines an header for storing multiarrays into .tensor files. + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include <boost/format.hpp> + +#include "TensorFileHeader.h" + +bob::io::base::detail::TensorFileHeader::TensorFileHeader() + : m_tensor_type(bob::io::base::Char), + m_type(), + m_n_samples(0), + m_tensor_size(0) +{ +} + +bob::io::base::detail::TensorFileHeader::~TensorFileHeader() { } + +size_t bob::io::base::detail::TensorFileHeader::getArrayIndex (size_t index) const { + size_t header_size = 7 * sizeof(int); + return header_size + index * m_tensor_size; +} + +void bob::io::base::detail::TensorFileHeader::read(std::istream& str) { + // Start reading at the beginning of the stream + str.seekg(std::ios_base::beg); + + int val; + str.read( reinterpret_cast<char*>(&val), sizeof(int)); + m_tensor_type = (bob::io::base::TensorType)val; + m_type.dtype = bob::io::base::tensorTypeToArrayType(m_tensor_type); + + str.read( reinterpret_cast<char*>(&val), sizeof(int)); + m_n_samples = (size_t)val; + + int nd; + str.read(reinterpret_cast<char*>(&nd), sizeof(int)); + + int shape[BOB_MAX_DIM]; + + str.read( reinterpret_cast<char*>(&val), sizeof(int)); + shape[0] = (size_t)val; + str.read( reinterpret_cast<char*>(&val), sizeof(int)); + shape[1] = (size_t)val; + str.read( reinterpret_cast<char*>(&val), sizeof(int)); + shape[2] = (size_t)val; + str.read( reinterpret_cast<char*>(&val), sizeof(int)); + shape[3] = (size_t)val; + + m_type.set_shape(nd, shape); + + header_ok(); +} + +void bob::io::base::detail::TensorFileHeader::write(std::ostream& str) const +{ + // Start writing at the beginning of the stream + str.seekp(std::ios_base::beg); + + int val; + val = (int)m_tensor_type; + str.write( reinterpret_cast<char*>(&val), sizeof(int)); + val = (int)m_n_samples; + str.write( reinterpret_cast<char*>(&val), sizeof(int)); + val 
= (int)m_type.nd; + str.write( reinterpret_cast<char*>(&val), sizeof(int)); + val = (int)m_type.shape[0]; + str.write( reinterpret_cast<char*>(&val), sizeof(int)); + val = (int)m_type.shape[1]; + str.write( reinterpret_cast<char*>(&val), sizeof(int)); + val = (int)m_type.shape[2]; + str.write( reinterpret_cast<char*>(&val), sizeof(int)); + val = (int)m_type.shape[3]; + str.write( reinterpret_cast<char*>(&val), sizeof(int)); +} + +void bob::io::base::detail::TensorFileHeader::header_ok() +{ + // Check the type + switch (m_tensor_type) + { + // supported tensor types + case bob::io::base::Char: + case bob::io::base::Short: + case bob::io::base::Int: + case bob::io::base::Long: + case bob::io::base::Float: + case bob::io::base::Double: + break; + // error + default: + throw std::runtime_error("unsupported data type found while scanning header of tensor file"); + } + + // Check the number of samples and dimensions + if( m_type.nd < 1 || m_type.nd > 4) { + boost::format m("header for tensor file indicates an unsupported type: %s"); + m % m_type.str(); + throw std::runtime_error(m.str()); + } + + // OK + update(); +} + +void bob::io::base::detail::TensorFileHeader::update() +{ + size_t base_size = 0; + switch (m_tensor_type) + { + case bob::io::base::Char: base_size = sizeof(char); break; + case bob::io::base::Short: base_size = sizeof(short); break; + case bob::io::base::Int: base_size = sizeof(int); break; + case bob::io::base::Long: base_size = sizeof(long); break; + case bob::io::base::Float: base_size = sizeof(float); break; + case bob::io::base::Double: base_size = sizeof(double); break; + default: + throw std::runtime_error("unsupported data type found while updating tensor file"); + } + + size_t tsize = 1; + for(size_t i = 0; i < m_type.nd; ++i) tsize *= m_type.shape[i]; + + m_tensor_size = tsize * base_size; +} + + +bob::io::base::TensorType bob::io::base::arrayTypeToTensorType(bob::io::base::array::ElementType eltype) +{ + switch(eltype) + { + case 
bob::io::base::array::t_int8: + return bob::io::base::Char; + case bob::io::base::array::t_int16: + return bob::io::base::Short; + case bob::io::base::array::t_int32: + return bob::io::base::Int; + case bob::io::base::array::t_int64: + return bob::io::base::Long; + case bob::io::base::array::t_float32: + return bob::io::base::Float; + case bob::io::base::array::t_float64: + return bob::io::base::Double; + default: + throw std::runtime_error("unsupported data type found while converting array type to tensor type"); + } +} + +bob::io::base::array::ElementType bob::io::base::tensorTypeToArrayType(bob::io::base::TensorType tensortype) +{ + switch(tensortype) + { + case bob::io::base::Char: + return bob::io::base::array::t_int8; + case bob::io::base::Short: + return bob::io::base::array::t_int16; + case bob::io::base::Int: + return bob::io::base::array::t_int32; + case bob::io::base::Long: + return bob::io::base::array::t_int64; + case bob::io::base::Float: + return bob::io::base::array::t_float32; + case bob::io::base::Double: + return bob::io::base::array::t_float64; + default: + throw std::runtime_error("unsupported data type found while converting tensor type to array type"); + } +} diff --git a/bob/io/base/cpp/TensorFileHeader.h b/bob/io/base/cpp/TensorFileHeader.h new file mode 100644 index 0000000000000000000000000000000000000000..7e852b829997eb2f07914123e88646ccbd1f2fcb --- /dev/null +++ b/bob/io/base/cpp/TensorFileHeader.h @@ -0,0 +1,95 @@ +/** + * @date Wed Jun 22 17:50:08 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief This class defines an header for storing multiarrays into + * .tensor files. 
+ * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#ifndef BOB_IO_BASE_TENSORFILEHEADER_H +#define BOB_IO_BASE_TENSORFILEHEADER_H + +#include <fstream> +#include <blitz/array.h> + +#include <bob.io.base/array.h> + +namespace bob { namespace io { namespace base { + + // TensorType + enum TensorType + { + Char, + Short, + Int, + Long, + Float, + Double + }; + + TensorType arrayTypeToTensorType(bob::io::base::array::ElementType eltype); + bob::io::base::array::ElementType tensorTypeToArrayType(bob::io::base::TensorType tensortype); + + namespace detail { + /** + * The Header for storing arrays into binary files. Please note that this + * class is for private use of the TensorFile type. + */ + struct TensorFileHeader { + + /** + * Constructor + */ + TensorFileHeader(); + + /** + * Destructor + */ + virtual ~TensorFileHeader(); + + /** + * Gets the offset of some array in the file + */ + size_t getArrayIndex(size_t index) const; + + /** + * Writes the header into an output stream + */ + void write(std::ostream& str) const; + + /** + * Reads the header from an input stream + */ + void read(std::istream& str); + + /** + * Gets number of elements in binary file + */ + inline size_t getNElements() const { + size_t tmp = 1; + for(size_t i=0; i<m_type.nd; ++i) tmp *= m_type.shape[i]; + return tmp; + } + + /** + * Checks if the header is valid + */ + void header_ok(); + + /** + * Update the TensorSize value + */ + void update(); + + //representation + TensorType m_tensor_type; ///< array element type + bob::io::base::array::typeinfo m_type; ///< the type information + size_t m_n_samples; ///< total number of arrays in the file + size_t m_tensor_size; ///< the number of dimensions in each array + }; + +}}}} + +#endif /* BOB_IO_BASE_TENSORFILEHEADER_H */ diff --git a/bob/io/base/cpp/array.cpp b/bob/io/base/cpp/array.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2ffc94d77a13adec893f99fa8b052a55c45554ff --- /dev/null +++ 
b/bob/io/base/cpp/array.cpp @@ -0,0 +1,134 @@ +/** + * @date Tue Nov 8 15:34:31 2011 +0100 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief Some buffer stuff + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include <boost/format.hpp> +#include <bob.io.base/array.h> + +bob::io::base::array::typeinfo::typeinfo(): + dtype(bob::io::base::array::t_unknown), + nd(0) +{ +} + +bob::io::base::array::typeinfo::typeinfo(const bob::io::base::array::typeinfo& other): + dtype(other.dtype) +{ + set_shape(other.nd, other.shape); +} + +bob::io::base::array::typeinfo& bob::io::base::array::typeinfo::operator= (const bob::io::base::array::typeinfo& other) { + dtype = other.dtype; + set_shape(other.nd, other.shape); + return *this; +} + +void bob::io::base::array::typeinfo::reset() { + dtype = bob::io::base::array::t_unknown; + nd = 0; +} + +bool bob::io::base::array::typeinfo::is_valid() const { + return (dtype != bob::io::base::array::t_unknown) && (nd > 0) && (nd <= (BOB_MAX_DIM+1)) && has_valid_shape(); +} + +void bob::io::base::array::typeinfo::update_strides() { + switch (nd) { + case 0: + return; + case 1: + stride[0] = 1; + return; + case 2: + stride[1] = 1; + stride[0] = shape[1]; + return; + case 3: + stride[2] = 1; + stride[1] = shape[2]; + stride[0] = shape[1]*shape[2]; + return; + case 4: + stride[3] = 1; + stride[2] = shape[3]; + stride[1] = shape[2]*shape[3]; + stride[0] = shape[1]*shape[2]*shape[3]; + return; + case 5: + stride[4] = 1; + stride[3] = shape[4]; + stride[2] = shape[3]*shape[4]; + stride[1] = shape[2]*shape[3]*shape[4]; + stride[0] = shape[1]*shape[2]*shape[3]*shape[4]; + return; + default: + break; + } + throw std::runtime_error("unsupported number of dimensions"); +} + +size_t bob::io::base::array::typeinfo::size() const { + size_t retval = 1; + for (size_t k=0; k<nd; ++k) retval *= shape[k]; + return retval; +} + +size_t bob::io::base::array::typeinfo::buffer_size() const { + return 
size()*bob::io::base::array::getElementSize(dtype); +} + +static bool same_shape(size_t nd, const size_t* s1, const size_t* s2) { + for (size_t k=0; k<nd; ++k) if (s1[k] != s2[k]) return false; + return true; +} + +bool bob::io::base::array::typeinfo::is_compatible(const bob::io::base::array::typeinfo& other) const { + return (dtype == other.dtype) && (nd == other.nd) && same_shape(nd, shape, other.shape); +} + +std::string bob::io::base::array::typeinfo::str() const { + boost::format s("dtype: %s (%d); shape: [%s]; size: %d bytes"); + size_t sz = 0; + size_t buf_sz = 0; + if (dtype != bob::io::base::array::t_unknown) { + //otherwise it throws + sz = item_size(); + buf_sz = buffer_size(); + } + s % item_str() % sz; + switch (nd) { + case 0: + s % ""; + break; + case 1: + s % (boost::format("%d") % shape[0]).str(); + break; + case 2: + s % (boost::format("%d,%d") % shape[0] % shape[1]).str(); + break; + case 3: + s % (boost::format("%d,%d,%d") % shape[0] % shape[1] % shape[2]).str(); + break; + case 4: + s % (boost::format("%d,%d,%d,%d") % shape[0] % shape[1] % shape[2] % shape[3]).str(); + break; + default: + s % ">4 dimensions?"; + break; + } + s % buf_sz; + return s.str(); +} + +void bob::io::base::array::typeinfo::reset_shape() { + shape[0] = 0; +} + +bool bob::io::base::array::typeinfo::has_valid_shape() const { + return shape[0] != 0; +} diff --git a/bob/io/base/cpp/array_type.cpp b/bob/io/base/cpp/array_type.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d42368e9175199572724f337ba5623d6a8a76a71 --- /dev/null +++ b/bob/io/base/cpp/array_type.cpp @@ -0,0 +1,126 @@ +/** + * @date Sat Apr 9 18:10:10 2011 +0200 + * @author Laurent El Shafey <Laurent.El-Shafey@idiap.ch> + * + * @brief Some type-related array utilities + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include <bob.io.base/array_type.h> +#include <boost/format.hpp> + +static const char* t_bool_string = "bool"; +static const char* t_int8_string = 
"int8"; +static const char* t_int16_string = "int16"; +static const char* t_int32_string = "int32"; +static const char* t_int64_string = "int64"; +static const char* t_uint8_string = "uint8"; +static const char* t_uint16_string = "uint16"; +static const char* t_uint32_string = "uint32"; +static const char* t_uint64_string = "uint64"; +static const char* t_float32_string = "float32"; +static const char* t_float64_string = "float64"; +static const char* t_float128_string = "float128"; +static const char* t_complex64_string = "complex64"; +static const char* t_complex128_string = "complex128"; +static const char* t_complex256_string = "complex256"; +static const char* t_unknown_string = "unknown"; + +size_t bob::io::base::array::getElementSize(ElementType t) { + switch(t) { + case bob::io::base::array::t_bool: + return sizeof(bool); + case bob::io::base::array::t_int8: + return sizeof(int8_t); + case bob::io::base::array::t_int16: + return sizeof(int16_t); + case bob::io::base::array::t_int32: + return sizeof(int32_t); + case bob::io::base::array::t_int64: + return sizeof(int64_t); + case bob::io::base::array::t_uint8: + return sizeof(uint8_t); + case bob::io::base::array::t_uint16: + return sizeof(uint16_t); + case bob::io::base::array::t_uint32: + return sizeof(uint32_t); + case bob::io::base::array::t_uint64: + return sizeof(uint64_t); + case bob::io::base::array::t_float32: + return sizeof(float); + case bob::io::base::array::t_float64: + return sizeof(double); + case bob::io::base::array::t_float128: + return sizeof(long double); + case bob::io::base::array::t_complex64: + return sizeof(std::complex<float>); + case bob::io::base::array::t_complex128: + return sizeof(std::complex<double>); + case bob::io::base::array::t_complex256: + return sizeof(std::complex<long double>); + default: + { + boost::format m("unsupported element type (%d)"); + m % (int)t; + throw std::runtime_error(m.str()); + } + } +} + +const char* bob::io::base::array::stringize(ElementType t) { 
+ switch(t) { + case bob::io::base::array::t_bool: + return t_bool_string; + case bob::io::base::array::t_int8: + return t_int8_string; + case bob::io::base::array::t_int16: + return t_int16_string; + case bob::io::base::array::t_int32: + return t_int32_string; + case bob::io::base::array::t_int64: + return t_int64_string; + case bob::io::base::array::t_uint8: + return t_uint8_string; + case bob::io::base::array::t_uint16: + return t_uint16_string; + case bob::io::base::array::t_uint32: + return t_uint32_string; + case bob::io::base::array::t_uint64: + return t_uint64_string; + case bob::io::base::array::t_float32: + return t_float32_string; + case bob::io::base::array::t_float64: + return t_float64_string; + case bob::io::base::array::t_float128: + return t_float128_string; + case bob::io::base::array::t_complex64: + return t_complex64_string; + case bob::io::base::array::t_complex128: + return t_complex128_string; + case bob::io::base::array::t_complex256: + return t_complex256_string; + default: + return t_unknown_string; + } +} + +bob::io::base::array::ElementType bob::io::base::array::unstringize(const char* s) { + std::string sc(s); + if (sc == t_bool_string) return bob::io::base::array::t_bool; + if (sc == t_int8_string) return bob::io::base::array::t_int8; + if (sc == t_int16_string) return bob::io::base::array::t_int16; + if (sc == t_int32_string) return bob::io::base::array::t_int32; + if (sc == t_int64_string) return bob::io::base::array::t_int64; + if (sc == t_uint8_string) return bob::io::base::array::t_uint8; + if (sc == t_uint16_string) return bob::io::base::array::t_uint16; + if (sc == t_uint32_string) return bob::io::base::array::t_uint32; + if (sc == t_uint64_string) return bob::io::base::array::t_uint64; + if (sc == t_float32_string) return bob::io::base::array::t_float32; + if (sc == t_float64_string) return bob::io::base::array::t_float64; + if (sc == t_float128_string) return bob::io::base::array::t_float128; + if (sc == t_complex64_string) 
return bob::io::base::array::t_complex64; + if (sc == t_complex128_string) return bob::io::base::array::t_complex128; + if (sc == t_complex256_string) return bob::io::base::array::t_complex256; + return bob::io::base::array::t_unknown; +} diff --git a/bob/io/base/cpp/blitz_array.cpp b/bob/io/base/cpp/blitz_array.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2bc723a71d4b2770207fb24732641e1e67a1948f --- /dev/null +++ b/bob/io/base/cpp/blitz_array.cpp @@ -0,0 +1,166 @@ +/** + * @date Tue Nov 8 15:34:31 2011 +0100 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief Implementation of non-templated methods of the blitz + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include <stdexcept> + +#include <bob.io.base/blitz_array.h> + +bob::io::base::array::blitz_array::blitz_array(boost::shared_ptr<blitz_array> other) { + set(other); +} + +bob::io::base::array::blitz_array::blitz_array(const blitz_array& other) { + set(other); +} + +bob::io::base::array::blitz_array::blitz_array(boost::shared_ptr<interface> other) { + set(other); +} + +bob::io::base::array::blitz_array::blitz_array(const interface& other) { + set(other); +} + +bob::io::base::array::blitz_array::blitz_array(const typeinfo& info) { + set(info); +} + +bob::io::base::array::blitz_array::blitz_array(void* data, const typeinfo& info): + m_type(info), + m_ptr(data), + m_is_blitz(false) { +} + +bob::io::base::array::blitz_array::~blitz_array() { +} + +void bob::io::base::array::blitz_array::set(boost::shared_ptr<blitz_array> other) { + m_type = other->m_type; + m_ptr = other->m_ptr; + m_is_blitz = other->m_is_blitz; + m_data = other->m_data; +} + +void bob::io::base::array::blitz_array::set(const interface& other) { + set(other.type()); + memcpy(m_ptr, other.ptr(), m_type.buffer_size()); +} + +void bob::io::base::array::blitz_array::set(boost::shared_ptr<interface> other) { + m_type = other->type(); + m_ptr = other->ptr(); + m_is_blitz = false; + 
m_data = other; +} + +template <typename T> +static boost::shared_ptr<void> make_array(size_t nd, const size_t* shape, + void*& ptr) { + switch(nd) { + case 1: + { + blitz::TinyVector<int,1> tv_shape; + for (size_t k=0; k<nd; ++k) tv_shape[k] = shape[k]; + boost::shared_ptr<void> retval = + boost::make_shared<blitz::Array<T,1> >(tv_shape); + ptr = reinterpret_cast<void*>(boost::static_pointer_cast<blitz::Array<T,1> >(retval)->data()); + return retval; + } + case 2: + { + blitz::TinyVector<int,2> tv_shape; + for (size_t k=0; k<nd; ++k) tv_shape[k] = shape[k]; + boost::shared_ptr<void> retval = + boost::make_shared<blitz::Array<T,2> >(tv_shape); + ptr = reinterpret_cast<void*>(boost::static_pointer_cast<blitz::Array<T,2> >(retval)->data()); + return retval; + } + case 3: + { + blitz::TinyVector<int,3> tv_shape; + for (size_t k=0; k<nd; ++k) tv_shape[k] = shape[k]; + boost::shared_ptr<void> retval = + boost::make_shared<blitz::Array<T,3> >(tv_shape); + ptr = reinterpret_cast<void*>(boost::static_pointer_cast<blitz::Array<T,3> >(retval)->data()); + return retval; + } + case 4: + { + blitz::TinyVector<int,4> tv_shape; + for (size_t k=0; k<nd; ++k) tv_shape[k] = shape[k]; + boost::shared_ptr<void> retval = + boost::make_shared<blitz::Array<T,4> >(tv_shape); + ptr = reinterpret_cast<void*>(boost::static_pointer_cast<blitz::Array<T,4> >(retval)->data()); + return retval; + } + default: + break; + } + throw std::runtime_error("unsupported number of dimensions -- debug me"); +} + +void bob::io::base::array::blitz_array::set (const bob::io::base::array::typeinfo& req) { + if (m_type.is_compatible(req)) return; ///< double-check requirement first! 
+ + //ok, have to go through reallocation + m_type = req; + m_is_blitz = true; + switch (m_type.dtype) { + case bob::io::base::array::t_bool: + m_data = make_array<bool>(req.nd, req.shape, m_ptr); + return; + case bob::io::base::array::t_int8: + m_data = make_array<int8_t>(req.nd, req.shape, m_ptr); + return; + case bob::io::base::array::t_int16: + m_data = make_array<int16_t>(req.nd, req.shape, m_ptr); + return; + case bob::io::base::array::t_int32: + m_data = make_array<int32_t>(req.nd, req.shape, m_ptr); + return; + case bob::io::base::array::t_int64: + m_data = make_array<int64_t>(req.nd, req.shape, m_ptr); + return; + case bob::io::base::array::t_uint8: + m_data = make_array<uint8_t>(req.nd, req.shape, m_ptr); + return; + case bob::io::base::array::t_uint16: + m_data = make_array<uint16_t>(req.nd, req.shape, m_ptr); + return; + case bob::io::base::array::t_uint32: + m_data = make_array<uint32_t>(req.nd, req.shape, m_ptr); + return; + case bob::io::base::array::t_uint64: + m_data = make_array<uint64_t>(req.nd, req.shape, m_ptr); + return; + case bob::io::base::array::t_float32: + m_data = make_array<float>(req.nd, req.shape, m_ptr); + return; + case bob::io::base::array::t_float64: + m_data = make_array<double>(req.nd, req.shape, m_ptr); + return; + case bob::io::base::array::t_float128: + m_data = make_array<long double>(req.nd, req.shape, m_ptr); + return; + case bob::io::base::array::t_complex64: + m_data = make_array<std::complex<float> >(req.nd, req.shape, m_ptr); + return; + case bob::io::base::array::t_complex128: + m_data = make_array<std::complex<double> >(req.nd, req.shape, m_ptr); + return; + case bob::io::base::array::t_complex256: + m_data = make_array<std::complex<long double> >(req.nd, req.shape, m_ptr); + return; + default: + break; + } + + //if we get to this point, there is nothing much we can do... 
+ throw std::runtime_error("invalid data type on blitz array reset -- debug me"); +} diff --git a/bob/io/base/cpp/reorder.cpp b/bob/io/base/cpp/reorder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cda5ab335a47cb9a05dcf2e14357b91f6311cdfd --- /dev/null +++ b/bob/io/base/cpp/reorder.cpp @@ -0,0 +1,288 @@ +/** + * @date Tue Nov 22 11:24:44 2011 +0100 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief Implementation of row-major/column-major reordering + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include <boost/format.hpp> +#include <cstring> //for memcpy + +#include <bob.io.base/reorder.h> + +void bob::io::base::rc2d(size_t& row, size_t& col, const size_t i, const size_t j, + const size_t* shape) { + row = (i * shape[1]) + j; + col = (j * shape[0]) + i; +} + +void bob::io::base::rc3d(size_t& row, size_t& col, const size_t i, const size_t j, + const size_t k, const size_t* shape) { + row = ( (i * shape[1]) + j ) * shape[2] + k; + col = ( (k * shape[1]) + j ) * shape[0] + i; +} + +void bob::io::base::rc4d(size_t& row, size_t& col, const size_t i, const size_t j, + const size_t k, const size_t l, const size_t* shape) { + row = ( ( i * shape[1] + j ) * shape[2] + k ) * shape[3] + l; + col = ( ( l * shape[2] + k ) * shape[1] + j ) * shape[0] + i; +} + +void bob::io::base::row_to_col_order(const void* src_, void* dst_, + const bob::io::base::array::typeinfo& info) { + + size_t dsize = info.item_size(); + + //cast to byte type so we can manipulate the pointers... 
+ const uint8_t* src = static_cast<const uint8_t*>(src_); + uint8_t* dst = static_cast<uint8_t*>(dst_); + + switch(info.nd) { + + case 1: + std::memcpy(dst, src, info.buffer_size()); + break; + + case 2: + for (size_t i=0; i<info.shape[0]; ++i) + for (size_t j=0; j<info.shape[1]; ++j) { + size_t row_major, col_major; + bob::io::base::rc2d(row_major, col_major, i, j, info.shape); + row_major *= dsize; + col_major *= dsize; + std::memcpy(&dst[col_major], &src[row_major], dsize); + } + break; + + case 3: + for (size_t i=0; i<info.shape[0]; ++i) + for (size_t j=0; j<info.shape[1]; ++j) + for (size_t k=0; k<info.shape[2]; ++k) { + size_t row_major, col_major; + bob::io::base::rc3d(row_major, col_major, i, j, k, info.shape); + row_major *= dsize; + col_major *= dsize; + std::memcpy(&dst[col_major], &src[row_major], dsize); + } + break; + + case 4: + for (size_t i=0; i<info.shape[0]; ++i) + for (size_t j=0; j<info.shape[1]; ++j) + for (size_t k=0; k<info.shape[2]; ++k) + for (size_t l=0; l<info.shape[3]; ++l) { + size_t row_major, col_major; + bob::io::base::rc4d(row_major, col_major, i, j, k, l, info.shape); + row_major *= dsize; + col_major *= dsize; + std::memcpy(&dst[col_major], &src[row_major], dsize); + } + break; + + default: + { + boost::format m("row_to_col_order() can only flip arrays with up to %u dimensions - you passed one with %u dimensions"); + m % BOB_MAX_DIM % info.nd; + throw std::runtime_error(m.str()); + } + } +} + +void bob::io::base::col_to_row_order(const void* src_, void* dst_, + const bob::io::base::array::typeinfo& info) { + + size_t dsize = info.item_size(); + + //cast to byte type so we can manipulate the pointers... 
+ const uint8_t* src = static_cast<const uint8_t*>(src_); + uint8_t* dst = static_cast<uint8_t*>(dst_); + + switch(info.nd) { + + case 1: + std::memcpy(dst, src, info.buffer_size()); + break; + + case 2: + for (size_t i=0; i<info.shape[0]; ++i) + for (size_t j=0; j<info.shape[1]; ++j) { + size_t row_major, col_major; + bob::io::base::rc2d(row_major, col_major, i, j, info.shape); + row_major *= dsize; + col_major *= dsize; + std::memcpy(&dst[row_major], &src[col_major], dsize); + } + break; + + case 3: + for (size_t i=0; i<info.shape[0]; ++i) + for (size_t j=0; j<info.shape[1]; ++j) + for (size_t k=0; k<info.shape[2]; ++k) { + size_t row_major, col_major; + bob::io::base::rc3d(row_major, col_major, i, j, k, info.shape); + row_major *= dsize; + col_major *= dsize; + std::memcpy(&dst[row_major], &src[col_major], dsize); + } + break; + + case 4: + for (size_t i=0; i<info.shape[0]; ++i) + for (size_t j=0; j<info.shape[1]; ++j) + for (size_t k=0; k<info.shape[2]; ++k) + for (size_t l=0; l<info.shape[3]; ++l) { + size_t row_major, col_major; + bob::io::base::rc4d(row_major, col_major, i, j, k, l, info.shape); + row_major *= dsize; + col_major *= dsize; + std::memcpy(&dst[row_major], &src[col_major], dsize); + } + break; + + default: + { + boost::format m("col_to_row_order() can only flip arrays with up to %u dimensions - you passed one with %u dimensions"); + m % BOB_MAX_DIM % info.nd; + throw std::runtime_error(m.str()); + } + } +} + +void bob::io::base::row_to_col_order_complex(const void* src_, void* dst_re_, + void* dst_im_, const bob::io::base::array::typeinfo& info) { + + size_t dsize = info.item_size(); + size_t dsize2 = dsize/2; ///< size of each complex component (real, imaginary) + + //cast to byte type so we can manipulate the pointers... 
+ const uint8_t* src = static_cast<const uint8_t*>(src_); + uint8_t* dst_re = static_cast<uint8_t*>(dst_re_); + uint8_t* dst_im = static_cast<uint8_t*>(dst_im_); + + switch(info.nd) { + + case 1: + for (size_t i=0; i<info.shape[0]; ++i) { + std::memcpy(&dst_re[dsize2*i], &src[dsize*i] , dsize2); + std::memcpy(&dst_im[dsize2*i], &src[dsize*i]+dsize2, dsize2); + } + break; + + case 2: + for (size_t i=0; i<info.shape[0]; ++i) + for (size_t j=0; j<info.shape[1]; ++j) { + size_t row_major, col_major; + bob::io::base::rc2d(row_major, col_major, i, j, info.shape); + row_major *= dsize; + col_major *= dsize2; + std::memcpy(&dst_re[col_major], &src[row_major] , dsize2); + std::memcpy(&dst_im[col_major], &src[row_major]+dsize2, dsize2); + } + break; + + case 3: + for (size_t i=0; i<info.shape[0]; ++i) + for (size_t j=0; j<info.shape[1]; ++j) + for (size_t k=0; k<info.shape[2]; ++k) { + size_t row_major, col_major; + bob::io::base::rc3d(row_major, col_major, i, j, k, info.shape); + row_major *= dsize; + col_major *= dsize2; + std::memcpy(&dst_re[col_major], &src[row_major] , dsize2); + std::memcpy(&dst_im[col_major], &src[row_major]+dsize2, dsize2); + } + break; + + case 4: + for (size_t i=0; i<info.shape[0]; ++i) + for (size_t j=0; j<info.shape[1]; ++j) + for (size_t k=0; k<info.shape[2]; ++k) + for (size_t l=0; l<info.shape[3]; ++l) { + size_t row_major, col_major; + bob::io::base::rc4d(row_major, col_major, i, j, k, l, info.shape); + row_major *= dsize; + col_major *= dsize2; + std::memcpy(&dst_re[col_major], &src[row_major] , dsize2); + std::memcpy(&dst_im[col_major], &src[row_major]+dsize2, dsize2); + } + break; + + default: + { + boost::format m("row_to_col_order_complex() can only flip arrays with up to %u dimensions - you passed one with %u dimensions"); + m % BOB_MAX_DIM % info.nd; + throw std::runtime_error(m.str()); + } + } +} + +void bob::io::base::col_to_row_order_complex(const void* src_re_, const void* src_im_, + void* dst_, const 
bob::io::base::array::typeinfo& info) { + + size_t dsize = info.item_size(); + size_t dsize2 = dsize/2; ///< size of each complex component (real, imaginary) + + //cast to byte type so we can manipulate the pointers... + const uint8_t* src_re = static_cast<const uint8_t*>(src_re_); + const uint8_t* src_im = static_cast<const uint8_t*>(src_im_); + uint8_t* dst = static_cast<uint8_t*>(dst_); + + switch(info.nd) { + + case 1: + for (size_t i=0; i<info.shape[0]; ++i) { + std::memcpy(&dst[dsize*i] , &src_re[dsize2*i], dsize2); + std::memcpy(&dst[dsize*i]+dsize2, &src_im[dsize2*i], dsize2); + } + break; + + case 2: + for (size_t i=0; i<info.shape[0]; ++i) + for (size_t j=0; j<info.shape[1]; ++j) { + size_t row_major, col_major; + bob::io::base::rc2d(row_major, col_major, i, j, info.shape); + row_major *= dsize; + col_major *= dsize2; + std::memcpy(&dst[row_major], &src_re[col_major], dsize2); + std::memcpy(&dst[row_major]+dsize2, &src_im[col_major], dsize2); + } + break; + + case 3: + for (size_t i=0; i<info.shape[0]; ++i) + for (size_t j=0; j<info.shape[1]; ++j) + for (size_t k=0; k<info.shape[2]; ++k) { + size_t row_major, col_major; + bob::io::base::rc3d(row_major, col_major, i, j, k, info.shape); + row_major *= dsize; + col_major *= dsize2; + std::memcpy(&dst[row_major] , &src_re[col_major], dsize2); + std::memcpy(&dst[row_major]+dsize2, &src_im[col_major], dsize2); + } + break; + + case 4: + for (size_t i=0; i<info.shape[0]; ++i) + for (size_t j=0; j<info.shape[1]; ++j) + for (size_t k=0; k<info.shape[2]; ++k) + for (size_t l=0; l<info.shape[3]; ++l) { + size_t row_major, col_major; + bob::io::base::rc4d(row_major, col_major, i, j, k, l, info.shape); + row_major *= dsize; + col_major *= dsize2; + std::memcpy(&dst[row_major] , &src_re[col_major], dsize2); + std::memcpy(&dst[row_major]+dsize2, &src_im[col_major], dsize2); + } + break; + + default: + { + boost::format m("col_to_row_order_complex() can only flip arrays with up to %u dimensions - you passed one with %u 
dimensions"); + m % BOB_MAX_DIM % info.nd; + throw std::runtime_error(m.str()); + } + } +} + diff --git a/bob/io/base/cpp/utils.cpp b/bob/io/base/cpp/utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3467b997726137f619d1fb4911d5df8b28caaadc --- /dev/null +++ b/bob/io/base/cpp/utils.cpp @@ -0,0 +1,30 @@ +/** + * @author Andre Anjos <andre.anjos@idiap.ch> + * @date Wed 3 Oct 08:36:48 2012 + * + * @brief Implementation of some compile-time I/O utilities + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#include <bob.io.base/CodecRegistry.h> +#include <bob.io.base/utils.h> + +boost::shared_ptr<bob::io::base::File> bob::io::base::open (const char* filename, + char mode, const char* pretend_extension) { + boost::shared_ptr<bob::io::base::CodecRegistry> instance = bob::io::base::CodecRegistry::instance(); + return instance->findByExtension(pretend_extension)(filename, mode); +} + +boost::shared_ptr<bob::io::base::File> bob::io::base::open (const char* filename, char mode) { + boost::shared_ptr<bob::io::base::CodecRegistry> instance = bob::io::base::CodecRegistry::instance(); + return instance->findByFilenameExtension(filename)(filename, mode); +} + +bob::io::base::array::typeinfo bob::io::base::peek (const char* filename) { + return open(filename, 'r')->type(); +} + +bob::io::base::array::typeinfo bob::io::base::peek_all (const char* filename) { + return open(filename, 'r')->type_all(); +} diff --git a/bob/io/base/file.cpp b/bob/io/base/file.cpp index f293b657260240e748ea165e1a2186bc0cc42b4c..58d760c5bf52455de8cddd27715d2ce8ff1a1ca5 100644 --- a/bob/io/base/file.cpp +++ b/bob/io/base/file.cpp @@ -2,24 +2,25 @@ * @author Andre Anjos <andre.anjos@idiap.ch> * @date Tue 5 Nov 11:16:09 2013 * - * @brief Bindings to bob::io::File + * @brief Bindings to bob::io::base::File */ #define BOB_IO_BASE_MODULE #include "bobskin.h" #include <bob.io.base/api.h> -#include <bob/io/CodecRegistry.h> -#include <bob/io/utils.h> #include
<numpy/arrayobject.h> #include <bob.blitz/capi.h> #include <bob.blitz/cleanup.h> #include <stdexcept> +#include <bob.io.base/CodecRegistry.h> +#include <bob.io.base/utils.h> + #define FILETYPE_NAME "File" PyDoc_STRVAR(s_file_str, BOB_EXT_MODULE_PREFIX "." FILETYPE_NAME); PyDoc_STRVAR(s_file_doc, -"File(filename, [mode='r', [pretend_extension='']]) -> new bob::io::File\n\ +"File(filename, [mode='r', [pretend_extension='']]) -> new bob::io::base::File\n\ \n\ Use this object to read and write data into files.\n\ \n\ @@ -120,10 +121,10 @@ static int PyBobIoFile_Init(PyBobIoFileObject* self, PyObject *args, PyObject* k try { if (pretend_extension) { - self->f = bob::io::open(c_filename, mode, pretend_extension); + self->f = bob::io::base::open(c_filename, mode, pretend_extension); } else { - self->f = bob::io::open(c_filename, mode); + self->f = bob::io::base::open(c_filename, mode); } } catch (std::exception& e) { @@ -192,45 +193,45 @@ static Py_ssize_t PyBobIoFile_Len (PyBobIoFileObject* self) { return retval; } -int PyBobIo_AsTypenum (bob::core::array::ElementType type) { +int PyBobIo_AsTypenum (bob::io::base::array::ElementType type) { switch(type) { - case bob::core::array::t_bool: + case bob::io::base::array::t_bool: return NPY_BOOL; - case bob::core::array::t_int8: + case bob::io::base::array::t_int8: return NPY_INT8; - case bob::core::array::t_int16: + case bob::io::base::array::t_int16: return NPY_INT16; - case bob::core::array::t_int32: + case bob::io::base::array::t_int32: return NPY_INT32; - case bob::core::array::t_int64: + case bob::io::base::array::t_int64: return NPY_INT64; - case bob::core::array::t_uint8: + case bob::io::base::array::t_uint8: return NPY_UINT8; - case bob::core::array::t_uint16: + case bob::io::base::array::t_uint16: return NPY_UINT16; - case bob::core::array::t_uint32: + case bob::io::base::array::t_uint32: return NPY_UINT32; - case bob::core::array::t_uint64: + case bob::io::base::array::t_uint64: return NPY_UINT64; - case 
bob::core::array::t_float32: + case bob::io::base::array::t_float32: return NPY_FLOAT32; - case bob::core::array::t_float64: + case bob::io::base::array::t_float64: return NPY_FLOAT64; #ifdef NPY_FLOAT128 - case bob::core::array::t_float128: + case bob::io::base::array::t_float128: return NPY_FLOAT128; #endif - case bob::core::array::t_complex64: + case bob::io::base::array::t_complex64: return NPY_COMPLEX64; - case bob::core::array::t_complex128: + case bob::io::base::array::t_complex128: return NPY_COMPLEX128; #ifdef NPY_COMPLEX256 - case bob::core::array::t_complex256: + case bob::io::base::array::t_complex256: return NPY_COMPLEX256; #endif default: - PyErr_Format(PyExc_TypeError, "unsupported Bob/C++ element type (%s)", bob::core::array::stringize(type)); + PyErr_Format(PyExc_TypeError, "unsupported Bob/C++ element type (%s)", bob::io::base::array::stringize(type)); } return NPY_NOTYPE; @@ -246,7 +247,7 @@ static PyObject* PyBobIoFile_GetIndex (PyBobIoFileObject* self, Py_ssize_t i) { return 0; } - const bob::core::array::typeinfo& info = self->f->type(); + const bob::io::base::array::typeinfo& info = self->f->type(); npy_intp shape[NPY_MAXDIMS]; for (size_t k=0; k<info.nd; ++k) shape[k] = info.shape[k]; @@ -287,7 +288,7 @@ static PyObject* PyBobIoFile_GetSlice (PyBobIoFileObject* self, PySliceObject* s self->f->size(), &start, &stop, &step, &slicelength) < 0) return 0; //creates the return array - const bob::core::array::typeinfo& info = self->f->type(); + const bob::io::base::array::typeinfo& info = self->f->type(); int type_num = PyBobIo_AsTypenum(info.dtype); if (type_num == NPY_NOTYPE) return 0; ///< failure @@ -382,7 +383,7 @@ static PyObject* PyBobIoFile_Read(PyBobIoFileObject* self, PyObject *args, PyObj // reads the whole file in a single shot - const bob::core::array::typeinfo& info = self->f->type_all(); + const bob::io::base::array::typeinfo& info = self->f->type_all(); npy_intp shape[NPY_MAXDIMS]; for (size_t k=0; k<info.nd; ++k) shape[k] = 
info.shape[k]; @@ -543,7 +544,7 @@ Returns the current position of the newly written array.\n\ " ); -PyObject* PyBobIo_TypeInfoAsTuple (const bob::core::array::typeinfo& ti) { +PyObject* PyBobIo_TypeInfoAsTuple (const bob::io::base::array::typeinfo& ti) { int type_num = PyBobIo_AsTypenum(ti.dtype); if (type_num == NPY_NOTYPE) return 0; @@ -575,7 +576,7 @@ static PyObject* PyBobIoFile_Describe(PyBobIoFileObject* self, PyObject *args, P PyObject* all = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O", kwlist, &all)) return 0; - const bob::core::array::typeinfo* info = 0; + const bob::io::base::array::typeinfo* info = 0; if (all && PyObject_IsTrue(all)) info = &self->f->type_all(); else info = &self->f->type(); diff --git a/bob/io/base/hdf5.cpp b/bob/io/base/hdf5.cpp index 51249aff7baa6d65b8af77f9d9b5ebb00f7fa9b7..dcb4249cc61a7f6f67c1c0cc32a360ca7b805a25 100644 --- a/bob/io/base/hdf5.cpp +++ b/bob/io/base/hdf5.cpp @@ -2,7 +2,7 @@ * @author Andre Anjos <andre.anjos@idiap.ch> * @date Tue 12 Nov 18:19:22 2013 * - * @brief Bindings to bob::io::HDF5File + * @brief Bindings to bob::io::base::HDF5File */ #define BOB_IO_BASE_MODULE @@ -21,7 +21,7 @@ PyDoc_STRVAR(s_hdf5file_str, BOB_EXT_MODULE_PREFIX "." 
HDF5FILE_NAME); PyDoc_STRVAR(s_hdf5file_doc, -"HDF5File(filename, [mode='r']) -> new bob::io::HDF5File\n\ +"HDF5File(filename, [mode='r']) -> new bob::io::base::HDF5File\n\ \n\ Reads and writes data to HDF5 files.\n\ \n\ @@ -77,15 +77,15 @@ static void PyBobIoHDF5File_Delete (PyBobIoHDF5FileObject* o) { } -static bob::io::HDF5File::mode_t mode_from_char (char mode) { +static bob::io::base::HDF5File::mode_t mode_from_char (char mode) { - bob::io::HDF5File::mode_t new_mode = bob::io::HDF5File::inout; + bob::io::base::HDF5File::mode_t new_mode = bob::io::base::HDF5File::inout; switch (mode) { - case 'r': new_mode = bob::io::HDF5File::in; break; - case 'a': new_mode = bob::io::HDF5File::inout; break; - case 'w': new_mode = bob::io::HDF5File::trunc; break; - case 'x': new_mode = bob::io::HDF5File::excl; break; + case 'r': new_mode = bob::io::base::HDF5File::in; break; + case 'a': new_mode = bob::io::base::HDF5File::inout; break; + case 'w': new_mode = bob::io::base::HDF5File::trunc; break; + case 'x': new_mode = bob::io::base::HDF5File::excl; break; default: PyErr_SetString(PyExc_RuntimeError, "Supported flags are 'r' (read-only), 'a' (read/write/append), 'w' (read/write/truncate) or 'x' (read/write/exclusive)"); } @@ -124,7 +124,7 @@ static int PyBobIoHDF5File_Init(PyBobIoHDF5FileObject* self, PyErr_Format(PyExc_ValueError, "file open mode string should have 1 element and be either 'r' (read), 'w' (write), 'a' (append), 'x' (exclusive)"); return -1; } - bob::io::HDF5File::mode_t mode_mode = mode_from_char(mode); + bob::io::base::HDF5File::mode_t mode_mode = mode_from_char(mode); if (PyErr_Occurred()) return -1; #if PY_VERSION_HEX >= 0x03000000 @@ -134,7 +134,7 @@ static int PyBobIoHDF5File_Init(PyBobIoHDF5FileObject* self, #endif try { - self->f.reset(new bob::io::HDF5File(c_filename, mode_mode)); + self->f.reset(new bob::io::base::HDF5File(c_filename, mode_mode)); } catch (std::exception& e) { PyErr_SetString(PyExc_RuntimeError, e.what()); @@ -404,85 +404,85 @@ If the 
given path is relative, it is take w.r.t. to the current\n\ working directory.\n\ "); -static bob::io::hdf5type PyBobIo_H5FromTypenum (int type_num) { +static bob::io::base::hdf5type PyBobIo_H5FromTypenum (int type_num) { switch(type_num) { - case NPY_STRING: return bob::io::s; - case NPY_BOOL: return bob::io::b; - case NPY_INT8: return bob::io::i8; - case NPY_INT16: return bob::io::i16; - case NPY_INT32: return bob::io::i32; - case NPY_INT64: return bob::io::i64; - case NPY_UINT8: return bob::io::u8; - case NPY_UINT16: return bob::io::u16; - case NPY_UINT32: return bob::io::u32; - case NPY_UINT64: return bob::io::u64; - case NPY_FLOAT32: return bob::io::f32; - case NPY_FLOAT64: return bob::io::f64; + case NPY_STRING: return bob::io::base::s; + case NPY_BOOL: return bob::io::base::b; + case NPY_INT8: return bob::io::base::i8; + case NPY_INT16: return bob::io::base::i16; + case NPY_INT32: return bob::io::base::i32; + case NPY_INT64: return bob::io::base::i64; + case NPY_UINT8: return bob::io::base::u8; + case NPY_UINT16: return bob::io::base::u16; + case NPY_UINT32: return bob::io::base::u32; + case NPY_UINT64: return bob::io::base::u64; + case NPY_FLOAT32: return bob::io::base::f32; + case NPY_FLOAT64: return bob::io::base::f64; #ifdef NPY_FLOAT128 - case NPY_FLOAT128: return bob::io::f128; + case NPY_FLOAT128: return bob::io::base::f128; #endif - case NPY_COMPLEX64: return bob::io::c64; - case NPY_COMPLEX128: return bob::io::c128; + case NPY_COMPLEX64: return bob::io::base::c64; + case NPY_COMPLEX128: return bob::io::base::c128; #ifdef NPY_COMPLEX256 - case NPY_COMPLEX256: return bob::io::c256; + case NPY_COMPLEX256: return bob::io::base::c256; #endif #if defined(__LP64__) || defined(__APPLE__) case NPY_LONGLONG: switch (NPY_BITSOF_LONGLONG) { - case 8: return bob::io::i8; - case 16: return bob::io::i16; - case 32: return bob::io::i32; - case 64: return bob::io::i64; - default: return bob::io::unsupported; + case 8: return bob::io::base::i8; + case 16: return 
bob::io::base::i16; + case 32: return bob::io::base::i32; + case 64: return bob::io::base::i64; + default: return bob::io::base::unsupported; } break; case NPY_ULONGLONG: switch (NPY_BITSOF_LONGLONG) { - case 8: return bob::io::u8; - case 16: return bob::io::u16; - case 32: return bob::io::u32; - case 64: return bob::io::u64; - default: return bob::io::unsupported; + case 8: return bob::io::base::u8; + case 16: return bob::io::base::u16; + case 32: return bob::io::base::u32; + case 64: return bob::io::base::u64; + default: return bob::io::base::unsupported; } break; #endif - default: return bob::io::unsupported; + default: return bob::io::base::unsupported; } } -static int PyBobIo_H5AsTypenum (bob::io::hdf5type type) { +static int PyBobIo_H5AsTypenum (bob::io::base::hdf5type type) { switch(type) { - case bob::io::s: return NPY_STRING; - case bob::io::b: return NPY_BOOL; - case bob::io::i8: return NPY_INT8; - case bob::io::i16: return NPY_INT16; - case bob::io::i32: return NPY_INT32; - case bob::io::i64: return NPY_INT64; - case bob::io::u8: return NPY_UINT8; - case bob::io::u16: return NPY_UINT16; - case bob::io::u32: return NPY_UINT32; - case bob::io::u64: return NPY_UINT64; - case bob::io::f32: return NPY_FLOAT32; - case bob::io::f64: return NPY_FLOAT64; + case bob::io::base::s: return NPY_STRING; + case bob::io::base::b: return NPY_BOOL; + case bob::io::base::i8: return NPY_INT8; + case bob::io::base::i16: return NPY_INT16; + case bob::io::base::i32: return NPY_INT32; + case bob::io::base::i64: return NPY_INT64; + case bob::io::base::u8: return NPY_UINT8; + case bob::io::base::u16: return NPY_UINT16; + case bob::io::base::u32: return NPY_UINT32; + case bob::io::base::u64: return NPY_UINT64; + case bob::io::base::f32: return NPY_FLOAT32; + case bob::io::base::f64: return NPY_FLOAT64; #ifdef NPY_FLOAT128 - case bob::io::f128: return NPY_FLOAT128; + case bob::io::base::f128: return NPY_FLOAT128; #endif - case bob::io::c64: return NPY_COMPLEX64; - case 
bob::io::c128: return NPY_COMPLEX128; + case bob::io::base::c64: return NPY_COMPLEX64; + case bob::io::base::c128: return NPY_COMPLEX128; #ifdef NPY_COMPLEX256 - case bob::io::c256: return NPY_COMPLEX256; + case bob::io::base::c256: return NPY_COMPLEX256; #endif default: return NPY_NOTYPE; } } -static PyObject* PyBobIo_HDF5TypeAsTuple (const bob::io::HDF5Type& t) { +static PyObject* PyBobIo_HDF5TypeAsTuple (const bob::io::base::HDF5Type& t) { - const bob::io::HDF5Shape& sh = t.shape(); + const bob::io::base::HDF5Shape& sh = t.shape(); size_t ndim = sh.n(); const hsize_t* shptr = sh.get(); @@ -513,7 +513,7 @@ static PyObject* PyBobIo_HDF5TypeAsTuple (const bob::io::HDF5Type& t) { } -static PyObject* PyBobIo_HDF5DescriptorAsTuple (const bob::io::HDF5Descriptor& d) { +static PyObject* PyBobIo_HDF5DescriptorAsTuple (const bob::io::base::HDF5Descriptor& d) { PyObject* type = PyBobIo_HDF5TypeAsTuple(d.type); if (!type) return 0; @@ -542,7 +542,7 @@ static PyObject* PyBobIoHDF5File_Describe(PyBobIoHDF5FileObject* self, PyObject boost::shared_ptr<PyObject> retval_; try { - const std::vector<bob::io::HDF5Descriptor>& dv = self->f->describe(key); + const std::vector<bob::io::base::HDF5Descriptor>& dv = self->f->describe(key); retval = PyTuple_New(dv.size()); retval_ = make_safe(retval); @@ -795,7 +795,7 @@ recursive\n\ static PyObject* PyBobIoHDF5File_Xread(PyBobIoHDF5FileObject* self, const char* p, int descriptor, int pos) { - const std::vector<bob::io::HDF5Descriptor>* D = 0; + const std::vector<bob::io::base::HDF5Descriptor>* D = 0; try { D = &self->f->describe(p); } @@ -811,43 +811,43 @@ static PyObject* PyBobIoHDF5File_Xread(PyBobIoHDF5FileObject* self, } //last descriptor always contains the full readout. 
- const bob::io::HDF5Type& type = (*D)[descriptor].type; - const bob::io::HDF5Shape& shape = type.shape(); + const bob::io::base::HDF5Type& type = (*D)[descriptor].type; + const bob::io::base::HDF5Shape& shape = type.shape(); if (shape.n() == 1 && shape[0] == 1) { //read as scalar try { switch(type.type()) { - case bob::io::s: + case bob::io::base::s: return Py_BuildValue("s", self->f->read<std::string>(p, pos).c_str()); - case bob::io::b: + case bob::io::base::b: return PyBlitzArrayCxx_FromCScalar(self->f->read<bool>(p, pos)); - case bob::io::i8: + case bob::io::base::i8: return PyBlitzArrayCxx_FromCScalar(self->f->read<int8_t>(p, pos)); - case bob::io::i16: + case bob::io::base::i16: return PyBlitzArrayCxx_FromCScalar(self->f->read<int16_t>(p, pos)); - case bob::io::i32: + case bob::io::base::i32: return PyBlitzArrayCxx_FromCScalar(self->f->read<int32_t>(p, pos)); - case bob::io::i64: + case bob::io::base::i64: return PyBlitzArrayCxx_FromCScalar(self->f->read<int64_t>(p, pos)); - case bob::io::u8: + case bob::io::base::u8: return PyBlitzArrayCxx_FromCScalar(self->f->read<uint8_t>(p, pos)); - case bob::io::u16: + case bob::io::base::u16: return PyBlitzArrayCxx_FromCScalar(self->f->read<uint16_t>(p, pos)); - case bob::io::u32: + case bob::io::base::u32: return PyBlitzArrayCxx_FromCScalar(self->f->read<uint32_t>(p, pos)); - case bob::io::u64: + case bob::io::base::u64: return PyBlitzArrayCxx_FromCScalar(self->f->read<uint64_t>(p, pos)); - case bob::io::f32: + case bob::io::base::f32: return PyBlitzArrayCxx_FromCScalar(self->f->read<float>(p, pos)); - case bob::io::f64: + case bob::io::base::f64: return PyBlitzArrayCxx_FromCScalar(self->f->read<double>(p, pos)); - case bob::io::f128: + case bob::io::base::f128: return PyBlitzArrayCxx_FromCScalar(self->f->read<long double>(p, pos)); - case bob::io::c64: + case bob::io::base::c64: return PyBlitzArrayCxx_FromCScalar(self->f->read<std::complex<float> >(p, pos)); - case bob::io::c128: + case bob::io::base::c128: return 
PyBlitzArrayCxx_FromCScalar(self->f->read<std::complex<double> >(p, pos)); - case bob::io::c256: + case bob::io::base::c256: return PyBlitzArrayCxx_FromCScalar(self->f->read<std::complex<long double> >(p, pos)); default: PyErr_Format(PyExc_TypeError, "unsupported HDF5 type: %s", type.str().c_str()); @@ -861,7 +861,7 @@ static PyObject* PyBobIoHDF5File_Xread(PyBobIoHDF5FileObject* self, catch (...) { const char* filename = "<unknown>"; try{ filename = self->f->filename().c_str(); } catch(...){} - PyErr_Format(PyExc_RuntimeError, "caught unknown exception while reading %s scalar from dataset `%s' at position %d from HDF5 file `%s'", bob::io::stringize(type.type()), p, pos, filename); + PyErr_Format(PyExc_RuntimeError, "caught unknown exception while reading %s scalar from dataset `%s' at position %d from HDF5 file `%s'", bob::io::base::stringize(type.type()), p, pos, filename); return 0; } } @@ -936,7 +936,7 @@ static PyObject* PyBobIoHDF5File_ListRead(PyBobIoHDF5FileObject* self, PyObject if (pos >= 0) return PyBobIoHDF5File_Xread(self, key, 0, pos); //otherwise returns as a list - const std::vector<bob::io::HDF5Descriptor>* D = 0; + const std::vector<bob::io::base::HDF5Descriptor>* D = 0; try { D = &self->f->describe(key); } @@ -1037,16 +1037,16 @@ static boost::shared_ptr<char> PyBobIo_GetString(PyObject* o) { } -static int PyBobIoHDF5File_SetStringType(bob::io::HDF5Type& t, PyObject* o) { +static int PyBobIoHDF5File_SetStringType(bob::io::base::HDF5Type& t, PyObject* o) { auto value = PyBobIo_GetString(o); if (!value) return -1; - t = bob::io::HDF5Type(value.get()); + t = bob::io::base::HDF5Type(value.get()); return 0; } -template <typename T> int PyBobIoHDF5File_SetType(bob::io::HDF5Type& t) { +template <typename T> int PyBobIoHDF5File_SetType(bob::io::base::HDF5Type& t) { T v; - t = bob::io::HDF5Type(v); + t = bob::io::base::HDF5Type(v); return 0; } @@ -1082,7 +1082,7 @@ static bool PyBobIoHDF5File_IsPythonScalar(PyObject* obj) { * `*converted' is set to 0 
(NULL), then we don't try a conversion, returning * -1. */ -static int PyBobIoHDF5File_GetObjectType(PyObject* o, bob::io::HDF5Type& t, +static int PyBobIoHDF5File_GetObjectType(PyObject* o, bob::io::base::HDF5Type& t, PyObject** converted=0) { if (PyArray_IsScalar(o, Generic) || PyBobIoHDF5File_IsPythonScalar(o)) { @@ -1172,10 +1172,10 @@ static int PyBobIoHDF5File_GetObjectType(PyObject* o, bob::io::HDF5Type& t, else if (PyBlitzArray_Check(o)) { PyBlitzArrayObject* bz = reinterpret_cast<PyBlitzArrayObject*>(o); - bob::io::hdf5type h5type = PyBobIo_H5FromTypenum(bz->type_num); - if (h5type == bob::io::unsupported) return -1; - bob::io::HDF5Shape h5shape(bz->ndim, bz->shape); - t = bob::io::HDF5Type(h5type, h5shape); + bob::io::base::hdf5type h5type = PyBobIo_H5FromTypenum(bz->type_num); + if (h5type == bob::io::base::unsupported) return -1; + bob::io::base::HDF5Shape h5shape(bz->ndim, bz->shape); + t = bob::io::base::HDF5Type(h5type, h5shape); return 1; } @@ -1183,10 +1183,10 @@ static int PyBobIoHDF5File_GetObjectType(PyObject* o, bob::io::HDF5Type& t, else if (PyArray_CheckExact(o) && PyArray_ISCARRAY_RO((PyArrayObject*)o)) { PyArrayObject* np = reinterpret_cast<PyArrayObject*>(o); - bob::io::hdf5type h5type = PyBobIo_H5FromTypenum(PyArray_DESCR(np)->type_num); - if (h5type == bob::io::unsupported) return -1; - bob::io::HDF5Shape h5shape(PyArray_NDIM(np), PyArray_DIMS(np)); - t = bob::io::HDF5Type(h5type, h5shape); + bob::io::base::hdf5type h5type = PyBobIo_H5FromTypenum(PyArray_DESCR(np)->type_num); + if (h5type == bob::io::base::unsupported) return -1; + bob::io::base::HDF5Shape h5shape(PyArray_NDIM(np), PyArray_DIMS(np)); + t = bob::io::base::HDF5Type(h5type, h5shape); return 2; } @@ -1203,13 +1203,13 @@ static int PyBobIoHDF5File_GetObjectType(PyObject* o, bob::io::HDF5Type& t, if (!*converted) return -1; ///< error condition PyArrayObject* np = reinterpret_cast<PyArrayObject*>(*converted); - bob::io::hdf5type h5type = 
PyBobIo_H5FromTypenum(PyArray_DESCR(np)->type_num); - if (h5type == bob::io::unsupported) { + bob::io::base::hdf5type h5type = PyBobIo_H5FromTypenum(PyArray_DESCR(np)->type_num); + if (h5type == bob::io::base::unsupported) { Py_CLEAR(*converted); return -1; } - bob::io::HDF5Shape h5shape(PyArray_NDIM(np), PyArray_DIMS(np)); - t = bob::io::HDF5Type(h5type, h5shape); + bob::io::base::HDF5Shape h5shape(PyArray_NDIM(np), PyArray_DIMS(np)); + t = bob::io::base::HDF5Type(h5type, h5shape); return 3; } @@ -1242,7 +1242,7 @@ static PyObject* PyBobIoHDF5File_Replace(PyBobIoHDF5FileObject* self, PyObject* PyObject* data = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, "snO", kwlist, &path, &pos, &data)) return 0; - bob::io::HDF5Type type; + bob::io::base::HDF5Type type; PyObject* converted = 0; int is_array = PyBobIoHDF5File_GetObjectType(data, type, &converted); auto converted_ = make_xsafe(converted); @@ -1259,42 +1259,42 @@ static PyObject* PyBobIoHDF5File_Replace(PyBobIoHDF5FileObject* self, PyObject* if (!is_array) { //write as a scalar switch(type.type()) { - case bob::io::s: + case bob::io::base::s: { auto value = PyBobIo_GetString(data); if (!value) return 0; self->f->replace<std::string>(path, pos, value.get()); Py_RETURN_NONE; } - case bob::io::b: + case bob::io::base::b: return PyBobIoHDF5File_ReplaceScalar<bool>(self, path, pos, data); - case bob::io::i8: + case bob::io::base::i8: return PyBobIoHDF5File_ReplaceScalar<int8_t>(self, path, pos, data); - case bob::io::i16: + case bob::io::base::i16: return PyBobIoHDF5File_ReplaceScalar<int16_t>(self, path, pos, data); - case bob::io::i32: + case bob::io::base::i32: return PyBobIoHDF5File_ReplaceScalar<int32_t>(self, path, pos, data); - case bob::io::i64: + case bob::io::base::i64: return PyBobIoHDF5File_ReplaceScalar<int64_t>(self, path, pos, data); - case bob::io::u8: + case bob::io::base::u8: return PyBobIoHDF5File_ReplaceScalar<uint8_t>(self, path, pos, data); - case bob::io::u16: + case bob::io::base::u16: return 
PyBobIoHDF5File_ReplaceScalar<uint16_t>(self, path, pos, data); - case bob::io::u32: + case bob::io::base::u32: return PyBobIoHDF5File_ReplaceScalar<uint32_t>(self, path, pos, data); - case bob::io::u64: + case bob::io::base::u64: return PyBobIoHDF5File_ReplaceScalar<uint64_t>(self, path, pos, data); - case bob::io::f32: + case bob::io::base::f32: return PyBobIoHDF5File_ReplaceScalar<float>(self, path, pos, data); - case bob::io::f64: + case bob::io::base::f64: return PyBobIoHDF5File_ReplaceScalar<double>(self, path, pos, data); - case bob::io::f128: + case bob::io::base::f128: return PyBobIoHDF5File_ReplaceScalar<long double>(self, path, pos, data); - case bob::io::c64: + case bob::io::base::c64: return PyBobIoHDF5File_ReplaceScalar<std::complex<float> >(self, path, pos, data); - case bob::io::c128: + case bob::io::base::c128: return PyBobIoHDF5File_ReplaceScalar<std::complex<double> >(self, path, pos, data); - case bob::io::c256: + case bob::io::base::c256: return PyBobIoHDF5File_ReplaceScalar<std::complex<long double> >(self, path, pos, data); default: break; @@ -1381,7 +1381,7 @@ static int PyBobIoHDF5File_AppendScalar(PyBobIoHDF5FileObject* self, static int PyBobIoHDF5File_InnerAppend(PyBobIoHDF5FileObject* self, const char* path, PyObject* data, Py_ssize_t compression) { - bob::io::HDF5Type type; + bob::io::base::HDF5Type type; PyObject* converted = 0; int is_array = PyBobIoHDF5File_GetObjectType(data, type, &converted); auto converted_ = make_xsafe(converted); @@ -1398,42 +1398,42 @@ static int PyBobIoHDF5File_InnerAppend(PyBobIoHDF5FileObject* self, const char* if (!is_array) { //write as a scalar switch(type.type()) { - case bob::io::s: + case bob::io::base::s: { auto value = PyBobIo_GetString(data); if (!value) return 0; self->f->append<std::string>(path, value.get()); return 1; } - case bob::io::b: + case bob::io::base::b: return PyBobIoHDF5File_AppendScalar<bool>(self, path, data); - case bob::io::i8: + case bob::io::base::i8: return 
PyBobIoHDF5File_AppendScalar<int8_t>(self, path, data); - case bob::io::i16: + case bob::io::base::i16: return PyBobIoHDF5File_AppendScalar<int16_t>(self, path, data); - case bob::io::i32: + case bob::io::base::i32: return PyBobIoHDF5File_AppendScalar<int32_t>(self, path, data); - case bob::io::i64: + case bob::io::base::i64: return PyBobIoHDF5File_AppendScalar<int64_t>(self, path, data); - case bob::io::u8: + case bob::io::base::u8: return PyBobIoHDF5File_AppendScalar<uint8_t>(self, path, data); - case bob::io::u16: + case bob::io::base::u16: return PyBobIoHDF5File_AppendScalar<uint16_t>(self, path, data); - case bob::io::u32: + case bob::io::base::u32: return PyBobIoHDF5File_AppendScalar<uint32_t>(self, path, data); - case bob::io::u64: + case bob::io::base::u64: return PyBobIoHDF5File_AppendScalar<uint64_t>(self, path, data); - case bob::io::f32: + case bob::io::base::f32: return PyBobIoHDF5File_AppendScalar<float>(self, path, data); - case bob::io::f64: + case bob::io::base::f64: return PyBobIoHDF5File_AppendScalar<double>(self, path, data); - case bob::io::f128: + case bob::io::base::f128: return PyBobIoHDF5File_AppendScalar<long double>(self, path, data); - case bob::io::c64: + case bob::io::base::c64: return PyBobIoHDF5File_AppendScalar<std::complex<float> >(self, path, data); - case bob::io::c128: + case bob::io::base::c128: return PyBobIoHDF5File_AppendScalar<std::complex<double> >(self, path, data); - case bob::io::c256: + case bob::io::base::c256: return PyBobIoHDF5File_AppendScalar<std::complex<long double> >(self, path, data); default: break; @@ -1578,7 +1578,7 @@ static PyObject* PyBobIoHDF5File_Set(PyBobIoHDF5FileObject* self, PyObject* args return 0; } - bob::io::HDF5Type type; + bob::io::base::HDF5Type type; PyObject* converted = 0; int is_array = PyBobIoHDF5File_GetObjectType(data, type, &converted); auto converted_ = make_xsafe(converted); @@ -1595,7 +1595,7 @@ static PyObject* PyBobIoHDF5File_Set(PyBobIoHDF5FileObject* self, PyObject* args if 
(!is_array) { //write as a scalar switch(type.type()) { - case bob::io::s: + case bob::io::base::s: { auto value = PyBobIo_GetString(data); if (!value) return 0; @@ -1603,35 +1603,35 @@ static PyObject* PyBobIoHDF5File_Set(PyBobIoHDF5FileObject* self, PyObject* args Py_RETURN_NONE; } break; - case bob::io::b: + case bob::io::base::b: return PyBobIoHDF5File_SetScalar<bool>(self, path, data); - case bob::io::i8: + case bob::io::base::i8: return PyBobIoHDF5File_SetScalar<int8_t>(self, path, data); - case bob::io::i16: + case bob::io::base::i16: return PyBobIoHDF5File_SetScalar<int16_t>(self, path, data); - case bob::io::i32: + case bob::io::base::i32: return PyBobIoHDF5File_SetScalar<int32_t>(self, path, data); - case bob::io::i64: + case bob::io::base::i64: return PyBobIoHDF5File_SetScalar<int64_t>(self, path, data); - case bob::io::u8: + case bob::io::base::u8: return PyBobIoHDF5File_SetScalar<uint8_t>(self, path, data); - case bob::io::u16: + case bob::io::base::u16: return PyBobIoHDF5File_SetScalar<uint16_t>(self, path, data); - case bob::io::u32: + case bob::io::base::u32: return PyBobIoHDF5File_SetScalar<uint32_t>(self, path, data); - case bob::io::u64: + case bob::io::base::u64: return PyBobIoHDF5File_SetScalar<uint64_t>(self, path, data); - case bob::io::f32: + case bob::io::base::f32: return PyBobIoHDF5File_SetScalar<float>(self, path, data); - case bob::io::f64: + case bob::io::base::f64: return PyBobIoHDF5File_SetScalar<double>(self, path, data); - case bob::io::f128: + case bob::io::base::f128: return PyBobIoHDF5File_SetScalar<long double>(self, path, data); - case bob::io::c64: + case bob::io::base::c64: return PyBobIoHDF5File_SetScalar<std::complex<float> >(self, path, data); - case bob::io::c128: + case bob::io::base::c128: return PyBobIoHDF5File_SetScalar<std::complex<double> >(self, path, data); - case bob::io::c256: + case bob::io::base::c256: return PyBobIoHDF5File_SetScalar<std::complex<long double> >(self, path, data); default: break; @@ -1758,7 
+1758,7 @@ file\n\ template <typename T> static PyObject* PyBobIoHDF5File_ReadScalarAttribute (PyBobIoHDF5FileObject* self, const char* path, const char* name, - const bob::io::HDF5Type& type) { + const bob::io::base::HDF5Type& type) { T value; try { self->f->read_attribute(path, name, type, static_cast<void*>(&value)); @@ -1778,7 +1778,7 @@ template <typename T> static PyObject* PyBobIoHDF5File_ReadScalarAttribute template <> PyObject* PyBobIoHDF5File_ReadScalarAttribute<const char*> (PyBobIoHDF5FileObject* self, const char* path, const char* name, - const bob::io::HDF5Type& type) { + const bob::io::base::HDF5Type& type) { std::string retval; try { self->f->getAttribute(path, name, retval); @@ -1797,46 +1797,46 @@ template <> PyObject* PyBobIoHDF5File_ReadScalarAttribute<const char*> } static PyObject* PyBobIoHDF5File_ReadAttribute(PyBobIoHDF5FileObject* self, - const char* path, const char* name, const bob::io::HDF5Type& type) { + const char* path, const char* name, const bob::io::base::HDF5Type& type) { //no error detection: this should be done before reaching this method - const bob::io::HDF5Shape& shape = type.shape(); + const bob::io::base::HDF5Shape& shape = type.shape(); - if (type.type() == bob::io::s || (shape.n() == 1 && shape[0] == 1)) { + if (type.type() == bob::io::base::s || (shape.n() == 1 && shape[0] == 1)) { //read as scalar switch(type.type()) { - case bob::io::s: + case bob::io::base::s: return PyBobIoHDF5File_ReadScalarAttribute<const char*>(self, path, name, type); - case bob::io::b: + case bob::io::base::b: return PyBobIoHDF5File_ReadScalarAttribute<bool>(self, path, name, type); - case bob::io::i8: + case bob::io::base::i8: return PyBobIoHDF5File_ReadScalarAttribute<int8_t>(self, path, name, type); - case bob::io::i16: + case bob::io::base::i16: return PyBobIoHDF5File_ReadScalarAttribute<int16_t>(self, path, name, type); - case bob::io::i32: + case bob::io::base::i32: return PyBobIoHDF5File_ReadScalarAttribute<int32_t>(self, path, name, 
type); - case bob::io::i64: + case bob::io::base::i64: return PyBobIoHDF5File_ReadScalarAttribute<int64_t>(self, path, name, type); - case bob::io::u8: + case bob::io::base::u8: return PyBobIoHDF5File_ReadScalarAttribute<uint8_t>(self, path, name, type); - case bob::io::u16: + case bob::io::base::u16: return PyBobIoHDF5File_ReadScalarAttribute<uint16_t>(self, path, name, type); - case bob::io::u32: + case bob::io::base::u32: return PyBobIoHDF5File_ReadScalarAttribute<uint32_t>(self, path, name, type); - case bob::io::u64: + case bob::io::base::u64: return PyBobIoHDF5File_ReadScalarAttribute<uint64_t>(self, path, name, type); - case bob::io::f32: + case bob::io::base::f32: return PyBobIoHDF5File_ReadScalarAttribute<float>(self, path, name, type); - case bob::io::f64: + case bob::io::base::f64: return PyBobIoHDF5File_ReadScalarAttribute<double>(self, path, name, type); - case bob::io::f128: + case bob::io::base::f128: return PyBobIoHDF5File_ReadScalarAttribute<long double>(self, path, name, type); - case bob::io::c64: + case bob::io::base::c64: return PyBobIoHDF5File_ReadScalarAttribute<std::complex<float> >(self, path, name, type); - case bob::io::c128: + case bob::io::base::c128: return PyBobIoHDF5File_ReadScalarAttribute<std::complex<double> >(self, path, name, type); - case bob::io::c256: + case bob::io::base::c256: return PyBobIoHDF5File_ReadScalarAttribute<std::complex<long double> >(self, path, name, type); default: break; @@ -1882,7 +1882,7 @@ static PyObject* PyBobIoHDF5File_GetAttribute(PyBobIoHDF5FileObject* self, PyObj const char* path = "."; if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|s", kwlist, &name, &path)) return 0; - bob::io::HDF5Type type; + bob::io::base::HDF5Type type; try { self->f->getAttributeType(path, name, type); @@ -1898,7 +1898,7 @@ static PyObject* PyBobIoHDF5File_GetAttribute(PyBobIoHDF5FileObject* self, PyObj return 0; } - if (type.type() == bob::io::unsupported) { + if (type.type() == bob::io::base::unsupported) { const char* 
filename = "<unknown>"; try{ filename = self->f->filename().c_str(); } catch(...){} boost::format m("unsupported HDF5 data type detected for attribute `%s' at path `%s' of file `%s' - returning None"); @@ -1943,7 +1943,7 @@ static PyObject* PyBobIoHDF5File_GetAttributes(PyBobIoHDF5FileObject* self, PyOb const char* path = "."; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s", kwlist, &path)) return 0; - std::map<std::string, bob::io::HDF5Type> attributes; + std::map<std::string, bob::io::base::HDF5Type> attributes; self->f->listAttributes(path, attributes); PyObject* retval = PyDict_New(); if (!retval) return 0; @@ -1951,7 +1951,7 @@ static PyObject* PyBobIoHDF5File_GetAttributes(PyBobIoHDF5FileObject* self, PyOb for (auto k=attributes.begin(); k!=attributes.end(); ++k) { PyObject* item = 0; - if (k->second.type() == bob::io::unsupported) { + if (k->second.type() == bob::io::base::unsupported) { const char* filename = "<unknown>"; try{ filename = self->f->filename().c_str(); } catch(...){} boost::format m("unsupported HDF5 data type detected for attribute `%s' at path `%s' of file `%s' - returning None"); @@ -1996,7 +1996,7 @@ a specific attribute, use :py:meth:`HDF5File.get_attribute()`.\n\ template <typename T> PyObject* PyBobIoHDF5File_WriteScalarAttribute (PyBobIoHDF5FileObject* self, const char* path, const char* name, - const bob::io::HDF5Type& type, PyObject* o) { + const bob::io::base::HDF5Type& type, PyObject* o) { T value = PyBlitzArrayCxx_AsCScalar<T>(o); if (PyErr_Occurred()) return 0; @@ -2021,7 +2021,7 @@ template <typename T> PyObject* PyBobIoHDF5File_WriteScalarAttribute template <> PyObject* PyBobIoHDF5File_WriteScalarAttribute<const char*> (PyBobIoHDF5FileObject* self, const char* path, const char* name, - const bob::io::HDF5Type& type, PyObject* o) { + const bob::io::base::HDF5Type& type, PyObject* o) { auto value = PyBobIo_GetString(o); if (!value) return 0; @@ -2045,44 +2045,44 @@ template <> PyObject* 
PyBobIoHDF5File_WriteScalarAttribute<const char*> } static PyObject* PyBobIoHDF5File_WriteAttribute(PyBobIoHDF5FileObject* self, - const char* path, const char* name, const bob::io::HDF5Type& type, + const char* path, const char* name, const bob::io::base::HDF5Type& type, PyObject* o, int is_array, PyObject* converted) { //no error detection: this should be done before reaching this method if (!is_array) { //write as a scalar switch(type.type()) { - case bob::io::s: + case bob::io::base::s: return PyBobIoHDF5File_WriteScalarAttribute<const char*>(self, path, name, type, o); - case bob::io::b: + case bob::io::base::b: return PyBobIoHDF5File_WriteScalarAttribute<bool>(self, path, name, type, o); - case bob::io::i8: + case bob::io::base::i8: return PyBobIoHDF5File_WriteScalarAttribute<int8_t>(self, path, name, type, o); - case bob::io::i16: + case bob::io::base::i16: return PyBobIoHDF5File_WriteScalarAttribute<int16_t>(self, path, name, type, o); - case bob::io::i32: + case bob::io::base::i32: return PyBobIoHDF5File_WriteScalarAttribute<int32_t>(self, path, name, type, o); - case bob::io::i64: + case bob::io::base::i64: return PyBobIoHDF5File_WriteScalarAttribute<int64_t>(self, path, name, type, o); - case bob::io::u8: + case bob::io::base::u8: return PyBobIoHDF5File_WriteScalarAttribute<uint8_t>(self, path, name, type, o); - case bob::io::u16: + case bob::io::base::u16: return PyBobIoHDF5File_WriteScalarAttribute<uint16_t>(self, path, name, type, o); - case bob::io::u32: + case bob::io::base::u32: return PyBobIoHDF5File_WriteScalarAttribute<uint32_t>(self, path, name, type, o); - case bob::io::u64: + case bob::io::base::u64: return PyBobIoHDF5File_WriteScalarAttribute<uint64_t>(self, path, name, type, o); - case bob::io::f32: + case bob::io::base::f32: return PyBobIoHDF5File_WriteScalarAttribute<float>(self, path, name, type, o); - case bob::io::f64: + case bob::io::base::f64: return PyBobIoHDF5File_WriteScalarAttribute<double>(self, path, name, type, o); - case 
bob::io::f128: + case bob::io::base::f128: return PyBobIoHDF5File_WriteScalarAttribute<long double>(self, path, name, type, o); - case bob::io::c64: + case bob::io::base::c64: return PyBobIoHDF5File_WriteScalarAttribute<std::complex<float> >(self, path, name, type, o); - case bob::io::c128: + case bob::io::base::c128: return PyBobIoHDF5File_WriteScalarAttribute<std::complex<double> >(self, path, name, type, o); - case bob::io::c256: + case bob::io::base::c256: return PyBobIoHDF5File_WriteScalarAttribute<std::complex<long double> >(self, path, name, type, o); default: break; @@ -2142,7 +2142,7 @@ static PyObject* PyBobIoHDF5File_SetAttribute(PyBobIoHDF5FileObject* self, PyObj const char* path = "."; if (!PyArg_ParseTupleAndKeywords(args, kwds, "sO|s", kwlist, &name, &value, &path)) return 0; - bob::io::HDF5Type type; + bob::io::base::HDF5Type type; PyObject* converted = 0; int is_array = PyBobIoHDF5File_GetObjectType(value, type, &converted); auto converted_ = make_xsafe(converted); @@ -2212,7 +2212,7 @@ static PyObject* PyBobIoHDF5File_SetAttributes(PyBobIoHDF5FileObject* self, PyOb PyObject *key, *value; Py_ssize_t pos = 0; while (PyDict_Next(attrs, &pos, &key, &value)) { - bob::io::HDF5Type type; + bob::io::base::HDF5Type type; PyObject* converted = 0; auto name = PyBobIo_GetString(key); @@ -2361,7 +2361,7 @@ static PyObject* PyBobIoHDF5File_DelAttributes(PyBobIoHDF5FileObject* self, PyOb } //else, find the attributes and remove all of them - std::map<std::string, bob::io::HDF5Type> attributes; + std::map<std::string, bob::io::base::HDF5Type> attributes; try { self->f->listAttributes(path, attributes); } @@ -2751,8 +2751,3 @@ PyTypeObject PyBobIoHDF5File_Type = { 0, /* tp_alloc */ PyBobIoHDF5File_New, /* tp_new */ }; - -/** - .def("__contains__", &bob::io::HDF5File::contains, (arg("self"), arg("key")), "Returns True if the file contains an HDF5 dataset with a given path") - -**/ diff --git a/bob/io/base/include/bob.io.base/CodecRegistry.h 
b/bob/io/base/include/bob.io.base/CodecRegistry.h new file mode 100644 index 0000000000000000000000000000000000000000..019bb5770959199f2a27c6df1594770c65dcf1f6 --- /dev/null +++ b/bob/io/base/include/bob.io.base/CodecRegistry.h @@ -0,0 +1,81 @@ +/** + * @date Tue Oct 25 23:25:46 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#ifndef BOB_IO_BASE_CODECREGISTRY_H +#define BOB_IO_BASE_CODECREGISTRY_H + +#include <map> +#include <string> +#include <boost/shared_ptr.hpp> + +#include <bob.io.base/File.h> + +namespace bob { namespace io { namespace base { + + /** + * The CodecRegistry holds registered converters for different types of + * input files. It manages registration and helps the user in picking the + * best codecs for their data. This class is a singleton (single global + * variable). + */ + class CodecRegistry { + + public: //static access + + /** + * Returns the singleton + */ + static boost::shared_ptr<CodecRegistry> instance(); + + static const std::map<std::string, std::string>& getExtensions () { + boost::shared_ptr<CodecRegistry> ptr = instance(); + return ptr->s_extension2description; + } + + /** + * Sets and unsets double-registration ignore flag + */ + static bool ignoreDoubleRegistration() { return instance()->s_ignore; } + static void ignoreDoubleRegistration(bool v) { instance()->s_ignore = v; } + + public: //object access + + void registerExtension(const char* extension, const char* description, + file_factory_t factory); + + void deregisterFactory(file_factory_t factory); + void deregisterExtension(const char* ext); + + /** + * Returns the codec description, if an extension was registered with the + * matching input string. Otherwise, returns 0. 
+ */ + const char* getDescription(const char* ext); + + file_factory_t findByExtension(const char* ext); + file_factory_t findByFilenameExtension(const char* fn); + + bool isRegistered(const char* ext); + + private: + + CodecRegistry(): s_extension2codec(), s_ignore(false) {} + + // Not implemented + CodecRegistry( const CodecRegistry&); + + std::map<std::string, file_factory_t> s_extension2codec; + std::map<std::string, std::string> s_extension2description; + bool s_ignore; ///< shall I ignore double-registrations? + + }; + +}}} + +#endif /* BOB_IO_BASE_CODECREGISTRY_H */ diff --git a/bob/io/base/include/bob.io.base/File.h b/bob/io/base/include/bob.io.base/File.h new file mode 100644 index 0000000000000000000000000000000000000000..e41bccdd9c4983f2be3526b3d1a24cacab150b87 --- /dev/null +++ b/bob/io/base/include/bob.io.base/File.h @@ -0,0 +1,184 @@ +/** + * @date Tue Oct 25 23:25:46 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief Describes a generic API for reading and writing data to external + * files. + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#ifndef BOB_IO_BASE_FILE_H +#define BOB_IO_BASE_FILE_H + +#include <boost/shared_ptr.hpp> +#include <bob.io.base/array.h> +#include <bob.io.base/blitz_array.h> + +namespace bob { namespace io { namespace base { + + /** + * @brief Files deal with reading and writing multiple (homogeneous) array + * data to and from files. + */ + class File { + + public: //abstract API + + virtual ~File(); + + /** + * The filename this array codec current points to + */ + virtual const char* filename() const =0; + + /** + * The typeinfo of data within this file, if it is supposed to be read as + * as a sequence of arrays + */ + virtual const bob::io::base::array::typeinfo& type() const =0; + + /** + * The typeinfo of data within this file, if it is supposed to be read as + * a single array. 
+ */ + virtual const bob::io::base::array::typeinfo& type_all() const =0; + + /** + * The number of arrays available in this file, if it is supposed to be + * read as a sequence of arrays. + */ + virtual size_t size() const =0; + + /** + * Returns the name of the codec, for compatibility reasons. + */ + virtual const char* name() const =0; + + /** + * Loads the data of the array into memory. If an index is specified + * loads the specific array data from the file, otherwise, loads the data + * at position 0. + * + * This method will check to see if the given array has enough space. If + * that is not the case, it will allocate enough space internally by + * reseting the input array and putting the data read from the file + * inside. + */ + virtual void read(bob::io::base::array::interface& buffer, size_t index) =0; + + /** + * Loads all the data available at the file into a single in-memory + * array. + * + * This method will check to see if the given array has enough space. If + * that is not the case, it will allocate enough space internally by + * reseting the input array and putting the data read from the file + * inside. + */ + virtual void read_all(bob::io::base::array::interface& buffer) =0; + + /** + * Appends the given buffer into a file. If the file does not exist, + * create a new file, else, makes sure that the inserted array respects + * the previously set file structure. + * + * Returns the current position of the newly written array. + */ + virtual size_t append (const bob::io::base::array::interface& buffer) =0; + + /** + * Writes the data from the given buffer into the file and act like it is + * the only piece of data that will ever be written to such a file. Not + * more data appending may happen after a call to this method. 
+ */ + virtual void write (const bob::io::base::array::interface& buffer) =0; + + public: //blitz::Array specific API + + /** + * This method returns a copy of the array in the file with the element + * type you wish (just have to get the number of dimensions right!). + */ + template <typename T, int N> blitz::Array<T,N> cast(size_t index) { + bob::io::base::array::blitz_array tmp(type()); + read(tmp, index); + return tmp.cast<T,N>(); + } + + /** + * This method returns a copy of the array in the file with the element + * type you wish (just have to get the number of dimensions right!). + * + * This variant loads all data available into the file in a single array. + */ + template <typename T, int N> blitz::Array<T,N> cast_all() { + bob::io::base::array::blitz_array tmp(type_all()); + read_all(tmp); + return tmp.cast<T,N>(); + } + + template <typename T, int N> void read(blitz::Array<T,N>& io, + size_t index) { + bob::io::base::array::blitz_array use_this(io); + use_this.set(type()); + read(use_this, index); + io.reference(use_this.get<T,N>()); + } + + template <typename T, int N> blitz::Array<T,N> read(size_t index) { + bob::io::base::array::blitz_array tmp(type()); + read(tmp, index); + return tmp.get<T,N>(); + } + + template <typename T, int N> void read_all(blitz::Array<T,N>& io) { + bob::io::base::array::blitz_array use_this(io); + use_this.set(type_all()); + read_all(use_this); + io.reference(use_this.get<T,N>()); + } + + template <typename T, int N> blitz::Array<T,N> read_all() { + bob::io::base::array::blitz_array tmp(type_all()); + read_all(tmp); + return tmp.get<T,N>(); + } + + template <typename T, int N> size_t append(const blitz::Array<T,N>& in) { + bob::io::base::array::blitz_array use_this(in); + return append(use_this); + } + + template <typename T, int N> void write (const blitz::Array<T,N>& in) { + bob::io::base::array::blitz_array use_this(in); + write(use_this); + } + + }; + + /** + * @brief This defines the factory method F that can create 
codecs. Your + * task, as a codec developer is to create one of such methods for each of + * your codecs and statically register them to the codec registry. + * + * Here are the meanings of the mode flag that should be respected by your + * factory implementation: + * + * 'r': opens for reading only - no modifications can occur; it is an + * error to open a file that does not exist for read-only operations. + * 'w': opens for reading and writing, but truncates the file if it + * exists; it is not an error to open files that do not exist with + * this flag. + * 'a': opens for reading and writing - any type of modification can + * occur. If the file does not exist, this flag is effectively like + * 'w'. + * + * Returns a newly allocated File object that can read and write data to the + * file using a specific backend. + */ + typedef boost::shared_ptr<File> (*file_factory_t) (const char* filename, char mode); + +}}} + +#endif /* BOB_IO_BASE_FILE_H */ diff --git a/bob/io/base/include/bob.io.base/HDF5Attribute.h b/bob/io/base/include/bob.io.base/HDF5Attribute.h new file mode 100644 index 0000000000000000000000000000000000000000..3af8a7a697f225e9d01ba1ffed856445774a736b --- /dev/null +++ b/bob/io/base/include/bob.io.base/HDF5Attribute.h @@ -0,0 +1,87 @@ +/** + * @author Andre Anjos <andre.anjos@idiap.ch> + * @date Fri 2 Mar 08:19:03 2012 + * + * @brief Simple attribute support for HDF5 files + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#ifndef BOB_IO_BASE_HDF5ATTRIBUTE_H +#define BOB_IO_BASE_HDF5ATTRIBUTE_H + +#include <string> +#include <map> +#include <boost/shared_ptr.hpp> +#include <hdf5.h> + +#include <bob.io.base/HDF5Types.h> + +namespace bob { namespace io { namespace base { namespace detail { namespace hdf5 { + + /** + * Finds out the type of the attribute, if it exists, raises otherwise. 
+ */ + void gettype_attribute (const boost::shared_ptr<hid_t> location, + const std::string& name, HDF5Type& type); + + /** + * Reads the attribute value, place it in "buffer" + */ + void read_attribute (const boost::shared_ptr<hid_t> location, + const std::string& name, const bob::io::base::HDF5Type& dest, void* buffer); + + /** + * Writes an attribute value from "buffer" + */ + void write_attribute (boost::shared_ptr<hid_t> location, + const std::string& name, const bob::io::base::HDF5Type& dest, + const void* buffer); + + /** + * Sets a scalar attribute on the given location. Setting an existing + * attribute overwrites its value. + * + * @note Only simple scalars are supported for the time being + */ + template <typename T> void set_attribute(boost::shared_ptr<hid_t> location, + const std::string& name, const T& v) { + bob::io::base::HDF5Type dest_type(v); + write_attribute(location, name, dest_type, + reinterpret_cast<const void*>(&v)); + } + + /** + * Reads an attribute from the current group. Raises an error if such + * attribute does not exist on the group. To check for existence, use + * has_attribute(). + */ + template <typename T> T get_attribute(const boost::shared_ptr<hid_t> location, + const std::string& name) { + T v; + bob::io::base::HDF5Type dest_type(v); + read_attribute(location, name, dest_type, reinterpret_cast<void*>(&v)); + return v; + } + + /** + * Checks if a certain attribute exists in this location. + */ + bool has_attribute(const boost::shared_ptr<hid_t> location, + const std::string& name); + + /** + * Deletes an attribute from a location. 
+ */ + void delete_attribute(boost::shared_ptr<hid_t> location, + const std::string& name); + + /** + * Lists all attributes and associated types currently available somewhere + */ + void list_attributes(boost::shared_ptr<hid_t> location, + std::map<std::string, bob::io::base::HDF5Type>& attributes); + +}}}}} + +#endif /* BOB_IO_BASE_HDF5ATTRIBUTE_H */ diff --git a/bob/io/base/include/bob.io.base/HDF5Dataset.h b/bob/io/base/include/bob.io.base/HDF5Dataset.h new file mode 100644 index 0000000000000000000000000000000000000000..557241079ca1a289f8ea5d778cb082837ea648a9 --- /dev/null +++ b/bob/io/base/include/bob.io.base/HDF5Dataset.h @@ -0,0 +1,536 @@ +/** + * @author Andre Anjos <andre.anjos@idiap.ch> + * @date Wed 29 Feb 17:27:45 2012 + * + * @brief Describes HDF5 datasets + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#ifndef BOB_IO_BASE_HDF5DATASET_H +#define BOB_IO_BASE_HDF5DATASET_H + +#include <vector> + +#include <boost/shared_ptr.hpp> +#include <blitz/array.h> +#include <hdf5.h> + +#include <bob.core/assert.h> +#include <bob.core/array_copy.h> + +#include <bob.io.base/HDF5Attribute.h> +#include <bob.io.base/HDF5Types.h> + +namespace bob { namespace io { namespace base { namespace detail { namespace hdf5 { + + class File; + class Group; + + /** + * An HDF5 C-style dataset that knows how to close itself. + */ + class Dataset { + + public: //better to protect? + + /** + * Creates a new HDF5 dataset by reading its contents from a certain + * file. + */ + Dataset(boost::shared_ptr<Group> parent, const std::string& name); + + /** + * Creates a new HDF5 dataset from scratch and inserts it in the given + * group. If the Dataset already exists on file and the types are + * compatible, we attach to that type, otherwise, we raise an exception. + * + * If a new Dataset is to be created, you can also set if you would like + * to have as a list and the compression level. 
Note these settings have + * no effect if the Dataset already exists on file, in which case the + * current settings for that dataset are respected. The maximum value for + * the gzip compression is 9. The value of zero turns compression off + * (the default). + * + * The effect of setting "list" to false is that the created dataset: + * + * a) Will not be expandable (chunked) + * b) Will contain the exact number of dimensions of the input type. + * + * When you set "list" to true (the default), datasets are created with + * chunking automatically enabled (the chunk size is set to the size of + * the given variable) and an extra dimension is inserted to accommodate + * list operations. + */ + Dataset(boost::shared_ptr<Group> parent, const std::string& name, + const bob::io::base::HDF5Type& type, bool list=true, + size_t compression=0); + + public: //api + + /** + * Destructor virtualization + */ + virtual ~Dataset(); + + /** + * Returns the number of objects installed at this dataset from the + * perspective of the default compatible type. + */ + size_t size() const; + + /** + * Returns the number of objects installed at this dataset from the + * perspective of the default compatible type. If the given type is not + * compatible, raises a type error. + */ + size_t size(const bob::io::base::HDF5Type& type) const; + + /** + * Get parent group + */ + virtual const boost::shared_ptr<Group> parent() const; + virtual boost::shared_ptr<Group> parent(); + + /** + * Filename where I'm sitting + */ + virtual const std::string& filename() const; + + /** + * Full path to myself. Constructed each time it is called. + */ + virtual std::string path() const; + + /** + * Path with filename. Constructed each time it is called. 
+ */ + virtual std::string url() const; + + /** + * Access file + */ + virtual const boost::shared_ptr<File> file() const; + virtual boost::shared_ptr<File> file(); + + /** + * My name + */ + virtual const std::string& name() const { + return m_name; + } + + /** + * Accesses the current location id of this dataset + */ + const boost::shared_ptr<hid_t> location() const { + return m_id; + } + boost::shared_ptr<hid_t> location() { + return m_id; + } + + /** + * DATA READING FUNCTIONALITY + */ + + /** + * Reads data from the file into a scalar. The conditions bellow have to + * be respected: + * + * a. My internal shape is 1D **OR** my internal shape is 2D, but the + * extent of the second dimension is 1. + * b. The indexed position exists + * + * If the internal shape is not like defined above, raises a type error. + * If the indexed position does not exist, raises an index error. + */ + template <typename T> void read(size_t index, T& value) { + bob::io::base::HDF5Type dest_type(value); + read_buffer(index, dest_type, reinterpret_cast<void*>(&value)); + } + + /** + * Reads data from the file into a scalar (allocated internally). The + * same conditions as for read(index, value) apply. + */ + template <typename T> T read(size_t index) { + T retval; + read(index, retval); + return retval; + } + + /** + * Reads data from the file into a scalar. This is equivalent to using + * read(0). The same conditions as for read(index=0, value) apply. + */ + template <typename T> T read() { + T retval; + read(0, retval); + return retval; + } + + /** + * Reads data from the file into a array. The following conditions have + * to be respected: + * + * a. My internal shape is the same as the shape of the given value + * **OR** my internal shape has one more dimension as the given value. + * In this case, the first dimension of the internal shape is + * considered to be an index and the remaining shape values the + * dimension of the value to be read. 
The given array has to be + * compatible with this re-defined N-1 shape. + * b. The indexed position exists + * + * If the internal shape is not like defined above, raises a type error. + * If the index does not exist, raises an index error. + * + * @param index Which of the arrays to read in the current dataset, by + * order + * @param value The output array data will be stored inside this + * variable. This variable has to be a zero-based C-style contiguous + * storage array. If that is not the case, we will raise an exception. + */ + template <typename T, int N> + void readArray(size_t index, blitz::Array<T,N>& value) { + bob::core::array::assertCZeroBaseContiguous(value); + bob::io::base::HDF5Type dest_type(value); + read_buffer(index, dest_type, reinterpret_cast<void*>(value.data())); + } + + /** + * Reads data from the file into an array allocated dynamically. The same + * conditions as for readArray(index, value) apply. + * + * @param index Which of the arrays to read in the current dataset, by + * order + */ + template <typename T, int N> + blitz::Array<T,N> readArray(size_t index) { + for (size_t k=m_descr.size(); k>0; --k) { + const bob::io::base::HDF5Shape& S = m_descr[k-1].type.shape(); + if(S.n() == N) { + blitz::TinyVector<int,N> shape; + S.set(shape); + blitz::Array<T,N> retval(shape); + readArray(index, retval); + return retval; + } + } + boost::format m("trying to read or write `%s' at `%s' that only accepts `%s'"); + m % "unknown dynamic shape" % url() % m_descr[0].type.str(); + throw std::runtime_error(m.str()); + } + + /** + * Reads data from the file into a array. This is equivalent to using + * readArray(0, value). The same conditions as for readArray(index=0, + * value) apply. + * + * @param value The output array data will be stored inside this + * variable. This variable has to be a zero-based C-style contiguous + * storage array. If that is not the case, we will raise an exception. 
+ */ + template <typename T, int N> + void readArray(blitz::Array<T,N>& value) { + readArray(0, value); + } + + /** + * Reads data from the file into a array. This is equivalent to using + * readArray(0). The same conditions as for readArray(index=0, value) + * apply. + */ + template <typename T, int N> + blitz::Array<T,N> readArray() { + return readArray<T,N>(0); + } + + /** + * DATA WRITING FUNCTIONALITY + */ + + /** + * Modifies the value of a scalar inside the file. Modifying a value + * requires that the expected internal shape for this dataset and the + * shape of the given scalar are consistent. To replace a scalar the + * conditions bellow have to be respected: + * + * a. The internal shape is 1D **OR** the internal shape is 2D, but the + * second dimension of the internal shape has is extent == 1. + * b. The given indexing position exists + * + * If the above conditions are not met, an exception is raised. + */ + template <typename T> void replace(size_t index, const T& value) { + bob::io::base::HDF5Type dest_type(value); + write_buffer(index, dest_type, reinterpret_cast<const void*>(&value)); + } + + /** + * Modifies the value of a scalar inside the file. This is equivalent to + * using replace(0, value). The same conditions as for replace(index=0, + * value) apply. + */ + template <typename T> void replace(const T& value) { + replace(0, value); + } + + /** + * Inserts a scalar in the current (existing ;-) dataset. This will + * trigger writing data to the file. Adding a scalar value requires that + * the expected internal shape for this dataset and the shape of the + * given scalar are consistent. To add a scalar the conditions + * bellow have to be respected: + * + * a. The internal shape is 1D **OR** the internal shape is 2D, but the + * second dimension of the internal shape has is extent == 1. + * b. This dataset is expandible (chunked) + * + * If the above conditions are not met, an exception is raised. 
+ */ + template <typename T> void add(const T& value) { + bob::io::base::HDF5Type dest_type(value); + extend_buffer(dest_type, reinterpret_cast<const void*>(&value)); + } + + /** + * Replaces data at the file using a new array. Replacing an existing + * array requires shape consistence. The following conditions should be + * met: + * + * a. My internal shape is the same as the shape of the given value + * **OR** my internal shape has one more dimension as the given value. + * In this case, the first dimension of the internal shape is + * considered to be an index and the remaining shape values the + * dimension of the value to be read. The given array has to be + * compatible with this re-defined N-1 shape. + * b. The given indexing position exists. + * + * If the internal shape is not like defined above, raises a type error. + * If the indexed position does not exist, raises an index error. + * + * @param index Which of the arrays to read in the current dataset, by + * order + * @param value The output array data will be stored inside this + * variable. This variable has to be a zero-based C-style contiguous + * storage array. If that is not the case, we will raise an exception. + */ + template <typename T, int N> + void replaceArray(size_t index, const blitz::Array<T,N>& value) { + bob::io::base::HDF5Type dest_type(value); + if(!bob::core::array::isCZeroBaseContiguous(value)) { + blitz::Array<T,N> tmp = bob::core::array::ccopy(value); + write_buffer(index, dest_type, reinterpret_cast<const void*>(tmp.data())); + } + else { + write_buffer(index, dest_type, + reinterpret_cast<const void*>(value.data())); + } + } + + /** + * Replaces data at the file using a new array. This is equivalent to + * calling replaceArray(0, value). The conditions for + * replaceArray(index=0, value) apply. + * + * @param value The output array data will be stored inside this + * variable. This variable has to be a zero-based C-style contiguous + * storage array. 
If that is not the case, we will raise an exception. + */ + template <typename T, int N> + void replaceArray(const blitz::Array<T,N>& value) { + replaceArray(0, value); + } + + /** + * Appends a array in a certain subdirectory of the file. If that + * subdirectory (or a "group" in HDF5 parlance) does not exist, it is + * created. If the dataset does not exist, it is created, otherwise, we + * append to it. In this case, the dimensionality of the scalar has to be + * compatible with the existing dataset shape (or "dataspace" in HDF5 + * parlance). If you want to do this, first unlink and than use one of + * the add() methods. + */ + template <typename T, int N> + void addArray(const blitz::Array<T,N>& value) { + bob::io::base::HDF5Type dest_type(value); + if(!bob::core::array::isCZeroBaseContiguous(value)) { + blitz::Array<T,N> tmp = bob::core::array::ccopy(value); + extend_buffer(dest_type, reinterpret_cast<const void*>(tmp.data())); + } + else { + extend_buffer(dest_type, reinterpret_cast<const void*>(value.data())); + } + } + + private: //apis + + /** + * Selects a bit of the file to be affected at the next read or write + * operation. This method encapsulate calls to H5Sselect_hyperslab(). + * + * The index is checked for existence as well as the consistence of the + * destination type. + */ + std::vector<bob::io::base::HDF5Descriptor>::iterator select (size_t index, + const bob::io::base::HDF5Type& dest); + + public: //direct access for other bindings -- don't use these! + + /** + * Reads a previously selected area into the given (user) buffer. + */ + void read_buffer (size_t index, const bob::io::base::HDF5Type& dest, void* buffer); + + /** + * Writes the contents of a given buffer into the file. The area that the + * data will occupy should have been selected beforehand. + */ + void write_buffer (size_t index, const bob::io::base::HDF5Type& dest, + const void* buffer); + + /** + * Extend the dataset with one extra variable. 
+ */ + void extend_buffer (const bob::io::base::HDF5Type& dest, const void* buffer); + + public: //attribute support + + /** + * Gets the current type set for an attribute + */ + void gettype_attribute(const std::string& name, + HDF5Type& type) const; + + /** + * Sets a scalar attribute on the current group. Setting an existing + * attribute overwrites its value. + */ + template <typename T> void set_attribute(const std::string& name, + const T& v) { + bob::io::base::HDF5Type dest_type(v); + write_attribute(name, dest_type, reinterpret_cast<const void*>(&v)); + } + + /** + * Reads an attribute from the current dataset. Raises an error if such + * attribute does not exist on the group. To check for existence, use + * has_attribute(). + */ + template <typename T> T get_attribute(const std::string& name) const { + T v; + bob::io::base::HDF5Type dest_type(v); + read_attribute(name, dest_type, reinterpret_cast<void*>(&v)); + return v; + } + + /** + * Checks if a certain attribute exists in this group. + */ + bool has_attribute(const std::string& name) const; + + /** + * Deletes an attribute + */ + void delete_attribute(const std::string& name); + + /** + * List attributes available on this dataset. + */ + void list_attributes(std::map<std::string, bob::io::base::HDF5Type>& attributes) const; + + public: //array attribute support + + /** + * Sets a array attribute on the current group. Setting an existing + * attribute overwrites its value. If the attribute exists it is erased + * and re-written. 
+ */ + template <typename T, int N> void set_array_attribute(const std::string& name, + const blitz::Array<T,N>& v) { + bob::io::base::HDF5Type dest_type(v); + if(!bob::core::array::isCZeroBaseContiguous(v)) { + blitz::Array<T,N> tmp = bob::core::array::ccopy(v); + write_attribute(name, dest_type, reinterpret_cast<const void*>(tmp.data())); + } + else { + write_attribute(name, dest_type, reinterpret_cast<const void*>(v.data())); + } + } + + /** + * Reads an attribute from the current dataset. Raises an error if such + * attribute does not exist on the group. To check for existence, use + * has_attribute(). + */ + template <typename T, int N> blitz::Array<T,N> get_array_attribute(const std::string& name) const { + blitz::Array<T,N> v; + bob::io::base::HDF5Type dest_type(v); + read_attribute(name, dest_type, reinterpret_cast<void*>(v.data())); + return v; + } + + /** + * Reads an attribute from the current dataset. Places the data in an + * already allocated array. + */ + template <typename T, int N> void get_array_attribute(const std::string& name, + blitz::Array<T,N>& v) const { + bob::io::base::HDF5Type dest_type(v); + read_attribute(name, dest_type, reinterpret_cast<void*>(v.data())); + } + + public: //buffer attribute support + + /** + * Reads an attribute into a user buffer. It is the user's responsibility + * to have a buffer that represents the given type. + */ + void read_attribute (const std::string& name, + const bob::io::base::HDF5Type& dest, void* buffer) const; + + /** + * Writes the contents of a given buffer into the attribute. 
+ */ + void write_attribute (const std::string& name, + const bob::io::base::HDF5Type& dest, const void* buffer); + + private: //not implemented + + /** + * Copies the contents of an existing dataset -- not implemented + */ + Dataset(const Dataset& other); + + /** + * Assigns the contents of an existing dataset to myself -- not + * implemented + */ + Dataset& operator= (const Dataset& other); + + public: //representation + + boost::weak_ptr<Group> m_parent; ///< my parent group + std::string m_name; ///< name of this object + boost::shared_ptr<hid_t> m_id; ///< the HDF5 Dataset this type points to + boost::shared_ptr<hid_t> m_dt; ///< the datatype of this Dataset + boost::shared_ptr<hid_t> m_filespace; ///< the "file" space for this set + std::vector<bob::io::base::HDF5Descriptor> m_descr; ///< read/write descr.'s + boost::shared_ptr<hid_t> m_memspace; ///< read/write space + + }; + + /** + * std::string specialization + */ + template <> void Dataset::read<std::string>(size_t index, std::string& value); + template <> void Dataset::replace<std::string>(size_t index, const std::string& value); + template <> void Dataset::add<std::string>(const std::string& value); + template <> void Dataset::set_attribute<std::string>(const std::string& name, const std::string& v); + template <> std::string Dataset::get_attribute(const std::string& name) const; + +}}}}} + + +#endif /* BOB_IO_BASE_HDF5DATASET_H */ diff --git a/bob/io/base/include/bob.io.base/HDF5File.h b/bob/io/base/include/bob.io.base/HDF5File.h new file mode 100644 index 0000000000000000000000000000000000000000..31c65944e5088b51af3fe1307e270e466b71052a --- /dev/null +++ b/bob/io/base/include/bob.io.base/HDF5File.h @@ -0,0 +1,558 @@ +/** + * @date Wed Jun 22 17:50:08 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief bob support for HDF5 files. HDF5 is a open standard for + * self-describing data files. 
You can get more information in this webpage: + * http://www.hdfgroup.org/HDF5 + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#ifndef BOB_IO_BASE_HDF5FILE_H +#define BOB_IO_BASE_HDF5FILE_H + +#include <boost/format.hpp> + +#include <bob.io.base/HDF5Utils.h> + +namespace bob { namespace io { namespace base { + + /** + * This is the main type for interfacing bob with HDF5. It allows the user + * to create, delete and modify data objects using a very-level API. The + * total functionality provided by this API is, of course, much smaller than + * what is provided if you use the HDF5 C-APIs directly, but is much simpler + * as well. + */ + class HDF5File { + + public: + + /** + * This enumeration defines the different values with which you can open + * the files with + */ + typedef enum mode_t { + in = 0, //H5F_ACC_RDONLY < can only read file + inout = 1, //H5F_ACC_RDWR < open file for reading and writing + trunc = 2, //H5F_ACC_TRUNC < if file exists, truncate it and open + excl = 4 //H5F_ACC_EXCL < if file exists, raise, otherwise == inout + } mode_t; + + public: //api + + /** + * Constructor, starts a new HDF5File object giving it a file name and an + * action: excl/trunc/in/inout + */ + HDF5File (const std::string& filename, mode_t mode); + + /** + * Constructor, starts a new HDF5File object giving it a file name and an + * action: 'r' (read-only), 'a' (read/write/append), 'w' (read/write/truncate) or 'x' (read/write/exclusive) + */ + HDF5File (const std::string& filename, const char mode='r'); + + /** + * Destructor virtualization + */ + virtual ~HDF5File(); + + /** + * Flushes the current content of the file to disk. + */ + void flush() {m_file->flush();} + + /** + * Closes the file after writing its content to disk + */ + void close(); + + /** + * Changes the current prefix path. When this object is started, it + * points to the root of the file. 
If you set this to a different + * value, it will be used as a prefix to any subsequent operation on + * relative paths until you reset it. + * + * @param path If path starts with '/', it is treated as an absolute + * path. '..' and '.' are supported. This object should be a std::string. + * If the value is relative, it is added to the current path. + * + * @note All operations taking a relative path, following a cd(), will be + * considered relative to the value returned by cwd(). + */ + void cd(const std::string& path); + + /** + * Tells if a certain directory exists in a file. + */ + bool hasGroup(const std::string& path); + + /** + * Creates a directory within the file. It is an error to recreate a path + * that already exists. You can check this with hasGroup() + */ + void createGroup(const std::string& path); + + /** + * Returns the name of the file currently opened + */ + const std::string filename() const {check_open(); return m_file->filename(); } + + /** + * Checks if the file is open for writing + */ + bool writable() const {check_open(); return m_file->writable();} + + /** + * Returns the current working path, fully resolved. This is + * re-calculated every time you call this method. + */ + std::string cwd() const; + + /** + * Tells if we have a variable with the given name inside the HDF5 file. + * If the file path given is a relative file path, it is taken w.r.t. the + * current working directory, as returned by cwd(). + */ + bool contains(const std::string& path) const; + + /** + * Describe a certain dataset path. If the file path is a relative one, + * it is taken w.r.t. the current working directory, as returned by + * cwd(). + */ + const std::vector<HDF5Descriptor>& describe (const std::string& path) const; + + /** + * Unlinks a particular dataset from the file. Note that this will + * not erase the data on the current file as that functionality is not + * provided by HDF5. 
To actually reclaim the space occupied by the + * unlinked structure, you must re-save this file to another file. The + * new file will not contain the data of any dangling datasets (datasets + * w/o names or links). Relative paths are allowed. + */ + void unlink (const std::string& path); + + /** + * Renames an existing dataset + */ + void rename (const std::string& from, const std::string& to); + + /** + * Accesses all existing paths in one shot. Input has to be a std + * container with T = std::string and accepting push_back() + */ + template <typename T> void paths (T& container, const bool relative = false) const { + m_cwd->dataset_paths(container); + check_open(); + if (relative){ + const std::string d = cwd(); + const int len = d.length()+1; + for (typename T::iterator it = container.begin(); it != container.end(); ++it){ + // assert that the string contains the current path + assert(it->find(d) == 0); + // subtract current path + *it = it->substr(len); + } + } + } + + /** + * Accesses all existing paths in one shot. Input has to be a std + * container with T = std::string and accepting push_back() + */ + template <typename T> void sub_groups (T& container, bool relative = false, bool recursive = true) const { + check_open(); + m_cwd->subgroup_paths(container, recursive); + if (!relative){ + const std::string d = cwd() + "/"; + for (typename T::iterator it = container.begin(); it != container.end(); ++it){ + // add current path + *it = d + *it; + } + } + } + + /** + * Copies the contents of the other file to this file. This is a blind + * operation, so we try to copy everything from the given file to the + * current one. It is the user responsibility to make sure the "path" + * slots in the other file are not already taken. If that is detected, an + * exception will be raised. + * + * This operation will be conducted w.r.t. the currently set prefix path + * (verifiable using cwd()). 
+ */ + void copy (HDF5File& other); + + /** + * Reads data from the file into a scalar. Raises an exception if the + * type is incompatible. Relative paths are accepted. + */ + template <typename T> + void read(const std::string& path, size_t pos, T& value) { + check_open(); + (*m_cwd)[path]->read(pos, value); + } + + /** + * Reads data from the file into a scalar. Returns by copy. Raises if the + * type T is incompatible. Relative paths are accepted. + */ + template <typename T> T read(const std::string& path, size_t pos) { + check_open(); + return (*m_cwd)[path]->read<T>(pos); + } + + /** + * Reads data from the file into a scalar. Raises an exception if the + * type is incompatible. Relative paths are accepted. Calling this method + * is equivalent to calling read(path, 0). Returns by copy. + */ + template <typename T> T read(const std::string& path) { + return read<T>(path, 0); + } + + /** + * Reads data from the file into a array. Raises an exception if the type + * is incompatible. Relative paths are accepted. + */ + template <typename T, int N> void readArray(const std::string& path, + size_t pos, blitz::Array<T,N>& value) { + check_open(); + (*m_cwd)[path]->readArray(pos, value); + } + + /** + * Reads data from the file into a array. Raises an exception if the type + * is incompatible. Relative paths are accepted. Destination array is + * allocated internally and returned by value. + */ + template <typename T, int N> blitz::Array<T,N> readArray + (const std::string& path, size_t pos) { + check_open(); + return (*m_cwd)[path]->readArray<T,N>(pos); + } + + /** + * Reads data from the file into a array. Raises an exception if the type + * is incompatible. Relative paths are accepted. Calling this method is + * equivalent to calling readArray(path, 0, value). + */ + template <typename T, int N> void readArray(const std::string& path, + blitz::Array<T,N>& value) { + readArray(path, 0, value); + } + + /** + * Reads data from the file into a array. 
Raises an exception if the type + * is incompatible. Relative paths are accepted. Calling this method is + * equivalent to calling readArray(path, 0). Destination array is + * allocated internally. + */ + template <typename T, int N> blitz::Array<T,N> readArray + (const std::string& path) { + return readArray<T,N>(path, 0); + } + + /** + * Modifies the value of a scalar inside the file. Relative paths are + * accepted. + */ + template <typename T> void replace(const std::string& path, size_t pos, + const T& value) { + check_open(); + if (!m_file->writable()) { + boost::format m("cannot replace value at dataset '%s' at path '%s' of file '%s' because it is not writeable"); + m % path % m_cwd->path() % m_file->filename(); + throw std::runtime_error(m.str()); + } + (*m_cwd)[path]->replace(pos, value); + } + + /** + * Modifies the value of a scalar inside the file. Relative paths are + * accepted. Calling this method is equivalent to calling replace(path, + * 0, value). + */ + template <typename T> void replace(const std::string& path, + const T& value) { + replace(path, 0, value); + } + + /** + * Modifies the value of a array inside the file. Relative paths are + * accepted. + */ + template <typename T> void replaceArray(const std::string& path, + size_t pos, const T& value) { + check_open(); + if (!m_file->writable()) { + boost::format m("cannot replace array at dataset '%s' at path '%s' of file '%s' because it is not writeable"); + m % path % m_cwd->path() % m_file->filename(); + throw std::runtime_error(m.str()); + } + (*m_cwd)[path]->replaceArray(pos, value); + } + + /** + * Modifies the value of a array inside the file. Relative paths are + * accepted. Calling this method is equivalent to calling + * replaceArray(path, 0, value). + */ + template <typename T> void replaceArray(const std::string& path, + const T& value) { + replaceArray(path, 0, value); + } + + /** + * Appends a scalar in a dataset. 
If the dataset does not yet exist, one + * is created with the type characteristics. Relative paths are accepted. + */ + template <typename T> void append(const std::string& path, + const T& value) { + check_open(); + if (!m_file->writable()) { + boost::format m("cannot append value to dataset '%s' at path '%s' of file '%s' because it is not writeable"); + m % path % m_cwd->path() % m_file->filename(); + throw std::runtime_error(m.str()); + } + if (!contains(path)) m_cwd->create_dataset(path, bob::io::base::HDF5Type(value), true, 0); + (*m_cwd)[path]->add(value); + } + + /** + * Appends a array in a dataset. If the dataset does not yet exist, one + * is created with the type characteristics. Relative paths are accepted. + * + * If a new Dataset is to be created, you can also set the compression + * level. Note this setting has no effect if the Dataset already exists + * on file, in which case the current setting for that dataset is + * respected. The maximum value for the gzip compression is 9. The value + * of zero turns compression off (the default). + */ + template <typename T> void appendArray(const std::string& path, + const T& value, size_t compression=0) { + check_open(); + if (!m_file->writable()) { + boost::format m("cannot append array to dataset '%s' at path '%s' of file '%s' because it is not writeable"); + m % path % m_cwd->path() % m_file->filename(); + throw std::runtime_error(m.str()); + } + if (!contains(path)) m_cwd->create_dataset(path, bob::io::base::HDF5Type(value), true, compression); + (*m_cwd)[path]->addArray(value); + } + + /** + * Sets the scalar at position 0 to the given value. This method is + * equivalent to checking if the scalar at position 0 exists and then + * replacing it. If the path does not exist, we append the new scalar. 
+ */ + template <typename T> void set(const std::string& path, const T& value) { + check_open(); + if (!m_file->writable()) { + boost::format m("cannot set value at dataset '%s' at path '%s' of file '%s' because it is not writeable"); + m % path % m_cwd->path() % m_file->filename(); + throw std::runtime_error(m.str()); + } + if (!contains(path)) m_cwd->create_dataset(path, bob::io::base::HDF5Type(value), false, 0); + (*m_cwd)[path]->replace(0, value); + } + + /** + * Sets the array at position 0 to the given value. This method is + * equivalent to checking if the array at position 0 exists and then + * replacing it. If the path does not exist, we append the new array. + * + * If a new Dataset is to be created, you can also set the compression + * level. Note this setting has no effect if the Dataset already exists + * on file, in which case the current setting for that dataset is + * respected. The maximum value for the gzip compression is 9. The value + * of zero turns compression off (the default). + */ + template <typename T> void setArray(const std::string& path, + const T& value, size_t compression=0) { + check_open(); + if (!m_file->writable()) { + boost::format m("cannot set array at dataset '%s' at path '%s' of file '%s' because it is not writeable"); + m % path % m_cwd->path() % m_file->filename(); + throw std::runtime_error(m.str()); + } + if (!contains(path)) m_cwd->create_dataset(path, bob::io::base::HDF5Type(value), false, compression); + (*m_cwd)[path]->replaceArray(0, value); + } + + public: //api shortcuts to deal with buffers -- avoid these at all costs! + + /** + * creates a new dataset. If the dataset already exists, checks if the + * existing data is compatible with the required type. + */ + void create (const std::string& path, const HDF5Type& dest, bool list, + size_t compression); + + /** + * Reads data from the file into a buffer. The given buffer contains + * sufficient space to hold the type described in "dest". 
Raises an + * exception if the type is incompatible with the expected data in the + * file. Relative paths are accepted. + */ + void read_buffer (const std::string& path, size_t pos, + const HDF5Type& type, void* buffer) const; + + /** + * writes the contents of a given buffer into the file. the area that the + * data will occupy should have been selected beforehand. + */ + void write_buffer (const std::string& path, size_t pos, + const HDF5Type& type, const void* buffer); + + /** + * extend the dataset with one extra variable. + */ + void extend_buffer (const std::string& path, + const HDF5Type& type, const void* buffer); + + /** + * Copy construct an already opened HDF5File; just creates a shallow copy + * of the file + */ + HDF5File (const HDF5File& other); + + /** + * Drop the current settings and load new ones from the other file. + */ + HDF5File& operator= (const HDF5File& other); + + public: // attribute handling + + /** + * Tells if there is an attribute with a given name on the given path, + * relative to the current location, possibly. + */ + bool hasAttribute(const std::string& path, const std::string& name) const; + + /** + * Reads data from an attribute into a scalar. If the attribute does not + * exist, raise an exception. Raises a TypeError if the types are not + * compatible. + */ + template <typename T> + void getAttribute(const std::string& path, const std::string& name, + T& value) const { + check_open(); + if (m_cwd->has_dataset(path)) { + value = (*m_cwd)[path]->get_attribute<T>(name); + } + else if (m_cwd->has_group(path)) { + value = m_cwd->cd(path)->get_attribute<T>(name); + } + else { + boost::format m("cannot read attribute '%s' at path/dataset '%s' of file '%s' (cwd: '%s') because this path/dataset does not currently exist"); + m % name % path % m_file->filename() % m_cwd->path(); + throw std::runtime_error(m.str()); + } + } + + /** + * Reads data from an attribute into an array. If the attribute does not + * exist, raise an exception. 
Raises a type error if the types are not + * compatible. + */ + template <typename T, int N> + void getArrayAttribute(const std::string& path, + const std::string& name, blitz::Array<T,N>& value) const { + check_open(); + if (m_cwd->has_dataset(path)) { + value = (*m_cwd)[path]->get_array_attribute<T,N>(name); + } + else if (m_cwd->has_group(path)) { + value = m_cwd->cd(path)->get_array_attribute<T,N>(name); + } + else { + boost::format m("cannot read (array) attribute '%s' at path/dataset '%s' of file '%s' (cwd: '%s') because this path/dataset does not currently exist"); + m % name % path % m_file->filename() % m_cwd->path(); + throw std::runtime_error(m.str()); + } + } + + /** + * Writes a scalar as an attribute to a path in this file. + */ + template <typename T> + void setAttribute(const std::string& path, const std::string& name, + const T value) { + check_open(); + if (m_cwd->has_dataset(path)) { + (*m_cwd)[path]->set_attribute(name, value); + } + else if (m_cwd->has_group(path)) { + m_cwd->cd(path)->set_attribute(name, value); + } + else { + boost::format m("cannot write attribute '%s' at path/dataset '%s' of file '%s' (cwd: '%s') because this path/dataset does not currently exist"); + m % name % path % m_file->filename() % m_cwd->path(); + throw std::runtime_error(m.str()); + } + } + + /** + * Writes an array as an attribute to a path in this file. 
+ */ + template <typename T, int N> + void setArrayAttribute(const std::string& path, + const std::string& name, const blitz::Array<T,N>& value) { + check_open(); + if (m_cwd->has_dataset(path)) { + (*m_cwd)[path]->set_array_attribute(name, value); + } + else if (m_cwd->has_group(path)) { + m_cwd->cd(path)->set_array_attribute(name, value); + } + else { + boost::format m("cannot write (array) attribute '%s' at path/dataset '%s' of file '%s' (cwd: '%s') because this path/dataset does not currently exist"); + m % name % path % m_file->filename() % m_cwd->path(); + throw std::runtime_error(m.str()); + } + } + + /** + * Gets the type information of an attribute + */ + void getAttributeType(const std::string& path, + const std::string& name, bob::io::base::HDF5Type& type) const; + + /** + * Deletes a given attribute + */ + void deleteAttribute(const std::string& path, + const std::string& name); + + /** + * List attributes available on a certain object. + */ + void listAttributes(const std::string& path, + std::map<std::string, bob::io::base::HDF5Type>& attributes) const; + + public: //raw accessors to attributes + + void read_attribute(const std::string& path, const std::string& name, + const bob::io::base::HDF5Type& type, void* buffer) const; + + void write_attribute(const std::string& path, const std::string& name, + const bob::io::base::HDF5Type& type, const void* buffer); + + private: //representation + + void check_open() const; + + boost::shared_ptr<detail::hdf5::File> m_file; ///< the file itself + boost::shared_ptr<detail::hdf5::Group> m_cwd; ///< current working dir + + }; + +}}} + +#endif /* BOB_IO_BASE_HDF5FILE_H */ diff --git a/bob/io/base/include/bob.io.base/HDF5Group.h b/bob/io/base/include/bob.io.base/HDF5Group.h new file mode 100644 index 0000000000000000000000000000000000000000..9dcd7a6e76cccdb40c1e4b98b4efdec63bcc52ef --- /dev/null +++ b/bob/io/base/include/bob.io.base/HDF5Group.h @@ -0,0 +1,468 @@ +/** + * @author Andre Anjos <andre.anjos@idiap.ch> + 
* @date Wed 29 Feb 17:24:10 2012 + * + * @brief Describes HDF5 groups. + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#ifndef BOB_IO_BASE_HDF5GROUP_H +#define BOB_IO_BASE_HDF5GROUP_H + +#include <boost/shared_ptr.hpp> +#include <boost/enable_shared_from_this.hpp> +#include <hdf5.h> + +#include <bob.io.base/HDF5Types.h> +#include <bob.io.base/HDF5Dataset.h> +#include <bob.io.base/HDF5Attribute.h> + +namespace bob { namespace io { namespace base { namespace detail { namespace hdf5 { + + class File; + + /** + * A group represents a path inside the HDF5 file. It can contain Datasets or + * other Groups. + */ + class Group: public boost::enable_shared_from_this<Group> { + + public: //better to protect? + + /** + * Creates a new group in a given parent. + */ + Group(boost::shared_ptr<Group> parent, const std::string& name); + + /** + * Binds to an existing group in a parent, reads all the group contents + * recursively. Note that the last parameter is there only to + * differentiate from the above constructor. It is ignored. + */ + Group(boost::shared_ptr<Group> parent, const std::string& name, + bool open); + + /** + * Constructor used by the root group, just open the root group + */ + Group(boost::shared_ptr<File> parent); + + /** + * Recursively open sub-groups and datasets. This cannot be done at the + * constructor because of a enable_shared_from_this<> restriction that + * results in a bad weak pointer exception being raised. + */ + void open_recursively(); + + public: //api + + /** + * D'tor - presently, does nothing + */ + virtual ~Group(); + + /** + * Get parent group + */ + virtual const boost::shared_ptr<Group> parent() const; + virtual boost::shared_ptr<Group> parent(); + + /** + * Filename where I'm sitting + */ + virtual const std::string& filename() const; + + /** + * Full path to myself. Constructed each time it is called. 
+ */ + virtual std::string path() const; + + /** + * Access file + */ + virtual const boost::shared_ptr<File> file() const; + virtual boost::shared_ptr<File> file(); + + /** + * My name + */ + virtual const std::string& name() const { + return m_name; + } + + /** + * Deletes all children nodes and properties in this group. + * + * Note that removing data already written in a file will only be + * effective in terms of space saving when you actually re-write that + * file. This instruction just unlinks all data from this group and makes + * them inaccessible to any further read operation. + */ + virtual void reset(); + + /** + * Accesses the current location id of this group + */ + const boost::shared_ptr<hid_t> location() const { + return m_id; + } + + boost::shared_ptr<hid_t> location() { + return m_id; + } + + /** + * Path with filename. Constructed each time it is called. + */ + virtual std::string url() const; + + /** + * move up-down on the group hierarchy + */ + virtual boost::shared_ptr<Group> cd(const std::string& path); + virtual const boost::shared_ptr<Group> cd(const std::string& path) const; + + /** + * Get a mapping of all child groups + */ + virtual const std::map<std::string, boost::shared_ptr<Group> >& groups() + const { + return m_groups; + } + + /** + * Create a new subgroup with a given name. + */ + virtual boost::shared_ptr<Group> create_group(const std::string& name); + + /** + * Deletes an existing subgroup with a given name. If a relative name is + * given, it is interpreted w.r.t. to this group. + * + * Note that removing data already written in a file will only be + * effective in terms of space saving when you actually re-write that + * file. This instruction just unlinks all data from this group and makes + * them inaccessible to any further read operation. + */ + virtual void remove_group(const std::string& path); + + /** + * Rename an existing group under me. 
+ */ + virtual void rename_group(const std::string& from, const std::string& to); + + /** + * Copies all data from an existing group into myself, creating a new + * subgroup, by default, with the same name as the other group. If a + * relative name is given, it is interpreted w.r.t. to this group. + * + * If an empty string is given as "dir", copies the other group name. + */ + virtual void copy_group(const boost::shared_ptr<Group> other, const + std::string& path=""); + + /** + * Says if a group with a certain path exists in this group. + */ + virtual bool has_group(const std::string& path) const; + + /** + * Get all datasets attached to this group + */ + virtual const std::map<std::string, boost::shared_ptr<Dataset> >& + datasets() const { + return m_datasets; + } + + /** + * Creates a new HDF5 dataset from scratch and inserts it in this group. + * If the Dataset already exists on file and the types are compatible, we + * attach to that type, otherwise, we raise an exception. + * + * You can set if you would like to have the dataset created as a list + * and the compression level. + * + * The effect of setting "list" to false is that the created dataset: + * + * a) Will not be expandible (chunked) b) Will contain the exact number + * of dimensions of the input type. + * + * When you set "list" to true (the default), datasets are created with + * chunking automatically enabled (the chunk size is set to the size of + * the given variable) and an extra dimension is inserted to accomodate + * list operations. + */ + virtual boost::shared_ptr<Dataset> create_dataset + (const std::string& path, const bob::io::base::HDF5Type& type, bool list=true, + size_t compression=0); + + /** + * Deletes a dataset in this group + * + * Note that removing data already written in a file will only be + * effective in terms of space saving when you actually re-write that + * file. 
This instruction just unlinks all data from this group and makes + * them inaccessible to any further read operation. + */ + virtual void remove_dataset(const std::string& path); + + /** + * Rename an existing dataset under me. + */ + virtual void rename_dataset(const std::string& from, + const std::string& to); + + /** + * Copies the contents of the given dataset into this. By default, use + * the same name. + */ + virtual void copy_dataset(const boost::shared_ptr<Dataset> other, + const std::string& path=""); + + /** + * Says if a dataset with a certain name exists in the current file. + */ + virtual bool has_dataset(const std::string& path) const; + + /** + * Accesses a certain dataset from this group + */ + boost::shared_ptr<Dataset> operator[] (const std::string& path); + const boost::shared_ptr<Dataset> operator[] (const std::string& path) const; + + /** + * Accesses all existing paths in one shot. Input has to be a std + * container with T = std::string and accepting push_back() + */ + template <typename T> void dataset_paths (T& container) const { + for (std::map<std::string, boost::shared_ptr<io::base::detail::hdf5::Dataset> >::const_iterator it=m_datasets.begin(); it != m_datasets.end(); ++it) container.push_back(it->second->path()); + for (std::map<std::string, boost::shared_ptr<io::base::detail::hdf5::Group> >::const_iterator it=m_groups.begin(); it != m_groups.end(); ++it) it->second->dataset_paths(container); + } + + /** + * Accesses all existing sub-groups in one shot. 
Input has to be a std + * container with T = std::string and accepting push_back() + */ + template <typename T> void subgroup_paths (T& container, bool recursive = true) const { + for (std::map<std::string, boost::shared_ptr<io::base::detail::hdf5::Group> >::const_iterator it=m_groups.begin(); it != m_groups.end(); ++it){ + container.push_back(it->first); + if (recursive){ + unsigned pos = container.size(); + it->second->subgroup_paths(container); + for (unsigned p = pos; p < container.size(); ++p){ + container[p] = it->first + "/" + container[p]; + } + } + } + } + + /** + * Callback function for group iteration. Two cases are blessed here: + * + * 1. Object is another group. In this case just instantiate the group and + * recursively iterate from there + * 2. Object is a dataset. Instantiate it. + * + * Only hard-links are considered. At the time being, no soft links. + */ + herr_t iterate_callback(hid_t group, const char *name, + const H5L_info_t *info); + + public: //attribute support + + /** + * Gets the current type set for an attribute + */ + void gettype_attribute(const std::string& name, HDF5Type& type) const; + + /** + * Sets a scalar attribute on the current group. Setting an existing + * attribute overwrites its value. + * + * @note Only simple scalars are supported for the time being + */ + template <typename T> void set_attribute(const std::string& name, + const T& v) { + bob::io::base::HDF5Type dest_type(v); + write_attribute(name, dest_type, reinterpret_cast<const void*>(&v)); + } + + /** + * Reads an attribute from the current group. Raises an error if such + * attribute does not exist on the group. To check for existence, use + * has_attribute(). + */ + template <typename T> T get_attribute(const std::string& name) const { + T v; + bob::io::base::HDF5Type dest_type(v); + read_attribute(name, dest_type, reinterpret_cast<void*>(&v)); + return v; + } + + /** + * Checks if a certain attribute exists in this group. 
+ */ + bool has_attribute(const std::string& name) const; + + /** + * Deletes an attribute + */ + void delete_attribute(const std::string& name); + + /** + * List attributes available on this dataset. + */ + void list_attributes(std::map<std::string, bob::io::base::HDF5Type>& attributes) const; + + public: //array attribute support + + /** + * Sets a array attribute on the current group. Setting an existing + * attribute overwrites its value. If the attribute exists it is erased + * and re-written. + */ + template <typename T, int N> void set_array_attribute(const std::string& name, + const blitz::Array<T,N>& v) { + bob::io::base::HDF5Type dest_type(v); + if(!bob::core::array::isCZeroBaseContiguous(v)) { + blitz::Array<T,N> tmp = bob::core::array::ccopy(v); + write_attribute(name, dest_type, reinterpret_cast<const void*>(tmp.data())); + } + else { + write_attribute(name, dest_type, reinterpret_cast<const void*>(v.data())); + } + } + + /** + * Reads an attribute from the current dataset. Raises an error if such + * attribute does not exist on the group. To check for existence, use + * has_attribute(). + */ + template <typename T, int N> blitz::Array<T,N> get_array_attribute + (const std::string& name) const { + blitz::Array<T,N> v; + bob::io::base::HDF5Type dest_type(v); + read_attribute(name, dest_type, reinterpret_cast<void*>(v.data())); + return v; + } + + /** + * Reads an attribute from the current dataset. Places the data in an + * already allocated array. + */ + template <typename T, int N> void get_array_attribute + (const std::string& name, blitz::Array<T,N>& v) const { + bob::io::base::HDF5Type dest_type(v); + read_attribute(name, dest_type, reinterpret_cast<void*>(v.data())); + } + + public: //buffer attribute support + + /** + * Reads an attribute into a user buffer. It is the user's responsibility + * to have a buffer that represents the given type. 
+ */ + void read_attribute (const std::string& name, + const bob::io::base::HDF5Type& dest, void* buffer) const; + + /** + * Writes the contents of a given buffer into the attribute. + */ + void write_attribute (const std::string& name, + const bob::io::base::HDF5Type& dest, const void* buffer); + + private: //not implemented + + /** + * Copies the contents of an existing group -- not implemented + */ + Group(const Group& other); + + /** + * Assigns the contents of an existing group to myself -- not + * implemented + */ + Group& operator= (const Group& other); + + private: //representation + + std::string m_name; ///< my name + boost::shared_ptr<hid_t> m_id; ///< the HDF5 Group this object points to + boost::weak_ptr<Group> m_parent; + std::map<std::string, boost::shared_ptr<Group> > m_groups; + std::map<std::string, boost::shared_ptr<Dataset> > m_datasets; + //std::map<std::string, boost::shared_ptr<Attribute> > m_attributes; + + }; + + /** + * The RootGroup is a special case of the Group object that is directly + * attached to the File (no parents). + */ + class RootGroup: public Group { + + public: //api + + /** + * Binds to the root group of a file. + */ + RootGroup(boost::shared_ptr<File> parent); + + /** + * D'tor - presently, does nothing + */ + virtual ~RootGroup(); + + /** + * Get parent group + */ + virtual const boost::shared_ptr<Group> parent() const { + return boost::shared_ptr<Group>(); + } + + /** + * Get parent group + */ + virtual boost::shared_ptr<Group> parent() { + return boost::shared_ptr<Group>(); + } + + /** + * Filename where I'm sitting + */ + virtual const std::string& filename() const; + + /** + * Full path to myself. Constructed each time it is called. 
+ */ + virtual std::string path() const { + return ""; + } + + /** + * Access file + */ + virtual const boost::shared_ptr<File> file() const { + return m_parent.lock(); + } + + virtual boost::shared_ptr<File> file() { + return m_parent.lock(); + } + + private: //representation + + boost::weak_ptr<File> m_parent; ///< the file I belong to + + }; + + /** + * std::string specialization + */ + template <> void Group::set_attribute<std::string>(const std::string& name, const std::string& v); + template <> std::string Group::get_attribute(const std::string& name) const; + +}}}}} + +#endif /* BOB_IO_BASE_HDF5GROUP_H */ diff --git a/bob/io/base/include/bob.io.base/HDF5Types.h b/bob/io/base/include/bob.io.base/HDF5Types.h new file mode 100644 index 0000000000000000000000000000000000000000..d19eab341873aafd33d2d4342fede82f5d7d3c79 --- /dev/null +++ b/bob/io/base/include/bob.io.base/HDF5Types.h @@ -0,0 +1,508 @@ +/** + * @date Wed Jun 22 17:50:08 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief A few helpers to handle HDF5 datasets in a more abstract way. + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#ifndef BOB_IO_BASE_HDF5TYPES_H +#define BOB_IO_BASE_HDF5TYPES_H + +#include <vector> +#include <string> +#include <boost/shared_ptr.hpp> +#include <blitz/array.h> +#include <hdf5.h> + +/** + * Checks if the version of HDF5 installed is greater or equal to some set of + * values. 
(extracted from hdf5-1.8.7) + */ +#ifndef H5_VERSION_GE +#define H5_VERSION_GE(Maj,Min,Rel) \ + (((H5_VERS_MAJOR==Maj) && (H5_VERS_MINOR==Min) && (H5_VERS_RELEASE>=Rel)) || \ + ((H5_VERS_MAJOR==Maj) && (H5_VERS_MINOR>Min)) || \ + (H5_VERS_MAJOR>Maj)) +#endif + +#include <bob.io.base/array.h> + +namespace bob { namespace io { namespace base { + + /** + * Supported types + */ + typedef enum hdf5type { + s=0, //std::string + b, //bool + i8, //int8_t + i16, //int16_t + i32, //int32_t + i64, //int64_t + u8, //uint8_t + u16, //uint16_t + u32, //uint32_t + u64, //uint64_t + f32, //float + f64, //double + f128, //long double + c64, //std::complex<float> + c128, //std::complex<double> + c256, //std::complex<long double> + unsupported //this must be last + } hdf5type; + + /** + * Converts a hdf5type enumeration into its string representation + */ + const char* stringize (hdf5type t); + + /** + * A wrapper to handle the HDF5 C-API error printing in a nicer way... + */ + class HDF5ErrorStack { + + public: //api + + /** + * Binds the HDF5Error to the current default error stack. + */ + HDF5ErrorStack (); + + /** + * Binds to a specific HDF5 error stack + */ + HDF5ErrorStack (hid_t stack); + + /** + * Destructor virtualization. 
+ */ + virtual ~HDF5ErrorStack(); + + /** + * Returns the currently captured error stack + */ + inline std::vector<std::string>& get() { return m_err; } + + /** + * Clears the current error stack + */ + inline void clear() { m_err.clear(); } + + /** + * Sets muting + */ + inline void mute () { m_muted = true; } + inline void unmute () { m_muted = false; } + inline bool muted () const { return m_muted; } + + private: //not implemented + + HDF5ErrorStack(const HDF5ErrorStack& other); + + HDF5ErrorStack& operator= (const HDF5ErrorStack& other); + + private: //representation + hid_t m_stack; ///< the stack I'm observing + bool m_muted; ///< if I'm currently muted + std::vector<std::string> m_err; ///< the current captured stack + herr_t (*m_func)(hid_t, void*); ///< temporary cache + void* m_client_data; ///< temporary cache + + }; + + // Global default HDF5 error stack + extern const boost::shared_ptr<HDF5ErrorStack> DefaultHDF5ErrorStack; + + /** + * This class defines the shape type: a counter and a variable-size hsize_t + * array that contains the dimensionality of a certain array. Internally, we + * always allocate a fixed size vector with 12 positions (after the maximum + * number of dimensions of a blitz::Array<T,N> + 1). + */ + class HDF5Shape { + +# define MAX_HDF5SHAPE_SIZE 12 + + public: //api + + /** + * Builds a new shape with a certain size and values. The size has to be + * smaller than the maximum number of supported dimensions (12). 
+ */ + template <typename T> HDF5Shape(const size_t n, const T* values): + m_n(n), m_shape() { + if (n > MAX_HDF5SHAPE_SIZE) + throw std::length_error("maximum number of dimensions exceeded"); + for (size_t i=0; i<n; ++i) m_shape[i] = values[i]; + } + + /** + * Builds a new shape with data from a blitz::TinyVector + */ + template <int N> HDF5Shape(const blitz::TinyVector<int,N>& vec): + m_n(N), m_shape() { + if (N > MAX_HDF5SHAPE_SIZE) + throw std::length_error("maximum number of dimensions exceeded"); + for (size_t i=0; i<N; ++i) m_shape[i] = vec[i]; + } + + /** + * Allocates the shape space, but fills all with zeros + */ + HDF5Shape (size_t n); + + /** + * Default constructor (m_n = 0, no shape) + */ + HDF5Shape (); + + /** + * Copy construct the shape + */ + HDF5Shape (const HDF5Shape& other); + + /** + * Virtual destructor + */ + virtual ~HDF5Shape(); + + /** + * Resets this new shape + */ + HDF5Shape& operator= (const HDF5Shape& other); + + /** + * Returns the current size of shape. If values are less than zero, the + * shape is not valid. + */ + inline size_t n () const { return m_n; } + + /** + * Returs a pointer to the first element of the shape + */ + inline const hsize_t* get() const { return m_shape; } + inline hsize_t* get() { return m_shape; } + + /** + * Copies the data from the other HDF5Shape. If the other shape is + * smaller, will copy up to the number of positions in the other shape, + * if it is bigger, will copy up to my number of positions. + */ + void copy(const HDF5Shape& other); + + /** + * Sets a TinyVector with the contents of this shape. If the tinyvector + * shape is smaller, will copy up to the number of positions in the + * current shape. 
If that is bigger, will copy up to my number of + * positions + */ + template <int N> void set (blitz::TinyVector<int,N>& v) const { + if (N >= m_n) for (size_t i=0; i<m_n; ++i) v[i] = m_shape[i]; + else for (size_t i=0; i<N; ++i) v[i] = m_shape[i]; + } + + /** + * Resets the current shape so it becomes invalid. + */ + void reset(); + + /** + * Accesses a certain position of this shape (unchecked!) + */ + inline const hsize_t& operator[] (size_t pos) const { return m_shape[pos]; } + inline hsize_t& operator[] (size_t pos) { return m_shape[pos]; } + + /** + * Left-shift a number of positions, decreases the total size. + */ + HDF5Shape& operator <<= (size_t pos); + + /** + * Right-shift a number of positions, increases the total size. New + * positions are filled with 1's (ones). + */ + HDF5Shape& operator >>= (size_t pos); + + /** + * Returns the product of all dimensions + */ + hsize_t product() const; + + /** + * Compares two shapes for equality + */ + bool operator== (const HDF5Shape& other) const; + bool operator!= (const HDF5Shape& other) const; + + /** + * Compares a shape with a TinyVector for equality + */ + template <int N> + bool operator== (const blitz::TinyVector<int,N>& other) const { + if (N != m_n) return false; + for (size_t i=0; i<m_n; ++i) if (m_shape[i] != other[i]) return false; + return true; + } + + template <int N> + bool operator!= (const blitz::TinyVector<int,N>& other) const { + return !(*this == other); + } + + /** + * Tells if this shape is invalid + */ + inline bool operator! () const { return m_n == 0; } + + /** + * Returns a tuple-like string representation for this shape + */ + std::string str() const; + + private: //representation + size_t m_n; ///< The number of valid hsize_t's in this shape + hsize_t m_shape[MAX_HDF5SHAPE_SIZE]; ///< The actual shape values + + }; + + /** + * Support to compare data types, convert types into runtime equivalents and + * make our life easier when deciding what to input and output. 
+ */ + class HDF5Type { + + public: + + /** + * Specific implementations bind the type T to the support_t enum + */ +# define DECLARE_SUPPORT(T) HDF5Type(const T& value); + DECLARE_SUPPORT(bool) + DECLARE_SUPPORT(int8_t) + DECLARE_SUPPORT(int16_t) + DECLARE_SUPPORT(int32_t) + DECLARE_SUPPORT(int64_t) + DECLARE_SUPPORT(uint8_t) + DECLARE_SUPPORT(uint16_t) + DECLARE_SUPPORT(uint32_t) + DECLARE_SUPPORT(uint64_t) + DECLARE_SUPPORT(float) + DECLARE_SUPPORT(double) + DECLARE_SUPPORT(long double) + DECLARE_SUPPORT(std::complex<float>) + DECLARE_SUPPORT(std::complex<double>) + DECLARE_SUPPORT(std::complex<long double>) +# undef DECLARE_SUPPORT + + /** + * Builds the type from a string (special HDF5 handling) + */ + HDF5Type(const char* value); + HDF5Type(const std::string& value); + +# define DECLARE_SUPPORT(T,N) HDF5Type(const blitz::Array<T,N>& value); + +# define DECLARE_BZ_SUPPORT(T) \ + DECLARE_SUPPORT(T,1) \ + DECLARE_SUPPORT(T,2) \ + DECLARE_SUPPORT(T,3) \ + DECLARE_SUPPORT(T,4) + + DECLARE_BZ_SUPPORT(bool) + DECLARE_BZ_SUPPORT(int8_t) + DECLARE_BZ_SUPPORT(int16_t) + DECLARE_BZ_SUPPORT(int32_t) + DECLARE_BZ_SUPPORT(int64_t) + DECLARE_BZ_SUPPORT(uint8_t) + DECLARE_BZ_SUPPORT(uint16_t) + DECLARE_BZ_SUPPORT(uint32_t) + DECLARE_BZ_SUPPORT(uint64_t) + DECLARE_BZ_SUPPORT(float) + DECLARE_BZ_SUPPORT(double) + DECLARE_BZ_SUPPORT(long double) + DECLARE_BZ_SUPPORT(std::complex<float>) + DECLARE_BZ_SUPPORT(std::complex<double>) + DECLARE_BZ_SUPPORT(std::complex<long double>) +# undef DECLARE_BZ_SUPPORT +# undef DECLARE_SUPPORT + + /** + * Default constructor, results in an unsupported type with invalid shape + */ + HDF5Type(); + + /** + * Creates a HDF5Type from a type enumeration, assumes it is a scalar + */ + HDF5Type(hdf5type type); + + /** + * Creates a HDF5Type from an bob::io::base::array::typeinfo + */ + HDF5Type(const bob::io::base::array::typeinfo& ti); + + /** + * Creates a HDF5Type from a type enumeration and an explicit shape + */ + 
HDF5Type(bob::io::base::array::ElementType eltype, const HDF5Shape& extents); + + /** + * Creates a HDF5Type from a type enumeration and an explicit shape + */ + HDF5Type(hdf5type type, const HDF5Shape& extents); + + /** + * Creates a HDF5Type from a HDF5 Dataset, Datatype and Dataspace + */ + HDF5Type(const boost::shared_ptr<hid_t>& type, + const HDF5Shape& extents); + + /** + * Scalar of a certain type + */ + HDF5Type(const boost::shared_ptr<hid_t>& type); + + /** + * Copy construction + */ + HDF5Type(const HDF5Type& other); + + /** + * Virtual destructor + */ + virtual ~HDF5Type(); + + /** + * Assignment + */ + HDF5Type& operator= (const HDF5Type& other); + + /** + * Checks if two types are the same + */ + bool operator== (const HDF5Type& other) const; + + /** + * Checks if two types are *not* the same + */ + bool operator!= (const HDF5Type& other) const; + + /** + * Checks if an existing object is compatible with my type + */ + template <typename T> bool compatible (const T& value) const { + return *this == HDF5Type(value); + } + + /** + * Checks if an existing object is compatible with my type + */ + bool compatible (const bob::io::base::array::typeinfo& value) const; + + /** + * Returns the HDF5Shape of this type + */ + const HDF5Shape& shape() const { return m_shape; } + + /** + * Returns the HDF5Shape of this type + */ + HDF5Shape& shape() { return m_shape; } + + /** + * Returns the equivalent HDF5 type info object for this type. + */ + boost::shared_ptr<hid_t> htype() const; + + /** + * Returns a string representation of this supported type. + */ + std::string str() const; + + /** + * Returns a string representation of the element type. 
+ */ + std::string type_str() const { return stringize(m_type); } + + /** + * Returns the current enumeration for the type + */ + inline hdf5type type() const { return m_type; } + + /** + * Returns a mapping between the current type and the supported element + * types in bob::io::base::array + */ + bob::io::base::array::ElementType element_type() const; + + /** + * Copies this type information to a stock bob::io::base::array::typeinfo + */ + void copy_to (bob::io::base::array::typeinfo& ti) const; + + private: //representation + + hdf5type m_type; ///< the precise supported type + HDF5Shape m_shape; ///< what is the shape of the type (scalar) + + }; + + /** + * Describes ways to read a Dataset. + */ + struct HDF5Descriptor { + + public: //api + + /** + * Constructor + */ + HDF5Descriptor(const HDF5Type& type, size_t size = 0, bool expand = true); + + /** + * Copy constructor + */ + HDF5Descriptor(const HDF5Descriptor& other); + + /** + * Virtual destructor + */ + virtual ~HDF5Descriptor(); + + /** + * Assignment + */ + HDF5Descriptor& operator= (const HDF5Descriptor& other); + + /** + * Setup myself as I was supposed to be read from a space with N+1 + * dimensions. + */ + HDF5Descriptor& subselect(); + + public: //representation + + HDF5Type type; ///< base type for read/write operations + size_t size; ///< number of objects of this type stored at dataset + bool expandable; ///< is this dataset expandable using this type? + + /** + * Variables required for fast read/write operations. + */ + HDF5Shape hyperslab_start; ///< offset to read/write operations + HDF5Shape hyperslab_count; ///< count for read/write operations + + }; + + /** + * Format and returns the current HDF5 error stack. It also clears the stack + * before returning. 
+ */ + std::string format_hdf5_error(); + +}}} + +#endif /* BOB_IO_BASE_HDF5TYPES_H */ diff --git a/bob/io/base/include/bob.io.base/HDF5Utils.h b/bob/io/base/include/bob.io.base/HDF5Utils.h new file mode 100644 index 0000000000000000000000000000000000000000..670430e2b4ffdf47d16c4953b593b89ba5352058 --- /dev/null +++ b/bob/io/base/include/bob.io.base/HDF5Utils.h @@ -0,0 +1,135 @@ +/** + * @date Wed Jun 22 17:50:08 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief A bunch of private utilities to make programming against the HDF5 + * library a little bit more confortable. + * + * Classes and non-member methods in this file handle the low-level HDF5 C-API + * and try to make it a little bit safer and higher-level for use by the + * publicly visible HDF5File class. The functionality here is heavily based on + * boost::shared_ptr's for handling automatic deletion and releasing of HDF5 + * objects. Two top-level classes do the whole work: File and Dataset. The File + * class represents a raw HDF5 file. You can iterate with it in a very limited + * way: create one, rename an object or delete one. The Dataset object + * encapsulates reading and writing of data from a specific HDF5 dataset. + * Everything is handled automatically and the user should not have to worry + * about it too much. + * + * @todo Missing support for list<std::string> + * @todo Inprint file creation time, author, comments? 
+ * @todo Missing support for automatic endianness conversion
+ * @todo Missing true support for scalars
+ *
+ * Copyright (C) Idiap Research Institute, Martigny, Switzerland
+ */
+
+#ifndef BOB_IO_BASE_HDF5UTILS_H
+#define BOB_IO_BASE_HDF5UTILS_H
+
+#include <boost/filesystem.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/enable_shared_from_this.hpp>
+#include <hdf5.h>
+
+#include <bob.io.base/HDF5Group.h>
+
+namespace bob { namespace io { namespace base { namespace detail { namespace hdf5 {
+
+ /**
+ * An HDF5 C-style file that knows how to close itself.
+ */
+ class File: public boost::enable_shared_from_this<File> {
+
+ public:
+
+ /**
+ * Creates a new HDF5 file. Optionally set the userblock size (multiple
+ * of 2 number of bytes).
+ */
+ File(const boost::filesystem::path& path, unsigned int flags,
+ size_t userblock_size=0);
+
+ /**
+ * Copies a file by creating a copy of each of its groups
+ */
+ File(const File& other);
+
+ /**
+ * Destructor virtualization
+ */
+ virtual ~File();
+
+ /**
+ * Assignment
+ */
+ File& operator= (const File& other);
+
+ /**
+ * Accesses the current location id of this file
+ */
+ const boost::shared_ptr<hid_t> location() const {
+ return m_id;
+ }
+ boost::shared_ptr<hid_t> location() {
+ return m_id;
+ }
+
+ /**
+ * Returns the userblock size
+ */
+ size_t userblock_size() const;
+
+ /**
+ * Copies the userblock into a string -- not yet implemented. If you want
+ * to do it, read the code for the command-line utilities h5jam and
+ * h5unjam.
+ */
+ void get_userblock(std::string& data) const;
+
+ /**
+ * Writes new data to the user block. Data is truncated up to the size
+ * set during file creation -- not yet implemented. If you want to do it,
+ * read the code for the command-line utilities h5jam and h5unjam.
+ */ + void set_userblock(const std::string& data); + + /** + * Gets the current path + */ + const std::string& filename() const { + return m_path.string(); + } + + /** + * Returns the root group + */ + boost::shared_ptr<RootGroup> root(); + + /** + * Resets this file, sets to read again all groups and datasets + */ + void reset(); + + /** + * Flushes the current content of the file to disk + */ + void flush(); + + /** + * Tells if this file is writable + */ + bool writable() const; + + private: //representation + + const boost::filesystem::path m_path; ///< path to the file + unsigned int m_flags; ///< flags used to open it + boost::shared_ptr<hid_t> m_fcpl; ///< file creation property lists + boost::shared_ptr<hid_t> m_id; ///< the HDF5 id attributed to this file. + boost::shared_ptr<RootGroup> m_root; + }; + +}}}}} + +#endif /* BOB_IO_BASE_HDF5UTILS_H */ diff --git a/bob/io/base/include/bob.io.base/api.h b/bob/io/base/include/bob.io.base/api.h index 668b1a41b5fe7069dc121eb79ed398dad36340a5..001e8d4a5a282c082eb2adc862c0c9e15ff50619 100644 --- a/bob/io/base/include/bob.io.base/api.h +++ b/bob/io/base/include/bob.io.base/api.h @@ -2,7 +2,7 @@ * @author Andre Anjos <andre.anjos@idiap.ch> * @date Tue 5 Nov 12:22:48 2013 * - * @brief C/C++ API for bob::io + * @brief Python API for bob::io::base */ #ifndef BOB_IO_BASE_H @@ -16,10 +16,9 @@ #include <Python.h> #include <bob.io.base/config.h> -#include <bob/config.h> -#include <bob/io/File.h> -#include <bob/io/HDF5File.h> -#include <bob/io/CodecRegistry.h> +#include <bob.io.base/File.h> +#include <bob.io.base/CodecRegistry.h> +#include <bob.io.base/HDF5File.h> #include <boost/shared_ptr.hpp> @@ -65,7 +64,7 @@ typedef struct { PyObject_HEAD /* Type-specific fields go here. 
*/ - boost::shared_ptr<bob::io::File> f; + boost::shared_ptr<bob::io::base::File> f; } PyBobIoFileObject; @@ -87,10 +86,10 @@ typedef struct { ************************/ #define PyBobIo_AsTypenum_RET int -#define PyBobIo_AsTypenum_PROTO (bob::core::array::ElementType et) +#define PyBobIo_AsTypenum_PROTO (bob::io::base::array::ElementType et) #define PyBobIo_TypeInfoAsTuple_RET PyObject* -#define PyBobIo_TypeInfoAsTuple_PROTO (const bob::core::array::typeinfo& ti) +#define PyBobIo_TypeInfoAsTuple_PROTO (const bob::io::base::array::typeinfo& ti) #define PyBobIo_FilenameConverter_RET int #define PyBobIo_FilenameConverter_PROTO (PyObject* o, PyObject** b) @@ -103,7 +102,7 @@ typedef struct { PyObject_HEAD /* Type-specific fields go here. */ - boost::shared_ptr<bob::io::HDF5File> f; + boost::shared_ptr<bob::io::base::HDF5File> f; } PyBobIoHDF5FileObject; @@ -120,7 +119,7 @@ typedef struct { *****************************************/ #define PyBobIoCodec_Register_RET int -#define PyBobIoCodec_Register_PROTO (const char* extension, const char* description, bob::io::file_factory_t factory) +#define PyBobIoCodec_Register_PROTO (const char* extension, const char* description, bob::io::base::file_factory_t factory) #define PyBobIoCodec_Deregister_RET int #define PyBobIoCodec_Deregister_PROTO (const char* extension) diff --git a/bob/io/base/include/bob.io.base/array.h b/bob/io/base/include/bob.io.base/array.h new file mode 100644 index 0000000000000000000000000000000000000000..570c09f05f52fc115cc659f207c1137e080acce0 --- /dev/null +++ b/bob/io/base/include/bob.io.base/array.h @@ -0,0 +1,255 @@ +/** + * @date Tue Nov 8 15:34:31 2011 +0100 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief The array API describes a non-specific way to handle N dimensional + * array data. 
+ * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#ifndef BOB_IO_BASE_ARRAY_INTERFACE_H +#define BOB_IO_BASE_ARRAY_INTERFACE_H + +#include <stdexcept> +#include <string> + +#include <boost/shared_ptr.hpp> +#include <blitz/array.h> + +#include <bob.io.base/array_type.h> + +/* MinGW flags */ +#ifdef _WIN32 +#undef interface +#endif + +/** + * @brief Array submodule API of the I/O module + */ +namespace bob { namespace io { namespace base { namespace array { + + /** + * @brief Encapsulation of special type information of interfaces. + */ + struct typeinfo { + + ElementType dtype; ///< data type + size_t nd; ///< number of dimensions + size_t shape[BOB_MAX_DIM+1]; ///< length along each dimension + size_t stride[BOB_MAX_DIM+1]; ///< strides along each dimension + + /** + * @brief Default constructor + */ + typeinfo(); + + /** + * @brief Simplification to build a typeinfo from a size + */ + template <typename T> typeinfo(ElementType dtype_, T nd_) { + set(dtype_, nd_); + } + + /** + * @brief Simplification to build a typeinfo from a shape pointer. + */ + template <typename T> typeinfo(ElementType dtype_, T nd_, const T* shape_) { + set(dtype_, nd_, shape_); + } + + /** + * @brief Copies information from another typeinfo + */ + typeinfo(const typeinfo& other); + + /** + * @brief Assignment + */ + typeinfo& operator= (const typeinfo& other); + + /** + * @brief Builds with type and number of dimensions, but set the shape and + * strides to all zeros. 
+ */ + template <typename T> + void set(ElementType dtype_, T nd_) { + dtype = dtype_; + nd = nd_; + reset_shape(); + } + + /** + * @brief Set to specific values + */ + template <typename T> + void set(ElementType dtype_, T nd_, const T* shape_) { + dtype = dtype_; + set_shape(nd_, shape_); + } + + /** + * @brief Set to specific values, including strides + */ + template <typename T> + void set(ElementType dtype_, T nd_, const T* shape_, + const T* stride_) { + dtype = dtype_; + nd = nd_; + for (size_t k=0; k<nd; ++k) { + shape[k] = shape_[k]; + stride[k] = stride_[k]; + } + } + + /** + * @brief Reset to defaults -- as if uninitialized. + */ + void reset(); + + /** + * @brief Is this a valid type information? + */ + bool is_valid() const; + + /** + * @brief Does this has a valid shape information? + */ + bool has_valid_shape() const; + + /** + * @brief sets the shape + */ + template <typename T> void set_shape(T nd_, const T* shape_) { + if (nd_ > (BOB_MAX_DIM+1)) + throw std::runtime_error("unsupported number of dimensions"); + nd = nd_; + for (size_t k=0; k<nd; ++k) shape[k] = shape_[k]; + update_strides(); + } + + /** + * @brief resets the shape to all zeros + */ + void reset_shape(); + + /** + * @brief Update my own stride vector. Called automatically after any use + * of set_shape(). + */ + void update_strides(); + + /** + * @brief Returns the total number of elements available + */ + size_t size() const; + + /** + * @brief Returns the size of each element + */ + inline size_t item_size() const { return getElementSize(dtype); } + + /** + * @brief Returns the total size (in bytes) of the buffer that I'm + * associated with. 
+ */ + size_t buffer_size() const; + + /** + * @brief Returns the item type description + */ + const char* item_str() const { return stringize(dtype); } + + /** + * @brief Checks compatibility with other typeinfo + */ + bool is_compatible(const typeinfo& other) const; + + /** + * @brief Formats and returns a string containing the full typeinfo + * description. + */ + std::string str() const; + + /** + * @brief Make it easy to set for blitz::Array<T,N> + */ + template <typename T, int N> void set(const blitz::Array<T,N>& array) { + dtype = getElementType<T>(); + set_shape(array.shape()); + } + + template <typename T, int N> + void set(boost::shared_ptr<blitz::Array<T,N> >& array) { + dtype = getElementType<T>(); + set_shape(array->shape()); + } + + template <int N> void set_shape(const blitz::TinyVector<int,N>& tv_shape) { + nd = N; + for (size_t k=0; k<nd; ++k) shape[k] = tv_shape(k); + update_strides(); + } + + }; + + /** + * @brief The interface manager introduces a concept for managing the + * interfaces that can be handled as C-style arrays. It encapsulates methods + * to store and delete the buffer contents in a safe way. + * + * The interface is an entity that either stores a copy of its own data or + * refers to data belonging to another interface. + */ + class interface { + + public: //api + + /** + * @brief By default, the interface is never freed. You must override + * this method to do something special for your class type. + */ + virtual ~interface() { } + + /** + * @brief Copies the data from another interface. + */ + virtual void set(const interface& other) =0; + + /** + * @brief Refers to the data of another interface. + */ + virtual void set(boost::shared_ptr<interface> other) =0; + + /** + * @brief Re-allocates this interface taking into consideration new + * requirements. The internal memory should be considered uninitialized. + */ + virtual void set (const typeinfo& req) =0; + + /** + * @brief Type information for this interface. 
+ */ + virtual const typeinfo& type() const =0; + + /** + * @brief Borrows a reference from the underlying memory. This means + * this object continues to be responsible for deleting the memory and + * you should make sure that it outlives the usage of the returned + * pointer. + */ + virtual void* ptr() =0; + virtual const void* ptr() const =0; + + /** + * @brief Returns a representation of the internal cache using shared + * pointers. + */ + virtual boost::shared_ptr<void> owner() =0; + virtual boost::shared_ptr<const void> owner() const =0; + + }; + +}}}} + +#endif /* BOB_IO_BASE_ARRAY_INTERFACE_H */ diff --git a/bob/io/base/include/bob.io.base/array_type.h b/bob/io/base/include/bob.io.base/array_type.h new file mode 100644 index 0000000000000000000000000000000000000000..89eb1e26df2000beb040f2dc47aced0429029f6e --- /dev/null +++ b/bob/io/base/include/bob.io.base/array_type.h @@ -0,0 +1,162 @@ +/** + * @date Sat Apr 9 18:10:10 2011 +0200 + * @author Laurent El Shafey <Laurent.El-Shafey@idiap.ch> + * + * @brief This file contains information about the supported arrays + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#ifndef BOB_IO_BASE_ARRAY_TYPE_H +#define BOB_IO_BASE_ARRAY_TYPE_H + +#include <stdint.h> +#include <cstdlib> +#include <complex> + +/** + * @ingroup IO_ARRAY + * @brief This macro defines the maximum number of dimensions supported by bob. + * A variable in the bob.io.array namespace is created from this macro + * receiving the same value. Use that variable on your programs, or this macro + * on your preprocessor code. 
+ */ +#define BOB_MAX_DIM 4 + +namespace bob { namespace io { namespace base { namespace array { + + /** + * @brief Enumeration of the supported type for multidimensional arrays + * @warning float128 and complex256 are defined but currently not + * supported + */ + typedef enum ElementType { + t_unknown=0, + t_bool=1, + t_int8=2, + t_int16=3, + t_int32=4, + t_int64=5, + t_uint8=6, + t_uint16=7, + t_uint32=8, + t_uint64=9, + t_float32=10, + t_float64=11, + t_float128=12, + t_complex64=13, + t_complex128=14, + t_complex256=15 + } ElementType; + + /** + * @brief Maximum number of supported dimensions for multidimensional + * arrays. + */ + const size_t N_MAX_DIMENSIONS_ARRAY = BOB_MAX_DIM; + + /** + * @brief These are some type to element type conversions + */ + template<typename T> ElementType getElementType() { + return t_unknown; + } + + /** + * @brief Some specializations that convert type to element type. + */ + template<> inline ElementType getElementType<bool>() { return t_bool; } + template<> inline ElementType getElementType<int8_t>() { return t_int8; } + template<> inline ElementType getElementType<int16_t>() + { return t_int16; } + template<> inline ElementType getElementType<int32_t>() + { return t_int32; } + template<> inline ElementType getElementType<int64_t>() + { return t_int64; } + template<> inline ElementType getElementType<uint8_t>() + { return t_uint8; } + template<> inline ElementType getElementType<uint16_t>() + { return t_uint16; } + template<> inline ElementType getElementType<uint32_t>() + { return t_uint32; } + template<> inline ElementType getElementType<uint64_t>() + { return t_uint64; } + template<> inline ElementType getElementType<float>() + { return t_float32; } + template<> inline ElementType getElementType<double>() + { return t_float64; } + template<> inline ElementType getElementType<long double>() + { return t_float128; } + template<> inline ElementType getElementType<std::complex<float> >() + { return t_complex64; } + template<> 
inline ElementType getElementType<std::complex<double> >() + { return t_complex128; } + template<> inline ElementType getElementType<std::complex<long double> >() + { return t_complex256; } + + /** + * @brief These are some type to element size conversions + */ + template<typename T> size_t getElementSize() { + return 0; + } + + /** + * @brief Some specializations that convert the types we handle properly + */ + template<> inline size_t getElementSize<bool>() { return sizeof(bool); } + template<> inline size_t getElementSize<int8_t>() + { return sizeof(int8_t); } + template<> inline size_t getElementSize<int16_t>() + { return sizeof(int16_t); } + template<> inline size_t getElementSize<int32_t>() + { return sizeof(int32_t); } + template<> inline size_t getElementSize<int64_t>() + { return sizeof(int64_t); } + template<> inline size_t getElementSize<uint8_t>() + { return sizeof(uint8_t); } + template<> inline size_t getElementSize<uint16_t>() + { return sizeof(uint16_t); } + template<> inline size_t getElementSize<uint32_t>() + { return sizeof(uint32_t); } + template<> inline size_t getElementSize<uint64_t>() + { return sizeof(uint64_t); } + template<> inline size_t getElementSize<float>() + { return sizeof(float); } + template<> inline size_t getElementSize<double>() + { return sizeof(double); } + template<> inline size_t getElementSize<long double>() + { return sizeof(long double); } + template<> inline size_t getElementSize<std::complex<float> >() + { return sizeof(std::complex<float>); } + template<> inline size_t getElementSize<std::complex<double> >() + { return sizeof(std::complex<double>); } + template<> inline size_t getElementSize<std::complex<long double> >() + { return sizeof(std::complex<long double>); } + + /** + * @brief Returns the type size given the enumeration + */ + size_t getElementSize(ElementType t); + + /** + * @brief Gets a string representation of an element type value + */ + const char* stringize(ElementType t); + + /** + * @brief 
Equivalent to call stringize() on the result of + * getElementType<T>(). + */ + template<typename T> const char* stringize() { + return stringize(getElementType<T>()); + } + + /** + * @brief Returns the ElementType given the string representation + */ + ElementType unstringize(const char* name); + +}}}} + +#endif /* BOB_IO_BASE_ARRAY_TYPE_H */ diff --git a/bob/io/base/include/bob.io.base/array_utils.h b/bob/io/base/include/bob.io.base/array_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..8fdfa77b4d6a1bb181031152255ab4a5fd685153 --- /dev/null +++ b/bob/io/base/include/bob.io.base/array_utils.h @@ -0,0 +1,133 @@ +/** + * @date Tue Nov 8 15:34:31 2011 +0100 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief Utilities for converting data to-from blitz::Arrays and other + * goodies. + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#ifndef BOB_IO_BASE_ARRAY_UTILS_H +#define BOB_IO_BASE_ARRAY_UTILS_H + +#include <blitz/array.h> +#include <stdint.h> +#include <stdexcept> +#include <boost/format.hpp> + +#include <bob.core/cast.h> +#include <bob.io.base/array.h> + +namespace bob { namespace io { namespace base { namespace array { + + /** + * @brief Fills in shape and stride starting from a typeinfo object + */ + template <int N> void set_shape_and_stride(const typeinfo& info, + blitz::TinyVector<int,N>& shape, blitz::TinyVector<int,N>& stride) { + for (int k=0; k<N; ++k) { + shape[k] = info.shape[k]; + stride[k] = info.stride[k]; + } + } + + + /** + * @brief Takes a data pointer and assumes it is a C-style array for the + * defined type. Creates a wrapper as a blitz::Array<T,N> with the same + * number of dimensions and type. Notice that the blitz::Array<> created + * will have its memory tied to the passed buffer. In other words you have + * to make sure that the buffer outlives the returned blitz::Array<>. 
+ */ + template <typename T, int N> + blitz::Array<T,N> wrap(const interface& buf) { + + const typeinfo& type = buf.type(); + + if (!buf.ptr()) throw std::runtime_error("empty buffer"); + + if (type.dtype != bob::io::base::array::getElementType<T>()) { + boost::format m("cannot efficiently retrieve blitz::Array<%s,%d> from buffer of type '%s'"); + m % stringize<T>() % N % type.str(); + throw std::runtime_error(m.str()); + } + + if (type.nd != N) { + boost::format m("cannot retrieve blitz::Array<%s,%d> from buffer of type '%s'"); + m % stringize<T>() % N % type.str(); + throw std::runtime_error(m.str()); + } + + blitz::TinyVector<int,N> shape; + blitz::TinyVector<int,N> stride; + set_shape_and_stride(type, shape, stride); + + return blitz::Array<T,N>((T*)buf.ptr(), + shape, stride, blitz::neverDeleteData); + } + + + /** + * @brief Takes a data pointer and assumes it is a C-style array for the + * defined type. Creates a copy as a blitz::Array<T,N> with the same number + * of dimensions, but with a type as specified by you. If the type does not + * match the type of the original C-style array, a cast will happen. + * + * If a certain type cast is not supported. An appropriate exception will + * be raised. 
+ */ + template <typename T, int N> + blitz::Array<T,N> cast(const interface& buf) { + + const typeinfo& type = buf.type(); + + if (type.nd != N) { + boost::format m("cannot cast blitz::Array<%s,%d> from buffer of type '%s'"); + m % stringize<T>() % N % type.str(); + throw std::runtime_error(m.str()); + } + + switch (type.dtype) { + case bob::io::base::array::t_bool: + return bob::core::array::cast<T>(wrap<bool,N>(buf)); + case bob::io::base::array::t_int8: + return bob::core::array::cast<T>(wrap<int8_t,N>(buf)); + case bob::io::base::array::t_int16: + return bob::core::array::cast<T>(wrap<int16_t,N>(buf)); + case bob::io::base::array::t_int32: + return bob::core::array::cast<T>(wrap<int32_t,N>(buf)); + case bob::io::base::array::t_int64: + return bob::core::array::cast<T>(wrap<int64_t,N>(buf)); + case bob::io::base::array::t_uint8: + return bob::core::array::cast<T>(wrap<uint8_t,N>(buf)); + case bob::io::base::array::t_uint16: + return bob::core::array::cast<T>(wrap<uint16_t,N>(buf)); + case bob::io::base::array::t_uint32: + return bob::core::array::cast<T>(wrap<uint32_t,N>(buf)); + case bob::io::base::array::t_uint64: + return bob::core::array::cast<T>(wrap<uint64_t,N>(buf)); + case bob::io::base::array::t_float32: + return bob::core::array::cast<T>(wrap<float,N>(buf)); + case bob::io::base::array::t_float64: + return bob::core::array::cast<T>(wrap<double,N>(buf)); + case bob::io::base::array::t_float128: + return bob::core::array::cast<T>(wrap<long double,N>(buf)); + case bob::io::base::array::t_complex64: + return bob::core::array::cast<T>(wrap<std::complex<float>,N>(buf)); + case bob::io::base::array::t_complex128: + return bob::core::array::cast<T>(wrap<std::complex<double>,N>(buf)); + case bob::io::base::array::t_complex256: + return bob::core::array::cast<T>(wrap<std::complex<long double>,N>(buf)); + default: + break; + } + + //if we get to this point, there is nothing much we can do... 
+ throw std::runtime_error("invalid type on blitz buffer array casting -- debug me");
+
+ }
+
+}}}}
+
+#endif /* BOB_IO_BASE_ARRAY_UTILS_H */
diff --git a/bob/io/base/include/bob.io.base/blitz_array.h b/bob/io/base/include/bob.io.base/blitz_array.h
new file mode 100644
index 0000000000000000000000000000000000000000..9927e0040e8f56f94ecf400327d8ff1648de3ef8
--- /dev/null
+++ b/bob/io/base/include/bob.io.base/blitz_array.h
@@ -0,0 +1,261 @@
+/**
+ * @date Tue Nov 8 15:34:31 2011 +0100
+ * @author Andre Anjos <andre.anjos@idiap.ch>
+ *
+ * @brief A class that implements the polymorphic behaviour required when
+ * reading and writing blitz arrays to disk or memory.
+ *
+ * Copyright (C) Idiap Research Institute, Martigny, Switzerland
+ */
+
+#ifndef BOB_IO_BASE_BLITZ_ARRAY_H
+#define BOB_IO_BASE_BLITZ_ARRAY_H
+
+#include <stdexcept>
+#include <boost/make_shared.hpp>
+#include <boost/format.hpp>
+#include <blitz/array.h>
+
+#include <bob.core/check.h>
+#include <bob.core/cast.h>
+#include <bob.core/array_copy.h>
+
+#include <bob.io.base/array.h>
+#include <bob.io.base/array_utils.h>
+#include <bob.io.base/array_type.h>
+
+namespace bob { namespace io { namespace base { namespace array {
+
+ /**
+ * @brief A blitz::Array representation of an array.
+ */
+ class blitz_array: public interface {
+
+ public:
+
+ /**
+ * @brief Starts by referring to the data from another blitz array.
+ */
+ blitz_array(boost::shared_ptr<blitz_array> other);
+
+ /**
+ * @brief Starts by copying the data from another blitz array.
+ */
+ blitz_array(const blitz_array& other);
+
+ /**
+ * @brief Starts by referring to the data from another buffer.
+ */
+ blitz_array(boost::shared_ptr<interface> other);
+
+ /**
+ * @brief Starts by copying the data from another buffer.
+ */
+ blitz_array(const interface& other);
+
+ /**
+ * @brief Starts with an uninitialized, pre-allocated array.
+ */ + blitz_array(const typeinfo& info); + + /** + * @brief Borrows the given pointer - if you use this constructor, you + * must make sure the pointed data outlives this object. + */ + blitz_array(void* data, const typeinfo& info); + + /** + * @brief Destroyes me + */ + virtual ~blitz_array(); + + /** + * @brief Copies the data from another buffer. + */ + virtual void set(const interface& other); + + /** + * @brief Refers to the data of another buffer. + */ + virtual void set(boost::shared_ptr<interface> other); + + /** + * @brief Re-allocates this buffer taking into consideration new + * requirements. The internal memory should be considered uninitialized. + */ + virtual void set (const typeinfo& req); + + /** + * @brief Refers to the data of another blitz array. + */ + void set(boost::shared_ptr<blitz_array> other); + + /** + * @brief Element type + */ + virtual const typeinfo& type() const { return m_type; } + + /** + * @brief Borrows a reference from the underlying memory. This means + * this object continues to be responsible for deleting the memory and + * you should make sure that it outlives the usage of the returned + * pointer. + */ + virtual void* ptr() { return m_ptr; } + virtual const void* ptr() const { return m_ptr; } + + virtual boost::shared_ptr<void> owner() { return m_data; } + virtual boost::shared_ptr<const void> owner() const { return m_data; } + + + /****************************************************************** + * Blitz Array specific manipulations + ******************************************************************/ + + + /** + * @brief Starts me with new arbitrary data. Please note we refer to the + * given array. External modifications to the array memory will affect + * me. If you don't want that to be the case, use the const variant. + */ + template <typename T, int N> + blitz_array(boost::shared_ptr<blitz::Array<T,N> > data) { + set(data); + } + + /** + * @brief Starts me with new arbitrary data. 
Please note we copy the + * given array. External modifications to the array memory will not + * affect me. If you don't want that to be the case, start with a + * non-const reference. + */ + template <typename T, int N> + blitz_array(const blitz::Array<T,N>& data) { + set(data); + } + + /** + * @brief Starts me with new arbitrary data. Please note we don't copy + * the given array. + * @warning Any resize of the given blitz::Array after this call leads to + * unexpected results + */ + template <typename T, int N> + blitz_array(blitz::Array<T,N>& data) { + set(data); + } + + /** + * @brief This method will set my internal data to the value you + * specify. We will do this by referring to the data you gave. + */ + template <typename T, int N> + void set(boost::shared_ptr<blitz::Array<T,N> > data) { + + if (getElementType<T>() == t_unknown) + throw std::runtime_error("unsupported element type on blitz::Array<>"); + if (N > BOB_MAX_DIM) + throw std::runtime_error("unsupported number of dimensions on blitz::Array<>"); + + if (!isCContiguous(*data.get())) + throw std::runtime_error("cannot buffer'ize non-c contiguous array"); + + m_type.set(data); + + m_data = data; + m_ptr = reinterpret_cast<void*>(data->data()); + m_is_blitz = true; + } + + /** + * @brief This method will set my internal data to the value you + * specify. We will do this by copying the data you gave. + */ + template <typename T, int N> void set(const blitz::Array<T,N>& data) { + set(boost::make_shared<blitz::Array<T,N> >(ccopy(data))); + } + + /** + * @brief This method will set my internal data to the value you specify. + * We will do this by referencing the data you gave. + * @warning Any resize of the given blitz::Array after this call leads to + * unexpected results + */ + template <typename T, int N> void set(blitz::Array<T,N>& data) { + set(boost::make_shared<blitz::Array<T,N> >(data)); + } + + /** + * @brief This method returns a reference to my internal data. 
It is the
+ * fastest way to get access to my data because it involves no data
+ * copying. This method has two limitations:
+ *
+ * 1) You need to know the correct type and number of dimensions or I'll
+ * throw an exception.
+ *
+ * 2) If this buffer was started by referring to another buffer's data
+ * which is not a blitz array, an exception will be raised.
+ * Unfortunately, blitz::Array<>'s do not offer a management mechanism
+ * for tracking external data allocation. The exception can be avoided
+ * and the referencing mechanism forced if you set the flag "temporary"
+ * to "true". In this mode, this method will always succeed, but the
+ * object returned will have its lifetime associated to this buffer. In
+ * other words, you should make sure this buffer outlives the returned
+ * blitz::Array<T,N>.
+ */
+ template <typename T, int N> blitz::Array<T,N> get(bool temporary=false) {
+
+ if (m_is_blitz) {
+
+ if (!m_data) throw std::runtime_error("empty blitz array");
+
+ if (m_type.dtype != getElementType<T>()) {
+ boost::format m("cannot efficiently retrieve blitz::Array<%s,%d> from buffer of type '%s'");
+ m % stringize<T>() % N % m_type.str();
+ throw std::runtime_error(m.str());
+ }
+
+ if (m_type.nd != N) {
+ boost::format m("cannot retrieve blitz::Array<%s,%d> from buffer of type '%s'");
+ m % stringize<T>() % N % m_type.str();
+ throw std::runtime_error(m.str());
+ }
+
+ return *boost::static_pointer_cast<blitz::Array<T,N> >(m_data).get();
+ }
+
+ else {
+
+ if (temporary) { //returns a temporary reference
+ return bob::io::base::array::wrap<T,N>(*this);
+ }
+
+ else {
+ throw std::runtime_error("cannot get() external non-temporary non-blitz array buffer -- for a temporary object, set temporary=true; if you need the returned object to outlive this buffer; use copy() or cast()");
+ }
+ }
+
+ }
+
+ /**
+ * @brief This method returns a copy to my internal data (not a
+ * reference) in the type you wish.
It is the easiest method to use + * because I'll never throw, no matter which type you want to receive + * data at. Only get the number of dimensions right! + */ + template <typename T, int N> blitz::Array<T,N> cast() const { + return bob::core::array::cast<T,N>(*this); + } + + private: //representation + + typeinfo m_type; ///< type information + void* m_ptr; ///< pointer to the data + bool m_is_blitz; ///< true if initiated with a blitz::Array<> + boost::shared_ptr<void> m_data; ///< Pointer to the data owner + + }; + +}}}} + +#endif /* BOB_IO_BASE_BLITZ_ARRAY_H */ diff --git a/bob/io/base/include/bob.io.base/reorder.h b/bob/io/base/include/bob.io.base/reorder.h new file mode 100644 index 0000000000000000000000000000000000000000..09ebf50422392cb6da05b255c4d9738f5d71d445 --- /dev/null +++ b/bob/io/base/include/bob.io.base/reorder.h @@ -0,0 +1,94 @@ +/** + * @date Tue Nov 22 11:24:44 2011 +0100 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief Row-major to column-major reordering and vice-versa + * + * Copyright (C) Idiap Research Institute, Martigny, Switzerland + */ + +#ifndef BOB_IO_BASE_REORDER_H +#define BOB_IO_BASE_REORDER_H + +#include <stdint.h> + +#include <bob.io.base/array.h> + +namespace bob { namespace io { namespace base { + + /** + * Returns, on the first argument, the linear indexes by calculating the + * linear positions relative to both row-major and column-major order + * matrixes given a certain index accessing a position in the matrix and the + * matrix shape + * + * @param row The resulting row-major linear index. + * (row,col) is a 2-tuple with the results: row-major and + * column-major linear indexes + * @param col The resulting column-major linear index. (see above) + * @param i Index of the column. + * (i,j) a 2-tuple with the indexes as would be accessed + * [col][row]; this is the same as accessing the matrix like + * on directions [y][x] + * @param j Index of the row. 
(see above) + * @param shape a 2-tuple with the matrix shape like [col][row]; this is the + * same as thinking about the extends of the matrix like on directions + * [y][x] + * + * Detailed arithmetics with graphics and explanations can be found here: + * http://webster.cs.ucr.edu/AoA/Windows/HTML/Arraysa2.html + */ + void rc2d(size_t& row, size_t& col, const size_t i, const size_t j, + const size_t* shape); + + /** + * Same as above, but for a 3D array organized as [depth][column][row] + */ + void rc3d(size_t& row, size_t& col, const size_t i, const size_t j, + const size_t k, const size_t* shape); + + /** + * Same as above, but for a 4D array organized as [time][depth][column][row] + */ + void rc4d(size_t& row, size_t& col, const size_t i, const size_t j, + const size_t k, const size_t l, const size_t* shape); + + /** + * Converts the data from row-major order (C-Style) to column major order + * (Fortran style). Input parameters are the src data in row-major order, the + * destination (pre-allocated) array of the same size and the type + * information. + */ + void row_to_col_order(const void* src_, void* dst_, const + bob::io::base::array::typeinfo& info); + + /** + * Converts the data from column-major order (Fortran-Style) to row major + * order (C style), which is required by bob. Input parameters are the src + * data in column-major order, the destination (pre-allocated) array of the + * same size and the type information. + */ + void col_to_row_order(const void* src_, void* dst_, + const bob::io::base::array::typeinfo& info); + + /** + * Converts the data from row-major order (C-Style) to column major order + * (Fortran style). Input parameters are the src data in row-major order, the + * destination (pre-allocated) array of the same size and the type + * information. 
+ */ + void row_to_col_order_complex(const void* src_, void* dst_re_, + void* dst_im_, const bob::io::base::array::typeinfo& info); + + /** + * Converts the data from column-major order (Fortran-Style) to row major + * order (C style), which is required by bob. Input parameters are the src + * data in column-major order, the destination (pre-allocated) array of the + * same size and the type information. + */ + void col_to_row_order_complex(const void* src_re_, const void* src_im_, + void* dst_, const bob::io::base::array::typeinfo& info); + +}}} + +#endif /* BOB_IO_BASE_REORDER_H */ diff --git a/bob/io/base/include/bob.io.base/utils.h b/bob/io/base/include/bob.io.base/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..c42b1fac659dca6956dd241916aeeb2670c134c8 --- /dev/null +++ b/bob/io/base/include/bob.io.base/utils.h @@ -0,0 +1,105 @@ +/** + * @author Andre Anjos <andre.anjos@idiap.ch> + * @date Wed 3 Oct 07:46:49 2012 + * + * @brief Utilities for easy manipulation of filed data. + */ + +#ifndef BOB_IO_BASE_BASE_UTILS_H +#define BOB_IO_BASE_BASE_UTILS_H + +#include <boost/shared_ptr.hpp> + +#include <bob.io.base/File.h> + +namespace bob { namespace io { namespace base { + + /** + * Creates a new array codec using the filename extension to determine which + * codec to use. The opening mode is passed to the underlying registered File + * implementation. + * + * Here are the meanings of the mode flag: + * + * 'r': opens for reading only - no modifications can occur; it is an + * error to open a file that does not exist for read-only operations. + * 'w': opens for reading and writing, but truncates the file if it + * exists; it is not an error to open files that do not exist with + * this flag. + * 'a': opens for reading and writing - any type of modification can + * occur. If the file does not exist, this flag is effectively like + * 'w'. 
+ */ + boost::shared_ptr<File> open (const char* filename, char mode); + + /** + * Opens the file pretending it has a different extension (that is, using a + * different codec) then the one expected (if any). This allows you to write + * a file with the extension you want, but still using one of the available + * codecs. + */ + boost::shared_ptr<File> open (const char* filename, char mode, + const char* pretend_extension); + + /** + * Peeks the file and returns the typeinfo for reading individual frames (or + * samples) from the file. + * + * This method is equivalent to calling open() with 'r' as mode flag and then + * calling type() on the returned bob::io::base::File object. + */ + bob::io::base::array::typeinfo peek (const char* filename); + + /** + * Peeks the file and returns the typeinfo for reading the whole contents in + * a single shot. + * + * This method is equivalent to calling open() with 'r' as mode flag and then + * calling type_all() on the returned bob::io::base::File object. + */ + bob::io::base::array::typeinfo peek_all (const char* filename); + + /** + * Opens for reading and load all contents + * + * This method is equivalent to calling open() with 'r' as mode flag and then + * calling read_all() on the returned bob::io::base::File object. + */ + template <typename T, int N> blitz::Array<T,N> load (const char* filename) { + return open(filename, 'r')->read_all<T,N>(); + } + + /** + * Opens for reading and load a particular frame (or sample) + * + * This method is equivalent to calling open() with 'r' as mode flag and then + * calling read(index) on the returned bob::io::base::File object. + */ + template <typename T, int N> blitz::Array<T,N> load (const char* filename, size_t index) { + return open(filename, 'r')->read<T,N>(index); + } + + /** + * Opens for appending and add an array to it + * + * This method is equivalent to calling open() with 'a' as mode flag and then + * calling append(data) on the returned bob::io::base::File object. 
+ */ + template <typename T, int N> void append (const char* filename, const blitz::Array<T,N>& data) { + open(filename, 'a')->append(data); + } + + /** + * Opens for writing and write an array to it. If the file exists before the + * call to this method, it is truncated. + * + * This method is equivalent to calling open() with 'w' as mode flag and then + * calling write(data) on the returned bob::io::base::File object. + */ + template <typename T, int N> void save (const char* filename, const blitz::Array<T,N>& data) { + open(filename, 'w')->write(data); + } + +}}} + +#endif /* BOB_IO_BASE_BASE_UTILS_H */ diff --git a/bob/io/base/main.cpp b/bob/io/base/main.cpp index 7e97111e8c6326856a237a23f94e0bb29bb8a673..f7dfb87c59465b2d49985e641f6a765e16d22e61 100644 --- a/bob/io/base/main.cpp +++ b/bob/io/base/main.cpp @@ -14,7 +14,51 @@ #include <bob.blitz/capi.h> #include <bob.blitz/cleanup.h> +/** + * Creates an str object, from a C or C++ string. Returns a **new + * reference**. + */ +static PyObject* make_object(const char* s) { + return Py_BuildValue("s", s); +} + +static PyObject* PyBobIo_Extensions(PyObject*) { + + typedef std::map<std::string, std::string> map_type; + const map_type& table = bob::io::base::CodecRegistry::getExtensions(); + + PyObject* retval = PyDict_New(); + if (!retval) return 0; + auto retval_ = make_safe(retval); + + for (auto it=table.begin(); it!=table.end(); ++it) { + PyObject* pyvalue = make_object(it->second.c_str()); + if (!pyvalue) return 0; + if (PyDict_SetItemString(retval, it->first.c_str(), pyvalue) != 0) { + return 0; + } + } + + Py_INCREF(retval); + return retval; + +} + +PyDoc_STRVAR(s_extensions_str, "extensions"); +PyDoc_STRVAR(s_extensions_doc, +"extensions() -> dict\n\ +\n\ +Returns a dictionary containing all extensions and descriptions\n\ +currently stored on the global codec registry\n\ +"); + static PyMethodDef module_methods[] = { + { + s_extensions_str, + (PyCFunction)PyBobIo_Extensions, + METH_NOARGS, + s_extensions_doc, 
+ }, {0} /* Sentinel */ }; diff --git a/bob/io/base/test_hdf5.py b/bob/io/base/test_hdf5.py index dfd2da2a9efaeecf84198745876deacc85b39d82..d1c02e52370b81ed5185b9a38106a1ce8913ad85 100644 --- a/bob/io/base/test_hdf5.py +++ b/bob/io/base/test_hdf5.py @@ -15,7 +15,6 @@ import random import nose.tools from . import HDF5File, load, save, peek_all, test_utils -from .test_utils import bob_at_least def read_write_check(outfile, dname, data, dtype=None): """Tests scalar input/output on HDF5 files""" @@ -314,7 +313,6 @@ def test_string_support(): del outfile os.unlink(tmpname) -@bob_at_least('1.3.0a0') def test_string_attribute_support(): try: diff --git a/bob/io/base/test_utils.py b/bob/io/base/test_utils.py index 3d9eda934193f44426de94a31e6948f4cc065553..6f1e3072bcd45255dccaabbb3066e6740a349edb 100644 --- a/bob/io/base/test_utils.py +++ b/bob/io/base/test_utils.py @@ -49,31 +49,6 @@ def temporary_filename(prefix='bobtest_', suffix='.hdf5'): os.unlink(name) return name -def bob_at_least(version_geq): - '''Decorator to check if at least a certain version of Bob is installed - - To use this, decorate your test routine with something like: - - .. code-block:: python - - @bob_at_least('1.2.2') - - ''' - - def test_wrapper(test): - - @functools.wraps(test) - def wrapper(*args, **kwargs): - from .version import externals - inst = SV(externals['Bob'][0]) - if inst < version_geq: - raise nose.plugins.skip.SkipTest('Bob version installed (%s) is smaller than required for this test (%s)' % (externals['Bob'][0], version_geq)) - return test(*args, **kwargs) - - return wrapper - - return test_wrapper - def extension_available(extension): '''Decorator to check if a extension is available before enabling a test''' @@ -81,7 +56,7 @@ def extension_available(extension): @functools.wraps(test) def wrapper(*args, **kwargs): - from .version import extensions + from . 
import extensions if extension in extensions(): return test(*args, **kwargs) else: diff --git a/bob/io/base/version.cpp b/bob/io/base/version.cpp index d545e2888ac633b5a1b694325b4b4f23126acaf4..b3864301badd6f91bd41e53183cd553cd1930e3d 100644 --- a/bob/io/base/version.cpp +++ b/bob/io/base/version.cpp @@ -15,16 +15,15 @@ #include <string> #include <cstdlib> +#include <blitz/blitz.h> #include <boost/preprocessor/stringize.hpp> #include <boost/version.hpp> #include <boost/format.hpp> - -#include <bob/config.h> -#include <bob/io/CodecRegistry.h> +#include <hdf5.h> #include <bob.blitz/capi.h> #include <bob.blitz/cleanup.h> -#include <hdf5.h> +#include <bob.core/config.h> static int dict_set(PyObject* d, const char* key, const char* value) { PyObject* v = Py_BuildValue("s", value); @@ -43,14 +42,6 @@ static int dict_steal(PyObject* d, const char* key, PyObject* value) { return 0; //a problem occurred } -/** - * Creates an str object, from a C or C++ string. Returns a **new - * reference**. - */ -static PyObject* make_object(const char* s) { - return Py_BuildValue("s", s); -} - /*********************************************************** * Version number generation ***********************************************************/ @@ -63,13 +54,6 @@ static PyObject* hdf5_version() { return Py_BuildValue("s", f.str().c_str()); } -/** - * Bob version, API version and platform - */ -static PyObject* bob_version() { - return Py_BuildValue("sis", BOB_VERSION, BOB_API_VERSION, BOB_PLATFORM); -} - /** * Describes the version of Boost libraries installed */ @@ -126,13 +110,19 @@ static PyObject* bob_blitz_version() { return Py_BuildValue("{ss}", "api", BOOST_PP_STRINGIZE(BOB_BLITZ_API_VERSION)); } +/** + * bob.core c/c++ api version + */ +static PyObject* bob_core_version() { + return Py_BuildValue("{ss}", "api", BOOST_PP_STRINGIZE(BOB_CORE_API_VERSION)); +} + static PyObject* build_version_dictionary() { PyObject* retval = PyDict_New(); if (!retval) return 0; auto retval_ =
make_safe(retval); - if (!dict_steal(retval, "Bob", bob_version())) return 0; if (!dict_steal(retval, "HDF5", hdf5_version())) return 0; if (!dict_steal(retval, "Boost", boost_version())) return 0; if (!dict_steal(retval, "Compiler", compiler_version())) return 0; @@ -140,49 +130,14 @@ static PyObject* build_version_dictionary() { if (!dict_steal(retval, "NumPy", numpy_version())) return 0; if (!dict_set(retval, "Blitz++", BZ_VERSION)) return 0; if (!dict_steal(retval, "bob.blitz", bob_blitz_version())) return 0; + if (!dict_steal(retval, "bob.core", bob_core_version())) return 0; Py_INCREF(retval); Py_INCREF(retval); return retval; } -static PyObject* PyBobIo_Extensions(PyObject*) { - - typedef std::map<std::string, std::string> map_type; - const map_type& table = bob::io::CodecRegistry::getExtensions(); - - PyObject* retval = PyDict_New(); - if (!retval) return 0; - auto retval_ = make_safe(retval); - - for (auto it=table.begin(); it!=table.end(); ++it) { - PyObject* pyvalue = make_object(it->second.c_str()); - if (!pyvalue) return 0; - if (PyDict_SetItemString(retval, it->first.c_str(), pyvalue) != 0) { - return 0; - } - } - - Py_INCREF(retval); - return retval; - -} - -PyDoc_STRVAR(s_extensions_str, "extensions"); -PyDoc_STRVAR(s_extensions_doc, -"extensions() -> dict\n\ -\n\ -Returns a dictionary containing all extensions and descriptions\n\ -currently stored on the global codec registry\n\ -"); - static PyMethodDef module_methods[] = { - { - s_extensions_str, - (PyCFunction)PyBobIo_Extensions, - METH_NOARGS, - s_extensions_doc, - }, {0} /* Sentinel */ }; diff --git a/buildout.cfg b/buildout.cfg index 82733dba98b02b958a8516d1eea547638d4a42cb..798515bd0c47542cf6e75f92a4d13b50927dd8d5 100644 --- a/buildout.cfg +++ b/buildout.cfg @@ -10,6 +10,7 @@ extensions = bob.buildout auto-checkout = * develop = src/bob.extension src/bob.blitz + src/bob.core . 
; options for bob.buildout extension @@ -21,6 +22,7 @@ prefixes = /idiap/group/torch5spro/releases/preview/install/linux-x86_64-release [sources] bob.extension = git https://github.com/bioidiap/bob.extension bob.blitz = git https://github.com/bioidiap/bob.blitz +bob.core = git https://github.com/bioidiap/bob.core [scripts] recipe = bob.buildout:scripts diff --git a/setup.py b/setup.py index d0737e6ddb8e2827870faa3779f599c5c5f1fdb0..d3eac6e1516469f234f98bcff7b8fcf8692bfa41 100644 --- a/setup.py +++ b/setup.py @@ -4,17 +4,132 @@ # Mon 16 Apr 08:18:08 2012 CEST from setuptools import setup, find_packages, dist -dist.Distribution(dict(setup_requires=['bob.blitz'])) +dist.Distribution(dict(setup_requires=['bob.blitz', 'bob.core'])) +from distutils.version import LooseVersion +from bob.extension.utils import egrep, find_header, find_library from bob.blitz.extension import Extension +import bob.core import os package_dir = os.path.dirname(os.path.realpath(__file__)) package_dir = os.path.join(package_dir, 'bob', 'io', 'base', 'include') -include_dirs = [package_dir] +include_dirs = [package_dir, bob.core.get_include()] -packages = ['bob-io >= 2.0.0a2'] +packages = ['bob-core >= 1.2.2'] version = '2.0.0a0' +def libhdf5_version(header): + + version = egrep(header, r"#\s*define\s+H5_VERSION\s+\"([\d\.]+)\"") + if not len(version): return None + return version[0].group(1) + +class hdf5: + + def __init__ (self, requirement='', only_static=False): + """ + Searches for libhdf5 in stock locations. Allows user to override. + + If the user sets the environment variable BOB_PREFIX_PATH, that prefixes + the standard path locations. + + Parameters: + + requirement, str + A string, indicating a version requirement for this library. For example, + ``'>= 8.2'``. + + only_static, boolean + A flag, that indicates if we intend to link against the static library + only. This will trigger our library search to disconsider shared + libraries when searching.
+ """ + + self.name = 'hdf5' + header = 'hdf5.h' + + candidates = find_header(header) + + if not candidates: + raise RuntimeError("could not find %s's `%s' - have you installed %s on this machine?" % (self.name, header, self.name)) + + found = False + + if not requirement: + self.include_directory = os.path.dirname(candidates[0]) + directory = os.path.dirname(candidates[0]) + version_header = os.path.join(directory, 'H5pubconf.h') + self.version = libhdf5_version(version_header) + found = True + + else: + + # requirement is 'operator' 'version' + operator, required = [k.strip() for k in requirement.split(' ', 1)] + + # now check for user requirements + for candidate in candidates: + directory = os.path.dirname(candidate) + version_header = os.path.join(directory, 'H5pubconf.h') + version = libhdf5_version(version_header) + available = LooseVersion(version) + if (operator == '<' and available < required) or \ + (operator == '<=' and available <= required) or \ + (operator == '>' and available > required) or \ + (operator == '>=' and available >= required) or \ + (operator == '==' and available == required): + self.include_directory = os.path.dirname(candidate) + self.version = version + found = True + break + + if not found: + raise RuntimeError("could not find the required (%s) version of %s on the file system (looked at: %s)" % (requirement, self.name, ', '.join(candidates))) + + # normalize + self.include_directory = os.path.normpath(self.include_directory) + + # find library + prefix = os.path.dirname(os.path.dirname(self.include_directory)) + module = 'hdf5' + candidates = find_library(module, version=self.version, prefixes=[prefix], only_static=only_static) + + if not candidates: + raise RuntimeError("cannot find required %s binary module `%s' - make sure hdf5 is installed on `%s'" % (self.name, module, prefix)) + + # libraries + self.libraries = [] + name, ext = os.path.splitext(os.path.basename(candidates[0])) + if ext in ['.so', '.a', '.dylib', '.dll']: + 
self.libraries.append(name[3:]) #strip 'lib' from the name + else: #link against the whole thing + self.libraries.append(':' + os.path.basename(candidates[0])) + + # library path + self.library_directory = os.path.dirname(candidates[0]) + + def macros(self): + return [ + ('HAVE_%s' % self.name.upper(), '1'), + ('%s_VERSION' % self.name.upper(), '"%s"' % self.version), + ] + + +hdf5_pkg = hdf5() + +extra_compile_args = [ + '-isystem', hdf5_pkg.include_directory, + ] + +library_dirs = [ + hdf5_pkg.library_directory, + ] + +libraries = hdf5_pkg.libraries + +define_macros = hdf5_pkg.macros() + + setup( name='bob.io.base', @@ -48,10 +162,32 @@ setup( ], packages = packages, include_dirs = include_dirs, + define_macros = define_macros, + extra_compile_args = extra_compile_args, version = version, ), Extension("bob.io.base._library", [ + "bob/io/base/cpp/CodecRegistry.cpp", + "bob/io/base/cpp/CSVFile.cpp", + "bob/io/base/cpp/File.cpp", + "bob/io/base/cpp/HDF5ArrayFile.cpp", + "bob/io/base/cpp/HDF5Attribute.cpp", + "bob/io/base/cpp/HDF5Dataset.cpp", + "bob/io/base/cpp/HDF5File.cpp", + "bob/io/base/cpp/HDF5Group.cpp", + "bob/io/base/cpp/HDF5Types.cpp", + "bob/io/base/cpp/HDF5Utils.cpp", + "bob/io/base/cpp/reorder.cpp", + "bob/io/base/cpp/T3File.cpp", + "bob/io/base/cpp/TensorArrayFile.cpp", + "bob/io/base/cpp/TensorFileHeader.cpp", + "bob/io/base/cpp/utils.cpp", + "bob/io/base/cpp/TensorFile.cpp", + "bob/io/base/cpp/array.cpp", + "bob/io/base/cpp/array_type.cpp", + "bob/io/base/cpp/blitz_array.cpp", + "bob/io/base/bobskin.cpp", "bob/io/base/codec.cpp", "bob/io/base/file.cpp", @@ -60,6 +196,10 @@ setup( ], packages = packages, include_dirs = include_dirs, + library_dirs = library_dirs, + libraries = libraries, + define_macros = define_macros, + extra_compile_args = extra_compile_args, version = version, ), ],