[ VIGRA Homepage | Function Index | Class Index | Namespaces | File List | Main Page ]

multi_array_chunked_hdf5.hxx VIGRA

1 /************************************************************************/
2 /* */
3 /* Copyright 2012-2014 by Ullrich Koethe and Thorben Kroeger */
4 /* */
5 /* This file is part of the VIGRA computer vision library. */
6 /* The VIGRA Website is */
7 /* http://hci.iwr.uni-heidelberg.de/vigra/ */
8 /* Please direct questions, bug reports, and contributions to */
9 /* ullrich.koethe@iwr.uni-heidelberg.de or */
10 /* vigra@informatik.uni-hamburg.de */
11 /* */
12 /* Permission is hereby granted, free of charge, to any person */
13 /* obtaining a copy of this software and associated documentation */
14 /* files (the "Software"), to deal in the Software without */
15 /* restriction, including without limitation the rights to use, */
16 /* copy, modify, merge, publish, distribute, sublicense, and/or */
17 /* sell copies of the Software, and to permit persons to whom the */
18 /* Software is furnished to do so, subject to the following */
19 /* conditions: */
20 /* */
21 /* The above copyright notice and this permission notice shall be */
22 /* included in all copies or substantial portions of the */
23 /* Software. */
24 /* */
25 /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND */
26 /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES */
27 /* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND */
28 /* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT */
29 /* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, */
30 /* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING */
31 /* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR */
32 /* OTHER DEALINGS IN THE SOFTWARE. */
33 /* */
34 /************************************************************************/
35 
36 #ifndef VIGRA_MULTI_ARRAY_CHUNKED_HDF5_HXX
37 #define VIGRA_MULTI_ARRAY_CHUNKED_HDF5_HXX
38 
39 #include <queue>
40 
41 #include "multi_array_chunked.hxx"
42 #include "hdf5impex.hxx"
43 
44 // Bounds checking Macro used if VIGRA_CHECK_BOUNDS is defined.
45 #ifdef VIGRA_CHECK_BOUNDS
46 #define VIGRA_ASSERT_INSIDE(diff) \
47  vigra_precondition(this->isInside(diff), "Index out of bounds")
48 #else
49 #define VIGRA_ASSERT_INSIDE(diff)
50 #endif
51 
52 namespace vigra {
53 
54 /** \addtogroup ChunkedArrayClasses
55 */
56 //@{
57 
58 /** \weakgroup ParallelProcessing
59  \sa ChunkedArrayHDF5
60 */
61 
62 /** Implement ChunkedArray as a chunked dataset in an HDF5 file.
63 
64  <b>\#include</b> <vigra/multi_array_chunked_hdf5.hxx> <br/>
65  Namespace: vigra
66 
67  This uses the native chunking and compression functionality provided by the
68  HDF5 library. Note: This file must only be included when the HDF5 headers
69  and libraries are installed on the system.
70 */
71 template <unsigned int N, class T, class Alloc = std::allocator<T> >
73 : public ChunkedArray<N, T>
74 {
75  /* REMARKS
76  Alternatives are:
77  * Back chunks by HDF5 chunks, possibly using on-the-fly compression. This
78  is in particular useful for existing HDF5 files.
79  * Back chunks by HDF5 datasets. This can be combined with compression
80  (both explicit and on-the-fly) or with memory mapping (using the
81  function H5Dget_offset() to get the offset from the beginning of the file).
82  */
83 
84  public:
85 
86  class Chunk
87  : public ChunkBase<N, T>
88  {
89  public:
90  typedef typename MultiArrayShape<N>::type shape_type;
91  typedef T value_type;
92  typedef value_type * pointer;
93  typedef value_type & reference;
94 
95  Chunk(shape_type const & shape, shape_type const & start,
96  ChunkedArrayHDF5 * array, Alloc const & alloc)
97  : ChunkBase<N, T>(detail::defaultStride(shape))
98  , shape_(shape)
99  , start_(start)
100  , array_(array)
101  , alloc_(alloc)
102  {}
103 
104  ~Chunk()
105  {
106  write();
107  }
108 
109  std::size_t size() const
110  {
111  return prod(shape_);
112  }
113 
114  void write(bool deallocate = true)
115  {
116  if(this->pointer_ != 0)
117  {
118  if(!array_->file_.isReadOnly())
119  {
120  herr_t status = array_->file_.writeBlock(array_->dataset_, start_,
121  MultiArrayView<N, T>(shape_, this->strides_, this->pointer_));
122  vigra_postcondition(status >= 0,
123  "ChunkedArrayHDF5: write to dataset failed.");
124  }
125  if(deallocate)
126  {
127  alloc_.deallocate(this->pointer_, this->size());
128  this->pointer_ = 0;
129  }
130  }
131  }
132 
133  pointer read()
134  {
135  if(this->pointer_ == 0)
136  {
137  this->pointer_ = alloc_.allocate(this->size());
138  herr_t status = array_->file_.readBlock(array_->dataset_, start_, shape_,
139  MultiArrayView<N, T>(shape_, this->strides_, this->pointer_));
140  vigra_postcondition(status >= 0,
141  "ChunkedArrayHDF5: read from dataset failed.");
142  }
143  return this->pointer_;
144  }
145 
146  shape_type shape_, start_;
147  ChunkedArrayHDF5 * array_;
148  Alloc alloc_;
149 
150  private:
151  Chunk & operator=(Chunk const &);
152  };
153 
154  typedef ChunkedArray<N, T> base_type;
156  typedef typename ChunkStorage::difference_type shape_type;
157  typedef T value_type;
158  typedef value_type * pointer;
159  typedef value_type & reference;
160 
161  /** \brief Construct with given 'shape', 'chunk_shape' and 'options',
162  using 'alloc' to manage the in-memory version of the data..
163 
164  The data are placed in 'file' at the internal path 'dataset'. Argument
165  'mode' must be one of the following:
166  <ul>
167  <li>HDF5File::New: Create new dataset, possibly deleting any existing content.
168  It is an error to request this mode when the entire
169  'file' is read-only.
170  <li>HDF5File::Replace: Same as New.
171  <li>HDF5File::ReadWrite: Open the dataset for reading and writing. Create
172  the datset if it doesn't exist. It is an error
173  to request this mode when 'file' is read-only.
174  <li>HDF5File::ReadOnly: Open the dataset for reading. It is an error to
175  request this mode when the dataset doesn't exist.
176  <li>HDF5File::Default: Resolves to ReadOnly when the dataset exists, and
177  to New otherwise.
178  </ul>
179  The supported compression algorithms are:
180  <ul>
181  <li>ZLIB_FAST: Fast compression using 'zlib' (slower than LZ4, but higher compression).
182  <li>ZLIB_BEST: Best compression using 'zlib', slow.
183  <li>ZLIB_NONE: Use 'zlib' format without compression.
184  <li>DEFAULT_COMPRESSION: Same as ZLIB_FAST.
185  </ul>
186  */
187  ChunkedArrayHDF5(HDF5File const & file, std::string const & dataset,
188  HDF5File::OpenMode mode,
189  shape_type const & shape,
190  shape_type const & chunk_shape=shape_type(),
191  ChunkedArrayOptions const & options = ChunkedArrayOptions(),
192  Alloc const & alloc = Alloc())
193  : ChunkedArray<N, T>(shape, chunk_shape, options),
194  file_(file),
195  dataset_name_(dataset),
196  dataset_(),
197  compression_(options.compression_method),
198  alloc_(alloc)
199  {
200  init(mode);
201  }
202 
203  /** \brief Construct for an already existing dataset with given 'options',
204  using 'alloc' to manage the in-memory version of the data.
205 
206  The data must be located in 'file' at the internal path 'dataset'. The
207  array's shape and chunk_shape are read from the file. It is an error
208  to use this constructor when 'dataset' doesn't exist.
209 
210  Argument 'mode' must be one of the following:
211  <ul>
212  <li>HDF5File::ReadWrite: Open the dataset for reading and writing. It is an error
213  to request this mode when 'file' is read-only.
214  <li>HDF5File::ReadOnly: Open the dataset for reading (default).
215  <li>HDF5File::Default: Same as ReadOnly.
216  </ul>
217  The supported compression algorithms are:
218  <ul>
219  <li>ZLIB_FAST: Fast compression using 'zlib' (slower than LZ4, but higher compression).
220  <li>ZLIB_BEST: Best compression using 'zlib', slow.
221  <li>ZLIB_NONE: Use 'zlib' format without compression.
222  <li>DEFAULT_COMPRESSION: Same as ZLIB_FAST.
223  </ul>
224  */
225  ChunkedArrayHDF5(HDF5File const & file, std::string const & dataset,
226  HDF5File::OpenMode mode = HDF5File::ReadOnly,
227  ChunkedArrayOptions const & options = ChunkedArrayOptions(),
228  Alloc const & alloc = Alloc())
229  : ChunkedArray<N, T>(shape_type(), shape_type(), options),
230  file_(file),
231  dataset_name_(dataset),
232  dataset_(),
233  compression_(options.compression_method),
234  alloc_(alloc)
235  {
236  init(mode);
237  }
238 
239  void init(HDF5File::OpenMode mode)
240  {
241  bool exists = file_.existsDataset(dataset_name_);
242 
243  if(mode == HDF5File::Replace)
244  {
245  mode = HDF5File::New;
246  }
247  else if(mode == HDF5File::Default)
248  {
249  if(exists)
250  mode = HDF5File::ReadOnly;
251  else
252  mode = HDF5File::New;
253  }
254 
255  if(mode == HDF5File::ReadOnly)
256  file_.setReadOnly();
257  else
258  vigra_precondition(!file_.isReadOnly(),
259  "ChunkedArrayHDF5(): 'mode' is incompatible with read-only file.");
260 
261  vigra_precondition(exists || !file_.isReadOnly(),
262  "ChunkedArrayHDF5(): dataset does not exist, but file is read-only.");
263 
264  if(!exists || mode == HDF5File::New)
265  {
266  // FIXME: set rdcc_nbytes to 0 (disable cache, because we don't
267  // need two caches
268  // H5Pset_chunk_cache (dapl, rdcc_nslots, rdcc_nbytes, rdcc_w0);
269  // Chunk cache size (rdcc_nbytes) should be large
270  // enough to hold all the chunks in a selection
271  // * If this is not possible, it may be best to disable chunk
272  // caching altogether (set rdcc_nbytes to 0)
273  // * rdcc_slots should be a prime number that is at
274  // least 10 to 100 times the number of chunks that can fit
275  // into rdcc_nbytes
276  // * rdcc_w0 should be set to 1 if chunks that have been
277  // fully read/written will never be read/written again
278  //
279  // the above may be WRONG in general - it may only apply if the
280  // chunk size in the file matches the chunk size in the CachedArray.
281  // Otherwise, make sure that the file cache can hold at least as many
282  // chunks as are needed for a single array chunk.
283  if(compression_ == DEFAULT_COMPRESSION)
284  compression_ = ZLIB_FAST;
285  vigra_precondition(compression_ != LZ4,
286  "ChunkedArrayHDF5(): HDF5 does not support LZ4 compression.");
287 
288  vigra_precondition(this->size() > 0,
289  "ChunkedArrayHDF5(): invalid shape.");
290  typename detail::HDF5TypeTraits<T>::value_type init(this->fill_scalar_);
291  dataset_ = file_.createDataset<N, T>(dataset_name_,
292  this->shape_,
293  init,
294  this->chunk_shape_,
295  compression_);
296  }
297  else
298  {
299  dataset_ = file_.getDatasetHandleShared(dataset_name_);
300 
301  // check shape
302  ArrayVector<hsize_t> fileShape(file_.getDatasetShape(dataset_name_));
303  typedef detail::HDF5TypeTraits<T> TypeTraits;
304  if(TypeTraits::numberOfBands() > 1)
305  {
306  vigra_precondition(fileShape.size() == N+1,
307  "ChunkedArrayHDF5(file, dataset): dataset has wrong dimension.");
308  vigra_precondition(fileShape[0] == static_cast<unsigned>(TypeTraits::numberOfBands()),
309  "ChunkedArrayHDF5(file, dataset): dataset has wrong number of bands.");
310  shape_type shape(fileShape.begin()+1);
311  if(this->size() > 0)
312  {
313  vigra_precondition(shape == this->shape_,
314  "ChunkedArrayHDF5(file, dataset, shape): shape mismatch between dataset and shape argument.");
315  }
316  else
317  {
318  this->shape_ = shape;
319  }
320  }
321  else
322  {
323  vigra_precondition(fileShape.size() == N,
324  "ChunkedArrayHDF5(file, dataset): dataset has wrong dimension.");
325  shape_type shape(fileShape.begin());
326  if(this->size() > 0)
327  {
328  vigra_precondition(shape == this->shape_,
329  "ChunkedArrayHDF5(file, dataset, shape): shape mismatch between dataset and shape argument.");
330  }
331  else
332  {
333  this->shape_ = shape;
334  ChunkStorage(detail::computeChunkArrayShape(shape, this->bits_, this->mask_)).swap(this->handle_array_);
335  }
336  }
337  typename ChunkStorage::iterator i = this->handle_array_.begin(),
338  end = this->handle_array_.end();
339  for(; i != end; ++i)
340  {
341  i->chunk_state_.store(base_type::chunk_asleep);
342  }
343  }
344  }
345 
347  {
348  closeImpl(true);
349  }
350 
351  void close()
352  {
353  closeImpl(false);
354  }
355 
356  void closeImpl(bool force_destroy)
357  {
358  flushToDiskImpl(true, force_destroy);
359  file_.close();
360  }
361 
362  void flushToDisk()
363  {
364  flushToDiskImpl(false, false);
365  }
366 
367  void flushToDiskImpl(bool destroy, bool force_destroy)
368  {
369  if(file_.isReadOnly())
370  return;
371 
372  threading::lock_guard<threading::mutex> guard(*this->chunk_lock_);
373  typename ChunkStorage::iterator i = this->handle_array_.begin(),
374  end = this->handle_array_.end();
375  if(destroy && !force_destroy)
376  {
377  for(; i != end; ++i)
378  {
379  vigra_precondition(i->chunk_state_.load() <= 0,
380  "ChunkedArrayHDF5::close(): cannot close file because there are active chunks.");
381  }
382  i = this->handle_array_.begin();
383  }
384  for(; i != end; ++i)
385  {
386  Chunk * chunk = static_cast<Chunk*>(i->pointer_);
387  if(!chunk)
388  continue;
389  if(destroy)
390  {
391  delete chunk;
392  i->pointer_ = 0;
393  }
394  else
395  {
396  chunk->write(false);
397  }
398  }
399  file_.flushToDisk();
400  }
401 
402  virtual bool isReadOnly() const
403  {
404  return file_.isReadOnly();
405  }
406 
407  virtual pointer loadChunk(ChunkBase<N, T> ** p, shape_type const & index)
408  {
409  vigra_precondition(file_.isOpen(),
410  "ChunkedArrayHDF5::loadChunk(): file was already closed.");
411  if(*p == 0)
412  {
413  *p = new Chunk(this->chunkShape(index), index*this->chunk_shape_, this, alloc_);
414  this->overhead_bytes_ += sizeof(Chunk);
415  }
416  return static_cast<Chunk *>(*p)->read();
417  }
418 
419  virtual bool unloadChunk(ChunkBase<N, T> * chunk, bool /* destroy */)
420  {
421  if(!file_.isOpen())
422  return true;
423  static_cast<Chunk *>(chunk)->write();
424  return false;
425  }
426 
427  virtual std::string backend() const
428  {
429  return "ChunkedArrayHDF5<'" + file_.filename() + "/" + dataset_name_ + "'>";
430  }
431 
432  virtual std::size_t dataBytes(ChunkBase<N,T> * c) const
433  {
434  return c->pointer_ == 0
435  ? 0
436  : static_cast<Chunk*>(c)->size()*sizeof(T);
437  }
438 
439  virtual std::size_t overheadBytesPerChunk() const
440  {
441  return sizeof(Chunk) + sizeof(SharedChunkHandle<N, T>);
442  }
443 
444  std::string fileName() const
445  {
446  return file_.filename();
447  }
448 
449  std::string datasetName() const
450  {
451  return dataset_name_;
452  }
453 
454  HDF5File file_;
455  std::string dataset_name_;
456  HDF5HandleShared dataset_;
457  CompressionMethod compression_;
458  Alloc alloc_;
459 };
460 
461 //@}
462 
463 } // namespace vigra
464 
465 #undef VIGRA_ASSERT_INSIDE
466 
467 #endif /* VIGRA_MULTI_ARRAY_CHUNKED_HDF5_HXX */
iterator end()
Definition: multi_array.hxx:1883
std::size_t dataBytes() const
Bytes of main memory occupied by the array's data.
Definition: multi_array_chunked.hxx:1674
Option object for ChunkedArray construction.
Definition: multi_array_chunked.hxx:1267
ArrayVector< hsize_t > getDatasetShape(std::string datasetName) const
Get the shape of each dimension of a certain dataset.
Definition: hdf5impex.hxx:1371
MultiArrayIndex size() const
Return the number of elements in this array.
ChunkedArrayHDF5(HDF5File const &file, std::string const &dataset, HDF5File::OpenMode mode=HDF5File::ReadOnly, ChunkedArrayOptions const &options=ChunkedArrayOptions(), Alloc const &alloc=Alloc())
Construct for an already existing dataset with given 'options', using 'alloc' to manage the in-memory...
Definition: multi_array_chunked_hdf5.hxx:225
view_type::iterator iterator
Definition: multi_array.hxx:2494
iterator begin()
Definition: multi_array.hxx:1867
Main MultiArray class containing the memory management.
Definition: multi_array.hxx:2420
shape_type const & shape() const
Return the shape in this array.
HDF5HandleShared getDatasetHandleShared(std::string const &datasetName) const
Obtain a shared HDF5 handle of a dataset.
Definition: hdf5impex.hxx:1466
Interface and base class for chunked arrays.
Definition: multi_array_chunked.hxx:463
HDF5HandleShared createDataset(std::string datasetName, TinyVector< MultiArrayIndex, N > const &shape, typename detail::HDF5TypeTraits< T >::value_type init=typename detail::HDF5TypeTraits< T >::value_type(), TinyVector< MultiArrayIndex, N > const &chunkSize=(TinyVector< MultiArrayIndex, N >()), int compressionParameter=0)
Create a new dataset. This function can be used to create a dataset filled with a default value init...
Definition: hdf5impex.hxx:2709
view_type::difference_type difference_type
Definition: multi_array.hxx:2468
shape_type const & chunkShape() const
Return the global chunk shape.
std::string filename() const
Get the name of the associated file.
Definition: hdf5impex.hxx:1324
NumericTraits< V >::Promote prod(TinyVectorBase< V, SIZE, D1, D2 > const &l)
product of the vector's elements
Definition: tinyvector.hxx:2097
Definition: multi_array_chunked_hdf5.hxx:72
ChunkedArrayHDF5(HDF5File const &file, std::string const &dataset, HDF5File::OpenMode mode, shape_type const &shape, shape_type const &chunk_shape=shape_type(), ChunkedArrayOptions const &options=ChunkedArrayOptions(), Alloc const &alloc=Alloc())
Construct with given 'shape', 'chunk_shape' and 'options', using 'alloc' to manage the in-memory vers...
Definition: multi_array_chunked_hdf5.hxx:187
iterator end()
Create the end iterator for scan-order iteration over the entire chunked array.
Definition: multi_array_chunked.hxx:2389
OpenMode
Set how a file is opened.
Definition: hdf5impex.hxx:1015
void flushToDisk()
Immediately write all data to disk.
Definition: hdf5impex.hxx:2173
Class for fixed size vectors. This class contains an array of size SIZE of the specified VALUETYPE...
Definition: accessor.hxx:940
void close()
Close the current file.
Definition: hdf5impex.hxx:1176
Base class for, and view to, vigra::MultiArray.
Definition: multi_array.hxx:650
virtual std::size_t overheadBytesPerChunk() const
Bytes of main memory needed to manage a single chunk.
Definition: multi_array_chunked_hdf5.hxx:439
bool existsDataset(std::string datasetName) const
Check if given datasetName exists.
Definition: hdf5impex.hxx:1331
Access to HDF5 files.
Definition: hdf5impex.hxx:958

© Ullrich Köthe (ullrich.koethe@iwr.uni-heidelberg.de)
Heidelberg Collaboratory for Image Processing, University of Heidelberg, Germany

html generated using doxygen and Python
vigra 1.11.0 (Thu Mar 17 2016)