
multi_array_chunked_hdf5.hxx VIGRA

/************************************************************************/
/*                                                                      */
/*    Copyright 2012-2014 by Ullrich Koethe and Thorben Kroeger         */
/*                                                                      */
/*    This file is part of the VIGRA computer vision library.           */
/*    The VIGRA Website is                                              */
/*        http://hci.iwr.uni-heidelberg.de/vigra/                       */
/*    Please direct questions, bug reports, and contributions to        */
/*        ullrich.koethe@iwr.uni-heidelberg.de    or                    */
/*        vigra@informatik.uni-hamburg.de                               */
/*                                                                      */
/*    Permission is hereby granted, free of charge, to any person       */
/*    obtaining a copy of this software and associated documentation    */
/*    files (the "Software"), to deal in the Software without           */
/*    restriction, including without limitation the rights to use,      */
/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
/*    sell copies of the Software, and to permit persons to whom the    */
/*    Software is furnished to do so, subject to the following          */
/*    conditions:                                                       */
/*                                                                      */
/*    The above copyright notice and this permission notice shall be    */
/*    included in all copies or substantial portions of the             */
/*    Software.                                                         */
/*                                                                      */
/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
/*                                                                      */
/************************************************************************/

#ifndef VIGRA_MULTI_ARRAY_CHUNKED_HDF5_HXX
#define VIGRA_MULTI_ARRAY_CHUNKED_HDF5_HXX

#include <queue>

#include "multi_array_chunked.hxx"
#include "hdf5impex.hxx"

// Bounds-checking macro, only active if VIGRA_CHECK_BOUNDS is defined.
#ifdef VIGRA_CHECK_BOUNDS
#define VIGRA_ASSERT_INSIDE(diff) \
    vigra_precondition(this->isInside(diff), "Index out of bounds")
#else
#define VIGRA_ASSERT_INSIDE(diff)
#endif

namespace vigra {

template <unsigned int N, class T, class Alloc = std::allocator<T> >
class ChunkedArrayHDF5
: public ChunkedArray<N, T>
{
  public:

    // Chunk implementation that maps each chunk onto a block of the HDF5
    // dataset: data are read on demand and written back before the chunk's
    // memory is released.
    class Chunk
    : public ChunkBase<N, T>
    {
      public:
        typedef typename MultiArrayShape<N>::type  shape_type;
        typedef T value_type;
        typedef value_type * pointer;
        typedef value_type & reference;

        Chunk(shape_type const & shape, shape_type const & start,
              ChunkedArrayHDF5 * array, Alloc const & alloc)
        : ChunkBase<N, T>(detail::defaultStride(shape))
        , shape_(shape)
        , start_(start)
        , array_(array)
        , alloc_(alloc)
        {}

        ~Chunk()
        {
            write();
        }

        std::size_t size() const
        {
            return prod(shape_);
        }

        // Write the chunk to the dataset (unless the file is read-only) and
        // optionally release its memory.
        void write(bool deallocate = true)
        {
            if(this->pointer_ != 0)
            {
                if(!array_->file_.isReadOnly())
                {
                    herr_t status = array_->file_.writeBlock(array_->dataset_, start_,
                                       MultiArrayView<N, T>(shape_, this->strides_, this->pointer_));
                    vigra_postcondition(status >= 0,
                        "ChunkedArrayHDF5: write to dataset failed.");
                }
                if(deallocate)
                {
                    alloc_.deallocate(this->pointer_, this->size());
                    this->pointer_ = 0;
                }
            }
        }

        // Allocate the chunk's memory (if necessary) and read its data from
        // the dataset.
        pointer read()
        {
            if(this->pointer_ == 0)
            {
                this->pointer_ = alloc_.allocate(this->size());
                herr_t status = array_->file_.readBlock(array_->dataset_, start_, shape_,
                                    MultiArrayView<N, T>(shape_, this->strides_, this->pointer_));
                vigra_postcondition(status >= 0,
                    "ChunkedArrayHDF5: read from dataset failed.");
            }
            return this->pointer_;
        }

        shape_type shape_, start_;
        ChunkedArrayHDF5 * array_;
        Alloc alloc_;

      private:
        Chunk & operator=(Chunk const &);
    };

    typedef ChunkedArray<N, T> base_type;
    typedef MultiArray<N, SharedChunkHandle<N, T> > ChunkStorage;
    typedef typename ChunkStorage::difference_type shape_type;
    typedef T value_type;
    typedef value_type * pointer;
    typedef value_type & reference;

    // Construct for a dataset of the given shape; depending on 'mode', the
    // dataset is created, replaced, or opened (see init() below).
    ChunkedArrayHDF5(HDF5File const & file, std::string const & dataset,
                     HDF5File::OpenMode mode,
                     shape_type const & shape,
                     shape_type const & chunk_shape=shape_type(),
                     ChunkedArrayOptions const & options = ChunkedArrayOptions(),
                     Alloc const & alloc = Alloc())
    : ChunkedArray<N, T>(shape, chunk_shape, options),
      file_(file),
      dataset_name_(dataset),
      dataset_(),
      compression_(options.compression_method),
      alloc_(alloc)
    {
        init(mode);
    }

    // Construct from an existing dataset; the array shape is read from the file.
    ChunkedArrayHDF5(HDF5File const & file, std::string const & dataset,
                     HDF5File::OpenMode mode = HDF5File::OpenReadOnly,
                     ChunkedArrayOptions const & options = ChunkedArrayOptions(),
                     Alloc const & alloc = Alloc())
    : ChunkedArray<N, T>(shape_type(), shape_type(), options),
      file_(file),
      dataset_name_(dataset),
      dataset_(),
      compression_(options.compression_method),
      alloc_(alloc)
    {
        init(mode);
    }

    void init(HDF5File::OpenMode mode)
    {
        bool exists = file_.existsDataset(dataset_name_);

        if(mode == HDF5File::Replace)
        {
            mode = HDF5File::New;
        }
        else if(mode == HDF5File::Default)
        {
            if(exists)
                mode = HDF5File::ReadOnly;
            else
                mode = HDF5File::New;
        }

        if(mode == HDF5File::ReadOnly)
            file_.setReadOnly();
        else
            vigra_precondition(!file_.isReadOnly(),
                "ChunkedArrayHDF5(): 'mode' is incompatible with read-only file.");

        vigra_precondition(exists || !file_.isReadOnly(),
            "ChunkedArrayHDF5(): dataset does not exist, but file is read-only.");

        if(!exists || mode == HDF5File::New)
        {
            // FIXME: set rdcc_nbytes to 0 (disable cache, because we don't
            //        need two caches
            // H5Pset_chunk_cache (dapl, rdcc_nslots, rdcc_nbytes, rdcc_w0);
            // Chunk cache size (rdcc_nbytes) should be large
            // enough to hold all the chunks in a selection
            // • If this is not possible, it may be best to disable chunk
            //   caching altogether (set rdcc_nbytes to 0)
            // • rdcc_slots should be a prime number that is at
            //   least 10 to 100 times the number of chunks that can fit
            //   into rdcc_nbytes
            // • rdcc_w0 should be set to 1 if chunks that have been
            //   fully read/written will never be read/written again
            //
            // the above may be WRONG in general - it may only apply if the
            // chunk size in the file matches the chunk size in the CachedArray.
            // Otherwise, make sure that the file cache can hold at least as many
            // chunks as are needed for a single array chunk.
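            //
            // A minimal sketch (kept as a comment, not active code) of how the
            // HDF5 chunk cache could be disabled via a dataset access property
            // list; the 'dapl' handle is hypothetical and is not created or
            // used anywhere in this class:
            //
            //     hid_t dapl = H5Pcreate(H5P_DATASET_ACCESS);
            //     H5Pset_chunk_cache(dapl, H5D_CHUNK_CACHE_NSLOTS_DEFAULT,
            //                        0,   // rdcc_nbytes = 0 disables the cache
            //                        H5D_CHUNK_CACHE_W0_DEFAULT);
            //     // ... pass 'dapl' to H5Dopen2() / H5Dcreate2() ...
            //     H5Pclose(dapl);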
            if(compression_ == DEFAULT_COMPRESSION)
                compression_ = ZLIB_FAST;
            vigra_precondition(compression_ != LZ4,
                "ChunkedArrayHDF5(): HDF5 does not support LZ4 compression.");

            vigra_precondition(this->size() > 0,
                "ChunkedArrayHDF5(): invalid shape.");
            typename detail::HDF5TypeTraits<T>::value_type init(this->fill_scalar_);
            dataset_ = file_.createDataset<N, T>(dataset_name_,
                                                 this->shape_,
                                                 init,
                                                 this->chunk_shape_,
                                                 compression_);
        }
        else
        {
            dataset_ = file_.getDatasetHandleShared(dataset_name_);

            // check shape
            ArrayVector<hsize_t> fileShape(file_.getDatasetShape(dataset_name_));
            typedef detail::HDF5TypeTraits<T> TypeTraits;
            if(TypeTraits::numberOfBands() > 1)
            {
                vigra_precondition(fileShape.size() == N+1,
                    "ChunkedArrayHDF5(file, dataset): dataset has wrong dimension.");
                vigra_precondition(fileShape[0] == TypeTraits::numberOfBands(),
                    "ChunkedArrayHDF5(file, dataset): dataset has wrong number of bands.");
                shape_type shape(fileShape.begin()+1);
                if(this->size() > 0)
                {
                    vigra_precondition(shape == this->shape_,
                        "ChunkedArrayHDF5(file, dataset, shape): shape mismatch between dataset and shape argument.");
                }
                else
                {
                    this->shape_ = shape;
                }
            }
            else
            {
                vigra_precondition(fileShape.size() == N,
                    "ChunkedArrayHDF5(file, dataset): dataset has wrong dimension.");
                shape_type shape(fileShape.begin());
                if(this->size() > 0)
                {
                    vigra_precondition(shape == this->shape_,
                        "ChunkedArrayHDF5(file, dataset, shape): shape mismatch between dataset and shape argument.");
                }
                else
                {
                    this->shape_ = shape;
                    ChunkStorage(detail::computeChunkArrayShape(shape, this->bits_, this->mask_)).swap(this->handle_array_);
                }
            }
            typename ChunkStorage::iterator i   = this->handle_array_.begin(),
                                            end = this->handle_array_.end();
            for(; i != end; ++i)
            {
                i->chunk_state_.store(base_type::chunk_asleep);
            }
        }
    }

    // The destructor unconditionally writes all loaded chunks back and closes
    // the file.
    ~ChunkedArrayHDF5()
    {
        closeImpl(true);
    }

    // In contrast to the destructor, close() refuses to proceed while any
    // chunk is still in active use.
    void close()
    {
        closeImpl(false);
    }

    void closeImpl(bool force_destroy)
    {
        flushToDiskImpl(true, force_destroy);
        file_.close();
    }

    // Write all currently loaded chunks back to the file without releasing
    // their memory.
    void flushToDisk()
    {
        flushToDiskImpl(false, false);
    }

    void flushToDiskImpl(bool destroy, bool force_destroy)
    {
        if(file_.isReadOnly())
            return;

        threading::lock_guard<threading::mutex> guard(*this->chunk_lock_);
        typename ChunkStorage::iterator i   = this->handle_array_.begin(),
                                        end = this->handle_array_.end();
        if(destroy && !force_destroy)
        {
            for(; i != end; ++i)
            {
                vigra_precondition(i->chunk_state_.load() <= 0,
                    "ChunkedArrayHDF5::close(): cannot close file because there are active chunks.");
            }
            i = this->handle_array_.begin();
        }
        for(; i != end; ++i)
        {
            Chunk * chunk = static_cast<Chunk*>(i->pointer_);
            if(!chunk)
                continue;
            if(destroy)
            {
                delete chunk;
                i->pointer_ = 0;
            }
            else
            {
                chunk->write(false);
            }
        }
        file_.flushToDisk();
    }

    virtual bool isReadOnly() const
    {
        return file_.isReadOnly();
    }

    virtual pointer loadChunk(ChunkBase<N, T> ** p, shape_type const & index)
    {
        vigra_precondition(file_.isOpen(),
            "ChunkedArrayHDF5::loadChunk(): file was already closed.");
        if(*p == 0)
        {
            *p = new Chunk(this->chunkShape(index), index*this->chunk_shape_, this, alloc_);
            this->overhead_bytes_ += sizeof(Chunk);
        }
        return static_cast<Chunk *>(*p)->read();
    }

    virtual bool unloadChunk(ChunkBase<N, T> * chunk, bool /* destroy */)
    {
        if(!file_.isOpen())
            return true;
        static_cast<Chunk *>(chunk)->write();
        return false;
    }

    virtual std::string backend() const
    {
        return "ChunkedArrayHDF5<'" + file_.filename() + "/" + dataset_name_ + "'>";
    }

    virtual std::size_t dataBytes(ChunkBase<N,T> * c) const
    {
        return c->pointer_ == 0
                 ? 0
                 : static_cast<Chunk*>(c)->size()*sizeof(T);
    }

    virtual std::size_t overheadBytesPerChunk() const
    {
        return sizeof(Chunk) + sizeof(SharedChunkHandle<N, T>);
    }

    std::string fileName() const
    {
        return file_.filename();
    }

    std::string datasetName() const
    {
        return dataset_name_;
    }

    HDF5File file_;
    std::string dataset_name_;
    HDF5HandleShared dataset_;
    CompressionMethod compression_;
    Alloc alloc_;
};

} // namespace vigra

#undef VIGRA_ASSERT_INSIDE

#endif /* VIGRA_MULTI_ARRAY_CHUNKED_HDF5_HXX */
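
Usage sketch (not part of the header). The following minimal program illustrates how the class above might be used, assuming the installed VIGRA headers and an HDF5 build. The file name "data.h5", the dataset name "volume", and the shapes are placeholders, and the element accessors getItem()/setItem() come from the ChunkedArray base class rather than from this file.

#include <vigra/multi_array_chunked_hdf5.hxx>
#include <iostream>

int main()
{
    using namespace vigra;

    // Create a new HDF5 file and attach a chunked float array to the
    // dataset "volume", creating it with shape 200x300x400.
    HDF5File file("data.h5", HDF5File::New);
    ChunkedArrayHDF5<3, float> array(file, "volume", HDF5File::New,
                                     Shape3(200, 300, 400));

    // Chunks are allocated and read lazily in loadChunk() and written back
    // in unloadChunk() or when the array is flushed or destroyed.
    array.setItem(Shape3(10, 20, 30), 42.0f);
    std::cout << array.getItem(Shape3(10, 20, 30)) << std::endl;

    // Write all in-memory chunks back to the file.
    array.flushToDisk();
    return 0;
}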
