bes Updated for version 3.20.10
build_dmrpp.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of the Hyrax data server.
4
5// Copyright (c) 2018 OPeNDAP, Inc.
6// Author: James Gallagher <jgallagher@opendap.org>
7//
8// This library is free software; you can redistribute it and/or
9// modify it under the terms of the GNU Lesser General Public
10// License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12//
13// This library is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16// Lesser General Public License for more details.
17//
18// You should have received a copy of the GNU Lesser General Public
19// License along with this library; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21//
22// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23
24#include "config.h"
25
26#include <iostream>
27#include <fstream>
28#include <sstream>
29#include <memory>
30#include <iterator>
31#include <algorithm>
32
33#include <unistd.h>
34#include <cstdlib>
35#include <libgen.h>
36
37#include <H5Ppublic.h>
38#include <H5Dpublic.h>
39#include <H5Epublic.h>
40#include <H5Zpublic.h> // Constants for compression filters
41#include <H5Spublic.h>
42#include "h5common.h"
43
44//#include <libdap/D4Attributes.h>
45#include <libdap/Array.h>
46#include <libdap/util.h>
47
48
49#if 0
50/*
51 * "Generic" chunk record. Each chunk is keyed by the minimum logical
52 * N-dimensional coordinates and the datatype size of the chunk.
53 * The fastest-varying dimension is assumed to reference individual bytes of
54 * the array, so a 100-element 1-D array of 4-byte integers would really be a
55 * 2-D array with the slow varying dimension of size 100 and the fast varying
56 * dimension of size 4 (the storage dimensionality has very little to do with
57 * the real dimensionality).
58 *
59 * The chunk's file address, filter mask and size on disk are not key values.
60 */
61typedef struct H5D_chunk_rec_t {
62 hsize_t scaled[H5O_LAYOUT_NDIMS]; /* Logical offset to start */
63 uint32_t nbytes; /* Size of stored data */
64 uint32_t filter_mask; /* Excluded filters */
65 haddr_t chunk_addr; /* Address of chunk in file */
66} H5D_chunk_rec_t;
67#endif
68
69//#include <DMRpp.h>
70#include <libdap/D4Attributes.h>
71#include <libdap/BaseType.h>
72#include <libdap/D4ParserSax2.h>
73//#include <GetOpt.h>
74
75//#include <BESDapNames.h>
76#include <TheBESKeys.h>
77#include <BESUtil.h>
78#include <BESDebug.h>
79
80#include <BESError.h>
81#include <BESNotFoundError.h>
82#include <BESInternalError.h>
83#include <BESDataHandlerInterface.h>
84
85#include "DMRpp.h"
86#include "DmrppTypeFactory.h"
87#include "DmrppD4Group.h"
88#include "DmrppMetadataStore.h"
89//#include "BESDapNames.h"
90#if 0
91//#define H5D_FRIEND // Workaround, needed to use H5D_chunk_rec_t
92//#include <H5Dpkg.h>
93#define H5S_MAX_RANK 32
94#define H5O_LAYOUT_NDIMS (H5S_MAX_RANK+1)
95
96/*
97 * "Generic" chunk record. Each chunk is keyed by the minimum logical
98 * N-dimensional coordinates and the datatype size of the chunk.
99 * The fastest-varying dimension is assumed to reference individual bytes of
100 * the array, so a 100-element 1-D array of 4-byte integers would really be a
101 * 2-D array with the slow varying dimension of size 100 and the fast varying
102 * dimension of size 4 (the storage dimensionality has very little to do with
103 * the real dimensionality).
104 *
105 * The chunk's file address, filter mask and size on disk are not key values.
106 */
107typedef struct H5D_chunk_rec_t {
108 hsize_t scaled[H5O_LAYOUT_NDIMS]; /* Logical offset to start */
109 uint32_t nbytes; /* Size of stored data */
110 uint32_t filter_mask; /* Excluded filters */
111 haddr_t chunk_addr; /* Address of chunk in file */
112} H5D_chunk_rec_t;
113#endif
114
115using namespace std;
116using namespace libdap;
117using namespace dmrpp;
118
119static bool verbose = false;
120#define VERBOSE(x) do { if (verbose) x; } while(false)
121
122#define DEBUG_KEY "metadata_store,dmrpp_store,dmrpp"
123#define ROOT_DIRECTORY "BES.Catalog.catalog.RootDirectory"
124
125#if 0
135
136void get_data(hid_t dset, void *buf)
137{
138 BESDEBUG("h5", ">get_data()" << endl);
139
140 hid_t dtype = -1;
141 if ((dtype = H5Dget_type(dset)) < 0) {
142 throw InternalErr(__FILE__, __LINE__, "Failed to get the datatype of the dataset");
143 }
144 hid_t dspace = -1;
145 if ((dspace = H5Dget_space(dset)) < 0) {
146 H5Tclose(dtype);
147 throw InternalErr(__FILE__, __LINE__, "Failed to get the data space of the dataset");
148 }
149 // Use HDF5 H5Tget_native_type API
150 hid_t memtype = H5Tget_native_type(dtype, H5T_DIR_ASCEND);
151 if (memtype < 0) {
152 H5Tclose(dtype);
153 H5Sclose(dspace);
154 throw InternalErr(__FILE__, __LINE__, "failed to get memory type");
155 }
156
157 if (H5Dread(dset, memtype, dspace, dspace, H5P_DEFAULT, buf)
158 < 0) {
159 H5Tclose(dtype);
160 H5Tclose(memtype);
161 H5Sclose(dspace);
162 throw InternalErr(__FILE__, __LINE__, "failed to read data");
163 }
164
165 if (H5Tclose(dtype) < 0){
166 H5Tclose(memtype);
167 H5Sclose(dspace);
168 throw InternalErr(__FILE__, __LINE__, "Unable to release the dtype.");
169 }
170
171 if (H5Tclose(memtype) < 0){
172 H5Sclose(dspace);
173 throw InternalErr(__FILE__, __LINE__, "Unable to release the memtype.");
174 }
175
176 if(H5Sclose(dspace)<0) {
177 throw InternalErr(__FILE__, __LINE__, "Unable to release the data space.");
178 }
179#if 0
180 // Supposed to release the resource at the release at the HDF5Array destructor.
181 //if (H5Dclose(dset) < 0){
182 // throw InternalErr(__FILE__, __LINE__, "Unable to close the dataset.");
183 //}
184 }
185#endif
186
187 BESDEBUG("h5", "<get_data()" << endl);
188}
189
190bool read_vlen_string(hid_t dsetid, int nelms, hsize_t *hoffset, hsize_t *hstep, hsize_t *hcount,vector<string> &finstrval)
191{
192
193 hid_t dspace = -1;
194 hid_t mspace = -1;
195 hid_t dtypeid = -1;
196 hid_t memtype = -1;
197 bool is_scalar = false;
198
199
200 if ((dspace = H5Dget_space(dsetid))<0) {
201 throw InternalErr (__FILE__, __LINE__, "Cannot obtain data space.");
202 }
203
204 if(H5S_SCALAR == H5Sget_simple_extent_type(dspace))
205 is_scalar = true;
206
207
208 if (false == is_scalar) {
209 if (H5Sselect_hyperslab(dspace, H5S_SELECT_SET,
210 hoffset, hstep,
211 hcount, NULL) < 0) {
212 H5Sclose(dspace);
213 throw InternalErr (__FILE__, __LINE__, "Cannot generate the hyperslab of the HDF5 dataset.");
214 }
215
216 int d_num_dim = H5Sget_simple_extent_ndims(dspace);
217 if(d_num_dim < 0) {
218 H5Sclose(dspace);
219 throw InternalErr (__FILE__, __LINE__, "Cannot obtain the number of dimensions of the data space.");
220 }
221
222 mspace = H5Screate_simple(d_num_dim, hcount,NULL);
223 if (mspace < 0) {
224 H5Sclose(dspace);
225 throw InternalErr (__FILE__, __LINE__, "Cannot create the memory space.");
226 }
227 }
228
229
230 if ((dtypeid = H5Dget_type(dsetid)) < 0) {
231
232 if (false == is_scalar)
233 H5Sclose(mspace);
234 H5Sclose(dspace);
235 throw InternalErr (__FILE__, __LINE__, "Cannot obtain the datatype.");
236
237 }
238
239 if ((memtype = H5Tget_native_type(dtypeid, H5T_DIR_ASCEND))<0) {
240
241 if (false == is_scalar)
242 H5Sclose(mspace);
243 H5Tclose(dtypeid);
244 H5Sclose(dspace);
245 throw InternalErr (__FILE__, __LINE__, "Fail to obtain memory datatype.");
246
247 }
248
249 size_t ty_size = H5Tget_size(memtype);
250 if (ty_size == 0) {
251 if (false == is_scalar)
252 H5Sclose(mspace);
253 H5Tclose(memtype);
254 H5Tclose(dtypeid);
255 H5Sclose(dspace);
256 throw InternalErr (__FILE__, __LINE__,"Fail to obtain the size of HDF5 string.");
257 }
258
259 vector <char> strval;
260 strval.resize(nelms*ty_size);
261 hid_t read_ret = -1;
262 if (true == is_scalar)
263 read_ret = H5Dread(dsetid,memtype,H5S_ALL,H5S_ALL,H5P_DEFAULT,(void*)&strval[0]);
264 else
265 read_ret = H5Dread(dsetid,memtype,mspace,dspace,H5P_DEFAULT,(void*)&strval[0]);
266
267 if (read_ret < 0) {
268 if (false == is_scalar)
269 H5Sclose(mspace);
270 H5Tclose(memtype);
271 H5Tclose(dtypeid);
272 H5Sclose(dspace);
273 throw InternalErr (__FILE__, __LINE__, "Fail to read the HDF5 variable length string dataset.");
274 }
275
276 // For scalar, nelms is 1.
277 char*temp_bp = &strval[0];
278 char*onestring = NULL;
279 for (int i =0;i<nelms;i++) {
280 onestring = *(char**)temp_bp;
281 if(onestring!=NULL )
282 finstrval[i] =string(onestring);
283 else // We will add a NULL if onestring is NULL.
284 finstrval[i]="";
285 temp_bp +=ty_size;
286 }
287
288 if (false == strval.empty()) {
289 herr_t ret_vlen_claim;
290 if (true == is_scalar)
291 ret_vlen_claim = H5Dvlen_reclaim(memtype,dspace,H5P_DEFAULT,(void*)&strval[0]);
292 else
293 ret_vlen_claim = H5Dvlen_reclaim(memtype,mspace,H5P_DEFAULT,(void*)&strval[0]);
294 if (ret_vlen_claim < 0){
295 if (false == is_scalar)
296 H5Sclose(mspace);
297 H5Tclose(memtype);
298 H5Tclose(dtypeid);
299 H5Sclose(dspace);
300 throw InternalErr (__FILE__, __LINE__, "Cannot reclaim the memory buffer of the HDF5 variable length string.");
301
302 }
303 }
304
305 if (false == is_scalar)
306 H5Sclose(mspace);
307 H5Tclose(memtype);
308 H5Tclose(dtypeid);
309 H5Sclose(dspace);
310
311 return true;
312
313}
314#endif
315
316#if 0
327static void print_dataset_type_info(hid_t dataset, uint8_t layout_type) {
328 hid_t dtype_id = H5Dget_type(dataset);
329 if (dtype_id < 0) {
330 throw BESInternalError("Cannot obtain the correct HDF5 datatype.", __FILE__, __LINE__);
331 }
332
333 if (H5Tget_class(dtype_id) == H5T_INTEGER || H5Tget_class(dtype_id) == H5T_FLOAT) {
334 hid_t dcpl_id = H5Dget_create_plist(dataset);
335 if (dcpl_id < 0) {
336 throw BESInternalError("Cannot obtain the HDF5 dataset creation property list.", __FILE__, __LINE__);
337 }
338
339 try {
340 // Wrap the resources like dcpl_id in try/catch blocks so that the
341 // calls to H5Pclose(dcpl_id) for each error can be removed. jhrg 5/7/18
342 H5D_fill_value_t fvalue_status;
343 if (H5Pfill_value_defined(dcpl_id, &fvalue_status) < 0) {
344 H5Pclose(dcpl_id);
345 throw BESInternalError("Cannot obtain the fill value status.", __FILE__, __LINE__);
346 }
347 if (fvalue_status == H5D_FILL_VALUE_UNDEFINED) {
348 // Replace with switch(), here and elsewhere. jhrg 5/7/18
349 if (layout_type == 1)
350 cerr << " The storage size is 0 and the storage type is contiguous." << endl;
351 else if (layout_type == 2)
352 cerr << " The storage size is 0 and the storage type is chunking." << endl;
353 else if (layout_type == 3) cerr << " The storage size is 0 and the storage type is compact." << endl;
354
355 cerr << " The Fillvalue is undefined ." << endl;
356 } else {
357 if (layout_type == 1)
358 cerr << " The storage size is 0 and the storage type is contiguous." << endl;
359 else if (layout_type == 2)
360 cerr << " The storage size is 0 and the storage type is chunking." << endl;
361 else if (layout_type == 3) cerr << " The storage size is 0 and the storage type is compact." << endl;
362
363 char *fvalue = NULL;
364 size_t fv_size = H5Tget_size(dtype_id);
365 if (fv_size == 1)
366 fvalue = (char *) (malloc(1));
367 else if (fv_size == 2)
368 fvalue = (char *) (malloc(2));
369 else if (fv_size == 4)
370 fvalue = (char *) (malloc(4));
371 else if (fv_size == 8) fvalue = (char *) (malloc(8));
372
373 if (fv_size <= 8) {
374 if (H5Pget_fill_value(dcpl_id, dtype_id, (void *) (fvalue)) < 0) {
375 H5Pclose(dcpl_id);
376 throw BESInternalError("Cannot obtain the fill value status.", __FILE__, __LINE__);
377 }
378 if (H5Tget_class(dtype_id) == H5T_INTEGER) {
379 H5T_sign_t fv_sign = H5Tget_sign(dtype_id);
380 if (fv_size == 1) {
381 if (fv_sign == H5T_SGN_NONE) {
382 cerr << "This dataset's datatype is unsigned char " << endl;
383 cerr << "and the fillvalue is " << *fvalue << endl;
384 } else {
385 cerr << "This dataset's datatype is char and the fillvalue is " << *fvalue << endl;
386 }
387 } else if (fv_size == 2) {
388 if (fv_sign == H5T_SGN_NONE) {
389 cerr << "This dataset's datatype is unsigned short and the fillvalue is " << *fvalue
390 << endl;
391 } else {
392 cerr << "This dataset's datatype is short and the fillvalue is " << *fvalue << endl;
393 }
394 } else if (fv_size == 4) {
395 if (fv_sign == H5T_SGN_NONE) {
396 cerr << "This dataset's datatype is unsigned int and the fillvalue is " << *fvalue
397 << endl;
398 } else {
399 cerr << "This dataset's datatype is int and the fillvalue is " << *fvalue << endl;
400 }
401 } else if (fv_size == 8) {
402 if (fv_sign == H5T_SGN_NONE) {
403 cerr << "This dataset's datatype is unsigned long long and the fillvalue is " << *fvalue
404 << endl;
405 } else {
406 cerr << "This dataset's datatype is long long and the fillvalue is " << *fvalue << endl;
407 }
408 }
409 }
410 if (H5Tget_class(dtype_id) == H5T_FLOAT) {
411 if (fv_size == 4) {
412 cerr << "This dataset's datatype is float and the fillvalue is " << *fvalue << endl;
413 } else if (fv_size == 8) {
414 cerr << "This dataset's datatype is double and the fillvalue is " << *fvalue << endl;
415 }
416 }
417
418 if (fvalue != NULL) free(fvalue);
419 } else
420 cerr
421 << "The size of the datatype is greater than 8 bytes, Use HDF5 API H5Pget_fill_value() to retrieve the fill value of this dataset."
422 << endl;
423 }
424 }
425 catch (...) {
426 H5Pclose(dcpl_id);
427 throw;
428 }
429 H5Pclose(dcpl_id);
430 } else {
431 if (layout_type == 1)
432 cerr << " The storage size is 0 and the storage type is contiguous." << endl;
433 else if (layout_type == 2)
434 cerr << " The storage size is 0 and the storage type is chunking." << endl;
435 else if (layout_type == 3) cerr << " The storage size is 0 and the storage type is compact." << endl;
436
437 cerr
438 << "The datatype is neither float nor integer,use HDF5 API H5Pget_fill_value() to retrieve the fill value of this dataset."
439 << endl;
440 }
441}
442#endif
443
444// FYI: Filter IDs
445// H5Z_FILTER_ERROR (-1) no filter
446// H5Z_FILTER_NONE 0 reserved indefinitely
447// H5Z_FILTER_DEFLATE 1 deflation like gzip
448// H5Z_FILTER_SHUFFLE 2 shuffle the data
449// H5Z_FILTER_FLETCHER32 3 fletcher32 checksum of EDC
450// H5Z_FILTER_SZIP 4 szip compression
451// H5Z_FILTER_NBIT 5 nbit compression
452// H5Z_FILTER_SCALEOFFSET 6 scale+offset compression
453// H5Z_FILTER_RESERVED 256 filter ids below this value are reserved for library use
454
461static void set_filter_information(hid_t dataset_id, DmrppCommon *dc) {
462 hid_t plist_id = H5Dget_create_plist(dataset_id);
463
464 try {
465 int numfilt = H5Pget_nfilters(plist_id);
466 VERBOSE(cerr << "Number of filters associated with dataset: " << numfilt << endl);
467 string filters;
468
469 for (int filter = 0; filter < numfilt; filter++) {
470 size_t nelmts = 0;
471 unsigned int flags, filter_info;
472 H5Z_filter_t filter_type = H5Pget_filter2(plist_id, filter, &flags, &nelmts, NULL, 0, NULL, &filter_info);
473 VERBOSE(cerr << "Filter Type: ");
474
475 switch (filter_type) {
476 case H5Z_FILTER_DEFLATE:
477 VERBOSE(cerr << "H5Z_FILTER_DEFLATE" << endl);
478 // dc->set_deflate(true);
479 filters.append("deflate ");
480 break;
481 case H5Z_FILTER_SHUFFLE:
482 VERBOSE(cerr << "H5Z_FILTER_SHUFFLE" << endl);
483 // dc->set_shuffle(true);
484 filters.append("shuffle ");
485 break;
486 case H5Z_FILTER_FLETCHER32:
487 VERBOSE(cerr << "H5Z_FILTER_FLETCHER32" << endl);
488 // dc->set_fletcher32(true);
489 filters.append("fletcher32 ");
490 break;
491 default: {
492 ostringstream oss("Unsupported HDF5 filter: ", std::ios::ate);
493 oss << filter_type;
494 throw BESInternalError(oss.str(), __FILE__, __LINE__);
495 }
496 }
497 }
498 //trimming trailing space from compression (aka filter) string
499 filters = filters.substr(0, filters.length() - 1);
500 dc->set_filter(filters);
501 }
502 catch (...) {
503 H5Pclose(plist_id);
504 throw;
505 }
506
507 H5Pclose(plist_id);
508}
509
520static void get_variable_chunk_info(hid_t dataset, DmrppCommon *dc) {
521 std::string byteOrder = "";
522 H5T_order_t byte_order = H5T_ORDER_ERROR;
523
524 try {
525 hid_t dcpl = H5Dget_create_plist(dataset);
526 uint8_t layout_type = H5Pget_layout(dcpl);
527
528 hid_t fspace_id = H5Dget_space(dataset);
529 hid_t ftype_id = H5Dget_type(dataset);
530
531 byte_order = H5Tget_order(ftype_id);
532 switch (byte_order) {
533 case H5T_ORDER_LE:
534 byteOrder = "LE";
535 break;
536 case H5T_ORDER_BE:
537 byteOrder = "BE";
538 break;
539 case H5T_ORDER_NONE:
540 break;
541 default:
542 ostringstream oss("Unsupported HDF5 dataset byteOrder: ", std::ios::ate);
543 oss << byte_order << ".";
544 BESInternalError(oss.str(), __FILE__, __LINE__);
545 break; // unsupported enumerations: H5T_ORDER_[ERROR,VAX,MIXED,NONE]
546 }
547
548 unsigned int dataset_rank = H5Sget_simple_extent_ndims(fspace_id);
549
550 hid_t dtypeid = H5Dget_type(dataset);
551
552 size_t dsize = H5Tget_size(dtypeid);
553
554 /* layout_type: 1 contiguous 2 chunk 3 compact */
555 switch (layout_type) {
556
557 case H5D_CONTIGUOUS: { /* Contiguous storage */
558 haddr_t cont_addr = 0;
559 hsize_t cont_size = 0;
560
561 VERBOSE(cerr << "Storage: contiguous" << endl);
562
563 cont_addr = H5Dget_offset(dataset);
564 /* if statement never less than zero due to cont_addr being unsigned int. SBL 1.29.20
565 if (cont_addr < 0) {
566 throw BESInternalError("Cannot obtain the offset.", __FILE__, __LINE__);
567 }*/
568 cont_size = H5Dget_storage_size(dataset);
569 /* if statement never less than zero due to cont_size being unsigned int. SBL 1.29.20
570 if (cont_size < 0) {
571 throw BESInternalError("Cannot obtain the storage size.", __FILE__, __LINE__);
572 }*/
573
574
575 VERBOSE(cerr << " Addr: " << cont_addr << endl);
576 VERBOSE(cerr << " Size: " << cont_size << endl);
577 VERBOSE(cerr << "byteOrder: " << byteOrder << endl);
578
579 if (cont_size > 0) {
580 if (dc) dc->add_chunk(byteOrder, cont_size, cont_addr, "" /*pos in array*/);
581 }
582 break;
583 }
584 case H5D_CHUNKED: { /*chunking storage */
585 hsize_t num_chunks = 0;
586 herr_t status = H5Dget_num_chunks(dataset, fspace_id, &num_chunks);
587 if (status < 0) {
588 throw BESInternalError("Could not get the number of chunks",
589 __FILE__, __LINE__);
590 }
591
592 VERBOSE(cerr << "Storage: chunked." << endl);
593 VERBOSE(cerr << "Number of chunks is: " << num_chunks << endl);
594
595 if (dc)
596 set_filter_information(dataset, dc);
597
598 // Get chunking information: rank and dimensions
599 vector<size_t> chunk_dims(dataset_rank);
600 unsigned int chunk_rank = H5Pget_chunk(dcpl, dataset_rank, (hsize_t *) &chunk_dims[0]);
601 if (chunk_rank != dataset_rank)
602 throw BESNotFoundError(
603 "Found a chunk with rank different than the dataset's (aka variables's) rank", __FILE__,
604 __LINE__);
605
606 if (dc) dc->set_chunk_dimension_sizes(chunk_dims);
607
608 for (unsigned int i = 0; i < num_chunks; ++i) {
609
610 vector<hsize_t> temp_coords(dataset_rank);
611 vector<unsigned long long> chunk_coords(dataset_rank); //FIXME - see below
612
613 haddr_t addr = 0;
614 hsize_t size = 0;
615
616 //H5_DLL herr_t H5Dget_chunk_info(hid_t dset_id, hid_t fspace_id, hsize_t chk_idx, hsize_t *coord, unsigned *filter_mask, haddr_t *addr, hsize_t *size);
617 status = H5Dget_chunk_info(dataset, fspace_id, i, &temp_coords[0], NULL, &addr, &size);
618 if (status < 0) {
619 VERBOSE(cerr << "ERROR" << endl);
620 throw BESInternalError("Cannot get HDF5 dataset storage info.", __FILE__, __LINE__);
621 }
622
623 VERBOSE(cerr << "chk_idk: " << i << ", addr: " << addr << ", size: " << size << endl);
624
625 //The coords need to be of type 'unsigned int' when passed into add_chunk()
626 // This loop simply copies the values from the temp_coords to chunk_coords - kln 5/1/19
627 for (unsigned int j = 0; j < chunk_coords.size(); ++j) {
628 chunk_coords[j] = temp_coords[j];
629 }
630
631 if (dc) dc->add_chunk(byteOrder, size, addr, chunk_coords);
632 }
633
634 break;
635 }
636
637 case H5D_COMPACT: { /* Compact storage */
638 //else if (layout_type == 3) {
639 VERBOSE(cerr << "Storage: compact" << endl);
640
641 size_t comp_size = H5Dget_storage_size(dataset);
642 VERBOSE(cerr << " Size: " << comp_size << endl);
643
644 if (comp_size == 0) {
645 throw BESInternalError("Cannot obtain the compact storage size.",
646 __FILE__, __LINE__);
647 }
648
649 vector<uint8_t> values;
650
651 Array *btp = dynamic_cast<Array *>(dc);
652 if (btp != NULL) {
653 dc->set_compact(true);
654 size_t memRequired = btp->length() * dsize;
655
656 if (comp_size != memRequired) {
657 throw BESInternalError("Compact storage size does not match D4Array.",
658 __FILE__, __LINE__);
659 }
660
661 switch (btp->var()->type()) {
662 case dods_byte_c:
663 case dods_char_c:
664 case dods_int8_c:
665 case dods_uint8_c:
666 case dods_int16_c:
667 case dods_uint16_c:
668 case dods_int32_c:
669 case dods_uint32_c:
670 case dods_float32_c:
671 case dods_float64_c:
672 case dods_int64_c:
673 case dods_uint64_c: {
674 values.resize(memRequired);
675 get_data(dataset, reinterpret_cast<void *>(&values[0]));
676 btp->set_read_p(true);
677 btp->val2buf(reinterpret_cast<void *>(&values[0]));
678 break;
679
680 }
681
682 case dods_str_c: {
683 if (H5Tis_variable_str(dtypeid) > 0) {
684 vector<string> finstrval = {""}; // passed by reference to read_vlen_string
685 read_vlen_string(dataset, 1, NULL, NULL, NULL, finstrval);
686 btp->set_value(finstrval, finstrval.size());
687 btp->set_read_p(true);
688 } else {
689 // For this case, the Array is really a single string - check for that
690 // with the following assert - but is an Array because the string data
691 // is stored as an array of chars (hello, FORTRAN). Read the chars, make
692 // a string and load that into a vector<string> (which will be a vector
693 // of length one). Set that as the value of the Array. Really, this
694 // value could be stored as a scalar, but that's complicated and client
695 // software might be expecting an array, so better to handle it this way.
696 // jhrg 9/17/20
697 assert(btp->length() == 1);
698 values.resize(memRequired);
699 get_data(dataset, reinterpret_cast<void *>(&values[0]));
700 string str(values.begin(), values.end());
701 vector<string> strings = {str};
702 btp->set_value(strings, strings.size());
703 btp->set_read_p(true);
704 }
705 break;
706 }
707
708 default:
709 throw BESInternalError("Unsupported compact storage variable type.", __FILE__, __LINE__);
710 }
711
712 } else {
713 throw BESInternalError("Compact storage variable is not a D4Array.",
714 __FILE__, __LINE__);
715 }
716 break;
717 }
718
719 default: {
720 ostringstream oss("Unsupported HDF5 dataset layout type: ", std::ios::ate);
721 oss << layout_type << ".";
722 BESInternalError(oss.str(), __FILE__, __LINE__);
723 break;
724 }
725 }
726 }
727 catch (...) {
728 H5Dclose(dataset);
729 throw;
730 }
731
732 H5Dclose(dataset);
733}
734
742static void get_chunks_for_all_variables(hid_t file, D4Group *group) {
743 // variables in the group
744 for (Constructor::Vars_iter v = group->var_begin(), ve = group->var_end(); v != ve; ++v) {
745 // if this variable has a 'fullnamepath' attribute, use that and not the
746 // FQN value.
747 D4Attributes *d4_attrs = (*v)->attributes();
748 if (!d4_attrs)
749 throw BESInternalError("Expected to find an attribute table for " + (*v)->name() + " but did not.",
750 __FILE__, __LINE__);
751
752 // Look for the full name path for this variable
753 // If one was not given via an attribute, use BaseType::FQN() which
754 // relies on the variable's position in the DAP dataset hierarchy.
755 D4Attribute *attr = d4_attrs->get("fullnamepath");
756 string FQN;
757 // I believe the logic is more clear in this way:
758 // If fullnamepath exists and the H5Dopen2 fails to open, it should throw an error.
759 // If fullnamepath doesn't exist, we should ignore the error as the reason described below:
760 // (However, we should suppress the HDF5 dataset open error message.) KY 2019-12-02
761 // It's not an error if a DAP variable in a DMR from the hdf5 handler
762 // doesn't exist in the file _if_ there's no 'fullnamepath' because
763 // that variable was synthesized (likely for CF compliance)
764 hid_t dataset = -1;
765 if (attr) {
766 if (attr->num_values() == 1)
767 FQN = attr->value(0);
768 else
769 FQN = (*v)->FQN();
770 BESDEBUG("dmrpp", "Working on: " << FQN << endl);
771 dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
772 if (dataset < 0)
773 throw BESInternalError("HDF5 dataset '" + FQN + "' cannot be opened.", __FILE__, __LINE__);
774
775 } else {
776 // The current design seems to still prefer to open the dataset when the fullnamepath doesn't exist
777 // So go ahead to open the dataset. Continue even if the dataset cannot be open. KY 2019-12-02
778 H5Eset_auto2(H5E_DEFAULT, NULL, NULL);
779 FQN = (*v)->FQN();
780 BESDEBUG("dmrpp", "Working on: " << FQN << endl);
781 dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
782 if (dataset < 0)
783 continue;
784 }
785#if 0
786 if (attr && attr->num_values() == 1)
787 FQN = attr->value(0);
788 else
789 FQN = (*v)->FQN();
790
791 VERBOSE(cerr << "Working on: " << FQN << endl);
792 hid_t dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
793 // It's not an error if a DAP variable in a DMR from the hdf5 handler
794 // doesn't exist in the file _if_ there's no 'fullnamepath' because
795 // that variable was synthesized (likely for CF compliance)
796 if (dataset < 0 && attr == 0) {
797 cerr<<"Unable to open dataset name "<<FQN <<endl;
798 continue;
799 }
800 else if (dataset < 0)
801 throw BESInternalError("HDF5 dataset '" + FQN + "' cannot be opened.", __FILE__, __LINE__);
802#endif
803 get_variable_chunk_info(dataset, dynamic_cast<DmrppCommon *>(*v));
804 }
805
806 // all groups in the group
807 D4Group::groupsIter g = group->grp_begin();
808 D4Group::groupsIter ge = group->grp_end();
809 while (g != ge)
810 get_chunks_for_all_variables(file, *g++);
811}
812
/**
 * @brief Rebuild the command line as a single string.
 *
 * @param argc Number of entries in argv to use.
 * @param argv The argument strings.
 * @return The first argc arguments joined by single spaces; an empty string
 * when argc is zero or negative.
 */
std::string cmdln(int argc, char *argv[]) {
    std::ostringstream joined;
    const char *separator = "";
    for (int idx = 0; idx < argc; ++idx) {
        joined << separator << argv[idx];
        separator = " "; // every argument after the first is preceded by a space
    }
    return joined.str();
}
822
823void inject_version_and_configuration(int argc, char **argv, DMRpp *dmrpp){
824
825 dmrpp->set_version(CVER);
826
827 // Build the version attributes for the DMR++
828 D4Attribute *version = new D4Attribute("build_dmrpp_metadata", StringToD4AttributeType("container"));
829
830 D4Attribute *build_dmrpp_version = new D4Attribute("build_dmrpp", StringToD4AttributeType("string"));
831 build_dmrpp_version->add_value(CVER);
832 version->attributes()->add_attribute_nocopy(build_dmrpp_version);
833
834 D4Attribute *bes_version = new D4Attribute("bes", StringToD4AttributeType("string"));
835 bes_version->add_value(CVER);
836 version->attributes()->add_attribute_nocopy(bes_version);
837
838 stringstream ldv;
839 ldv << libdap_name() << "-" << libdap_version();
840 D4Attribute *libdap4_version = new D4Attribute("libdap", StringToD4AttributeType("string"));
841 libdap4_version->add_value(ldv.str());
842 version->attributes()->add_attribute_nocopy(libdap4_version);
843
844 if(!TheBESKeys::ConfigFile.empty()) {
845 // What is the BES configuration in play?
846 D4Attribute *config = new D4Attribute("configuration", StringToD4AttributeType("string"));
847 config->add_value(TheBESKeys::TheKeys()->get_as_config());
848 version->attributes()->add_attribute_nocopy(config);
849 }
850
851 // How was build_dmrpp invoked?
852 D4Attribute *invoke = new D4Attribute("invocation", StringToD4AttributeType("string"));
853 invoke->add_value(cmdln(argc,argv));
854 version->attributes()->add_attribute_nocopy(invoke);
855
856 // Inject version and configuration attributes into DMR here.
857 D4Attributes *top_level_attrs = dmrpp->root()->attributes();
858 top_level_attrs->add_attribute_nocopy(version);
859}
860
861void usage() {
862 const char *help = R"(
863 build_dmrpp -h: Show this help
864
865 build_dmrpp -V: Show build versions for componets that make up the program
866
867 build_dmrpp -c <bes.conf> -f <data file> [-u <href url>]: Build the DMR++ using the <bes.conf>
868 options to initialize the software for the <data file>. Optionally substitue the <href url>.
869 Builds the DMR using the HDF5 handler as configued using the options in the <bes.conf>.
870
871 build_dmrpp build_dmrpp -f <data file> -r <dmr file> [-u <href url>]: As above, but uses the DMR
872 read from the given file (so it does not run the HDF5 handler code.
873
874 Other options:
875 -v: Verbose
876 -d: Turn on BES software debugging output
877 -M: Add information about the build_dmrpp software, incl versions, to the built DMR++)";
878
879 cerr << help << endl;
880}
881
882int main(int argc, char *argv[]) {
883 string h5_file_name = "";
884 string h5_dset_path = "";
885 string dmr_name = "";
886 string url_name = "";
887 int status = 0;
888 bool add_production_metadata = false;
889
890 int option_char;
891 while ((option_char = getopt(argc, argv, "c:f:r:u:dhvVM")) != -1) {
892 switch (option_char) {
893 case 'V':
894 cerr << basename(argv[0]) << "-" << CVER << " (bes-"<< CVER << ", " << libdap_name() << "-"
895 << libdap_version() << ")" << endl;
896 return 0;
897
898 case 'v':
899 verbose = true; // verbose hdf5 errors
900 break;
901
902 case 'd':
903 BESDebug::SetUp(string("cerr,").append(DEBUG_KEY));
904 break;
905
906 case 'f':
907 h5_file_name = optarg;
908 break;
909
910 case 'r':
911 dmr_name = optarg;
912 break;
913
914 case 'u':
915 url_name = optarg;
916 break;
917
918 case 'c':
919 TheBESKeys::ConfigFile = optarg;
920 break;
921
922 case 'M':
923 add_production_metadata = true;
924 break;
925
926 case 'h':
927 usage();
928 exit(1);
929
930 default:
931 break;
932 }
933 }
934
935 if (h5_file_name.empty()) {
936 cerr << "HDF5 file name must be given (-f <input>)." << endl;
937 return 1;
938 }
939
940 hid_t file = 0;
941 try {
942 // Turn off automatic hdf5 error printing.
943 // See: https://support.hdfgroup.org/HDF5/doc1.8/RM/RM_H5E.html#Error-SetAuto2
944 //if (!verbose) H5Eset_auto2(H5E_DEFAULT, NULL, NULL);
945
946 // For a given HDF5, get info for all the HDF5 datasets in a DMR or for a
947 // given HDF5 dataset
948 if (!dmr_name.empty()) {
949 // Get dmr:
950 DMRpp dmrpp;
952 dmrpp.set_factory(&dtf);
953
954 ifstream in(dmr_name.c_str());
955 D4ParserSax2 parser;
956 parser.intern(in, &dmrpp, false);
957
958 // Open the hdf5 file
959 file = H5Fopen(h5_file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
960 if (file < 0) {
961 cerr << "Error: HDF5 file '" + h5_file_name + "' cannot be opened." << endl;
962 return 1;
963 }
964
965 if(add_production_metadata) {
966 inject_version_and_configuration(argc, argv, &dmrpp);
967 }
968
969 // iterate over all the variables in the DMR
970 get_chunks_for_all_variables(file, dmrpp.root());
971
972 XMLWriter writer;
973 dmrpp.print_dmrpp(writer, url_name);
974
975 cout << writer.get_doc();
976 } else {
977 bool found;
978 string bes_data_root;
979 try {
980 TheBESKeys::TheKeys()->get_value(ROOT_DIRECTORY, bes_data_root, found);
981 if (!found) {
982 cerr << "Error: Could not find the BES root directory key." << endl;
983 return 1;
984 }
985 }
986 catch (BESError &e) {
987 cerr << "BESError: " << e.get_message() << endl;
988 return 1;
989 }
990
991 // Use the values from the bes.conf file... jhrg 5/21/18
993 if (!mds) {
994 cerr << "The Metadata Store (MDS) must be configured for this command to work." << endl;
995 return 1;
996 }
997
998 // Use the full path to open the file, but use the 'name' (which is the
999 // path relative to the BES Data Root) with the MDS.
1000 // Changed this to utilize assemblePath() because simply concatenating the strings
1001 // is fragile. - ndp 6/6/18
1002 string h5_file_path = BESUtil::assemblePath(bes_data_root, h5_file_name);
1003
1004 //bes::DmrppMetadataStore::MDSReadLock lock = mds->is_dmr_available(h5_file_name /*h5_file_path*/);
1005 bes::DmrppMetadataStore::MDSReadLock lock = mds->is_dmr_available(h5_file_path, h5_file_name, "h5");
1006 if (lock()) {
1007 // parse the DMR into a DMRpp (that uses the DmrppTypes)
1008 unique_ptr<DMRpp> dmrpp(dynamic_cast<DMRpp *>(mds->get_dmr_object(h5_file_name /*h5_file_path*/)));
1009 if (!dmrpp.get()) {
1010 cerr << "Expected a DMR++ object from the DmrppMetadataStore." << endl;
1011 return 1;
1012 }
1013
1014 // Open the hdf5 file
1015 file = H5Fopen(h5_file_path.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
1016 if (file < 0) {
1017 cerr << "Error: HDF5 file '" + h5_file_path + "' cannot be opened." << endl;
1018 return 1;
1019 }
1020
1021 get_chunks_for_all_variables(file, dmrpp->root());
1022
1023 dmrpp->set_href(url_name);
1024
1025 mds->add_dmrpp_response(dmrpp.get(), h5_file_name /*h5_file_path*/);
1026
1027 XMLWriter writer;
1028 dmrpp->set_print_chunks(true);
1029 dmrpp->print_dap4(writer);
1030
1031 cout << writer.get_doc();
1032 } else {
1033 cerr << "Error: Could not get a lock on the DMR for '" + h5_file_path + "'." << endl;
1034 return 1;
1035 }
1036 }
1037 }
1038 catch (BESError &e) {
1039 cerr << "BESError: " << e.get_message() << endl;
1040 status = 1;
1041 }
1042 catch (std::exception &e) {
1043 cerr << "std::exception: " << e.what() << endl;
1044 status = 1;
1045 }
1046 catch (...) {
1047 cerr << "Unknown error." << endl;
1048 status = 1;
1049 }
1050
1051 H5Fclose(file);
1052
1053 return status;
1054}
static void SetUp(const std::string &values)
Sets up debugging for the bes.
Definition: BESDebug.cc:98
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:840
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:340
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
static std::string ConfigFile
Definition: TheBESKeys.h:185
Store the DAP DMR++ metadata responses.
virtual libdap::DMR * get_dmr_object(const string &name)
Use the DMR response to build a DMR with Dmrpp Types.
static DmrppMetadataStore * get_instance()
virtual MDSReadLock is_dmr_available(const std::string &name)
Is the DMR response for.
Provide a way to print the DMR++ response.
Definition: DMRpp.h:44
virtual void print_dmrpp(libdap::XMLWriter &xml, const std::string &href="", bool constrained=false, bool print_chunks=true)
Print the DMR++ response.
Definition: DMRpp.cc:71
void print_dap4(libdap::XMLWriter &xml, bool constrained=false)
override DMR::print_dap4() so the chunk info will print too.
Definition: DMRpp.cc:140
Size and offset information of data included in DMR++ files.
Definition: DmrppCommon.h:76
void set_chunk_dimension_sizes(const std::vector< size_t > &chunk_dims)
Set the value of the chunk dimension sizes given a vector of HDF5 hsize_t.
Definition: DmrppCommon.h:202
virtual unsigned long add_chunk(std::shared_ptr< http::url > d_data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array)
Add a new chunk as defined by an h4:byteStream element.
Definition: DmrppCommon.cc:204
void set_filter(const std::string &value)
Set the value of the filters property.
Definition: DmrppCommon.cc:108
void set_compact(bool value)
Set the value of the compact property.
Definition: DmrppCommon.h:147
void get_data(hid_t dset, void *buf)
Definition: h5common.cc:50
Unlock and close the MDS item when the ReadLock goes out of scope.