bes Updated for version 3.20.10
SuperChunky.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of the BES
4// Copyright (c) 2020 OPeNDAP, Inc.
5// Author: Nathan Potter<ndp@opendap.org>
6//
7// This library is free software; you can redistribute it and/or
8// modify it under the terms of the GNU Lesser General Public
9// License as published by the Free Software Foundation; either
10// version 2.1 of the License, or (at your option) any later version.
11//
12// This library is distributed in the hope that it will be useful,
13// but WITHOUT ANY WARRANTY; without even the implied warranty of
14// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15// Lesser General Public License for more details.
16//
17// You should have received a copy of the GNU Lesser General Public
18// License along with this library; if not, write to the Free Software
19// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20//
21// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
22//
23// Created by ndp on 12/2/20.
24//
25
26#include "config.h"
27
#include <vector>
#include <string>
#include <sstream>
#include <iostream>
#include <fstream>
#include <memory>
33
34#include <libdap/BaseType.h>
35#include <libdap/D4Group.h>
36
37#include "BESInternalError.h"
38#include "BESInternalFatalError.h"
39#include "BESDebug.h"
40#include "BESUtil.h"
41#include "TheBESKeys.h"
42#include "BESLog.h"
43#include "BESStopWatch.h"
44#include "BESIndent.h"
45
46#include "DmrppNames.h"
47#include "DMRpp.h"
48#include "DmrppD4Group.h"
49#include "DmrppArray.h"
50#include "DmrppParserSax2.h"
51#include "DmrppTypeFactory.h"
52
53#include "SuperChunk.h"
54
55#define prolog std::string("superchunky::").append(__func__).append("() - ")
56
57namespace dmrpp {
58
// Verbosity switch for this tool. When true, the functions in this file print
// progress messages ("Found Group", "Found Variable", "SuperChunking array", ...)
// to cout and the "FOUND GAP" diagnostics to cerr.
59bool debug = true;
60
61void compute_super_chunks(dmrpp::DmrppArray *array, bool /*only_constrained*/, vector<SuperChunk *> &super_chunks){
62
63 // Now we get the chunkyness
64 auto chunk_dim_sizes = array->get_chunk_dimension_sizes();
65 //unsigned int chunk_size_in_elements = array->get_chunk_size_in_elements();
66 auto const &chunks = array->get_immutable_chunks();
67
68 unsigned long long sc_count=0;
69 stringstream sc_id;
70 sc_id << array->name() << "-" << sc_count++;
71
72 //unsigned long long super_chunk_index = 0;
73 auto currentSuperChunk = new SuperChunk(sc_id.str(), array);
74 super_chunks.push_back(currentSuperChunk); // first super chunk...
75 if(debug) cout << "SuperChunking array: "<< array->name() << endl;
76
77 for(const auto &chunk:chunks){
78 bool was_added = currentSuperChunk->add_chunk(chunk);
79 if(!was_added){
80 if(debug) {
81 unsigned long long next_contiguous_chunk_offset = currentSuperChunk->get_offset() + currentSuperChunk->get_size();
82 unsigned long long gap_size;
83 bool is_behind = false;
84 if(chunk->get_offset() > next_contiguous_chunk_offset){
85 gap_size = chunk->get_offset() - next_contiguous_chunk_offset;
86 }
87 else {
88 is_behind = true;
89 gap_size = next_contiguous_chunk_offset - chunk->get_offset();
90 }
91 stringstream msg;
92 msg << "FOUND GAP chunk(offset: " << chunk->get_offset();
93 msg << " size: " << chunk->get_size() << ")";
94 msg << " SuperChunk(ptr: " << (void *) currentSuperChunk;
95 msg << " offset: " << currentSuperChunk->get_offset();
96 msg << " size: " << currentSuperChunk->get_size();
97 msg << " next_contiguous_chunk_offset: " << next_contiguous_chunk_offset << ") ";
98 msg << " gap_size: " << gap_size;
99 msg << " bytes" << (is_behind?" behind":" beyond") << " target offset";
100 msg << endl;
101 cerr << msg.str();
102 }
103 // If we were working on a SuperChunk (i.e. the current SuperChunk contains chunks)
104 // then we need to start a new one.
105 if(!currentSuperChunk->empty()){
106 sc_id.str(std::string());
107 sc_id << array->name() << "-" << sc_count++;
108 currentSuperChunk = new SuperChunk(sc_id.str(), array);
109 super_chunks.push_back(currentSuperChunk); // next super chunk...
110 }
111 bool add_first_successful = currentSuperChunk->add_chunk(chunk);
112 if(!add_first_successful)
113 throw BESInternalError("ERROR: Failed to add first Chunk to a new SuperChunk."+
114 chunk->to_string() ,__FILE__,__LINE__);
115
116 }
117 }
118 // Dump the currentSuperChunk if it doesn't have anything in it.
119 if(currentSuperChunk->empty()) {
120 super_chunks.pop_back();
121 delete currentSuperChunk;
122 }
123 if(false){
124 cout << "SuperChunk Inventory For Array: " << array->name() << endl;
125 for(auto super_chunk: super_chunks) {
126 cout << super_chunk->to_string(true) << endl;
127 }
128 }
129}
130
131void compute_super_chunks(libdap::BaseType *var, bool only_constrained, vector<SuperChunk *> &super_chunks) {
132 if (var->is_simple_type())
133 return;
134 if (var->is_constructor_type())
135 return;
136 if (var->is_vector_type()) {
137 auto array = dynamic_cast<dmrpp::DmrppArray *>(var);
138 if (array) {
139 if(debug) cout << "Found DmrppArray: "<< array->name() << endl;
140 compute_super_chunks(array, only_constrained, super_chunks);
141 }
142 else {
143 BESDEBUG(MODULE, prolog << "The variable: "<< var->name()
144 << " is not an instance of DmrppArray. SKIPPING"<< endl);
145 }
146 }
147}
148
// Disabled (#if 0) BaseType overload of inventory_super_chunks(). It is not compiled.
// It walks the chunks of a DmrppArray, groups chunks whose offsets are contiguous into
// vectors of Chunk pointers and prints the resulting inventory to cout.
//
// NOTE(review), should this code ever be re-enabled:
//  - The local `super_chunks` declared inside the `if(array)` block shadows the
//    function parameter of the same name; the parameter is never written.
//  - `for(auto chunk:chunks)` copies each element and `&chunk` stores the address of
//    that loop-local copy, so the stored pointers do not outlive the iteration.
//  - The "nbytes:" field of the FOUND GAP message prints current_offset; presumably
//    current_size was intended - confirm.
//  - gap_size is an unsigned difference; it looks like it wraps when the current
//    offset is smaller than next_contiguous_chunk_offset - confirm the offset type.
//  - Only an empty trailing vector is deleted; the other vectors allocated with new
//    are not freed in this function.
149#if 0
150void inventory_super_chunks(libdap::BaseType *var, bool only_constrained, vector<SuperChunk *> &super_chunks){
151 if(var->is_simple_type())
152 return;
153 if(var->is_constructor_type())
154 return;
155 if(var->is_vector_type()){
156 auto array = dynamic_cast<DmrppArray*>(var);
157 if(array){
158 // Now we get the chunkyness
159 auto chunk_dim_sizes = array->get_chunk_dimension_sizes();
160 //unsigned int chunk_size_in_elements = array->get_chunk_size_in_elements();
161 auto chunks = array->get_immutable_chunks();
162 unsigned long long next_contiguous_chunk_offset = 0;
163
164 //unsigned long long super_chunk_index = 0;
165 vector<vector<const Chunk *> *> super_chunks;
166 auto currentSuperChunk = new vector<const Chunk *>();
167 super_chunks.push_back(currentSuperChunk); // first super chunk...
168
169 if(debug) cout << "SuperChunking array: "<< array->name() << endl;
170
171 bool first = true;
172 for(auto chunk:chunks){
173 auto current_offset = chunk.get_offset();
174 auto current_size = chunk.get_size();
175 // auto c_pia = chunk.get_position_in_array();
176
177 if(!first){
178 if(current_offset!=next_contiguous_chunk_offset){
179 // The current chunk is not contiguous with the previous
180 unsigned long long gap_size = current_offset - next_contiguous_chunk_offset;
181 if(debug) {
182 cout << "FOUND GAP current_offset: " << current_offset <<
183 " nbytes: " << current_offset <<
184 " next_contiguous_chunk_offset: " << next_contiguous_chunk_offset <<
185 " gap_size: " << gap_size <<
186 " currentSuperChunk.size(): " << currentSuperChunk->size() << endl;
187 }
188 // If we were working on a SuperChunk (i.e. the current SuperChunk contains chunks)
189 // then we need to start a new one.
190 if(!currentSuperChunk->empty()){
191 currentSuperChunk = new vector<const Chunk *>();
192 super_chunks.push_back(currentSuperChunk); // next super chunk...
193 }
194 }
195 }
196 currentSuperChunk->push_back(&chunk);
197 next_contiguous_chunk_offset = current_offset + current_size;
198 first = false;
199 }
200 // Dump the currentSuperChunk if it doesn't have anything in it.
201 if(currentSuperChunk->empty()) {
202 super_chunks.pop_back();
203 delete currentSuperChunk;
204 }
205 cout << "SuperChunk Inventory For Array: " << array->name() << endl;
206 unsigned long long sc_count=0;
207 for(auto super_chunk: super_chunks) {
208 cout << " SuperChunk[" << sc_count++ << "] contains : " << super_chunk->size() << " chunks."
209 << endl;
210 if (debug) {
211 for (auto chunk:*super_chunk) {
212 cout << " " << chunk->to_string() << endl;
213 }
214 }
215 }
216
217 }
218 else {
219 cerr << prolog << " ERROR! The variable: "<< var->name()
220 << " is not an instance of DmrppArray. SKIPPING"<< endl;
221 }
222
223 }
224 }
225#endif
226
227 void inventory_super_chunks(libdap::D4Group *group, bool only_constrained, vector<SuperChunk *> &super_chunks){
228
229 // Process Groups - RECURSION HAPPENS HERE.
230 auto gtr = group->grp_begin();
231 while(gtr!=group->grp_end()){
232 if(debug) cout << "Found Group: "<< (*gtr)->name() << endl;
233 inventory_super_chunks(*gtr++, only_constrained, super_chunks);
234 }
235
236 // Process Vars
237 auto vtr = group->var_begin();
238 while(vtr!=group->var_end()){
239 if(debug) cout << "Found Variable: "<< (*vtr)->type_name() << " " << (*vtr)->name() << endl;
240 compute_super_chunks(*vtr++, only_constrained, super_chunks);
241 //inventory_super_chunks(*vtr++, only_constrained);
242 }
243 }
244
245 void inventory_super_chunks(DMRpp &dmr, bool only_constrained, vector<SuperChunk *> &super_chunks){
246 inventory_super_chunks(dmr.root(), only_constrained, super_chunks);
247 }
248
/**
 * @brief Opens a DMR++ file and parses it into a newly allocated DMRpp object.
 *
 * @param dmrpp_filename Name of the file to open.
 * @return Pointer to a DMRpp allocated with new; nothing in this function deletes
 * it, so the caller is responsible for doing so.
 * @throws BESInternalFatalError if the file cannot be opened.
 *
 * NOTE(review): `parser` and `factory` are used below but their declarations are not
 * visible in this listing (the embedded line numbers jump from 252 to 255) -
 * presumably a DmrppParserSax2 and a DmrppTypeFactory; confirm against the real file.
 * NOTE(review): if parser.intern() throws, `dmr` is not deleted here.
 */
249 dmrpp::DMRpp *get_dmrpp(const string dmrpp_filename){
250 ifstream dmrpp_ifs (dmrpp_filename);
251 if (dmrpp_ifs.is_open())
252 {
255 auto dmr = new DMRpp(&factory,dmrpp_filename);
// Parse the open stream into the new DMRpp object.
256 parser.intern(dmrpp_ifs, dmr);
257 return dmr;
258 }
259 else {
260 throw BESInternalFatalError("The provided file could not be opened. filename: '"+dmrpp_filename+"'",__FILE__,__LINE__);
261 }
262 }
263
264 void inventory_super_chunks(const string dmrpp_filename){
265 cout << "DMR++ file: " << dmrpp_filename << endl;
266 dmrpp::DMRpp *dmr = get_dmrpp(dmrpp_filename);
267
268 vector<SuperChunk *> super_chunks;
269
270 {
271 BESStopWatch sw;
272 sw.start(prolog);
273 dmrpp::inventory_super_chunks(*dmr, false, super_chunks);
274 }
275
276 cout << "DMR++ file: " << dmrpp_filename << endl;
277 cout << "Produced " << super_chunks.size() << " SuperChunks." << endl;
278 for(auto super_chunk: super_chunks) {
279 cout << super_chunk->to_string(true) << endl;
280 }
281
282 delete dmr;
283 }
284
285 void dump_vars(libdap::D4Group *group){
286 // Process Groups - RECURSION HAPPENS HERE.
287 auto gtr = group->grp_begin();
288 while(gtr!=group->grp_end()){
289 if(debug) cout << "Found Group: "<< (*gtr)->name() << endl;
290 dump_vars(*gtr++);
291 }
292
293 // Process Vars
294 auto vtr = group->var_begin();
295 while(vtr!=group->var_end()){
296 libdap::BaseType *bt = *vtr++;
297 bt->dump(cout);
298 cout << endl;
299 }
300 }
301
302 void dump_vars(DMRpp &dmr){
303 dump_vars(dmr.root());
304 }
305} // namespace dmrpp
306
307int main(int argc, char *argv[]) {
308 string bes_log_file("superchunky_bes.log");
309 //string bes_debug_log_file("cerr");
310 //string bes_debug_keys( "bes,http,curl,dmrpp,dmrpp:3,dmrpp:4,rr");
311 //string http_cache_dir;
312 string prefix;
313 //string http_netrc_file;
314 string cache_effective_urls("false");
315 char *prefixCstr = getenv("prefix");
316 if (prefixCstr) {
317 prefix = prefixCstr;
318 } else {
319 prefix = "/";
320 }
321
322 cout << "bes_log_file: " << bes_log_file << endl;
323
324 auto bes_config_file = BESUtil::assemblePath(prefix, "/etc/bes/bes.conf", true);
325 TheBESKeys::ConfigFile = bes_config_file; // Set the config file for TheBESKeys
326 TheBESKeys::TheKeys()->set_key("BES.LogName", bes_log_file); // Set the log file so it goes where we say.
327 TheBESKeys::TheKeys()->set_key("AllowedHosts", "^https?:\\/\\/.*$", false); // Set AllowedHosts to allow any URL
328 TheBESKeys::TheKeys()->set_key("AllowedHosts", "^file:\\/\\/\\/.*$", true); // Set AllowedHosts to allow any file
329 TheBESKeys::TheKeys()->set_key("Http.cache.effective.urls", cache_effective_urls, false); // Set AllowedHosts to allow any file
330
331
332 // if (bes_debug) BESDebug::SetUp(bes_debug_log_file + "," + bes_debug_keys); // Enable BESDebug settings
333
334
335 BESIndent::SetIndent("");
336
337 for(auto i=1; i<argc; i++){
338 string dmrpp_filename(argv[i]);
339 //dmrpp::inventory_super_chunks(dmrpp_filename);
340
341 dmrpp::DMRpp *dmrpp = dmrpp::get_dmrpp( dmrpp_filename);
342 dump_vars(*dmrpp);
343 }
344 return 0;
345}
346
347
348
exception thrown if internal error encountered
exception thrown if an internal error is found and is fatal to the BES
virtual bool start(std::string name)
Definition: BESStopWatch.cc:67
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:840
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
void set_key(const std::string &key, const std::string &val, bool addto=false)
allows the user to set key/value pairs from within the application.
Definition: TheBESKeys.cc:206
static std::string ConfigFile
Definition: TheBESKeys.h:185
Provide a way to print the DMR++ response.
Definition: DMRpp.h:44
Extend libdap::Array so that a handler can read data using a DMR++ file.
Definition: DmrppArray.h:68
virtual unsigned long long get_size(bool constrained=false)
Return the total number of elements in this Array.
Definition: DmrppArray.cc:580
virtual const std::vector< std::shared_ptr< Chunk > > & get_immutable_chunks() const
A const reference to the vector of chunks.
Definition: DmrppCommon.h:169
virtual unsigned long add_chunk(std::shared_ptr< http::url > d_data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array)
Add a new chunk as defined by an h4:byteStream element.
Definition: DmrppCommon.cc:204
virtual const std::vector< unsigned long long > & get_chunk_dimension_sizes() const
The chunk dimension sizes held in a const vector.
Definition: DmrppCommon.h:179
void intern(std::istream &f, libdap::DMR *dest_dmr)