bes Updated for version 3.20.10
AggregationElement.cc
1
2// This file is part of the "NcML Module" project, a BES module designed
3// to allow NcML files to be used as a wrapper to add
4// AIS to existing datasets of any format.
5//
6// Copyright (c) 2009 OPeNDAP, Inc.
7// Author: Michael Johnson <m.johnson@opendap.org>
8//
9// For more information, please also see the main website: http://opendap.org/
10//
11// This library is free software; you can redistribute it and/or
12// modify it under the terms of the GNU Lesser General Public
13// License as published by the Free Software Foundation; either
14// version 2.1 of the License, or (at your option) any later version.
15//
16// This library is distributed in the hope that it will be useful,
17// but WITHOUT ANY WARRANTY; without even the implied warranty of
18// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19// Lesser General Public License for more details.
20//
21// You should have received a copy of the GNU Lesser General Public
22// License along with this library; if not, write to the Free Software
23// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24//
25// Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26//
27// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29#include "config.h"
30
31#include <sstream>
32#include <fstream>
33#include <sys/stat.h>
34
35#include "AggregationElement.h"
36#include "AggMemberDatasetUsingLocationRef.h" // agg_util
37#include "AggMemberDatasetSharedDDSWrapper.h" // agg_util
38#include "AggregationUtil.h" // agg_util
39#include "ArrayAggregateOnOuterDimension.h" // agg_util
40#include "ArrayJoinExistingAggregation.h" // agg_util
41#include "GridAggregateOnOuterDimension.h" // agg_util
42#include "GridJoinExistingAggregation.h" // agg_util
43#include "AggMemberDatasetDimensionCache.h"
44
45#include <libdap/AttrTable.h> // libdap
46#include <libdap/Array.h> // libdap
47#include <libdap/AttrTable.h> // libdap
48#include "DDSAccessInterface.h" // agg_util
49#include "Dimension.h" // agg_util
50#include "DimensionElement.h"
51#include <libdap/Grid.h> // libdap
52#include "MyBaseTypeFactory.h"
53#include "NCMLBaseArray.h"
54#include "NCMLDebug.h"
55#include "NCMLParser.h"
56#include "NetcdfElement.h"
57#include "ScanElement.h"
58#include "BESDebug.h"
59#include "BESStopWatch.h"
60
61
62
65using agg_util::AMDList;
70using std::auto_ptr;
71
72namespace ncml_module {
73const string AggregationElement::_sTypeName = "aggregation";
74
75const vector<string> AggregationElement::_sValidAttrs = getValidAttributes();
76
77AggregationElement::AggregationElement() :
78 NCMLElement(0), _type(""), _dimName(""), _recheckEvery(""), _parent(0), _datasets(), _scanners(), _aggVars(), _gotVariableAggElement(
79 false), _wasAggregatedMapAddedForJoinExistingGrid(false), _coordinateAxisType("")
80{
81}
82
83AggregationElement::AggregationElement(const AggregationElement& proto) :
84 RCObjectInterface(), NCMLElement(proto), _type(proto._type), _dimName(proto._dimName), _recheckEvery(
85 proto._recheckEvery), _parent(proto._parent) // my parent is the same too... is this safe without a true weak reference?
86 , _datasets() // deep copy below
87 , _scanners() // deep copy below
88 , _aggVars(proto._aggVars), _gotVariableAggElement(false), _wasAggregatedMapAddedForJoinExistingGrid(false), _coordinateAxisType(
89 "")
90{
91 // Deep copy all the datasets and add them to me...
92 // This is potentially expensive in memory for large datasets, so let's tell someone.
93 if (!proto._datasets.empty()) {
94 BESDEBUG("ncml",
95 "WARNING: AggregationElement copy ctor is deep copying all contained datasets! This might be memory and time intensive!");
96 }
97
98 // Clone the actual members
99 _datasets.reserve(proto._datasets.size());
100 for (vector<NetcdfElement*>::const_iterator it = proto._datasets.begin(); it != proto._datasets.end(); ++it) {
101 const NetcdfElement* elt = (*it);
102 addChildDataset(elt->clone());
103 }
104 NCML_ASSERT(_datasets.size() == proto._datasets.size());
105
106 _scanners.reserve(proto._scanners.size());
107 for (vector<ScanElement*>::const_iterator it = proto._scanners.begin(); it != proto._scanners.end(); ++it) {
108 const ScanElement* elt = (*it);
109 addScanElement(elt->clone());
110 }
111 NCML_ASSERT(_scanners.size() == proto._scanners.size());
112}
113
114AggregationElement::~AggregationElement()
115{
116 BESDEBUG("ncml:memory", "~AggregationElement called...");
117 _type = "";
118 _dimName = "";
119 _recheckEvery = "";
120 _parent = 0;
121 _wasAggregatedMapAddedForJoinExistingGrid = false;
122
123 // Release strong references to the contained netcdfelements....
124 while (!_datasets.empty()) {
125 NetcdfElement* elt = _datasets.back();
126 _datasets.pop_back();
127 elt->unref(); // Will be deleted if the last strong reference
128 }
129
130 // And the scan elements
131 while (!_scanners.empty()) {
132 ScanElement* elt = _scanners.back();
133 _scanners.pop_back();
134 elt->unref(); // Will be deleted if the last strong reference
135 }
136}
137
// Returns a reference to the static element type name, _sTypeName.
// NOTE(review): the declarator line of this definition is not present in this
// listing; only the return type and the body are visible.
138const string&
140{
141 return _sTypeName;
142}
143
// Returns a newly allocated AggregationElement copy-constructed from *this.
// NOTE(review): the declarator lines of this definition are not present in
// this listing; presumably this is the clone() member — confirm against the header.
146{
147 return new AggregationElement(*this);
148}
149
// Stores the "type", "dimName" and "recheckEvery" values found in attrs into
// the matching members (each falls back to the empty string when absent) and
// then checks attrs against the list of valid attribute names, _sValidAttrs.
// NOTE(review): the declarator line of this definition is not present in this
// listing, so the name and exact type of the attrs parameter cannot be seen here.
151{
152 _type = attrs.getValueForLocalNameOrDefault("type", "");
153 _dimName = attrs.getValueForLocalNameOrDefault("dimName", "");
154 _recheckEvery = attrs.getValueForLocalNameOrDefault("recheckEvery", "");
155
156 // default is to print errors and throw which we want.
157 validateAttributes(attrs, _sValidAttrs);
158}
159
// Start-of-element handling for <aggregation> (the assertion message below
// names this routine AggregationElement::handleBegin()).
// Requires that this element has no parent dataset yet, that the parser scope
// is a <netcdf> element, and that the current dataset has no aggregation
// already; it then installs this object as the current dataset's aggregation.
// NOTE(review): the declarator line of this definition is not present in this listing.
161{
162#if 0
163 BESStopWatch sw;
164 if (BESISDEBUG( TIMING_LOG_KEY ))
165 sw.start("AggregationElement::handleBegin", "");
166#endif
167
168 NCML_ASSERT(!getParentDataset());
169
170 // Check that the immediate parent element is netcdf since we cannot put an aggregation anywhere else.
171 if (!_parser->isScopeNetcdf()) {
172 THROW_NCML_PARSE_ERROR(_parser->getParseLineNumber(),
173 "Got an <aggregation> = " + toString()
174 + " at incorrect parse location. They can only be direct children of <netcdf>. Scope="
175 + _parser->getScopeString());
176 }
177
178 NetcdfElement* dataset = _parser->getCurrentDataset();
179 NCML_ASSERT_MSG(dataset,
180 "We expected a non-noll current dataset while processing AggregationElement::handleBegin() for " + toString());
181 // If the enclosing dataset already has an aggregation, this is a parse error.
182 if (dataset->getChildAggregation()) {
183 THROW_NCML_PARSE_ERROR(_parser->getParseLineNumber(),
184 "Got <aggregation> = " + toString() + " but the enclosing dataset = " + dataset->toString()
185 + " already had an aggregation set! There can be only one!");
186 }
187 // Set me as the aggregation for the current dataset.
188 // This will set my parent and also ref() me.
189 dataset->setChildAggregation(this);
190}
191
192void AggregationElement::handleContent(const string& content)
193{
194 // Aggregations do not specify content!
195 if (!NCMLUtil::isAllWhitespace(content)) {
196 THROW_NCML_PARSE_ERROR(_parser->getParseLineNumber(),
197 "Got non-whitespace for content and didn't expect it. Element=" + toString() + " content=\"" + content
198 + "\"");
199 }
200}
201
// End-of-element handling (the stopwatch label and debug text below name this
// routine AggregationElement::handleEnd). Dispatches on the aggregation type:
// union, joinNew and joinExisting each call their process*() routine; the two
// forecastModel* types and any other value raise a parse error.
// NOTE(review): the declarator line of this definition is not present in this listing.
203{
204#if 1
205 BESStopWatch sw;
206 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::handleEnd", "");
207#endif
208 // Handle the actual processing!!
209 BESDEBUG("ncml", "AggregationElement::handleEnd() - Processing the aggregation!!" << endl);
210
211 if (isUnionAggregation()) {
212 BESDEBUG("ncml2", "AggregationElement::handleEnd() - isUnionAggregation" << endl);
213 processUnion();
214 }
215 else if (isJoinNewAggregation()) {
216 BESDEBUG("ncml2", "AggregationElement::handleEnd() - isJoinNewAggregation" << endl);
217 processJoinNew();
218 }
219 else if (isJoinExistingAggregation()) {
220 BESDEBUG("ncml2", "AggregationElement::handleEnd() - isJoinExistingAggregation" << endl);
221 processJoinExisting();
222 }
223 else if (_type == "forecastModelRunCollection" || _type == "forecastModelSingleRunCollection") {
224 THROW_NCML_PARSE_ERROR(_parser->getParseLineNumber(),
225 "Sorry, we do not implement the forecastModelRunCollection aggregations in this version of the NCML Module!");
226 }
227 else {
228 THROW_NCML_PARSE_ERROR(_parser->getParseLineNumber(),
229 "Unknown aggregation type=" + _type + " at scope=" + _parser->getScopeString());
230 }
231}
232
// Builds a textual rendering of the element's opening tag: the type name, the
// type attribute, and the dimName / recheckEvery attributes as formatted by
// printAttributeIfNotEmpty().
// NOTE(review): the declarator line of this definition is not present in this
// listing; presumably this is toString() — confirm against the header.
234{
235 return "<" + _sTypeName + " type=\"" + _type + "\"" + printAttributeIfNotEmpty("dimName", _dimName)
236 + printAttributeIfNotEmpty("recheckEvery", _recheckEvery) + ">";
237}
238
239bool AggregationElement::isJoinNewAggregation() const
240{
241 return (_type == "joinNew");
242}
243
244bool AggregationElement::isUnionAggregation() const
245{
246 return (_type == "union");
247}
248
249bool AggregationElement::isJoinExistingAggregation() const
250{
251 return (_type == "joinExisting");
252}
253
// Registers pDataset as a member of this aggregation: takes a reference on it
// via ref(), appends it to _datasets, and tells the dataset that this object
// is its parent aggregation.
// NOTE(review): the declarator line of this definition is not present in this
// listing; the copy constructor calls addChildDataset(), which is presumably
// this routine — confirm against the header.
255{
256 VALID_PTR(pDataset);
257 BESDEBUG("ncml", "AggregationElement: adding child dataset: " << pDataset->toString() << endl);
258
259 // Add as a strong reference.
260 pDataset->ref();
261 _datasets.push_back(pDataset);
262
263 // also set a weak reference to us as the parent
264 pDataset->setParentAggregation(this);
265}
266
// Appends name to the list of aggregation variables (_aggVars).
// A name that is already in the list is reported as a parse error instead of
// being added a second time.
// NOTE(review): the declarator line of this definition is not present in this listing.
268{
269 if (isAggregationVariable(name)) {
270 THROW_NCML_PARSE_ERROR(_parser->getParseLineNumber(),
271 "Tried to add an aggregation variable twice: name=" + name + " at scope=" + _parser->getScopeString());
272 }
273 else {
274 _aggVars.push_back(name);
275 BESDEBUG("ncml", "Added aggregation variable name=" + name << endl);
276 }
277}
278
279bool AggregationElement::isAggregationVariable(const string& name) const
280{
281 bool ret = false;
282 AggVarIter endIt = endAggVarIter();
283 AggVarIter it = beginAggVarIter();
284 for (; it != endIt; ++it) {
285 if (name == *it) {
286 ret = true;
287 break;
288 }
289 }
290 return ret;
291}
292
293string AggregationElement::printAggregationVariables() const
294{
295 string ret("{ ");
296 AggVarIter endIt = endAggVarIter();
297 AggVarIter it = beginAggVarIter();
298 for (; it != endIt; ++it) {
299 ret += *it;
300 ret += " ";
301 }
302 ret += "}";
303 return ret;
304}
305
306AggregationElement::AggVarIter AggregationElement::beginAggVarIter() const
307{
308 return _aggVars.begin();
309}
310
311AggregationElement::AggVarIter AggregationElement::endAggVarIter() const
312{
313 return _aggVars.end();
314}
315
// Returns the current value of the _gotVariableAggElement flag.
// NOTE(review): the declarator line of this definition is not present in this listing.
317{
318 return _gotVariableAggElement;
319}
320
// Sets the _gotVariableAggElement flag to true.
// NOTE(review): the declarator line of this definition is not present in this listing.
322{
323 _gotVariableAggElement = true;
324}
325
// Registers pScanner with this aggregation: appends it to _scanners, takes a
// reference on it via ref(), and sets this object as the scanner's parent.
// NOTE(review): the declarator line of this definition is not present in this
// listing; the copy constructor calls addScanElement(), which is presumably
// this routine — confirm against the header.
327{
328 VALID_PTR(pScanner);
329 _scanners.push_back(pScanner);
330 pScanner->ref(); // strong ref
331 pScanner->setParent(this); // weak ref.
332}
333
// Hook run once the parent dataset is complete (the debug text below names
// this routine AggregationElement::processParentDatasetComplete()).
// Forwards to the joinNew or joinExisting completion routine according to
// _type; any other type does nothing here.
// NOTE(review): the declarator line of this definition is not present in this listing.
335{
336 BESDEBUG("ncml", "AggregationElement::processParentDatasetComplete() called..." << endl);
337
338 if (_type == "joinNew") {
339 processParentDatasetCompleteForJoinNew();
340 }
341 else if (_type == "joinExisting") {
342 processParentDatasetCompleteForJoinExisting();
343 }
344}
345
348
// Replaces the stored parent pointer (_parent) with parent and returns the
// value getParentDataset() reported before the change.
// NOTE(review): the declarator line of this definition is not present in this listing.
351{
352 NetcdfElement* ret = getParentDataset();
353 _parent = parent;
354 return ret;
355}
356
357void AggregationElement::processUnion()
358{
359 BESDEBUG("ncml", "Processing a union aggregation..." << endl);
360
361 // Merge all the dimensions... For now, it is a parse error if a dimension
362 // with the same name exists but has a different size.
363 // Since DAP2 doesn't have dimensions, we can't do this in agg_util, but
364 // have to do it here.
365 mergeDimensions();
366
367 // Merge the attributes and variables in all the DDS's into our parent DDS....
368 vector<const DDS*> datasetsInOrder;
369 // NOTE WELL: this will LOAD ALL DDX's, but there's no choice for union.
370 // This doesn't load data, just the metadata!
371 collectDatasetsInOrder(datasetsInOrder);
372 DDS* pUnion = 0;
373 if (getParentDataset()) {
374 pUnion = getParentDataset()->getDDS();
375 }
376 AggregationUtil::performUnionAggregation(pUnion, datasetsInOrder);
377}
378
379void AggregationElement::processJoinNew()
380{
381 BESStopWatch sw;
382 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processJoinNew", "");
383
384 // This will run any child <scan> elements to prepare them.
385 processAnyScanElements();
386
387 BESDEBUG("ncml",
388 "AggregationElement::processJoinNew() - beginning joinNew on the following aggVars=" + printAggregationVariables() << endl);
389
390 // Union the dimensions of the child sets so they're available
391 BESDEBUG("ncml", "Merging dimensions from children into aggregated dataset..." << endl);
392 mergeDimensions();
393
394 // For now we will explicitly create the new dimension for lookups.
395 unsigned int newDimSize = _datasets.size(); // ASSUMES we find an aggVar in EVERY dataset!
396 getParentDataset()->addDimension(new DimensionElement(agg_util::Dimension(_dimName, newDimSize)));
397
398 // We need at least one dataset, so warn.
399 if (_datasets.empty()) {
400 THROW_NCML_PARSE_ERROR(line(), "In joinNew aggregation we cannot have zero datasets specified!");
401 }
402
403 // This is where the output variables go
404 DDS* pAggDDS = getParentDataset()->getDDS();
405 // The first dataset acts as the template for the remainder
406 DDS* pTemplateDDS = _datasets[0]->getDDS();
407 NCML_ASSERT_MSG(pTemplateDDS, "AggregationElement::processJoinNew() - NULL template dataset!");
408
409 // First, union the template's global attribute table into the output's table.
410 AggregationUtil::unionAttrsInto(&(pAggDDS->get_attr_table()), pTemplateDDS->get_attr_table());
411
412 // Then perform the aggregation for each variable...
413 // TODO REFACTOR OPTIMIZE We loop on variables, not the datasets.
414 // It might be more efficient to do all vars for each dataset
415 vector<string>::const_iterator endIt = _aggVars.end();
416 for (vector<string>::const_iterator it = _aggVars.begin(); it != endIt; ++it) {
417 const string& varName = *it;
418 BESDEBUG("ncml",
419 "AggregationElement::processJoinNew() - Aggregating with joinNew on variable=" << varName << "..." << endl);
420 processJoinNewOnAggVar(pAggDDS, varName, *pTemplateDDS);
421 }
422
423 // Union any non-aggregated variables from the template dataset into the aggregated dataset
424 // Because we want the joinExistingaggregation to build up the Coordinate Variables (CVs)
425 // in the order they are declared in the NCML file, we need to track the current position
426 // where the last one was inserted. We can do that with a field in the AggregationUtil
427 // class. Here we reset that field so that it starts at position 0. 12.13.11 jhrg
428 AggregationUtil::resetCVInsertionPosition();
429
430 // Union any non-aggregated variables from the template dataset into the aggregated dataset
431 AggregationUtil::unionAllVariablesInto(pAggDDS, *pTemplateDDS, /*add_at_top = */true);
432}
433
#if 0
// This function was used previously, but not now.
// Leaving it in case we need it, but commented out
// to deal with -werror compilation.

/* File local helper: true only when every scanner reports a non-empty ncoords. */
static bool
doAllScannersSpecifyNCoords(const vector<ScanElement*>& scanners)
{
    vector<ScanElement*>::const_iterator it = scanners.begin();
    while (it != scanners.end()) {
        VALID_PTR(*it);
        if ((*it)->ncoords().empty()) {
            return false;
        }
        ++it;
    }
    return true;
}
#endif // 0
458
459void AggregationElement::processJoinExisting()
460{
461 BESDEBUG("ncml:2", "Called AggregationElement::processJoinExisting()...");
462
463 // Merge any scans into _datasets
464 processAnyScanElements();
465
466 // We need at least one dataset or it's an error
467 if (_datasets.empty()) {
468 THROW_NCML_PARSE_ERROR(line(), "In joinExisting aggregation we cannot have zero datasets specified!");
469 }
470
471 // We need to know the size of the joinExisting dimension
472 // for all granule datasets.
473 // Make sure that we either get them from:
474 // 1) ncoords specified
475 // 2) Dimension cache file previously created
476 // 3) Load them the slow way and cache the result
477 AMDList granuleList;
478 granuleList.reserve(_datasets.size());
479 fillDimensionCacheForJoinExistingDimension(granuleList, _dimName);
480
481 // Figure out the cardinality of the aggregated dimension
482 // and add it into the parent dataset's scope for lookups.
483 addNewDimensionForJoinExisting(granuleList);
484
485 // Union any declared dimensions of the child sets so they're available,
486 // but be carefuly to skip the join dimension since we already created it
487 // new ourselves with the post-aggregation value!
488 BESDEBUG("ncml:2", "Merging dimensions from children into aggregated dataset..." << endl);
489 mergeDimensions(true, _dimName);
490
491 // This is where the output variables go
492 DDS* pAggDDS = getParentDataset()->getDDS();
493
494 // The first dataset acts as the template
495 DDS* pTemplateDDS = _datasets[0]->getDDS();
496 NCML_ASSERT_MSG(pTemplateDDS, "AggregationElement::processJoinExisting(): NULL template dataset!");
497
498 // First, union the template's global attribute table into the output's table.
499 AggregationUtil::unionAttrsInto(&(pAggDDS->get_attr_table()), pTemplateDDS->get_attr_table());
500
501 // Fills in the _aggVars list properly.
502 decideWhichVariablesToJoinExist(*pTemplateDDS);
503
504 // For each variable in the to-be-aggregated list, create the
505 // aggregation variable in the output based on the granule list.
506 vector<string>::const_iterator endIt = _aggVars.end();
507 for (vector<string>::const_iterator it = _aggVars.begin(); it != endIt; ++it) {
508 const string& varName = *it;
509 BESDEBUG("ncml", "Aggregating with joinExisting on variable=" << varName << "..." << endl);
510 processJoinExistingOnAggVar(pAggDDS, varName, *pTemplateDDS);
511 }
512
513 // Union in the remaining unaggregated variables from the template DDS
514 // since they are likely to be coordinate variables.
515 // Handle variableAgg properly.
516 unionAddAllRequiredNonAggregatedVariablesFrom(*pTemplateDDS);
517}
518
519void AggregationElement::unionAddAllRequiredNonAggregatedVariablesFrom(const DDS& templateDDS)
520{
521 // Union any non-aggregated variables from the template dataset into the aggregated dataset
522 // Because we want the joinExistingaggregation to build up the Coordinate Variables (CVs)
523 // in the order they are declared in the NCML file, we need to track the current position
524 // where the last one was inserted. We can do that with a field in the AggregationUtil
525 // class. Here we reset that field so that it starts at position 0. 12.13.11 jhrg
526 AggregationUtil::resetCVInsertionPosition();
527
528 // If we didn't get a variable agg for a joinExisting, then union them all.
529 if (isJoinExistingAggregation()) {
530 if (!gotVariableAggElement()) {
531 AggregationUtil::unionAllVariablesInto(getParentDataset()->getDDS(), templateDDS, /*add_at_top = */true);
532 }
533 else {
534 // THROW ONLY IF A GRID since we need to implement the path that handles maps
535 }
536 } // if isJoinExistingAggregation
537
538 else if (isJoinNewAggregation())
539 // joinNew requires the list of vars, so for this one just union them all in as well.
540 {
541 AggregationUtil::unionAllVariablesInto(getParentDataset()->getDDS(), templateDDS, /*add_at_top = */true);
542 }
543}
544
545void AggregationElement::decideWhichVariablesToJoinExist(const DDS& templateDDS)
546{
547 // If they were not specified by hand, then discover them.
548 if (_aggVars.empty()) {
549 BESDEBUG("ncml",
550 "Searching the the template DDS for variables with outer " "dimension matching the join dimension = " << _dimName << " in order to add them to the aggregation output list." << endl);
551
552 // the prototype (first dataset) will define the set of vars to be aggregated.
553 // Note: the c.v. dim(dim) _must_ exist, either in all datasets or in the agg itself.
554 vector<string> matchingVars;
555 findVariablesWithOuterDimensionName(matchingVars, templateDDS, _dimName);
556 for (vector<string>::const_iterator it = matchingVars.begin(); it != matchingVars.end(); ++it) {
558 }
559 }
560 else // make sure the listed ones are valid
561 {
562 BESDEBUG("ncml",
563 "joinExist aggregation had variableAgg specified... " "Validating these variables have outer dimension named " << _dimName << endl);
564
565 for (vector<string>::const_iterator it = _aggVars.begin(); it != _aggVars.end(); ++it) {
566 BaseType* pVar = AggregationUtil::findVariableAtDDSTopLevel(templateDDS, *it);
567
568 // First, it must exist!
569 if (!pVar) {
570 std::ostringstream msg;
571 msg << "Error validating the variableAgg list. The variable named " << *it
572 << " was not found in the top-level DDS!";
573 THROW_NCML_PARSE_ERROR(line(), msg.str());
574 }
575
576 // Next see that it can be aggregated
577 Array* pArray = AggregationUtil::getAsArrayIfPossible(pVar);
578 if (!pArray) {
579 std::ostringstream msg;
580 msg << "The declared variableAgg aggregation variable named " << *it
581 << " was not of a type able to be aggregated!";
582 THROW_NCML_PARSE_ERROR(line(), msg.str());
583 }
584
585 // Make sure the dimension name matches.
586 if (pArray->dimension_name(pArray->dim_begin()) != _dimName) {
587 std::ostringstream msg;
588 msg << "The declared variableAgg variable named " << *it << " did not match the outer dimension name "
589 << _dimName << " for this joinExisting aggregation!";
590 THROW_NCML_PARSE_ERROR(line(), msg.str());
591 }
592
593 // Otherwise, it's good, so let the log know.
594 std::ostringstream msg;
595 msg << "The variable named " << *it << " is a valid joinExisting variable. Will be added to output.";
596 BESDEBUG("ncml", msg.str() << endl);
597 } // for loop over user-declared variableAgg list.
598 }
599}
600
601//
// Fills granuleList with the AggMemberDataset of every entry in _datasets and
// makes sure each granule has dimension information cached.
// If the first granule specifies ncoords, all granules must (otherwise a parse
// error is raised) and the cache is seeded from those values; otherwise the
// dimension information is obtained per granule in the else branch below.
// The second parameter is unnamed and unused here.
602void AggregationElement::fillDimensionCacheForJoinExistingDimension(AMDList& granuleList,
603 const std::string& /* aggDimName */)
604{
605 // First, run down the dataset list (which has been expanded with scanners)
606 // and create the AMD list for them.
607 // for each entry in _dataset
608 vector<NetcdfElement*>::iterator endIt = _datasets.end();
609 for (vector<NetcdfElement*>::iterator it = _datasets.begin(); it != endIt; ++it) {
610 granuleList.push_back((*it)->getAggMemberDataset());
611 }
612
613 // Second, see if there is an ncoords for each of the datasets,
614 // and if so, for each one add it to the cache in the AMD.
615 if (doesFirstGranuleSpecifyNcoords()) {
616 // If so, check they all do or it's a user error.
617 if (!doAllGranulesSpecifyNcoords()) {
618 THROW_NCML_PARSE_ERROR(-1, "In a joinExisting aggregation we found that the first "
619 "granule specified an ncoords but not all of the others "
620 "did. Either all or none of them should have ncoords specified.");
621 }
622 // otherwise we're good, seed the cache from the ncoords
623 else {
624 seedDimensionCacheFromUserSpecs(granuleList);
625 }
626 }
627 else // look for cached dimension file or load dimensionalities from granules
628 {
629 BESStopWatch sw;
630 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("LOAD_AGGREGATION_DIMENSIONS_CACHE", "");
631
// NOTE(review): aggDimCache is used below but its declaration is not present
// in this listing (a source line appears to be missing here) — restore it
// from the upstream file before building.
633
634 AMDList::iterator endIt = granuleList.end();
635 for (AMDList::iterator it = granuleList.begin(); it != endIt; ++it) {
636 AggMemberDataset *amd = (*it).get();
637 if(aggDimCache) {
638 BESDEBUG("ncml", "AggregationElement::fillDimensionCacheForJoinExistingDimension() - Loading dimension cache for: " << (*it)->getLocation() << "..." << endl);
639 aggDimCache->loadDimensionCache(amd);
640 }
641 else {
642 BESDEBUG("ncml", "AggregationElement::fillDimensionCacheForJoinExistingDimension() - " <<
643 "WARNING NcML Dimension Caching is not configured or is not working! Loading dimensions from DDS for dataset: " <<
644 (*it)->getLocation() << "" << endl);
// NOTE(review): the log message above announces loading dimensions from the
// DDS, but no statement doing so is visible in this branch (a source line
// appears to be missing here) — restore it from the upstream file.
646 }
647 }
648 }
649}
650
651
652
653
654
655bool AggregationElement::doesFirstGranuleSpecifyNcoords() const
656{
657 if (_datasets.size() > 0) {
658 return _datasets.at(0)->hasNcoords();
659 }
660 else {
661 return false;
662 }
663}
664
665bool AggregationElement::doAllGranulesSpecifyNcoords() const
666{
667 bool success = true;
668 vector<NetcdfElement*>::const_iterator endIt = _datasets.end();
669 for (vector<NetcdfElement*>::const_iterator it = _datasets.begin(); it != endIt; ++it) {
670 success = success && (*it)->hasNcoords();
671 if (!success) {
672 break;
673 }
674 }
675 return success;
676}
677
678void AggregationElement::seedDimensionCacheFromUserSpecs(agg_util::AMDList& rGranuleList) const
679{
680 NCML_ASSERT(_datasets.size() == rGranuleList.size());
681
682 vector<NetcdfElement*>::const_iterator datasetIt;
683 AMDList::iterator amdIt;
684 for (datasetIt = _datasets.begin(), amdIt = rGranuleList.begin(); datasetIt != _datasets.end();
685 ++datasetIt, ++amdIt) {
686 // Make sure the attribute exists or warn the author
687 const NetcdfElement* pDataset = *datasetIt;
688 if (!pDataset->hasNcoords()) {
689 // This is an assumption of the
690 THROW_NCML_INTERNAL_ERROR("Expected netcdf element member of a joinExisting "
691 "aggregation to have the ncoords attribute specified "
692 "but it did not.");
693 }
694 unsigned int ncoords = pDataset->getNcoordsAsUnsignedInt();
695 RCPtr<AggMemberDataset> pAMD = *amdIt;
696 VALID_PTR(pAMD.get());
698 dim.name = _dimName;
699 dim.size = ncoords;
700 pAMD->setDimensionCacheFor(dim, true);
701
702 NCML_ASSERT_MSG((pAMD->isDimensionCached(dim.name) && pAMD->getCachedDimensionSize(dim.name) == dim.size),
703 "Dimension cache bug");
704 }
705 // make sure they stayed in sync
706 NCML_ASSERT(amdIt == rGranuleList.end());
707}
708
709
710// For now, just count up the ncoords...
711void AggregationElement::addNewDimensionForJoinExisting(const agg_util::AMDList& rGranuleList)
712{
713 // Sum up the cardinalities from AMD's
714 unsigned int aggDimSize = 0;
715 for (AMDList::const_iterator it = rGranuleList.begin(); it != rGranuleList.end(); ++it) {
716 NCML_ASSERT((*it)->isDimensionCached(_dimName));
717 aggDimSize += (*it)->getCachedDimensionSize(_dimName);
718 }
719
720 // Error if the dimension exists in the output local scope already
721 NCML_ASSERT(getParentDataset());
722 NCML_ASSERT_MSG(!(getParentDataset()->getDimensionInLocalScope(_dimName)),
723 "AggregationElement::addNewDimensionForJoinExisting() found a dimension "
724 "named " + _dimName + " already but did not expect it!");
725
726 // Otherwise, create and add it in.
727 getParentDataset()->addDimension(new DimensionElement(agg_util::Dimension(_dimName, aggDimSize)));
728
729 // And tell the world at large
730 ostringstream oss;
731 oss << "Added joinExisting aggregation dimension "
732 " name=" << _dimName << " with aggregated size= " << aggDimSize;
733 BESDEBUG("ncml:2", oss.str());
734}
735
736void AggregationElement::findVariablesWithOuterDimensionName(vector<string>& oMatchingVars, const DDS& templateDDS,
737 const string& outerDimName) const
738{
739 for (DDS::Vars_iter it = const_cast<DDS&>(templateDDS).var_begin(); it != const_cast<DDS&>(templateDDS).var_end();
740 ++it) {
741 Array* pArray = AggregationUtil::getAsArrayIfPossible(*it);
742 // Only if it's an array or a grid data array
743 if (pArray && outerDimName == pArray->dimension_name(pArray->dim_begin())) {
744 oMatchingVars.push_back(pArray->name());
745 }
746 }
747}
748
749void AggregationElement::getParamsForJoinAggOnVariable(JoinAggParams* pOutParams, const DDS& /*aggOutputDDS*/,
750 const std::string& varName, const DDS& templateDDS)
751{
752 VALID_PTR(pOutParams);
753
754 // Look up the template variable.
755 pOutParams->_pAggVarTemplate = AggregationUtil::getVariableNoRecurse(templateDDS, varName);
756 if (!(pOutParams->_pAggVarTemplate)) {
757 THROW_NCML_PARSE_ERROR(line(),
758 " We could not find a template for the specified aggregation variable=" + varName
759 + " so we cannot continue the aggregation.");
760 }
761
762 // Dimension must exist already
763 const DimensionElement* pDim = getParentDataset()->getDimensionInLocalScope(_dimName);
764 NCML_ASSERT_MSG(pDim, "Didn't find a DimensionElement with the aggregation dimName=" + _dimName);
765 pOutParams->_pAggDim = &(pDim->getDimension());
766
767#if 0
768 // I don't follow the logic here. I think we should be able to add attributes to
769 // variables that already exist. This may be intended to protect against removing
770 // the variable on which the aggregation is performed 'over' (e.g., time) with a
771 // different variable. But it has the affect of also prohibiting that addition of
772 // an attribute on that variable. I'm removing it for now. jhrg 10/17/11
773
774 // Be sure the name isn't taken in the output DDS.
775 BaseType* pExists = AggregationUtil::getVariableNoRecurse(aggOutputDDS, varName);
776 NCML_ASSERT_MSG(!pExists,
777 "Failed since the name of the new variable to add (name="
778 + varName
779 + ") already exists in the "
780 " output aggregation DDS! What happened?!");
781#endif
782
783 // Get a vector of lazy loaders
784 // We will transfer AGM ownership to the calls so do not need to delete them.
785 collectAggMemberDatasets(pOutParams->_memberDatasets);
786}
787
788void AggregationElement::processJoinNewOnAggVar(DDS* pAggDDS, const std::string& varName, const DDS& templateDDS)
789{
790 BESStopWatch sw;
791 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processJoinNewOnAggVar", "");
792
793 // Get the params we need to factory the actual aggregation subclass
794 JoinAggParams joinAggParams;
795 getParamsForJoinAggOnVariable(&joinAggParams, // output
796 *pAggDDS, varName, templateDDS);
797
798 // Factory out the proper subtype
799 BaseType* pAggVarTemplate = joinAggParams._pAggVarTemplate;
800 if (pAggVarTemplate->type() == dods_array_c) {
801 processAggVarJoinNewForArray(*pAggDDS, *(static_cast<Array*>(pAggVarTemplate)), *(joinAggParams._pAggDim),
802 joinAggParams._memberDatasets);
803 }
804 else if (pAggVarTemplate->type() == dods_grid_c) {
805 processAggVarJoinNewForGrid(*pAggDDS, *(static_cast<Grid*>(pAggVarTemplate)), *(joinAggParams._pAggDim),
806 joinAggParams._memberDatasets);
807 }
808 else {
809 THROW_NCML_PARSE_ERROR(line(),
810 "Got an aggregation variable not of type Array or Grid, but of: " + pAggVarTemplate->type_name()
811 + " which we cannot aggregate!");
812 }
813 // Nothing else to do for this var until the call to processParentDataset() is complete.
814}
815
816void AggregationElement::processJoinExistingOnAggVar(DDS* pAggDDS, const std::string& varName, const DDS& templateDDS)
817{
818
819 BESStopWatch sw;
820 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processJoinExistingOnAggVar", "");
821
822 // Get the params we need to factory the actual aggregation subclass
823 JoinAggParams joinAggParams;
824 getParamsForJoinAggOnVariable(&joinAggParams, // output
825 *pAggDDS, varName, templateDDS);
826
827 // Factory out the proper subtype
828 BaseType* pAggVarTemplate = joinAggParams._pAggVarTemplate;
829 if (pAggVarTemplate->type() == dods_array_c) {
830 processAggVarJoinExistingForArray(*pAggDDS, *(static_cast<Array*>(pAggVarTemplate)), *(joinAggParams._pAggDim),
831 joinAggParams._memberDatasets);
832 }
833 else if (pAggVarTemplate->type() == dods_grid_c) {
834 processAggVarJoinExistingForGrid(*pAggDDS, *(static_cast<Grid*>(pAggVarTemplate)), *(joinAggParams._pAggDim),
835 joinAggParams._memberDatasets);
836 }
837 else {
838 THROW_NCML_PARSE_ERROR(line(),
839 "Got an aggregation variable not of type Array or Grid, but of: " + pAggVarTemplate->type_name()
840 + " which we cannot aggregate!");
841 }
842 // Nothing else to do for this var until the call to processParentDataset() is complete.
843}
844
845void AggregationElement::processAggVarJoinNewForArray(DDS& aggDDS, const libdap::Array& arrayTemplate,
846 const agg_util::Dimension& dim, const AMDList& memberDatasets)
847{
848 BESStopWatch sw;
849 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processJoinExistingOnAggVar", "");
850
851 // Use the basic array getter to read adn get from top level DDS.
852 auto_ptr<agg_util::ArrayGetterInterface> arrayGetter(new agg_util::TopLevelArrayGetter());
853
854 auto_ptr<ArrayAggregateOnOuterDimension> pAggArray(
855 new ArrayAggregateOnOuterDimension(arrayTemplate, memberDatasets, arrayGetter, // will xfer ownership
856 dim));
857
858 // Make sure we xfer ownership of contained dumb ptr.
859 NCML_ASSERT_MSG(!(arrayGetter.get()), "Expected auto_ptr owner xfer, failed!");
860
861 // This will copy, auto_ptr will clear the prototype.
862 // NOTE: add_var() makes a copy.
863 // OPTIMIZE change to add_var_no_copy when it exists.
864 BESDEBUG("ncml",
865 "Adding new ArrayAggregateOnOuterDimension with name=" << arrayTemplate.name() << " to aggregated dataset!" << endl);
866
867 // Replaced the copy version of DDS::add_var() with the nocopy version. This saves
868 // a deep copy, but more importantly, is a workaround for a memory issue in the
869 // ArrayAggregateOnOuterDimension or ArrayAggreagtionBase copy constructor, which
870 // triggers a memory error deep in libdap::Array::Array(const Array&). See similar
871 // changes below. This and related changes fix HYRAX-803. jhrg 8/3/18
872#if 0
873 aggDDS.add_var(pAggArray.get());
874#endif
875
876 aggDDS.add_var_nocopy(pAggArray.release());
877}
878
879void AggregationElement::processAggVarJoinNewForGrid(DDS& aggDDS, const Grid& gridTemplate,
880 const agg_util::Dimension& dim, const AMDList& memberDatasets)
881{
882 BESStopWatch sw;
883 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processAggVarJoinNewForGrid", "");
884
885 auto_ptr<GridAggregateOnOuterDimension> pAggGrid(
886 new GridAggregateOnOuterDimension(gridTemplate, dim, memberDatasets, _parser->getDDSLoader()));
887
888 // This will copy, auto_ptr will clear the prototype.
889 // OPTIMIZE change to add_var_no_copy when it exists.
890 BESDEBUG("ncml",
891 "Adding new GridAggregateOnOuterDimension with name=" << gridTemplate.name() << " to aggregated dataset!" << endl);
892
893#if 0
894 aggDDS.add_var(pAggGrid.get());
895#endif
896
897 aggDDS.add_var_nocopy(pAggGrid.release());
898
899 // processParentDatasetCompleteForJoinNew() will
900 // make sure the correct new map vector gets added
901}
902
903void AggregationElement::processAggVarJoinExistingForArray(DDS& aggDDS, const libdap::Array& arrayTemplate,
904 const agg_util::Dimension& dim, const AMDList& memberDatasets)
905{
906
907 BESStopWatch sw;
908 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processAggVarJoinExistingForArray", "");
909
910 // Use the basic array getter to read adn get from top level DDS.
911 auto_ptr<agg_util::ArrayGetterInterface> arrayGetter(new agg_util::TopLevelArrayGetter());
912
913 auto_ptr<ArrayJoinExistingAggregation> pAggArray(
914 new ArrayJoinExistingAggregation(arrayTemplate, memberDatasets, arrayGetter, // will xfer ownership
915 dim));
916
917 // Make sure we xfer ownership of contained dumb ptr.
918 NCML_ASSERT_MSG(!(arrayGetter.get()), "Expected auto_ptr owner xfer, failed!");
919
920 // This will copy, auto_ptr will clear the prototype.
921 // NOTE: add_var() makes a copy.
922 // OPTIMIZE change to add_var_no_copy when it exists.
923 BESDEBUG("ncml",
924 "Adding new ArrayJoinExistingAggregation with name=" << arrayTemplate.name() << " to aggregated dataset!" << endl);
925
926#if 0
927 aggDDS.add_var(pAggArray.get());
928#endif
929
930 aggDDS.add_var_nocopy(pAggArray.release());
931}
932
933void AggregationElement::processAggVarJoinExistingForGrid(DDS& aggDDS, const Grid& gridTemplate,
934 const agg_util::Dimension& dim, const AMDList& memberDatasets)
935{
936
937 BESStopWatch sw;
938 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processAggVarJoinExistingForGrid", "");
939
940 auto_ptr<GridJoinExistingAggregation> pAggGrid(
941 new GridJoinExistingAggregation(gridTemplate, memberDatasets, _parser->getDDSLoader(), dim));
942
943 BESDEBUG("ncml",
944 "Adding new GridJoinExistingAggregation with name=" << gridTemplate.name() << " to aggregated dataset!" << endl);
945
946#if 0
947 aggDDS.add_var(pAggGrid.get()); // will copy
948#endif
949
950 aggDDS.add_var_nocopy(pAggGrid.release());
951}
952
953void AggregationElement::processParentDatasetCompleteForJoinNew()
954{
955 BESStopWatch sw;
956 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processParentDatasetCompleteForJoinNew", "");
957
958 NetcdfElement* pParentDataset = getParentDataset();
959 VALID_PTR(pParentDataset);
960 DDS* pParentDDS = pParentDataset->getDDS();
961 VALID_PTR(pParentDDS);
962
963 const DimensionElement* pDim = getParentDataset()->getDimensionInLocalScope(_dimName);
964 NCML_ASSERT_MSG(pDim, " AggregationElement::processParentDatasetCompleteForJoinNew(): "
965 " didn't find a DimensionElement with the joinNew dimName=" + _dimName);
966 const agg_util::Dimension& dim = pDim->getDimension();
967
968 // See if there's an explicit or placeholder c.v. for this dimension name
969 BaseType* pBT = AggregationUtil::getVariableNoRecurse(*pParentDDS, dim.name);
970 Array* pCV = 0; // this will be a ptr to the actual (new or existing) c.v. in the *pParentDDS.
971
972 // If name totally unused, we need to create a new c.v. and add it.
973 if (!pBT) {
974 pCV = createAndAddCoordinateVariableForNewDimension(*pParentDDS, dim);
975 NCML_ASSERT_MSG(pCV, "processParentDatasetCompleteForJoinNew(): "
976 "failed to create a new coordinate variable for dim=" + dim.name);
977 }
978 else // name exists: either it's explicit or deferred.
979 {
980 // See if the var we found with the dimension name is
981 // in the deferred variable list for the parent dataset:
982 VariableElement* pVarElt = pParentDataset->findVariableElementForLibdapVar(pBT);
983 // If not, then we expect explicit values so just validate it's a proper c.v. for
984 // the aggregation (the dim) and set pCV to it if so.
985 if (!pVarElt) {
986 // will throw if not valid since we send true.
987 pCV = ensureVariableIsProperNewCoordinateVariable(pBT, dim, true);
988 VALID_PTR(pCV);
989 }
990 else // it was deferred, need to do some special work...
991 {
992 pCV = processDeferredCoordinateVariable(pBT, dim);
993 VALID_PTR(pCV);
994 }
995 }
996
997 // OK, either pCV is valid or we've unwound out by this point.
998 // If a coordinate axis type was specified, we need to add it now.
999 //
1000 // This fiddles with the attribute for the CV. jhrg 10/17/11
1001 if (!_coordinateAxisType.empty()) {
1002 addCoordinateAxisType(*pCV, _coordinateAxisType);
1003 }
1004
1005 // For each aggVar:
1006 // If it's a Grid, add the coordinate variable as a new map vector.
1007 // If it's an Array, do nothing -- we already added the CV as a sibling to the aggvar
1008 AggVarIter it;
1009 AggVarIter endIt = endAggVarIter();
1010 for (it = beginAggVarIter(); it != endIt; ++it) {
1011 const string& aggVar = *it;
1012 BaseType* pBT = AggregationUtil::getVariableNoRecurse(*pParentDDS, aggVar);
1014 if (pGrid) {
1015 // Add the given map to the Grid as a copy
1016 pGrid->prepend_map(pCV, true);
1017 }
1018 }
1019}
1020
1021void AggregationElement::processParentDatasetCompleteForJoinExisting()
1022{
1023 BESStopWatch sw;
1024 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processParentDatasetCompleteForJoinExisting", "");
1025
1026 NetcdfElement* pParentDataset = getParentDataset();
1027 VALID_PTR(pParentDataset);
1028 DDS* pAggDDS = pParentDataset->getDDS();
1029 VALID_PTR(pAggDDS);
1030
1031 const DimensionElement* pDim = getParentDataset()->getDimensionInLocalScope(_dimName);
1032 NCML_ASSERT_MSG(pDim, " Didn't find a DimensionElement with the joinExisting dimName=" + _dimName);
1033 const agg_util::Dimension& dim = pDim->getDimension();
1034
1035 // See if there's an explicit or placeholder c.v. for this dimension name
1036 BaseType* pDimNameVar = AggregationUtil::getVariableNoRecurse(*pAggDDS, dim.name);
1037
1038 bool placeholderExists = false;
1039 Array* pCV = 0; // this will be a ptr to the actual (new or existing) c.v. in the *pParentDDS.
1040 // If the c.v. exists, then process it further.
1041 if (pDimNameVar) {
1042 // See if the var we found with the dimension name is
1043 // in the deferred variable list for the parent dataset:
1044 VariableElement* pVarElt = pParentDataset->findVariableElementForLibdapVar(pDimNameVar);
1045 // If not, then we expect explicit values so just validate it's a proper c.v. for
1046 // the aggregation (the dim) and set pCV to it if so.
1047 if (!pVarElt) {
1048 // will throw if not valid since we send true.
1049 pCV = ensureVariableIsProperNewCoordinateVariable(pDimNameVar, dim, true);
1050 VALID_PTR(pCV);
1051 placeholderExists = false;
1052 }
1053 else // it was deferred, need to do some special work below...
1054 {
1055 //pCV = processDeferredCoordinateVariable(pDimNameVar, dim);
1056 placeholderExists = true;
1057 }
1058 }
1059
1060 // For the scope of the next loop, this will be filled
1061 // with a new aggregated map variable when we fidnt he first Grid
1062 // and then pCV will refer to it until the fucntion end.
1063 // If created, it will be used as the map vector for all Grid's.
1064 auto_ptr<ArrayJoinExistingAggregation> pNewMap(0);
1065
1066 // For each aggVar:
1067 // If it's a Grid, add the coordinate variable as a new map vector
1068 // since we left it out in the actual Grid until aggregated.
1069 // If it's an Array, do nothing
1070 AggVarIter it;
1071 AggVarIter endIt = endAggVarIter();
1072 for (it = beginAggVarIter(); it != endIt; ++it) {
1073 const string& aggVar = *it;
1074 BaseType* pAggVar = AggregationUtil::getVariableNoRecurse(*pAggDDS, aggVar);
1075
1076 // HACK TODO clean this downcast later when we refactor this file.
1077 GridJoinExistingAggregation* pGrid = dynamic_cast<GridJoinExistingAggregation*>(pAggVar);
1078 if (pGrid) {
1079 // If we don't find it, but we're the first Grid, then assume it's in the Grid maps
1080 // and create it. Will be reused by other Grid's.
1081 // We also do this if it was a placeholder since we need to replace it!
1082 if (!pCV || placeholderExists) {
1083 pNewMap = pGrid->makeAggregatedOuterMapVector();
1084 VALID_PTR(pNewMap.get());
1085
1086 // If there was a placeholder, we need to
1087 // grab it's metadata as a changeset and replace
1088 // the variable in the DDS with the new one.
1089 if (placeholderExists) {
1090 processPlaceholderCoordinateVariableForJoinExisting(*pDimNameVar, pNewMap.get());
1091 }
1092
1093 // this will make a copy, so the auto_ptr is ok.
1094 AggregationUtil::addOrReplaceVariableForName(pAggDDS, *(pNewMap.get()));
1095
1096 // Use the new one as the coordinate variable for the maps below
1097 pCV = pNewMap.get();
1098 }
1099
1100 // It MUST exist for a Grid since we have to add it for completeness.
1101 NCML_ASSERT_MSG(pCV, "Expected a coordinate variable since a Grid exists... what happened?");
1102
1103 // Add the given map to the Grid as a copy
1104 pGrid->prepend_map(pCV, true);
1105 }
1106 }
1107}
1108
1109void AggregationElement::processPlaceholderCoordinateVariableForJoinExisting(const libdap::BaseType& placeholderVar,
1110 libdap::Array* pNewVar)
1111{
1112 VALID_PTR(pNewVar);
1113
1114 // Make sure the types of the placeholder scalar and created array match or the author goofed
1115 BaseType* pNewEltProto = pNewVar->var();
1116 VALID_PTR(pNewEltProto);
1117 if (placeholderVar.type() != pNewEltProto->type()) {
1118 THROW_NCML_PARSE_ERROR(line(),
1119 " We expected the type of the placeholder coordinate variable to be the same "
1120 " as that created by the aggregation. Expected type=" + pNewEltProto->type_name()
1121 + +" but placeholder has type=" + placeholderVar.type_name()
1122 + " Please make sure these match in the input file!");
1123 }
1124
1125 // Pull the metadata into the new c.v. from the placeholder
1126 AggregationUtil::gatherMetadataChangesFrom(pNewVar, placeholderVar);
1127
1128 // Let the validation know that we got values for the original value and to remove the entry
1129 // since we're about to delete the pointer to pBT!
1130 getParentDataset()->setVariableGotValues(const_cast<BaseType*>(&placeholderVar), true);
1131}
1132
1134{
1135 _coordinateAxisType = cat;
1136}
1137
1138const std::string&
1140{
1141 return _coordinateAxisType;
1142}
1143
1144libdap::Array*
1145AggregationElement::ensureVariableIsProperNewCoordinateVariable(libdap::BaseType* pBT, const agg_util::Dimension& dim,
1146 bool throwOnInvalidCV) const
1147{
1148 VALID_PTR(pBT);
1149 Array* pArrRet = 0;
1150
1151 // If 1D array with name == dim....
1152 if (AggregationUtil::couldBeCoordinateVariable(pBT)) {
1153 // Ensure the dimensionalities match
1154 Array* pArr = static_cast<Array*>(pBT);
1155 if (pArr->length() == static_cast<int>(dim.size)) {
1156 // OK, it's a valid return value.
1157 pArrRet = pArr;
1158 }
1159 else // Dimensionality mismatch, exception or return NULL.
1160 {
1161 ostringstream oss;
1162 oss << string("In the aggregation for dimension=") << dim.name
1163 << ": The coordinate variable we found does NOT have the same dimensionality as the"
1164 "aggregated dimension! We expected dimensionality=" << dim.size
1165 << " but the coordinate variable had dimensionality=" << pArr->length();
1166 BESDEBUG("ncml", oss.str() << endl);
1167 if (throwOnInvalidCV) {
1168 THROW_NCML_PARSE_ERROR(line(), oss.str());
1169 }
1170 }
1171 }
1172
1173 else // Name exists, but not a coordinate variable, then exception or return null.
1174 {
1175 std::ostringstream msg;
1176 msg << "Aggregation found a variable matching aggregated dimension name=" << dim.name
1177 << " but it was not a coordinate variable. "
1178 " It must be a 1D array whose dimension name is the same as its name. ";
1179 BESDEBUG("ncml", "AggregationElement::ensureVariableIsProperNewCoordinateVariable: " + msg.str() << endl);
1180 if (throwOnInvalidCV) {
1181 THROW_NCML_PARSE_ERROR(line(), msg.str())
1182 }
1183 }
1184 // Return valid Array or null on failures.
1185 return pArrRet;
1186}
1187
1188libdap::Array*
1189AggregationElement::findMatchingCoordinateVariable(const DDS& dds, const agg_util::Dimension& dim,
1190 bool throwOnInvalidCV/*=true*/) const
1191{
1192 BaseType* pBT = AggregationUtil::getVariableNoRecurse(dds, dim.name);
1193
1194 // Name doesn't exist, just NULL. We'll have to create it from scratch
1195 if (!pBT) {
1196 return 0;
1197 }
1198
1199 return ensureVariableIsProperNewCoordinateVariable(pBT, dim, throwOnInvalidCV);
1200}
1201
1214libdap::Array*
1215AggregationElement::processDeferredCoordinateVariable(libdap::BaseType* pBT, const agg_util::Dimension& dim)
1216{
1217 VALID_PTR(pBT);
1218
1219 BESDEBUG("ncml",
1220 "Processing the placeholder coordinate variable (no values) for the " "current aggregation to add placeholder metadata to the generated values..." << endl);
1221
1222 // Generate the c.v. as if we had no placeholder since pBT will be a scalar (shape cannot
1223 // be defined on it by ncml spec defn).
1224 // @OPTIMIZE try to refactor this to avoid unnecessary copies.
1225 auto_ptr<Array> pNewArrCV = createCoordinateVariableForNewDimension(dim);
1226 NCML_ASSERT_MSG(pNewArrCV.get(), " createCoordinateVariableForNewDimension()"
1227 " returned null.");
1228
1229 // Make sure the types of the placeholder scalar and created array match or the author goofed
1230 BaseType* pNewEltProto = pNewArrCV->var();
1231 VALID_PTR(pNewEltProto);
1232 if (pBT->type() != pNewEltProto->type()) {
1233 THROW_NCML_PARSE_ERROR(line(),
1234 " We expected the type of the placeholder coordinate variable to be the same "
1235 " as that created by the aggregation. Expected type=" + pNewEltProto->type_name()
1236 + +" but placeholder has type=" + pBT->type_name()
1237 + " Please make sure these match in the input file!");
1238 }
1239
1240 // Let the validation know that we got values for the original value and to remove the entry
1241 // since we're about to delete the pointer to pBT!
1242 getParentDataset()->setVariableGotValues(pBT, true);
1243
1244 // Copy the entire AttrTable tree (recursively) from the place holder into the new variable
1245 pNewArrCV->get_attr_table() = pBT->get_attr_table();
1246
1247 // Delete the placeholder
1248 DDS* pDDS = getParentDataset()->getDDS();
1249 VALID_PTR(pDDS);
1250 pDDS->del_var(pBT->name());
1251
1252 // Add the new one, which will copy it (argh! we need to fix this in libdap!)
1253 // OPTIMIZE use non copy add when available.
1254 BESDEBUG("ncml", "Adding CV: " << pNewArrCV->name() << endl);
1255#if 0
1256 pDDS->add_var(pNewArrCV.get()); // use raw ptr for the copy.
1257#endif
1258 pDDS->add_var_nocopy(pNewArrCV.release());
1259
1260 // Pull out the copy we just added and hand it back
1261 Array* pArrCV = static_cast<Array*>(AggregationUtil::getVariableNoRecurse(*pDDS, dim.name));
1262 VALID_PTR(pArrCV);
1263 return pArrCV;
1264}
1265
1266auto_ptr<libdap::Array> AggregationElement::createCoordinateVariableForNewDimension(
1267 const agg_util::Dimension& dim) const
1268{
1269 // Get the netcdf@coordValue or use the netcdf@location (or auto generate if empty() ).
1270 NCML_ASSERT(_datasets.size() > 0);
1271 bool hasCoordValue = !(_datasets[0]->coordValue().empty());
1272 if (hasCoordValue) {
1273 return createCoordinateVariableForNewDimensionUsingCoordValue(dim);
1274 }
1275 else {
1276 return createCoordinateVariableForNewDimensionUsingLocation(dim);
1277 }
1278}
1279
1280libdap::Array*
1281AggregationElement::createAndAddCoordinateVariableForNewDimension(DDS& dds, const agg_util::Dimension& dim)
1282{
1283 auto_ptr<libdap::Array> pNewCV = createCoordinateVariableForNewDimension(dim);
1284
1285 // Make sure it did it
1286 NCML_ASSERT_MSG(pNewCV.get(),
1287 "AgregationElement::createCoordinateVariableForNewDimension() failed to create a coordinate variable!");
1288
1289 // Add it to the DDS, which will make a copy
1290 // (change this when we add noncopy add_var to DDS)
1291 //
1292 // Fix. This will append the variable to the DDS; we need these CVs to be
1293 // prefixes to the Grids (so that old versions of the netCDF library will
1294 // recognize them). jhrg 10/17/11
1295 BESDEBUG("ncml2", "AggregationElement::createAndAddCoordinateVariableForNewDimension: " << pNewCV->name());
1296#if 0
1297 dds.add_var(pNewCV.get());
1298#else
1299 // This provides a way to remember where the last CV was inserted and adds
1300 // this one after it. That provides the behavior that all of the CVs are
1301 // added at the beginning of the DDS but in the order they appear in the NCML.
1302 // That will translate into a greater chance of success for users, I think ...
1303 //
1304 // See also similar code in AggregationUtil::addCopyOfVariableIfNameIsAvailable.
1305 // jhrg 10/17/11
1306 static int last_added = 0;
1307 DDS::Vars_iter pos = dds.var_begin();
1308 for (int i = 0; i < last_added; ++i)
1309 ++pos;
1310
1311 dds.insert_var(pos, pNewCV.get());
1312 ++last_added;
1313#endif
1314 // Grab the copy back out and set to our expected result.
1315 Array* pCV = static_cast<Array*>(AggregationUtil::getVariableNoRecurse(dds, dim.name));
1316
1317 NCML_ASSERT_MSG(pCV, "Logic Error: tried to add a new coordinate variable while processing joinNew"
1318 " but we couldn't locate it!");
1319 return pCV;
1320}
1321
1322auto_ptr<libdap::Array> AggregationElement::createCoordinateVariableForNewDimensionUsingCoordValue(
1323 const agg_util::Dimension& dim) const
1324{
1325 NCML_ASSERT(_datasets.size() > 0);
1326 NCML_ASSERT_MSG(_datasets.size() == dim.size, "Logic error: Number of datasets doesn't match dimension!");
1327 // Use first dataset to define the proper type
1328 double doubleVal = 0;
1329 if (_datasets[0]->getCoordValueAsDouble(doubleVal)) {
1330 return createCoordinateVariableForNewDimensionUsingCoordValueAsDouble(dim);
1331 }
1332 else {
1333 return createCoordinateVariableForNewDimensionUsingCoordValueAsString(dim);
1334 }
1335}
1336
1337auto_ptr<libdap::Array> AggregationElement::createCoordinateVariableForNewDimensionUsingCoordValueAsDouble(
1338 const agg_util::Dimension& dim) const
1339{
1340 vector<dods_float64> coords;
1341 coords.reserve(dim.size);
1342 double doubleVal = 0;
1343 // Use the index rather than iterator so we can use it in debug output...
1344 for (unsigned int i = 0; i < _datasets.size(); ++i) {
1345 const NetcdfElement* pDataset = _datasets[i];
1346 if (!pDataset->getCoordValueAsDouble(doubleVal)) {
1347 THROW_NCML_PARSE_ERROR(line(),
1348 "In creating joinNew coordinate variable from coordValue, expected a coordValue of type double"
1349 " but failed! coordValue=" + pDataset->coordValue() + " which was in the dataset location="
1350 + pDataset->location() + " with title=\"" + pDataset->title() + "\"");
1351 }
1352 else // we got our value fine, so add it
1353 {
1354 coords.push_back(static_cast<dods_float64>(doubleVal));
1355 }
1356 }
1357
1358 // If we got here, we have the array of coords.
1359 // So we need to make the proper array, fill it in, and return it.
1360 auto_ptr<Array> pNewCV = MyBaseTypeFactory::makeArrayTemplateVariable("Array<Float64>", dim.name, true);
1361 NCML_ASSERT_MSG(pNewCV.get(), "createCoordinateVariableForNewDimensionUsingCoordValueAsDouble: failed to create"
1362 " the new Array<Float64> for variable: " + dim.name);
1363 pNewCV->append_dim(dim.size, dim.name);
1364 pNewCV->set_value(coords, coords.size()); // this will set the length correctly.
1365 return pNewCV;
1366}
1367
1368auto_ptr<libdap::Array> AggregationElement::createCoordinateVariableForNewDimensionUsingCoordValueAsString(
1369 const agg_util::Dimension& dim) const
1370{
1371 // I feel suitably dirty for cut and pasting this.
1372 vector<string> coords;
1373 coords.reserve(dim.size);
1374 for (unsigned int i = 0; i < _datasets.size(); ++i) {
1375 const NetcdfElement* pDataset = _datasets[i];
1376 if (pDataset->coordValue().empty()) {
1377 int parseLine = line();
1378 THROW_NCML_PARSE_ERROR(parseLine,
1379 "In creating joinNew coordinate variable from coordValue, expected a coordValue of type string"
1380 " but it was empty! dataset location=" + pDataset->location() + " with title=\"" + pDataset->title()
1381 + "\"");
1382 }
1383 else // we got our value fine, so add it
1384 {
1385 coords.push_back(pDataset->coordValue());
1386 }
1387 }
1388 // If we got here, we have the array of coords.
1389 // So we need to make the proper array, fill it in, and return it.
1390 auto_ptr<Array> pNewCV = MyBaseTypeFactory::makeArrayTemplateVariable("Array<String>", dim.name, true);
1391 NCML_ASSERT_MSG(pNewCV.get(), "createCoordinateVariableForNewDimensionUsingCoordValueAsString: failed to create"
1392 " the new Array<String> for variable: " + dim.name);
1393 pNewCV->append_dim(dim.size, dim.name);
1394 pNewCV->set_value(coords, coords.size()); // this will set the length correctly.
1395 return pNewCV;
1396}
1397
1398auto_ptr<libdap::Array> AggregationElement::createCoordinateVariableForNewDimensionUsingLocation(
1399 const agg_util::Dimension& dim) const
1400{
1401 // I feel suitably dirty for cut and pasting this.
1402 vector<string> coords;
1403 coords.reserve(dim.size);
1404 for (unsigned int i = 0; i < _datasets.size(); ++i) {
1405 const NetcdfElement* pDataset = _datasets[i];
1406 string location("");
1407 if (pDataset->location().empty()) {
1408 std::ostringstream oss;
1409 oss << "Virtual_Dataset_" << i;
1410 location = oss.str();
1411 }
1412 else // we got our value fine, so add it
1413 {
1414 location = pDataset->location();
1415 }
1416 coords.push_back(location);
1417 }
1418 // If we got here, we have the array of coords.
1419 // So we need to make the proper array, fill it in, and return it.
1420 auto_ptr<Array> pNewCV = MyBaseTypeFactory::makeArrayTemplateVariable("Array<String>", dim.name, true);
1421 NCML_ASSERT_MSG(pNewCV.get(),
1422 "createCoordinateVariableForNewDimensionUsingCoordValueUsingLocation: failed to create"
1423 " the new Array<String> for variable: " + dim.name);
1424
1425 pNewCV->append_dim(dim.size, dim.name);
1426 pNewCV->set_value(coords, coords.size());
1427 return pNewCV;
1428}
1429
1430void AggregationElement::collectDatasetsInOrder(vector<const DDS*>& ddsList) const
1431{
1432 ddsList.resize(0);
1433 ddsList.reserve(_datasets.size());
1434 vector<NetcdfElement*>::const_iterator endIt = _datasets.end();
1435 vector<NetcdfElement*>::const_iterator it;
1436 for (it = _datasets.begin(); it != endIt; ++it) {
1437 const NetcdfElement* elt = *it;
1438 VALID_PTR(elt);
1439 const DDS* pDDS = elt->getDDS();
1440 VALID_PTR(pDDS);
1441 ddsList.push_back(pDDS);
1442 }
1443}
1444
1445void AggregationElement::collectAggMemberDatasets(AMDList& rMemberDatasets) const
1446{
1447 rMemberDatasets.resize(0);
1448 rMemberDatasets.reserve(_datasets.size());
1449
1450 for (vector<NetcdfElement*>::const_iterator it = _datasets.begin(); it != _datasets.end(); ++it) {
1451 VALID_PTR(*it);
1452 RCPtr<AggMemberDataset> pAGM((*it)->getAggMemberDataset());
1453 VALID_PTR(pAGM.get());
1454
1455 // Push down the ncoords hint if it was given
1456 if (!((*it)->ncoords().empty()) && !_dimName.empty()) {
1457 if (!(pAGM->isDimensionCached(_dimName))) {
1458 unsigned int ncoords = (*it)->getNcoordsAsUnsignedInt();
1459 pAGM->setDimensionCacheFor(agg_util::Dimension(_dimName, ncoords), false);
1460 }
1461 }
1462
1463 // don't need to ref(), the RCPtr copy ctor in the vector elt
1464 // takes care of it when we push_back()
1465 rMemberDatasets.push_back(pAGM);
1466 }
1467}
1468
1469void AggregationElement::processAnyScanElements()
1470{
1471 if (_scanners.size() > 0) {
1472 BESDEBUG("ncml", "Started to process " << _scanners.size() << " scan elements..." << endl);
1473 }
1474
1475 vector<ScanElement*>::iterator it;
1476 vector<ScanElement*>::iterator endIt = _scanners.end();
1477 vector<NetcdfElement*> scannedDatasets;
1478 for (it = _scanners.begin(); it != endIt; ++it) {
1479 BESDEBUG("ncml", "Processing scan element = " << (*it)->toString() << " ..." << endl);
1480
1481 // Run the scanner to get the scanned datasets.
1482 // These will be sorted, so maintain order.
1483 (*it)->getDatasetList(scannedDatasets);
1484
1485 // Add the datasets using the parser call to
1486 // set the data up correctly,
1487 // then unref() and remove them from the temp array
1488 vector<NetcdfElement*>::iterator datasetIt;
1489 vector<NetcdfElement*>::iterator datasetEndIt = scannedDatasets.end();
1490 for (datasetIt = scannedDatasets.begin(); datasetIt != datasetEndIt; ++datasetIt) {
1491 // this will ref() it and make sure we can load it.
1492 _parser->addChildDatasetToCurrentDataset(*datasetIt);
1493 // so we unref() it afterwards because we're dumping the temp array
1494 (*datasetIt)->unref();
1495 }
1496 // we're done with it and they're all unref().
1497 scannedDatasets.clear();
1498 }
1499}
1500
1501void AggregationElement::mergeDimensions(bool checkDimensionMismatch/*=true*/, const std::string& dimToSkip/*=""*/)
1502{
1503 NetcdfElement* pParent = getParentDataset();
1504 // For each dataset in the children....
1505 vector<NetcdfElement*>::const_iterator datasetsEndIt = _datasets.end();
1506 vector<NetcdfElement*>::const_iterator datasetsIt;
1507 for (datasetsIt = _datasets.begin(); datasetsIt != datasetsEndIt; ++datasetsIt) {
1508 // Check each dimension in it compared to the parent
1509 const NetcdfElement* dataset = *datasetsIt;
1510 VALID_PTR(dataset);
1511 const vector<DimensionElement*>& dimensions = dataset->getDimensionElements();
1512 vector<DimensionElement*>::const_iterator dimEndIt = dimensions.end();
1513 vector<DimensionElement*>::const_iterator dimIt;
1514 for (dimIt = dimensions.begin(); dimIt != dimEndIt; ++dimIt) {
1515 const DimensionElement* pDim = *dimIt;
1516 VALID_PTR(pDim);
1517 // Skip if asked to do so
1518 if (!dimToSkip.empty() && (pDim->name() == dimToSkip)) {
1519 continue;
1520 }
1521 // Otherwise continue to look it up
1522 const DimensionElement* pUnionDim = pParent->getDimensionInLocalScope(pDim->name());
1523 if (pUnionDim) {
1524 // We'll check the dimensions match no matter what, but only warn unless we're told to check
1525 if (!pUnionDim->checkDimensionsMatch(*pDim)) {
1526 string msg = string("The union aggregation already had a dimension=") + pUnionDim->toString()
1527 + " but we found another with different cardinality: " + pDim->toString()
1528 + " This is likely an error and could cause a later exception.";
1529 BESDEBUG("ncml", "WARNING: " + msg);
1530 if (checkDimensionMismatch) {
1531 THROW_NCML_PARSE_ERROR(_parser->getParseLineNumber(),
1532 msg + " Scope=" + _parser->getScopeString());
1533 }
1534 }
1535 }
1536 else // if not in the union already, we want to add it!
1537 {
1538 // this will up the ref count for it so when child dataset dies, we're good.
1539 BESDEBUG("ncml",
1540 "Dimension name=" << pDim->name() << " was not found in the union yet, so adding it. The full elt is: " << pDim->toString() << endl);
1541 pParent->addDimension(const_cast<DimensionElement*>(pDim));
1542 }
1543 }
1544 }
1545}
1546
1547static const string COORDINATE_AXIS_TYPE_ATTR("_CoordinateAxisType");
1548void AggregationElement::addCoordinateAxisType(libdap::Array& rCV, const std::string& cat)
1549{
1550 AttrTable& rAT = rCV.get_attr_table();
1551 AttrTable::Attr_iter foundIt = rAT.simple_find(COORDINATE_AXIS_TYPE_ATTR);
1552 // preexists, then delete it and we'll replace with the new
1553 if (foundIt != rAT.attr_end()) {
1554 rAT.del_attr(COORDINATE_AXIS_TYPE_ATTR);
1555 }
1556
1557 BESDEBUG("ncml3",
1558 "Adding attribute to the aggregation variable " << rCV.name() << " Attr is " << COORDINATE_AXIS_TYPE_ATTR << " = " << cat << endl);
1559
1560 // Either way, now we can add it.
1561 rAT.append_attr(COORDINATE_AXIS_TYPE_ATTR, "String", cat);
1562}
1563
1564vector<string> AggregationElement::getValidAttributes()
1565{
1566 vector<string> attrs;
1567 attrs.push_back("type");
1568 attrs.push_back("dimName");
1569 attrs.push_back("recheckEvery");
1570 return attrs;
1571}
1572
1573
1574}
1575
1576// namespace ncml_module
static bool IsSet(const std::string &flagName)
see if the debug context flagName is set to true
Definition: BESDebug.h:168
virtual bool start(std::string name)
Definition: BESStopWatch.cc:67
static AggMemberDatasetDimensionCache * get_instance()
virtual void fillDimensionCacheByUsingDDS()=0
auto_ptr< ArrayJoinExistingAggregation > makeAggregatedOuterMapVector() const
virtual int ref() const
Definition: RCObject.cc:71
virtual int unref() const
Definition: RCObject.cc:78
A reference to an RCObject which automatically ref() and deref() on creation and destruction.
Definition: RCObject.h:284
const std::string & getAggregationVariableCoordinateAxisType() const
void setAggregationVariableCoordinateAxisType(const std::string &cat)
void addChildDataset(NetcdfElement *pDataset)
NetcdfElement * setParentDataset(NetcdfElement *parent)
Private Impl.
void addScanElement(ScanElement *pScanner)
bool isAggregationVariable(const string &name) const
virtual void setAttributes(const XMLAttributeMap &attrs)
virtual const string & getTypeName() const
void addAggregationVariable(const string &name)
virtual AggregationElement * clone() const
static std::auto_ptr< libdap::Array > makeArrayTemplateVariable(const string &type, const string &name, bool addTemplateVar)
Base class for NcML element concrete classes.
Definition: NCMLElement.h:61
virtual bool validateAttributes(const XMLAttributeMap &attrs, const std::vector< std::string > &validAttrs, std::vector< std::string > *pInvalidAttrs=0, bool printInvalid=true, bool throwOnError=true)
Definition: NCMLElement.cc:174
static std::string printAttributeIfNotEmpty(const std::string &attrName, const std::string &attrValue)
Definition: NCMLElement.cc:212
int getParseLineNumber() const
Definition: NCMLParser.cc:200
static bool isAllWhitespace(const std::string &str)
Definition: NCMLUtil.cc:105
Concrete class for NcML <netcdf> element.
Definition: NetcdfElement.h:64
void addDimension(DimensionElement *dim)
virtual const libdap::DDS * getDDS() const
virtual NetcdfElement * clone() const
void setChildAggregation(AggregationElement *agg, bool throwIfExists=true)
const DimensionElement * getDimensionInLocalScope(const std::string &name) const
void setVariableGotValues(libdap::BaseType *pVarToValidate, bool removeEntry)
virtual std::string toString() const
void setParentAggregation(AggregationElement *parent)
AggregationElement * getChildAggregation() const
virtual ScanElement * clone() const
Definition: ScanElement.cc:136
void setParent(AggregationElement *pParent)
Definition: ScanElement.cc:124
const std::string getValueForLocalNameOrDefault(const std::string &localname, const std::string &defVal="") const
Definition: XMLHelpers.cc:181
NcML Parser for adding/modifying/removing metadata (attributes) to existing local datasets using NcML...