libdap  Updated for version 3.18.2
D4ParserSax2.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2012 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 #include "config.h"
26 
27 //#define DODS_DEBUG 1
28 
29 #include <iostream>
30 #include <sstream>
31 
32 #include <cstring>
33 #include <cstdarg>
34 #include <cassert>
35 
36 #include <libxml/parserInternals.h>
37 
38 #include "DMR.h"
39 
40 #include "BaseType.h"
41 #include "Array.h"
42 #include "D4Group.h"
43 #include "D4Attributes.h"
44 #include "D4Maps.h"
45 #include "D4Enum.h"
46 
47 #include "D4BaseTypeFactory.h"
48 
49 #include "D4ParserSax2.h"
50 
51 #include "util.h"
52 #include "debug.h"
53 
54 namespace libdap {
55 
// Printable names for the parser states. The table is indexed with the value
// returned by get_state() and is only used for debug/trace output.
// NOTE(review): the order presumably mirrors the ParseState enum declared in
// D4ParserSax2.h - keep the two in sync when adding a state.
// Both the strings and the table itself are const so the lookup table cannot
// be modified by accident.
static const char *const states[] = {
    "parser_start",

    "inside_dataset",

    // inside_group is the state just after parsing the start of a Group
    // element.
    "inside_group",

    "inside_attribute_container",
    "inside_attribute",
    "inside_attribute_value",
    "inside_other_xml_attribute",

    "inside_enum_def",
    "inside_enum_const",

    "inside_dim_def",

    // This covers Byte, ..., Url, Opaque
    "inside_simple_type",

    // "inside_array",
    "inside_dim",
    "inside_map",

    "inside_constructor",

    "parser_unknown",
    "parser_error",
    "parser_fatal_error",

    "parser_end"
};
90 
// Returns true when the element name 'name' differs from the expected 'tag'.
// The comparison is an exact, case-sensitive byte comparison.
static bool is_not(const char *name, const char *tag)
{
    const bool same = (strcmp(name, tag) == 0);
    return !same;
}
95 
104 D4EnumDef *
105 D4ParserSax2::enum_def()
106 {
107  if (!d_enum_def) d_enum_def = new D4EnumDef;
108 
109  return d_enum_def;
110 }
111 
118 D4Dimension *
119 D4ParserSax2::dim_def() {
120  if (!d_dim_def) d_dim_def = new D4Dimension;
121 
122  return d_dim_def;
123 }
124 
130 void D4ParserSax2::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
131 {
132  if (!xml_attrs.empty())
133  xml_attrs.clear(); // erase old attributes
134 
135  // Make a value using the attribute name and the prefix, namespace URI
136  // and the value. The prefix might be null.
137  unsigned int index = 0;
138  for (int i = 0; i < nb_attributes; ++i, index += 5) {
139  xml_attrs.insert(map<string, XMLAttribute>::value_type(string((const char *)attributes[index]),
140  XMLAttribute(attributes + index + 1)));
141 
142  DBG(cerr << "XML Attribute '" << (const char *)attributes[index] << "': "
143  << xml_attrs[(const char *)attributes[index]].value << endl);
144  }
145 }
146 
153 void D4ParserSax2::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
154 {
155  // make a value with the prefix and namespace URI. The prefix might be null.
156  for (int i = 0; i < nb_namespaces; ++i) {
157  namespace_table.insert(map<string, string>::value_type(namespaces[i * 2] != 0 ? (const char *)namespaces[i * 2] : "",
158  (const char *)namespaces[i * 2 + 1]));
159  }
160 }
161 
168 bool D4ParserSax2::check_required_attribute(const string & attr)
169 {
170  if (xml_attrs.find(attr) == xml_attrs.end()) {
171  dmr_error(this, "Required attribute '%s' not found.", attr.c_str());
172  return false;
173  }
174  else
175  return true;
176 }
177 
184 bool D4ParserSax2::check_attribute(const string & attr)
185 {
186  return (xml_attrs.find(attr) != xml_attrs.end());
187 }
188 
189 bool D4ParserSax2::process_dimension_def(const char *name, const xmlChar **attrs, int nb_attributes)
190 {
191  if (is_not(name, "Dimension"))
192  return false;
193 
194  transfer_xml_attrs(attrs, nb_attributes);
195 
196  if (!(check_required_attribute("name") && check_required_attribute("size"))) {
197  dmr_error(this, "The required attribute 'name' or 'size' was missing from a Dimension element.");
198  return false;
199  }
200 
201  // This getter (dim_def) allocates a new object if needed.
202  dim_def()->set_name(xml_attrs["name"].value);
203  try {
204  dim_def()->set_size(xml_attrs["size"].value);
205  }
206  catch (Error &e) {
207  dmr_error(this, e.get_error_message().c_str());
208  return false;
209  }
210 
211  return true;
212 }
213 
231 bool D4ParserSax2::process_dimension(const char *name, const xmlChar **attrs, int nb_attributes)
232 {
233  if (is_not(name, "Dim"))
234  return false;
235 
236  transfer_xml_attrs(attrs, nb_attributes);
237 
238  if (check_attribute("size") && check_attribute("name")) {
239  dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
240  return false;
241  }
242  if (!(check_attribute("size") || check_attribute("name"))) {
243  dmr_error(this, "Either 'size' or 'name' must be used in a Dim element.");
244  return false;
245  }
246 
247  if (!top_basetype()->is_vector_type()) {
248  // Make the top BaseType* an array
249  BaseType *b = top_basetype();
250  pop_basetype();
251 
252  Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
253  a->set_is_dap4(true);
254  a->add_var_nocopy(b);
255  a->set_attributes_nocopy(b->attributes());
256  // trick: instead of popping b's attributes, copying them and then pushing
257  // a's copy, just move the pointer (but make sure there's only one object that
258  // references that pointer).
259  b->set_attributes_nocopy(0);
260 
261  push_basetype(a);
262  }
263 
264  assert(top_basetype()->is_vector_type());
265 
266  Array *a = static_cast<Array*>(top_basetype());
267  if (check_attribute("size")) {
268  a->append_dim(atoi(xml_attrs["size"].value.c_str())); // low budget code for now. jhrg 8/20/13
269  return true;
270  }
271  else if (check_attribute("name")) {
272  string name = xml_attrs["name"].value;
273 
274  D4Dimension *dim = 0;
275  if (name[0] == '/') // lookup the Dimension in the root group
276  dim = dmr()->root()->find_dim(name);
277  else // get enclosing Group and lookup Dimension there
278  dim = top_group()->find_dim(name);
279 
280  if (!dim)
281  throw Error("The dimension '" + name + "' was not found while parsing the variable '" + a->name() + "'.");
282  a->append_dim(dim);
283  return true;
284  }
285 
286  return false;
287 }
288 
289 bool D4ParserSax2::process_map(const char *name, const xmlChar **attrs, int nb_attributes)
290 {
291  if (is_not(name, "Map"))
292  return false;
293 
294  transfer_xml_attrs(attrs, nb_attributes);
295 
296  if (!check_attribute("name")) {
297  dmr_error(this, "The 'name' attribute must be used in a Map element.");
298  return false;
299  }
300 
301  if (!top_basetype()->is_vector_type()) {
302  // Make the top BaseType* an array
303  BaseType *b = top_basetype();
304  pop_basetype();
305 
306  Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
307  a->set_is_dap4(true);
308  a->add_var_nocopy(b);
309  a->set_attributes_nocopy(b->attributes());
310  // trick: instead of popping b's attributes, copying them and then pushing
311  // a's copy, just move the pointer (but make sure there's only one object that
312  // references that pointer).
313  b->set_attributes_nocopy(0);
314 
315  push_basetype(a);
316  }
317 
318  assert(top_basetype()->is_vector_type());
319 
320  Array *a = static_cast<Array*>(top_basetype());
321 
322  string map_name = xml_attrs["name"].value;
323  if (xml_attrs["name"].value[0] != '/')
324  map_name = top_group()->FQN() + map_name;
325 
326  Array *map_source = 0; // The array variable that holds the data for the Map
327 
328  if (map_name[0] == '/') // lookup the Map in the root group
329  map_source = dmr()->root()->find_map_source(map_name);
330  else // get enclosing Group and lookup Map there
331  map_source = top_group()->find_map_source(map_name);
332 
333  // Change: If the parser is in 'strict' mode (the default) and the Array named by
334  // the Map cannot be fond, it is an error. If 'strict' mode is false (permissive
335  // mode), then this is not an error. However, the Array referenced by the Map will
336  // be null. This is a change in the parser's behavior to accommodate requests for
337  // Arrays that include Maps that do not also include the Map(s) in the request.
338  // See https://opendap.atlassian.net/browse/HYRAX-98. jhrg 4/13/16
339  if (!map_source && d_strict)
340  throw Error("The Map '" + map_name + "' was not found while parsing the variable '" + a->name() + "'.");
341 
342  a->maps()->add_map(new D4Map(map_name, map_source));
343 
344  return true;
345 }
346 
347 bool D4ParserSax2::process_group(const char *name, const xmlChar **attrs, int nb_attributes)
348 {
349  if (is_not(name, "Group"))
350  return false;
351 
352  transfer_xml_attrs(attrs, nb_attributes);
353 
354  if (!check_required_attribute("name")) {
355  dmr_error(this, "The required attribute 'name' was missing from a Group element.");
356  return false;
357  }
358 
359  BaseType *btp = dmr()->factory()->NewVariable(dods_group_c, xml_attrs["name"].value);
360  if (!btp) {
361  dmr_fatal_error(this, "Could not instantiate the Group '%s'.", xml_attrs["name"].value.c_str());
362  return false;
363  }
364 
365  D4Group *grp = static_cast<D4Group*>(btp);
366 
367  // Need to set this to get the D4Attribute behavior in the type classes
368  // shared between DAP2 and DAP4. jhrg 4/18/13
369  grp->set_is_dap4(true);
370 
371  // link it up and change the current group
372  D4Group *parent = top_group();
373  if (!parent) {
374  dmr_fatal_error(this, "No Group on the Group stack.");
375  return false;
376  }
377 
378  grp->set_parent(parent);
379  parent->add_group_nocopy(grp);
380 
381  push_group(grp);
382  push_attributes(grp->attributes());
383  return true;
384 }
385 
392 inline bool D4ParserSax2::process_attribute(const char *name, const xmlChar **attrs, int nb_attributes)
393 {
394  if (is_not(name, "Attribute"))
395  return false;
396 
397  // These methods set the state to parser_error if a problem is found.
398  transfer_xml_attrs(attrs, nb_attributes);
399 
400  // add error
401  if (!(check_required_attribute(string("name")) && check_required_attribute(string("type")))) {
402  dmr_error(this, "The required attribute 'name' or 'type' was missing from an Attribute element.");
403  return false;
404  }
405 
406  if (xml_attrs["type"].value == "Container") {
407  push_state(inside_attribute_container);
408 
409  DBG(cerr << "Pushing attribute container " << xml_attrs["name"].value << endl);
410  D4Attribute *child = new D4Attribute(xml_attrs["name"].value, attr_container_c);
411 
412  D4Attributes *tos = top_attributes();
413  // add return
414  if (!tos) {
415  delete child;
416  dmr_fatal_error(this, "Expected an Attribute container on the top of the attribute stack.");
417  return false;
418  }
419 
420  tos->add_attribute_nocopy(child);
421  push_attributes(child->attributes());
422  }
423  else if (xml_attrs["type"].value == "OtherXML") {
424  push_state(inside_other_xml_attribute);
425 
426  dods_attr_name = xml_attrs["name"].value;
427  dods_attr_type = xml_attrs["type"].value;
428  }
429  else {
430  push_state(inside_attribute);
431 
432  dods_attr_name = xml_attrs["name"].value;
433  dods_attr_type = xml_attrs["type"].value;
434  }
435 
436  return true;
437 }
438 
444 inline bool D4ParserSax2::process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes)
445 {
446  if (is_not(name, "Enumeration"))
447  return false;
448 
449  transfer_xml_attrs(attrs, nb_attributes);
450 
451  if (!(check_required_attribute("name") && check_required_attribute("basetype"))) {
452  dmr_error(this, "The required attribute 'name' or 'basetype' was missing from an Enumeration element.");
453  return false;
454  }
455 
456  Type t = get_type(xml_attrs["basetype"].value.c_str());
457  if (!is_integer_type(t)) {
458  dmr_error(this, "The Enumeration '%s' must have an integer type, instead the type '%s' was used.",
459  xml_attrs["name"].value.c_str(), xml_attrs["basetype"].value.c_str());
460  return false;
461  }
462 
463  // This getter allocates a new object if needed.
464  string enum_def_path = xml_attrs["name"].value;
465 #if 0
466  // Use FQNs when things are referenced, not when they are defined
467  if (xml_attrs["name"].value[0] != '/')
468  enum_def_path = top_group()->FQN() + enum_def_path;
469 #endif
470  enum_def()->set_name(enum_def_path);
471  enum_def()->set_type(t);
472 
473  return true;
474 }
475 
476 inline bool D4ParserSax2::process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes)
477 {
478  if (is_not(name, "EnumConst"))
479  return false;
480 
481  // These methods set the state to parser_error if a problem is found.
482  transfer_xml_attrs(attrs, nb_attributes);
483 
484  if (!(check_required_attribute("name") && check_required_attribute("value"))) {
485  dmr_error(this, "The required attribute 'name' or 'value' was missing from an EnumConst element.");
486  return false;
487  }
488 
489  istringstream iss(xml_attrs["value"].value);
490  long long value = 0;
491  iss >> skipws >> value;
492  if (iss.fail() || iss.bad()) {
493  dmr_error(this, "Expected an integer value for an Enumeration constant, got '%s' instead.",
494  xml_attrs["value"].value.c_str());
495  }
496  else if (!enum_def()->is_valid_enum_value(value)) {
497  dmr_error(this, "In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.",
498  xml_attrs["value"].value.c_str(), D4type_name(d_enum_def->type()).c_str());
499  }
500  else {
501  // unfortunate choice of names... args are 'label' and 'value'
502  enum_def()->add_value(xml_attrs["name"].value, value);
503  }
504 
505  return true;
506 }
507 
513 inline bool D4ParserSax2::process_variable(const char *name, const xmlChar **attrs, int nb_attributes)
514 {
515  Type t = get_type(name);
516  if (is_simple_type(t)) {
517  process_variable_helper(t, inside_simple_type, attrs, nb_attributes);
518  return true;
519  }
520  else {
521  switch(t) {
522  case dods_structure_c:
523  process_variable_helper(t, inside_constructor, attrs, nb_attributes);
524  return true;
525 
526  case dods_sequence_c:
527  process_variable_helper(t, inside_constructor, attrs, nb_attributes);
528  return true;
529 
530  default:
531  return false;
532  }
533  }
534 }
535 
543 void D4ParserSax2::process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes)
544 {
545  transfer_xml_attrs(attrs, nb_attributes);
546 
547  if (check_required_attribute("name")) {
548  BaseType *btp = dmr()->factory()->NewVariable(t, xml_attrs["name"].value);
549  if (!btp) {
550  dmr_fatal_error(this, "Could not instantiate the variable '%s'.", xml_attrs["name"].value.c_str());
551  return;
552  }
553 
554  if ((t == dods_enum_c) && check_required_attribute("enum")) {
555  D4EnumDef *enum_def = 0;
556  string enum_path = xml_attrs["enum"].value;
557  if (enum_path[0] == '/')
558  enum_def = dmr()->root()->find_enum_def(enum_path);
559  else
560  enum_def = top_group()->find_enum_def(enum_path);
561 
562  if (!enum_def)
563  dmr_fatal_error(this, "Could not find the Enumeration definition '%s'.", enum_path.c_str());
564 
565  static_cast<D4Enum*>(btp)->set_enumeration(enum_def);
566  }
567 
568  btp->set_is_dap4(true); // see comment above
569  push_basetype(btp);
570 
571  push_attributes(btp->attributes());
572 
573  push_state(s);
574  }
575 }
576 
583 
589 {
590  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
591  parser->error_msg = "";
592  parser->char_data = "";
593 
594  // Set this in intern_helper so that the loop test for the parser_end
595  // state works for the first iteration. It seems like XMLParseChunk calls this
596  // function on it's first run. jhrg 9/16/13
597  // parser->push_state(parser_start);
598 
599  parser->push_attributes(parser->dmr()->root()->attributes());
600 
601  if (parser->debug()) cerr << "Parser start state: " << states[parser->get_state()] << endl;
602 }
603 
607 {
608  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
609 
610  if (parser->debug()) cerr << "Parser end state: " << states[parser->get_state()] << endl;
611 
612  if (parser->get_state() != parser_end)
613  D4ParserSax2::dmr_error(parser, "The document contained unbalanced tags.");
614 
615  // If we've found any sort of error, don't make the DMR; intern() will
616  // take care of the error.
617  if (parser->get_state() == parser_error || parser->get_state() == parser_fatal_error)
618  return;
619 
620  if (!parser->empty_basetype() || parser->empty_group())
621  D4ParserSax2::dmr_error(parser, "The document did not contain a valid root Group or contained unbalanced tags.");
622 
623  parser->pop_group(); // leave the stack 'clean'
624  parser->pop_attributes();
625 }
626 
627 void D4ParserSax2::dmr_start_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
628  int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /*nb_defaulted*/,
629  const xmlChar **attributes)
630 {
631  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
632  const char *localname = (const char *) l;
633 
634  if (parser->debug()) cerr << "Start element " << localname << " (state " << states[parser->get_state()] << ")" << endl;
635 
636  switch (parser->get_state()) {
637  case parser_start:
638  if (is_not(localname, "Dataset"))
639  D4ParserSax2::dmr_error(parser, "Expected DMR to start with a Dataset element; found '%s' instead.", localname);
640 
641  parser->root_ns = URI ? (const char *) URI : "";
642  parser->transfer_xml_attrs(attributes, nb_attributes);
643 
644  if (parser->check_required_attribute(string("name")))
645  parser->dmr()->set_name(parser->xml_attrs["name"].value);
646 
647  if (parser->check_attribute("dapVersion"))
648  parser->dmr()->set_dap_version(parser->xml_attrs["dapVersion"].value);
649 
650  if (parser->check_attribute("dmrVersion"))
651  parser->dmr()->set_dmr_version(parser->xml_attrs["dmrVersion"].value);
652 
653  if (parser->check_attribute("base"))
654  parser->dmr()->set_request_xml_base(parser->xml_attrs["base"].value);
655 
656  if (!parser->root_ns.empty())
657  parser->dmr()->set_namespace(parser->root_ns);
658 
659  // Push the root Group on the stack
660  parser->push_group(parser->dmr()->root());
661 
662  parser->push_state(inside_dataset);
663 
664  break;
665 
666  // Both inside dataset and inside group can have the same stuff.
667  // The difference is that the Dataset holds the root group, which
668  // must be present; other groups are optional
669  case inside_dataset:
670  case inside_group:
671  if (parser->process_enum_def(localname, attributes, nb_attributes))
672  parser->push_state(inside_enum_def);
673  else if (parser->process_dimension_def(localname, attributes, nb_attributes))
674  parser->push_state(inside_dim_def);
675  else if (parser->process_group(localname, attributes, nb_attributes))
676  parser->push_state(inside_group);
677  else if (parser->process_variable(localname, attributes, nb_attributes))
678  // This will push either inside_simple_type or inside_structure
679  // onto the parser state stack.
680  break;
681  else if (parser->process_attribute(localname, attributes, nb_attributes))
682  // This will push either inside_attribute, inside_attribute_container
683  // or inside_otherxml_attribute onto the parser state stack
684  break;
685  else
686  D4ParserSax2::dmr_error(parser, "Expected an Attribute, Enumeration, Dimension, Group or variable element; found '%s' instead.", localname);
687  break;
688 
689  case inside_attribute_container:
690  if (parser->process_attribute(localname, attributes, nb_attributes))
691  break;
692  else
693  D4ParserSax2::dmr_error(parser, "Expected an Attribute element; found '%s' instead.", localname);
694  break;
695 
696  case inside_attribute:
697  if (parser->process_attribute(localname, attributes, nb_attributes))
698  break;
699  else if (strcmp(localname, "Value") == 0)
700  parser->push_state(inside_attribute_value);
701  else
702  dmr_error(parser, "Expected an 'Attribute' or 'Value' element; found '%s' instead.", localname);
703  break;
704 
705  case inside_attribute_value:
706  // Attribute values are processed by the end element code.
707  break;
708 
709  case inside_other_xml_attribute:
710  parser->other_xml_depth++;
711 
712  // Accumulate the elements here
713  parser->other_xml.append("<");
714  if (prefix) {
715  parser->other_xml.append((const char *) prefix);
716  parser->other_xml.append(":");
717  }
718  parser->other_xml.append(localname);
719 
720  if (nb_namespaces != 0) {
721  parser->transfer_xml_ns(namespaces, nb_namespaces);
722 
723  for (map<string, string>::iterator i = parser->namespace_table.begin();
724  i != parser->namespace_table.end(); ++i) {
725  parser->other_xml.append(" xmlns");
726  if (!i->first.empty()) {
727  parser->other_xml.append(":");
728  parser->other_xml.append(i->first);
729  }
730  parser->other_xml.append("=\"");
731  parser->other_xml.append(i->second);
732  parser->other_xml.append("\"");
733  }
734  }
735 
736  if (nb_attributes != 0) {
737  parser->transfer_xml_attrs(attributes, nb_attributes);
738  for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) {
739  parser->other_xml.append(" ");
740  if (!i->second.prefix.empty()) {
741  parser->other_xml.append(i->second.prefix);
742  parser->other_xml.append(":");
743  }
744  parser->other_xml.append(i->first);
745  parser->other_xml.append("=\"");
746  parser->other_xml.append(i->second.value);
747  parser->other_xml.append("\"");
748  }
749  }
750 
751  parser->other_xml.append(">");
752  break;
753 
754  case inside_enum_def:
755  // process an EnumConst element
756  if (parser->process_enum_const(localname, attributes, nb_attributes))
757  parser->push_state(inside_enum_const);
758  else
759  dmr_error(parser, "Expected an 'EnumConst' element; found '%s' instead.", localname);
760  break;
761 
762  case inside_enum_const:
763  // No content; nothing to do
764  break;
765 
766  case inside_dim_def:
767  // No content; nothing to do
768  break;
769 #if 0
770  case inside_dimension:
771  // No content.
772  break;
773 #endif
774  case inside_dim:
775  // No content.
776  break;
777 
778  case inside_map:
779  // No content.
780  break;
781 
782  case inside_simple_type:
783  if (parser->process_attribute(localname, attributes, nb_attributes))
784  break;
785  else if (parser->process_dimension(localname, attributes, nb_attributes))
786  parser->push_state(inside_dim);
787  else if (parser->process_map(localname, attributes, nb_attributes))
788  parser->push_state(inside_map);
789  else
790  dmr_error(parser, "Expected an 'Attribute', 'Dim' or 'Map' element; found '%s' instead.", localname);
791  break;
792 
793  case inside_constructor:
794  if (parser->process_variable(localname, attributes, nb_attributes))
795  // This will push either inside_simple_type or inside_structure
796  // onto the parser state stack.
797  break;
798  else if (parser->process_attribute(localname, attributes, nb_attributes))
799  break;
800  else if (parser->process_dimension(localname, attributes, nb_attributes))
801  parser->push_state(inside_dim);
802  else if (parser->process_map(localname, attributes, nb_attributes))
803  parser->push_state(inside_map);
804  else
805  D4ParserSax2::dmr_error(parser, "Expected an Attribute, Dim, Map or variable element; found '%s' instead.", localname);
806  break;
807 
808  case parser_unknown:
809  // FIXME?
810  // *** Never used? If so remove/error
811  parser->push_state(parser_unknown);
812  break;
813 
814  case parser_error:
815  case parser_fatal_error:
816  break;
817 
818  case parser_end:
819  // FIXME Error?
820  break;
821  }
822 
823  if (parser->debug()) cerr << "Start element exit state: " << states[parser->get_state()] << endl;
824 }
825 
826 void D4ParserSax2::dmr_end_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI)
827 {
828  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
829  const char *localname = (const char *) l;
830 
831  if (parser->debug())
832  cerr << "End element " << localname << " (state " << states[parser->get_state()] << ")" << endl;
833 
834  switch (parser->get_state()) {
835  case parser_start:
836  dmr_fatal_error(parser, "Unexpected state, inside start state while processing element '%s'.", localname);
837  break;
838 
839  case inside_dataset:
840  if (is_not(localname, "Dataset"))
841  D4ParserSax2::dmr_error(parser, "Expected an end Dataset tag; found '%s' instead.", localname);
842 
843  parser->pop_state();
844  if (parser->get_state() != parser_start)
845  dmr_fatal_error(parser, "Unexpected state, expected start state.");
846  else {
847  parser->pop_state();
848  parser->push_state(parser_end);
849  }
850  break;
851 
852  case inside_group: {
853  if (is_not(localname, "Group"))
854  D4ParserSax2::dmr_error(parser, "Expected an end tag for a Group; found '%s' instead.", localname);
855 
856  if (!parser->empty_basetype() || parser->empty_group())
857  D4ParserSax2::dmr_error(parser,
858  "The document did not contain a valid root Group or contained unbalanced tags.");
859 
860  parser->pop_group();
861  parser->pop_state();
862  break;
863  }
864 
865  case inside_attribute_container:
866  if (is_not(localname, "Attribute"))
867  D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
868 
869  parser->pop_state();
870  parser->pop_attributes();
871  break;
872 
873  case inside_attribute:
874  if (is_not(localname, "Attribute"))
875  D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
876 
877  parser->pop_state();
878  break;
879 
880  case inside_attribute_value: {
881  if (is_not(localname, "Value"))
882  D4ParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
883 
884  parser->pop_state();
885 
886  // The old code added more values using the name and type as
887  // indexes to find the correct attribute. Use get() for that
888  // now. Or fix this code to keep a pointer to the to attribute...
889  D4Attributes *attrs = parser->top_attributes();
890  D4Attribute *attr = attrs->get(parser->dods_attr_name);
891  if (!attr) {
892  attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
893  attrs->add_attribute_nocopy(attr);
894  }
895  attr->add_value(parser->char_data);
896 
897  parser->char_data = ""; // Null this after use.
898  break;
899  }
900 
901  case inside_other_xml_attribute: {
902  if (strcmp(localname, "Attribute") == 0 && parser->root_ns == (const char *) URI) {
903  parser->pop_state();
904 
905  // The old code added more values using the name and type as
906  // indexes to find the correct attribute. Use get() for that
907  // now. Or fix this code to keep a pointer to the to attribute...
908  D4Attributes *attrs = parser->top_attributes();
909  D4Attribute *attr = attrs->get(parser->dods_attr_name);
910  if (!attr) {
911  attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
912  attrs->add_attribute_nocopy(attr);
913  }
914  attr->add_value(parser->other_xml);
915 
916  parser->other_xml = ""; // Null this after use.
917  }
918  else {
919  if (parser->other_xml_depth == 0) {
920  D4ParserSax2::dmr_error(parser, "Expected an OtherXML attribute to end! Instead I found '%s'",
921  localname);
922  break;
923  }
924  parser->other_xml_depth--;
925 
926  parser->other_xml.append("</");
927  if (prefix) {
928  parser->other_xml.append((const char *) prefix);
929  parser->other_xml.append(":");
930  }
931  parser->other_xml.append(localname);
932  parser->other_xml.append(">");
933  }
934  break;
935  }
936 
937  case inside_enum_def:
938  if (is_not(localname, "Enumeration"))
939  D4ParserSax2::dmr_error(parser, "Expected an end Enumeration tag; found '%s' instead.", localname);
940  if (!parser->top_group())
942  "Expected a Group to be the current item, while finishing up an Enumeration.");
943  else {
944  // copy the pointer; not a deep copy
945  parser->top_group()->enum_defs()->add_enum_nocopy(parser->enum_def());
946  // Set the enum_def to null; next call to enum_def() will
947  // allocate a new object
948  parser->clear_enum_def();
949  parser->pop_state();
950  }
951  break;
952 
953  case inside_enum_const:
954  if (is_not(localname, "EnumConst"))
955  D4ParserSax2::dmr_error(parser, "Expected an end EnumConst tag; found '%s' instead.", localname);
956 
957  parser->pop_state();
958  break;
959 
960  case inside_dim_def: {
961  if (is_not(localname, "Dimension"))
962  D4ParserSax2::dmr_error(parser, "Expected an end Dimension tag; found '%s' instead.", localname);
963 
964  if (!parser->top_group())
965  D4ParserSax2::dmr_error(parser,
966  "Expected a Group to be the current item, while finishing up an Dimension.");
967 
968  // FIXME Use the Group on the top of the group stack
969  // copy the pointer; not a deep copy
970  parser->top_group()->dims()->add_dim_nocopy(parser->dim_def());
971  //parser->dmr()->root()->dims()->add_dim_nocopy(parser->dim_def());
972  // Set the dim_def to null; next call to dim_def() will
973  // allocate a new object. Calling 'clear' is important because
974  // the cleanup method will free dim_def if it's not null and
975  // we just copied the pointer in the add_dim_nocopy() call
976  // above.
977  parser->clear_dim_def();
978  parser->pop_state();
979  break;
980  }
981 
982  case inside_simple_type:
983  if (is_simple_type(get_type(localname))) {
984  BaseType *btp = parser->top_basetype();
985  parser->pop_basetype();
986  parser->pop_attributes();
987 
988  BaseType *parent = 0;
989  if (!parser->empty_basetype())
990  parent = parser->top_basetype();
991  else if (!parser->empty_group())
992  parent = parser->top_group();
993  else {
994  dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
995  localname);
996  delete btp;
997  parser->pop_state();
998  break;
999  }
1000 
1001  if (parent->type() == dods_array_c)
1002  static_cast<Array*>(parent)->prototype()->add_var_nocopy(btp);
1003  else
1004  parent->add_var_nocopy(btp);
1005  }
1006  else
1007  D4ParserSax2::dmr_error(parser, "Expected an end tag for a simple type; found '%s' instead.", localname);
1008 
1009  parser->pop_state();
1010  break;
1011 
1012  case inside_dim:
1013  if (is_not(localname, "Dim"))
1014  D4ParserSax2::dmr_fatal_error(parser, "Expected an end Dim tag; found '%s' instead.", localname);
1015 
1016  parser->pop_state();
1017  break;
1018 
1019  case inside_map:
1020  if (is_not(localname, "Map"))
1021  D4ParserSax2::dmr_fatal_error(parser, "Expected an end Map tag; found '%s' instead.", localname);
1022 
1023  parser->pop_state();
1024  break;
1025 
1026  case inside_constructor: {
1027  if (strcmp(localname, "Structure") != 0 && strcmp(localname, "Sequence") != 0) {
1028  D4ParserSax2::dmr_error(parser, "Expected an end tag for a constructor; found '%s' instead.", localname);
1029  return;
1030  }
1031 
1032  BaseType *btp = parser->top_basetype();
1033  parser->pop_basetype();
1034  parser->pop_attributes();
1035 
1036  BaseType *parent = 0;
1037  if (!parser->empty_basetype())
1038  parent = parser->top_basetype();
1039  else if (!parser->empty_group())
1040  parent = parser->top_group();
1041  else {
1042  dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1043  localname);
1044  delete btp;
1045  parser->pop_state();
1046  break;
1047  }
1048 
1049  // TODO Why doesn't this code mirror the simple_var case and test
1050  // for the parent being an array? jhrg 10/13/13
1051  parent->add_var_nocopy(btp);
1052  parser->pop_state();
1053  break;
1054  }
1055 
1056  case parser_unknown:
1057  parser->pop_state();
1058  break;
1059 
1060  case parser_error:
1061  case parser_fatal_error:
1062  break;
1063 
1064  case parser_end:
1065  // FIXME Error?
1066  break;
1067  }
1068 
1069  if (parser->debug()) cerr << "End element exit state: " << states[parser->get_state()] << endl;
1070 }
1071 
1075 void D4ParserSax2::dmr_get_characters(void * p, const xmlChar * ch, int len)
1076 {
1077  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1078 
1079  switch (parser->get_state()) {
1080  case inside_attribute_value:
1081  parser->char_data.append((const char *) (ch), len);
1082  DBG(cerr << "Characters: '" << parser->char_data << "'" << endl);
1083  break;
1084 
1085  case inside_other_xml_attribute:
1086  parser->other_xml.append((const char *) (ch), len);
1087  DBG(cerr << "Other XML Characters: '" << parser->other_xml << "'" << endl);
1088  break;
1089 
1090  default:
1091  break;
1092  }
1093 }
1094 
1099 void D4ParserSax2::dmr_ignoreable_whitespace(void *p, const xmlChar *ch, int len)
1100 {
1101  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1102 
1103  switch (parser->get_state()) {
1104  case inside_other_xml_attribute:
1105  parser->other_xml.append((const char *) (ch), len);
1106  break;
1107 
1108  default:
1109  break;
1110  }
1111 }
1112 
1118 void D4ParserSax2::dmr_get_cdata(void *p, const xmlChar *value, int len)
1119 {
1120  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1121 
1122  switch (parser->get_state()) {
1123  case inside_other_xml_attribute:
1124  parser->other_xml.append((const char *) (value), len);
1125  break;
1126 
1127  case parser_unknown:
1128  break;
1129 
1130  default:
1131  D4ParserSax2::dmr_error(parser, "Found a CData block but none are allowed by DAP4.");
1132 
1133  break;
1134  }
1135 }
1136 
1141 xmlEntityPtr D4ParserSax2::dmr_get_entity(void *, const xmlChar * name)
1142 {
1143  return xmlGetPredefinedEntity(name);
1144 }
1145 
1156 void D4ParserSax2::dmr_fatal_error(void * p, const char *msg, ...)
1157 {
1158  va_list args;
1159  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1160 
1161  parser->push_state(parser_fatal_error);
1162 
1163  va_start(args, msg);
1164  char str[1024];
1165  vsnprintf(str, 1024, msg, args);
1166  va_end(args);
1167 
1168  int line = xmlSAX2GetLineNumber(parser->context);
1169 
1170  if (!parser->error_msg.empty()) parser->error_msg += "\n";
1171  parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1172 }
1173 
1174 void D4ParserSax2::dmr_error(void *p, const char *msg, ...)
1175 {
1176  va_list args;
1177  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1178 
1179  parser->push_state(parser_error);
1180 
1181  va_start(args, msg);
1182  char str[1024];
1183  vsnprintf(str, 1024, msg, args);
1184  va_end(args);
1185 
1186  int line = xmlSAX2GetLineNumber(parser->context);
1187 
1188  if (!parser->error_msg.empty()) parser->error_msg += "\n";
1189  parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1190 }
1192 
1196 void D4ParserSax2::cleanup_parse()
1197 {
1198  bool wellFormed = context->wellFormed;
1199  bool valid = context->valid;
1200 
1201  context->sax = NULL;
1202  xmlFreeParserCtxt(context);
1203 
1204  delete d_enum_def;
1205  d_enum_def = 0;
1206 
1207  delete d_dim_def;
1208  d_dim_def = 0;
1209 
1210  // If there's an error, there may still be items on the stack at the
1211  // end of the parse.
1212  while (!btp_stack.empty()) {
1213  delete top_basetype();
1214  pop_basetype();
1215  }
1216 
1217  if (!wellFormed)
1218  throw Error("The DMR was not well formed. " + error_msg);
1219  else if (!valid)
1220  throw Error("The DMR was not valid." + error_msg);
1221  else if (get_state() == parser_error)
1222  throw Error(error_msg);
1223  else if (get_state() == parser_fatal_error)
1224  throw InternalErr(error_msg);
1225 }
1226 
1241 void D4ParserSax2::intern(istream &f, DMR *dest_dmr, bool debug)
1242 {
1243  d_debug = debug;
1244 
1245  // Code example from libxml2 docs re: read from a stream.
1246 
1247  if (!f.good())
1248  throw Error("Input stream not open or read error");
1249  if (!dest_dmr)
1250  throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1251 
1252  d_dmr = dest_dmr; // dump values here
1253 
1254  const int size = 1024;
1255  char chars[size];
1256  int line = 1;
1257 
1258  f.getline(chars, size);
1259  int res = f.gcount();
1260  if (res == 0) throw Error("No input found while parsing the DMR.");
1261 
1262  if (debug) cerr << "line: (" << line++ << "): " << chars << endl;
1263 
1264  context = xmlCreatePushParserCtxt(&ddx_sax_parser, this, chars, res - 1, "stream");
1265  context->validate = true;
1266  push_state(parser_start);
1267 
1268  f.getline(chars, size);
1269  while ((f.gcount() > 0) && (get_state() != parser_end)) {
1270  if (debug) cerr << "line: (" << line++ << "): " << chars << endl;
1271  xmlParseChunk(context, chars, f.gcount() - 1, 0);
1272  f.getline(chars, size);
1273  }
1274 
1275  // This call ends the parse.
1276  xmlParseChunk(context, chars, 0, 1/*terminate*/);
1277 
1278  // This checks that the state on the parser stack is parser_end and throws
1279  // an exception if it's not (i.e., the loop exited with gcount() == 0).
1280  cleanup_parse();
1281 }
1282 
1293 void D4ParserSax2::intern(const string &document, DMR *dest_dmr, bool debug)
1294 {
1295  intern(document.c_str(), document.length(), dest_dmr, debug);
1296 }
1297 
1308 void D4ParserSax2::intern(const char *buffer, int size, DMR *dest_dmr, bool debug)
1309 {
1310  if (!(size > 0)) return;
1311 
1312  d_debug = debug;
1313 
1314  // Code example from libxml2 docs re: read from a stream.
1315 
1316  if (!dest_dmr) throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1317  d_dmr = dest_dmr; // dump values in dest_dmr
1318 
1319  push_state(parser_start);
1320  context = xmlCreatePushParserCtxt(&ddx_sax_parser, this, buffer, size, "stream");
1321  context->validate = true;
1322  //push_state(parser_start);
1323  //xmlParseChunk(context, buffer, size, 0);
1324 
1325  // This call ends the parse.
1326  xmlParseChunk(context, buffer, 0, 1/*terminate*/);
1327 
1328  // This checks that the state on the parser stack is parser_end and throws
1329  // an exception if it's not (i.e., the loop exited with gcount() == 0).
1330  cleanup_parse();
1331 }
1332 
1333 } // namespace libdap
virtual BaseType * NewVariable(Type t, const string &name) const
static void dmr_end_document(void *parser)
void set_namespace(const string &ns)
Set the namespace for this DDS/DDX object/response.
Definition: DMR.h:158
bool is_valid_enum_value(long long value)
Definition: D4EnumDefs.cc:43
D4Dimension * find_dim(const string &path)
Find the dimension using a path. Using the DAP4 name syntax, lookup a dimension. The dimension must b...
Definition: D4Group.cc:268
D4Group * root()
Definition: DMR.cc:242
static void dmr_start_document(void *parser)
static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name)
bool is_vector_type(Type t)
Returns true if the instance is a vector (i.e., array) type variable.
Definition: util.cc:816
void set_request_xml_base(const string &xb)
Definition: DMR.h:152
Type
Identifies the data type.
Definition: Type.h:94
A class for software fault reporting.
Definition: InternalErr.h:64
static void dmr_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
void add_dim_nocopy(D4Dimension *dim)
Definition: D4Dimensions.h:160
virtual D4BaseTypeFactory * factory()
Definition: DMR.h:125
static void dmr_get_cdata(void *parser, const xmlChar *value, int len)
virtual Type type() const
Returns the type of the class instance.
Definition: BaseType.cc:310
ObjectType get_type(const string &value)
Definition: mime_util.cc:326
string D4type_name(Type t)
Returns the type of the class instance as a string. Supports all DAP4 types and not the DAP2-only typ...
Definition: util.cc:693
bool is_simple_type(Type t)
Returns true if the instance is a numeric, string or URL type variable.
Definition: util.cc:774
virtual D4Attributes * attributes()
Definition: BaseType.cc:544
static void dmr_get_characters(void *parser, const xmlChar *ch, int len)
The basic data type for the DODS DAP types.
Definition: BaseType.h:117
void set_dap_version(const string &version_string)
Definition: DMR.cc:254
virtual std::string FQN() const
Definition: D4Group.cc:176
D4Attribute * get(const string &fqn)
void add_map(D4Map *map)
Definition: D4Maps.h:115
A class for error processing.
Definition: Error.h:90
D4EnumDefs * enum_defs()
Get the enumerations defined for this Group.
Definition: D4Group.h:95
static void dmr_fatal_error(void *parser, const char *msg,...)
bool is_integer_type(Type t)
Definition: util.cc:905
D4Dimensions * dims()
Get the dimensions defined for this Group.
Definition: D4Group.h:80