cpp-ElementTree
Python ElementTree-alike XML API for C++
element.hpp
1 #ifndef ETREE_ELEMENT_H
2 #define ETREE_ELEMENT_H
3 
4 /*
5  * Copyright David Wilson, 2013.
6  * License: http://opensource.org/licenses/MIT
7  */
8 
9 #include <iostream>
10 #include <stdexcept>
11 #include <string>
12 #include <vector>
13 #include <mutex>
14 
15 
// Feature detection. ETREE_0X is defined when the compiler advertises C++11 or
// later; it enables the initializer-list based overloads declared in this
// header. ETREE_EXPLICIT expands to `explicit` in that mode (explicit
// conversion operators are a C++11 feature) and to nothing otherwise.
//
// MSVC reports __cplusplus as 199711L unless /Zc:__cplusplus is passed, so
// _MSVC_LANG is consulted as well.
#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
# include <initializer_list>
# include <utility>
# define ETREE_0X
# define ETREE_EXPLICIT explicit
#else
# define ETREE_EXPLICIT
#endif
24 
25 
26 // libxml forwards.
// libxml forwards. Only pointers to these structures appear in this header,
// so incomplete types are sufficient here.
struct _xmlAttr;
struct _xmlDoc;
struct _xmlNode;
struct _xmlNs;
struct _xmlXPathCompExpr;
struct _xmlXPathContext;
33 
34 
38 namespace etree {
39 
// The two standard types used throughout the API are made available
// unqualified.
using std::string;
using std::vector;

// Forward declarations of classes defined later in this header.
class AttrMap;
class Element;
class ElementTree;
class QName;
class ChildIterator;
class XPath;
class XPathContext;

#ifdef ETREE_0X
// Pair types, plus the initializer-list type accepted by the attribute-taking
// overloads (SubElement(), Element's constructor and AttrMap::set()).
typedef std::pair<string, string> kv_pair;
typedef std::pair<string, string> ns_pair;
typedef std::initializer_list<kv_pair> kv_list;
#endif
56 
57 
65 Element SubElement(Element &parent, const QName &qname);
66 
67 
68 #ifdef ETREE_0X
69 
79 Element SubElement(Element &parent, const QName &qname, kv_list attribs);
80 #endif
81 
82 
94 Element fromstring(const char *s, size_t n=0);
95 
102 string tostring(const Element &e);
103 
110 string tostring(const ElementTree &e);
111 
118 ElementTree parse(std::istream &is);
119 
126 ElementTree parse(const string &path);
127 
134 ElementTree parse(int fd);
135 
136 
140 namespace html {
148  Element fromstring(const char *s);
149 
157  Element fromstring(const std::string &s);
158 
166  std::string tostring(const Element &e);
167 
174  ElementTree parse(std::istream &is);
175 
182  ElementTree parse(const std::string &path);
183 
190  ElementTree parse(int fd);
191 } // namespace etree::html
192 
193 
201 std::ostream &operator<< (std::ostream &out, const ElementTree &elem);
202 
210 std::ostream &operator<< (std::ostream &out, const Element &elem);
211 
219 std::ostream &operator<< (std::ostream &out, const QName &qname);
220 
221 
239 template<typename T>
240 class Nullable {
242  //no constructed value, and also allows the value type to lack a default
243  //constructor.
244  unsigned char val_[sizeof(T)];
245 
247  bool set_;
248 
249  public:
253  Nullable();
254 
260  Nullable(const T &val);
261 
267  Nullable(const Nullable<T> &val);
268 
269  #ifdef ETREE_0X
270 
275  Nullable(T &&val);
276  #endif
277 
281  ~Nullable();
282 
288  bool operator==(const Nullable<T> &other) const;
289 
294  bool operator==(const T &other) const;
295 
299  Nullable<T> &operator=(const Nullable<T> &other);
300 
304  ETREE_EXPLICIT operator bool() const;
305 
309  T &operator *();
310 
311  /*
312  * Return the contained value, or throw missing_value_error().
313  */
314  T *operator ->();
315 
319  const T &operator *() const;
320 
324  const T *operator ->() const;
325 };
326 
332 
333 
342 class QName {
344  string ns_;
345 
347  string tag_;
348 
349  public:
356  QName(const string &ns, const string &tag);
357 
363  QName(const QName &other);
364 
370  QName(const string &qname);
371 
377  QName(const char *qname);
378 
384  string tostring() const;
385 
389  const string &tag() const;
390 
394  const string &ns() const;
395 
404  bool equals(const char *ns, const char *tag) const;
405 
412  bool operator==(const QName &other) const;
413 
420  bool operator!=(const QName &other) const;
421 };
422 
423 
/*
 * Represent a list of namespaces and their associated prefixes, as pairs of
 * strings.
 * NOTE(review): which element of each pair is the prefix and which is the
 * namespace is not visible here -- confirm.
 */
typedef std::vector<std::pair<std::string, std::string>> ns_list;
429 
430 
437  _xmlXPathContext *context_;
438  std::mutex mtx_;
439 
440  // For mutex().
441  friend XPath;
442 
443  public:
444  ~XPathContext();
445  XPathContext(const ns_list &ns_list = {});
446  XPathContext(const XPathContext &other);
447 };
448 
449 
453 class XPath {
455  const XPathContext *context_;
456 
458  _xmlXPathCompExpr *expr_;
459 
461  string s_;
462 
463  public:
467  ~XPath();
468 
474  XPath(const char *s);
475 
481  XPath(const string &s);
482 
491  XPath(const string &s, const XPathContext &context);
492 
498  XPath(const XPath &other);
499 
503  const string &expr() const;
504 
510  XPath &operator =(const XPath &other);
511 
518  Nullable<Element> find(const Element &e) const;
519 
526  vector<Element> findall(const Element &e) const;
527 
532  vector<Element> removeall(Element &expr) const;
533 
544  string findtext(const Element &e, const string &default_="") const;
545 };
546 
547 
552 {
553  _xmlAttr *attr_;
554 
555  public:
556  Attribute(_xmlAttr *attr);
557 
561  string tag() const;
562 
566  string ns() const;
567 
571  QName qname() const;
572 
576  string value() const;
577 };
578 
579 
585 {
586  _xmlNode *node_;
587  _xmlAttr *attr_;
588 
589  public:
590  ~AttrIterator();
591  AttrIterator();
592  AttrIterator(_xmlNode *elem, _xmlAttr *attr);
593 
594  bool operator ==(const AttrIterator &other);
595  bool operator !=(const AttrIterator &other);
596 
600  const Attribute operator *();
601 
602  AttrIterator &operator++();
603 };
604 
605 
609 class AttrMap
610 {
611  _xmlNode *node_;
612 
613  public:
614  ~AttrMap();
615  AttrMap(_xmlNode *elem);
616 
620  AttrIterator begin() const;
621 
625  AttrIterator end() const;
626 
630  bool has(const QName &qname) const;
631 
640  string get(const QName &qname, const string &default_="") const;
641 
650  void set(const QName &qname, const string &s);
651 
652  #ifdef ETREE_0X
653 
659  void set(kv_list attribs);
660  #endif
661 
665  vector<QName> keys() const;
666 
670  bool remove(const QName &qname);
671 
675  size_t size() const;
676 };
677 
678 
685 {
686  template<typename P, typename T>
687  friend P nodeFor__(const T &);
688 
689  _xmlDoc *node_;
690 
691  public:
692  ~ElementTree();
693  ElementTree();
694  ElementTree(_xmlDoc *doc);
695  Element getroot() const;
696 
701  bool operator==(const ElementTree &other) const;
702 
707  bool operator!=(const ElementTree &other) const;
708 
714  ElementTree &operator=(const ElementTree&);
715 };
716 
717 
721 class Element
722 {
723  template<typename P, typename T>
724  friend P nodeFor__(const T &);
725 
727  _xmlNode *node_;
728 
730  Element();
731 
732  public:
737  ~Element();
738 
744  Element(const Element &e);
745 
752  Element(_xmlNode *node);
753 
762  Element(const QName &qname);
763 
764  #ifdef ETREE_0X
765 
775  Element(const QName &qname, kv_list attribs);
776  #endif
777 
786  void ensurens(const string &uri);
787 
791  QName qname() const;
792 
796  void qname(const QName &qname);
797 
801  string tag() const;
802 
806  void tag(const string &tag);
807 
811  string ns() const;
812 
817  void ns(const string &ns);
818 
822  AttrMap attrib() const;
823 
831  string get(const QName &qname, const string &default_="") const;
832 
836  size_t size() const;
837 
841  Element operator[] (size_t i);
842 
847  bool operator==(const Element &other) const;
848 
853  bool operator!=(const Element &other) const;
854 
860  Element &operator=(const Element&);
861 
868  Nullable<Element> child() const;
869 
878  Nullable<Element> child(const QName &qn) const;
879 
888  Element ensurechild(const QName &qn);
889 
896  vector<Element> children(const QName &qn) const;
897 
903  vector<Element> children() const;
904 
911  Nullable<Element> find(const XPath &expr) const;
912 
923  string findtext(const XPath &expr, const string &default_="") const;
924 
931  vector<Element> findall(const XPath &expr) const;
932 
937  vector<Element> removeall(const XPath &expr);
938 
951  void append(Element &e);
952 
969  void insert(size_t i, Element &e);
970 
980  void remove(Element &e);
981 
989  void remove();
990 
1013  void graft();
1014 
1019  Element copy();
1020 
1024  bool ancestorOf(const Element &e) const;
1025 
1029  Nullable<Element> getnext() const;
1030 
1034  Nullable<Element> getparent() const;
1035 
1039  Nullable<Element> getprev() const;
1040 
1044  ElementTree getroottree() const;
1045 
1057  string text() const;
1058 
1064  void text(const string &s);
1065 
1078  string tail() const;
1079 
1085  void tail(const string &s);
1086 
1090  ChildIterator begin() const;
1091 
1095  ChildIterator end() const;
1096 };
1097 
1098 
1104 {
1105  Nullable<Element> elem_;
1106 
1107  public:
1108  ChildIterator();
1109  ChildIterator(const Element &);
1110  ChildIterator(const ChildIterator &);
1111  ChildIterator operator++(int);
1112  ChildIterator operator++();
1113  bool operator==(const ChildIterator &) const;
1114  bool operator!=(const ChildIterator &) const;
1115 
1119  Element &operator*();
1120 };
1121 
1122 
1131 template<typename Function>
1132 void
1133 visit(Element elem, Function func)
1134 {
1135  func(elem);
1136  for(auto &child : elem.children()) {
1137  visit(child, func);
1138  }
1139 }
1140 
1141 
// Helper that declares an exception type derived from std::runtime_error
// whose what() text is "etree::" followed by the type's name. The macro is
// local to this header and is undefined again after the list below.
#define EXCEPTION(name) \
    struct name : public std::runtime_error { \
        name() : std::runtime_error("etree::"#name) {} \
    };

EXCEPTION(cyclical_tree_error)
EXCEPTION(internal_error)
EXCEPTION(invalid_xpath_error)
EXCEPTION(memory_error)
EXCEPTION(missing_namespace_error)
EXCEPTION(missing_value_error)
EXCEPTION(out_of_bounds_error)
EXCEPTION(parse_error)
EXCEPTION(qname_error)
EXCEPTION(serialization_error)

#undef EXCEPTION
1159 
1160 
/*
 * Thrown to indicate libxml2 raised a parse error.
 *
 * Unlike the fixed-message exception types in this header, the what() text
 * is supplied by the thrower.
 */
struct xml_error : public std::runtime_error
{
    // `s` becomes the what() message; it must not be a null pointer because
    // it is handed straight to std::runtime_error.
    xml_error(const char *s)
        : std::runtime_error(s) {}
};
1169 
1170 
1171 } // namespace
1172 
1173 
1174 namespace std {
1175  template<>
1176  struct hash<etree::QName>
1177  {
1178  size_t operator()(const etree::QName &x) const;
1179  };
1180 } // namespace
1181 
1182 
1183 #endif
Element SubElement(Element &parent, const QName &qname)
Construct a new child element.
Definition: element.cpp:1808
ElementTree parse(std::istream &is)
Parse an XML document from a STL istream and return it.
Definition: element.cpp:1931
Thrown to indicate libxml2 raised a parse error.
Definition: element.hpp:1164
ElementTree parse(std::istream &is)
Parse an HTML document from a STL istream and return it.
Definition: element.cpp:1985
Represents a mapping of an Element's attribute names to their values.
Definition: element.hpp:609
Manages a set of registered XPath namespaces and extension functions.
Definition: element.hpp:436
Represents a reference to a single XML element.
Definition: element.hpp:721
Manages a compiled XPath expression.
Definition: element.hpp:453
Definition: element.cpp:2046
vector< Element > children(const QName &qn) const
Return children matching a name.
Definition: element.cpp:1436
std::vector< std::pair< std::string, std::string > > ns_list
Represent a list of namespaces and their associated prefixes that should be defined while executing a...
Definition: element.hpp:428
ElementTree namespace; public classes and functions are defined here.
Definition: element.cpp:28
std::string tostring(const Element &e)
Serialize an HTML element.
Definition: element.cpp:1771
Canonical representation for a name-namespace pair, without namespace prefix, in James Clark's Univer...
Definition: element.hpp:342
Represents iteration position produced by AttrMap::begin() and AttrMap::end().
Definition: element.hpp:584
Element fromstring(const char *s)
Parse an HTML document from a character array and return a reference to its root node.
Definition: element.cpp:1967
Element fromstring(const char *s, size_t n)
Parse an XML document from a character array and return a reference to its root node.
Definition: element.cpp:1919
Nullable< Element > NullableElement
Convenient alias for Nullable<Element> to avoid typing "etree::" twice in scopes that do not import N...
Definition: element.hpp:331
void visit(Element elem, Function func)
Depth-first visit an element and all of its subelements.
Definition: element.hpp:1133
Proxy value type yielded by AttrIterator.
Definition: element.hpp:551
Lightweight wrapper to add nullable semantics to another type.
Definition: element.hpp:240
string tostring(const Element &e)
Serialize an element.
Definition: element.cpp:1771
Represents iteration position produced by Element::begin() and Element::end().
Definition: element.hpp:1103
Represents a reference to the root of an XML tree, the document itself.
Definition: element.hpp:684