xmlwrapp
document.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2003 Peter J Jones (pjones@pmade.org)
3  * Copyright (C) 2013 Vaclav Slavik <vslavik@gmail.com>
4  * All Rights Reserved
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in
14  * the documentation and/or other materials provided with the
15  * distribution.
16  * 3. Neither the name of the Author nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR
24  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
27  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 /**
35  @file
36 
37  This file contains the definition of the xml::document class.
38  */
39 
40 #ifndef _xmlwrapp_document_h_
41 #define _xmlwrapp_document_h_
42 
43 // xmlwrapp includes
44 #include "xmlwrapp/init.h"
45 #include "xmlwrapp/node.h"
46 #include "xmlwrapp/export.h"
47 #include "xmlwrapp/errors.h"
48 
49 // standard includes
50 #include <iosfwd>
51 #include <string>
52 #include <cstddef>
53 
54 // forward declarations
55 namespace xslt
56 {
57 
58 class stylesheet;
59 namespace impl
60 {
61 class result;
62 }
63 
64 } // end xslt namespace
65 
66 namespace xml
67 {
68 
69 // forward declarations
70 class schema;
71 class tree_parser;
72 
73 namespace impl
74 {
75 struct doc_impl;
76 }
77 
78 /**
79  The xml::document class is used to hold the XML tree and various bits of
80  information about it.
81  */
82 class XMLWRAPP_API document
83 {
84 public:
85  /// size type
86  typedef std::size_t size_type;
87 
88  /**
89  Create a new XML document with the default settings. The new document
90  will contain a root node with a name of "blank".
91  */
92  document();
93 
94  /**
95  Create a new XML document and set the name of the root element to the
96  given text.
97 
98  @param root_name What to set the name of the root element to.
99 
100  @deprecated Use `xml::document(xml::node(root_name))` constructor instead.
101  */
102  XMLWRAPP_DEPRECATED("use xml::document(xml::node(root_name)) instead")
103  explicit document(const char *root_name);
104 
105  /**
106  Create a new XML document and set the root node.
107 
108  @param n The node to use as the root node. n will be copied.
109  */
110  explicit document(const node& n);
111 
112  /**
113  Load XML document from given file.
114 
115  Errors are handled by @a on_error handler; if you pass
116  xml::throw_on_error, xml::exception is thrown on errors. If there's a
117  fatal error that prevents the document from being loaded and the error
118  handler doesn't throw an exception, the constructor will throw
119  xml::exception anyway.
120 
121  @param filename The name of the file to parse.
122  @param on_error Handler called to process errors and warnings.
123 
124  @since 0.7.0
125  */
126  explicit document(const char *filename, error_handler& on_error);
127 
128  /**
129  Load XML document from given data.
130 
131  Errors are handled by @a on_error handler; by default, xml::exception
132  is thrown on errors. If there's a fatal error that prevents the document
133  from being loaded and the error handler doesn't throw an exception, the
134  constructor will throw xml::exception anyway.
135 
136  @param data The XML data to parse.
137  @param size The size of the XML data to parse.
138  @param on_error Handler called to process errors and warnings.
139 
140  @since 0.7.0
141  */
142  explicit document(const char *data, size_type size, error_handler& on_error = throw_on_error);
143 
144  /**
145  Copy construct a new XML document. The new document will be an exact
146  copy of the original.
147 
148  @param other The other document object to copy from.
149  */
150  document(const document& other);
151 
152  /**
153  Copy another document object into this one using the assignment
154  operator. This document object will be an exact copy of the other
155  document after the assignment.
156 
157  @param other The document to copy from.
158  @return *this.
159  */
160  document& operator=(const document& other);
161 
162  /**
163  Swap one xml::document object for another.
164 
165  @param other The other document to swap contents with.
166  */
167  void swap(document& other);
168 
169  /**
170  Clean up after an XML document object.
171  */
172  ~document();
173 
174  /**
175  Get a reference to the root node of this document. If no root node
176  has been set, the returned node will be a blank node. Take care to
177  bind the result to a reference so that you don't copy the whole node
178  tree!
179 
180  @return A const reference to the root node.
181  */
182  const node& get_root_node() const;
183 
184  /**
185  Get a reference to the root node of this document. If no root node
186  has been set, the returned node will be a blank node. Take care to
187  bind the result to a reference so that you don't copy the whole node
188  tree!
189 
190  @return A reference to the root node.
191  */
192  node& get_root_node();
193 
194  /**
195  Set the root node to the given node. A full copy is made and stored
196  in the document object.
197 
198  @param n The new root node to use.
199  */
200  void set_root_node(const node& n);
201 
202  /**
203  Get the XML version for this document. For generated documents, the
204  version will be the default. For parsed documents, this will be the
205  version from the XML declaration.
206 
207  @return The XML version string for this document.
208  */
209  const std::string& get_version() const;
210 
211  /**
212  Set the XML version number for this document. This version string
213  will be used when generating the XML output.
214 
215  @param version The version string to use, like "1.0".
216  */
217  void set_version(const char *version);
218 
219  /**
220  Get the XML encoding for this document. The default encoding is
221  ISO-8859-1.
222 
223  @return The encoding string.
224  */
225  const std::string& get_encoding() const;
226 
227  /**
228  Set the XML encoding string. If you don't set this, it will default
229  to ISO-8859-1.
230 
231  @param encoding The XML encoding to use.
232  */
233  void set_encoding(const char *encoding);
234 
235  /**
236  Find out if the current document is a standalone document. For
237  generated documents, this will be the default. For parsed documents
238  this will be set based on the XML declaration.
239 
240  @return True if this document is standalone.
241  @return False if this document is not standalone.
242  */
243  bool get_is_standalone() const;
244 
245  /**
246  Set the standalone flag. This will show up in the XML declaration of
247  the generated XML output.
248 
249  @param sa What to set the standalone flag to.
250  */
251  void set_is_standalone(bool sa);
252 
253  /**
254  Walk through the document and expand <xi:include> elements. For more
255  information, please see the W3C recommendation for XInclude:
256  http://www.w3.org/2001/XInclude
257 
258  The return value of this function may change to int after a bug has
259  been fixed in libxml2 (xmlXIncludeDoProcess).
260 
261  @return False if there was an error with substitutions.
262  @return True if there were no errors (with or without substitutions).
263  */
264  bool process_xinclude();
265 
266  /**
267  Test to see if this document has an internal subset. That is, DTD
268  data that is declared within the XML document itself.
269 
270  @return True if this document has an internal subset.
271  @return False otherwise.
272  */
273  bool has_internal_subset() const;
274 
275  /**
276  Test to see if this document has an external subset. That is, it
277  references a DTD from an external source, such as a file or URL.
278 
279  @return True if this document has an external subset.
280  @return False otherwise.
281  */
282  bool has_external_subset() const;
283 
284  /**
285  Validate this document against the DTD that has been attached to it.
286  This would happen at parse time if there was a !DOCTYPE definition.
287  If the DTD is valid, and the document is valid, this member function
288  will return true.
289 
290  If it returns false, you may want to send the document through
291  xmllint to get the actual error messages.
292 
293  @return True if the document is valid.
294  @return False if there was a problem with the DTD or XML doc.
295  */
296  bool validate();
297 
298  /**
299  Parse the given DTD and try to validate this document against it. If
300  the DTD is valid, and the document is valid, this member function
301  will return true.
302 
303  If it returns false, you may want to send the document through
304  xmllint to get the actual error messages.
305 
306  This member function will add the parsed DTD to this document as the
307  external subset after the validation. If there is already an external
308  DTD attached to this document it will be removed and deleted.
309 
310  @param dtdname A filename or URL for the DTD to use.
311  @return True if the document is valid.
312  @return False if there was a problem with the DTD or XML doc.
313  */
314  bool validate(const char *dtdname);
315 
316  /**
317  Returns the number of child nodes of this document. This will always
318  be at least one, since all xmlwrapp documents must have a root node.
319  This member function is useful to find out how many document children
320  there are, including processing instructions, comments, etc.
321 
322  @return The number of children nodes that this document has.
323  */
324  size_type size() const;
325 
326  /**
327  Get an iterator to the first child node of this document. If what you
328  really wanted was the root node (the first element) you should use
329  the get_root_node() member function instead.
330 
331  @return A xml::node::iterator that points to the first child node.
332  @return An end iterator if there are no children in this document.
333  */
334  node::iterator begin();
335 
336  /**
337  Get a const_iterator to the first child node of this document. If
338  what you really wanted was the root node (the first element) you
339  should use the get_root_node() member function instead.
340 
341  @return A xml::node::const_iterator that points to the first child node.
342  @return An end const_iterator if there are no children in this document.
343  */
344  node::const_iterator begin() const;
345 
346  /**
347  Get an iterator that points one past the last child node for this
348  document.
349 
350  @return An end xml::node::iterator.
351  */
352  node::iterator end();
353 
354  /**
355  Get a const_iterator that points one past the last child node for
356  this document.
357 
358  @return An end xml::node::const_iterator.
359  */
360  node::const_iterator end() const;
361 
362  /**
363  Add a child xml::node to this document. You should not add an element
364  type node, since there can only be one root node. This member
365  function is only useful for adding processing instructions, comments,
366  etc. If you do try to add a node of type element, an exception will
367  be thrown.
368 
369  @param child The child xml::node to add.
370  */
371  void push_back (const node &child);
372 
373  /**
374  Insert a new child node. The new node will be inserted at the end of
375  the child list. This is similar to the xml::document::push_back member
376  function except that an iterator to the inserted node is returned.
377 
378  The rules from the push_back member function apply here. Don't add a
379  node of type element.
380 
381  @param n The node to insert as a child of this document.
382  @return An iterator that points to the newly inserted node.
383  @see xml::document::push_back
384  */
385  node::iterator insert (const node &n);
386 
387  /**
388  Insert a new child node. The new node will be inserted before the
389  node pointed to by the given iterator.
390 
391  The rules from the push_back member function apply here. Don't add a
392  node of type element.
393 
394  @param position An iterator that points to the location where the new node should be inserted (before it).
395  @param n The node to insert as a child of this document.
396  @return An iterator that points to the newly inserted node.
397  @see xml::document::push_back
398  */
399  node::iterator insert(node::iterator position, const node &n);
400 
401  /**
402  Replace the node pointed to by the given iterator with another node.
403  The old node will be removed, including all its children, and
404  replaced with the new node. This will invalidate any iterators that
405  point to the node to be replaced, or any pointers or references to
406  that node.
407 
408  Do not replace the root node with this member function. The same
409  rules that apply to push_back apply here. If you try to replace a
410  node of type element, an exception will be thrown.
411 
412  @param old_node An iterator that points to the node that should be removed.
413  @param new_node The node to put in old_node's place.
414  @return An iterator that points to the new node.
415  @see xml::document::push_back
416  */
417  node::iterator replace(node::iterator old_node, const node& new_node);
418 
419  /**
420  Erase the node that is pointed to by the given iterator. The node
421  and all its children will be removed from this document. This will
422  invalidate any iterators that point to the node to be erased, or any
423  pointers or references to that node.
424 
425  Do not remove the root node using this member function. The same
426  rules that apply to push_back apply here. If you try to erase the
427  root node, an exception will be thrown.
428 
429  @param to_erase An iterator that points to the node to be erased.
430  @return An iterator that points to the node after the one being erased.
431  @see xml::document::push_back
432  */
433  node::iterator erase(node::iterator to_erase);
434 
435  /**
436  Erase all nodes in the given range, from first to last. This will
437  invalidate any iterators that point to the nodes to be erased, or any
438  pointers or references to those nodes.
439 
440  Do not remove the root node using this member function. The same
441  rules that apply to push_back apply here. If you try to erase the
442  root node, an exception will be thrown.
443 
444  @param first The first node in the range to be removed.
445  @param last An iterator that points one past the last node to erase. Think xml::document::end().
446  @return An iterator that points to the node after the last one being erased.
447  @see xml::document::push_back
448  */
449  node::iterator erase(node::iterator first, node::iterator last);
450 
451  /**
452  Convert the XML document tree into XML text data and place it into
453  the given string.
454 
455  @param s The string to place the XML text data into.
456  */
457  void save_to_string(std::string& s) const;
458 
459  /**
460  Convert the XML document tree into XML text data and place it into
461  the given filename.
462 
463  @param filename The name of the file to place the XML text data into.
464  @param compression_level 0 is no compression, 1-9 allowed, where 1 is
465  for better speed, and 9 is for smaller size.
466  @return True if the data was saved successfully.
467  @return False otherwise.
468  */
469  bool save_to_file(const char *filename, int compression_level = 0) const;
470 
471  /**
472  Convert the XML document tree into XML text data and then insert it
473  into the given stream.
474 
475  @param stream The stream to insert the XML into.
476  @param doc The document to insert.
477  @return The stream from the first parameter.
478  */
479  friend XMLWRAPP_API std::ostream& operator<< (std::ostream &stream, const document &doc);
480 
481 private:
482  impl::doc_impl *pimpl_; // opaque implementation pointer; impl::doc_impl is only forward-declared in this header
483 
484  void set_doc_data (void *data);
485  void set_doc_data_from_xslt (void *data, xslt::impl::result *xr);
486  void* get_doc_data();
487  void* get_doc_data_read_only() const;
488  void* release_doc_data();
489 
490  friend class tree_parser;
491  friend class schema;
492  friend class xslt::stylesheet;
493 };
494 
495 } // namespace xml
496 
497 #endif // _xmlwrapp_document_h_