View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.xml;
16  
17  import static java.nio.charset.StandardCharsets.UTF_8;
18  
19  import java.io.IOException;
20  import java.nio.charset.Charset;
21  import java.util.HashMap;
22  import java.util.List;
23  import java.util.Map;
24  
25  import javax.xml.parsers.ParserConfigurationException;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.htmlunit.SgmlPage;
30  import org.htmlunit.WebResponse;
31  import org.htmlunit.WebWindow;
32  import org.htmlunit.html.DomElement;
33  import org.htmlunit.html.DomProcessingInstruction;
34  import org.htmlunit.util.MimeType;
35  import org.htmlunit.util.XmlUtils;
36  import org.w3c.dom.Attr;
37  import org.w3c.dom.DOMConfiguration;
38  import org.w3c.dom.DOMImplementation;
39  import org.w3c.dom.Document;
40  import org.w3c.dom.DocumentType;
41  import org.w3c.dom.Element;
42  import org.w3c.dom.EntityReference;
43  import org.w3c.dom.Node;
44  import org.xml.sax.SAXException;
45  
46  /**
47   * A page that will be returned for response with content type "text/xml".
48   *
49   * @author Marc Guillemot
50   * @author David K. Taylor
51   * @author Ahmed Ashour
52   * @author Frank Danek
53   */
54  public class XmlPage extends SgmlPage {
55  
56      private static final Log LOG = LogFactory.getLog(XmlPage.class);
57  
58      private Node node_;
59  
60      /**
61       * Creates an instance.
62       * A warning is logged if an exception is thrown while parsing the XML content
63       * (for instance when the content is not a valid XML and can't be parsed).
64       *
65       * @param webResponse the response from the server
66       * @param enclosingWindow the window that holds the page
67       * @throws IOException if the page could not be created
68       */
69      public XmlPage(final WebResponse webResponse, final WebWindow enclosingWindow) throws IOException {
70          this(webResponse, enclosingWindow, true);
71      }
72  
73      /**
74       * Creates an instance.
75       * A warning is logged if an exception is thrown while parsing the XML content
76       * (for instance when the content is not a valid XML and can't be parsed).
77       *
78       * @param node the node to initialize this page with
79       * @param enclosingWindow the window that holds the page
80       */
81      public XmlPage(final Node node, final WebWindow enclosingWindow) {
82          super(null, enclosingWindow);
83          node_ = node;
84          if (node_ != null) {
85              XmlUtils.appendChild(this, this, node_, true);
86          }
87      }
88  
89      /**
90       * Creates an instance.
91       * A warning is logged if an exception is thrown while parsing the XML content
92       * (for instance when the content is not a valid XML and can't be parsed).
93       *
94       * @param webResponse the response from the server
95       * @param enclosingWindow the window that holds the page
96       * @param ignoreSAXException Whether to ignore {@link SAXException} or throw it as {@link IOException}
97       * @throws IOException if the page could not be created
98       */
99      public XmlPage(final WebResponse webResponse, final WebWindow enclosingWindow, final boolean ignoreSAXException)
100         throws IOException {
101         this(webResponse, enclosingWindow, ignoreSAXException, true);
102     }
103 
104     /**
105      * Creates an instance.
106      * A warning is logged if an exception is thrown while parsing the XML content
107      * (for instance when the content is not a valid XML and can't be parsed).
108      *
109      * @param webResponse the response from the server
110      * @param enclosingWindow the window that holds the page
111      * @param ignoreSAXException Whether to ignore {@link SAXException} or throw it as {@link IOException}
112      * @param handleXHTMLAsHTML if true elements from the XHTML namespace are handled as HTML elements instead of
113      *     DOM elements
114      * @throws IOException if the page could not be created
115      */
116     public XmlPage(final WebResponse webResponse, final WebWindow enclosingWindow, final boolean ignoreSAXException,
117         final boolean handleXHTMLAsHTML) throws IOException {
118         super(webResponse, enclosingWindow);
119 
120         try {
121             try {
122                 final Document document = XmlUtils.buildDocument(webResponse);
123                 node_ = document.getFirstChild();
124             }
125             catch (final SAXException e) {
126                 if (LOG.isWarnEnabled()) {
127                     LOG.warn("Failed parsing XML document '" + webResponse.getWebRequest().getUrl() + "'", e);
128                 }
129                 if (!ignoreSAXException) {
130                     throw new IOException(
131                             "Failed parsing XML document '" + webResponse.getWebRequest().getUrl() + "'", e);
132                 }
133             }
134         }
135         catch (final ParserConfigurationException e) {
136             if (LOG.isWarnEnabled()) {
137                 if (null == webResponse) {
138                     LOG.warn("Failed parsing XML empty document: " + e.getMessage(), e);
139                 }
140                 else {
141                     LOG.warn("Failed parsing XML empty document '" + webResponse.getWebRequest().getUrl() + "'", e);
142                 }
143             }
144         }
145 
146         final Map<Integer, List<String>> attributesOrderMap;
147         if (node_ != null) {
148             attributesOrderMap = XmlUtils.getAttributesOrderMap(node_.getOwnerDocument());
149         }
150         else {
151             attributesOrderMap = null;
152         }
153         for (Node node = node_; node != null; node = node.getNextSibling()) {
154             XmlUtils.appendChild(this, this, node, handleXHTMLAsHTML, attributesOrderMap);
155         }
156     }
157 
158     /**
159      * {@inheritDoc}
160      */
161     @Override
162     public void initialize() throws IOException {
163         // nothing to do here
164     }
165 
166     /**
167      * {@inheritDoc}
168      */
169     @Override
170     public boolean hasCaseSensitiveTagNames() {
171         return true;
172     }
173 
174     /**
175      * Returns the DOM representation of the XML content.
176      * @return {@code null} if the content couldn't be parsed
177      */
178     public Document getXmlDocument() {
179         if (node_ != null) {
180             return node_.getOwnerDocument();
181         }
182         return null;
183     }
184 
185     /**
186      * {@inheritDoc}
187      * Not yet implemented.
188      */
189     @Override
190     public Node adoptNode(final Node source) {
191         throw new UnsupportedOperationException("XmlPage.adoptNode is not yet implemented.");
192     }
193 
194     /**
195      * {@inheritDoc}
196      * Not yet implemented.
197      */
198     @Override
199     public Attr createAttributeNS(final String namespaceURI, final String qualifiedName) {
200         throw new UnsupportedOperationException("XmlPage.createAttributeNS is not yet implemented.");
201     }
202 
203     /**
204      * {@inheritDoc}
205      */
206     @Override
207     public DomElement createElement(final String tagName) {
208         return createElementNS(null, tagName);
209     }
210 
211     /**
212      * {@inheritDoc}
213      */
214     @Override
215     public DomElement createElementNS(final String namespaceURI, final String qualifiedName) {
216         return new DomElement(namespaceURI, qualifiedName, this, new HashMap<>());
217     }
218 
219     /**
220      * {@inheritDoc}
221      * Not yet implemented.
222      */
223     @Override
224     public EntityReference createEntityReference(final String name) {
225         throw new UnsupportedOperationException("XmlPage.createEntityReference is not yet implemented.");
226     }
227 
228     /**
229      * {@inheritDoc}
230      */
231     @Override
232     public DomProcessingInstruction createProcessingInstruction(final String target, final String data) {
233         return new DomProcessingInstruction(this, target, data);
234     }
235 
236     /**
237      * {@inheritDoc}
238      * Not yet implemented.
239      */
240     @Override
241     public String getDocumentURI() {
242         throw new UnsupportedOperationException("XmlPage.getDocumentURI is not yet implemented.");
243     }
244 
245     /**
246      * {@inheritDoc}
247      * Not yet implemented.
248      */
249     @Override
250     public DOMConfiguration getDomConfig() {
251         throw new UnsupportedOperationException("XmlPage.getDomConfig is not yet implemented.");
252     }
253 
254     /**
255      * {@inheritDoc}
256      * Not yet implemented.
257      */
258     @Override
259     public Element getElementById(final String elementId) {
260         throw new UnsupportedOperationException("XmlPage.getElementById is not yet implemented.");
261     }
262 
263     /**
264      * {@inheritDoc}
265      * Not yet implemented.
266      */
267     @Override
268     public DOMImplementation getImplementation() {
269         throw new UnsupportedOperationException("XmlPage.getImplementation is not yet implemented.");
270     }
271 
272     /**
273      * {@inheritDoc}
274      * Not yet implemented.
275      */
276     @Override
277     public String getInputEncoding() {
278         throw new UnsupportedOperationException("XmlPage.getInputEncoding is not yet implemented.");
279     }
280 
281     /**
282      * {@inheritDoc}
283      * Not yet implemented.
284      */
285     @Override
286     public boolean getStrictErrorChecking() {
287         throw new UnsupportedOperationException("XmlPage.getStrictErrorChecking is not yet implemented.");
288     }
289 
290     /**
291      * {@inheritDoc}
292      */
293     @Override
294     public String getXmlEncoding() {
295         return null;
296     }
297 
298     /**
299      * {@inheritDoc}
300      */
301     @Override
302     public boolean getXmlStandalone() {
303         return false;
304     }
305 
306     /**
307      * {@inheritDoc}
308      */
309     @Override
310     public String getXmlVersion() {
311         return "1.0";
312     }
313 
314     /**
315      * {@inheritDoc}
316      * Not yet implemented.
317      */
318     @Override
319     public Node importNode(final Node importedNode, final boolean deep) {
320         throw new UnsupportedOperationException("XmlPage.importNode is not yet implemented.");
321     }
322 
323     /**
324      * {@inheritDoc}
325      * Not yet implemented.
326      */
327     @Override
328     public Node renameNode(final Node n, final String namespaceURI, final String qualifiedName) {
329         throw new UnsupportedOperationException("XmlPage.renameNode is not yet implemented.");
330     }
331 
332     /**
333      * {@inheritDoc}
334      * Not yet implemented.
335      */
336     @Override
337     public void setDocumentURI(final String documentURI) {
338         throw new UnsupportedOperationException("XmlPage.setDocumentURI is not yet implemented.");
339     }
340 
341     /**
342      * {@inheritDoc}
343      * Not yet implemented.
344      */
345     @Override
346     public void setStrictErrorChecking(final boolean strictErrorChecking) {
347         throw new UnsupportedOperationException("XmlPage.setStrictErrorChecking is not yet implemented.");
348     }
349 
350     /**
351      * {@inheritDoc}
352      * Not yet implemented.
353      */
354     @Override
355     public void setXmlStandalone(final boolean xmlStandalone) {
356         throw new UnsupportedOperationException("XmlPage.setXmlStandalone is not yet implemented.");
357     }
358 
359     /**
360      * {@inheritDoc}
361      * Not yet implemented.
362      */
363     @Override
364     public void setXmlVersion(final String xmlVersion) {
365         throw new UnsupportedOperationException("XmlPage.setXmlVersion is not yet implemented.");
366     }
367 
368     /**
369      * {@inheritDoc}
370      */
371     @Override
372     public Charset getCharset() {
373         return UTF_8;
374     }
375 
376     /**
377      * {@inheritDoc}
378      */
379     @Override
380     public String getContentType() {
381         return MimeType.APPLICATION_XML;
382     }
383 
384     /**
385      * {@inheritDoc}
386      */
387     @Override
388     public void setDocumentType(final DocumentType type) {
389         super.setDocumentType(type);
390     }
391 
392     /**
393      * {@inheritDoc}
394      */
395     @Override
396     public void setNodeValue(final String value) {
397         // Default behavior is to do nothing, overridden in some subclasses
398     }
399 
400     /**
401      * {@inheritDoc}
402      */
403     @Override
404     public void setPrefix(final String prefix) {
405         // Empty.
406     }
407 }