View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.javascript.host.xml;
16  
17  import static org.htmlunit.BrowserVersionFeatures.JS_XSLT_TRANSFORM_INDENT;
18  
19  import java.io.ByteArrayOutputStream;
20  import java.nio.charset.Charset;
21  import java.nio.charset.StandardCharsets;
22  import java.util.Collections;
23  import java.util.HashMap;
24  import java.util.Map;
25  
26  import javax.xml.XMLConstants;
27  import javax.xml.parsers.DocumentBuilderFactory;
28  import javax.xml.transform.OutputKeys;
29  import javax.xml.transform.Source;
30  import javax.xml.transform.Transformer;
31  import javax.xml.transform.TransformerFactory;
32  import javax.xml.transform.dom.DOMResult;
33  import javax.xml.transform.dom.DOMSource;
34  import javax.xml.transform.stream.StreamResult;
35  
36  import org.htmlunit.SgmlPage;
37  import org.htmlunit.WebResponse;
38  import org.htmlunit.WebResponseData;
39  import org.htmlunit.html.DomDocumentFragment;
40  import org.htmlunit.html.DomNode;
41  import org.htmlunit.html.DomText;
42  import org.htmlunit.http.HttpStatus;
43  import org.htmlunit.javascript.HtmlUnitScriptable;
44  import org.htmlunit.javascript.JavaScriptEngine;
45  import org.htmlunit.javascript.configuration.JsxClass;
46  import org.htmlunit.javascript.configuration.JsxConstructor;
47  import org.htmlunit.javascript.configuration.JsxFunction;
48  import org.htmlunit.javascript.host.dom.Document;
49  import org.htmlunit.javascript.host.dom.DocumentFragment;
50  import org.htmlunit.javascript.host.dom.Node;
51  import org.htmlunit.util.EncodingSniffer;
52  import org.htmlunit.util.XmlUtils;
53  import org.htmlunit.xml.XmlPage;
54  import org.w3c.dom.NodeList;
55  
56  /**
57   * A JavaScript object for {@code XSLTProcessor}.
58   *
59   * @author Ahmed Ashour
60   * @author Ronald Brill
61   */
62  @JsxClass
63  public class XSLTProcessor extends HtmlUnitScriptable {
64  
65      private Node style_;
66      private final Map<String, Object> parameters_ = new HashMap<>();
67  
68      /**
69       * JavaScript constructor.
70       */
71      @JsxConstructor
72      public void jsConstructor() {
73          // nothing to do
74      }
75  
76      /**
77       * Imports the specified stylesheet into this XSLTProcessor for transformations. The specified node
78       * may be either a document node or an element node. If it is a document node, then the document can
79       * contain either a XSLT stylesheet or a LRE stylesheet. If it is an element node, it must be the
80       * xsl:stylesheet (or xsl:transform) element of an XSLT stylesheet.
81       *
82       * @param style the root-node of an XSLT stylesheet (may be a document node or an element node)
83       */
84      @JsxFunction
85      public void importStylesheet(final Node style) {
86          style_ = style;
87      }
88  
89      /**
90       * Transforms the node source applying the stylesheet given by the importStylesheet() function.
91       * The owner document of the output node owns the returned document fragment.
92       *
93       * @param source the node to be transformed
94       * @return the result of the transformation
95       */
96      @JsxFunction
97      public XMLDocument transformToDocument(final Node source) {
98          final XMLDocument doc = new XMLDocument();
99          doc.setPrototype(getPrototype(doc.getClass()));
100         doc.setParentScope(getParentScope());
101 
102         final Object transformResult = transform(source);
103         final org.w3c.dom.Node node;
104         if (transformResult instanceof org.w3c.dom.Node) {
105             final org.w3c.dom.Node transformedDoc = (org.w3c.dom.Node) transformResult;
106             node = transformedDoc.getFirstChild();
107         }
108         else {
109             node = null;
110         }
111         final XmlPage page = new XmlPage(node, getWindow().getWebWindow());
112         doc.setDomNode(page);
113         return doc;
114     }
115 
116     /**
117      * @return {@link Node} or {@link String}
118      */
119     private Object transform(final Node source) {
120         try {
121             final DomNode sourceDomNode = source.getDomNodeOrDie();
122             Source xmlSource = new DOMSource(sourceDomNode);
123 
124             final DomNode xsltDomNode = style_.getDomNodeOrDie();
125             final Source xsltSource = new DOMSource(xsltDomNode);
126 
127             final TransformerFactory transformerFactory = TransformerFactory.newInstance();
128 
129             // By default, the JDK turns on FSP for DOM and SAX parsers and XML schema validators,
130             // which sets a number of processing limits on the processors. Conversely, by default,
131             // the JDK turns off FSP for transformers and XPath, which enables extension functions for XSLT and XPath.
132             transformerFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
133 
134             final SgmlPage page = sourceDomNode.getPage();
135             if (page != null && page.getWebClient().getBrowserVersion()
136                                             .hasFeature(JS_XSLT_TRANSFORM_INDENT)) {
137                 final DomNode outputNode = findOutputNode(xsltDomNode);
138                 if (outputNode != null) {
139                     final org.w3c.dom.Node indentNode = outputNode.getAttributes().getNamedItem("indent");
140                     if (indentNode != null && "yes".equalsIgnoreCase(indentNode.getNodeValue())) {
141                         try {
142                             transformerFactory.setAttribute("indent-number", Integer.valueOf(2));
143                         }
144                         catch (final IllegalArgumentException ignored) {
145                             // ignore
146                         }
147                         final Transformer transformer = transformerFactory.newTransformer(xsltSource);
148                         transformer.setOutputProperty(OutputKeys.INDENT, "yes");
149                         try {
150                             transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
151                         }
152                         catch (final IllegalArgumentException ignored) {
153                             // ignore
154                         }
155 
156                         for (final Map.Entry<String, Object> entry : parameters_.entrySet()) {
157                             transformer.setParameter(entry.getKey(), entry.getValue());
158                         }
159 
160                         // hack to preserve indention
161                         // the transformer only accepts the OutputKeys.INDENT setting if
162                         // the StreamResult is used
163                         try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
164                             transformer.transform(xmlSource, new StreamResult(out));
165                             final WebResponseData data = new WebResponseData(out.toByteArray(),
166                                     HttpStatus.OK_200, HttpStatus.OK_200_MSG, Collections.emptyList());
167                             final WebResponse response = new WebResponse(data, null, 0) {
168 
169                                 // XmlUtils.buildDocument reads the out stream using the contentCharset
170                                 // we have to provide the correct one
171                                 @Override
172                                 public Charset getContentCharset() {
173                                     final Charset cs = EncodingSniffer.toCharset(
174                                             transformer.getOutputProperty(OutputKeys.ENCODING));
175                                     if (cs == null) {
176                                         return StandardCharsets.UTF_8;
177                                     }
178                                     return cs;
179                                 }
180                             };
181                             return XmlUtils.buildDocument(response);
182                         }
183                     }
184                 }
185             }
186 
187             final Transformer transformer = transformerFactory.newTransformer(xsltSource);
188             transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
189 
190             for (final Map.Entry<String, Object> entry : parameters_.entrySet()) {
191                 transformer.setParameter(entry.getKey(), entry.getValue());
192             }
193 
194             final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
195             final org.w3c.dom.Document containerDocument = factory.newDocumentBuilder().newDocument();
196             final org.w3c.dom.Element containerElement = containerDocument.createElement("container");
197             containerDocument.appendChild(containerElement);
198 
199             final DOMResult result = new DOMResult(containerElement);
200             transformer.transform(xmlSource, result);
201 
202             final org.w3c.dom.Node transformedNode = result.getNode();
203             final org.w3c.dom.Node transformedFirstChild = transformedNode.getFirstChild();
204             if (transformedFirstChild != null && transformedFirstChild.getNodeType() == Node.ELEMENT_NODE) {
205                 return transformedNode;
206             }
207 
208             // output is not DOM (text)
209             xmlSource = new DOMSource(source.getDomNodeOrDie());
210             try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
211                 transformer.transform(xmlSource, new StreamResult(out));
212 
213                 final Charset cs = EncodingSniffer.toCharset(transformer.getOutputProperty(OutputKeys.ENCODING));
214                 if (cs == null) {
215                     return new String(out.toByteArray(), StandardCharsets.UTF_8);
216                 }
217                 return new String(out.toByteArray(), cs);
218             }
219         }
220         catch (final RuntimeException e) {
221             throw e;
222         }
223         catch (final Exception e) {
224             throw JavaScriptEngine.reportRuntimeError("Exception: " + e);
225         }
226     }
227 
228     /**
229      * Transforms the node source applying the stylesheet given by the importStylesheet() function.
230      * The owner document of the output node owns the returned document fragment.
231      * @param source the node to be transformed
232      * @param output This document is used to generate the output
233      * @return the result of the transformation
234      */
235     @JsxFunction
236     public DocumentFragment transformToFragment(final Node source, final Object output) {
237         final SgmlPage page = (SgmlPage) ((Document) output).getDomNodeOrDie();
238 
239         final DomDocumentFragment fragment = page.createDocumentFragment();
240         final DocumentFragment rv = new DocumentFragment();
241         rv.setPrototype(getPrototype(rv.getClass()));
242         rv.setParentScope(getParentScope());
243         rv.setDomNode(fragment);
244 
245         final Object result = transform(source);
246         if (result instanceof org.w3c.dom.Node) {
247             final SgmlPage parentPage = fragment.getPage();
248             final NodeList children = ((org.w3c.dom.Node) result).getChildNodes();
249             for (int i = 0; i < children.getLength(); i++) {
250                 XmlUtils.appendChild(parentPage, fragment, children.item(i), true);
251             }
252         }
253         else {
254             final DomText text = new DomText(fragment.getPage(), (String) result);
255             fragment.appendChild(text);
256         }
257 
258         return rv;
259     }
260 
261     /**
262      * Sets a parameter to be used in subsequent transformations with this nsIXSLTProcessor.
263      * If the parameter doesn't exist in the stylesheet the parameter will be ignored.
264      * @param namespaceURI the namespaceURI of the XSLT parameter
265      * @param localName the local name of the XSLT parameter
266      * @param value the new value of the XSLT parameter
267      */
268     @JsxFunction
269     public void setParameter(final String namespaceURI, final String localName, final Object value) {
270         parameters_.put(getQualifiedName(namespaceURI, localName), value);
271     }
272 
273     /**
274      * Gets a parameter if previously set by setParameter. Returns null otherwise.
275      * @param namespaceURI the namespaceURI of the XSLT parameter
276      * @param localName the local name of the XSLT parameter
277      * @return the value of the XSLT parameter
278      */
279     @JsxFunction
280     public Object getParameter(final String namespaceURI, final String localName) {
281         return parameters_.get(getQualifiedName(namespaceURI, localName));
282     }
283 
284     private static String getQualifiedName(final String namespaceURI, final String localName) {
285         final String qualifiedName;
286         if (namespaceURI != null && !namespaceURI.isEmpty() && !"null".equals(namespaceURI)) {
287             qualifiedName = '{' + namespaceURI + '}' + localName;
288         }
289         else {
290             qualifiedName = localName;
291         }
292         return qualifiedName;
293     }
294 
295     private static DomNode findOutputNode(final DomNode xsltDomNode) {
296         for (final DomNode child : xsltDomNode.getChildren()) {
297             if ("output".equals(child.getLocalName())) {
298                 return child;
299             }
300 
301             for (final DomNode child1 : child.getChildren()) {
302                 if ("output".equals(child1.getLocalName())) {
303                     return child1;
304                 }
305             }
306         }
307         return null;
308     }
309 }