View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html.xpath;
16  
17  import javax.xml.transform.ErrorListener;
18  import javax.xml.transform.TransformerException;
19  
20  import org.htmlunit.xpath.Expression;
21  import org.htmlunit.xpath.XPathContext;
22  import org.htmlunit.xpath.compiler.Compiler;
23  import org.htmlunit.xpath.compiler.FunctionTable;
24  import org.htmlunit.xpath.compiler.XPathParser;
25  import org.htmlunit.xpath.objects.XObject;
26  import org.htmlunit.xpath.res.XPATHErrorResources;
27  import org.htmlunit.xpath.res.XPATHMessages;
28  import org.htmlunit.xpath.xml.utils.DefaultErrorHandler;
29  import org.htmlunit.xpath.xml.utils.PrefixResolver;
30  import org.htmlunit.xpath.xml.utils.WrappedRuntimeException;
31  
32  /**
33   * XPath adapter implementation for HtmlUnit.
34   *
35   * @author Ahmed Ashour
36   * @author Ronald Brill
37   */
38  public class XPathAdapter {
39  
40      private enum STATE {
41          DEFAULT,
42          DOUBLE_QUOTED,
43          SINGLE_QUOTED,
44          ATTRIB
45      }
46  
47      private final Expression mainExp_;
48      private FunctionTable funcTable_;
49  
50      /**
51       * Initiates the function table.
52       */
53      private void initFunctionTable() {
54          funcTable_ = new FunctionTable();
55      }
56  
57      /**
58       * Constructor.
59       * @param exprString the XPath expression
60       * @param prefixResolver a prefix resolver to use to resolve prefixes to namespace URIs
61       * @param caseSensitive whether the attributes should be case-sensitive
62       * @throws TransformerException if a syntax or other error occurs
63       */
64      public XPathAdapter(final String exprString, final PrefixResolver prefixResolver, final boolean caseSensitive)
65                  throws TransformerException {
66  
67          initFunctionTable();
68  
69          final ErrorListener errorHandler = new DefaultErrorHandler();
70          final XPathParser parser = new XPathParser(errorHandler);
71          final Compiler compiler = new Compiler(errorHandler, funcTable_);
72  
73          final String expression = preProcessXPath(exprString, caseSensitive);
74          parser.initXPath(compiler, expression, prefixResolver);
75  
76          mainExp_ = compiler.compile(0);
77      }
78  
79      /**
80       * Pre-processes the specified case-insensitive XPath expression before passing it to the engine.
81       * The current implementation lower-cases the attribute name, and anything outside the brackets.
82       *
83       * @param xpath the XPath expression to pre-process
84       * @param caseSensitive whether or not the XPath expression should be case-sensitive
85       * @return the processed XPath expression
86       */
87      private static String preProcessXPath(final String xpath, final boolean caseSensitive) {
88          if (caseSensitive) {
89              return xpath;
90          }
91  
92          final char[] charArray = xpath.toCharArray();
93          STATE state = STATE.DEFAULT;
94  
95          final int length = charArray.length;
96          int insideBrackets = 0;
97          for (int i = 0; i < length; i++) {
98              final char ch = charArray[i];
99              switch (ch) {
100                 case '@':
101                     if (state == STATE.DEFAULT) {
102                         state = STATE.ATTRIB;
103                     }
104                     break;
105 
106                 case '"':
107                     if (state == STATE.DEFAULT || state == STATE.ATTRIB) {
108                         state = STATE.DOUBLE_QUOTED;
109                     }
110                     else if (state == STATE.DOUBLE_QUOTED) {
111                         state = STATE.DEFAULT;
112                     }
113                     break;
114 
115                 case '\'':
116                     if (state == STATE.DEFAULT || state == STATE.ATTRIB) {
117                         state = STATE.SINGLE_QUOTED;
118                     }
119                     else if (state == STATE.SINGLE_QUOTED) {
120                         state = STATE.DEFAULT;
121                     }
122                     break;
123 
124                 case '[':
125                 case '(':
126                     if (state == STATE.ATTRIB) {
127                         state = STATE.DEFAULT;
128                     }
129                     insideBrackets++;
130                     break;
131 
132                 case ']':
133                 case ')':
134                     if (state == STATE.ATTRIB) {
135                         state = STATE.DEFAULT;
136                     }
137                     insideBrackets--;
138                     break;
139 
140                 default:
141                     if (insideBrackets == 0
142                             && state != STATE.SINGLE_QUOTED
143                             && state != STATE.DOUBLE_QUOTED) {
144                         charArray[i] = Character.toLowerCase(ch);
145                     }
146                     else if (state == STATE.ATTRIB) {
147                         charArray[i] = Character.toLowerCase(ch);
148                     }
149 
150                     if (state == STATE.ATTRIB) {
151                         final boolean isValidAttribChar =
152                                 ('a' <= ch && ch <= 'z')
153                                 || ('A' <= ch && ch <= 'Z')
154                                 || ('0' <= ch && ch <= '9')
155                                 || ('\u00C0' <= ch && ch <= '\u00D6')
156                                 || ('\u00D8' <= ch && ch <= '\u00F6')
157                                 || ('\u00F8' <= ch && ch <= '\u02FF')
158                                 || ('\u0370' <= ch && ch <= '\u037D')
159                                 || ('\u037F' <= ch && ch <= '\u1FFF')
160                                 || ('\u200C' <= ch && ch <= '\u200D')
161                                 || ('\u2C00' <= ch && ch <= '\u2FEF')
162                                 || ('\u3001' <= ch && ch <= '\uD7FF')
163                                 || ('\uF900' <= ch && ch <= '\uFDCF')
164                                 || ('\uFDF0' <= ch && ch <= '\uFFFD')
165                                 // [#x10000-#xEFFFF]
166                                 || ('\u00B7' == ch)
167                                 || ('\u0300' <= ch && ch <= '\u036F')
168                                 || ('\u203F' <= ch && ch <= '\u2040')
169                                 || ('_' == ch)
170                                 || ('-' == ch)
171                                 || ('.' == ch);
172 
173                         if (!isValidAttribChar) {
174                             state = STATE.DEFAULT;
175                         }
176                     }
177             }
178         }
179         return new String(charArray);
180     }
181 
182     /**
183      * Given an expression and a context, evaluate the XPath and return the result.
184      *
185      * @param xpathContext the execution context
186      * @param contextNode the node that "." expresses
187      * @param namespaceContext the context in which namespaces in the XPath are supposed to be expanded
188      * @return the result of the XPath or null if callbacks are used
189      * @throws TransformerException if the error condition is severe enough to halt processing
190      */
191     @SuppressWarnings("PMD.PreserveStackTrace")
192     XObject execute(final XPathContext xpathContext, final int contextNode,
193         final PrefixResolver namespaceContext) throws TransformerException {
194         xpathContext.pushNamespaceContext(namespaceContext);
195 
196         xpathContext.pushCurrentNodeAndExpression(contextNode);
197 
198         XObject xobj = null;
199 
200         try {
201             xobj = mainExp_.execute(xpathContext);
202         }
203         catch (final TransformerException ex) {
204             ex.setLocator(mainExp_);
205             final ErrorListener el = xpathContext.getErrorListener();
206             if (null != el) {
207                 el.error(ex);
208             }
209             else {
210                 throw ex;
211             }
212         }
213         catch (final Exception e) {
214             Exception unwrapped = e;
215             while (unwrapped instanceof WrappedRuntimeException) {
216                 unwrapped = ((WrappedRuntimeException) unwrapped).getException();
217             }
218             String msg = unwrapped.getMessage();
219 
220             if (msg == null || msg.isEmpty()) {
221                 msg = XPATHMessages.createXPATHMessage(XPATHErrorResources.ER_XPATH_ERROR, null);
222             }
223             final TransformerException te = new TransformerException(msg, mainExp_, unwrapped);
224             final ErrorListener el = xpathContext.getErrorListener();
225             if (null != el) {
226                 el.fatalError(te);
227             }
228             else {
229                 throw te;
230             }
231         }
232         finally {
233             xpathContext.popNamespaceContext();
234             xpathContext.popCurrentNodeAndExpression();
235         }
236 
237         return xobj;
238     }
239 }