View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html.parser;
16  
17  import java.io.IOException;
18  
19  import org.htmlunit.SgmlPage;
20  import org.htmlunit.WebClient;
21  import org.htmlunit.WebResponse;
22  import org.htmlunit.html.DomNode;
23  import org.htmlunit.html.ElementFactory;
24  import org.htmlunit.html.HtmlPage;
25  import org.xml.sax.SAXException;
26  
27  /**
28   * <p>Interface for the parser used to parse HTML into a HtmlUnit-specific DOM (HU-DOM) tree.</p>
29   *
30   * @author Christian Sell
31   * @author David K. Taylor
32   * @author Chris Erskine
33   * @author Ahmed Ashour
34   * @author Marc Guillemot
35   * @author Ethan Glasser-Camp
36   * @author Sudhan Moghe
37   * @author Ronald Brill
38   * @author Frank Danek
39   * @author Carsten Steul
40   */
41  public interface HTMLParser {
42  
43      /**
44       * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
45       * Returns a factory for creating HtmlElements for the given tag name.
46       * @param tagName an HTML element tag name
47       * @return a factory for creating HtmlElements representing the given tag
48       */
49      ElementFactory getFactory(String tagName);
50  
51      /**
52       * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
53       * Returns the factory for creating SvgElements; unlike {@link #getFactory(String)} no tag name is passed.
54       * @return a factory for creating SvgElements
55       */
56      ElementFactory getSvgFactory();
57  
58      /**
59       * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
60       *
61       * Returns the pre-registered element factory corresponding to the specified tag, or an UnknownElementFactory.
62       * @param page the page
63       * @param namespaceURI the namespace URI
64       * @param qualifiedName the qualified name
65       * @param insideSvg {@code true} if the node is inside an SVG node
66       * @param svgSupport {@code true} if called from JavaScript {@code createElementNS}
67       * @return the pre-registered element factory corresponding to the specified tag, or an UnknownElementFactory
68       */
69      ElementFactory getElementFactory(SgmlPage page, String namespaceURI,
70              String qualifiedName, boolean insideSvg, boolean svgSupport);
71  
72      /**
73       * Parses the HTML content from the given string into an object tree representation.
74       *
75       * @param webClient the {@link WebClient}
76       * @param parent the node to which the newly parsed nodes will be added
77       * @param context the context used to build the fragment context stack
78       * @param source the (X)HTML to be parsed
79       * @param createdByJavascript if {@code true}, the (script) tag was created by JavaScript
80       * @throws SAXException if a SAX error occurs
81       * @throws IOException if an IO error occurs
82       */
83      void parseFragment(WebClient webClient, DomNode parent, DomNode context, String source,
84              boolean createdByJavascript) throws SAXException, IOException;
85  
86      /**
87       * Parses the WebResponse into an object tree representation.
88       *
89       * @param webClient the {@link WebClient}
90       * @param webResponse the response data
91       * @param page the {@link HtmlPage} to which the parsed nodes are added
92       * @param xhtml if {@code true}, use the XHtml parser
93       * @param createdByJavascript if {@code true}, the (script) tag was created by JavaScript
94       * @throws IOException if there is an IO error
95       */
96      void parse(WebClient webClient, WebResponse webResponse, HtmlPage page,
97              boolean xhtml, boolean createdByJavascript) throws IOException;
98  }