View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html.parser;
16  
17  import java.io.IOException;
18  
19  import org.htmlunit.SgmlPage;
20  import org.htmlunit.WebClient;
21  import org.htmlunit.WebResponse;
22  import org.htmlunit.html.DomNode;
23  import org.htmlunit.html.ElementFactory;
24  import org.htmlunit.html.HtmlPage;
25  import org.xml.sax.SAXException;
26  
27  /**
28   * <p>Interface for the parser used to parse HTML into a HtmlUnit-specific DOM (HU-DOM) tree.</p>
29   *
30   * @author <a href="mailto:cse@dynabean.de">Christian Sell</a>
31   * @author David K. Taylor
32   * @author Chris Erskine
33   * @author Ahmed Ashour
34   * @author Marc Guillemot
35   * @author Ethan Glasser-Camp
36   * @author Sudhan Moghe
37   * @author Ronald Brill
38   * @author Frank Danek
39   * @author Carsten Steul
40   */
41  public interface HTMLParser {
42  
43      /**
44       * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
45       *
46       * @param tagName an HTML element tag name
47       * @return a factory for creating HtmlElements representing the given tag
48       */
49      ElementFactory getFactory(String tagName);
50  
51      /**
52       * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
53       *
54       * @return a factory for creating SvgElements representing the given tag
55       */
56      ElementFactory getSvgFactory();
57  
58      /**
59       * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
60       *
61       * Returns the pre-registered element factory corresponding to the specified tag, or an UnknownElementFactory.
62       * @param page the page
63       * @param namespaceURI the namespace URI
64       * @param qualifiedName the qualified name
65       * @param insideSvg is the node inside an SVG node or not
66       * @param svgSupport true if called from javascript createElementNS
67       * @return the pre-registered element factory corresponding to the specified tag, or an UnknownElementFactory
68       */
69      ElementFactory getElementFactory(SgmlPage page, String namespaceURI,
70              String qualifiedName, boolean insideSvg, boolean svgSupport);
71  
72      /**
73       * Parses the HTML content from the given string into an object tree representation.
74       *
75       * @param parent the parent for the new nodes
76       * @param source the (X)HTML to be parsed
77       * @throws SAXException if a SAX error occurs
78       * @throws IOException if an IO error occurs
79       *
80       * @deprecated as of version 4.12.0; use
81       *     {@link #parseFragment(WebClient, DomNode, DomNode, String, boolean)} instead.
82       */
83      @Deprecated
84      default void parseFragment(final DomNode parent, final String source) throws SAXException, IOException {
85          parseFragment(null, parent, parent, source, false);
86      }
87  
88      /**
89       * Parses the HTML content from the given string into an object tree representation.
90       *
91       * @param webClient the {@link WebClient}
92       * @param parent where the new parsed nodes will be added to
93       * @param context the context to build the fragment context stack
94       * @param source the (X)HTML to be parsed
95       * @param createdByJavascript if true the (script) tag was created by javascript
96       * @throws SAXException if a SAX error occurs
97       * @throws IOException if an IO error occurs
98       */
99      void parseFragment(WebClient webClient, DomNode parent, DomNode context, String source,
100             boolean createdByJavascript) throws SAXException, IOException;
101 
102     /**
103      * Parses the HTML content from the given string into an object tree representation.
104      *
105      * @param parent where the new parsed nodes will be added to
106      * @param context the context to build the fragment context stack
107      * @param source the (X)HTML to be parsed
108      * @param createdByJavascript if true the (script) tag was created by javascript
109      * @throws SAXException if a SAX error occurs
110      * @throws IOException if an IO error occurs
111      *
112      * @deprecated as of version 4.12.0; use
113      *     {@link #parseFragment(WebClient, DomNode, DomNode, String, boolean)} instead.
114      */
115     @Deprecated
116     default void parseFragment(final DomNode parent, final DomNode context, final String source,
117             final boolean createdByJavascript) throws SAXException, IOException {
118         parseFragment(null, parent, context, source, createdByJavascript);
119     }
120 
121     /**
122      * Parses the WebResponse into an object tree representation.
123      *
124      * @param webClient the {@link WebClient}
125      * @param webResponse the response data
126      * @param page the HtmlPage to add the nodes
127      * @param xhtml if true use the XHtml parser
128      * @param createdByJavascript if true the (script) tag was created by javascript
129      * @throws IOException if there is an IO error
130      */
131     void parse(WebClient webClient, WebResponse webResponse, HtmlPage page,
132             boolean xhtml, boolean createdByJavascript) throws IOException;
133 
134     /**
135      * Parses the WebResponse into an object tree representation.
136      *
137      * @param webResponse the response data
138      * @param page the HtmlPage to add the nodes
139      * @param xhtml if true use the XHtml parser
140      * @param createdByJavascript if true the (script) tag was created by javascript
141      * @throws IOException if there is an IO error
142      *
143      * @deprecated as of version 4.12.0; use
144      *     {@link #parse(WebClient, WebResponse, HtmlPage, boolean, boolean)} instead.
145      */
146     @Deprecated
147     default void parse(final WebResponse webResponse, final HtmlPage page, final boolean xhtml,
148             final boolean createdByJavascript) throws IOException {
149         parse(null, webResponse, page, xhtml, createdByJavascript);
150     }
151 }