View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html.parser;
16  
17  import java.io.IOException;
18  
19  import org.htmlunit.SgmlPage;
20  import org.htmlunit.WebResponse;
21  import org.htmlunit.html.DomNode;
22  import org.htmlunit.html.ElementFactory;
23  import org.htmlunit.html.HtmlPage;
24  import org.xml.sax.SAXException;
25  
26  /**
27   * <p>Interface for the parser used to parse HTML into a HtmlUnit-specific DOM (HU-DOM) tree.</p>
28   *
29   * @author <a href="mailto:cse@dynabean.de">Christian Sell</a>
30   * @author David K. Taylor
31   * @author Chris Erskine
32   * @author Ahmed Ashour
33   * @author Marc Guillemot
34   * @author Ethan Glasser-Camp
35   * @author Sudhan Moghe
36   * @author Ronald Brill
37   * @author Frank Danek
38   * @author Carsten Steul
39   */
40  public interface HTMLParser {
41  
42      /**
43       * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
44       * Returns a factory for creating HtmlElements representing the given tag.
45       * @param tagName an HTML element tag name
46       * @return a factory for creating HtmlElements representing the given tag
47       */
48      ElementFactory getFactory(String tagName);
49  
50      /**
51       * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
52       * Returns a factory for creating SvgElements; this method takes no tag name.
53       * @return a factory for creating SvgElements
54       */
55      ElementFactory getSvgFactory();
56  
57      /**
58       * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
59       *
60       * Returns the pre-registered element factory corresponding to the specified tag, or an UnknownElementFactory.
61       * @param page the page
62       * @param namespaceURI the namespace URI
63       * @param qualifiedName the qualified name
64       * @param insideSvg true if the node is inside an SVG node
65       * @param svgSupport true if called from javascript createElementNS
66       * @return the pre-registered element factory corresponding to the specified tag, or an UnknownElementFactory
67       */
68      ElementFactory getElementFactory(SgmlPage page, String namespaceURI,
69              String qualifiedName, boolean insideSvg, boolean svgSupport);
70  
71      /**
72       * Parses the HTML content from the given string into an object tree representation.
73       *
74       * @param parent the parent for the new nodes
75       * @param source the (X)HTML to be parsed
76       * @throws SAXException if a SAX error occurs
77       * @throws IOException if an IO error occurs
78       */
79      void parseFragment(DomNode parent, String source) throws SAXException, IOException;
80  
81      /**
82       * Parses the HTML content from the given string into an object tree, using the given context node.
83       *
84       * @param parent where the new parsed nodes will be added to
85       * @param context the context to build the fragment context stack
86       * @param source the (X)HTML to be parsed
87       * @param createdByJavascript if true the (script) tag was created by javascript
88       * @throws SAXException if a SAX error occurs
89       * @throws IOException if an IO error occurs
90       */
91      void parseFragment(DomNode parent, DomNode context, String source,
92              boolean createdByJavascript) throws SAXException, IOException;
93  
94      /**
95       * Parses the WebResponse into an object tree representation.
96       *
97       * @param webResponse the response data
98       * @param page the HtmlPage to add the nodes to
99       * @param xhtml if true use the XHtml parser
100      * @param createdByJavascript if true the (script) tag was created by javascript
101      * @throws IOException if there is an IO error
102      */
103     void parse(WebResponse webResponse, HtmlPage page, boolean xhtml, boolean createdByJavascript) throws IOException;
104 }