/*
 * Copyright (c) 2002-2025 Gargoyle Software Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15 package org.htmlunit.html.parser;
16
17 import java.io.IOException;
18
19 import org.htmlunit.SgmlPage;
20 import org.htmlunit.WebClient;
21 import org.htmlunit.WebResponse;
22 import org.htmlunit.html.DomNode;
23 import org.htmlunit.html.ElementFactory;
24 import org.htmlunit.html.HtmlPage;
25 import org.xml.sax.SAXException;
26
27 /**
28 * <p>Interface for the parser used to parse HTML into a HtmlUnit-specific DOM (HU-DOM) tree.</p>
29 *
30 * @author Christian Sell
31 * @author David K. Taylor
32 * @author Chris Erskine
33 * @author Ahmed Ashour
34 * @author Marc Guillemot
35 * @author Ethan Glasser-Camp
36 * @author Sudhan Moghe
37 * @author Ronald Brill
38 * @author Frank Danek
39 * @author Carsten Steul
40 */
41 public interface HTMLParser {
42
43 /**
44 * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
45 *
46 * @param tagName an HTML element tag name
47 * @return a factory for creating HtmlElements representing the given tag
48 */
49 ElementFactory getFactory(String tagName);
50
51 /**
52 * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
53 *
54 * @return a factory for creating SvgElements representing the given tag
55 */
56 ElementFactory getSvgFactory();
57
58 /**
59 * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
60 *
61 * Returns the pre-registered element factory corresponding to the specified tag, or an UnknownElementFactory.
62 * @param page the page
63 * @param namespaceURI the namespace URI
64 * @param qualifiedName the qualified name
65 * @param insideSvg is the node inside an SVG node or not
66 * @param svgSupport true if called from javascript createElementNS
67 * @return the pre-registered element factory corresponding to the specified tag, or an UnknownElementFactory
68 */
69 ElementFactory getElementFactory(SgmlPage page, String namespaceURI,
70 String qualifiedName, boolean insideSvg, boolean svgSupport);
71
72 /**
73 * Parses the HTML content from the given string into an object tree representation.
74 *
75 * @param webClient the {@link WebClient}
76 * @param parent where the new parsed nodes will be added to
77 * @param context the context to build the fragment context stack
78 * @param source the (X)HTML to be parsed
79 * @param createdByJavascript if true the (script) tag was created by javascript
80 * @throws SAXException if a SAX error occurs
81 * @throws IOException if an IO error occurs
82 */
83 void parseFragment(WebClient webClient, DomNode parent, DomNode context, String source,
84 boolean createdByJavascript) throws SAXException, IOException;
85
86 /**
87 * Parses the WebResponse into an object tree representation.
88 *
89 * @param webClient the {@link WebClient}
90 * @param webResponse the response data
91 * @param page the HtmlPage to add the nodes
92 * @param xhtml if true use the XHtml parser
93 * @param createdByJavascript if true the (script) tag was created by javascript
94 * @throws IOException if there is an IO error
95 */
96 void parse(WebClient webClient, WebResponse webResponse, HtmlPage page,
97 boolean xhtml, boolean createdByJavascript) throws IOException;
98 }