1 /* 2 * Copyright (c) 2002-2025 Gargoyle Software Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * https://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 package org.htmlunit.html.parser; 16 17 import java.io.IOException; 18 19 import org.htmlunit.SgmlPage; 20 import org.htmlunit.WebClient; 21 import org.htmlunit.WebResponse; 22 import org.htmlunit.html.DomNode; 23 import org.htmlunit.html.ElementFactory; 24 import org.htmlunit.html.HtmlPage; 25 import org.xml.sax.SAXException; 26 27 /** 28 * <p>Interface for the parser used to parse HTML into a HtmlUnit-specific DOM (HU-DOM) tree.</p> 29 * 30 * @author <a href="mailto:cse@dynabean.de">Christian Sell</a> 31 * @author David K. Taylor 32 * @author Chris Erskine 33 * @author Ahmed Ashour 34 * @author Marc Guillemot 35 * @author Ethan Glasser-Camp 36 * @author Sudhan Moghe 37 * @author Ronald Brill 38 * @author Frank Danek 39 * @author Carsten Steul 40 */ 41 public interface HTMLParser { 42 43 /** 44 * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br> 45 * 46 * @param tagName an HTML element tag name 47 * @return a factory for creating HtmlElements representing the given tag 48 */ 49 ElementFactory getFactory(String tagName); 50 51 /** 52 * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br> 53 * 54 * @return a factory for creating SvgElements representing the given tag 55 */ 56 ElementFactory getSvgFactory(); 57 58 /** 59 * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br> 60 * 61 * Returns the pre-registered element factory corresponding to the specified tag, or an UnknownElementFactory. 62 * @param page the page 63 * @param namespaceURI the namespace URI 64 * @param qualifiedName the qualified name 65 * @param insideSvg is the node inside an SVG node or not 66 * @param svgSupport true if called from javascript createElementNS 67 * @return the pre-registered element factory corresponding to the specified tag, or an UnknownElementFactory 68 */ 69 ElementFactory getElementFactory(SgmlPage page, String namespaceURI, 70 String qualifiedName, boolean insideSvg, boolean svgSupport); 71 72 /** 73 * Parses the HTML content from the given string into an object tree representation. 74 * 75 * @param parent the parent for the new nodes 76 * @param source the (X)HTML to be parsed 77 * @throws SAXException if a SAX error occurs 78 * @throws IOException if an IO error occurs 79 * 80 * @deprecated as of version 4.12.0; use 81 * {@link #parseFragment(WebClient, DomNode, DomNode, String, boolean)} instead. 82 */ 83 @Deprecated 84 default void parseFragment(final DomNode parent, final String source) throws SAXException, IOException { 85 parseFragment(null, parent, parent, source, false); 86 } 87 88 /** 89 * Parses the HTML content from the given string into an object tree representation. 90 * 91 * @param webClient the {@link WebClient} 92 * @param parent where the new parsed nodes will be added to 93 * @param context the context to build the fragment context stack 94 * @param source the (X)HTML to be parsed 95 * @param createdByJavascript if true the (script) tag was created by javascript 96 * @throws SAXException if a SAX error occurs 97 * @throws IOException if an IO error occurs 98 */ 99 void parseFragment(WebClient webClient, DomNode parent, DomNode context, String source, 100 boolean createdByJavascript) throws SAXException, IOException; 101 102 /** 103 * Parses the HTML content from the given string into an object tree representation. 104 * 105 * @param parent where the new parsed nodes will be added to 106 * @param context the context to build the fragment context stack 107 * @param source the (X)HTML to be parsed 108 * @param createdByJavascript if true the (script) tag was created by javascript 109 * @throws SAXException if a SAX error occurs 110 * @throws IOException if an IO error occurs 111 * 112 * @deprecated as of version 4.12.0; use 113 * {@link #parseFragment(WebClient, DomNode, DomNode, String, boolean)} instead. 114 */ 115 @Deprecated 116 default void parseFragment(final DomNode parent, final DomNode context, final String source, 117 final boolean createdByJavascript) throws SAXException, IOException { 118 parseFragment(null, parent, context, source, createdByJavascript); 119 } 120 121 /** 122 * Parses the WebResponse into an object tree representation. 123 * 124 * @param webClient the {@link WebClient} 125 * @param webResponse the response data 126 * @param page the HtmlPage to add the nodes 127 * @param xhtml if true use the XHtml parser 128 * @param createdByJavascript if true the (script) tag was created by javascript 129 * @throws IOException if there is an IO error 130 */ 131 void parse(WebClient webClient, WebResponse webResponse, HtmlPage page, 132 boolean xhtml, boolean createdByJavascript) throws IOException; 133 134 /** 135 * Parses the WebResponse into an object tree representation. 136 * 137 * @param webResponse the response data 138 * @param page the HtmlPage to add the nodes 139 * @param xhtml if true use the XHtml parser 140 * @param createdByJavascript if true the (script) tag was created by javascript 141 * @throws IOException if there is an IO error 142 * 143 * @deprecated as of version 4.12.0; use 144 * {@link #parse(WebClient, WebResponse, HtmlPage, boolean, boolean)} instead. 145 */ 146 @Deprecated 147 default void parse(final WebResponse webResponse, final HtmlPage page, final boolean xhtml, 148 final boolean createdByJavascript) throws IOException { 149 parse(null, webResponse, page, xhtml, createdByJavascript); 150 } 151 }