1 /* 2 * Copyright (c) 2002-2025 Gargoyle Software Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * https://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 package org.htmlunit.html.parser; 16 17 import java.io.IOException; 18 19 import org.htmlunit.SgmlPage; 20 import org.htmlunit.WebClient; 21 import org.htmlunit.WebResponse; 22 import org.htmlunit.html.DomNode; 23 import org.htmlunit.html.ElementFactory; 24 import org.htmlunit.html.HtmlPage; 25 import org.xml.sax.SAXException; 26 27 /** 28 * <p>Interface for the parser used to parse HTML into a HtmlUnit-specific DOM (HU-DOM) tree.</p> 29 * 30 * @author Christian Sell 31 * @author David K. Taylor 32 * @author Chris Erskine 33 * @author Ahmed Ashour 34 * @author Marc Guillemot 35 * @author Ethan Glasser-Camp 36 * @author Sudhan Moghe 37 * @author Ronald Brill 38 * @author Frank Danek 39 * @author Carsten Steul 40 */ 41 public interface HTMLParser { 42 43 /** 44 * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br> 45 * 46 * @param tagName an HTML element tag name 47 * @return a factory for creating HtmlElements representing the given tag 48 */ 49 ElementFactory getFactory(String tagName); 50 51 /** 52 * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br> 53 * 54 * @return a factory for creating SvgElements representing the given tag 55 */ 56 ElementFactory getSvgFactory(); 57 58 /** 59 * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br> 60 * 61 * Returns the pre-registered element factory corresponding to the specified tag, or an UnknownElementFactory. 62 * @param page the page 63 * @param namespaceURI the namespace URI 64 * @param qualifiedName the qualified name 65 * @param insideSvg is the node inside an SVG node or not 66 * @param svgSupport true if called from javascript createElementNS 67 * @return the pre-registered element factory corresponding to the specified tag, or an UnknownElementFactory 68 */ 69 ElementFactory getElementFactory(SgmlPage page, String namespaceURI, 70 String qualifiedName, boolean insideSvg, boolean svgSupport); 71 72 /** 73 * Parses the HTML content from the given string into an object tree representation. 74 * 75 * @param webClient the {@link WebClient} 76 * @param parent where the new parsed nodes will be added to 77 * @param context the context to build the fragment context stack 78 * @param source the (X)HTML to be parsed 79 * @param createdByJavascript if true the (script) tag was created by javascript 80 * @throws SAXException if a SAX error occurs 81 * @throws IOException if an IO error occurs 82 */ 83 void parseFragment(WebClient webClient, DomNode parent, DomNode context, String source, 84 boolean createdByJavascript) throws SAXException, IOException; 85 86 /** 87 * Parses the WebResponse into an object tree representation. 88 * 89 * @param webClient the {@link WebClient} 90 * @param webResponse the response data 91 * @param page the HtmlPage to add the nodes 92 * @param xhtml if true use the XHtml parser 93 * @param createdByJavascript if true the (script) tag was created by javascript 94 * @throws IOException if there is an IO error 95 */ 96 void parse(WebClient webClient, WebResponse webResponse, HtmlPage page, 97 boolean xhtml, boolean createdByJavascript) throws IOException; 98 }