/*
 * Copyright (c) 2002-2026 Gargoyle Software Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15  package org.htmlunit.html;
16  
17  import java.io.IOException;
18  import java.net.MalformedURLException;
19  import java.net.URL;
20  import java.nio.charset.Charset;
21  import java.util.Map;
22  import java.util.Objects;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.htmlunit.FailingHttpStatusCodeException;
27  import org.htmlunit.FrameContentHandler;
28  import org.htmlunit.Page;
29  import org.htmlunit.SgmlPage;
30  import org.htmlunit.WebClient;
31  import org.htmlunit.WebClientOptions;
32  import org.htmlunit.WebRequest;
33  import org.htmlunit.WebWindow;
34  import org.htmlunit.javascript.AbstractJavaScriptEngine;
35  import org.htmlunit.javascript.PostponedAction;
36  import org.htmlunit.protocol.javascript.JavaScriptURLConnection;
37  import org.htmlunit.util.UrlUtils;
38  import org.w3c.dom.Attr;
39  
40  /**
41   * Base class for frame and iframe.
42   *
43   * @author Mike Bowler
44   * @author David K. Taylor
45   * @author Christian Sell
46   * @author Marc Guillemot
47   * @author David D. Kilzer
48   * @author Stefan Anzinger
49   * @author Ahmed Ashour
50   * @author Dmitri Zoubkov
51   * @author Daniel Gredler
52   * @author Ronald Brill
53   * @author Frank Danek
54   * @author Lai Quang Duong
55   */
56  public abstract class BaseFrameElement extends HtmlElement {
57  
58      private static final Log LOG = LogFactory.getLog(BaseFrameElement.class);
59      private FrameWindow enclosedWindow_;
60      private boolean contentLoaded_;
61      private boolean loadSrcWhenAddedToPage_;
62  
63      /**
64       * Creates an instance of BaseFrame.
65       *
66       * @param qualifiedName the qualified name of the element type to instantiate
67       * @param page the HtmlPage that contains this element
68       * @param attributes the initial attributes
69       */
70      protected BaseFrameElement(final String qualifiedName, final SgmlPage page,
71              final Map<String, DomAttr> attributes) {
72          super(qualifiedName, page, attributes);
73  
74          init();
75  
76          if (null != page && page.isHtmlPage() && ((HtmlPage) page).isParsingHtmlSnippet()) {
77              // if created by the HTMLParser the src attribute is not set via setAttribute() or some other method but is
78              // part of the given attributes already.
79              final String src = getSrcAttribute();
80  
81              // src-less IFrame or src='about:blank'
82              // these are loaded sync
83              if (ATTRIBUTE_NOT_DEFINED != src && !UrlUtils.ABOUT_BLANK.equals(src.trim())) {
84                  loadSrcWhenAddedToPage_ = true;
85              }
86          }
87      }
88  
89      private void init() {
90          FrameWindow enclosedWindow = null;
91          try {
92              final HtmlPage htmlPage = getHtmlPageOrNull();
93              if (null != htmlPage) { // if loaded as part of XHR.responseXML, don't load content
94                  enclosedWindow = new FrameWindow(this);
95                  // put about:blank in the window to allow JS to run on this frame before the
96                  // real content is loaded
97                  final WebClient webClient = htmlPage.getWebClient();
98                  final HtmlPage temporaryPage = webClient.getPage(enclosedWindow, WebRequest.newAboutBlankRequest());
99                  temporaryPage.setReadyState(READY_STATE_LOADING);
100             }
101         }
102         catch (final FailingHttpStatusCodeException | IOException ignored) {
103             // should never occur
104         }
105         enclosedWindow_ = enclosedWindow;
106     }
107 
108     /**
109      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
110      *
111      * Called after the node for the {@code frame} or {@code iframe} has been added to the containing page.
112      * The node needs to be added first to allow JavaScript in the frame to see the frame in the parent.
113      * @throws FailingHttpStatusCodeException if the server returns a failing status code AND the property
114      *      {@link org.htmlunit.WebClientOptions#setThrowExceptionOnFailingStatusCode(boolean)} is
115      *      set to true
116      */
117 
118     public void loadInnerPage() throws FailingHttpStatusCodeException {
119         String source = getSrcAttribute();
120         if (source.isEmpty()) {
121             source = UrlUtils.ABOUT_BLANK;
122         }
123 
124         loadInnerPageIfPossible(source);
125 
126         final Page enclosedPage = getEnclosedPage();
127         if (enclosedPage != null && enclosedPage.isHtmlPage()) {
128             final HtmlPage htmlPage = (HtmlPage) enclosedPage;
129 
130             final AbstractJavaScriptEngine<?> jsEngine = htmlPage.getWebClient().getJavaScriptEngine();
131             if (jsEngine != null && jsEngine.isScriptRunning()) {
132                 final PostponedAction action = new PostponedAction(getPage(), "BaseFrame.loadInnerPage") {
133                     @Override
134                     public void execute() {
135                         htmlPage.setReadyState(READY_STATE_COMPLETE);
136                     }
137                 };
138                 jsEngine.addPostponedAction(action);
139             }
140             else {
141                 htmlPage.setReadyState(READY_STATE_COMPLETE);
142             }
143         }
144     }
145 
146     /**
147      * Indicates if the content specified by the {@code src} attribute has been loaded or not.
148      * The initial state of a frame contains an "about:blank" that is not loaded like
149      * something specified in {@code src} attribute.
150      * @return {@code false} if the frame is still in its initial state.
151      */
152     boolean isContentLoaded() {
153         return contentLoaded_;
154     }
155 
156     /**
157      * Changes the state of the {@code contentLoaded_} attribute to true.
158      * This is needed, if the content is set from javascript to avoid
159      * later overwriting from method org.htmlunit.html.HtmlPage.loadFrames().
160      */
161     void setContentLoaded() {
162         contentLoaded_ = true;
163     }
164 
165     /**
166      * @throws FailingHttpStatusCodeException if the server returns a failing status code AND the property
167      *      {@link WebClientOptions#setThrowExceptionOnFailingStatusCode(boolean)} is set to true
168      */
169     private void loadInnerPageIfPossible(final String src) throws FailingHttpStatusCodeException {
170         setContentLoaded();
171 
172         String source = src;
173         final SgmlPage page = getPage();
174         final WebClient webClient = page.getWebClient();
175         final FrameContentHandler handler = webClient.getFrameContentHandler();
176         if (null != handler && !handler.loadFrameDocument(this)) {
177             source = UrlUtils.ABOUT_BLANK;
178         }
179 
180         if (!source.isEmpty()) {
181             final URL url;
182             try {
183                 url = ((HtmlPage) page).getFullyQualifiedUrl(source);
184             }
185             catch (final MalformedURLException e) {
186                 notifyIncorrectness("Invalid src attribute of " + getTagName() + ": url=[" + source + "]. Ignored.");
187                 return;
188             }
189 
190             final URL pageUrl = page.getUrl();
191 
192             // accessing to local resource is forbidden for security reason
193             if (!"file".equals(pageUrl.getProtocol()) && "file".equals(url.getProtocol())) {
194                 notifyIncorrectness("Not allowed to load local resource: " + source);
195                 return;
196             }
197 
198             final Charset pageCharset = page.getCharset();
199             final WebRequest request = new WebRequest(url, pageCharset, pageUrl);
200 
201             if (isAlreadyLoadedByAncestor(url, request.getCharset())) {
202                 notifyIncorrectness("Recursive src attribute of " + getTagName() + ": url=[" + source + "]. Ignored.");
203                 return;
204             }
205 
206             // Use parent document's charset as container charset if same origin
207             // https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding
208             if (Objects.equals(pageUrl.getProtocol(), url.getProtocol())
209                     && Objects.equals(pageUrl.getAuthority(), url.getAuthority())) {
210                 request.setDefaultResponseContentCharset(pageCharset);
211             }
212 
213             try {
214                 webClient.getPage(enclosedWindow_, request);
215             }
216             catch (final IOException e) {
217                 if (LOG.isErrorEnabled()) {
218                     LOG.error("IOException when getting content for " + getTagName() + ": url=[" + url + "]", e);
219                 }
220             }
221         }
222     }
223 
224     /**
225      * Test if the provided URL is the one of the parents which would cause an infinite loop.
226      * @param url the URL to test
227      * @param charset the request charset
228      * @return {@code false} if no parent has already this URL
229      */
230     private boolean isAlreadyLoadedByAncestor(final URL url, final Charset charset) {
231         WebWindow window = getPage().getEnclosingWindow();
232         int nesting = 0;
233         while (window instanceof FrameWindow) {
234             nesting++;
235             if (nesting > 9) {
236                 return true;
237             }
238 
239             final URL encUrl = UrlUtils.encodeUrl(url, charset);
240             if (UrlUtils.sameFile(encUrl, window.getEnclosedPage().getUrl())) {
241                 return true;
242             }
243 
244             if (window == window.getParentWindow()) {
245                 // TODO: should getParentWindow() return null on top windows?
246                 window = null;
247             }
248             else {
249                 window = window.getParentWindow();
250             }
251         }
252         return false;
253     }
254 
255     /**
256      * Returns the value of the attribute {@code longdesc}. Refer to the
257      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
258      * documentation for details on the use of this attribute.
259      *
260      * @return the value of the attribute {@code longdesc} or an empty string if that attribute isn't defined
261      */
262     public final String getLongDescAttribute() {
263         return getAttributeDirect("longdesc");
264     }
265 
266     /**
267      * Returns the value of the attribute {@code name}. Refer to the
268      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
269      * documentation for details on the use of this attribute.
270      *
271      * @return the value of the attribute {@code name} or an empty string if that attribute isn't defined
272      */
273     public final String getNameAttribute() {
274         return getAttributeDirect(NAME_ATTRIBUTE);
275     }
276 
277     /**
278      * Sets the value of the {@code name} attribute.
279      *
280      * @param name the new window name
281      */
282     public final void setNameAttribute(final String name) {
283         setAttribute(NAME_ATTRIBUTE, name);
284     }
285 
286     /**
287      * Returns the value of the attribute {@code src}. Refer to the
288      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
289      * documentation for details on the use of this attribute.
290      *
291      * @return the value of the attribute {@code src} or an empty string if that attribute isn't defined
292      */
293     public final String getSrcAttribute() {
294         return getSrcAttributeNormalized();
295     }
296 
297     /**
298      * Returns the value of the attribute {@code frameborder}. Refer to the
299      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
300      * documentation for details on the use of this attribute.
301      *
302      * @return the value of the attribute {@code frameborder} or an empty string if that attribute isn't defined
303      */
304     public final String getFrameBorderAttribute() {
305         return getAttributeDirect("frameborder");
306     }
307 
308     /**
309      * Returns the value of the attribute {@code marginwidth}. Refer to the
310      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
311      * documentation for details on the use of this attribute.
312      *
313      * @return the value of the attribute {@code marginwidth} or an empty string if that attribute isn't defined
314      */
315     public final String getMarginWidthAttribute() {
316         return getAttributeDirect("marginwidth");
317     }
318 
319     /**
320      * Returns the value of the attribute {@code marginheight}. Refer to the
321      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
322      * documentation for details on the use of this attribute.
323      *
324      * @return the value of the attribute {@code marginheight} or an empty string if that attribute isn't defined
325      */
326     public final String getMarginHeightAttribute() {
327         return getAttributeDirect("marginheight");
328     }
329 
330     /**
331      * Returns the value of the attribute {@code noresize}. Refer to the
332      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
333      * documentation for details on the use of this attribute.
334      *
335      * @return the value of the attribute {@code noresize} or an empty string if that attribute isn't defined
336      */
337     public final String getNoResizeAttribute() {
338         return getAttributeDirect("noresize");
339     }
340 
341     /**
342      * Returns the value of the attribute {@code scrolling}. Refer to the
343      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
344      * documentation for details on the use of this attribute.
345      *
346      * @return the value of the attribute {@code scrolling} or an empty string if that attribute isn't defined
347      */
348     public final String getScrollingAttribute() {
349         return getAttributeDirect("scrolling");
350     }
351 
352     /**
353      * Returns the value of the attribute {@code onload}. This attribute is not
354      * actually supported by the HTML specification however it is supported
355      * by the popular browsers.
356      *
357      * @return the value of the attribute {@code onload} or an empty string if that attribute isn't defined
358      */
359     public final String getOnLoadAttribute() {
360         return getAttributeDirect("onload");
361     }
362 
363     /**
364      * Returns the currently loaded page in the enclosed window.
365      * This is a facility method for <code>getEnclosedWindow().getEnclosedPage()</code>.
366      * @see WebWindow#getEnclosedPage()
367      * @return the currently loaded page in the enclosed window, or {@code null} if no page has been loaded
368      */
369     public Page getEnclosedPage() {
370         return getEnclosedWindow().getEnclosedPage();
371     }
372 
373     /**
374      * Gets the window enclosed in this frame.
375      * @return the window enclosed in this frame
376      */
377     public FrameWindow getEnclosedWindow() {
378         return enclosedWindow_;
379     }
380 
381     /**
382      * Sets the value of the {@code src} attribute. Also loads the frame with the specified URL, if possible.
383      * @param attribute the new value of the {@code src} attribute
384      */
385     public final void setSrcAttribute(final String attribute) {
386         setAttribute(SRC_ATTRIBUTE, attribute);
387     }
388 
389     /**
390      * {@inheritDoc}
391      */
392     @Override
393     protected void setAttributeNS(final String namespaceURI, final String qualifiedName, final String attributeValue,
394             final boolean notifyAttributeChangeListeners, final boolean notifyMutationObserver) {
395         final String qualifiedNameLC = org.htmlunit.util.StringUtils.toRootLowerCase(qualifiedName);
396 
397         if (null != attributeValue && SRC_ATTRIBUTE.equals(qualifiedNameLC)) {
398             final String attributeValueTrimmed = attributeValue.trim();
399 
400             super.setAttributeNS(namespaceURI, qualifiedNameLC, attributeValueTrimmed, notifyAttributeChangeListeners,
401                     notifyMutationObserver);
402 
403             // do not use equals() here
404             // see HTMLIFrameElement2Test.documentCreateElement_onLoad_srcAboutBlank()
405             if (UrlUtils.ABOUT_BLANK != attributeValueTrimmed) {
406                 if (isAttachedToPage()) {
407                     loadSrc();
408                 }
409                 else {
410                     loadSrcWhenAddedToPage_ = true;
411                 }
412             }
413 
414             return;
415         }
416 
417         super.setAttributeNS(namespaceURI, qualifiedNameLC, attributeValue, notifyAttributeChangeListeners,
418                 notifyMutationObserver);
419     }
420 
421     /**
422      * {@inheritDoc}
423      */
424     @Override
425     public Attr setAttributeNode(final Attr attribute) {
426         final String qualifiedName = attribute.getName();
427         String attributeValue = null;
428         if (SRC_ATTRIBUTE.equals(qualifiedName)) {
429             attributeValue = attribute.getValue().trim();
430         }
431 
432         final Attr result = super.setAttributeNode(attribute);
433 
434         if (SRC_ATTRIBUTE.equals(qualifiedName) && !UrlUtils.ABOUT_BLANK.equals(attributeValue)) {
435             if (isAttachedToPage()) {
436                 loadSrc();
437             }
438             else {
439                 loadSrcWhenAddedToPage_ = true;
440             }
441         }
442 
443         return result;
444     }
445 
446     private void loadSrc() {
447         loadSrcWhenAddedToPage_ = false;
448         final String src = getSrcAttribute();
449 
450         // recreate a window if the old one was closed
451         if (enclosedWindow_.isClosed()) {
452             init();
453         }
454 
455         final AbstractJavaScriptEngine<?> jsEngine = getPage().getWebClient().getJavaScriptEngine();
456         // When src is set from a script, loading is postponed until script finishes
457         // in fact this implementation is probably wrong: JavaScript URL should be
458         // first evaluated and only loading, when any, should be postponed.
459         if (jsEngine == null || !jsEngine.isScriptRunning()
460                 || src.startsWith(JavaScriptURLConnection.JAVASCRIPT_PREFIX)) {
461             loadInnerPageIfPossible(src);
462         }
463         else {
464             final Page pageInFrame = getEnclosedPage();
465             final PostponedAction action = new PostponedAction(getPage(), "BaseFrame.loadSrc") {
466                 @Override
467                 public void execute() throws Exception {
468                     if (!src.isEmpty() && getSrcAttribute().equals(src)) {
469                         loadInnerPage();
470                     }
471                 }
472 
473                 @Override
474                 public boolean isStillAlive() {
475                     // skip if page in frame has already been changed
476                     return super.isStillAlive() && pageInFrame == getEnclosedPage();
477                 }
478             };
479             jsEngine.addPostponedAction(action);
480         }
481     }
482 
483     /**
484      * Creates a new {@link WebWindow} for the new clone.
485      * {@inheritDoc}
486      */
487     @Override
488     public DomNode cloneNode(final boolean deep) {
489         final BaseFrameElement clone = (BaseFrameElement) super.cloneNode(deep);
490         clone.init();
491         return clone;
492     }
493 
494     @Override
495     protected void onAddedToPage() {
496         super.onAddedToPage();
497 
498         if (loadSrcWhenAddedToPage_) {
499             loadSrc();
500         }
501     }
502 
503     @Override
504     public void remove() {
505         super.remove();
506         loadSrcWhenAddedToPage_ = true;
507         getEnclosedWindow().close();
508     }
509 
510     @Override
511     public final void removeAttribute(final String attributeName) {
512         super.removeAttribute(attributeName);
513 
514         // TODO find a better implementation without all the code duplication
515         if (isAttachedToPage()) {
516             loadSrcWhenAddedToPage_ = false;
517             final String src = getSrcAttribute();
518 
519             final AbstractJavaScriptEngine<?> jsEngine = getPage().getWebClient().getJavaScriptEngine();
520             // When src is set from a script, loading is postponed until script finishes
521             // in fact this implementation is probably wrong: JavaScript URL should be
522             // first evaluated and only loading, when any, should be postponed.
523             if (jsEngine == null || !jsEngine.isScriptRunning()) {
524                 loadInnerPageIfPossible(src);
525             }
526             else {
527                 final Page pageInFrame = getEnclosedPage();
528                 final PostponedAction action = new PostponedAction(getPage(), "BaseFrame.removeAttribute") {
529                     @Override
530                     public void execute() throws Exception {
531                         loadInnerPage();
532                     }
533 
534                     @Override
535                     public boolean isStillAlive() {
536                         // skip if page in frame has already been changed
537                         return super.isStillAlive() && pageInFrame == getEnclosedPage();
538                     }
539                 };
540                 jsEngine.addPostponedAction(action);
541             }
542         }
543         else {
544             loadSrcWhenAddedToPage_ = true;
545         }
546     }
547 }