View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html.serializer;
16  
17  import static org.htmlunit.css.CssStyleSheet.BLOCK;
18  
19  import java.util.List;
20  
21  import org.apache.commons.lang3.StringUtils;
22  import org.htmlunit.Page;
23  import org.htmlunit.SgmlPage;
24  import org.htmlunit.WebWindow;
25  import org.htmlunit.css.ComputedCssStyleDeclaration;
26  import org.htmlunit.css.StyleAttributes.Definition;
27  import org.htmlunit.html.DomComment;
28  import org.htmlunit.html.DomElement;
29  import org.htmlunit.html.DomNode;
30  import org.htmlunit.html.DomText;
31  import org.htmlunit.html.HtmlBody;
32  import org.htmlunit.html.HtmlBreak;
33  import org.htmlunit.html.HtmlCheckBoxInput;
34  import org.htmlunit.html.HtmlDetails;
35  import org.htmlunit.html.HtmlHiddenInput;
36  import org.htmlunit.html.HtmlInlineFrame;
37  import org.htmlunit.html.HtmlInput;
38  import org.htmlunit.html.HtmlMenu;
39  import org.htmlunit.html.HtmlNoFrames;
40  import org.htmlunit.html.HtmlNoScript;
41  import org.htmlunit.html.HtmlOption;
42  import org.htmlunit.html.HtmlOrderedList;
43  import org.htmlunit.html.HtmlPreformattedText;
44  import org.htmlunit.html.HtmlRadioButtonInput;
45  import org.htmlunit.html.HtmlResetInput;
46  import org.htmlunit.html.HtmlScript;
47  import org.htmlunit.html.HtmlSelect;
48  import org.htmlunit.html.HtmlStyle;
49  import org.htmlunit.html.HtmlSubmitInput;
50  import org.htmlunit.html.HtmlSummary;
51  import org.htmlunit.html.HtmlTable;
52  import org.htmlunit.html.HtmlTableCell;
53  import org.htmlunit.html.HtmlTableFooter;
54  import org.htmlunit.html.HtmlTableHeader;
55  import org.htmlunit.html.HtmlTableRow;
56  import org.htmlunit.html.HtmlTextArea;
57  import org.htmlunit.html.HtmlTitle;
58  import org.htmlunit.html.HtmlUnorderedList;
59  import org.htmlunit.html.TableRowGroup;
60  import org.htmlunit.html.serializer.HtmlSerializerVisibleText.HtmlSerializerTextBuilder.Mode;
61  
62  /**
63   * Special serializer to generate the output we need
64   * at least for selenium WebElement#getText().
65   * <p>This is also used for estimations by ComputedCSSStyleDeclaration.</p>
66   *
67   * @author Ronald Brill
68   * @author cd alexndr
69   */
70  public class HtmlSerializerVisibleText {
71  
72      /**
73       * Converts an HTML node to text.
74       * @param node a node
75       * @return the text representation according to the setting of this serializer
76       */
77      public String asText(final DomNode node) {
78          if (node instanceof HtmlBreak) {
79              return "";
80          }
81          final HtmlSerializerTextBuilder builder = new HtmlSerializerTextBuilder();
82          appendNode(builder, node, whiteSpaceStyle(node, Mode.WHITE_SPACE_NORMAL));
83          return builder.getText();
84      }
85  
86      /**
87       * Iterate over all Children and call appendNode() for every.
88       *
89       * @param builder the StringBuilder to add to
90       * @param node the node to process
91       * @param mode the {@link Mode} to use for processing
92       */
93      protected void appendChildren(final HtmlSerializerTextBuilder builder, final DomNode node, final Mode mode) {
94          for (final DomNode child : node.getChildren()) {
95              appendNode(builder, child, updateWhiteSpaceStyle(node, mode));
96          }
97      }
98  
99      /**
100      * The core distribution method call the different appendXXX
101      * methods depending on the type of the given node.
102      *
103      * @param builder the StringBuilder to add to
104      * @param node the node to process
105      * @param mode the {@link Mode} to use for processing
106      */
107     protected void appendNode(final HtmlSerializerTextBuilder builder, final DomNode node, final Mode mode) {
108         if (node instanceof DomText) {
109             appendText(builder, (DomText) node, mode);
110         }
111         else if (node instanceof DomComment) {
112             appendComment(builder, (DomComment) node, mode);
113         }
114         else if (node instanceof HtmlBreak) {
115             appendBreak(builder, (HtmlBreak) node, mode);
116         }
117         else if (node instanceof HtmlHiddenInput) {
118             appendHiddenInput(builder, (HtmlHiddenInput) node, mode);
119         }
120         else if (node instanceof HtmlScript) {
121             appendScript(builder, (HtmlScript) node, mode);
122         }
123         else if (node instanceof HtmlStyle) {
124             appendStyle(builder, (HtmlStyle) node, mode);
125         }
126         else if (node instanceof HtmlNoFrames) {
127             appendNoFrames(builder, (HtmlNoFrames) node, mode);
128         }
129         else if (node instanceof HtmlTextArea) {
130             appendTextArea(builder, (HtmlTextArea) node, mode);
131         }
132         else if (node instanceof HtmlTitle) {
133             appendTitle(builder, (HtmlTitle) node, mode);
134         }
135         else if (node instanceof HtmlTableRow) {
136             appendTableRow(builder, (HtmlTableRow) node, mode);
137         }
138         else if (node instanceof HtmlSelect) {
139             appendSelect(builder, (HtmlSelect) node, mode);
140         }
141         else if (node instanceof HtmlOption) {
142             appendOption(builder, (HtmlOption) node, mode);
143         }
144         else if (node instanceof HtmlSubmitInput) {
145             appendSubmitInput(builder, (HtmlSubmitInput) node, mode);
146         }
147         else if (node instanceof HtmlResetInput) {
148             appendResetInput(builder, (HtmlResetInput) node, mode);
149         }
150         else if (node instanceof HtmlCheckBoxInput) {
151             appendCheckBoxInput(builder, (HtmlCheckBoxInput) node, mode);
152         }
153         else if (node instanceof HtmlRadioButtonInput) {
154             appendRadioButtonInput(builder, (HtmlRadioButtonInput) node, mode);
155         }
156         else if (node instanceof HtmlInput) {
157             // nothing
158         }
159         else if (node instanceof HtmlTable) {
160             appendTable(builder, (HtmlTable) node, mode);
161         }
162         else if (node instanceof HtmlOrderedList) {
163             appendOrderedList(builder, (HtmlOrderedList) node, mode);
164         }
165         else if (node instanceof HtmlUnorderedList) {
166             appendUnorderedList(builder, (HtmlUnorderedList) node, mode);
167         }
168         else if (node instanceof HtmlPreformattedText) {
169             appendPreformattedText(builder, (HtmlPreformattedText) node, mode);
170         }
171         else if (node instanceof HtmlInlineFrame) {
172             appendInlineFrame(builder, (HtmlInlineFrame) node, mode);
173         }
174         else if (node instanceof HtmlMenu) {
175             appendMenu(builder, (HtmlMenu) node, mode);
176         }
177         else if (node instanceof HtmlDetails) {
178             appendDetails(builder, (HtmlDetails) node, mode);
179         }
180         else if (node instanceof HtmlNoScript && node.getPage().getWebClient().isJavaScriptEnabled()) {
181             appendNoScript(builder, (HtmlNoScript) node, mode);
182         }
183         else {
184             appendDomNode(builder, node, mode);
185         }
186     }
187 
188     /**
189      * Process {@link DomNode}.
190      *
191      * @param builder the StringBuilder to add to
192      * @param domNode the target to process
193      * @param mode the {@link Mode} to use for processing
194      */
195     protected void appendDomNode(final HtmlSerializerTextBuilder builder,
196             final DomNode domNode, final Mode mode) {
197         final boolean block;
198         if (domNode instanceof HtmlBody) {
199             block = false;
200         }
201         else if (domNode instanceof DomElement) {
202             final WebWindow window = domNode.getPage().getEnclosingWindow();
203             final String display = window.getComputedStyle((DomElement) domNode, null).getDisplay();
204             block = BLOCK.equals(display);
205         }
206         else {
207             block = false;
208         }
209 
210         if (block) {
211             builder.appendBlockSeparator();
212         }
213         appendChildren(builder, domNode, mode);
214         if (block) {
215             builder.appendBlockSeparator();
216         }
217     }
218 
219     /**
220      * Process {@link HtmlHiddenInput}.
221      *
222      * @param builder the StringBuilder to add to
223      * @param htmlHiddenInput the target to process
224      * @param mode the {@link Mode} to use for processing
225      */
226     protected void appendHiddenInput(final HtmlSerializerTextBuilder builder,
227             final HtmlHiddenInput htmlHiddenInput, final Mode mode) {
228         // nothing to do
229     }
230 
231     /**
232      * Process {@link HtmlScript}.
233      *
234      * @param builder the StringBuilder to add to
235      * @param htmlScript the target to process
236      * @param mode the {@link Mode} to use for processing
237      */
238     protected void appendScript(final HtmlSerializerTextBuilder builder,
239             final HtmlScript htmlScript, final Mode mode) {
240         // nothing to do
241     }
242 
243     /**
244      * Process {@link HtmlStyle}.
245      *
246      * @param builder the StringBuilder to add to
247      * @param htmlStyle the target to process
248      * @param mode the {@link Mode} to use for processing
249      */
250     protected void appendStyle(final HtmlSerializerTextBuilder builder,
251             final HtmlStyle htmlStyle, final Mode mode) {
252         // nothing to do
253     }
254 
255     /**
256      * Process {@link HtmlNoScript}.
257      *
258      * @param builder the StringBuilder to add to
259      * @param htmlNoScript the target to process
260      * @param mode the {@link Mode} to use for processing
261      */
262     protected void appendNoScript(final HtmlSerializerTextBuilder builder,
263             final HtmlNoScript htmlNoScript, final Mode mode) {
264         // nothing to do
265     }
266 
267     /**
268      * Process {@link HtmlNoFrames}.
269      *
270      * @param builder the StringBuilder to add to
271      * @param htmlNoFrames the target to process
272      * @param mode the {@link Mode} to use for processing
273      */
274     protected void appendNoFrames(final HtmlSerializerTextBuilder builder,
275             final HtmlNoFrames htmlNoFrames, final Mode mode) {
276         // nothing to do
277     }
278 
279     /**
280      * Process {@link HtmlSubmitInput}.
281      *
282      * @param builder the StringBuilder to add to
283      * @param htmlSubmitInput the target to process
284      * @param mode the {@link Mode} to use for processing
285      */
286     protected void appendSubmitInput(final HtmlSerializerTextBuilder builder,
287             final HtmlSubmitInput htmlSubmitInput, final Mode mode) {
288         // nothing to do
289     }
290 
291     /**
292      * Process {@link HtmlInput}.
293      *
294      * @param builder the StringBuilder to add to
295      * @param htmlInput the target to process
296      * @param mode the {@link Mode} to use for processing
297      */
298     protected void appendInput(final HtmlSerializerTextBuilder builder,
299             final HtmlInput htmlInput, final Mode mode) {
300         builder.append(htmlInput.getValueAttribute(), mode);
301     }
302 
303     /**
304      * Process {@link HtmlResetInput}.
305      *
306      * @param builder the StringBuilder to add to
307      * @param htmlResetInput the target to process
308      * @param mode the {@link Mode} to use for processing
309      */
310     protected void appendResetInput(final HtmlSerializerTextBuilder builder,
311             final HtmlResetInput htmlResetInput, final Mode mode) {
312         // nothing to do
313     }
314 
315     /**
316      * Process {@link HtmlMenu}.
317      * @param builder the StringBuilder to add to
318      * @param htmlMenu the target to process
319      * @param mode the {@link Mode} to use for processing
320      */
321     protected void appendMenu(final HtmlSerializerTextBuilder builder,
322                     final HtmlMenu htmlMenu, final Mode mode) {
323         builder.appendBlockSeparator();
324         boolean first = true;
325         for (final DomNode item : htmlMenu.getChildren()) {
326             if (!first) {
327                 builder.appendBlockSeparator();
328             }
329             first = false;
330             appendNode(builder, item, mode);
331         }
332         builder.appendBlockSeparator();
333     }
334 
335     /**
336      * Process {@link HtmlDetails}.
337      * @param builder the StringBuilder to add to
338      * @param htmlDetails the target to process
339      * @param mode the {@link Mode} to use for processing
340      */
341     protected void appendDetails(final HtmlSerializerTextBuilder builder,
342                     final HtmlDetails htmlDetails, final Mode mode) {
343         if (htmlDetails.isOpen()) {
344             appendChildren(builder, htmlDetails, mode);
345             return;
346         }
347 
348         for (final DomNode child : htmlDetails.getChildren()) {
349             if (child instanceof HtmlSummary) {
350                 appendNode(builder, child, mode);
351             }
352         }
353     }
354 
355     /**
356      * Process {@link HtmlTitle}.
357      * @param builder the StringBuilder to add to
358      * @param htmlTitle the target to process
359      * @param mode the {@link Mode} to use for processing
360      */
361     protected void appendTitle(final HtmlSerializerTextBuilder builder,
362             final HtmlTitle htmlTitle, final Mode mode) {
363         // nothing to do
364     }
365 
366     /**
367      * Process {@link HtmlTableRow}.
368      *
369      * @param builder the StringBuilder to add to
370      * @param htmlTableRow the target to process
371      * @param mode the {@link Mode} to use for processing
372      */
373     protected void appendTableRow(final HtmlSerializerTextBuilder builder,
374             final HtmlTableRow htmlTableRow, final Mode mode) {
375         boolean first = true;
376         for (final HtmlTableCell cell : htmlTableRow.getCells()) {
377             if (!first) {
378                 builder.appendBlank();
379             }
380             else {
381                 first = false;
382             }
383             appendChildren(builder, cell, mode); // trim?
384         }
385     }
386 
387     /**
388      * Check domNode visibility.
389      * @param domNode the node to check
390      * @return true or false
391      */
392     protected boolean isDisplayed(final DomNode domNode) {
393         return domNode.isDisplayed();
394     }
395 
396     /**
397      * Process {@link HtmlTextArea}.
398      *
399      * @param builder the StringBuilder to add to
400      * @param htmlTextArea the target to process
401      * @param mode the {@link Mode} to use for processing
402      */
403     protected void appendTextArea(final HtmlSerializerTextBuilder builder,
404             final HtmlTextArea htmlTextArea, final Mode mode) {
405         if (isDisplayed(htmlTextArea)) {
406             builder.append(htmlTextArea.getDefaultValue(), whiteSpaceStyle(htmlTextArea, Mode.PRE));
407             builder.trimRight(Mode.PRE);
408         }
409     }
410 
411     /**
412      * Process {@link HtmlTable}.
413      *
414      * @param builder the StringBuilder to add to
415      * @param htmlTable the target to process
416      * @param mode the {@link Mode} to use for processing
417      */
418     protected void appendTable(final HtmlSerializerTextBuilder builder,
419             final HtmlTable htmlTable, final Mode mode) {
420         builder.appendBlockSeparator();
421         final String caption = htmlTable.getCaptionText();
422         if (caption != null) {
423             builder.append(caption, mode);
424             builder.appendBlockSeparator();
425         }
426 
427         boolean first = true;
428 
429         // first thead has to be displayed first and first tfoot has to be displayed last
430         final HtmlTableHeader tableHeader = htmlTable.getHeader();
431         if (tableHeader != null) {
432             first = appendTableRows(builder, mode, tableHeader.getRows(), true, null, null);
433         }
434         final HtmlTableFooter tableFooter = htmlTable.getFooter();
435 
436         final List<HtmlTableRow> tableRows = htmlTable.getRows();
437         first = appendTableRows(builder, mode, tableRows, first, tableHeader, tableFooter);
438 
439         if (tableFooter != null) {
440             first = appendTableRows(builder, mode, tableFooter.getRows(), first, null, null);
441         }
442         else if (tableRows.isEmpty()) {
443             final DomNode firstChild = htmlTable.getFirstChild();
444             if (firstChild != null) {
445                 appendNode(builder, firstChild, mode);
446             }
447         }
448 
449         builder.appendBlockSeparator();
450     }
451 
452     /**
453      * Process {@link HtmlTableRow}.
454      *
455      * @param builder the StringBuilder to add to
456      * @param mode the {@link Mode} to use for processing
457      * @param rows the rows
458      * @param first if true this is the first one
459      * @param skipParent1 skip row if the parent is this
460      * @param skipParent2 skip row if the parent is this
461      * @return true if this was the first one
462      */
463     protected boolean appendTableRows(final HtmlSerializerTextBuilder builder, final Mode mode,
464             final List<HtmlTableRow> rows, boolean first, final TableRowGroup skipParent1,
465             final TableRowGroup skipParent2) {
466         for (final HtmlTableRow row : rows) {
467             if (row.getParentNode() == skipParent1 || row.getParentNode() == skipParent2) {
468                 continue;
469             }
470             if (!first) {
471                 builder.appendBlockSeparator();
472             }
473             first = false;
474             appendTableRow(builder, row, mode);
475         }
476         return first;
477     }
478 
479     /**
480      * Process {@link HtmlSelect}.
481      *
482      * @param builder the StringBuilder to add to
483      * @param htmlSelect the target to process
484      * @param mode the {@link Mode} to use for processing
485      */
486     protected void appendSelect(final HtmlSerializerTextBuilder builder,
487             final HtmlSelect htmlSelect, final Mode mode) {
488         builder.appendBlockSeparator();
489         boolean leadingNlPending = false;
490         final Mode selectMode = whiteSpaceStyle(htmlSelect, mode);
491         for (final DomNode item : htmlSelect.getChildren()) {
492             if (leadingNlPending) {
493                 builder.appendBlockSeparator();
494                 leadingNlPending = false;
495             }
496 
497             builder.resetContentAdded();
498             appendNode(builder, item, whiteSpaceStyle(item, selectMode));
499             if (!leadingNlPending && builder.contentAdded_) {
500                 leadingNlPending = true;
501             }
502         }
503         builder.appendBlockSeparator();
504     }
505 
506     /**
507      * Process {@link HtmlSelect}.
508      *
509      * @param builder the StringBuilder to add to
510      * @param htmlOption the target to process
511      * @param mode the {@link Mode} to use for processing
512      */
513     protected void appendOption(final HtmlSerializerTextBuilder builder,
514             final HtmlOption htmlOption, final Mode mode) {
515         appendChildren(builder, htmlOption, mode);
516     }
517 
518     /**
519      * Process {@link HtmlOrderedList}.
520      *
521      * @param builder the StringBuilder to add to
522      * @param htmlOrderedList the OL element
523      * @param mode the {@link Mode} to use for processing
524      */
525     protected void appendOrderedList(final HtmlSerializerTextBuilder builder,
526             final HtmlOrderedList htmlOrderedList, final Mode mode) {
527         builder.appendBlockSeparator();
528         boolean leadingNlPending = false;
529         final Mode olMode = whiteSpaceStyle(htmlOrderedList, mode);
530         for (final DomNode item : htmlOrderedList.getChildren()) {
531             if (leadingNlPending) {
532                 builder.appendBlockSeparator();
533                 leadingNlPending = false;
534             }
535 
536             builder.resetContentAdded();
537             appendNode(builder, item, whiteSpaceStyle(item, olMode));
538             if (!leadingNlPending && builder.contentAdded_) {
539                 leadingNlPending = true;
540             }
541         }
542         builder.appendBlockSeparator();
543     }
544 
545     /**
546      * Process {@link HtmlUnorderedList}.
547      * @param builder the StringBuilder to add to
548      * @param htmlUnorderedList the target to process
549      * @param mode the {@link Mode} to use for processing
550      */
551     protected void appendUnorderedList(final HtmlSerializerTextBuilder builder,
552                     final HtmlUnorderedList htmlUnorderedList, final Mode mode) {
553         builder.appendBlockSeparator();
554         boolean leadingNlPending = false;
555         final Mode ulMode = whiteSpaceStyle(htmlUnorderedList, mode);
556         for (final DomNode item : htmlUnorderedList.getChildren()) {
557             if (leadingNlPending) {
558                 builder.appendBlockSeparator();
559                 leadingNlPending = false;
560             }
561 
562             builder.resetContentAdded();
563             appendNode(builder, item, whiteSpaceStyle(item, ulMode));
564             if (!leadingNlPending && builder.contentAdded_) {
565                 leadingNlPending = true;
566             }
567         }
568         builder.appendBlockSeparator();
569     }
570 
571     /**
572      * Process {@link HtmlPreformattedText}.
573      *
574      * @param builder the StringBuilder to add to
575      * @param htmlPreformattedText the target to process
576      * @param mode the {@link Mode} to use for processing
577      */
578     protected void appendPreformattedText(final HtmlSerializerTextBuilder builder,
579             final HtmlPreformattedText htmlPreformattedText, final Mode mode) {
580         if (isDisplayed(htmlPreformattedText)) {
581             builder.appendBlockSeparator();
582             appendChildren(builder, htmlPreformattedText, whiteSpaceStyle(htmlPreformattedText, Mode.PRE));
583             builder.appendBlockSeparator();
584         }
585     }
586 
587     /**
588      * Process {@link HtmlInlineFrame}.
589      *
590      * @param builder the StringBuilder to add to
591      * @param htmlInlineFrame the target to process
592      * @param mode the {@link Mode} to use for processing
593      */
594     protected void appendInlineFrame(final HtmlSerializerTextBuilder builder,
595             final HtmlInlineFrame htmlInlineFrame, final Mode mode) {
596         if (isDisplayed(htmlInlineFrame)) {
597             builder.appendBlockSeparator();
598             final Page page = htmlInlineFrame.getEnclosedPage();
599             if (page instanceof SgmlPage) {
600                 builder.append(((SgmlPage) page).asNormalizedText(), mode);
601             }
602             builder.appendBlockSeparator();
603         }
604     }
605 
606     /**
607      * Process {@link DomText}.
608      *
609      * @param builder the StringBuilder to add to
610      * @param domText the target to process
611      * @param mode the {@link Mode} to use for processing
612      */
613     protected void appendText(final HtmlSerializerTextBuilder builder, final DomText domText, final Mode mode) {
614         final DomNode parent = domText.getParentNode();
615         if (parent instanceof HtmlTitle
616                 || parent instanceof HtmlScript) {
617             builder.append(domText.getData(), Mode.WHITE_SPACE_PRE_LINE);
618         }
619 
620         if (parent == null
621                 || parent instanceof HtmlTitle
622                 || parent instanceof HtmlScript
623                 || isDisplayed(parent)) {
624             builder.append(domText.getData(), mode);
625         }
626     }
627 
628     /**
629      * Process {@link DomComment}.
630      *
631      * @param builder the StringBuilder to add to
632      * @param domComment the target to process
633      * @param mode the {@link Mode} to use for processing
634      */
635     protected void appendComment(final HtmlSerializerTextBuilder builder,
636             final DomComment domComment, final Mode mode) {
637         // nothing to do
638     }
639 
640     /**
641      * Process {@link HtmlBreak}.
642      *
643      * @param builder the StringBuilder to add to
644      * @param htmlBreak the target to process
645      * @param mode the {@link Mode} to use for processing
646      */
647     protected void appendBreak(final HtmlSerializerTextBuilder builder,
648             final HtmlBreak htmlBreak, final Mode mode) {
649         builder.appendBreak(mode);
650     }
651 
652     /**
653      * Process {@link HtmlCheckBoxInput}.
654      *
655      * @param builder the StringBuilder to add to
656      * @param htmlCheckBoxInput the target to process
657      * @param mode the {@link Mode} to use for processing
658      */
659     protected void appendCheckBoxInput(final HtmlSerializerTextBuilder builder,
660                     final HtmlCheckBoxInput htmlCheckBoxInput, final Mode mode) {
661         // nothing to do
662     }
663 
664     /**
665      * Process {@link HtmlRadioButtonInput}.
666      *
667      * @param builder the StringBuilder to add to
668      * @param htmlRadioButtonInput the target to process
669      * @param mode the {@link Mode} to use for processing
670      */
671     protected void appendRadioButtonInput(final HtmlSerializerTextBuilder builder,
672             final HtmlRadioButtonInput htmlRadioButtonInput, final Mode mode) {
673         // nothing to do
674     }
675 
676     protected Mode whiteSpaceStyle(final DomNode domNode, final Mode defaultMode) {
677         final Page page = domNode.getPage();
678         if (page != null) {
679             final WebWindow window = page.getEnclosingWindow();
680             if (window.getWebClient().getOptions().isCssEnabled()) {
681                 DomNode node = domNode;
682                 while (node != null) {
683                     if (node instanceof DomElement) {
684                         final ComputedCssStyleDeclaration style = window.getComputedStyle((DomElement) node, null);
685                         final String value = style.getStyleAttribute(Definition.WHITE_SPACE, false);
686                         if (StringUtils.isNoneEmpty(value)) {
687                             if ("normal".equalsIgnoreCase(value)) {
688                                 return Mode.WHITE_SPACE_NORMAL;
689                             }
690                             if ("nowrap".equalsIgnoreCase(value)) {
691                                 return Mode.WHITE_SPACE_NORMAL;
692                             }
693                             if ("pre".equalsIgnoreCase(value)) {
694                                 return Mode.WHITE_SPACE_PRE;
695                             }
696                             if ("pre-wrap".equalsIgnoreCase(value)) {
697                                 return Mode.WHITE_SPACE_PRE;
698                             }
699                             if ("pre-line".equalsIgnoreCase(value)) {
700                                 return Mode.WHITE_SPACE_PRE_LINE;
701                             }
702                         }
703                     }
704                     node = node.getParentNode();
705                 }
706             }
707         }
708         return defaultMode;
709     }
710 
711     protected Mode updateWhiteSpaceStyle(final DomNode domNode, final Mode defaultMode) {
712         final Page page = domNode.getPage();
713         if (page != null) {
714             final WebWindow window = page.getEnclosingWindow();
715             if (window.getWebClient().getOptions().isCssEnabled()) {
716                 if (domNode instanceof DomElement) {
717                     final ComputedCssStyleDeclaration style = window.getComputedStyle((DomElement) domNode, null);
718                     final String value = style.getStyleAttribute(Definition.WHITE_SPACE, false);
719                     if (StringUtils.isNoneEmpty(value)) {
720                         if ("normal".equalsIgnoreCase(value)) {
721                             return Mode.WHITE_SPACE_NORMAL;
722                         }
723                         if ("nowrap".equalsIgnoreCase(value)) {
724                             return Mode.WHITE_SPACE_NORMAL;
725                         }
726                         if ("pre".equalsIgnoreCase(value)) {
727                             return Mode.WHITE_SPACE_PRE;
728                         }
729                         if ("pre-wrap".equalsIgnoreCase(value)) {
730                             return Mode.WHITE_SPACE_PRE;
731                         }
732                         if ("pre-line".equalsIgnoreCase(value)) {
733                             return Mode.WHITE_SPACE_PRE_LINE;
734                         }
735                     }
736                 }
737             }
738         }
739         return defaultMode;
740     }
741 
742     /**
743      * Helper to compose the text for the serializer based on several modes.
744      */
745     protected static class HtmlSerializerTextBuilder {
746         /** Mode. */
747         protected enum Mode {
748             /**
749              * The mode for the pre tag.
750              */
751             PRE,
752 
753             /**
754              * Sequences of white space are collapsed. Newline characters
755              * in the source are handled the same as other white space.
756              * Lines are broken as necessary to fill line boxes.
757              */
758             WHITE_SPACE_NORMAL,
759 
760             /**
761              * Sequences of white space are preserved. Lines are only broken
762              * at newline characters in the source and at <br> elements.
763              */
764             WHITE_SPACE_PRE,
765 
766             /**
767              * Sequences of white space are collapsed. Lines are broken
768              * at newline characters, at <br>, and as necessary
769              * to fill line boxes.
770              */
771             WHITE_SPACE_PRE_LINE
772         }
773 
774         private enum State {
775             DEFAULT,
776             EMPTY,
777             BLANK_AT_END,
778             BLANK_AT_END_AFTER_NEWLINE,
779             NEWLINE_AT_END,
780             BREAK_AT_END,
781             BLOCK_SEPARATOR_AT_END
782         }
783 
784         private State state_;
785         private final StringBuilder builder_;
786         private int trimRightPos_;
787         private boolean contentAdded_;
788 
/**
 * Creates an empty builder: no text collected so far, the state is
 * {@link State#EMPTY} and the right trim position is at the start.
 */
public HtmlSerializerTextBuilder() {
    state_ = State.EMPTY;
    trimRightPos_ = 0;
    builder_ = new StringBuilder();
}
797 
798         /**
799          * Append the provided content.
800          * see https://drafts.csswg.org/css-text-3/#white-space
801          *
802          * @param content the content to add
803          * @param mode the {@link Mode}
804          */
805         public void append(final String content, final Mode mode) {
806             if (content == null) {
807                 return;
808             }
809             int length = content.length();
810             if (length == 0) {
811                 return;
812             }
813 
814             length--;
815             int i = -1;
816             for (char c : content.toCharArray()) {
817                 i++;
818 
819                 // handle \r
820                 if (c == '\r') {
821                     if (length != i) {
822                         continue;
823                     }
824                     c = '\n';
825                 }
826 
827                 if (c == '\n') {
828                     if (mode == Mode.WHITE_SPACE_PRE) {
829                         switch (state_) {
830                             case EMPTY:
831                             case BLOCK_SEPARATOR_AT_END:
832                                 break;
833                             default:
834                                 builder_.append('\n');
835                                 state_ = State.NEWLINE_AT_END;
836                                 trimRightPos_ = builder_.length();
837                                 break;
838                         }
839                         continue;
840                     }
841 
842                     if (mode == Mode.PRE) {
843                         builder_.append('\n');
844                         state_ = State.NEWLINE_AT_END;
845                         trimRightPos_ = builder_.length();
846 
847                         continue;
848                     }
849 
850                     if (mode == Mode.WHITE_SPACE_PRE_LINE) {
851                         switch (state_) {
852                             case EMPTY:
853                             case BLOCK_SEPARATOR_AT_END:
854                                 break;
855                             default:
856                                 builder_.append('\n');
857                                 state_ = State.NEWLINE_AT_END;
858                                 trimRightPos_ = builder_.length();
859                                 break;
860                         }
861                         continue;
862                     }
863 
864                     switch (state_) {
865                         case EMPTY:
866                         case BLANK_AT_END:
867                         case BLANK_AT_END_AFTER_NEWLINE:
868                         case BLOCK_SEPARATOR_AT_END:
869                         case NEWLINE_AT_END:
870                         case BREAK_AT_END:
871                             break;
872                         default:
873                             builder_.append(' ');
874                             state_ = State.BLANK_AT_END;
875                             break;
876                     }
877                     continue;
878                 }
879 
880                 if (c == ' ' || c == '\t' || c == '\f') {
881                     if (mode == Mode.WHITE_SPACE_PRE || mode == Mode.PRE) {
882                         appendBlank();
883                         continue;
884                     }
885 
886                     if (mode == Mode.WHITE_SPACE_PRE_LINE) {
887                         switch (state_) {
888                             case EMPTY:
889                             case BLANK_AT_END:
890                             case BLANK_AT_END_AFTER_NEWLINE:
891                             case BREAK_AT_END:
892                                 break;
893                             default:
894                                 builder_.append(' ');
895                                 state_ = State.BLANK_AT_END;
896                                 break;
897                         }
898                         continue;
899                     }
900 
901                     switch (state_) {
902                         case EMPTY:
903                         case BLANK_AT_END:
904                         case BLANK_AT_END_AFTER_NEWLINE:
905                         case BLOCK_SEPARATOR_AT_END:
906                         case NEWLINE_AT_END:
907                         case BREAK_AT_END:
908                             break;
909                         default:
910                             builder_.append(' ');
911                             state_ = State.BLANK_AT_END;
912                             break;
913                     }
914                     continue;
915                 }
916 
917                 if (c == (char) 160) {
918                     appendBlank();
919                     if (mode == Mode.WHITE_SPACE_NORMAL || mode == Mode.WHITE_SPACE_PRE_LINE) {
920                         state_ = State.DEFAULT;
921                     }
922                     continue;
923                 }
924                 builder_.append(c);
925                 state_ = State.DEFAULT;
926                 trimRightPos_ = builder_.length();
927                 contentAdded_ = true;
928             }
929         }
930 
931         /**
932          * Append a block separator.
933          */
934         public void appendBlockSeparator() {
935             switch (state_) {
936                 case EMPTY:
937                     break;
938                 case BLANK_AT_END:
939                     builder_.setLength(trimRightPos_);
940                     if (builder_.length() == 0) {
941                         state_ = State.EMPTY;
942                     }
943                     else {
944                         builder_.append('\n');
945                         state_ = State.BLOCK_SEPARATOR_AT_END;
946                     }
947                     break;
948                 case BLANK_AT_END_AFTER_NEWLINE:
949                     builder_.setLength(trimRightPos_ - 1);
950                     if (builder_.length() == 0) {
951                         state_ = State.EMPTY;
952                     }
953                     else {
954                         builder_.append('\n');
955                         state_ = State.BLOCK_SEPARATOR_AT_END;
956                     }
957                     break;
958                 case BLOCK_SEPARATOR_AT_END:
959                     break;
960                 case NEWLINE_AT_END:
961                 case BREAK_AT_END:
962                     builder_.setLength(builder_.length() - 1);
963                     trimRightPos_ = trimRightPos_ - 1;
964                     if (builder_.length() == 0) {
965                         state_ = State.EMPTY;
966                     }
967                     else {
968                         builder_.append('\n');
969                         state_ = State.BLOCK_SEPARATOR_AT_END;
970                     }
971                     break;
972                 default:
973                     builder_.append('\n');
974                     state_ = State.BLOCK_SEPARATOR_AT_END;
975                     break;
976             }
977         }
978 
979         /**
980          * Append a break.
981          *
982          * @param mode the {@link Mode}
983          */
984         public void appendBreak(final Mode mode) {
985             builder_.setLength(trimRightPos_);
986 
987             builder_.append('\n');
988             state_ = State.BREAK_AT_END;
989             trimRightPos_ = builder_.length();
990         }
991 
992         /**
993          * Append a blank.
994          */
995         public void appendBlank() {
996             builder_.append(' ');
997             state_ = State.BLANK_AT_END;
998             trimRightPos_ = builder_.length();
999         }
1000 
1001         /**
1002          * Remove all trailing whitespace from the end.
1003          *
1004          * @param mode the {@link Mode}
1005          */
1006         public void trimRight(final Mode mode) {
1007             if (mode == Mode.PRE) {
1008                 switch (state_) {
1009                     case BLOCK_SEPARATOR_AT_END:
1010                     case NEWLINE_AT_END:
1011                     case BREAK_AT_END:
1012                         if (trimRightPos_ == builder_.length()) {
1013                             trimRightPos_--;
1014                         }
1015                         break;
1016                     default:
1017                         break;
1018                 }
1019             }
1020 
1021             builder_.setLength(trimRightPos_);
1022             state_ = State.DEFAULT;
1023             if (builder_.length() == 0) {
1024                 state_ = State.EMPTY;
1025             }
1026         }
1027 
1028         /**
1029          * @return true if some content was already added
1030          */
1031         public boolean wasContentAdded() {
1032             return contentAdded_;
1033         }
1034 
1035         /**
1036          * Resets the contentAdded state to false.
1037          */
1038         public void resetContentAdded() {
1039             contentAdded_ = false;
1040         }
1041 
1042         /**
1043          * @return the constructed text.
1044          */
1045         public String getText() {
1046             return builder_.substring(0, trimRightPos_);
1047         }
1048     }
1049 }