1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit.html;
16
17 import java.io.Serializable;
18
19 import org.w3c.dom.DOMException;
20 import org.w3c.dom.Node;
21 import org.w3c.dom.traversal.NodeFilter;
22
23
24
25
26
27
28
29
30
31
32
33
34
35 public class HtmlDomTreeWalker implements Serializable {
36
37 private final DomNode root_;
38 private DomNode currentNode_;
39 private final int whatToShow_;
40 private final NodeFilter filter_;
41 private final boolean expandEntityReferences_;
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58 public HtmlDomTreeWalker(final DomNode root, final int whatToShow, final NodeFilter filter,
59 final boolean expandEntityReferences) throws DOMException {
60 if (root == null) {
61 throw new IllegalArgumentException("root must not be null");
62 }
63 root_ = root;
64 whatToShow_ = whatToShow;
65 filter_ = filter;
66 expandEntityReferences_ = expandEntityReferences;
67 currentNode_ = root_;
68 }
69
70
71
72
73
74 public DomNode getRoot() {
75 return root_;
76 }
77
78
79
80
81
82 public int getWhatToShow() {
83 return whatToShow_;
84 }
85
86
87
88
89
90 public NodeFilter getFilter() {
91 return filter_;
92 }
93
94
95
96
97
98 @SuppressWarnings("PMD.BooleanGetMethodName")
99 public boolean getExpandEntityReferences() {
100 return expandEntityReferences_;
101 }
102
103
104
105
106
107 public DomNode getCurrentNode() {
108 return currentNode_;
109 }
110
111
112
113
114
115
116 public void setCurrentNode(final Node currentNode) throws DOMException {
117 if (currentNode == null) {
118 throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
119 "currentNode cannot be set to null");
120 }
121 currentNode_ = (DomNode) currentNode;
122 }
123
124
125
126
127
128 public DomNode nextNode() {
129 final DomNode leftChild = getEquivalentLogical(currentNode_.getFirstChild(), false);
130 if (leftChild != null) {
131 currentNode_ = leftChild;
132 return leftChild;
133 }
134 final DomNode rightSibling = getEquivalentLogical(currentNode_.getNextSibling(), false);
135 if (rightSibling != null) {
136 currentNode_ = rightSibling;
137 return rightSibling;
138 }
139
140 final DomNode uncle = getFirstUncleNode(currentNode_);
141 if (uncle != null) {
142 currentNode_ = uncle;
143 return uncle;
144 }
145
146 return null;
147 }
148
149
150
151
152
153 private DomNode getFirstUncleNode(final DomNode n) {
154 if (n == root_ || n == null) {
155 return null;
156 }
157
158 final DomNode parent = n.getParentNode();
159 if (parent == null) {
160 return null;
161 }
162
163 final DomNode uncle = getEquivalentLogical(parent.getNextSibling(), false);
164 if (uncle != null) {
165 return uncle;
166 }
167
168 return getFirstUncleNode(parent);
169 }
170
171
172
173
174
175
176
177
178
179
180
181 private DomNode getEquivalentLogical(final DomNode n, final boolean lookLeft) {
182
183 if (n == null) {
184 return null;
185 }
186 if (isNodeVisible(n)) {
187 return n;
188 }
189
190
191 if (isNodeSkipped(n)) {
192 final DomNode child;
193 if (lookLeft) {
194 child = getEquivalentLogical(n.getLastChild(), lookLeft);
195 }
196 else {
197 child = getEquivalentLogical(n.getFirstChild(), lookLeft);
198 }
199
200 if (child != null) {
201 return child;
202 }
203 }
204
205
206
207 return getSibling(n, lookLeft);
208 }
209
210
211
212
213 private boolean isNodeVisible(final Node n) {
214 if (acceptNode(n) == NodeFilter.FILTER_ACCEPT) {
215 if (filter_ == null || filter_.acceptNode(n) == NodeFilter.FILTER_ACCEPT) {
216 return expandEntityReferences_ || n.getParentNode() == null
217 || n.getParentNode().getNodeType() != Node.ENTITY_REFERENCE_NODE;
218 }
219 }
220 return false;
221 }
222
223
224
225
226
227
228
229
230
231 private short acceptNode(final Node n) {
232 final int flag = getFlagForNode(n);
233
234 if ((whatToShow_ & flag) != 0) {
235 return NodeFilter.FILTER_ACCEPT;
236 }
237
238 return NodeFilter.FILTER_SKIP;
239 }
240
241
242
243
244
245
246
247
248
249 public static int getFlagForNode(final Node node) {
250 return switch (node.getNodeType()) {
251 case Node.ELEMENT_NODE -> NodeFilter.SHOW_ELEMENT;
252 case Node.ATTRIBUTE_NODE -> NodeFilter.SHOW_ATTRIBUTE;
253 case Node.TEXT_NODE -> NodeFilter.SHOW_TEXT;
254 case Node.CDATA_SECTION_NODE -> NodeFilter.SHOW_CDATA_SECTION;
255 case Node.ENTITY_REFERENCE_NODE -> NodeFilter.SHOW_ENTITY_REFERENCE;
256 case Node.ENTITY_NODE -> NodeFilter.SHOW_ENTITY;
257 case Node.PROCESSING_INSTRUCTION_NODE -> NodeFilter.SHOW_PROCESSING_INSTRUCTION;
258 case Node.COMMENT_NODE -> NodeFilter.SHOW_COMMENT;
259 case Node.DOCUMENT_NODE -> NodeFilter.SHOW_DOCUMENT;
260 case Node.DOCUMENT_TYPE_NODE -> NodeFilter.SHOW_DOCUMENT_TYPE;
261 case Node.DOCUMENT_FRAGMENT_NODE -> NodeFilter.SHOW_DOCUMENT_FRAGMENT;
262 case Node.NOTATION_NODE -> NodeFilter.SHOW_NOTATION;
263 default -> 0;
264 };
265 }
266
267
268 private boolean isNodeSkipped(final Node n) {
269 return !isNodeVisible(n) && !isNodeRejected(n);
270 }
271
272
273 private boolean isNodeRejected(final Node n) {
274 if (acceptNode(n) == NodeFilter.FILTER_REJECT) {
275 return true;
276 }
277 if (filter_ != null && filter_.acceptNode(n) == NodeFilter.FILTER_REJECT) {
278 return true;
279 }
280 return !expandEntityReferences_ && n.getParentNode() != null
281 && n.getParentNode().getNodeType() == Node.ENTITY_REFERENCE_NODE;
282 }
283
284
285 private DomNode getSibling(final DomNode n, final boolean lookLeft) {
286 if (n == null) {
287 return null;
288 }
289
290 if (isNodeVisible(n)) {
291 return null;
292 }
293
294 final DomNode sibling;
295 if (lookLeft) {
296 sibling = n.getPreviousSibling();
297 }
298 else {
299 sibling = n.getNextSibling();
300 }
301
302 if (sibling == null) {
303
304 if (n == root_) {
305 return null;
306 }
307 return getSibling(n.getParentNode(), lookLeft);
308
309 }
310 return getEquivalentLogical(sibling, lookLeft);
311 }
312
313
314
315
316
317 public DomNode nextSibling() {
318 if (currentNode_ == root_) {
319 return null;
320 }
321
322 final DomNode newNode = getEquivalentLogical(currentNode_.getNextSibling(), false);
323
324 if (newNode != null) {
325 currentNode_ = newNode;
326 }
327
328 return newNode;
329 }
330
331
332
333
334
335 public DomNode parentNode() {
336 if (currentNode_ == root_) {
337 return null;
338 }
339
340 DomNode newNode = currentNode_;
341
342 do {
343 newNode = newNode.getParentNode();
344 }
345 while (newNode != null && !isNodeVisible(newNode) && newNode != root_);
346
347 if (newNode == null || !isNodeVisible(newNode)) {
348 return null;
349 }
350 currentNode_ = newNode;
351 return newNode;
352 }
353
354
355
356
357
358 public DomNode previousSibling() {
359 if (currentNode_ == root_) {
360 return null;
361 }
362
363 final DomNode newNode = getEquivalentLogical(currentNode_.getPreviousSibling(), true);
364
365 if (newNode != null) {
366 currentNode_ = newNode;
367 }
368
369 return newNode;
370 }
371
372
373
374
375
376 public DomNode lastChild() {
377 final DomNode newNode = getEquivalentLogical(currentNode_.getLastChild(), true);
378
379 if (newNode != null) {
380 currentNode_ = newNode;
381 }
382
383 return newNode;
384 }
385
386
387
388
389
390 public DomNode previousNode() {
391 final DomNode newNode = getPreviousNode(currentNode_);
392
393 if (newNode != null) {
394 currentNode_ = newNode;
395 }
396
397 return newNode;
398 }
399
400
401
402
403
404 private DomNode getPreviousNode(final DomNode n) {
405 if (n == root_) {
406 return null;
407 }
408 final DomNode left = getEquivalentLogical(n.getPreviousSibling(), true);
409 if (left == null) {
410 final DomNode parent = n.getParentNode();
411 if (parent == null) {
412 return null;
413 }
414 if (isNodeVisible(parent)) {
415 return parent;
416 }
417 }
418
419 DomNode follow = left;
420 if (follow != null) {
421 while (follow.hasChildNodes()) {
422 final DomNode toFollow = getEquivalentLogical(follow.getLastChild(), true);
423 if (toFollow == null) {
424 break;
425 }
426 follow = toFollow;
427 }
428 }
429 return follow;
430 }
431
432
433
434
435
436 public DomNode firstChild() {
437 final DomNode newNode = getEquivalentLogical(currentNode_.getFirstChild(), false);
438
439 if (newNode != null) {
440 currentNode_ = newNode;
441 }
442
443 return newNode;
444 }
445 }