1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit.html;
16
17 import java.io.Serializable;
18
19 import org.w3c.dom.DOMException;
20 import org.w3c.dom.Node;
21 import org.w3c.dom.traversal.NodeFilter;
22
23
24
25
26
27
28
29
30
31
32
33
34
35 public class HtmlDomTreeWalker implements Serializable {
36
37 private final DomNode root_;
38 private DomNode currentNode_;
39 private final int whatToShow_;
40 private final NodeFilter filter_;
41 private final boolean expandEntityReferences_;
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58 public HtmlDomTreeWalker(final DomNode root, final int whatToShow, final NodeFilter filter,
59 final boolean expandEntityReferences) throws DOMException {
60 if (root == null) {
61 throw new IllegalArgumentException("root must not be null");
62 }
63 root_ = root;
64 whatToShow_ = whatToShow;
65 filter_ = filter;
66 expandEntityReferences_ = expandEntityReferences;
67 currentNode_ = root_;
68 }
69
70
71
72
73
74 public DomNode getRoot() {
75 return root_;
76 }
77
78
79
80
81
82 public int getWhatToShow() {
83 return whatToShow_;
84 }
85
86
87
88
89
90 public NodeFilter getFilter() {
91 return filter_;
92 }
93
94
95
96
97
98 @SuppressWarnings("PMD.BooleanGetMethodName")
99 public boolean getExpandEntityReferences() {
100 return expandEntityReferences_;
101 }
102
103
104
105
106
107 public DomNode getCurrentNode() {
108 return currentNode_;
109 }
110
111
112
113
114
115
116 public void setCurrentNode(final Node currentNode) throws DOMException {
117 if (currentNode == null) {
118 throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
119 "currentNode cannot be set to null");
120 }
121 currentNode_ = (DomNode) currentNode;
122 }
123
124
125
126
127
128 public DomNode nextNode() {
129 final DomNode leftChild = getEquivalentLogical(currentNode_.getFirstChild(), false);
130 if (leftChild != null) {
131 currentNode_ = leftChild;
132 return leftChild;
133 }
134 final DomNode rightSibling = getEquivalentLogical(currentNode_.getNextSibling(), false);
135 if (rightSibling != null) {
136 currentNode_ = rightSibling;
137 return rightSibling;
138 }
139
140 final DomNode uncle = getFirstUncleNode(currentNode_);
141 if (uncle != null) {
142 currentNode_ = uncle;
143 return uncle;
144 }
145
146 return null;
147 }
148
149
150
151
152
153 private DomNode getFirstUncleNode(final DomNode n) {
154 if (n == root_ || n == null) {
155 return null;
156 }
157
158 final DomNode parent = n.getParentNode();
159 if (parent == null) {
160 return null;
161 }
162
163 final DomNode uncle = getEquivalentLogical(parent.getNextSibling(), false);
164 if (uncle != null) {
165 return uncle;
166 }
167
168 return getFirstUncleNode(parent);
169 }
170
171
172
173
174
175
176
177
178
179
180
181 private DomNode getEquivalentLogical(final DomNode n, final boolean lookLeft) {
182
183 if (n == null) {
184 return null;
185 }
186 if (isNodeVisible(n)) {
187 return n;
188 }
189
190
191 if (isNodeSkipped(n)) {
192 final DomNode child;
193 if (lookLeft) {
194 child = getEquivalentLogical(n.getLastChild(), lookLeft);
195 }
196 else {
197 child = getEquivalentLogical(n.getFirstChild(), lookLeft);
198 }
199
200 if (child != null) {
201 return child;
202 }
203 }
204
205
206
207 return getSibling(n, lookLeft);
208 }
209
210
211
212
213 private boolean isNodeVisible(final Node n) {
214 if (acceptNode(n) == NodeFilter.FILTER_ACCEPT) {
215 if (filter_ == null || filter_.acceptNode(n) == NodeFilter.FILTER_ACCEPT) {
216 return expandEntityReferences_ || n.getParentNode() == null
217 || n.getParentNode().getNodeType() != Node.ENTITY_REFERENCE_NODE;
218 }
219 }
220 return false;
221 }
222
223
224
225
226
227
228
229
230
231 private short acceptNode(final Node n) {
232 final int flag = getFlagForNode(n);
233
234 if ((whatToShow_ & flag) != 0) {
235 return NodeFilter.FILTER_ACCEPT;
236 }
237
238 return NodeFilter.FILTER_SKIP;
239 }
240
241
242
243
244
245
246
247
248
249 public static int getFlagForNode(final Node node) {
250 switch (node.getNodeType()) {
251 case Node.ELEMENT_NODE:
252 return NodeFilter.SHOW_ELEMENT;
253 case Node.ATTRIBUTE_NODE:
254 return NodeFilter.SHOW_ATTRIBUTE;
255 case Node.TEXT_NODE:
256 return NodeFilter.SHOW_TEXT;
257 case Node.CDATA_SECTION_NODE:
258 return NodeFilter.SHOW_CDATA_SECTION;
259 case Node.ENTITY_REFERENCE_NODE:
260 return NodeFilter.SHOW_ENTITY_REFERENCE;
261 case Node.ENTITY_NODE:
262 return NodeFilter.SHOW_ENTITY;
263 case Node.PROCESSING_INSTRUCTION_NODE:
264 return NodeFilter.SHOW_PROCESSING_INSTRUCTION;
265 case Node.COMMENT_NODE:
266 return NodeFilter.SHOW_COMMENT;
267 case Node.DOCUMENT_NODE:
268 return NodeFilter.SHOW_DOCUMENT;
269 case Node.DOCUMENT_TYPE_NODE:
270 return NodeFilter.SHOW_DOCUMENT_TYPE;
271 case Node.DOCUMENT_FRAGMENT_NODE:
272 return NodeFilter.SHOW_DOCUMENT_FRAGMENT;
273 case Node.NOTATION_NODE:
274 return NodeFilter.SHOW_NOTATION;
275 default:
276 return 0;
277 }
278 }
279
280
281 private boolean isNodeSkipped(final Node n) {
282 return !isNodeVisible(n) && !isNodeRejected(n);
283 }
284
285
286 private boolean isNodeRejected(final Node n) {
287 if (acceptNode(n) == NodeFilter.FILTER_REJECT) {
288 return true;
289 }
290 if (filter_ != null && filter_.acceptNode(n) == NodeFilter.FILTER_REJECT) {
291 return true;
292 }
293 return !expandEntityReferences_ && n.getParentNode() != null
294 && n.getParentNode().getNodeType() == Node.ENTITY_REFERENCE_NODE;
295 }
296
297
298 private DomNode getSibling(final DomNode n, final boolean lookLeft) {
299 if (n == null) {
300 return null;
301 }
302
303 if (isNodeVisible(n)) {
304 return null;
305 }
306
307 final DomNode sibling;
308 if (lookLeft) {
309 sibling = n.getPreviousSibling();
310 }
311 else {
312 sibling = n.getNextSibling();
313 }
314
315 if (sibling == null) {
316
317 if (n == root_) {
318 return null;
319 }
320 return getSibling(n.getParentNode(), lookLeft);
321
322 }
323 return getEquivalentLogical(sibling, lookLeft);
324 }
325
326
327
328
329
330 public DomNode nextSibling() {
331 if (currentNode_ == root_) {
332 return null;
333 }
334
335 final DomNode newNode = getEquivalentLogical(currentNode_.getNextSibling(), false);
336
337 if (newNode != null) {
338 currentNode_ = newNode;
339 }
340
341 return newNode;
342 }
343
344
345
346
347
348 public DomNode parentNode() {
349 if (currentNode_ == root_) {
350 return null;
351 }
352
353 DomNode newNode = currentNode_;
354
355 do {
356 newNode = newNode.getParentNode();
357 }
358 while (newNode != null && !isNodeVisible(newNode) && newNode != root_);
359
360 if (newNode == null || !isNodeVisible(newNode)) {
361 return null;
362 }
363 currentNode_ = newNode;
364 return newNode;
365 }
366
367
368
369
370
371 public DomNode previousSibling() {
372 if (currentNode_ == root_) {
373 return null;
374 }
375
376 final DomNode newNode = getEquivalentLogical(currentNode_.getPreviousSibling(), true);
377
378 if (newNode != null) {
379 currentNode_ = newNode;
380 }
381
382 return newNode;
383 }
384
385
386
387
388
389 public DomNode lastChild() {
390 final DomNode newNode = getEquivalentLogical(currentNode_.getLastChild(), true);
391
392 if (newNode != null) {
393 currentNode_ = newNode;
394 }
395
396 return newNode;
397 }
398
399
400
401
402
403 public DomNode previousNode() {
404 final DomNode newNode = getPreviousNode(currentNode_);
405
406 if (newNode != null) {
407 currentNode_ = newNode;
408 }
409
410 return newNode;
411 }
412
413
414
415
416
417 private DomNode getPreviousNode(final DomNode n) {
418 if (n == root_) {
419 return null;
420 }
421 final DomNode left = getEquivalentLogical(n.getPreviousSibling(), true);
422 if (left == null) {
423 final DomNode parent = n.getParentNode();
424 if (parent == null) {
425 return null;
426 }
427 if (isNodeVisible(parent)) {
428 return parent;
429 }
430 }
431
432 DomNode follow = left;
433 if (follow != null) {
434 while (follow.hasChildNodes()) {
435 final DomNode toFollow = getEquivalentLogical(follow.getLastChild(), true);
436 if (toFollow == null) {
437 break;
438 }
439 follow = toFollow;
440 }
441 }
442 return follow;
443 }
444
445
446
447
448
449 public DomNode firstChild() {
450 final DomNode newNode = getEquivalentLogical(currentNode_.getFirstChild(), false);
451
452 if (newNode != null) {
453 currentNode_ = newNode;
454 }
455
456 return newNode;
457 }
458 }