1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit.javascript.host.xml;
16
17 import java.util.Arrays;
18 import java.util.HashSet;
19 import java.util.Set;
20
21 import org.htmlunit.SgmlPage;
22 import org.htmlunit.html.DomAttr;
23 import org.htmlunit.html.DomElement;
24 import org.htmlunit.html.DomNode;
25 import org.htmlunit.html.HtmlAbbreviated;
26 import org.htmlunit.html.HtmlAcronym;
27 import org.htmlunit.html.HtmlAddress;
28 import org.htmlunit.html.HtmlAnchor;
29 import org.htmlunit.html.HtmlAudio;
30 import org.htmlunit.html.HtmlBidirectionalOverride;
31 import org.htmlunit.html.HtmlBig;
32 import org.htmlunit.html.HtmlBlockQuote;
33 import org.htmlunit.html.HtmlBody;
34 import org.htmlunit.html.HtmlBold;
35 import org.htmlunit.html.HtmlButton;
36 import org.htmlunit.html.HtmlCanvas;
37 import org.htmlunit.html.HtmlCaption;
38 import org.htmlunit.html.HtmlCenter;
39 import org.htmlunit.html.HtmlCitation;
40 import org.htmlunit.html.HtmlCode;
41 import org.htmlunit.html.HtmlDefinition;
42 import org.htmlunit.html.HtmlDefinitionDescription;
43 import org.htmlunit.html.HtmlDefinitionList;
44 import org.htmlunit.html.HtmlDefinitionTerm;
45 import org.htmlunit.html.HtmlDeletedText;
46 import org.htmlunit.html.HtmlDirectory;
47 import org.htmlunit.html.HtmlDivision;
48 import org.htmlunit.html.HtmlEmbed;
49 import org.htmlunit.html.HtmlEmphasis;
50 import org.htmlunit.html.HtmlExample;
51 import org.htmlunit.html.HtmlFieldSet;
52 import org.htmlunit.html.HtmlFont;
53 import org.htmlunit.html.HtmlForm;
54 import org.htmlunit.html.HtmlFrame;
55 import org.htmlunit.html.HtmlFrameSet;
56 import org.htmlunit.html.HtmlHead;
57 import org.htmlunit.html.HtmlHeading1;
58 import org.htmlunit.html.HtmlHeading2;
59 import org.htmlunit.html.HtmlHeading3;
60 import org.htmlunit.html.HtmlHeading4;
61 import org.htmlunit.html.HtmlHeading5;
62 import org.htmlunit.html.HtmlHeading6;
63 import org.htmlunit.html.HtmlHeadingGroup;
64 import org.htmlunit.html.HtmlHtml;
65 import org.htmlunit.html.HtmlInlineFrame;
66 import org.htmlunit.html.HtmlInlineQuotation;
67 import org.htmlunit.html.HtmlInsertedText;
68 import org.htmlunit.html.HtmlItalic;
69 import org.htmlunit.html.HtmlKeyboard;
70 import org.htmlunit.html.HtmlLabel;
71 import org.htmlunit.html.HtmlLegend;
72 import org.htmlunit.html.HtmlListItem;
73 import org.htmlunit.html.HtmlListing;
74 import org.htmlunit.html.HtmlMap;
75 import org.htmlunit.html.HtmlMarquee;
76 import org.htmlunit.html.HtmlMenu;
77 import org.htmlunit.html.HtmlNoBreak;
78 import org.htmlunit.html.HtmlNoEmbed;
79 import org.htmlunit.html.HtmlNoFrames;
80 import org.htmlunit.html.HtmlNoScript;
81 import org.htmlunit.html.HtmlObject;
82 import org.htmlunit.html.HtmlOption;
83 import org.htmlunit.html.HtmlOptionGroup;
84 import org.htmlunit.html.HtmlOrderedList;
85 import org.htmlunit.html.HtmlParagraph;
86 import org.htmlunit.html.HtmlPlainText;
87 import org.htmlunit.html.HtmlPreformattedText;
88 import org.htmlunit.html.HtmlS;
89 import org.htmlunit.html.HtmlSample;
90 import org.htmlunit.html.HtmlScript;
91 import org.htmlunit.html.HtmlSelect;
92 import org.htmlunit.html.HtmlSmall;
93 import org.htmlunit.html.HtmlSource;
94 import org.htmlunit.html.HtmlSpan;
95 import org.htmlunit.html.HtmlStrike;
96 import org.htmlunit.html.HtmlStrong;
97 import org.htmlunit.html.HtmlStyle;
98 import org.htmlunit.html.HtmlSubscript;
99 import org.htmlunit.html.HtmlSuperscript;
100 import org.htmlunit.html.HtmlTable;
101 import org.htmlunit.html.HtmlTableBody;
102 import org.htmlunit.html.HtmlTableColumn;
103 import org.htmlunit.html.HtmlTableColumnGroup;
104 import org.htmlunit.html.HtmlTableDataCell;
105 import org.htmlunit.html.HtmlTableFooter;
106 import org.htmlunit.html.HtmlTableHeader;
107 import org.htmlunit.html.HtmlTableHeaderCell;
108 import org.htmlunit.html.HtmlTableRow;
109 import org.htmlunit.html.HtmlTeletype;
110 import org.htmlunit.html.HtmlTextArea;
111 import org.htmlunit.html.HtmlTitle;
112 import org.htmlunit.html.HtmlUnderlined;
113 import org.htmlunit.html.HtmlUnorderedList;
114 import org.htmlunit.html.HtmlVariable;
115 import org.htmlunit.html.HtmlVideo;
116 import org.htmlunit.html.HtmlWordBreak;
117 import org.htmlunit.javascript.HtmlUnitScriptable;
118 import org.htmlunit.javascript.configuration.JsxClass;
119 import org.htmlunit.javascript.configuration.JsxConstructor;
120 import org.htmlunit.javascript.configuration.JsxFunction;
121 import org.htmlunit.javascript.host.Element;
122 import org.htmlunit.javascript.host.dom.Document;
123 import org.htmlunit.javascript.host.dom.DocumentFragment;
124 import org.htmlunit.javascript.host.dom.Node;
125 import org.htmlunit.util.StringUtils;
126 import org.w3c.dom.NamedNodeMap;
127
128
129
130
131
132
133
134
135
136
137 @JsxClass
138 public class XMLSerializer extends HtmlUnitScriptable {
139
140
141
142 private static final Set<String> NON_EMPTY_TAGS = new HashSet<>(Arrays.asList(
143 HtmlAbbreviated.TAG_NAME, HtmlAcronym.TAG_NAME,
144 HtmlAnchor.TAG_NAME, HtmlAddress.TAG_NAME, HtmlAudio.TAG_NAME,
145 HtmlBidirectionalOverride.TAG_NAME, HtmlBig.TAG_NAME,
146 HtmlBlockQuote.TAG_NAME, HtmlBody.TAG_NAME, HtmlBold.TAG_NAME,
147 HtmlButton.TAG_NAME, HtmlCanvas.TAG_NAME, HtmlCaption.TAG_NAME,
148 HtmlCenter.TAG_NAME, HtmlCitation.TAG_NAME, HtmlCode.TAG_NAME,
149 HtmlDefinition.TAG_NAME, HtmlDefinitionDescription.TAG_NAME,
150 HtmlDeletedText.TAG_NAME, HtmlDirectory.TAG_NAME,
151 HtmlDivision.TAG_NAME,
152 HtmlDefinitionList.TAG_NAME,
153 HtmlDefinitionTerm.TAG_NAME, HtmlEmbed.TAG_NAME,
154 HtmlEmphasis.TAG_NAME, HtmlFieldSet.TAG_NAME,
155 HtmlFont.TAG_NAME, HtmlForm.TAG_NAME,
156 HtmlFrame.TAG_NAME, HtmlFrameSet.TAG_NAME, HtmlHeading1.TAG_NAME,
157 HtmlHeading2.TAG_NAME, HtmlHeading3.TAG_NAME,
158 HtmlHeading4.TAG_NAME, HtmlHeading5.TAG_NAME,
159 HtmlHeading6.TAG_NAME, HtmlHead.TAG_NAME, HtmlHeadingGroup.TAG_NAME,
160 HtmlHtml.TAG_NAME, HtmlInlineFrame.TAG_NAME,
161 HtmlInsertedText.TAG_NAME,
162 HtmlItalic.TAG_NAME, HtmlKeyboard.TAG_NAME, HtmlLabel.TAG_NAME,
163 HtmlLegend.TAG_NAME, HtmlListing.TAG_NAME, HtmlListItem.TAG_NAME,
164 HtmlMap.TAG_NAME, HtmlMarquee.TAG_NAME,
165 HtmlMenu.TAG_NAME,
166 HtmlNoBreak.TAG_NAME, HtmlNoEmbed.TAG_NAME, HtmlNoFrames.TAG_NAME,
167 HtmlNoScript.TAG_NAME, HtmlObject.TAG_NAME, HtmlOrderedList.TAG_NAME,
168 HtmlOptionGroup.TAG_NAME, HtmlOption.TAG_NAME, HtmlParagraph.TAG_NAME,
169 HtmlPlainText.TAG_NAME, HtmlPreformattedText.TAG_NAME,
170 HtmlInlineQuotation.TAG_NAME, HtmlS.TAG_NAME, HtmlSample.TAG_NAME,
171 HtmlScript.TAG_NAME, HtmlSelect.TAG_NAME, HtmlSmall.TAG_NAME,
172 HtmlSource.TAG_NAME, HtmlSpan.TAG_NAME,
173 HtmlStrike.TAG_NAME, HtmlStrong.TAG_NAME, HtmlStyle.TAG_NAME,
174 HtmlSubscript.TAG_NAME, HtmlSuperscript.TAG_NAME, HtmlTitle.TAG_NAME,
175 HtmlTable.TAG_NAME, HtmlTableColumn.TAG_NAME, HtmlTableColumnGroup.TAG_NAME,
176 HtmlTableBody.TAG_NAME, HtmlTableDataCell.TAG_NAME, HtmlTableHeaderCell.TAG_NAME,
177 HtmlTableRow.TAG_NAME, HtmlTextArea.TAG_NAME, HtmlTableFooter.TAG_NAME,
178 HtmlTableHeader.TAG_NAME, HtmlTeletype.TAG_NAME, HtmlUnderlined.TAG_NAME,
179 HtmlUnorderedList.TAG_NAME, HtmlVariable.TAG_NAME, HtmlVideo.TAG_NAME,
180 HtmlWordBreak.TAG_NAME, HtmlExample.TAG_NAME
181 ));
182
183
184
185
186 @JsxConstructor
187 public void jsConstructor() {
188
189 }
190
191
192
193
194
195
196 @JsxFunction
197 public String serializeToString(Node root) {
198 if (root == null) {
199 return "";
200 }
201
202 if (root instanceof DocumentFragment) {
203 Node node = root.getFirstChild();
204 if (node == null) {
205 return "";
206 }
207
208 final StringBuilder builder = new StringBuilder();
209 while (node != null) {
210 builder.append(serializeToString(node));
211 node = node.getNextSibling();
212 }
213 return builder.toString().trim();
214 }
215
216 final boolean rootIsDocument = root instanceof Document;
217 if (rootIsDocument) {
218 root = ((Document) root).getDocumentElement();
219 }
220
221 if (root instanceof Element) {
222 final StringBuilder builder = new StringBuilder();
223 final DomNode node = root.getDomNodeOrDie();
224 final SgmlPage page = node.getPage();
225 final boolean isHtmlPage = page != null && page.isHtmlPage();
226
227 String forcedNamespace = null;
228 if (!rootIsDocument && isHtmlPage) {
229 forcedNamespace = "http://www.w3.org/1999/xhtml";
230 }
231 toXml(1, node, builder, forcedNamespace);
232
233 return builder.toString();
234 }
235
236 return root.getDomNodeOrDie().asXml();
237 }
238
239 private void toXml(final int indent,
240 final DomNode node, final StringBuilder builder, final String foredNamespace) {
241 final String nodeName = node.getNodeName();
242 builder.append('<').append(nodeName);
243
244 String optionalPrefix = "";
245 final String namespaceURI = node.getNamespaceURI();
246 final String prefix = node.getPrefix();
247 if (namespaceURI != null && prefix != null) {
248 boolean sameNamespace = false;
249 for (DomNode parentNode = node.getParentNode(); parentNode instanceof DomElement;
250 parentNode = parentNode.getParentNode()) {
251 if (namespaceURI.equals(parentNode.getNamespaceURI())) {
252 sameNamespace = true;
253 break;
254 }
255 }
256 if (node.getParentNode() == null || !sameNamespace) {
257 ((DomElement) node).setAttribute("xmlns:" + prefix, namespaceURI);
258 }
259 }
260 else if (foredNamespace != null) {
261 builder.append(" xmlns=\"").append(foredNamespace).append('"');
262 optionalPrefix = " ";
263 }
264
265 final NamedNodeMap attributesMap = node.getAttributes();
266 final int length = attributesMap.getLength();
267 for (int i = 0; i < length; i++) {
268 final DomAttr attrib = (DomAttr) attributesMap.item(i);
269 builder.append(' ').append(attrib.getQualifiedName())
270 .append("=\"").append(attrib.getValue()).append('"');
271 }
272 boolean startTagClosed = false;
273 for (final DomNode child : node.getChildren()) {
274 if (!startTagClosed) {
275 builder.append(optionalPrefix).append('>');
276 startTagClosed = true;
277 }
278 switch (child.getNodeType()) {
279 case Node.ELEMENT_NODE:
280 toXml(indent + 1, child, builder, null);
281 break;
282
283 case Node.TEXT_NODE:
284 String value = child.getNodeValue();
285 value = StringUtils.escapeXmlChars(value);
286 builder.append(value);
287 break;
288
289 case Node.CDATA_SECTION_NODE:
290 case Node.COMMENT_NODE:
291 builder.append(child.asXml());
292 break;
293
294 default:
295 break;
296 }
297 }
298
299 if (startTagClosed) {
300 builder.append("</").append(nodeName).append('>');
301 }
302 else {
303 final String tagName = StringUtils.toRootLowerCase(nodeName);
304 if (NON_EMPTY_TAGS.contains(tagName)) {
305 builder.append("></").append(nodeName).append('>');
306 }
307 else {
308 builder.append(optionalPrefix).append("/>");
309 }
310 }
311 }
312
313 }