View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.util;
16  
17  import java.nio.charset.Charset;
18  import java.util.Locale;
19  import java.util.Map;
20  import java.util.concurrent.ConcurrentHashMap;
21  import java.util.regex.Matcher;
22  import java.util.regex.Pattern;
23  
24  import org.htmlunit.html.impl.Color;
25  
26  /**
27   * String utilities class for utility functions not covered by third party libraries.
28   *
29   * @author Daniel Gredler
30   * @author Ahmed Ashour
31   * @author Martin Tamme
32   * @author Ronald Brill
33   */
34  public final class StringUtils {
35  
36      private static final Pattern HEX_COLOR = Pattern.compile("#([\\da-fA-F]{3}|[\\da-fA-F]{6})");
37      private static final Pattern RGB_COLOR =
38          Pattern.compile("rgb\\(\\s*(0|[1-9]\\d?|1\\d\\d?|2[0-4]\\d|25[0-5])%?\\s*,"
39                              + "\\s*(0|[1-9]\\d?|1\\d\\d?|2[0-4]\\d|25[0-5])%?\\s*,"
40                              + "\\s*(0|[1-9]\\d?|1\\d\\d?|2[0-4]\\d|25[0-5])%?\\s*\\)");
41      private static final Pattern RGBA_COLOR =
42              Pattern.compile("rgba\\(\\s*(0|[1-9]\\d?|1\\d\\d?|2[0-4]\\d|25[0-5])%?\\s*,"
43                                   + "\\s*(0|[1-9]\\d?|1\\d\\d?|2[0-4]\\d|25[0-5])%?\\s*,"
44                                   + "\\s*(0|[1-9]\\d?|1\\d\\d?|2[0-4]\\d|25[0-5])%?\\s*,"
45                                   + "\\s*((0?.[1-9])|[01])\\s*\\)");
46      private static final Pattern HSL_COLOR =
47              Pattern.compile("hsl\\(\\s*((0|[1-9]\\d?|[12]\\d\\d?|3[0-5]\\d)(.\\d*)?)\\s*,"
48                                  + "\\s*((0|[1-9]\\d?|100)(.\\d*)?)%\\s*,"
49                                  + "\\s*((0|[1-9]\\d?|100)(.\\d*)?)%\\s*\\)");
50      private static final Pattern ILLEGAL_FILE_NAME_CHARS = Pattern.compile("\\\\|/|\\||:|\\?|\\*|\"|<|>|\\p{Cntrl}");
51  
52      private static final Map<String, String> CAMELIZE_CACHE = new ConcurrentHashMap<>();
53  
54      /**
55       * Disallow instantiation of this class.
56       */
57      private StringUtils() {
58          // Empty.
59      }
60  
61      /**
62       * Returns true if the param is not null and empty. This is different from
63       * {@link org.apache.commons.lang3.StringUtils#isEmpty(CharSequence)} because
64       * this returns false if the provided string is null.
65       *
66       * @param s the string to check
67       * @return true if the param is not null and empty
68       */
69      public static boolean isEmptyString(final CharSequence s) {
70          return s != null && s.length() == 0;
71      }
72  
73      /**
74       * @param expected the char that we expect
75       * @param s the string to check
76       * @return true if the provided string has only one char and this matches the expectation
77       */
78      public static boolean equalsChar(final char expected, final CharSequence s) {
79          return s != null && s.length() == 1 && expected == s.charAt(0);
80      }
81  
82      /**
83       * @param s the string to check
84       * @param expectedStart the string that we expect at the beginning
85       * @return true if the provided string has only one char and this matches the expectation
86       */
87      public static boolean startsWithIgnoreCase(final String s, final String expectedStart) {
88          if (expectedStart == null || expectedStart.length() == 0) {
89              throw new IllegalArgumentException("Expected start string can't be null or empty");
90          }
91  
92          if (s == null) {
93              return false;
94          }
95          if (s == expectedStart) {
96              return true;
97          }
98  
99          return s.regionMatches(true, 0, expectedStart, 0, expectedStart.length());
100     }
101 
102     /**
103      * Escapes the characters '&lt;', '&gt;' and '&amp;' into their XML entity equivalents.
104      *
105      * @param s the string to escape
106      * @return the escaped form of the specified string
107      */
108     public static String escapeXmlChars(final String s) {
109         return org.apache.commons.lang3.StringUtils.
110                 replaceEach(s, new String[] {"&", "<", ">"}, new String[] {"&amp;", "&lt;", "&gt;"});
111     }
112 
113     /**
114      * Escape the string to be used as xml 1.0 content be replacing the
115      * characters '&quot;', '&amp;', '&#39;', '&lt;', and '&gt;' into their XML entity equivalents.
116      * @param text the attribute value
117      * @return the escaped value
118      */
119     public static String escapeXml(final String text) {
120         if (text == null) {
121             return null;
122         }
123 
124         StringBuilder escaped = null;
125 
126         final int offset = 0;
127         final int max = text.length();
128 
129         int readOffset = offset;
130 
131         for (int i = offset; i < max; i++) {
132             final int codepoint = Character.codePointAt(text, i);
133             final boolean codepointValid = supportedByXML10(codepoint);
134 
135             if (!codepointValid
136                     || codepoint == '<'
137                     || codepoint == '>'
138                     || codepoint == '&'
139                     || codepoint == '\''
140                     || codepoint == '"') {
141 
142                 // replacement required
143                 if (escaped == null) {
144                     escaped = new StringBuilder(max);
145                 }
146 
147                 if (i > readOffset) {
148                     escaped.append(text, readOffset, i);
149                 }
150 
151                 if (Character.charCount(codepoint) > 1) {
152                     i++;
153                 }
154                 readOffset = i + 1;
155 
156                 // skip
157                 if (!codepointValid) {
158                     continue;
159                 }
160 
161                 if (codepoint == '<') {
162                     escaped.append("&lt;");
163                 }
164                 else if (codepoint == '>') {
165                     escaped.append("&gt;");
166                 }
167                 else if (codepoint == '&') {
168                     escaped.append("&amp;");
169                 }
170                 else if (codepoint == '\'') {
171                     escaped.append("&apos;");
172                 }
173                 else if (codepoint == '\"') {
174                     escaped.append("&quot;");
175                 }
176             }
177         }
178 
179         if (escaped == null) {
180             return text;
181         }
182 
183         if (max > readOffset) {
184             escaped.append(text, readOffset, max);
185         }
186 
187         return escaped.toString();
188     }
189 
190     /**
191      * Escape the string to be used as attribute value.
192      * Only {@code <}, {@code &} and {@code "} have to be escaped (see
193      * <a href="http://www.w3.org/TR/REC-xml/#d0e888">http://www.w3.org/TR/REC-xml/#d0e888</a>).
194      * @param attValue the attribute value
195      * @return the escaped value
196      */
197     public static String escapeXmlAttributeValue(final String attValue) {
198         if (attValue == null) {
199             return null;
200         }
201 
202         StringBuilder escaped = null;
203 
204         final int offset = 0;
205         final int max = attValue.length();
206 
207         int readOffset = offset;
208 
209         for (int i = offset; i < max; i++) {
210             final int codepoint = Character.codePointAt(attValue, i);
211             final boolean codepointValid = supportedByXML10(codepoint);
212 
213             if (!codepointValid
214                     || codepoint == '<'
215                     || codepoint == '&'
216                     || codepoint == '"') {
217 
218                 // replacement required
219                 if (escaped == null) {
220                     escaped = new StringBuilder(max);
221                 }
222 
223                 if (i > readOffset) {
224                     escaped.append(attValue, readOffset, i);
225                 }
226 
227                 if (Character.charCount(codepoint) > 1) {
228                     i++;
229                 }
230                 readOffset = i + 1;
231 
232                 // skip
233                 if (!codepointValid) {
234                     continue;
235                 }
236 
237                 if (codepoint == '<') {
238                     escaped.append("&lt;");
239                 }
240                 else if (codepoint == '&') {
241                     escaped.append("&amp;");
242                 }
243                 else if (codepoint == '\"') {
244                     escaped.append("&quot;");
245                 }
246             }
247         }
248 
249         if (escaped == null) {
250             return attValue;
251         }
252 
253         if (max > readOffset) {
254             escaped.append(attValue, readOffset, max);
255         }
256 
257         return escaped.toString();
258     }
259 
260     /*
261      * XML 1.0 does not allow control characters or unpaired Unicode surrogate codepoints.
262      * We will remove characters that do not fit in the following ranges:
263      * #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
264      */
265     private static boolean supportedByXML10(final int codepoint) {
266         if (codepoint < 0x20) {
267             return codepoint == 0x9 || codepoint == 0xA || codepoint == 0xD;
268         }
269         if (codepoint <= 0xD7FF) {
270             return true;
271         }
272 
273         if (codepoint < 0xE000) {
274             return false;
275         }
276         if (codepoint <= 0xFFFD) {
277             return true;
278         }
279 
280         if (codepoint < 0x10000) {
281             return false;
282         }
283         if (codepoint <= 0x10FFFF) {
284             return true;
285         }
286 
287         return true;
288     }
289 
290     /**
291      * Returns the index within the specified string of the first occurrence of
292      * the specified search character.
293      *
294      * @param s the string to search
295      * @param searchChar the character to search for
296      * @param beginIndex the index at which to start the search
297      * @param endIndex the index at which to stop the search
298      * @return the index of the first occurrence of the character in the string or <code>-1</code>
299      */
300     public static int indexOf(final String s, final char searchChar, final int beginIndex, final int endIndex) {
301         for (int i = beginIndex; i < endIndex; i++) {
302             if (s.charAt(i) == searchChar) {
303                 return i;
304             }
305         }
306         return -1;
307     }
308 
309     /**
310      * Returns a Color parsed from the given RGB in hexadecimal notation.
311      * @param token the token to parse
312      * @return a Color whether the token is a color RGB in hexadecimal notation; otherwise null
313      */
314     public static Color asColorHexadecimal(final String token) {
315         if (token == null) {
316             return null;
317         }
318         final Matcher tmpMatcher = HEX_COLOR.matcher(token);
319         final boolean tmpFound = tmpMatcher.matches();
320         if (!tmpFound) {
321             return null;
322         }
323 
324         final String tmpHex = tmpMatcher.group(1);
325         if (tmpHex.length() == 6) {
326             final int tmpRed = Integer.parseInt(tmpHex.substring(0, 2), 16);
327             final int tmpGreen = Integer.parseInt(tmpHex.substring(2, 4), 16);
328             final int tmpBlue = Integer.parseInt(tmpHex.substring(4, 6), 16);
329             return new Color(tmpRed, tmpGreen, tmpBlue);
330         }
331 
332         final int tmpRed = Integer.parseInt(tmpHex.substring(0, 1) + tmpHex.substring(0, 1), 16);
333         final int tmpGreen = Integer.parseInt(tmpHex.substring(1, 2) + tmpHex.substring(1, 2), 16);
334         final int tmpBlue = Integer.parseInt(tmpHex.substring(2, 3) + tmpHex.substring(2, 3), 16);
335         return new Color(tmpRed, tmpGreen, tmpBlue);
336     }
337 
338     /**
339      * Returns a Color parsed from the given rgb notation if found inside the given string.
340      * @param token the token to parse
341      * @return a Color whether the token contains a color in RGB notation; otherwise null
342      */
343     public static Color findColorRGB(final String token) {
344         if (token == null) {
345             return null;
346         }
347         final Matcher tmpMatcher = RGB_COLOR.matcher(token);
348         if (!tmpMatcher.find()) {
349             return null;
350         }
351 
352         final int tmpRed = Integer.parseInt(tmpMatcher.group(1));
353         final int tmpGreen = Integer.parseInt(tmpMatcher.group(2));
354         final int tmpBlue = Integer.parseInt(tmpMatcher.group(3));
355         return new Color(tmpRed, tmpGreen, tmpBlue);
356     }
357 
358     /**
359      * Returns a Color parsed from the given rgb notation.
360      * @param token the token to parse
361      * @return a Color whether the token is a color in RGB notation; otherwise null
362      */
363     public static Color findColorRGBA(final String token) {
364         if (token == null) {
365             return null;
366         }
367         final Matcher tmpMatcher = RGBA_COLOR.matcher(token);
368         if (!tmpMatcher.find()) {
369             return null;
370         }
371 
372         final int tmpRed = Integer.parseInt(tmpMatcher.group(1));
373         final int tmpGreen = Integer.parseInt(tmpMatcher.group(2));
374         final int tmpBlue = Integer.parseInt(tmpMatcher.group(3));
375         final int tmpAlpha = (int) (Float.parseFloat(tmpMatcher.group(4)) * 255);
376         return new Color(tmpRed, tmpGreen, tmpBlue, tmpAlpha);
377     }
378 
379     /**
380      * Returns a Color parsed from the given hsl notation if found inside the given string.
381      * @param token the token to parse
382      * @return a Color whether the token contains a color in RGB notation; otherwise null
383      */
384     public static Color findColorHSL(final String token) {
385         if (token == null) {
386             return null;
387         }
388         final Matcher tmpMatcher = HSL_COLOR.matcher(token);
389         if (!tmpMatcher.find()) {
390             return null;
391         }
392 
393         final float tmpHue = Float.parseFloat(tmpMatcher.group(1)) / 360f;
394         final float tmpSaturation = Float.parseFloat(tmpMatcher.group(4)) / 100f;
395         final float tmpLightness = Float.parseFloat(tmpMatcher.group(7)) / 100f;
396         return hslToRgb(tmpHue, tmpSaturation, tmpLightness);
397     }
398 
399     /**
400      * Converts an HSL color value to RGB. Conversion formula
401      * adapted from http://en.wikipedia.org/wiki/HSL_color_space.
402      * Assumes h, s, and l are contained in the set [0, 1]
403      *
404      * @param h the hue
405      * @param s the saturation
406      * @param l the lightness
407      * @return {@link Color}
408      */
409     private static Color hslToRgb(final float h, final float s, final float l) {
410         if (s == 0f) {
411             return new Color(to255(l), to255(l), to255(l));
412         }
413 
414         final float q = l < 0.5f ? l * (1 + s) : l + s - l * s;
415         final float p = 2 * l - q;
416         final float r = hueToRgb(p, q, h + 1f / 3f);
417         final float g = hueToRgb(p, q, h);
418         final float b = hueToRgb(p, q, h - 1f / 3f);
419 
420         return new Color(to255(r), to255(g), to255(b));
421     }
422 
423     private static float hueToRgb(final float p, final float q, float t) {
424         if (t < 0f) {
425             t += 1f;
426         }
427 
428         if (t > 1f) {
429             t -= 1f;
430         }
431 
432         if (t < 1f / 6f) {
433             return p + (q - p) * 6f * t;
434         }
435 
436         if (t < 1f / 2f) {
437             return q;
438         }
439 
440         if (t < 2f / 3f) {
441             return p + (q - p) * (2f / 3f - t) * 6f;
442         }
443 
444         return p;
445     }
446 
447     private static int to255(final float value) {
448         return (int) Math.min(255, 256 * value);
449     }
450 
451     /**
452      * Formats the specified color.
453      *
454      * @param color the color to format
455      * @return the specified color, formatted
456      */
457     public static String formatColor(final Color color) {
458         return "rgb(" + color.getRed() + ", " + color.getGreen() + ", " + color.getBlue() + ")";
459     }
460 
461     /**
462      * Sanitize a string for use in Matcher.appendReplacement.
463      * Replaces all \ with \\ and $ as \$ because they are used as control
464      * characters in appendReplacement.
465      *
466      * @param toSanitize the string to sanitize
467      * @return sanitized version of the given string
468      */
469     public static String sanitizeForAppendReplacement(final String toSanitize) {
470         return org.apache.commons.lang3.StringUtils.replaceEach(toSanitize,
471                                     new String[] {"\\", "$"}, new String[]{"\\\\", "\\$"});
472     }
473 
474     /**
475      * Sanitizes a string for use as filename.
476      * Replaces \, /, |, :, ?, *, &quot;, &lt;, &gt;, control chars by _ (underscore).
477      *
478      * @param toSanitize the string to sanitize
479      * @return sanitized version of the given string
480      */
481     public static String sanitizeForFileName(final String toSanitize) {
482         return ILLEGAL_FILE_NAME_CHARS.matcher(toSanitize).replaceAll("_");
483     }
484 
485     /**
486      * Transforms the specified string from delimiter-separated (e.g. <code>font-size</code>)
487      * to camel-cased (e.g. <code>fontSize</code>).
488      * @param string the string to camelize
489      * @return the transformed string
490      */
491     public static String cssCamelize(final String string) {
492         if (string == null) {
493             return null;
494         }
495 
496         String result = CAMELIZE_CACHE.get(string);
497         if (null != result) {
498             return result;
499         }
500 
501         // not found in CamelizeCache_; convert and store in cache
502         final int pos = string.indexOf('-');
503         if (pos == -1 || pos == string.length() - 1) {
504             // cache also this strings for performance
505             CAMELIZE_CACHE.put(string, string);
506             return string;
507         }
508 
509         final StringBuilder builder = new StringBuilder(string);
510         builder.deleteCharAt(pos);
511         builder.setCharAt(pos, Character.toUpperCase(builder.charAt(pos)));
512 
513         int i = pos + 1;
514         while (i < builder.length() - 1) {
515             if (builder.charAt(i) == '-') {
516                 builder.deleteCharAt(i);
517                 builder.setCharAt(i, Character.toUpperCase(builder.charAt(i)));
518             }
519             i++;
520         }
521         result = builder.toString();
522         CAMELIZE_CACHE.put(string, result);
523 
524         return result;
525     }
526 
527     /**
528      * Lowercases a string by checking and check for null first. There
529      * is no cache involved and the ROOT locale is used to convert it.
530      *
531      * @param s the string to lowercase
532      * @return the lowercased string
533      */
534     public static String toRootLowerCase(final String s) {
535         return s == null ? null : s.toLowerCase(Locale.ROOT);
536     }
537 
538     /**
539      * Transforms the specified string from camel-cased (e.g. <code>fontSize</code>)
540      * to delimiter-separated (e.g. <code>font-size</code>).
541      * to camel-cased .
542      * @param string the string to decamelize
543      * @return the transformed string
544      */
545     public static String cssDeCamelize(final String string) {
546         if (string == null || string.isEmpty()) {
547             return string;
548         }
549 
550         final StringBuilder builder = new StringBuilder();
551         for (int i = 0; i < string.length(); i++) {
552             final char ch = string.charAt(i);
553             if (Character.isUpperCase(ch)) {
554                 builder.append('-').append(Character.toLowerCase(ch));
555             }
556             else {
557                 builder.append(ch);
558             }
559         }
560         return builder.toString();
561     }
562 
563     /**
564      * Converts a string into a byte array using the specified encoding.
565      *
566      * @param charset the charset
567      * @param content the string to convert
568      * @return the String as a byte[]; if the specified encoding is not supported an empty byte[] will be returned
569      */
570     public static byte[] toByteArray(final String content, final Charset charset) {
571         if (content ==  null || content.isEmpty()) {
572             return new byte[0];
573         }
574 
575         return content.getBytes(charset);
576     }
577 
578     /**
579      * Splits the provided text into an array, using whitespace as the
580      * separator.
581      * Whitespace is defined by {@link Character#isWhitespace(char)}.
582      *
583      * @param str  the String to parse, may be null
584      * @return an array of parsed Strings, an empty array if null String input
585      */
586     public static String[] splitAtJavaWhitespace(final String str) {
587         final String[] parts = org.apache.commons.lang3.StringUtils.split(str);
588         if (parts == null) {
589             return new String[0];
590         }
591         return parts;
592     }
593 
594     /**
595      * Splits the provided text into an array, using blank as the
596      * separator.
597      *
598      * @param str  the String to parse, may be null
599      * @return an array of parsed Strings, an empty array if null String input
600      */
601     public static String[] splitAtBlank(final String str) {
602         final String[] parts = org.apache.commons.lang3.StringUtils.split(str, ' ');
603         if (parts == null) {
604             return new String[0];
605         }
606         return parts;
607     }
608 
609     /**
610      * Splits the provided text into an array, using blank as the
611      * separator.
612      *
613      * @param str  the String to parse, may be null
614      * @return an array of parsed Strings, an empty array if null String input
615      */
616     public static String[] splitAtComma(final String str) {
617         final String[] parts = org.apache.commons.lang3.StringUtils.split(str, ',');
618         if (parts == null) {
619             return new String[0];
620         }
621         return parts;
622     }
623 
624     /**
625      * Splits the provided text into an array, using comma or blank as the
626      * separator.
627      *
628      * @param str  the String to parse, may be null
629      * @return an array of parsed Strings, an empty array if null String input
630      */
631     public static String[] splitAtCommaOrBlank(final String str) {
632         final String[] parts = org.apache.commons.lang3.StringUtils.split(str, ", ");
633         if (parts == null) {
634             return new String[0];
635         }
636         return parts;
637     }
638 }