View Javadoc
1   /*
2    * Copyright (c) 2002-2026 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.util;
16  
17  import static java.nio.charset.StandardCharsets.US_ASCII;
18  import static java.nio.charset.StandardCharsets.UTF_8;
19  
20  import java.io.ByteArrayOutputStream;
21  import java.net.MalformedURLException;
22  import java.net.URI;
23  import java.net.URISyntaxException;
24  import java.net.URL;
25  import java.net.URLEncoder;
26  import java.net.URLStreamHandler;
27  import java.nio.charset.Charset;
28  import java.util.BitSet;
29  import java.util.Locale;
30  import java.util.Objects;
31  
32  import org.htmlunit.WebAssert;
33  import org.htmlunit.protocol.AnyHandler;
34  import org.htmlunit.protocol.javascript.JavaScriptURLConnection;
35  
36  /**
37   * URL utilities class that makes it easy to create new URLs based off of old URLs
38   * without having to assemble or parse them yourself.
39   *
40   * @author Daniel Gredler
41   * @author Martin Tamme
42   * @author Sudhan Moghe
43   * @author Marc Guillemot
44   * @author Ahmed Ashour
45   * @author Ronald Brill
46   * @author Joerg Werner
47   * @author Hartmut Arlt
48   */
49  public final class UrlUtils {
50  
51      /** "about". */
52      public static final String ABOUT = "about";
53      /** "about:". */
54      public static final String ABOUT_SCHEME = ABOUT + ":";
55      /** "about:blank". */
56      public static final String ABOUT_BLANK = ABOUT_SCHEME + "blank";
57      /** URL for "about:blank". */
58      public static final URL URL_ABOUT_BLANK;
59  
60      private static final URLStreamHandler JS_HANDLER;
61      private static final URLStreamHandler ABOUT_HANDLER;
62      private static final URLStreamHandler DATA_HANDLER;
63  
64      private static final BitSet PATH_ALLOWED_CHARS = new BitSet(256);
65      private static final BitSet QUERY_ALLOWED_CHARS = new BitSet(256);
66      private static final BitSet ANCHOR_ALLOWED_CHARS = new BitSet(256);
67      private static final BitSet HASH_ALLOWED_CHARS = new BitSet(256);
68  
69      /*
70        URI allowed char initialization; based on HttpClient 3.1's URI bit sets.
71       */
72      static {
73          // make sure the handlers are available first (before calling toUrlSafe())
74          JS_HANDLER = new org.htmlunit.protocol.javascript.Handler();
75          ABOUT_HANDLER = new org.htmlunit.protocol.about.Handler();
76          DATA_HANDLER = new org.htmlunit.protocol.data.Handler();
77  
78          try {
79              URL_ABOUT_BLANK = new URL(null, ABOUT_BLANK, ABOUT_HANDLER);
80          }
81          catch (final MalformedURLException e) {
82              // should never happen
83              throw new RuntimeException(e);
84          }
85  
86          final BitSet reserved = new BitSet(256);
87          reserved.set(';');
88          reserved.set('/');
89          reserved.set('?');
90          reserved.set(':');
91          reserved.set('@');
92          reserved.set('&');
93          reserved.set('=');
94          reserved.set('+');
95          reserved.set('$');
96          reserved.set(',');
97  
98          final BitSet mark = new BitSet(256);
99          mark.set('-');
100         mark.set('_');
101         mark.set('.');
102         mark.set('!');
103         mark.set('~');
104         mark.set('*');
105         mark.set('\'');
106         mark.set('(');
107         mark.set(')');
108 
109         final BitSet alpha = new BitSet(256);
110         for (int i = 'a'; i <= 'z'; i++) {
111             alpha.set(i);
112         }
113         for (int i = 'A'; i <= 'Z'; i++) {
114             alpha.set(i);
115         }
116 
117         final BitSet digit = new BitSet(256);
118         for (int i = '0'; i <= '9'; i++) {
119             digit.set(i);
120         }
121 
122         final BitSet alphanumeric = new BitSet(256);
123         alphanumeric.or(alpha);
124         alphanumeric.or(digit);
125 
126         final BitSet unreserved = new BitSet(256);
127         unreserved.or(alphanumeric);
128         unreserved.or(mark);
129 
130         final BitSet hex = new BitSet(256);
131         hex.or(digit);
132         for (int i = 'a'; i <= 'f'; i++) {
133             hex.set(i);
134         }
135         for (int i = 'A'; i <= 'F'; i++) {
136             hex.set(i);
137         }
138 
139         final BitSet escaped = new BitSet(256);
140         escaped.set('%');
141         escaped.or(hex);
142 
143         final BitSet uric = new BitSet(256);
144         uric.or(reserved);
145         uric.or(unreserved);
146         uric.or(escaped);
147 
148         final BitSet pchar = new BitSet(256);
149         pchar.or(unreserved);
150         pchar.or(escaped);
151         pchar.set(':');
152         pchar.set('@');
153         pchar.set('&');
154         pchar.set('=');
155         pchar.set('+');
156         pchar.set('$');
157         pchar.set(',');
158 
159         final BitSet segment = new BitSet(256);
160         segment.or(pchar);
161         segment.set(';');
162         segment.or(pchar);
163 
164         final BitSet pathSegments = new BitSet(256);
165         pathSegments.set('/');
166         pathSegments.or(segment);
167 
168         final BitSet absPath = new BitSet(256);
169         absPath.set('/');
170         absPath.or(pathSegments);
171 
172         final BitSet allowedAbsPath = new BitSet(256);
173         allowedAbsPath.or(absPath);
174 
175         final BitSet allowedFragment = new BitSet(256);
176         allowedFragment.or(uric);
177 
178         final BitSet allowedQuery = new BitSet(256);
179         allowedQuery.or(uric);
180 
181         final BitSet allowedHash = new BitSet(256);
182         allowedHash.or(uric);
183 
184         PATH_ALLOWED_CHARS.or(allowedAbsPath);
185         QUERY_ALLOWED_CHARS.or(allowedQuery);
186         ANCHOR_ALLOWED_CHARS.or(allowedFragment);
187         HASH_ALLOWED_CHARS.or(allowedHash);
188     }
189 
190     /**
191      * Disallow instantiation of this class.
192      */
193     private UrlUtils() {
194         // Empty.
195     }
196 
197     /**
198      * <p>Constructs a URL instance based on the specified URL string, taking into account the fact that the
199      * specified URL string may represent an <code>"about:..."</code> URL, a <code>"javascript:..."</code> URL, or
200      * a <code>data:...</code> URL.</p>
201      *
202      * <p>The caller should be sure that URL strings passed to this method will parse correctly as URLs, as
203      * this method never expects to have to handle {@link MalformedURLException}s.</p>
204      *
205      * @param url the URL string to convert into a URL instance
206      * @return the constructed URL instance
207      */
208     public static URL toUrlSafe(final String url) {
209         try {
210             return toUrlUnsafe(url);
211         }
212         catch (final MalformedURLException e) {
213             // Should never happen.
214             throw new RuntimeException(e);
215         }
216     }
217 
218     /**
219      * <p>Constructs a URL instance based on the specified URL string, taking into account the fact that the
220      * specified URL string may represent an <code>"about:..."</code> URL, a <code>"javascript:..."</code> URL, or
221      * a <code>data:...</code> URL.</p>
222      *
223      * <p>Unlike {@link #toUrlSafe(String)}, the caller need not be sure that URL strings passed to this
224      * method will parse correctly as URLs.</p>
225      *
226      * @param url the URL string to convert into a URL instance
227      * @return the constructed URL instance
228      * @throws MalformedURLException if the URL string cannot be converted to a URL instance
229      */
230     public static URL toUrlUnsafe(final String url) throws MalformedURLException {
231         WebAssert.notNull("url", url);
232 
233         final String protocol = StringUtils.substringBefore(url, ":").toLowerCase(Locale.ROOT);
234 
235         if (protocol.isEmpty() || UrlUtils.isNormalUrlProtocol(protocol)) {
236             final URL response = new URL(url);
237             if (response.getProtocol().startsWith("http")
238                     && StringUtils.isEmptyOrNull(response.getHost())) {
239                 throw new MalformedURLException("Missing host name in url: " + url);
240             }
241             return response;
242         }
243 
244         if (JavaScriptURLConnection.JAVASCRIPT_PREFIX.equals(protocol + ":")) {
245             return new URL(null, url, JS_HANDLER);
246         }
247 
248         if (ABOUT.equals(protocol)) {
249             if (ABOUT_BLANK.equalsIgnoreCase(url)) {
250                 return URL_ABOUT_BLANK;
251             }
252             return new URL(null, url, ABOUT_HANDLER);
253         }
254 
255         if ("data".equals(protocol)) {
256             return new URL(null, url, DATA_HANDLER);
257         }
258 
259         return new URL(null, url, AnyHandler.INSTANCE);
260     }
261 
262     /**
263      * <p>Encodes illegal characters in the specified URL's path, query string and anchor according to the URL
264      * encoding rules observed in real browsers.</p>
265      *
266      * <p>For example, this method changes
267      * <code>"http://first/?a=b c"</code> to <code>"http://first/?a=b%20c"</code>.</p>
268      *
269      * @param url the URL to encode
270      * @param charset the charset
271      * @return the encoded URL
272      */
273     public static URL encodeUrl(final URL url, final Charset charset) {
274         if (!isNormalUrlProtocol(url.getProtocol())) {
275             return url; // javascript:, about:, data: and anything not supported like foo:
276         }
277 
278         try {
279             String path = url.getPath();
280             if (path != null) {
281                 path = encode(path, PATH_ALLOWED_CHARS, UTF_8);
282             }
283             String query = url.getQuery();
284             if (query != null) {
285                 query = encode(query, QUERY_ALLOWED_CHARS, charset);
286             }
287             String anchor = url.getRef();
288             if (anchor != null) {
289                 anchor = encode(anchor, ANCHOR_ALLOWED_CHARS, UTF_8);
290             }
291             return createNewUrl(url.getProtocol(), url.getUserInfo(), url.getHost(),
292                                 url.getPort(), path, anchor, query);
293         }
294         catch (final MalformedURLException e) {
295             // Impossible... I think.
296             throw new RuntimeException(e);
297         }
298     }
299 
300     /**
301      * Encodes and escapes the specified URI anchor string.
302      *
303      * @param anchor the anchor string to encode and escape
304      * @return the encoded and escaped anchor string
305      */
306     public static String encodeAnchor(final String anchor) {
307         if (anchor == null) {
308             return null;
309         }
310         return encode(anchor, ANCHOR_ALLOWED_CHARS, UTF_8);
311     }
312 
313     /**
314      * Encodes and escapes the specified URI hash string.
315      *
316      * @param hash the anchor string to encode and escape
317      * @return the encoded and escaped hash string
318      */
319     public static String encodeHash(final String hash) {
320         if (hash == null) {
321             return null;
322         }
323         return encode(hash, HASH_ALLOWED_CHARS, UTF_8);
324     }
325 
326     /**
327      * Encodes and escapes the specified URI hash string.
328      *
329      * @param query the query string to encode and escape
330      * @return the encoded and escaped hash string
331      */
332     public static String encodeQuery(final String query) {
333         if (query == null) {
334             return null;
335         }
336         return encode(query, QUERY_ALLOWED_CHARS, UTF_8);
337     }
338 
339     /**
340      * Unescapes and decodes the specified string.
341      *
342      * @param escaped the string to be unescaped and decoded
343      * @return the unescaped and decoded string
344      */
345     public static String decode(final String escaped) {
346         try {
347             final byte[] bytes = escaped.getBytes(US_ASCII);
348             final byte[] bytes2 = decodeUrl(bytes);
349             return new String(bytes2, UTF_8);
350         }
351         catch (final IllegalArgumentException e) {
352             // Should never happen.
353             throw new RuntimeException(e);
354         }
355     }
356 
357     /**
358      * Escapes and encodes the specified string. Based on HttpClient 3.1's <code>URIUtil.encode()</code> method.
359      *
360      * @param unescaped the string to encode
361      * @param allowed allowed characters that shouldn't be escaped
362      * @param charset the charset to use
363      * @return the escaped string
364      */
365     private static String encode(final String unescaped, final BitSet allowed, final Charset charset) {
366         final byte[] bytes = unescaped.getBytes(charset);
367         final byte[] bytes2 = encodeUrl(allowed, bytes);
368         return encodePercentSign(bytes2);
369     }
370 
371     /**
372      * Encodes every occurrence of the escape character '%' in the given input
373      * string that is not followed by two hexadecimal characters.
374      * @param input the input bytes
375      * @return the given input string where every occurrence of <code>%</code> in
376      *         invalid escape sequences has been replace by <code>%25</code>
377      */
378     private static String encodePercentSign(final byte[] input) {
379         if (input == null) {
380             return null;
381         }
382 
383         final StringBuilder result = new StringBuilder(new String(input, US_ASCII));
384         int state = -0;
385         int offset = 0;
386         for (int i = 0; i < input.length; i++) {
387             final byte b = input[i];
388             if (state == 0 && b == '%') {
389                 state = 1;
390             }
391             else if (state == 1 || state == 2) {
392                 if (('0' <= b && b <= '9')
393                         || ('A' <= b && b <= 'F')
394                         || ('a' <= b && b <= 'f')) {
395                     state++;
396                     if (state == 3) {
397                         state = 0;
398                     }
399                 }
400                 else {
401                     final int st = i - state + offset;
402                     result.replace(st, st + 1, "%25");
403                     offset = offset + 2;
404                     state = b == '%' ? 1 : 0;
405                 }
406             }
407         }
408         if (state == 1 || state == 2) {
409             final int st = input.length - state + offset;
410             result.replace(st, st + 1, "%25");
411         }
412         return result.toString();
413     }
414 
415     /**
416      * Creates and returns a new URL using only the protocol and authority from the given one.
417      * @param u the URL on which to base the returned URL
418      * @return a new URL using only the protocol and authority from the given one
419      * @throws MalformedURLException if there is a problem creating the new URL
420      */
421     public static URL getUrlWithoutPathRefQuery(final URL u) throws MalformedURLException {
422         return createNewUrl(u.getProtocol(), u.getAuthority(), null, null, null);
423     }
424 
425     /**
426      * Creates and returns a new URL using only the protocol, authority and path
427      * from the given one.
428      * @param u the URL on which to base the returned URL
429      * @return a new URL using only the protocol and authority from the given one
430      * @throws MalformedURLException if there is a problem creating the new URL
431      */
432     public static URL getUrlWithoutRef(final URL u) throws MalformedURLException {
433         return createNewUrl(u.getProtocol(), u.getAuthority(), u.getPath(), null, u.getQuery());
434     }
435 
436     /**
437      * Creates and returns a new URL identical to the specified URL, except using the specified protocol.
438      * @param u the URL on which to base the returned URL
439      * @param newProtocol the new protocol to use in the returned URL
440      * @return a new URL identical to the specified URL, except using the specified protocol
441      * @throws MalformedURLException if there is a problem creating the new URL
442      */
443     public static URL getUrlWithNewProtocol(final URL u, final String newProtocol) throws MalformedURLException {
444         return createNewUrl(newProtocol, u.getAuthority(), u.getPath(), u.getRef(), u.getQuery());
445     }
446 
447     /**
448      * Creates and returns a new URL identical to the specified URL, except using the specified host.
449      * @param u the URL on which to base the returned URL
450      * @param newHost the new host to use in the returned URL
451      * @return a new URL identical to the specified URL, except using the specified host
452      * @throws MalformedURLException if there is a problem creating the new URL
453      */
454     public static URL getUrlWithNewHost(final URL u, final String newHost)
455         throws MalformedURLException {
456         return createNewUrl(u.getProtocol(), u.getUserInfo(), newHost,
457                             u.getPort(), u.getPath(), u.getRef(), u.getQuery());
458     }
459 
460     /**
461      * Creates and returns a new URL identical to the specified URL, except using the specified host.
462      * @param u the URL on which to base the returned URL
463      * @param newHost the new host to use in the returned URL
464      * @param newPort the new port to use in the returned URL
465      * @return a new URL identical to the specified URL, except using the specified host
466      * @throws MalformedURLException if there is a problem creating the new URL
467      */
468     public static URL getUrlWithNewHostAndPort(final URL u, final String newHost, final int newPort)
469         throws MalformedURLException {
470         return createNewUrl(u.getProtocol(), u.getUserInfo(), newHost, newPort, u.getPath(), u.getRef(), u.getQuery());
471     }
472 
473     /**
474      * Creates and returns a new URL identical to the specified URL, except using the specified port.
475      * @param u the URL on which to base the returned URL
476      * @param newPort the new port to use in the returned URL or -1 to remove it
477      * @return a new URL identical to the specified URL, except using the specified port
478      * @throws MalformedURLException if there is a problem creating the new URL
479      */
480     public static URL getUrlWithNewPort(final URL u, final int newPort) throws MalformedURLException {
481         return createNewUrl(u.getProtocol(), u.getUserInfo(), u.getHost(),
482                             newPort, u.getPath(), u.getRef(), u.getQuery());
483     }
484 
485     /**
486      * Creates and returns a new URL identical to the specified URL, except using the specified path.
487      * @param u the URL on which to base the returned URL
488      * @param newPath the new path to use in the returned URL
489      * @return a new URL identical to the specified URL, except using the specified path
490      * @throws MalformedURLException if there is a problem creating the new URL
491      */
492     public static URL getUrlWithNewPath(final URL u, final String newPath) throws MalformedURLException {
493         return createNewUrl(u.getProtocol(), u.getAuthority(), newPath, u.getRef(), u.getQuery());
494     }
495 
496     /**
497      * Creates and returns a new URL identical to the specified URL, except using the specified reference.
498      * @param u the URL on which to base the returned URL
499      * @param newRef the new reference to use in the returned URL or null to remove it
500      * @return a new URL identical to the specified URL, except using the specified reference
501      * @throws MalformedURLException if there is a problem creating the new URL
502      */
503     public static URL getUrlWithNewRef(final URL u, final String newRef) throws MalformedURLException {
504         return createNewUrl(u.getProtocol(), u.getAuthority(), u.getPath(), newRef, u.getQuery());
505     }
506 
507     /**
508      * Creates and returns a new URL identical to the specified URL, except using the specified query string.
509      * @param u the URL on which to base the returned URL
510      * @param newQuery the new query string to use in the returned URL
511      * @return a new URL identical to the specified URL, except using the specified query string
512      * @throws MalformedURLException if there is a problem creating the new URL
513      */
514     public static URL getUrlWithNewQuery(final URL u, final String newQuery) throws MalformedURLException {
515         return createNewUrl(u.getProtocol(), u.getAuthority(), u.getPath(), u.getRef(), newQuery);
516     }
517 
518     /**
519      * Creates and returns a new URL identical to the specified URL, ignoring path, protocol and query.
520      * @param u the URL on which to base the returned URL
521      * @return a new URL identical to the specified URL, ignoring path, protocol and query
522      * @throws MalformedURLException if there is a problem creating the new URL
523      */
524     public static URL getUrlWithProtocolAndAuthority(final URL u) throws MalformedURLException {
525         return createNewUrl(u.getProtocol(), u.getAuthority(), null, null, null);
526     }
527 
528     /**
529      * Creates and returns a new URL identical to the specified URL but with a changed user name.
530      * @param u the URL on which to base the returned URL
531      * @param newUserName the new user name or null to remove it
532      * @return a new URL identical to the specified URL; only user name updated
533      * @throws MalformedURLException if there is a problem creating the new URL
534      */
535     public static URL getUrlWithNewUserName(final URL u, final String newUserName) throws MalformedURLException {
536         String newUserInfo = newUserName == null ? "" : newUserName;
537         final String userInfo = u.getUserInfo();
538         if (StringUtils.isNotBlank(userInfo)) {
539             final int colonIdx = userInfo.indexOf(':');
540             if (colonIdx > -1) {
541                 newUserInfo = newUserInfo + userInfo.substring(colonIdx);
542             }
543         }
544         return createNewUrl(u.getProtocol(), newUserInfo.isEmpty() ? null : newUserInfo,
545                 u.getHost(), u.getPort(), u.getPath(), u.getRef(), u.getQuery());
546     }
547 
548     /**
549      * Creates and returns a new URL identical to the specified URL but with a changed user password.
550      * @param u the URL on which to base the returned URL
551      * @param newUserPassword the new user password or null to remove it
552      * @return a new URL identical to the specified URL; only user name updated
553      * @throws MalformedURLException if there is a problem creating the new URL
554      */
555     public static URL getUrlWithNewUserPassword(final URL u, final String newUserPassword)
556             throws MalformedURLException {
557         String newUserInfo = newUserPassword == null ? "" : ':' + newUserPassword;
558         final String userInfo = u.getUserInfo();
559         if (StringUtils.isNotBlank(userInfo)) {
560             final int colonIdx = userInfo.indexOf(':');
561             if (colonIdx > -1) {
562                 newUserInfo = userInfo.substring(0, colonIdx) + newUserInfo;
563             }
564             else {
565                 newUserInfo = userInfo + newUserInfo;
566             }
567         }
568         return createNewUrl(u.getProtocol(), newUserInfo.isEmpty() ? null : newUserInfo,
569                 u.getHost(), u.getPort(), u.getPath(), u.getRef(), u.getQuery());
570     }
571 
572     /**
573      * Creates a new URL based on the specified fragments.
574      * @param protocol the protocol to use (may not be {@code null})
575      * @param userInfo the user info to use (may be {@code null})
576      * @param host the host to use (may not be {@code null})
577      * @param port the port to use (may be <code>-1</code> if no port is specified)
578      * @param path the path to use (may be {@code null} and may omit the initial <code>'/'</code>)
579      * @param ref the reference to use (may be {@code null} and must not include the <code>'#'</code>)
580      * @param query the query to use (may be {@code null} and must not include the <code>'?'</code>)
581      * @return a new URL based on the specified fragments
582      * @throws MalformedURLException if there is a problem creating the new URL
583      */
584     private static URL createNewUrl(final String protocol, final String userInfo, final String host, final int port,
585             final String path, final String ref, final String query) throws MalformedURLException {
586         final StringBuilder s = new StringBuilder();
587         s.append(protocol).append("://");
588         if (userInfo != null) {
589             s.append(userInfo).append('@');
590         }
591         s.append(host);
592         if (port != -1) {
593             s.append(':').append(port);
594         }
595         if (path != null && !path.isEmpty()) {
596             if ('/' != path.charAt(0)) {
597                 s.append('/');
598             }
599             s.append(path);
600         }
601         if (query != null) {
602             s.append('?').append(query);
603         }
604         if (ref != null) {
605             if (ref.isEmpty() || ref.charAt(0) != '#') {
606                 s.append('#');
607             }
608             s.append(ref);
609         }
610 
611         return new URL(s.toString());
612     }
613 
614     /**
615      * Creates a new URL based on the specified fragments.
616      * @param protocol the protocol to use (may not be {@code null})
617      * @param authority the authority to use (may not be {@code null})
618      * @param path the path to use (may be {@code null} and may omit the initial <code>'/'</code>)
619      * @param ref the reference to use (may be {@code null} and must not include the <code>'#'</code>)
620      * @param query the query to use (may be {@code null} and must not include the <code>'?'</code>)
621      * @return a new URL based on the specified fragments
622      * @throws MalformedURLException if there is a problem creating the new URL
623      */
624     private static URL createNewUrl(final String protocol, final String authority,
625             final String path, final String ref, final String query) throws MalformedURLException {
626 
627         // pre-compute length of StringBuilder
628         int len = protocol.length() + 1;
629         if (authority != null && !authority.isEmpty()) {
630             len += 2 + authority.length();
631         }
632         if (path != null) {
633             len += path.length();
634         }
635         if (query != null) {
636             len += 1 + query.length();
637         }
638         if (ref != null) {
639             len += 1 + ref.length();
640         }
641 
642         final StringBuilder s = new StringBuilder(len);
643         s.append(protocol).append(':');
644         if (authority != null && !authority.isEmpty()) {
645             s.append("//").append(authority);
646         }
647         if (path != null) {
648             s.append(path);
649         }
650         if (query != null) {
651             s.append('?').append(query);
652         }
653         if (ref != null) {
654             if (ref.isEmpty() || ref.charAt(0) != '#') {
655                 s.append('#');
656             }
657             s.append(ref);
658         }
659 
660         return toUrlSafe(s.toString());
661     }
662 
663     /**
664      * Resolves a given relative URL against a base URL. See
665      * <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>
666      * Section 4 for more details.
667      *
668      * @param baseUrl     The base URL in which to resolve the specification.
669      * @param relativeUrl The relative URL to resolve against the base URL.
670      * @return the resolved specification.
671      */
672     public static String resolveUrl(final String baseUrl, final String relativeUrl) {
673         if (baseUrl == null) {
674             throw new IllegalArgumentException("Base URL must not be null");
675         }
676         if (relativeUrl == null) {
677             throw new IllegalArgumentException("Relative URL must not be null");
678         }
679         final Url url = resolveUrl(parseUrl(baseUrl), relativeUrl);
680 
681         return url.toString();
682     }
683 
684     /**
685      * Resolves a given relative URL against a base URL. See
686      * <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>
687      * Section 4 for more details.
688      *
689      * @param baseUrl     The base URL in which to resolve the specification.
690      * @param relativeUrl The relative URL to resolve against the base URL.
691      * @return the resolved specification.
692      */
693     public static String resolveUrl(final URL baseUrl, final String relativeUrl) {
694         if (baseUrl == null) {
695             throw new IllegalArgumentException("Base URL must not be null");
696         }
697         return resolveUrl(baseUrl.toExternalForm(), relativeUrl);
698     }
699 
700     /**
701      * Parses a given specification using the algorithm depicted in
702      * <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>:
703      * <p>
704      * Section 2.4: Parsing a URL
705      * <p>
706      *   An accepted method for parsing URLs is useful to clarify the
707      *   generic-RL syntax of Section 2.2 and to describe the algorithm for
708      *   resolving relative URLs presented in Section 4. This section
709      *   describes the parsing rules for breaking down a URL (relative or
710      *   absolute) into the component parts described in Section 2.1.  The
711      *   rules assume that the URL has already been separated from any
712      *   surrounding text and copied to a "parse string". The rules are
713      *   listed in the order in which they would be applied by the parser.
714      *
715      * @param spec The specification to parse.
716      * @return the parsed specification.
717      */
718     private static Url parseUrl(String spec) {
719         final Url url = new Url();
720         int startIndex = 0;
721         int endIndex = spec.length();
722 
723         // see https://url.spec.whatwg.org/#concept-basic-url-parser
724         //   * If input contains any leading or trailing C0 control or space, validation error.
725         //     Remove any leading and trailing C0 control or space from input.
726         //   * If input contains any ASCII tab or newline, validation error.
727         //     Remove all ASCII tab or newline from input.
728 
729         if (endIndex > startIndex) {
730             StringBuilder sb = null;
731             boolean before = true;
732             int trailing = 0;
733 
734             for (int i = 0; i < endIndex; i++) {
735                 final char c = spec.charAt(i);
736                 boolean remove = false;
737 
738                 if (c == '\t' | c == '\r' | c == '\n') {
739                     remove = true;
740                 }
741                 else if ('\u0000' <= c && c <= '\u0020') {
742                     if (before) {
743                         remove = true;
744                     }
745                     else {
746                         trailing++;
747                     }
748                 }
749                 else {
750                     before = false;
751                     trailing = 0;
752                 }
753 
754                 if (remove) {
755                     if (sb == null) {
756                         sb = new StringBuilder(spec.substring(0, i));
757                     }
758                 }
759                 else if (sb != null) {
760                     sb.append(c);
761                 }
762             }
763 
764             if (sb == null) {
765                 if (trailing > 0) {
766                     endIndex = spec.length() - trailing;
767                     spec = spec.substring(0, endIndex);
768                 }
769             }
770             else {
771                 if (trailing > 0) {
772                     spec = sb.substring(0, sb.length() - trailing);
773                 }
774                 else {
775                     spec = sb.toString();
776                 }
777                 endIndex = spec.length();
778             }
779         }
780 
781         // Section 2.4.1: Parsing the Fragment Identifier
782         //
783         //   If the parse string contains a crosshatch "#" character, then the
784         //   substring after the first (left-most) crosshatch "#" and up to the
785         //   end of the parse string is the <fragment> identifier. If the
786         //   crosshatch is the last character, or no crosshatch is present, then
787         //   the fragment identifier is empty. The matched substring, including
788         //   the crosshatch character, is removed from the parse string before
789         //   continuing.
790         //
791         //   Note that the fragment identifier is not considered part of the URL.
792         //   However, since it is often attached to the URL, parsers must be able
793         //   to recognize and set aside fragment identifiers as part of the
794         //   process.
795         final int crosshatchIndex = StringUtils.indexOf(spec, '#', startIndex, endIndex);
796 
797         if (crosshatchIndex >= 0) {
798             url.fragment_ = spec.substring(crosshatchIndex + 1, endIndex);
799             endIndex = crosshatchIndex;
800         }
801         // Section 2.4.2: Parsing the Scheme
802         //
803         //   If the parse string contains a colon ":" after the first character
804         //   and before any characters not allowed as part of a scheme name (i.e.,
805         //   any not an alphanumeric, plus "+", period ".", or hyphen "-"), the
806         //   <scheme> of the URL is the substring of characters up to but not
807         //   including the first colon. These characters and the colon are then
808         //   removed from the parse string before continuing.
809         final int colonIndex = StringUtils.indexOf(spec, ':', startIndex, endIndex);
810 
811         if (colonIndex > 0) {
812             final String scheme = spec.substring(startIndex, colonIndex);
813             if (isValidScheme(scheme)) {
814                 url.scheme_ = scheme;
815                 startIndex = colonIndex + 1;
816             }
817         }
818         // Section 2.4.3: Parsing the Network Location/Login
819         //
820         //   If the parse string begins with a double-slash "//", then the
821         //   substring of characters after the double-slash and up to, but not
822         //   including, the next slash "/" character is the network location/login
823         //   (<net_loc>) of the URL. If no trailing slash "/" is present, the
824         //   entire remaining parse string is assigned to <net_loc>. The double-
825         //   slash and <net_loc> are removed from the parse string before
826         //   continuing.
827         //
828         // Note: We also accept a question mark "?" or a semicolon ";" character as
829         //       delimiters for the network location/login (<net_loc>) of the URL.
830         final int locationStartIndex;
831         int locationEndIndex;
832 
833         if (spec.startsWith("//", startIndex)) {
834             locationStartIndex = startIndex + 2;
835             locationEndIndex = StringUtils.indexOf(spec, '/', locationStartIndex, endIndex);
836             if (locationEndIndex >= 0) {
837                 startIndex = locationEndIndex;
838             }
839         }
840         else {
841             locationStartIndex = -1;
842             locationEndIndex = -1;
843         }
844         // Section 2.4.4: Parsing the Query Information
845         //
846         //   If the parse string contains a question mark "?" character, then the
847         //   substring after the first (left-most) question mark "?" and up to the
848         //   end of the parse string is the <query> information. If the question
849         //   mark is the last character, or no question mark is present, then the
850         //   query information is empty. The matched substring, including the
851         //   question mark character, is removed from the parse string before
852         //   continuing.
853         final int questionMarkIndex = StringUtils.indexOf(spec, '?', startIndex, endIndex);
854 
855         if (questionMarkIndex >= 0) {
856             if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
857                 // The substring of characters after the double-slash and up to, but not
858                 // including, the question mark "?" character is the network location/login
859                 // (<net_loc>) of the URL.
860                 locationEndIndex = questionMarkIndex;
861                 startIndex = questionMarkIndex;
862             }
863             url.query_ = spec.substring(questionMarkIndex + 1, endIndex);
864             endIndex = questionMarkIndex;
865         }
866         // Section 2.4.5: Parsing the Parameters
867         //
868         //   If the parse string contains a semicolon ";" character, then the
869         //   substring after the first (left-most) semicolon ";" and up to the end
870         //   of the parse string is the parameters (<params>). If the semicolon
871         //   is the last character, or no semicolon is present, then <params> is
872         //   empty. The matched substring, including the semicolon character, is
873         //   removed from the parse string before continuing.
874         final int semicolonIndex = StringUtils.indexOf(spec, ';', startIndex, endIndex);
875 
876         if (semicolonIndex >= 0) {
877             if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
878                 // The substring of characters after the double-slash and up to, but not
879                 // including, the semicolon ";" character is the network location/login
880                 // (<net_loc>) of the URL.
881                 locationEndIndex = semicolonIndex;
882                 startIndex = semicolonIndex;
883             }
884             url.parameters_ = spec.substring(semicolonIndex + 1, endIndex);
885             endIndex = semicolonIndex;
886         }
887         // Section 2.4.6: Parsing the Path
888         //
889         //   After the above steps, all that is left of the parse string is the
890         //   URL <path> and the slash "/" that may precede it. Even though the
891         //   initial slash is not part of the URL path, the parser must remember
892         //   whether or not it was present so that later processes can
893         //   differentiate between relative and absolute paths. Often this is
894         //   done by simply storing the preceding slash along with the path.
895         if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
896             // The entire remaining parse string is assigned to the network
897             // location/login (<net_loc>) of the URL.
898             locationEndIndex = endIndex;
899         }
900         else if (startIndex < endIndex) {
901             url.path_ = spec.substring(startIndex, endIndex);
902         }
903         // Set the network location/login (<net_loc>) of the URL.
904         if ((locationStartIndex >= 0) && (locationEndIndex >= 0)) {
905             url.location_ = spec.substring(locationStartIndex, locationEndIndex);
906         }
907         return url;
908     }
909 
910     /**
911      * Returns true if specified string is a valid scheme name.
912      * <p>
913      * https://tools.ietf.org/html/rfc1738
914      * <p>
915      * Scheme names consist of a sequence of characters. The lower case
916      * letters "a"--"z", digits, and the characters plus ("+"), period
917      * ("."), and hyphen ("-") are allowed. For resiliency, programs
918      * interpreting URLs should treat upper case letters as equivalent to
919      * lower case in scheme names (e.g., allow "HTTP" as well as "http").
920      *
921      * @param scheme the scheme string to check
922      * @return true if valid
923      */
924     public static boolean isValidScheme(final String scheme) {
925         final int length = scheme.length();
926         if (length < 1) {
927             return false;
928         }
929 
930         char c = scheme.charAt(0);
931         boolean isValid = ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
932         if (!isValid) {
933             return false;
934         }
935 
936         for (int i = 1; i < length; i++) {
937             c = scheme.charAt(i);
938             isValid =
939                     ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
940                     || ('0' <= c && c <= '9')
941                     || c == '+'
942                     || c == '.'
943                     || c == '-';
944             if (!isValid) {
945                 return false;
946             }
947         }
948 
949         return true;
950     }
951 
952     /**
953      * Returns true if specified string is a special scheme.
954      * see <a href='https://url.spec.whatwg.org/#special-scheme'>
955      * https://url.spec.whatwg.org/#special-scheme</a>
956      *
957      * @param scheme the scheme string to check
958      * @return true if special
959      */
960     public static boolean isSpecialScheme(final String scheme) {
961         final int length = scheme.length();
962         if (length < 2 || length > 5) {
963             return false;
964         }
965 
966         final String schemeLC = scheme.toLowerCase(Locale.ROOT);
967         return "ftp".equals(schemeLC)
968                 || "file".equals(schemeLC)
969                 || "http".equals(schemeLC)
970                 || "https".equals(schemeLC)
971                 || "ws".equals(schemeLC)
972                 || "wss".equals(schemeLC);
973     }
974 
975     /**
976      * Resolves a given relative URL against a base URL using the algorithm
977      * depicted in <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>:
978      * <p>
979      * Section 4: Resolving Relative URLs
980      * <p>
981      *   This section describes an example algorithm for resolving URLs within
982      *   a context in which the URLs may be relative, such that the result is
983      *   always a URL in absolute form. Although this algorithm cannot
984      *   guarantee that the resulting URL will equal that intended by the
985      *   original author, it does guarantee that any valid URL (relative or
986      *   absolute) can be consistently transformed to an absolute form given a
987      *   valid base URL.
988      *
989      * @param baseUrl     The base URL in which to resolve the specification.
990      * @param relativeUrl The relative URL to resolve against the base URL.
991      * @return the resolved specification.
992      */
993     private static Url resolveUrl(final Url baseUrl, final String relativeUrl) {
994         final Url url = parseUrl(relativeUrl);
995         // Step 1: The base URL is established according to the rules of
996         //         Section 3.  If the base URL is the empty string (unknown),
997         //         the embedded URL is interpreted as an absolute URL and
998         //         we are done.
999         if (baseUrl == null) {
1000             return url;
1001         }
1002         // Step 2: Both the base and embedded URLs are parsed into their
1003         //         component parts as described in Section 2.4.
1004         //      a) If the embedded URL is entirely empty, it inherits the
1005         //         entire base URL (i.e., is set equal to the base URL)
1006         //         and we are done.
1007         if (relativeUrl.isEmpty()) {
1008             return new Url(baseUrl);
1009         }
1010         //      b) If the embedded URL starts with a scheme name, it is
1011         //         interpreted as an absolute URL and we are done.
1012         if (url.scheme_ != null) {
1013             return url;
1014         }
1015         //      c) Otherwise, the embedded URL inherits the scheme of
1016         //         the base URL.
1017         url.scheme_ = baseUrl.scheme_;
1018         // Step 3: If the embedded URL's <net_loc> is non-empty, we skip to
1019         //         Step 7.  Otherwise, the embedded URL inherits the <net_loc>
1020         //         (if any) of the base URL.
1021         if (url.location_ != null) {
1022             return url;
1023         }
1024         url.location_ = baseUrl.location_;
1025         // Step 4: If the embedded URL path is preceded by a slash "/", the
1026         //         path is not relative and we skip to Step 7.
1027         if (url.path_ != null && !url.path_.isEmpty() && url.path_.charAt(0) == '/') {
1028             url.path_ = removeLeadingSlashPoints(url.path_);
1029             return url;
1030         }
1031         // Step 5: If the embedded URL path is empty (and not preceded by a
1032         //         slash), then the embedded URL inherits the base URL path,
1033         //         and
1034         if (url.path_ == null) {
1035             url.path_ = baseUrl.path_;
1036             //  a) if the embedded URL's <params> is non-empty, we skip to
1037             //     step 7; otherwise, it inherits the <params> of the base
1038             //     URL (if any) and
1039             if (url.parameters_ != null) {
1040                 return url;
1041             }
1042             url.parameters_ = baseUrl.parameters_;
1043             //  b) if the embedded URL's <query> is non-empty, we skip to
1044             //     step 7; otherwise, it inherits the <query> of the base
1045             //     URL (if any) and we skip to step 7.
1046             if (url.query_ != null) {
1047                 return url;
1048             }
1049             url.query_ = baseUrl.query_;
1050             return url;
1051         }
1052         // Step 6: The last segment of the base URL's path (anything
1053         //         following the rightmost slash "/", or the entire path if no
1054         //         slash is present) is removed and the embedded URL's path is
1055         //         appended in its place.  The following operations are
1056         //         then applied, in order, to the new path:
1057         final String basePath = baseUrl.path_;
1058         String path = "";
1059 
1060         if (basePath == null) {
1061             path = "/";
1062         }
1063         else {
1064             final int lastSlashIndex = basePath.lastIndexOf('/');
1065 
1066             if (lastSlashIndex >= 0) {
1067                 path = basePath.substring(0, lastSlashIndex + 1);
1068             }
1069         }
1070 
1071         path = path.concat(url.path_);
1072         //      a) All occurrences of "./", where "." is a complete path
1073         //         segment, are removed.
1074         int pathSegmentIndex;
1075 
1076         while ((pathSegmentIndex = path.indexOf("/./")) >= 0) {
1077             path = path.substring(0, pathSegmentIndex + 1).concat(path.substring(pathSegmentIndex + 3));
1078         }
1079         //      b) If the path ends with "." as a complete path segment,
1080         //         that "." is removed.
1081         if (path.endsWith("/.")) {
1082             path = path.substring(0, path.length() - 1);
1083         }
1084         //      c) All occurrences of "<segment>/../", where <segment> is a
1085         //         complete path segment not equal to "..", are removed.
1086         //         Removal of these path segments is performed iteratively,
1087         //         removing the leftmost matching pattern on each iteration,
1088         //         until no matching pattern remains.
1089         while ((pathSegmentIndex = path.indexOf("/../")) > 0) {
1090             final String pathSegment = path.substring(0, pathSegmentIndex);
1091             final int slashIndex = pathSegment.lastIndexOf('/');
1092 
1093             if (slashIndex >= 0) {
1094                 if (!"..".equals(pathSegment.substring(slashIndex))) {
1095                     path = path.substring(0, slashIndex + 1).concat(path.substring(pathSegmentIndex + 4));
1096                 }
1097             }
1098             else {
1099                 path = path.substring(pathSegmentIndex + 4);
1100             }
1101         }
1102         //      d) If the path ends with "<segment>/..", where <segment> is a
1103         //         complete path segment not equal to "..", that
1104         //         "<segment>/.." is removed.
1105         if (path.endsWith("/..")) {
1106             final String pathSegment = path.substring(0, path.length() - 3);
1107             final int slashIndex = pathSegment.lastIndexOf('/');
1108 
1109             if (slashIndex >= 0) {
1110                 path = path.substring(0, slashIndex + 1);
1111             }
1112         }
1113 
1114         path = removeLeadingSlashPoints(path);
1115 
1116         url.path_ = path;
1117         // Step 7: The resulting URL components, including any inherited from
1118         //         the base URL, are recombined to give the absolute form of
1119         //         the embedded URL.
1120         return url;
1121     }
1122 
1123     /**
1124      * "../" after the leading "/" should be removed as browsers do (not in RFC)
1125      */
1126     private static String removeLeadingSlashPoints(final String path) {
1127         int i = 1;
1128         while (path.startsWith("../", i)) {
1129             i = i + 3;
1130         }
1131 
1132         if (i > 1) {
1133             return "/" + path.substring(i);
1134         }
1135 
1136         return path;
1137     }
1138 
1139     /**
1140      * Class <code>Url</code> represents a Uniform Resource Locator.
1141      */
1142     private static class Url {
1143 
1144         private String scheme_;
1145         private String location_;
1146         private String path_;
1147         private String parameters_;
1148         private String query_;
1149         private String fragment_;
1150 
1151         /**
1152          * Creates a <code>Url</code> object.
1153          */
1154         Url() {
1155             super();
1156         }
1157 
1158         /**
1159          * Creates a <code>Url</code> object from the specified
1160          * <code>Url</code> object.
1161          *
1162          * @param url a <code>Url</code> object.
1163          */
1164         Url(final Url url) {
1165             scheme_ = url.scheme_;
1166             location_ = url.location_;
1167             path_ = url.path_;
1168             parameters_ = url.parameters_;
1169             query_ = url.query_;
1170             fragment_ = url.fragment_;
1171         }
1172 
1173         /**
1174          * Returns a string representation of the <code>Url</code> object.
1175          *
1176          * @return a string representation of the <code>Url</code> object.
1177          */
1178         @Override
1179         public String toString() {
1180             final StringBuilder sb = new StringBuilder();
1181 
1182             if (scheme_ != null) {
1183                 sb.append(scheme_).append(':');
1184             }
1185             if (location_ != null) {
1186                 sb.append("//").append(location_);
1187             }
1188             if (path_ != null) {
1189                 sb.append(path_);
1190             }
1191             if (parameters_ != null) {
1192                 sb.append(';').append(parameters_);
1193             }
1194             if (query_ != null) {
1195                 sb.append('?').append(query_);
1196             }
1197             if (fragment_ != null) {
1198                 sb.append('#').append(fragment_);
1199             }
1200             return sb.toString();
1201         }
1202     }
1203 
1204     static boolean isNormalUrlProtocol(final String protocol) {
1205         return "http".equals(protocol) || "https".equals(protocol) || "file".equals(protocol);
1206     }
1207 
1208     /**
1209      * More or less the same as sameFile(URL, URL) but without
1210      * resolving the host to an IP address for comparing.
1211      * Additionally we do some path normalization.
1212      *
1213      * @param u1 a URL object
1214      * @param u2 a URL object
1215      * @return true if u1 and u2 refer to the same file
1216      */
1217     public static boolean sameFile(final URL u1, final URL u2) {
1218         if (u1 == u2) {
1219             return true;
1220         }
1221         if (u1 == null || u2 == null) {
1222             return false;
1223         }
1224 
1225         // Compare the protocols.
1226         final String p1 = u1.getProtocol();
1227         final String p2 = u2.getProtocol();
1228         if (!(p1 == p2 || (p1 != null && p1.equalsIgnoreCase(p2)))) {
1229             return false;
1230         }
1231 
1232         // Compare the ports.
1233         final int port1 = (u1.getPort() == -1) ? u1.getDefaultPort() : u1.getPort();
1234         final int port2 = (u2.getPort() == -1) ? u2.getDefaultPort() : u2.getPort();
1235         if (port1 != port2) {
1236             return false;
1237         }
1238 
1239         // Compare the hosts.
1240         final String h1 = u1.getHost();
1241         final String h2 = u2.getHost();
1242         if (!(h1 == h2 || (h1 != null && h1.equalsIgnoreCase(h2)))) {
1243             return false;
1244         }
1245 
1246         // Compare the files.
1247         String f1 = u1.getFile();
1248         if (f1.isEmpty()) {
1249             f1 = "/";
1250         }
1251         String f2 = u2.getFile();
1252         if (f2.isEmpty()) {
1253             f2 = "/";
1254         }
1255         if (f1.indexOf('.') > 0 || f2.indexOf('.') > 0) {
1256             try {
1257                 f1 = u1.toURI().normalize().toURL().getFile();
1258                 f2 = u2.toURI().normalize().toURL().getFile();
1259             }
1260             catch (final RuntimeException e) {
1261                 throw e;
1262             }
1263             catch (final Exception ignored) {
1264                 // ignore
1265             }
1266         }
1267 
1268         return Objects.equals(f1, f2);
1269     }
1270 
1271     /**
1272      * Helper that constructs a normalized url string
1273      * usable as cache key.
1274      *
1275      * @param url a URL object
1276      * @return the normalized string
1277      */
1278     public static String normalize(final URL url) {
1279         final StringBuilder result = new StringBuilder();
1280         result.append(url.getProtocol())
1281                 .append("://")
1282                 .append(url.getHost())
1283                 .append(':')
1284                 .append((url.getPort() == -1) ? url.getDefaultPort() : url.getPort());
1285 
1286         // Compare the files.
1287         String f = url.getFile();
1288         if (f.isEmpty()) {
1289             result.append('/');
1290         }
1291         else {
1292             if (f.indexOf('.') > 0) {
1293                 try {
1294                     f = url.toURI().normalize().toURL().getFile();
1295                 }
1296                 catch (final Exception ignored) {
1297                     // ignore
1298                 }
1299             }
1300             result.append(f);
1301         }
1302 
1303         return result.toString();
1304     }
1305 
1306     /**
1307      * Constructs a {@link URI} using the specified URL.
1308      *
1309      * @param url the URL
1310      * @param query the query
1311      *
1312      * @throws URISyntaxException
1313      *         If both a scheme and a path are given but the path is
1314      *         relative, if the URI string constructed from the given
1315      *         components violates RFC&nbsp;2396, or if the authority
1316      *         component of the string is present but cannot be parsed
1317      *         as a server-based authority
1318      * @return the URI
1319      */
1320     public static URI toURI(final URL url, final String query) throws URISyntaxException {
1321         final String scheme = url.getProtocol();
1322         final String host = url.getHost();
1323         final int port = url.getPort();
1324         final String path = url.getPath();
1325         final StringBuilder buffer = new StringBuilder();
1326         if (host != null) {
1327             if (scheme != null) {
1328                 buffer.append(scheme).append("://");
1329             }
1330             buffer.append(host);
1331             if (port > 0) {
1332                 buffer.append(':').append(port);
1333             }
1334         }
1335         if (path == null || path.isEmpty() || path.charAt(0) != '/') {
1336             buffer.append('/');
1337         }
1338         if (path != null) {
1339             buffer.append(path);
1340         }
1341         if (query != null) {
1342             buffer.append('?').append(query);
1343         }
1344         return new URI(buffer.toString());
1345     }
1346 
1347     /**
1348      * @param part the part to encode
1349      * @return the ecoded string
1350      */
1351     public static String encodeQueryPart(final String part) {
1352         if (part == null || part.isEmpty()) {
1353             return "";
1354         }
1355 
1356         return URLEncoder.encode(part, UTF_8);
1357     }
1358 
1359     /**
1360      * Removes the well known ports if it can be deduced from protocol.
1361      * @param url the url to clean up
1362      * @return a new URL without the port or the given one
1363      * @throws MalformedURLException if the URL string cannot be converted to a URL instance
1364      */
1365     public static URL removeRedundantPort(final URL url) throws MalformedURLException {
1366         if (("https".equals(url.getProtocol()) && url.getPort() == 443)
1367                 || ("http".equals(url.getProtocol()) && url.getPort() == 80)) {
1368             return getUrlWithNewPort(url, -1);
1369         }
1370         return url;
1371     }
1372 
1373     /**
1374      * Decodes an array of URL safe 7-bit characters into an array of original bytes.
1375      * Escaped characters are converted back to their original representation.
1376      * @param bytes array of URL safe characters
1377      * @param removeWhitespace if true don't add whitespace chars to the output
1378      * @return array of original bytes
1379      * @throws IllegalArgumentException in case of error
1380      */
1381     public static byte[] decodeDataUrl(final byte[] bytes, final boolean removeWhitespace)
1382                             throws IllegalArgumentException  {
1383         // adapted from apache commons codec
1384         if (bytes == null) {
1385             return null;
1386         }
1387         final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
1388         for (int i = 0; i < bytes.length; i++) {
1389             int b = bytes[i];
1390             if (b == '%') {
1391                 try {
1392                     final int u = digit16(bytes[++i]);
1393                     final int l = digit16(bytes[++i]);
1394                     b = (u << 4) + l;
1395                 }
1396                 catch (final ArrayIndexOutOfBoundsException e) {
1397                     throw new IllegalArgumentException("Invalid URL encoding: ", e);
1398                 }
1399             }
1400             if (removeWhitespace
1401                     && (b == 9 || b == 10 || b == 12 || b == 13 || b == 32)) {
1402                 continue;
1403             }
1404 
1405             buffer.write(b);
1406         }
1407         return buffer.toByteArray();
1408     }
1409 
1410     /**
1411      * Decodes an array of URL safe 7-bit characters into an array of original bytes.
1412      * Escaped characters are converted back to their original representation.
1413      * @param bytes array of URL safe characters
1414      * @return array of original bytes
1415      * @throws IllegalArgumentException in case of error
1416      */
1417     public static byte[] decodeUrl(final byte[] bytes) throws IllegalArgumentException {
1418         // adapted from apache commons codec
1419         if (bytes == null) {
1420             return null;
1421         }
1422         final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
1423         for (int i = 0; i < bytes.length; i++) {
1424             final int b = bytes[i];
1425             if (b == '+') {
1426                 buffer.write(' ');
1427             }
1428             else if (b == '%') {
1429                 try {
1430                     final int u = digit16(bytes[++i]);
1431                     final int l = digit16(bytes[++i]);
1432                     buffer.write((char) ((u << 4) + l));
1433                 }
1434                 catch (final ArrayIndexOutOfBoundsException e) {
1435                     throw new IllegalArgumentException("Invalid URL encoding: ", e);
1436                 }
1437             }
1438             else {
1439                 buffer.write(b);
1440             }
1441         }
1442         return buffer.toByteArray();
1443     }
1444 
1445     private static int digit16(final byte b) throws IllegalArgumentException  {
1446         final int i = Character.digit((char) b, 16);
1447         if (i == -1) {
1448             throw new IllegalArgumentException("Invalid URL encoding: not a valid digit (radix 16): " + b);
1449         }
1450         return i;
1451     }
1452 
1453     /**
1454      * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
1455      * @param urlsafe bitset of characters deemed URL safe
1456      * @param bytes  array of bytes to convert to URL safe characters
1457      * @return array of bytes containing URL safe characters
1458      */
1459     public static byte[] encodeUrl(final BitSet urlsafe, final byte[] bytes) {
1460         // adapted from apache commons codec
1461         if (bytes == null) {
1462             return null;
1463         }
1464 
1465         final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
1466         for (final byte c : bytes) {
1467             int b = c;
1468             if (b < 0) {
1469                 b = 256 + b;
1470             }
1471             if (urlsafe.get(b)) {
1472                 if (b == ' ') {
1473                     b = '+';
1474                 }
1475                 buffer.write(b);
1476             }
1477             else {
1478                 buffer.write('%');
1479                 final char hex1 = hexDigit(b >> 4);
1480                 final char hex2 = hexDigit(b);
1481                 buffer.write(hex1);
1482                 buffer.write(hex2);
1483             }
1484         }
1485         return buffer.toByteArray();
1486     }
1487 
1488     private static char hexDigit(final int b) {
1489         return Character.toUpperCase(Character.forDigit(b & 0xF, 16));
1490     }
1491 
1492     /**
1493      * Determines whether two URLs share the same origin according to the Same-Origin Policy.
1494      * Two URLs are considered to have the same origin if they have the same protocol (scheme),
1495      * host, and port.
1496      *
1497      * <p>The method handles default ports correctly by using the URL's default port when
1498      * the explicit port is -1 (indicating no port was specified).
1499      *
1500      * @param originUrl the first URL to compare (must not be null)
1501      * @param newUrl the second URL to compare (must not be null)
1502      * @return {@code true} if both URLs have the same host and effective port; {@code false} otherwise
1503      */
1504     public static boolean isSameOrigin(final URL originUrl, final URL newUrl) {
1505         if (!originUrl.getProtocol().equals(newUrl.getProtocol())) {
1506             return false;
1507         }
1508 
1509         if (!originUrl.getHost().equalsIgnoreCase(newUrl.getHost())) {
1510             return false;
1511         }
1512 
1513         int originPort = originUrl.getPort();
1514         if (originPort == -1) {
1515             originPort = originUrl.getDefaultPort();
1516         }
1517         int newPort = newUrl.getPort();
1518         if (newPort == -1) {
1519             newPort = newUrl.getDefaultPort();
1520         }
1521         return originPort == newPort;
1522     }
1523 }