View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.util;
16  
17  import static java.nio.charset.StandardCharsets.US_ASCII;
18  import static java.nio.charset.StandardCharsets.UTF_8;
19  
20  import java.io.ByteArrayOutputStream;
21  import java.io.UnsupportedEncodingException;
22  import java.net.MalformedURLException;
23  import java.net.URI;
24  import java.net.URISyntaxException;
25  import java.net.URL;
26  import java.net.URLEncoder;
27  import java.net.URLStreamHandler;
28  import java.nio.charset.Charset;
29  import java.util.BitSet;
30  import java.util.Locale;
31  import java.util.Objects;
32  
33  import org.htmlunit.WebAssert;
34  import org.htmlunit.protocol.AnyHandler;
35  import org.htmlunit.protocol.javascript.JavaScriptURLConnection;
36  
37  /**
38   * URL utilities class that makes it easy to create new URLs based off of old URLs
39   * without having to assemble or parse them yourself.
40   *
41   * @author Daniel Gredler
42   * @author Martin Tamme
43   * @author Sudhan Moghe
44   * @author Marc Guillemot
45   * @author Ahmed Ashour
46   * @author Ronald Brill
47   * @author Joerg Werner
48   * @author Hartmut Arlt
49   */
50  public final class UrlUtils {
51  
52      /** "about". */
53      public static final String ABOUT = "about";
54      /** "about:". */
55      public static final String ABOUT_SCHEME = ABOUT + ":";
56      /** "about:blank". */
57      public static final String ABOUT_BLANK = ABOUT_SCHEME + "blank";
58      /** URL for "about:blank". */
59      public static final URL URL_ABOUT_BLANK;
60  
61      private static final URLStreamHandler JS_HANDLER;
62      private static final URLStreamHandler ABOUT_HANDLER;
63      private static final URLStreamHandler DATA_HANDLER;
64  
65      private static final BitSet PATH_ALLOWED_CHARS = new BitSet(256);
66      private static final BitSet QUERY_ALLOWED_CHARS = new BitSet(256);
67      private static final BitSet ANCHOR_ALLOWED_CHARS = new BitSet(256);
68      private static final BitSet HASH_ALLOWED_CHARS = new BitSet(256);
69  
70      /*
71        URI allowed char initialization; based on HttpClient 3.1's URI bit sets.
72       */
73      static {
74          // make sure the handlers are available first (before calling toUrlSafe())
75          JS_HANDLER = new org.htmlunit.protocol.javascript.Handler();
76          ABOUT_HANDLER = new org.htmlunit.protocol.about.Handler();
77          DATA_HANDLER = new org.htmlunit.protocol.data.Handler();
78  
79          try {
80              URL_ABOUT_BLANK = new URL(null, ABOUT_BLANK, ABOUT_HANDLER);
81          }
82          catch (final MalformedURLException e) {
83              // should never happen
84              throw new RuntimeException(e);
85          }
86  
87          final BitSet reserved = new BitSet(256);
88          reserved.set(';');
89          reserved.set('/');
90          reserved.set('?');
91          reserved.set(':');
92          reserved.set('@');
93          reserved.set('&');
94          reserved.set('=');
95          reserved.set('+');
96          reserved.set('$');
97          reserved.set(',');
98  
99          final BitSet mark = new BitSet(256);
100         mark.set('-');
101         mark.set('_');
102         mark.set('.');
103         mark.set('!');
104         mark.set('~');
105         mark.set('*');
106         mark.set('\'');
107         mark.set('(');
108         mark.set(')');
109 
110         final BitSet alpha = new BitSet(256);
111         for (int i = 'a'; i <= 'z'; i++) {
112             alpha.set(i);
113         }
114         for (int i = 'A'; i <= 'Z'; i++) {
115             alpha.set(i);
116         }
117 
118         final BitSet digit = new BitSet(256);
119         for (int i = '0'; i <= '9'; i++) {
120             digit.set(i);
121         }
122 
123         final BitSet alphanumeric = new BitSet(256);
124         alphanumeric.or(alpha);
125         alphanumeric.or(digit);
126 
127         final BitSet unreserved = new BitSet(256);
128         unreserved.or(alphanumeric);
129         unreserved.or(mark);
130 
131         final BitSet hex = new BitSet(256);
132         hex.or(digit);
133         for (int i = 'a'; i <= 'f'; i++) {
134             hex.set(i);
135         }
136         for (int i = 'A'; i <= 'F'; i++) {
137             hex.set(i);
138         }
139 
140         final BitSet escaped = new BitSet(256);
141         escaped.set('%');
142         escaped.or(hex);
143 
144         final BitSet uric = new BitSet(256);
145         uric.or(reserved);
146         uric.or(unreserved);
147         uric.or(escaped);
148 
149         final BitSet pchar = new BitSet(256);
150         pchar.or(unreserved);
151         pchar.or(escaped);
152         pchar.set(':');
153         pchar.set('@');
154         pchar.set('&');
155         pchar.set('=');
156         pchar.set('+');
157         pchar.set('$');
158         pchar.set(',');
159 
160         final BitSet segment = new BitSet(256);
161         segment.or(pchar);
162         segment.set(';');
163         segment.or(pchar);
164 
165         final BitSet pathSegments = new BitSet(256);
166         pathSegments.set('/');
167         pathSegments.or(segment);
168 
169         final BitSet absPath = new BitSet(256);
170         absPath.set('/');
171         absPath.or(pathSegments);
172 
173         final BitSet allowedAbsPath = new BitSet(256);
174         allowedAbsPath.or(absPath);
175 
176         final BitSet allowedFragment = new BitSet(256);
177         allowedFragment.or(uric);
178 
179         final BitSet allowedQuery = new BitSet(256);
180         allowedQuery.or(uric);
181 
182         final BitSet allowedHash = new BitSet(256);
183         allowedHash.or(uric);
184 
185         PATH_ALLOWED_CHARS.or(allowedAbsPath);
186         QUERY_ALLOWED_CHARS.or(allowedQuery);
187         ANCHOR_ALLOWED_CHARS.or(allowedFragment);
188         HASH_ALLOWED_CHARS.or(allowedHash);
189     }
190 
    /**
     * Private constructor: this class only offers static members and is not meant to be instantiated.
     */
    private UrlUtils() {
        // Empty.
    }
197 
198     /**
199      * <p>Constructs a URL instance based on the specified URL string, taking into account the fact that the
200      * specified URL string may represent an <code>"about:..."</code> URL, a <code>"javascript:..."</code> URL, or
201      * a <code>data:...</code> URL.</p>
202      *
203      * <p>The caller should be sure that URL strings passed to this method will parse correctly as URLs, as
204      * this method never expects to have to handle {@link MalformedURLException}s.</p>
205      *
206      * @param url the URL string to convert into a URL instance
207      * @return the constructed URL instance
208      */
209     public static URL toUrlSafe(final String url) {
210         try {
211             return toUrlUnsafe(url);
212         }
213         catch (final MalformedURLException e) {
214             // Should never happen.
215             throw new RuntimeException(e);
216         }
217     }
218 
219     /**
220      * <p>Constructs a URL instance based on the specified URL string, taking into account the fact that the
221      * specified URL string may represent an <code>"about:..."</code> URL, a <code>"javascript:..."</code> URL, or
222      * a <code>data:...</code> URL.</p>
223      *
224      * <p>Unlike {@link #toUrlSafe(String)}, the caller need not be sure that URL strings passed to this
225      * method will parse correctly as URLs.</p>
226      *
227      * @param url the URL string to convert into a URL instance
228      * @return the constructed URL instance
229      * @throws MalformedURLException if the URL string cannot be converted to a URL instance
230      */
231     public static URL toUrlUnsafe(final String url) throws MalformedURLException {
232         WebAssert.notNull("url", url);
233 
234         final String protocol = org.apache.commons.lang3.StringUtils.substringBefore(url, ":").toLowerCase(Locale.ROOT);
235 
236         if (protocol.isEmpty() || UrlUtils.isNormalUrlProtocol(protocol)) {
237             final URL response = new URL(url);
238             if (response.getProtocol().startsWith("http")
239                     && org.apache.commons.lang3.StringUtils.isEmpty(response.getHost())) {
240                 throw new MalformedURLException("Missing host name in url: " + url);
241             }
242             return response;
243         }
244 
245         if (JavaScriptURLConnection.JAVASCRIPT_PREFIX.equals(protocol + ":")) {
246             return new URL(null, url, JS_HANDLER);
247         }
248 
249         if (ABOUT.equals(protocol)) {
250             if (org.apache.commons.lang3.StringUtils.equalsIgnoreCase(ABOUT_BLANK, url)) {
251                 return URL_ABOUT_BLANK;
252             }
253             return new URL(null, url, ABOUT_HANDLER);
254         }
255 
256         if ("data".equals(protocol)) {
257             return new URL(null, url, DATA_HANDLER);
258         }
259 
260         return new URL(null, url, AnyHandler.INSTANCE);
261     }
262 
263     /**
264      * <p>Encodes illegal characters in the specified URL's path, query string and anchor according to the URL
265      * encoding rules observed in real browsers.</p>
266      *
267      * <p>For example, this method changes
268      * <code>"http://first/?a=b c"</code> to <code>"http://first/?a=b%20c"</code>.</p>
269      *
270      * @param url the URL to encode
271      * @param charset the charset
272      * @return the encoded URL
273      */
274     public static URL encodeUrl(final URL url, final Charset charset) {
275         if (!isNormalUrlProtocol(url.getProtocol())) {
276             return url; // javascript:, about:, data: and anything not supported like foo:
277         }
278 
279         try {
280             String path = url.getPath();
281             if (path != null) {
282                 path = encode(path, PATH_ALLOWED_CHARS, UTF_8);
283             }
284             String query = url.getQuery();
285             if (query != null) {
286                 query = encode(query, QUERY_ALLOWED_CHARS, charset);
287             }
288             String anchor = url.getRef();
289             if (anchor != null) {
290                 anchor = encode(anchor, ANCHOR_ALLOWED_CHARS, UTF_8);
291             }
292             return createNewUrl(url.getProtocol(), url.getUserInfo(), url.getHost(),
293                                 url.getPort(), path, anchor, query);
294         }
295         catch (final MalformedURLException e) {
296             // Impossible... I think.
297             throw new RuntimeException(e);
298         }
299     }
300 
301     /**
302      * Encodes and escapes the specified URI anchor string.
303      *
304      * @param anchor the anchor string to encode and escape
305      * @return the encoded and escaped anchor string
306      */
307     public static String encodeAnchor(final String anchor) {
308         if (anchor == null) {
309             return null;
310         }
311         return encode(anchor, ANCHOR_ALLOWED_CHARS, UTF_8);
312     }
313 
314     /**
315      * Encodes and escapes the specified URI hash string.
316      *
317      * @param hash the anchor string to encode and escape
318      * @return the encoded and escaped hash string
319      */
320     public static String encodeHash(final String hash) {
321         if (hash == null) {
322             return null;
323         }
324         return encode(hash, HASH_ALLOWED_CHARS, UTF_8);
325     }
326 
327     /**
328      * Encodes and escapes the specified URI hash string.
329      *
330      * @param query the query string to encode and escape
331      * @return the encoded and escaped hash string
332      */
333     public static String encodeQuery(final String query) {
334         if (query == null) {
335             return null;
336         }
337         return encode(query, QUERY_ALLOWED_CHARS, UTF_8);
338     }
339 
340     /**
341      * Unescapes and decodes the specified string.
342      *
343      * @param escaped the string to be unescaped and decoded
344      * @return the unescaped and decoded string
345      */
346     public static String decode(final String escaped) {
347         try {
348             final byte[] bytes = escaped.getBytes(US_ASCII);
349             final byte[] bytes2 = decodeUrl(bytes);
350             return new String(bytes2, UTF_8);
351         }
352         catch (final IllegalArgumentException e) {
353             // Should never happen.
354             throw new RuntimeException(e);
355         }
356     }
357 
358     /**
359      * Escapes and encodes the specified string. Based on HttpClient 3.1's <code>URIUtil.encode()</code> method.
360      *
361      * @param unescaped the string to encode
362      * @param allowed allowed characters that shouldn't be escaped
363      * @param charset the charset to use
364      * @return the escaped string
365      */
366     private static String encode(final String unescaped, final BitSet allowed, final Charset charset) {
367         final byte[] bytes = unescaped.getBytes(charset);
368         final byte[] bytes2 = encodeUrl(allowed, bytes);
369         return encodePercentSign(bytes2);
370     }
371 
372     /**
373      * Encodes every occurrence of the escape character '%' in the given input
374      * string that is not followed by two hexadecimal characters.
375      * @param input the input bytes
376      * @return the given input string where every occurrence of <code>%</code> in
377      *         invalid escape sequences has been replace by <code>%25</code>
378      */
379     private static String encodePercentSign(final byte[] input) {
380         if (input == null) {
381             return null;
382         }
383 
384         final StringBuilder result = new StringBuilder(new String(input, US_ASCII));
385         int state = -0;
386         int offset = 0;
387         for (int i = 0; i < input.length; i++) {
388             final byte b = input[i];
389             if (state == 0 && b == '%') {
390                 state = 1;
391             }
392             else if (state == 1 || state == 2) {
393                 if (('0' <= b && b <= '9')
394                         || ('A' <= b && b <= 'F')
395                         || ('a' <= b && b <= 'f')) {
396                     state++;
397                     if (state == 3) {
398                         state = 0;
399                     }
400                 }
401                 else {
402                     final int st = i - state + offset;
403                     result.replace(st, st + 1, "%25");
404                     offset = offset + 2;
405                     state = b == '%' ? 1 : 0;
406                 }
407             }
408         }
409         if (state == 1 || state == 2) {
410             final int st = input.length - state + offset;
411             result.replace(st, st + 1, "%25");
412         }
413         return result.toString();
414     }
415 
416     /**
417      * Creates and returns a new URL using only the protocol and authority from the given one.
418      * @param u the URL on which to base the returned URL
419      * @return a new URL using only the protocol and authority from the given one
420      * @throws MalformedURLException if there is a problem creating the new URL
421      */
422     public static URL getUrlWithoutPathRefQuery(final URL u) throws MalformedURLException {
423         return createNewUrl(u.getProtocol(), u.getAuthority(), null, null, null);
424     }
425 
426     /**
427      * Creates and returns a new URL using only the protocol, authority and path
428      * from the given one.
429      * @param u the URL on which to base the returned URL
430      * @return a new URL using only the protocol and authority from the given one
431      * @throws MalformedURLException if there is a problem creating the new URL
432      */
433     public static URL getUrlWithoutRef(final URL u) throws MalformedURLException {
434         return createNewUrl(u.getProtocol(), u.getAuthority(), u.getPath(), null, u.getQuery());
435     }
436 
437     /**
438      * Creates and returns a new URL identical to the specified URL, except using the specified protocol.
439      * @param u the URL on which to base the returned URL
440      * @param newProtocol the new protocol to use in the returned URL
441      * @return a new URL identical to the specified URL, except using the specified protocol
442      * @throws MalformedURLException if there is a problem creating the new URL
443      */
444     public static URL getUrlWithNewProtocol(final URL u, final String newProtocol) throws MalformedURLException {
445         return createNewUrl(newProtocol, u.getAuthority(), u.getPath(), u.getRef(), u.getQuery());
446     }
447 
448     /**
449      * Creates and returns a new URL identical to the specified URL, except using the specified host.
450      * @param u the URL on which to base the returned URL
451      * @param newHost the new host to use in the returned URL
452      * @return a new URL identical to the specified URL, except using the specified host
453      * @throws MalformedURLException if there is a problem creating the new URL
454      */
455     public static URL getUrlWithNewHost(final URL u, final String newHost)
456         throws MalformedURLException {
457         return createNewUrl(u.getProtocol(), u.getUserInfo(), newHost,
458                             u.getPort(), u.getPath(), u.getRef(), u.getQuery());
459     }
460 
461     /**
462      * Creates and returns a new URL identical to the specified URL, except using the specified host.
463      * @param u the URL on which to base the returned URL
464      * @param newHost the new host to use in the returned URL
465      * @param newPort the new port to use in the returned URL
466      * @return a new URL identical to the specified URL, except using the specified host
467      * @throws MalformedURLException if there is a problem creating the new URL
468      */
469     public static URL getUrlWithNewHostAndPort(final URL u, final String newHost, final int newPort)
470         throws MalformedURLException {
471         return createNewUrl(u.getProtocol(), u.getUserInfo(), newHost, newPort, u.getPath(), u.getRef(), u.getQuery());
472     }
473 
474     /**
475      * Creates and returns a new URL identical to the specified URL, except using the specified port.
476      * @param u the URL on which to base the returned URL
477      * @param newPort the new port to use in the returned URL or -1 to remove it
478      * @return a new URL identical to the specified URL, except using the specified port
479      * @throws MalformedURLException if there is a problem creating the new URL
480      */
481     public static URL getUrlWithNewPort(final URL u, final int newPort) throws MalformedURLException {
482         return createNewUrl(u.getProtocol(), u.getUserInfo(), u.getHost(),
483                             newPort, u.getPath(), u.getRef(), u.getQuery());
484     }
485 
486     /**
487      * Creates and returns a new URL identical to the specified URL, except using the specified path.
488      * @param u the URL on which to base the returned URL
489      * @param newPath the new path to use in the returned URL
490      * @return a new URL identical to the specified URL, except using the specified path
491      * @throws MalformedURLException if there is a problem creating the new URL
492      */
493     public static URL getUrlWithNewPath(final URL u, final String newPath) throws MalformedURLException {
494         return createNewUrl(u.getProtocol(), u.getAuthority(), newPath, u.getRef(), u.getQuery());
495     }
496 
497     /**
498      * Creates and returns a new URL identical to the specified URL, except using the specified reference.
499      * @param u the URL on which to base the returned URL
500      * @param newRef the new reference to use in the returned URL or null to remove it
501      * @return a new URL identical to the specified URL, except using the specified reference
502      * @throws MalformedURLException if there is a problem creating the new URL
503      */
504     public static URL getUrlWithNewRef(final URL u, final String newRef) throws MalformedURLException {
505         return createNewUrl(u.getProtocol(), u.getAuthority(), u.getPath(), newRef, u.getQuery());
506     }
507 
508     /**
509      * Creates and returns a new URL identical to the specified URL, except using the specified query string.
510      * @param u the URL on which to base the returned URL
511      * @param newQuery the new query string to use in the returned URL
512      * @return a new URL identical to the specified URL, except using the specified query string
513      * @throws MalformedURLException if there is a problem creating the new URL
514      */
515     public static URL getUrlWithNewQuery(final URL u, final String newQuery) throws MalformedURLException {
516         return createNewUrl(u.getProtocol(), u.getAuthority(), u.getPath(), u.getRef(), newQuery);
517     }
518 
519     /**
520      * Creates and returns a new URL identical to the specified URL, ignoring path, protocol and query.
521      * @param u the URL on which to base the returned URL
522      * @return a new URL identical to the specified URL, ignoring path, protocol and query
523      * @throws MalformedURLException if there is a problem creating the new URL
524      */
525     public static URL getUrlWithProtocolAndAuthority(final URL u) throws MalformedURLException {
526         return createNewUrl(u.getProtocol(), u.getAuthority(), null, null, null);
527     }
528 
529     /**
530      * Creates and returns a new URL identical to the specified URL but with a changed user name.
531      * @param u the URL on which to base the returned URL
532      * @param newUserName the new user name or null to remove it
533      * @return a new URL identical to the specified URL; only user name updated
534      * @throws MalformedURLException if there is a problem creating the new URL
535      */
536     public static URL getUrlWithNewUserName(final URL u, final String newUserName) throws MalformedURLException {
537         String newUserInfo = newUserName == null ? "" : newUserName;
538         final String userInfo = u.getUserInfo();
539         if (org.apache.commons.lang3.StringUtils.isNotBlank(userInfo)) {
540             final int colonIdx = userInfo.indexOf(':');
541             if (colonIdx > -1) {
542                 newUserInfo = newUserInfo + userInfo.substring(colonIdx);
543             }
544         }
545         return createNewUrl(u.getProtocol(), newUserInfo.isEmpty() ? null : newUserInfo,
546                 u.getHost(), u.getPort(), u.getPath(), u.getRef(), u.getQuery());
547     }
548 
549     /**
550      * Creates and returns a new URL identical to the specified URL but with a changed user password.
551      * @param u the URL on which to base the returned URL
552      * @param newUserPassword the new user password or null to remove it
553      * @return a new URL identical to the specified URL; only user name updated
554      * @throws MalformedURLException if there is a problem creating the new URL
555      */
556     public static URL getUrlWithNewUserPassword(final URL u, final String newUserPassword)
557             throws MalformedURLException {
558         String newUserInfo = newUserPassword == null ? "" : ':' + newUserPassword;
559         final String userInfo = u.getUserInfo();
560         if (org.apache.commons.lang3.StringUtils.isNotBlank(userInfo)) {
561             final int colonIdx = userInfo.indexOf(':');
562             if (colonIdx > -1) {
563                 newUserInfo = userInfo.substring(0, colonIdx) + newUserInfo;
564             }
565             else {
566                 newUserInfo = userInfo + newUserInfo;
567             }
568         }
569         return createNewUrl(u.getProtocol(), newUserInfo.isEmpty() ? null : newUserInfo,
570                 u.getHost(), u.getPort(), u.getPath(), u.getRef(), u.getQuery());
571     }
572 
573     /**
574      * Creates a new URL based on the specified fragments.
575      * @param protocol the protocol to use (may not be {@code null})
576      * @param userInfo the user info to use (may be {@code null})
577      * @param host the host to use (may not be {@code null})
578      * @param port the port to use (may be <code>-1</code> if no port is specified)
579      * @param path the path to use (may be {@code null} and may omit the initial <code>'/'</code>)
580      * @param ref the reference to use (may be {@code null} and must not include the <code>'#'</code>)
581      * @param query the query to use (may be {@code null} and must not include the <code>'?'</code>)
582      * @return a new URL based on the specified fragments
583      * @throws MalformedURLException if there is a problem creating the new URL
584      */
585     private static URL createNewUrl(final String protocol, final String userInfo, final String host, final int port,
586             final String path, final String ref, final String query) throws MalformedURLException {
587         final StringBuilder s = new StringBuilder();
588         s.append(protocol).append("://");
589         if (userInfo != null) {
590             s.append(userInfo).append('@');
591         }
592         s.append(host);
593         if (port != -1) {
594             s.append(':').append(port);
595         }
596         if (path != null && !path.isEmpty()) {
597             if ('/' != path.charAt(0)) {
598                 s.append('/');
599             }
600             s.append(path);
601         }
602         if (query != null) {
603             s.append('?').append(query);
604         }
605         if (ref != null) {
606             if (ref.isEmpty() || ref.charAt(0) != '#') {
607                 s.append('#');
608             }
609             s.append(ref);
610         }
611 
612         return new URL(s.toString());
613     }
614 
615     /**
616      * Creates a new URL based on the specified fragments.
617      * @param protocol the protocol to use (may not be {@code null})
618      * @param authority the authority to use (may not be {@code null})
619      * @param path the path to use (may be {@code null} and may omit the initial <code>'/'</code>)
620      * @param ref the reference to use (may be {@code null} and must not include the <code>'#'</code>)
621      * @param query the query to use (may be {@code null} and must not include the <code>'?'</code>)
622      * @return a new URL based on the specified fragments
623      * @throws MalformedURLException if there is a problem creating the new URL
624      */
625     private static URL createNewUrl(final String protocol, final String authority,
626             final String path, final String ref, final String query) throws MalformedURLException {
627 
628         // pre-compute length of StringBuilder
629         int len = protocol.length() + 1;
630         if (authority != null && !authority.isEmpty()) {
631             len += 2 + authority.length();
632         }
633         if (path != null) {
634             len += path.length();
635         }
636         if (query != null) {
637             len += 1 + query.length();
638         }
639         if (ref != null) {
640             len += 1 + ref.length();
641         }
642 
643         final StringBuilder s = new StringBuilder(len);
644         s.append(protocol).append(':');
645         if (authority != null && !authority.isEmpty()) {
646             s.append("//").append(authority);
647         }
648         if (path != null) {
649             s.append(path);
650         }
651         if (query != null) {
652             s.append('?').append(query);
653         }
654         if (ref != null) {
655             if (ref.isEmpty() || ref.charAt(0) != '#') {
656                 s.append('#');
657             }
658             s.append(ref);
659         }
660 
661         return toUrlSafe(s.toString());
662     }
663 
664     /**
665      * Resolves a given relative URL against a base URL. See
666      * <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>
667      * Section 4 for more details.
668      *
669      * @param baseUrl     The base URL in which to resolve the specification.
670      * @param relativeUrl The relative URL to resolve against the base URL.
671      * @return the resolved specification.
672      */
673     public static String resolveUrl(final String baseUrl, final String relativeUrl) {
674         if (baseUrl == null) {
675             throw new IllegalArgumentException("Base URL must not be null");
676         }
677         if (relativeUrl == null) {
678             throw new IllegalArgumentException("Relative URL must not be null");
679         }
680         final Url url = resolveUrl(parseUrl(baseUrl), relativeUrl);
681 
682         return url.toString();
683     }
684 
685     /**
686      * Resolves a given relative URL against a base URL. See
687      * <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>
688      * Section 4 for more details.
689      *
690      * @param baseUrl     The base URL in which to resolve the specification.
691      * @param relativeUrl The relative URL to resolve against the base URL.
692      * @return the resolved specification.
693      */
694     public static String resolveUrl(final URL baseUrl, final String relativeUrl) {
695         if (baseUrl == null) {
696             throw new IllegalArgumentException("Base URL must not be null");
697         }
698         return resolveUrl(baseUrl.toExternalForm(), relativeUrl);
699     }
700 
    /**
     * Splits a given specification into its components (scheme, network location,
     * path, parameters, query and fragment) using the algorithm depicted in
     * <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>:
     * <p>
     * Section 2.4: Parsing a URL
     * <p>
     *   An accepted method for parsing URLs is useful to clarify the
     *   generic-RL syntax of Section 2.2 and to describe the algorithm for
     *   resolving relative URLs presented in Section 4. This section
     *   describes the parsing rules for breaking down a URL (relative or
     *   absolute) into the component parts described in Section 2.1.  The
     *   rules assume that the URL has already been separated from any
     *   surrounding text and copied to a "parse string". The rules are
     *   listed in the order in which they would be applied by the parser.
     * <p>
     * Before the RFC rules are applied the input is cleaned up as described for the
     * <a href="https://url.spec.whatwg.org/#concept-basic-url-parser">basic URL parser</a>:
     * leading and trailing C0 control or space characters are dropped and every
     * tab, carriage return and line feed is removed.
     *
     * @param spec The specification to parse; must not be {@code null}.
     * @return the parsed specification; components that are not found are left {@code null}.
     */
    private static Url parseUrl(String spec) {
        final Url url = new Url();
        // [startIndex, endIndex) is the part of spec that still has to be parsed;
        // every step below cuts its component off one of the two ends
        int startIndex = 0;
        int endIndex = spec.length();

        // see https://url.spec.whatwg.org/#concept-basic-url-parser
        //   * If input contains any leading or trailing C0 control or space, validation error.
        //     Remove any leading and trailing C0 control or space from input.
        //   * If input contains any ASCII tab or newline, validation error.
        //     Remove all ASCII tab or newline from input.

        if (endIndex > startIndex) {
            // stays null until the first character has to be dropped;
            // from then on it collects all characters that are kept
            StringBuilder sb = null;
            // true as long as only leading characters (to be removed) have been seen
            boolean before = true;
            // number of kept C0 control or space characters since the last other character;
            // whatever is counted here at the end of the loop is cut off afterwards
            int trailing = 0;

            for (int i = 0; i < endIndex; i++) {
                final char c = spec.charAt(i);
                boolean remove = false;

                // tab and newline characters are removed wherever they occur
                // (non short-circuit '|', the operands have no side effects)
                if (c == '\t' | c == '\r' | c == '\n') {
                    remove = true;
                }
                else if ('\u0000' <= c && c <= '\u0020') {
                    if (before) {
                        remove = true;
                    }
                    else {
                        // might be part of the trailing run, this is decided later
                        trailing++;
                    }
                }
                else {
                    before = false;
                    trailing = 0;
                }

                if (remove) {
                    if (sb == null) {
                        // first removal: everything in front of this position is kept
                        sb = new StringBuilder(spec.substring(0, i));
                    }
                }
                else if (sb != null) {
                    sb.append(c);
                }
            }

            if (sb == null) {
                // nothing was removed inside the string, only the trailing run has to go
                if (trailing > 0) {
                    endIndex = spec.length() - trailing;
                    spec = spec.substring(0, endIndex);
                }
            }
            else {
                if (trailing > 0) {
                    spec = sb.substring(0, sb.length() - trailing);
                }
                else {
                    spec = sb.toString();
                }
                endIndex = spec.length();
            }
        }

        // Section 2.4.1: Parsing the Fragment Identifier
        //
        //   If the parse string contains a crosshatch "#" character, then the
        //   substring after the first (left-most) crosshatch "#" and up to the
        //   end of the parse string is the <fragment> identifier. If the
        //   crosshatch is the last character, or no crosshatch is present, then
        //   the fragment identifier is empty. The matched substring, including
        //   the crosshatch character, is removed from the parse string before
        //   continuing.
        //
        //   Note that the fragment identifier is not considered part of the URL.
        //   However, since it is often attached to the URL, parsers must be able
        //   to recognize and set aside fragment identifiers as part of the
        //   process.
        // NOTE(review): StringUtils.indexOf is presumably limited to [startIndex, endIndex)
        // and negative when nothing is found - the checks below rely on that; confirm.
        final int crosshatchIndex = StringUtils.indexOf(spec, '#', startIndex, endIndex);

        if (crosshatchIndex >= 0) {
            url.fragment_ = spec.substring(crosshatchIndex + 1, endIndex);
            endIndex = crosshatchIndex;
        }
        // Section 2.4.2: Parsing the Scheme
        //
        //   If the parse string contains a colon ":" after the first character
        //   and before any characters not allowed as part of a scheme name (i.e.,
        //   any not an alphanumeric, plus "+", period ".", or hyphen "-"), the
        //   <scheme> of the URL is the substring of characters up to but not
        //   including the first colon. These characters and the colon are then
        //   removed from the parse string before continuing.
        final int colonIndex = StringUtils.indexOf(spec, ':', startIndex, endIndex);

        if (colonIndex > 0) {
            final String scheme = spec.substring(startIndex, colonIndex);
            // only accepted as scheme if isValidScheme() agrees,
            // otherwise the colon stays part of the remaining parse string
            if (isValidScheme(scheme)) {
                url.scheme_ = scheme;
                startIndex = colonIndex + 1;
            }
        }
        // Section 2.4.3: Parsing the Network Location/Login
        //
        //   If the parse string begins with a double-slash "//", then the
        //   substring of characters after the double-slash and up to, but not
        //   including, the next slash "/" character is the network location/login
        //   (<net_loc>) of the URL. If no trailing slash "/" is present, the
        //   entire remaining parse string is assigned to <net_loc>. The double-
        //   slash and <net_loc> are removed from the parse string before
        //   continuing.
        //
        // Note: We also accept a question mark "?" or a semicolon ";" character as
        //       delimiters for the network location/login (<net_loc>) of the URL.
        // locationStartIndex < 0 means there is no <net_loc> at all; locationStartIndex >= 0
        // together with locationEndIndex < 0 means the end of the <net_loc> is still unknown
        final int locationStartIndex;
        int locationEndIndex;

        if (spec.startsWith("//", startIndex)) {
            locationStartIndex = startIndex + 2;
            locationEndIndex = StringUtils.indexOf(spec, '/', locationStartIndex, endIndex);
            if (locationEndIndex >= 0) {
                startIndex = locationEndIndex;
            }
        }
        else {
            locationStartIndex = -1;
            locationEndIndex = -1;
        }
        // Section 2.4.4: Parsing the Query Information
        //
        //   If the parse string contains a question mark "?" character, then the
        //   substring after the first (left-most) question mark "?" and up to the
        //   end of the parse string is the <query> information. If the question
        //   mark is the last character, or no question mark is present, then the
        //   query information is empty. The matched substring, including the
        //   question mark character, is removed from the parse string before
        //   continuing.
        final int questionMarkIndex = StringUtils.indexOf(spec, '?', startIndex, endIndex);

        if (questionMarkIndex >= 0) {
            if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
                // The substring of characters after the double-slash and up to, but not
                // including, the question mark "?" character is the network location/login
                // (<net_loc>) of the URL.
                locationEndIndex = questionMarkIndex;
                startIndex = questionMarkIndex;
            }
            url.query_ = spec.substring(questionMarkIndex + 1, endIndex);
            endIndex = questionMarkIndex;
        }
        // Section 2.4.5: Parsing the Parameters
        //
        //   If the parse string contains a semicolon ";" character, then the
        //   substring after the first (left-most) semicolon ";" and up to the end
        //   of the parse string is the parameters (<params>). If the semicolon
        //   is the last character, or no semicolon is present, then <params> is
        //   empty. The matched substring, including the semicolon character, is
        //   removed from the parse string before continuing.
        final int semicolonIndex = StringUtils.indexOf(spec, ';', startIndex, endIndex);

        if (semicolonIndex >= 0) {
            if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
                // The substring of characters after the double-slash and up to, but not
                // including, the semicolon ";" character is the network location/login
                // (<net_loc>) of the URL.
                locationEndIndex = semicolonIndex;
                startIndex = semicolonIndex;
            }
            url.parameters_ = spec.substring(semicolonIndex + 1, endIndex);
            endIndex = semicolonIndex;
        }
        // Section 2.4.6: Parsing the Path
        //
        //   After the above steps, all that is left of the parse string is the
        //   URL <path> and the slash "/" that may precede it. Even though the
        //   initial slash is not part of the URL path, the parser must remember
        //   whether or not it was present so that later processes can
        //   differentiate between relative and absolute paths. Often this is
        //   done by simply storing the preceding slash along with the path.
        if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
            // The entire remaining parse string is assigned to the network
            // location/login (<net_loc>) of the URL.
            locationEndIndex = endIndex;
        }
        else if (startIndex < endIndex) {
            // the path keeps its leading slash (if any); an empty rest leaves path_ null
            url.path_ = spec.substring(startIndex, endIndex);
        }
        // Set the network location/login (<net_loc>) of the URL.
        if ((locationStartIndex >= 0) && (locationEndIndex >= 0)) {
            url.location_ = spec.substring(locationStartIndex, locationEndIndex);
        }
        return url;
    }
910 
911     /**
912      * Returns true if specified string is a valid scheme name.
913      * <p>
914      * https://tools.ietf.org/html/rfc1738
915      * <p>
916      * Scheme names consist of a sequence of characters. The lower case
917      * letters "a"--"z", digits, and the characters plus ("+"), period
918      * ("."), and hyphen ("-") are allowed. For resiliency, programs
919      * interpreting URLs should treat upper case letters as equivalent to
920      * lower case in scheme names (e.g., allow "HTTP" as well as "http").
921      *
922      * @param scheme the scheme string to check
923      * @return true if valid
924      */
925     public static boolean isValidScheme(final String scheme) {
926         final int length = scheme.length();
927         if (length < 1) {
928             return false;
929         }
930 
931         char c = scheme.charAt(0);
932         boolean isValid = ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
933         if (!isValid) {
934             return false;
935         }
936 
937         for (int i = 1; i < length; i++) {
938             c = scheme.charAt(i);
939             isValid =
940                     ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
941                     || ('0' <= c && c <= '9')
942                     || c == '+'
943                     || c == '.'
944                     || c == '-';
945             if (!isValid) {
946                 return false;
947             }
948         }
949 
950         return true;
951     }
952 
953     /**
954      * Returns true if specified string is a special scheme.
955      * see <a href='https://url.spec.whatwg.org/#special-scheme'>
956      * https://url.spec.whatwg.org/#special-scheme</a>
957      *
958      * @param scheme the scheme string to check
959      * @return true if special
960      */
961     public static boolean isSpecialScheme(final String scheme) {
962         final int length = scheme.length();
963         if (length < 2 || length > 5) {
964             return false;
965         }
966 
967         final String schemeLC = scheme.toLowerCase(Locale.ROOT);
968         return "ftp".equals(schemeLC)
969                 || "file".equals(schemeLC)
970                 || "http".equals(schemeLC)
971                 || "https".equals(schemeLC)
972                 || "ws".equals(schemeLC)
973                 || "wss".equals(schemeLC);
974     }
975 
    /**
     * Resolves a given relative URL against a base URL using the algorithm
     * depicted in <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>:
     * <p>
     * Section 4: Resolving Relative URLs
     * <p>
     *   This section describes an example algorithm for resolving URLs within
     *   a context in which the URLs may be relative, such that the result is
     *   always a URL in absolute form. Although this algorithm cannot
     *   guarantee that the resulting URL will equal that intended by the
     *   original author, it does guarantee that any valid URL (relative or
     *   absolute) can be consistently transformed to an absolute form given a
     *   valid base URL.
     * <p>
     * In addition to the RFC, "../" sequences directly at the start of the resulting
     * path are dropped (see {@code removeLeadingSlashPoints}).
     *
     * @param baseUrl     The base URL in which to resolve the specification; if {@code null}
     *                    the parsed relative URL is returned as it is.
     * @param relativeUrl The relative URL to resolve against the base URL; must not be {@code null}.
     * @return the resolved specification.
     */
    private static Url resolveUrl(final Url baseUrl, final String relativeUrl) {
        // the relative URL is always parsed first, even if there is no base URL
        final Url url = parseUrl(relativeUrl);
        // Step 1: The base URL is established according to the rules of
        //         Section 3.  If the base URL is the empty string (unknown),
        //         the embedded URL is interpreted as an absolute URL and
        //         we are done.
        if (baseUrl == null) {
            return url;
        }
        // Step 2: Both the base and embedded URLs are parsed into their
        //         component parts as described in Section 2.4.
        //      a) If the embedded URL is entirely empty, it inherits the
        //         entire base URL (i.e., is set equal to the base URL)
        //         and we are done.
        // the check is done on the raw input; the result is a copy of the base
        if (relativeUrl.isEmpty()) {
            return new Url(baseUrl);
        }
        //      b) If the embedded URL starts with a scheme name, it is
        //         interpreted as an absolute URL and we are done.
        if (url.scheme_ != null) {
            return url;
        }
        //      c) Otherwise, the embedded URL inherits the scheme of
        //         the base URL.
        url.scheme_ = baseUrl.scheme_;
        // Step 3: If the embedded URL's <net_loc> is non-empty, we skip to
        //         Step 7.  Otherwise, the embedded URL inherits the <net_loc>
        //         (if any) of the base URL.
        if (url.location_ != null) {
            return url;
        }
        url.location_ = baseUrl.location_;
        // Step 4: If the embedded URL path is preceded by a slash "/", the
        //         path is not relative and we skip to Step 7.
        if (url.path_ != null && !url.path_.isEmpty() && url.path_.charAt(0) == '/') {
            // not part of the RFC: "../" directly after the leading slash is dropped
            url.path_ = removeLeadingSlashPoints(url.path_);
            return url;
        }
        // Step 5: If the embedded URL path is empty (and not preceded by a
        //         slash), then the embedded URL inherits the base URL path,
        //         and
        if (url.path_ == null) {
            url.path_ = baseUrl.path_;
            //  a) if the embedded URL's <params> is non-empty, we skip to
            //     step 7; otherwise, it inherits the <params> of the base
            //     URL (if any) and
            if (url.parameters_ != null) {
                return url;
            }
            url.parameters_ = baseUrl.parameters_;
            //  b) if the embedded URL's <query> is non-empty, we skip to
            //     step 7; otherwise, it inherits the <query> of the base
            //     URL (if any) and we skip to step 7.
            if (url.query_ != null) {
                return url;
            }
            url.query_ = baseUrl.query_;
            return url;
        }
        // Step 6: The last segment of the base URL's path (anything
        //         following the rightmost slash "/", or the entire path if no
        //         slash is present) is removed and the embedded URL's path is
        //         appended in its place.  The following operations are
        //         then applied, in order, to the new path:
        final String basePath = baseUrl.path_;
        String path = "";

        if (basePath == null) {
            // a base without path is handled like the root path
            path = "/";
        }
        else {
            final int lastSlashIndex = basePath.lastIndexOf('/');

            if (lastSlashIndex >= 0) {
                // keep everything up to and including the rightmost slash
                path = basePath.substring(0, lastSlashIndex + 1);
            }
        }

        path = path.concat(url.path_);
        //      a) All occurrences of "./", where "." is a complete path
        //         segment, are removed.
        int pathSegmentIndex;

        while ((pathSegmentIndex = path.indexOf("/./")) >= 0) {
            path = path.substring(0, pathSegmentIndex + 1).concat(path.substring(pathSegmentIndex + 3));
        }
        //      b) If the path ends with "." as a complete path segment,
        //         that "." is removed.
        if (path.endsWith("/.")) {
            path = path.substring(0, path.length() - 1);
        }
        //      c) All occurrences of "<segment>/../", where <segment> is a
        //         complete path segment not equal to "..", are removed.
        //         Removal of these path segments is performed iteratively,
        //         removing the leftmost matching pattern on each iteration,
        //         until no matching pattern remains.
        // a "/../" at index 0 ends the loop; it is handled by removeLeadingSlashPoints() below
        while ((pathSegmentIndex = path.indexOf("/../")) > 0) {
            final String pathSegment = path.substring(0, pathSegmentIndex);
            final int slashIndex = pathSegment.lastIndexOf('/');

            if (slashIndex >= 0) {
                // NOTE(review): substring(slashIndex) still starts with the '/', therefore it
                // never equals ".." and this condition is always true.
                if (!"..".equals(pathSegment.substring(slashIndex))) {
                    path = path.substring(0, slashIndex + 1).concat(path.substring(pathSegmentIndex + 4));
                }
            }
            else {
                // no slash in front of the segment: the segment and the "/../" are dropped
                path = path.substring(pathSegmentIndex + 4);
            }
        }
        //      d) If the path ends with "<segment>/..", where <segment> is a
        //         complete path segment not equal to "..", that
        //         "<segment>/.." is removed.
        if (path.endsWith("/..")) {
            final String pathSegment = path.substring(0, path.length() - 3);
            final int slashIndex = pathSegment.lastIndexOf('/');

            if (slashIndex >= 0) {
                path = path.substring(0, slashIndex + 1);
            }
        }

        // not part of the RFC: drop "../" sequences left over at the start of the path
        path = removeLeadingSlashPoints(path);

        url.path_ = path;
        // Step 7: The resulting URL components, including any inherited from
        //         the base URL, are recombined to give the absolute form of
        //         the embedded URL.
        return url;
    }
1123 
1124     /**
1125      * "../" after the leading "/" should be removed as browsers do (not in RFC)
1126      */
1127     private static String removeLeadingSlashPoints(final String path) {
1128         int i = 1;
1129         while (path.startsWith("../", i)) {
1130             i = i + 3;
1131         }
1132 
1133         if (i > 1) {
1134             return "/" + path.substring(i);
1135         }
1136 
1137         return path;
1138     }
1139 
1140     /**
1141      * Class <code>Url</code> represents a Uniform Resource Locator.
1142      *
1143      * @author Martin Tamme
1144      */
1145     private static class Url {
1146 
1147         private String scheme_;
1148         private String location_;
1149         private String path_;
1150         private String parameters_;
1151         private String query_;
1152         private String fragment_;
1153 
1154         /**
1155          * Creates a <code>Url</code> object.
1156          */
1157         Url() {
1158             super();
1159         }
1160 
1161         /**
1162          * Creates a <code>Url</code> object from the specified
1163          * <code>Url</code> object.
1164          *
1165          * @param url a <code>Url</code> object.
1166          */
1167         Url(final Url url) {
1168             scheme_ = url.scheme_;
1169             location_ = url.location_;
1170             path_ = url.path_;
1171             parameters_ = url.parameters_;
1172             query_ = url.query_;
1173             fragment_ = url.fragment_;
1174         }
1175 
1176         /**
1177          * Returns a string representation of the <code>Url</code> object.
1178          *
1179          * @return a string representation of the <code>Url</code> object.
1180          */
1181         @Override
1182         public String toString() {
1183             final StringBuilder sb = new StringBuilder();
1184 
1185             if (scheme_ != null) {
1186                 sb.append(scheme_).append(':');
1187             }
1188             if (location_ != null) {
1189                 sb.append("//").append(location_);
1190             }
1191             if (path_ != null) {
1192                 sb.append(path_);
1193             }
1194             if (parameters_ != null) {
1195                 sb.append(';').append(parameters_);
1196             }
1197             if (query_ != null) {
1198                 sb.append('?').append(query_);
1199             }
1200             if (fragment_ != null) {
1201                 sb.append('#').append(fragment_);
1202             }
1203             return sb.toString();
1204         }
1205     }
1206 
1207     static boolean isNormalUrlProtocol(final String protocol) {
1208         return "http".equals(protocol) || "https".equals(protocol) || "file".equals(protocol);
1209     }
1210 
1211     /**
1212      * More or less the same as sameFile(URL, URL) but without
1213      * resolving the host to an IP address for comparing.
1214      * Additionally we do some path normalization.
1215      *
1216      * @param u1 a URL object
1217      * @param u2 a URL object
1218      * @return true if u1 and u2 refer to the same file
1219      */
1220     public static boolean sameFile(final URL u1, final URL u2) {
1221         if (u1 == u2) {
1222             return true;
1223         }
1224         if (u1 == null || u2 == null) {
1225             return false;
1226         }
1227 
1228         // Compare the protocols.
1229         final String p1 = u1.getProtocol();
1230         final String p2 = u2.getProtocol();
1231         if (!(p1 == p2 || (p1 != null && p1.equalsIgnoreCase(p2)))) {
1232             return false;
1233         }
1234 
1235         // Compare the ports.
1236         final int port1 = (u1.getPort() == -1) ? u1.getDefaultPort() : u1.getPort();
1237         final int port2 = (u2.getPort() == -1) ? u2.getDefaultPort() : u2.getPort();
1238         if (port1 != port2) {
1239             return false;
1240         }
1241 
1242         // Compare the hosts.
1243         final String h1 = u1.getHost();
1244         final String h2 = u2.getHost();
1245         if (!(h1 == h2 || (h1 != null && h1.equalsIgnoreCase(h2)))) {
1246             return false;
1247         }
1248 
1249         // Compare the files.
1250         String f1 = u1.getFile();
1251         if (f1.isEmpty()) {
1252             f1 = "/";
1253         }
1254         String f2 = u2.getFile();
1255         if (f2.isEmpty()) {
1256             f2 = "/";
1257         }
1258         if (f1.indexOf('.') > 0 || f2.indexOf('.') > 0) {
1259             try {
1260                 f1 = u1.toURI().normalize().toURL().getFile();
1261                 f2 = u2.toURI().normalize().toURL().getFile();
1262             }
1263             catch (final RuntimeException e) {
1264                 throw e;
1265             }
1266             catch (final Exception ignored) {
1267                 // ignore
1268             }
1269         }
1270 
1271         return Objects.equals(f1, f2);
1272     }
1273 
1274     /**
1275      * Helper that constructs a normalized url string
1276      * usable as cache key.
1277      *
1278      * @param url a URL object
1279      * @return the normalized string
1280      */
1281     public static String normalize(final URL url) {
1282         final StringBuilder result = new StringBuilder();
1283         result.append(url.getProtocol())
1284                 .append("://")
1285                 .append(url.getHost())
1286                 .append(':')
1287                 .append((url.getPort() == -1) ? url.getDefaultPort() : url.getPort());
1288 
1289         // Compare the files.
1290         String f = url.getFile();
1291         if (f.isEmpty()) {
1292             result.append('/');
1293         }
1294         else {
1295             if (f.indexOf('.') > 0) {
1296                 try {
1297                     f = url.toURI().normalize().toURL().getFile();
1298                 }
1299                 catch (final Exception ignored) {
1300                     // ignore
1301                 }
1302             }
1303             result.append(f);
1304         }
1305 
1306         return result.toString();
1307     }
1308 
1309     /**
1310      * Constructs a {@link URI} using the specified URL.
1311      *
1312      * @param url the URL
1313      * @param query the query
1314      *
1315      * @throws URISyntaxException
1316      *         If both a scheme and a path are given but the path is
1317      *         relative, if the URI string constructed from the given
1318      *         components violates RFC&nbsp;2396, or if the authority
1319      *         component of the string is present but cannot be parsed
1320      *         as a server-based authority
1321      * @return the URI
1322      */
1323     public static URI toURI(final URL url, final String query) throws URISyntaxException {
1324         final String scheme = url.getProtocol();
1325         final String host = url.getHost();
1326         final int port = url.getPort();
1327         final String path = url.getPath();
1328         final StringBuilder buffer = new StringBuilder();
1329         if (host != null) {
1330             if (scheme != null) {
1331                 buffer.append(scheme).append("://");
1332             }
1333             buffer.append(host);
1334             if (port > 0) {
1335                 buffer.append(':').append(port);
1336             }
1337         }
1338         if (path == null || path.isEmpty() || path.charAt(0) != '/') {
1339             buffer.append('/');
1340         }
1341         if (path != null) {
1342             buffer.append(path);
1343         }
1344         if (query != null) {
1345             buffer.append('?').append(query);
1346         }
1347         return new URI(buffer.toString());
1348     }
1349 
1350     /**
1351      * @param part the part to encode
1352      * @return the ecoded string
1353      */
1354     public static String encodeQueryPart(final String part) {
1355         if (part == null || part.isEmpty()) {
1356             return "";
1357         }
1358 
1359         try {
1360             return URLEncoder.encode(part, "UTF-8");
1361         }
1362         catch (final UnsupportedEncodingException e) {
1363             return part;
1364         }
1365     }
1366 
1367     /**
1368      * Removes the well known ports if it can be deduced from protocol.
1369      * @param url the url to clean up
1370      * @return a new URL without the port or the given one
1371      * @throws MalformedURLException if the URL string cannot be converted to a URL instance
1372      */
1373     public static URL removeRedundantPort(final URL url) throws MalformedURLException {
1374         if (("https".equals(url.getProtocol()) && url.getPort() == 443)
1375                 || ("http".equals(url.getProtocol()) && url.getPort() == 80)) {
1376             return getUrlWithNewPort(url, -1);
1377         }
1378         return url;
1379     }
1380 
    /**
     * Decodes an array of URL safe 7-bit characters into an array of original bytes.
     * Escaped characters are converted back to their original representation.
     * <p>
     * This calls {@link #decodeDataUrl(byte[], boolean)} with {@code removeWhitespace}
     * set to {@code false}.
     *
     * @param bytes array of URL safe characters
     * @return array of original bytes
     * @throws IllegalArgumentException in case of error
     *
     * @deprecated as of version 4.11.0; use {@link #decodeDataUrl(byte[], boolean)} instead
     */
    @Deprecated
    public static byte[] decodeDataUrl(final byte[] bytes) throws IllegalArgumentException  {
        return decodeDataUrl(bytes, false);
    }
1394 
1395     /**
1396      * Decodes an array of URL safe 7-bit characters into an array of original bytes.
1397      * Escaped characters are converted back to their original representation.
1398      * @param bytes array of URL safe characters
1399      * @param removeWhitespace if true don't add whitespace chars to the output
1400      * @return array of original bytes
1401      * @throws IllegalArgumentException in case of error
1402      */
1403     public static byte[] decodeDataUrl(final byte[] bytes, final boolean removeWhitespace)
1404                             throws IllegalArgumentException  {
1405         // adapted from apache commons codec
1406         if (bytes == null) {
1407             return null;
1408         }
1409         final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
1410         for (int i = 0; i < bytes.length; i++) {
1411             int b = bytes[i];
1412             if (b == '%') {
1413                 try {
1414                     final int u = digit16(bytes[++i]);
1415                     final int l = digit16(bytes[++i]);
1416                     b = (u << 4) + l;
1417                 }
1418                 catch (final ArrayIndexOutOfBoundsException e) {
1419                     throw new IllegalArgumentException("Invalid URL encoding: ", e);
1420                 }
1421             }
1422             if (removeWhitespace
1423                     && (b == 9 || b == 10 || b == 12 || b == 13 || b == 32)) {
1424                 continue;
1425             }
1426 
1427             buffer.write(b);
1428         }
1429         return buffer.toByteArray();
1430     }
1431 
1432     /**
1433      * Decodes an array of URL safe 7-bit characters into an array of original bytes.
1434      * Escaped characters are converted back to their original representation.
1435      * @param bytes array of URL safe characters
1436      * @return array of original bytes
1437      * @throws IllegalArgumentException in case of error
1438      */
1439     public static byte[] decodeUrl(final byte[] bytes) throws IllegalArgumentException {
1440         // adapted from apache commons codec
1441         if (bytes == null) {
1442             return null;
1443         }
1444         final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
1445         for (int i = 0; i < bytes.length; i++) {
1446             final int b = bytes[i];
1447             if (b == '+') {
1448                 buffer.write(' ');
1449             }
1450             else if (b == '%') {
1451                 try {
1452                     final int u = digit16(bytes[++i]);
1453                     final int l = digit16(bytes[++i]);
1454                     buffer.write((char) ((u << 4) + l));
1455                 }
1456                 catch (final ArrayIndexOutOfBoundsException e) {
1457                     throw new IllegalArgumentException("Invalid URL encoding: ", e);
1458                 }
1459             }
1460             else {
1461                 buffer.write(b);
1462             }
1463         }
1464         return buffer.toByteArray();
1465     }
1466 
1467     private static int digit16(final byte b) throws IllegalArgumentException  {
1468         final int i = Character.digit((char) b, 16);
1469         if (i == -1) {
1470             throw new IllegalArgumentException("Invalid URL encoding: not a valid digit (radix 16): " + b);
1471         }
1472         return i;
1473     }
1474 
1475     /**
1476      * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
1477      * @param urlsafe bitset of characters deemed URL safe
1478      * @param bytes  array of bytes to convert to URL safe characters
1479      * @return array of bytes containing URL safe characters
1480      */
1481     public static byte[] encodeUrl(final BitSet urlsafe, final byte[] bytes) {
1482         // adapted from apache commons codec
1483         if (bytes == null) {
1484             return null;
1485         }
1486 
1487         final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
1488         for (final byte c : bytes) {
1489             int b = c;
1490             if (b < 0) {
1491                 b = 256 + b;
1492             }
1493             if (urlsafe.get(b)) {
1494                 if (b == ' ') {
1495                     b = '+';
1496                 }
1497                 buffer.write(b);
1498             }
1499             else {
1500                 buffer.write('%');
1501                 final char hex1 = hexDigit(b >> 4);
1502                 final char hex2 = hexDigit(b);
1503                 buffer.write(hex1);
1504                 buffer.write(hex2);
1505             }
1506         }
1507         return buffer.toByteArray();
1508     }
1509 
1510     private static char hexDigit(final int b) {
1511         return Character.toUpperCase(Character.forDigit(b & 0xF, 16));
1512     }
1513 }