1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit.util;
16
17 import static java.nio.charset.StandardCharsets.US_ASCII;
18 import static java.nio.charset.StandardCharsets.UTF_8;
19
20 import java.io.ByteArrayOutputStream;
21 import java.io.UnsupportedEncodingException;
22 import java.net.MalformedURLException;
23 import java.net.URI;
24 import java.net.URISyntaxException;
25 import java.net.URL;
26 import java.net.URLEncoder;
27 import java.net.URLStreamHandler;
28 import java.nio.charset.Charset;
29 import java.util.BitSet;
30 import java.util.Locale;
31 import java.util.Objects;
32
33 import org.htmlunit.WebAssert;
34 import org.htmlunit.protocol.AnyHandler;
35 import org.htmlunit.protocol.javascript.JavaScriptURLConnection;
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50 public final class UrlUtils {
51
52
/** Name of the "about" scheme. */
public static final String ABOUT = "about";

/** The "about" scheme followed by the scheme separator. */
public static final String ABOUT_SCHEME = ABOUT + ":";

/** String form of the blank page URL. */
public static final String ABOUT_BLANK = ABOUT_SCHEME + "blank";

/** Shared URL instance for {@link #ABOUT_BLANK}; created in the static initializer. */
public static final URL URL_ABOUT_BLANK;

private static final BitSet PATH_ALLOWED_CHARS = new BitSet(256);
private static final BitSet QUERY_ALLOWED_CHARS = new BitSet(256);
private static final BitSet ANCHOR_ALLOWED_CHARS = new BitSet(256);
private static final BitSet HASH_ALLOWED_CHARS = new BitSet(256);

private static final URLStreamHandler JS_HANDLER;
private static final URLStreamHandler ABOUT_HANDLER;
private static final URLStreamHandler DATA_HANDLER;

/*
 * Creates the protocol handlers and fills the bit sets naming the bytes that may
 * be written unescaped into the path, query, anchor and hash of a URL.
 */
static {
    JS_HANDLER = new org.htmlunit.protocol.javascript.Handler();
    ABOUT_HANDLER = new org.htmlunit.protocol.about.Handler();
    DATA_HANDLER = new org.htmlunit.protocol.data.Handler();

    try {
        URL_ABOUT_BLANK = new URL(null, ABOUT_BLANK, ABOUT_HANDLER);
    }
    catch (final MalformedURLException e) {
        // not expected: the spec is a constant and the handler is passed explicitly
        throw new RuntimeException(e);
    }

    // unreserved = letters + digits + mark characters
    final BitSet unreserved = new BitSet(256);
    unreserved.set('a', 'z' + 1);
    unreserved.set('A', 'Z' + 1);
    unreserved.set('0', '9' + 1);
    for (final char c : "-_.!~*'()".toCharArray()) {
        unreserved.set(c);
    }

    // escaped = '%' + hexadecimal digits
    final BitSet escaped = new BitSet(256);
    escaped.set('%');
    escaped.set('0', '9' + 1);
    escaped.set('a', 'f' + 1);
    escaped.set('A', 'F' + 1);

    // uric = reserved + unreserved + escaped
    final BitSet uric = new BitSet(256);
    for (final char c : ";/?:@&=+$,".toCharArray()) {
        uric.set(c);
    }
    uric.or(unreserved);
    uric.or(escaped);

    // absolute path = pchar (unreserved + escaped + ":@&=+$,") + ';' + '/'
    final BitSet absPath = new BitSet(256);
    absPath.or(unreserved);
    absPath.or(escaped);
    for (final char c : ":@&=+$,;/".toCharArray()) {
        absPath.set(c);
    }

    PATH_ALLOWED_CHARS.or(absPath);
    QUERY_ALLOWED_CHARS.or(uric);
    ANCHOR_ALLOWED_CHARS.or(uric);
    HASH_ALLOWED_CHARS.or(uric);
}

/**
 * Not instantiable; the class only offers static members.
 */
private UrlUtils() {
    // utility class
}
197
198
199
200
201
202
203
204
205
206
207
208
209 public static URL toUrlSafe(final String url) {
210 try {
211 return toUrlUnsafe(url);
212 }
213 catch (final MalformedURLException e) {
214
215 throw new RuntimeException(e);
216 }
217 }
218
219
220
221
222
223
224
225
226
227
228
229
230
231 public static URL toUrlUnsafe(final String url) throws MalformedURLException {
232 WebAssert.notNull("url", url);
233
234 final String protocol = org.apache.commons.lang3.StringUtils.substringBefore(url, ":").toLowerCase(Locale.ROOT);
235
236 if (protocol.isEmpty() || UrlUtils.isNormalUrlProtocol(protocol)) {
237 final URL response = new URL(url);
238 if (response.getProtocol().startsWith("http")
239 && org.apache.commons.lang3.StringUtils.isEmpty(response.getHost())) {
240 throw new MalformedURLException("Missing host name in url: " + url);
241 }
242 return response;
243 }
244
245 if (JavaScriptURLConnection.JAVASCRIPT_PREFIX.equals(protocol + ":")) {
246 return new URL(null, url, JS_HANDLER);
247 }
248
249 if (ABOUT.equals(protocol)) {
250 if (org.apache.commons.lang3.StringUtils.equalsIgnoreCase(ABOUT_BLANK, url)) {
251 return URL_ABOUT_BLANK;
252 }
253 return new URL(null, url, ABOUT_HANDLER);
254 }
255
256 if ("data".equals(protocol)) {
257 return new URL(null, url, DATA_HANDLER);
258 }
259
260 return new URL(null, url, AnyHandler.INSTANCE);
261 }
262
263
264
265
266
267
268
269
270
271
272
273
274 public static URL encodeUrl(final URL url, final Charset charset) {
275 if (!isNormalUrlProtocol(url.getProtocol())) {
276 return url;
277 }
278
279 try {
280 String path = url.getPath();
281 if (path != null) {
282 path = encode(path, PATH_ALLOWED_CHARS, UTF_8);
283 }
284 String query = url.getQuery();
285 if (query != null) {
286 query = encode(query, QUERY_ALLOWED_CHARS, charset);
287 }
288 String anchor = url.getRef();
289 if (anchor != null) {
290 anchor = encode(anchor, ANCHOR_ALLOWED_CHARS, UTF_8);
291 }
292 return createNewUrl(url.getProtocol(), url.getUserInfo(), url.getHost(),
293 url.getPort(), path, anchor, query);
294 }
295 catch (final MalformedURLException e) {
296
297 throw new RuntimeException(e);
298 }
299 }
300
301
302
303
304
305
306
307 public static String encodeAnchor(final String anchor) {
308 if (anchor == null) {
309 return null;
310 }
311 return encode(anchor, ANCHOR_ALLOWED_CHARS, UTF_8);
312 }
313
314
315
316
317
318
319
320 public static String encodeHash(final String hash) {
321 if (hash == null) {
322 return null;
323 }
324 return encode(hash, HASH_ALLOWED_CHARS, UTF_8);
325 }
326
327
328
329
330
331
332
333 public static String encodeQuery(final String query) {
334 if (query == null) {
335 return null;
336 }
337 return encode(query, QUERY_ALLOWED_CHARS, UTF_8);
338 }
339
340
341
342
343
344
345
346 public static String decode(final String escaped) {
347 try {
348 final byte[] bytes = escaped.getBytes(US_ASCII);
349 final byte[] bytes2 = decodeUrl(bytes);
350 return new String(bytes2, UTF_8);
351 }
352 catch (final IllegalArgumentException e) {
353
354 throw new RuntimeException(e);
355 }
356 }
357
358
359
360
361
362
363
364
365
366 private static String encode(final String unescaped, final BitSet allowed, final Charset charset) {
367 final byte[] bytes = unescaped.getBytes(charset);
368 final byte[] bytes2 = encodeUrl(allowed, bytes);
369 return encodePercentSign(bytes2);
370 }
371
372
373
374
375
376
377
378
379 private static String encodePercentSign(final byte[] input) {
380 if (input == null) {
381 return null;
382 }
383
384 final StringBuilder result = new StringBuilder(new String(input, US_ASCII));
385 int state = -0;
386 int offset = 0;
387 for (int i = 0; i < input.length; i++) {
388 final byte b = input[i];
389 if (state == 0 && b == '%') {
390 state = 1;
391 }
392 else if (state == 1 || state == 2) {
393 if (('0' <= b && b <= '9')
394 || ('A' <= b && b <= 'F')
395 || ('a' <= b && b <= 'f')) {
396 state++;
397 if (state == 3) {
398 state = 0;
399 }
400 }
401 else {
402 final int st = i - state + offset;
403 result.replace(st, st + 1, "%25");
404 offset = offset + 2;
405 state = b == '%' ? 1 : 0;
406 }
407 }
408 }
409 if (state == 1 || state == 2) {
410 final int st = input.length - state + offset;
411 result.replace(st, st + 1, "%25");
412 }
413 return result.toString();
414 }
415
416
417
418
419
420
421
422 public static URL getUrlWithoutPathRefQuery(final URL u) throws MalformedURLException {
423 return createNewUrl(u.getProtocol(), u.getAuthority(), null, null, null);
424 }
425
426
427
428
429
430
431
432
433 public static URL getUrlWithoutRef(final URL u) throws MalformedURLException {
434 return createNewUrl(u.getProtocol(), u.getAuthority(), u.getPath(), null, u.getQuery());
435 }
436
437
438
439
440
441
442
443
444 public static URL getUrlWithNewProtocol(final URL u, final String newProtocol) throws MalformedURLException {
445 return createNewUrl(newProtocol, u.getAuthority(), u.getPath(), u.getRef(), u.getQuery());
446 }
447
448
449
450
451
452
453
454
455 public static URL getUrlWithNewHost(final URL u, final String newHost)
456 throws MalformedURLException {
457 return createNewUrl(u.getProtocol(), u.getUserInfo(), newHost,
458 u.getPort(), u.getPath(), u.getRef(), u.getQuery());
459 }
460
461
462
463
464
465
466
467
468
469 public static URL getUrlWithNewHostAndPort(final URL u, final String newHost, final int newPort)
470 throws MalformedURLException {
471 return createNewUrl(u.getProtocol(), u.getUserInfo(), newHost, newPort, u.getPath(), u.getRef(), u.getQuery());
472 }
473
474
475
476
477
478
479
480
481 public static URL getUrlWithNewPort(final URL u, final int newPort) throws MalformedURLException {
482 return createNewUrl(u.getProtocol(), u.getUserInfo(), u.getHost(),
483 newPort, u.getPath(), u.getRef(), u.getQuery());
484 }
485
486
487
488
489
490
491
492
493 public static URL getUrlWithNewPath(final URL u, final String newPath) throws MalformedURLException {
494 return createNewUrl(u.getProtocol(), u.getAuthority(), newPath, u.getRef(), u.getQuery());
495 }
496
497
498
499
500
501
502
503
504 public static URL getUrlWithNewRef(final URL u, final String newRef) throws MalformedURLException {
505 return createNewUrl(u.getProtocol(), u.getAuthority(), u.getPath(), newRef, u.getQuery());
506 }
507
508
509
510
511
512
513
514
515 public static URL getUrlWithNewQuery(final URL u, final String newQuery) throws MalformedURLException {
516 return createNewUrl(u.getProtocol(), u.getAuthority(), u.getPath(), u.getRef(), newQuery);
517 }
518
519
520
521
522
523
524
525 public static URL getUrlWithProtocolAndAuthority(final URL u) throws MalformedURLException {
526 return createNewUrl(u.getProtocol(), u.getAuthority(), null, null, null);
527 }
528
529
530
531
532
533
534
535
536 public static URL getUrlWithNewUserName(final URL u, final String newUserName) throws MalformedURLException {
537 String newUserInfo = newUserName == null ? "" : newUserName;
538 final String userInfo = u.getUserInfo();
539 if (org.apache.commons.lang3.StringUtils.isNotBlank(userInfo)) {
540 final int colonIdx = userInfo.indexOf(':');
541 if (colonIdx > -1) {
542 newUserInfo = newUserInfo + userInfo.substring(colonIdx);
543 }
544 }
545 return createNewUrl(u.getProtocol(), newUserInfo.isEmpty() ? null : newUserInfo,
546 u.getHost(), u.getPort(), u.getPath(), u.getRef(), u.getQuery());
547 }
548
549
550
551
552
553
554
555
556 public static URL getUrlWithNewUserPassword(final URL u, final String newUserPassword)
557 throws MalformedURLException {
558 String newUserInfo = newUserPassword == null ? "" : ':' + newUserPassword;
559 final String userInfo = u.getUserInfo();
560 if (org.apache.commons.lang3.StringUtils.isNotBlank(userInfo)) {
561 final int colonIdx = userInfo.indexOf(':');
562 if (colonIdx > -1) {
563 newUserInfo = userInfo.substring(0, colonIdx) + newUserInfo;
564 }
565 else {
566 newUserInfo = userInfo + newUserInfo;
567 }
568 }
569 return createNewUrl(u.getProtocol(), newUserInfo.isEmpty() ? null : newUserInfo,
570 u.getHost(), u.getPort(), u.getPath(), u.getRef(), u.getQuery());
571 }
572
573
574
575
576
577
578
579
580
581
582
583
584
585 private static URL createNewUrl(final String protocol, final String userInfo, final String host, final int port,
586 final String path, final String ref, final String query) throws MalformedURLException {
587 final StringBuilder s = new StringBuilder();
588 s.append(protocol).append("://");
589 if (userInfo != null) {
590 s.append(userInfo).append('@');
591 }
592 s.append(host);
593 if (port != -1) {
594 s.append(':').append(port);
595 }
596 if (path != null && !path.isEmpty()) {
597 if ('/' != path.charAt(0)) {
598 s.append('/');
599 }
600 s.append(path);
601 }
602 if (query != null) {
603 s.append('?').append(query);
604 }
605 if (ref != null) {
606 if (ref.isEmpty() || ref.charAt(0) != '#') {
607 s.append('#');
608 }
609 s.append(ref);
610 }
611
612 return new URL(s.toString());
613 }
614
615
616
617
618
619
620
621
622
623
624
625 private static URL createNewUrl(final String protocol, final String authority,
626 final String path, final String ref, final String query) throws MalformedURLException {
627
628
629 int len = protocol.length() + 1;
630 if (authority != null && !authority.isEmpty()) {
631 len += 2 + authority.length();
632 }
633 if (path != null) {
634 len += path.length();
635 }
636 if (query != null) {
637 len += 1 + query.length();
638 }
639 if (ref != null) {
640 len += 1 + ref.length();
641 }
642
643 final StringBuilder s = new StringBuilder(len);
644 s.append(protocol).append(':');
645 if (authority != null && !authority.isEmpty()) {
646 s.append("//").append(authority);
647 }
648 if (path != null) {
649 s.append(path);
650 }
651 if (query != null) {
652 s.append('?').append(query);
653 }
654 if (ref != null) {
655 if (ref.isEmpty() || ref.charAt(0) != '#') {
656 s.append('#');
657 }
658 s.append(ref);
659 }
660
661 return toUrlSafe(s.toString());
662 }
663
664
665
666
667
668
669
670
671
672
673 public static String resolveUrl(final String baseUrl, final String relativeUrl) {
674 if (baseUrl == null) {
675 throw new IllegalArgumentException("Base URL must not be null");
676 }
677 if (relativeUrl == null) {
678 throw new IllegalArgumentException("Relative URL must not be null");
679 }
680 final Url url = resolveUrl(parseUrl(baseUrl), relativeUrl);
681
682 return url.toString();
683 }
684
685
686
687
688
689
690
691
692
693
694 public static String resolveUrl(final URL baseUrl, final String relativeUrl) {
695 if (baseUrl == null) {
696 throw new IllegalArgumentException("Base URL must not be null");
697 }
698 return resolveUrl(baseUrl.toExternalForm(), relativeUrl);
699 }
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719 private static Url parseUrl(String spec) {
720 final Url url = new Url();
721 int startIndex = 0;
722 int endIndex = spec.length();
723
724
725
726
727
728
729
730 if (endIndex > startIndex) {
731 StringBuilder sb = null;
732 boolean before = true;
733 int trailing = 0;
734
735 for (int i = 0; i < endIndex; i++) {
736 final char c = spec.charAt(i);
737 boolean remove = false;
738
739 if (c == '\t' | c == '\r' | c == '\n') {
740 remove = true;
741 }
742 else if ('\u0000' <= c && c <= '\u0020') {
743 if (before) {
744 remove = true;
745 }
746 else {
747 trailing++;
748 }
749 }
750 else {
751 before = false;
752 trailing = 0;
753 }
754
755 if (remove) {
756 if (sb == null) {
757 sb = new StringBuilder(spec.substring(0, i));
758 }
759 }
760 else if (sb != null) {
761 sb.append(c);
762 }
763 }
764
765 if (sb == null) {
766 if (trailing > 0) {
767 endIndex = spec.length() - trailing;
768 spec = spec.substring(0, endIndex);
769 }
770 }
771 else {
772 if (trailing > 0) {
773 spec = sb.substring(0, sb.length() - trailing);
774 }
775 else {
776 spec = sb.toString();
777 }
778 endIndex = spec.length();
779 }
780 }
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796 final int crosshatchIndex = StringUtils.indexOf(spec, '#', startIndex, endIndex);
797
798 if (crosshatchIndex >= 0) {
799 url.fragment_ = spec.substring(crosshatchIndex + 1, endIndex);
800 endIndex = crosshatchIndex;
801 }
802
803
804
805
806
807
808
809
810 final int colonIndex = StringUtils.indexOf(spec, ':', startIndex, endIndex);
811
812 if (colonIndex > 0) {
813 final String scheme = spec.substring(startIndex, colonIndex);
814 if (isValidScheme(scheme)) {
815 url.scheme_ = scheme;
816 startIndex = colonIndex + 1;
817 }
818 }
819
820
821
822
823
824
825
826
827
828
829
830
831 final int locationStartIndex;
832 int locationEndIndex;
833
834 if (spec.startsWith("//", startIndex)) {
835 locationStartIndex = startIndex + 2;
836 locationEndIndex = StringUtils.indexOf(spec, '/', locationStartIndex, endIndex);
837 if (locationEndIndex >= 0) {
838 startIndex = locationEndIndex;
839 }
840 }
841 else {
842 locationStartIndex = -1;
843 locationEndIndex = -1;
844 }
845
846
847
848
849
850
851
852
853
854 final int questionMarkIndex = StringUtils.indexOf(spec, '?', startIndex, endIndex);
855
856 if (questionMarkIndex >= 0) {
857 if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
858
859
860
861 locationEndIndex = questionMarkIndex;
862 startIndex = questionMarkIndex;
863 }
864 url.query_ = spec.substring(questionMarkIndex + 1, endIndex);
865 endIndex = questionMarkIndex;
866 }
867
868
869
870
871
872
873
874
875 final int semicolonIndex = StringUtils.indexOf(spec, ';', startIndex, endIndex);
876
877 if (semicolonIndex >= 0) {
878 if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
879
880
881
882 locationEndIndex = semicolonIndex;
883 startIndex = semicolonIndex;
884 }
885 url.parameters_ = spec.substring(semicolonIndex + 1, endIndex);
886 endIndex = semicolonIndex;
887 }
888
889
890
891
892
893
894
895
896 if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
897
898
899 locationEndIndex = endIndex;
900 }
901 else if (startIndex < endIndex) {
902 url.path_ = spec.substring(startIndex, endIndex);
903 }
904
905 if ((locationStartIndex >= 0) && (locationEndIndex >= 0)) {
906 url.location_ = spec.substring(locationStartIndex, locationEndIndex);
907 }
908 return url;
909 }
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925 public static boolean isValidScheme(final String scheme) {
926 final int length = scheme.length();
927 if (length < 1) {
928 return false;
929 }
930
931 char c = scheme.charAt(0);
932 boolean isValid = ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
933 if (!isValid) {
934 return false;
935 }
936
937 for (int i = 1; i < length; i++) {
938 c = scheme.charAt(i);
939 isValid =
940 ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
941 || ('0' <= c && c <= '9')
942 || c == '+'
943 || c == '.'
944 || c == '-';
945 if (!isValid) {
946 return false;
947 }
948 }
949
950 return true;
951 }
952
953
954
955
956
957
958
959
960
961 public static boolean isSpecialScheme(final String scheme) {
962 final int length = scheme.length();
963 if (length < 2 || length > 5) {
964 return false;
965 }
966
967 final String schemeLC = scheme.toLowerCase(Locale.ROOT);
968 return "ftp".equals(schemeLC)
969 || "file".equals(schemeLC)
970 || "http".equals(schemeLC)
971 || "https".equals(schemeLC)
972 || "ws".equals(schemeLC)
973 || "wss".equals(schemeLC);
974 }
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994 private static Url resolveUrl(final Url baseUrl, final String relativeUrl) {
995 final Url url = parseUrl(relativeUrl);
996
997
998
999
1000 if (baseUrl == null) {
1001 return url;
1002 }
1003
1004
1005
1006
1007
1008 if (relativeUrl.isEmpty()) {
1009 return new Url(baseUrl);
1010 }
1011
1012
1013 if (url.scheme_ != null) {
1014 return url;
1015 }
1016
1017
1018 url.scheme_ = baseUrl.scheme_;
1019
1020
1021
1022 if (url.location_ != null) {
1023 return url;
1024 }
1025 url.location_ = baseUrl.location_;
1026
1027
1028 if (url.path_ != null && !url.path_.isEmpty() && url.path_.charAt(0) == '/') {
1029 url.path_ = removeLeadingSlashPoints(url.path_);
1030 return url;
1031 }
1032
1033
1034
1035 if (url.path_ == null) {
1036 url.path_ = baseUrl.path_;
1037
1038
1039
1040 if (url.parameters_ != null) {
1041 return url;
1042 }
1043 url.parameters_ = baseUrl.parameters_;
1044
1045
1046
1047 if (url.query_ != null) {
1048 return url;
1049 }
1050 url.query_ = baseUrl.query_;
1051 return url;
1052 }
1053
1054
1055
1056
1057
1058 final String basePath = baseUrl.path_;
1059 String path = "";
1060
1061 if (basePath == null) {
1062 path = "/";
1063 }
1064 else {
1065 final int lastSlashIndex = basePath.lastIndexOf('/');
1066
1067 if (lastSlashIndex >= 0) {
1068 path = basePath.substring(0, lastSlashIndex + 1);
1069 }
1070 }
1071
1072 path = path.concat(url.path_);
1073
1074
1075 int pathSegmentIndex;
1076
1077 while ((pathSegmentIndex = path.indexOf("/./")) >= 0) {
1078 path = path.substring(0, pathSegmentIndex + 1).concat(path.substring(pathSegmentIndex + 3));
1079 }
1080
1081
1082 if (path.endsWith("/.")) {
1083 path = path.substring(0, path.length() - 1);
1084 }
1085
1086
1087
1088
1089
1090 while ((pathSegmentIndex = path.indexOf("/../")) > 0) {
1091 final String pathSegment = path.substring(0, pathSegmentIndex);
1092 final int slashIndex = pathSegment.lastIndexOf('/');
1093
1094 if (slashIndex >= 0) {
1095 if (!"..".equals(pathSegment.substring(slashIndex))) {
1096 path = path.substring(0, slashIndex + 1).concat(path.substring(pathSegmentIndex + 4));
1097 }
1098 }
1099 else {
1100 path = path.substring(pathSegmentIndex + 4);
1101 }
1102 }
1103
1104
1105
1106 if (path.endsWith("/..")) {
1107 final String pathSegment = path.substring(0, path.length() - 3);
1108 final int slashIndex = pathSegment.lastIndexOf('/');
1109
1110 if (slashIndex >= 0) {
1111 path = path.substring(0, slashIndex + 1);
1112 }
1113 }
1114
1115 path = removeLeadingSlashPoints(path);
1116
1117 url.path_ = path;
1118
1119
1120
1121 return url;
1122 }
1123
1124
1125
1126
1127 private static String removeLeadingSlashPoints(final String path) {
1128 int i = 1;
1129 while (path.startsWith("../", i)) {
1130 i = i + 3;
1131 }
1132
1133 if (i > 1) {
1134 return "/" + path.substring(i);
1135 }
1136
1137 return path;
1138 }
1139
1140
1141
1142
1143
1144
1145 private static class Url {
1146
1147 private String scheme_;
1148 private String location_;
1149 private String path_;
1150 private String parameters_;
1151 private String query_;
1152 private String fragment_;
1153
1154
1155
1156
1157 Url() {
1158 super();
1159 }
1160
1161
1162
1163
1164
1165
1166
1167 Url(final Url url) {
1168 scheme_ = url.scheme_;
1169 location_ = url.location_;
1170 path_ = url.path_;
1171 parameters_ = url.parameters_;
1172 query_ = url.query_;
1173 fragment_ = url.fragment_;
1174 }
1175
1176
1177
1178
1179
1180
1181 @Override
1182 public String toString() {
1183 final StringBuilder sb = new StringBuilder();
1184
1185 if (scheme_ != null) {
1186 sb.append(scheme_).append(':');
1187 }
1188 if (location_ != null) {
1189 sb.append("//").append(location_);
1190 }
1191 if (path_ != null) {
1192 sb.append(path_);
1193 }
1194 if (parameters_ != null) {
1195 sb.append(';').append(parameters_);
1196 }
1197 if (query_ != null) {
1198 sb.append('?').append(query_);
1199 }
1200 if (fragment_ != null) {
1201 sb.append('#').append(fragment_);
1202 }
1203 return sb.toString();
1204 }
1205 }
1206
1207 static boolean isNormalUrlProtocol(final String protocol) {
1208 return "http".equals(protocol) || "https".equals(protocol) || "file".equals(protocol);
1209 }
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220 public static boolean sameFile(final URL u1, final URL u2) {
1221 if (u1 == u2) {
1222 return true;
1223 }
1224 if (u1 == null || u2 == null) {
1225 return false;
1226 }
1227
1228
1229 final String p1 = u1.getProtocol();
1230 final String p2 = u2.getProtocol();
1231 if (!(p1 == p2 || (p1 != null && p1.equalsIgnoreCase(p2)))) {
1232 return false;
1233 }
1234
1235
1236 final int port1 = (u1.getPort() == -1) ? u1.getDefaultPort() : u1.getPort();
1237 final int port2 = (u2.getPort() == -1) ? u2.getDefaultPort() : u2.getPort();
1238 if (port1 != port2) {
1239 return false;
1240 }
1241
1242
1243 final String h1 = u1.getHost();
1244 final String h2 = u2.getHost();
1245 if (!(h1 == h2 || (h1 != null && h1.equalsIgnoreCase(h2)))) {
1246 return false;
1247 }
1248
1249
1250 String f1 = u1.getFile();
1251 if (f1.isEmpty()) {
1252 f1 = "/";
1253 }
1254 String f2 = u2.getFile();
1255 if (f2.isEmpty()) {
1256 f2 = "/";
1257 }
1258 if (f1.indexOf('.') > 0 || f2.indexOf('.') > 0) {
1259 try {
1260 f1 = u1.toURI().normalize().toURL().getFile();
1261 f2 = u2.toURI().normalize().toURL().getFile();
1262 }
1263 catch (final RuntimeException e) {
1264 throw e;
1265 }
1266 catch (final Exception ignored) {
1267
1268 }
1269 }
1270
1271 return Objects.equals(f1, f2);
1272 }
1273
1274
1275
1276
1277
1278
1279
1280
1281 public static String normalize(final URL url) {
1282 final StringBuilder result = new StringBuilder();
1283 result.append(url.getProtocol())
1284 .append("://")
1285 .append(url.getHost())
1286 .append(':')
1287 .append((url.getPort() == -1) ? url.getDefaultPort() : url.getPort());
1288
1289
1290 String f = url.getFile();
1291 if (f.isEmpty()) {
1292 result.append('/');
1293 }
1294 else {
1295 if (f.indexOf('.') > 0) {
1296 try {
1297 f = url.toURI().normalize().toURL().getFile();
1298 }
1299 catch (final Exception ignored) {
1300
1301 }
1302 }
1303 result.append(f);
1304 }
1305
1306 return result.toString();
1307 }
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323 public static URI toURI(final URL url, final String query) throws URISyntaxException {
1324 final String scheme = url.getProtocol();
1325 final String host = url.getHost();
1326 final int port = url.getPort();
1327 final String path = url.getPath();
1328 final StringBuilder buffer = new StringBuilder();
1329 if (host != null) {
1330 if (scheme != null) {
1331 buffer.append(scheme).append("://");
1332 }
1333 buffer.append(host);
1334 if (port > 0) {
1335 buffer.append(':').append(port);
1336 }
1337 }
1338 if (path == null || path.isEmpty() || path.charAt(0) != '/') {
1339 buffer.append('/');
1340 }
1341 if (path != null) {
1342 buffer.append(path);
1343 }
1344 if (query != null) {
1345 buffer.append('?').append(query);
1346 }
1347 return new URI(buffer.toString());
1348 }
1349
1350
1351
1352
1353
1354 public static String encodeQueryPart(final String part) {
1355 if (part == null || part.isEmpty()) {
1356 return "";
1357 }
1358
1359 try {
1360 return URLEncoder.encode(part, "UTF-8");
1361 }
1362 catch (final UnsupportedEncodingException e) {
1363 return part;
1364 }
1365 }
1366
1367
1368
1369
1370
1371
1372
1373 public static URL removeRedundantPort(final URL url) throws MalformedURLException {
1374 if (("https".equals(url.getProtocol()) && url.getPort() == 443)
1375 || ("http".equals(url.getProtocol()) && url.getPort() == 80)) {
1376 return getUrlWithNewPort(url, -1);
1377 }
1378 return url;
1379 }
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390 @Deprecated
1391 public static byte[] decodeDataUrl(final byte[] bytes) throws IllegalArgumentException {
1392 return decodeDataUrl(bytes, false);
1393 }
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403 public static byte[] decodeDataUrl(final byte[] bytes, final boolean removeWhitespace)
1404 throws IllegalArgumentException {
1405
1406 if (bytes == null) {
1407 return null;
1408 }
1409 final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
1410 for (int i = 0; i < bytes.length; i++) {
1411 int b = bytes[i];
1412 if (b == '%') {
1413 try {
1414 final int u = digit16(bytes[++i]);
1415 final int l = digit16(bytes[++i]);
1416 b = (u << 4) + l;
1417 }
1418 catch (final ArrayIndexOutOfBoundsException e) {
1419 throw new IllegalArgumentException("Invalid URL encoding: ", e);
1420 }
1421 }
1422 if (removeWhitespace
1423 && (b == 9 || b == 10 || b == 12 || b == 13 || b == 32)) {
1424 continue;
1425 }
1426
1427 buffer.write(b);
1428 }
1429 return buffer.toByteArray();
1430 }
1431
1432
1433
1434
1435
1436
1437
1438
1439 public static byte[] decodeUrl(final byte[] bytes) throws IllegalArgumentException {
1440
1441 if (bytes == null) {
1442 return null;
1443 }
1444 final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
1445 for (int i = 0; i < bytes.length; i++) {
1446 final int b = bytes[i];
1447 if (b == '+') {
1448 buffer.write(' ');
1449 }
1450 else if (b == '%') {
1451 try {
1452 final int u = digit16(bytes[++i]);
1453 final int l = digit16(bytes[++i]);
1454 buffer.write((char) ((u << 4) + l));
1455 }
1456 catch (final ArrayIndexOutOfBoundsException e) {
1457 throw new IllegalArgumentException("Invalid URL encoding: ", e);
1458 }
1459 }
1460 else {
1461 buffer.write(b);
1462 }
1463 }
1464 return buffer.toByteArray();
1465 }
1466
1467 private static int digit16(final byte b) throws IllegalArgumentException {
1468 final int i = Character.digit((char) b, 16);
1469 if (i == -1) {
1470 throw new IllegalArgumentException("Invalid URL encoding: not a valid digit (radix 16): " + b);
1471 }
1472 return i;
1473 }
1474
1475
1476
1477
1478
1479
1480
1481 public static byte[] encodeUrl(final BitSet urlsafe, final byte[] bytes) {
1482
1483 if (bytes == null) {
1484 return null;
1485 }
1486
1487 final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
1488 for (final byte c : bytes) {
1489 int b = c;
1490 if (b < 0) {
1491 b = 256 + b;
1492 }
1493 if (urlsafe.get(b)) {
1494 if (b == ' ') {
1495 b = '+';
1496 }
1497 buffer.write(b);
1498 }
1499 else {
1500 buffer.write('%');
1501 final char hex1 = hexDigit(b >> 4);
1502 final char hex2 = hexDigit(b);
1503 buffer.write(hex1);
1504 buffer.write(hex2);
1505 }
1506 }
1507 return buffer.toByteArray();
1508 }
1509
1510 private static char hexDigit(final int b) {
1511 return Character.toUpperCase(Character.forDigit(b & 0xF, 16));
1512 }
1513 }