View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit;
16  
17  import java.io.IOException;
18  import java.io.ObjectInputStream;
19  import java.io.ObjectOutputStream;
20  import java.io.Serializable;
21  import java.net.IDN;
22  import java.net.MalformedURLException;
23  import java.net.URL;
24  import java.nio.charset.Charset;
25  import java.nio.charset.StandardCharsets;
26  import java.util.ArrayList;
27  import java.util.Collections;
28  import java.util.EnumSet;
29  import java.util.HashMap;
30  import java.util.List;
31  import java.util.Map;
32  import java.util.Objects;
33  import java.util.Set;
34  import java.util.regex.Pattern;
35  
36  import org.apache.http.auth.Credentials;
37  import org.htmlunit.http.HttpUtils;
38  import org.htmlunit.httpclient.HtmlUnitUsernamePasswordCredentials;
39  import org.htmlunit.util.NameValuePair;
40  import org.htmlunit.util.UrlUtils;
41  
42  /**
43   * Parameter object for making web requests.
44   *
45   * @author Brad Clarke
46   * @author Hans Donner
47   * @author Ahmed Ashour
48   * @author Marc Guillemot
49   * @author Rodney Gitzel
50   * @author Ronald Brill
51   * @author Adam Afeltowicz
52   * @author Joerg Werner
53   * @author Michael Lueck
54   * @author Lai Quang Duong
55   * @author Kristof Neirynck
56   */
57  @SuppressWarnings("PMD.TooManyFields")
58  public class WebRequest implements Serializable {
59  
    /**
     * Enum to configure request creation. Hints are enabled per request via
     * {@link #addHint(HttpHint)} and queried via {@link #hasHint(HttpHint)}.
     */
    public enum HttpHint {
        /** Force to include the charset. */
        IncludeCharsetInContentTypeHeader,

        /** Disable sending of stored cookies and receiving of new cookies. */
        BlockCookies
    }
70  
    // Patterns used by removeDots(String) to simplify URL paths.
    // matches a "/./" segment anywhere in the path
    private static final Pattern DOT_PATTERN = Pattern.compile("/\\./");
    // matches "/<segment>/../" where <segment> does not start with ".."
    private static final Pattern DOT_DOT_PATTERN = Pattern.compile("/(?!\\.\\.)[^/]*/\\.\\./");
    // matches the leading "/" together with any directly following run of "./" and "../"
    private static final Pattern REMOVE_DOTS_PATTERN = Pattern.compile("^/(\\.\\.?/)*");

    private String url_; // String instead of java.net.URL because "about:blank" URLs don't serialize correctly
    private String proxyHost_;
    private int proxyPort_;
    private String proxyScheme_;
    private boolean isSocksProxy_;
    private HttpMethod httpMethod_ = HttpMethod.GET;
    private FormEncodingType encodingType_ = FormEncodingType.URL_ENCODED;
    private Map<String, String> additionalHeaders_ = new HashMap<>();
    // only assigned by setUrl(URL) when the URL carries user info
    private Credentials urlCredentials_;
    private Credentials credentials_;
    private int timeout_;
    // transient and not handled by writeObject/readObject, so hints do not survive serialization
    private transient Set<HttpHint> httpHints_;

    // transient: the charsets are written/read by name in writeObject/readObject
    private transient Charset charset_ = StandardCharsets.ISO_8859_1;
    // https://datatracker.ietf.org/doc/html/rfc6838#section-4.2.1
    // private transient Charset defaultResponseContentCharset_ = StandardCharsets.UTF_8;
    private transient Charset defaultResponseContentCharset_ = StandardCharsets.ISO_8859_1;

    /* These two are mutually exclusive; additionally, setRequestBody(String) only accepts a body
       for POST, PUT, PATCH, DELETE or OPTIONS requests. */
    private List<NameValuePair> requestParameters_ = Collections.emptyList();
    private String requestBody_;
96  
97      /**
98       * Instantiates a {@link WebRequest} for the specified URL.
99       * @param url the target URL
100      * @param acceptHeader the accept header to use
101      * @param acceptEncodingHeader the accept encoding header to use
102      */
103     public WebRequest(final URL url, final String acceptHeader, final String acceptEncodingHeader) {
104         setUrl(url);
105         if (acceptHeader != null) {
106             setAdditionalHeader(HttpHeader.ACCEPT, acceptHeader);
107         }
108         if (acceptEncodingHeader != null) {
109             setAdditionalHeader(HttpHeader.ACCEPT_ENCODING, acceptEncodingHeader);
110         }
111         timeout_ = -1;
112     }
113 
114     /**
115      * Instantiates a {@link WebRequest} for the specified URL.
116      * @param url the target URL
117      * @param charset the charset to use
118      * @param refererUrl the url be used by the referer header
119      */
120     public WebRequest(final URL url, final Charset charset, final URL refererUrl) {
121         setUrl(url);
122         setCharset(charset);
123         setRefererHeader(refererUrl);
124     }
125 
    /**
     * Creates a request for {@code UrlUtils.URL_ABOUT_BLANK} using the same accept and
     * accept-encoding header values as {@link #WebRequest(URL)}.
     * @return a new request for about:blank
     */
    public static WebRequest newAboutBlankRequest() {
        return new WebRequest(UrlUtils.URL_ABOUT_BLANK, "*/*", "gzip, deflate");
    }
132 
    /**
     * Instantiates a {@link WebRequest} for the specified URL.
     * Delegates to {@link #WebRequest(URL, String, String)} with a wildcard accept header
     * and the accept-encoding header "gzip, deflate".
     * @param url the target URL
     */
    public WebRequest(final URL url) {
        this(url, "*/*", "gzip, deflate");
    }
140 
    /**
     * Instantiates a {@link WebRequest} for the specified URL using the specified HTTP submit method.
     * Delegates to {@link #WebRequest(URL)} and then applies the method via
     * {@link #setHttpMethod(HttpMethod)}.
     * @param url the target URL
     * @param submitMethod the HTTP submit method to use
     */
    public WebRequest(final URL url, final HttpMethod submitMethod) {
        this(url);
        setHttpMethod(submitMethod);
    }
150 
    /**
     * Returns the target URL. The URL is kept internally in its string form and is
     * converted back with {@code UrlUtils.toUrlSafe(String)} on every call.
     * @return the target URL
     */
    public URL getUrl() {
        return UrlUtils.toUrlSafe(url_);
    }
158 
159     /**
160      * Sets the target URL. The URL may be simplified if needed (for instance eliminating
161      * irrelevant path portions like "/./").
162      * @param url the target URL
163      */
164     public void setUrl(URL url) {
165         if (url == null) {
166             url_ = null;
167             return;
168         }
169 
170         final String path = url.getPath();
171         if (path.isEmpty()) {
172             if (!url.getFile().isEmpty() || url.getProtocol().startsWith("http")) {
173                 url = buildUrlWithNewPath(url, "/");
174             }
175         }
176         else if (path.contains("/.")) {
177             url = buildUrlWithNewPath(url, removeDots(path));
178         }
179 
180         try {
181             final String idn = IDN.toASCII(url.getHost());
182             if (!idn.equals(url.getHost())) {
183                 url = UrlUtils.getUrlWithNewHost(url, idn);
184             }
185         }
186         catch (final Exception e) {
187             throw new IllegalArgumentException(
188                     "Cannot convert the hostname of URL: '" + url.toExternalForm() + "' to ASCII.", e);
189         }
190 
191         try {
192             url_ = UrlUtils.removeRedundantPort(url).toExternalForm();
193         }
194         catch (final MalformedURLException e) {
195             throw new RuntimeException("Cannot strip default port of URL: " + url.toExternalForm(), e);
196         }
197 
198         // http://john.smith:secret@localhost
199         final String userInfo = url.getUserInfo();
200         if (userInfo != null) {
201             final int splitPos = userInfo.indexOf(':');
202             if (splitPos == -1) {
203                 urlCredentials_ = new HtmlUnitUsernamePasswordCredentials(userInfo, new char[0]);
204             }
205             else {
206                 final String username = userInfo.substring(0, splitPos);
207                 final String password = userInfo.substring(splitPos + 1);
208                 urlCredentials_ = new HtmlUnitUsernamePasswordCredentials(username, password.toCharArray());
209             }
210         }
211     }
212 
213     /*
214      * Strip a URL string of "/./" and "/../" occurrences.
215      * <p>
216      * One trick here is to repeatedly create new matchers on a given
217      * pattern, so that we can see whether it needs to be re-applied;
218      * unfortunately .replaceAll() doesn't re-process its own output,
219      * so if we create a new match with a replacement, it is missed.
220      */
221     private static String removeDots(final String path) {
222         String newPath = path;
223 
224         // remove occurrences at the beginning
225         newPath = REMOVE_DOTS_PATTERN.matcher(newPath).replaceAll("/");
226         if ("/..".equals(newPath)) {
227             newPath = "/";
228         }
229 
230         // single dots have no effect, so just remove them
231         while (DOT_PATTERN.matcher(newPath).find()) {
232             newPath = DOT_PATTERN.matcher(newPath).replaceAll("/");
233         }
234 
235         // mid-path double dots should be removed WITH the previous subdirectory and replaced
236         //  with "/" BUT ONLY IF that subdirectory's not also ".." (a regex lookahead helps with this)
237         while (DOT_DOT_PATTERN.matcher(newPath).find()) {
238             newPath = DOT_DOT_PATTERN.matcher(newPath).replaceAll("/");
239         }
240 
241         return newPath;
242     }
243 
244     private static URL buildUrlWithNewPath(URL url, final String newPath) {
245         try {
246             url = UrlUtils.getUrlWithNewPath(url, newPath);
247         }
248         catch (final Exception e) {
249             throw new RuntimeException("Cannot change path of URL: " + url.toExternalForm(), e);
250         }
251         return url;
252     }
253 
    /**
     * Returns the proxy host to use.
     * @return the proxy host to use, {@code null} if none has been set
     */
    public String getProxyHost() {
        return proxyHost_;
    }
261 
    /**
     * Sets the proxy host to use. The value is stored as given, without validation.
     * @param proxyHost the proxy host to use
     */
    public void setProxyHost(final String proxyHost) {
        proxyHost_ = proxyHost;
    }
269 
    /**
     * Returns the proxy port to use.
     * @return the proxy port to use, 0 if none has been set
     */
    public int getProxyPort() {
        return proxyPort_;
    }
277 
    /**
     * Sets the proxy port to use. The value is stored as given, without range checks.
     * @param proxyPort the proxy port to use
     */
    public void setProxyPort(final int proxyPort) {
        proxyPort_ = proxyPort;
    }
285 
    /**
     * Returns the proxy scheme to use.
     * @return the proxy scheme to use, {@code null} if none has been set
     */
    public String getProxyScheme() {
        return proxyScheme_;
    }
293 
    /**
     * Sets the proxy scheme to use. The value is stored as given, without validation.
     * @param proxyScheme the proxy scheme to use
     */
    public void setProxyScheme(final String proxyScheme) {
        proxyScheme_ = proxyScheme;
    }
301 
    /**
     * Returns whether SOCKS proxy or not.
     * @return whether SOCKS proxy or not; {@code false} unless changed via {@link #setSocksProxy(boolean)}
     */
    public boolean isSocksProxy() {
        return isSocksProxy_;
    }
309 
    /**
     * Sets whether SOCKS proxy or not.
     * @param isSocksProxy whether SOCKS proxy or not
     */
    public void setSocksProxy(final boolean isSocksProxy) {
        isSocksProxy_ = isSocksProxy;
    }
317 
    /**
     * Returns the timeout configured for this request. The
     * {@link #WebRequest(URL, String, String)} constructor initialises it to -1.
     * @return the timeout to use
     */
    public int getTimeout() {
        return timeout_;
    }
324 
    /**
     * Sets the timeout to use. The value is stored as given, without range checks.
     * @param timeout the timeout to use
     */
    public void setTimeout(final int timeout) {
        timeout_ = timeout;
    }
332 
    /**
     * Returns the form encoding type to use.
     * @return the form encoding type to use; {@code FormEncodingType.URL_ENCODED} unless changed
     */
    public FormEncodingType getEncodingType() {
        return encodingType_;
    }
340 
    /**
     * Sets the form encoding type to use. {@link #getParameters()} evaluates this value
     * when deciding which body parameters to include.
     * @param encodingType the form encoding type to use
     */
    public void setEncodingType(final FormEncodingType encodingType) {
        encodingType_ = encodingType;
    }
348 
349     /**
350      * <p>Retrieves the request parameters used. Similar to the servlet api function
351      * getParameterMap() this works depending on the request type and collects the
352      * url parameters and the body stuff.<br>
353      * The value is also normalized - null is converted to an empty string.</p>
354      * <p>In contrast to the servlet api this creates a separate KeyValuePair for every
355      * parameter. This means that pairs with the same name can be part of the list. The
356      * servlet api will return a string[] as value for the key in this case.<br>
357      * Additionally this method includes also the uploaded files for multipart post
358      * requests.</p>
359      *
360      * @return the request parameters to use
361      */
362     public List<NameValuePair> getParameters() {
363         // developer note:
364         // this has to be in sync with org.htmlunit.HttpWebConnection.makeHttpMethod(WebRequest, HttpClientBuilder)
365 
366         // developer note:
367         // the spring org.springframework.test.web.servlet.htmlunitHtmlUnitRequestBuilder uses
368         // this method and is sensitive to all the details of the current implementation.
369 
370         final List<NameValuePair> allParameters = new ArrayList<>(
371                 HttpUtils.parseUrlQuery(getUrl().getQuery(), getCharset()));
372 
373         // the servlet api ignores these parameters but to make spring happy we include them
374         final HttpMethod httpMethod = getHttpMethod();
375         if (httpMethod == HttpMethod.POST
376             || httpMethod == HttpMethod.PUT
377             || httpMethod == HttpMethod.PATCH
378             || httpMethod == HttpMethod.DELETE
379             || httpMethod == HttpMethod.OPTIONS) {
380             if (FormEncodingType.URL_ENCODED == getEncodingType()
381                 && httpMethod != HttpMethod.OPTIONS) {
382                 // spring ignores URL_ENCODED parameters for OPTIONS requests
383                 // getRequestParameters and getRequestBody are mutually exclusive
384                 if (getRequestBody() == null) {
385                     allParameters.addAll(getRequestParameters());
386                 }
387                 else {
388                     allParameters.addAll(HttpUtils.parseUrlQuery(getRequestBody(), getCharset()));
389                 }
390             }
391             else if (FormEncodingType.MULTIPART == getEncodingType()) {
392                 if (httpMethod == HttpMethod.POST) {
393                     allParameters.addAll(getRequestParameters());
394                 }
395                 else {
396                     // for PUT, PATCH, DELETE and OPTIONS spring moves the parameters up to the query
397                     // it doesn't replace the query
398                     allParameters.addAll(0, getRequestParameters());
399                 }
400             }
401         }
402 
403         return normalize(allParameters);
404     }
405 
406     private static List<NameValuePair> normalize(final List<NameValuePair> pairs) {
407         if (pairs == null || pairs.isEmpty()) {
408             return pairs;
409         }
410 
411         final List<NameValuePair> resultingPairs = new ArrayList<>();
412         for (final NameValuePair pair : pairs) {
413             resultingPairs.add(pair.normalized());
414         }
415 
416         return resultingPairs;
417     }
418 
    /**
     * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
     *
     * Retrieves the request parameters to use. If set, these request parameters will overwrite any
     * request parameters which may be present in the {@link #getUrl() URL}. Should not be used in
     * combination with the {@link #setRequestBody(String) request body}.
     * <p>
     * The internal list is returned as is (no copy); it is an empty list until
     * {@link #setRequestParameters(List)} has been called.
     * @return the request parameters to use
     */
    public List<NameValuePair> getRequestParameters() {
        return requestParameters_;
    }
430 
431     /**
432      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
433      *
434      * Sets the request parameters to use. If set, these request parameters will overwrite any request
435      * parameters which may be present in the {@link #getUrl() URL}. Should not be used in combination
436      * with the {@link #setRequestBody(String) request body}.
437      * @param requestParameters the request parameters to use
438      * @throws RuntimeException if the request body has already been set
439      */
440     public void setRequestParameters(final List<NameValuePair> requestParameters) throws RuntimeException {
441         if (requestBody_ != null) {
442             final String msg = "Trying to set the request parameters, but the request body has already been specified;"
443                              + "the two are mutually exclusive!";
444             throw new RuntimeException(msg);
445         }
446         requestParameters_ = requestParameters;
447     }
448 
    /**
     * Returns the body content that has been set via {@link #setRequestBody(String)}, which
     * accepts a body only for {@code POST}, {@code PUT}, {@code PATCH}, {@code DELETE} or
     * {@code OPTIONS} requests.
     * Should not be used in combination with {@link #setRequestParameters(List) request parameters}.
     * @return the body content to be submitted, {@code null} if none has been set
     */
    public String getRequestBody() {
        return requestBody_;
    }
457 
458     /**
459      * Sets the body content to be submitted if this is a {@code POST}, {@code PUT} or {@code PATCH} request.
460      * Other request types result in {@link RuntimeException}.
461      * Should not be used in combination with {@link #setRequestParameters(List) request parameters}.
462      * @param requestBody the body content to be submitted if this is a {@code POST}, {@code PUT}
463      *        or {@code PATCH} request
464      * @throws RuntimeException if the request parameters have already been set
465      *                          or this is not a {@code POST}, {@code PUT} or {@code PATCH} request.
466      */
467     public void setRequestBody(final String requestBody) throws RuntimeException {
468         if (requestParameters_ != null && !requestParameters_.isEmpty()) {
469             final String msg = "Trying to set the request body, but the request parameters have already been specified;"
470                        + "the two are mutually exclusive!";
471             throw new RuntimeException(msg);
472         }
473         if (httpMethod_ != HttpMethod.POST
474                 && httpMethod_ != HttpMethod.PUT
475                 && httpMethod_ != HttpMethod.PATCH
476                 && httpMethod_ != HttpMethod.DELETE
477                 && httpMethod_ != HttpMethod.OPTIONS) {
478             final String msg = "The request body may only be set for POST, PUT, PATCH, DELETE or OPTIONS requests!";
479             throw new RuntimeException(msg);
480         }
481         requestBody_ = requestBody;
482     }
483 
    /**
     * Returns the HTTP submit method to use.
     * @return the HTTP submit method to use; {@code HttpMethod.GET} unless changed
     */
    public HttpMethod getHttpMethod() {
        return httpMethod_;
    }
491 
    /**
     * Sets the HTTP submit method to use. An already stored request body is not re-validated
     * against the new method.
     * @param submitMethod the HTTP submit method to use
     */
    public void setHttpMethod(final HttpMethod submitMethod) {
        httpMethod_ = submitMethod;
    }
499 
    /**
     * Returns the additional HTTP headers to use. The internal map is returned as is (no copy),
     * so changes made to it affect this request.
     * @return the additional HTTP headers to use
     */
    public Map<String, String> getAdditionalHeaders() {
        return additionalHeaders_;
    }
507 
    /**
     * Sets the additional HTTP headers to use. The given map replaces the current one and is
     * stored as is (no copy); the header related methods of this class operate directly on it.
     * @param additionalHeaders the additional HTTP headers to use
     */
    public void setAdditionalHeaders(final Map<String, String> additionalHeaders) {
        additionalHeaders_ = additionalHeaders;
    }
515 
516     /**
517      * Returns whether the specified header name is already included in the additional HTTP headers.
518      * @param name the name of the additional HTTP header
519      * @return true if the specified header name is included in the additional HTTP headers
520      */
521     public boolean isAdditionalHeader(final String name) {
522         for (final String key : additionalHeaders_.keySet()) {
523             if (name.equalsIgnoreCase(key)) {
524                 return true;
525             }
526         }
527         return false;
528     }
529 
530     /**
531      * Returns the header value associated with this name.
532      * @param name the name of the additional HTTP header
533      * @return the value or null
534      */
535     public String getAdditionalHeader(final String name) {
536         String newKey = name;
537         for (final String key : additionalHeaders_.keySet()) {
538             if (name.equalsIgnoreCase(key)) {
539                 newKey = key;
540                 break;
541             }
542         }
543         return additionalHeaders_.get(newKey);
544     }
545 
546     /**
547      * Sets the referer HTTP header - only if the provided url is valid.
548      * @param url the url for the referer HTTP header
549      */
550     public void setRefererHeader(final URL url) {
551         if (url == null || !url.getProtocol().startsWith("http")) {
552             return;
553         }
554 
555         try {
556             setAdditionalHeader(HttpHeader.REFERER, UrlUtils.getUrlWithoutRef(url).toExternalForm());
557         }
558         catch (final MalformedURLException ignored) {
559             // bad luck us the whole url from the pager
560         }
561     }
562 
563     /**
564      * Sets the specified name/value pair in the additional HTTP headers.
565      * @param name the name of the additional HTTP header
566      * @param value the value of the additional HTTP header
567      */
568     public void setAdditionalHeader(final String name, final String value) {
569         String newKey = name;
570         for (final String key : additionalHeaders_.keySet()) {
571             if (name.equalsIgnoreCase(key)) {
572                 newKey = key;
573                 break;
574             }
575         }
576         additionalHeaders_.put(newKey, value);
577     }
578 
579     /**
580      * Removed the specified name/value pair from the additional HTTP headers.
581      * @param name the name of the additional HTTP header
582      */
583     public void removeAdditionalHeader(String name) {
584         for (final String key : additionalHeaders_.keySet()) {
585             if (name.equalsIgnoreCase(key)) {
586                 name = key;
587                 break;
588             }
589         }
590         additionalHeaders_.remove(name);
591     }
592 
    /**
     * Returns the credentials {@link #setUrl(URL)} extracted from the user info part of a URL.
     * @return the credentials if set as part of the url, {@code null} otherwise
     */
    public Credentials getUrlCredentials() {
        return urlCredentials_;
    }
600 
    /**
     * Returns the credentials provided via {@link #setCredentials(Credentials)}.
     * @return the credentials if set from the external builder, {@code null} otherwise
     */
    public Credentials getCredentials() {
        return credentials_;
    }
608 
    /**
     * Sets the credentials to use. This does not touch the credentials returned by
     * {@link #getUrlCredentials()}.
     * @param credentials the credentials to use
     */
    public void setCredentials(final Credentials credentials) {
        credentials_ = credentials;
    }
616 
    /**
     * Returns the character set to use to perform the request.
     * {@link #getParameters()} uses it to parse the URL query and a URL encoded request body.
     * @return the character set to use to perform the request
     */
    public Charset getCharset() {
        return charset_;
    }
624 
    /**
     * Sets the character set to use to perform the request. The default value
     * is {@link java.nio.charset.StandardCharsets#ISO_8859_1}.
     * The value is stored as given; {@code null} is not rejected here.
     * @param charset the character set to use to perform the request
     */
    public void setCharset(final Charset charset) {
        charset_ = charset;
    }
633 
    /**
     * Returns the default character set to use for the response when it does not specify one.
     * Unless changed, this is {@link java.nio.charset.StandardCharsets#ISO_8859_1}.
     * @return the default character set to use for the response when it does not specify one.
     */
    public Charset getDefaultResponseContentCharset() {
        return defaultResponseContentCharset_;
    }
640 
    /**
     * Sets the default character set to use for the response when it does not specify one.
     * <p>
     * Unless set, the default is {@link java.nio.charset.StandardCharsets#ISO_8859_1}.
     * @param defaultResponseContentCharset the default character set of the response
     * @throws NullPointerException if the given charset is {@code null}
     */
    public void setDefaultResponseContentCharset(final Charset defaultResponseContentCharset) {
        this.defaultResponseContentCharset_ = Objects.requireNonNull(defaultResponseContentCharset);
    }
650 
651     /**
652      * @param hint the hint to check for
653      * @return true if the hint is enabled
654      */
655     public boolean hasHint(final HttpHint hint) {
656         if (httpHints_ == null) {
657             return false;
658         }
659         return httpHints_.contains(hint);
660     }
661 
662     /**
663      * Enables the hint.
664      * @param hint the hint to add
665      */
666     public void addHint(final HttpHint hint) {
667         if (httpHints_ == null) {
668             httpHints_ = EnumSet.noneOf(HttpHint.class);
669         }
670         httpHints_.add(hint);
671     }
672 
673     /**
674      * Returns a string representation of this object.
675      * @return a string representation of this object
676      */
677     @Override
678     public String toString() {
679         final StringBuilder builder = new StringBuilder(100)
680                 .append(getClass().getSimpleName())
681                 .append("[<url=\"").append(url_).append('"')
682                 .append(", ").append(httpMethod_)
683                 .append(", ").append(encodingType_)
684                 .append(", ").append(requestParameters_)
685                 .append(", ").append(additionalHeaders_)
686                 .append(", ").append(credentials_)
687                 .append(">]");
688         return builder.toString();
689     }
690 
691     private void writeObject(final ObjectOutputStream oos) throws IOException {
692         oos.defaultWriteObject();
693         oos.writeObject(charset_ == null ? null : charset_.name());
694         oos.writeObject(defaultResponseContentCharset_ == null ? null : defaultResponseContentCharset_.name());
695     }
696 
    /*
     * Custom deserialization, mirroring writeObject: after the default fields, the two charset
     * names are read in the order they were written (request charset first) and resolved via
     * Charset.forName. A charset whose stored name is null is not assigned here.
     * The transient httpHints_ field is not restored.
     */
    private void readObject(final ObjectInputStream ois) throws ClassNotFoundException, IOException {
        ois.defaultReadObject();
        final String charsetName = (String) ois.readObject();
        if (charsetName != null) {
            charset_ = Charset.forName(charsetName);
        }
        final String defaultResponseContentCharset = (String) ois.readObject();
        if (defaultResponseContentCharset != null) {
            defaultResponseContentCharset_ = Charset.forName(defaultResponseContentCharset);
        }
    }
708 }