View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit;
16  
17  import java.io.IOException;
18  import java.io.ObjectInputStream;
19  import java.io.ObjectOutputStream;
20  import java.io.Serializable;
21  import java.net.IDN;
22  import java.net.MalformedURLException;
23  import java.net.URL;
24  import java.nio.charset.Charset;
25  import java.nio.charset.StandardCharsets;
26  import java.util.ArrayList;
27  import java.util.Collections;
28  import java.util.EnumSet;
29  import java.util.HashMap;
30  import java.util.List;
31  import java.util.Map;
32  import java.util.Set;
33  import java.util.regex.Pattern;
34  
35  import org.apache.http.auth.Credentials;
36  import org.htmlunit.http.HttpUtils;
37  import org.htmlunit.httpclient.HtmlUnitUsernamePasswordCredentials;
38  import org.htmlunit.util.NameValuePair;
39  import org.htmlunit.util.UrlUtils;
40  
41  /**
42   * Parameter object for making web requests.
43   *
44   * @author Brad Clarke
45   * @author Hans Donner
46   * @author Ahmed Ashour
47   * @author Marc Guillemot
48   * @author Rodney Gitzel
49   * @author Ronald Brill
50   * @author Adam Afeltowicz
51   * @author Joerg Werner
52   * @author Michael Lueck
53   * @author Lai Quang Duong
54   * @author Kristof Neirynck
55   */
56  @SuppressWarnings("PMD.TooManyFields")
57  public class WebRequest implements Serializable {
58  
59      /**
60       * Enum to configure request creation.
61       */
62      public enum HttpHint {
63          /** Force to include the charset. */
64          IncludeCharsetInContentTypeHeader,
65  
66          /** Disable sending of stored cookies and receiving of new cookies. */
67          BlockCookies
68      }
69  
70      private static final Pattern DOT_PATTERN = Pattern.compile("/\\./");
71      private static final Pattern DOT_DOT_PATTERN = Pattern.compile("/(?!\\.\\.)[^/]*/\\.\\./");
72      private static final Pattern REMOVE_DOTS_PATTERN = Pattern.compile("^/(\\.\\.?/)*");
73  
74      private String url_; // String instead of java.net.URL because "about:blank" URLs don't serialize correctly
75      private String proxyHost_;
76      private int proxyPort_;
77      private String proxyScheme_;
78      private boolean isSocksProxy_;
79      private HttpMethod httpMethod_ = HttpMethod.GET;
80      private FormEncodingType encodingType_ = FormEncodingType.URL_ENCODED;
81      private Map<String, String> additionalHeaders_ = new HashMap<>();
82      private Credentials urlCredentials_;
83      private Credentials credentials_;
84      private int timeout_;
85      private transient Set<HttpHint> httpHints_;
86  
87      private transient Charset charset_ = StandardCharsets.ISO_8859_1;
88      // https://datatracker.ietf.org/doc/html/rfc6838#section-4.2.1
89      // private transient Charset defaultResponseContentCharset_ = StandardCharsets.UTF_8;
90      private transient Charset defaultResponseContentCharset_ = StandardCharsets.ISO_8859_1;
91  
92      /* These two are mutually exclusive; additionally, requestBody_ should only be set for POST requests. */
93      private List<NameValuePair> requestParameters_ = Collections.emptyList();
94      private String requestBody_;
95  
96      /**
97       * Instantiates a {@link WebRequest} for the specified URL.
98       * @param url the target URL
99       * @param acceptHeader the accept header to use
100      * @param acceptEncodingHeader the accept encoding header to use
101      */
102     public WebRequest(final URL url, final String acceptHeader, final String acceptEncodingHeader) {
103         setUrl(url);
104         if (acceptHeader != null) {
105             setAdditionalHeader(HttpHeader.ACCEPT, acceptHeader);
106         }
107         if (acceptEncodingHeader != null) {
108             setAdditionalHeader(HttpHeader.ACCEPT_ENCODING, acceptEncodingHeader);
109         }
110         timeout_ = -1;
111     }
112 
113     /**
114      * Instantiates a {@link WebRequest} for the specified URL.
115      * @param url the target URL
116      * @param charset the charset to use
117      * @param refererUrl the url be used by the referer header
118      */
119     public WebRequest(final URL url, final Charset charset, final URL refererUrl) {
120         setUrl(url);
121         setCharset(charset);
122         setRefererHeader(refererUrl);
123     }
124 
125     /**
126      * @return a new request for about:blank
127      */
128     public static WebRequest newAboutBlankRequest() {
129         return new WebRequest(UrlUtils.URL_ABOUT_BLANK, "*/*", "gzip, deflate");
130     }
131 
132     /**
133      * Instantiates a {@link WebRequest} for the specified URL.
134      * @param url the target URL
135      */
136     public WebRequest(final URL url) {
137         this(url, "*/*", "gzip, deflate");
138     }
139 
140     /**
141      * Instantiates a {@link WebRequest} for the specified URL using the specified HTTP submit method.
142      * @param url the target URL
143      * @param submitMethod the HTTP submit method to use
144      */
145     public WebRequest(final URL url, final HttpMethod submitMethod) {
146         this(url);
147         setHttpMethod(submitMethod);
148     }
149 
150     /**
151      * Returns the target URL.
152      * @return the target URL
153      */
154     public URL getUrl() {
155         return UrlUtils.toUrlSafe(url_);
156     }
157 
158     /**
159      * Sets the target URL. The URL may be simplified if needed (for instance eliminating
160      * irrelevant path portions like "/./").
161      * @param url the target URL
162      */
163     public void setUrl(URL url) {
164         if (url == null) {
165             url_ = null;
166             return;
167         }
168 
169         final String path = url.getPath();
170         if (path.isEmpty()) {
171             if (!url.getFile().isEmpty() || url.getProtocol().startsWith("http")) {
172                 url = buildUrlWithNewPath(url, "/");
173             }
174         }
175         else if (path.contains("/.")) {
176             url = buildUrlWithNewPath(url, removeDots(path));
177         }
178 
179         try {
180             final String idn = IDN.toASCII(url.getHost());
181             if (!idn.equals(url.getHost())) {
182                 url = UrlUtils.getUrlWithNewHost(url, idn);
183             }
184         }
185         catch (final Exception e) {
186             throw new IllegalArgumentException(
187                     "Cannot convert the hostname of URL: '" + url.toExternalForm() + "' to ASCII.", e);
188         }
189 
190         try {
191             url_ = UrlUtils.removeRedundantPort(url).toExternalForm();
192         }
193         catch (final MalformedURLException e) {
194             throw new RuntimeException("Cannot strip default port of URL: " + url.toExternalForm(), e);
195         }
196 
197         // http://john.smith:secret@localhost
198         final String userInfo = url.getUserInfo();
199         if (userInfo != null) {
200             final int splitPos = userInfo.indexOf(':');
201             if (splitPos == -1) {
202                 urlCredentials_ = new HtmlUnitUsernamePasswordCredentials(userInfo, new char[0]);
203             }
204             else {
205                 final String username = userInfo.substring(0, splitPos);
206                 final String password = userInfo.substring(splitPos + 1);
207                 urlCredentials_ = new HtmlUnitUsernamePasswordCredentials(username, password.toCharArray());
208             }
209         }
210     }
211 
212     /*
213      * Strip a URL string of "/./" and "/../" occurrences.
214      * <p>
215      * One trick here is to repeatedly create new matchers on a given
216      * pattern, so that we can see whether it needs to be re-applied;
217      * unfortunately .replaceAll() doesn't re-process its own output,
218      * so if we create a new match with a replacement, it is missed.
219      */
220     private static String removeDots(final String path) {
221         String newPath = path;
222 
223         // remove occurrences at the beginning
224         newPath = REMOVE_DOTS_PATTERN.matcher(newPath).replaceAll("/");
225         if ("/..".equals(newPath)) {
226             newPath = "/";
227         }
228 
229         // single dots have no effect, so just remove them
230         while (DOT_PATTERN.matcher(newPath).find()) {
231             newPath = DOT_PATTERN.matcher(newPath).replaceAll("/");
232         }
233 
234         // mid-path double dots should be removed WITH the previous subdirectory and replaced
235         //  with "/" BUT ONLY IF that subdirectory's not also ".." (a regex lookahead helps with this)
236         while (DOT_DOT_PATTERN.matcher(newPath).find()) {
237             newPath = DOT_DOT_PATTERN.matcher(newPath).replaceAll("/");
238         }
239 
240         return newPath;
241     }
242 
243     private static URL buildUrlWithNewPath(URL url, final String newPath) {
244         try {
245             url = UrlUtils.getUrlWithNewPath(url, newPath);
246         }
247         catch (final Exception e) {
248             throw new RuntimeException("Cannot change path of URL: " + url.toExternalForm(), e);
249         }
250         return url;
251     }
252 
253     /**
254      * Returns the proxy host to use.
255      * @return the proxy host to use
256      */
257     public String getProxyHost() {
258         return proxyHost_;
259     }
260 
261     /**
262      * Sets the proxy host to use.
263      * @param proxyHost the proxy host to use
264      */
265     public void setProxyHost(final String proxyHost) {
266         proxyHost_ = proxyHost;
267     }
268 
269     /**
270      * Returns the proxy port to use.
271      * @return the proxy port to use
272      */
273     public int getProxyPort() {
274         return proxyPort_;
275     }
276 
277     /**
278      * Sets the proxy port to use.
279      * @param proxyPort the proxy port to use
280      */
281     public void setProxyPort(final int proxyPort) {
282         proxyPort_ = proxyPort;
283     }
284 
285     /**
286      * Returns the proxy scheme to use.
287      * @return the proxy scheme to use
288      */
289     public String getProxyScheme() {
290         return proxyScheme_;
291     }
292 
293     /**
294      * Sets the proxy scheme to use.
295      * @param proxyScheme the proxy scheme to use
296      */
297     public void setProxyScheme(final String proxyScheme) {
298         proxyScheme_ = proxyScheme;
299     }
300 
301     /**
302      * Returns whether SOCKS proxy or not.
303      * @return whether SOCKS proxy or not
304      */
305     public boolean isSocksProxy() {
306         return isSocksProxy_;
307     }
308 
309     /**
310      * Sets whether SOCKS proxy or not.
311      * @param isSocksProxy whether SOCKS proxy or not
312      */
313     public void setSocksProxy(final boolean isSocksProxy) {
314         isSocksProxy_ = isSocksProxy;
315     }
316 
317     /**
318      * @return the timeout to use
319      */
320     public int getTimeout() {
321         return timeout_;
322     }
323 
324     /**
325      * Sets the timeout to use.
326      * @param timeout the timeout to use
327      */
328     public void setTimeout(final int timeout) {
329         timeout_ = timeout;
330     }
331 
332     /**
333      * Returns the form encoding type to use.
334      * @return the form encoding type to use
335      */
336     public FormEncodingType getEncodingType() {
337         return encodingType_;
338     }
339 
340     /**
341      * Sets the form encoding type to use.
342      * @param encodingType the form encoding type to use
343      */
344     public void setEncodingType(final FormEncodingType encodingType) {
345         encodingType_ = encodingType;
346     }
347 
348     /**
349      * <p>Retrieves the request parameters used. Similar to the servlet api function
350      * getParameterMap() this works depending on the request type and collects the
351      * url parameters and the body stuff.<br>
352      * The value is also normalized - null is converted to an empty string.</p>
353      * <p>In contrast to the servlet api this creates a separate KeyValuePair for every
354      * parameter. This means that pairs with the same name can be part of the list. The
355      * servlet api will return a string[] as value for the key in this case.<br>
356      * Additionally this method includes also the uploaded files for multipart post
357      * requests.</p>
358      *
359      * @return the request parameters to use
360      */
361     public List<NameValuePair> getParameters() {
362         // developer note:
363         // this has to be in sync with org.htmlunit.HttpWebConnection.makeHttpMethod(WebRequest, HttpClientBuilder)
364 
365         // developer note:
366         // the spring org.springframework.test.web.servlet.htmlunitHtmlUnitRequestBuilder uses
367         // this method and is sensitive to all the details of the current implementation.
368 
369         final List<NameValuePair> allParameters = new ArrayList<>(
370                 HttpUtils.parseUrlQuery(getUrl().getQuery(), getCharset()));
371 
372         // the servlet api ignores these parameters but to make spring happy we include them
373         final HttpMethod httpMethod = getHttpMethod();
374         if (httpMethod == HttpMethod.POST
375             || httpMethod == HttpMethod.PUT
376             || httpMethod == HttpMethod.PATCH
377             || httpMethod == HttpMethod.DELETE
378             || httpMethod == HttpMethod.OPTIONS) {
379             if (FormEncodingType.URL_ENCODED == getEncodingType()
380                 && httpMethod != HttpMethod.OPTIONS) {
381                 // spring ignores URL_ENCODED parameters for OPTIONS requests
382                 // getRequestParameters and getRequestBody are mutually exclusive
383                 if (getRequestBody() == null) {
384                     allParameters.addAll(getRequestParameters());
385                 }
386                 else {
387                     allParameters.addAll(HttpUtils.parseUrlQuery(getRequestBody(), getCharset()));
388                 }
389             }
390             else if (FormEncodingType.MULTIPART == getEncodingType()) {
391                 if (httpMethod == HttpMethod.POST) {
392                     allParameters.addAll(getRequestParameters());
393                 }
394                 else {
395                     // for PUT, PATCH, DELETE and OPTIONS spring moves the parameters up to the query
396                     // it doesn't replace the query
397                     allParameters.addAll(0, getRequestParameters());
398                 }
399             }
400         }
401 
402         return normalize(allParameters);
403     }
404 
405     private static List<NameValuePair> normalize(final List<NameValuePair> pairs) {
406         if (pairs == null || pairs.isEmpty()) {
407             return pairs;
408         }
409 
410         final List<NameValuePair> resultingPairs = new ArrayList<>();
411         for (final NameValuePair pair : pairs) {
412             resultingPairs.add(pair.normalized());
413         }
414 
415         return resultingPairs;
416     }
417 
418     /**
419      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
420      *
421      * Retrieves the request parameters to use. If set, these request parameters will overwrite any
422      * request parameters which may be present in the {@link #getUrl() URL}. Should not be used in
423      * combination with the {@link #setRequestBody(String) request body}.
424      * @return the request parameters to use
425      */
426     public List<NameValuePair> getRequestParameters() {
427         return requestParameters_;
428     }
429 
430     /**
431      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
432      *
433      * Sets the request parameters to use. If set, these request parameters will overwrite any request
434      * parameters which may be present in the {@link #getUrl() URL}. Should not be used in combination
435      * with the {@link #setRequestBody(String) request body}.
436      * @param requestParameters the request parameters to use
437      * @throws RuntimeException if the request body has already been set
438      */
439     public void setRequestParameters(final List<NameValuePair> requestParameters) throws RuntimeException {
440         if (requestBody_ != null) {
441             final String msg = "Trying to set the request parameters, but the request body has already been specified;"
442                              + "the two are mutually exclusive!";
443             throw new RuntimeException(msg);
444         }
445         requestParameters_ = requestParameters;
446     }
447 
448     /**
449      * Returns the body content to be submitted if this is a <code>POST</code> request. Ignored for all other request
450      * types. Should not be used in combination with {@link #setRequestParameters(List) request parameters}.
451      * @return the body content to be submitted if this is a <code>POST</code> request
452      */
453     public String getRequestBody() {
454         return requestBody_;
455     }
456 
457     /**
458      * Sets the body content to be submitted if this is a {@code POST}, {@code PUT} or {@code PATCH} request.
459      * Other request types result in {@link RuntimeException}.
460      * Should not be used in combination with {@link #setRequestParameters(List) request parameters}.
461      * @param requestBody the body content to be submitted if this is a {@code POST}, {@code PUT}
462      *        or {@code PATCH} request
463      * @throws RuntimeException if the request parameters have already been set
464      *                          or this is not a {@code POST}, {@code PUT} or {@code PATCH} request.
465      */
466     public void setRequestBody(final String requestBody) throws RuntimeException {
467         if (requestParameters_ != null && !requestParameters_.isEmpty()) {
468             final String msg = "Trying to set the request body, but the request parameters have already been specified;"
469                        + "the two are mutually exclusive!";
470             throw new RuntimeException(msg);
471         }
472         if (httpMethod_ != HttpMethod.POST
473                 && httpMethod_ != HttpMethod.PUT
474                 && httpMethod_ != HttpMethod.PATCH
475                 && httpMethod_ != HttpMethod.DELETE
476                 && httpMethod_ != HttpMethod.OPTIONS) {
477             final String msg = "The request body may only be set for POST, PUT, PATCH, DELETE or OPTIONS requests!";
478             throw new RuntimeException(msg);
479         }
480         requestBody_ = requestBody;
481     }
482 
483     /**
484      * Returns the HTTP submit method to use.
485      * @return the HTTP submit method to use
486      */
487     public HttpMethod getHttpMethod() {
488         return httpMethod_;
489     }
490 
491     /**
492      * Sets the HTTP submit method to use.
493      * @param submitMethod the HTTP submit method to use
494      */
495     public void setHttpMethod(final HttpMethod submitMethod) {
496         httpMethod_ = submitMethod;
497     }
498 
499     /**
500      * Returns the additional HTTP headers to use.
501      * @return the additional HTTP headers to use
502      */
503     public Map<String, String> getAdditionalHeaders() {
504         return additionalHeaders_;
505     }
506 
507     /**
508      * Sets the additional HTTP headers to use.
509      * @param additionalHeaders the additional HTTP headers to use
510      */
511     public void setAdditionalHeaders(final Map<String, String> additionalHeaders) {
512         additionalHeaders_ = additionalHeaders;
513     }
514 
515     /**
516      * Returns whether the specified header name is already included in the additional HTTP headers.
517      * @param name the name of the additional HTTP header
518      * @return true if the specified header name is included in the additional HTTP headers
519      */
520     public boolean isAdditionalHeader(final String name) {
521         for (final String key : additionalHeaders_.keySet()) {
522             if (name.equalsIgnoreCase(key)) {
523                 return true;
524             }
525         }
526         return false;
527     }
528 
529     /**
530      * Returns the header value associated with this name.
531      * @param name the name of the additional HTTP header
532      * @return the value or null
533      */
534     public String getAdditionalHeader(final String name) {
535         String newKey = name;
536         for (final String key : additionalHeaders_.keySet()) {
537             if (name.equalsIgnoreCase(key)) {
538                 newKey = key;
539                 break;
540             }
541         }
542         return additionalHeaders_.get(newKey);
543     }
544 
545     /**
546      * Sets the referer HTTP header - only if the provided url is valid.
547      * @param url the url for the referer HTTP header
548      */
549     public void setRefererHeader(final URL url) {
550         if (url == null || !url.getProtocol().startsWith("http")) {
551             return;
552         }
553 
554         try {
555             setAdditionalHeader(HttpHeader.REFERER, UrlUtils.getUrlWithoutRef(url).toExternalForm());
556         }
557         catch (final MalformedURLException ignored) {
558             // bad luck us the whole url from the pager
559         }
560     }
561 
562     /**
563      * Sets the specified name/value pair in the additional HTTP headers.
564      * @param name the name of the additional HTTP header
565      * @param value the value of the additional HTTP header
566      */
567     public void setAdditionalHeader(final String name, final String value) {
568         String newKey = name;
569         for (final String key : additionalHeaders_.keySet()) {
570             if (name.equalsIgnoreCase(key)) {
571                 newKey = key;
572                 break;
573             }
574         }
575         additionalHeaders_.put(newKey, value);
576     }
577 
578     /**
579      * Removed the specified name/value pair from the additional HTTP headers.
580      * @param name the name of the additional HTTP header
581      */
582     public void removeAdditionalHeader(String name) {
583         for (final String key : additionalHeaders_.keySet()) {
584             if (name.equalsIgnoreCase(key)) {
585                 name = key;
586                 break;
587             }
588         }
589         additionalHeaders_.remove(name);
590     }
591 
592     /**
593      * Returns the credentials to use.
594      * @return the credentials if set as part of the url
595      */
596     public Credentials getUrlCredentials() {
597         return urlCredentials_;
598     }
599 
600     /**
601      * Returns the credentials to use.
602      * @return the credentials if set from the external builder
603      */
604     public Credentials getCredentials() {
605         return credentials_;
606     }
607 
608     /**
609      * Sets the credentials to use.
610      * @param credentials the credentials to use
611      */
612     public void setCredentials(final Credentials credentials) {
613         credentials_ = credentials;
614     }
615 
616     /**
617      * Returns the character set to use to perform the request.
618      * @return the character set to use to perform the request
619      */
620     public Charset getCharset() {
621         return charset_;
622     }
623 
624     /**
625      * Sets the character set to use to perform the request. The default value
626      * is {@link java.nio.charset.StandardCharsets#ISO_8859_1}.
627      * @param charset the character set to use to perform the request
628      */
629     public void setCharset(final Charset charset) {
630         charset_ = charset;
631     }
632 
633     /**
634      * @return the default character set to use for the response when it does not specify one.
635      */
636     public Charset getDefaultResponseContentCharset() {
637         return defaultResponseContentCharset_;
638     }
639 
640     /**
641      * Sets the default character set to use for the response when it does not specify one.
642      * <p>
643      * Unless set, the default is {@link java.nio.charset.StandardCharsets#UTF_8}.
644      * @param defaultResponseContentCharset the default character set of the response
645      */
646     public void setDefaultResponseContentCharset(final Charset defaultResponseContentCharset) {
647         WebAssert.notNull("defaultResponseContentCharset", defaultResponseContentCharset);
648         defaultResponseContentCharset_ = defaultResponseContentCharset;
649     }
650 
651     /**
652      * @param hint the hint to check for
653      * @return true if the hint is enabled
654      */
655     public boolean hasHint(final HttpHint hint) {
656         if (httpHints_ == null) {
657             return false;
658         }
659         return httpHints_.contains(hint);
660     }
661 
662     /**
663      * Enables the hint.
664      * @param hint the hint to add
665      */
666     public void addHint(final HttpHint hint) {
667         if (httpHints_ == null) {
668             httpHints_ = EnumSet.noneOf(HttpHint.class);
669         }
670         httpHints_.add(hint);
671     }
672 
673     /**
674      * Returns a string representation of this object.
675      * @return a string representation of this object
676      */
677     @Override
678     public String toString() {
679         final StringBuilder builder = new StringBuilder(100)
680                 .append(getClass().getSimpleName())
681                 .append("[<url=\"")
682                 .append(url_)
683                 .append("\", ").append(httpMethod_)
684                 .append(", ").append(encodingType_)
685                 .append(", ").append(requestParameters_)
686                 .append(", ").append(additionalHeaders_)
687                 .append(", ").append(credentials_)
688                 .append(">]");
689         return builder.toString();
690     }
691 
692     private void writeObject(final ObjectOutputStream oos) throws IOException {
693         oos.defaultWriteObject();
694         oos.writeObject(charset_ == null ? null : charset_.name());
695         oos.writeObject(defaultResponseContentCharset_ == null ? null : defaultResponseContentCharset_.name());
696     }
697 
698     private void readObject(final ObjectInputStream ois) throws ClassNotFoundException, IOException {
699         ois.defaultReadObject();
700         final String charsetName = (String) ois.readObject();
701         if (charsetName != null) {
702             charset_ = Charset.forName(charsetName);
703         }
704         final String defaultResponseContentCharset = (String) ois.readObject();
705         if (defaultResponseContentCharset != null) {
706             defaultResponseContentCharset_ = Charset.forName(defaultResponseContentCharset);
707         }
708     }
709 }