View Javadoc
1   /*
2    * Copyright (c) 2002-2026 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit;
16  
17  import static java.nio.charset.StandardCharsets.ISO_8859_1;
18  
19  import java.io.IOException;
20  import java.io.InputStream;
21  import java.io.Serializable;
22  import java.util.Collections;
23  import java.util.List;
24  import java.util.zip.GZIPInputStream;
25  import java.util.zip.Inflater;
26  import java.util.zip.InflaterInputStream;
27  
28  import org.apache.commons.io.ByteOrderMark;
29  import org.apache.commons.io.IOUtils;
30  import org.apache.commons.io.input.BOMInputStream;
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.htmlunit.util.ArrayUtils;
34  import org.htmlunit.util.MimeType;
35  import org.htmlunit.util.NameValuePair;
36  import org.htmlunit.util.StringUtils;
37  import org.htmlunit.util.brotli.BrotliInputStream;
38  
39  /**
40   * Simple data object to simplify WebResponse creation.
41   *
42   * @author Brad Clarke
43   * @author Daniel Gredler
44   * @author Ahmed Ashour
45   * @author Ronald Brill
46   * @author Sven Strickroth
47   */
48  public class WebResponseData implements Serializable {
49      private static final Log LOG = LogFactory.getLog(WebResponseData.class);
50  
51      private final int statusCode_;
52      private final String statusMessage_;
53      private final List<NameValuePair> responseHeaders_;
54      private final DownloadedContent downloadedContent_;
55  
56      /**
57       * Constructs with a raw byte[] (mostly for testing).
58       *
59       * @param body              Body of this response
60       * @param statusCode        Status code from the server
61       * @param statusMessage     Status message from the server
62       * @param responseHeaders   Headers in this response
63       */
64      public WebResponseData(final byte[] body, final int statusCode, final String statusMessage,
65              final List<NameValuePair> responseHeaders) {
66          this(new DownloadedContent.InMemory(body), statusCode, statusMessage, responseHeaders);
67      }
68  
69      /**
70       * Constructs without data stream for subclasses that override getBody().
71       *
72       * @param statusCode        Status code from the server
73       * @param statusMessage     Status message from the server
74       * @param responseHeaders   Headers in this response
75       */
76      protected WebResponseData(final int statusCode,
77              final String statusMessage, final List<NameValuePair> responseHeaders) {
78          this(ArrayUtils.EMPTY_BYTE_ARRAY, statusCode, statusMessage, responseHeaders);
79      }
80  
81      /**
82       * Constructor.
83       * @param downloadedContent the downloaded content
84       * @param statusCode        Status code from the server
85       * @param statusMessage     Status message from the server
86       * @param responseHeaders   Headers in this response
87       */
88      public WebResponseData(final DownloadedContent downloadedContent, final int statusCode, final String statusMessage,
89              final List<NameValuePair> responseHeaders) {
90          statusCode_ = statusCode;
91          statusMessage_ = statusMessage;
92          responseHeaders_ = Collections.unmodifiableList(responseHeaders);
93          downloadedContent_ = downloadedContent;
94      }
95  
96      private InputStream getStream(final ByteOrderMark... bomHeaders) throws IOException {
97          InputStream stream = downloadedContent_.getInputStream();
98          if (downloadedContent_.isEmpty()) {
99              return stream;
100         }
101 
102         final List<NameValuePair> headers = getResponseHeaders();
103         final String encoding = getHeader(headers, "content-encoding");
104         if (encoding != null) {
105             boolean isGzip = StringUtils.containsIgnoreCase(encoding, "gzip") && !"no-gzip".equals(encoding);
106             if ("gzip-only-text/html".equals(encoding)) {
107                 isGzip = MimeType.TEXT_HTML.equals(getHeader(headers, "content-type"));
108             }
109             if (isGzip) {
110                 try {
111                     stream = new GZIPInputStream(stream);
112                 }
113                 catch (final IOException e) {
114                     LOG.error("Reading gzip encodec content failed.", e);
115                     stream.close();
116                     stream = IOUtils.toInputStream(
117                                 """
118                                 <!DOCTYPE html><html>
119                                 <head><title>Problem loading page</title></head>
120                                 <body>
121                                 <h1>Content Encoding Error</h1>
122                                 <p>The page you are trying to view cannot be shown because\
123                                  it uses an invalid or unsupported form of compression.</p>
124                                 </body>
125                                 </html>""", ISO_8859_1);
126                 }
127                 if (stream != null && bomHeaders != null) {
128                     stream = BOMInputStream.builder().setInputStream(stream).setByteOrderMarks(bomHeaders).get();
129                 }
130                 return stream;
131             }
132 
133             if ("br".equals(encoding)) {
134                 try {
135                     stream = new BrotliInputStream(stream);
136                 }
137                 catch (final IOException e) {
138                     LOG.error("Reading Brotli encodec content failed.", e);
139                     stream.close();
140                     stream = IOUtils.toInputStream(
141                                 """
142                                 <!DOCTYPE html><html>
143                                 <head><title>Problem loading page</title></head>
144                                 <body>
145                                 <h1>Content Encoding Error</h1>
146                                 <p>The page you are trying to view cannot be shown because\
147                                  it uses an invalid or unsupported form of compression.</p>
148                                 </body>
149                                 </html>""", ISO_8859_1);
150                 }
151                 return stream;
152             }
153 
154             if (StringUtils.containsIgnoreCase(encoding, "deflate")) {
155                 boolean zlibHeader = false;
156                 if (stream.markSupported()) { // should be always the case as the content is in a byte[] or in a file
157                     stream.mark(2);
158                     final byte[] buffer = new byte[2];
159                     final int byteCount = IOUtils.read(stream, buffer, 0, 2);
160                     zlibHeader = byteCount == 2 && (((buffer[0] & 0xff) << 8) | (buffer[1] & 0xff)) == 0x789c;
161                     stream.reset();
162                 }
163                 if (zlibHeader) {
164                     stream = new InflaterInputStream(stream);
165                 }
166                 else {
167                     stream = new InflaterInputStream(stream, new Inflater(true));
168                 }
169                 return stream;
170             }
171         }
172 
173         if (stream != null && bomHeaders != null) {
174             stream = BOMInputStream.builder().setInputStream(stream).setByteOrderMarks(bomHeaders).get();
175         }
176         return stream;
177     }
178 
179     private static String getHeader(final List<NameValuePair> headers, final String name) {
180         for (final NameValuePair header : headers) {
181             final String headerName = header.getName().trim();
182             if (name.equalsIgnoreCase(headerName)) {
183                 return header.getValue();
184             }
185         }
186 
187         return null;
188     }
189 
190     /**
191      * Returns the response body.
192      * This may cause memory problem for very large responses.
193      * @return response body
194      */
195     public byte[] getBody() {
196         try (InputStream is = getInputStream()) {
197             return IOUtils.toByteArray(is);
198         }
199         catch (final IOException e) {
200             throw new RuntimeException(e); // shouldn't we allow the method to throw IOException?
201         }
202     }
203 
204     /**
205      * Returns a new {@link InputStream} allowing to read the downloaded content.
206      * @return the associated InputStream
207      * @throws IOException in case of IO problems
208      */
209     public InputStream getInputStream() throws IOException {
210         return getStream((ByteOrderMark[]) null);
211     }
212 
213     /**
214      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
215      *
216      * @param bomHeaders the supported bomHeaders
217      * @return the associated InputStream wrapped with a bom input stream if applicable
218      * @throws IOException in case of IO problems
219      */
220     public InputStream getInputStreamWithBomIfApplicable(final ByteOrderMark... bomHeaders) throws IOException {
221         return getStream(bomHeaders);
222     }
223 
224     /**
225      * @return response headers
226      */
227     public List<NameValuePair> getResponseHeaders() {
228         return responseHeaders_;
229     }
230 
231     /**
232      * @return response status code
233      */
234     public int getStatusCode() {
235         return statusCode_;
236     }
237 
238     /**
239      * @return response status message
240      */
241     public String getStatusMessage() {
242         return statusMessage_;
243     }
244 
245     /**
246      * Returns length of the content data.
247      * @return the length
248      */
249     public long getContentLength() {
250         return downloadedContent_.length();
251     }
252 
253     /**
254      * Clean up the downloaded content.
255      */
256     public void cleanUp() {
257         downloadedContent_.cleanUp();
258     }
259 }