View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.http;
16  
17  import java.nio.ByteBuffer;
18  import java.nio.CharBuffer;
19  import java.nio.charset.Charset;
20  import java.nio.charset.StandardCharsets;
21  import java.time.Instant;
22  import java.time.ZoneId;
23  import java.time.format.DateTimeFormatter;
24  import java.time.format.DateTimeFormatterBuilder;
25  import java.time.format.DateTimeParseException;
26  import java.util.ArrayList;
27  import java.util.BitSet;
28  import java.util.Date;
29  import java.util.List;
30  import java.util.Locale;
31  
32  import org.htmlunit.util.NameValuePair;
33  
34  /**
35   * Http related utils.
36   *
37   * @author Ronald Brill
38   */
39  public final class HttpUtils {
40  
41      /**
42       * Safe characters for x-www-form-urlencoded data;
43       * i.e. alphanumeric plus {@code "-", "_", ".", "*"}
44       */
45      private static final BitSet URLENCODER   = new BitSet(256);
46  
47      static {
48          for (int i = 'a'; i <= 'z'; i++) {
49              URLENCODER.set(i);
50          }
51          for (int i = 'A'; i <= 'Z'; i++) {
52              URLENCODER.set(i);
53          }
54  
55          for (int i = '0'; i <= '9'; i++) {
56              URLENCODER.set(i);
57          }
58          URLENCODER.set('_');
59          URLENCODER.set('-');
60          URLENCODER.set('.');
61          URLENCODER.set('*');
62      }
63  
64      /**
65       * Date format pattern used to parse HTTP date headers in RFC 1123 format.
66       */
67      private static final String PATTERN_RFC1123 = "EEE, dd MMM yyyy HH:mm:ss zzz";
68  
69      /** RFC 1123 date formatter. */
70      private static final DateTimeFormatter FORMATTER_RFC1123 = new DateTimeFormatterBuilder()
71              .parseLenient()
72              .parseCaseInsensitive()
73              .appendPattern(PATTERN_RFC1123)
74              .toFormatter(Locale.ENGLISH);
75  
76      /**
77       * Date format pattern used to parse HTTP date headers in RFC 1036 format.
78       */
79      public static final String PATTERN_RFC1036 = "EEE, dd-MMM-yy HH:mm:ss zzz";
80  
81      /** RFC 1036 date formatter. */
82      private static final DateTimeFormatter FORMATTER_RFC1036 = new DateTimeFormatterBuilder()
83              .parseLenient()
84              .parseCaseInsensitive()
85              .appendPattern(PATTERN_RFC1036)
86              .toFormatter(Locale.ENGLISH);
87  
88      /**
89       * Date format pattern used to parse HTTP date headers in ANSI C
90       * {@code asctime()} format.
91       */
92      private static final String PATTERN_ASCTIME = "EEE MMM d HH:mm:ss yyyy";
93  
94      /** ASCII time date formatter. */
95      private static final DateTimeFormatter FORMATTER_ASCTIME = new DateTimeFormatterBuilder()
96              .parseLenient()
97              .parseCaseInsensitive()
98              .appendPattern(PATTERN_ASCTIME)
99              .toFormatter(Locale.ENGLISH);
100 
101     /**
102      * Standard date formatters: {@link #FORMATTER_RFC1123}, {@link #FORMATTER_RFC1036}, {@link #FORMATTER_ASCTIME}.
103      */
104     private static final DateTimeFormatter[] STANDARD_PATTERNS = {
105         FORMATTER_RFC1123,
106         FORMATTER_RFC1036,
107         FORMATTER_ASCTIME
108     };
109 
110     private static final ZoneId GMT_ID = ZoneId.of("GMT");
111 
112     /**
113      * Parses a date value.  The formats used for parsing the date value are retrieved from
114      * the default http params.
115      *
116      * @param dateValue the date value to parse
117      *
118      * @return the parsed date or null if input could not be parsed
119      */
120     public static Date parseDate(final String dateValue) {
121         if (dateValue == null) {
122             return null;
123         }
124 
125         String v = dateValue;
126         // trim single quotes around date if present
127         if (v.length() > 1 && v.startsWith("'") && v.endsWith("'")) {
128             v = v.substring(1, v.length() - 1);
129         }
130 
131         for (final DateTimeFormatter dateFormatter : STANDARD_PATTERNS) {
132             try {
133                 return new Date(Instant.from(dateFormatter.parse(v)).toEpochMilli());
134             }
135             catch (final DateTimeParseException ignored) {
136                 // ignore
137             }
138         }
139         return null;
140     }
141 
142     /**
143      * Formats the given date according to the RFC 1123 pattern.
144      *
145      * @param date The date to format.
146      * @return An RFC 1123 formatted date string.
147      *
148      * @see #PATTERN_RFC1123
149      */
150     public static String formatDate(final Date date) {
151         final Instant instant = Instant.ofEpochMilli(date.getTime());
152         return FORMATTER_RFC1123.format(instant.atZone(GMT_ID));
153     }
154 
155     /**
156      * Returns a list of {@link NameValuePair}s URI query parameters.
157      * By convention, {@code '&'} and {@code ';'} are accepted as parameter separators.
158      *
159      * @param s URI query component.
160      * @param charset charset to use when decoding the parameters.
161      * @return list of query parameters.
162      */
163     public static List<NameValuePair> parseUrlQuery(final String s, final Charset charset) {
164         if (s == null) {
165             return new ArrayList<>(0);
166         }
167 
168         final BitSet delimSet = new BitSet();
169         delimSet.set('&');
170         delimSet.set(';');
171 
172         final ParseRange cursor = new ParseRange(0, s.length());
173         final List<NameValuePair> list = new ArrayList<>();
174         while (!cursor.atEnd()) {
175             delimSet.set('=');
176             final String name = parseToken(s, cursor, delimSet);
177             String value = null;
178             if (!cursor.atEnd()) {
179                 final int delim = s.charAt(cursor.getPos());
180                 cursor.updatePos(cursor.getPos() + 1);
181                 if (delim == '=') {
182                     delimSet.clear('=');
183                     value = parseToken(s, cursor, delimSet);
184                     if (!cursor.atEnd()) {
185                         cursor.updatePos(cursor.getPos() + 1);
186                     }
187                 }
188             }
189             if (!name.isEmpty()) {
190                 list.add(new NameValuePair(
191                         decodeFormFields(name, charset),
192                         decodeFormFields(value, charset)));
193             }
194         }
195         return list;
196     }
197 
198     private static String decodeFormFields(final String content, Charset charset) {
199         if (content == null) {
200             return null;
201         }
202 
203         if (charset == null) {
204             charset = StandardCharsets.UTF_8;
205         }
206 
207         final ByteBuffer bb = ByteBuffer.allocate(content.length());
208         final CharBuffer cb = CharBuffer.wrap(content);
209         while (cb.hasRemaining()) {
210             final char c = cb.get();
211             if (c == '%' && cb.remaining() >= 2) {
212                 final char uc = cb.get();
213                 final char lc = cb.get();
214                 final int u = Character.digit(uc, 16);
215                 final int l = Character.digit(lc, 16);
216                 if (u != -1 && l != -1) {
217                     bb.put((byte) ((u << 4) + l));
218                 }
219                 else {
220                     bb.put((byte) '%');
221                     bb.put((byte) uc);
222                     bb.put((byte) lc);
223                 }
224             }
225             else if (c == '+') {
226                 bb.put((byte) ' ');
227             }
228             else {
229                 bb.put((byte) c);
230             }
231         }
232         bb.flip();
233         return charset.decode(bb).toString();
234     }
235 
236     /**
237      * @param parameters the paramters
238      * @param charset the charset
239      * @return the query string from the given parameters
240      */
241     public static String toQueryFormFields(final Iterable<? extends NameValuePair> parameters, final Charset charset) {
242         final StringBuilder result = new StringBuilder();
243         for (final NameValuePair parameter : parameters) {
244             final String encodedName = encodeFormFields(parameter.getName(), charset);
245             final String encodedValue = encodeFormFields(parameter.getValue(), charset);
246             if (result.length() > 0) {
247                 result.append('&');
248             }
249             result.append(encodedName);
250             if (encodedValue != null) {
251                 result.append('=').append(encodedValue);
252             }
253         }
254         return result.toString();
255     }
256 
257     private static String encodeFormFields(final String content, Charset charset) {
258         if (content == null) {
259             return null;
260         }
261         if (charset == null) {
262             charset = StandardCharsets.UTF_8;
263         }
264 
265         final StringBuilder buf = new StringBuilder();
266         final ByteBuffer bb = charset.encode(content);
267         while (bb.hasRemaining()) {
268             final int b = bb.get() & 0xff;
269             if (URLENCODER.get(b)) {
270                 buf.append((char) b);
271             }
272             else if (b == ' ') {
273                 buf.append('+');
274             }
275             else {
276                 buf.append('%');
277                 final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
278                 final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
279                 buf.append(hex1).append(hex2);
280             }
281         }
282         return buf.toString();
283     }
284 
285     private HttpUtils() {
286         // util class
287     }
288 
289     /**
290      * Extracts from the sequence of chars a token terminated with any of the given delimiters
291      * discarding semantically insignificant whitespace characters.
292      *
293      * @param buf buffer with the sequence of chars to be parsed
294      * @param range defines the bounds and current position of the buffer
295      * @param delimiters set of delimiting characters. Can be {@code null} if the token
296      *        is not delimited by any character.
297      */
298     private static String parseToken(final String buf, final ParseRange range, final BitSet delimiters) {
299         final StringBuilder dst = new StringBuilder();
300         boolean whitespace = false;
301         while (!range.atEnd()) {
302             final char current = buf.charAt(range.getPos());
303             if (delimiters.get(current)) {
304                 break;
305             }
306             else if (isWhitespace(current)) {
307                 skipWhiteSpace(buf, range);
308                 whitespace = true;
309             }
310             else {
311                 if (whitespace && dst.length() > 0) {
312                     dst.append(' ');
313                 }
314                 copyContent(buf, range, delimiters, dst);
315                 whitespace = false;
316             }
317         }
318         return dst.toString();
319     }
320 
321     /**
322      * Skips semantically insignificant whitespace characters and moves the cursor to the closest
323      * non-whitespace character.
324      *
325      * @param buf buffer with the sequence of chars to be parsed
326      * @param range defines the bounds and current position of the buffer
327      */
328     private static void skipWhiteSpace(final String buf, final ParseRange range) {
329         int pos = range.getPos();
330         final int indexTo = range.getUpperBound();
331 
332         for (int i = pos; i < indexTo; i++) {
333             if (!isWhitespace(buf.charAt(i))) {
334                 break;
335             }
336             pos++;
337         }
338         range.updatePos(pos);
339     }
340 
341     /**
342      * Transfers content into the destination buffer until a whitespace character or any of
343      * the given delimiters is encountered.
344      *
345      * @param buf buffer with the sequence of chars to be parsed
346      * @param range defines the bounds and current position of the buffer
347      * @param delimiters set of delimiting characters. Can be {@code null} if the value
348      *        is delimited by a whitespace only.
349      * @param dst destination buffer
350      */
351     private static void copyContent(final String buf, final ParseRange range,
352             final BitSet delimiters, final StringBuilder dst) {
353         int pos = range.getPos();
354         final int indexTo = range.getUpperBound();
355 
356         for (int i = pos; i < indexTo; i++) {
357             final char current = buf.charAt(i);
358             if (delimiters.get(current) || isWhitespace(current)) {
359                 break;
360             }
361             pos++;
362             dst.append(current);
363         }
364 
365         range.updatePos(pos);
366     }
367 
368     private static boolean isWhitespace(final char ch) {
369         return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
370     }
371 
372     private static final class ParseRange {
373         private final int upperBound_;
374         private int pos_;
375 
376         ParseRange(final int pos, final int upperBound) {
377             upperBound_ = upperBound;
378             pos_ = pos;
379         }
380 
381         int getPos() {
382             return pos_;
383         }
384 
385         int getUpperBound() {
386             return upperBound_;
387         }
388 
389         void updatePos(final int pos) {
390             pos_ = pos;
391         }
392 
393         boolean atEnd() {
394             return pos_ >= upperBound_;
395         }
396     }
397 }
398