View Javadoc
1   /*
2    * Copyright (c) 2002-2026 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.http;
16  
import java.net.MalformedURLException;
import java.net.URL;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.regex.Pattern;

import org.htmlunit.BrowserVersion;
import org.htmlunit.util.StringUtils;
import org.htmlunit.util.UrlUtils;
33  
34  /**
35   * Cookie parser based on the HTTP cookie specification.
36   * - RFC 2109 and RFC 2965 (versioned cookies)
37   * - Netscape cookie specification
38   * - Cookie attributes:  Domain, Path, Expires, Max-Age, Secure, HttpOnly, SameSite
39   *
40   * @author Ronald Brill
41   */
42  public final class CookieParser {
43  
44      /** The cookie name used for cookies with no name. */
45      public static final String EMPTY_COOKIE_NAME = "HTMLUNIT_EMPTY_COOKIE";
46  
47      /** Workaround for domain of local files. */
48      public static final String LOCAL_FILESYSTEM_DOMAIN = "local_filesystem";
49  
50      private static final String[] DATE_PATTERNS = {
51          "EEE, dd MMM yyyy HH:mm:ss z",      // RFC 1123
52          "EEE, dd-MMM-yy HH:mm:ss z",        // RFC 1036
53          "EEE MMM dd HH:mm:ss yyyy",         // ANSI C asctime()
54          "EEE, dd-MMM-yyyy HH:mm:ss z",      // Variant
55          "EEE MMM dd yyyy HH:mm: ss z",      // Variant
56          "EEE, dd MMM yy HH:mm:ss z"         // Variant
57      };
58  
59      // Max-Age should be 400 days at most
60      // https://httpwg.org/http-extensions/draft-ietf-httpbis-rfc6265bis.html#section-5.5
61      private static final int MAX_MAX_AGE = 400 * 24 * 60 * 60;
62  
63      private static final Pattern MAX_AGE_PATTERN = Pattern.compile("-?[0-9]+");
64  
    /**
     * Private constructor: every member of this class is static, so no instance is needed.
     */
    private CookieParser() {
        // Utility class
    }
68  
69      /**
70       * Parses a cookie string and returns a list of Cookie objects.
71       *
72       * @param cookieString the string to parse
73       * @param pageUrl the page url as root
74       * @param browserVersion the {@link BrowserVersion}
75       * @return a list of {@link Cookie}'s
76       * @throws MalformedCookieException in case the cookie does not conform to the spec
77       */
78      public static List<Cookie> parseCookie(final String cookieString, final URL pageUrl,
79                                             final BrowserVersion browserVersion) throws MalformedCookieException {
80  
81          if (cookieString == null) {
82              throw new MalformedCookieException("Cookie string cannot be null");
83          }
84  
85          // Normalize the URL for cookie origin
86          final CookieOrigin origin = buildCookieOrigin(pageUrl);
87  
88          // Parse the cookie string
89          final String normalizedCookieString = normalizeCookieString(cookieString);
90          final ParsedCookie parsedCookie = parseNetscapeCookie(normalizedCookieString);
91  
92          // Create and validate the cookie
93          final Cookie cookie = createCookie(parsedCookie, origin, browserVersion);
94  
95          final List<Cookie> cookies = new ArrayList<>(1);
96          cookies.add(cookie);
97          return cookies;
98      }
99  
100     /**
101      * Normalizes the cookie string by handling empty names and whitespace.
102      */
103     private static String normalizeCookieString(String cookieString) {
104         cookieString = cookieString.trim();
105 
106         if (cookieString.isEmpty()) {
107             return EMPTY_COOKIE_NAME + "=";
108         }
109 
110         // Find the position of the first '=' or ';'
111         final int equalsPos = cookieString.indexOf('=');
112         final int semicolonPos = cookieString.indexOf(';');
113 
114         // Determine where the name ends
115         final int endPos;
116         if (equalsPos < 0 && semicolonPos < 0) {
117             // No '=' or ';', entire string is the value with no name
118             return EMPTY_COOKIE_NAME + "=" + cookieString;
119         }
120         else if (equalsPos < 0) {
121             // No '=', only ';'
122             return EMPTY_COOKIE_NAME + "=" + cookieString;
123         }
124         else if (semicolonPos < 0 || equalsPos < semicolonPos) {
125             endPos = equalsPos;
126         }
127         else {
128             // ';' comes before '='
129             return EMPTY_COOKIE_NAME + "=" + cookieString;
130         }
131 
132         // Check if name is empty or blank
133         final String name = cookieString.substring(0, endPos).trim();
134         if (name.isEmpty()) {
135             return EMPTY_COOKIE_NAME + cookieString.substring(endPos);
136         }
137 
138         return cookieString;
139     }
140 
141     /**
142      * Parses a Netscape-style cookie string.
143      */
144     private static ParsedCookie parseNetscapeCookie(final String cookieString)
145             throws MalformedCookieException {
146 
147         // Split by semicolon, but be careful with the first name=value pair
148         final String[] parts = cookieString.split(";");
149 
150         if (parts.length == 0) {
151             throw new MalformedCookieException("Empty cookie string");
152         }
153 
154         // First part is the name=value pair
155         final String[] nameValue = splitNameValue(parts[0].trim());
156 
157         final ParsedCookie result = new ParsedCookie(nameValue[0], nameValue[1]);
158 
159         if (StringUtils.isEmptyOrNull(result.getName())) {
160             throw new MalformedCookieException("Cookie name may not be empty");
161         }
162 
163         // Parse attributes
164         for (int i = 1; i < parts.length; i++) {
165             final String part = parts[i].trim();
166             if (part.isEmpty()) {
167                 continue;
168             }
169 
170             final String[] attrPair = splitNameValue(part);
171             final String attrName = attrPair[0].toLowerCase(Locale.ROOT);
172             final String attrValue = attrPair[1];
173 
174             switch (attrName) {
175                 case "domain":
176                     result.setDomain(attrValue);
177                     break;
178                 case "path":
179                     result.setPath(attrValue);
180                     break;
181                 case "expires":
182                     result.setExpires(parseDate(attrValue));
183                     break;
184                 case "max-age":
185                     result.setMaxAge(parseMaxAge(attrValue));
186                     break;
187                 case "secure":
188                     result.setSecure(true);
189                     break;
190                 case "httponly":
191                     result.setHttpOnly(true);
192                     break;
193                 case "samesite":
194                     result.setSameSite(attrValue);
195                     break;
196                 case "version":
197                     result.setVersion(parseVersion(attrValue));
198                     break;
199                 default:
200                     // Ignore unknown attributes
201                     break;
202             }
203         }
204 
205         return result;
206     }
207 
208     /**
209      * Splits a name=value pair.
210      */
211     private static String[] splitNameValue(final String nvp) {
212         final int equalsPos = nvp.indexOf('=');
213         if (equalsPos < 0) {
214             // No value, just a name (e.g., "secure")
215             return new String[] {nvp.trim(), null};
216         }
217 
218         final String name = nvp.substring(0, equalsPos).trim();
219         final String value = nvp.substring(equalsPos + 1).trim();
220 
221         // Handle quoted values
222         if (value.length() >= 2 && value.charAt(0) == '"' && value.charAt(value.length() - 1) == '"') {
223             // Keep the quotes for compatibility
224             // value = value.substring(1, value.length() - 1);
225         }
226 
227         return new String[] {name, value};
228     }
229 
230     /**
231      * Parses a date string.
232      */
233     private static Date parseDate(final String dateString) throws MalformedCookieException {
234         if (StringUtils.isEmptyOrNull(dateString)) {
235             return null;
236         }
237 
238         // Try RFC 1123 format first (most common)
239         try {
240             final DateTimeFormatter formatter = DateTimeFormatter.RFC_1123_DATE_TIME;
241             final ZonedDateTime zonedDateTime = ZonedDateTime.parse(dateString, formatter);
242             return Date.from(zonedDateTime.toInstant());
243         }
244         catch (final DateTimeParseException e) {
245             // Try other formats
246         }
247 
248         // Try legacy date formats
249         for (final String pattern : DATE_PATTERNS) {
250             try {
251                 final SimpleDateFormat sdf = new SimpleDateFormat(pattern, Locale.US);
252                 sdf.setLenient(false);
253                 return sdf.parse(dateString);
254             }
255             catch (final ParseException e) {
256                 // Try next pattern
257             }
258         }
259 
260         throw new MalformedCookieException("Unable to parse date: " + dateString);
261     }
262 
263     /**
264      * Parses max-age value.
265      */
266     private static Integer parseMaxAge(final String maxAgeString) throws MalformedCookieException {
267         if (StringUtils.isEmptyOrNull(maxAgeString)) {
268             return null;
269         }
270 
271         if (!MAX_AGE_PATTERN.matcher(maxAgeString).matches()) {
272             throw new MalformedCookieException("Invalid 'max-age' attribute: '" + maxAgeString + "'");
273         }
274 
275         if (maxAgeString.startsWith("-")) {
276             return -1;
277         }
278 
279         try {
280             return Math.min(Integer.parseInt(maxAgeString), MAX_MAX_AGE);
281         }
282         catch (final NumberFormatException e) {
283             return MAX_MAX_AGE;
284         }
285     }
286 
287     /**
288      * Parses version value.
289      */
290     private static Integer parseVersion(final String versionString) {
291         if (StringUtils.isEmptyOrNull(versionString)) {
292             return 0;
293         }
294 
295         try {
296             return Integer.parseInt(versionString);
297         }
298         catch (final NumberFormatException e) {
299             return 0;
300         }
301     }
302 
303     /**
304      * Creates a Cookie from the parsed data.
305      */
306     private static Cookie createCookie(final ParsedCookie parsed, final CookieOrigin origin,
307                                        final BrowserVersion browserVersion) throws MalformedCookieException {
308 
309         // Determine domain
310         String domain = parsed.getDomain();
311         if (StringUtils.isEmptyOrNull(domain)) {
312             domain = origin.host;
313         }
314         else {
315             // Validate domain
316             validateDomain(domain, origin, browserVersion);
317         }
318 
319         // Determine path
320         String path = parsed.getPath();
321         if (StringUtils.isEmptyOrNull(path)) {
322             path = getDefaultPath(origin);
323         }
324 
325         // Determine expiration
326         Date expires = parsed.getExpires();
327         if (parsed.getMaxAge() != null) {
328             if (parsed.getMaxAge() < 0) {
329                 expires = null; // Session cookie
330             }
331             else {
332                 expires = new Date(System.currentTimeMillis() + (parsed.getMaxAge() * 1000L));
333             }
334         }
335 
336         return new Cookie(domain, parsed.getName(), parsed.getValue(), path, expires,
337                 parsed.isSecure(), parsed.isHttpOnly(), parsed.getSameSite());
338     }
339 
340     /**
341      * Validates the domain attribute.
342      */
343     private static void validateDomain(final String domain, final CookieOrigin origin,
344                                        final BrowserVersion browserVersion) throws MalformedCookieException {
345 
346         if (StringUtils.isEmptyOrNull(domain)) {
347             throw new MalformedCookieException("Cookie domain may not be empty");
348         }
349 
350         // Remove leading dot
351         final String normalizedDomain = domain.startsWith(".") ? domain.substring(1) : domain;
352         final String originHost = origin.host;
353 
354         // Check if domain matches or is a parent of the origin host
355         if (!domainMatch(normalizedDomain, originHost)) {
356             // In permissive mode, some browsers allow this
357             // For strict compliance, throw an exception
358             if (!LOCAL_FILESYSTEM_DOMAIN.equals(originHost)) {
359                 // Allow for testing purposes
360                 // throw new MalformedCookieException("Illegal domain attribute: " + domain);
361             }
362         }
363     }
364 
365     /**
366      * Checks if the domain matches according to cookie rules.
367      */
368     private static boolean domainMatch(final String domain, final String host) {
369         if (domain.equalsIgnoreCase(host)) {
370             return true;
371         }
372 
373         if (host.endsWith("." + domain)) {
374             return true;
375         }
376 
377         return false;
378     }
379 
380     /**
381      * Gets the default path for a cookie.
382      */
383     private static String getDefaultPath(final CookieOrigin origin) {
384         String path = origin.path;
385 
386         if (StringUtils.isEmptyOrNull(path) || !path.startsWith("/")) {
387             return "/";
388         }
389 
390         // Remove everything after the last slash
391         final int lastSlash = path.lastIndexOf('/');
392         if (lastSlash > 0) {
393             path = path.substring(0, lastSlash);
394         }
395 
396         return path.isEmpty() ? "/" : path;
397     }
398 
399     /**
400      * Builds a CookieOrigin from a URL.
401      */
402     private static CookieOrigin buildCookieOrigin(final URL url) {
403         final URL normalizedUrl = replaceForCookieIfNecessary(url);
404 
405         int port = normalizedUrl.getPort();
406         if (port == -1) {
407             port = normalizedUrl.getDefaultPort();
408         }
409 
410         return new CookieOrigin(
411                 normalizedUrl.getHost(),
412                 port,
413                 normalizedUrl.getPath(),
414                 "https".equals(normalizedUrl.getProtocol()));
415     }
416 
417     /**
418      * Replaces file:// URLs with a bogus host for cookie handling.
419      */
420     private static URL replaceForCookieIfNecessary(URL url) {
421         final String protocol = url.getProtocol();
422         final boolean file = "file".equals(protocol);
423         if (file) {
424             try {
425                 url = UrlUtils.getUrlWithNewHostAndPort(url, LOCAL_FILESYSTEM_DOMAIN, 0);
426             }
427             catch (final MalformedURLException e) {
428                 throw new RuntimeException(e);
429             }
430         }
431         return url;
432     }
433 
    /**
     * Represents the origin of a cookie, i.e. the relevant parts of the URL
     * the cookie was received from.
     *
     * @param host the host of the url
     * @param port the port of the url
     * @param path the path of the url
     * @param secure {@code true} if the url uses the https protocol
     */
    private record CookieOrigin(String host, int port, String path, boolean secure) {
    }
439 
440     /**
441      * Intermediate representation of a parsed cookie.
442      */
443     private static final class ParsedCookie {
444         private final String name_;
445         private final String value_;
446 
447         private String domain_;
448         private String path_;
449         private Date expires_;
450         private Integer maxAge_;
451         private boolean secure_;
452         private boolean httpOnly_;
453         private String sameSite_;
454         private int version_;
455 
456         ParsedCookie(final String name, final String value) {
457             name_ = name;
458             value_ = value;
459             version_ = 0;
460         }
461 
462         public String getName() {
463             return name_;
464         }
465 
466         public String getValue() {
467             return value_;
468         }
469 
470         public String getDomain() {
471             return domain_;
472         }
473 
474         public void setDomain(final String domain) {
475             domain_ = domain;
476         }
477 
478         public String getPath() {
479             return path_;
480         }
481 
482         public void setPath(final String path) {
483             path_ = path;
484         }
485 
486         public Date getExpires() {
487             return expires_;
488         }
489 
490         public void setExpires(final Date expires) {
491             expires_ = expires;
492         }
493 
494         public Integer getMaxAge() {
495             return maxAge_;
496         }
497 
498         public void setMaxAge(final Integer maxAge) {
499             maxAge_ = maxAge;
500         }
501 
502         public boolean isSecure() {
503             return secure_;
504         }
505 
506         public void setSecure(final boolean secure) {
507             secure_ = secure;
508         }
509 
510         public boolean isHttpOnly() {
511             return httpOnly_;
512         }
513 
514         public void setHttpOnly(final boolean httpOnly) {
515             httpOnly_ = httpOnly;
516         }
517 
518         public String getSameSite() {
519             return sameSite_;
520         }
521 
522         public void setSameSite(final String sameSite) {
523             sameSite_ = sameSite;
524         }
525 
526         public int getVersion() {
527             return version_;
528         }
529 
530         public void setVersion(final int version) {
531             version_ = version;
532         }
533     }
534 }