|
32 | 32 | CONTEXT_SEPARATOR = "\x04" |
33 | 33 |
|
34 | 34 | # Maximum number of characters that will be parsed from the Accept-Language |
35 | | -# header to prevent possible denial of service or memory exhaustion attacks. |
36 | | -# About 10x longer than the longest value shown on MDN’s Accept-Language page. |
37 | | -ACCEPT_LANGUAGE_HEADER_MAX_LENGTH = 500 |
| 35 | +# header or cookie to prevent possible denial of service or memory exhaustion |
| 36 | +# attacks. About 10x longer than the longest value shown on MDN’s |
| 37 | +# Accept-Language page. |
| 38 | +LANGUAGE_CODE_MAX_LENGTH = 500 |
38 | 39 |
|
39 | 40 | # Format of Accept-Language header values. From RFC 9110 Sections 12.4.2 and |
40 | 41 | # 12.5.4, and RFC 5646 Section 2.1. |
@@ -498,11 +499,25 @@ def get_supported_language_variant(lang_code, strict=False): |
498 | 499 | If `strict` is False (the default), look for a country-specific variant |
499 | 500 | when neither the language code nor its generic variant is found. |
500 | 501 |
|
| 502 | + The language code is truncated to a maximum length to avoid potential |
| 503 | + denial of service attacks. |
| 504 | +
|
501 | 505 | lru_cache should have a maxsize to prevent memory exhaustion attacks, |
502 | 506 | as the provided language codes are taken from the HTTP request. See also |
503 | 507 | <https://www.djangoproject.com/weblog/2007/oct/26/security-fix/>. |
504 | 508 | """ |
505 | 509 | if lang_code: |
| 510 | + # Truncate the language code to a maximum length to avoid potential |
| 511 | + # denial of service attacks. |
| 512 | + if len(lang_code) > LANGUAGE_CODE_MAX_LENGTH: |
| 513 | + if ( |
| 514 | + not strict |
| 515 | + and (index := lang_code.rfind("-", 0, LANGUAGE_CODE_MAX_LENGTH)) > 0 |
| 516 | + ): |
| 517 | + # There is a generic variant under the maximum accepted length. |
| 518 | + lang_code = lang_code[:index] |
| 519 | + else: |
| 520 | + raise ValueError("'lang_code' exceeds the maximum accepted length") |
506 | 521 | # If 'zh-hant-tw' is not supported, try special fallback or subsequent |
507 | 522 | # language codes i.e. 'zh-hant' and 'zh'. |
508 | 523 | possible_lang_codes = [lang_code] |
@@ -626,13 +641,13 @@ def parse_accept_lang_header(lang_string): |
626 | 641 | functools.lru_cache() to avoid repetitive parsing of common header values. |
627 | 642 | """ |
628 | 643 | # If the header value doesn't exceed the maximum allowed length, parse it. |
629 | | - if len(lang_string) <= ACCEPT_LANGUAGE_HEADER_MAX_LENGTH: |
| 644 | + if len(lang_string) <= LANGUAGE_CODE_MAX_LENGTH: |
630 | 645 | return _parse_accept_lang_header(lang_string) |
631 | 646 |
|
632 | 647 | # If there is at least one comma in the value, parse up to the last comma |
633 | 648 | # before the max length, skipping any truncated parts at the end of the |
634 | 649 | # header value. |
635 | | - if (index := lang_string.rfind(",", 0, ACCEPT_LANGUAGE_HEADER_MAX_LENGTH)) > 0: |
| 650 | + if (index := lang_string.rfind(",", 0, LANGUAGE_CODE_MAX_LENGTH)) > 0: |
636 | 651 | return _parse_accept_lang_header(lang_string[:index]) |
637 | 652 |
|
638 | 653 | # Don't attempt to parse if there is only one language-range value which is |
|
0 commit comments