diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2aea055..4874e9d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,16 @@
+2.3.0 (August 26, 2025)
+-----------------------
+
+* The package name is changed from using an underscore (email_validator) to a dash (email-validator) to match PyPI's normalized package name.
+* The library no longer checks that the local part is at most 64 characters because a more careful reading of RFC 5321 indicates the limit is optional and such email addresses have been found in the wild. However, the check can be restored using a new `strict=True` parameter, and the overall 254 character email address length limit is still in place.
+* New EmailSyntaxError messages are used for some existing syntax errors related to @-sign homoglyphs and invalid characters in internationalized domains.
+* When using `allow_display_name=True`, display names are now returned with Unicode NFC normalization.
+* TypeError is now raised if something other than str (or bytes) is passed as the email address.
+
2.2.0 (June 20, 2024)
---------------------
-* Email addresses with internationalized local parts could, with rare Unicode characters, be returned as valid but actually be invalid in their normalized form (returned in the `normalized` field). Local parts now re-validated after Unicode NFC normalization to ensure that invalid characters cannot be injected into the normalized address and that characters with length-increasing NFC normalizations cannot cause a local part to exceed the maximum length after normalization.
+* Email addresses with internationalized local parts could, with rare Unicode characters, be returned as valid but actually be invalid in their normalized form (returned in the `normalized` field). In particular, it is possible to get a normalized address with a ";" character, which is not valid and could change the interpretation of the address. Local parts are now re-validated after Unicode NFC normalization to ensure that invalid characters cannot be injected into the normalized address and that characters with length-increasing NFC normalizations cannot cause a local part to exceed the maximum length after normalization. Thanks to khanh@calif.io from https://calif.io for reporting the issue.
* The length check for email addresses with internationalized local parts is now also applied to the original address string prior to Unicode NFC normalization, which may be longer and could exceed the maximum email address length, to protect callers who do not use the returned normalized address.
* Improved error message for IDNA domains that are too long or have invalid characters after Unicode normalization.
* A new option to parse `My Name <me@example.org>` strings, i.e. a display name plus an email address in angle brackets, is now available. It is off by default.
@@ -77,7 +86,7 @@ Version 1.2.1 (May 1, 2022)
* example.com/net/org are removed from the special-use reserved domain names list so that they do not raise exceptions if check_deliverability is off.
* Improved README.
-Verison 1.2.0 (April 24, 2022)
+Version 1.2.0 (April 24, 2022)
------------------------------
* Reject domains with NULL MX records (when deliverability checks
diff --git a/README.md b/README.md
index 895dfa9..0d1f0eb 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ users by their email address like on a registration form.
Key features:
* Checks that an email address has the correct syntax --- great for
- email-based registration/login forms or validing data.
+ email-based registration/login forms or validating data.
* Gives friendly English error messages when validation fails that you
can display to end-users.
* Checks deliverability (optional): Does the domain name resolve?
@@ -143,13 +143,14 @@ The `validate_email` function also accepts the following keyword arguments
`allow_quoted_local=False`: Set to `True` to allow obscure and potentially problematic email addresses in which the part of the address before the @-sign contains spaces, @-signs, or other surprising characters when the local part is surrounded in quotes (so-called quoted-string local parts). In the object returned by `validate_email`, the normalized local part removes any unnecessary backslash-escaping and even removes the surrounding quotes if the address would be valid without them. You can also set `email_validator.ALLOW_QUOTED_LOCAL` to `True` to turn this on for all calls by default.
-`allow_domain_literal=False`: Set to `True` to allow bracketed IPv4 and "IPv6:"-prefixd IPv6 addresses in the domain part of the email address. No deliverability checks are performed for these addresses. In the object returned by `validate_email`, the normalized domain will use the condensed IPv6 format, if applicable. The object's `domain_address` attribute will hold the parsed `ipaddress.IPv4Address` or `ipaddress.IPv6Address` object if applicable. You can also set `email_validator.ALLOW_DOMAIN_LITERAL` to `True` to turn this on for all calls by default.
+`allow_domain_literal=False`: Set to `True` to allow bracketed IPv4 and "IPv6:"-prefixed IPv6 addresses in the domain part of the email address. No deliverability checks are performed for these addresses. In the object returned by `validate_email`, the normalized domain will use the condensed IPv6 format, if applicable. The object's `domain_address` attribute will hold the parsed `ipaddress.IPv4Address` or `ipaddress.IPv6Address` object if applicable. You can also set `email_validator.ALLOW_DOMAIN_LITERAL` to `True` to turn this on for all calls by default.
`allow_display_name=False`: Set to `True` to allow a display name and bracketed address in the input string, like `My Name <me@example.org>`. It's implemented in the spirit but not the letter of RFC 5322 3.4, so it may be stricter or more relaxed than what you want. The display name, if present, is provided in the returned object's `display_name` field after being unquoted and unescaped. You can also set `email_validator.ALLOW_DISPLAY_NAME` to `True` to turn this on for all calls by default.
`allow_empty_local=False`: Set to `True` to allow an empty local part (i.e.
`@example.com`), e.g. for validating Postfix aliases.
+`strict=False`: Set to `True` to perform additional syntax checks (currently only a local part length check). This should be used by mail service providers at address creation to ensure email addresses meet broad compatibility requirements.
### DNS timeout and cache
diff --git a/email_validator/__init__.py b/email_validator/__init__.py
index 626aa00..38d0741 100644
--- a/email_validator/__init__.py
+++ b/email_validator/__init__.py
@@ -1,8 +1,8 @@
from typing import TYPE_CHECKING
# Export the main method, helper methods, and the public data types.
-from .exceptions_types import ValidatedEmail, EmailNotValidError, \
- EmailSyntaxError, EmailUndeliverableError
+from .exceptions import EmailNotValidError, EmailSyntaxError, EmailUndeliverableError
+from .types import ValidatedEmail
from .validate_email import validate_email
from .version import __version__
@@ -27,9 +27,11 @@ def caching_resolver(*args, **kwargs):
# Default values for keyword arguments.
ALLOW_SMTPUTF8 = True
+ALLOW_EMPTY_LOCAL = False
ALLOW_QUOTED_LOCAL = False
ALLOW_DOMAIN_LITERAL = False
ALLOW_DISPLAY_NAME = False
+STRICT = False
GLOBALLY_DELIVERABLE = True
CHECK_DELIVERABILITY = True
TEST_ENVIRONMENT = False
diff --git a/email_validator/__main__.py b/email_validator/__main__.py
index 52791c7..84d9fd4 100644
--- a/email_validator/__main__.py
+++ b/email_validator/__main__.py
@@ -12,7 +12,7 @@
# When using STDIN, no output will be given for valid email addresses.
#
# Keyword arguments to validate_email can be set in environment variables
-# of the same name but upprcase (see below).
+# of the same name but uppercase (see below).
import json
import os
@@ -21,7 +21,7 @@
from .validate_email import validate_email, _Resolver
from .deliverability import caching_resolver
-from .exceptions_types import EmailNotValidError
+from .exceptions import EmailNotValidError
def main(dns_resolver: Optional[_Resolver] = None) -> None:
@@ -29,7 +29,8 @@ def main(dns_resolver: Optional[_Resolver] = None) -> None:
# Set options from environment variables.
options: Dict[str, Any] = {}
- for varname in ('ALLOW_SMTPUTF8', 'ALLOW_QUOTED_LOCAL', 'ALLOW_DOMAIN_LITERAL',
+ for varname in ('ALLOW_SMTPUTF8', 'ALLOW_EMPTY_LOCAL', 'ALLOW_QUOTED_LOCAL', 'ALLOW_DOMAIN_LITERAL',
+ 'ALLOW_DISPLAY_NAME',
'GLOBALLY_DELIVERABLE', 'CHECK_DELIVERABILITY', 'TEST_ENVIRONMENT'):
if varname in os.environ:
options[varname.lower()] = bool(os.environ[varname])
@@ -38,7 +39,7 @@ def main(dns_resolver: Optional[_Resolver] = None) -> None:
options[varname.lower()] = float(os.environ[varname])
if len(sys.argv) == 1:
- # Validate the email addresses pased line-by-line on STDIN.
+ # Validate the email addresses passed line-by-line on STDIN.
dns_resolver = dns_resolver or caching_resolver()
for line in sys.stdin:
email = line.strip()
diff --git a/email_validator/deliverability.py b/email_validator/deliverability.py
index 90f5f9a..6100a31 100644
--- a/email_validator/deliverability.py
+++ b/email_validator/deliverability.py
@@ -2,7 +2,7 @@
import ipaddress
-from .exceptions_types import EmailUndeliverableError
+from .exceptions import EmailUndeliverableError
import dns.resolver
import dns.exception
diff --git a/email_validator/exceptions.py b/email_validator/exceptions.py
new file mode 100644
index 0000000..87ef13c
--- /dev/null
+++ b/email_validator/exceptions.py
@@ -0,0 +1,13 @@
+class EmailNotValidError(ValueError):
+ """Parent class of all validation-failure exceptions raised by this module (wrong argument types raise TypeError instead)."""
+ pass
+
+
+class EmailSyntaxError(EmailNotValidError):
+ """Exception raised when an email address fails validation because of its form."""
+ pass
+
+
+class EmailUndeliverableError(EmailNotValidError):
+ """Exception raised when an email address fails validation because its domain name does not appear deliverable."""
+ pass
diff --git a/email_validator/rfc_constants.py b/email_validator/rfc_constants.py
index 39d8e31..e93441b 100644
--- a/email_validator/rfc_constants.py
+++ b/email_validator/rfc_constants.py
@@ -36,12 +36,24 @@
QTEXT_INTL = re.compile(r"[\u0020-\u007E\u0080-\U0010FFFF]")
# Length constants
+
# RFC 3696 + errata 1003 + errata 1690 (https://www.rfc-editor.org/errata_search.php?rfc=3696&eid=1690)
-# explains the maximum length of an email address is 254 octets.
+# explains the maximum length of an email address is 254 octets based on RFC 5321 4.5.3.1.3. A
+# maximum local part length is also given at RFC 5321 4.5.3.1.1.
+#
+# But RFC 5321 4.5.3.1 says that these (and other) limits are in a sense suggestions, and longer
+# local parts have been seen in the wild. Consequently, the local part length is only checked
+# in "strict" mode. Although the email address maximum length is also somewhat of a suggestion,
+# I don't like the idea of having no length checks performed, so I'm leaving that to always be
+# checked.
EMAIL_MAX_LENGTH = 254
LOCAL_PART_MAX_LENGTH = 64
+
+# Although RFC 5321 4.5.3.1.2 gives a (suggested, see above) limit of 255 octets, RFC 1035 2.3.4 also
+# imposes a length limit (255 octets). But per https://stackoverflow.com/questions/32290167/what-is-the-maximum-length-of-a-dns-name,
+# two of those octets are taken up by the optional final dot and null root label.
DNS_LABEL_LENGTH_LIMIT = 63 # in "octets", RFC 1035 2.3.1
-DOMAIN_MAX_LENGTH = 253 # in "octets" as transmitted, RFC 1035 2.3.4 and RFC 5321 4.5.3.1.2, and see https://stackoverflow.com/questions/32290167/what-is-the-maximum-length-of-a-dns-name
+DOMAIN_MAX_LENGTH = 253 # in "octets" as transmitted
# RFC 2142
CASE_INSENSITIVE_MAILBOX_NAMES = [
diff --git a/email_validator/syntax.py b/email_validator/syntax.py
index c655451..0b1c7b0 100644
--- a/email_validator/syntax.py
+++ b/email_validator/syntax.py
@@ -1,4 +1,5 @@
-from .exceptions_types import EmailSyntaxError, ValidatedEmail
+from .exceptions import EmailSyntaxError
+from .types import ValidatedEmail
from .rfc_constants import EMAIL_MAX_LENGTH, LOCAL_PART_MAX_LENGTH, DOMAIN_MAX_LENGTH, \
DOT_ATOM_TEXT, DOT_ATOM_TEXT_INTL, ATEXT_RE, ATEXT_INTL_DOT_RE, ATEXT_HOSTNAME_INTL, QTEXT_INTL, \
DNS_LABEL_LENGTH_LIMIT, DOT_ATOM_TEXT_HOSTNAME, DOMAIN_NAME_REGEX, DOMAIN_LITERAL_CHARS
@@ -57,7 +58,7 @@ def split_string_at_unquoted_special(text: str, specials: Tuple[str, ...]) -> Tu
for i, c in enumerate(text):
# < plus U+0338 (Combining Long Solidus Overlay) normalizes to
# ≮ U+226E (Not Less-Than), and it would be confusing to treat
- # the < as the start of "" syntax in that case. Liekwise,
+ # the < as the start of "<email>" syntax in that case. Likewise,
# if anything combines with an @ or ", we should probably not
# treat it as a special character.
if unicodedata.normalize("NFC", text[i:])[0] != c:
@@ -82,7 +83,28 @@ def split_string_at_unquoted_special(text: str, specials: Tuple[str, ...]) -> Tu
else:
left_part += c
+ # No special symbol found. The special symbols always
+ # include an at-sign, so this always indicates a missing
+ # at-sign. The other symbol is optional.
if len(left_part) == len(text):
+ # The full-width at-sign might occur in CJK contexts.
+ # We can't accept it because we only accept addresses
+ # that are actually valid. But if this is common we
+ # may want to consider accepting and normalizing full-
+ # width characters for the other special symbols (and
+ # full-width dot is already accepted in internationalized
+ # domains) with a new option.
+ # See https://news.ycombinator.com/item?id=42235268.
+ if "@" in text:
+ raise EmailSyntaxError("The email address has the \"full-width\" at-sign (@) character instead of a regular at-sign.")
+
+ # Check another near-homoglyph for good measure because
+ # homoglyphs in place of required characters could be
+ # very confusing. We may want to consider checking for
+ # homoglyphs anywhere we look for a special symbol.
+ if "﹫" in text:
+ raise EmailSyntaxError('The email address has the "small commercial at" character instead of a regular at-sign.')
+
raise EmailSyntaxError("An email address must have an @-sign.")
# The right part is whatever is left.
@@ -207,7 +229,7 @@ class LocalPartValidationResult(TypedDict):
def validate_email_local_part(local: str, allow_smtputf8: bool = True, allow_empty_local: bool = False,
- quoted_local_part: bool = False) -> LocalPartValidationResult:
+ quoted_local_part: bool = False, strict: bool = False) -> LocalPartValidationResult:
"""Validates the syntax of the local part of an email address."""
if len(local) == 0:
@@ -229,7 +251,7 @@ def validate_email_local_part(local: str, allow_smtputf8: bool = True, allow_emp
# internationalized, then the UTF-8 encoding may be longer, but
# that may not be relevant. We will check the total address length
# instead.
- if len(local) > LOCAL_PART_MAX_LENGTH:
+ if strict and len(local) > LOCAL_PART_MAX_LENGTH:
reason = get_length_reason(local, limit=LOCAL_PART_MAX_LENGTH)
raise EmailSyntaxError(f"The email address is too long before the @-sign {reason}.")
@@ -280,8 +302,8 @@ def validate_email_local_part(local: str, allow_smtputf8: bool = True, allow_emp
valid = "dot-atom"
requires_smtputf8 = True
- # There are no syntactic restrictions on quoted local parts, so if
- # it was originally quoted, it is probably valid. More characters
+ # There are no dot-atom syntax restrictions on quoted local parts, so
+ # if it was originally quoted, it is probably valid. More characters
# are allowed, like @-signs, spaces, and quotes, and there are no
# restrictions on the placement of dots, as in dot-atom local parts.
elif quoted_local_part:
@@ -438,6 +460,36 @@ def check_dot_atom(label: str, start_descr: str, end_descr: str, is_hostname: bo
raise EmailSyntaxError("An email address cannot have a period and a hyphen next to each other.")
+def uts46_valid_char(char: str) -> bool:
+ # Returns False for a character to reject ahead of UTS-46 remapping. By exhaustively running
+ # for c in (chr(i) for i in range(0x110000)):
+ # idna.uts46_remap(c, std3_rules=False, transitional=False)
+ # and collecting the rejected characters, the following rules were found to be pretty close.
+ c = ord(char)
+ if 0x80 <= c <= 0x9f:
+ # C1 control characters (U+0080-U+009F).
+ return False
+ elif ((0x2010 <= c <= 0x2060 and not (0x2024 <= c <= 0x2026) and not (0x2028 <= c <= 0x202E))
+ or c in (0x00AD, 0x2064, 0xFF0E)
+ or 0x200B <= c <= 0x200D
+ or 0x1BCA0 <= c <= 0x1BCA3):
+ # Characters that are permitted but would otherwise be caught
+ # by one of the tests below, so they are accepted here first.
+ return True
+ elif unicodedata.category(chr(c)) in ("Cf", "Cn", "Co", "Cs", "Zs", "Zl", "Zp"):
+ # There are a bunch of Zs characters including regular space
+ # that are allowed by UTS46 but are not allowed in domain
+ # names anyway.
+ #
+ # There are some Cn (unassigned) characters that the idna
+ # package doesn't reject but we can, I think.
+ return False
+ elif "002E" in unicodedata.decomposition(chr(c)).split(" "):
+ # Characters that decompose into a sequence with a dot (U+002E).
+ return False
+ return True
+
+
class DomainNameValidationResult(TypedDict):
ascii_domain: str
domain: str
@@ -462,6 +514,15 @@ def validate_email_domain_name(domain: str, test_environment: bool = False, glob
# they may not be valid, safe, or sensible Unicode strings.
check_unsafe_chars(domain)
+ # Reject characters that would be rejected by UTS-46 normalization next but
+ # with an error message under our control.
+ bad_chars = {
+ safe_character_display(c) for c in domain
+ if not uts46_valid_char(c)
+ }
+ if bad_chars:
+ raise EmailSyntaxError("The part after the @-sign contains invalid characters: " + ", ".join(sorted(bad_chars)) + ".")
+
# Perform UTS-46 normalization, which includes casefolding, NFC normalization,
# and converting all label separators (the period/full stop, fullwidth full stop,
# ideographic full stop, and halfwidth ideographic full stop) to regular dots.
@@ -607,12 +668,12 @@ def validate_email_domain_name(domain: str, test_environment: bool = False, glob
# case for this.
bad_chars = {
safe_character_display(c)
- for c in domain
+ for c in domain_i18n
if not ATEXT_HOSTNAME_INTL.match(c)
}
if bad_chars:
raise EmailSyntaxError("The part after the @-sign contains invalid characters: " + ", ".join(sorted(bad_chars)) + ".")
- check_unsafe_chars(domain)
+ check_unsafe_chars(domain_i18n)
# Check that it can be encoded back to IDNA ASCII. We have no test
# case for this.
@@ -642,7 +703,7 @@ def validate_email_length(addrinfo: ValidatedEmail) -> None:
# form is checked first because it is the original input.
# 2) The normalized email address. We perform Unicode NFC normalization of
# the local part, we normalize the domain to internationalized characters
- # (if originaly IDNA ASCII) which also includes Unicode normalization,
+ # (if originally IDNA ASCII) which also includes Unicode normalization,
# and we may remove quotes in quoted local parts. We recommend that
# callers use this string, so it must be valid.
# 3) The email address with the IDNA ASCII representation of the domain
diff --git a/email_validator/exceptions_types.py b/email_validator/types.py
similarity index 92%
rename from email_validator/exceptions_types.py
rename to email_validator/types.py
index 928a94f..1df60ff 100644
--- a/email_validator/exceptions_types.py
+++ b/email_validator/types.py
@@ -2,21 +2,6 @@
from typing import Any, Dict, List, Optional, Tuple, Union
-class EmailNotValidError(ValueError):
- """Parent class of all exceptions raised by this module."""
- pass
-
-
-class EmailSyntaxError(EmailNotValidError):
- """Exception raised when an email address fails validation because of its form."""
- pass
-
-
-class EmailUndeliverableError(EmailNotValidError):
- """Exception raised when an email address fails validation because its domain name does not appear deliverable."""
- pass
-
-
class ValidatedEmail:
"""The validate_email function returns objects of this type holding the normalized form of the email address
and other information."""
diff --git a/email_validator/validate_email.py b/email_validator/validate_email.py
index a134c77..ae5d963 100644
--- a/email_validator/validate_email.py
+++ b/email_validator/validate_email.py
@@ -1,7 +1,8 @@
from typing import Optional, Union, TYPE_CHECKING
import unicodedata
-from .exceptions_types import EmailSyntaxError, ValidatedEmail
+from .exceptions import EmailSyntaxError
+from .types import ValidatedEmail
from .syntax import split_email, validate_email_local_part, validate_email_domain_name, validate_email_domain_literal, validate_email_length
from .rfc_constants import CASE_INSENSITIVE_MAILBOX_NAMES
@@ -17,10 +18,11 @@ def validate_email(
/, # prior arguments are positional-only
*, # subsequent arguments are keyword-only
allow_smtputf8: Optional[bool] = None,
- allow_empty_local: bool = False,
+ allow_empty_local: Optional[bool] = None,
allow_quoted_local: Optional[bool] = None,
allow_domain_literal: Optional[bool] = None,
allow_display_name: Optional[bool] = None,
+ strict: Optional[bool] = None,
check_deliverability: Optional[bool] = None,
test_environment: Optional[bool] = None,
globally_deliverable: Optional[bool] = None,
@@ -34,16 +36,20 @@ def validate_email(
"""
# Fill in default values of arguments.
- from . import ALLOW_SMTPUTF8, ALLOW_QUOTED_LOCAL, ALLOW_DOMAIN_LITERAL, ALLOW_DISPLAY_NAME, \
- GLOBALLY_DELIVERABLE, CHECK_DELIVERABILITY, TEST_ENVIRONMENT, DEFAULT_TIMEOUT
+ from . import ALLOW_SMTPUTF8, ALLOW_EMPTY_LOCAL, ALLOW_QUOTED_LOCAL, ALLOW_DOMAIN_LITERAL, ALLOW_DISPLAY_NAME, \
+ STRICT, GLOBALLY_DELIVERABLE, CHECK_DELIVERABILITY, TEST_ENVIRONMENT, DEFAULT_TIMEOUT
if allow_smtputf8 is None:
allow_smtputf8 = ALLOW_SMTPUTF8
+ if allow_empty_local is None:
+ allow_empty_local = ALLOW_EMPTY_LOCAL
if allow_quoted_local is None:
allow_quoted_local = ALLOW_QUOTED_LOCAL
if allow_domain_literal is None:
allow_domain_literal = ALLOW_DOMAIN_LITERAL
if allow_display_name is None:
allow_display_name = ALLOW_DISPLAY_NAME
+ if strict is None:
+ strict = STRICT
if check_deliverability is None:
check_deliverability = CHECK_DELIVERABILITY
if test_environment is None:
@@ -53,14 +59,18 @@ def validate_email(
if timeout is None and dns_resolver is None:
timeout = DEFAULT_TIMEOUT
- # Allow email to be a str or bytes instance. If bytes,
- # it must be ASCII because that's how the bytes work
- # on the wire with SMTP.
- if not isinstance(email, str):
+ if isinstance(email, str):
+ pass
+ elif isinstance(email, bytes):
+ # Allow email to be a bytes instance as if it is what
+ # will be transmitted on the wire. But assume SMTPUTF8
+ # is unavailable, so it must be ASCII.
try:
email = email.decode("ascii")
except ValueError as e:
raise EmailSyntaxError("The email address is not valid ASCII.") from e
+ else:
+ raise TypeError("email must be str or bytes")
# Split the address into the display name (or None), the local part
# (before the @-sign), and the domain part (after the @-sign).
@@ -70,6 +80,14 @@ def validate_email(
display_name, local_part, domain_part, is_quoted_local_part \
= split_email(email)
+ if display_name:
+ # UTS #39 3.3 Email Security Profiles for Identifiers requires
+ # display names (incorrectly called "quoted-string-part" there)
+ # to be NFC normalized. Since these are not a part of what we
+ # are really validating, we won't check that the input was NFC
+ # normalized, but we'll normalize in output.
+ display_name = unicodedata.normalize("NFC", display_name)
+
# Collect return values in this instance.
ret = ValidatedEmail()
ret.original = ((local_part if not is_quoted_local_part
@@ -84,7 +102,8 @@ def validate_email(
local_part_info = validate_email_local_part(local_part,
allow_smtputf8=allow_smtputf8,
allow_empty_local=allow_empty_local,
- quoted_local_part=is_quoted_local_part)
+ quoted_local_part=is_quoted_local_part,
+ strict=strict)
ret.local_part = local_part_info["local_part"]
ret.ascii_local_part = local_part_info["ascii_local_part"]
ret.smtputf8 = local_part_info["smtputf8"]
@@ -92,13 +111,23 @@ def validate_email(
# RFC 6532 section 3.1 says that Unicode NFC normalization should be applied,
# so we'll return the NFC-normalized local part. Since the caller may use that
# string in place of the original string, ensure it is also valid.
+ #
+ # UTS #39 3.3 Email Security Profiles for Identifiers requires local parts
+ # to be NFKC normalized, which loses some information in characters that can
+ # be decomposed. We might want to consider applying NFKC normalization, but
+ # we can't make the change easily because it would break database lookups
+ # for any caller that put a normalized address from a previous version of
+ # this library. (UTS #39 seems to require that the *input* be NFKC normalized
+ # and has other requirements that are hard to check without additional Unicode
+ # data, and I don't know whether the rules really apply in the wild.)
normalized_local_part = unicodedata.normalize("NFC", ret.local_part)
if normalized_local_part != ret.local_part:
try:
validate_email_local_part(normalized_local_part,
allow_smtputf8=allow_smtputf8,
allow_empty_local=allow_empty_local,
- quoted_local_part=is_quoted_local_part)
+ quoted_local_part=is_quoted_local_part,
+ strict=strict)
except EmailSyntaxError as e:
raise EmailSyntaxError("After Unicode normalization: " + str(e)) from e
ret.local_part = normalized_local_part
diff --git a/email_validator/version.py b/email_validator/version.py
index 8a124bf..55e4709 100644
--- a/email_validator/version.py
+++ b/email_validator/version.py
@@ -1 +1 @@
-__version__ = "2.2.0"
+__version__ = "2.3.0"
diff --git a/setup.cfg b/setup.cfg
index 3387df1..8ceac96 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
[metadata]
-name = email_validator
+name = email-validator
version = attr: email_validator.version.__version__
description = A robust email address syntax and deliverability validation library.
long_description = file: README.md
diff --git a/tests/test_syntax.py b/tests/test_syntax.py
index ffe4963..9bd1385 100644
--- a/tests/test_syntax.py
+++ b/tests/test_syntax.py
@@ -295,24 +295,35 @@ def test_email_valid(email_input: str, output: ValidatedEmail) -> None:
),
),
(
- 's\u0323\u0307@nfc.tld',
+ '\"s\u0323\u0307\" ',
MakeValidatedEmail(
local_part='\u1E69',
smtputf8=True,
ascii_domain='nfc.tld',
domain='nfc.tld',
normalized='\u1E69@nfc.tld',
+ display_name='\u1E69'
+ ),
+ ),
+ (
+ '@@fullwidth.at',
+ MakeValidatedEmail(
+ local_part='@',
+ smtputf8=True,
+ ascii_domain='fullwidth.at',
+ domain='fullwidth.at',
+ normalized='@@fullwidth.at',
),
),
],
)
def test_email_valid_intl_local_part(email_input: str, output: ValidatedEmail) -> None:
# Check that it passes when allow_smtputf8 is True.
- assert validate_email(email_input, check_deliverability=False) == output
+ assert validate_email(email_input, check_deliverability=False, allow_display_name=True) == output
# Check that it fails when allow_smtputf8 is False.
with pytest.raises(EmailSyntaxError) as exc_info:
- validate_email(email_input, allow_smtputf8=False, check_deliverability=False)
+ validate_email(email_input, allow_smtputf8=False, check_deliverability=False, allow_display_name=True)
assert "Internationalized characters before the @-sign are not supported: " in str(exc_info.value)
@@ -363,6 +374,8 @@ def test_domain_literal() -> None:
'email_input,error_msg',
[
('hello.world', 'An email address must have an @-sign.'),
+ ('hello@world', 'The email address has the "full-width" at-sign (@) character instead of a regular at-sign.'),
+ ('hello﹫world', 'The email address has the "small commercial at" character instead of a regular at-sign.'),
('my@localhost', 'The part after the @-sign is not valid. It should have a period.'),
('my@.leadingdot.com', 'An email address cannot have a period immediately after the @-sign.'),
('my@.leadingfwdot.com', 'An email address cannot have a period immediately after the @-sign.'),
@@ -389,9 +402,7 @@ def test_domain_literal() -> None:
('.leadingdot@domain.com', 'An email address cannot start with a period.'),
('twodots..here@domain.com', 'An email address cannot have two periods in a row.'),
('trailingdot.@domain.email', 'An email address cannot have a period immediately before the @-sign.'),
- ('me@⒈wouldbeinvalid.com',
- "The part after the @-sign contains invalid characters (Codepoint U+2488 not allowed "
- "at position 1 in '⒈wouldbeinvalid.com')."),
+ ('me@⒈wouldbeinvalid.com', "The part after the @-sign contains invalid characters: '⒈'."),
('me@\u037e.com', "The part after the @-sign contains invalid characters after Unicode normalization: ';'."),
('me@\u1fef.com', "The part after the @-sign contains invalid characters after Unicode normalization: '`'."),
('@example.com', 'There must be something before the @-sign.'),
@@ -404,9 +415,6 @@ def test_domain_literal() -> None:
('test@\n', 'The part after the @-sign contains invalid characters: U+000A.'),
('bad"quotes"@example.com', 'The email address contains invalid characters before the @-sign: \'"\'.'),
('obsolete."quoted".atom@example.com', 'The email address contains invalid characters before the @-sign: \'"\'.'),
- ('11111111112222222222333333333344444444445555555555666666666677777@example.com', 'The email address is too long before the @-sign (1 character too many).'),
- ('111111111122222222223333333333444444444455555555556666666666777777@example.com', 'The email address is too long before the @-sign (2 characters too many).'),
- ('\uFB2C111111122222222223333333333444444444455555555556666666666777777@example.com', 'After Unicode normalization: The email address is too long before the @-sign (2 characters too many).'),
('me@1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.11111111112222222222333333333344444444445555555555.com', 'The email address is too long after the @-sign (1 character too many).'),
('me@中1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444.com', 'The email address is too long after the @-sign (1 byte too many after IDNA encoding).'),
('me@\uFB2C1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444.com', 'The email address is too long after the @-sign (5 bytes too many after IDNA encoding).'),
@@ -456,6 +464,22 @@ def test_email_invalid_syntax(email_input: str, error_msg: str) -> None:
assert str(exc_info.value) == error_msg
+@pytest.mark.parametrize(
+ 'email_input,error_msg',
+ [
+ ('11111111112222222222333333333344444444445555555555666666666677777@example.com', 'The email address is too long before the @-sign (1 character too many).'),
+ ('111111111122222222223333333333444444444455555555556666666666777777@example.com', 'The email address is too long before the @-sign (2 characters too many).'),
+ ('\uFB2C111111122222222223333333333444444444455555555556666666666777777@example.com', 'After Unicode normalization: The email address is too long before the @-sign (2 characters too many).'),
+ ])
+def test_email_invalid_syntax_strict(email_input: str, error_msg: str) -> None:
+ # These addresses are only rejected in strict mode. Deliverability
+ # checks are turned off so that only syntax handling is exercised.
+ validate_email(email_input, check_deliverability=False) # pass without strict
+ with pytest.raises(EmailSyntaxError) as exc_info:
+ validate_email(email_input, strict=True, check_deliverability=False)
+ assert str(exc_info.value) == error_msg
+
+
@pytest.mark.parametrize(
'email_input',
[
@@ -717,6 +741,11 @@ def test_pyisemail_tests(email_input: str, status: str) -> None:
validate_email(email_input, test_environment=True)
validate_email(email_input, allow_quoted_local=True, allow_domain_literal=True, test_environment=True)
+ elif status == "ISEMAIL_RFC5322_LOCAL_TOOLONG":
+ # Requires strict.
+ with pytest.raises(EmailSyntaxError):
+ validate_email(email_input, strict=True, test_environment=True)
+
elif status == "ISEMAIL_RFC5321_QUOTEDSTRING":
# Quoted-literal local parts are only valid with an option.
with pytest.raises(EmailSyntaxError):