diff --git a/.travis.yml b/.travis.yml index 100a5ed..d0d8d02 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,16 +1,14 @@ os: linux -dist: xenial +dist: bionic language: python cache: pip python: -#- '2.7' -#- '3.4' -- '3.5' - '3.6' -- '3.7' -- '3.8' +#- '3.7' +#- '3.8' - '3.9' +- '3.10' install: - make install diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..91738d5 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,111 @@ +Version 1.3.1 (January 21, 2023) +-------------------------------- + +* The new SPF 'v=spf1 -all' (reject-all) deliverability check is removed in most cases. It now is performed only for domains that do not have MX records but do have an A/AAAA fallback record. + +Version 1.3.0 (September 18, 2022) +---------------------------------- + +* Deliverability checks now check for 'v=spf1 -all' SPF records as a way to reject more bad domains. +* Special use domain names now raise EmailSyntaxError instead of EmailUndeliverableError since they are performed even if check_deliverability is off. +* New module-level attributes are added to override the default values of the keyword arguments and the special-use domains list. +* The keyword arguments of the public methods are now marked as keyword-only, ending support for Python 2.x. +* [pyIsEmail](https://github.com/michaelherold/pyIsEmail)'s test cases are added to the tests. +* Recommend that check_deliverability be set to False for validation on login pages. +* Added an undocumented globally_deliverable option. + +Version 1.2.1 (May 1, 2022) +--------------------------- + +* example.com/net/org are removed from the special-use reserved domain names list so that they do not raise exceptions if check_deliverability is off. +* Improved README. + +Version 1.2.0 (April 24, 2022) +------------------------------ + +* Reject domains with NULL MX records (when deliverability checks + are turned on). +* Reject unsafe unicode characters. 
(Some of these checks you should + be doing on all of your user inputs already!) +* Reject most special-use reserved domain names with EmailUndeliverableError. A new `test_environment` option is added for using `@*.test` domains. +* Improved safety of exception text by not repeating an unsafe input character in the message. +* Minor fixes in tests. +* Invoking the module as a standalone program now caches DNS queries. +* Improved README. + +Version 1.1.3 (June 12, 2021) +----------------------------- + +* Allow passing a custom dns_resolver so that a DNS cache and a custom timeout can be set. + +Version 1.1.2 (Nov 5, 2020) +--------------------------- + +* Fix invoking the module as a standalone program. +* Fix deprecation warning in Python 3.8. +* Code improvements. +* Improved README. + +Version 1.1.1 (May 19, 2020) +---------------------------- + +* Fix exception when DNS queries time-out. +* Improved README. + +Version 1.1.0 (April 30, 2020) +------------------------------ + +* The main function now returns an object with attributes rather than a dict with keys, but accessing the object in the old way is still supported. +* Added overall email address length checks. +* Minor tweak to regular expressions. +* Improved error messages. +* Added tests. +* Linted source code files; changed README to Markdown. + +Version 1.0.5 (Oct 18, 2019) +---------------------------- + +* Prevent resolving domain names as if they were not fully qualified using a local search domain setting. + +Version 1.0.4 (May 2, 2019) +--------------------------- + +* Added a timeout argument for DNS queries. +* The wheel distribution is now a universal wheel. +* Improved README. + +Version 1.0.3 (Sept 12, 2017) +----------------------------- + +* Added a wheel distribution for easier installation. + +Version 1.0.2 (Dec 30, 2016) +---------------------------- + +* Fix dnspython package name in Python 3. +* Improved README. 
+ +Version 1.0.1 (March 6, 2016) +----------------------------- + +* Fixed minor errors. + +Version 1.0.0 (Sept 5, 2015) +---------------------------- + +* Fail domains with a leading period. +* Improved error messages. +* Added tests. + +Version 0.5.0 (June 15, 2015) +----------------------------- + +* Use IDNA 2008 instead of IDNA 2003 and use the idna package's UTS46 normalization instead of our own. +* Fixes for Python 2. +* Improved error messages. +* Improved README. + +Version 0.1.0 (April 21, 2015) +------------------------------ + +Initial release! diff --git a/README.md b/README.md index 929b525..241a809 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ email-validator: Validate Email Addresses ========================================= A robust email address syntax and deliverability validation library for -Python 2.7/3.4+ by [Joshua Tauberer](https://joshdata.me). +Python by [Joshua Tauberer](https://joshdata.me). This library validates that a string is of the form `name@example.com`. This is the sort of validation you would want for an email-based login form on @@ -16,7 +16,7 @@ Key features: to end users). * (optionally) Checks deliverability: Does the domain name resolve? And you can override the default DNS resolver. * Supports internationalized domain names and (optionally) - internationalized local parts. + internationalized local parts, but blocks unsafe characters. * Normalizes email addresses (super important for internationalized addresses! see below). @@ -27,10 +27,13 @@ And this library does NOT permit obsolete forms of email addresses, so if you need strict validation against the email specs exactly, use [pyIsEmail](https://github.com/michaelherold/pyIsEmail). -This library was first published in 2015. The current version is 1.1.1 -(posted May 19, 2020). 
**Starting in version 1.1.0, the type of the value returned -from `validate_email` has changed, but dict-style access to the validated -address information still works, so it is backwards compatible.** +This library is tested with Python 3.6+ but should work in earlier versions: + +[![Build Status](https://app.travis-ci.com/JoshData/python-email-validator.svg?branch=main)](https://app.travis-ci.com/JoshData/python-email-validator) + +View the [CHANGELOG / Release Notes](CHANGELOG.md) for the version history of changes in the library. Occasionally this README is ahead of the latest published package --- see the CHANGELOG for details. + +--- Installation ------------ @@ -43,63 +46,55 @@ pip install email-validator `pip3` also works. -Usage ------ +Quick Start +----------- If you're validating a user's email address before creating a user -account, you might do this: +account in your application, you might do this: ```python from email_validator import validate_email, EmailNotValidError email = "my+address@mydomain.tld" +is_new_account = True # False for login pages try: - # Validate. - valid = validate_email(email) - - # Update with the normalized form. - email = valid.email + # Check that the email address is valid. + validation = validate_email(email, check_deliverability=is_new_account) + + # Take the normalized form of the email address + # for all logic beyond this point (especially + # before going to a database query where equality + # may not take into account Unicode normalization). + email = validation.email except EmailNotValidError as e: - # email is not valid, exception message is human-readable + # Email is not valid. + # The exception message is human-readable. print(str(e)) ``` This validates the address and gives you its normalized form. You should -put the normalized form in your database and always normalize before -checking if an address is in your database. 
- -When validating many email addresses or to control the timeout (the default is 15 seconds), create a caching [dns.resolver.Resolver](https://dnspython.readthedocs.io/en/latest/resolver-class.html) to reuse in each call: - -```python -from email_validator import validate_email, caching_resolver +**put the normalized form in your database** and always normalize before +checking if an address is in your database. When using this in a login form, +set `check_deliverability` to `False` to avoid unnecessary DNS queries. -resolver = caching_resolver(timeout=10) - -while True: - valid = validate_email(email, dns_resolver=resolver) -``` - -The validator will accept internationalized email addresses, but not all -mail systems can send email to an addresses with non-ASCII characters in -the *local* part of the address (before the @-sign). See the `allow_smtputf8` -option below. +Usage +----- - -Overview --------- +### Overview The module provides a function `validate_email(email_address)` which -takes an email address (either a `str` or ASCII `bytes`) and: +takes an email address (either a `str` or `bytes`, but only non-internationalized +addresses are allowed when passing a `bytes`) and: - Raises a `EmailNotValidError` with a helpful, human-readable error message explaining why the email address is not valid, or -- Returns an object with a normalized form of the email address and - other information about it. +- Returns an object with a normalized form of the email address (which + you should use!) and other information about it. When an email address is not valid, `validate_email` raises either an `EmailSyntaxError` if the form of the address is invalid or an -`EmailUndeliverableError` if the domain name does not resolve. Both +`EmailUndeliverableError` if the domain name fails DNS checks. Both exception classes are subclasses of `EmailNotValidError`, which in turn is a subclass of `ValueError`. 
@@ -112,7 +107,9 @@ one uses anymore even though they are still valid and deliverable, since they will probably give you grief if you're using email for login. (See later in the document about that.) -The validator checks that the domain name in the email address resolves. +The validator checks that the domain name in the email address has a +DNS MX record (except a NULL MX record) indicating that it can receive +email (or a fallback A-record, see below). There is nothing to be gained by trying to actually contact an SMTP server, so that's not done here. For privacy, security, and practicality reasons servers are good at not giving away whether an address is @@ -121,26 +118,57 @@ can bounce mail after a delay, and bounced mail may indicate a temporary failure of a good email address (sometimes an intentional failure, like greylisting). -The function also accepts the following keyword arguments (default as -shown): +### Options + +The `validate_email` function also accepts the following keyword arguments +(defaults are as shown below): `allow_smtputf8=True`: Set to `False` to prohibit internationalized addresses that would require the - [SMTPUTF8](https://tools.ietf.org/html/rfc6531) extension. + [SMTPUTF8](https://tools.ietf.org/html/rfc6531) extension. You can also set `email_validator.ALLOW_SMTPUTF8` to `False` to turn it off for all calls by default. -`check_deliverability=True`: Set to `False` to skip the domain name resolution check. +`check_deliverability=True`: If true, a DNS query is made to check that a non-null MX record is present for the domain-part of the email address (or if not, an A/AAAA record as an MX fallback can be present but in that case a reject-all SPF record must not be present). Set to `False` to skip this DNS-based check. DNS is slow and sometimes unavailable, so consider whether these checks are useful for your use case. 
It is recommended to pass `False` when performing validation for login pages (but not account creation pages) since re-validation of a previously validated domain in your database by querying DNS at every login is probably undesirable. You can also set `email_validator.CHECK_DELIVERABILITY` to `False` to turn this off for all calls by default. `allow_empty_local=False`: Set to `True` to allow an empty local part (i.e. `@example.com`), e.g. for validating Postfix aliases. `dns_resolver=None`: Pass an instance of [dns.resolver.Resolver](https://dnspython.readthedocs.io/en/latest/resolver-class.html) to control the DNS resolver including setting a timeout and [a cache](https://dnspython.readthedocs.io/en/latest/resolver-caching.html). The `caching_resolver` function shown above is a helper function to construct a dns.resolver.Resolver with a [LRUCache](https://dnspython.readthedocs.io/en/latest/resolver-caching.html#dns.resolver.LRUCache). Reuse the same resolver instance across calls to `validate_email` to make use of the cache. +`test_environment=False`: DNS-based deliverability checks are disabled and `test` and `subdomain.test` domain names are permitted (see below). You can also set `email_validator.TEST_ENVIRONMENT` to `True` to turn it on for all calls by default. + +### DNS timeout and cache + +When validating many email addresses or to control the timeout (the default is 15 seconds), create a caching [dns.resolver.Resolver](https://dnspython.readthedocs.io/en/latest/resolver-class.html) to reuse in each call. 
The `caching_resolver` function returns one easily for you: + +```python +from email_validator import validate_email, caching_resolver + +resolver = caching_resolver(timeout=10) + +while True: + email = validate_email(email, dns_resolver=resolver).email +``` + +### Test addresses + +This library rejects email addresses that use the [Special Use Domain Names](https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.xhtml) `invalid`, `localhost`, `test`, and some others by raising `EmailSyntaxError`. This is to protect your system from abuse: You probably don't want a user to be able to cause an email to be sent to `localhost`. However, in your non-production test environments you may want to use `@test` or `@myname.test` email addresses. There are three ways you can allow this: + +1. Add `test_environment=True` to the call to `validate_email` (see above). +2. Set `email_validator.TEST_ENVIRONMENT` to `True`. +3. Remove the special-use domain name that you want to use from `email_validator.SPECIAL_USE_DOMAIN_NAMES`: + +```python +import email_validator +email_validator.SPECIAL_USE_DOMAIN_NAMES.remove("test") +``` + +It is tempting to use `@example.com/net/org` in tests. These domains are reserved to IANA for use in documentation so there is no risk of accidentally emailing someone at those domains. But beware that this library will reject these domain names if DNS-based deliverability checks are not disabled because these domains do not resolve to domains that accept email. In tests, consider using your own domain name or `@test` or `@myname.test` instead. Internationalized email addresses --------------------------------- The email protocol SMTP and the domain name system DNS have historically -only allowed ASCII characters in email addresses and domain names, +only allowed English (ASCII) characters in email addresses and domain names, respectively. 
Each has adapted to internationalization in a separate way, creating two separate aspects to email address internationalization. @@ -166,11 +194,33 @@ using the [idna](https://github.com/kjd/idna) module by Kim Davies. ### Internationalized local parts The second sort of internationalization is internationalization in the -*local* part of the address (before the @-sign). These email addresses -require that your mail submission library and the mail servers along the -route to the destination, including your own outbound mail server, all -support the [SMTPUTF8 (RFC 6531)](https://tools.ietf.org/html/rfc6531) -extension. Support for SMTPUTF8 varies. +*local* part of the address (before the @-sign). In non-internationalized +email addresses, only English letters, numbers, and some punctuation +(`._!#$%&'^``*+-=~/?{|}`) are allowed. In internationalized email address +local parts, a wider range of Unicode characters are allowed. + +A surprisingly large number of Unicode characters are not safe to display, +especially when the email address is concatenated with other text, so this +library tries to protect you by not permitting reserved, noncharacter, private use, +formatting (which can be used to alter the display order of characters), +whitespace, and control characters, and combining characters +as the first character (so that they cannot combine with something outside +of the email address string). See https://qntm.org/safe and https://trojansource.codes/ +for relevant prior work. (Other than whitespace, these are checks that +you should be applying to nearly all user inputs in a security-sensitive +context.) + +These character checks are performed after Unicode normalization (see below), +so you are only fully protected if you replace all user-provided email addresses +with the normalized email address string returned by this library. 
This does not +guard against the well known problem that many Unicode characters look alike +(or are identical), which can be used to fool humans reading displayed text. + +Email addresses with these non-ASCII characters require that your mail +submission library and the mail servers along the route to the destination, +including your own outbound mail server, all support the +[SMTPUTF8 (RFC 6531)](https://tools.ietf.org/html/rfc6531) extension. +Support for SMTPUTF8 varies. See the `allow_smtputf8` parameter. ### If you know ahead of time that SMTPUTF8 is not supported by your mail submission stack @@ -201,12 +251,14 @@ part is converted to [IDNA ASCII](https://tools.ietf.org/html/rfc5891). (You probably should not do this at account creation time so you don't change the user's login information without telling them.) -### UCS-4 support required for Python 2.7 +### Support for Python 2.7 + +The last version of this library supporting Python 2.x is version 1.2.1. -Note that when using Python 2.7, it is required that it was built with +When using Python 2.x, it is required that it was built with UCS-4 support (see -[here](https://stackoverflow.com/questions/29109944/python-returns-length-of-2-for-single-unicode-character-string)); -otherwise emails with unicode characters outside of the BMP (Basic +[here](https://stackoverflow.com/questions/29109944/python-returns-length-of-2-for-single-unicode-character-string)). +Without UCS-4 support, unicode characters outside of the BMP (Basic Multilingual Plane) will not validate correctly. 
Normalization @@ -262,9 +314,7 @@ ValidatedEmail( ascii_email='test@joshdata.me', ascii_local_part='test', ascii_domain='joshdata.me', - smtputf8=False, - mx=[(10, 'box.occams.info')], - mx_fallback_type=None) + smtputf8=False) ``` For the fictitious address `example@ツ.life`, which has an @@ -331,6 +381,7 @@ are: | `smtputf8` | A boolean indicating that the [SMTPUTF8](https://tools.ietf.org/html/rfc6531) feature of your mail relay will be required to transmit messages to this address because the local part of the address has non-ASCII characters (the local part cannot be IDNA-encoded). If `allow_smtputf8=False` is passed as an argument, this flag will always be false because an exception is raised if it would have been true. | | `mx` | A list of (priority, domain) tuples of MX records specified in the DNS for the domain (see [RFC 5321 section 5](https://tools.ietf.org/html/rfc5321#section-5)). May be `None` if the deliverability check could not be completed because of a temporary issue like a timeout. | | `mx_fallback_type` | `None` if an `MX` record is found. If no MX records are actually specified in DNS and instead are inferred, through an obsolete mechanism, from A or AAAA records, the value is the type of DNS record used instead (`A` or `AAAA`). May be `None` if the deliverability check could not be completed because of a temporary issue like a timeout. | +| `spf` | Any SPF record found while checking deliverability. Only set if the SPF record is queried. | Assumptions ----------- @@ -340,14 +391,21 @@ strictly conform to the standards. Many email address forms are obsolete or likely to cause trouble: * The validator assumes the email address is intended to be - deliverable on the public Internet using DNS, and so the domain part - of the email address must be a resolvable domain name. -* The "quoted string" form of the local part of the email address (RFC - 5321 4.1.2) is not permitted --- no one uses this anymore anyway. + usable on the public Internet. 
The domain part + of the email address must be a resolvable domain name + (see the deliverability checks described above). + Most [Special Use Domain Names](https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.xhtml) + and their subdomains, as well as + domain names without a `.`, are rejected as a syntax error + (except see the `test_environment` parameter above). +* Obsolete email syntaxes are rejected: + The "quoted string" form of the local part of the email address (RFC + 5321 4.1.2) is not permitted. Quoted forms allow multiple @-signs, space characters, and other - troublesome conditions. -* The "literal" form for the domain part of an email address (an - IP address) is not accepted --- no one uses this anymore anyway. + troublesome conditions. The unusual [(comment) syntax](https://github.com/JoshData/python-email-validator/issues/77) + is also rejected. The "literal" form for the domain part of an email address (an + IP address in brackets) is rejected. Other obsolete and deprecated syntaxes are + rejected. No one uses these forms anymore. Testing ------- @@ -366,17 +424,16 @@ The package is distributed as a universal wheel and as a source package. To release: -* Update the version number. -* Follow the steps below to publish source and a universal wheel to pypi. +* Update CHANGELOG.md. +* Update the version number in setup.cfg. +* Make & push a commit with the new version number. +* Make & push a tag (`git tag v... && git push --tags`). * Make a release at https://github.com/JoshData/python-email-validator/releases/new. +* Follow the steps below to publish source and a universal wheel to pypi. 
```sh -pip3 install twine -rm -rf dist -python3 setup.py sdist -python3 setup.py bdist_wheel -twine upload dist/* -git tag v1.0.XXX # replace with version in setup.py +./release_to_pypi.sh +git tag v$(grep version setup.cfg | sed "s/.*= //") git push --tags ``` diff --git a/email_validator/__init__.py b/email_validator/__init__.py index f960f67..c86584a 100644 --- a/email_validator/__init__.py +++ b/email_validator/__init__.py @@ -7,6 +7,13 @@ import dns.exception import idna # implements IDNA 2008; Python's codec is only IDNA 2003 +# Default values for keyword arguments. + +ALLOW_SMTPUTF8 = True +CHECK_DELIVERABILITY = True +TEST_ENVIRONMENT = False +GLOBALLY_DELIVERABLE = True +DEFAULT_TIMEOUT = 15 # secs # Based on RFC 2822 section 3.2.4 / RFC 5322 section 3.2.3, these # characters are permitted in email addresses (not taking into @@ -20,8 +27,8 @@ # addresses to also include three specific ranges of UTF8 defined in # RFC3629 section 4, which appear to be the Unicode code points from # U+0080 to U+10FFFF. -ATEXT_UTF8 = ATEXT + u"\u0080-\U0010FFFF" -DOT_ATOM_TEXT_UTF8 = '[' + ATEXT_UTF8 + ']+(?:\\.[' + ATEXT_UTF8 + ']+)*' +ATEXT_INTL = ATEXT + u"\u0080-\U0010FFFF" +DOT_ATOM_TEXT_INTL = '[' + ATEXT_INTL + ']+(?:\\.[' + ATEXT_INTL + ']+)*' # The domain part of the email address, after IDNA (ASCII) encoding, # must also satisfy the requirements of RFC 952/RFC 1123 which restrict @@ -36,6 +43,71 @@ LOCAL_PART_MAX_LENGTH = 64 DOMAIN_MAX_LENGTH = 255 +# IANA Special Use Domain Names +# Last Updated 2021-09-21 +# https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.txt +# +# The domain names without dots would be caught by the check that the domain +# name in an email address must have a period, but this list will also catch +# subdomains of these domains, which are also reserved. +SPECIAL_USE_DOMAIN_NAMES = [ + # The "arpa" entry here is consolidated from a lot of arpa subdomains + # for private address (i.e. 
non-routable IP addresses like 172.16.x.x) + # reverse mapping, plus some other subdomains. Although RFC 6761 says + # that application software should not treat these domains as special, + # they are private-use domains and so cannot have globally deliverable + # email addresses, which is an assumption of this library, and probably + # all of arpa is similarly special-use, so we reject it all. + "arpa", + + # RFC 6761 says applications "SHOULD NOT" treat the "example" domains + # as special, i.e. applications should accept these domains. + # + # The domain "example" alone fails our syntax validation because it + # lacks a dot (we assume no one has an email address on a TLD directly). + # "@example.com/net/org" will currently fail DNS-based deliverability + # checks because IANA publishes a NULL MX for these domains, and + # "@mail.example[.com/net/org]" and other subdomains will fail DNS- + # based deliverability checks because IANA does not publish MX or A + # DNS records for these subdomains. + # "example", # i.e. "www.example" + # "example.com", + # "example.net", + # "example.org", + + # RFC 6761 says that applications are permitted to treat this domain + # as special and that DNS should return an immediate negative response, + # so we also immediately reject this domain, which also follows the + # purpose of the domain. + "invalid", + + # RFC 6762 says that applications "may" treat ".local" as special and + # that "name resolution APIs and libraries SHOULD recognize these names + # as special," and since ".local" has no global definition, we reject + # it, as we expect email addresses to be globally routable. + "local", + + # RFC 6761 says that applications (like this library) are permitted + # to treat "localhost" as special, and since it cannot have a globally + # deliverable email address, we reject it. 
+ "localhost", + + # RFC 7686 says "applications that do not implement the Tor protocol + # SHOULD generate an error upon the use of .onion and SHOULD NOT + # perform a DNS lookup. + "onion", + + # Although RFC 6761 says that application software should not treat + # these domains as special, it also warns users that the address may + # resolve differently in different systems, and therefore it cannot + # have a globally routable email address, which is an assumption of + # this library, so we reject "@test" and "@*.test" addresses, unless + # the test_environment keyword argument is given, to allow their use + # in application-level test environments. These domains will generally + # fail deliverability checks because "test" is not an actual TLD. + "test", +] + # ease compatibility in type checking if sys.version_info >= (3,): unicode_class = str @@ -47,8 +119,6 @@ DOT_ATOM_TEXT = DOT_ATOM_TEXT.decode("ascii") ATEXT_HOSTNAME = ATEXT_HOSTNAME.decode("ascii") -DEFAULT_TIMEOUT = 15 # secs - class EmailNotValidError(ValueError): """Parent class of all exceptions raised by this module.""" @@ -142,6 +212,8 @@ def __getitem__(self, key): """Tests use this.""" def __eq__(self, other): + if not isinstance(other, ValidatedEmail): + return False return ( self.email == other.email and self.local_part == other.local_part @@ -180,7 +252,9 @@ def __get_length_reason(addr, utf8=False, limit=EMAIL_MAX_LENGTH): return reason.format(prefix, diff, suffix) -def caching_resolver(timeout=DEFAULT_TIMEOUT, cache=None): +def caching_resolver(*, timeout=None, cache=None): + if timeout is None: + timeout = DEFAULT_TIMEOUT resolver = dns.resolver.Resolver() resolver.cache = cache or dns.resolver.LRUCache() resolver.lifetime = timeout # timeout, in seconds @@ -189,10 +263,14 @@ def caching_resolver(timeout=DEFAULT_TIMEOUT, cache=None): def validate_email( email, - allow_smtputf8=True, + # /, # not supported in Python 3.6, 3.7 + *, + allow_smtputf8=None, allow_empty_local=False, - 
check_deliverability=True, - timeout=DEFAULT_TIMEOUT, + check_deliverability=None, + test_environment=None, + globally_deliverable=GLOBALLY_DELIVERABLE, + timeout=None, dns_resolver=None ): """ @@ -201,6 +279,16 @@ def validate_email( but if bytes it must be ASCII-only. """ + # Fill in default values of arguments. + if allow_smtputf8 is None: + allow_smtputf8 = ALLOW_SMTPUTF8 + if check_deliverability is None: + check_deliverability = CHECK_DELIVERABILITY + if test_environment is None: + test_environment = TEST_ENVIRONMENT + if timeout is None: + timeout = DEFAULT_TIMEOUT + # Allow email to be a str or bytes instance. If bytes, # it must be ASCII because that's how the bytes work # on the wire with SMTP. @@ -228,7 +316,7 @@ def validate_email( ret.smtputf8 = local_part_info["smtputf8"] # Validate the email address's domain part syntax and get a normalized form. - domain_part_info = validate_email_domain_part(parts[1]) + domain_part_info = validate_email_domain_part(parts[1], test_environment=test_environment, globally_deliverable=globally_deliverable) ret.domain = domain_part_info["domain"] ret.ascii_domain = domain_part_info["ascii_domain"] @@ -278,15 +366,14 @@ def validate_email( reason = "(when encoded in bytes)" raise EmailSyntaxError("The email address is too long {}.".format(reason)) - if check_deliverability: - # Validate the email address's deliverability and update the - # return dict with metadata. + if check_deliverability and not test_environment: + # Validate the email address's deliverability using DNS + # and update the return dict with metadata. 
deliverability_info = validate_email_deliverability( ret["domain"], ret["domain_i18n"], timeout, dns_resolver ) - if "mx" in deliverability_info: - ret.mx = deliverability_info["mx"] - ret.mx_fallback_type = deliverability_info["mx-fallback"] + for key, value in deliverability_info.items(): + setattr(ret, key, value) return ret @@ -328,11 +415,11 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals else: # The local part failed the ASCII check. Now try the extended internationalized requirements. - m = re.match(DOT_ATOM_TEXT_UTF8 + "\\Z", local) + m = re.match(DOT_ATOM_TEXT_INTL + "\\Z", local) if not m: # It's not a valid internationalized address either. Report which characters were not valid. bad_chars = ', '.join(sorted(set( - c for c in local if not re.match(u"[" + (ATEXT if not allow_smtputf8 else ATEXT_UTF8) + u"]", c) + unicodedata.name(c, repr(c)) for c in local if not re.match(u"[" + (ATEXT if not allow_smtputf8 else ATEXT_INTL) + u"]", c) ))) raise EmailSyntaxError("The email address contains invalid characters before the @-sign: %s." % bad_chars) @@ -346,6 +433,40 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals # so we'll return the normalized local part in the return value. local = unicodedata.normalize("NFC", local) + # Check for unsafe characters. + # Some of this may be redundant with the range U+0080 to U+10FFFF that is checked + # by DOT_ATOM_TEXT_INTL. + for i, c in enumerate(local): + category = unicodedata.category(c) + if category[0] in ("L", "N", "P", "S"): + # letters, numbers, punctuation, and symbols are permitted + pass + elif category[0] == "M": + # combining character in first position would combine with something + # outside of the email address if concatenated to the right, but are + # otherwise permitted + if i == 0: + raise EmailSyntaxError("The email address contains an initial invalid character (%s)." 
+ % unicodedata.name(c, repr(c))) + elif category[0] in ("Z", "C"): + # spaces and line/paragraph characters (Z) and + # control, format, surrogate, private use, and unassigned code points (C) + raise EmailSyntaxError("The email address contains an invalid character (%s)." + % unicodedata.name(c, repr(c))) + else: + # All categories should be handled above, but in case there is something new + # in the future. + raise EmailSyntaxError("The email address contains a character (%s; category %s) that may not be safe." + % (unicodedata.name(c, repr(c)), category)) + + # Try encoding to UTF-8. Failure is possible with some characters like + # surrogate code points, but those are checked above. Still, we don't + # want to have an unhandled exception later. + try: + local.encode("utf8") + except ValueError: + raise EmailSyntaxError("The email address contains an invalid character.") + # Flag that SMTPUTF8 will be required for deliverability. return { "local_part": local, @@ -354,7 +475,7 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals } -def validate_email_domain_part(domain): +def validate_email_domain_part(domain, test_environment=False, globally_deliverable=True): # Empty? if len(domain) == 0: raise EmailSyntaxError("There must be something after the @-sign.") @@ -432,14 +553,31 @@ def validate_email_domain_part(domain): if not m: raise EmailSyntaxError("The email address contains invalid characters after the @-sign.") - # All publicly deliverable addresses have domain named with at least - # one period. We also know that all TLDs end with a letter. - if "." not in ascii_domain: - raise EmailSyntaxError("The domain name %s is not valid. It should have a period." % domain_i18n) - if not re.search(r"[A-Za-z]\Z", ascii_domain): - raise EmailSyntaxError( - "The domain name %s is not valid. It is not within a valid top-level domain." 
% domain_i18n - ) + if globally_deliverable: + # All publicly deliverable addresses have domain named with at least + # one period, and we'll consider the lack of a period a syntax error + # since that will match people's sense of what an email address looks + # like. We'll skip this in test environments to allow '@test' email + # addresses. + if "." not in ascii_domain and not (ascii_domain == "test" and test_environment): + raise EmailSyntaxError("The domain name %s is not valid. It should have a period." % domain_i18n) + + # We also know that all TLDs currently end with a letter. + if not re.search(r"[A-Za-z]\Z", ascii_domain): + raise EmailSyntaxError( + "The domain name %s is not valid. It is not within a valid top-level domain." % domain_i18n + ) + + # Check special-use and reserved domain names. + # Some might fail DNS-based deliverability checks, but that + # can be turned off, so we should fail them all sooner. + for d in SPECIAL_USE_DOMAIN_NAMES: + # See the note near the definition of SPECIAL_USE_DOMAIN_NAMES. + if d == "test" and test_environment: + continue + + if ascii_domain == d or ascii_domain.endswith("." + d): + raise EmailSyntaxError("The domain name %s is a special-use or reserved name that cannot be used with email." % domain_i18n) # Return the IDNA ASCII-encoded form of the domain, which is how it # would be transmitted on the wire (except when used with SMTPUTF8 @@ -456,6 +594,8 @@ def validate_email_domain_part(domain): def validate_email_deliverability(domain, domain_i18n, timeout=DEFAULT_TIMEOUT, dns_resolver=None): # Check that the domain resolves to an MX record. If there is no MX record, # try an A or AAAA record which is a deprecated fallback for deliverability. + # (Note that changing the DEFAULT_TIMEOUT module-level attribute + # will not change the default value of this method's timeout argument.) # If no dns.resolver.Resolver was given, get dnspython's default resolver. # Override the default resolver's timeout. 
This may affect other uses of @@ -464,6 +604,8 @@ def validate_email_deliverability(domain, domain_i18n, timeout=DEFAULT_TIMEOUT, dns_resolver = dns.resolver.get_default_resolver() dns_resolver.lifetime = timeout + deliverability_info = {} + def dns_resolver_resolve_shim(domain, record): try: # dns.resolver.Resolver.resolve is new to dnspython 2.x. @@ -487,30 +629,63 @@ def dns_resolver_resolve_shim(domain, record): raise dns.exception.Timeout() try: - # Try resolving for MX records and get them in sorted priority order. + # Try resolving for MX records. response = dns_resolver_resolve_shim(domain, "MX") + + # For reporting, put them in priority order and remove the trailing dot in the qnames. mtas = sorted([(r.preference, str(r.exchange).rstrip('.')) for r in response]) - mx_fallback = None + + # Remove "null MX" records from the list (their value is (0, ".") but we've stripped + # trailing dots, so the 'exchange' is just ""). If there was only a null MX record, + # email is not deliverable. + mtas = [(preference, exchange) for preference, exchange in mtas + if exchange != ""] + if len(mtas) == 0: + raise EmailUndeliverableError("The domain name %s does not accept email." % domain_i18n) + + deliverability_info["mx"] = mtas + deliverability_info["mx_fallback_type"] = None + except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): - # If there was no MX record, fall back to an A record. + # If there was no MX record, fall back to an A record, as SMTP servers do. try: response = dns_resolver_resolve_shim(domain, "A") - mtas = [(0, str(r)) for r in response] - mx_fallback = "A" + deliverability_info["mx"] = [(0, str(r)) for r in response] + deliverability_info["mx_fallback_type"] = "A" except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): # If there was no A record, fall back to an AAAA record. 
try: response = dns_resolver_resolve_shim(domain, "AAAA") - mtas = [(0, str(r)) for r in response] - mx_fallback = "AAAA" + deliverability_info["mx"] = [(0, str(r)) for r in response] + deliverability_info["mx_fallback_type"] = "AAAA" except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): # If there was no MX, A, or AAAA record, then mail to # this domain is not deliverable. raise EmailUndeliverableError("The domain name %s does not exist." % domain_i18n) + # Check for a SPF reject-all record ("v=spf1 -all") which indicates + # no emails are sent from this domain (similar to a NULL MX record + # but for sending rather than receiving). In combination with the + # absence of an MX record, this is probably a good sign that the + # domain is not used for email. + try: + response = dns_resolver_resolve_shim(domain, "TXT") + for rec in response: + value = b"".join(rec.strings) + if value.startswith(b"v=spf1 "): + deliverability_info["spf"] = value.decode("ascii", errors='replace') + if value == b"v=spf1 -all": + raise EmailUndeliverableError("The domain name %s does not send email." % domain_i18n) + except dns.resolver.NoAnswer: + # No TXT records means there is no SPF policy, so we cannot take any action. + pass + except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN): + # Failure to resolve at this step will be ignored. + pass + except dns.exception.Timeout: # A timeout could occur for various reasons, so don't treat it as a failure. return { @@ -527,14 +702,10 @@ def dns_resolver_resolve_shim(domain, record): "There was an error while checking if the domain name in the email address is deliverable: " + str(e) ) - return { - "mx": mtas, - "mx-fallback": mx_fallback, - } + return deliverability_info def main(): - import sys import json def __utf8_input_shim(input_str): @@ -548,10 +719,12 @@ def __utf8_output_shim(output_str): return output_str if len(sys.argv) == 1: + # Validate the email addresses passed line-by-line on STDIN.
+ dns_resolver = caching_resolver() for line in sys.stdin: email = __utf8_input_shim(line.strip()) try: - validate_email(email) + validate_email(email, dns_resolver=dns_resolver) except EmailNotValidError as e: print(__utf8_output_shim("{} {}".format(email, e))) else: diff --git a/release_to_pypi.sh b/release_to_pypi.sh new file mode 100755 index 0000000..d8d5e05 --- /dev/null +++ b/release_to_pypi.sh @@ -0,0 +1,6 @@ +#!/bin/sh +pip3 install --upgrade twine +rm -rf dist +python3 setup.py sdist +python3 setup.py bdist_wheel +twine upload -u __token__ dist/* # username: __token__ password: pypi API token diff --git a/setup.cfg b/setup.cfg index 43ce496..6a92d0a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,7 @@ [metadata] name = email_validator -version = 1.1.3 -description = A robust email syntax and deliverability validation library for Python 2.x/3.x. +version = 1.3.1 +description = A robust email address syntax and deliverability validation library. long_description = file: README.md long_description_content_type = text/markdown url = https://github.com/JoshData/python-email-validator @@ -13,14 +13,11 @@ classifiers = Development Status :: 5 - Production/Stable Intended Audience :: Developers License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication - Programming Language :: Python :: 2 - Programming Language :: Python :: 2.7 Programming Language :: Python :: 3 - Programming Language :: Python :: 3.5 - Programming Language :: Python :: 3.6 Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 Topic :: Software Development :: Libraries :: Python Modules keywords = email address validator @@ -29,7 +26,7 @@ packages = find: install_requires = dnspython>=1.15.0 idna>=2.0.0 -python_requires = >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.* +python_requires = >=3.5 [options.entry_points] console_scripts = diff --git a/test_requirements.txt 
b/test_requirements.txt index e8431c4..38dab84 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,5 +1,26 @@ -coverage==4.5.4 -docutils==0.15.2 -flake8==3.7.9 -pytest==5.2.2 -pytest-cov==2.8.1 +# This file was generated by running +# pip install dnspython idna # from setup.cfg +# pip install pytest pytest-cov coverage flake8 +# pip freeze +# in a virtualenv with Python 3.6. (Some packages' latest versions +# are not compatible with Python 3.6, so we must pin versions for +# repeatable testing in earlier versions of Python.) +attrs==21.4.0 +coverage==6.2 +dnspython==2.2.1 +flake8==4.0.1 +idna==3.3 +importlib-metadata==4.2.0 +iniconfig==1.1.1 +mccabe==0.6.1 +packaging==21.3 +pluggy==1.0.0 +py==1.11.0 +pycodestyle==2.8.0 +pyflakes==2.4.0 +pyparsing==3.0.7 +pytest==7.0.1 +pytest-cov==3.0.0 +tomli==1.2.3 +typing_extensions==4.1.1 +zipp==3.6.0 diff --git a/tests/test_main.py b/tests/test_main.py index d2fd923..adcbc1e 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,5 +1,5 @@ -from unittest import mock import dns.resolver +import re import pytest from email_validator import EmailSyntaxError, EmailUndeliverableError, \ validate_email, validate_email_deliverability, \ @@ -12,51 +12,51 @@ 'email_input,output', [ ( - 'Abc@example.com', + 'Abc@example.tld', ValidatedEmail( local_part='Abc', ascii_local_part='Abc', smtputf8=False, - ascii_domain='example.com', - domain='example.com', - email='Abc@example.com', - ascii_email='Abc@example.com', + ascii_domain='example.tld', + domain='example.tld', + email='Abc@example.tld', + ascii_email='Abc@example.tld', ), ), ( - 'Abc.123@example.com', + 'Abc.123@test-example.com', ValidatedEmail( local_part='Abc.123', ascii_local_part='Abc.123', smtputf8=False, - ascii_domain='example.com', - domain='example.com', - email='Abc.123@example.com', - ascii_email='Abc.123@example.com', + ascii_domain='test-example.com', + domain='test-example.com', + email='Abc.123@test-example.com', + 
ascii_email='Abc.123@test-example.com', ), ), ( - 'user+mailbox/department=shipping@example.com', + 'user+mailbox/department=shipping@example.tld', ValidatedEmail( local_part='user+mailbox/department=shipping', ascii_local_part='user+mailbox/department=shipping', smtputf8=False, - ascii_domain='example.com', - domain='example.com', - email='user+mailbox/department=shipping@example.com', - ascii_email='user+mailbox/department=shipping@example.com', + ascii_domain='example.tld', + domain='example.tld', + email='user+mailbox/department=shipping@example.tld', + ascii_email='user+mailbox/department=shipping@example.tld', ), ), ( - "!#$%&'*+-/=?^_`.{|}~@example.com", + "!#$%&'*+-/=?^_`.{|}~@example.tld", ValidatedEmail( local_part="!#$%&'*+-/=?^_`.{|}~", ascii_local_part="!#$%&'*+-/=?^_`.{|}~", smtputf8=False, - ascii_domain='example.com', - domain='example.com', - email="!#$%&'*+-/=?^_`.{|}~@example.com", - ascii_email="!#$%&'*+-/=?^_`.{|}~@example.com", + ascii_domain='example.tld', + domain='example.tld', + email="!#$%&'*+-/=?^_`.{|}~@example.tld", + ascii_email="!#$%&'*+-/=?^_`.{|}~@example.tld", ), ), ( @@ -142,43 +142,43 @@ ), ), ( - 'ñoñó@example.com', + 'ñoñó@example.tld', ValidatedEmail( local_part='ñoñó', smtputf8=True, - ascii_domain='example.com', - domain='example.com', - email='ñoñó@example.com', + ascii_domain='example.tld', + domain='example.tld', + email='ñoñó@example.tld', ), ), ( - '我買@example.com', + '我買@example.tld', ValidatedEmail( local_part='我買', smtputf8=True, - ascii_domain='example.com', - domain='example.com', - email='我買@example.com', + ascii_domain='example.tld', + domain='example.tld', + email='我買@example.tld', ), ), ( - '甲斐黒川日本@example.com', + '甲斐黒川日本@example.tld', ValidatedEmail( local_part='甲斐黒川日本', smtputf8=True, - ascii_domain='example.com', - domain='example.com', - email='甲斐黒川日本@example.com', + ascii_domain='example.tld', + domain='example.tld', + email='甲斐黒川日本@example.tld', ), ), ( - 'чебурашкаящик-с-апельсинами.рф@example.com', + 
'чебурашкаящик-с-апельсинами.рф@example.tld', ValidatedEmail( local_part='чебурашкаящик-с-апельсинами.рф', smtputf8=True, - ascii_domain='example.com', - domain='example.com', - email='чебурашкаящик-с-апельсинами.рф@example.com', + ascii_domain='example.tld', + domain='example.tld', + email='чебурашкаящик-с-апельсинами.рф@example.tld', ), ), ( @@ -211,6 +211,7 @@ def test_email_valid(email_input, output): @pytest.mark.parametrize( 'email_input,error_msg', [ + ('my@localhost', 'The domain name localhost is not valid. It should have a period.'), ('my@.leadingdot.com', 'An email address cannot have a period immediately after the @-sign.'), ('my@..leadingfwdot.com', 'An email address cannot have a period immediately after the @-sign.'), ('my@..twodots.com', 'An email address cannot have a period immediately after the @-sign.'), @@ -227,16 +228,16 @@ def test_email_valid(email_input, output): ('my@example\n.com', 'The domain name example\n.com contains invalid characters (Codepoint U+000A at position 8 of ' '\'example\\n\' not allowed).'), - ('.leadingdot@domain.com', 'The email address contains invalid characters before the @-sign: ..'), - ('..twodots@domain.com', 'The email address contains invalid characters before the @-sign: ..'), - ('twodots..here@domain.com', 'The email address contains invalid characters before the @-sign: ..'), + ('.leadingdot@domain.com', 'The email address contains invalid characters before the @-sign: FULL STOP.'), + ('..twodots@domain.com', 'The email address contains invalid characters before the @-sign: FULL STOP.'), + ('twodots..here@domain.com', 'The email address contains invalid characters before the @-sign: FULL STOP.'), ('me@⒈wouldbeinvalid.com', "The domain name ⒈wouldbeinvalid.com contains invalid characters (Codepoint U+2488 not allowed " "at position 1 in '⒈wouldbeinvalid.com')."), ('@example.com', 'There must be something before the @-sign.'), - ('\nmy@example.com', 'The email address contains invalid characters before the 
@-sign: \n.'), - ('m\ny@example.com', 'The email address contains invalid characters before the @-sign: \n.'), - ('my\n@example.com', 'The email address contains invalid characters before the @-sign: \n.'), + ('\nmy@example.com', 'The email address contains invalid characters before the @-sign: \'\\n\'.'), + ('m\ny@example.com', 'The email address contains invalid characters before the @-sign: \'\\n\'.'), + ('my\n@example.com', 'The email address contains invalid characters before the @-sign: \'\\n\'.'), ('11111111112222222222333333333344444444445555555555666666666677777@example.com', 'The email address is too long before the @-sign (1 character too many).'), ('111111111122222222223333333333444444444455555555556666666666777777@example.com', 'The email address is too long before the @-sign (2 characters too many).'), ('me@1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.111111111122222222223333333333444444444455555555556.com', 'The email address is too long after the @-sign.'), @@ -247,28 +248,288 @@ def test_email_valid(email_input, output): ('my.λong.address@1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444.info', 'The email address is too long (at least 1 character too many).'), ], ) -def test_email_invalid(email_input, error_msg): +def test_email_invalid_syntax(email_input, error_msg): + # Since these all have syntax errors, deliverability + # checks do not arise. 
with pytest.raises(EmailSyntaxError) as exc_info: validate_email(email_input) # print(f'({email_input!r}, {str(exc_info.value)!r}),') assert str(exc_info.value) == error_msg +@pytest.mark.parametrize( + 'email_input', + [ + ('me@anything.arpa'), + ('me@valid.invalid'), + ('me@link.local'), + ('me@host.localhost'), + ('me@onion.onion.onion'), + ('me@test.test.test'), + ], +) +def test_email_invalid_reserved_domain(email_input): + # Since these all fail deliverability from a static list, + # DNS deliverability checks do not arise. + with pytest.raises(EmailSyntaxError) as exc_info: + validate_email(email_input) + # print(f'({email_input!r}, {str(exc_info.value)!r}),') + assert "is a special-use or reserved name" in str(exc_info.value) + + +@pytest.mark.parametrize( + 'email_input', + [ + ('me@mail.example'), + ('me@example.com'), + ('me@mail.example.com'), + ], +) +def test_email_example_reserved_domain(email_input): + # These domains are not rejected by the static special-use check, + # so they are expected to fail the DNS deliverability checks instead.
+ with pytest.raises(EmailUndeliverableError) as exc_info: + validate_email(email_input) + # print(f'({email_input!r}, {str(exc_info.value)!r}),') + assert re.match(r"The domain name [a-z\.]+ does not (accept email|exist)\.", str(exc_info.value)) is not None + + +@pytest.mark.parametrize( + 'email_input', + [ + ('white space@test'), + ('\n@test'), + ('\u2005@test'), # four-per-em space (Zs) + ('\u009C@test'), # string terminator (Cc) + ('\u200B@test'), # zero-width space (Cf) + ('\u202Dforward-\u202Ereversed@test'), # BIDI (Cf) + ('\uD800@test'), # surrogate (Cs) + ('\uE000@test'), # private use (Co) + ('\uFDEF@test'), # unassigned (Cn) + ], +) +def test_email_unsafe_character(email_input): + # Check for various unsafe characters: + with pytest.raises(EmailSyntaxError) as exc_info: + validate_email(email_input, test_environment=True) + assert "invalid character" in str(exc_info.value) + + +def test_email_test_domain_name_in_test_environment(): + validate_email("anything@test", test_environment=True) + validate_email("anything@mycompany.test", test_environment=True) + + +# This is the pyIsEmail (https://github.com/michaelherold/pyIsEmail) test suite. 
+# +# The test data was extracted by: +# +# $ wget https://raw.githubusercontent.com/michaelherold/pyIsEmail/master/tests/data/tests.xml +# $ xmllint --xpath '/tests/test/address/text()' tests.xml > t1 +# $ xmllint --xpath "/tests/test[not(address='')]/diagnosis/text()" tests.xml > t2 +# +# tests = [] +# def fixup_char(c): +# if ord(c) >= 0x2400 and ord(c) <= 0x2432: +# c = chr(ord(c)-0x2400) +# return c +# for email, diagnosis in zip(open("t1"), open("t2")): +# email = email[:-1] # strip trailing \n but not more because trailing whitespace is significant +# email = "".join(fixup_char(c) for c in email).replace("&amp;", "&") +# tests.append([email, diagnosis.strip()]) +# print(repr(tests).replace("'], ['", "'],\n['")) +@pytest.mark.parametrize( + ('email_input', 'status'), + [ + ['test', 'ISEMAIL_ERR_NODOMAIN'], + ['@', 'ISEMAIL_ERR_NOLOCALPART'], + ['test@', 'ISEMAIL_ERR_NODOMAIN'], + # ['test@io', 'ISEMAIL_VALID'], # we reject domains without a dot, knowing they are not deliverable + ['@io', 'ISEMAIL_ERR_NOLOCALPART'], + ['@iana.org', 'ISEMAIL_ERR_NOLOCALPART'], + ['test@iana.org', 'ISEMAIL_VALID'], + ['test@nominet.org.uk', 'ISEMAIL_VALID'], + ['test@about.museum', 'ISEMAIL_VALID'], + ['a@iana.org', 'ISEMAIL_VALID'], + ['test.test@iana.org', 'ISEMAIL_VALID'], + ['.test@iana.org', 'ISEMAIL_ERR_DOT_START'], + ['test.@iana.org', 'ISEMAIL_ERR_DOT_END'], + ['test..iana.org', 'ISEMAIL_ERR_CONSECUTIVEDOTS'], + ['test_exa-mple.com', 'ISEMAIL_ERR_NODOMAIN'], + ['!#$%&`*+/=?^`{|}~@iana.org', 'ISEMAIL_VALID'], + ['test\\@test@iana.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'], + ['123@iana.org', 'ISEMAIL_VALID'], + ['test@123.com', 'ISEMAIL_VALID'], + ['abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm@iana.org', 'ISEMAIL_VALID'], + ['abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklmn@iana.org', 'ISEMAIL_RFC5322_LOCAL_TOOLONG'], + ['test@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm.com', 'ISEMAIL_RFC5322_LABEL_TOOLONG'],
['test@mason-dixon.com', 'ISEMAIL_VALID'], + ['test@-iana.org', 'ISEMAIL_ERR_DOMAINHYPHENSTART'], + ['test@iana-.com', 'ISEMAIL_ERR_DOMAINHYPHENEND'], + ['test@g--a.com', 'ISEMAIL_VALID'], + ['test@.iana.org', 'ISEMAIL_ERR_DOT_START'], + ['test@iana.org.', 'ISEMAIL_ERR_DOT_END'], + ['test@iana..com', 'ISEMAIL_ERR_CONSECUTIVEDOTS'], + ['abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghij', 'ISEMAIL_RFC5322_TOOLONG'], + ['a@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefg.hij', 'ISEMAIL_RFC5322_TOOLONG'], + ['a@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefg.hijk', 'ISEMAIL_RFC5322_DOMAIN_TOOLONG'], + ['"test"@iana.org', 'ISEMAIL_RFC5321_QUOTEDSTRING'], + ['""@iana.org', 'ISEMAIL_RFC5321_QUOTEDSTRING'], + ['"""@iana.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'], + ['"\\a"@iana.org', 'ISEMAIL_RFC5321_QUOTEDSTRING'], + ['"\\""@iana.org', 'ISEMAIL_RFC5321_QUOTEDSTRING'], + ['"\\"@iana.org', 'ISEMAIL_ERR_UNCLOSEDQUOTEDSTR'], + ['"\\\\"@iana.org', 'ISEMAIL_RFC5321_QUOTEDSTRING'], + ['test"@iana.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'], + ['"test@iana.org', 'ISEMAIL_ERR_UNCLOSEDQUOTEDSTR'], + ['"test"test@iana.org', 'ISEMAIL_ERR_ATEXT_AFTER_QS'], + ['test"text"@iana.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'], + ['"test""test"@iana.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'], + ['"test"."test"@iana.org', 'ISEMAIL_DEPREC_LOCALPART'], + ['"test\\ test"@iana.org', 'ISEMAIL_RFC5321_QUOTEDSTRING'], + 
['"test".test@iana.org', 'ISEMAIL_DEPREC_LOCALPART'], + ['"test\x00"@iana.org', 'ISEMAIL_ERR_EXPECTING_QTEXT'], + ['"test\\\x00"@iana.org', 'ISEMAIL_DEPREC_QP'], + ['"abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefghj"@iana.org', 'ISEMAIL_RFC5322_LOCAL_TOOLONG'], + ['"abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefg\\h"@iana.org', 'ISEMAIL_RFC5322_LOCAL_TOOLONG'], + ['test@[255.255.255.255]', 'ISEMAIL_RFC5321_ADDRESSLITERAL'], + ['test@a[255.255.255.255]', 'ISEMAIL_ERR_EXPECTING_ATEXT'], + ['test@[255.255.255]', 'ISEMAIL_RFC5322_DOMAINLITERAL'], + ['test@[255.255.255.255.255]', 'ISEMAIL_RFC5322_DOMAINLITERAL'], + ['test@[255.255.255.256]', 'ISEMAIL_RFC5322_DOMAINLITERAL'], + ['test@[1111:2222:3333:4444:5555:6666:7777:8888]', 'ISEMAIL_RFC5322_DOMAINLITERAL'], + ['test@[IPv6:1111:2222:3333:4444:5555:6666:7777]', 'ISEMAIL_RFC5322_IPV6_GRPCOUNT'], + ['test@[IPv6:1111:2222:3333:4444:5555:6666:7777:8888]', 'ISEMAIL_RFC5321_ADDRESSLITERAL'], + ['test@[IPv6:1111:2222:3333:4444:5555:6666:7777:8888:9999]', 'ISEMAIL_RFC5322_IPV6_GRPCOUNT'], + ['test@[IPv6:1111:2222:3333:4444:5555:6666:7777:888G]', 'ISEMAIL_RFC5322_IPV6_BADCHAR'], + ['test@[IPv6:1111:2222:3333:4444:5555:6666::8888]', 'ISEMAIL_RFC5321_IPV6DEPRECATED'], + ['test@[IPv6:1111:2222:3333:4444:5555::8888]', 'ISEMAIL_RFC5321_ADDRESSLITERAL'], + ['test@[IPv6:1111:2222:3333:4444:5555:6666::7777:8888]', 'ISEMAIL_RFC5322_IPV6_MAXGRPS'], + ['test@[IPv6::3333:4444:5555:6666:7777:8888]', 'ISEMAIL_RFC5322_IPV6_COLONSTRT'], + ['test@[IPv6:::3333:4444:5555:6666:7777:8888]', 'ISEMAIL_RFC5321_ADDRESSLITERAL'], + ['test@[IPv6:1111::4444:5555::8888]', 'ISEMAIL_RFC5322_IPV6_2X2XCOLON'], + ['test@[IPv6:::]', 'ISEMAIL_RFC5321_ADDRESSLITERAL'], + ['test@[IPv6:1111:2222:3333:4444:5555:255.255.255.255]', 'ISEMAIL_RFC5322_IPV6_GRPCOUNT'], + ['test@[IPv6:1111:2222:3333:4444:5555:6666:255.255.255.255]', 'ISEMAIL_RFC5321_ADDRESSLITERAL'], + ['test@[IPv6:1111:2222:3333:4444:5555:6666:7777:255.255.255.255]', 
'ISEMAIL_RFC5322_IPV6_GRPCOUNT'], + ['test@[IPv6:1111:2222:3333:4444::255.255.255.255]', 'ISEMAIL_RFC5321_ADDRESSLITERAL'], + ['test@[IPv6:1111:2222:3333:4444:5555:6666::255.255.255.255]', 'ISEMAIL_RFC5322_IPV6_MAXGRPS'], + ['test@[IPv6:1111:2222:3333:4444:::255.255.255.255]', 'ISEMAIL_RFC5322_IPV6_2X2XCOLON'], + ['test@[IPv6::255.255.255.255]', 'ISEMAIL_RFC5322_IPV6_COLONSTRT'], + [' test @iana.org', 'ISEMAIL_DEPREC_CFWS_NEAR_AT'], + ['test@ iana .com', 'ISEMAIL_DEPREC_CFWS_NEAR_AT'], + ['test . test@iana.org', 'ISEMAIL_DEPREC_FWS'], + ['\r\n test@iana.org', 'ISEMAIL_CFWS_FWS'], + ['\r\n \r\n test@iana.org', 'ISEMAIL_DEPREC_FWS'], + ['(comment)test@iana.org', 'ISEMAIL_CFWS_COMMENT'], + ['((comment)test@iana.org', 'ISEMAIL_ERR_UNCLOSEDCOMMENT'], + ['(comment(comment))test@iana.org', 'ISEMAIL_CFWS_COMMENT'], + ['test@(comment)iana.org', 'ISEMAIL_DEPREC_CFWS_NEAR_AT'], + ['test(comment)test@iana.org', 'ISEMAIL_ERR_ATEXT_AFTER_CFWS'], + ['test@(comment)[255.255.255.255]', 'ISEMAIL_DEPREC_CFWS_NEAR_AT'], + ['(comment)abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm@iana.org', 'ISEMAIL_CFWS_COMMENT'], + ['test@(comment)abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.com', 'ISEMAIL_DEPREC_CFWS_NEAR_AT'], + ['(comment)test@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghik.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghik.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijk.abcdefghijklmnopqrstuvwxyzabcdefghijk.abcdefghijklmnopqrstu', 'ISEMAIL_CFWS_COMMENT'], + ['test@iana.org\n', 'ISEMAIL_ERR_EXPECTING_ATEXT'], + ['test@xn--hxajbheg2az3al.xn--jxalpdlp', 'ISEMAIL_VALID'], + ['xn--test@iana.org', 'ISEMAIL_VALID'], + ['test@iana.org-', 'ISEMAIL_ERR_DOMAINHYPHENEND'], + ['"test@iana.org', 'ISEMAIL_ERR_UNCLOSEDQUOTEDSTR'], + ['(test@iana.org', 'ISEMAIL_ERR_UNCLOSEDCOMMENT'], + ['test@(iana.org', 'ISEMAIL_ERR_UNCLOSEDCOMMENT'], + ['test@[1.2.3.4', 'ISEMAIL_ERR_UNCLOSEDDOMLIT'], + ['"test\\"@iana.org', 
'ISEMAIL_ERR_UNCLOSEDQUOTEDSTR'], + ['(comment\\)test@iana.org', 'ISEMAIL_ERR_UNCLOSEDCOMMENT'], + ['test@iana.org(comment\\)', 'ISEMAIL_ERR_UNCLOSEDCOMMENT'], + ['test@iana.org(comment\\', 'ISEMAIL_ERR_BACKSLASHEND'], + ['test@[RFC-5322-domain-literal]', 'ISEMAIL_RFC5322_DOMAINLITERAL'], + ['test@[RFC-5322]-domain-literal]', 'ISEMAIL_ERR_ATEXT_AFTER_DOMLIT'], + ['test@[RFC-5322-[domain-literal]', 'ISEMAIL_ERR_EXPECTING_DTEXT'], + ['test@[RFC-5322-\\\x07-domain-literal]', 'ISEMAIL_RFC5322_DOMLIT_OBSDTEXT'], + ['test@[RFC-5322-\\\t-domain-literal]', 'ISEMAIL_RFC5322_DOMLIT_OBSDTEXT'], + ['test@[RFC-5322-\\]-domain-literal]', 'ISEMAIL_RFC5322_DOMLIT_OBSDTEXT'], + ['test@[RFC-5322-domain-literal\\]', 'ISEMAIL_ERR_UNCLOSEDDOMLIT'], + ['test@[RFC-5322-domain-literal\\', 'ISEMAIL_ERR_BACKSLASHEND'], + ['test@[RFC 5322 domain literal]', 'ISEMAIL_RFC5322_DOMAINLITERAL'], + ['test@[RFC-5322-domain-literal] (comment)', 'ISEMAIL_RFC5322_DOMAINLITERAL'], + ['\x7f@iana.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'], + ['test@\x7f.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'], + ['"\x7f"@iana.org', 'ISEMAIL_DEPREC_QTEXT'], + ['"\\\x7f"@iana.org', 'ISEMAIL_DEPREC_QP'], + ['(\x7f)test@iana.org', 'ISEMAIL_DEPREC_CTEXT'], + ['test@iana.org\r', 'ISEMAIL_ERR_CR_NO_LF'], + ['\rtest@iana.org', 'ISEMAIL_ERR_CR_NO_LF'], + ['"\rtest"@iana.org', 'ISEMAIL_ERR_CR_NO_LF'], + ['(\r)test@iana.org', 'ISEMAIL_ERR_CR_NO_LF'], + ['test@iana.org(\r)', 'ISEMAIL_ERR_CR_NO_LF'], + ['\ntest@iana.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'], + ['"\n"@iana.org', 'ISEMAIL_ERR_EXPECTING_QTEXT'], + ['"\\\n"@iana.org', 'ISEMAIL_DEPREC_QP'], + ['(\n)test@iana.org', 'ISEMAIL_ERR_EXPECTING_CTEXT'], + ['\x07@iana.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'], + ['test@\x07.org', 'ISEMAIL_ERR_EXPECTING_ATEXT'], + ['"\x07"@iana.org', 'ISEMAIL_DEPREC_QTEXT'], + ['"\\\x07"@iana.org', 'ISEMAIL_DEPREC_QP'], + ['(\x07)test@iana.org', 'ISEMAIL_DEPREC_CTEXT'], + ['\r\ntest@iana.org', 'ISEMAIL_ERR_FWS_CRLF_END'], + ['\r\n \r\ntest@iana.org', 
'ISEMAIL_ERR_FWS_CRLF_END'], + [' \r\ntest@iana.org', 'ISEMAIL_ERR_FWS_CRLF_END'], + [' \r\n test@iana.org', 'ISEMAIL_CFWS_FWS'], + [' \r\n \r\ntest@iana.org', 'ISEMAIL_ERR_FWS_CRLF_END'], + [' \r\n\r\ntest@iana.org', 'ISEMAIL_ERR_FWS_CRLF_X2'], + [' \r\n\r\n test@iana.org', 'ISEMAIL_ERR_FWS_CRLF_X2'], + ['test@iana.org\r\n ', 'ISEMAIL_CFWS_FWS'], + ['test@iana.org\r\n \r\n ', 'ISEMAIL_DEPREC_FWS'], + ['test@iana.org\r\n', 'ISEMAIL_ERR_FWS_CRLF_END'], + ['test@iana.org\r\n \r\n', 'ISEMAIL_ERR_FWS_CRLF_END'], + ['test@iana.org \r\n', 'ISEMAIL_ERR_FWS_CRLF_END'], + ['test@iana.org \r\n ', 'ISEMAIL_CFWS_FWS'], + ['test@iana.org \r\n \r\n', 'ISEMAIL_ERR_FWS_CRLF_END'], + ['test@iana.org \r\n\r\n', 'ISEMAIL_ERR_FWS_CRLF_X2'], + ['test@iana.org \r\n\r\n ', 'ISEMAIL_ERR_FWS_CRLF_X2'], + [' test@iana.org', 'ISEMAIL_CFWS_FWS'], + ['test@iana.org ', 'ISEMAIL_CFWS_FWS'], + ['test@[IPv6:1::2:]', 'ISEMAIL_RFC5322_IPV6_COLONEND'], + ['"test\\©"@iana.org', 'ISEMAIL_ERR_EXPECTING_QPAIR'], + ['test@iana/icann.org', 'ISEMAIL_RFC5322_DOMAIN'], + ['test.(comment)test@iana.org', 'ISEMAIL_DEPREC_COMMENT'] + ] +) +def test_pyisemail_tests(email_input, status): + if status == "ISEMAIL_VALID": + # All standard email address forms should not raise an exception. + validate_email(email_input, test_environment=True) + elif "_ERR_" in status or "_TOOLONG" in status \ + or "_CFWS_FWS" in status or "_CFWS_COMMENT" in status \ + or "_IPV6" in status or status == "ISEMAIL_RFC5322_DOMAIN": + # Invalid syntax, extraneous whitespace, and "(comments)" should be rejected. + # The _IPV6_ diagnoses appear to represent syntactically invalid domain literals. + # The ISEMAIL_RFC5322_DOMAIN diagnosis appears to be a syntactically invalid domain.
+ with pytest.raises(EmailSyntaxError): + validate_email(email_input, test_environment=True) + elif "_DEPREC_" in status \ + or "RFC5321_QUOTEDSTRING" in status \ + or "DOMAINLITERAL" in status or "_DOMLIT_" in status or "_ADDRESSLITERAL" in status: + # Quoted strings in the local part, domain literals (IP addresses in brackets), + # and other deprecated syntax are valid email addresses and are accepted by pyIsEmail, + # but we reject them. + with pytest.raises(EmailSyntaxError): + validate_email(email_input, test_environment=True) + else: + raise ValueError("status {} is not recognized".format(status)) + + def test_dict_accessor(): - input_email = "testaddr@example.com" + input_email = "testaddr@example.tld" valid_email = validate_email(input_email, check_deliverability=False) assert isinstance(valid_email.as_dict(), dict) assert valid_email.as_dict()["original_email"] == input_email -def test_deliverability_no_records(): - assert validate_email_deliverability('example.com', 'example.com') == {'mx': [(0, '')], 'mx-fallback': None} - - def test_deliverability_found(): response = validate_email_deliverability('gmail.com', 'gmail.com') - assert response.keys() == {'mx', 'mx-fallback'} - assert response['mx-fallback'] is None + assert response.keys() == {'mx', 'mx_fallback_type'} + assert response['mx_fallback_type'] is None assert len(response['mx']) > 1 assert len(response['mx'][0]) == 2 assert isinstance(response['mx'][0][0], int) @@ -276,10 +537,16 @@ def test_deliverability_found(): def test_deliverability_fails(): + # No MX record. domain = 'xkxufoekjvjfjeodlfmdfjcu.com' with pytest.raises(EmailUndeliverableError, match='The domain name {} does not exist'.format(domain)): validate_email_deliverability(domain, domain) + # Null MX record. 
+ domain = 'example.com' + with pytest.raises(EmailUndeliverableError, match='The domain name {} does not accept email'.format(domain)): + validate_email_deliverability(domain, domain) + def test_deliverability_dns_timeout(): validate_email_deliverability.TEST_CHECK_TIMEOUT = True @@ -292,7 +559,7 @@ def test_deliverability_dns_timeout(): def test_main_single_good_input(monkeypatch, capsys): import json - test_email = "test@example.com" + test_email = "google@google.com" monkeypatch.setattr('sys.argv', ['email_validator', test_email]) validator_main() stdout, _ = capsys.readouterr() @@ -311,7 +578,7 @@ def test_main_single_bad_input(monkeypatch, capsys): def test_main_multi_input(monkeypatch, capsys): import io - test_cases = ["test@example.com", "test2@example.com", "test@.com", "test3@.com"] + test_cases = ["google1@google.com", "google2@google.com", "test@.com", "test3@.com"] test_input = io.StringIO("\n".join(test_cases)) monkeypatch.setattr('sys.stdin', test_input) monkeypatch.setattr('sys.argv', ['email_validator']) @@ -326,7 +593,7 @@ def test_main_multi_input(monkeypatch, capsys): def test_main_input_shim(monkeypatch, capsys): import json monkeypatch.setattr('sys.version_info', (2, 7)) - test_email = b"test@example.com" + test_email = b"google@google.com" monkeypatch.setattr('sys.argv', ['email_validator', test_email]) validator_main() stdout, _ = capsys.readouterr() @@ -348,25 +615,34 @@ def test_main_output_shim(monkeypatch, capsys): assert stdout == "b'An email address cannot have a period immediately after the @-sign.'\n" -@mock.patch("dns.resolver.LRUCache.put") -def test_validate_email__with_caching_resolver(mocked_put): - dns_resolver = caching_resolver() - validate_email("test@gmail.com", dns_resolver=dns_resolver) - assert mocked_put.called +def test_validate_email__with_caching_resolver(): + # unittest.mock.patch("dns.resolver.LRUCache.get") doesn't + # work --- it causes get to always return an empty list. + # So we'll mock our own way. 
+ class MockedCache: + get_called = False + put_called = False - with mock.patch("dns.resolver.LRUCache.get") as mocked_get: - validate_email("test@gmail.com", dns_resolver=dns_resolver) - assert mocked_get.called + def get(self, key): + self.get_called = True + return None + def put(self, key, value): + self.put_called = True -@mock.patch("dns.resolver.LRUCache.put") -def test_validate_email__with_configured_resolver(mocked_put): + # Test with caching_resolver helper method. + mocked_cache = MockedCache() + dns_resolver = caching_resolver(cache=mocked_cache) + validate_email("test@gmail.com", dns_resolver=dns_resolver) + assert mocked_cache.put_called + validate_email("test@gmail.com", dns_resolver=dns_resolver) + assert mocked_cache.get_called + + # Test with dns.resolver.Resolver instance. dns_resolver = dns.resolver.Resolver() dns_resolver.lifetime = 10 - dns_resolver.cache = dns.resolver.LRUCache(max_size=1000) + dns_resolver.cache = MockedCache() validate_email("test@gmail.com", dns_resolver=dns_resolver) - assert mocked_put.called - - with mock.patch("dns.resolver.LRUCache.get") as mocked_get: - validate_email("test@gmail.com", dns_resolver=dns_resolver) - assert mocked_get.called + assert mocked_cache.put_called + validate_email("test@gmail.com", dns_resolver=dns_resolver) + assert mocked_cache.get_called