diff --git a/.travis.yml b/.travis.yml index 2c6514b9..92b1ed78 100644 --- a/.travis.yml +++ b/.travis.yml @@ -121,11 +121,10 @@ before_script: - cmake -B_build -GNinja - -DSkyr_WARNINGS_AS_ERRORS=OFF - -DSkyr_BUILD_TESTS=ON - -DSkyr_BUILD_WPT_TESTS=ON - -DSkyr_BUILD_DOCS=OFF - -DSkyr_BUILD_EXAMPLES=OFF + -Dskyr_WARNINGS_AS_ERRORS=OFF + -Dskyr_BUILD_TESTS=ON + -Dskyr_BUILD_DOCS=OFF + -Dskyr_BUILD_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DBUILD_SHARED_LIBS=${SHARED_LIB} -DCMAKE_TOOLCHAIN_FILE=${DEPS_DIR}/vcpkg/scripts/buildsystems/vcpkg.cmake diff --git a/include/skyr/v1/domain/domain.hpp b/include/skyr/v1/domain/domain.hpp index 41f2db2a..31b81b81 100644 --- a/include/skyr/v1/domain/domain.hpp +++ b/include/skyr/v1/domain/domain.hpp @@ -16,53 +16,62 @@ inline namespace v1 { /// Converts a UTF-8 encoded domain to ASCII using /// [IDNA processing](https://www.domain.org/reports/tr46/#Processing) /// -/// \param domain A domain +/// \param domain_name A domain /// \param be_strict Tells the processor to be strict /// \param validation_error /// \returns An ASCII domain, or an error auto domain_to_ascii( - std::string_view domain, + std::string_view domain_name, bool be_strict, bool *validation_error) -> tl::expected; /// Converts a UTF-8 encoded domain to ASCII using /// [IDNA processing](https://www.domain.org/reports/tr46/#Processing) /// -/// \param domain A domain +/// \param domain_name A domain /// \param be_strict Tells the processor to be strict /// \returns An ASCII domain, or an error inline auto domain_to_ascii( - std::string_view domain, + std::string_view domain_name, bool be_strict) -> tl::expected { [[maybe_unused]] bool validation_error = false; - return domain_to_ascii(domain, be_strict, &validation_error); + return domain_to_ascii(domain_name, be_strict, &validation_error); } /// Converts a UTF-8 encoded domain to ASCII using /// [IDNA processing](https://www.domain.org/reports/tr46/#Processing) /// -/// \param domain A domain +/// \param domain_name A domain /// \param validation_error /// \returns An ASCII domain, or an error -inline auto domain_to_ascii(std::string_view domain, bool *validation_error) { - return domain_to_ascii(domain, false, validation_error); +inline auto domain_to_ascii(std::string_view domain_name, bool *validation_error) { + return domain_to_ascii(domain_name, false, validation_error); } /// Converts a UTF-8 encoded domain to ASCII using /// [IDNA processing](https://www.domain.org/reports/tr46/#Processing) /// -/// \param domain A domain +/// \param domain_name A domain /// \returns An ASCII domain, or an error -inline auto domain_to_ascii(std::string_view domain) { +inline auto domain_to_ascii(std::string_view domain_name) { [[maybe_unused]] bool validation_error = false; - return domain_to_ascii(domain, false, &validation_error); + return domain_to_ascii(domain_name, false, &validation_error); } /// Converts a Punycode encoded domain to UTF-8 /// -/// \param ascii A Punycode encoded domain +/// \param domain_name A Punycode encoded domain /// \returns A valid UTF-8 encoded domain, or an error -auto domain_to_u8(std::string_view ascii) -> tl::expected; +auto domain_to_u8(std::string_view domain_name, bool *validation_error) -> tl::expected; + +/// Converts a Punycode encoded domain to UTF-8 +/// +/// \param domain_name A Punycode encoded domain +/// \returns A valid UTF-8 encoded domain, or an error +inline auto domain_to_u8(std::string_view domain_name) -> tl::expected { + [[maybe_unused]] bool validation_error = false; + return domain_to_u8(domain_name, &validation_error); +} } // namespace v1 } // namespace skyr diff --git a/include/skyr/v1/domain/errors.hpp b/include/skyr/v1/domain/errors.hpp index c6ceed04..c2a9c0fe 100644 --- a/include/skyr/v1/domain/errors.hpp +++ b/include/skyr/v1/domain/errors.hpp @@ -21,6 +21,8 @@ enum class domain_errc { encoding_error, /// Invalid domain name length invalid_length, + /// Empty domain + empty_string, }; } // namespace v1 } // namespace skyr diff --git a/include/skyr/v1/string/starts_with.hpp b/include/skyr/v1/string/starts_with.hpp index 0f1554fe..1699bba4 100644 --- a/include/skyr/v1/string/starts_with.hpp +++ b/include/skyr/v1/string/starts_with.hpp @@ -11,9 +11,10 @@ namespace skyr { inline namespace v1 { +template inline auto starts_with( - std::string_view input, - std::string_view chars) noexcept { + std::basic_string_view input, + decltype(input) chars) noexcept { return (input.size() >= chars.size()) && (input.substr(0, chars.size()) == chars); } } // namespace v1 diff --git a/src/v1/domain/domain.cpp b/src/v1/domain/domain.cpp index 43dbc11c..373d511b 100644 --- a/src/v1/domain/domain.cpp +++ b/src/v1/domain/domain.cpp @@ -73,36 +73,84 @@ auto map_code_points( return result; } -auto unicode_to_ascii( - std::u32string_view domain_name, bool check_hyphens, [[maybe_unused]] bool check_bidi, - bool check_joiners, bool use_std3_ascii_rules, bool transitional_processing, - bool verify_dns_length) -> tl::expected { - constexpr static auto is_contextj = [] (auto cp) { - return (cp == U'\x200c') || (cp == U'\x200d'); - }; - - for (auto label : split(std::u32string_view(domain_name), U"\x002e\xff0e\x3002\0xff61")) { - if (check_hyphens) { - if ((label.size() >= 4) && (label.substr(2, 4) == U"--")) { - return tl::make_unexpected(domain_errc::bad_input); - } +auto validate_label(std::u32string_view label, [[maybe_unused]] bool use_std3_ascii_rules, bool check_hyphens, + [[maybe_unused]] bool check_bidi, bool check_joiners, [[maybe_unused]] bool transitional_processing) + -> tl::expected { + /// https://www.unicode.org/reports/tr46/#Validity_Criteria - if ((label.front() == U'-') || (label.back() == U'-')) { - return tl::make_unexpected(domain_errc::bad_input); - } + auto first = begin(label), last = end(label); + + if (check_hyphens) { + /// Criterion 2 + if ((label.size() >= 4) && (label.substr(2, 4) == U"--")) { + return tl::make_unexpected(domain_errc::bad_input); + } + + /// Criterion 3 + if ((label.front() == U'-') || (label.back() == U'-')) { + return tl::make_unexpected(domain_errc::bad_input); + } + } + + if (check_joiners) { + /// Criterion 7 + constexpr static auto is_not_contextj = [] (auto cp) { + return (cp == U'\x200c') || (cp == U'\x200d'); + }; + + auto it = std::find_if(first, last, is_not_contextj); + if (it != last) { + return tl::make_unexpected(domain_errc::bad_input); } + } + + return {}; +} + +auto idna_process(std::u32string_view domain_name, bool use_std3_ascii_rules, bool check_hyphens, + bool check_bidi, bool check_joiners, bool transitional_processing) + -> tl::expected { + using namespace std::string_view_literals; + + auto result = map_code_points(domain_name, use_std3_ascii_rules, transitional_processing); + if (result) { + for (auto label : split(std::u32string_view(result.value()), U"."sv)) { + if ((label.size() >= 4) && (label.substr(0, 4) == U"xn--")) { + auto decoded = punycode_decode(label.substr(4)); + if (!decoded) { + return tl::make_unexpected(decoded.error()); + } - if (check_joiners) { - auto first = begin(label), last = end(label); - auto it = std::find_if(first, last, is_contextj); - if (it != last) { - return tl::make_unexpected(domain_errc::bad_input); + auto validated = + validate_label(decoded.value(), use_std3_ascii_rules, check_hyphens, check_bidi, check_joiners, false); + if (!validated) { + return tl::make_unexpected(validated.error()); + } + } else { + auto validated = validate_label(label, use_std3_ascii_rules, check_hyphens, check_bidi, check_joiners, + transitional_processing); + if (!validated) { + return tl::make_unexpected(validated.error()); + } } } } + return result; +} + +auto domain_to_ascii( + std::string_view domain_name, bool check_hyphens, bool check_bidi, + bool check_joiners, bool use_std3_ascii_rules, bool transitional_processing, + bool verify_dns_length) -> tl::expected { + /// https://www.unicode.org/reports/tr46/#ToASCII - auto domain = map_code_points(domain_name, use_std3_ascii_rules, transitional_processing); + auto utf32 = unicode::as(unicode::views::as_u8(domain_name) | unicode::transforms::to_u32); + if (!utf32) { + return tl::make_unexpected(domain_errc::encoding_error); + } + auto domain = idna_process( + utf32.value(), use_std3_ascii_rules, check_hyphens, check_bidi, check_joiners, transitional_processing); if (!domain) { return tl::make_unexpected(domain.error()); } @@ -137,38 +185,33 @@ auto unicode_to_ascii( return join(labels, '.'); } +} // namespace auto domain_to_ascii( - std::u32string_view domain, bool be_strict, bool *validation_error) -> tl::expected { - auto result = - unicode_to_ascii(domain, false, true, true, be_strict, false, be_strict); + std::string_view domain_name, bool be_strict, bool *validation_error) -> tl::expected { + auto result = domain_to_ascii(domain_name, false, true, true, be_strict, false, be_strict); if (!result) { *validation_error |= true; return tl::make_unexpected(result.error()); } - return result; -} -} // namespace - -auto domain_to_ascii( - std::string_view domain, bool be_strict, bool *validation_error) -> tl::expected { - auto utf32 = unicode::as(unicode::views::as_u8(domain) | unicode::transforms::to_u32); - if (!utf32) { - return tl::make_unexpected(domain_errc::encoding_error); + else if (result.value().empty()) { + *validation_error |= true; + return tl::make_unexpected(domain_errc::empty_string); } - return domain_to_ascii(utf32.value(), be_strict, validation_error); + return result; } -auto domain_to_u8(std::string_view ascii) -> tl::expected { +auto domain_to_u8(std::string_view domain_name, [[maybe_unused]] bool *validation_error) + -> tl::expected { auto labels = std::vector{}; - for (auto label : split(ascii, ".")) { + for (auto label : split(domain_name, ".")) { if (label.substr(0, 4) == "xn--") { label.remove_prefix(4); - auto encoded = punycode_decode(label); - if (!encoded) { - return tl::make_unexpected(encoded.error()); + auto decoded = punycode_decode(label); + if (!decoded) { + return tl::make_unexpected(decoded.error()); } - labels.emplace_back(encoded.value()); + labels.emplace_back(decoded.value()); } else { labels.emplace_back(begin(label), end(label)); diff --git a/src/v1/domain/punycode.cpp b/src/v1/domain/punycode.cpp index 0f35b194..6990e9b7 100644 --- a/src/v1/domain/punycode.cpp +++ b/src/v1/domain/punycode.cpp @@ -71,6 +71,10 @@ auto adapt(uint32_t delta, uint32_t numpoints, bool firsttime) { auto punycode_encode( std::u32string_view input) -> tl::expected { + if (input.empty()) { + return tl::make_unexpected(domain_errc::empty_string); + } + auto result = std::string{}; result.reserve(256); @@ -140,6 +144,10 @@ auto punycode_encode( auto punycode_decode( std::string_view input) -> tl::expected { + if (input.empty()) { + return tl::make_unexpected(domain_errc::empty_string); + } + auto result = std::u32string(); result.reserve(256); @@ -206,8 +214,25 @@ auto punycode_decode( return u8_result.value(); } +auto punycode_decode( + std::u32string_view input) -> tl::expected { + if (input.empty()) { + return tl::make_unexpected(domain_errc::empty_string); + } + + auto u8input = unicode::as(input | unicode::transforms::to_u8).value(); + return punycode_decode(std::string_view(u8input)) + .and_then([] (auto &&output) -> tl::expected { + return unicode::as(unicode::views::as_u8(output) | unicode::transforms::to_u32).value(); + }); +} + auto punycode_encode( std::string_view input) -> tl::expected { + if (input.empty()) { + return tl::make_unexpected(domain_errc::empty_string); + } + auto utf32 = unicode::as(unicode::views::as_u8(input) | unicode::transforms::to_u32); if (!utf32) { return tl::make_unexpected(domain_errc::bad_input); diff --git a/src/v1/domain/punycode.hpp b/src/v1/domain/punycode.hpp index fb284fe8..33c116d3 100644 --- a/src/v1/domain/punycode.hpp +++ b/src/v1/domain/punycode.hpp @@ -44,6 +44,14 @@ auto punycode_encode( /// \returns The decoded UTF-8 domain, or an error auto punycode_decode( std::string_view input) -> tl::expected; + +/// Performs Punycode decoding based on a reference implementation +/// defined in [RFC 3492](https://tools.ietf.org/html/rfc3492) +/// +/// \param input An ASCII encoded domain to be decoded +/// \returns The decoded UTF-8 domain, or an error +auto punycode_decode( + std::u32string_view input) -> tl::expected; } // namespace v1 } // namespace skyr diff --git a/tests/domain/domain_tests.cpp b/tests/domain/domain_tests.cpp index 25232c4f..432e128a 100644 --- a/tests/domain/domain_tests.cpp +++ b/tests/domain/domain_tests.cpp @@ -1,4 +1,4 @@ -// Copyright 2018-19 Glyn Matthews. +// Copyright 2018-20 Glyn Matthews. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt of copy at // http://www.boost.org/LICENSE_1_0.txt) @@ -25,6 +25,7 @@ TEST_CASE("valid domains to ascii", "[domain]") { SECTION("domain_to_ascii_tests") { const auto &[input, expected] = domain; auto instance = skyr::domain_to_ascii(input); + INFO("input = " << input << ", expected = " << expected); REQUIRE(instance); CHECK(expected == instance.value()); }