Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Domain name processing #122

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,10 @@ before_script:
- cmake
-B_build
-GNinja
-DSkyr_WARNINGS_AS_ERRORS=OFF
-DSkyr_BUILD_TESTS=ON
-DSkyr_BUILD_WPT_TESTS=ON
-DSkyr_BUILD_DOCS=OFF
-DSkyr_BUILD_EXAMPLES=OFF
-Dskyr_WARNINGS_AS_ERRORS=OFF
-Dskyr_BUILD_TESTS=ON
-Dskyr_BUILD_DOCS=OFF
-Dskyr_BUILD_EXAMPLES=OFF
-DCMAKE_BUILD_TYPE=${BUILD_TYPE}
-DBUILD_SHARED_LIBS=${SHARED_LIB}
-DCMAKE_TOOLCHAIN_FILE=${DEPS_DIR}/vcpkg/scripts/buildsystems/vcpkg.cmake
Expand Down
35 changes: 22 additions & 13 deletions include/skyr/v1/domain/domain.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,53 +16,62 @@ inline namespace v1 {
/// Converts a UTF-8 encoded domain to ASCII using
/// [IDNA processing](https://www.unicode.org/reports/tr46/#Processing)
///
/// \param domain A domain
/// \param domain_name A domain
/// \param be_strict Tells the processor to be strict
/// \param validation_error Set to `true` if the conversion fails; must not be null
/// \returns An ASCII domain, or an error
auto domain_to_ascii(
std::string_view domain,
std::string_view domain_name,
bool be_strict,
bool *validation_error) -> tl::expected<std::string, domain_errc>;

/// Converts a UTF-8 encoded domain to ASCII using
/// [IDNA processing](https://www.unicode.org/reports/tr46/#Processing)
///
/// \param domain A domain
/// \param domain_name A domain
/// \param be_strict Tells the processor to be strict
/// \returns An ASCII domain, or an error
inline auto domain_to_ascii(
std::string_view domain,
std::string_view domain_name,
bool be_strict) -> tl::expected<std::string, domain_errc> {
[[maybe_unused]] bool validation_error = false;
return domain_to_ascii(domain, be_strict, &validation_error);
return domain_to_ascii(domain_name, be_strict, &validation_error);
}

/// Converts a UTF-8 encoded domain to ASCII using
/// [IDNA processing](https://www.unicode.org/reports/tr46/#Processing)
///
/// \param domain A domain
/// \param domain_name A domain
/// \param validation_error Set to `true` if the conversion fails; must not be null
/// \returns An ASCII domain, or an error
inline auto domain_to_ascii(std::string_view domain, bool *validation_error) {
return domain_to_ascii(domain, false, validation_error);
inline auto domain_to_ascii(std::string_view domain_name, bool *validation_error) {
return domain_to_ascii(domain_name, false, validation_error);
}

/// Converts a UTF-8 encoded domain to ASCII using
/// [IDNA processing](https://www.unicode.org/reports/tr46/#Processing)
///
/// \param domain A domain
/// \param domain_name A domain
/// \returns An ASCII domain, or an error
inline auto domain_to_ascii(std::string_view domain) {
inline auto domain_to_ascii(std::string_view domain_name) {
[[maybe_unused]] bool validation_error = false;
return domain_to_ascii(domain, false, &validation_error);
return domain_to_ascii(domain_name, false, &validation_error);
}

/// Converts a Punycode encoded domain to UTF-8
///
/// \param ascii A Punycode encoded domain
/// \param domain_name A Punycode encoded domain
/// \returns A valid UTF-8 encoded domain, or an error
auto domain_to_u8(std::string_view ascii) -> tl::expected<std::string, domain_errc>;
auto domain_to_u8(std::string_view domain_name, bool *validation_error) -> tl::expected<std::string, domain_errc>;

/// Decodes a Punycode encoded domain into UTF-8, for callers that have no
/// interest in the validation error flag
///
/// \param domain_name A Punycode encoded domain
/// \returns A valid UTF-8 encoded domain, or an error
inline auto domain_to_u8(std::string_view domain_name) -> tl::expected<std::string, domain_errc> {
  // The two-argument overload requires somewhere to record the flag; the
  // value is dropped as soon as this function returns.
  auto discarded_validation_error = false;
  return domain_to_u8(domain_name, &discarded_validation_error);
}
} // namespace v1
} // namespace skyr

Expand Down
2 changes: 2 additions & 0 deletions include/skyr/v1/domain/errors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ enum class domain_errc {
encoding_error,
/// Invalid domain name length
invalid_length,
/// Empty domain
empty_string,
};
} // namespace v1
} // namespace skyr
Expand Down
5 changes: 3 additions & 2 deletions include/skyr/v1/string/starts_with.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@

namespace skyr {
inline namespace v1 {
template <class charT>
inline auto starts_with(
std::string_view input,
std::string_view chars) noexcept {
std::basic_string_view<charT> input,
decltype(input) chars) noexcept {
return (input.size() >= chars.size()) && (input.substr(0, chars.size()) == chars);
}
} // namespace v1
Expand Down
125 changes: 84 additions & 41 deletions src/v1/domain/domain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,36 +73,84 @@ auto map_code_points(
return result;
}

auto unicode_to_ascii(
std::u32string_view domain_name, bool check_hyphens, [[maybe_unused]] bool check_bidi,
bool check_joiners, bool use_std3_ascii_rules, bool transitional_processing,
bool verify_dns_length) -> tl::expected<std::string, domain_errc> {
constexpr static auto is_contextj = [] (auto cp) {
return (cp == U'\x200c') || (cp == U'\x200d');
};

for (auto label : split(std::u32string_view(domain_name), U"\x002e\xff0e\x3002\0xff61")) {
if (check_hyphens) {
if ((label.size() >= 4) && (label.substr(2, 4) == U"--")) {
return tl::make_unexpected(domain_errc::bad_input);
}
/// Checks a single label against a subset of the UTS #46 validity criteria
/// (https://www.unicode.org/reports/tr46/#Validity_Criteria)
///
/// \param label The label to validate
/// \param use_std3_ascii_rules Currently unused
/// \param check_hyphens Enables the hyphen checks (criteria 2 and 3)
/// \param check_bidi Currently unused
/// \param check_joiners Enables the joiner check (criterion 7)
/// \param transitional_processing Currently unused
/// \returns Nothing on success, or `domain_errc::bad_input` if a check fails
auto validate_label(std::u32string_view label, [[maybe_unused]] bool use_std3_ascii_rules, bool check_hyphens,
                    [[maybe_unused]] bool check_bidi, bool check_joiners, [[maybe_unused]] bool transitional_processing)
    -> tl::expected<void, domain_errc> {
  // An empty label contains no hyphens, so both hyphen criteria hold
  // trivially; skipping it also keeps front()/back() well defined.
  if (check_hyphens && !label.empty()) {
    /// Criterion 2: no hyphen in both the third and fourth positions
    if ((label.size() >= 4) && (label[2] == U'-') && (label[3] == U'-')) {
      return tl::make_unexpected(domain_errc::bad_input);
    }

    /// Criterion 3: no leading or trailing hyphen
    if ((label.front() == U'-') || (label.back() == U'-')) {
      return tl::make_unexpected(domain_errc::bad_input);
    }
  }

  if (check_joiners) {
    /// Criterion 7
    // Any ZERO WIDTH NON-JOINER (U+200C) or ZERO WIDTH JOINER (U+200D) in
    // the label is rejected outright.
    // NOTE(review): this looks stricter than the ContextJ rules the
    // criterion refers to - confirm that is intended.
    constexpr static auto is_contextj = [](auto cp) {
      return (cp == U'\x200c') || (cp == U'\x200d');
    };

    if (std::any_of(label.begin(), label.end(), is_contextj)) {
      return tl::make_unexpected(domain_errc::bad_input);
    }
  }

  return {};
}

auto idna_process(std::u32string_view domain_name, bool use_std3_ascii_rules, bool check_hyphens,
bool check_bidi, bool check_joiners, bool transitional_processing)
-> tl::expected<std::u32string, domain_errc> {
using namespace std::string_view_literals;

auto result = map_code_points(domain_name, use_std3_ascii_rules, transitional_processing);
if (result) {
for (auto label : split(std::u32string_view(result.value()), U"."sv)) {
if ((label.size() >= 4) && (label.substr(0, 4) == U"xn--")) {
auto decoded = punycode_decode(label.substr(4));
if (!decoded) {
return tl::make_unexpected(decoded.error());
}

if (check_joiners) {
auto first = begin(label), last = end(label);
auto it = std::find_if(first, last, is_contextj);
if (it != last) {
return tl::make_unexpected(domain_errc::bad_input);
auto validated =
validate_label(decoded.value(), use_std3_ascii_rules, check_hyphens, check_bidi, check_joiners, false);
if (!validated) {
return tl::make_unexpected(validated.error());
}
} else {
auto validated = validate_label(label, use_std3_ascii_rules, check_hyphens, check_bidi, check_joiners,
transitional_processing);
if (!validated) {
return tl::make_unexpected(validated.error());
}
}
}
}
return result;
}

auto domain_to_ascii(
std::string_view domain_name, bool check_hyphens, bool check_bidi,
bool check_joiners, bool use_std3_ascii_rules, bool transitional_processing,
bool verify_dns_length) -> tl::expected<std::string, domain_errc> {
/// https://www.unicode.org/reports/tr46/#ToASCII

auto domain = map_code_points(domain_name, use_std3_ascii_rules, transitional_processing);
auto utf32 = unicode::as<std::u32string>(unicode::views::as_u8(domain_name) | unicode::transforms::to_u32);
if (!utf32) {
return tl::make_unexpected(domain_errc::encoding_error);
}

auto domain = idna_process(
utf32.value(), use_std3_ascii_rules, check_hyphens, check_bidi, check_joiners, transitional_processing);
if (!domain) {
return tl::make_unexpected(domain.error());
}
Expand Down Expand Up @@ -137,38 +185,33 @@ auto unicode_to_ascii(

return join(labels, '.');
}
} // namespace

auto domain_to_ascii(
std::u32string_view domain, bool be_strict, bool *validation_error) -> tl::expected<std::string, domain_errc> {
auto result =
unicode_to_ascii(domain, false, true, true, be_strict, false, be_strict);
std::string_view domain_name, bool be_strict, bool *validation_error) -> tl::expected<std::string, domain_errc> {
auto result = domain_to_ascii(domain_name, false, true, true, be_strict, false, be_strict);
if (!result) {
*validation_error |= true;
return tl::make_unexpected(result.error());
}
return result;
}
} // namespace

auto domain_to_ascii(
std::string_view domain, bool be_strict, bool *validation_error) -> tl::expected<std::string, domain_errc> {
auto utf32 = unicode::as<std::u32string>(unicode::views::as_u8(domain) | unicode::transforms::to_u32);
if (!utf32) {
return tl::make_unexpected(domain_errc::encoding_error);
else if (result.value().empty()) {
*validation_error |= true;
return tl::make_unexpected(domain_errc::empty_string);
}
return domain_to_ascii(utf32.value(), be_strict, validation_error);
return result;
}

auto domain_to_u8(std::string_view ascii) -> tl::expected<std::string, domain_errc> {
auto domain_to_u8(std::string_view domain_name, [[maybe_unused]] bool *validation_error)
-> tl::expected<std::string, domain_errc> {
auto labels = std::vector<std::string>{};
for (auto label : split(ascii, ".")) {
for (auto label : split(domain_name, ".")) {
if (label.substr(0, 4) == "xn--") {
label.remove_prefix(4);
auto encoded = punycode_decode(label);
if (!encoded) {
return tl::make_unexpected(encoded.error());
auto decoded = punycode_decode(label);
if (!decoded) {
return tl::make_unexpected(decoded.error());
}
labels.emplace_back(encoded.value());
labels.emplace_back(decoded.value());
}
else {
labels.emplace_back(begin(label), end(label));
Expand Down
25 changes: 25 additions & 0 deletions src/v1/domain/punycode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ auto adapt(uint32_t delta, uint32_t numpoints, bool firsttime) {

auto punycode_encode(
std::u32string_view input) -> tl::expected<std::string, domain_errc> {
if (input.empty()) {
return tl::make_unexpected(domain_errc::empty_string);
}

auto result = std::string{};
result.reserve(256);

Expand Down Expand Up @@ -140,6 +144,10 @@ auto punycode_encode(

auto punycode_decode(
std::string_view input) -> tl::expected<std::string, domain_errc> {
if (input.empty()) {
return tl::make_unexpected(domain_errc::empty_string);
}

auto result = std::u32string();
result.reserve(256);

Expand Down Expand Up @@ -206,8 +214,25 @@ auto punycode_decode(
return u8_result.value();
}

/// Decodes a Punycode string supplied as UTF-32 by transcoding it to UTF-8,
/// running the narrow-string decoder, and transcoding the result back
///
/// \param input The Punycode text to decode
/// \returns The decoded text as UTF-32; `domain_errc::empty_string` if
///          `input` is empty, `domain_errc::encoding_error` if either
///          transcoding step fails, or the error reported by the
///          narrow-string decoder
auto punycode_decode(
    std::u32string_view input) -> tl::expected<std::u32string, domain_errc> {
  if (input.empty()) {
    return tl::make_unexpected(domain_errc::empty_string);
  }

  // Report a transcoding failure as an error value; calling value() on a
  // failed result would throw instead.
  auto u8input = unicode::as<std::string>(input | unicode::transforms::to_u8);
  if (!u8input) {
    return tl::make_unexpected(domain_errc::encoding_error);
  }

  auto decoded = punycode_decode(std::string_view(u8input.value()));
  if (!decoded) {
    return tl::make_unexpected(decoded.error());
  }

  auto u32output = unicode::as<std::u32string>(
      unicode::views::as_u8(decoded.value()) | unicode::transforms::to_u32);
  if (!u32output) {
    return tl::make_unexpected(domain_errc::encoding_error);
  }

  return u32output.value();
}

auto punycode_encode(
std::string_view input) -> tl::expected<std::string, domain_errc> {
if (input.empty()) {
return tl::make_unexpected(domain_errc::empty_string);
}

auto utf32 = unicode::as<std::u32string>(unicode::views::as_u8(input) | unicode::transforms::to_u32);
if (!utf32) {
return tl::make_unexpected(domain_errc::bad_input);
Expand Down
8 changes: 8 additions & 0 deletions src/v1/domain/punycode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ auto punycode_encode(
/// \returns The decoded UTF-8 domain, or an error
auto punycode_decode(
std::string_view input) -> tl::expected<std::string, domain_errc>;

/// Performs Punycode decoding based on a reference implementation
/// defined in [RFC 3492](https://tools.ietf.org/html/rfc3492)
///
/// \param input A Punycode encoded string, given as UTF-32, to be decoded
/// \returns The decoded string as UTF-32, or an error
auto punycode_decode(
std::u32string_view input) -> tl::expected<std::u32string, domain_errc>;
} // namespace v1
} // namespace skyr

Expand Down
3 changes: 2 additions & 1 deletion tests/domain/domain_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2018-19 Glyn Matthews.
// Copyright 2018-20 Glyn Matthews.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
Expand All @@ -25,6 +25,7 @@ TEST_CASE("valid domains to ascii", "[domain]") {
SECTION("domain_to_ascii_tests") {
const auto &[input, expected] = domain;
auto instance = skyr::domain_to_ascii(input);
INFO("input = " << input << ", expected = " << expected);
REQUIRE(instance);
CHECK(expected == instance.value());
}
Expand Down