Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 133e269

Browse files
authored
Domain name processing (#122)
* Continued updating domain processing functions * Added check in Punycode encoding/decoding for empty strings * Disabled some code because UTs are failing inconsistently on CI * Fixed error in .travis.yml
1 parent 46cc449 commit 133e269

File tree

8 files changed

+150
-62
lines changed

8 files changed

+150
-62
lines changed

.travis.yml

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,11 +121,10 @@ before_script:
121121
- cmake
122122
-B_build
123123
-GNinja
124-
-DSkyr_WARNINGS_AS_ERRORS=OFF
125-
-DSkyr_BUILD_TESTS=ON
126-
-DSkyr_BUILD_WPT_TESTS=ON
127-
-DSkyr_BUILD_DOCS=OFF
128-
-DSkyr_BUILD_EXAMPLES=OFF
124+
-Dskyr_WARNINGS_AS_ERRORS=OFF
125+
-Dskyr_BUILD_TESTS=ON
126+
-Dskyr_BUILD_DOCS=OFF
127+
-Dskyr_BUILD_EXAMPLES=OFF
129128
-DCMAKE_BUILD_TYPE=${BUILD_TYPE}
130129
-DBUILD_SHARED_LIBS=${SHARED_LIB}
131130
-DCMAKE_TOOLCHAIN_FILE=${DEPS_DIR}/vcpkg/scripts/buildsystems/vcpkg.cmake

include/skyr/v1/domain/domain.hpp

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,53 +16,62 @@ inline namespace v1 {
1616
/// Converts a UTF-8 encoded domain to ASCII using
1717
/// [IDNA processing](https://www.unicode.org/reports/tr46/#Processing)
1818
///
19-
/// \param domain A domain
19+
/// \param domain_name A domain
2020
/// \param be_strict Tells the processor to be strict
2121
/// \param validation_error Set to `true` if processing fails or yields an empty domain
2222
/// \returns An ASCII domain, or an error
2323
auto domain_to_ascii(
24-
std::string_view domain,
24+
std::string_view domain_name,
2525
bool be_strict,
2626
bool *validation_error) -> tl::expected<std::string, domain_errc>;
2727

2828
/// Converts a UTF-8 encoded domain to ASCII using
2929
/// [IDNA processing](https://www.unicode.org/reports/tr46/#Processing)
3030
///
31-
/// \param domain A domain
31+
/// \param domain_name A domain
3232
/// \param be_strict Tells the processor to be strict
3333
/// \returns An ASCII domain, or an error
3434
inline auto domain_to_ascii(
35-
std::string_view domain,
35+
std::string_view domain_name,
3636
bool be_strict) -> tl::expected<std::string, domain_errc> {
3737
[[maybe_unused]] bool validation_error = false;
38-
return domain_to_ascii(domain, be_strict, &validation_error);
38+
return domain_to_ascii(domain_name, be_strict, &validation_error);
3939
}
4040

4141
/// Converts a UTF-8 encoded domain to ASCII using
4242
/// [IDNA processing](https://www.unicode.org/reports/tr46/#Processing)
4343
///
44-
/// \param domain A domain
44+
/// \param domain_name A domain
4545
/// \param validation_error Set to `true` if processing fails or yields an empty domain
4646
/// \returns An ASCII domain, or an error
47-
inline auto domain_to_ascii(std::string_view domain, bool *validation_error) {
48-
return domain_to_ascii(domain, false, validation_error);
47+
inline auto domain_to_ascii(std::string_view domain_name, bool *validation_error) {
48+
return domain_to_ascii(domain_name, false, validation_error);
4949
}
5050

5151
/// Converts a UTF-8 encoded domain to ASCII using
5252
/// [IDNA processing](https://www.unicode.org/reports/tr46/#Processing)
5353
///
54-
/// \param domain A domain
54+
/// \param domain_name A domain
5555
/// \returns An ASCII domain, or an error
56-
inline auto domain_to_ascii(std::string_view domain) {
56+
inline auto domain_to_ascii(std::string_view domain_name) {
5757
[[maybe_unused]] bool validation_error = false;
58-
return domain_to_ascii(domain, false, &validation_error);
58+
return domain_to_ascii(domain_name, false, &validation_error);
5959
}
6060

6161
/// Converts a Punycode encoded domain to UTF-8
6262
///
63-
/// \param ascii A Punycode encoded domain
63+
/// \param domain_name A Punycode encoded domain
6464
/// \returns A valid UTF-8 encoded domain, or an error
65-
auto domain_to_u8(std::string_view ascii) -> tl::expected<std::string, domain_errc>;
65+
auto domain_to_u8(std::string_view domain_name, bool *validation_error) -> tl::expected<std::string, domain_errc>;
66+
67+
/// Converts a Punycode encoded domain to UTF-8
68+
///
69+
/// \param domain_name A Punycode encoded domain
70+
/// \returns A valid UTF-8 encoded domain, or an error
71+
inline auto domain_to_u8(std::string_view domain_name) -> tl::expected<std::string, domain_errc> {
72+
[[maybe_unused]] bool validation_error = false;
73+
return domain_to_u8(domain_name, &validation_error);
74+
}
6675
} // namespace v1
6776
} // namespace skyr
6877

include/skyr/v1/domain/errors.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ enum class domain_errc {
2121
encoding_error,
2222
/// Invalid domain name length
2323
invalid_length,
24+
/// Empty domain
25+
empty_string,
2426
};
2527
} // namespace v1
2628
} // namespace skyr

include/skyr/v1/string/starts_with.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@
1111

1212
namespace skyr {
1313
inline namespace v1 {
14+
template <class charT>
1415
inline auto starts_with(
15-
std::string_view input,
16-
std::string_view chars) noexcept {
16+
std::basic_string_view<charT> input,
17+
decltype(input) chars) noexcept {
1718
return (input.size() >= chars.size()) && (input.substr(0, chars.size()) == chars);
1819
}
1920
} // namespace v1

src/v1/domain/domain.cpp

Lines changed: 84 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -73,36 +73,84 @@ auto map_code_points(
7373
return result;
7474
}
7575

76-
auto unicode_to_ascii(
77-
std::u32string_view domain_name, bool check_hyphens, [[maybe_unused]] bool check_bidi,
78-
bool check_joiners, bool use_std3_ascii_rules, bool transitional_processing,
79-
bool verify_dns_length) -> tl::expected<std::string, domain_errc> {
80-
constexpr static auto is_contextj = [] (auto cp) {
81-
return (cp == U'\x200c') || (cp == U'\x200d');
82-
};
83-
84-
for (auto label : split(std::u32string_view(domain_name), U"\x002e\xff0e\x3002\0xff61")) {
85-
if (check_hyphens) {
86-
if ((label.size() >= 4) && (label.substr(2, 4) == U"--")) {
87-
return tl::make_unexpected(domain_errc::bad_input);
88-
}
76+
auto validate_label(std::u32string_view label, [[maybe_unused]] bool use_std3_ascii_rules, bool check_hyphens,
77+
[[maybe_unused]] bool check_bidi, bool check_joiners, [[maybe_unused]] bool transitional_processing)
78+
-> tl::expected<void, domain_errc> {
79+
/// https://www.unicode.org/reports/tr46/#Validity_Criteria
8980

90-
if ((label.front() == U'-') || (label.back() == U'-')) {
91-
return tl::make_unexpected(domain_errc::bad_input);
92-
}
81+
auto first = begin(label), last = end(label);
82+
83+
if (check_hyphens) {
84+
/// Criterion 2
85+
if ((label.size() >= 4) && (label.substr(2, 4) == U"--")) {
86+
return tl::make_unexpected(domain_errc::bad_input);
87+
}
88+
89+
/// Criterion 3
90+
if ((label.front() == U'-') || (label.back() == U'-')) {
91+
return tl::make_unexpected(domain_errc::bad_input);
92+
}
93+
}
94+
95+
if (check_joiners) {
96+
/// Criterion 7
97+
constexpr static auto is_not_contextj = [] (auto cp) {
98+
return (cp == U'\x200c') || (cp == U'\x200d');
99+
};
100+
101+
auto it = std::find_if(first, last, is_not_contextj);
102+
if (it != last) {
103+
return tl::make_unexpected(domain_errc::bad_input);
93104
}
105+
}
106+
107+
return {};
108+
}
109+
110+
auto idna_process(std::u32string_view domain_name, bool use_std3_ascii_rules, bool check_hyphens,
111+
bool check_bidi, bool check_joiners, bool transitional_processing)
112+
-> tl::expected<std::u32string, domain_errc> {
113+
using namespace std::string_view_literals;
114+
115+
auto result = map_code_points(domain_name, use_std3_ascii_rules, transitional_processing);
116+
if (result) {
117+
for (auto label : split(std::u32string_view(result.value()), U"."sv)) {
118+
if ((label.size() >= 4) && (label.substr(0, 4) == U"xn--")) {
119+
auto decoded = punycode_decode(label.substr(4));
120+
if (!decoded) {
121+
return tl::make_unexpected(decoded.error());
122+
}
94123

95-
if (check_joiners) {
96-
auto first = begin(label), last = end(label);
97-
auto it = std::find_if(first, last, is_contextj);
98-
if (it != last) {
99-
return tl::make_unexpected(domain_errc::bad_input);
124+
auto validated =
125+
validate_label(decoded.value(), use_std3_ascii_rules, check_hyphens, check_bidi, check_joiners, false);
126+
if (!validated) {
127+
return tl::make_unexpected(validated.error());
128+
}
129+
} else {
130+
auto validated = validate_label(label, use_std3_ascii_rules, check_hyphens, check_bidi, check_joiners,
131+
transitional_processing);
132+
if (!validated) {
133+
return tl::make_unexpected(validated.error());
134+
}
100135
}
101136
}
102137
}
138+
return result;
139+
}
140+
141+
auto domain_to_ascii(
142+
std::string_view domain_name, bool check_hyphens, bool check_bidi,
143+
bool check_joiners, bool use_std3_ascii_rules, bool transitional_processing,
144+
bool verify_dns_length) -> tl::expected<std::string, domain_errc> {
145+
/// https://www.unicode.org/reports/tr46/#ToASCII
103146

104-
auto domain = map_code_points(domain_name, use_std3_ascii_rules, transitional_processing);
147+
auto utf32 = unicode::as<std::u32string>(unicode::views::as_u8(domain_name) | unicode::transforms::to_u32);
148+
if (!utf32) {
149+
return tl::make_unexpected(domain_errc::encoding_error);
150+
}
105151

152+
auto domain = idna_process(
153+
utf32.value(), use_std3_ascii_rules, check_hyphens, check_bidi, check_joiners, transitional_processing);
106154
if (!domain) {
107155
return tl::make_unexpected(domain.error());
108156
}
@@ -137,38 +185,33 @@ auto unicode_to_ascii(
137185

138186
return join(labels, '.');
139187
}
188+
} // namespace
140189

141190
auto domain_to_ascii(
142-
std::u32string_view domain, bool be_strict, bool *validation_error) -> tl::expected<std::string, domain_errc> {
143-
auto result =
144-
unicode_to_ascii(domain, false, true, true, be_strict, false, be_strict);
191+
std::string_view domain_name, bool be_strict, bool *validation_error) -> tl::expected<std::string, domain_errc> {
192+
auto result = domain_to_ascii(domain_name, false, true, true, be_strict, false, be_strict);
145193
if (!result) {
146194
*validation_error |= true;
147195
return tl::make_unexpected(result.error());
148196
}
149-
return result;
150-
}
151-
} // namespace
152-
153-
auto domain_to_ascii(
154-
std::string_view domain, bool be_strict, bool *validation_error) -> tl::expected<std::string, domain_errc> {
155-
auto utf32 = unicode::as<std::u32string>(unicode::views::as_u8(domain) | unicode::transforms::to_u32);
156-
if (!utf32) {
157-
return tl::make_unexpected(domain_errc::encoding_error);
197+
else if (result.value().empty()) {
198+
*validation_error |= true;
199+
return tl::make_unexpected(domain_errc::empty_string);
158200
}
159-
return domain_to_ascii(utf32.value(), be_strict, validation_error);
201+
return result;
160202
}
161203

162-
auto domain_to_u8(std::string_view ascii) -> tl::expected<std::string, domain_errc> {
204+
auto domain_to_u8(std::string_view domain_name, [[maybe_unused]] bool *validation_error)
205+
-> tl::expected<std::string, domain_errc> {
163206
auto labels = std::vector<std::string>{};
164-
for (auto label : split(ascii, ".")) {
207+
for (auto label : split(domain_name, ".")) {
165208
if (label.substr(0, 4) == "xn--") {
166209
label.remove_prefix(4);
167-
auto encoded = punycode_decode(label);
168-
if (!encoded) {
169-
return tl::make_unexpected(encoded.error());
210+
auto decoded = punycode_decode(label);
211+
if (!decoded) {
212+
return tl::make_unexpected(decoded.error());
170213
}
171-
labels.emplace_back(encoded.value());
214+
labels.emplace_back(decoded.value());
172215
}
173216
else {
174217
labels.emplace_back(begin(label), end(label));

src/v1/domain/punycode.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ auto adapt(uint32_t delta, uint32_t numpoints, bool firsttime) {
7171

7272
auto punycode_encode(
7373
std::u32string_view input) -> tl::expected<std::string, domain_errc> {
74+
if (input.empty()) {
75+
return tl::make_unexpected(domain_errc::empty_string);
76+
}
77+
7478
auto result = std::string{};
7579
result.reserve(256);
7680

@@ -140,6 +144,10 @@ auto punycode_encode(
140144

141145
auto punycode_decode(
142146
std::string_view input) -> tl::expected<std::string, domain_errc> {
147+
if (input.empty()) {
148+
return tl::make_unexpected(domain_errc::empty_string);
149+
}
150+
143151
auto result = std::u32string();
144152
result.reserve(256);
145153

@@ -206,8 +214,25 @@ auto punycode_decode(
206214
return u8_result.value();
207215
}
208216

217+
auto punycode_decode(
218+
std::u32string_view input) -> tl::expected<std::u32string, domain_errc> {
219+
if (input.empty()) {
220+
return tl::make_unexpected(domain_errc::empty_string);
221+
}
222+
223+
auto u8input = unicode::as<std::string>(input | unicode::transforms::to_u8).value();
224+
return punycode_decode(std::string_view(u8input))
225+
.and_then([] (auto &&output) -> tl::expected<std::u32string, domain_errc> {
226+
return unicode::as<std::u32string>(unicode::views::as_u8(output) | unicode::transforms::to_u32).value();
227+
});
228+
}
229+
209230
auto punycode_encode(
210231
std::string_view input) -> tl::expected<std::string, domain_errc> {
232+
if (input.empty()) {
233+
return tl::make_unexpected(domain_errc::empty_string);
234+
}
235+
211236
auto utf32 = unicode::as<std::u32string>(unicode::views::as_u8(input) | unicode::transforms::to_u32);
212237
if (!utf32) {
213238
return tl::make_unexpected(domain_errc::bad_input);

src/v1/domain/punycode.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,14 @@ auto punycode_encode(
4444
/// \returns The decoded UTF-8 domain, or an error
4545
auto punycode_decode(
4646
std::string_view input) -> tl::expected<std::string, domain_errc>;
47+
48+
/// Performs Punycode decoding based on a reference implementation
49+
/// defined in [RFC 3492](https://tools.ietf.org/html/rfc3492)
50+
///
51+
/// \param input An ASCII encoded domain to be decoded
52+
/// \returns The decoded UTF-32 domain, or an error
53+
auto punycode_decode(
54+
std::u32string_view input) -> tl::expected<std::u32string, domain_errc>;
4755
} // namespace v1
4856
} // namespace skyr
4957

tests/domain/domain_tests.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2018-19 Glyn Matthews.
1+
// Copyright 2018-20 Glyn Matthews.
22
// Distributed under the Boost Software License, Version 1.0.
33
// (See accompanying file LICENSE_1_0.txt or copy at
44
// http://www.boost.org/LICENSE_1_0.txt)
@@ -25,6 +25,7 @@ TEST_CASE("valid domains to ascii", "[domain]") {
2525
SECTION("domain_to_ascii_tests") {
2626
const auto &[input, expected] = domain;
2727
auto instance = skyr::domain_to_ascii(input);
28+
INFO("input = " << input << ", expected = " << expected);
2829
REQUIRE(instance);
2930
CHECK(expected == instance.value());
3031
}

0 commit comments

Comments
 (0)