@@ -73,36 +73,84 @@ auto map_code_points(
73
73
return result;
74
74
}
75
75
76
- auto unicode_to_ascii (
77
- std::u32string_view domain_name, bool check_hyphens, [[maybe_unused]] bool check_bidi,
78
- bool check_joiners, bool use_std3_ascii_rules, bool transitional_processing,
79
- bool verify_dns_length) -> tl::expected<std::string, domain_errc> {
80
- constexpr static auto is_contextj = [] (auto cp) {
81
- return (cp == U' \x20 0c' ) || (cp == U' \x20 0d' );
82
- };
83
-
84
- for (auto label : split (std::u32string_view (domain_name), U" \x002e\xff0e\x3002\0 xff61" )) {
85
- if (check_hyphens) {
86
- if ((label.size () >= 4 ) && (label.substr (2 , 4 ) == U" --" )) {
87
- return tl::make_unexpected (domain_errc::bad_input);
88
- }
76
+ auto validate_label (std::u32string_view label, [[maybe_unused]] bool use_std3_ascii_rules, bool check_hyphens,
77
+ [[maybe_unused]] bool check_bidi, bool check_joiners, [[maybe_unused]] bool transitional_processing)
78
+ -> tl::expected<void, domain_errc> {
79
+ // / https://www.unicode.org/reports/tr46/#Validity_Criteria
89
80
90
- if ((label.front () == U' -' ) || (label.back () == U' -' )) {
91
- return tl::make_unexpected (domain_errc::bad_input);
92
- }
81
+ auto first = begin (label), last = end (label);
82
+
83
+ if (check_hyphens) {
84
+ // / Criterion 2
85
+ if ((label.size () >= 4 ) && (label.substr (2 , 4 ) == U" --" )) {
86
+ return tl::make_unexpected (domain_errc::bad_input);
87
+ }
88
+
89
+ // / Criterion 3
90
+ if ((label.front () == U' -' ) || (label.back () == U' -' )) {
91
+ return tl::make_unexpected (domain_errc::bad_input);
92
+ }
93
+ }
94
+
95
+ if (check_joiners) {
96
+ // / Criterion 7
97
+ constexpr static auto is_not_contextj = [] (auto cp) {
98
+ return (cp == U' \x20 0c' ) || (cp == U' \x20 0d' );
99
+ };
100
+
101
+ auto it = std::find_if (first, last, is_not_contextj);
102
+ if (it != last) {
103
+ return tl::make_unexpected (domain_errc::bad_input);
93
104
}
105
+ }
106
+
107
+ return {};
108
+ }
109
+
110
+ auto idna_process (std::u32string_view domain_name, bool use_std3_ascii_rules, bool check_hyphens,
111
+ bool check_bidi, bool check_joiners, bool transitional_processing)
112
+ -> tl::expected<std::u32string, domain_errc> {
113
+ using namespace std ::string_view_literals;
114
+
115
+ auto result = map_code_points (domain_name, use_std3_ascii_rules, transitional_processing);
116
+ if (result) {
117
+ for (auto label : split (std::u32string_view (result.value ()), U" ." sv)) {
118
+ if ((label.size () >= 4 ) && (label.substr (0 , 4 ) == U" xn--" )) {
119
+ auto decoded = punycode_decode (label.substr (4 ));
120
+ if (!decoded) {
121
+ return tl::make_unexpected (decoded.error ());
122
+ }
94
123
95
- if (check_joiners) {
96
- auto first = begin (label), last = end (label);
97
- auto it = std::find_if (first, last, is_contextj);
98
- if (it != last) {
99
- return tl::make_unexpected (domain_errc::bad_input);
124
+ auto validated =
125
+ validate_label (decoded.value (), use_std3_ascii_rules, check_hyphens, check_bidi, check_joiners, false );
126
+ if (!validated) {
127
+ return tl::make_unexpected (validated.error ());
128
+ }
129
+ } else {
130
+ auto validated = validate_label (label, use_std3_ascii_rules, check_hyphens, check_bidi, check_joiners,
131
+ transitional_processing);
132
+ if (!validated) {
133
+ return tl::make_unexpected (validated.error ());
134
+ }
100
135
}
101
136
}
102
137
}
138
+ return result;
139
+ }
140
+
141
+ auto domain_to_ascii (
142
+ std::string_view domain_name, bool check_hyphens, bool check_bidi,
143
+ bool check_joiners, bool use_std3_ascii_rules, bool transitional_processing,
144
+ bool verify_dns_length) -> tl::expected<std::string, domain_errc> {
145
+ // / https://www.unicode.org/reports/tr46/#ToASCII
103
146
104
- auto domain = map_code_points (domain_name, use_std3_ascii_rules, transitional_processing);
147
+ auto utf32 = unicode::as<std::u32string>(unicode::views::as_u8 (domain_name) | unicode::transforms::to_u32);
148
+ if (!utf32) {
149
+ return tl::make_unexpected (domain_errc::encoding_error);
150
+ }
105
151
152
+ auto domain = idna_process (
153
+ utf32.value (), use_std3_ascii_rules, check_hyphens, check_bidi, check_joiners, transitional_processing);
106
154
if (!domain) {
107
155
return tl::make_unexpected (domain.error ());
108
156
}
@@ -137,38 +185,33 @@ auto unicode_to_ascii(
137
185
138
186
return join (labels, ' .' );
139
187
}
188
+ } // namespace
140
189
141
190
auto domain_to_ascii (
142
- std::u32string_view domain, bool be_strict, bool *validation_error) -> tl::expected<std::string, domain_errc> {
143
- auto result =
144
- unicode_to_ascii (domain, false , true , true , be_strict, false , be_strict);
191
+ std::string_view domain_name, bool be_strict, bool *validation_error) -> tl::expected<std::string, domain_errc> {
192
+ auto result = domain_to_ascii (domain_name, false , true , true , be_strict, false , be_strict);
145
193
if (!result) {
146
194
*validation_error |= true ;
147
195
return tl::make_unexpected (result.error ());
148
196
}
149
- return result;
150
- }
151
- } // namespace
152
-
153
- auto domain_to_ascii (
154
- std::string_view domain, bool be_strict, bool *validation_error) -> tl::expected<std::string, domain_errc> {
155
- auto utf32 = unicode::as<std::u32string>(unicode::views::as_u8 (domain) | unicode::transforms::to_u32);
156
- if (!utf32) {
157
- return tl::make_unexpected (domain_errc::encoding_error);
197
+ else if (result.value ().empty ()) {
198
+ *validation_error |= true ;
199
+ return tl::make_unexpected (domain_errc::empty_string);
158
200
}
159
- return domain_to_ascii (utf32. value (), be_strict, validation_error) ;
201
+ return result ;
160
202
}
161
203
162
- auto domain_to_u8 (std::string_view ascii) -> tl::expected<std::string, domain_errc> {
204
+ auto domain_to_u8 (std::string_view domain_name, [[maybe_unused]] bool *validation_error)
205
+ -> tl::expected<std::string, domain_errc> {
163
206
auto labels = std::vector<std::string>{};
164
- for (auto label : split (ascii , " ." )) {
207
+ for (auto label : split (domain_name , " ." )) {
165
208
if (label.substr (0 , 4 ) == " xn--" ) {
166
209
label.remove_prefix (4 );
167
- auto encoded = punycode_decode (label);
168
- if (!encoded ) {
169
- return tl::make_unexpected (encoded .error ());
210
+ auto decoded = punycode_decode (label);
211
+ if (!decoded ) {
212
+ return tl::make_unexpected (decoded .error ());
170
213
}
171
- labels.emplace_back (encoded .value ());
214
+ labels.emplace_back (decoded .value ());
172
215
}
173
216
else {
174
217
labels.emplace_back (begin (label), end (label));
0 commit comments