diff --git a/include/skyr/v1/percent_encoding/percent_encode.hpp b/include/skyr/v1/percent_encoding/percent_encode.hpp index 4705615e..7056190c 100644 --- a/include/skyr/v1/percent_encoding/percent_encode.hpp +++ b/include/skyr/v1/percent_encoding/percent_encode.hpp @@ -14,21 +14,11 @@ namespace skyr { inline namespace v1 { /// Percent encodes the input -/// \returns The percent encoded output when successful, an error otherwise. -inline auto percent_encode(std::string_view input) { - using percent_encoding::percent_encoded_char; - +/// \returns The percent encoded output. +inline auto percent_encode(std::string_view input) -> std::string { static constexpr auto encode = [] (auto byte) { - if ((byte == '\x2a') || (byte == '\x2d') || (byte == '\x2e') || - ((byte >= '\x30') && (byte <= '\x39')) || - ((byte >= '\x41') && (byte <= '\x5a')) || (byte == '\x5f') || - ((byte >= '\x61') && (byte <= '\x7a'))) { - return percent_encoded_char( - std::byte(byte), percent_encoded_char::no_encode()); - } else if (byte == '\x20') { - return percent_encoded_char(std::byte('+'), percent_encoded_char::no_encode()); - } - return percent_encoded_char(std::byte(byte)); + using percent_encoding::percent_encode_byte; + return percent_encode_byte(std::byte(byte), percent_encoding::encode_set::component); }; auto result = std::string{}; diff --git a/include/skyr/v1/percent_encoding/percent_encoded_char.hpp b/include/skyr/v1/percent_encoding/percent_encoded_char.hpp index 3dce8ffd..8aa6da10 100644 --- a/include/skyr/v1/percent_encoding/percent_encoded_char.hpp +++ b/include/skyr/v1/percent_encoding/percent_encoded_char.hpp @@ -52,11 +52,33 @@ inline constexpr auto is_fragment_byte(std::byte value) { /// /// \param value /// \return -inline constexpr auto is_path_byte(std::byte value) { +inline constexpr auto is_query_byte(std::byte value) { return - is_fragment_byte(value) || + is_c0_control_byte(value) || + (value == std::byte(0x20)) || + (value == std::byte(0x22)) || (value == 
std::byte(0x23)) || + (value == std::byte(0x3c)) || + (value == std::byte(0x3e)); +} + +/// +/// \param value +/// \return +inline constexpr auto is_special_query_byte(std::byte value) { + return + is_query_byte(value) || + (value == std::byte(0x27)); +} + +/// +/// \param value +/// \return +inline constexpr auto is_path_byte(std::byte value) { + return + is_query_byte(value) || (value == std::byte(0x3f)) || + (value == std::byte(0x60)) || (value == std::byte(0x7b)) || (value == std::byte(0x7d)); } @@ -78,20 +100,39 @@ inline constexpr auto is_userinfo_byte(std::byte value) { (value == std::byte(0x5e)) || (value == std::byte(0x7c)); } + +/// +/// \param value +/// \return +inline constexpr auto is_component_byte(std::byte value) { + return + is_userinfo_byte(value) || + (value == std::byte(0x24)) || + (value == std::byte(0x25)) || + (value == std::byte(0x26)) || + (value == std::byte(0x2b)) || + (value == std::byte(0x2c)); +} } // namespace details /// enum class encode_set { /// - none = 0, + any = 0, /// c0_control, /// fragment, /// + query, + /// + special_query, + /// path, /// userinfo, + /// + component, }; /// @@ -199,18 +240,24 @@ inline auto percent_encode_byte(std::byte byte, Pred pred) -> percent_encoded_ch /// /// \param value -/// \param excludes +/// \param encodes /// \return -inline auto percent_encode_byte(std::byte value, encode_set excludes) -> percent_encoded_char { - switch (excludes) { - case encode_set::none: +inline auto percent_encode_byte(std::byte value, encode_set encodes) -> percent_encoded_char { + switch (encodes) { + case encode_set::any: return percent_encoding::percent_encoded_char(value); case encode_set::c0_control: return percent_encode_byte(value, details::is_c0_control_byte); + case encode_set::component: + return percent_encode_byte(value, details::is_component_byte); case encode_set::userinfo: return percent_encode_byte(value, details::is_userinfo_byte); case encode_set::path: return percent_encode_byte(value, 
details::is_path_byte); + case encode_set::special_query: + return percent_encode_byte(value, details::is_special_query_byte); + case encode_set::query: + return percent_encode_byte(value, details::is_query_byte); case encode_set::fragment: return percent_encode_byte(value, details::is_fragment_byte); } diff --git a/include/skyr/v2/core/parse_path.hpp b/include/skyr/v2/core/parse_path.hpp index 4278a783..0abf82c1 100644 --- a/include/skyr/v2/core/parse_path.hpp +++ b/include/skyr/v2/core/parse_path.hpp @@ -19,7 +19,7 @@ inline auto parse_path( std::string_view path, bool *validation_error) -> tl::expected, url_parse_errc> { auto url = details::basic_parse(path, validation_error, nullptr, nullptr, url_parse_state::path_start); if (url) { - return url.value().path; + return std::move(url.value()).path; } return tl::make_unexpected(url.error()); } diff --git a/include/skyr/v2/core/parse_query.hpp b/include/skyr/v2/core/parse_query.hpp index 18f7f019..41ac257b 100644 --- a/include/skyr/v2/core/parse_query.hpp +++ b/include/skyr/v2/core/parse_query.hpp @@ -11,7 +11,6 @@ #include #include #include -#include namespace skyr::inline v2 { /// @@ -46,17 +45,17 @@ inline auto parse_query( if (url) { static constexpr auto is_separator = [](auto c) { return c == '&' || c == ';'; }; - static constexpr auto to_nvp = [](auto &¶m) -> query_parameter { - if (ranges::empty(param)) { + static constexpr auto to_nvp = [](auto &¶meter) -> query_parameter { + if (ranges::empty(parameter)) { return {}; } - auto element = std::string_view(std::addressof(*std::begin(param)), ranges::distance(param)); - auto delim = element.find_first_of('='); + auto view = std::string_view(std::addressof(*std::begin(parameter)), ranges::distance(parameter)); + auto delim = view.find_first_of('='); if (delim != std::string_view::npos) { - return {std::string(element.substr(0, delim)), std::string(element.substr(delim + 1))}; + return {std::string(view.substr(0, delim)), std::string(view.substr(delim + 1))}; } 
else { - return {std::string(element)}; + return {std::string(view)}; } }; diff --git a/include/skyr/v2/core/url_parser_context.hpp b/include/skyr/v2/core/url_parser_context.hpp index e9746392..0a93eea6 100644 --- a/include/skyr/v2/core/url_parser_context.hpp +++ b/include/skyr/v2/core/url_parser_context.hpp @@ -889,7 +889,7 @@ class url_parser_context { if (!url.query) { set_empty_query(); } - auto pct_encoded = percent_encode_byte(std::byte(byte), percent_encoding::encode_set::none); + auto pct_encoded = percent_encode_byte(std::byte(byte), percent_encoding::encode_set::any); url.query.value() += std::move(pct_encoded).to_string(); } diff --git a/include/skyr/v2/domain/domain.hpp b/include/skyr/v2/domain/domain.hpp index ef9a93f5..e827606d 100644 --- a/include/skyr/v2/domain/domain.hpp +++ b/include/skyr/v2/domain/domain.hpp @@ -141,7 +141,7 @@ inline auto domain_to_ascii_impl(domain_to_ascii_context &&context) -> tl::expec } }; - constexpr auto process_labels = [](auto &&ctx) -> tl::expected { + constexpr auto process_labels = [](auto &&ctx) -> tl::expected, domain_errc> { using namespace std::string_view_literals; constexpr auto to_string_view = [](auto &&label) { @@ -198,7 +198,7 @@ inline auto domain_to_ascii_impl(domain_to_ascii_context &&context) -> tl::expec }; constexpr auto check_length = - [](domain_to_ascii_context &&ctx) -> tl::expected { + [](auto &&ctx) -> tl::expected, domain_errc> { constexpr auto max_domain_length = 253; constexpr auto max_label_length = 63; diff --git a/include/skyr/v2/percent_encoding/percent_encode.hpp b/include/skyr/v2/percent_encoding/percent_encode.hpp index 74da44b8..7ca5a65a 100644 --- a/include/skyr/v2/percent_encoding/percent_encode.hpp +++ b/include/skyr/v2/percent_encoding/percent_encode.hpp @@ -14,17 +14,10 @@ namespace skyr::inline v2 { /// Percent encodes the input /// \returns The percent encoded output when successful, an error otherwise. 
-inline auto percent_encode(std::string_view input) {
-  using percent_encoding::percent_encoded_char;
-
-  static constexpr auto encode = [](auto byte) {
-    if ((byte == '\x2a') || (byte == '\x2d') || (byte == '\x2e') || ((byte >= '\x30') && (byte <= '\x39')) ||
-        ((byte >= '\x41') && (byte <= '\x5a')) || (byte == '\x5f') || ((byte >= '\x61') && (byte <= '\x7a'))) {
-      return percent_encoded_char(std::byte(byte), percent_encoded_char::no_encode());
-    } else if (byte == '\x20') {
-      return percent_encoded_char(std::byte('+'), percent_encoded_char::no_encode());
-    }
-    return percent_encoded_char(std::byte(byte));
+inline auto percent_encode_bytes(std::string_view input, percent_encoding::encode_set encodes) -> std::string {
+  const auto encode = [encodes](auto byte) {  // per-call, by-value capture: a static [&encodes] closure would dangle after the first call
+    using percent_encoding::percent_encode_byte;
+    return percent_encode_byte(std::byte(byte), encodes);
   };
 
   auto result = std::string{};
@@ -33,6 +26,12 @@ inline auto percent_encode(std::string_view input) {
   }
   return result;
 }
+
+/// Percent encodes the input using the component encode set
+/// \returns The percent encoded output.
+inline auto percent_encode(std::string_view input) -> std::string {
+  return percent_encode_bytes(input, percent_encoding::encode_set::component);
+}
 } // namespace skyr::inline v2
 
 #endif // SKYR_V2_PERCENT_ENCODING_PERCENT_ENCODE_HPP
diff --git a/include/skyr/v2/percent_encoding/percent_encoded_char.hpp b/include/skyr/v2/percent_encoding/percent_encoded_char.hpp
index 19d87b7e..ce119387 100644
--- a/include/skyr/v2/percent_encoding/percent_encoded_char.hpp
+++ b/include/skyr/v2/percent_encoding/percent_encoded_char.hpp
@@ -1,4 +1,4 @@
-// Copyright 2019-20 Glyn Matthews.
+// Copyright 2019-21 Glyn Matthews.
 // Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) @@ -10,7 +10,8 @@ #include #include -namespace skyr::inline v2 { +namespace skyr { +inline namespace v2 { namespace percent_encoding { namespace details { /// @@ -39,50 +40,110 @@ inline constexpr auto is_c0_control_byte(std::byte value) noexcept { /// \param value /// \return inline constexpr auto is_fragment_byte(std::byte value) { - return is_c0_control_byte(value) || (value == std::byte(0x20)) || (value == std::byte(0x22)) || - (value == std::byte(0x3c)) || (value == std::byte(0x3e)) || (value == std::byte(0x60)); + return + is_c0_control_byte(value) || + (value == std::byte(0x20)) || + (value == std::byte(0x22)) || + (value == std::byte(0x3c)) || + (value == std::byte(0x3e)) || + (value == std::byte(0x60)); +} + +/// +/// \param value +/// \return +inline constexpr auto is_query_byte(std::byte value) { + return + is_c0_control_byte(value) || + (value == std::byte(0x20)) || + (value == std::byte(0x22)) || + (value == std::byte(0x23)) || + (value == std::byte(0x3c)) || + (value == std::byte(0x3e)); +} + +/// +/// \param value +/// \return +inline constexpr auto is_special_query_byte(std::byte value) { + return + is_query_byte(value) || + (value == std::byte(0x27)); } /// /// \param value /// \return inline constexpr auto is_path_byte(std::byte value) { - return is_fragment_byte(value) || (value == std::byte(0x23)) || (value == std::byte(0x3f)) || - (value == std::byte(0x7b)) || (value == std::byte(0x7d)); + return + is_query_byte(value) || + (value == std::byte(0x3f)) || + (value == std::byte(0x60)) || + (value == std::byte(0x7b)) || + (value == std::byte(0x7d)); } /// /// \param value /// \return inline constexpr auto is_userinfo_byte(std::byte value) { - return is_path_byte(value) || (value == std::byte(0x2f)) || (value == std::byte(0x3a)) || - (value == std::byte(0x3b)) || (value == std::byte(0x3d)) || (value == std::byte(0x40)) || - (value == std::byte(0x5b)) || 
(value == std::byte(0x5c)) || (value == std::byte(0x5d)) || - (value == std::byte(0x5e)) || (value == std::byte(0x7c)); + return + is_path_byte(value) || + (value == std::byte(0x2f)) || + (value == std::byte(0x3a)) || + (value == std::byte(0x3b)) || + (value == std::byte(0x3d)) || + (value == std::byte(0x40)) || + (value == std::byte(0x5b)) || + (value == std::byte(0x5c)) || + (value == std::byte(0x5d)) || + (value == std::byte(0x5e)) || + (value == std::byte(0x7c)); +} + +/// +/// \param value +/// \return +inline constexpr auto is_component_byte(std::byte value) { + return + is_userinfo_byte(value) || + (value == std::byte(0x24)) || + (value == std::byte(0x25)) || + (value == std::byte(0x26)) || + (value == std::byte(0x2b)) || + (value == std::byte(0x2c)); } } // namespace details /// enum class encode_set { /// - none = 0, + any = 0, /// c0_control, /// fragment, /// + query, + /// + special_query, + /// path, /// userinfo, + /// + component, }; /// struct percent_encoded_char { + using impl_type = std::string; static constexpr std::byte mask = std::byte(0x0f); public: + /// using const_iterator = impl_type::const_iterator; /// @@ -100,14 +161,14 @@ struct percent_encoded_char { /// /// \param value - percent_encoded_char(std::byte value, no_encode) : impl_{static_cast(value)} { - } + percent_encoded_char(std::byte value, no_encode) + : impl_{static_cast(value)} {} /// /// \param value explicit percent_encoded_char(std::byte value) - : impl_{'%', details::hex_to_alnum((value >> 4u) & mask), details::hex_to_alnum(value & mask)} { - } + : impl_{ + '%', details::hex_to_alnum((value >> 4u) & mask), details::hex_to_alnum(value & mask)} {} /// /// \return @@ -147,18 +208,20 @@ struct percent_encoded_char { /// /// \return - [[nodiscard]] auto to_string() const& -> std::string { + [[nodiscard]] auto to_string() const & -> std::string { return impl_; } /// /// \return - [[nodiscard]] auto to_string() && noexcept -> std::string&& { + [[nodiscard]] auto to_string() && 
noexcept -> std::string && { return std::move(impl_); } private: + impl_type impl_; + }; /// @@ -171,23 +234,30 @@ inline auto percent_encode_byte(std::byte byte, Pred pred) -> percent_encoded_ch if (pred(byte)) { return percent_encoding::percent_encoded_char(byte); } - return percent_encoding::percent_encoded_char(byte, percent_encoding::percent_encoded_char::no_encode()); + return percent_encoding::percent_encoded_char( + byte, percent_encoding::percent_encoded_char::no_encode()); } /// /// \param value -/// \param excludes +/// \param encodes /// \return -inline auto percent_encode_byte(std::byte value, encode_set excludes) -> percent_encoded_char { - switch (excludes) { - case encode_set::none: +inline auto percent_encode_byte(std::byte value, encode_set encodes) -> percent_encoded_char { + switch (encodes) { + case encode_set::any: return percent_encoding::percent_encoded_char(value); case encode_set::c0_control: return percent_encode_byte(value, details::is_c0_control_byte); + case encode_set::component: + return percent_encode_byte(value, details::is_component_byte); case encode_set::userinfo: return percent_encode_byte(value, details::is_userinfo_byte); case encode_set::path: return percent_encode_byte(value, details::is_path_byte); + case encode_set::special_query: + return percent_encode_byte(value, details::is_special_query_byte); + case encode_set::query: + return percent_encode_byte(value, details::is_query_byte); case encode_set::fragment: return percent_encode_byte(value, details::is_fragment_byte); } @@ -198,11 +268,15 @@ inline auto percent_encode_byte(std::byte value, encode_set excludes) -> percent /// \param input An ASCII string /// \returns `true` if the input string contains percent encoded /// values, `false` otherwise -constexpr inline auto is_percent_encoded(std::string_view input) noexcept { - return (input.size() == 3) && (input[0] == '%') && std::isxdigit(input[1], std::locale::classic()) && - std::isxdigit(input[2], 
std::locale::classic()); +inline auto is_percent_encoded(std::string_view input) noexcept { + return + (input.size() == 3) && + (input[0] == '%') && + std::isxdigit(input[1], std::locale::classic()) && + std::isxdigit(input[2], std::locale::classic()); } } // namespace percent_encoding -} // namespace skyr::inline v2 +} // namespace v2 +} // namespace skyr -#endif // SKYR_V2_PERCENT_ENCODING_PERCENT_ENCODED_CHAR_HPP +#endif //SKYR_V2_PERCENT_ENCODING_PERCENT_ENCODED_CHAR_HPP diff --git a/include/skyr/v2/unicode/core.hpp b/include/skyr/v2/unicode/core.hpp index 3e060cdf..8392a37e 100644 --- a/include/skyr/v2/unicode/core.hpp +++ b/include/skyr/v2/unicode/core.hpp @@ -87,8 +87,7 @@ constexpr inline auto sequence_length(uint8_t lead_value) { /// \tparam OctetIterator template struct sequence_state { - constexpr sequence_state(OctetIterator it, char32_t value) : it(it), value(value) { - } + constexpr sequence_state(OctetIterator it, char32_t value) : it(it), value(value) {} /// The current iterator OctetIterator it; diff --git a/include/skyr/v2/unicode/ranges/transforms/u8_transform.hpp b/include/skyr/v2/unicode/ranges/transforms/u8_transform.hpp index c3ba4c0c..eedf2800 100644 --- a/include/skyr/v2/unicode/ranges/transforms/u8_transform.hpp +++ b/include/skyr/v2/unicode/ranges/transforms/u8_transform.hpp @@ -44,8 +44,7 @@ class u8_transform_iterator { /// /// \param first The first iterator in the code point sequence /// \param last The end iterator in the code point sequence - constexpr u8_transform_iterator(CodePointIterator first, Sentinel last) : it_(first), last_(last) { - } + constexpr u8_transform_iterator(CodePointIterator first, Sentinel last) : it_(first), last_(last) {} /// Pre-increment operator /// \return A reference to this iterator @@ -170,8 +169,7 @@ class transform_u8_range { /// Constructor /// \param range A range of code points - explicit constexpr transform_u8_range(const CodePointRange &range) : first_(std::cbegin(range), std::cend(range)) { - } 
+ explicit constexpr transform_u8_range(const CodePointRange &range) : first_(std::cbegin(range), std::cend(range)) {} /// Returns an iterator to the first element in the code point sequence /// \return \c const_iterator diff --git a/src/v1/core/url_parser_context.cpp b/src/v1/core/url_parser_context.cpp index b46ae75b..ec7d30f5 100644 --- a/src/v1/core/url_parser_context.cpp +++ b/src/v1/core/url_parser_context.cpp @@ -697,7 +697,7 @@ auto url_parser_context::parse_query(char byte) -> tl::expected '~') || (contains(R"("#<>)"sv, byte)) || ((byte == '\'') && url.is_special())) { - auto pct_encoded = percent_encode_byte(std::byte(byte), percent_encoding::encode_set::none); + auto pct_encoded = percent_encode_byte(std::byte(byte), percent_encoding::encode_set::any); url.query.value() += pct_encoded.to_string(); } else { url.query.value().push_back(byte); diff --git a/tests/v1/percent_encoding/percent_encoding_tests.cpp b/tests/v1/percent_encoding/percent_encoding_tests.cpp index d3cb2823..2287b475 100644 --- a/tests/v1/percent_encoding/percent_encoding_tests.cpp +++ b/tests/v1/percent_encoding/percent_encoding_tests.cpp @@ -7,6 +7,7 @@ #include #define FMT_HEADER_ONLY #include +#include #include TEST_CASE("encode fragment", "[percent_encoding]") { @@ -61,4 +62,38 @@ TEST_CASE("encode_tests", "[percent_encoding]") { CHECK(fmt::format("%{:02X}", i) == encoded.to_string()); } } + + SECTION("encode_0x25") { + auto encoded = skyr::percent_encoding::percent_encode_byte( + std::byte(0x25), skyr::percent_encoding::encode_set::any); + CHECK("%25" == encoded.to_string()); + } + + SECTION("encode_0x2b") { + auto encoded = skyr::percent_encoding::percent_encode_byte( + std::byte(0x2b), skyr::percent_encoding::encode_set::any); + CHECK("%2B" == encoded.to_string()); + } } + +// https://url.spec.whatwg.org/#example-percent-encode-operations +TEST_CASE("example_percent_encode_operations") +{ + SECTION("0x23") + { + auto encoded = skyr::percent_encode(R"(#)"); + CHECK("%23" == 
encoded);
+  }
+
+  SECTION("0x7f")
+  {
+    auto encoded = skyr::percent_encode("\x7f");
+    CHECK("%7F" == encoded);
+  }
+
+  SECTION("0x20")
+  {
+    auto encoded = skyr::percent_encode(" ");  // space is in the query set, hence in the component set
+    CHECK("%20" == encoded);
+  }
+}
\ No newline at end of file
diff --git a/tests/v2/percent_encoding/percent_encoding_tests.cpp b/tests/v2/percent_encoding/percent_encoding_tests.cpp
index 905a8db9..e3f23e64 100644
--- a/tests/v2/percent_encoding/percent_encoding_tests.cpp
+++ b/tests/v2/percent_encoding/percent_encoding_tests.cpp
@@ -61,4 +61,16 @@ TEST_CASE("encode_tests", "[percent_encoding]") {
       CHECK(fmt::format("%{:02X}", i) == encoded.to_string());
     }
   }
+
+  SECTION("encode_0x25") {
+    auto encoded = skyr::percent_encoding::percent_encode_byte(
+        std::byte(0x25), skyr::percent_encoding::encode_set::any);
+    CHECK("%25" == encoded.to_string());
+  }
+
+  SECTION("encode_0x2b") {
+    auto encoded = skyr::percent_encoding::percent_encode_byte(
+        std::byte(0x2b), skyr::percent_encoding::encode_set::any);
+    CHECK("%2B" == encoded.to_string());
+  }
 }