From 72c91d195999ca270e7353a17e1ab470e50ca55f Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Sun, 24 Jul 2022 22:45:47 +0800 Subject: [PATCH 01/46] optimize ColumnString Append --- clickhouse/columns/string.cpp | 64 ++++++--------------------- clickhouse/columns/string.h | 82 +++++++++++++++++++++++++++++++++-- ut/columns_ut.cpp | 12 +++++ 3 files changed, 104 insertions(+), 54 deletions(-) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index cfd4c061..01ef25dc 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -4,7 +4,6 @@ #include "../base/wire_format.h" namespace { -const size_t DEFAULT_BLOCK_SIZE = 4096; template size_t ComputeTotalSize(const Container & strings, size_t begin = 0, size_t len = -1) @@ -120,47 +119,6 @@ ItemView ColumnFixedString::GetItem(size_t index) const { return ItemView{Type::FixedString, this->At(index)}; } -struct ColumnString::Block -{ - using CharT = typename std::string::value_type; - - explicit Block(size_t starting_capacity) - : size(0), - capacity(starting_capacity), - data_(new CharT[capacity]) - {} - - inline auto GetAvailable() const - { - return capacity - size; - } - - std::string_view AppendUnsafe(std::string_view str) - { - const auto pos = &data_[size]; - - memcpy(pos, str.data(), str.size()); - size += str.size(); - - return std::string_view(pos, str.size()); - } - - auto GetCurrentWritePos() - { - return &data_[size]; - } - - std::string_view ConsumeTailAsStringViewUnsafe(size_t len) - { - const auto start = &data_[size]; - size += len; - return std::string_view(start, len); - } - - size_t size; - const size_t capacity; - std::unique_ptr data_; -}; ColumnString::ColumnString() : Column(Type::CreateString()) @@ -179,18 +137,21 @@ ColumnString::ColumnString(const std::vector & data) } } -ColumnString::~ColumnString() -{} +ColumnString::ColumnString(std::vector&& data) + : Column(Type::CreateString()) +{ + items_.reserve(data.size()); -void ColumnString::Append(std::string_view str) { - if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) - { - blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size())); + for (auto&& d : data) { + append_data_.emplace_back(std::move(d)); + auto& last_data = append_data_.back(); + items_.emplace_back(std::string_view{ last_data.data(),last_data.length() }); } - - items_.emplace_back(blocks_.back().AppendUnsafe(str)); } +ColumnString::~ColumnString() +{} + void ColumnString::AppendUnsafe(std::string_view str) { items_.emplace_back(blocks_.back().AppendUnsafe(str)); @@ -199,6 +160,8 @@ void ColumnString::AppendUnsafe(std::string_view str) void ColumnString::Clear() { items_.clear(); blocks_.clear(); + append_data_.clear(); + append_data_.shrink_to_fit(); } std::string_view ColumnString::At(size_t n) const { @@ -283,6 +246,7 @@ void ColumnString::Swap(Column& other) { auto & col = dynamic_cast(other); items_.swap(col.items_); blocks_.swap(col.blocks_); + append_data_.swap(col.append_data_); } ItemView ColumnString::GetItem(size_t index) const { diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index d6defe50..cb7638e9 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -6,9 +6,17 @@ #include #include #include +#include +#include +#include namespace clickhouse { +constexpr size_t DEFAULT_BLOCK_SIZE = 4096; + +template +inline constexpr bool always_false_v = false; + /** * Represents column of fixed-length strings. */ @@ -78,12 +86,10 @@ class ColumnString : public Column { ~ColumnString(); explicit ColumnString(const std::vector & data); + explicit ColumnString(std::vector&& data); ColumnString& operator=(const ColumnString&) = delete; ColumnString(const ColumnString&) = delete; - /// Appends one element to the column. - void Append(std::string_view str); - /// Returns element at given row number. std::string_view At(size_t n) const; @@ -116,10 +122,78 @@ class ColumnString : public Column { void AppendUnsafe(std::string_view); private: - struct Block; + struct Block { + using CharT = typename std::string::value_type; + + explicit Block(size_t starting_capacity) + : size(0), + capacity(starting_capacity), + data_(new CharT[capacity]) + {} + + inline auto GetAvailable() const + { + return capacity - size; + } + + std::string_view AppendUnsafe(std::string_view str) + { + const auto pos = &data_[size]; + + memcpy(pos, str.data(), str.size()); + size += str.size(); + + return std::string_view(pos, str.size()); + } + + auto GetCurrentWritePos() + { + return &data_[size]; + } + + std::string_view ConsumeTailAsStringViewUnsafe(size_t len) + { + const auto start = &data_[size]; + size += len; + return std::string_view(start, len); + } + + size_t size; + const size_t capacity; + std::unique_ptr data_; + }; std::vector items_; std::vector blocks_; + std::deque append_data_; + +public: + /// Appends one element to the column. Copy or move str + template + void Append(StringType&& str) { + using str_type = decltype(str); + if (std::is_same_v> && std::is_rvalue_reference_v) { + append_data_.emplace_back(std::move(str)); + auto& last_data = append_data_.back(); + items_.emplace_back(std::string_view{ last_data.data(),last_data.length() }); + } + else if constexpr (std::is_convertible_v, std::string_view>) { + auto data_view = std::string_view(str); + if (blocks_.size() == 0 || blocks_.back().GetAvailable() < data_view.length()) { + blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, data_view.size())); + } + items_.emplace_back(blocks_.back().AppendUnsafe(data_view)); + } + else { + static_assert(always_false_v, "the StringType is not correct"); + } + } + + /// Appends one element to the column. + /// If str lifetime is managed elsewhere and guaranteed to outlive the Block sent to the server + void AppendNoManagedLifetime(std::string_view str) { + items_.emplace_back(str); + } }; } diff --git a/ut/columns_ut.cpp b/ut/columns_ut.cpp index 9a806e39..16e6aa43 100644 --- a/ut/columns_ut.cpp +++ b/ut/columns_ut.cpp @@ -111,6 +111,18 @@ TEST(ColumnsCase, StringInit) { ASSERT_EQ(col->At(3), "abcd"); } +TEST(ColumnsCase, StringAppend) { + auto col = std::make_shared(); + std::string data = "ufiudhf3493fyiudferyer3yrifhdflkdjfeuroe"; + col->Append(data); + col->Append(std::move(data)); + col->Append("11"); + + ASSERT_EQ(col->Size(), 3u); + ASSERT_EQ(col->At(0), "ufiudhf3493fyiudferyer3yrifhdflkdjfeuroe"); + ASSERT_EQ(col->At(1), "ufiudhf3493fyiudferyer3yrifhdflkdjfeuroe"); + ASSERT_EQ(col->At(2), "11"); +} TEST(ColumnsCase, TupleAppend){ auto tuple1 = std::make_shared(std::vector({ From 6c58f13e9ad086bbd0332c7fe659e122663e4315 Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Mon, 25 Jul 2022 21:06:42 +0800 Subject: [PATCH 02/46] adjust code --- clickhouse/columns/string.cpp | 62 +++++++++++++++++++++++++++ clickhouse/columns/string.h | 79 +++++------------------------------ ut/columns_ut.cpp | 7 ++-- 3 files changed, 77 insertions(+), 71 deletions(-) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index 01ef25dc..9607f16c 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -119,6 +119,47 @@ ItemView ColumnFixedString::GetItem(size_t index) const { return ItemView{Type::FixedString, this->At(index)}; } +struct ColumnString::Block +{ + using CharT = typename std::string::value_type; + + explicit Block(size_t starting_capacity) + : size(0), + capacity(starting_capacity), + data_(new CharT[capacity]) + {} + + inline auto GetAvailable() const + { + return capacity - size; + } + + std::string_view AppendUnsafe(std::string_view str) + { + const auto pos = &data_[size]; + + memcpy(pos, str.data(), str.size()); + size += str.size(); + + return std::string_view(pos, str.size()); + } + + auto GetCurrentWritePos() + { + return &data_[size]; + } + + std::string_view ConsumeTailAsStringViewUnsafe(size_t len) + { + const auto start = &data_[size]; + size += len; + return std::string_view(start, len); + } + + size_t size; + const size_t capacity; + std::unique_ptr data_; +}; ColumnString::ColumnString() : Column(Type::CreateString()) @@ -152,6 +193,27 @@ ColumnString::ColumnString(std::vector&& data) ColumnString::~ColumnString() {} +void ColumnString::Append(const std::string_view& str) { + if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) + { + blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size())); + } + + items_.emplace_back(blocks_.back().AppendUnsafe(str)); +} + +void ColumnString::Append(std::string&& steal_value) +{ + append_data_.emplace_back(std::move(steal_value)); + auto& last_data = append_data_.back(); + items_.emplace_back(std::string_view{ last_data.data(),last_data.length() }); +} + +void ColumnString::AppendNoManagedLifetime(std::string_view str) +{ + items_.emplace_back(str); +} + void ColumnString::AppendUnsafe(std::string_view str) { items_.emplace_back(blocks_.back().AppendUnsafe(str)); diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index cb7638e9..36cb9caf 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -90,6 +90,16 @@ class ColumnString : public Column { ColumnString& operator=(const ColumnString&) = delete; ColumnString(const ColumnString&) = delete; + /// Appends one element to the column. + void Append(const std::string_view& str); + + /// Appends one element to the column. + void Append(std::string&& steal_value); + + /// Appends one element to the column. + /// If str lifetime is managed elsewhere and guaranteed to outlive the Block sent to the server + void AppendNoManagedLifetime(std::string_view str); + /// Returns element at given row number. std::string_view At(size_t n) const; @@ -122,78 +132,11 @@ class ColumnString : public Column { void AppendUnsafe(std::string_view); private: - struct Block { - using CharT = typename std::string::value_type; - - explicit Block(size_t starting_capacity) - : size(0), - capacity(starting_capacity), - data_(new CharT[capacity]) - {} - - inline auto GetAvailable() const - { - return capacity - size; - } - - std::string_view AppendUnsafe(std::string_view str) - { - const auto pos = &data_[size]; - - memcpy(pos, str.data(), str.size()); - size += str.size(); - - return std::string_view(pos, str.size()); - } - - auto GetCurrentWritePos() - { - return &data_[size]; - } - - std::string_view ConsumeTailAsStringViewUnsafe(size_t len) - { - const auto start = &data_[size]; - size += len; - return std::string_view(start, len); - } - - size_t size; - const size_t capacity; - std::unique_ptr data_; - }; + struct Block; std::vector items_; std::vector blocks_; std::deque append_data_; - -public: - /// Appends one element to the column. Copy or move str - template - void Append(StringType&& str) { - using str_type = decltype(str); - if (std::is_same_v> && std::is_rvalue_reference_v) { - append_data_.emplace_back(std::move(str)); - auto& last_data = append_data_.back(); - items_.emplace_back(std::string_view{ last_data.data(),last_data.length() }); - } - else if constexpr (std::is_convertible_v, std::string_view>) { - auto data_view = std::string_view(str); - if (blocks_.size() == 0 || blocks_.back().GetAvailable() < data_view.length()) { - blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, data_view.size())); - } - items_.emplace_back(blocks_.back().AppendUnsafe(data_view)); - } - else { - static_assert(always_false_v, "the StringType is not correct"); - } - } - - /// Appends one element to the column. - /// If str lifetime is managed elsewhere and guaranteed to outlive the Block sent to the server - void AppendNoManagedLifetime(std::string_view str) { - items_.emplace_back(str); - } }; } diff --git a/ut/columns_ut.cpp b/ut/columns_ut.cpp index 16e6aa43..3d045ba2 100644 --- a/ut/columns_ut.cpp +++ b/ut/columns_ut.cpp @@ -113,14 +113,15 @@ TEST(ColumnsCase, StringInit) { TEST(ColumnsCase, StringAppend) { auto col = std::make_shared(); - std::string data = "ufiudhf3493fyiudferyer3yrifhdflkdjfeuroe"; + const char* expected = "ufiudhf3493fyiudferyer3yrifhdflkdjfeuroe"; + std::string data(expected); col->Append(data); col->Append(std::move(data)); col->Append("11"); ASSERT_EQ(col->Size(), 3u); - ASSERT_EQ(col->At(0), "ufiudhf3493fyiudferyer3yrifhdflkdjfeuroe"); - ASSERT_EQ(col->At(1), "ufiudhf3493fyiudferyer3yrifhdflkdjfeuroe"); + ASSERT_EQ(col->At(0), expected); + ASSERT_EQ(col->At(1), expected); ASSERT_EQ(col->At(2), "11"); } From e56d9bfa3d4b2e72d3a87ec6af6fd37fac1a3624 Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Fri, 29 Jul 2022 20:31:42 +0800 Subject: [PATCH 03/46] remove extra headers and delegate constructor --- clickhouse/columns/string.cpp | 8 ++++---- clickhouse/columns/string.h | 2 -- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index 9607f16c..a6faec9c 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -166,8 +166,8 @@ ColumnString::ColumnString() { } -ColumnString::ColumnString(const std::vector & data) - : Column(Type::CreateString()) +ColumnString::ColumnString(const std::vector& data) + : ColumnString() { items_.reserve(data.size()); blocks_.emplace_back(ComputeTotalSize(data)); @@ -176,10 +176,10 @@ ColumnString::ColumnString(const std::vector & data) { AppendUnsafe(s); } -} +}; ColumnString::ColumnString(std::vector&& data) - : Column(Type::CreateString()) + : ColumnString() { items_.reserve(data.size()); diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index 36cb9caf..8b165a2b 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -6,8 +6,6 @@ #include #include #include -#include -#include #include namespace clickhouse { From 87bcac9751bc85ba318529b31b7ac056a2316dba Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Sat, 30 Jul 2022 13:39:09 +0800 Subject: [PATCH 04/46] fix gcc clang compile error --- clickhouse/columns/string.cpp | 12 +++++++++++- clickhouse/columns/string.h | 5 ++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index a6faec9c..2694d26a 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -193,7 +193,7 @@ ColumnString::ColumnString(std::vector&& data) ColumnString::~ColumnString() {} -void ColumnString::Append(const std::string_view& str) { +void ColumnString::Append(std::string_view str) { if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) { blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size())); @@ -209,6 +209,16 @@ void ColumnString::Append(std::string&& steal_value) items_.emplace_back(std::string_view{ last_data.data(),last_data.length() }); } +void ColumnString::Append(const char* str) { + auto len = strlen(str); + if (blocks_.size() == 0 || blocks_.back().GetAvailable() < len) + { + blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, len)); + } + + items_.emplace_back(blocks_.back().AppendUnsafe(str)); +} + void ColumnString::AppendNoManagedLifetime(std::string_view str) { items_.emplace_back(str); diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index 8b165a2b..3e726e86 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -89,11 +89,14 @@ class ColumnString : public Column { ColumnString(const ColumnString&) = delete; /// Appends one element to the column. - void Append(const std::string_view& str); + void Append(std::string_view str); /// Appends one element to the column. void Append(std::string&& steal_value); + /// Appends one element to the column. + void Append(const char* str); + /// Appends one element to the column. /// If str lifetime is managed elsewhere and guaranteed to outlive the Block sent to the server void AppendNoManagedLifetime(std::string_view str); From feff79ce69c1bbd0f74e1421450310b223ece855 Mon Sep 17 00:00:00 2001 From: Ilya Khoroshenkiy Date: Sat, 30 Jul 2022 19:00:14 +0300 Subject: [PATCH 05/46] Add recv/send timeouts to socket --- clickhouse/base/socket.cpp | 28 ++++++++++++++++++++++------ clickhouse/base/socket.h | 9 +++++++-- clickhouse/base/sslsocket.cpp | 11 ++++++----- clickhouse/base/sslsocket.h | 6 ++++-- clickhouse/client.h | 4 ++++ ut/socket_ut.cpp | 28 ++++++++++++++++++++++++++-- 6 files changed, 69 insertions(+), 17 deletions(-) diff --git a/clickhouse/base/socket.cpp b/clickhouse/base/socket.cpp index c6dc920e..e0f8fb1c 100644 --- a/clickhouse/base/socket.cpp +++ b/clickhouse/base/socket.cpp @@ -112,6 +112,16 @@ void SetNonBlock(SOCKET fd, bool value) { #endif } +void SetTimeout(SOCKET fd, const SocketTimeoutParams& timeout_params) { +#if defined(_unix_) + timeval recv_timeout { .tv_sec = timeout_params.recv_timeout.count(), .tv_usec = 0 }; + setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &recv_timeout, sizeof(recv_timeout)); + + timeval send_timeout { .tv_sec = timeout_params.send_timeout.count(), .tv_usec = 0 }; + setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &send_timeout, sizeof(send_timeout)); +#endif +}; + ssize_t Poll(struct pollfd* fds, int nfds, int timeout) noexcept { #if defined(_win_) return WSAPoll(fds, nfds, timeout); @@ -120,7 +130,7 @@ ssize_t Poll(struct pollfd* fds, int nfds, int timeout) noexcept { #endif } -SOCKET SocketConnect(const NetworkAddress& addr) { +SOCKET SocketConnect(const NetworkAddress& addr, const SocketTimeoutParams& timeout_params) { int last_err = 0; for (auto res = addr.Info(); res != nullptr; res = res->ai_next) { SOCKET s(socket(res->ai_family, res->ai_socktype, res->ai_protocol)); @@ -130,6 +140,7 @@ SOCKET SocketConnect(const NetworkAddress& addr) { } SetNonBlock(s, true); + SetTimeout(s, timeout_params); if (connect(s, res->ai_addr, (int)res->ai_addrlen) != 0) { int err = getSocketErrorCode(); @@ -213,6 +224,7 @@ NetworkAddress::~NetworkAddress() { const struct addrinfo* NetworkAddress::Info() const { return info_; } + const std::string & NetworkAddress::Host() const { return host_; } @@ -220,6 +232,7 @@ const std::string & NetworkAddress::Host() const { SocketBase::~SocketBase() = default; + SocketFactory::~SocketFactory() = default; void SocketFactory::sleepFor(const std::chrono::milliseconds& duration) { @@ -227,8 +240,8 @@ void SocketFactory::sleepFor(const std::chrono::milliseconds& duration) { } -Socket::Socket(const NetworkAddress& addr) - : handle_(SocketConnect(addr)) +Socket::Socket(const NetworkAddress& addr, const SocketTimeoutParams& timeout_params) + : handle_(SocketConnect(addr, timeout_params)) {} Socket::Socket(Socket&& other) noexcept @@ -300,19 +313,21 @@ std::unique_ptr Socket::makeOutputStream() const { return std::make_unique(handle_); } + NonSecureSocketFactory::~NonSecureSocketFactory() {} std::unique_ptr NonSecureSocketFactory::connect(const ClientOptions &opts) { const auto address = NetworkAddress(opts.host, std::to_string(opts.port)); - auto socket = doConnect(address); + auto socket = doConnect(address, opts); setSocketOptions(*socket, opts); return socket; } -std::unique_ptr NonSecureSocketFactory::doConnect(const NetworkAddress& address) { - return std::make_unique(address); +std::unique_ptr NonSecureSocketFactory::doConnect(const NetworkAddress& address, const ClientOptions& opts) { + SocketTimeoutParams timeout_params { opts.connection_recv_timeout, opts.connection_send_timeout }; + return std::make_unique(address, timeout_params); } void NonSecureSocketFactory::setSocketOptions(Socket &socket, const ClientOptions &opts) { @@ -327,6 +342,7 @@ void NonSecureSocketFactory::setSocketOptions(Socket &socket, const ClientOption } } + SocketInput::SocketInput(SOCKET s) : s_(s) { diff --git a/clickhouse/base/socket.h b/clickhouse/base/socket.h index e7cacc19..b3d916e1 100644 --- a/clickhouse/base/socket.h +++ b/clickhouse/base/socket.h @@ -82,9 +82,14 @@ class SocketFactory { }; +struct SocketTimeoutParams { + const std::chrono::seconds recv_timeout {0}; + const std::chrono::seconds send_timeout {0}; +}; + class Socket : public SocketBase { public: - Socket(const NetworkAddress& addr); + Socket(const NetworkAddress& addr, const SocketTimeoutParams& timeout_params); Socket(Socket&& other) noexcept; Socket& operator=(Socket&& other) noexcept; @@ -119,7 +124,7 @@ class NonSecureSocketFactory : public SocketFactory { std::unique_ptr connect(const ClientOptions& opts) override; protected: - virtual std::unique_ptr doConnect(const NetworkAddress& address); + virtual std::unique_ptr doConnect(const NetworkAddress& address, const ClientOptions& opts); void setSocketOptions(Socket& socket, const ClientOptions& opts); }; diff --git a/clickhouse/base/sslsocket.cpp b/clickhouse/base/sslsocket.cpp index 392c22fd..29efa504 100644 --- a/clickhouse/base/sslsocket.cpp +++ b/clickhouse/base/sslsocket.cpp @@ -198,9 +198,9 @@ SSL_CTX * SSLContext::getContext() { << "\n\t handshake state: " << SSL_get_state(ssl_) \ << std::endl */ -SSLSocket::SSLSocket(const NetworkAddress& addr, const SSLParams & ssl_params, - SSLContext& context) - : Socket(addr) +SSLSocket::SSLSocket(const NetworkAddress& addr, const SocketTimeoutParams& timeout_params, + const SSLParams & ssl_params, SSLContext& context) + : Socket(addr, timeout_params) , ssl_(SSL_new(context.getContext()), &SSL_free) { auto ssl = ssl_.get(); @@ -267,8 +267,9 @@ SSLSocketFactory::SSLSocketFactory(const ClientOptions& opts) SSLSocketFactory::~SSLSocketFactory() = default; -std::unique_ptr SSLSocketFactory::doConnect(const NetworkAddress& address) { - return std::make_unique(address, ssl_params_, *ssl_context_); +std::unique_ptr SSLSocketFactory::doConnect(const NetworkAddress& address, const ClientOptions& opts) { + SocketTimeoutParams timeout_params { opts.connection_recv_timeout, opts.connection_send_timeout }; + return std::make_unique(address, timeout_params, ssl_params_, *ssl_context_); } std::unique_ptr SSLSocket::makeInputStream() const { diff --git a/clickhouse/base/sslsocket.h b/clickhouse/base/sslsocket.h index f37e4a5a..945de86d 100644 --- a/clickhouse/base/sslsocket.h +++ b/clickhouse/base/sslsocket.h @@ -48,7 +48,9 @@ class SSLContext class SSLSocket : public Socket { public: - explicit SSLSocket(const NetworkAddress& addr, const SSLParams & ssl_params, SSLContext& context); + explicit SSLSocket(const NetworkAddress& addr, const SocketTimeoutParams& timeout_params, + const SSLParams& ssl_params, SSLContext& context); + SSLSocket(SSLSocket &&) = default; ~SSLSocket() override = default; @@ -69,7 +71,7 @@ class SSLSocketFactory : public NonSecureSocketFactory { ~SSLSocketFactory() override; protected: - std::unique_ptr doConnect(const NetworkAddress& address) override; + std::unique_ptr doConnect(const NetworkAddress& address, const ClientOptions& opts) override; private: const SSLParams ssl_params_; diff --git a/clickhouse/client.h b/clickhouse/client.h index 6de09b8a..679dd32c 100644 --- a/clickhouse/client.h +++ b/clickhouse/client.h @@ -86,6 +86,10 @@ struct ClientOptions { // TCP options DECLARE_FIELD(tcp_nodelay, bool, TcpNoDelay, true); + /// Connection socket timeout. If the timeout is set to zero then the operation will never timeout. + DECLARE_FIELD(connection_recv_timeout, std::chrono::seconds, SetConnectionRecvTimeout, std::chrono::seconds(0)); + DECLARE_FIELD(connection_send_timeout, std::chrono::seconds, SetConnectionSendTimeout, std::chrono::seconds(0)); + // TODO deprecate setting /** It helps to ease migration of the old codebases, which can't afford to switch * to using ColumnLowCardinalityT or ColumnLowCardinality directly, diff --git a/ut/socket_ut.cpp b/ut/socket_ut.cpp index 6f428428..36b6a65b 100644 --- a/ut/socket_ut.cpp +++ b/ut/socket_ut.cpp @@ -18,7 +18,7 @@ TEST(Socketcase, connecterror) { std::this_thread::sleep_for(std::chrono::seconds(1)); try { - Socket socket(addr); + Socket socket(addr, SocketTimeoutParams {}); } catch (const std::system_error& e) { FAIL(); } @@ -26,13 +26,37 @@ TEST(Socketcase, connecterror) { std::this_thread::sleep_for(std::chrono::seconds(1)); server.stop(); try { - Socket socket(addr); + Socket socket(addr, SocketTimeoutParams {}); FAIL(); } catch (const std::system_error& e) { ASSERT_NE(EINPROGRESS,e.code().value()); } } +TEST(Socketcase, timeoutrecv) { + using Seconds = std::chrono::seconds; + + int port = 19979; + NetworkAddress addr("localhost", std::to_string(port)); + LocalTcpServer server(port); + server.start(); + + std::this_thread::sleep_for(std::chrono::seconds(1)); + try { + Socket socket(addr, SocketTimeoutParams { .recv_timeout = Seconds(5), .send_timeout = Seconds(5) }); + + std::unique_ptr ptr_input_stream = socket.makeInputStream(); + char buf[1024]; + ptr_input_stream->Read(buf, sizeof(buf)); + + } catch (const std::system_error& e) { + ASSERT_EQ(EAGAIN, e.code().value()); + } + + std::this_thread::sleep_for(std::chrono::seconds(1)); + server.stop(); +} + // Test to verify that reading from empty socket doesn't hangs. //TEST(Socketcase, ReadFromEmptySocket) { // const int port = 12345; From 863728525c1b29e10182cea06b7931455dfbd974 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 1 Aug 2022 18:06:29 +0300 Subject: [PATCH 06/46] Minor; removed unnecessary code --- clickhouse/columns/string.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index 3e726e86..f7bff737 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -10,11 +10,6 @@ namespace clickhouse { -constexpr size_t DEFAULT_BLOCK_SIZE = 4096; - -template -inline constexpr bool always_false_v = false; - /** * Represents column of fixed-length strings. */ From 70d3329b1904c983753e7a250cb12b2a4a29cbae Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 1 Aug 2022 18:07:50 +0300 Subject: [PATCH 07/46] Minor: stype + DEFAULT_BLOCK_SIZE --- clickhouse/columns/string.cpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index 2694d26a..1227c113 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -5,9 +5,10 @@ namespace { +constexpr size_t DEFAULT_BLOCK_SIZE = 4096; + template -size_t ComputeTotalSize(const Container & strings, size_t begin = 0, size_t len = -1) -{ +size_t ComputeTotalSize(const Container & strings, size_t begin = 0, size_t len = -1) { size_t result = 0; if (begin < strings.size()) { len = std::min(len, strings.size() - begin); @@ -63,8 +64,7 @@ std::string_view ColumnFixedString::operator [](size_t n) const { return std::string_view(&data_[pos], string_size_); } -size_t ColumnFixedString::FixedSize() const -{ +size_t ColumnFixedString::FixedSize() const { return string_size_; } @@ -202,8 +202,7 @@ void ColumnString::Append(std::string_view str) { items_.emplace_back(blocks_.back().AppendUnsafe(str)); } -void ColumnString::Append(std::string&& steal_value) -{ +void ColumnString::Append(std::string&& steal_value) { append_data_.emplace_back(std::move(steal_value)); auto& last_data = append_data_.back(); items_.emplace_back(std::string_view{ last_data.data(),last_data.length() }); @@ -211,21 +210,18 @@ void ColumnString::Append(std::string&& steal_value) void ColumnString::Append(const char* str) { auto len = strlen(str); - if (blocks_.size() == 0 || blocks_.back().GetAvailable() < len) - { + if (blocks_.size() == 0 || blocks_.back().GetAvailable() < len) { blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, len)); } items_.emplace_back(blocks_.back().AppendUnsafe(str)); } -void ColumnString::AppendNoManagedLifetime(std::string_view str) -{ +void ColumnString::AppendNoManagedLifetime(std::string_view str) { items_.emplace_back(str); } -void ColumnString::AppendUnsafe(std::string_view str) -{ +void ColumnString::AppendUnsafe(std::string_view str) { items_.emplace_back(blocks_.back().AppendUnsafe(str)); } From 91efd349d452629867b9fbc14a5c6789ebab6424 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Wed, 3 Aug 2022 08:21:58 +0300 Subject: [PATCH 08/46] add modificators: explicit and override for CompressedInput and CompressedOutput. --- clickhouse/base/compressed.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clickhouse/base/compressed.h b/clickhouse/base/compressed.h index 0b40d0be..8ba01fa8 100644 --- a/clickhouse/base/compressed.h +++ b/clickhouse/base/compressed.h @@ -8,8 +8,8 @@ namespace clickhouse { class CompressedInput : public ZeroCopyInput { public: - CompressedInput(InputStream* input); - ~CompressedInput(); + explicit CompressedInput(InputStream* input); + ~CompressedInput() override; protected: size_t DoNext(const void** ptr, size_t len) override; @@ -25,8 +25,8 @@ class CompressedInput : public ZeroCopyInput { class CompressedOutput : public OutputStream { public: - CompressedOutput(OutputStream * destination, size_t max_compressed_chunk_size = 0); - ~CompressedOutput(); + explicit CompressedOutput(OutputStream * destination, size_t max_compressed_chunk_size = 0); + ~CompressedOutput() override; protected: size_t DoWrite(const void* data, size_t len) override; From 7bdc11fff7d7ee2a4b3b4d93c22c67ed5fc13fa3 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 4 Aug 2022 16:44:52 +0300 Subject: [PATCH 09/46] Removed Append(const char*) overload Users should call string_view overload instead. --- clickhouse/columns/string.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index 1227c113..b1e53244 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -208,15 +208,6 @@ void ColumnString::Append(std::string&& steal_value) { items_.emplace_back(std::string_view{ last_data.data(),last_data.length() }); } -void ColumnString::Append(const char* str) { - auto len = strlen(str); - if (blocks_.size() == 0 || blocks_.back().GetAvailable() < len) { - blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, len)); - } - - items_.emplace_back(blocks_.back().AppendUnsafe(str)); -} - void ColumnString::AppendNoManagedLifetime(std::string_view str) { items_.emplace_back(str); } From f6d3d8c6a17c556d5e8f77f8edfe44de95771fb5 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 4 Aug 2022 16:45:35 +0300 Subject: [PATCH 10/46] Update string.h --- clickhouse/columns/string.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index f7bff737..30066148 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -89,9 +89,6 @@ class ColumnString : public Column { /// Appends one element to the column. void Append(std::string&& steal_value); - /// Appends one element to the column. - void Append(const char* str); - /// Appends one element to the column. /// If str lifetime is managed elsewhere and guaranteed to outlive the Block sent to the server void AppendNoManagedLifetime(std::string_view str); From 2bbc74afd19f3b6e3520d1ef1a220ad7c8b2eb15 Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Sun, 7 Aug 2022 12:21:47 +0800 Subject: [PATCH 11/46] Append(const char*) overload is necessary for gcc and clang --- clickhouse/columns/string.cpp | 9 +++++++++ clickhouse/columns/string.h | 3 +++ 2 files changed, 12 insertions(+) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index b1e53244..8ea362c4 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -202,6 +202,15 @@ void ColumnString::Append(std::string_view str) { items_.emplace_back(blocks_.back().AppendUnsafe(str)); } +void ColumnString::Append(const char* str) { + auto len = strlen(str); + if (blocks_.size() == 0 || blocks_.back().GetAvailable() < len) { + blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, len)); + } + + items_.emplace_back(blocks_.back().AppendUnsafe(str)); +} + void ColumnString::Append(std::string&& steal_value) { append_data_.emplace_back(std::move(steal_value)); auto& last_data = append_data_.back(); diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index 30066148..f2216f40 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -86,6 +86,9 @@ class ColumnString : public Column { /// Appends one element to the column. void Append(std::string_view str); + /// Appends one element to the column. + void Append(const char* str); + /// Appends one element to the column. void Append(std::string&& steal_value); From 47cdeec309ba23d32d62f89a561b23d80e0a1ddb Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Sun, 14 Aug 2022 18:51:05 +0800 Subject: [PATCH 12/46] fix CreateColumnFromAst --- clickhouse/columns/factory.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/clickhouse/columns/factory.cpp b/clickhouse/columns/factory.cpp index 47c3feeb..de2e4718 100644 --- a/clickhouse/columns/factory.cpp +++ b/clickhouse/columns/factory.cpp @@ -137,12 +137,17 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSetti case TypeAst::Enum: { std::vector enum_items; - - enum_items.reserve(ast.elements.size() / 2); - for (size_t i = 0; i < ast.elements.size(); i += 2) { + if (ast.elements.size() == 1) { enum_items.push_back( - Type::EnumItem{ast.elements[i].value_string, - (int16_t)ast.elements[i + 1].value}); + Type::EnumItem{ ast.elements[0].value_string, 0 }); + } + else { + enum_items.reserve(ast.elements.size() / 2); + for (size_t i = 0; i < ast.elements.size(); i += 2) { + enum_items.push_back( + Type::EnumItem{ ast.elements[i].value_string, + (int16_t)ast.elements[i + 1].value }); + } } if (ast.code == Type::Enum8) { From 1ec61ff3014c850b91e33f85c8e89d7c8523bf25 Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Thu, 18 Aug 2022 23:48:52 +0800 Subject: [PATCH 13/46] add ErrorEnumContent ut --- clickhouse/columns/factory.cpp | 19 +++++++++---------- ut/types_ut.cpp | 16 ++++++++++++++-- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/clickhouse/columns/factory.cpp b/clickhouse/columns/factory.cpp index de2e4718..38b02e1d 100644 --- a/clickhouse/columns/factory.cpp +++ b/clickhouse/columns/factory.cpp @@ -137,17 +137,16 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSetti case TypeAst::Enum: { std::vector enum_items; - if (ast.elements.size() == 1) { - enum_items.push_back( - Type::EnumItem{ ast.elements[0].value_string, 0 }); + //ast.elements.size() minimum is 1. + if ((ast.elements.size() % 2) != 0) { + throw ValidationError(ast.name + " content is not correct"); } - else { - enum_items.reserve(ast.elements.size() / 2); - for (size_t i = 0; i < ast.elements.size(); i += 2) { - enum_items.push_back( - Type::EnumItem{ ast.elements[i].value_string, - (int16_t)ast.elements[i + 1].value }); - } + + enum_items.reserve(ast.elements.size() / 2); + for (size_t i = 0; i < ast.elements.size(); i += 2) { + enum_items.push_back( + Type::EnumItem{ ast.elements[i].value_string, + (int16_t)ast.elements[i + 1].value }); } if (ast.code == Type::Enum8) { diff --git a/ut/types_ut.cpp b/ut/types_ut.cpp index 8e355ee1..c5922d0e 100644 --- a/ut/types_ut.cpp +++ b/ut/types_ut.cpp @@ -86,8 +86,6 @@ TEST(TypesCase, IsEqual) { "DateTime64(3, 'UTC')", "Decimal(9,3)", "Decimal(18,3)", - "Enum8()", - "Enum16()", "Enum8('ONE' = 1)", "Enum8('ONE' = 1, 'TWO' = 2)", "Enum16('ONE' = 1, 'TWO' = 2, 'THREE' = 3, 'FOUR' = 4)", @@ -127,3 +125,17 @@ TEST(TypesCase, IsEqual) { } } } + +TEST(TypesCase, ErrorEnumContent) { + const std::string type_names[] = { + "Enum8()", + "Enum8('ONE')", + "Enum8('ONE'=1,'TWO')", + "Enum16('TWO'=,'TWO')", + }; + + for (const auto& type_name : type_names) { + SCOPED_TRACE(type_name); + EXPECT_THROW(clickhouse::CreateColumnByType(type_name)->Type(), ValidationError); + } +} \ No newline at end of file From 9d8d3750d1753502c99bb93f5890e4c58f905bf7 Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Thu, 15 Sep 2022 20:17:39 +0800 Subject: [PATCH 14/46] fix windows compile error (C-style designated initializer syntax) --- ut/socket_ut.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ut/socket_ut.cpp b/ut/socket_ut.cpp index 36b6a65b..9e4a2529 100644 --- a/ut/socket_ut.cpp +++ b/ut/socket_ut.cpp @@ -43,7 +43,7 @@ TEST(Socketcase, timeoutrecv) { std::this_thread::sleep_for(std::chrono::seconds(1)); try { - Socket socket(addr, SocketTimeoutParams { .recv_timeout = Seconds(5), .send_timeout = Seconds(5) }); + Socket socket(addr, SocketTimeoutParams { Seconds(5), Seconds(5) }); std::unique_ptr ptr_input_stream = socket.makeInputStream(); char buf[1024]; From ebc7ab1aba7bd035deeebc44ca0be9860d739992 Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Thu, 15 Sep 2022 21:29:18 +0800 Subject: [PATCH 15/46] add windows socket timeout implementation. --- clickhouse/base/socket.cpp | 28 ++++++++++++++++++++++++---- clickhouse/base/socket.h | 7 +++++-- ut/socket_ut.cpp | 14 ++++++++++---- 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/clickhouse/base/socket.cpp b/clickhouse/base/socket.cpp index e0f8fb1c..dbfc11c3 100644 --- a/clickhouse/base/socket.cpp +++ b/clickhouse/base/socket.cpp @@ -114,11 +114,27 @@ void SetNonBlock(SOCKET fd, bool value) { void SetTimeout(SOCKET fd, const SocketTimeoutParams& timeout_params) { #if defined(_unix_) - timeval recv_timeout { .tv_sec = timeout_params.recv_timeout.count(), .tv_usec = 0 }; - setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &recv_timeout, sizeof(recv_timeout)); + timeval recv_timeout { timeout_params.recv_timeout_s.count(), timeout_params.recv_timeout_us.count() }; + auto recv_ret = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &recv_timeout, sizeof(recv_timeout)); - timeval send_timeout { .tv_sec = timeout_params.send_timeout.count(), .tv_usec = 0 }; - setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &send_timeout, sizeof(send_timeout)); + timeval send_timeout { timeout_params.send_timeout_s.count(), timeout_params.send_timeout_us.count() }; + auto send_ret = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &send_timeout, sizeof(send_timeout)); + + if (recv_ret == -1 || send_ret == -1) { + throw std::system_error(getSocketErrorCode(), getErrorCategory(), "fail to set socket timeout"); + } +#else + const struct timeval recv_tv { timeout_params.recv_timeout_s.count(), timeout_params.recv_timeout_us.count()}; + DWORD recv_timeout = recv_tv.tv_sec * 1000 + recv_tv.tv_usec / 1000; + auto recv_ret = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&recv_timeout, sizeof(DWORD)); + + const struct timeval send_tv { timeout_params.send_timeout_s.count(), timeout_params.send_timeout_us.count()}; + DWORD send_timeout = send_tv.tv_sec * 1000 + send_tv.tv_usec / 1000; + auto send_ret = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, (const char*)&send_timeout, sizeof(DWORD)); + + if (recv_ret == SOCKET_ERROR || send_ret == SOCKET_ERROR) { + throw std::system_error(getSocketErrorCode(), getErrorCategory(), "fail to set socket timeout"); + } #endif }; @@ -244,6 +260,10 @@ Socket::Socket(const NetworkAddress& addr, const SocketTimeoutParams& timeout_pa : handle_(SocketConnect(addr, timeout_params)) {} +Socket::Socket(const NetworkAddress & addr) + : handle_(SocketConnect(addr, SocketTimeoutParams{})) +{} + Socket::Socket(Socket&& other) noexcept : handle_(other.handle_) { diff --git a/clickhouse/base/socket.h b/clickhouse/base/socket.h index b3d916e1..eaee403b 100644 --- a/clickhouse/base/socket.h +++ b/clickhouse/base/socket.h @@ -83,13 +83,16 @@ class SocketFactory { struct SocketTimeoutParams { - const std::chrono::seconds recv_timeout {0}; - const std::chrono::seconds send_timeout {0}; + const std::chrono::seconds recv_timeout_s {0}; + const std::chrono::seconds send_timeout_s {0}; + const std::chrono::microseconds recv_timeout_us{ 0 }; + const std::chrono::microseconds send_timeout_us{ 0 }; }; class Socket : public SocketBase { public: Socket(const NetworkAddress& addr, const SocketTimeoutParams& timeout_params); + Socket(const NetworkAddress& addr); Socket(Socket&& other) noexcept; Socket& operator=(Socket&& other) noexcept; diff --git a/ut/socket_ut.cpp b/ut/socket_ut.cpp index 9e4a2529..5a263435 100644 --- a/ut/socket_ut.cpp +++ b/ut/socket_ut.cpp @@ -18,7 +18,7 @@ TEST(Socketcase, connecterror) { std::this_thread::sleep_for(std::chrono::seconds(1)); try { - Socket socket(addr, SocketTimeoutParams {}); + Socket socket(addr); } catch (const std::system_error& e) { FAIL(); } @@ -26,7 +26,7 @@ TEST(Socketcase, connecterror) { std::this_thread::sleep_for(std::chrono::seconds(1)); server.stop(); try { - Socket socket(addr, SocketTimeoutParams {}); + Socket socket(addr); FAIL(); } catch (const std::system_error& e) { ASSERT_NE(EINPROGRESS,e.code().value()); @@ -49,8 +49,14 @@ TEST(Socketcase, timeoutrecv) { char buf[1024]; ptr_input_stream->Read(buf, sizeof(buf)); - } catch (const std::system_error& e) { - ASSERT_EQ(EAGAIN, e.code().value()); + } + catch (const std::system_error& e) { +#if defined(_unix_) + auto expected = EAGAIN; +#else + auto expected = WSAETIMEDOUT; +#endif + ASSERT_EQ(expected, e.code().value()); } std::this_thread::sleep_for(std::chrono::seconds(1)); From 79c9dd3b7bec74c973a40a10e14f9a9deeef225b Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Thu, 15 Sep 2022 21:46:53 +0800 Subject: [PATCH 16/46] fix macOS compile --- clickhouse/base/socket.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clickhouse/base/socket.cpp b/clickhouse/base/socket.cpp index dbfc11c3..76ffef90 100644 --- a/clickhouse/base/socket.cpp +++ b/clickhouse/base/socket.cpp @@ -114,10 +114,10 @@ void SetNonBlock(SOCKET fd, bool value) { void SetTimeout(SOCKET fd, const SocketTimeoutParams& timeout_params) { #if defined(_unix_) - timeval recv_timeout { timeout_params.recv_timeout_s.count(), timeout_params.recv_timeout_us.count() }; + timeval recv_timeout { timeout_params.recv_timeout_s.count(), static_cast(timeout_params.recv_timeout_us.count()) }; auto recv_ret = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &recv_timeout, sizeof(recv_timeout)); - timeval send_timeout { timeout_params.send_timeout_s.count(), timeout_params.send_timeout_us.count() }; + timeval send_timeout { timeout_params.send_timeout_s.count(), static_cast(timeout_params.send_timeout_us.count()) }; auto send_ret = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &send_timeout, sizeof(send_timeout)); if (recv_ret == -1 || send_ret == -1) { From 9e3491f5a8a61b2ec03c5886fc96e16eee32feef Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Mon, 19 Sep 2022 23:40:24 +0800 Subject: [PATCH 17/46] optimize interface --- clickhouse/base/socket.cpp | 12 +++++------- clickhouse/base/socket.h | 6 ++---- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/clickhouse/base/socket.cpp b/clickhouse/base/socket.cpp index 76ffef90..e9ed5fd2 100644 --- a/clickhouse/base/socket.cpp +++ b/clickhouse/base/socket.cpp @@ -114,22 +114,20 @@ void SetNonBlock(SOCKET fd, bool value) { void SetTimeout(SOCKET fd, const SocketTimeoutParams& timeout_params) { #if defined(_unix_) - timeval recv_timeout { timeout_params.recv_timeout_s.count(), static_cast(timeout_params.recv_timeout_us.count()) }; + timeval recv_timeout{ timeout_params.recv_timeout.count() / 1000, static_cast(timeout_params.recv_timeout.count() % 1000 * 1000) }; auto recv_ret = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &recv_timeout, sizeof(recv_timeout)); - timeval send_timeout { timeout_params.send_timeout_s.count(), static_cast(timeout_params.send_timeout_us.count()) }; + timeval send_timeout{ timeout_params.send_timeout.count() / 1000, static_cast(timeout_params.send_timeout.count() % 1000 * 1000) }; auto send_ret = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &send_timeout, sizeof(send_timeout)); if (recv_ret == -1 || send_ret == -1) { throw std::system_error(getSocketErrorCode(), getErrorCategory(), "fail to set socket timeout"); } #else - const struct timeval recv_tv { timeout_params.recv_timeout_s.count(), timeout_params.recv_timeout_us.count()}; - DWORD recv_timeout = recv_tv.tv_sec * 1000 + recv_tv.tv_usec / 1000; + DWORD recv_timeout = static_cast(timeout_params.recv_timeout.count()); auto recv_ret = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&recv_timeout, sizeof(DWORD)); - - const struct timeval send_tv { timeout_params.send_timeout_s.count(), timeout_params.send_timeout_us.count()}; - DWORD send_timeout = send_tv.tv_sec * 1000 + send_tv.tv_usec / 1000; + + DWORD send_timeout = static_cast(timeout_params.send_timeout.count()); auto send_ret = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, (const char*)&send_timeout, sizeof(DWORD)); if (recv_ret == SOCKET_ERROR || send_ret == SOCKET_ERROR) { diff --git a/clickhouse/base/socket.h b/clickhouse/base/socket.h index eaee403b..c68f250d 100644 --- a/clickhouse/base/socket.h +++ b/clickhouse/base/socket.h @@ -83,10 +83,8 @@ class SocketFactory { struct SocketTimeoutParams { - const std::chrono::seconds recv_timeout_s {0}; - const std::chrono::seconds send_timeout_s {0}; - const std::chrono::microseconds recv_timeout_us{ 0 }; - const std::chrono::microseconds send_timeout_us{ 0 }; + std::chrono::milliseconds recv_timeout{ 0 }; + std::chrono::milliseconds send_timeout{ 0 }; }; class Socket : public SocketBase { From 9782b5c477f50ddc5700e570e034e1ede2ad0894 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 3 Oct 2022 16:02:53 +0400 Subject: [PATCH 18/46] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ac64d979..10088ed1 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,9 @@ client.Select("SELECT id, name FROM test.numbers", [] (const Block& block) /// Delete table. client.Execute("DROP TABLE test.numbers"); ``` -Please note that `Client` instance is NOT thread-safe. I.e. you must create a separate `Client` for each thread or utilize some synchronization techniques. + +## Thread-safety +⚠ Please note that `Client` instance is NOT thread-safe. I.e. you must create a separate `Client` for each thread or utilize some synchronization techniques. ⚠ ## Retries If you wish to implement some retry logic atop of `clickhouse::Client` there are few simple rules to make you life easier: From 3981129287c035f706497c04fa749194e0cdb330 Mon Sep 17 00:00:00 2001 From: peter279k Date: Wed, 5 Oct 2022 16:29:49 +0800 Subject: [PATCH 19/46] Correct example codes --- README.md | 60 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 10088ed1..acb4232a 100644 --- a/README.md +++ b/README.md @@ -34,46 +34,52 @@ $ make ## Example ```cpp +#include #include using namespace clickhouse; -/// Initialize client connection. -Client client(ClientOptions().SetHost("localhost")); +int main() +{ + /// Initialize client connection. + Client client(ClientOptions().SetHost("localhost")); -/// Create a table. -client.Execute("CREATE TABLE IF NOT EXISTS test.numbers (id UInt64, name String) ENGINE = Memory"); + /// Create a table. + client.Execute("CREATE TABLE IF NOT EXISTS default.numbers (id UInt64, name String) ENGINE = Memory"); -/// Insert some values. -{ - Block block; + /// Insert some values. + { + Block block; - auto id = std::make_shared(); - id->Append(1); - id->Append(7); + auto id = std::make_shared(); + id->Append(1); + id->Append(7); - auto name = std::make_shared(); - name->Append("one"); - name->Append("seven"); + auto name = std::make_shared(); + name->Append("one"); + name->Append("seven"); - block.AppendColumn("id" , id); - block.AppendColumn("name", name); + block.AppendColumn("id" , id); + block.AppendColumn("name", name); - client.Insert("test.numbers", block); -} + client.Insert("default.numbers", block); + } -/// Select values inserted in the previous step. -client.Select("SELECT id, name FROM test.numbers", [] (const Block& block) - { - for (size_t i = 0; i < block.GetRowCount(); ++i) { - std::cout << block[0]->As()->At(i) << " " - << block[1]->As()->At(i) << "\n"; + /// Select values inserted in the previous step. + client.Select("SELECT id, name FROM default.numbers", [] (const Block& block) + { + for (size_t i = 0; i < block.GetRowCount(); ++i) { + std::cout << block[0]->As()->At(i) << " " + << block[1]->As()->At(i) << "\n"; + } } - } -); + ); -/// Delete table. -client.Execute("DROP TABLE test.numbers"); + /// Delete table. + client.Execute("DROP TABLE default.numbers"); + + return 0; +} ``` ## Thread-safety From 959fe572d7efe3405c209647ca2f29fd6bf26ec7 Mon Sep 17 00:00:00 2001 From: den818 Date: Sun, 9 Oct 2022 16:25:35 +0400 Subject: [PATCH 20/46] Receiving columns metadata --- clickhouse/client.cpp | 16 ++++++++++++++++ clickhouse/protocol.h | 5 +++-- clickhouse/query.h | 16 ++++++++++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/clickhouse/client.cpp b/clickhouse/client.cpp index 36c1bcb3..c11c5d29 100644 --- a/clickhouse/client.cpp +++ b/clickhouse/client.cpp @@ -430,6 +430,22 @@ bool Client::Impl::ReceivePacket(uint64_t* server_packet) { return false; } + case ServerCodes::TableColumns: { + // external table name + if (!WireFormat::SkipString(*input_)) { + return false; + } + + std::string columns_metadata; + if (!WireFormat::ReadString(*input_, &columns_metadata)) { + return false; + } + if (events_) { + events_->OnColumnsMetadata(columns_metadata); + } + return true; + } + default: throw UnimplementedError("unimplemented " + std::to_string((int)packet_type)); break; diff --git a/clickhouse/protocol.h b/clickhouse/protocol.h index dc51f32a..47a76534 100644 --- a/clickhouse/protocol.h +++ b/clickhouse/protocol.h @@ -16,6 +16,7 @@ namespace clickhouse { Extremes = 8, /// Block of mins and maxs, may be compressed. TablesStatusResponse = 9, /// Response to TableStatus. Log = 10, /// Query execution log. + TableColumns = 11, /// Columns' description for default values calculation }; } @@ -23,7 +24,7 @@ namespace clickhouse { namespace ClientCodes { enum { Hello = 0, /// Name, version, default database name. - Query = 1, /** Query id, query settings, query processing stage, + Query = 1, /** Query id, query settings, query processing stage, * compression status, and query text (no INSERT data). */ Data = 2, /// Data `Block` (e.g. INSERT data), may be compressed. @@ -32,7 +33,7 @@ namespace clickhouse { }; } - /// Should we compress `Block`s of data + /// Should we compress `Block`s of data namespace CompressionState { enum { Disable = 0, diff --git a/clickhouse/query.h b/clickhouse/query.h index ae98690d..0f326c40 100644 --- a/clickhouse/query.h +++ b/clickhouse/query.h @@ -66,6 +66,8 @@ class QueryEvents { virtual void OnProgress(const Progress& progress) = 0; + virtual void OnColumnsMetadata(const std::string& columns_metadata) = 0; + virtual void OnFinish() = 0; }; @@ -74,6 +76,7 @@ using ExceptionCallback = std::function; using ProgressCallback = std::function; using SelectCallback = std::function; using SelectCancelableCallback = std::function; +using ColumnsMetadataCallback = std::function; class Query : public QueryEvents { @@ -116,6 +119,12 @@ class Query : public QueryEvents { return *this; } + /// Set handler for receiving a metedata of column of query. + inline Query& OnColumnsMetadata(ColumnsMetadataCallback cb) { + columns_metadata_cb_ = std::move(cb); + return *this; + } + static const std::string default_query_id; private: @@ -149,6 +158,12 @@ class Query : public QueryEvents { } } + void OnColumnsMetadata(const std::string& columns_metadata) override { + if (columns_metadata_cb_) { + columns_metadata_cb_(columns_metadata); + } + } + void OnFinish() override { } @@ -159,6 +174,7 @@ class Query : public QueryEvents { ProgressCallback progress_cb_; SelectCallback select_cb_; SelectCancelableCallback select_cancelable_cb_; + ColumnsMetadataCallback columns_metadata_cb_; }; } From 0600012adeedacc8ea6e1656ad340dd6aaf862b5 Mon Sep 17 00:00:00 2001 From: den818 Date: Sat, 15 Oct 2022 00:26:28 +0400 Subject: [PATCH 21/46] fix --- clickhouse/client.cpp | 7 ++----- clickhouse/query.h | 16 ---------------- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/clickhouse/client.cpp b/clickhouse/client.cpp index c11c5d29..3bdc83ba 100644 --- a/clickhouse/client.cpp +++ b/clickhouse/client.cpp @@ -436,13 +436,10 @@ bool Client::Impl::ReceivePacket(uint64_t* server_packet) { return false; } - std::string columns_metadata; - if (!WireFormat::ReadString(*input_, &columns_metadata)) { + // columns metadata + if (!WireFormat::SkipString(*input_)) { return false; } - if (events_) { - events_->OnColumnsMetadata(columns_metadata); - } return true; } diff --git a/clickhouse/query.h b/clickhouse/query.h index 0f326c40..ae98690d 100644 --- a/clickhouse/query.h +++ b/clickhouse/query.h @@ -66,8 +66,6 @@ class QueryEvents { virtual void OnProgress(const Progress& progress) = 0; - virtual void OnColumnsMetadata(const std::string& columns_metadata) = 0; - virtual void OnFinish() = 0; }; @@ -76,7 +74,6 @@ using ExceptionCallback = std::function; using ProgressCallback = std::function; using SelectCallback = std::function; using SelectCancelableCallback = std::function; -using ColumnsMetadataCallback = std::function; class Query : public QueryEvents { @@ -119,12 +116,6 @@ class Query : public QueryEvents { return *this; } - /// Set handler for receiving a metedata of column of query. - inline Query& OnColumnsMetadata(ColumnsMetadataCallback cb) { - columns_metadata_cb_ = std::move(cb); - return *this; - } - static const std::string default_query_id; private: @@ -158,12 +149,6 @@ class Query : public QueryEvents { } } - void OnColumnsMetadata(const std::string& columns_metadata) override { - if (columns_metadata_cb_) { - columns_metadata_cb_(columns_metadata); - } - } - void OnFinish() override { } @@ -174,7 +159,6 @@ class Query : public QueryEvents { ProgressCallback progress_cb_; SelectCallback select_cb_; SelectCancelableCallback select_cancelable_cb_; - ColumnsMetadataCallback columns_metadata_cb_; }; } From 51ea715beb85516aac01b75f3ad767b9d84605dc Mon Sep 17 00:00:00 2001 From: den818 Date: Sat, 15 Oct 2022 00:48:20 +0400 Subject: [PATCH 22/46] fix --- clickhouse/client.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clickhouse/client.cpp b/clickhouse/client.cpp index 3bdc83ba..209f70c1 100644 --- a/clickhouse/client.cpp +++ b/clickhouse/client.cpp @@ -34,8 +34,9 @@ #define DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME 54372 #define DBMS_MIN_REVISION_WITH_VERSION_PATCH 54401 #define DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE 54405 +#define DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA 54410 -#define REVISION DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE +#define REVISION DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA namespace clickhouse { From 7cad355836aec12f3ca2cfc12ec4f8f73d6f3bba Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 17 Oct 2022 16:11:47 +0400 Subject: [PATCH 23/46] Socket RAII wrapper to prevent leaking socket --- clickhouse/base/socket.cpp | 68 ++++++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 21 deletions(-) diff --git a/clickhouse/base/socket.cpp b/clickhouse/base/socket.cpp index e9ed5fd2..e36554f0 100644 --- a/clickhouse/base/socket.cpp +++ b/clickhouse/base/socket.cpp @@ -144,19 +144,51 @@ ssize_t Poll(struct pollfd* fds, int nfds, int timeout) noexcept { #endif } +const SOCKET INVALID_SOCKET = -1; + +void CloseSocket(SOCKET socket) { + if (socket == INVALID_SOCKET) + return; + +#if defined(_win_) + closesocket(socket); +#else + close(socket); +#endif +} + +struct SocketRAIIWrapper { + SOCKET socket = INVALID_SOCKET; + + ~SocketRAIIWrapper() { + CloseSocket(socket); + } + + SOCKET operator*() const { + return socket; + } + + SOCKET release() { + auto result = socket; + socket = INVALID_SOCKET; + + return result; + } +}; + SOCKET SocketConnect(const NetworkAddress& addr, const SocketTimeoutParams& timeout_params) { int last_err = 0; for (auto res = addr.Info(); res != nullptr; res = res->ai_next) { - SOCKET s(socket(res->ai_family, res->ai_socktype, res->ai_protocol)); + SocketRAIIWrapper s{socket(res->ai_family, res->ai_socktype, res->ai_protocol)}; - if (s == -1) { + if (*s == INVALID_SOCKET) { continue; } - SetNonBlock(s, true); - SetTimeout(s, timeout_params); + SetNonBlock(*s, true); + SetTimeout(*s, timeout_params); - if (connect(s, res->ai_addr, (int)res->ai_addrlen) != 0) { + if (connect(*s, res->ai_addr, (int)res->ai_addrlen) != 0) { int err = getSocketErrorCode(); if ( err == EINPROGRESS || err == EAGAIN || err == EWOULDBLOCK @@ -165,7 +197,7 @@ SOCKET SocketConnect(const NetworkAddress& addr, const SocketTimeoutParams& time #endif ) { pollfd fd; - fd.fd = s; + fd.fd = *s; fd.events = POLLOUT; fd.revents = 0; ssize_t rval = Poll(&fd, 1, 5000); @@ -175,18 +207,18 @@ SOCKET SocketConnect(const NetworkAddress& addr, const SocketTimeoutParams& time } if (rval > 0) { socklen_t len = sizeof(err); - getsockopt(s, SOL_SOCKET, SO_ERROR, (char*)&err, &len); + getsockopt(*s, SOL_SOCKET, SO_ERROR, (char*)&err, &len); if (!err) { - SetNonBlock(s, false); - return s; + SetNonBlock(*s, false); + return s.release(); } last_err = err; } } } else { - SetNonBlock(s, false); - return s; + SetNonBlock(*s, false); + return s.release(); } } if (last_err > 0) { @@ -265,7 +297,7 @@ Socket::Socket(const NetworkAddress & addr) Socket::Socket(Socket&& other) noexcept : handle_(other.handle_) { - other.handle_ = -1; + other.handle_ = INVALID_SOCKET; } Socket& Socket::operator=(Socket&& other) noexcept { @@ -273,7 +305,7 @@ Socket& Socket::operator=(Socket&& other) noexcept { Close(); handle_ = other.handle_; - other.handle_ = -1; + other.handle_ = INVALID_SOCKET; } return *this; @@ -284,14 +316,8 @@ Socket::~Socket() { } void Socket::Close() { - if (handle_ != -1) { -#if defined(_win_) - closesocket(handle_); -#else - close(handle_); -#endif - handle_ = -1; - } + CloseSocket(handle_); + handle_ = INVALID_SOCKET; } void Socket::SetTcpKeepAlive(int idle, int intvl, int cnt) noexcept { From 038548ee5f5ef8bd724f9bdd9374c5b7aa777a66 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 17 Oct 2022 17:25:18 +0400 Subject: [PATCH 24/46] Fixed Windows builds --- clickhouse/base/socket.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clickhouse/base/socket.cpp b/clickhouse/base/socket.cpp index e36554f0..f5501d99 100644 --- a/clickhouse/base/socket.cpp +++ b/clickhouse/base/socket.cpp @@ -144,7 +144,9 @@ ssize_t Poll(struct pollfd* fds, int nfds, int timeout) noexcept { #endif } +#ifndef INVALID_SOCKET const SOCKET INVALID_SOCKET = -1; +#endif void CloseSocket(SOCKET socket) { if (socket == INVALID_SOCKET) From 5e88c2f3e0cb83d62cf9b6d0636885283f11a2fa Mon Sep 17 00:00:00 2001 From: den818 Date: Sun, 9 Oct 2022 13:15:54 +0400 Subject: [PATCH 25/46] feat support CLIENT_WRITE_INFO --- clickhouse/client.cpp | 12 +++++++++++- clickhouse/query.h | 3 +++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/clickhouse/client.cpp b/clickhouse/client.cpp index 209f70c1..4ef5ca70 100644 --- a/clickhouse/client.cpp +++ b/clickhouse/client.cpp @@ -35,8 +35,9 @@ #define DBMS_MIN_REVISION_WITH_VERSION_PATCH 54401 #define DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE 54405 #define DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA 54410 +#define DBMS_MIN_REVISION_WITH_CLIENT_WRITE_INFO 54420 -#define REVISION DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA +#define REVISION DBMS_MIN_REVISION_WITH_CLIENT_WRITE_INFO namespace clickhouse { @@ -408,6 +409,15 @@ bool Client::Impl::ReceivePacket(uint64_t* server_packet) { return false; } } + if (REVISION >= DBMS_MIN_REVISION_WITH_CLIENT_WRITE_INFO) + { + if (!WireFormat::ReadUInt64(*input_, &info.written_rows)) { + return false; + } + if (!WireFormat::ReadUInt64(*input_, &info.written_bytes)) { + return false; + } + } if (events_) { events_->OnProgress(info); diff --git a/clickhouse/query.h b/clickhouse/query.h index ae98690d..5a0319d8 100644 --- a/clickhouse/query.h +++ b/clickhouse/query.h @@ -48,6 +48,8 @@ struct Progress { uint64_t rows = 0; uint64_t bytes = 0; uint64_t total_rows = 0; + uint64_t written_rows = 0; + uint64_t written_bytes = 0; }; @@ -92,6 +94,7 @@ class Query : public QueryEvents { return query_id_; } + /// Set handler for receiving result data. inline Query& OnData(SelectCallback cb) { select_cb_ = std::move(cb); From 576ef2b52cd3bf5a0de67e6ce948eecdd3cdd329 Mon Sep 17 00:00:00 2001 From: den818 Date: Sun, 9 Oct 2022 13:46:36 +0400 Subject: [PATCH 26/46] fix codestyle --- clickhouse/query.h | 1 - 1 file changed, 1 deletion(-) diff --git a/clickhouse/query.h b/clickhouse/query.h index 5a0319d8..d4fbdda5 100644 --- a/clickhouse/query.h +++ b/clickhouse/query.h @@ -94,7 +94,6 @@ class Query : public QueryEvents { return query_id_; } - /// Set handler for receiving result data. inline Query& OnData(SelectCallback cb) { select_cb_ = std::move(cb); From 72504b25eeadbc6217aae5ccf7abea5f1522d1ef Mon Sep 17 00:00:00 2001 From: den818 Date: Mon, 17 Oct 2022 23:20:26 +0400 Subject: [PATCH 27/46] tests --- clickhouse/block.cpp | 5 +++++ clickhouse/block.h | 3 +++ clickhouse/client.cpp | 2 +- ut/client_ut.cpp | 17 +++++++++++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/clickhouse/block.cpp b/clickhouse/block.cpp index aca77c00..28f0ddc9 100644 --- a/clickhouse/block.cpp +++ b/clickhouse/block.cpp @@ -71,6 +71,11 @@ const BlockInfo& Block::Info() const { return info_; } +/// Set block info +void Block::SetInfo(BlockInfo info) { + info_ = std::move(info); +} + /// Count of rows in the block. size_t Block::GetRowCount() const { return rows_; diff --git a/clickhouse/block.h b/clickhouse/block.h index a647f12d..5b8f57da 100644 --- a/clickhouse/block.h +++ b/clickhouse/block.h @@ -73,6 +73,9 @@ class Block { const BlockInfo& Info() const; + /// Set block info + void SetInfo(BlockInfo info); + /// Count of rows in the block. size_t GetRowCount() const; diff --git a/clickhouse/client.cpp b/clickhouse/client.cpp index 4ef5ca70..46a7235e 100644 --- a/clickhouse/client.cpp +++ b/clickhouse/client.cpp @@ -485,7 +485,7 @@ bool Client::Impl::ReadBlock(InputStream& input, Block* block) { return false; } - // TODO use data + block->SetInfo(std::move(info)); } uint64_t num_columns = 0; diff --git a/ut/client_ut.cpp b/ut/client_ut.cpp index 5cc1b81a..9e32fd77 100644 --- a/ut/client_ut.cpp +++ b/ut/client_ut.cpp @@ -1010,6 +1010,23 @@ TEST_P(ClientCase, RoundtripArrayTString) { EXPECT_TRUE(CompareRecursive(*array, *result_typed)); } +TEST_P(ClientCase, WriteInfo) { + Block block; + createTableWithOneColumn(block); + + std::optional received_progress; + Query query("INSERT INTO " + table_name + " (*) VALUES (\'Foo\'), (\'Bar\')" ); + query.OnProgress([&](const Progress& progress) { + received_progress = progress; + }); + client_->Execute(query); + + EXPECT_TRUE(received_progress.has_value()); + // server for some reason sent "rows" instead "written_rows" + EXPECT_GT(received_progress->rows + received_progress->written_rows , 0ul); + EXPECT_GT(received_progress->bytes + received_progress->written_bytes, 0ul); +} + const auto LocalHostEndpoint = ClientOptions() .SetHost( getEnvOrDefault("CLICKHOUSE_HOST", "localhost")) .SetPort( getEnvOrDefault("CLICKHOUSE_PORT", "9000")) From 058fd628e4dee7420d4470103566743f43f803cd Mon Sep 17 00:00:00 2001 From: den818 Date: Mon, 17 Oct 2022 23:39:49 +0400 Subject: [PATCH 28/46] fix test --- ut/client_ut.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ut/client_ut.cpp b/ut/client_ut.cpp index 9e32fd77..ae1b28be 100644 --- a/ut/client_ut.cpp +++ b/ut/client_ut.cpp @@ -1010,7 +1010,7 @@ TEST_P(ClientCase, RoundtripArrayTString) { EXPECT_TRUE(CompareRecursive(*array, *result_typed)); } -TEST_P(ClientCase, WriteInfo) { +TEST_P(ClientCase, OnProgress) { Block block; createTableWithOneColumn(block); @@ -1022,9 +1022,8 @@ TEST_P(ClientCase, WriteInfo) { client_->Execute(query); EXPECT_TRUE(received_progress.has_value()); - // server for some reason sent "rows" instead "written_rows" - EXPECT_GT(received_progress->rows + received_progress->written_rows , 0ul); - EXPECT_GT(received_progress->bytes + received_progress->written_bytes, 0ul); + // Unfortunately server has different behavior in different version. + // So checking value of rows, bytes, etc is absolutely useless } const auto LocalHostEndpoint = ClientOptions() From c3c1aab462f525d318ea9c2bc9c33495614fc95e Mon Sep 17 00:00:00 2001 From: Thomas1664 <46387399+Thomas1664@users.noreply.github.com> Date: Mon, 17 Oct 2022 19:53:02 +0000 Subject: [PATCH 29/46] Use `FormatMessageA` --- clickhouse/base/socket.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clickhouse/base/socket.cpp b/clickhouse/base/socket.cpp index f5501d99..e62e90df 100644 --- a/clickhouse/base/socket.cpp +++ b/clickhouse/base/socket.cpp @@ -27,7 +27,7 @@ char const* windowsErrorCategory::name() const noexcept { std::string windowsErrorCategory::message(int c) const { char error[UINT8_MAX]; - auto len = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, nullptr, static_cast(c), 0, error, sizeof(error), nullptr); + auto len = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, nullptr, static_cast(c), 0, error, sizeof(error), nullptr); if (len == 0) { return "unknown"; } From 6aa45e68232a01f8a06e4b594aa05ab7dee66ffe Mon Sep 17 00:00:00 2001 From: den818 Date: Tue, 18 Oct 2022 15:40:46 +0400 Subject: [PATCH 30/46] fix tests --- ut/client_ut.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/ut/client_ut.cpp b/ut/client_ut.cpp index ae1b28be..7cb57fa5 100644 --- a/ut/client_ut.cpp +++ b/ut/client_ut.cpp @@ -1022,8 +1022,21 @@ TEST_P(ClientCase, OnProgress) { client_->Execute(query); EXPECT_TRUE(received_progress.has_value()); - // Unfortunately server has different behavior in different version. - // So checking value of rows, bytes, etc is absolutely useless + + EXPECT_GE(received_progress->rows, 0u); + EXPECT_LE(received_progress->rows, 2u); + + EXPECT_GE(received_progress->bytes, 0u); + EXPECT_LE(received_progress->bytes, 10000u); + + EXPECT_GE(received_progress->total_rows, 0u); + EXPECT_LE(received_progress->total_rows, 2u); + + EXPECT_GE(received_progress->written_rows, 0u); + EXPECT_LE(received_progress->written_rows, 2u); + + EXPECT_GE(received_progress->written_bytes, 0u); + EXPECT_LE(received_progress->written_bytes, 10000u); } const auto LocalHostEndpoint = ClientOptions() From 85f8c35d4577850b0c0062620055be7f9fb4ce37 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Tue, 18 Oct 2022 16:58:58 +0400 Subject: [PATCH 31/46] Minor test fix Test driver wouldn't crash if client hasn't received the progress --- ut/client_ut.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ut/client_ut.cpp b/ut/client_ut.cpp index 7cb57fa5..969efb18 100644 --- a/ut/client_ut.cpp +++ b/ut/client_ut.cpp @@ -1021,7 +1021,7 @@ TEST_P(ClientCase, OnProgress) { }); client_->Execute(query); - EXPECT_TRUE(received_progress.has_value()); + ASSERT_TRUE(received_progress.has_value()); EXPECT_GE(received_progress->rows, 0u); EXPECT_LE(received_progress->rows, 2u); From d4c106c4c1f795bac10c0fd820bdf7bf801f73e1 Mon Sep 17 00:00:00 2001 From: den818 Date: Mon, 10 Oct 2022 02:03:36 +0400 Subject: [PATCH 32/46] implementation per query settings --- clickhouse/client.cpp | 26 +++++++++++-------- clickhouse/query.h | 49 ++++++++++++++++++++---------------- ut/client_ut.cpp | 58 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 32 deletions(-) diff --git a/clickhouse/client.cpp b/clickhouse/client.cpp index 46a7235e..9439b4ef 100644 --- a/clickhouse/client.cpp +++ b/clickhouse/client.cpp @@ -36,8 +36,9 @@ #define DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE 54405 #define DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA 54410 #define DBMS_MIN_REVISION_WITH_CLIENT_WRITE_INFO 54420 +#define DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS 54429 -#define REVISION DBMS_MIN_REVISION_WITH_CLIENT_WRITE_INFO +#define REVISION DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS namespace clickhouse { @@ -131,7 +132,7 @@ class Client::Impl { bool ReceivePacket(uint64_t* server_packet = nullptr); - void SendQuery(const std::string& query, const std::string& query_id); + void SendQuery(const Query& query); void SendData(const Block& block); @@ -230,7 +231,7 @@ void Client::Impl::ExecuteQuery(Query query) { RetryGuard([this]() { Ping(); }); } - SendQuery(query.GetText(), query.GetQueryID()); + SendQuery(query); while (ReceivePacket()) { ; @@ -272,7 +273,8 @@ void Client::Impl::Insert(const std::string& table_name, const std::string& quer } } - SendQuery("INSERT INTO " + table_name + " ( " + fields_section.str() + " ) VALUES", query_id); + Query query("INSERT INTO " + table_name + " ( " + fields_section.str() + " ) VALUES", query_id); + SendQuery(query); uint64_t server_packet; // Receive data packet. @@ -608,9 +610,9 @@ void Client::Impl::SendCancel() { output_->Flush(); } -void Client::Impl::SendQuery(const std::string& query, const std::string& query_id) { +void Client::Impl::SendQuery(const Query& query) { WireFormat::WriteUInt64(*output_, ClientCodes::Query); - WireFormat::WriteString(*output_, query_id); + WireFormat::WriteString(*output_, query.GetQueryID()); /// Client info. if (server_info_.revision >= DBMS_MIN_REVISION_WITH_CLIENT_INFO) { @@ -644,14 +646,18 @@ void Client::Impl::SendQuery(const std::string& query, const std::string& query_ } /// Per query settings. - //if (settings) - // settings->serialize(*out); - //else + assert(server_info_.revision >= DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS); + for(const auto& [name, field] : query.GetQuerySettings()) { + WireFormat::WriteString(*output_, name); + WireFormat::WriteVarint64(*output_, field.flags); + WireFormat::WriteString(*output_, field.value); + } + // Empty string signals end of serialized settings WireFormat::WriteString(*output_, std::string()); WireFormat::WriteUInt64(*output_, Stages::Complete); WireFormat::WriteUInt64(*output_, compression_); - WireFormat::WriteString(*output_, query); + WireFormat::WriteString(*output_, query.GetText()); // Send empty block as marker of // end of data SendData(Block()); diff --git a/clickhouse/query.h b/clickhouse/query.h index d4fbdda5..0fbc0e87 100644 --- a/clickhouse/query.h +++ b/clickhouse/query.h @@ -7,32 +7,21 @@ #include #include #include +#include namespace clickhouse { -/** - * Settings of individual query. - */ -struct QuerySettings { - /// Maximum thread to use on the server-side to process a query. Default - let the server choose. - int max_threads = 0; - /// Compute min and max values of the result. - bool extremes = false; - /// Silently skip unavailable shards. - bool skip_unavailable_shards = false; - /// Write statistics about read rows, bytes, time elapsed, etc. - bool output_format_write_statistics = true; - /// Use client timezone for interpreting DateTime string values, instead of adopting server timezone. - bool use_client_time_zone = false; - - // connect_timeout - // max_block_size - // distributed_group_by_no_merge = false - // strict_insert_defaults = 0 - // network_compression_method = LZ4 - // priority = 0 +struct QuerySettingsField { + enum Flags : uint64_t + { + IMPORTANT = 0x01, + CUSTOM = 0x02, + }; + std::string value; + uint64_t flags{0}; }; +using QuerySettings = std::unordered_map; struct Profile { uint64_t rows = 0; @@ -94,6 +83,22 @@ class Query : public QueryEvents { return query_id_; } + inline const QuerySettings& GetQuerySettings() const { + return query_settings_; + } + + /// Set per query settings + inline Query& SetQuerySettings(QuerySettings query_settings) { + query_settings_ = std::move(query_settings); + return *this; + } + + /// Set per query setting + inline Query& SetSetting(const std::string& key, const QuerySettingsField& value) { + query_settings_[key] = value; + return *this; + } + /// Set handler for receiving result data. inline Query& OnData(SelectCallback cb) { select_cb_ = std::move(cb); @@ -111,7 +116,6 @@ class Query : public QueryEvents { return *this; } - /// Set handler for receiving a progress of query exceution. inline Query& OnProgress(ProgressCallback cb) { progress_cb_ = std::move(cb); @@ -157,6 +161,7 @@ class Query : public QueryEvents { private: const std::string query_; const std::string query_id_; + QuerySettings query_settings_; ExceptionCallback exception_cb_; ProgressCallback progress_cb_; SelectCallback select_cb_; diff --git a/ut/client_ut.cpp b/ut/client_ut.cpp index 969efb18..39e94ba8 100644 --- a/ut/client_ut.cpp +++ b/ut/client_ut.cpp @@ -1039,6 +1039,64 @@ TEST_P(ClientCase, OnProgress) { EXPECT_LE(received_progress->written_bytes, 10000u); } +TEST_P(ClientCase, QuerySettings) { + client_->Execute("DROP TEMPORARY TABLE IF EXISTS test_clickhouse_query_settings_table_1;"); + client_->Execute("CREATE TEMPORARY TABLE IF NOT EXISTS test_clickhouse_query_settings_table_1 ( id Int64 )"); + + client_->Execute("DROP TEMPORARY TABLE IF EXISTS test_clickhouse_query_settings_table_2;"); + client_->Execute("CREATE TEMPORARY TABLE IF NOT EXISTS test_clickhouse_query_settings_table_2 ( id Int64, value Int64 )"); + + client_->Execute("INSERT INTO test_clickhouse_query_settings_table_1 (*) VALUES (1)"); + + Query query("SELECT value " + "FROM test_clickhouse_query_settings_table_1 " + "LEFT OUTER JOIN test_clickhouse_query_settings_table_2 " + "ON test_clickhouse_query_settings_table_1.id = test_clickhouse_query_settings_table_2.id"); + + + bool checked = false; + + query.SetSetting("join_use_nulls", {"1"}); + + query.OnData( + [&](const Block& block) { + if (block.GetRowCount() == 0) + return; + ASSERT_EQ(1U, block.GetColumnCount()); + ASSERT_EQ(1U, block.GetRowCount()); + ASSERT_TRUE(block[0]->GetType().IsEqual(Type::CreateNullable(Type::CreateSimple()))); + auto cl = block[0]->As(); + EXPECT_TRUE(cl->IsNull(0)); + checked = true; + }); + client_->Execute(query); + + EXPECT_TRUE(checked); + + query.SetSetting("join_use_nulls", {"0"}); + + query.OnData( + [&](const Block& block) { + if (block.GetRowCount() == 0) + return; + ASSERT_EQ(1U, block.GetColumnCount()); + ASSERT_EQ(1U, block.GetRowCount()); + ASSERT_TRUE(block[0]->GetType().IsEqual(Type::CreateSimple())); + auto cl = block[0]->As(); + EXPECT_EQ(cl->At(0), 0); + checked = true; + } + ); + checked = false; + client_->Execute(query); + + EXPECT_TRUE(checked); + + query.SetSetting("wrong_setting_name", {"0", QuerySettingsField::IMPORTANT}); + + EXPECT_THROW(client_->Execute(query), ServerException); +} + const auto LocalHostEndpoint = ClientOptions() .SetHost( getEnvOrDefault("CLICKHOUSE_HOST", "localhost")) .SetPort( getEnvOrDefault("CLICKHOUSE_PORT", "9000")) From 83701e00b6698dcb157b46fa8a04f3c26f87e002 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Fri, 21 Oct 2022 14:55:46 +0400 Subject: [PATCH 33/46] Better handling of older CH server Erroring right away if user tries to send query settings to an old server. --- clickhouse/client.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/clickhouse/client.cpp b/clickhouse/client.cpp index 9439b4ef..d708a87c 100644 --- a/clickhouse/client.cpp +++ b/clickhouse/client.cpp @@ -645,12 +645,17 @@ void Client::Impl::SendQuery(const Query& query) { } } - /// Per query settings. - assert(server_info_.revision >= DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS); - for(const auto& [name, field] : query.GetQuerySettings()) { - WireFormat::WriteString(*output_, name); - WireFormat::WriteVarint64(*output_, field.flags); - WireFormat::WriteString(*output_, field.value); + /// Per query settings + if (server_info_.revision >= DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS) { + for(const auto& [name, field] : query.GetQuerySettings()) { + WireFormat::WriteString(*output_, name); + WireFormat::WriteVarint64(*output_, field.flags); + WireFormat::WriteString(*output_, field.value); + } + } + else if (query.GetQuerySettings().size() > 0) { + // Current implementation works only for server version >= v20.1.2.4-stable, since we do not implement binary settings serialization. + throw UnimplementedError(std::string("Can't send query settings to a server, server version is too old")); } // Empty string signals end of serialized settings WireFormat::WriteString(*output_, std::string()); From a3ccf49f12f7e1386dde6a94b0bb9f970b5f7772 Mon Sep 17 00:00:00 2001 From: den818 Date: Sun, 9 Oct 2022 14:52:40 +0400 Subject: [PATCH 34/46] receiving server logs --- clickhouse/client.cpp | 20 +++++++++++++++++++- clickhouse/query.h | 20 ++++++++++++++++++++ ut/client_ut.cpp | 18 ++++++++++++++++++ 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/clickhouse/client.cpp b/clickhouse/client.cpp index d708a87c..4ccdc98f 100644 --- a/clickhouse/client.cpp +++ b/clickhouse/client.cpp @@ -443,6 +443,25 @@ bool Client::Impl::ReceivePacket(uint64_t* server_packet) { return false; } + case ServerCodes::Log: { + // log tag + if (!WireFormat::SkipString(*input_)) { + return false; + } + Block block; + + // Use uncompressed stream since log blocks usually contain only one row + if (!ReadBlock(*input_, &block)) { + return false; + } + + if (events_) { + events_->OnServerLog(block); + } + + return true; + } + case ServerCodes::TableColumns: { // external table name if (!WireFormat::SkipString(*input_)) { @@ -453,7 +472,6 @@ bool Client::Impl::ReceivePacket(uint64_t* server_packet) { if (!WireFormat::SkipString(*input_)) { return false; } - return true; } default: diff --git a/clickhouse/query.h b/clickhouse/query.h index 0fbc0e87..b6c00dd4 100644 --- a/clickhouse/query.h +++ b/clickhouse/query.h @@ -57,6 +57,12 @@ class QueryEvents { virtual void OnProgress(const Progress& progress) = 0; + /** Handle query execution logs provided by server. + * Amount of logs regulated by `send_logs_level` setting. + * By-default only `fatal` log events are sent to the client side. + */ + virtual void OnServerLog(const Block& block) = 0; + virtual void OnFinish() = 0; }; @@ -65,6 +71,7 @@ using ExceptionCallback = std::function; using ProgressCallback = std::function; using SelectCallback = std::function; using SelectCancelableCallback = std::function; +using SelectServerLogCallback = std::function; class Query : public QueryEvents { @@ -122,6 +129,12 @@ class Query : public QueryEvents { return *this; } + /// Set handler for receiving a server log of query exceution. + inline Query& OnServerLog(SelectServerLogCallback cb) { + select_server_log_cb_ = std::move(cb); + return *this; + } + static const std::string default_query_id; private: @@ -155,6 +168,12 @@ class Query : public QueryEvents { } } + void OnServerLog(const Block& block) override { + if (select_server_log_cb_) { + select_server_log_cb_(block); + } + } + void OnFinish() override { } @@ -166,6 +185,7 @@ class Query : public QueryEvents { ProgressCallback progress_cb_; SelectCallback select_cb_; SelectCancelableCallback select_cancelable_cb_; + SelectServerLogCallback select_server_log_cb_; }; } diff --git a/ut/client_ut.cpp b/ut/client_ut.cpp index 39e94ba8..c94e776d 100644 --- a/ut/client_ut.cpp +++ b/ut/client_ut.cpp @@ -1097,6 +1097,24 @@ TEST_P(ClientCase, QuerySettings) { EXPECT_THROW(client_->Execute(query), ServerException); } +TEST_P(ClientCase, ServerLogs) { + + Block block; + createTableWithOneColumn(block); + + size_t received_row_count = 0; + Query query("INSERT INTO " + table_name + " (*) VALUES (\'Foo\'), (\'Bar\')" ); + query.SetSetting("send_logs_level", {"trace"}); + query.OnServerLog([&](const Block& block) { + received_row_count += block.GetRowCount(); + return true; + }); + client_->Execute(query); + + EXPECT_GT(received_row_count, 0U); +} + + const auto LocalHostEndpoint = ClientOptions() .SetHost( getEnvOrDefault("CLICKHOUSE_HOST", "localhost")) .SetPort( getEnvOrDefault("CLICKHOUSE_PORT", "9000")) From 69c039d01b41bcbca6eb551fdf8f298af588e195 Mon Sep 17 00:00:00 2001 From: den818 Date: Fri, 21 Oct 2022 23:08:59 +0400 Subject: [PATCH 35/46] fix --- clickhouse/client.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clickhouse/client.cpp b/clickhouse/client.cpp index 4ccdc98f..e0726184 100644 --- a/clickhouse/client.cpp +++ b/clickhouse/client.cpp @@ -458,7 +458,6 @@ bool Client::Impl::ReceivePacket(uint64_t* server_packet) { if (events_) { events_->OnServerLog(block); } - return true; } @@ -472,6 +471,7 @@ bool Client::Impl::ReceivePacket(uint64_t* server_packet) { if (!WireFormat::SkipString(*input_)) { return false; } + return true; } default: From 004da779fc3332c62f74fa302349d3f5762f1c07 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 24 Oct 2022 19:27:57 +0400 Subject: [PATCH 36/46] Fixed typos in comments --- clickhouse/base/sslsocket.cpp | 6 +++--- clickhouse/client.h | 6 ++---- clickhouse/columns/array.h | 4 ++-- clickhouse/columns/date.h | 4 ++-- clickhouse/columns/itemview.cpp | 2 +- clickhouse/protocol.h | 2 +- clickhouse/query.h | 2 +- ut/Column_ut.cpp | 4 ++-- ut/client_ut.cpp | 4 ++-- ut/columns_ut.cpp | 4 ++-- ut/ssl_ut.cpp | 4 ++-- 11 files changed, 20 insertions(+), 22 deletions(-) diff --git a/clickhouse/base/sslsocket.cpp b/clickhouse/base/sslsocket.cpp index 29efa504..03b064b1 100644 --- a/clickhouse/base/sslsocket.cpp +++ b/clickhouse/base/sslsocket.cpp @@ -78,11 +78,11 @@ void configureSSL(const clickhouse::SSLParams::ConfigurationType & configuration else if (err == 1 && value_present) throw clickhouse::OpenSSLError("Failed to configure OpenSSL: command '" + kv.first + "' needs no value"); else if (err == -2) - throw clickhouse::OpenSSLError("Failed to cofigure OpenSSL: unknown command '" + kv.first + "'"); + throw clickhouse::OpenSSLError("Failed to configure OpenSSL: unknown command '" + kv.first + "'"); else if (err == -3) - throw clickhouse::OpenSSLError("Failed to cofigure OpenSSL: command '" + kv.first + "' requires a value"); + throw clickhouse::OpenSSLError("Failed to configure OpenSSL: command '" + kv.first + "' requires a value"); else - throw clickhouse::OpenSSLError("Failed to cofigure OpenSSL: command '" + kv.first + "' unknown error: " + std::to_string(err)); + throw clickhouse::OpenSSLError("Failed to configure OpenSSL: command '" + kv.first + "' unknown error: " + std::to_string(err)); } } diff --git a/clickhouse/client.h b/clickhouse/client.h index 679dd32c..185315ad 100644 --- a/clickhouse/client.h +++ b/clickhouse/client.h @@ -101,11 +101,9 @@ struct ClientOptions { /** Set max size data to compress if compression enabled. * - * Allows choosing tradeoff betwen RAM\CPU: + * Allows choosing tradeoff between RAM\CPU: * - Lower value reduces RAM usage, but slightly increases CPU usage. * - Higher value increases RAM usage but slightly decreases CPU usage. - * - * Default is 0, use natural implementation-defined chunk size. */ DECLARE_FIELD(max_compression_chunk_size, unsigned int, SetMaxCompressionChunkSize, 65535); @@ -133,7 +131,7 @@ struct ClientOptions { * If no CAs are configured, the server's identity can't be validated, and the Client would err. * See https://www.openssl.org/docs/man1.1.1/man3/SSL_CTX_set_default_verify_paths.html */ - /// Load deafult CA certificates from deafult locations. + /// Load default CA certificates from default locations. DECLARE_FIELD(use_default_ca_locations, bool, SetUseDefaultCALocations, true); /// Path to the CA files to verify server certificate, may be empty. DECLARE_FIELD(path_to_ca_files, std::vector, SetPathToCAFiles, {}); diff --git a/clickhouse/columns/array.h b/clickhouse/columns/array.h index 6144e430..1d3eb192 100644 --- a/clickhouse/columns/array.h +++ b/clickhouse/columns/array.h @@ -19,7 +19,7 @@ class ColumnArray : public Column { /** Create an array of given type. * - * `data` is used internaly (and modified) by ColumnArray. + * `data` is used internally (and modified) by ColumnArray. * Users are strongly advised against supplying non-empty columns and/or modifying * contents of `data` afterwards. */ @@ -35,7 +35,7 @@ class ColumnArray : public Column { /// Converts input column to array and appends as one row to the current column. void AppendAsColumn(ColumnRef array); - /// Convets array at pos n to column. + /// Converts array at pos n to column. /// Type of element of result column same as type of array element. ColumnRef GetAsColumn(size_t n) const; diff --git a/clickhouse/columns/date.h b/clickhouse/columns/date.h index 3518aa1e..2a240c90 100644 --- a/clickhouse/columns/date.h +++ b/clickhouse/columns/date.h @@ -145,8 +145,8 @@ class ColumnDateTime64 : public Column { /// Appends one element to the end of column. void Append(const Int64& value); - // It is a bit controversal: users might expect it to parse string of ISO8601 or some other human-friendly format, - // but current implemntation parses it as fractional integer with decimal point, e.g. "123.456". + // It is a bit controversial: users might expect it to parse string of ISO8601 or some other human-friendly format, + // but current implementation parses it as fractional integer with decimal point, e.g. "123.456". // void Append(const std::string& value); /// Returns element at given row number. diff --git a/clickhouse/columns/itemview.cpp b/clickhouse/columns/itemview.cpp index 3a186531..a2cb69c2 100644 --- a/clickhouse/columns/itemview.cpp +++ b/clickhouse/columns/itemview.cpp @@ -90,7 +90,7 @@ void ItemView::ValidateData(Type::Code type, DataType data) { return AssertSize({4, 8, 16}); default: - throw UnimplementedError("Unknon type code:" + std::to_string(static_cast(type))); + throw UnimplementedError("Unknown type code:" + std::to_string(static_cast(type))); } } diff --git a/clickhouse/protocol.h b/clickhouse/protocol.h index 47a76534..bd0ced62 100644 --- a/clickhouse/protocol.h +++ b/clickhouse/protocol.h @@ -7,7 +7,7 @@ namespace clickhouse { enum { Hello = 0, /// Name, version, revision. Data = 1, /// `Block` of data, may be compressed. - Exception = 2, /// Exception that occured on server side during query execution. + Exception = 2, /// Exception that occurred on server side during query execution. Progress = 3, /// Query execcution progress: rows and bytes read. Pong = 4, /// response to Ping sent by client. EndOfStream = 5, /// All packets were sent. diff --git a/clickhouse/query.h b/clickhouse/query.h index 0fbc0e87..0a6e74dd 100644 --- a/clickhouse/query.h +++ b/clickhouse/query.h @@ -116,7 +116,7 @@ class Query : public QueryEvents { return *this; } - /// Set handler for receiving a progress of query exceution. + /// Set handler for receiving a progress of query execution. inline Query& OnProgress(ProgressCallback cb) { progress_cb_ = std::move(cb); return *this; diff --git a/ut/Column_ut.cpp b/ut/Column_ut.cpp index de8a21ac..51cd4980 100644 --- a/ut/Column_ut.cpp +++ b/ut/Column_ut.cpp @@ -283,14 +283,14 @@ TYPED_TEST(GenericColumnTest, RoundTrip) { // Date32 first appeared in v21.9.2.17-stable const auto server_info = client.GetServerInfo(); if (versionNumber(server_info) < versionNumber(21, 9)) { - GTEST_SKIP() << "Date32 is availble since v21.9.2.17-stable and can't be tested against server: " << server_info; + GTEST_SKIP() << "Date32 is available since v21.9.2.17-stable and can't be tested against server: " << server_info; } } if constexpr (std::is_same_v) { const auto server_info = client.GetServerInfo(); if (versionNumber(server_info) < versionNumber(21, 7)) { - GTEST_SKIP() << "ColumnInt128 is availble since v21.7.2.7-stable and can't be tested against server: " << server_info; + GTEST_SKIP() << "ColumnInt128 is available since v21.7.2.7-stable and can't be tested against server: " << server_info; } } diff --git a/ut/client_ut.cpp b/ut/client_ut.cpp index 39e94ba8..403e00be 100644 --- a/ut/client_ut.cpp +++ b/ut/client_ut.cpp @@ -428,7 +428,7 @@ TEST_P(ClientCase, Cancellable) { "CREATE TEMPORARY TABLE IF NOT EXISTS test_clickhouse_cpp_cancel (x UInt64) "); /// Insert a few blocks. In order to make cancel have effect, we have to - /// insert a relative larget amount of data. + /// insert a relative larger amount of data. const int kBlock = 10; const int kRowEachBlock = 1000000; for (unsigned j = 0; j < kBlock; j++) { @@ -856,7 +856,7 @@ TEST_P(ClientCase, Query_ID) { // DB::Exception: clickhouse_cpp_cicd: Not enough privileges. To execute this query it's necessary to have grant SYSTEM FLUSH LOGS ON if (std::string(e.what()).find("To execute this query it's necessary to have grant SYSTEM FLUSH LOGS ON") != std::string::npos) { // Insufficient privileges, the only safe way is to wait long enough for system - // to flush the logs automaticaly. Usualy it takes 7.5 seconds, so just in case, + // to flush the logs automatically. Usually it takes 7.5 seconds, so just in case, // wait 3 times that to ensure that all previously executed queries are in the logs now. const auto wait_duration = std::chrono::seconds(23); std::cerr << "Got error while flushing logs, now we wait " << wait_duration << "..." << std::endl; diff --git a/ut/columns_ut.cpp b/ut/columns_ut.cpp index 3d045ba2..a7f528e4 100644 --- a/ut/columns_ut.cpp +++ b/ut/columns_ut.cpp @@ -626,7 +626,7 @@ TEST(ColumnsCase, ColumnDecimal128_from_string_overflow) { EXPECT_ANY_THROW(col->Append("400000000000000000000000000000000000000")); #ifndef ABSL_HAVE_INTRINSIC_INT128 - // unfortunatelly std::numeric_limits::min() overflows when there is no __int128 intrinsic type. + // unfortunately std::numeric_limits::min() overflows when there is no __int128 intrinsic type. EXPECT_ANY_THROW(col->Append("-170141183460469231731687303715884105728")); #endif } @@ -682,7 +682,7 @@ TEST(ColumnsCase, ColumnLowCardinalityString_Load) { } } -// This is temporary diabled since we are not 100% compatitable with ClickHouse +// This is temporary disabled since we are not 100% compatitable with ClickHouse // on how we serailize LC columns, but we check interoperability in other tests (see client_ut.cpp) TEST(ColumnsCase, DISABLED_ColumnLowCardinalityString_Save) { const size_t items_count = 10; diff --git a/ut/ssl_ut.cpp b/ut/ssl_ut.cpp index 2539ac93..f68db08c 100644 --- a/ut/ssl_ut.cpp +++ b/ut/ssl_ut.cpp @@ -75,7 +75,7 @@ INSTANTIATE_TEST_SUITE_P( } )); -// For some reasons doen't work on MacOS. +// For some reasons doesn't work on MacOS. // Looks like `VerifyCAPath` has no effect, while parsing and setting value works. // Also for some reason SetPathToCADirectory() + SSL_CTX_load_verify_locations() works. #if !defined(__APPLE__) @@ -141,7 +141,7 @@ INSTANTIATE_TEST_SUITE_P( ClientOptions(ClickHouseExplorerConfig) .SetSSLOptions(ClientOptions::SSLOptions() .SetUseDefaultCALocations(false) - .SetSkipVerification(true)), // No CA loaded, but verfication is skipped + .SetSkipVerification(true)), // No CA loaded, but verification is skipped {"SELECT 1;"} } )); From 43f8dc4c6a685668590037ed4299001b794c1966 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 24 Oct 2022 19:40:20 +0400 Subject: [PATCH 37/46] Marked certain APIs deprecated --- CMakeLists.txt | 3 ++- clickhouse/base/string_utils.h | 2 ++ clickhouse/client.h | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 31cd8a5a..8bbc0103 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,8 @@ PROJECT (CLICKHOUSE-CLIENT) ENDIF () SET (CMAKE_EXE_LINKER_FLAGS, "${CMAKE_EXE_LINKER_FLAGS} -lpthread") # -Wpedantic makes int128 support somewhat harder and less performant (by not allowing builtin __int128) - SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") + # -Wno-deprecated-declarations to produce less cluttered output when building library itself (`deprecated` attributes are for library users) + SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror -Wno-deprecated-declarations") ENDIF () INCLUDE_DIRECTORIES (.) diff --git a/clickhouse/base/string_utils.h b/clickhouse/base/string_utils.h index f2e66ba7..4d19485d 100644 --- a/clickhouse/base/string_utils.h +++ b/clickhouse/base/string_utils.h @@ -8,6 +8,7 @@ namespace clickhouse { template +[[deprecated("Not used by clickhosue-cpp itself, and will be removed in next major release (3.0) ")]] inline T FromString(const std::string& s) { std::istringstream iss(s); T result; @@ -16,6 +17,7 @@ inline T FromString(const std::string& s) { } template +[[deprecated("Not used by clickhosue-cpp itself, and will be removed in next major release (3.0) ")]] inline T FromString(const StringView& s) { std::istringstream iss((std::string(s))); T result; diff --git a/clickhouse/client.h b/clickhouse/client.h index 185315ad..452226d9 100644 --- a/clickhouse/client.h +++ b/clickhouse/client.h @@ -90,13 +90,13 @@ struct ClientOptions { DECLARE_FIELD(connection_recv_timeout, std::chrono::seconds, SetConnectionRecvTimeout, std::chrono::seconds(0)); DECLARE_FIELD(connection_send_timeout, std::chrono::seconds, SetConnectionSendTimeout, std::chrono::seconds(0)); - // TODO deprecate setting /** It helps to ease migration of the old codebases, which can't afford to switch * to using ColumnLowCardinalityT or ColumnLowCardinality directly, * but still want to benefit from smaller on-wire LowCardinality bandwidth footprint. * * @see LowCardinalitySerializationAdaptor, CreateColumnByType */ + [[deprecated("Makes implementation of LC(X) harder and code uglier. Will be removed in next major release (3.0) ")]] DECLARE_FIELD(backward_compatibility_lowcardinality_as_wrapped_column, bool, SetBakcwardCompatibilityFeatureLowCardinalityAsWrappedColumn, true); /** Set max size data to compress if compression enabled. From b9abe43a6f233861b89bf5029c6a8229255bca93 Mon Sep 17 00:00:00 2001 From: den818 Date: Sun, 16 Oct 2022 23:03:21 +0400 Subject: [PATCH 38/46] support open telemetry --- clickhouse/CMakeLists.txt | 2 + clickhouse/base/open_telemetry.h | 23 ++++++++++++ clickhouse/base/uuid.h | 12 ++++++ clickhouse/client.cpp | 29 ++++++++++++++- clickhouse/columns/uuid.cpp | 11 +++--- clickhouse/columns/uuid.h | 10 ++--- clickhouse/query.h | 14 +++++++ clickhouse/types/type_parser.cpp | 1 - ut/client_ut.cpp | 63 +++++++++++++++++++++++++------- ut/columns_ut.cpp | 9 ++--- ut/utils.cpp | 25 ++++++++++++- ut/utils.h | 3 ++ ut/utils_ut.cpp | 6 +++ ut/value_generators.cpp | 10 ++--- ut/value_generators.h | 2 +- 15 files changed, 182 insertions(+), 38 deletions(-) create mode 100644 clickhouse/base/open_telemetry.h create mode 100644 clickhouse/base/uuid.h diff --git a/clickhouse/CMakeLists.txt b/clickhouse/CMakeLists.txt index d96ff88a..2813445b 100644 --- a/clickhouse/CMakeLists.txt +++ b/clickhouse/CMakeLists.txt @@ -92,12 +92,14 @@ INSTALL(FILES query.h DESTINATION include/clickhouse/) INSTALL(FILES base/buffer.h DESTINATION include/clickhouse/base/) INSTALL(FILES base/compressed.h DESTINATION include/clickhouse/base/) INSTALL(FILES base/input.h DESTINATION include/clickhouse/base/) +INSTALL(FILES base/open_telemetry.h DESTINATION include/clickhouse/base/) INSTALL(FILES base/output.h DESTINATION include/clickhouse/base/) INSTALL(FILES base/platform.h DESTINATION include/clickhouse/base/) INSTALL(FILES base/singleton.h DESTINATION include/clickhouse/base/) INSTALL(FILES base/socket.h DESTINATION include/clickhouse/base/) INSTALL(FILES base/string_utils.h DESTINATION include/clickhouse/base/) INSTALL(FILES base/string_view.h DESTINATION include/clickhouse/base/) +INSTALL(FILES base/uuid.h DESTINATION include/clickhouse/base/) INSTALL(FILES base/wire_format.h DESTINATION include/clickhouse/base/) # columns diff --git a/clickhouse/base/open_telemetry.h b/clickhouse/base/open_telemetry.h new file mode 100644 index 00000000..34f33113 --- /dev/null +++ b/clickhouse/base/open_telemetry.h @@ -0,0 +1,23 @@ +#pragma once + +#include "uuid.h" + +#include + +namespace clickhouse::open_telemetry { + +/// See https://www.w3.org/TR/trace-context/ for trace_flags definition +enum TraceFlags : uint8_t { + TRACE_FLAG_NONE = 0, + TRACE_FLAG_SAMPLED = 1, +}; + +/// The runtime info we need to create new OpenTelemetry spans. +struct TracingContext { + UUID trace_id{}; + uint64_t span_id = 0; + std::string tracestate; + uint8_t trace_flags = TRACE_FLAG_NONE; +}; + +} // namespace clickhouse::open_telemetry diff --git a/clickhouse/base/uuid.h b/clickhouse/base/uuid.h new file mode 100644 index 00000000..d78a1866 --- /dev/null +++ b/clickhouse/base/uuid.h @@ -0,0 +1,12 @@ +#pragma once + +#include +#include + +namespace clickhouse { + +using UInt128 = std::pair; + +using UUID = UInt128; + +} diff --git a/clickhouse/client.cpp b/clickhouse/client.cpp index e0726184..df013a0a 100644 --- a/clickhouse/client.cpp +++ b/clickhouse/client.cpp @@ -37,8 +37,10 @@ #define DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA 54410 #define DBMS_MIN_REVISION_WITH_CLIENT_WRITE_INFO 54420 #define DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS 54429 +#define DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET 54441 +#define DBMS_MIN_REVISION_WITH_OPENTELEMETRY 54442 -#define REVISION DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS +#define REVISION DBMS_MIN_REVISION_WITH_OPENTELEMETRY namespace clickhouse { @@ -661,6 +663,27 @@ void Client::Impl::SendQuery(const Query& query) { if (server_info_.revision >= DBMS_MIN_REVISION_WITH_VERSION_PATCH) { WireFormat::WriteUInt64(*output_, info.client_version_patch); } + + if (server_info_.revision >= DBMS_MIN_REVISION_WITH_OPENTELEMETRY) { + if (const auto& tracing_context = query.GetTracingContext()) { + // Have OpenTelemetry header. + WireFormat::WriteFixed(*output_, uint8_t(1)); + // No point writing these numbers with variable length, because they + // are random and will probably require the full length anyway. + WireFormat::WriteFixed(*output_, tracing_context->trace_id); + WireFormat::WriteFixed(*output_, tracing_context->span_id); + WireFormat::WriteString(*output_, tracing_context->tracestate); + WireFormat::WriteFixed(*output_, tracing_context->trace_flags); + } else { + // Don't have OpenTelemetry header. + WireFormat::WriteFixed(*output_, uint8_t(0)); + } + } else { + if (query.GetTracingContext()) { + // Current implementation works only for server version >= v20.11.2.1-stable + throw UnimplementedError(std::string("Can't send open telemetry tracing context to a server, server version is too old")); + } + } } /// Per query settings @@ -678,6 +701,10 @@ void Client::Impl::SendQuery(const Query& query) { // Empty string signals end of serialized settings WireFormat::WriteString(*output_, std::string()); + if (server_info_.revision >= DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET) { + WireFormat::WriteString(*output_, ""); + } + WireFormat::WriteUInt64(*output_, Stages::Complete); WireFormat::WriteUInt64(*output_, compression_); WireFormat::WriteString(*output_, query.GetText()); diff --git a/clickhouse/columns/uuid.cpp b/clickhouse/columns/uuid.cpp index 8e89f7af..19e94761 100644 --- a/clickhouse/columns/uuid.cpp +++ b/clickhouse/columns/uuid.cpp @@ -21,7 +21,7 @@ ColumnUUID::ColumnUUID(ColumnRef data) } } -void ColumnUUID::Append(const UInt128& value) { +void ColumnUUID::Append(const UUID& value) { data_->Append(value.first); data_->Append(value.second); } @@ -30,12 +30,12 @@ void ColumnUUID::Clear() { data_->Clear(); } -const UInt128 ColumnUUID::At(size_t n) const { - return UInt128(data_->At(n * 2), data_->At(n * 2 + 1)); +const UUID ColumnUUID::At(size_t n) const { + return UUID(data_->At(n * 2), data_->At(n * 2 + 1)); } -const UInt128 ColumnUUID::operator [] (size_t n) const { - return UInt128((*data_)[n * 2], (*data_)[n * 2 + 1]); +const UUID ColumnUUID::operator [] (size_t n) const { + return UUID((*data_)[n * 2], (*data_)[n * 2 + 1]); } void ColumnUUID::Append(ColumnRef column) { @@ -78,4 +78,3 @@ ItemView ColumnUUID::GetItem(size_t index) const { } } - diff --git a/clickhouse/columns/uuid.h b/clickhouse/columns/uuid.h index 2b7b58de..dd7d0b9d 100644 --- a/clickhouse/columns/uuid.h +++ b/clickhouse/columns/uuid.h @@ -1,11 +1,11 @@ #pragma once +#include "../base/uuid.h" #include "column.h" #include "numeric.h" namespace clickhouse { -using UInt128 = std::pair; /** * Represents a UUID column. @@ -17,13 +17,13 @@ class ColumnUUID : public Column { explicit ColumnUUID(ColumnRef data); /// Appends one element to the end of column. - void Append(const UInt128& value); + void Append(const UUID& value); /// Returns element at given row number. - const UInt128 At(size_t n) const; + const UUID At(size_t n) const; /// Returns element at given row number. - const UInt128 operator [] (size_t n) const; + const UUID operator [] (size_t n) const; public: /// Appends content of given column to the end of current one. @@ -34,7 +34,7 @@ class ColumnUUID : public Column { /// Saves column data to output stream. void SaveBody(OutputStream* output) override; - + /// Clear column data . void Clear() override; diff --git a/clickhouse/query.h b/clickhouse/query.h index 33b4603b..4b728e36 100644 --- a/clickhouse/query.h +++ b/clickhouse/query.h @@ -3,9 +3,12 @@ #include "block.h" #include "server_exception.h" +#include "base/open_telemetry.h" + #include #include #include +#include #include #include @@ -106,6 +109,16 @@ class Query : public QueryEvents { return *this; } + inline const std::optional& GetTracingContext() const { + return tracing_context_; + } + + /// Set tracing context for open telemetry signals + inline Query& SetTracingContext(open_telemetry::TracingContext tracing_context) { + tracing_context_ = std::move(tracing_context); + return *this; + } + /// Set handler for receiving result data. inline Query& OnData(SelectCallback cb) { select_cb_ = std::move(cb); @@ -180,6 +193,7 @@ class Query : public QueryEvents { private: const std::string query_; const std::string query_id_; + std::optional tracing_context_; QuerySettings query_settings_; ExceptionCallback exception_cb_; ProgressCallback progress_cb_; diff --git a/clickhouse/types/type_parser.cpp b/clickhouse/types/type_parser.cpp index 36bd9271..37a049a0 100644 --- a/clickhouse/types/type_parser.cpp +++ b/clickhouse/types/type_parser.cpp @@ -1,5 +1,4 @@ #include "type_parser.h" -#include "../base/string_utils.h" #include #include diff --git a/ut/client_ut.cpp b/ut/client_ut.cpp index e28772f2..ff102e1a 100644 --- a/ut/client_ut.cpp +++ b/ut/client_ut.cpp @@ -43,6 +43,30 @@ class ClientCase : public testing::TestWithParam { return "SELECT " + column_name + " FROM " + table_name; } + void FlushLogs() { + try { + client_->Execute("SYSTEM FLUSH LOGS"); + } catch (const std::exception & e) { + std::cerr << "Got error while flushing logs: " << e.what() << std::endl; + const auto wait_for_flush = []() { + // Insufficient privileges, the only safe way is to wait long enough for system + // to flush the logs automaticaly. Usually it takes 7.5 seconds, so just in case, + // wait 3 times that to ensure that all previously executed queries are in the logs now. + const auto wait_duration = std::chrono::seconds(23); + std::cerr << "Now we wait " << wait_duration << "..." << std::endl; + std::this_thread::sleep_for(wait_duration); + }; + // DB::Exception: clickhouse_cpp_cicd: Not enough privileges. To execute this query it's necessary to have grant SYSTEM FLUSH LOGS ON + if (std::string(e.what()).find("To execute this query it's necessary to have grant SYSTEM FLUSH LOGS ON") != std::string::npos) { + wait_for_flush(); + } + // DB::Exception: clickhouse_cpp_cicd: Cannot execute query in readonly mode + if (std::string(e.what()).find("Cannot execute query in readonly mode") != std::string::npos) { + wait_for_flush(); + } + } + } + std::unique_ptr client_; const std::string table_name = "test_clickhouse_cpp_test_ut_table"; const std::string column_name = "test_column"; @@ -850,19 +874,7 @@ TEST_P(ClientCase, Query_ID) { client_->SelectCancelable("SELECT 'b', count(*) FROM " + table_name, query_id, [](const Block &) { return true; }); client_->Execute(Query("TRUNCATE TABLE " + table_name, query_id)); - try { - client_->Execute("SYSTEM FLUSH LOGS"); - } catch (const std::exception & e) { - // DB::Exception: clickhouse_cpp_cicd: Not enough privileges. To execute this query it's necessary to have grant SYSTEM FLUSH LOGS ON - if (std::string(e.what()).find("To execute this query it's necessary to have grant SYSTEM FLUSH LOGS ON") != std::string::npos) { - // Insufficient privileges, the only safe way is to wait long enough for system - // to flush the logs automatically. Usually it takes 7.5 seconds, so just in case, - // wait 3 times that to ensure that all previously executed queries are in the logs now. - const auto wait_duration = std::chrono::seconds(23); - std::cerr << "Got error while flushing logs, now we wait " << wait_duration << "..." << std::endl; - std::this_thread::sleep_for(wait_duration); - } - } + FlushLogs(); size_t total_count = 0; client_->Select("SELECT type, query_kind, query_id, query " @@ -1115,6 +1127,31 @@ TEST_P(ClientCase, ServerLogs) { } +TEST_P(ClientCase, TracingContext) { + Block block; + createTableWithOneColumn(block); + + Query query("INSERT INTO " + table_name + " (*) VALUES (\'Foo\'), (\'Bar\')" ); + open_telemetry::TracingContext tracing_context; + std::srand(std::time(0)); + tracing_context.trace_id = {std::rand(), std::rand()}; + query.SetTracingContext(tracing_context); + client_->Execute(query); + + FlushLogs(); + + size_t received_rows = 0; + client_->Select("SELECT trace_id, toString(trace_id), operation_name " + "FROM system.opentelemetry_span_log " + "WHERE trace_id = toUUID(\'" + ToString(tracing_context.trace_id) + "\');", + [&](const Block& block) { + // std::cerr << PrettyPrintBlock{block} << std::endl; + received_rows += block.GetRowCount(); + }); + + EXPECT_GT(received_rows, 0u); +} + const auto LocalHostEndpoint = ClientOptions() .SetHost( getEnvOrDefault("CLICKHOUSE_HOST", "localhost")) .SetPort( getEnvOrDefault("CLICKHOUSE_PORT", "9000")) diff --git a/ut/columns_ut.cpp b/ut/columns_ut.cpp index a7f528e4..604c94a4 100644 --- a/ut/columns_ut.cpp +++ b/ut/columns_ut.cpp @@ -377,8 +377,8 @@ TEST(ColumnsCase, UUIDInit) { auto col = std::make_shared(std::make_shared(MakeUUID_data())); ASSERT_EQ(col->Size(), 3u); - ASSERT_EQ(col->At(0), UInt128(0xbb6a8c699ab2414cllu, 0x86697b7fd27f0825llu)); - ASSERT_EQ(col->At(2), UInt128(0x3507213c178649f9llu, 0x9faf035d662f60aellu)); + ASSERT_EQ(col->At(0), UUID(0xbb6a8c699ab2414cllu, 0x86697b7fd27f0825llu)); + ASSERT_EQ(col->At(2), UUID(0x3507213c178649f9llu, 0x9faf035d662f60aellu)); } TEST(ColumnsCase, UUIDSlice) { @@ -386,8 +386,8 @@ TEST(ColumnsCase, UUIDSlice) { auto sub = col->Slice(1, 2)->As(); ASSERT_EQ(sub->Size(), 2u); - ASSERT_EQ(sub->At(0), UInt128(0x84b9f24bc26b49c6llu, 0xa03b4ab723341951llu)); - ASSERT_EQ(sub->At(1), UInt128(0x3507213c178649f9llu, 0x9faf035d662f60aellu)); + ASSERT_EQ(sub->At(0), UUID(0x84b9f24bc26b49c6llu, 0xa03b4ab723341951llu)); + ASSERT_EQ(sub->At(1), UUID(0x3507213c178649f9llu, 0x9faf035d662f60aellu)); } TEST(ColumnsCase, Int128) { @@ -787,4 +787,3 @@ TEST(ColumnsCase, ColumnLowCardinalityString_WithEmptyString_3) { EXPECT_EQ(values[i], col.At(i)) << " at pos: " << i; } } - diff --git a/ut/utils.cpp b/ut/utils.cpp index 07ae5174..e624f45c 100644 --- a/ut/utils.cpp +++ b/ut/utils.cpp @@ -12,9 +12,11 @@ #include #include #include +#include #include // for ipv4-ipv6 platform-specific stuff +#include #include #include @@ -114,6 +116,15 @@ bool doPrintValue(const ColumnRef & c, const size_t row, std: return false; } +template <> +bool doPrintValue(const ColumnRef & c, const size_t row, std::ostream & ostr) { + if (const auto & uuid_col = c->As()) { + ostr << ToString(uuid_col->At(row)); + return true; + } + return false; +} + std::ostream & printColumnValue(const ColumnRef& c, const size_t row, std::ostream & ostr) { const auto r = false @@ -138,7 +149,8 @@ std::ostream & printColumnValue(const ColumnRef& c, const size_t row, std::ostre || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) - || doPrintValue(c, row, ostr); + || doPrintValue(c, row, ostr) + || doPrintValue(c, row, ostr); if (!r) ostr << "Unable to print value of type " << c->GetType().GetName(); @@ -271,3 +283,14 @@ std::ostream & operator<<(std::ostream & ostr, const ServerInfo & server_info) { uint64_t versionNumber(const ServerInfo & server_info) { return versionNumber(server_info.version_major, server_info.version_minor, server_info.version_patch, server_info.revision); } + +std::string ToString(const clickhouse::UUID& v) { + std::string result(36, 0); + // ffff ff ff ss ssssss + const int count = std::snprintf(result.data(), result.size() + 1, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.12" PRIx64, + v.first >> 32, (v.first >> 16) & 0xffff, v.first & 0xffff, v.second >> 48, v.second & 0xffffffffffff); + if (count != 36) { + throw std::runtime_error("Error while converting UUID to string"); + } + return result; +} diff --git a/ut/utils.h b/ut/utils.h index f0a6194f..621119fb 100644 --- a/ut/utils.h +++ b/ut/utils.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include "utils_meta.h" #include "utils_comparison.h" @@ -157,3 +158,5 @@ inline uint64_t versionNumber( } uint64_t versionNumber(const clickhouse::ServerInfo & server_info); + +std::string ToString(const clickhouse::UUID& v); diff --git a/ut/utils_ut.cpp b/ut/utils_ut.cpp index a600ada0..0f67c043 100644 --- a/ut/utils_ut.cpp +++ b/ut/utils_ut.cpp @@ -33,3 +33,9 @@ TEST(TestCompareContainer, CompareNested) { EXPECT_FALSE(CompareRecursive(std::vector>{{1, 2, 3}, {4, 5, 6}}, std::vector>{{1, 2, 3}, {}})); EXPECT_FALSE(CompareRecursive(std::vector>{{1, 2, 3}, {4, 5, 6}}, std::vector>{{}})); } + +TEST(StringUtils, UUID) { + const clickhouse::UUID& uuid{0x0102030405060708, 0x090a0b0c0d0e0f10}; + const std::string uuid_string = "01020304-0506-0708-090a-0b0c0d0e0f10"; + EXPECT_EQ(ToString(uuid), uuid_string); +} diff --git a/ut/value_generators.cpp b/ut/value_generators.cpp index 43b9dff5..41e36a61 100644 --- a/ut/value_generators.cpp +++ b/ut/value_generators.cpp @@ -29,12 +29,12 @@ std::vector MakeStrings() { return {"a", "ab", "abc", "abcd"}; } -std::vector MakeUUIDs() { +std::vector MakeUUIDs() { return { - UInt128(0llu, 0llu), - UInt128(0xbb6a8c699ab2414cllu, 0x86697b7fd27f0825llu), - UInt128(0x84b9f24bc26b49c6llu, 0xa03b4ab723341951llu), - UInt128(0x3507213c178649f9llu, 0x9faf035d662f60aellu) + UUID(0llu, 0llu), + UUID(0xbb6a8c699ab2414cllu, 0x86697b7fd27f0825llu), + UUID(0x84b9f24bc26b49c6llu, 0xa03b4ab723341951llu), + UUID(0x3507213c178649f9llu, 0x9faf035d662f60aellu) }; } diff --git a/ut/value_generators.h b/ut/value_generators.h index a3004102..3632ca9e 100644 --- a/ut/value_generators.h +++ b/ut/value_generators.h @@ -38,7 +38,7 @@ std::vector MakeDates32(); std::vector MakeDateTimes(); std::vector MakeIPv4s(); std::vector MakeIPv6s(); -std::vector MakeUUIDs(); +std::vector MakeUUIDs(); std::vector MakeInt128s(); std::vector MakeDecimals(size_t precision, size_t scale); From cac2a03ef9cdbc3d9b5275919d2aba5bd728ef17 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Tue, 1 Nov 2022 03:47:31 +0400 Subject: [PATCH 39/46] CH server 22.3 for linux workflow (#240) * Using docker image clickhouse-server:22.3 --- .github/workflows/linux.yml | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index e64b8406..caeb4497 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -8,7 +8,8 @@ on: env: BUILD_TYPE: Release - CH_SERVER_VERSION: 21.3.17.2 + CLICKHOUSE_SERVER_IMAGE: "clickhouse/clickhouse-server:22.3" + jobs: build: runs-on: ubuntu-latest @@ -48,7 +49,16 @@ jobs: - uses: actions/checkout@v2 - name: Install dependencies - run: sudo apt-get install -y cmake ${{ matrix.INSTALL }} ${{ matrix.INSTALL_SSL }} + run: sudo apt-get install -y docker cmake ${{ matrix.INSTALL }} ${{ matrix.INSTALL_SSL }} + + - name: Install dependencies - Docker + run: | + sudo apt remove -y docker docker-engine docker.io containerd runc + sudo apt install -y apt-transport-https ca-certificates curl gnupg lsb-release + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt update -q + sudo apt install docker-ce docker-ce-cli containerd.io - name: Configure CMake run: | @@ -62,17 +72,14 @@ jobs: - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --target all - - name: Start ClickHouse server + - name: Test - Start ClickHouse server in background run: | - sudo apt-get install apt-transport-https ca-certificates dirmngr - sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 - echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee /etc/apt/sources.list.d/clickhouse.list - sudo apt-get update - sudo apt-get install -y \ - clickhouse-server=${{env.CH_SERVER_VERSION}} \ - clickhouse-client=${{env.CH_SERVER_VERSION}} \ - clickhouse-common-static=${{env.CH_SERVER_VERSION}} - sudo service clickhouse-server start + docker pull ${CLICKHOUSE_SERVER_IMAGE} + docker run -d --name clickhouse -p 9000:9000 ${CLICKHOUSE_SERVER_IMAGE} + docker ps -a + docker stats -a --no-stream + ## Check and wait until CH is ready to accept connections + docker exec clickhouse bash -c 'for i in {1..10}; do echo checking if clickhouse server is started attempt \#$i; if ( grep -q " Application: Ready for connections." /var/log/clickhouse-server/clickhouse-server.log ); then echo seems like clickhouse server is started; exit 0; fi; sleep 1; done; exit -1' - name: Test working-directory: ${{github.workspace}}/build/ut From 333a1ebdd578d61b0af1ce9d4c55baf77c452480 Mon Sep 17 00:00:00 2001 From: den818 Date: Thu, 27 Oct 2022 23:23:07 +0400 Subject: [PATCH 40/46] Profile events for query --- clickhouse/client.cpp | 26 +++++++++++++++++++++++++- clickhouse/protocol.h | 5 +++++ clickhouse/query.h | 17 +++++++++++++++++ ut/client_ut.cpp | 21 ++++++++++++++++++++- 4 files changed, 67 insertions(+), 2 deletions(-) diff --git a/clickhouse/client.cpp b/clickhouse/client.cpp index df013a0a..e4b0c7ef 100644 --- a/clickhouse/client.cpp +++ b/clickhouse/client.cpp @@ -39,8 +39,11 @@ #define DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS 54429 #define DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET 54441 #define DBMS_MIN_REVISION_WITH_OPENTELEMETRY 54442 +#define DBMS_MIN_REVISION_WITH_DISTRIBUTED_DEPTH 54448 +#define DBMS_MIN_REVISION_WITH_INITIAL_QUERY_START_TIME 54449 +#define DBMS_MIN_REVISION_WITH_INCREMENTAL_PROFILE_EVENTS 54451 -#define REVISION DBMS_MIN_REVISION_WITH_OPENTELEMETRY +#define REVISION DBMS_MIN_REVISION_WITH_INCREMENTAL_PROFILE_EVENTS namespace clickhouse { @@ -476,6 +479,22 @@ bool Client::Impl::ReceivePacket(uint64_t* server_packet) { return true; } + case ServerCodes::ProfileEvents: { + if (!WireFormat::SkipString(*input_)) { + return false; + } + + Block block; + if (!ReadBlock(*input_, &block)) { + return false; + } + + if (events_) { + events_->OnProfileEvents(block); + } + return true; + } + default: throw UnimplementedError("unimplemented " + std::to_string((int)packet_type)); break; @@ -649,6 +668,9 @@ void Client::Impl::SendQuery(const Query& query) { WireFormat::WriteString(*output_, info.initial_user); WireFormat::WriteString(*output_, info.initial_query_id); WireFormat::WriteString(*output_, info.initial_address); + if (server_info_.revision >= DBMS_MIN_REVISION_WITH_INITIAL_QUERY_START_TIME) { + WireFormat::WriteFixed(*output_, 0); + } WireFormat::WriteFixed(*output_, info.iface_type); WireFormat::WriteString(*output_, info.os_user); @@ -660,6 +682,8 @@ void Client::Impl::SendQuery(const Query& query) { if (server_info_.revision >= DBMS_MIN_REVISION_WITH_QUOTA_KEY_IN_CLIENT_INFO) WireFormat::WriteString(*output_, info.quota_key); + if (server_info_.revision >= DBMS_MIN_REVISION_WITH_DISTRIBUTED_DEPTH) + WireFormat::WriteUInt64(*output_, 0u); if (server_info_.revision >= DBMS_MIN_REVISION_WITH_VERSION_PATCH) { WireFormat::WriteUInt64(*output_, info.client_version_patch); } diff --git a/clickhouse/protocol.h b/clickhouse/protocol.h index bd0ced62..8d361936 100644 --- a/clickhouse/protocol.h +++ b/clickhouse/protocol.h @@ -17,6 +17,11 @@ namespace clickhouse { TablesStatusResponse = 9, /// Response to TableStatus. Log = 10, /// Query execution log. TableColumns = 11, /// Columns' description for default values calculation + PartUUIDs = 12, /// List of unique parts ids. + ReadTaskRequest = 13, /// String (UUID) describes a request for which next task is needed + /// This is such an inverted logic, where server sends requests + /// And client returns back response + ProfileEvents = 14, /// Packet with profile events from server. }; } diff --git a/clickhouse/query.h b/clickhouse/query.h index 4b728e36..21d7231f 100644 --- a/clickhouse/query.h +++ b/clickhouse/query.h @@ -66,6 +66,9 @@ class QueryEvents { */ virtual void OnServerLog(const Block& block) = 0; + /// Handle query execution profile events. + virtual void OnProfileEvents(const Block& block) = 0; + virtual void OnFinish() = 0; }; @@ -75,6 +78,7 @@ using ProgressCallback = std::function; using SelectCallback = std::function; using SelectCancelableCallback = std::function; using SelectServerLogCallback = std::function; +using ProfileEventsCallback = std::function; class Query : public QueryEvents { @@ -148,6 +152,12 @@ class Query : public QueryEvents { return *this; } + /// Set handler for receiving profile events. + inline Query& OnProfileEvents(ProfileEventsCallback cb) { + profile_events_callback_cb_ = std::move(cb); + return *this; + } + static const std::string default_query_id; private: @@ -187,6 +197,12 @@ class Query : public QueryEvents { } } + void OnProfileEvents(const Block& block) override { + if (profile_events_callback_cb_) { + profile_events_callback_cb_(block); + } + } + void OnFinish() override { } @@ -200,6 +216,7 @@ class Query : public QueryEvents { SelectCallback select_cb_; SelectCancelableCallback select_cancelable_cb_; SelectServerLogCallback select_server_log_cb_; + ProfileEventsCallback profile_events_callback_cb_; }; } diff --git a/ut/client_ut.cpp b/ut/client_ut.cpp index ff102e1a..6a0af56b 100644 --- a/ut/client_ut.cpp +++ b/ut/client_ut.cpp @@ -1126,7 +1126,6 @@ TEST_P(ClientCase, ServerLogs) { EXPECT_GT(received_row_count, 0U); } - TEST_P(ClientCase, TracingContext) { Block block; createTableWithOneColumn(block); @@ -1152,6 +1151,26 @@ TEST_P(ClientCase, TracingContext) { EXPECT_GT(received_rows, 0u); } +TEST_P(ClientCase, OnProfileEvents) { + Block block; + createTableWithOneColumn(block); + + client_->Execute("INSERT INTO " + table_name + " (*) VALUES (\'Foo\'), (\'Bar\')"); + size_t received_row_count = 0; + Query query("SELECT * FROM " + table_name); + + query.OnProfileEvents([&](const Block& block) { + received_row_count += block.GetRowCount(); + return true; + }); + client_->Execute(query); + + const int DBMS_MIN_REVISION_WITH_INCREMENTAL_PROFILE_EVENTS = 54451; + if (client_->GetServerInfo().revision >= DBMS_MIN_REVISION_WITH_INCREMENTAL_PROFILE_EVENTS) { + EXPECT_GT(received_row_count, 0U); + } +} + const auto LocalHostEndpoint = ClientOptions() .SetHost( getEnvOrDefault("CLICKHOUSE_HOST", "localhost")) .SetPort( getEnvOrDefault("CLICKHOUSE_PORT", "9000")) From 28e199dda95ffff9d72a9ad8b4f90af352f40924 Mon Sep 17 00:00:00 2001 From: Yuheng Zou Date: Tue, 15 Nov 2022 10:49:16 +0800 Subject: [PATCH 41/46] Update CMake include directories --- CMakeLists.txt | 3 --- clickhouse/CMakeLists.txt | 6 ++++++ contrib/absl/CMakeLists.txt | 3 +++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bbc0103..c7635c79 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,9 +27,6 @@ PROJECT (CLICKHOUSE-CLIENT) SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror -Wno-deprecated-declarations") ENDIF () - INCLUDE_DIRECTORIES (.) - INCLUDE_DIRECTORIES (contrib) - SUBDIRS ( clickhouse contrib/absl diff --git a/clickhouse/CMakeLists.txt b/clickhouse/CMakeLists.txt index 2813445b..db1c8692 100644 --- a/clickhouse/CMakeLists.txt +++ b/clickhouse/CMakeLists.txt @@ -43,6 +43,9 @@ TARGET_LINK_LIBRARIES (clickhouse-cpp-lib cityhash-lib lz4-lib ) +TARGET_INCLUDE_DIRECTORIES (clickhouse-cpp-lib + PUBLIC ${PROJECT_SOURCE_DIR} +) ADD_LIBRARY (clickhouse-cpp-lib-static STATIC ${clickhouse-cpp-lib-src}) TARGET_LINK_LIBRARIES (clickhouse-cpp-lib-static @@ -50,6 +53,9 @@ TARGET_LINK_LIBRARIES (clickhouse-cpp-lib-static cityhash-lib lz4-lib ) +TARGET_INCLUDE_DIRECTORIES (clickhouse-cpp-lib-static + PUBLIC ${PROJECT_SOURCE_DIR} +) IF (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") INCLUDE (CheckCXXSourceCompiles) diff --git a/contrib/absl/CMakeLists.txt b/contrib/absl/CMakeLists.txt index 87c8dff1..2cd0f2be 100644 --- a/contrib/absl/CMakeLists.txt +++ b/contrib/absl/CMakeLists.txt @@ -1,3 +1,6 @@ ADD_LIBRARY (absl-lib STATIC numeric/int128.cc ) +TARGET_INCLUDE_DIRECTORIES (absl-lib + PUBLIC ${PROJECT_SOURCE_DIR}/contrib +) From e76cb292271875799a0dd4de6d9ef71077bac203 Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Sat, 19 Nov 2022 13:44:48 +0800 Subject: [PATCH 42/46] remove items_.reserve for batch write --- clickhouse/columns/string.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index 8ea362c4..ed128579 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -247,7 +247,6 @@ void ColumnString::Append(ColumnRef column) { // TODO: fill up existing block with some items and then add a new one for the rest of items if (blocks_.size() == 0 || blocks_.back().GetAvailable() < total_size) blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, total_size)); - items_.reserve(items_.size() + col->Size()); for (size_t i = 0; i < column->Size(); ++i) { this->AppendUnsafe((*col)[i]); From b18be403856808b9462f130109603a6d6456e33d Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Sat, 19 Nov 2022 21:06:46 +0800 Subject: [PATCH 43/46] ColumnString improve performance(26%) by avoiding vector reallocate --- clickhouse/columns/string.cpp | 9 ++++++++- clickhouse/columns/string.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index 8ea362c4..40c384aa 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -166,6 +166,13 @@ ColumnString::ColumnString() { } +ColumnString::ColumnString(size_t element_count) + : Column(Type::CreateString()) +{ + items_.reserve(element_count); + blocks_.reserve(element_count / 2); +} + ColumnString::ColumnString(const std::vector& data) : ColumnString() { @@ -291,7 +298,7 @@ size_t ColumnString::Size() const { } ColumnRef ColumnString::Slice(size_t begin, size_t len) const { - auto result = std::make_shared(); + auto result = std::make_shared(len); if (begin < items_.size()) { len = std::min(len, items_.size() - begin); diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index f2216f40..9b83a088 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -78,6 +78,7 @@ class ColumnString : public Column { ColumnString(); ~ColumnString(); + explicit ColumnString(size_t element_count); explicit ColumnString(const std::vector & data); explicit ColumnString(std::vector&& data); ColumnString& operator=(const ColumnString&) = delete; From e3183cb54acb8308c8591d6b664c6d2a01ca8001 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 21 Nov 2022 18:39:32 +0400 Subject: [PATCH 44/46] Deprecated StringView --- clickhouse/base/string_view.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clickhouse/base/string_view.h b/clickhouse/base/string_view.h index 46619ed5..ad71907e 100644 --- a/clickhouse/base/string_view.h +++ b/clickhouse/base/string_view.h @@ -11,7 +11,9 @@ template < typename TChar, typename TTraits = std::char_traits > -class StringViewImpl { +class +[[deprecated("Obsolete due to C++17's std::string_view. Will be removed in next major release (3.0) ")]] +StringViewImpl { public: using size_type = size_t; using traits_type = TTraits; From 69062c943973b62b42cdf212217a6134c464f0e8 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Tue, 22 Nov 2022 19:04:18 +0400 Subject: [PATCH 45/46] Minor comment --- clickhouse/columns/string.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index ed128579..ead1d9a6 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -248,6 +248,7 @@ void ColumnString::Append(ColumnRef column) { if (blocks_.size() == 0 || blocks_.back().GetAvailable() < total_size) blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, total_size)); + // Intentionally not doing items_.reserve() since that cripples performance. for (size_t i = 0; i < column->Size(); ++i) { this->AppendUnsafe((*col)[i]); } From 4f463cb59becb6f1462f55186c74442b11f2f835 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Wed, 23 Nov 2022 12:07:02 +0400 Subject: [PATCH 46/46] More conservative `reserve`-ing of vectors + other - `ColumnString` c-tor: assuming that there are about ~100 rows in each `ColumnString::Block`, rather than 2. - `ColumnString::Slice`: only reserving for exact number of elements in `items_`. - `ColumnString::Append` less code duplication - minor style fixes --- clickhouse/columns/string.cpp | 37 ++++++++++++----------------------- 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index 40c384aa..38cfb90c 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -37,8 +37,7 @@ void ColumnFixedString::Append(std::string_view str) { + std::to_string(str.size()) + " bytes."); } - if (data_.capacity() - data_.size() < str.size()) - { + if (data_.capacity() - data_.size() < str.size()) { // round up to the next block size const auto new_size = (((data_.size() + string_size_) / DEFAULT_BLOCK_SIZE) + 1) * DEFAULT_BLOCK_SIZE; data_.reserve(new_size); @@ -129,13 +128,11 @@ struct ColumnString::Block data_(new CharT[capacity]) {} - inline auto GetAvailable() const - { + inline auto GetAvailable() const { return capacity - size; } - std::string_view AppendUnsafe(std::string_view str) - { + std::string_view AppendUnsafe(std::string_view str) { const auto pos = &data_[size]; memcpy(pos, str.data(), str.size()); @@ -144,13 +141,11 @@ struct ColumnString::Block return std::string_view(pos, str.size()); } - auto GetCurrentWritePos() - { + auto GetCurrentWritePos() { return &data_[size]; } - std::string_view ConsumeTailAsStringViewUnsafe(size_t len) - { + std::string_view ConsumeTailAsStringViewUnsafe(size_t len) { const auto start = &data_[size]; size += len; return std::string_view(start, len); @@ -170,7 +165,8 @@ ColumnString::ColumnString(size_t element_count) : Column(Type::CreateString()) { items_.reserve(element_count); - blocks_.reserve(element_count / 2); + // 100 is arbitrary number, assumption that string values are about ~40 bytes long. + blocks_.reserve(std::max(1, element_count / 100)); } ColumnString::ColumnString(const std::vector& data) @@ -179,8 +175,7 @@ ColumnString::ColumnString(const std::vector& data) items_.reserve(data.size()); blocks_.emplace_back(ComputeTotalSize(data)); - for (const auto & s : data) - { + for (const auto & s : data) { AppendUnsafe(s); } }; @@ -201,8 +196,7 @@ ColumnString::~ColumnString() {} void ColumnString::Append(std::string_view str) { - if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) - { + if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) { blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size())); } @@ -210,12 +204,7 @@ void ColumnString::Append(std::string_view str) { } void ColumnString::Append(const char* str) { - auto len = strlen(str); - if (blocks_.size() == 0 || blocks_.back().GetAvailable() < len) { - blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, len)); - } - - items_.emplace_back(blocks_.back().AppendUnsafe(str)); + Append(std::string_view(str, strlen(str))); } void ColumnString::Append(std::string&& steal_value) { @@ -298,14 +287,14 @@ size_t ColumnString::Size() const { } ColumnRef ColumnString::Slice(size_t begin, size_t len) const { - auto result = std::make_shared(len); + auto result = std::make_shared(); if (begin < items_.size()) { len = std::min(len, items_.size() - begin); + result->items_.reserve(len); result->blocks_.emplace_back(ComputeTotalSize(items_, begin, len)); - for (size_t i = begin; i < begin + len; ++i) - { + for (size_t i = begin; i < begin + len; ++i) { result->Append(items_[i]); } }