diff --git a/.buckconfig b/.buckconfig deleted file mode 100644 index c8854519..00000000 --- a/.buckconfig +++ /dev/null @@ -1,2 +0,0 @@ -[cxx] - gtest_dep = //contrib/gtest:gtest diff --git a/BUCK b/BUCK deleted file mode 100644 index 0c86b120..00000000 --- a/BUCK +++ /dev/null @@ -1,20 +0,0 @@ -cxx_library( - name = 'clickhouse-cpp', - header_namespace = 'clickhouse', - exported_headers = subdir_glob([ - ('clickhouse', '**/*.h'), - ]), - srcs = glob([ - 'clickhouse/**/*.cpp', - ]), - compiler_flags = [ - '-std=c++11', - ], - visibility = [ - 'PUBLIC', - ], - deps = [ - '//contrib/cityhash:cityhash', - '//contrib/lz4:lz4', - ] -) diff --git a/LICENSE b/LICENSE index f3d0343f..7327b333 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright 2018-2020 Yandex LLC +Copyright 2018-2022 ClickHouse, Inc Copyright 2017 Pavel Artemkin Apache License diff --git a/clickhouse/CMakeLists.txt b/clickhouse/CMakeLists.txt index 3ad235a3..6a851241 100644 --- a/clickhouse/CMakeLists.txt +++ b/clickhouse/CMakeLists.txt @@ -7,6 +7,7 @@ SET ( clickhouse-cpp-lib-src base/wire_format.cpp columns/array.cpp + columns/column.cpp columns/date.cpp columns/decimal.cpp columns/enum.cpp @@ -51,18 +52,25 @@ TARGET_LINK_LIBRARIES (clickhouse-cpp-lib-static ) IF (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - # there is a problem with __builtin_mul_overflow call at link time - # the error looks like: ... undefined reference to `__muloti4' ... 
- # caused by clang bug https://bugs.llvm.org/show_bug.cgi?id=16404 - # explicit linking to compiler-rt allows to workaround the problem - set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --rtlib=compiler-rt") - - # some workaround for linking issues on linux: - # /usr/bin/ld: CMakeFiles/simple-test.dir/main.cpp.o: undefined reference to symbol '_Unwind_Resume@@GCC_3.0' - # /usr/bin/ld: /lib/x86_64-linux-gnu/libgcc_s.so.1: error adding symbols: DSO missing from command line - # FIXME: that workaround breaks clang build on mingw - TARGET_LINK_LIBRARIES (clickhouse-cpp-lib gcc_s) - TARGET_LINK_LIBRARIES (clickhouse-cpp-lib-static gcc_s) + INCLUDE (CheckCXXSourceCompiles) + + CHECK_CXX_SOURCE_COMPILES("#include \nint main() { return __GLIBCXX__ != 0; }" + BUILDING_WITH_LIB_STDCXX) + + IF (BUILDING_WITH_LIB_STDCXX) + # there is a problem with __builtin_mul_overflow call at link time + # the error looks like: ... undefined reference to `__muloti4' ... + # caused by clang bug https://bugs.llvm.org/show_bug.cgi?id=16404 + # explicit linking to compiler-rt allows to workaround the problem + SET (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --rtlib=compiler-rt") + + # some workaround for linking issues on linux: + # /usr/bin/ld: CMakeFiles/simple-test.dir/main.cpp.o: undefined reference to symbol '_Unwind_Resume@@GCC_3.0' + # /usr/bin/ld: /lib/x86_64-linux-gnu/libgcc_s.so.1: error adding symbols: DSO missing from command line + # FIXME: that workaround breaks clang build on mingw + TARGET_LINK_LIBRARIES (clickhouse-cpp-lib gcc_s) + TARGET_LINK_LIBRARIES (clickhouse-cpp-lib-static gcc_s) + ENDIF () ENDIF () INSTALL (TARGETS clickhouse-cpp-lib clickhouse-cpp-lib-static diff --git a/clickhouse/base/compressed.cpp b/clickhouse/base/compressed.cpp index f1c3a569..8478ad28 100644 --- a/clickhouse/base/compressed.cpp +++ b/clickhouse/base/compressed.cpp @@ -1,6 +1,7 @@ #include "compressed.h" #include "wire_format.h" #include "output.h" +#include 
"../exceptions.h" #include #include @@ -30,7 +31,7 @@ CompressedInput::~CompressedInput() { #else if (!std::uncaught_exceptions()) { #endif - throw std::runtime_error("some data was not read"); + throw LZ4Error("some data was not read"); } } } @@ -59,7 +60,7 @@ bool CompressedInput::Decompress() { } if (method != COMPRESSION_METHOD) { - throw std::runtime_error("unsupported compression method " + std::to_string(int(method))); + throw LZ4Error("unsupported compression method " + std::to_string(int(method))); } else { if (!WireFormat::ReadFixed(*input_, &compressed)) { return false; @@ -69,12 +70,12 @@ bool CompressedInput::Decompress() { } if (compressed > DBMS_MAX_COMPRESSED_SIZE) { - throw std::runtime_error("compressed data too big"); + throw LZ4Error("compressed data too big"); } Buffer tmp(compressed); - // Заполнить заголовок сжатых данных. + // Data header { BufferOutput out(&tmp); out.Write(&method, sizeof(method)); @@ -87,14 +88,14 @@ bool CompressedInput::Decompress() { return false; } else { if (hash != CityHash128((const char*)tmp.data(), compressed)) { - throw std::runtime_error("data was corrupted"); + throw LZ4Error("data was corrupted"); } } data_ = Buffer(original); if (LZ4_decompress_safe((const char*)tmp.data() + HEADER_SIZE, (char*)data_.data(), compressed - HEADER_SIZE, original) < 0) { - throw std::runtime_error("can't decompress data"); + throw LZ4Error("can't decompress data"); } else { mem_.Reset(data_.data(), original); } @@ -143,7 +144,7 @@ void CompressedOutput::Compress(const void * data, size_t len) { len, static_cast(compressed_buffer_.size() - HEADER_SIZE)); if (compressed_size <= 0) - throw std::runtime_error("Failed to compress chunk of " + std::to_string(len) + " bytes, " + throw LZ4Error("Failed to compress chunk of " + std::to_string(len) + " bytes, " "LZ4 error: " + std::to_string(compressed_size)); { @@ -165,7 +166,7 @@ void CompressedOutput::Compress(const void * data, size_t len) { void 
CompressedOutput::PreallocateCompressBuffer(size_t input_size) { const auto estimated_compressed_buffer_size = LZ4_compressBound(static_cast(input_size)); if (estimated_compressed_buffer_size <= 0) - throw std::runtime_error("Failed to estimate compressed buffer size, LZ4 error: " + std::to_string(estimated_compressed_buffer_size)); + throw LZ4Error("Failed to estimate compressed buffer size, LZ4 error: " + std::to_string(estimated_compressed_buffer_size)); compressed_buffer_.resize(estimated_compressed_buffer_size + HEADER_SIZE + EXTRA_COMPRESS_BUFFER_SIZE); } diff --git a/clickhouse/base/sslsocket.cpp b/clickhouse/base/sslsocket.cpp index 2b9a6cac..392c22fd 100644 --- a/clickhouse/base/sslsocket.cpp +++ b/clickhouse/base/sslsocket.cpp @@ -1,5 +1,6 @@ #include "sslsocket.h" #include "../client.h" +#include "../exceptions.h" #include @@ -45,7 +46,7 @@ void throwSSLError(SSL * ssl, int error, const char * /*location*/, const char * // << "\n\t last err: " << ERR_peek_last_error() // << std::endl; - throw std::runtime_error(prefix + std::to_string(error) + " : " + reason_str); + throw clickhouse::OpenSSLError(prefix + std::to_string(error) + " : " + reason_str); } void configureSSL(const clickhouse::SSLParams::ConfigurationType & configuration, SSL * ssl, SSL_CTX * context = nullptr) { @@ -75,13 +76,13 @@ void configureSSL(const clickhouse::SSLParams::ConfigurationType & configuration else if (err == 0) throwSSLError(ssl, SSL_ERROR_NONE, nullptr, nullptr, "Failed to configure OpenSSL with command '" + kv.first + "' "); else if (err == 1 && value_present) - throw std::runtime_error("Failed to configure OpenSSL: command '" + kv.first + "' needs no value"); + throw clickhouse::OpenSSLError("Failed to configure OpenSSL: command '" + kv.first + "' needs no value"); else if (err == -2) - throw std::runtime_error("Failed to cofigure OpenSSL: unknown command '" + kv.first + "'"); + throw clickhouse::OpenSSLError("Failed to cofigure OpenSSL: unknown command '" + kv.first + 
"'"); else if (err == -3) - throw std::runtime_error("Failed to cofigure OpenSSL: command '" + kv.first + "' requires a value"); + throw clickhouse::OpenSSLError("Failed to cofigure OpenSSL: command '" + kv.first + "' requires a value"); else - throw std::runtime_error("Failed to cofigure OpenSSL: command '" + kv.first + "' unknown error: " + std::to_string(err)); + throw clickhouse::OpenSSLError("Failed to cofigure OpenSSL: command '" + kv.first + "' unknown error: " + std::to_string(err)); } } @@ -104,7 +105,7 @@ SSL_CTX * prepareSSLContext(const clickhouse::SSLParams & context_params) { std::unique_ptr ctx(SSL_CTX_new(method), &SSL_CTX_free); if (!ctx) - throw std::runtime_error("Failed to initialize SSL context"); + throw clickhouse::OpenSSLError("Failed to initialize SSL context"); #define HANDLE_SSL_CTX_ERROR(statement) do { \ if (const auto ret_code = (statement); !ret_code) \ @@ -204,7 +205,7 @@ SSLSocket::SSLSocket(const NetworkAddress& addr, const SSLParams & ssl_params, { auto ssl = ssl_.get(); if (!ssl) - throw std::runtime_error("Failed to create SSL instance"); + throw clickhouse::OpenSSLError("Failed to create SSL instance"); std::unique_ptr ip_addr(a2i_IPADDRESS(addr.Host().c_str()), &ASN1_OCTET_STRING_free); @@ -228,7 +229,7 @@ SSLSocket::SSLSocket(const NetworkAddress& addr, const SSLParams & ssl_params, if (const auto verify_result = SSL_get_verify_result(ssl); !ssl_params.skip_verification && verify_result != X509_V_OK) { auto error_message = X509_verify_cert_error_string(verify_result); - throw std::runtime_error("Failed to verify SSL connection, X509_v error: " + throw clickhouse::OpenSSLError("Failed to verify SSL connection, X509_v error: " + std::to_string(verify_result) + " " + error_message + "\nServer certificate: " + getCertificateInfo(SSL_get_peer_certificate(ssl))); diff --git a/clickhouse/base/wire_format.cpp b/clickhouse/base/wire_format.cpp index 00a806f8..62a21833 100644 --- a/clickhouse/base/wire_format.cpp +++ 
b/clickhouse/base/wire_format.cpp @@ -3,6 +3,8 @@ #include "input.h" #include "output.h" +#include "../exceptions.h" + #include namespace { @@ -38,7 +40,7 @@ void WireFormat::WriteAll(OutputStream& output, const void* buf, size_t len) { } if (len) { - throw std::runtime_error("Failed to write " + std::to_string(original_len) + throw ProtocolError("Failed to write " + std::to_string(original_len) + " bytes, only written " + std::to_string(original_len - len)); } } diff --git a/clickhouse/block.cpp b/clickhouse/block.cpp index c4ddb855..aca77c00 100644 --- a/clickhouse/block.cpp +++ b/clickhouse/block.cpp @@ -1,5 +1,7 @@ #include "block.h" +#include "exceptions.h" + #include namespace clickhouse { @@ -54,7 +56,7 @@ void Block::AppendColumn(const std::string& name, const ColumnRef& col) { if (columns_.empty()) { rows_ = col->Size(); } else if (col->Size() != rows_) { - throw std::runtime_error("all columns in block must have same count of rows. Name: ["+name+"], rows: ["+std::to_string(rows_)+"], columns: [" + std::to_string(col->Size())+"]"); + throw ValidationError("all columns in block must have same count of rows. Name: ["+name+"], rows: ["+std::to_string(rows_)+"], columns: [" + std::to_string(col->Size())+"]"); } columns_.push_back(ColumnItem{name, col}); @@ -86,7 +88,7 @@ size_t Block::RefreshRowCount() if (idx == 0UL) rows = col->Size(); else if (rows != col->Size()) - throw std::runtime_error("all columns in block must have same count of rows. Name: ["+name+"], rows: ["+std::to_string(rows)+"], columns: [" + std::to_string(col->Size())+"]"); + throw ValidationError("all columns in block must have same count of rows. 
Name: ["+name+"], rows: ["+std::to_string(rows)+"], columns: [" + std::to_string(col->Size())+"]"); } rows_ = rows; diff --git a/clickhouse/client.cpp b/clickhouse/client.cpp index 08121f26..36c1bcb3 100644 --- a/clickhouse/client.cpp +++ b/clickhouse/client.cpp @@ -87,7 +87,7 @@ ClientOptions& ClientOptions::SetSSLOptions(ClientOptions::SSLOptions options) return *this; #else (void)options; - throw std::runtime_error("Library was built with no SSL support"); + throw OpenSSLError("Library was built with no SSL support"); #endif } @@ -152,7 +152,7 @@ class Client::Impl { private: /// In case of network errors tries to reconnect to server and /// call fuc several times. - void RetryGuard(std::function fuc); + void RetryGuard(std::function func); private: class EnsureNull { @@ -187,10 +187,6 @@ class Client::Impl { std::unique_ptr output_; std::unique_ptr socket_; -#if defined(WITH_OPENSSL) - std::unique_ptr ssl_context_; -#endif - ServerInfo server_info_; }; @@ -282,7 +278,7 @@ void Client::Impl::Insert(const std::string& table_name, const std::string& quer bool ret = ReceivePacket(&server_packet); if (!ret) { - throw std::runtime_error("fail to receive data packet"); + throw ProtocolError("fail to receive data packet"); } if (server_packet == ServerCodes::Data) { break; @@ -306,7 +302,7 @@ void Client::Impl::Insert(const std::string& table_name, const std::string& quer if (eos_packet != ServerCodes::EndOfStream && eos_packet != ServerCodes::Exception && eos_packet != ServerCodes::Log && options_.rethrow_exceptions) { - throw std::runtime_error(std::string{"unexpected packet from server while receiving end of query, expected (expected Exception, EndOfStream or Log, got: "} + throw ProtocolError(std::string{"unexpected packet from server while receiving end of query, expected (expected Exception, EndOfStream or Log, got: "} + (eos_packet ? 
std::to_string(eos_packet) : "nothing") + ")"); } } @@ -319,7 +315,7 @@ void Client::Impl::Ping() { const bool ret = ReceivePacket(&server_packet); if (!ret || server_packet != ServerCodes::Pong) { - throw std::runtime_error("fail to ping server"); + throw ProtocolError("fail to ping server"); } } @@ -327,7 +323,7 @@ void Client::Impl::ResetConnection() { InitializeStreams(socket_factory_->connect(options_)); if (!Handshake()) { - throw std::runtime_error("fail to connect to " + options_.host); + throw ProtocolError("fail to connect to " + options_.host); } } @@ -358,7 +354,7 @@ bool Client::Impl::ReceivePacket(uint64_t* server_packet) { switch (packet_type) { case ServerCodes::Data: { if (!ReceiveData()) { - throw std::runtime_error("can't read data packet from input stream"); + throw ProtocolError("can't read data packet from input stream"); } return true; } @@ -423,6 +419,10 @@ bool Client::Impl::ReceivePacket(uint64_t* server_packet) { return true; } + case ServerCodes::Hello: { + return true; + } + case ServerCodes::EndOfStream: { if (events_) { events_->OnFinish(); @@ -431,7 +431,7 @@ bool Client::Impl::ReceivePacket(uint64_t* server_packet) { } default: - throw std::runtime_error("unimplemented " + std::to_string((int)packet_type)); + throw UnimplementedError("unimplemented " + std::to_string((int)packet_type)); break; } @@ -489,12 +489,12 @@ bool Client::Impl::ReadBlock(InputStream& input, Block* block) { if (ColumnRef col = CreateColumnByType(type, create_column_settings)) { if (num_rows && !col->Load(&input, num_rows)) { - throw std::runtime_error("can't load column '" + name + "' of type " + type); + throw ProtocolError("can't load column '" + name + "' of type " + type); } block->AppendColumn(name, col); } else { - throw std::runtime_error(std::string("unsupported column type: ") + type); + throw UnimplementedError(std::string("unsupported column type: ") + type); } } @@ -573,7 +573,7 @@ bool Client::Impl::ReceiveException(bool rethrow) { } if (rethrow 
|| options_.rethrow_exceptions) { - throw ServerException(std::move(e)); + throw ServerError(std::move(e)); } return exception_received; @@ -653,7 +653,12 @@ void Client::Impl::WriteBlock(const Block& block, OutputStream& output) { WireFormat::WriteString(output, bi.Name()); WireFormat::WriteString(output, bi.Type()->GetName()); - bi.Column()->Save(&output); + // Empty columns are not serialized and occupy exactly 0 bytes. + // ref https://github.com/ClickHouse/ClickHouse/blob/39b37a3240f74f4871c8c1679910e065af6bea19/src/Formats/NativeWriter.cpp#L163 + const bool containsData = block.GetRowCount() > 0; + if (containsData) { + bi.Column()->Save(&output); + } } output.Flush(); } @@ -668,8 +673,8 @@ void Client::Impl::SendData(const Block& block) { if (compression_ == CompressionState::Enable) { assert(options_.compression_method == CompressionMethod::LZ4); - std::unique_ptr compressed_ouput = std::make_unique(output_.get(), options_.max_compression_chunk_size); - BufferedOutput buffered(std::move(compressed_ouput), options_.max_compression_chunk_size); + std::unique_ptr compressed_output = std::make_unique(output_.get(), options_.max_compression_chunk_size); + BufferedOutput buffered(std::move(compressed_output), options_.max_compression_chunk_size); WriteBlock(block, buffered); } else { @@ -794,19 +799,19 @@ void Client::Execute(const Query& query) { } void Client::Select(const std::string& query, SelectCallback cb) { - Execute(Query(query).OnData(cb)); + Execute(Query(query).OnData(std::move(cb))); } void Client::Select(const std::string& query, const std::string& query_id, SelectCallback cb) { - Execute(Query(query, query_id).OnData(cb)); + Execute(Query(query, query_id).OnData(std::move(cb))); } void Client::SelectCancelable(const std::string& query, SelectCancelableCallback cb) { - Execute(Query(query).OnDataCancelable(cb)); + Execute(Query(query).OnDataCancelable(std::move(cb))); } void Client::SelectCancelable(const std::string& query, const std::string& 
query_id, SelectCancelableCallback cb) { - Execute(Query(query, query_id).OnDataCancelable(cb)); + Execute(Query(query, query_id).OnDataCancelable(std::move(cb))); } void Client::Select(const Query& query) { diff --git a/clickhouse/client.h b/clickhouse/client.h index 7f2b97dd..6de09b8a 100644 --- a/clickhouse/client.h +++ b/clickhouse/client.h @@ -86,6 +86,7 @@ struct ClientOptions { // TCP options DECLARE_FIELD(tcp_nodelay, bool, TcpNoDelay, true); + // TODO deprecate setting /** It helps to ease migration of the old codebases, which can't afford to switch * to using ColumnLowCardinalityT or ColumnLowCardinality directly, * but still want to benefit from smaller on-wire LowCardinality bandwidth footprint. diff --git a/clickhouse/columns/array.cpp b/clickhouse/columns/array.cpp index a83ba045..9ef160b5 100644 --- a/clickhouse/columns/array.cpp +++ b/clickhouse/columns/array.cpp @@ -1,47 +1,62 @@ #include "array.h" #include "numeric.h" + #include namespace clickhouse { ColumnArray::ColumnArray(ColumnRef data) + : ColumnArray(data, std::make_shared()) +{ +} + +ColumnArray::ColumnArray(ColumnRef data, std::shared_ptr offsets) : Column(Type::CreateArray(data->Type())) , data_(data) - , offsets_(std::make_shared()) + , offsets_(offsets) +{ +} + +ColumnArray::ColumnArray(ColumnArray&& other) + : Column(other.Type()) + , data_(std::move(other.data_)) + , offsets_(std::move(other.offsets_)) { } void ColumnArray::AppendAsColumn(ColumnRef array) { if (!data_->Type()->IsEqual(array->Type())) { - throw std::runtime_error( + throw ValidationError( "can't append column of type " + array->Type()->GetName() + " " "to column type " + data_->Type()->GetName()); } - if (offsets_->Size() == 0) { - offsets_->Append(array->Size()); - } else { - offsets_->Append((*offsets_)[offsets_->Size() - 1] + array->Size()); - } - + AddOffset(array->Size()); data_->Append(array); } ColumnRef ColumnArray::GetAsColumn(size_t n) const { + if (n >= Size()) + throw ValidationError("Index is out ouf 
bounds: " + std::to_string(n)); + return data_->Slice(GetOffset(n), GetSize(n)); } ColumnRef ColumnArray::Slice(size_t begin, size_t size) const { - auto result = std::make_shared(GetAsColumn(begin)); - result->OffsetsIncrease(1); + if (size && begin + size > Size()) + throw ValidationError("Slice indexes are out of bounds"); - for (size_t i = 1; i < size; i++) { - result->Append(std::make_shared(GetAsColumn(begin + i))); - } + auto result = std::make_shared(data_->Slice(GetOffset(begin), GetOffset(begin + size) - GetOffset(begin))); + for (size_t i = 0; i < size; i++) + result->AddOffset(GetSize(begin + i)); return result; } +ColumnRef ColumnArray::CloneEmpty() const { + return std::make_shared(data_->CloneEmpty()); +} + void ColumnArray::Append(ColumnRef column) { if (auto col = column->As()) { if (!col->data_->Type()->IsEqual(data_->Type())) { @@ -54,22 +69,34 @@ void ColumnArray::Append(ColumnRef column) { } } -bool ColumnArray::Load(InputStream* input, size_t rows) { +bool ColumnArray::LoadPrefix(InputStream* input, size_t rows) { if (!rows) { return true; } - if (!offsets_->Load(input, rows)) { + + return data_->LoadPrefix(input, rows); +} + +bool ColumnArray::LoadBody(InputStream* input, size_t rows) { + if (!rows) { + return true; + } + if (!offsets_->LoadBody(input, rows)) { return false; } - if (!data_->Load(input, (*offsets_)[rows - 1])) { + if (!data_->LoadBody(input, (*offsets_)[rows - 1])) { return false; } return true; } -void ColumnArray::Save(OutputStream* output) { - offsets_->Save(output); - data_->Save(output); +void ColumnArray::SavePrefix(OutputStream* output) { + data_->SavePrefix(output); +} + +void ColumnArray::SaveBody(OutputStream* output) { + offsets_->SaveBody(output); + data_->SaveBody(output); } void ColumnArray::Clear() { @@ -92,11 +119,29 @@ void ColumnArray::OffsetsIncrease(size_t n) { } size_t ColumnArray::GetOffset(size_t n) const { + return (n == 0) ? 
0 : (*offsets_)[n - 1]; } +void ColumnArray::AddOffset(size_t n) { + if (offsets_->Size() == 0) { + offsets_->Append(n); + } else { + offsets_->Append((*offsets_)[offsets_->Size() - 1] + n); + } +} + size_t ColumnArray::GetSize(size_t n) const { return (n == 0) ? (*offsets_)[n] : ((*offsets_)[n] - (*offsets_)[n - 1]); } +ColumnRef ColumnArray::GetData() { + return data_; +} + +void ColumnArray::Reset() { + data_.reset(); + offsets_.reset(); +} + } diff --git a/clickhouse/columns/array.h b/clickhouse/columns/array.h index e96e70c4..6144e430 100644 --- a/clickhouse/columns/array.h +++ b/clickhouse/columns/array.h @@ -3,32 +3,63 @@ #include "column.h" #include "numeric.h" +#include + namespace clickhouse { +template +class ColumnArrayT; + /** * Represents column of Array(T). */ class ColumnArray : public Column { public: - ColumnArray(ColumnRef data); + using ValueType = ColumnRef; + + /** Create an array of given type. + * + * `data` is used internally (and modified) by ColumnArray. + * Users are strongly advised against supplying non-empty columns and/or modifying + * contents of `data` afterwards. + */ + explicit ColumnArray(ColumnRef data); + + /** Create an array of given type, with actual values and offsets. + * + * Both `data` and `offsets` are used (and modified) internally by ColumnArray. + * Users are strongly advised against modifying contents of `data` or `offsets` afterwards. + */ + ColumnArray(ColumnRef data, std::shared_ptr offsets); - /// Converts input column to array and appends - /// as one row to the current column. + /// Converts input column to array and appends as one row to the current column. void AppendAsColumn(ColumnRef array); /// Convets array at pos n to column. /// Type of element of result column same as type of array element. ColumnRef GetAsColumn(size_t n) const; + /// Shorthand to get a column cast to a proper type.
+ template + auto GetAsColumnTyped(size_t n) const { + return GetAsColumn(n)->AsStrict(); + } + public: /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; + /// Loads column prefix from input stream. + bool LoadPrefix(InputStream* input, size_t rows) override; + /// Loads column data from input stream. - bool Load(InputStream* input, size_t rows) override; + bool LoadBody(InputStream* input, size_t rows) override; + + /// Saves column prefix to output stream. + void SavePrefix(OutputStream* output) override; /// Saves column data to output stream. - void Save(OutputStream* output) override; + void SaveBody(OutputStream* output) override; /// Clear column data . void Clear() override; @@ -38,19 +69,214 @@ class ColumnArray : public Column { /// Makes slice of the current column. ColumnRef Slice(size_t, size_t) const override; - + ColumnRef CloneEmpty() const override; void Swap(Column&) override; void OffsetsIncrease(size_t); -private: - size_t GetOffset(size_t n) const; +protected: + template friend class ColumnArrayT; + ColumnArray(ColumnArray&& array); + + size_t GetOffset(size_t n) const; size_t GetSize(size_t n) const; + ColumnRef GetData(); + void AddOffset(size_t n); + void Reset(); private: ColumnRef data_; std::shared_ptr offsets_; }; +template +class ColumnArrayT : public ColumnArray { +public: + class ArrayValueView; + using ValueType = ArrayValueView; + using NestedColumnType = ColumnType; + + explicit ColumnArrayT(std::shared_ptr data) + : ColumnArray(data) + , typed_nested_data_(data) + {} + + ColumnArrayT(std::shared_ptr data, std::shared_ptr offsets) + : ColumnArray(data, offsets) + , typed_nested_data_(data) + {} + + template + explicit ColumnArrayT(Args &&... args) + : ColumnArrayT(std::make_shared(std::forward(args)...)) + {} + + /** Create a ColumnArrayT from a ColumnArray, without copying data and offsets, but by 'stealing' those from `col`. 
+ * + * Ownership of column internals is transferred to returned object, original (argument) object + * MUST NOT BE USED IN ANY WAY, it is only safe to dispose it. + * + * Throws an exception if `col` is of wrong type, it is safe to use original col in this case. + * This is a static method to make such conversion verbose. + */ + static auto Wrap(ColumnArray&& col) { + if constexpr (std::is_base_of_v && !std::is_same_v) { + // assuming NestedColumnType is ArrayT specialization + return std::make_shared>(NestedColumnType::Wrap(col.GetData()), col.offsets_); + } else { + auto nested_data = col.GetData()->template AsStrict(); + return std::make_shared>(nested_data, col.offsets_); + } + } + + static auto Wrap(Column&& col) { + return Wrap(std::move(dynamic_cast(col))); + } + + // Helper to simplify integration with other APIs + static auto Wrap(ColumnRef&& col) { + return Wrap(std::move(*col->AsStrict())); + } + + /// A single (row) value of the Array-column, i.e. readonly array of items. + class ArrayValueView { + const std::shared_ptr typed_nested_data_; + const size_t offset_; + const size_t size_; + + public: + using ValueType = typename NestedColumnType::ValueType; + + ArrayValueView(std::shared_ptr data, size_t offset = 0, size_t size = std::numeric_limits::max()) + : typed_nested_data_(data) + , offset_(offset) + , size_(std::min(typed_nested_data_->Size() - offset, size)) + {} + + inline auto operator[](size_t index) const { + return (*typed_nested_data_)[offset_ + index]; + } + + inline auto At(size_t index) const { + if (index >= size_) + throw ValidationError("ColumnArray value index out of bounds: " + + std::to_string(index) + ", max is " + std::to_string(size_)); + return typed_nested_data_->At(offset_ + index); + } + + class Iterator { + const std::shared_ptr typed_nested_data_; + const size_t offset_; + const size_t size_; + size_t index_; + public: + Iterator(std::shared_ptr typed_nested_data, size_t offset, size_t size, size_t index) + : 
typed_nested_data_(typed_nested_data) + , offset_(offset) + , size_(size) + , index_(index) + {} + + using ValueType = typename NestedColumnType::ValueType; + + inline auto operator*() const { + return typed_nested_data_->At(offset_ + index_); + } + + inline Iterator& operator++() { + ++index_; + return *this; + } + + inline bool operator==(const Iterator& other) const { + return this->typed_nested_data_ == other.typed_nested_data_ + && this->offset_ == other.offset_ + && this->size_ == other.size_ + && this->index_ == other.index_; + } + + inline bool operator!=(const Iterator& other) const { + return !(*this == other); + } + }; + + // minimalistic stl-like container interface, hence the lowercase + inline Iterator begin() const { + return Iterator{typed_nested_data_, offset_, size_, 0}; + } + + inline Iterator cbegin() const { + return Iterator{typed_nested_data_, offset_, size_, 0}; + } + + inline Iterator end() const { + return Iterator{typed_nested_data_, offset_, size_, size_}; + } + + inline Iterator cend() const { + return Iterator{typed_nested_data_, offset_, size_, size_}; + } + + inline size_t size() const { + return size_; + } + + // It is ugly to have both size() and Size(), but it is for compatibility with both STL and rest of the clickhouse-cpp.
+ inline size_t Size() const { + return size_; + } + }; + + inline auto At(size_t index) const { + if (index >= Size()) + throw ValidationError("ColumnArray row index out of bounds: " + + std::to_string(index) + ", max is " + std::to_string(Size())); + + return ArrayValueView{typed_nested_data_, GetOffset(index), GetSize(index)}; + } + + inline auto operator[](size_t index) const { + return ArrayValueView{typed_nested_data_, GetOffset(index), GetSize(index)}; + } + + using ColumnArray::Append; + + template + inline void Append(const Container& container) { + Append(std::begin(container), std::end(container)); + } + + template + inline void Append(const std::initializer_list& container) { + Append(std::begin(container), std::end(container)); + } + + template + inline void Append(Begin begin, const End & end) { + auto & nested_data = *typed_nested_data_; + size_t counter = 0; + + while (begin != end) { + nested_data.Append(*begin); + ++begin; + ++counter; + } + + // Even if there are 0 items, increase counter, creating empty array item. + AddOffset(counter); + } + +private: + /// Helper to allow wrapping a "typeless" ColumnArray + ColumnArrayT(ColumnArray&& array, std::shared_ptr nested_data) + : ColumnArray(std::move(array)) + , typed_nested_data_(std::move(nested_data)) + {} + + +private: + std::shared_ptr typed_nested_data_; +}; + } diff --git a/clickhouse/columns/column.cpp b/clickhouse/columns/column.cpp new file mode 100644 index 00000000..7f881d7c --- /dev/null +++ b/clickhouse/columns/column.cpp @@ -0,0 +1,24 @@ +#include "column.h" + +namespace clickhouse { + +bool Column::LoadPrefix(InputStream*, size_t) { + /// does nothing by default + return true; +} + +bool Column::Load(InputStream* input, size_t rows) { + return LoadPrefix(input, rows) && LoadBody(input, rows); +} + +void Column::SavePrefix(OutputStream*) { + /// does nothing by default +} + +/// Saves column data to output stream. 
+void Column::Save(OutputStream* output) { + SavePrefix(output); + SaveBody(output); +} + +} diff --git a/clickhouse/columns/column.h b/clickhouse/columns/column.h index 19e50988..b54cbdee 100644 --- a/clickhouse/columns/column.h +++ b/clickhouse/columns/column.h @@ -2,6 +2,7 @@ #include "../types/types.h" #include "../columns/itemview.h" +#include "../exceptions.h" #include #include @@ -34,6 +35,16 @@ class Column : public std::enable_shared_from_this { return std::dynamic_pointer_cast(shared_from_this()); } + /// Downcast pointer to the specific column's subtype. + template + inline std::shared_ptr AsStrict() { + auto result = std::dynamic_pointer_cast(shared_from_this()); + if (!result) { + throw ValidationError("Can't cast from " + type_->GetName()); + } + return result; + } + /// Get type object of the column. inline TypeRef Type() const { return type_; } inline const class Type& GetType() const { return *type_; } @@ -41,11 +52,27 @@ class Column : public std::enable_shared_from_this { /// Appends content of given column to the end of current one. virtual void Append(ColumnRef column) = 0; + /// Template method to load column data from input stream. It'll call LoadPrefix and LoadBody. + /// Should be called only once from the client. Derived classes should not call it. + bool Load(InputStream* input, size_t rows); + + /// Loads column prefix from input stream. + virtual bool LoadPrefix(InputStream* input, size_t rows); + /// Loads column data from input stream. - virtual bool Load(InputStream* input, size_t rows) = 0; + virtual bool LoadBody(InputStream* input, size_t rows) = 0; + + /// Saves column prefix to output stream. Column types with prefixes must implement it. + virtual void SavePrefix(OutputStream* output); - /// Saves column data to output stream. - virtual void Save(OutputStream* output) = 0; + /// Saves column body to output stream. + virtual void SaveBody(OutputStream* output) = 0; + + /// Template method to save to output stream. 
It'll call SavePrefix and SaveBody respectively + /// Should be called only once from the client. Derived classes should not call it. + /// Save is split in Prefix and Body because some data types require prefixes and specific serialization order. + /// For instance, Array(LowCardinality(X)) requires LowCardinality.key_version bytes to come before Array.offsets + void Save(OutputStream* output); /// Clear column data . virtual void Clear() = 0; @@ -56,12 +83,14 @@ class Column : public std::enable_shared_from_this { /// Makes slice of the current column. virtual ColumnRef Slice(size_t begin, size_t len) const = 0; + virtual ColumnRef CloneEmpty() const = 0; + virtual void Swap(Column&) = 0; /// Get a view on raw item data if it is supported by column, will throw an exception if index is out of range. /// Please note that view is invalidated once column items are added or deleted, column is loaded from strean or destroyed. virtual ItemView GetItem(size_t) const { - throw std::runtime_error("GetItem() is not supported for column of " + type_->GetName()); + throw UnimplementedError("GetItem() is not supported for column of " + type_->GetName()); } friend void swap(Column& left, Column& right) { diff --git a/clickhouse/columns/date.cpp b/clickhouse/columns/date.cpp index 1301c2ff..1ef67c44 100644 --- a/clickhouse/columns/date.cpp +++ b/clickhouse/columns/date.cpp @@ -27,12 +27,12 @@ void ColumnDate::Append(ColumnRef column) { } } -bool ColumnDate::Load(InputStream* input, size_t rows) { - return data_->Load(input, rows); +bool ColumnDate::LoadBody(InputStream* input, size_t rows) { + return data_->LoadBody(input, rows); } -void ColumnDate::Save(OutputStream* output) { - data_->Save(output); +void ColumnDate::SaveBody(OutputStream* output) { + data_->SaveBody(output); } size_t ColumnDate::Size() const { @@ -48,15 +48,79 @@ ColumnRef ColumnDate::Slice(size_t begin, size_t len) const { return result; } +ColumnRef ColumnDate::CloneEmpty() const { + return 
std::make_shared(); +} + void ColumnDate::Swap(Column& other) { auto & col = dynamic_cast(other); data_.swap(col.data_); } ItemView ColumnDate::GetItem(size_t index) const { - return data_->GetItem(index); + return ItemView(Type::Date, data_->GetItem(index)); +} + + + +ColumnDate32::ColumnDate32() + : Column(Type::CreateDate32()) + , data_(std::make_shared()) +{ +} + +void ColumnDate32::Append(const std::time_t& value) { + /// TODO: This code is fundamentally wrong. + data_->Append(static_cast(value / std::time_t(86400))); +} + +void ColumnDate32::Clear() { + data_->Clear(); } +std::time_t ColumnDate32::At(size_t n) const { + return static_cast(data_->At(n)) * 86400; +} + +void ColumnDate32::Append(ColumnRef column) { + if (auto col = column->As()) { + data_->Append(col->data_); + } +} + +bool ColumnDate32::LoadBody(InputStream* input, size_t rows) { + return data_->LoadBody(input, rows); +} + +void ColumnDate32::SaveBody(OutputStream* output) { + data_->SaveBody(output); +} + +size_t ColumnDate32::Size() const { + return data_->Size(); +} + +ColumnRef ColumnDate32::Slice(size_t begin, size_t len) const { + auto col = data_->Slice(begin, len)->As(); + auto result = std::make_shared(); + + result->data_->Append(col); + + return result; +} + +ColumnRef ColumnDate32::CloneEmpty() const { + return std::make_shared(); +} + +void ColumnDate32::Swap(Column& other) { + auto & col = dynamic_cast(other); + data_.swap(col.data_); +} + +ItemView ColumnDate32::GetItem(size_t index) const { + return ItemView{Type()->GetCode(), data_->GetItem(index)}; +} ColumnDateTime::ColumnDateTime() @@ -89,12 +153,12 @@ void ColumnDateTime::Append(ColumnRef column) { } } -bool ColumnDateTime::Load(InputStream* input, size_t rows) { - return data_->Load(input, rows); +bool ColumnDateTime::LoadBody(InputStream* input, size_t rows) { + return data_->LoadBody(input, rows); } -void ColumnDateTime::Save(OutputStream* output) { - data_->Save(output); +void ColumnDateTime::SaveBody(OutputStream* 
output) { + data_->SaveBody(output); } size_t ColumnDateTime::Size() const { @@ -114,13 +178,17 @@ ColumnRef ColumnDateTime::Slice(size_t begin, size_t len) const { return result; } +ColumnRef ColumnDateTime::CloneEmpty() const { + return std::make_shared(); +} + void ColumnDateTime::Swap(Column& other) { auto & col = dynamic_cast(other); data_.swap(col.data_); } ItemView ColumnDateTime::GetItem(size_t index) const { - return data_->GetItem(index); + return ItemView(Type::DateTime, data_->GetItem(index)); } ColumnDateTime64::ColumnDateTime64(size_t precision) @@ -162,12 +230,12 @@ void ColumnDateTime64::Append(ColumnRef column) { } } -bool ColumnDateTime64::Load(InputStream* input, size_t rows) { - return data_->Load(input, rows); +bool ColumnDateTime64::LoadBody(InputStream* input, size_t rows) { + return data_->LoadBody(input, rows); } -void ColumnDateTime64::Save(OutputStream* output) { - data_->Save(output); +void ColumnDateTime64::SaveBody(OutputStream* output) { + data_->SaveBody(output); } void ColumnDateTime64::Clear() { @@ -178,13 +246,13 @@ size_t ColumnDateTime64::Size() const { } ItemView ColumnDateTime64::GetItem(size_t index) const { - return data_->GetItem(index); + return ItemView(Type::DateTime64, data_->GetItem(index)); } void ColumnDateTime64::Swap(Column& other) { auto& col = dynamic_cast(other); if (col.GetPrecision() != GetPrecision()) { - throw std::runtime_error("Can't swap DateTime64 columns when precisions are not the same: " + throw ValidationError("Can't swap DateTime64 columns when precisions are not the same: " + std::to_string(GetPrecision()) + "(this) != " + std::to_string(col.GetPrecision()) + "(that)"); } @@ -197,6 +265,10 @@ ColumnRef ColumnDateTime64::Slice(size_t begin, size_t len) const { return ColumnRef{new ColumnDateTime64(type_, sliced_data)}; } +ColumnRef ColumnDateTime64::CloneEmpty() const { + return ColumnRef{new ColumnDateTime64(type_, data_->CloneEmpty()->As())}; +} + size_t ColumnDateTime64::GetPrecision() const { 
return precision_; } diff --git a/clickhouse/columns/date.h b/clickhouse/columns/date.h index 62ca4e05..3518aa1e 100644 --- a/clickhouse/columns/date.h +++ b/clickhouse/columns/date.h @@ -26,10 +26,10 @@ class ColumnDate : public Column { void Append(ColumnRef column) override; /// Loads column data from input stream. - bool Load(InputStream* input, size_t rows) override; + bool LoadBody(InputStream* input, size_t rows) override; /// Saves column data to output stream. - void Save(OutputStream* output) override; + void SaveBody(OutputStream* output) override; /// Clear column data . void Clear() override; @@ -39,7 +39,7 @@ class ColumnDate : public Column { /// Makes slice of the current column. ColumnRef Slice(size_t begin, size_t len) const override; - + ColumnRef CloneEmpty() const override; void Swap(Column& other) override; ItemView GetItem(size_t index) const override; @@ -48,6 +48,48 @@ class ColumnDate : public Column { std::shared_ptr data_; }; + +class ColumnDate32 : public Column { +public: + using ValueType = std::time_t; + + ColumnDate32(); + + /// Appends one element to the end of column. + /// TODO: The implementation is fundamentally wrong. + void Append(const std::time_t& value); + + /// Returns element at given row number. + /// TODO: The implementation is fundamentally wrong. + std::time_t At(size_t n) const; + + /// Appends content of given column to the end of current one. + void Append(ColumnRef column) override; + + /// Loads column data from input stream. + bool LoadBody(InputStream* input, size_t rows) override; + + /// Saves column data to output stream. + void SaveBody(OutputStream* output) override; + + /// Clear column data . + void Clear() override; + + /// Returns count of rows in the column. + size_t Size() const override; + + /// Makes slice of the current column. 
+ ColumnRef Slice(size_t begin, size_t len) const override; + ColumnRef CloneEmpty() const override; + void Swap(Column& other) override; + + ItemView GetItem(size_t index) const override; + +private: + std::shared_ptr data_; +}; + + /** */ class ColumnDateTime : public Column { public: @@ -70,20 +112,20 @@ class ColumnDateTime : public Column { void Append(ColumnRef column) override; /// Loads column data from input stream. - bool Load(InputStream* input, size_t rows) override; + bool LoadBody(InputStream* input, size_t rows) override; /// Clear column data . void Clear() override; /// Saves column data to output stream. - void Save(OutputStream* output) override; + void SaveBody(OutputStream* output) override; /// Returns count of rows in the column. size_t Size() const override; /// Makes slice of the current column. ColumnRef Slice(size_t begin, size_t len) const override; - + ColumnRef CloneEmpty() const override; void Swap(Column& other) override; ItemView GetItem(size_t index) const override; @@ -118,20 +160,20 @@ class ColumnDateTime64 : public Column { void Append(ColumnRef column) override; /// Loads column data from input stream. - bool Load(InputStream* input, size_t rows) override; + bool LoadBody(InputStream* input, size_t rows) override; /// Clear column data . void Clear() override; /// Saves column data to output stream. - void Save(OutputStream* output) override; + void SaveBody(OutputStream* output) override; /// Returns count of rows in the column. size_t Size() const override; /// Makes slice of the current column. 
ColumnRef Slice(size_t begin, size_t len) const override; - + ColumnRef CloneEmpty() const override; void Swap(Column& other) override; ItemView GetItem(size_t index) const override; diff --git a/clickhouse/columns/decimal.cpp b/clickhouse/columns/decimal.cpp index 489dab5f..d44dc0c0 100644 --- a/clickhouse/columns/decimal.cpp +++ b/clickhouse/columns/decimal.cpp @@ -156,21 +156,21 @@ void ColumnDecimal::Append(const std::string& value) { } else if (*c >= '0' && *c <= '9') { if (mulOverflow(int_value, 10, &int_value) || addOverflow(int_value, *c - '0', &int_value)) { - throw std::runtime_error("value is too big for 128-bit integer"); + throw AssertionError("value is too big for 128-bit integer"); } } else { - throw std::runtime_error(std::string("unexpected symbol '") + (*c) + "' in decimal value"); + throw ValidationError(std::string("unexpected symbol '") + (*c) + "' in decimal value"); } ++c; } if (c != end) { - throw std::runtime_error("unexpected symbol '-' in decimal value"); + throw ValidationError("unexpected symbol '-' in decimal value"); } while (zeros) { if (mulOverflow(int_value, 10, &int_value)) { - throw std::runtime_error("value is too big for 128-bit integer"); + throw AssertionError("value is too big for 128-bit integer"); } --zeros; } @@ -187,7 +187,7 @@ Int128 ColumnDecimal::At(size_t i) const { case Type::Int128: return data_->As()->At(i); default: - throw std::runtime_error("Invalid data_ column type in ColumnDecimal"); + throw ValidationError("Invalid data_ column type in ColumnDecimal"); } } @@ -197,12 +197,12 @@ void ColumnDecimal::Append(ColumnRef column) { } } -bool ColumnDecimal::Load(InputStream * input, size_t rows) { - return data_->Load(input, rows); +bool ColumnDecimal::LoadBody(InputStream * input, size_t rows) { + return data_->LoadBody(input, rows); } -void ColumnDecimal::Save(OutputStream* output) { - data_->Save(output); +void ColumnDecimal::SaveBody(OutputStream* output) { + data_->SaveBody(output); } void 
ColumnDecimal::Clear() { @@ -218,13 +218,18 @@ ColumnRef ColumnDecimal::Slice(size_t begin, size_t len) const { return ColumnRef{new ColumnDecimal(type_, data_->Slice(begin, len))}; } +ColumnRef ColumnDecimal::CloneEmpty() const { + // coundn't use std::make_shared since this c-tor is private + return ColumnRef{new ColumnDecimal(type_, data_->CloneEmpty())}; +} + void ColumnDecimal::Swap(Column& other) { auto & col = dynamic_cast(other); data_.swap(col.data_); } ItemView ColumnDecimal::GetItem(size_t index) const { - return data_->GetItem(index); + return ItemView{GetType().GetCode(), data_->GetItem(index)}; } size_t ColumnDecimal::GetScale() const diff --git a/clickhouse/columns/decimal.h b/clickhouse/columns/decimal.h index b28699ae..d3c05ea2 100644 --- a/clickhouse/columns/decimal.h +++ b/clickhouse/columns/decimal.h @@ -21,11 +21,12 @@ class ColumnDecimal : public Column { public: void Append(ColumnRef column) override; - bool Load(InputStream* input, size_t rows) override; - void Save(OutputStream* output) override; + bool LoadBody(InputStream* input, size_t rows) override; + void SaveBody(OutputStream* output) override; void Clear() override; size_t Size() const override; ColumnRef Slice(size_t begin, size_t len) const override; + ColumnRef CloneEmpty() const override; void Swap(Column& other) override; ItemView GetItem(size_t index) const override; diff --git a/clickhouse/columns/enum.cpp b/clickhouse/columns/enum.cpp index fc07e629..1361e817 100644 --- a/clickhouse/columns/enum.cpp +++ b/clickhouse/columns/enum.cpp @@ -44,7 +44,7 @@ const T& ColumnEnum::At(size_t n) const { } template -const std::string ColumnEnum::NameAt(size_t n) const { +std::string_view ColumnEnum::NameAt(size_t n) const { return type_->As()->GetEnumName(data_.at(n)); } @@ -74,13 +74,13 @@ void ColumnEnum::Append(ColumnRef column) { } template -bool ColumnEnum::Load(InputStream* input, size_t rows) { +bool ColumnEnum::LoadBody(InputStream* input, size_t rows) { data_.resize(rows); 
return WireFormat::ReadBytes(*input, data_.data(), data_.size() * sizeof(T)); } template -void ColumnEnum::Save(OutputStream* output) { +void ColumnEnum::SaveBody(OutputStream* output) { WireFormat::WriteBytes(*output, data_.data(), data_.size() * sizeof(T)); } @@ -94,6 +94,11 @@ ColumnRef ColumnEnum::Slice(size_t begin, size_t len) const { return std::make_shared>(type_, SliceVector(data_, begin, len)); } +template +ColumnRef ColumnEnum::CloneEmpty() const { + return std::make_shared>(type_); +} + template void ColumnEnum::Swap(Column& other) { auto & col = dynamic_cast &>(other); diff --git a/clickhouse/columns/enum.h b/clickhouse/columns/enum.h index 34c672f6..c31b81ff 100644 --- a/clickhouse/columns/enum.h +++ b/clickhouse/columns/enum.h @@ -19,7 +19,7 @@ class ColumnEnum : public Column { /// Returns element at given row number. const T& At(size_t n) const; - const std::string NameAt(size_t n) const; + std::string_view NameAt(size_t n) const; /// Returns element at given row number. const T& operator[] (size_t n) const; @@ -33,12 +33,12 @@ class ColumnEnum : public Column { void Append(ColumnRef column) override; /// Loads column data from input stream. - bool Load(InputStream* input, size_t rows) override; + bool LoadBody(InputStream* input, size_t rows) override; /// Saves column data to output stream. - void Save(OutputStream* output) override; - - /// Clear column data . + void SaveBody(OutputStream* output) override; + + /// Clear column data. void Clear() override; /// Returns count of rows in the column. @@ -46,7 +46,7 @@ class ColumnEnum : public Column { /// Makes slice of the current column. 
ColumnRef Slice(size_t begin, size_t len) const override; - + ColumnRef CloneEmpty() const override; void Swap(Column& other) override; ItemView GetItem(size_t index) const override; diff --git a/clickhouse/columns/factory.cpp b/clickhouse/columns/factory.cpp index 08b362e9..47c3feeb 100644 --- a/clickhouse/columns/factory.cpp +++ b/clickhouse/columns/factory.cpp @@ -17,6 +17,8 @@ #include "../types/type_parser.h" +#include "../exceptions.h" + #include namespace clickhouse { @@ -83,6 +85,8 @@ static ColumnRef CreateTerminalColumn(const TypeAst& ast) { } case Type::Date: return std::make_shared(); + case Type::Date32: + return std::make_shared(); case Type::IPv4: return std::make_shared(); @@ -161,8 +165,10 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSetti return std::make_shared>(); case Type::FixedString: return std::make_shared>(nested.elements.front().value); + case Type::Nullable: + throw UnimplementedError("LowCardinality(" + nested.name + ") is not supported with LowCardinalityAsWrappedColumn on"); default: - throw std::runtime_error("LowCardinality(" + nested.name + ") is not supported"); + throw UnimplementedError("LowCardinality(" + nested.name + ") is not supported"); } } else { @@ -172,8 +178,15 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSetti return std::make_shared>(); case Type::FixedString: return std::make_shared>(nested.elements.front().value); + case Type::Nullable: + return std::make_shared( + std::make_shared( + CreateColumnFromAst(nested.elements.front(), settings), + std::make_shared() + ) + ); default: - throw std::runtime_error("LowCardinality(" + nested.name + ") is not supported"); + throw UnimplementedError("LowCardinality(" + nested.name + ") is not supported"); } } } diff --git a/clickhouse/columns/ip4.cpp b/clickhouse/columns/ip4.cpp index f7362362..9c269b85 100644 --- a/clickhouse/columns/ip4.cpp +++ b/clickhouse/columns/ip4.cpp @@ -16,13 +16,13 @@ 
ColumnIPv4::ColumnIPv4(ColumnRef data) , data_(data ? data->As() : nullptr) { if (!data_) - throw std::runtime_error("Expecting ColumnUInt32, got " + (data ? data->GetType().GetName() : "null")); + throw ValidationError("Expecting ColumnUInt32, got " + (data ? data->GetType().GetName() : "null")); } void ColumnIPv4::Append(const std::string& str) { uint32_t address; if (inet_pton(AF_INET, str.c_str(), &address) != 1) - throw std::runtime_error("invalid IPv4 format, ip: " + str); + throw ValidationError("invalid IPv4 format, ip: " + str); data_->Append(htonl(address)); } @@ -71,12 +71,12 @@ void ColumnIPv4::Append(ColumnRef column) { } } -bool ColumnIPv4::Load(InputStream * input, size_t rows) { - return data_->Load(input, rows); +bool ColumnIPv4::LoadBody(InputStream * input, size_t rows) { + return data_->LoadBody(input, rows); } -void ColumnIPv4::Save(OutputStream* output) { - data_->Save(output); +void ColumnIPv4::SaveBody(OutputStream* output) { + data_->SaveBody(output); } size_t ColumnIPv4::Size() const { @@ -87,13 +87,17 @@ ColumnRef ColumnIPv4::Slice(size_t begin, size_t len) const { return std::make_shared(data_->Slice(begin, len)); } +ColumnRef ColumnIPv4::CloneEmpty() const { + return std::make_shared(data_->CloneEmpty()); +} + void ColumnIPv4::Swap(Column& other) { auto & col = dynamic_cast(other); data_.swap(col.data_); } ItemView ColumnIPv4::GetItem(size_t index) const { - return data_->GetItem(index); + return ItemView(Type::IPv4, data_->GetItem(index)); } } diff --git a/clickhouse/columns/ip4.h b/clickhouse/columns/ip4.h index 0b1d41ab..3f25e6d5 100644 --- a/clickhouse/columns/ip4.h +++ b/clickhouse/columns/ip4.h @@ -41,10 +41,10 @@ class ColumnIPv4 : public Column { void Append(ColumnRef column) override; /// Loads column data from input stream. - bool Load(InputStream* input, size_t rows) override; + bool LoadBody(InputStream* input, size_t rows) override; /// Saves column data to output stream. 
- void Save(OutputStream* output) override; + void SaveBody(OutputStream* output) override; /// Clear column data . void Clear() override; @@ -54,7 +54,7 @@ class ColumnIPv4 : public Column { /// Makes slice of the current column. ColumnRef Slice(size_t begin, size_t len) const override; - + ColumnRef CloneEmpty() const override; void Swap(Column& other) override; ItemView GetItem(size_t index) const override; diff --git a/clickhouse/columns/ip6.cpp b/clickhouse/columns/ip6.cpp index d1f3995d..838bcce0 100644 --- a/clickhouse/columns/ip6.cpp +++ b/clickhouse/columns/ip6.cpp @@ -1,6 +1,6 @@ - #include "ip6.h" #include "../base/socket.h" // for IPv6 platform-specific stuff +#include "../exceptions.h" #include @@ -19,13 +19,13 @@ ColumnIPv6::ColumnIPv6(ColumnRef data) , data_(data ? data->As() : nullptr) { if (!data_ || data_->FixedSize() != sizeof(in6_addr)) - throw std::runtime_error("Expecting ColumnFixedString(16), got " + (data ? data->GetType().GetName() : "null")); + throw ValidationError("Expecting ColumnFixedString(16), got " + (data ? 
data->GetType().GetName() : "null")); } void ColumnIPv6::Append(const std::string_view& str) { unsigned char buf[16]; if (inet_pton(AF_INET6, str.data(), buf) != 1) { - throw std::runtime_error("invalid IPv6 format, ip: " + std::string(str)); + throw ValidationError("invalid IPv6 format, ip: " + std::string(str)); } data_->Append(std::string_view((const char*)buf, 16)); } @@ -71,12 +71,12 @@ void ColumnIPv6::Append(ColumnRef column) { } } -bool ColumnIPv6::Load(InputStream* input, size_t rows) { - return data_->Load(input, rows); +bool ColumnIPv6::LoadBody(InputStream* input, size_t rows) { + return data_->LoadBody(input, rows); } -void ColumnIPv6::Save(OutputStream* output) { - data_->Save(output); +void ColumnIPv6::SaveBody(OutputStream* output) { + data_->SaveBody(output); } size_t ColumnIPv6::Size() const { @@ -87,13 +87,17 @@ ColumnRef ColumnIPv6::Slice(size_t begin, size_t len) const { return std::make_shared(data_->Slice(begin, len)); } +ColumnRef ColumnIPv6::CloneEmpty() const { + return std::make_shared(data_->CloneEmpty()); +} + void ColumnIPv6::Swap(Column& other) { auto & col = dynamic_cast(other); data_.swap(col.data_); } ItemView ColumnIPv6::GetItem(size_t index) const { - return data_->GetItem(index); + return ItemView{Type::IPv6, data_->GetItem(index)}; } } diff --git a/clickhouse/columns/ip6.h b/clickhouse/columns/ip6.h index e523912a..74d8c1e1 100644 --- a/clickhouse/columns/ip6.h +++ b/clickhouse/columns/ip6.h @@ -39,10 +39,10 @@ class ColumnIPv6 : public Column { void Append(ColumnRef column) override; /// Loads column data from input stream. - bool Load(InputStream* input, size_t rows) override; + bool LoadBody(InputStream* input, size_t rows) override; /// Saves column data to output stream. - void Save(OutputStream* output) override; + void SaveBody(OutputStream* output) override; /// Clear column data . void Clear() override; @@ -52,6 +52,7 @@ class ColumnIPv6 : public Column { /// Makes slice of the current column. 
ColumnRef Slice(size_t begin, size_t len) const override; + ColumnRef CloneEmpty() const override; void Swap(Column& other) override; ItemView GetItem(size_t index) const override; diff --git a/clickhouse/columns/itemview.cpp b/clickhouse/columns/itemview.cpp index 9dc8533f..3a186531 100644 --- a/clickhouse/columns/itemview.cpp +++ b/clickhouse/columns/itemview.cpp @@ -1,44 +1,72 @@ #include "../columns/itemview.h" +#include +#include + +namespace { + +template +std::string ContainerToString(Container container, const char * separator = ", ") { + std::stringstream sstr; + const auto end = std::end(container); + for (auto i = std::begin(container); i != end; /*intentionally no ++i*/) { + const auto & elem = *i; + sstr << elem; + + if (++i != end) { + sstr << separator; + } + } + + return sstr.str(); +} + +} + namespace clickhouse { void ItemView::ValidateData(Type::Code type, DataType data) { - int expected_size = 0; + + auto AssertSize = [type, &data](std::initializer_list allowed_sizes) -> void { + const auto end = std::end(allowed_sizes); + if (std::find(std::begin(allowed_sizes), end, static_cast(data.size())) == end) { + throw AssertionError(std::string("ItemView value size mismatch for ") + + Type::TypeName(type) + + " expected: " + ContainerToString(allowed_sizes, " or ") + + ", got: " + std::to_string(data.size())); + } + }; + switch (type) { case Type::Code::Void: - expected_size = 0; - break; + return AssertSize({0}); case Type::Code::Int8: case Type::Code::UInt8: case Type::Code::Enum8: - expected_size = 1; - break; + return AssertSize({1}); case Type::Code::Int16: case Type::Code::UInt16: case Type::Code::Date: case Type::Code::Enum16: - expected_size = 2; - break; + return AssertSize({2}); case Type::Code::Int32: case Type::Code::UInt32: case Type::Code::Float32: case Type::Code::DateTime: + case Type::Code::Date32: case Type::Code::IPv4: case Type::Code::Decimal32: - expected_size = 4; - break; + return AssertSize({4}); case Type::Code::Int64: case 
Type::Code::UInt64: case Type::Code::Float64: case Type::Code::DateTime64: - case Type::Code::IPv6: case Type::Code::Decimal64: - expected_size = 8; - break; + return AssertSize({8}); case Type::Code::String: case Type::Code::FixedString: @@ -49,23 +77,20 @@ void ItemView::ValidateData(Type::Code type, DataType data) { case Type::Code::Nullable: case Type::Code::Tuple: case Type::Code::LowCardinality: - throw std::runtime_error("Unsupported type in ItemView: " + std::to_string(static_cast(type))); + throw AssertionError("Unsupported type in ItemView: " + std::string(Type::TypeName(type))); + case Type::Code::IPv6: case Type::Code::UUID: case Type::Code::Int128: - case Type::Code::Decimal: case Type::Code::Decimal128: - expected_size = 16; - break; + return AssertSize({16}); - default: - throw std::runtime_error("Unknon type code:" + std::to_string(static_cast(type))); - } + case Type::Code::Decimal: + // Could be either Decimal32, Decimal64 or Decimal128 + return AssertSize({4, 8, 16}); - if (expected_size != static_cast(data.size())) { - throw std::runtime_error("Value size mismatch for type " - + std::to_string(static_cast(type)) + " expected: " - + std::to_string(expected_size) + ", got: " + std::to_string(data.size())); + default: + throw UnimplementedError("Unknon type code:" + std::to_string(static_cast(type))); } } diff --git a/clickhouse/columns/itemview.h b/clickhouse/columns/itemview.h index e4e0d5ea..ae48a362 100644 --- a/clickhouse/columns/itemview.h +++ b/clickhouse/columns/itemview.h @@ -1,9 +1,11 @@ #pragma once #include "../types/types.h" +#include "../exceptions.h" #include #include +#include namespace clickhouse { @@ -42,8 +44,15 @@ struct ItemView { ValidateData(type, data); } + ItemView(Type::Code type, ItemView other) + : type(type), + data(other.data) + { + ValidateData(type, data); + } + explicit ItemView() - : ItemView(Type::Void, {nullptr, 0}) + : ItemView(Type::Void, std::string_view{}) {} template @@ -52,14 +61,16 @@ struct ItemView { {} 
template - T get() const { - if constexpr (std::is_same_v || std::is_same_v) { + auto get() const { + using ValueType = std::remove_cv_t>; + if constexpr (std::is_same_v || std::is_same_v) { return data; - } else if constexpr (std::is_fundamental_v || std::is_same_v) { - if (sizeof(T) == data.size()) { + } else if constexpr (std::is_fundamental_v || std::is_same_v) { + if (sizeof(ValueType) == data.size()) { return *reinterpret_cast(data.data()); } else { - throw std::runtime_error("Incompatitable value type and size."); + throw AssertionError("Incompatitable value type and size. Requested size: " + + std::to_string(sizeof(ValueType)) + " stored size: " + std::to_string(data.size())); } } } diff --git a/clickhouse/columns/lowcardinality.cpp b/clickhouse/columns/lowcardinality.cpp index 1cb21fef..0ffef7e0 100644 --- a/clickhouse/columns/lowcardinality.cpp +++ b/clickhouse/columns/lowcardinality.cpp @@ -49,7 +49,7 @@ ColumnRef createIndexColumn(IndexType type) { return std::make_shared(); } - throw std::runtime_error("Invalid LowCardinality index type value: " + std::to_string(static_cast(type))); + throw ValidationError("Invalid LowCardinality index type value: " + std::to_string(static_cast(type))); } IndexType indexTypeFromIndexColumn(const Column & index_column) { @@ -63,7 +63,7 @@ IndexType indexTypeFromIndexColumn(const Column & index_column) { case Type::UInt64: return IndexType::UInt64; default: - throw std::runtime_error("Invalid index column type for LowCardinality column:" + index_column.Type()->GetName()); + throw ValidationError("Invalid index column type for LowCardinality column:" + index_column.Type()->GetName()); } } @@ -90,10 +90,51 @@ inline auto VisitIndexColumn(Vizitor && vizitor, ColumnType && col) { case Type::UInt64: return vizitor(column_down_cast(col)); default: - throw std::runtime_error("Invalid index column type " + col.GetType().GetName()); + throw ValidationError("Invalid index column type " + col.GetType().GetName()); } } +// A special 
NULL-item, which is expected at pos(0) in dictionary, +// note that we distinguish empty string from NULL-value. +inline auto GetNullItemForDictionary(const ColumnRef dictionary) { + if (auto n = dictionary->As()) { + return ItemView {}; + } else { + return ItemView{dictionary->Type()->GetCode(), std::string_view{}}; + } +} + +// A special default item, which is expected at pos(0) in dictionary, +// note that we distinguish empty string from NULL-value. +inline ItemView GetDefaultItemForDictionary(const ColumnRef dictionary) { + if (auto n = dictionary->As()) { + return GetDefaultItemForDictionary(n->Nested()); + } else { + return ItemView{dictionary->Type()->GetCode(), std::string_view{}}; + } +} + +void AppendToDictionary(Column& dictionary, const ItemView & item); + +inline void AppendNullableToDictionary(ColumnNullable& nullable, const ItemView & item) { + auto nested = nullable.Nested(); + + const bool isNullValue = item.type == Type::Void; + + if (isNullValue) { + AppendToDictionary(*nested, GetNullItemForDictionary(nested)); + } else { + const auto nestedType = nested->GetType().GetCode(); + if (nestedType != item.type) { + throw ValidationError("Invalid value. Type expected: " + nested->GetType().GetName()); + } + + AppendToDictionary(*nested, item); + } + + nullable.Append(isNullValue); +} + inline void AppendToDictionary(Column& dictionary, const ItemView & item) { switch (dictionary.GetType().GetCode()) { case Type::FixedString: @@ -102,18 +143,11 @@ inline void AppendToDictionary(Column& dictionary, const ItemView & item) { case Type::String: column_down_cast(dictionary).Append(item.get()); return; + case Type::Nullable: + AppendNullableToDictionary(column_down_cast(dictionary), item); + return; default: - throw std::runtime_error("Unexpected dictionary column type: " + dictionary.GetType().GetName()); - } -} - -// A special NULL-item, which is expected at pos(0) in dictionary, -// note that we distinguish empty string from NULL-value. 
-inline auto GetNullItemForDictionary(const ColumnRef dictionary) { - if (auto n = dictionary->As()) { - return ItemView{}; - } else { - return ItemView{dictionary->Type()->GetCode(), std::string_view{}}; + throw ValidationError("Unexpected dictionary column type: " + dictionary.GetType().GetName()); } } @@ -122,10 +156,26 @@ inline auto GetNullItemForDictionary(const ColumnRef dictionary) { namespace clickhouse { ColumnLowCardinality::ColumnLowCardinality(ColumnRef dictionary_column) : Column(Type::CreateLowCardinality(dictionary_column->Type())), - dictionary_column_(dictionary_column->Slice(0, 0)), // safe way to get an column of the same type. + dictionary_column_(dictionary_column->CloneEmpty()), // safe way to get an column of the same type. + index_column_(std::make_shared()) +{ + Setup(dictionary_column); +} + +ColumnLowCardinality::ColumnLowCardinality(std::shared_ptr dictionary_column) + : Column(Type::CreateLowCardinality(dictionary_column->Type())), + dictionary_column_(dictionary_column->CloneEmpty()), // safe way to get an column of the same type. index_column_(std::make_shared()) { - AppendNullItemToEmptyColumn(); + AppendNullItem(); + Setup(dictionary_column); +} + +ColumnLowCardinality::~ColumnLowCardinality() +{} + +void ColumnLowCardinality::Setup(ColumnRef dictionary_column) { + AppendDefaultItem(); if (dictionary_column->Size() != 0) { // Add values, updating index_column_ and unique_items_map_. 
@@ -140,9 +190,6 @@ ColumnLowCardinality::ColumnLowCardinality(ColumnRef dictionary_column) } } -ColumnLowCardinality::~ColumnLowCardinality() -{} - std::uint64_t ColumnLowCardinality::getDictionaryIndex(std::uint64_t item_index) const { return VisitIndexColumn([item_index](const auto & arg) -> std::uint64_t { return arg[item_index]; @@ -197,47 +244,50 @@ auto Load(ColumnRef new_dictionary_column, InputStream& input, size_t rows) { // (see corresponding serializeBinaryBulkStateSuffix, serializeBinaryBulkStatePrefix, and serializeBinaryBulkWithMultipleStreams), // but with certain simplifications: no shared dictionaries, no on-the-fly dictionary updates. // - // As for now those fetures not used in client-server protocol and minimal implimintation suffice, + // As for now those features are not used in client-server protocol and minimal implementation suffices, // however some day they may. - // prefix - uint64_t key_version; - if (!WireFormat::ReadFixed(input, &key_version)) - throw std::runtime_error("Failed to read key serialization version."); - - if (key_version != KeySerializationVersion::SharedDictionariesWithAdditionalKeys) - throw std::runtime_error("Invalid key serialization version value."); - - // body uint64_t index_serialization_type; if (!WireFormat::ReadFixed(input, &index_serialization_type)) - throw std::runtime_error("Failed to read index serializaton type."); + throw ProtocolError("Failed to read index serializaton type."); auto new_index_column = createIndexColumn(static_cast(index_serialization_type & IndexTypeMask)); if (index_serialization_type & IndexFlag::NeedGlobalDictionaryBit) - throw std::runtime_error("Global dictionary is not supported."); + throw UnimplementedError("Global dictionary is not supported."); if ((index_serialization_type & IndexFlag::HasAdditionalKeysBit) == 0) - throw std::runtime_error("HasAdditionalKeysBit is missing."); + throw ValidationError("HasAdditionalKeysBit is missing."); uint64_t number_of_keys; if 
(!WireFormat::ReadFixed(input, &number_of_keys)) - throw std::runtime_error("Failed to read number of rows in dictionary column."); + throw ProtocolError("Failed to read number of rows in dictionary column."); - if (!new_dictionary_column->Load(&input, number_of_keys)) - throw std::runtime_error("Failed to read values of dictionary column."); + auto dataColumn = new_dictionary_column; + if (auto nullable = new_dictionary_column->As()) { + dataColumn = nullable->Nested(); + } + + if (!dataColumn->LoadBody(&input, number_of_keys)) + throw ProtocolError("Failed to read values of dictionary column."); uint64_t number_of_rows; if (!WireFormat::ReadFixed(input, &number_of_rows)) - throw std::runtime_error("Failed to read number of rows in index column."); + throw ProtocolError("Failed to read number of rows in index column."); if (number_of_rows != rows) - throw std::runtime_error("LowCardinality column must be read in full."); + throw AssertionError("LowCardinality column must be read in full."); + + new_index_column->LoadBody(&input, number_of_rows); - new_index_column->Load(&input, number_of_rows); + if (auto nullable = new_dictionary_column->As()) { + nullable->Append(true); + for(std::size_t i = 1; i < dataColumn->Size(); i++) { // null map needs one flag per dictionary key (pos 0 is the NULL item), not one per index row + nullable->Append(false); + } + } ColumnLowCardinality::UniqueItems new_unique_items_map; - for (size_t i = 0; i < new_dictionary_column->Size(); ++i) { + for (size_t i = 0; i < dataColumn->Size(); ++i) { const auto key = ColumnLowCardinality::computeHashKey(new_dictionary_column->GetItem(i)); new_unique_items_map.emplace(key, i); } @@ -250,9 +300,21 @@ auto Load(ColumnRef new_dictionary_column, InputStream& input, size_t rows) { } -bool ColumnLowCardinality::Load(InputStream* input, size_t rows) { +bool ColumnLowCardinality::LoadPrefix(InputStream* input, size_t) { + uint64_t key_version; + + if (!WireFormat::ReadFixed(*input, &key_version)) + throw ProtocolError("Failed to read key serialization version."); + + if (key_version != 
KeySerializationVersion::SharedDictionariesWithAdditionalKeys) + throw ProtocolError("Invalid key serialization version value."); + + return true; +} + +bool ColumnLowCardinality::LoadBody(InputStream* input, size_t rows) { try { - auto [new_dictionary, new_index, new_unique_items_map] = ::Load(dictionary_column_->Slice(0, 0), *input, rows); + auto [new_dictionary, new_index, new_unique_items_map] = ::Load(dictionary_column_->CloneEmpty(), *input, rows); dictionary_column_->Swap(*new_dictionary); index_column_.swap(new_index); @@ -264,25 +326,28 @@ bool ColumnLowCardinality::Load(InputStream* input, size_t rows) { } } -void ColumnLowCardinality::Save(OutputStream* output) { - // prefix - const uint64_t version = static_cast(KeySerializationVersion::SharedDictionariesWithAdditionalKeys); +void ColumnLowCardinality::SavePrefix(OutputStream* output) { + const auto version = static_cast(KeySerializationVersion::SharedDictionariesWithAdditionalKeys); WireFormat::WriteFixed(*output, version); +} - // body +void ColumnLowCardinality::SaveBody(OutputStream* output) { const uint64_t index_serialization_type = indexTypeFromIndexColumn(*index_column_) | IndexFlag::HasAdditionalKeysBit; WireFormat::WriteFixed(*output, index_serialization_type); const uint64_t number_of_keys = dictionary_column_->Size(); WireFormat::WriteFixed(*output, number_of_keys); - dictionary_column_->Save(output); + + if (auto columnNullable = dictionary_column_->As()) { + columnNullable->Nested()->SaveBody(output); + } else { + dictionary_column_->SaveBody(output); + } const uint64_t number_of_rows = index_column_->Size(); WireFormat::WriteFixed(*output, number_of_rows); - index_column_->Save(output); - // suffix - // NOP + index_column_->SaveBody(output); } void ColumnLowCardinality::Clear() { @@ -290,7 +355,10 @@ void ColumnLowCardinality::Clear() { dictionary_column_->Clear(); unique_items_map_.clear(); - AppendNullItemToEmptyColumn(); + if (auto columnNullable = dictionary_column_->As()) { + 
AppendNullItem(); + } + AppendDefaultItem(); } size_t ColumnLowCardinality::Size() const { @@ -301,7 +369,7 @@ ColumnRef ColumnLowCardinality::Slice(size_t begin, size_t len) const { begin = std::min(begin, Size()); len = std::min(len, Size() - begin); - auto result = std::make_shared(dictionary_column_->Slice(0, 0)); + auto result = std::make_shared(dictionary_column_->CloneEmpty()); for (size_t i = begin; i < begin + len; ++i) result->AppendUnsafe(this->GetItem(i)); @@ -309,10 +377,14 @@ ColumnRef ColumnLowCardinality::Slice(size_t begin, size_t len) const { return result; } +ColumnRef ColumnLowCardinality::CloneEmpty() const { + return std::make_shared(dictionary_column_->CloneEmpty()); +} + void ColumnLowCardinality::Swap(Column& other) { auto & col = dynamic_cast(other); if (!dictionary_column_->Type()->IsEqual(col.dictionary_column_->Type())) - throw std::runtime_error("Can't swap() LowCardinality columns of different types."); + throw ValidationError("Can't swap() LowCardinality columns of different types."); // It is important here not to swap pointers to dictionary object, // but swap contents of dictionaries, so the object inside shared_ptr stays the same @@ -324,7 +396,17 @@ void ColumnLowCardinality::Swap(Column& other) { } ItemView ColumnLowCardinality::GetItem(size_t index) const { - return dictionary_column_->GetItem(getDictionaryIndex(index)); + const auto dictionaryIndex = getDictionaryIndex(index); + + if (auto nullable = dictionary_column_->As()) { + const auto isNull = dictionaryIndex == 0u; + + if (isNull) { + return GetNullItemForDictionary(nullable); + } + } + + return dictionary_column_->GetItem(dictionaryIndex); } // No checks regarding value type or validity of value is made. 
@@ -355,19 +437,20 @@ void ColumnLowCardinality::AppendUnsafe(const ItemView & value) { } } -void ColumnLowCardinality::AppendNullItemToEmptyColumn() +void ColumnLowCardinality::AppendNullItem() { - // INVARIANT: Empty LC column has an (invisible) null-item at pos 0, which MUST be present in - // unique_items_map_ in order to reuse dictionary posistion on subsequent Append()-s. - - // Should be only performed on empty LC column. - assert(dictionary_column_->Size() == 0 && unique_items_map_.empty()); - const auto null_item = GetNullItemForDictionary(dictionary_column_); AppendToDictionary(*dictionary_column_, null_item); unique_items_map_.emplace(computeHashKey(null_item), 0); } +void ColumnLowCardinality::AppendDefaultItem() +{ + const auto defaultItem = GetDefaultItemForDictionary(dictionary_column_); + unique_items_map_.emplace(computeHashKey(defaultItem), dictionary_column_->Size()); + AppendToDictionary(*dictionary_column_, defaultItem); +} + size_t ColumnLowCardinality::GetDictionarySize() const { return dictionary_column_->Size(); } diff --git a/clickhouse/columns/lowcardinality.h b/clickhouse/columns/lowcardinality.h index 6d834428..3d8581fc 100644 --- a/clickhouse/columns/lowcardinality.h +++ b/clickhouse/columns/lowcardinality.h @@ -2,6 +2,7 @@ #include "column.h" #include "numeric.h" +#include "nullable.h" #include #include @@ -32,6 +33,11 @@ struct LowCardinalityHashKeyHash { } +/* + * LC column contains an "invisible" default item at the beginning of the collection. [default, ...] + * If the nested type is Nullable, it contains a null-item at the beginning and a default item at the second position. [null, default, ...] + * Null map is not serialized in LC columns. Instead, nulls are tracked by having an index of 0. + * */ class ColumnLowCardinality : public Column { public: using UniqueItems = std::unordered_map; @@ -49,16 +55,22 @@ class ColumnLowCardinality : public Column { public: // c-tor makes a deep copy of the dictionary_column. 
explicit ColumnLowCardinality(ColumnRef dictionary_column); + explicit ColumnLowCardinality(std::shared_ptr dictionary_column); ~ColumnLowCardinality(); /// Appends another LowCardinality column to the end of this one, updating dictionary. void Append(ColumnRef /*column*/) override; + bool LoadPrefix(InputStream* input, size_t rows) override; + /// Loads column data from input stream. - bool Load(InputStream* input, size_t rows) override; + bool LoadBody(InputStream* input, size_t rows) override; + + /// Saves column prefix to output stream. + void SavePrefix(OutputStream* output) override; /// Saves column data to output stream. - void Save(OutputStream* output) override; + void SaveBody(OutputStream* output) override; /// Clear column data. void Clear() override; @@ -68,7 +80,7 @@ class ColumnLowCardinality : public Column { /// Makes slice of current column, with compacted dictionary ColumnRef Slice(size_t begin, size_t len) const override; - + ColumnRef CloneEmpty() const override; void Swap(Column& other) override; ItemView GetItem(size_t index) const override; @@ -79,12 +91,14 @@ class ColumnLowCardinality : public Column { std::uint64_t getDictionaryIndex(std::uint64_t item_index) const; void appendIndex(std::uint64_t item_index); void removeLastIndex(); - ColumnRef GetDictionary(); + void AppendUnsafe(const ItemView &); private: - void AppendNullItemToEmptyColumn(); + void Setup(ColumnRef dictionary_column); + void AppendNullItem(); + void AppendDefaultItem(); public: static details::LowCardinalityHashKey computeHashKey(const ItemView &); diff --git a/clickhouse/columns/lowcardinalityadaptor.h b/clickhouse/columns/lowcardinalityadaptor.h index 70261a51..8b579a0d 100644 --- a/clickhouse/columns/lowcardinalityadaptor.h +++ b/clickhouse/columns/lowcardinalityadaptor.h @@ -27,12 +27,19 @@ class LowCardinalitySerializationAdaptor : public AdaptedColumnType public: using AdaptedColumnType::AdaptedColumnType; - /// Loads column data from input stream. 
- bool Load(InputStream* input, size_t rows) override { + bool LoadPrefix(InputStream* input, size_t rows) override { auto new_data_column = this->Slice(0, 0)->template As(); + ColumnLowCardinalityT low_cardinality_col(new_data_column); + + return low_cardinality_col.LoadPrefix(input, rows); + } + + /// Loads column data from input stream. + bool LoadBody(InputStream* input, size_t rows) override { + auto new_data_column = this->CloneEmpty()->template As(); ColumnLowCardinalityT low_cardinality_col(new_data_column); - if (!low_cardinality_col.Load(input, rows)) + if (!low_cardinality_col.LoadBody(input, rows)) return false; // It safe to reuse `flat_data_column` later since ColumnLowCardinalityT makes a deep copy, but still check just in case. @@ -46,8 +53,8 @@ class LowCardinalitySerializationAdaptor : public AdaptedColumnType } /// Saves column data to output stream. - void Save(OutputStream* output) override { - ColumnLowCardinalityT(this->template As()).Save(output); + void SaveBody(OutputStream* output) override { + ColumnLowCardinalityT(this->template As()).SaveBody(output); } }; diff --git a/clickhouse/columns/nothing.h b/clickhouse/columns/nothing.h index ef7004b5..36ddeaea 100644 --- a/clickhouse/columns/nothing.h +++ b/clickhouse/columns/nothing.h @@ -14,13 +14,13 @@ namespace clickhouse { */ class ColumnNothing : public Column { public: - ColumnNothing() + ColumnNothing() : Column(Type::CreateNothing()) , size_(0) { } - explicit ColumnNothing(size_t n) + explicit ColumnNothing(size_t n) : Column(Type::CreateNothing()) , size_(n) { @@ -30,37 +30,41 @@ class ColumnNothing : public Column { void Append(std::unique_ptr) { ++size_; } /// Returns element at given row number. - std::nullptr_t At(size_t) const { return nullptr; }; + std::nullptr_t At(size_t) const { return nullptr; }; /// Returns element at given row number. 
- std::nullptr_t operator [] (size_t) const { return nullptr; }; + std::nullptr_t operator [] (size_t) const { return nullptr; }; /// Makes slice of the current column. ColumnRef Slice(size_t, size_t len) const override { - return std::make_shared(len); - } + return std::make_shared(len); + } + + ColumnRef CloneEmpty() const override { + return std::make_shared(); + } ItemView GetItem(size_t /*index*/) const override { return ItemView{}; } public: /// Appends content of given column to the end of current one. void Append(ColumnRef column) override { - if (auto col = column->As()) { - size_ += col->Size(); - } - } + if (auto col = column->As()) { + size_ += col->Size(); + } + } /// Loads column data from input stream. - bool Load(InputStream* input, size_t rows) override { - input->Skip(rows); - size_ += rows; - return true; - } + bool LoadBody(InputStream* input, size_t rows) override { + input->Skip(rows); + size_ += rows; + return true; + } /// Saves column data to output stream. - void Save(OutputStream*) override { - throw std::runtime_error("method Save is not supported for Nothing column"); - } + void SaveBody(OutputStream*) override { + throw UnimplementedError("method SaveBody is not supported for Nothing column"); + } /// Clear column data . 
void Clear() override { size_ = 0; } @@ -74,7 +78,7 @@ class ColumnNothing : public Column { } private: - size_t size_; + size_t size_; }; } diff --git a/clickhouse/columns/nullable.cpp b/clickhouse/columns/nullable.cpp index 1f0f1efb..dd863545 100644 --- a/clickhouse/columns/nullable.cpp +++ b/clickhouse/columns/nullable.cpp @@ -11,7 +11,7 @@ ColumnNullable::ColumnNullable(ColumnRef nested, ColumnRef nulls) , nulls_(nulls->As()) { if (nested_->Size() != nulls->Size()) { - throw std::runtime_error("count of elements in nested and nulls should be the same"); + throw ValidationError("count of elements in nested and nulls should be the same"); } } @@ -50,23 +50,30 @@ void ColumnNullable::Clear() { nulls_->Clear(); } -bool ColumnNullable::Load(InputStream* input, size_t rows) { - if (!nulls_->Load(input, rows)) { +bool ColumnNullable::LoadPrefix(InputStream* input, size_t rows) { + return nested_->LoadPrefix(input, rows); +} + +bool ColumnNullable::LoadBody(InputStream* input, size_t rows) { + if (!nulls_->LoadBody(input, rows)) { return false; } - if (!nested_->Load(input, rows)) { + if (!nested_->LoadBody(input, rows)) { return false; } return true; } -void ColumnNullable::Save(OutputStream* output) { - nulls_->Save(output); - nested_->Save(output); +void ColumnNullable::SavePrefix(OutputStream* output) { + nested_->SavePrefix(output); +} + +void ColumnNullable::SaveBody(OutputStream* output) { + nulls_->SaveBody(output); + nested_->SaveBody(output); } size_t ColumnNullable::Size() const { - assert(nested_->Size() == nulls_->Size()); return nulls_->Size(); } @@ -74,10 +81,14 @@ ColumnRef ColumnNullable::Slice(size_t begin, size_t len) const { return std::make_shared(nested_->Slice(begin, len), nulls_->Slice(begin, len)); } +ColumnRef ColumnNullable::CloneEmpty() const { + return std::make_shared(nested_->CloneEmpty(), nulls_->CloneEmpty()); +} + void ColumnNullable::Swap(Column& other) { auto & col = dynamic_cast(other); if 
(!nested_->Type()->IsEqual(col.nested_->Type())) - throw std::runtime_error("Can't swap() Nullable columns of different types."); + throw ValidationError("Can't swap() Nullable columns of different types."); nested_.swap(col.nested_); nulls_.swap(col.nulls_); diff --git a/clickhouse/columns/nullable.h b/clickhouse/columns/nullable.h index 8cde2781..41806624 100644 --- a/clickhouse/columns/nullable.h +++ b/clickhouse/columns/nullable.h @@ -28,11 +28,17 @@ class ColumnNullable : public Column { /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; + /// Loads column prefix from input stream. + bool LoadPrefix(InputStream* input, size_t rows) override; + /// Loads column data from input stream. - bool Load(InputStream* input, size_t rows) override; + bool LoadBody(InputStream* input, size_t rows) override; + + /// Saves column prefix to output stream. + void SavePrefix(OutputStream* output) override; /// Saves column data to output stream. - void Save(OutputStream* output) override; + void SaveBody(OutputStream* output) override; /// Clear column data . void Clear() override; @@ -42,6 +48,7 @@ class ColumnNullable : public Column { /// Makes slice of the current column. 
ColumnRef Slice(size_t begin, size_t len) const override; + ColumnRef CloneEmpty() const override; void Swap(Column&) override; ItemView GetItem(size_t) const override; diff --git a/clickhouse/columns/numeric.cpp b/clickhouse/columns/numeric.cpp index 479d1e79..4e8d54bf 100644 --- a/clickhouse/columns/numeric.cpp +++ b/clickhouse/columns/numeric.cpp @@ -61,14 +61,14 @@ void ColumnVector::Append(ColumnRef column) { } template -bool ColumnVector::Load(InputStream* input, size_t rows) { +bool ColumnVector::LoadBody(InputStream* input, size_t rows) { data_.resize(rows); return WireFormat::ReadBytes(*input, data_.data(), data_.size() * sizeof(T)); } template -void ColumnVector::Save(OutputStream* output) { +void ColumnVector::SaveBody(OutputStream* output) { WireFormat::WriteBytes(*output, data_.data(), data_.size() * sizeof(T)); } @@ -82,6 +82,11 @@ ColumnRef ColumnVector::Slice(size_t begin, size_t len) const { return std::make_shared>(SliceVector(data_, begin, len)); } +template +ColumnRef ColumnVector::CloneEmpty() const { + return std::make_shared>(); +} + template void ColumnVector::Swap(Column& other) { auto & col = dynamic_cast &>(other); diff --git a/clickhouse/columns/numeric.h b/clickhouse/columns/numeric.h index 65b21300..c6da981e 100644 --- a/clickhouse/columns/numeric.h +++ b/clickhouse/columns/numeric.h @@ -35,10 +35,10 @@ class ColumnVector : public Column { void Append(ColumnRef column) override; /// Loads column data from input stream. - bool Load(InputStream* input, size_t rows) override; + bool LoadBody(InputStream* input, size_t rows) override; /// Saves column data to output stream. - void Save(OutputStream* output) override; + void SaveBody(OutputStream* output) override; /// Clear column data . void Clear() override; @@ -48,6 +48,7 @@ class ColumnVector : public Column { /// Makes slice of the current column. 
ColumnRef Slice(size_t begin, size_t len) const override; + ColumnRef CloneEmpty() const override; void Swap(Column& other) override; ItemView GetItem(size_t index) const override; diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index a2138b96..cfd4c061 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -32,7 +32,7 @@ ColumnFixedString::ColumnFixedString(size_t n) void ColumnFixedString::Append(std::string_view str) { if (str.size() > string_size_) { - throw std::runtime_error("Expected string of length not greater than " + throw ValidationError("Expected string of length not greater than " + std::to_string(string_size_) + " bytes, received " + std::to_string(str.size()) + " bytes."); } @@ -77,7 +77,7 @@ void ColumnFixedString::Append(ColumnRef column) { } } -bool ColumnFixedString::Load(InputStream * input, size_t rows) { +bool ColumnFixedString::LoadBody(InputStream * input, size_t rows) { data_.resize(string_size_ * rows); if (!WireFormat::ReadBytes(*input, &data_[0], data_.size())) { return false; @@ -86,7 +86,7 @@ bool ColumnFixedString::Load(InputStream * input, size_t rows) { return true; } -void ColumnFixedString::Save(OutputStream* output) { +void ColumnFixedString::SaveBody(OutputStream* output) { WireFormat::WriteBytes(*output, data_.data(), data_.size()); } @@ -106,6 +106,10 @@ ColumnRef ColumnFixedString::Slice(size_t begin, size_t len) const { return result; } +ColumnRef ColumnFixedString::CloneEmpty() const { + return std::make_shared(string_size_); +} + void ColumnFixedString::Swap(Column& other) { auto & col = dynamic_cast(other); std::swap(string_size_, col.string_size_); @@ -126,7 +130,7 @@ struct ColumnString::Block data_(new CharT[capacity]) {} - inline auto GetAvailble() const + inline auto GetAvailable() const { return capacity - size; } @@ -179,7 +183,7 @@ ColumnString::~ColumnString() {} void ColumnString::Append(std::string_view str) { - if (blocks_.size() == 0 || 
blocks_.back().GetAvailble() < str.length()) + if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) { blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size())); } @@ -210,7 +214,7 @@ void ColumnString::Append(ColumnRef column) { const auto total_size = ComputeTotalSize(col->items_); // TODO: fill up existing block with some items and then add a new one for the rest of items - if (blocks_.size() == 0 || blocks_.back().GetAvailble() < total_size) + if (blocks_.size() == 0 || blocks_.back().GetAvailable() < total_size) blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, total_size)); items_.reserve(items_.size() + col->Size()); @@ -220,7 +224,7 @@ void ColumnString::Append(ColumnRef column) { } } -bool ColumnString::Load(InputStream* input, size_t rows) { +bool ColumnString::LoadBody(InputStream* input, size_t rows) { items_.clear(); blocks_.clear(); @@ -233,7 +237,7 @@ bool ColumnString::Load(InputStream* input, size_t rows) { if (!WireFormat::ReadUInt64(*input, &len)) return false; - if (blocks_.size() == 0 || len > block->GetAvailble()) + if (blocks_.size() == 0 || len > block->GetAvailable()) block = &blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, len)); if (!WireFormat::ReadBytes(*input, block->GetCurrentWritePos(), len)) @@ -245,7 +249,7 @@ bool ColumnString::Load(InputStream* input, size_t rows) { return true; } -void ColumnString::Save(OutputStream* output) { +void ColumnString::SaveBody(OutputStream* output) { for (const auto & item : items_) { WireFormat::WriteString(*output, item); } @@ -271,6 +275,10 @@ ColumnRef ColumnString::Slice(size_t begin, size_t len) const { return result; } +ColumnRef ColumnString::CloneEmpty() const { + return std::make_shared(); +} + void ColumnString::Swap(Column& other) { auto & col = dynamic_cast(other); items_.swap(col.items_); diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index d1cec652..d6defe50 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ 
-43,10 +43,10 @@ class ColumnFixedString : public Column { void Append(ColumnRef column) override; /// Loads column data from input stream. - bool Load(InputStream* input, size_t rows) override; + bool LoadBody(InputStream* input, size_t rows) override; /// Saves column data to output stream. - void Save(OutputStream* output) override; + void SaveBody(OutputStream* output) override; /// Clear column data . void Clear() override; @@ -56,7 +56,7 @@ class ColumnFixedString : public Column { /// Makes slice of the current column. ColumnRef Slice(size_t begin, size_t len) const override; - + ColumnRef CloneEmpty() const override; void Swap(Column& other) override; ItemView GetItem(size_t) const override; @@ -95,10 +95,10 @@ class ColumnString : public Column { void Append(ColumnRef column) override; /// Loads column data from input stream. - bool Load(InputStream* input, size_t rows) override; + bool LoadBody(InputStream* input, size_t rows) override; /// Saves column data to output stream. - void Save(OutputStream* output) override; + void SaveBody(OutputStream* output) override; /// Clear column data . void Clear() override; @@ -108,6 +108,7 @@ class ColumnString : public Column { /// Makes slice of the current column. 
ColumnRef Slice(size_t begin, size_t len) const override; + ColumnRef CloneEmpty() const override; void Swap(Column& other) override; ItemView GetItem(size_t) const override; diff --git a/clickhouse/columns/tuple.cpp b/clickhouse/columns/tuple.cpp index d799d104..42dc6e63 100644 --- a/clickhouse/columns/tuple.cpp +++ b/clickhouse/columns/tuple.cpp @@ -22,7 +22,7 @@ size_t ColumnTuple::TupleSize() const { void ColumnTuple::Append(ColumnRef column) { if (!this->Type()->IsEqual(column->Type())) { - throw std::runtime_error( + throw ValidationError( "can't append column of type " + column->Type()->GetName() + " " "to column type " + this->Type()->GetName()); } @@ -34,19 +34,31 @@ void ColumnTuple::Append(ColumnRef column) { size_t ColumnTuple::Size() const { return columns_.empty() ? 0 : columns_[0]->Size(); } + ColumnRef ColumnTuple::Slice(size_t begin, size_t len) const { std::vector sliced_columns; sliced_columns.reserve(columns_.size()); - for(const auto &column : columns_){ + for(const auto &column : columns_) { sliced_columns.push_back(column->Slice(begin, len)); } return std::make_shared(sliced_columns); } -bool ColumnTuple::Load(InputStream* input, size_t rows) { +ColumnRef ColumnTuple::CloneEmpty() const { + std::vector result_columns; + result_columns.reserve(columns_.size()); + + for(const auto &column : columns_) { + result_columns.push_back(column->CloneEmpty()); + } + + return std::make_shared(result_columns); +} + +bool ColumnTuple::LoadPrefix(InputStream* input, size_t rows) { for (auto ci = columns_.begin(); ci != columns_.end(); ++ci) { - if (!(*ci)->Load(input, rows)) { + if (!(*ci)->LoadPrefix(input, rows)) { return false; } } @@ -54,9 +66,25 @@ bool ColumnTuple::Load(InputStream* input, size_t rows) { return true; } -void ColumnTuple::Save(OutputStream* output) { +bool ColumnTuple::LoadBody(InputStream* input, size_t rows) { for (auto ci = columns_.begin(); ci != columns_.end(); ++ci) { - (*ci)->Save(output); + if (!(*ci)->LoadBody(input, rows)) { 
+ return false; + } + } + + return true; +} + +void ColumnTuple::SavePrefix(OutputStream* output) { + for (auto & column : columns_) { + column->SavePrefix(output); + } +} + +void ColumnTuple::SaveBody(OutputStream* output) { + for (auto & column : columns_) { + column->SaveBody(output); } } diff --git a/clickhouse/columns/tuple.h b/clickhouse/columns/tuple.h index b1ac784a..63cfc689 100644 --- a/clickhouse/columns/tuple.h +++ b/clickhouse/columns/tuple.h @@ -24,11 +24,17 @@ class ColumnTuple : public Column { /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; + /// Loads column prefix from input stream. + bool LoadPrefix(InputStream* input, size_t rows) override; + /// Loads column data from input stream. - bool Load(InputStream* input, size_t rows) override; + bool LoadBody(InputStream* input, size_t rows) override; + + /// Saves column prefix to output stream. + void SavePrefix(OutputStream* output) override; /// Saves column data to output stream. - void Save(OutputStream* output) override; + void SaveBody(OutputStream* output) override; /// Clear column data . void Clear() override; @@ -38,6 +44,7 @@ class ColumnTuple : public Column { /// Makes slice of the current column. 
ColumnRef Slice(size_t, size_t) const override; + ColumnRef CloneEmpty() const override; void Swap(Column& other) override; private: diff --git a/clickhouse/columns/uuid.cpp b/clickhouse/columns/uuid.cpp index b69ab9c6..8e89f7af 100644 --- a/clickhouse/columns/uuid.cpp +++ b/clickhouse/columns/uuid.cpp @@ -1,5 +1,6 @@ #include "uuid.h" #include "utils.h" +#include "../exceptions.h" #include @@ -16,7 +17,7 @@ ColumnUUID::ColumnUUID(ColumnRef data) , data_(data->As()) { if (data_->Size() % 2 != 0) { - throw std::runtime_error("number of entries must be even (two 64-bit numbers for each UUID)"); + throw ValidationError("number of entries must be even (two 64-bit numbers for each UUID)"); } } @@ -43,12 +44,12 @@ void ColumnUUID::Append(ColumnRef column) { } } -bool ColumnUUID::Load(InputStream* input, size_t rows) { - return data_->Load(input, rows * 2); +bool ColumnUUID::LoadBody(InputStream* input, size_t rows) { + return data_->LoadBody(input, rows * 2); } -void ColumnUUID::Save(OutputStream* output) { - data_->Save(output); +void ColumnUUID::SaveBody(OutputStream* output) { + data_->SaveBody(output); } size_t ColumnUUID::Size() const { @@ -59,13 +60,21 @@ ColumnRef ColumnUUID::Slice(size_t begin, size_t len) const { return std::make_shared(data_->Slice(begin * 2, len * 2)); } +ColumnRef ColumnUUID::CloneEmpty() const { + return std::make_shared(); +} + void ColumnUUID::Swap(Column& other) { auto & col = dynamic_cast(other); data_.swap(col.data_); } ItemView ColumnUUID::GetItem(size_t index) const { - return data_->GetItem(index); + // We know that ColumnUInt64 stores its data in a contiguous memory region, + // and that every 2 values from data represent 1 UUID value. 
+ const auto data_item_view = data_->GetItem(index * 2); + + return ItemView{Type::UUID, std::string_view{data_item_view.data.data(), data_item_view.data.size() * 2}}; } } diff --git a/clickhouse/columns/uuid.h b/clickhouse/columns/uuid.h index 40319459..2b7b58de 100644 --- a/clickhouse/columns/uuid.h +++ b/clickhouse/columns/uuid.h @@ -30,10 +30,10 @@ class ColumnUUID : public Column { void Append(ColumnRef column) override; /// Loads column data from input stream. - bool Load(InputStream* input, size_t rows) override; + bool LoadBody(InputStream* input, size_t rows) override; /// Saves column data to output stream. - void Save(OutputStream* output) override; + void SaveBody(OutputStream* output) override; /// Clear column data . void Clear() override; @@ -43,6 +43,7 @@ class ColumnUUID : public Column { /// Makes slice of the current column. ColumnRef Slice(size_t begin, size_t len) const override; + ColumnRef CloneEmpty() const override; void Swap(Column& other) override; ItemView GetItem(size_t) const override; diff --git a/clickhouse/exceptions.h b/clickhouse/exceptions.h index 44f400f0..4cd5309d 100644 --- a/clickhouse/exceptions.h +++ b/clickhouse/exceptions.h @@ -1,15 +1,47 @@ #pragma once -#include "query.h" +#include "server_exception.h" #include namespace clickhouse { -class ServerException : public std::runtime_error { +class Error : public std::runtime_error { + using std::runtime_error::runtime_error; +}; + +// Caused by any user-related code, like invalid column types or arguments passed to any method. +class ValidationError : public Error { + using Error::Error; +}; + +// Buffers+IO errors, failure to serialize/deserialize, checksum mismatches, etc. +class ProtocolError : public Error { + using Error::Error; +}; + +class UnimplementedError : public Error { + using Error::Error; +}; + +// Internal validation error. 
+class AssertionError : public Error { + using Error::Error; +}; + +class OpenSSLError : public Error { + using Error::Error; +}; + +class LZ4Error : public Error { + using Error::Error; +}; + +// Exception received from server. +class ServerException : public Error { public: ServerException(std::unique_ptr e) - : runtime_error(std::string()) + : Error(std::string()) , exception_(std::move(e)) { } @@ -29,5 +61,6 @@ class ServerException : public std::runtime_error { private: std::unique_ptr exception_; }; +using ServerError = ServerException; } diff --git a/clickhouse/protocol.h b/clickhouse/protocol.h index 9e8cfacb..dc51f32a 100644 --- a/clickhouse/protocol.h +++ b/clickhouse/protocol.h @@ -2,38 +2,37 @@ namespace clickhouse { - /// То, что передаёт сервер. + /// Types of packets received from server namespace ServerCodes { enum { - Hello = 0, /// Имя, версия, ревизия. - Data = 1, /// Блок данных со сжатием или без. - Exception = 2, /// Исключение во время обработки запроса. - Progress = 3, /// Прогресс выполнения запроса: строк считано, байт считано. - Pong = 4, /// Ответ на Ping. - EndOfStream = 5, /// Все пакеты были переданы. - ProfileInfo = 6, /// Пакет с профайлинговой информацией. - Totals = 7, /// Блок данных с тотальными значениями, со сжатием или без. - Extremes = 8, /// Блок данных с минимумами и максимумами, аналогично. - TablesStatusResponse = 9, /// Ответ на запрос TableStatus. - Log = 10, /// Системный лог исполнения запроса. + Hello = 0, /// Name, version, revision. + Data = 1, /// `Block` of data, may be compressed. + Exception = 2, /// Exception that occurred on server side during query execution. + Progress = 3, /// Query execution progress: rows and bytes read. + Pong = 4, /// response to Ping sent by client. + EndOfStream = 5, /// All packets were sent. + ProfileInfo = 6, /// Profiling data + Totals = 7, /// Block of totals, may be compressed. + Extremes = 8, /// Block of mins and maxs, may be compressed. 
+ TablesStatusResponse = 9, /// Response to TableStatus. + Log = 10, /// Query execution log. }; } - /// То, что передаёт клиент. + /// Types of packets sent by client. namespace ClientCodes { enum { - Hello = 0, /// Имя, версия, ревизия, БД по-умолчанию. - Query = 1, /** Идентификатор запроса, настройки на отдельный запрос, - * информация, до какой стадии исполнять запрос, - * использовать ли сжатие, текст запроса (без данных для INSERT-а). + Hello = 0, /// Name, version, default database name. + Query = 1, /** Query id, query settings, query processing stage, + * compression status, and query text (no INSERT data). */ - Data = 2, /// Блок данных со сжатием или без. - Cancel = 3, /// Отменить выполнение запроса. - Ping = 4, /// Проверка живости соединения с сервером. + Data = 2, /// Data `Block` (e.g. INSERT data), may be compressed. + Cancel = 3, /// Cancel query. + Ping = 4, /// Check server connection. }; } - /// Использовать ли сжатие. + /// Should we compress `Block`s of data namespace CompressionState { enum { Disable = 0, diff --git a/clickhouse/query.h b/clickhouse/query.h index e9e12856..ae98690d 100644 --- a/clickhouse/query.h +++ b/clickhouse/query.h @@ -1,6 +1,7 @@ #pragma once #include "block.h" +#include "server_exception.h" #include #include @@ -13,11 +14,11 @@ namespace clickhouse { * Settings of individual query. */ struct QuerySettings { - /// Максимальное количество потоков выполнения запроса. По-умолчанию - определять автоматически. + /// Maximum number of threads to use on the server side to process a query. Default - let the server choose. int max_threads = 0; - /// Считать минимумы и максимумы столбцов результата. + /// Compute min and max values of the result. bool extremes = false; - /// Тихо пропускать недоступные шарды. + /// Silently skip unavailable shards. bool skip_unavailable_shards = false; /// Write statistics about read rows, bytes, time elapsed, etc. 
bool output_format_write_statistics = true; @@ -33,16 +34,6 @@ struct QuerySettings { }; -struct Exception { - int code = 0; - std::string name; - std::string display_text; - std::string stack_trace; - /// Pointer to nested exception. - std::unique_ptr nested; -}; - - struct Profile { uint64_t rows = 0; uint64_t blocks = 0; @@ -90,7 +81,7 @@ class Query : public QueryEvents { Query(); Query(const char* query, const char* query_id = nullptr); Query(const std::string& query, const std::string& query_id = default_query_id); - ~Query(); + ~Query() override; /// inline const std::string& GetText() const { @@ -103,25 +94,25 @@ class Query : public QueryEvents { /// Set handler for receiving result data. inline Query& OnData(SelectCallback cb) { - select_cb_ = cb; + select_cb_ = std::move(cb); return *this; } inline Query& OnDataCancelable(SelectCancelableCallback cb) { - select_cancelable_cb_ = cb; + select_cancelable_cb_ = std::move(cb); return *this; } /// Set handler for receiving server's exception. inline Query& OnException(ExceptionCallback cb) { - exception_cb_ = cb; + exception_cb_ = std::move(cb); return *this; } /// Set handler for receiving a progress of query exceution. inline Query& OnProgress(ProgressCallback cb) { - progress_cb_ = cb; + progress_cb_ = std::move(cb); return *this; } diff --git a/clickhouse/server_exception.h b/clickhouse/server_exception.h new file mode 100644 index 00000000..dcc97c51 --- /dev/null +++ b/clickhouse/server_exception.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace clickhouse { +struct Exception { + int code = 0; + std::string name; + std::string display_text; + std::string stack_trace; + /// Pointer to nested exception. 
+ std::unique_ptr nested; +}; + +} diff --git a/clickhouse/types/type_parser.cpp b/clickhouse/types/type_parser.cpp index 185a59c8..36bd9271 100644 --- a/clickhouse/types/type_parser.cpp +++ b/clickhouse/types/type_parser.cpp @@ -32,6 +32,7 @@ static const std::unordered_map kTypeCode = { { "DateTime", Type::DateTime }, { "DateTime64", Type::DateTime64 }, { "Date", Type::Date }, + { "Date32", Type::Date32 }, { "Array", Type::Array }, { "Nullable", Type::Nullable }, { "Tuple", Type::Tuple }, diff --git a/clickhouse/types/types.cpp b/clickhouse/types/types.cpp index 32ef9321..c34c0b4e 100644 --- a/clickhouse/types/types.cpp +++ b/clickhouse/types/types.cpp @@ -1,5 +1,7 @@ #include "types.h" +#include "../exceptions.h" + #include #include @@ -11,50 +13,71 @@ Type::Type(const Code code) , type_unique_id_(0) {} +const char* Type::TypeName(Type::Code code) { + switch (code) { + case Type::Code::Void: return "Void"; + case Type::Code::Int8: return "Int8"; + case Type::Code::Int16: return "Int16"; + case Type::Code::Int32: return "Int32"; + case Type::Code::Int64: return "Int64"; + case Type::Code::UInt8: return "UInt8"; + case Type::Code::UInt16: return "UInt16"; + case Type::Code::UInt32: return "UInt32"; + case Type::Code::UInt64: return "UInt64"; + case Type::Code::Float32: return "Float32"; + case Type::Code::Float64: return "Float64"; + case Type::Code::String: return "String"; + case Type::Code::FixedString: return "FixedString"; + case Type::Code::DateTime: return "DateTime"; + case Type::Code::Date: return "Date"; + case Type::Code::Array: return "Array"; + case Type::Code::Nullable: return "Nullable"; + case Type::Code::Tuple: return "Tuple"; + case Type::Code::Enum8: return "Enum8"; + case Type::Code::Enum16: return "Enum16"; + case Type::Code::UUID: return "UUID"; + case Type::Code::IPv4: return "IPv4"; + case Type::Code::IPv6: return "IPv6"; + case Type::Code::Int128: return "Int128"; + case Type::Code::Decimal: return "Decimal"; + case Type::Code::Decimal32: 
return "Decimal32"; + case Type::Code::Decimal64: return "Decimal64"; + case Type::Code::Decimal128: return "Decimal128"; + case Type::Code::LowCardinality: return "LowCardinality"; + case Type::Code::DateTime64: return "DateTime64"; + case Type::Code::Date32: return "Date32"; + } + + return "Unknown type"; +} + std::string Type::GetName() const { switch (code_) { case Void: - return "Void"; case Int8: - return "Int8"; case Int16: - return "Int16"; case Int32: - return "Int32"; case Int64: - return "Int64"; case Int128: - return "Int128"; case UInt8: - return "UInt8"; case UInt16: - return "UInt16"; case UInt32: - return "UInt32"; case UInt64: - return "UInt64"; case UUID: - return "UUID"; case Float32: - return "Float32"; case Float64: - return "Float64"; case String: - return "String"; - case FixedString: - return As()->GetName(); case IPv4: - return "IPv4"; case IPv6: - return "IPv6"; + case Date: + case Date32: + return TypeName(code_); + case FixedString: + return As()->GetName(); case DateTime: - { return As()->GetName(); - } case DateTime64: return As()->GetName(); - case Date: - return "Date"; case Array: return As()->GetName(); case Nullable: @@ -99,6 +122,7 @@ uint64_t Type::GetTypeUniqueId() const { case IPv4: case IPv6: case Date: + case Date32: // For simple types, unique ID is the same as Type::Code return code_; @@ -141,6 +165,10 @@ TypeRef Type::CreateDate() { return TypeRef(new Type(Type::Date)); } +TypeRef Type::CreateDate32() { + return TypeRef(new Type(Type::Date32)); +} + TypeRef Type::CreateDateTime(std::string timezone) { return TypeRef(new DateTimeType(std::move(timezone))); } @@ -231,8 +259,8 @@ std::string DecimalType::GetName() const { EnumType::EnumType(Type::Code type, const std::vector& items) : Type(type) { for (const auto& item : items) { - value_to_name_[item.second] = item.first; - name_to_value_[item.first] = item.second; + auto result = name_to_value_.insert(item); + value_to_name_[item.second] = result.first->first; } } @@ 
-263,7 +291,7 @@ std::string EnumType::GetName() const { return result; } -const std::string& EnumType::GetEnumName(int16_t value) const { +std::string_view EnumType::GetEnumName(int16_t value) const { return value_to_name_.at(value); } @@ -319,7 +347,7 @@ DateTime64Type::DateTime64Type(size_t precision, std::string timezone) : Type(DateTime64), details::TypeWithTimeZoneMixin(std::move(timezone)), precision_(precision) { if (precision_ > 18) { - throw std::runtime_error("DateTime64 precision is > 18"); + throw ValidationError("DateTime64 precision is > 18"); } } diff --git a/clickhouse/types/types.h b/clickhouse/types/types.h index 173edd96..8a27257a 100644 --- a/clickhouse/types/types.h +++ b/clickhouse/types/types.h @@ -49,6 +49,7 @@ class Type { Decimal128, LowCardinality, DateTime64, + Date32, }; using EnumItem = std::pair; @@ -83,11 +84,16 @@ class Type { bool IsEqual(const TypeRef& other) const { return IsEqual(*other); } + /// Simple name, doesn't depend on parameters and\or nested types, caller MUST NOT free returned value. + static const char* TypeName(Code); + public: static TypeRef CreateArray(TypeRef item_type); static TypeRef CreateDate(); + static TypeRef CreateDate32(); + static TypeRef CreateDateTime(std::string timezone = std::string()); static TypeRef CreateDateTime64(size_t precision, std::string timezone = std::string()); @@ -206,13 +212,13 @@ class EnumType : public Type { std::string GetName() const; /// Methods to work with enum types. 
- const std::string& GetEnumName(int16_t value) const; + std::string_view GetEnumName(int16_t value) const; int16_t GetEnumValue(const std::string& name) const; bool HasEnumName(const std::string& name) const; bool HasEnumValue(int16_t value) const; private: - using ValueToNameType = std::map; + using ValueToNameType = std::map; using NameToValueType = std::map; using ValueToNameIterator = ValueToNameType::const_iterator; diff --git a/ut/CMakeLists.txt b/ut/CMakeLists.txt index baa72b91..86066965 100644 --- a/ut/CMakeLists.txt +++ b/ut/CMakeLists.txt @@ -4,17 +4,26 @@ SET ( clickhouse-cpp-ut-src block_ut.cpp client_ut.cpp columns_ut.cpp + column_array_ut.cpp itemview_ut.cpp socket_ut.cpp stream_ut.cpp type_parser_ut.cpp types_ut.cpp + utils_ut.cpp performance_tests.cpp tcp_server.cpp - utils.cpp readonly_client_test.cpp connection_failed_client_test.cpp + array_of_low_cardinality_tests.cpp + CreateColumnByType_ut.cpp + Column_ut.cpp + roundtrip_column.cpp + + utils.cpp + value_generators.cpp + low_cardinality_nullable_tests.cpp ) IF (WITH_OPENSSL) diff --git a/ut/Column_ut.cpp b/ut/Column_ut.cpp new file mode 100644 index 00000000..de8a21ac --- /dev/null +++ b/ut/Column_ut.cpp @@ -0,0 +1,299 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // for ipv4-ipv6 platform-specific stuff + +#include + +#include + +#include "utils.h" +#include "roundtrip_column.h" +#include "value_generators.h" + +namespace { +using namespace clickhouse; +} + +namespace clickhouse{ + +std::ostream& operator<<(std::ostream& ostr, const Type::Code& type_code) { + return ostr << Type::TypeName(type_code) << " (" << static_cast(type_code) << ")"; +} + +} + + +// Generic tests for a Column subclass against basic API: +// 1. Constructor: Create, ensure that it is empty +// 2. 
Append: Create, add some data one by one via Append, make sure that values inserted match extracted with At() and operator[] +// 3. Slice: Create, add some data via Append, do Slice() +// 4. CloneEmpty: Create, invoke CloneEmpty, ensure that clone is Empty +// 5. Clear: Create, add some data, invoke Clear(), make sure column is empty +// 6. Swap: create two instances, populate one with data, swap with second, make sure the data was transferred +// 7. Load/Save: create, append some data, save to buffer, load from same buffer into new column, make sure columns match. + +template +class GenericColumnTest : public testing::Test { +public: + using ColumnType = std::decay_t; + + static auto MakeColumn() { + if constexpr (std::is_same_v) { + return std::make_shared(12); + } else if constexpr (std::is_same_v) { + return std::make_shared(3); + } else if constexpr (std::is_same_v) { + return std::make_shared(10, 5); + } else { + return std::make_shared(); + } + } + + static auto GenerateValues(size_t values_size) { + if constexpr (std::is_same_v) { + return GenerateVector(values_size, FooBarGenerator); + } else if constexpr (std::is_same_v) { + return GenerateVector(values_size, FromVectorGenerator{MakeFixedStrings(12)}); + } else if constexpr (std::is_same_v) { + return GenerateVector(values_size, FromVectorGenerator{MakeDates()}); + } else if constexpr (std::is_same_v) { + return GenerateVector(values_size, FromVectorGenerator{MakeDateTimes()}); + } else if constexpr (std::is_same_v) { + return MakeDateTime64s(3u, values_size); + } else if constexpr (std::is_same_v) { + return GenerateVector(values_size, FromVectorGenerator{MakeDates32()}); + } else if constexpr (std::is_same_v) { + return GenerateVector(values_size, FromVectorGenerator{MakeIPv4s()}); + } else if constexpr (std::is_same_v) { + return GenerateVector(values_size, FromVectorGenerator{MakeIPv6s()}); + } else if constexpr (std::is_same_v) { + return GenerateVector(values_size,
FromVectorGenerator{MakeInt128s()}); + } else if constexpr (std::is_same_v) { + return GenerateVector(values_size, FromVectorGenerator{MakeDecimals(3, 10)}); + } else if constexpr (std::is_same_v) { + return GenerateVector(values_size, FromVectorGenerator{MakeUUIDs()}); + } else if constexpr (std::is_integral_v) { + // ColumnUIntX and ColumnIntX + return GenerateVector(values_size, RandomGenerator()); + } else if constexpr (std::is_floating_point_v) { + // OR ColumnFloatX + return GenerateVector(values_size, RandomGenerator()); + } + } + + template + static void AppendValues(std::shared_ptr column, const Values& values) { + for (const auto & v : values) { + column->Append(v); + } + } + + static auto MakeColumnWithValues(size_t values_size) { + auto column = MakeColumn(); + auto values = GenerateValues(values_size); + AppendValues(column, values); + + return std::tuple{column, values}; + } +}; + +using ValueColumns = ::testing::Types< + ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64 + , ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64 + , ColumnFloat32, ColumnFloat64 + , ColumnString, ColumnFixedString + , ColumnDate, ColumnDateTime, ColumnDateTime64, ColumnDate32 + , ColumnIPv4, ColumnIPv6 + , ColumnInt128 + , ColumnDecimal + , ColumnUUID +>; +TYPED_TEST_SUITE(GenericColumnTest, ValueColumns); + +TYPED_TEST(GenericColumnTest, Construct) { + auto column = this->MakeColumn(); + ASSERT_EQ(0u, column->Size()); +} + +TYPED_TEST(GenericColumnTest, EmptyColumn) { + auto column = this->MakeColumn(); + ASSERT_EQ(0u, column->Size()); + + // verify that Column methods work as expected on empty column: + // some throw exceptions, some return proper values (like CloneEmpty) + + // Shouldn't be able to get items on empty column.
+ ASSERT_ANY_THROW(column->At(0)); + + { + auto slice = column->Slice(0, 0); + ASSERT_NO_THROW(slice->template AsStrict()); + ASSERT_EQ(0u, slice->Size()); + } + + { + auto clone = column->CloneEmpty(); + ASSERT_NO_THROW(clone->template AsStrict()); + ASSERT_EQ(0u, clone->Size()); + } + + ASSERT_NO_THROW(column->Clear()); + ASSERT_NO_THROW(column->Swap(*this->MakeColumn())); +} + +TYPED_TEST(GenericColumnTest, Append) { + auto column = this->MakeColumn(); + const auto values = this->GenerateValues(100); + + for (const auto & v : values) { + EXPECT_NO_THROW(column->Append(v)); + } + + EXPECT_TRUE(CompareRecursive(values, *column)); +} + +// To make some value types compatible with Column::GetItem() +template +inline auto convertValueForGetItem(const ColumnType& col, ValueType&& t) { + using T = std::remove_cv_t>; + + if constexpr (std::is_same_v) { + // Since ColumnDecimal can hold 32, 64, 128-bit wide data and there is no way telling at run-time. + const ItemView item = col.GetItem(0); + return std::string_view(reinterpret_cast(&t), item.data.size()); + } else if constexpr (std::is_same_v + || std::is_same_v) { + return std::string_view{reinterpret_cast(&t), sizeof(T)}; + } else if constexpr (std::is_same_v) { + return htonl(t.s_addr); + } else if constexpr (std::is_same_v) { + return std::string_view(reinterpret_cast(t.s6_addr), 16); + } else if constexpr (std::is_same_v) { + return static_cast(t / std::time_t(86400)); + } else if constexpr (std::is_same_v) { + return static_cast(t / std::time_t(86400)); + } else if constexpr (std::is_same_v) { + return static_cast(t); + } else { + return t; + } +} + +TYPED_TEST(GenericColumnTest, GetItem) { + auto [column, values] = this->MakeColumnWithValues(100); + + ASSERT_EQ(values.size(), column->Size()); + ASSERT_EQ(column->GetItem(0).type, column->GetType().GetCode()); + + for (size_t i = 0; i < values.size(); ++i) { + const auto v = convertValueForGetItem(*column, values[i]); + const ItemView item = column->GetItem(i);
+ + ASSERT_TRUE(CompareRecursive(item.get(), v)); + } +} + +TYPED_TEST(GenericColumnTest, Slice) { + auto [column, values] = this->MakeColumnWithValues(100); + + auto untyped_slice = column->Slice(0, column->Size()); + auto slice = untyped_slice->template AsStrict(); + EXPECT_EQ(column->GetType(), slice->GetType()); + + EXPECT_TRUE(CompareRecursive(values, *slice)); + + // TODO: slices of different sizes +} + +TYPED_TEST(GenericColumnTest, CloneEmpty) { + auto [column, values] = this->MakeColumnWithValues(100); + EXPECT_EQ(values.size(), column->Size()); + + auto clone_untyped = column->CloneEmpty(); + // Check that type matches + auto clone = clone_untyped->template AsStrict(); + EXPECT_EQ(0u, clone->Size()); + + EXPECT_EQ(column->GetType(), clone->GetType()); +} + +TYPED_TEST(GenericColumnTest, Clear) { + auto [column, values] = this->MakeColumnWithValues(100); + EXPECT_EQ(values.size(), column->Size()); + + column->Clear(); + EXPECT_EQ(0u, column->Size()); +} + +TYPED_TEST(GenericColumnTest, Swap) { + auto [column_A, values] = this->MakeColumnWithValues(100); + auto column_B = this->MakeColumn(); + + column_A->Swap(*column_B); + + EXPECT_EQ(0u, column_A->Size()); + EXPECT_TRUE(CompareRecursive(values, *column_B)); +} + +TYPED_TEST(GenericColumnTest, LoadAndSave) { + auto [column_A, values] = this->MakeColumnWithValues(100); + + char buffer[4096] = {'\0'}; + { + ArrayOutput output(buffer, sizeof(buffer)); + // Save + EXPECT_NO_THROW(column_A->Save(&output)); + } + + auto column_B = this->MakeColumn(); + { + ArrayInput input(buffer, sizeof(buffer)); + // Load + EXPECT_TRUE(column_B->Load(&input, values.size())); + } + + EXPECT_TRUE(CompareRecursive(*column_A, *column_B)); +} + +const auto LocalHostEndpoint = ClientOptions() + .SetHost( getEnvOrDefault("CLICKHOUSE_HOST", "localhost")) + .SetPort( getEnvOrDefault("CLICKHOUSE_PORT", "9000")) + .SetUser( getEnvOrDefault("CLICKHOUSE_USER", "default")) + .SetPassword( getEnvOrDefault("CLICKHOUSE_PASSWORD", "")) + 
.SetDefaultDatabase(getEnvOrDefault("CLICKHOUSE_DB", "default")); + +TYPED_TEST(GenericColumnTest, RoundTrip) { + auto [column, values] = this->MakeColumnWithValues(100); + EXPECT_EQ(values.size(), column->Size()); + + clickhouse::Client client(LocalHostEndpoint); + + if constexpr (std::is_same_v) { + // Date32 first appeared in v21.9.2.17-stable + const auto server_info = client.GetServerInfo(); + if (versionNumber(server_info) < versionNumber(21, 9)) { + GTEST_SKIP() << "Date32 is availble since v21.9.2.17-stable and can't be tested against server: " << server_info; + } + } + + if constexpr (std::is_same_v) { + const auto server_info = client.GetServerInfo(); + if (versionNumber(server_info) < versionNumber(21, 7)) { + GTEST_SKIP() << "ColumnInt128 is availble since v21.7.2.7-stable and can't be tested against server: " << server_info; + } + } + + auto result_typed = RoundtripColumnValues(client, column)->template AsStrict(); + EXPECT_TRUE(CompareRecursive(*column, *result_typed)); +} diff --git a/ut/CreateColumnByType_ut.cpp b/ut/CreateColumnByType_ut.cpp new file mode 100644 index 00000000..fb7ffd85 --- /dev/null +++ b/ut/CreateColumnByType_ut.cpp @@ -0,0 +1,84 @@ +#include +#include +#include +#include + +#include + +namespace { +using namespace clickhouse; +} + +TEST(CreateColumnByType, CreateSimpleAggregateFunction) { + auto col = CreateColumnByType("SimpleAggregateFunction(funt, Int32)"); + + ASSERT_EQ("Int32", col->Type()->GetName()); + ASSERT_EQ(Type::Int32, col->Type()->GetCode()); + ASSERT_NE(nullptr, col->As()); +} + +TEST(CreateColumnByType, UnmatchedBrackets) { + // When type string has unmatched brackets, CreateColumnByType must return nullptr. 
+ ASSERT_EQ(nullptr, CreateColumnByType("FixedString(10")); + ASSERT_EQ(nullptr, CreateColumnByType("Nullable(FixedString(10000")); + ASSERT_EQ(nullptr, CreateColumnByType("Nullable(FixedString(10000)")); + ASSERT_EQ(nullptr, CreateColumnByType("LowCardinality(Nullable(FixedString(10000")); + ASSERT_EQ(nullptr, CreateColumnByType("LowCardinality(Nullable(FixedString(10000)")); + ASSERT_EQ(nullptr, CreateColumnByType("LowCardinality(Nullable(FixedString(10000))")); + ASSERT_EQ(nullptr, CreateColumnByType("Array(LowCardinality(Nullable(FixedString(10000")); + ASSERT_EQ(nullptr, CreateColumnByType("Array(LowCardinality(Nullable(FixedString(10000)")); + ASSERT_EQ(nullptr, CreateColumnByType("Array(LowCardinality(Nullable(FixedString(10000))")); + ASSERT_EQ(nullptr, CreateColumnByType("Array(LowCardinality(Nullable(FixedString(10000)))")); +} + +TEST(CreateColumnByType, LowCardinalityAsWrappedColumn) { + CreateColumnByTypeSettings create_column_settings; + create_column_settings.low_cardinality_as_wrapped_column = true; + + ASSERT_EQ(Type::String, CreateColumnByType("LowCardinality(String)", create_column_settings)->GetType().GetCode()); + ASSERT_EQ(Type::String, CreateColumnByType("LowCardinality(String)", create_column_settings)->As()->GetType().GetCode()); + + ASSERT_EQ(Type::FixedString, CreateColumnByType("LowCardinality(FixedString(10000))", create_column_settings)->GetType().GetCode()); + ASSERT_EQ(Type::FixedString, CreateColumnByType("LowCardinality(FixedString(10000))", create_column_settings)->As()->GetType().GetCode()); +} + +TEST(CreateColumnByType, DateTime) { + ASSERT_NE(nullptr, CreateColumnByType("DateTime")); + ASSERT_NE(nullptr, CreateColumnByType("DateTime('Europe/Moscow')")); + + ASSERT_EQ(CreateColumnByType("DateTime('UTC')")->As()->Timezone(), "UTC"); + ASSERT_EQ(CreateColumnByType("DateTime64(3, 'UTC')")->As()->Timezone(), "UTC"); +} + +class CreateColumnByTypeWithName : public ::testing::TestWithParam +{}; + +TEST_P(CreateColumnByTypeWithName, 
CreateColumnByType) +{ + const auto col = CreateColumnByType(GetParam()); + ASSERT_NE(nullptr, col); + EXPECT_EQ(col->GetType().GetName(), GetParam()); +} + +INSTANTIATE_TEST_SUITE_P(Basic, CreateColumnByTypeWithName, ::testing::Values( + "Int8", "Int16", "Int32", "Int64", + "UInt8", "UInt16", "UInt32", "UInt64", + "String", "Date", "DateTime", + "UUID", "Int128" +)); + +INSTANTIATE_TEST_SUITE_P(Parametrized, CreateColumnByTypeWithName, ::testing::Values( + "FixedString(0)", "FixedString(10000)", + "DateTime('UTC')", "DateTime64(3, 'UTC')", + "Decimal(9,3)", "Decimal(18,3)", + "Enum8('ONE' = 1, 'TWO' = 2)", + "Enum16('ONE' = 1, 'TWO' = 2, 'THREE' = 3, 'FOUR' = 4)" +)); + + +INSTANTIATE_TEST_SUITE_P(Nested, CreateColumnByTypeWithName, ::testing::Values( + "Nullable(FixedString(10000))", + "Nullable(LowCardinality(FixedString(10000)))", + "Array(Nullable(LowCardinality(FixedString(10000))))", + "Array(Enum8('ONE' = 1, 'TWO' = 2))" +)); diff --git a/ut/array_of_low_cardinality_tests.cpp b/ut/array_of_low_cardinality_tests.cpp new file mode 100644 index 00000000..56171236 --- /dev/null +++ b/ut/array_of_low_cardinality_tests.cpp @@ -0,0 +1,105 @@ +#include +#include +#include +#include + +#include +#include +#include +#include "clickhouse/block.h" +#include "clickhouse/client.h" +#include "utils.h" +#include "clickhouse/base/buffer.h" +#include "clickhouse/base/output.h" + +namespace +{ +using namespace clickhouse; +} + +std::shared_ptr buildTestColumn(const std::vector>& rows) { + auto arrayColumn = std::make_shared(std::make_shared>()); + + for (const auto& row : rows) { + auto column = std::make_shared>(); + + for (const auto& string : row) { + column->Append(string); + } + + arrayColumn->AppendAsColumn(column); + } + + return arrayColumn; +} + +TEST(ArrayOfLowCardinality, Serialization) { + const auto inputColumn = buildTestColumn({ + { "aa", "bb" }, + { "cc" } + }); + + // The serialization data was extracted from a successful insert. 
+ // When compared to what Clickhouse/NativeWriter does for the same fields, the only differences are the index type and indexes. + // Since we are setting a different index type in clickhouse-cpp, it's expected to have different indexes. + const std::vector expectedSerialization { + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x61, 0x61, + 0x02, 0x62, 0x62, 0x02, 0x63, 0x63, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00 + }; + + Buffer buf; + + BufferOutput output(&buf); + inputColumn->Save(&output); + output.Flush(); + + ASSERT_EQ(expectedSerialization, buf); +} + +TEST(ArrayOfLowCardinality, InsertAndQuery) { + + const auto localHostEndpoint = ClientOptions() + .SetHost( getEnvOrDefault("CLICKHOUSE_HOST", "localhost")) + .SetPort( getEnvOrDefault("CLICKHOUSE_PORT", "9000")) + .SetUser( getEnvOrDefault("CLICKHOUSE_USER", "default")) + .SetPassword( getEnvOrDefault("CLICKHOUSE_PASSWORD", "")) + .SetDefaultDatabase(getEnvOrDefault("CLICKHOUSE_DB", "default")); + + Client client(ClientOptions(localHostEndpoint) + .SetPingBeforeQuery(true)); + + const auto testData = std::vector> { + { "aa", "bb" }, + {}, + { "cc" }, + {} + }; + + auto column = buildTestColumn(testData); + + Block block; + block.AppendColumn("arr", column); + + client.Execute("DROP TEMPORARY TABLE IF EXISTS array_lc"); + client.Execute("CREATE TEMPORARY TABLE IF NOT EXISTS array_lc (arr Array(LowCardinality(String))) ENGINE = Memory"); + client.Insert("array_lc", block); + + client.Select("SELECT * FROM array_lc", [&](const Block& bl) { + for (size_t c = 0; c < bl.GetRowCount(); ++c) { + auto col = bl[0]->As()->GetAsColumn(c); + for (size_t i = 0; i < col->Size(); ++i) { + if (auto string_column = col->As()) { + const 
auto string = string_column->At(i); + ASSERT_EQ(testData[c][i], string); + } else if (auto lc_string_column = col->As>()) { + const auto string = lc_string_column->At(i); + ASSERT_EQ(testData[c][i], string); + } else { + FAIL() << "Unexpected column type: " << col->Type()->GetName(); + } + } + } + }); +} diff --git a/ut/client_ut.cpp b/ut/client_ut.cpp index 9a8be221..5cc1b81a 100644 --- a/ut/client_ut.cpp +++ b/ut/client_ut.cpp @@ -3,53 +3,15 @@ #include "readonly_client_test.h" #include "connection_failed_client_test.h" #include "utils.h" +#include "roundtrip_column.h" #include -#include #include #include using namespace clickhouse; -namespace clickhouse { -std::ostream & operator<<(std::ostream & ostr, const ServerInfo & server_info) { - return ostr << server_info.name << "/" << server_info.display_name - << " ver " - << server_info.version_major << "." - << server_info.version_minor << "." - << server_info.version_patch - << " (" << server_info.revision << ")"; -} -} - -namespace { - -uint64_t versionNumber( - uint64_t version_major, - uint64_t version_minor, - uint64_t version_patch = 0, - uint64_t revision = 0) { - - // in this case version_major can be up to 1000 - static auto revision_decimal_places = 8; - static auto patch_decimal_places = 4; - static auto minor_decimal_places = 4; - - auto const result = version_major * static_cast(std::pow(10, minor_decimal_places + patch_decimal_places + revision_decimal_places)) - + version_minor * static_cast(std::pow(10, patch_decimal_places + revision_decimal_places)) - + version_patch * static_cast(std::pow(10, revision_decimal_places)) - + revision; - - return result; -} - -uint64_t versionNumber(const ServerInfo & server_info) { - return versionNumber(server_info.version_major, server_info.version_minor, server_info.version_patch, server_info.revision); -} - -} - // Use value-parameterized tests to run same tests with different client // options. 
class ClientCase : public testing::TestWithParam { @@ -58,7 +20,9 @@ class ClientCase : public testing::TestWithParam { client_ = std::make_unique(GetParam()); } - void TearDown() override {} + void TearDown() override { + client_.reset(); + } template std::shared_ptr createTableWithOneColumn(Block & block) @@ -913,6 +877,139 @@ TEST_P(ClientCase, Query_ID) { EXPECT_EQ(5u, total_count); } +// Spontaneously fails on INSERTing data. +TEST_P(ClientCase, DISABLED_ArrayArrayUInt64) { + // Based on https://github.com/ClickHouse/clickhouse-cpp/issues/43 + std::cerr << "Connected to: " << client_->GetServerInfo() << std::endl; + std::cerr << "DROPPING TABLE" << std::endl; + client_->Execute("DROP TEMPORARY TABLE IF EXISTS multiarray"); + + std::cerr << "CREATING TABLE" << std::endl; + client_->Execute(Query(R"sql(CREATE TEMPORARY TABLE IF NOT EXISTS multiarray + ( + `arr` Array(Array(UInt64)) + ); +)sql")); + + std::cerr << "INSERTING VALUES" << std::endl; + client_->Execute(Query(R"sql(INSERT INTO multiarray VALUES ([[0,1,2,3,4,5], [100, 200], [10,20, 50, 70]]), ([[456, 789], [1011, 1213], [], [14]]), ([[]]);)sql")); + std::cerr << "INSERTED" << std::endl; + + auto result = std::make_shared(std::make_shared(std::make_shared())); + ASSERT_EQ(0u, result->Size()); + + std::cerr << "SELECTING VALUES" << std::endl; + client_->Select("SELECT arr FROM multiarray", [&result](const Block& block) { + std::cerr << "GOT BLOCK: " << block.GetRowCount() << std::endl; + if (block.GetRowCount() == 0) + return; + + result->Append(block[0]); + }); + + std::cerr << "DONE SELECTING VALUES" << std::endl; + client_.reset(); + + ASSERT_EQ(3u, result->Size()); + { + // ([[0,1,2,3,4,5], [100, 200], [10,20, 50, 70]]) + const std::vector> expected_vals = { + {0, 1, 2, 3, 4, 5}, + {100, 200}, + {10, 20, 50, 70} + }; + + auto row = result->GetAsColumnTyped(0); + ASSERT_EQ(3u, row->Size()); + EXPECT_TRUE(CompareRecursive(expected_vals[0], *row->GetAsColumnTyped(0))); + 
EXPECT_TRUE(CompareRecursive(expected_vals[1], *row->GetAsColumnTyped(1))); + EXPECT_TRUE(CompareRecursive(expected_vals[2], *row->GetAsColumnTyped(2))); + } + + { + // ([[456, 789], [1011, 1213], [], [14]]) + const std::vector> expected_vals = { + {456, 789}, + {1011, 1213}, + {}, + {14} + }; + + auto row = result->GetAsColumnTyped(1); + ASSERT_EQ(4u, row->Size()); + EXPECT_TRUE(CompareRecursive(expected_vals[0], *row->GetAsColumnTyped(0))); + EXPECT_TRUE(CompareRecursive(expected_vals[1], *row->GetAsColumnTyped(1))); + EXPECT_TRUE(CompareRecursive(expected_vals[2], *row->GetAsColumnTyped(2))); + EXPECT_TRUE(CompareRecursive(expected_vals[3], *row->GetAsColumnTyped(3))); + } + + { + // ([[]]) + auto row = result->GetAsColumnTyped(2); + ASSERT_EQ(1u, row->Size()); + EXPECT_TRUE(CompareRecursive(std::vector{}, *row->GetAsColumnTyped(0))); + } +} + +TEST_P(ClientCase, RoundtripArrayTUint64) { + auto array = std::make_shared>(); + array->Append({0, 1, 2}); + + auto result = RoundtripColumnValues(*client_, array)->AsStrict(); + auto row = result->GetAsColumn(0)->As(); + + EXPECT_EQ(0u, row->At(0)); + EXPECT_EQ(1u, (*row)[1]); + EXPECT_EQ(2u, (*row)[2]); +} + +TEST_P(ClientCase, RoundtripArrayTArrayTUint64) { + const std::vector> row_values = { + {1, 2, 3}, + {4, 5, 6}, + {7, 8, 9, 10} + }; + + auto array = std::make_shared>>(); + array->Append(row_values); + + auto result_typed = ColumnArrayT>::Wrap(RoundtripColumnValues(*client_, array)); + EXPECT_TRUE(CompareRecursive(*array, *result_typed)); +} + +TEST_P(ClientCase, RoundtripArrayTArrayTArrayTUint64) { + using ColumnType = ColumnArrayT>>; + const std::vector>> row_values = { + {{1, 2, 3}, {3, 2, 1}}, + {{4, 5, 6}, {6, 5, 4}}, + {{7, 8, 9, 10}, {}}, + {{}, {10, 9, 8, 7}} + }; + + auto array = std::make_shared(); + array->Append(row_values); + + auto result_typed = ColumnType::Wrap(RoundtripColumnValues(*client_, array)); + EXPECT_TRUE(CompareRecursive(*array, *result_typed)); +} + + +TEST_P(ClientCase, 
RoundtripArrayTFixedString) { + auto array = std::make_shared>(6); + array->Append({"hello", "world"}); + + auto result_typed = ColumnArrayT::Wrap(RoundtripColumnValues(*client_, array)); + EXPECT_TRUE(CompareRecursive(*array, *result_typed)); +} + +TEST_P(ClientCase, RoundtripArrayTString) { + auto array = std::make_shared>(); + array->Append({"hello", "world"}); + + auto result_typed = ColumnArrayT::Wrap(RoundtripColumnValues(*client_, array)); + EXPECT_TRUE(CompareRecursive(*array, *result_typed)); +} + const auto LocalHostEndpoint = ClientOptions() .SetHost( getEnvOrDefault("CLICKHOUSE_HOST", "localhost")) .SetPort( getEnvOrDefault("CLICKHOUSE_PORT", "9000")) diff --git a/ut/column_array_ut.cpp b/ut/column_array_ut.cpp new file mode 100644 index 00000000..001cc62e --- /dev/null +++ b/ut/column_array_ut.cpp @@ -0,0 +1,315 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "utils.h" + +#include +#include +#include + +namespace { +using namespace clickhouse; + +template +std::shared_ptr Create2DArray(const ValuesContainer& values) { + auto result = std::make_shared(std::make_shared(std::make_shared())); + for (size_t i = 0; i < values.size(); ++i) { + auto array_col = std::make_shared(std::make_shared()); + for (size_t j = 0; j < values[i].size(); ++j) + array_col->AppendAsColumn(std::make_shared(values[i][j])); + + result->AppendAsColumn(array_col); + } + + return result; +} + +template +std::shared_ptr CreateArray(const ValuesContainer& values) { + auto result = std::make_shared(std::make_shared()); + for (size_t i = 0; i < values.size(); ++i) { + result->AppendAsColumn(std::make_shared(values[i])); + } + + return result; +} + +} + +TEST(ColumnArray, Append) { + auto arr1 = std::make_shared(std::make_shared()); + auto arr2 = std::make_shared(std::make_shared()); + + auto id = std::make_shared(); + id->Append(1); + arr1->AppendAsColumn(id); + + 
id->Append(3); + arr2->AppendAsColumn(id); + + arr1->Append(arr2); + + auto col = arr1->GetAsColumn(1); + + ASSERT_EQ(arr1->Size(), 2u); + ASSERT_EQ(col->As()->At(0), 1u); + ASSERT_EQ(col->As()->At(1), 3u); +} + +TEST(ColumnArray, ArrayOfDecimal) { + auto column = std::make_shared(18, 10); + auto array = std::make_shared(column->CloneEmpty()); + + column->Append("1"); + column->Append("2"); + EXPECT_EQ(2u, column->Size()); + + array->AppendAsColumn(column); + ASSERT_EQ(1u, array->Size()); + EXPECT_EQ(2u, array->GetAsColumn(0)->Size()); +} + +TEST(ColumnArray, GetAsColumn) { + // Verify that result of GetAsColumn + // - is of proper type + // - has expected length + // - values match the predefined ones + + const std::vector> values = { + {1u, 2u, 3u}, + {4u, 5u, 6u, 7u, 8u, 9u}, + {0u}, + {}, + {13, 14} + }; + + auto array = CreateArray(values); + ASSERT_EQ(values.size(), array->Size()); + + for (size_t i = 0; i < values.size(); ++i) { + auto row = array->GetAsColumn(i); + std::shared_ptr typed_row; + + EXPECT_NO_THROW(typed_row = row->As()); + EXPECT_TRUE(CompareRecursive(values[i], *typed_row)); + } + + EXPECT_THROW(array->GetAsColumn(array->Size()), ValidationError); + EXPECT_THROW(array->GetAsColumn(array->Size() + 1), ValidationError); +} + +TEST(ColumnArray, Slice) { + const std::vector> values = { + {1u, 2u, 3u}, + {4u, 5u, 6u, 7u, 8u, 9u}, + {0u}, + {}, + {13, 14, 15} + }; + + std::shared_ptr untyped_array = CreateArray(values); + + for (size_t i = 0; i < values.size() - 1; ++i) { + auto slice = untyped_array->Slice(i, 1)->AsStrict(); + EXPECT_EQ(1u, slice->Size()); + EXPECT_TRUE(CompareRecursive(values[i], *slice->GetAsColumnTyped(0))); + } + + EXPECT_EQ(0u, untyped_array->Slice(0, 0)->Size()); + EXPECT_ANY_THROW(untyped_array->Slice(values.size(), 1)); + EXPECT_ANY_THROW(untyped_array->Slice(0, values.size() + 1)); +} + +TEST(ColumnArray, Slice_2D) { + // Verify that ColumnArray::Slice on 2D Array produces a 2D Array of proper type, size and contents. 
+ // Also check that slices can be of any size. + const std::vector>> values = { + {{1u, 2u}, {3u}}, + {{4u}, {5u, 6u, 7u}, {8u, 9u}, {}}, + {{0u}}, + {{}}, + {{13}, {14, 15}} + }; + + std::shared_ptr untyped_array = Create2DArray(values); + for (size_t i = 0; i < values.size() - 1; ++i) { + for (size_t slice_size = 0; slice_size < values.size() - i; ++slice_size) { + auto slice = untyped_array->Slice(i, slice_size)->AsStrict(); + EXPECT_EQ(slice_size, slice->Size()); + + for (size_t slice_row = 0; slice_row < slice_size; ++slice_row) { + SCOPED_TRACE(::testing::Message() << "i: " << i << " slice_size:" << slice_size << " row:" << slice_row); + auto val = slice->GetAsColumnTyped(slice_row); + ASSERT_EQ(values[i + slice_row].size(), val->Size()); + + for (size_t j = 0; j < values[i + slice_row].size(); ++j) { + ASSERT_TRUE(CompareRecursive(values[i + slice_row][j], *val->GetAsColumnTyped(j))); + } + } + } + } +} + + +template +auto AppendRowAndTest(ArrayTSpecialization& array, const RowValuesContainer& values) { + const size_t prev_size = array.Size(); + + array.Append(values); + EXPECT_EQ(prev_size + 1u, array.Size()); + + EXPECT_TRUE(CompareRecursive(values, array.At(prev_size))); + EXPECT_TRUE(CompareRecursive(values, array[prev_size])); + + // Check that both subscript and At() work properly. 
+ const auto & new_row = array.At(prev_size); + for (size_t i = 0; i < values.size(); ++i) { + EXPECT_TRUE(CompareRecursive(*(values.begin() + i), new_row[i])) + << " at pos: " << i; + EXPECT_TRUE(CompareRecursive(*(values.begin() + i), new_row.At(i))) + << " at pos: " << i; + } + EXPECT_THROW(new_row.At(new_row.size() + 1), clickhouse::ValidationError); +}; + +template +auto CreateAndTestColumnArrayT(const AllValuesContainer& all_values) { + auto array = std::make_shared>(); + + for (const auto & row : all_values) { + EXPECT_NO_FATAL_FAILURE(AppendRowAndTest(*array, row)); + } + EXPECT_TRUE(CompareRecursive(all_values, *array)); + EXPECT_THROW(array->At(array->Size() + 1), clickhouse::ValidationError); + + return array; +} + +TEST(ColumnArrayT, SimpleUInt64) { + auto array = std::make_shared>(); + array->Append({0, 1, 2}); + + ASSERT_EQ(1u, array->Size()); + EXPECT_EQ(0u, array->At(0).At(0)); + EXPECT_EQ(1u, (*array)[0][1]); + + EXPECT_THROW(array->At(2), ValidationError); + EXPECT_THROW(array->At(0).At(3), ValidationError); + EXPECT_THROW((*array)[0].At(3), ValidationError); +} + +TEST(ColumnArrayT, SimpleFixedString) { + using namespace std::literals; + auto array = std::make_shared>(6); + array->Append({"hello", "world"}); + + // Additional \0 since strings are padded from right with zeros in FixedString(6). 
+ EXPECT_EQ("hello\0"sv, array->At(0).At(0)); + + auto row = array->At(0); + EXPECT_EQ("hello\0"sv, row.At(0)); + EXPECT_EQ(6u, row[0].length()); + EXPECT_EQ("hello", row[0].substr(0, 5)); + + EXPECT_EQ("world\0"sv, (*array)[0][1]); +} + +TEST(ColumnArrayT, SimpleUInt64_2D) { + // Nested 2D-arrays are supported too: + auto array = std::make_shared>>(); + array->Append(std::vector>{{0}, {1, 1}, {2, 2, 2}}); + + ASSERT_EQ(1u, array->Size()); + EXPECT_EQ(0u, array->At(0).At(0).At(0)); + EXPECT_EQ(1u, (*array)[0][1][1]); + + EXPECT_THROW(array->At(2), ValidationError); +} + +TEST(ColumnArrayT, UInt64) { + // Check inserting\reading back data from clickhouse::ColumnArrayT + + const std::vector> values = { + {1u, 2u, 3u}, + {4u, 5u, 6u, 7u, 8u, 9u}, + {0u}, + {}, + {13, 14} + }; + CreateAndTestColumnArrayT(values); +} + +TEST(ColumnArrayT, UInt64_2D) { + // Check inserting\reading back data from 2D array: ColumnArrayT> + + const std::vector>> values = { + {{1u, 2u, 3u}, {4u, 5u, 6u}}, + {{4u, 5u, 6u}, {7u, 8u, 9u}, {10u, 11u}}, + {{}, {}}, + {}, + {{13, 14}, {}} + }; + + auto array_ptr = CreateAndTestColumnArrayT>(values); + const auto & array = *array_ptr; + + { + EXPECT_EQ(0u, array[3].size()); + + // operator[] doesn't check bounds. + // On empty rows attempt to access out-of-bound elements + // would actually cause access to the elements of the next row. + // hence non-0 value of `array[3][0].size()`, + // it is effectively the same as `array[3 + 1][0].size()` + EXPECT_EQ(2u, array[3][0].size()); + EXPECT_EQ(14u, array[3][0][1]); + EXPECT_EQ(0u, array[3][1].size()); + } +} + +TEST(ColumnArrayT, Wrap_UInt64) { + // Check that ColumnArrayT can wrap a pre-existing ColumnArray. 
+ + const std::vector> values = { + {1u, 2u, 3u}, + {4u, 5u, 6u, 7u, 8u, 9u}, + {0u}, + {}, + {13, 14} + }; + + auto wrapped_array = ColumnArrayT::Wrap(CreateArray(values)); + const auto & array = *wrapped_array; + + EXPECT_TRUE(CompareRecursive(values, array)); +} + +TEST(ColumnArrayT, Wrap_UInt64_2D) { + // Check that ColumnArrayT can wrap a pre-existing ColumnArray. + + const std::vector>> values = { + {{1u, 2u}, {3u}}, + {{4u}, {5u, 6u, 7u}, {8u, 9u}, {}}, + {{0u}}, + {{}}, + {{13}, {14, 15}} + }; + + auto wrapped_array = ColumnArrayT>::Wrap(Create2DArray(values)); + const auto & array = *wrapped_array; + + EXPECT_TRUE(CompareRecursive(values, array)); +} diff --git a/ut/columns_ut.cpp b/ut/columns_ut.cpp index 28c6f88d..9a806e39 100644 --- a/ut/columns_ut.cpp +++ b/ut/columns_ut.cpp @@ -16,149 +16,24 @@ #include #include "utils.h" +#include "value_generators.h" #include #include - - -// only compare PODs of equal size this way -template , std::is_pod>>> -bool operator==(const L & left, const R& right) { - return memcmp(&left, &right, sizeof(left)) == 0; -} - -bool operator==(const in6_addr & left, const std::string_view & right) { - return right.size() == sizeof(left) && memcmp(&left, right.data(), sizeof(left)) == 0; -} - -bool operator==(const std::string_view & left, const in6_addr & right) { - return left.size() == sizeof(right) && memcmp(left.data(), &right, sizeof(right)) == 0; -} +#include +#include namespace { using namespace clickhouse; using namespace std::literals::string_view_literals; -in_addr MakeIPv4(uint32_t ip) { - static_assert(sizeof(in_addr) == sizeof(ip)); - in_addr result; - memcpy(&result, &ip, sizeof(ip)); - - return result; -} - -in6_addr MakeIPv6(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, - uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, - uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) { - return in6_addr{{{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, 
v15}}}; -} - -in6_addr MakeIPv6(uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) { - return in6_addr{{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, v10, v11, v12, v13, v14, v15}}}; -} - -static std::vector MakeNumbers() { - return std::vector - {1, 2, 3, 7, 11, 13, 17, 19, 23, 29, 31}; -} - -static std::vector MakeBools() { - return std::vector - {1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0}; -} - -static std::vector MakeFixedStrings() { - return std::vector - {"aaa", "bbb", "ccc", "ddd"}; -} - -static std::vector MakeStrings() { - return std::vector - {"a", "ab", "abc", "abcd"}; -} - -static std::vector MakeUUIDs() { - return std::vector - {0xbb6a8c699ab2414cllu, 0x86697b7fd27f0825llu, - 0x84b9f24bc26b49c6llu, 0xa03b4ab723341951llu, - 0x3507213c178649f9llu, 0x9faf035d662f60aellu}; -} - static const auto LOWCARDINALITY_STRING_FOOBAR_10_ITEMS_BINARY = "\x01\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00" "\x09\x00\x00\x00\x00\x00\x00\x00\x00\x06\x46\x6f\x6f\x42\x61\x72" "\x01\x31\x01\x32\x03\x46\x6f\x6f\x01\x34\x03\x42\x61\x72\x01\x37" "\x01\x38\x0a\x00\x00\x00\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06" "\x04\x07\x08\x04"sv; - -template -auto GenerateVector(size_t items, Generator && gen) { - std::vector> result; - result.reserve(items); - for (size_t i = 0; i < items; ++i) { - result.push_back(std::move(gen(i))); - } - - return result; -} - -std::string FooBarSeq(size_t i) { - std::string result; - if (i % 3 == 0) - result += "Foo"; - if (i % 5 == 0) - result += "Bar"; - if (result.empty()) - result = std::to_string(i); - - return result; -} - -template -auto SameValueSeq(const U & value) { - return [&value](size_t) -> T { - return value; - }; -} - -template -auto AlternateGenerators(Generator1 && gen1, Generator2 && gen2) { - return [&gen1, &gen2](size_t i) -> ResultType { - if (i % 2 == 0) - return gen1(i/2); - else - return gen2(i/2); - }; -} - -template -std::vector ConcatSequences(std::vector && vec1, std::vector && vec2) { - std::vector 
result(vec1); - - result.reserve(vec1.size() + vec2.size()); - result.insert(result.end(), vec2.begin(), vec2.end()); - - return result; -} - -static std::vector MakeDateTime64s() { - static const auto seconds_multiplier = 1'000'000; - static const auto year = 86400ull * 365 * seconds_multiplier; // ~approx, but this doesn't matter here. - - // Approximatelly +/- 200 years around epoch (and value of epoch itself) - // with non zero seconds and sub-seconds. - // Please note there are values outside of DateTime (32-bit) range that might - // not have correct string representation in CH yet, - // but still are supported as Int64 values. - return GenerateVector(200, - [] (size_t i )-> Int64 { - return (i - 100) * year * 2 + (i * 10) * seconds_multiplier + i; - }); -} - } // TODO: add tests for ColumnDecimal. @@ -184,7 +59,7 @@ TEST(ColumnsCase, NumericSlice) { TEST(ColumnsCase, FixedStringInit) { - const auto column_data = MakeFixedStrings(); + const auto column_data = MakeFixedStrings(3); auto col = std::make_shared(3, column_data); ASSERT_EQ(col->Size(), column_data.size()); @@ -201,7 +76,7 @@ TEST(ColumnsCase, FixedString_Append_SmallStrings) { // are padded with zeroes on insertion. 
const size_t string_size = 7; - const auto column_data = MakeFixedStrings(); + const auto column_data = MakeFixedStrings(3); auto col = std::make_shared(string_size); size_t i = 0; @@ -237,26 +112,6 @@ TEST(ColumnsCase, StringInit) { } -TEST(ColumnsCase, ArrayAppend) { - auto arr1 = std::make_shared(std::make_shared()); - auto arr2 = std::make_shared(std::make_shared()); - - auto id = std::make_shared(); - id->Append(1); - arr1->AppendAsColumn(id); - - id->Append(3); - arr2->AppendAsColumn(id); - - arr1->Append(arr2); - - auto col = arr1->GetAsColumn(1); - - ASSERT_EQ(arr1->Size(), 2u); - ASSERT_EQ(col->As()->At(0), 1u); - ASSERT_EQ(col->As()->At(1), 3u); -} - TEST(ColumnsCase, TupleAppend){ auto tuple1 = std::make_shared(std::vector({ std::make_shared(), @@ -324,7 +179,7 @@ TEST(ColumnsCase, DateTime64_6) { TEST(ColumnsCase, DateTime64_Append_At) { auto column = std::make_shared(6ul); - const auto data = MakeDateTime64s(); + const auto data = MakeDateTime64s(6ul); for (const auto & v : data) { column->Append(v); } @@ -342,7 +197,7 @@ TEST(ColumnsCase, DateTime64_Clear) { ASSERT_NO_THROW(column->Clear()); ASSERT_EQ(0u, column->Size()); - const auto data = MakeDateTime64s(); + const auto data = MakeDateTime64s(6ul); for (const auto & v : data) { column->Append(v); } @@ -354,7 +209,7 @@ TEST(ColumnsCase, DateTime64_Clear) { TEST(ColumnsCase, DateTime64_Swap) { auto column = std::make_shared(6ul); - const auto data = MakeDateTime64s(); + const auto data = MakeDateTime64s(6ul); for (const auto & v : data) { column->Append(v); } @@ -379,12 +234,12 @@ TEST(ColumnsCase, DateTime64_Slice) { { // Empty slice on empty column - auto slice = column->Slice(0, 0)->As(); + auto slice = column->CloneEmpty()->As(); ASSERT_EQ(0u, slice->Size()); ASSERT_EQ(column->GetPrecision(), slice->GetPrecision()); } - const auto data = MakeDateTime64s(); + const auto data = MakeDateTime64s(6ul); const size_t size = data.size(); ASSERT_GT(size, 4u); // so the partial slice below has half of the 
elements of the column @@ -394,7 +249,7 @@ TEST(ColumnsCase, DateTime64_Slice) { { // Empty slice on non-empty column - auto slice = column->Slice(0, 0)->As(); + auto slice = column->CloneEmpty()->As(); ASSERT_EQ(0u, slice->Size()); ASSERT_EQ(column->GetPrecision(), slice->GetPrecision()); } @@ -433,7 +288,7 @@ TEST(ColumnsCase, DateTime64_Slice_OUTOFBAND) { // Non-Empty slice on empty column EXPECT_EQ(0u, column->Slice(0, 10)->Size()); - const auto data = MakeDateTime64s(); + const auto data = MakeDateTime64s(6ul); for (const auto & v : data) { column->Append(v); } @@ -459,14 +314,6 @@ TEST(ColumnsCase, Date2038) { ASSERT_EQ(largeDate, col1->At(0)); } -TEST(ColumnsCase, DateTime) { - ASSERT_NE(nullptr, CreateColumnByType("DateTime")); - ASSERT_NE(nullptr, CreateColumnByType("DateTime('Europe/Moscow')")); - - ASSERT_EQ(CreateColumnByType("DateTime('UTC')")->As()->Timezone(), "UTC"); - ASSERT_EQ(CreateColumnByType("DateTime64(3, 'UTC')")->As()->Timezone(), "UTC"); -} - TEST(ColumnsCase, EnumTest) { std::vector enum_items = {{"Hi", 1}, {"Hello", 2}}; @@ -504,8 +351,17 @@ TEST(ColumnsCase, NullableSlice) { ASSERT_EQ(subData->At(3), 17u); } +// internal representation of UUID data in ColumnUUID +std::vector MakeUUID_data() { + return { + 0xbb6a8c699ab2414cllu, 0x86697b7fd27f0825llu, + 0x84b9f24bc26b49c6llu, 0xa03b4ab723341951llu, + 0x3507213c178649f9llu, 0x9faf035d662f60aellu + }; +} + TEST(ColumnsCase, UUIDInit) { - auto col = std::make_shared(std::make_shared(MakeUUIDs())); + auto col = std::make_shared(std::make_shared(MakeUUID_data())); ASSERT_EQ(col->Size(), 3u); ASSERT_EQ(col->At(0), UInt128(0xbb6a8c699ab2414cllu, 0x86697b7fd27f0825llu)); @@ -513,7 +369,7 @@ TEST(ColumnsCase, UUIDInit) { } TEST(ColumnsCase, UUIDSlice) { - auto col = std::make_shared(std::make_shared(MakeUUIDs())); + auto col = std::make_shared(std::make_shared(MakeUUID_data())); auto sub = col->Slice(1, 2)->As(); ASSERT_EQ(sub->Size(), 2u); @@ -765,7 +621,7 @@ TEST(ColumnsCase, 
ColumnDecimal128_from_string_overflow) { TEST(ColumnsCase, ColumnLowCardinalityString_Append_and_Read) { const size_t items_count = 11; ColumnLowCardinalityT col; - for (const auto & item : GenerateVector(items_count, &FooBarSeq)) { + for (const auto & item : GenerateVector(items_count, &FooBarGenerator)) { col.Append(item); } @@ -773,15 +629,15 @@ TEST(ColumnsCase, ColumnLowCardinalityString_Append_and_Read) { ASSERT_EQ(col.GetDictionarySize(), 8u + 1); // 8 unique items from sequence + 1 null-item for (size_t i = 0; i < items_count; ++i) { - ASSERT_EQ(col.At(i), FooBarSeq(i)) << " at pos: " << i; - ASSERT_EQ(col[i], FooBarSeq(i)) << " at pos: " << i; + ASSERT_EQ(col.At(i), FooBarGenerator(i)) << " at pos: " << i; + ASSERT_EQ(col[i], FooBarGenerator(i)) << " at pos: " << i; } } TEST(ColumnsCase, ColumnLowCardinalityString_Clear_and_Append) { const size_t items_count = 11; ColumnLowCardinalityT col; - for (const auto & item : GenerateVector(items_count, &FooBarSeq)) + for (const auto & item : GenerateVector(items_count, &FooBarGenerator)) { col.Append(item); } @@ -790,7 +646,7 @@ TEST(ColumnsCase, ColumnLowCardinalityString_Clear_and_Append) { ASSERT_EQ(col.Size(), 0u); ASSERT_EQ(col.GetDictionarySize(), 1u); // null-item - for (const auto & item : GenerateVector(items_count, &FooBarSeq)) + for (const auto & item : GenerateVector(items_count, &FooBarGenerator)) { col.Append(item); } @@ -809,7 +665,7 @@ TEST(ColumnsCase, ColumnLowCardinalityString_Load) { ASSERT_TRUE(col.Load(&buffer, items_count)); for (size_t i = 0; i < items_count; ++i) { - EXPECT_EQ(col.At(i), FooBarSeq(i)) << " at pos: " << i; + EXPECT_EQ(col.At(i), FooBarGenerator(i)) << " at pos: " << i; } } @@ -818,7 +674,7 @@ TEST(ColumnsCase, ColumnLowCardinalityString_Load) { TEST(ColumnsCase, DISABLED_ColumnLowCardinalityString_Save) { const size_t items_count = 10; ColumnLowCardinalityT col; - for (const auto & item : GenerateVector(items_count, &FooBarSeq)) { + for (const auto & item : 
GenerateVector(items_count, &FooBarGenerator)) { col.Append(item); } @@ -855,7 +711,7 @@ TEST(ColumnsCase, ColumnLowCardinalityString_SaveAndLoad) { // Verify that we can load binary representation back ColumnLowCardinalityT col; - const auto items = GenerateVector(10, &FooBarSeq); + const auto items = GenerateVector(10, &FooBarGenerator); for (const auto & item : items) { col.Append(item); } @@ -882,7 +738,7 @@ TEST(ColumnsCase, ColumnLowCardinalityString_SaveAndLoad) { TEST(ColumnsCase, ColumnLowCardinalityString_WithEmptyString_1) { // Verify that when empty string is added to a LC column it can be retrieved back as empty string. ColumnLowCardinalityT col; - const auto values = GenerateVector(10, AlternateGenerators(SameValueSeq(""), FooBarSeq)); + const auto values = GenerateVector(10, AlternateGenerators(SameValueGenerator(""), FooBarGenerator)); for (const auto & item : values) { col.Append(item); } @@ -896,7 +752,7 @@ TEST(ColumnsCase, ColumnLowCardinalityString_WithEmptyString_2) { // Verify that when empty string is added to a LC column it can be retrieved back as empty string. // (Ver2): Make sure that outcome doesn't depend if empty values are on odd positions ColumnLowCardinalityT col; - const auto values = GenerateVector(10, AlternateGenerators(FooBarSeq, SameValueSeq(""))); + const auto values = GenerateVector(10, AlternateGenerators(FooBarGenerator, SameValueGenerator(""))); for (const auto & item : values) { col.Append(item); } @@ -909,7 +765,7 @@ TEST(ColumnsCase, ColumnLowCardinalityString_WithEmptyString_2) { TEST(ColumnsCase, ColumnLowCardinalityString_WithEmptyString_3) { // When we have many leading empty strings and some non-empty values. 
ColumnLowCardinalityT col; - const auto values = ConcatSequences(GenerateVector(100, SameValueSeq("")), GenerateVector(5, FooBarSeq)); + const auto values = ConcatSequences(GenerateVector(100, SameValueGenerator("")), GenerateVector(5, FooBarGenerator)); for (const auto & item : values) { col.Append(item); } @@ -919,83 +775,3 @@ TEST(ColumnsCase, ColumnLowCardinalityString_WithEmptyString_3) { } } -TEST(ColumnsCase, CreateSimpleAggregateFunction) { - auto col = CreateColumnByType("SimpleAggregateFunction(funt, Int32)"); - - ASSERT_EQ("Int32", col->Type()->GetName()); - ASSERT_EQ(Type::Int32, col->Type()->GetCode()); - ASSERT_NE(nullptr, col->As()); -} - - -TEST(ColumnsCase, UnmatchedBrackets) { - // When type string has unmatched brackets, CreateColumnByType must return nullptr. - ASSERT_EQ(nullptr, CreateColumnByType("FixedString(10")); - ASSERT_EQ(nullptr, CreateColumnByType("Nullable(FixedString(10000")); - ASSERT_EQ(nullptr, CreateColumnByType("Nullable(FixedString(10000)")); - ASSERT_EQ(nullptr, CreateColumnByType("LowCardinality(Nullable(FixedString(10000")); - ASSERT_EQ(nullptr, CreateColumnByType("LowCardinality(Nullable(FixedString(10000)")); - ASSERT_EQ(nullptr, CreateColumnByType("LowCardinality(Nullable(FixedString(10000))")); - ASSERT_EQ(nullptr, CreateColumnByType("Array(LowCardinality(Nullable(FixedString(10000")); - ASSERT_EQ(nullptr, CreateColumnByType("Array(LowCardinality(Nullable(FixedString(10000)")); - ASSERT_EQ(nullptr, CreateColumnByType("Array(LowCardinality(Nullable(FixedString(10000))")); - ASSERT_EQ(nullptr, CreateColumnByType("Array(LowCardinality(Nullable(FixedString(10000)))")); -} - -TEST(ColumnsCase, LowCardinalityAsWrappedColumn) { - CreateColumnByTypeSettings create_column_settings; - create_column_settings.low_cardinality_as_wrapped_column = true; - - ASSERT_EQ(Type::String, CreateColumnByType("LowCardinality(String)", create_column_settings)->GetType().GetCode()); - ASSERT_EQ(Type::String, 
CreateColumnByType("LowCardinality(String)", create_column_settings)->As()->GetType().GetCode()); - - ASSERT_EQ(Type::FixedString, CreateColumnByType("LowCardinality(FixedString(10000))", create_column_settings)->GetType().GetCode()); - ASSERT_EQ(Type::FixedString, CreateColumnByType("LowCardinality(FixedString(10000))", create_column_settings)->As()->GetType().GetCode()); -} - -TEST(ColumnsCase, ArrayOfDecimal) { - auto column = std::make_shared(18, 10); - auto array = std::make_shared(column->Slice(0, 0)); - - column->Append("1"); - column->Append("2"); - EXPECT_EQ(2u, column->Size()); - - array->AppendAsColumn(column); - ASSERT_EQ(1u, array->Size()); - EXPECT_EQ(2u, array->GetAsColumn(0)->Size()); -} - - -class ColumnsCaseWithName : public ::testing::TestWithParam -{}; - -TEST_P(ColumnsCaseWithName, CreateColumnByType) -{ - const auto col = CreateColumnByType(GetParam()); - ASSERT_NE(nullptr, col); - EXPECT_EQ(col->GetType().GetName(), GetParam()); -} - -INSTANTIATE_TEST_SUITE_P(Basic, ColumnsCaseWithName, ::testing::Values( - "Int8", "Int16", "Int32", "Int64", - "UInt8", "UInt16", "UInt32", "UInt64", - "String", "Date", "DateTime", - "UUID", "Int128" -)); - -INSTANTIATE_TEST_SUITE_P(Parametrized, ColumnsCaseWithName, ::testing::Values( - "FixedString(0)", "FixedString(10000)", - "DateTime('UTC')", "DateTime64(3, 'UTC')", - "Decimal(9,3)", "Decimal(18,3)", - "Enum8('ONE' = 1, 'TWO' = 2)", - "Enum16('ONE' = 1, 'TWO' = 2, 'THREE' = 3, 'FOUR' = 4)" -)); - - -INSTANTIATE_TEST_SUITE_P(Nested, ColumnsCaseWithName, ::testing::Values( - "Nullable(FixedString(10000))", - "Nullable(LowCardinality(FixedString(10000)))", - "Array(Nullable(LowCardinality(FixedString(10000))))", - "Array(Enum8('ONE' = 1, 'TWO' = 2))" -)); diff --git a/ut/itemview_ut.cpp b/ut/itemview_ut.cpp index 6f174070..40da5027 100644 --- a/ut/itemview_ut.cpp +++ b/ut/itemview_ut.cpp @@ -13,7 +13,8 @@ using namespace clickhouse; } TEST(ItemView, StorableTypes) { -/// Validate that is it possible to store 
proper value of a proper type into a ItemView. +/// Validate that it is possible to store a proper value of a proper type into an ItemView +/// and get it back with the corresponding ItemView::get #define TEST_ITEMVIEW_TYPE_VALUE(TypeCode, NativeType, NativeValue) \ EXPECT_EQ(static_cast(NativeValue), ItemView(TypeCode, static_cast(NativeValue)).get()) \ @@ -49,7 +50,12 @@ TEST(ItemView, StorableTypes) { TEST_ITEMVIEW_TYPE_VALUES(Type::Code::DateTime, uint32_t); TEST_ITEMVIEW_TYPE_VALUES(Type::Code::DateTime64, int64_t); + TEST_ITEMVIEW_TYPE_VALUES(Type::Code::Decimal, int32_t); + TEST_ITEMVIEW_TYPE_VALUES(Type::Code::Decimal, int64_t); TEST_ITEMVIEW_TYPE_VALUES(Type::Code::Decimal, Int128); + TEST_ITEMVIEW_TYPE_VALUES(Type::Code::Decimal, uint32_t); + TEST_ITEMVIEW_TYPE_VALUES(Type::Code::Decimal, uint64_t); +// TEST_ITEMVIEW_TYPE_VALUES(Type::Code::Decimal, UInt128); TEST_ITEMVIEW_TYPE_VALUES(Type::Code::Decimal32, int32_t); TEST_ITEMVIEW_TYPE_VALUES(Type::Code::Decimal64, int64_t); TEST_ITEMVIEW_TYPE_VALUES(Type::Code::Decimal128, Int128); @@ -64,122 +70,120 @@ TEST(ItemView, StorableTypes) { TEST_ITEMVIEW_TYPE_VALUE(Type::Code::FixedString, std::string_view, "here is a string"); } -#define TEST_ITEMVIEW_ERROR(TypeCode, NativeType) \ - EXPECT_ANY_THROW(ItemView(TypeCode, static_cast(0))) \ +#define EXPECT_ITEMVIEW_ERROR(TypeCode, NativeType) \ + EXPECT_THROW(ItemView(TypeCode, static_cast(0)), AssertionError) \ << " TypeCode:" << #TypeCode << " NativeType: " << #NativeType; TEST(ItemView, ErrorTypes) { // Types that is impossible to store certain Type::Code into an ItemView. 
- TEST_ITEMVIEW_ERROR(Type::Code::Array, int); - TEST_ITEMVIEW_ERROR(Type::Code::Nullable, int); - TEST_ITEMVIEW_ERROR(Type::Code::Tuple, int); - TEST_ITEMVIEW_ERROR(Type::Code::LowCardinality, int); + EXPECT_ITEMVIEW_ERROR(Type::Code::Array, int); + EXPECT_ITEMVIEW_ERROR(Type::Code::Nullable, int); + EXPECT_ITEMVIEW_ERROR(Type::Code::Tuple, int); + EXPECT_ITEMVIEW_ERROR(Type::Code::LowCardinality, int); } TEST(ItemView, TypeSizeMismatch) { // Validate that it is impossible to initialize ItemView with mismatching Type::Code and native value. - TEST_ITEMVIEW_ERROR(Type::Code::Int8, int16_t); - TEST_ITEMVIEW_ERROR(Type::Code::Int8, int32_t); - TEST_ITEMVIEW_ERROR(Type::Code::Int8, int64_t); - TEST_ITEMVIEW_ERROR(Type::Code::Int8, Int128); - - TEST_ITEMVIEW_ERROR(Type::Code::Int16, int8_t); - TEST_ITEMVIEW_ERROR(Type::Code::Int16, int32_t); - TEST_ITEMVIEW_ERROR(Type::Code::Int16, int64_t); - TEST_ITEMVIEW_ERROR(Type::Code::Int16, Int128); - - TEST_ITEMVIEW_ERROR(Type::Code::Int32, int8_t); - TEST_ITEMVIEW_ERROR(Type::Code::Int32, int16_t); - TEST_ITEMVIEW_ERROR(Type::Code::Int32, int64_t); - TEST_ITEMVIEW_ERROR(Type::Code::Int32, Int128); - - TEST_ITEMVIEW_ERROR(Type::Code::Int64, int8_t); - TEST_ITEMVIEW_ERROR(Type::Code::Int64, int16_t); - TEST_ITEMVIEW_ERROR(Type::Code::Int64, int32_t); - TEST_ITEMVIEW_ERROR(Type::Code::Int64, Int128); - - TEST_ITEMVIEW_ERROR(Type::Code::Int128, int8_t); - TEST_ITEMVIEW_ERROR(Type::Code::Int128, int16_t); - TEST_ITEMVIEW_ERROR(Type::Code::Int128, int32_t); - TEST_ITEMVIEW_ERROR(Type::Code::Int128, int64_t); - - TEST_ITEMVIEW_ERROR(Type::Code::UInt8, int16_t); - TEST_ITEMVIEW_ERROR(Type::Code::UInt8, int32_t); - TEST_ITEMVIEW_ERROR(Type::Code::UInt8, int64_t); - TEST_ITEMVIEW_ERROR(Type::Code::UInt8, Int128); - - TEST_ITEMVIEW_ERROR(Type::Code::UInt16, int8_t); - TEST_ITEMVIEW_ERROR(Type::Code::UInt16, int32_t); - TEST_ITEMVIEW_ERROR(Type::Code::UInt16, int64_t); - TEST_ITEMVIEW_ERROR(Type::Code::UInt16, Int128); - - 
TEST_ITEMVIEW_ERROR(Type::Code::UInt32, int8_t); - TEST_ITEMVIEW_ERROR(Type::Code::UInt32, int16_t); - TEST_ITEMVIEW_ERROR(Type::Code::UInt32, int64_t); - TEST_ITEMVIEW_ERROR(Type::Code::UInt32, Int128); - - TEST_ITEMVIEW_ERROR(Type::Code::UInt64, int8_t); - TEST_ITEMVIEW_ERROR(Type::Code::UInt64, int16_t); - TEST_ITEMVIEW_ERROR(Type::Code::UInt64, int32_t); - TEST_ITEMVIEW_ERROR(Type::Code::UInt64, Int128); - - TEST_ITEMVIEW_ERROR(Type::Code::Float32, int8_t); - TEST_ITEMVIEW_ERROR(Type::Code::Float32, int16_t); - TEST_ITEMVIEW_ERROR(Type::Code::Float32, int64_t); - TEST_ITEMVIEW_ERROR(Type::Code::Float32, Int128); - TEST_ITEMVIEW_ERROR(Type::Code::Float32, double); - - TEST_ITEMVIEW_ERROR(Type::Code::Float64, int8_t); - TEST_ITEMVIEW_ERROR(Type::Code::Float64, int16_t); - TEST_ITEMVIEW_ERROR(Type::Code::Float64, int32_t); - TEST_ITEMVIEW_ERROR(Type::Code::Float64, Int128); - TEST_ITEMVIEW_ERROR(Type::Code::Float64, float); - - TEST_ITEMVIEW_ERROR(Type::Code::Date, int8_t); - TEST_ITEMVIEW_ERROR(Type::Code::Date, int32_t); - TEST_ITEMVIEW_ERROR(Type::Code::Date, int64_t); - TEST_ITEMVIEW_ERROR(Type::Code::Date, Int128); - - TEST_ITEMVIEW_ERROR(Type::Code::DateTime, int8_t); - TEST_ITEMVIEW_ERROR(Type::Code::DateTime, int16_t); - TEST_ITEMVIEW_ERROR(Type::Code::DateTime, int64_t); - TEST_ITEMVIEW_ERROR(Type::Code::DateTime, Int128); - - TEST_ITEMVIEW_ERROR(Type::Code::DateTime64, int8_t); - TEST_ITEMVIEW_ERROR(Type::Code::DateTime64, int16_t); - TEST_ITEMVIEW_ERROR(Type::Code::DateTime64, int32_t); - TEST_ITEMVIEW_ERROR(Type::Code::DateTime64, Int128); - - TEST_ITEMVIEW_ERROR(Type::Code::Decimal, int8_t); - TEST_ITEMVIEW_ERROR(Type::Code::Decimal, int16_t); - TEST_ITEMVIEW_ERROR(Type::Code::Decimal, int32_t); - TEST_ITEMVIEW_ERROR(Type::Code::Decimal, int64_t); - - TEST_ITEMVIEW_ERROR(Type::Code::Decimal32, int8_t); - TEST_ITEMVIEW_ERROR(Type::Code::Decimal32, int16_t); - TEST_ITEMVIEW_ERROR(Type::Code::Decimal32, int64_t); - 
TEST_ITEMVIEW_ERROR(Type::Code::Decimal32, Int128); - - TEST_ITEMVIEW_ERROR(Type::Code::Decimal64, int8_t); - TEST_ITEMVIEW_ERROR(Type::Code::Decimal64, int16_t); - TEST_ITEMVIEW_ERROR(Type::Code::Decimal64, int32_t); - TEST_ITEMVIEW_ERROR(Type::Code::Decimal64, Int128); - - TEST_ITEMVIEW_ERROR(Type::Code::Decimal128, int8_t); - TEST_ITEMVIEW_ERROR(Type::Code::Decimal128, int16_t); - TEST_ITEMVIEW_ERROR(Type::Code::Decimal128, int32_t); - TEST_ITEMVIEW_ERROR(Type::Code::Decimal128, int64_t); - - TEST_ITEMVIEW_ERROR(Type::Code::Enum8, int16_t); - TEST_ITEMVIEW_ERROR(Type::Code::Enum8, int32_t); - TEST_ITEMVIEW_ERROR(Type::Code::Enum8, int64_t); - TEST_ITEMVIEW_ERROR(Type::Code::Enum8, Int128); - - TEST_ITEMVIEW_ERROR(Type::Code::Enum16, int8_t); - TEST_ITEMVIEW_ERROR(Type::Code::Enum16, int32_t); - TEST_ITEMVIEW_ERROR(Type::Code::Enum16, int64_t); - TEST_ITEMVIEW_ERROR(Type::Code::Enum16, Int128); + EXPECT_ITEMVIEW_ERROR(Type::Code::Int8, int16_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Int8, int32_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Int8, int64_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Int8, Int128); + + EXPECT_ITEMVIEW_ERROR(Type::Code::Int16, int8_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Int16, int32_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Int16, int64_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Int16, Int128); + + EXPECT_ITEMVIEW_ERROR(Type::Code::Int32, int8_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Int32, int16_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Int32, int64_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Int32, Int128); + + EXPECT_ITEMVIEW_ERROR(Type::Code::Int64, int8_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Int64, int16_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Int64, int32_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Int64, Int128); + + EXPECT_ITEMVIEW_ERROR(Type::Code::Int128, int8_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Int128, int16_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Int128, int32_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Int128, int64_t); + + 
EXPECT_ITEMVIEW_ERROR(Type::Code::UInt8, int16_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::UInt8, int32_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::UInt8, int64_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::UInt8, Int128); + + EXPECT_ITEMVIEW_ERROR(Type::Code::UInt16, int8_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::UInt16, int32_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::UInt16, int64_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::UInt16, Int128); + + EXPECT_ITEMVIEW_ERROR(Type::Code::UInt32, int8_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::UInt32, int16_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::UInt32, int64_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::UInt32, Int128); + + EXPECT_ITEMVIEW_ERROR(Type::Code::UInt64, int8_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::UInt64, int16_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::UInt64, int32_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::UInt64, Int128); + + EXPECT_ITEMVIEW_ERROR(Type::Code::Float32, int8_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Float32, int16_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Float32, int64_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Float32, Int128); + EXPECT_ITEMVIEW_ERROR(Type::Code::Float32, double); + + EXPECT_ITEMVIEW_ERROR(Type::Code::Float64, int8_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Float64, int16_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Float64, int32_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Float64, Int128); + EXPECT_ITEMVIEW_ERROR(Type::Code::Float64, float); + + EXPECT_ITEMVIEW_ERROR(Type::Code::Date, int8_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Date, int32_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Date, int64_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Date, Int128); + + EXPECT_ITEMVIEW_ERROR(Type::Code::DateTime, int8_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::DateTime, int16_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::DateTime, int64_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::DateTime, Int128); + + EXPECT_ITEMVIEW_ERROR(Type::Code::DateTime64, int8_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::DateTime64, int16_t); + 
EXPECT_ITEMVIEW_ERROR(Type::Code::DateTime64, int32_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::DateTime64, Int128); + + EXPECT_ITEMVIEW_ERROR(Type::Code::Decimal, int8_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Decimal, int8_t); + + EXPECT_ITEMVIEW_ERROR(Type::Code::Decimal32, int8_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Decimal32, int16_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Decimal32, int64_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Decimal32, Int128); + + EXPECT_ITEMVIEW_ERROR(Type::Code::Decimal64, int8_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Decimal64, int16_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Decimal64, int32_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Decimal64, Int128); + + EXPECT_ITEMVIEW_ERROR(Type::Code::Decimal128, int8_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Decimal128, int16_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Decimal128, int32_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Decimal128, int64_t); + + EXPECT_ITEMVIEW_ERROR(Type::Code::Enum8, int16_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Enum8, int32_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Enum8, int64_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Enum8, Int128); + + EXPECT_ITEMVIEW_ERROR(Type::Code::Enum16, int8_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Enum16, int32_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Enum16, int64_t); + EXPECT_ITEMVIEW_ERROR(Type::Code::Enum16, Int128); } TEST(ItemView, Int128_values) { diff --git a/ut/low_cardinality_nullable_tests.cpp b/ut/low_cardinality_nullable_tests.cpp new file mode 100644 index 00000000..41c0d3c5 --- /dev/null +++ b/ut/low_cardinality_nullable_tests.cpp @@ -0,0 +1,116 @@ +#include +#include +#include "clickhouse/columns/nullable.h" +#include "clickhouse/columns/lowcardinality.h" +#include "clickhouse/client.h" +#include "utils.h" +#include "clickhouse/base/wire_format.h" +#include + +namespace +{ +using namespace clickhouse; +} + +static const auto localHostEndpoint = ClientOptions() + .SetHost( getEnvOrDefault("CLICKHOUSE_HOST", "localhost")) + .SetPort( 
getEnvOrDefault("CLICKHOUSE_PORT", "9000")) + .SetUser( getEnvOrDefault("CLICKHOUSE_USER", "default")) + .SetPassword( getEnvOrDefault("CLICKHOUSE_PASSWORD", "")) + .SetDefaultDatabase(getEnvOrDefault("CLICKHOUSE_DB", "default")); + + +ColumnRef buildTestColumn(const std::vector& rowsData, const std::vector& nulls) { + auto stringColumn = std::make_shared(rowsData); + auto nullsColumn = std::make_shared(nulls); + auto lowCardinalityColumn = std::make_shared( + std::make_shared(stringColumn, nullsColumn) + ); + + return lowCardinalityColumn; +} + +void createTable(Client& client) { + client.Execute("DROP TEMPORARY TABLE IF EXISTS lc_of_nullable"); + client.Execute("CREATE TEMPORARY TABLE IF NOT EXISTS lc_of_nullable (words LowCardinality(Nullable(String))) ENGINE = Memory"); +} + +TEST(LowCardinalityOfNullable, InsertAndQuery) { + const auto rowsData = std::vector { + "eminem", + "", + "tupac", + "shady", + "fifty", + "dre", + "", + "cube" + }; + + const auto nulls = std::vector { + false, false, true, false, true, true, false, false + }; + + auto column = buildTestColumn(rowsData, nulls); + + Block block; + block.AppendColumn("words", column); + + Client client(ClientOptions(localHostEndpoint) + .SetBakcwardCompatibilityFeatureLowCardinalityAsWrappedColumn(false) + .SetPingBeforeQuery(true)); + + createTable(client); + + client.Insert("lc_of_nullable", block); + + client.Select("SELECT * FROM lc_of_nullable", [&](const Block& bl) { + for (size_t row = 0; row < bl.GetRowCount(); row++) { + auto lc_col = bl[0]->As(); + auto item = lc_col->GetItem(row); + + if (nulls[row]) { + ASSERT_EQ(Type::Code::Void, item.type); + } else { + ASSERT_EQ(rowsData[row], item.get()); + } + } + }); +} + +TEST(LowCardinalityOfNullable, InsertAndQueryEmpty) { + auto column = buildTestColumn({}, {}); + + Block block; + block.AppendColumn("words", column); + + Client client(ClientOptions(localHostEndpoint) + .SetBakcwardCompatibilityFeatureLowCardinalityAsWrappedColumn(false) + 
.SetPingBeforeQuery(true)); + + createTable(client); + + EXPECT_NO_THROW(client.Insert("lc_of_nullable", block)); + + client.Select("SELECT * FROM lc_of_nullable", [&](const Block& bl) { + ASSERT_EQ(bl.GetRowCount(), 0u); + }); +} + +TEST(LowCardinalityOfNullable, ThrowOnBackwardsCompatibleLCColumn) { + auto column = buildTestColumn({}, {}); + + Block block; + block.AppendColumn("words", column); + + Client client(ClientOptions(localHostEndpoint) + .SetPingBeforeQuery(true)); + + createTable(client); + + EXPECT_THROW(client.Insert("lc_of_nullable", block), UnimplementedError); + + client.Select("SELECT * FROM lc_of_nullable", [&](const Block& bl) { + ASSERT_EQ(bl.GetRowCount(), 0u); + }); +} \ No newline at end of file diff --git a/ut/performance_tests.cpp b/ut/performance_tests.cpp index d772644f..bafa07dd 100644 --- a/ut/performance_tests.cpp +++ b/ut/performance_tests.cpp @@ -15,6 +15,7 @@ #include #include "utils.h" +#include "utils_performance.h" using namespace clickhouse; @@ -84,9 +85,9 @@ TYPED_TEST_SUITE_P(ColumnPerformanceTest); // Turns out this is the easiest way to skip test with current version of gtest #ifndef NDEBUG -# define SKIP_IN_DEBUG_BUILDS() do { std::cerr << "Test skipped...\n"; return; } while(0) -#else # define SKIP_IN_DEBUG_BUILDS() (void)(0) +#else +# define SKIP_IN_DEBUG_BUILDS() GTEST_SKIP_("Test skipped for DEBUG build...") #endif TYPED_TEST_P(ColumnPerformanceTest, SaveAndLoad) { diff --git a/ut/roundtrip_column.cpp b/ut/roundtrip_column.cpp new file mode 100644 index 00000000..c4685a3b --- /dev/null +++ b/ut/roundtrip_column.cpp @@ -0,0 +1,40 @@ +#include "roundtrip_column.h" + +#include +#include + +#include + +namespace { +using namespace clickhouse; +} + +ColumnRef RoundtripColumnValues(Client& client, ColumnRef expected) { + // Create a temporary table with a single column + // insert values from `expected` + // select and aggregate all values from block into `result` column + auto result = expected->CloneEmpty(); + + const 
std::string type_name = result->GetType().GetName(); + client.Execute("DROP TEMPORARY TABLE IF EXISTS temporary_roundtrip_table;"); + client.Execute("CREATE TEMPORARY TABLE IF NOT EXISTS temporary_roundtrip_table (col " + type_name + ");"); + { + Block block; + block.AppendColumn("col", expected); + block.RefreshRowCount(); + client.Insert("temporary_roundtrip_table", block); + } + + client.Select("SELECT col FROM temporary_roundtrip_table", [&result](const Block& b) { + if (b.GetRowCount() == 0) + return; + + ASSERT_EQ(1u, b.GetColumnCount()); + result->Append(b[0]); + }); + + EXPECT_EQ(expected->GetType(), result->GetType()); + EXPECT_EQ(expected->Size(), result->Size()); + + return result; +} diff --git a/ut/roundtrip_column.h b/ut/roundtrip_column.h new file mode 100644 index 00000000..30097997 --- /dev/null +++ b/ut/roundtrip_column.h @@ -0,0 +1,9 @@ +#pragma once + +#include + +namespace clickhouse { + class Client; +} + +clickhouse::ColumnRef RoundtripColumnValues(clickhouse::Client& client, clickhouse::ColumnRef expected); diff --git a/ut/ssl_ut.cpp b/ut/ssl_ut.cpp index 92cc5bfc..2539ac93 100644 --- a/ut/ssl_ut.cpp +++ b/ut/ssl_ut.cpp @@ -55,7 +55,7 @@ INSTANTIATE_TEST_SUITE_P( const auto ClickHouseExplorerConfig = ClientOptions() - .SetHost( getEnvOrDefault("CLICKHOUSE_SECURE2_HOST", "gh-api.clickhouse.tech")) + .SetHost( getEnvOrDefault("CLICKHOUSE_SECURE2_HOST", "play.clickhouse.com")) .SetPort( getEnvOrDefault("CLICKHOUSE_SECURE2_PORT", "9440")) .SetUser( getEnvOrDefault("CLICKHOUSE_SECURE2_USER", "explorer")) .SetPassword( getEnvOrDefault("CLICKHOUSE_SECURE2_PASSWORD", "")) @@ -79,7 +79,7 @@ INSTANTIATE_TEST_SUITE_P( // Looks like `VerifyCAPath` has no effect, while parsing and setting value works. // Also for some reason SetPathToCADirectory() + SSL_CTX_load_verify_locations() works. 
#if !defined(__APPLE__) -TEST(OpenSSLConfiguration, ValidValues) { +TEST(OpenSSLConfiguration, DISABLED_ValidValues) { // Verify that Client with valid configuration set via SetConfiguration is able to connect. EXPECT_NO_THROW( diff --git a/ut/utils.cpp b/ut/utils.cpp index dcac371b..07ae5174 100644 --- a/ut/utils.cpp +++ b/ut/utils.cpp @@ -1,22 +1,28 @@ #include "utils.h" #include +#include #include +#include #include #include #include -#include -#include #include #include +#include +#include +#include #include // for ipv4-ipv6 platform-specific stuff #include #include + namespace { using namespace clickhouse; +std::ostream & printColumnValue(const ColumnRef& c, const size_t row, std::ostream & ostr); + struct DateTimeValue { explicit DateTimeValue(const time_t & v) : value(v) @@ -69,6 +75,45 @@ bool doPrintValue(const ColumnRef & c, const size_t row, std::ostr return doPrintEnumValue(c, row, ostr); } +template <> +bool doPrintValue(const ColumnRef & c, const size_t row, std::ostream & ostr) { + // via temporary stream to preserve fill and alignment of the ostr + std::stringstream sstr; + + if (const auto & array_col = c->As()) { + const auto & row_values = array_col->GetAsColumn(row); + sstr << "["; + for (size_t i = 0; i < row_values->Size(); ++i) { + printColumnValue(row_values, i, sstr); + + if (i < row_values->Size() - 1) + sstr << ", "; + } + sstr << "]"; + ostr << sstr.str(); + + return true; + } + return false; +} + +template <> +bool doPrintValue(const ColumnRef & c, const size_t row, std::ostream & ostr) { + if (const auto & tupple_col = c->As()) { + ostr << "("; + for (size_t i = 0; i < tupple_col->TupleSize(); ++i) { + const auto & nested_col = (*tupple_col)[i]; + printColumnValue(nested_col, row, ostr); + + if (i < tupple_col->TupleSize() - 1) + ostr << ", "; + } + ostr << ")"; + return true; + } + return false; +} + std::ostream & printColumnValue(const ColumnRef& c, const size_t row, std::ostream & ostr) { const auto r = false @@ -91,7 +136,9 @@ 
std::ostream & printColumnValue(const ColumnRef& c, const size_t row, std::ostre || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) - || doPrintValue(c, row, ostr); + || doPrintValue(c, row, ostr) + || doPrintValue(c, row, ostr) + || doPrintValue(c, row, ostr); if (!r) ostr << "Unable to print value of type " << c->GetType().GetName(); @@ -109,28 +156,6 @@ std::ostream & operator<<(std::ostream & ostr, const ColumnValue& v) { } -std::ostream& operator<<(std::ostream & ostr, const Block & block) { - if (block.GetRowCount() == 0 || block.GetColumnCount() == 0) - return ostr; - - for (size_t col = 0; col < block.GetColumnCount(); ++col) { - const auto & c = block[col]; - ostr << c->GetType().GetName() << " ["; - - for (size_t row = 0; row < block.GetRowCount(); ++row) { - printColumnValue(c, row, ostr); - if (row != block.GetRowCount() - 1) - ostr << ", "; - } - ostr << "]"; - - if (col != block.GetColumnCount() - 1) - ostr << "\n"; - } - - return ostr; -} - std::ostream& operator<<(std::ostream & ostr, const PrettyPrintBlock & pretty_print_block) { // Pretty-print block: // - names of each column @@ -203,3 +228,46 @@ std::ostream& operator<<(std::ostream& ostr, const in6_addr& addr) { return ostr << ip_str; } + +namespace clickhouse { + +std::ostream& operator<<(std::ostream & ostr, const Block & block) { + if (block.GetRowCount() == 0 || block.GetColumnCount() == 0) + return ostr; + + for (size_t col = 0; col < block.GetColumnCount(); ++col) { + const auto & c = block[col]; + ostr << c->GetType().GetName() << " ["; + + for (size_t row = 0; row < block.GetRowCount(); ++row) { + printColumnValue(c, row, ostr); + if (row != block.GetRowCount() - 1) + ostr << ", "; + } + ostr << "]"; + + if (col != block.GetColumnCount() - 1) + ostr << "\n"; + } + + return ostr; +} + +std::ostream& operator<<(std::ostream & ostr, const Type & type) { + return ostr << type.GetName(); +} + +std::ostream & operator<<(std::ostream & ostr, const 
ServerInfo & server_info) { + return ostr << server_info.name << "/" << server_info.display_name + << " ver " + << server_info.version_major << "." + << server_info.version_minor << "." + << server_info.version_patch + << " (" << server_info.revision << ")"; +} + +} + +uint64_t versionNumber(const ServerInfo & server_info) { + return versionNumber(server_info.version_major, server_info.version_minor, server_info.version_patch, server_info.revision); +} diff --git a/ut/utils.h b/ut/utils.h index 834b0bcf..f0a6194f 100644 --- a/ut/utils.h +++ b/ut/utils.h @@ -2,48 +2,56 @@ #include -#include -#include +#include "utils_meta.h" +#include "utils_comparison.h" + #include #include +#include #include -#include #include +#include +#include #include +#include + namespace clickhouse { + class Client; class Block; + class Type; + struct ServerInfo; } -template -struct Timer { - using DurationType = ChronoDurationType; - - Timer() - : started_at(Now()) - {} - - void Restart() { - started_at = Now(); - } - - void Start() { - Restart(); - } +template +auto getEnvOrDefault(const std::string& env, const char * default_val) { + const char* v = std::getenv(env.c_str()); + if (!v && !default_val) + throw std::runtime_error("Environment var '" + env + "' is not set."); - auto Elapsed() const { - return std::chrono::duration_cast(Now() - started_at); - } + const std::string value = v ? 
v : default_val; -private: - static auto Now() { - return std::chrono::high_resolution_clock::now().time_since_epoch(); + if constexpr (std::is_same_v) { + return value; + } else if constexpr (std::is_integral_v) { + // since std::from_chars is not available on GCC-7 on linux + if constexpr (std::is_signed_v) { + if constexpr (sizeof(ResultType) <= sizeof(int)) + return std::stoi(value); + else if constexpr (sizeof(ResultType) <= sizeof(long)) + return std::stol(value); + else if constexpr (sizeof(ResultType) <= sizeof(long long)) + return std::stoll(value); + } else if constexpr (std::is_unsigned_v) { + if constexpr (sizeof(ResultType) <= sizeof(unsigned long)) + return std::stoul(value); + else if constexpr (sizeof(ResultType) <= sizeof(unsigned long long)) + return std::stoull(value); + } } +} -private: - std::chrono::nanoseconds started_at; -}; template inline const char * getPrefix() { @@ -72,47 +80,14 @@ template inline ostream & operator<<(ostream & ostr, const chrono::duration & d) { return ostr << d.count() << ::getPrefix

() << "s"; } -} - -// Since result_of is deprecated in C++17, and invoke_result_of is unavailable until C++20... -template -using my_result_of_t = -#if __cplusplus >= 201703L - std::invoke_result_t; -#else - std::result_of_t; -#endif - -template -class MeasuresCollector { -public: - using Result = my_result_of_t; - - explicit MeasuresCollector(MeasureFunc && measurment_func, const size_t preallocate_results = 10) - : measurment_func_(std::move(measurment_func)) - { - results_.reserve(preallocate_results); - } - - template - void Add(NameType && name) { - results_.emplace_back(name, measurment_func_()); - } - - const auto & GetResults() const { - return results_; - } -private: - MeasureFunc measurment_func_; - std::vector> results_; -}; - -template -MeasuresCollector collect(MeasureFunc && f) { - return MeasuresCollector(std::forward(f)); +template +inline ostream & operator<<(ostream & ostr, const pair & t) { + return ostr << "{ " << t.first << ", " << t.second << " }"; +} } + struct in_addr; struct in6_addr; // Helper for pretty-printing of the Block @@ -120,36 +95,65 @@ struct PrettyPrintBlock { const clickhouse::Block & block; }; -std::ostream& operator<<(std::ostream & ostr, const clickhouse::Block & block); +namespace clickhouse { +std::ostream& operator<<(std::ostream & ostr, const Block & block); +std::ostream& operator<<(std::ostream & ostr, const Type & type); +std::ostream & operator<<(std::ostream & ostr, const ServerInfo & server_info); +} + std::ostream& operator<<(std::ostream & ostr, const PrettyPrintBlock & block); std::ostream& operator<<(std::ostream& ostr, const in_addr& addr); std::ostream& operator<<(std::ostream& ostr, const in6_addr& addr); -template -auto getEnvOrDefault(const std::string& env, const char * default_val) { - const char* v = std::getenv(env.c_str()); - if (!v && !default_val) - throw std::runtime_error("Environment var '" + env + "' is not set."); +template +struct PrintContainer { + const Container & container_; - const 
std::string value = v ? v : default_val; + explicit PrintContainer(const Container& container) + : container_(container) + {} +}; - if constexpr (std::is_same_v) { - return value; - } else if constexpr (std::is_integral_v) { - // since std::from_chars is not available on GCC-7 on linux - if constexpr (std::is_signed_v) { - if constexpr (sizeof(ResultType) <= sizeof(int)) - return std::stoi(value); - else if constexpr (sizeof(ResultType) <= sizeof(long)) - return std::stol(value); - else if constexpr (sizeof(ResultType) <= sizeof(long long)) - return std::stoll(value); - } else if constexpr (std::is_unsigned_v) { - if constexpr (sizeof(ResultType) <= sizeof(unsigned long)) - return std::stoul(value); - else if constexpr (sizeof(ResultType) <= sizeof(unsigned long long)) - return std::stoull(value); +template +std::ostream& operator<<(std::ostream & ostr, const PrintContainer& print_container) { + ostr << "["; + + const auto & container = print_container.container_; + for (auto i = std::begin(container); i != std::end(container); /*intentionally no ++i*/) { + const auto & elem = *i; + + if constexpr (is_container_v>) { + ostr << PrintContainer{elem}; + } else { + ostr << elem; + } + + if (++i != std::end(container)) { + ostr << ", "; } } + + return ostr << "]"; +} + +inline uint64_t versionNumber( + uint64_t version_major, + uint64_t version_minor, + uint64_t version_patch = 0, + uint64_t revision = 0) { + + // in this case version_major can be up to 1000 + static auto revision_decimal_places = 8; + static auto patch_decimal_places = 4; + static auto minor_decimal_places = 4; + + auto const result = version_major * static_cast(std::pow(10, minor_decimal_places + patch_decimal_places + revision_decimal_places)) + + version_minor * static_cast(std::pow(10, patch_decimal_places + revision_decimal_places)) + + version_patch * static_cast(std::pow(10, revision_decimal_places)) + + revision; + + return result; } + +uint64_t versionNumber(const clickhouse::ServerInfo & 
server_info); diff --git a/ut/utils_comparison.h b/ut/utils_comparison.h new file mode 100644 index 00000000..c40033b4 --- /dev/null +++ b/ut/utils_comparison.h @@ -0,0 +1,163 @@ +#pragma once + +#include "utils_meta.h" + +#include // for ipv4-ipv6 platform-specific stuff + +#include + +#include +#include + +namespace clickhouse { + class Block; + class Column; +} + +inline bool operator==(const in6_addr& left, const in6_addr& right) { + return memcmp(&left, &right, sizeof(left)) == 0; +} + +inline bool operator==(const in_addr& left, const in_addr& right) { + return memcmp(&left, &right, sizeof(left)) == 0; +} + +inline bool operator==(const in_addr & left, const uint32_t& right) { + return memcmp(&left, &right, sizeof(left)) == 0; +} + +inline bool operator==(const uint32_t& left, const in_addr& right) { + return memcmp(&left, &right, sizeof(left)) == 0; +} + +inline bool operator==(const in6_addr & left, const std::string_view & right) { + return right.size() == sizeof(left) && memcmp(&left, right.data(), sizeof(left)) == 0; +} + +inline bool operator==(const std::string_view & left, const in6_addr & right) { + return left.size() == sizeof(right) && memcmp(left.data(), &right, sizeof(right)) == 0; +} + +inline bool operator!=(const in6_addr& left, const in6_addr& right) { + return !(left == right); +} + +inline bool operator!=(const in_addr& left, const in_addr& right) { + return !(left == right); +} + +inline bool operator!=(const in_addr & left, const uint32_t& right) { + return !(left == right); +} + +inline bool operator!=(const uint32_t& left, const in_addr& right) { + return !(left == right); +} + +inline bool operator!=(const in6_addr & left, const std::string_view & right) { + return !(left == right); +} + +inline bool operator!=(const std::string_view & left, const in6_addr & right) { + return !(left == right); +} + +namespace details { +// Make a column a RO stl-like container +template +struct ColumnAsContainerWrapper { + const NestedColumnType& 
nested_col; + + struct Iterator { + const NestedColumnType& nested_col; + size_t i = 0; + + auto& operator++() { + ++i; + return *this; + } + + auto operator*() const { + return nested_col.At(i); + } + + bool operator==(const Iterator & other) const { + return &other.nested_col == &this->nested_col && other.i == this->i; + } + + bool operator!=(const Iterator & other) const { + return !(other == *this); + } + }; + + size_t size() const { + return nested_col.Size(); + } + + auto begin() const { + return Iterator{nested_col, 0}; + } + + auto end() const { + return Iterator{nested_col, nested_col.Size()}; + } +}; +} + +template +auto maybeWrapColumnAsContainer(const T & t) { + if constexpr (std::is_base_of_v) { + return ::details::ColumnAsContainerWrapper{t}; + } else { + return t; + } +} + + +// Compare values to each other, if values are container-ish, then recursively deep compare those containers. +template +::testing::AssertionResult CompareRecursive(const Left & left, const Right & right); + +// Compare containers element-wise, if elements are containers themselves - compare recursevely +template +::testing::AssertionResult CompareCotainersRecursive(const LeftContainer& left, const RightContainer& right) { + if (left.size() != right.size()) + return ::testing::AssertionFailure() << "\nMismatching containers size, expected: " << left.size() << " actual: " << right.size(); + + auto l_i = std::begin(left); + auto r_i = std::begin(right); + + for (size_t i = 0; i < left.size(); ++i, ++l_i, ++r_i) { + auto result = CompareRecursive(*l_i, *r_i); + if (!result) + return result << "\n\nMismatch at pos: " << i + 1; + } + + return ::testing::AssertionSuccess(); +} + +template +struct PrintContainer; + +template +::testing::AssertionResult CompareRecursive(const Left & left, const Right & right) { + if constexpr ((is_container_v || std::is_base_of_v>) + && (is_container_v || std::is_base_of_v>) ) { + + const auto & l = maybeWrapColumnAsContainer(left); + const auto & r = 
maybeWrapColumnAsContainer(right); + + if (auto result = CompareCotainersRecursive(l, r)) + return result; + else + return result << "\nExpected container: " << PrintContainer{l} + << "\nActual container : " << PrintContainer{r}; + } else { + if (left != right) + return ::testing::AssertionFailure() + << "\nExpected value: " << left + << "\nActual value : " << right; + + return ::testing::AssertionSuccess(); + } +} diff --git a/ut/utils_meta.h b/ut/utils_meta.h new file mode 100644 index 00000000..707f9aca --- /dev/null +++ b/ut/utils_meta.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include // for std::begin + +// based on https://stackoverflow.com/a/31207079 +template +struct is_container : std::false_type {}; + +namespace details { +template +struct is_container_helper {}; +} + +// A very loose definition of container, nerfed to fit both C-array and ColumnArrayT::ArrayWrapper +template +struct is_container< + T, + std::conditional_t< + false, + ::details::is_container_helper< + decltype(std::declval().size()), + decltype(std::begin(std::declval())), + decltype(std::end(std::declval())) + >, + void + > + > : public std::true_type {}; + +template +inline constexpr bool is_container_v = is_container::value; + +// Since result_of is deprecated in C++17, and invoke_result_of is unavailable until C++20... 
+template +using my_result_of_t = +#if __cplusplus >= 201703L + std::invoke_result_t; +#else + std::result_of_t; +#endif + diff --git a/ut/utils_performance.h b/ut/utils_performance.h new file mode 100644 index 00000000..b6123589 --- /dev/null +++ b/ut/utils_performance.h @@ -0,0 +1,67 @@ +#pragma once + +#include "utils_meta.h" + +#include +#include +#include +#include + +template +struct Timer { + using DurationType = ChronoDurationType; + + Timer() + : started_at(Now()) + {} + + void Restart() { + started_at = Now(); + } + + void Start() { + Restart(); + } + + auto Elapsed() const { + return std::chrono::duration_cast(Now() - started_at); + } + +private: + static auto Now() { + return std::chrono::high_resolution_clock::now().time_since_epoch(); + } + +private: + std::chrono::nanoseconds started_at; +}; + +template +class MeasuresCollector { +public: + using Result = my_result_of_t; + + explicit MeasuresCollector(MeasureFunc && measurment_func, const size_t preallocate_results = 10) + : measurment_func_(std::move(measurment_func)) + { + results_.reserve(preallocate_results); + } + + template + void Add(NameType && name) { + results_.emplace_back(name, measurment_func_()); + } + + const auto & GetResults() const { + return results_; + } + +private: + MeasureFunc measurment_func_; + std::vector> results_; +}; + +template +MeasuresCollector collect(MeasureFunc && f) { + return MeasuresCollector(std::forward(f)); +} diff --git a/ut/utils_ut.cpp b/ut/utils_ut.cpp new file mode 100644 index 00000000..a600ada0 --- /dev/null +++ b/ut/utils_ut.cpp @@ -0,0 +1,35 @@ +#include +#include "utils.h" + +#include + +TEST(TestCompareContainer, ComparePlain) { + EXPECT_TRUE(CompareRecursive(std::vector{1, 2, 3}, std::vector{1, 2, 3})); + EXPECT_TRUE(CompareRecursive(std::vector{}, std::vector{})); + + EXPECT_FALSE(CompareRecursive(std::vector{1, 2, 3}, std::vector{1, 2, 4})); + EXPECT_FALSE(CompareRecursive(std::vector{1, 2, 3}, std::vector{1, 2})); + + // That would cause 
compile-time error: + // EXPECT_FALSE(CompareRecursive(std::array{1, 2, 3}, 1)); +} + + +TEST(TestCompareContainer, CompareNested) { + EXPECT_TRUE(CompareRecursive( + std::vector>{{{1, 2, 3}, {4, 5, 6}}}, + std::vector>{{{1, 2, 3}, {4, 5, 6}}})); + + EXPECT_TRUE(CompareRecursive( + std::vector>{{{1, 2, 3}, {4, 5, 6}, {}}}, + std::vector>{{{1, 2, 3}, {4, 5, 6}, {}}})); + + EXPECT_TRUE(CompareRecursive( + std::vector>{{{}}}, + std::vector>{{{}}})); + + EXPECT_FALSE(CompareRecursive(std::vector>{{1, 2, 3}, {4, 5, 6}}, std::vector>{{1, 2, 3}, {4, 5, 7}})); + EXPECT_FALSE(CompareRecursive(std::vector>{{1, 2, 3}, {4, 5, 6}}, std::vector>{{1, 2, 3}, {4, 5}})); + EXPECT_FALSE(CompareRecursive(std::vector>{{1, 2, 3}, {4, 5, 6}}, std::vector>{{1, 2, 3}, {}})); + EXPECT_FALSE(CompareRecursive(std::vector>{{1, 2, 3}, {4, 5, 6}}, std::vector>{{}})); +} diff --git a/ut/value_generators.cpp b/ut/value_generators.cpp new file mode 100644 index 00000000..43b9dff5 --- /dev/null +++ b/ut/value_generators.cpp @@ -0,0 +1,149 @@ +#include "value_generators.h" + +#include +#include + +namespace { +using namespace clickhouse; +} + +std::vector MakeNumbers() { + return std::vector {1, 2, 3, 7, 11, 13, 17, 19, 23, 29, 31}; +} + +std::vector MakeBools() { + return std::vector {1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0}; +} + +std::vector MakeFixedStrings(size_t string_size) { + std::vector result {"aaa", "bbb", "ccc", "ddd"}; + + std::for_each(result.begin(), result.end(), [string_size](auto& value) { + value.resize(string_size, '\0'); + }); + + return result; +} + +std::vector MakeStrings() { + return {"a", "ab", "abc", "abcd"}; +} + +std::vector MakeUUIDs() { + return { + UInt128(0llu, 0llu), + UInt128(0xbb6a8c699ab2414cllu, 0x86697b7fd27f0825llu), + UInt128(0x84b9f24bc26b49c6llu, 0xa03b4ab723341951llu), + UInt128(0x3507213c178649f9llu, 0x9faf035d662f60aellu) + }; +} + +std::vector MakeDateTime64s(size_t scale, size_t values_size) { + const auto seconds_multiplier = static_cast(std::pow(10, scale)); 
+ const auto year = 86400ull * 365 * seconds_multiplier; // ~approx, but this doesn't matter here. + + // Approximatelly +/- 200 years around epoch (and value of epoch itself) + // with non zero seconds and sub-seconds. + // Please note there are values outside of DateTime (32-bit) range that might + // not have correct string representation in CH yet, + // but still are supported as Int64 values. + return GenerateVector(values_size, + [seconds_multiplier, year] (size_t i )-> Int64 { + return (i - 100) * year * 2 + (i * 10) * seconds_multiplier + i; + }); +} + +std::vector MakeDates() { + // in CH Date internally a UInt16 and stores a day number + // ColumnDate expects values to be seconds, which is then + // converted to day number internally, hence the `* 86400`. + std::vector result {0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536 - 1}; + std::for_each(result.begin(), result.end(), [](auto& value) { + value *= 86400; + }); + + return result; +} + +std::vector MakeDates32() { + // in CH Date32 internally a UInt32 and stores a day number + // ColumnDate expects values to be seconds, which is then + // converted to day number internally, hence the `* 86400`. + // 114634 * 86400 is 2282-11-10, last integer that fits into DateTime32 range + // (max is 2283-11-11) + std::vector result = MakeDates(); + + // add corresponding negative values, since pre-epoch date are supported too. 
+ const auto size = result.size(); + for (size_t i = 0; i < size; ++i) { + result.push_back(result[i] * -1); + } + + return result; +} + +std::vector MakeDateTimes() { + // in CH DateTime internally a UInt32 + return { + 0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, + 131072, 262144, 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, 67108864, + 134217728, 268435456, 536870912, 1073741824, 2147483648, 4294967296 - 1 + }; +} + +std::vector MakeInt128s() { + return { + absl::MakeInt128(0xffffffffffffffffll, 0xffffffffffffffffll), // -1 + absl::MakeInt128(0, 0xffffffffffffffffll), // 2^64 + absl::MakeInt128(0xffffffffffffffffll, 0), + absl::MakeInt128(0x8000000000000000ll, 0), + Int128(0) + }; +} + +std::vector MakeDecimals(size_t /*precision*/, size_t scale) { + const auto scale_multiplier = static_cast(std::pow(10, scale)); + const auto rhs_value = 12345678910; + + const std::vector vals {0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536 - 1}; + + std::vector result; + result.reserve(vals.size()); + + std::transform(vals.begin(), vals.end(), std::back_inserter(result), [scale_multiplier, rhs_value](const auto& value) { + return value * scale_multiplier + rhs_value % scale_multiplier; + }); + + return result; +} + +std::string FooBarGenerator(size_t i) { + std::string result; + if (i % 3 == 0) + result += "Foo"; + if (i % 5 == 0) + result += "Bar"; + if (result.empty()) + result = std::to_string(i); + + return result; +} + +std::vector MakeIPv4s() { + return { + MakeIPv4(0x12345678), // 255.255.255.255 + MakeIPv4(0x0100007f), // 127.0.0.1 + MakeIPv4(3585395774), + MakeIPv4(0), + MakeIPv4(0x12345678), + }; +} + +std::vector MakeIPv6s() { + return { + MakeIPv6(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), // 1:203:405:607:809:a0b:c0d:e0f + MakeIPv6(0, 0, 0, 0, 0, 1), // ::1 + MakeIPv6(0, 0, 0, 0, 0, 0), // :: + MakeIPv6(0xff, 0xff, 204, 152, 189, 116), // 
::ffff:204.152.189.116 + }; +} diff --git a/ut/value_generators.h b/ut/value_generators.h new file mode 100644 index 00000000..a3004102 --- /dev/null +++ b/ut/value_generators.h @@ -0,0 +1,124 @@ +#pragma once + +#include // for ipv4-ipv6 platform-specific stuff +#include +#include + +#include "utils.h" + +#include +#include + +inline in_addr MakeIPv4(uint32_t ip) { + static_assert(sizeof(in_addr) == sizeof(ip)); + in_addr result; + memcpy(&result, &ip, sizeof(ip)); + + return result; +} + +inline in6_addr MakeIPv6(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, + uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, + uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) { + return in6_addr{{{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}}}; +} + +inline in6_addr MakeIPv6(uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) { + return in6_addr{{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, v10, v11, v12, v13, v14, v15}}}; +} + +std::vector MakeNumbers(); +std::vector MakeBools(); +std::vector MakeFixedStrings(size_t string_size); +std::vector MakeStrings(); +std::vector MakeDateTime64s(size_t scale, size_t values_size = 200); +std::vector MakeDates(); +std::vector MakeDates32(); +std::vector MakeDateTimes(); +std::vector MakeIPv4s(); +std::vector MakeIPv6s(); +std::vector MakeUUIDs(); +std::vector MakeInt128s(); +std::vector MakeDecimals(size_t precision, size_t scale); + +std::string FooBarGenerator(size_t i); + +template +auto GenerateVector(size_t items, Generator && gen) { + using ActualValueType = std::conditional_t, my_result_of_t, ValueType>; + std::vector result; + result.reserve(items); + for (size_t i = 0; i < items; ++i) { + result.push_back(std::move(gen(i))); + } + + return result; +} + +template +auto SameValueGenerator(const U & value) { + return [&value](size_t) -> T { + return value; + }; +} + +template +auto AlternateGenerators(Generator1 && gen1, Generator2 && gen2) { + 
return [&gen1, &gen2](size_t i) -> ResultType { + if (i % 2 == 0) + return gen1(i/2); + else + return gen2(i/2); + }; +} + +template +struct RandomGenerator { + using uniform_distribution = + typename std::conditional_t, std::uniform_real_distribution, + typename std::conditional_t, std::uniform_int_distribution, void>>; + + explicit RandomGenerator(T seed = 0, T value_min = std::numeric_limits::min(), T value_max = std::numeric_limits::max()) + : random_engine(seed) + , distribution(value_min, value_max) + { + } + + template + T operator()(U) { + return distribution(random_engine); + } + +private: + std::default_random_engine random_engine; + uniform_distribution distribution; +}; + +template +std::vector ConcatSequences(std::vector && vec1, std::vector && vec2) { + std::vector result(vec1); + + result.reserve(vec1.size() + vec2.size()); + result.insert(result.end(), vec2.begin(), vec2.end()); + + return result; +} + +template +struct FromVectorGenerator { + const std::vector data; + RandomGenerator random_generator; + + FromVectorGenerator(std::vector data_) + : data(std::move(data_)), + random_generator(0, 0, data.size() - 1) + { + if (data.size() == 0) + throw std::runtime_error("can't generate values from empty vector"); + } + + auto operator()(size_t pos) { + return data[random_generator(pos)]; + } +};