From b6540c39d716bb3af4b870dde49aecf018b14c91 Mon Sep 17 00:00:00 2001 From: Jared Casey Date: Mon, 14 Apr 2025 19:42:56 -0600 Subject: [PATCH 1/8] Update C++ core Change-Id: I4636e498854af319f02f551f5206aec5e509856b Reviewed-on: https://review.couchbase.org/c/couchbase-python-client/+/226345 Reviewed-by: Dimitris Christodoulou Tested-by: Build Bot --- deps/couchbase-cxx-client | 2 +- src/kv_range_scan.cxx | 20 ++++++++------------ 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/deps/couchbase-cxx-client b/deps/couchbase-cxx-client index 24dca979..3b7defdc 160000 --- a/deps/couchbase-cxx-client +++ b/deps/couchbase-cxx-client @@ -1 +1 @@ -Subproject commit 24dca979ec842ce200aaa1741f1271a4a61c837d +Subproject commit 3b7defdcf5b4ea1e0094de04fe7dcfd4a7286c31 diff --git a/src/kv_range_scan.cxx b/src/kv_range_scan.cxx index cb40b61c..e4a2a611 100644 --- a/src/kv_range_scan.cxx +++ b/src/kv_range_scan.cxx @@ -198,19 +198,15 @@ handle_kv_range_scan_op([[maybe_unused]] PyObject* self, PyObject* args, PyObjec return nullptr; } - auto barrier = std::make_shared< - std::promise<tl::expected<couchbase::core::topology::configuration, std::error_code>>>(); + auto barrier = std::make_shared< + std::promise<std::pair<std::error_code, couchbase::core::topology::configuration>>>(); auto f = barrier->get_future(); - conn->cluster_.with_bucket_configuration( - bucket_name, - [barrier](std::error_code ec, const couchbase::core::topology::configuration& config) mutable { - if (ec) { - return barrier->set_value(tl::unexpected(ec)); - } - barrier->set_value(config); - }); - auto config = f.get(); - if (!config.has_value()) { + conn->cluster_.with_bucket_configuration(bucket_name, + [barrier](std::error_code ec, auto config) mutable { + barrier->set_value({ ec, std::move(config) }); + }); + auto [ec, config] = f.get(); + if (ec) { pycbc_set_python_exception( PycbcError::UnsuccessfulOperation, __FILE__, From c03ff5c8c2f1325c1087da352061ad8d78ebd3c9 Mon Sep 17 00:00:00 2001 From: Jared Casey Date: Tue, 15 Apr 2025 12:36:07 -0600 Subject: [PATCH 2/8] PYCBC-1675: Forward append/prepend CAS to core request Motivation ========== Previously the append and prepend requests in the C++ core did not have a field for CAS. This meant that the relevant option in AppendOptions/PrependOptions was being ignored. Modification ============ * Update C++ core (done in previous commit) * Set the CAS option in the core's append_request & prepend_request. Results ======= Relevant tests in FIT pass.
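Example
=======

A minimal usage sketch of the now-honored option from the Python API (hypothetical key and value; `collection` is assumed to be an already-opened Collection and `cas` a CAS value captured from a prior mutation result on the same document):

    from couchbase.options import AppendOptions

    # With this change the CAS is forwarded to the core append_request,
    # so a stale CAS fails the operation instead of being silently ignored.
    collection.binary().append('doc-id', b'-suffix', AppendOptions(cas=cas))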
Change-Id: Ic49f3176618aed1458384abb858eddf17c1fcf70 Reviewed-on: https://review.couchbase.org/c/couchbase-python-client/+/226408 Reviewed-by: Dimitris Christodoulou Tested-by: Build Bot --- src/binary_ops.cxx | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/binary_ops.cxx b/src/binary_ops.cxx index f288232c..94330a34 100644 --- a/src/binary_ops.cxx +++ b/src/binary_ops.cxx @@ -331,8 +331,7 @@ prepare_and_execute_binary_mutation_op(struct binary_mutation_options* options, if (options->op_type == Operations::APPEND) { auto req = couchbase::core::operations::append_request{ options->id }; req.timeout = options->timeout_ms; - // @TODO: cxx client req doesn't have cas - // req.cas = options->cas; + req.cas = options->cas; req.value = value; if (nullptr != options->span) { req.parent_span = std::make_shared<pycbc::request_span>(options->span); @@ -355,8 +354,7 @@ prepare_and_execute_binary_mutation_op(struct binary_mutation_options* options, } else { auto req = couchbase::core::operations::prepend_request{ options->id }; req.timeout = options->timeout_ms; - // @TODO: cxx client req doesn't have cas - // req.cas = options->cas; + req.cas = options->cas; req.value = value; if (nullptr != options->span) { req.parent_span = std::make_shared<pycbc::request_span>(options->span); From 0460df12cccecce8325c7df877ffb8c33ee564f4 Mon Sep 17 00:00:00 2001 From: Jared Casey Date: Tue, 15 Apr 2025 16:01:09 -0600 Subject: [PATCH 3/8] PYCBC-1681: Raise InvalidArgumentException when base64 vector string is empty Motivation ========== Base64 vector strings cannot be empty. The RFC requires us to throw an InvalidArgumentException. Modification ============ If an empty base64 vector string is given, fail with an InvalidArgumentException. Results ======= Relevant FIT tests pass. Change-Id: Ica200369fe18a2af31c5e3959bbf86c034fd002c Reviewed-on: https://review.couchbase.org/c/couchbase-python-client/+/226419 Tested-by: Build Bot Reviewed-by: Dimitris Christodoulou --- couchbase/logic/vector_search.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/couchbase/logic/vector_search.py b/couchbase/logic/vector_search.py index 4c19e284..35111e81 100644 --- a/couchbase/logic/vector_search.py +++ b/couchbase/logic/vector_search.py @@ -127,6 +127,9 @@ def _validate_and_set_vector(self, elif not isinstance(vector, str): raise InvalidArgumentException('Provided vector must be either a List[float] or base64 encoded str.') + if len(vector) == 0: + raise InvalidArgumentException('Provided base64 encoded vector cannot be empty.') + self._vector_base64 = vector @classmethod From 790bea7bf8b0cf5c2fb8c23c414e8c7ea84e1b4e Mon Sep 17 00:00:00 2001 From: Jared Casey Date: Tue, 15 Apr 2025 18:33:34 -0600 Subject: [PATCH 4/8] PYCBC-1678: Pin cmake version Motivation ========== Until we have time to investigate CMake 4.0 compatibility, we should make sure the build system will install CMake < 4.0. Most users should not run into this scenario as we provide wheels. Modification ============ Pinned setup_requires CMake to >= 3.19 and < 4.0. Change-Id: Ib8a6badb8a3615f18a13d946dd4c7f8bef5e5971 Reviewed-on: https://review.couchbase.org/c/couchbase-python-client/+/226430 Tested-by: Build Bot Reviewed-by: Dimitris Christodoulou --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d489121c..285e1e28 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ # otherwise, we want to use the system executable.
setup_requires = [] if not CMAKE_EXE: - setup_requires += ["cmake"] + setup_requires += ["cmake>=3.19.0,<4.0.0"] print(f'Python SDK version: {PYCBC_VERSION}') From d3a817cbfd56686870b8946680cb93cdface6970 Mon Sep 17 00:00:00 2001 From: Jared Casey Date: Tue, 15 Apr 2025 12:47:25 -0600 Subject: [PATCH 5/8] PYCBC-1683: Fix search range queries to follow RFC Motivation ========== The `DateRangeQuery`, `NumericRangeQuery` and `TermRangeQuery` search queries do not abide by the RFC and are incorrectly encoding the search JSON for some parameters. The search queries should be updated so that users can successfully use these range query types. Modification ============ * Deprecate `TermRangeQuery` properties that do not follow RFC (`start`, `end`, `start_inclusive` and `end_inclusive`) * Add properties missing from the RFC: `min`, `max`, `inclusive_min` and `inclusive_max`. * Deprecate `DateRangeQuery` properties that do not follow RFC (`start_inclusive` and `end_inclusive`) and add properties missing from the RFC (`inclusive_start` and `inclusive_end`) * Deprecate `NumericRangeQuery` properties that do not follow RFC (`min_inclusive` and `max_inclusive`) and add properties missing from the RFC (`inclusive_min` and `inclusive_max`) * Update various type definitions * Update search_param tests to confirm correct encoded JSON Results ======= All tests pass. Change-Id: I0117938a4c8792b7d47fd0f845811062af6448ac Reviewed-on: https://review.couchbase.org/c/couchbase-python-client/+/226412 Reviewed-by: Dimitris Christodoulou Tested-by: Build Bot --- couchbase/logic/search_queries.py | 160 ++++++++++++++++++++++++----- couchbase/tests/search_params_t.py | 99 ++++++++++++++++-- 2 files changed, 223 insertions(+), 36 deletions(-) diff --git a/couchbase/logic/search_queries.py b/couchbase/logic/search_queries.py index 6edba5d6..13d1eb4c 100644 --- a/couchbase/logic/search_queries.py +++ b/couchbase/logic/search_queries.py @@ -22,6 +22,7 @@ from couchbase.exceptions import InvalidArgumentException, NoChildrenException from couchbase.logic.search import MatchOperator, _QueryBuilder +from couchbase.logic.supportability import Supportability # Query Types @@ -355,7 +356,7 @@ def bool(self, value # type: bool @_QueryBuilder._with_fields(fields=['field']) class GeoDistanceQuery(SearchQuery): - def __init__(self, distance, # type: Union[int, float] + def __init__(self, distance, # type: str location, # type: Tuple[float, float] **kwargs # type: Dict[str, Any] ) -> None: @@ -467,7 +468,8 @@ class NumericRangeQuery(SearchQuery): At least one of `min` or `max` must be specified. 
""" - def __init__(self, min=None, # type: Optional[float] + def __init__(self, + min=None, # type: Optional[float] max=None, # type: Optional[float] **kwargs # type: Dict[str, Any] ) -> None: @@ -486,7 +488,8 @@ def min(self) -> Optional[float]: return self._json_.get('min', None) @min.setter - def min(self, value # type: float + def min(self, + value # type: float ) -> None: self.set_prop('min', value) @@ -495,16 +498,29 @@ def min_inclusive(self) -> Optional[bool]: return self._json_.get('min_inclusive', None) @min_inclusive.setter - def min_inclusive(self, value # type: bool + def min_inclusive(self, + value # type: bool ) -> None: - self.set_prop('min_inclusive', value) + Supportability.class_property_deprecated('min_inclusive', 'inclusive_min') + self.set_prop('inclusive_min', value) + + @property + def inclusive_min(self) -> Optional[bool]: + return self._json_.get('inclusive_min', None) + + @inclusive_min.setter + def inclusive_min(self, + value # type: bool + ) -> None: + self.set_prop('inclusive_min', value) @property def max(self) -> Optional[float]: return self._json_.get('max', None) @max.setter - def max(self, value # type: float + def max(self, + value # type: float ) -> None: self.set_prop('max', value) @@ -513,9 +529,21 @@ def max_inclusive(self) -> Optional[bool]: return self._json_.get('max_inclusive', None) @max_inclusive.setter - def max_inclusive(self, value # type: bool + def max_inclusive(self, + value # type: bool ) -> None: - self.set_prop('max_inclusive', value) + Supportability.class_property_deprecated('max_inclusive', 'inclusive_max') + self.set_prop('inclusive_max', value) + + @property + def inclusive_max(self) -> Optional[bool]: + return self._json_.get('inclusive_max', None) + + @inclusive_max.setter + def inclusive_max(self, + value # type: bool + ) -> None: + self.set_prop('inclusive_max', value) # min = _genprop( # float, 'min', doc='Lower bound of range. 
See :attr:`min_inclusive`') @@ -570,7 +598,8 @@ def start(self) -> Optional[str]: return self._json_.get('start', None) @start.setter - def start(self, value # type: str + def start(self, + value # type: str ) -> None: self.set_prop('start', value) @@ -579,16 +608,29 @@ def start_inclusive(self) -> Optional[bool]: return self._json_.get('start_inclusive', None) @start_inclusive.setter - def start_inclusive(self, value # type: bool + def start_inclusive(self, + value # type: bool + ) -> None: + Supportability.class_property_deprecated('start_inclusive', 'inclusive_start') + self.set_prop('inclusive_start', value) + + @property + def inclusive_start(self) -> Optional[bool]: + return self._json_.get('inclusive_start', None) + + @inclusive_start.setter + def inclusive_start(self, + value # type: bool ) -> None: - self.set_prop('start_inclusive', value) + self.set_prop('inclusive_start', value) @property def end(self) -> Optional[str]: return self._json_.get('end', None) @end.setter - def end(self, value # type: str + def end(self, + value # type: str ) -> None: self.set_prop('end', value) @@ -597,16 +639,29 @@ def end_inclusive(self) -> Optional[bool]: return self._json_.get('end_inclusive', None) @end_inclusive.setter - def end_inclusive(self, value # type: bool + def end_inclusive(self, + value # type: bool + ) -> None: + Supportability.class_property_deprecated('end_inclusive', 'inclusive_end') + self.set_prop('inclusive_end', value) + + @property + def inclusive_end(self) -> Optional[bool]: + return self._json_.get('inclusive_end', None) + + @inclusive_end.setter + def inclusive_end(self, + value # type: bool ) -> None: - self.set_prop('end_inclusive', value) + self.set_prop('inclusive_end', value) @property def datetime_parser(self) -> Optional[str]: return self._json_.get('datetime_parser', None) @datetime_parser.setter - def datetime_parser(self, value # type: str + def datetime_parser(self, + value # type: str ) -> None: self.set_prop('datetime_parser', value) @@ -638,18 +693,31 @@ class TermRangeQuery(SearchQuery): lexical range. """ - _MINMAX = 'start', 'end' + _MINMAX = 'min', 'max' - def __init__(self, start=None, # type: Optional[str] + def __init__(self, + start=None, # type: Optional[str] end=None, # type: Optional[str] + min=None, # type: Optional[str] + max=None, # type: Optional[str] **kwargs # type: Dict[str, Any] ) -> None: """ - :param str start: See :attr:`start` - :param str end: See :attr:`end` + Args: + start (str): **DEPRECATED** Use min. + end (str): **DEPRECATED** Use max. + min (str): The lower end of the range. + max (str): The higher end of the range. 
""" super().__init__() - _QueryBuilder._validate_range_query(self, start, end, **kwargs) + if start is not None and min is None: + Supportability.class_property_deprecated('start', 'min') + min = start + if end is not None and max is None: + Supportability.class_property_deprecated('end', 'max') + max = end + + _QueryBuilder._validate_range_query(self, min, max, **kwargs) @property def start(self) -> Optional[str]: @@ -658,7 +726,17 @@ def start(self) -> Optional[str]: @start.setter def start(self, value # type: str ) -> None: - self.set_prop('start', value) + Supportability.class_property_deprecated('start', 'min') + self.set_prop('min', value) + + @property + def min(self) -> Optional[str]: + return self._json_.get('min', None) + + @min.setter + def min(self, value # type: str + ) -> None: + self.set_prop('min', value) @property def start_inclusive(self) -> Optional[bool]: @@ -667,7 +745,17 @@ def start_inclusive(self) -> Optional[bool]: @start_inclusive.setter def start_inclusive(self, value # type: bool ) -> None: - self.set_prop('start_inclusive', value) + Supportability.class_property_deprecated('start_inclusive', 'inclusive_min') + self.set_prop('inclusive_min', value) + + @property + def inclusive_min(self) -> Optional[bool]: + return self._json_.get('start_inclusive', None) + + @inclusive_min.setter + def inclusive_min(self, value # type: bool + ) -> None: + self.set_prop('inclusive_min', value) @property def end(self) -> Optional[str]: @@ -676,7 +764,17 @@ def end(self) -> Optional[str]: @end.setter def end(self, value # type: str ) -> None: - self.set_prop('end', value) + Supportability.class_property_deprecated('end', 'max') + self.set_prop('max', value) + + @property + def max(self) -> Optional[str]: + return self._json_.get('max', None) + + @max.setter + def max(self, value # type: str + ) -> None: + self.set_prop('max', value) @property def end_inclusive(self) -> Optional[bool]: @@ -685,7 +783,17 @@ def end_inclusive(self) -> Optional[bool]: @end_inclusive.setter def end_inclusive(self, value # type: bool ) -> None: - self.set_prop('end_inclusive', value) + Supportability.class_property_deprecated('end_inclusive', 'inclusive_max') + self.set_prop('inclusive_max', value) + + @property + def inclusive_max(self) -> Optional[bool]: + return self._json_.get('inclusive_max', None) + + @inclusive_max.setter + def inclusive_max(self, value # type: bool + ) -> None: + self.set_prop('inclusive_max', value) # def __init__(self, start=None, end=None, **kwargs): # super(TermRangeQuery, self).__init__(start=start, end=end, **kwargs) @@ -799,7 +907,7 @@ def __init__(self, must=None, should=None, must_not=None): self.must_not = must_not @property - def must(self) -> DisjunctionQuery: + def must(self) -> ConjunctionQuery: return self._subqueries.get('must') @must.setter @@ -808,7 +916,7 @@ def must(self, value # type: CompoundQueryType self._set_query('must', value, ConjunctionQuery) @property - def must_not(self) -> ConjunctionQuery: + def must_not(self) -> DisjunctionQuery: return self._subqueries.get('must_not') @must_not.setter diff --git a/couchbase/tests/search_params_t.py b/couchbase/tests/search_params_t.py index e525762e..bb577cf6 100644 --- a/couchbase/tests/search_params_t.py +++ b/couchbase/tests/search_params_t.py @@ -180,26 +180,26 @@ def test_daterange_query(self, cb_env): with pytest.raises(TypeError): q = search.DateRangeQuery() - q = search.DateRangeQuery(end='theEnd') + q = search.DateRangeQuery(end='2024-12-01') search_query = 
search.SearchQueryBuilder.create_search_query_object( cb_env.TEST_INDEX_NAME, q ) encoded_q = cb_env.get_encoded_query(search_query) - assert encoded_q['query'] == {'end': 'theEnd'} + assert encoded_q['query'] == {'end': '2024-12-01'} - q = search.DateRangeQuery(start='theStart') + q = search.DateRangeQuery(start='2024-01-01') search_query = search.SearchQueryBuilder.create_search_query_object( cb_env.TEST_INDEX_NAME, q ) encoded_q = cb_env.get_encoded_query(search_query) - assert encoded_q['query'] == {'start': 'theStart'} + assert encoded_q['query'] == {'start': '2024-01-01'} - q = search.DateRangeQuery(start='theStart', end='theEnd') + q = search.DateRangeQuery(start='2024-01-01', end='2024-12-01') search_query = search.SearchQueryBuilder.create_search_query_object( cb_env.TEST_INDEX_NAME, q ) encoded_q = cb_env.get_encoded_query(search_query) - assert encoded_q['query'] == {'start': 'theStart', 'end': 'theEnd'} + assert encoded_q['query'] == {'start': '2024-01-01', 'end': '2024-12-01'} q = search.DateRangeQuery('', '') # Empty strings should be ok search_query = search.SearchQueryBuilder.create_search_query_object( @@ -208,6 +208,27 @@ def test_daterange_query(self, cb_env): encoded_q = cb_env.get_encoded_query(search_query) assert encoded_q['query'] == {'start': '', 'end': ''} + # deprecated start_inclusive & end_inclusive + q = search.DateRangeQuery('2024-01-01', '2024-12-01', start_inclusive=True, end_inclusive=True) + search_query = search.SearchQueryBuilder.create_search_query_object( + cb_env.TEST_INDEX_NAME, q + ) + encoded_q = cb_env.get_encoded_query(search_query) + assert encoded_q['query'] == {'start': '2024-01-01', + 'end': '2024-12-01', + 'inclusive_start': True, + 'inclusive_end': True} + + q = search.DateRangeQuery('2024-01-01', '2024-12-01', inclusive_start=True, inclusive_end=True) + search_query = search.SearchQueryBuilder.create_search_query_object( + cb_env.TEST_INDEX_NAME, q + ) + encoded_q = cb_env.get_encoded_query(search_query) + assert encoded_q['query'] == {'start': '2024-01-01', + 'end': '2024-12-01', + 'inclusive_start': True, + 'inclusive_end': True} + def test_disjunction_query(self, cb_env): q = search.DisjunctionQuery() assert q.min == 1 @@ -439,6 +460,27 @@ def test_numrange_query(self, cb_env): encoded_q = cb_env.get_encoded_query(search_query) assert encoded_q['query'] == {'min': 0.1} + # deprecated min_inclusive & max_inclusive + q = search.NumericRangeQuery(0.1, 0.9, min_inclusive=True, max_inclusive=True) + search_query = search.SearchQueryBuilder.create_search_query_object( + cb_env.TEST_INDEX_NAME, q + ) + encoded_q = cb_env.get_encoded_query(search_query) + assert encoded_q['query'] == {'min': 0.1, + 'max': 0.9, + 'inclusive_min': True, + 'inclusive_max': True} + + q = search.NumericRangeQuery(0.1, 0.9, inclusive_min=True, inclusive_max=True) + search_query = search.SearchQueryBuilder.create_search_query_object( + cb_env.TEST_INDEX_NAME, q + ) + encoded_q = cb_env.get_encoded_query(search_query) + assert encoded_q['query'] == {'min': 0.1, + 'max': 0.9, + 'inclusive_min': True, + 'inclusive_max': True} + def test_params_base(self, cb_env, base_query_opts): q, base_opts = base_query_opts opts = SearchOptions() @@ -820,28 +862,65 @@ def test_termrange_query(self, cb_env): cb_env.TEST_INDEX_NAME, q ) encoded_q = cb_env.get_encoded_query(search_query) - assert encoded_q['query'] == {'start': '', 'end': ''} + assert encoded_q['query'] == {'min': '', 'max': ''} q = search.TermRangeQuery('startTerm', 'endTerm') search_query = 
search.SearchQueryBuilder.create_search_query_object( cb_env.TEST_INDEX_NAME, q ) encoded_q = cb_env.get_encoded_query(search_query) - assert encoded_q['query'] == {'start': 'startTerm', 'end': 'endTerm'} + assert encoded_q['query'] == {'min': 'startTerm', 'max': 'endTerm'} + # deprecated end q = search.TermRangeQuery(end='endTerm') search_query = search.SearchQueryBuilder.create_search_query_object( cb_env.TEST_INDEX_NAME, q ) encoded_q = cb_env.get_encoded_query(search_query) - assert encoded_q['query'] == {'end': 'endTerm'} + assert encoded_q['query'] == {'max': 'endTerm'} + q = search.TermRangeQuery(max='endTerm') + search_query = search.SearchQueryBuilder.create_search_query_object( + cb_env.TEST_INDEX_NAME, q + ) + encoded_q = cb_env.get_encoded_query(search_query) + assert encoded_q['query'] == {'max': 'endTerm'} + + # deprecated start q = search.TermRangeQuery(start='startTerm') search_query = search.SearchQueryBuilder.create_search_query_object( cb_env.TEST_INDEX_NAME, q ) encoded_q = cb_env.get_encoded_query(search_query) - assert encoded_q['query'] == {'start': 'startTerm'} + assert encoded_q['query'] == {'min': 'startTerm'} + + q = search.TermRangeQuery(min='startTerm') + search_query = search.SearchQueryBuilder.create_search_query_object( + cb_env.TEST_INDEX_NAME, q + ) + encoded_q = cb_env.get_encoded_query(search_query) + assert encoded_q['query'] == {'min': 'startTerm'} + + # deprecated start_inclusive & end_inclusive + q = search.TermRangeQuery('startTerm', 'endTerm', start_inclusive=True, end_inclusive=True) + search_query = search.SearchQueryBuilder.create_search_query_object( + cb_env.TEST_INDEX_NAME, q + ) + encoded_q = cb_env.get_encoded_query(search_query) + assert encoded_q['query'] == {'min': 'startTerm', + 'max': 'endTerm', + 'inclusive_min': True, + 'inclusive_max': True} + + q = search.TermRangeQuery('startTerm', 'endTerm', inclusive_min=True, inclusive_max=True) + search_query = search.SearchQueryBuilder.create_search_query_object( + cb_env.TEST_INDEX_NAME, q + ) + encoded_q = cb_env.get_encoded_query(search_query) + assert encoded_q['query'] == {'min': 'startTerm', + 'max': 'endTerm', + 'inclusive_min': True, + 'inclusive_max': True} def test_wildcard_query(self, cb_env): exp_json = { From 2c49a86db3bfb704b0884b3aca83f31eb613c6ec Mon Sep 17 00:00:00 2001 From: Jared Casey Date: Mon, 14 Apr 2025 20:48:02 -0600 Subject: [PATCH 6/8] PYCBC-1679: Disable console logger if file logger specified Changes ======= * If the `PYCBC_LOG_FILE` environment variable is set (enabling the C++ core file logger), the C++ console logger will be disabled. * If the file logger is enabled, the console logger may also be enabled if the `PYCBC_ENABLE_CONSOLE` environment variable is set. 
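Example
=======

A minimal sketch of the resulting behavior (this assumes logging is configured from environment variables when the package is imported, and that `PYCBC_LOG_LEVEL` is the variable selecting the log level; only `PYCBC_LOG_FILE` and `PYCBC_ENABLE_CONSOLE` are touched by this change):

    import os

    os.environ['PYCBC_LOG_LEVEL'] = 'debug'
    os.environ['PYCBC_LOG_FILE'] = 'pycbc.log'  # file logger on; console logger now disabled
    # os.environ['PYCBC_ENABLE_CONSOLE'] = '1'  # opt back in to console output alongside the file

    import couchbase  # the logger is configured at import time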
Change-Id: I9288191a5776472ee9ad754cae7a51c3878bdd37 Reviewed-on: https://review.couchbase.org/c/couchbase-python-client/+/226346 Reviewed-by: Dimitris Christodoulou Tested-by: Build Bot --- couchbase/__init__.py | 5 ++++- src/logger.cxx | 15 +++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/couchbase/__init__.py b/couchbase/__init__.py index 756b9b47..567e72ab 100644 --- a/couchbase/__init__.py +++ b/couchbase/__init__.py @@ -146,7 +146,10 @@ def configure_console_logger(): if log_level: log_file = os.getenv('PYCBC_LOG_FILE', None) if log_file: - _PYCBC_LOGGER.create_logger(level=log_level.lower(), filename=log_file) + enable_console_logging = 0 if os.getenv('PYCBC_ENABLE_CONSOLE', None) is None else 1 + _PYCBC_LOGGER.create_logger(level=log_level.lower(), + filename=log_file, + enable_console=enable_console_logging) else: _PYCBC_LOGGER.create_logger(level=log_level.lower()) logging.getLogger().debug(get_metadata(as_str=True)) diff --git a/src/logger.cxx b/src/logger.cxx index ba80653a..6d2dd998 100644 --- a/src/logger.cxx +++ b/src/logger.cxx @@ -71,10 +71,16 @@ pycbc_logger__create_logger__(PyObject* self, PyObject* args, PyObject* kwargs) auto logger = reinterpret_cast<pycbc_logger*>(self); char* log_level = nullptr; char* log_filename = nullptr; - const char* kw_list[] = { "level", "filename", nullptr }; - const char* kw_format = "s|s"; - if (!PyArg_ParseTupleAndKeywords( - args, kwargs, kw_format, const_cast<char**>(kw_list), &log_level, &log_filename)) { + int enable_console = 0; + const char* kw_list[] = { "level", "filename", "enable_console", nullptr }; + const char* kw_format = "s|si"; + if (!PyArg_ParseTupleAndKeywords(args, + kwargs, + kw_format, + const_cast<char**>(kw_list), + &log_level, + &log_filename, + &enable_console)) { pycbc_set_python_exception(PycbcError::InvalidArgument, __FILE__, __LINE__, @@ -103,6 +109,7 @@ pycbc_logger__create_logger__(PyObject* self, PyObject* args, PyObject* kwargs) couchbase::core::logger::configuration configuration{}; configuration.filename = std::string{ log_filename }; configuration.log_level = level; + configuration.console = enable_console > 0; couchbase::core::logger::create_file_logger(configuration); logger->is_file_logger = true; } else { From 5beb2492cedad55ff9948546a175505ccd4dc08a Mon Sep 17 00:00:00 2001 From: Jared Casey Date: Sun, 11 May 2025 19:37:17 -0600 Subject: [PATCH 7/8] PYCBC-1676: Update couchbase query operations example to use blocking API Change-Id: I5fe66a7d185bf3e56056a941a34ed0cc03ac325c Reviewed-on: https://review.couchbase.org/c/couchbase-python-client/+/227599 Tested-by: Build Bot Reviewed-by: Dimitris Christodoulou --- .../couchbase/couchbase_query_operations.py | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/examples/couchbase/couchbase_query_operations.py b/examples/couchbase/couchbase_query_operations.py index 3a6af9f5..62d66275 100644 --- a/examples/couchbase/couchbase_query_operations.py +++ b/examples/couchbase/couchbase_query_operations.py @@ -1,10 +1,10 @@ import uuid -from acouchbase.cluster import Cluster, get_event_loop from couchbase.auth import PasswordAuthenticator # **DEPRECATED**, import ALL options from `couchbase.options` from couchbase.cluster import (Cluster, + ClusterOptions, QueryOptions, QueryScanConsistency) from couchbase.exceptions import ParsingFailedException from couchbase.mutation_state import MutationState # from couchbase.n1ql import QueryScanConsistency -async def main(): - cluster = await
Cluster.connect('couchbase://localhost', - ClusterOptions(PasswordAuthenticator('Administrator', 'password'))) +def main(): + cluster = Cluster.connect('couchbase://localhost', + ClusterOptions(PasswordAuthenticator('Administrator', 'password'))) bucket = cluster.bucket("travel-sample") collection = bucket.default_collection() @@ -26,7 +26,7 @@ async def main(): result = cluster.query( "SELECT * FROM `travel-sample` LIMIT 10;", QueryOptions(metrics=True)) - async for row in result.rows(): + for row in result.rows(): print(f'Found row: {row}') metrics = result.metadata().metrics() @@ -39,27 +39,27 @@ async def main(): # positional params q_str = "SELECT ts.* FROM `travel-sample` ts WHERE ts.`type`=$1 LIMIT 10" result = cluster.query(q_str, "hotel") - rows = [r async for r in result] + rows = [r for r in result] # positional params via QueryOptions result = cluster.query(q_str, QueryOptions(positional_parameters=["hotel"])) - rows = [r async for r in result] + rows = [r for r in result] # named params q_str = "SELECT ts.* FROM `travel-sample` ts WHERE ts.`type`=$doc_type LIMIT 10" result = cluster.query(q_str, doc_type='hotel') - rows = [r async for r in result] + rows = [r for r in result] # name params via QueryOptions result = cluster.query(q_str, QueryOptions(named_parameters={'doc_type': 'hotel'})) - rows = [r async for r in result] + rows = [r for r in result] # iterate over result/rows q_str = "SELECT ts.* FROM `travel-sample` ts WHERE ts.`type`='airline' LIMIT 10" result = cluster.query(q_str) # iterate over rows - async for row in result: + for row in result: # each row is an serialized JSON object name = row["name"] callsign = row["callsign"] @@ -67,7 +67,8 @@ async def main(): # query metrics result = cluster.query("SELECT 1=1", QueryOptions(metrics=True)) - await result.execute() + # ignore results + result.execute() print("Execution time: {}".format( result.metadata().metrics().execution_time())) @@ -76,7 +77,7 @@ async def main(): result = cluster.query( "SELECT ts.* FROM `travel-sample` ts WHERE ts.`type`='airline' LIMIT 10", QueryOptions(scan_consistency=QueryScanConsistency.REQUEST_PLUS)) - rows = [r async for r in result] + rows = [r for r in result] # Read your own writes new_airline = { @@ -89,35 +90,34 @@ async def main(): "type": "airline" } - res = await collection.upsert( - "airline_{}".format(new_airline["id"]), new_airline) + res = collection.upsert("airline_{}".format(new_airline["id"]), new_airline) ms = MutationState(res) result = cluster.query( "SELECT ts.* FROM `travel-sample` ts WHERE ts.`type`='airline' LIMIT 10", QueryOptions(consistent_with=ms)) - rows = [r async for r in result] + rows = [r for r in result] # client context id result = cluster.query( "SELECT ts.* FROM `travel-sample` ts WHERE ts.`type`='hotel' LIMIT 10", QueryOptions(client_context_id="user-44{}".format(uuid.uuid4()))) - rows = [r async for r in result] + rows = [r for r in result] # read only result = cluster.query( "SELECT ts.* FROM `travel-sample` ts WHERE ts.`type`='hotel' LIMIT 10", QueryOptions(read_only=True)) - rows = [r async for r in result] + rows = [r for r in result] agent_scope = bucket.scope("inventory") result = agent_scope.query( "SELECT a.* FROM `airline` a WHERE a.country=$country LIMIT 10", country='France') - rows = [r async for r in result] + rows = [r for r in result] + if __name__ == "__main__": - loop = get_event_loop() - loop.run_until_complete(main()) + main() From aa62c0ef4d0c6f9dee7b49dfa3cde261907e063f Mon Sep 17 00:00:00 2001 From: Jared Casey Date: Wed, 7 
May 2025 15:33:49 -0600 Subject: [PATCH 8/8] PYCBC-1685: Remove timeout logic when waiting for C++ core HTTP response Motivation ========== Since the SDK uses a timeout (the same it passes down to the C++ core) when waiting for streaming results from the C++ core, it is possible for it to raise a `StopIteration` incorrectly. Instead the SDK should continue to wait for the C++ core to return a response. Modification ============ * Remove logic to raise a `StopIteration` exception when waiting for a response for streaming operations. Instead a message is logged (at the DEBUG level) to provide potentially useful information. * Update tests to confirm changes. Change-Id: I2f65f7b5ae1c467f5ee0ed6339f8ecce264f6839 Reviewed-on: https://review.couchbase.org/c/couchbase-python-client/+/227435 Tested-by: Build Bot Reviewed-by: Dimitris Christodoulou --- acouchbase/tests/analytics_t.py | 26 ++++++++++++++++------ acouchbase/tests/query_t.py | 38 ++++++++++++++------------------- couchbase/tests/analytics_t.py | 25 ++++++++++++++++------ couchbase/tests/query_t.py | 18 ++++++++++------ src/result.cxx | 8 +------ src/result.hxx | 19 +++++++++++++---- 6 files changed, 82 insertions(+), 52 deletions(-) diff --git a/acouchbase/tests/analytics_t.py b/acouchbase/tests/analytics_t.py index 117f6f4b..5eb6e2a6 100644 --- a/acouchbase/tests/analytics_t.py +++ b/acouchbase/tests/analytics_t.py @@ -172,6 +172,7 @@ class AnalyticsTestSuite: TEST_MANIFEST = [ 'test_analytics_metadata', 'test_analytics_with_metrics', + 'test_query_large_result_set', 'test_query_named_parameters', 'test_query_named_parameters_no_options', 'test_query_named_parameters_override', @@ -229,6 +230,21 @@ async def test_analytics_with_metrics(self, cb_env): assert isinstance(metrics.processed_objects(), UnsignedInt64) assert metrics.error_count() == UnsignedInt64(0) + @pytest.mark.asyncio + async def test_query_large_result_set(self, cb_env): + # Prior to PYCBC-1685, this would raise a StopIteration b/c the timeout was + # reached on the Python side prior to the C++ core returning the result set. + # It is difficult to determine the timeout value in the Jenkins environment, + # so allow an AmbiguousTimeoutException. + count = 100000 + statement = f'SELECT {{"x1": 1, "x2": 2, "x3": 3}} FROM range(1, {count}) r;' + try: + result = cb_env.cluster.analytics_query(statement, timeout=timedelta(seconds=2)) + row_count = [1 async for _ in result.rows()] + assert len(row_count) == count + except AmbiguousTimeoutException: + pass + @pytest.mark.asyncio async def test_query_named_parameters(self, cb_env): result = cb_env.cluster.analytics_query(f'SELECT * FROM `{cb_env.DATASET_NAME}` WHERE `type` = $atype LIMIT 1', @@ -291,19 +307,17 @@ async def test_query_timeout(self, cb_env): username, pw = cb_env.config.get_username_and_pw() auth = PasswordAuthenticator(username, pw) # Prior to PYCBC-1521, this test would fail as each request would override the cluster level analytics_timeout. - # If a timeout was not provided in the request, the default 75s timeout would be used. PYCBC-1521 corrects - # this behavior so this test will pass as we are essentially forcing an AmbiguousTimeoutException because - # we are setting the cluster level analytics_timeout such a small value. - timeout_opts = ClusterTimeoutOptions(analytics_timeout=timedelta(milliseconds=1)) + # If a timeout was not provided in the request, the default 75s timeout would be used. 
+ timeout_opts = ClusterTimeoutOptions(analytics_timeout=timedelta(seconds=1)) cluster = await Cluster.connect(f'{conn_string}', ClusterOptions(auth, timeout_options=timeout_opts)) # don't need to do this except for older server versions _ = cluster.bucket(f'{cb_env.bucket.name}') - q_str = f'SELECT * FROM `{cb_env.DATASET_NAME}` LIMIT 1;' + q_str = 'SELECT sleep("some value", 1500) AS some_field;' with pytest.raises(AmbiguousTimeoutException): res = cluster.analytics_query(q_str) [r async for r in res.rows()] # if we override the timeout w/in the request the query should succeed. - res = cluster.analytics_query(q_str, timeout=timedelta(seconds=10)) + res = cluster.analytics_query(q_str, timeout=timedelta(seconds=2)) rows = [r async for r in res.rows()] assert len(rows) > 0 diff --git a/acouchbase/tests/query_t.py b/acouchbase/tests/query_t.py index 79c350d9..9bfbacc6 100644 --- a/acouchbase/tests/query_t.py +++ b/acouchbase/tests/query_t.py @@ -205,19 +205,6 @@ class QueryTestSuite: 'test_simple_query_without_options_with_kwargs_positional_params', ] - @pytest_asyncio.fixture(name='setup_udf') - async def setup_teardown_udf(self, cb_env): - EnvironmentFeatures.check_if_feature_supported('query_user_defined_functions', - cb_env.server_version_short, - cb_env.mock_server_type) - await AsyncTestEnvironment.try_n_times(3, - 1, - cb_env.load_udf) - yield - await AsyncTestEnvironment.try_n_times(3, - 1, - cb_env.drop_udf) - @pytest.fixture(scope='class') def check_preserve_expiry_supported(self, cb_env): EnvironmentFeatures.check_if_feature_supported('preserve_expiry', @@ -243,11 +230,14 @@ async def test_mixed_positional_parameters(self, cb_env): QueryOptions(positional_parameters=['xgfflq']), f'{cb_env.get_batch_id()}') await cb_env.assert_rows(result, 1) - @pytest.mark.usefixtures('setup_udf') @pytest.mark.asyncio async def test_non_blocking(self, cb_env): async def run_query(cluster, idx): - result = cluster.query("EXECUTE FUNCTION loop(1000000000)") + slow_query = ['SELECT COUNT (1) AS c FROM', + 'ARRAY_RANGE(0,100) AS d1,' + 'ARRAY_RANGE(0,100) AS d2,' + 'ARRAY_RANGE(0,100) AS d3'] + result = cluster.query(' '.join(slow_query)) rows = [] async for r in result: rows.append(r) @@ -363,6 +353,8 @@ async def test_query_ryow(self, cb_env): @pytest.mark.flaky(reruns=5, reruns_delay=1) @pytest.mark.asyncio async def test_query_timeout(self, cb_env): + if cb_env.server_version_short < 7.1: + pytest.skip("Query used in test only available on server versions >= 7.1") from acouchbase.cluster import Cluster from couchbase.auth import PasswordAuthenticator from couchbase.options import ClusterOptions, ClusterTimeoutOptions @@ -370,18 +362,20 @@ async def test_query_timeout(self, cb_env): username, pw = cb_env.config.get_username_and_pw() auth = PasswordAuthenticator(username, pw) # Prior to PYCBC-1521, this test would fail as each request would override the cluster level query_timeout. - # If a timeout was not provided in the request, the default 75s timeout would be used. PYCBC-1521 corrects - # this behavior so this test will pass as we are essentially forcing an AmbiguousTimeoutException because - # we are setting the cluster level query_timeout such a small value. - timeout_opts = ClusterTimeoutOptions(query_timeout=timedelta(milliseconds=1)) + # If a timeout was not provided in the request, the default 75s timeout would be used. 
+ timeout_opts = ClusterTimeoutOptions(query_timeout=timedelta(seconds=1.5)) cluster = await Cluster.connect(f'{conn_string}', ClusterOptions(auth, timeout_options=timeout_opts)) # don't need to do this except for older server versions _ = cluster.bucket(f'{cb_env.bucket.name}') - q_str = f'SELECT * FROM `{cb_env.bucket.name}` LIMIT 10;' + slow_query = ' '.join(['SELECT COUNT (1) AS c FROM', + 'ARRAY_RANGE(0,110) AS d1,' + 'ARRAY_RANGE(0,110) AS d2,' + 'ARRAY_RANGE(0,110) AS d3']) with pytest.raises(AmbiguousTimeoutException): - await cluster.query(q_str).execute() + await cluster.query(slow_query).execute() # If we override the timeout w/in the request the query should succeed. - rows = await cluster.query(q_str, timeout=timedelta(seconds=10)).execute() + # NOTE: a timeout of < 10s is most likely acceptable, but for the Jenkins environment we give plenty of room. + rows = await cluster.query(slow_query, timeout=timedelta(seconds=30)).execute() assert len(rows) > 0 @pytest.mark.asyncio diff --git a/couchbase/tests/analytics_t.py b/couchbase/tests/analytics_t.py index 7a9a60ac..d2bef205 100644 --- a/couchbase/tests/analytics_t.py +++ b/couchbase/tests/analytics_t.py @@ -186,6 +186,7 @@ class AnalyticsTestSuite: 'test_analytics_metadata', 'test_analytics_query_in_thread', 'test_analytics_with_metrics', + 'test_query_large_result_set', 'test_query_named_parameters', 'test_query_named_parameters_no_options', 'test_query_named_parameters_override', @@ -264,6 +265,20 @@ def test_analytics_with_metrics(self, cb_env): assert isinstance(metrics.processed_objects(), UnsignedInt64) assert metrics.error_count() == UnsignedInt64(0) + def test_query_large_result_set(self, cb_env): + # Prior to PYCBC-1685, this would raise a StopIteration b/c the timeout was + # reached on the Python side prior to the C++ core returning the result set. + # It is difficult to determine the timeout value in the Jenkins environment, + # so allow an AmbiguousTimeoutException. + count = 100000 + statement = f'SELECT {{"x1": 1, "x2": 2, "x3": 3}} FROM range(1, {count}) r;' + try: + result = cb_env.cluster.analytics_query(statement, timeout=timedelta(seconds=2)) + row_count = [1 for _ in result.rows()] + assert len(row_count) == count + except AmbiguousTimeoutException: + pass + def test_query_named_parameters(self, cb_env): result = cb_env.cluster.analytics_query(f'SELECT * FROM `{cb_env.DATASET_NAME}` WHERE `type` = $atype LIMIT 1', AnalyticsOptions(named_parameters={'atype': 'vehicle'})) @@ -318,20 +333,18 @@ def test_query_timeout(self, cb_env): username, pw = cb_env.config.get_username_and_pw() auth = PasswordAuthenticator(username, pw) # Prior to PYCBC-1521, this test would fail as each request would override the cluster level analytics_timeout. - # If a timeout was not provided in the request, the default 75s timeout would be used. PYCBC-1521 corrects - # this behavior so this test will pass as we are essentially forcing an AmbiguousTimeoutException because - # we are setting the cluster level analytics_timeout such a small value. - timeout_opts = ClusterTimeoutOptions(analytics_timeout=timedelta(milliseconds=1)) + # If a timeout was not provided in the request, the default 75s timeout would be used. 
+ timeout_opts = ClusterTimeoutOptions(analytics_timeout=timedelta(seconds=1)) cluster = Cluster.connect(f'{conn_string}', ClusterOptions(auth, timeout_options=timeout_opts)) # don't need to do this except for older server versions _ = cluster.bucket(f'{cb_env.bucket.name}') - q_str = f'SELECT * FROM `{cb_env.DATASET_NAME}` LIMIT 1;' + q_str = 'SELECT sleep("some value", 1500) AS some_field;' with pytest.raises(AmbiguousTimeoutException): res = cluster.analytics_query(q_str) [r for r in res.rows()] # if we override the timeout w/in the request the query should succeed. - res = cluster.analytics_query(q_str, timeout=timedelta(seconds=10)) + res = cluster.analytics_query(q_str, timeout=timedelta(seconds=2)) rows = [r for r in res.rows()] assert len(rows) > 0 diff --git a/couchbase/tests/query_t.py b/couchbase/tests/query_t.py index 51fabd39..a91db332 100644 --- a/couchbase/tests/query_t.py +++ b/couchbase/tests/query_t.py @@ -336,6 +336,8 @@ def test_query_raw_options(self, cb_env): # creating a new connection, allow retries @pytest.mark.flaky(reruns=5, reruns_delay=1) def test_query_timeout(self, cb_env): + if cb_env.server_version_short < 7.1: + pytest.skip("Query used in test only available on server versions >= 7.1") from couchbase.auth import PasswordAuthenticator from couchbase.cluster import Cluster from couchbase.options import ClusterOptions, ClusterTimeoutOptions @@ -343,19 +345,21 @@ def test_query_timeout(self, cb_env): username, pw = cb_env.config.get_username_and_pw() auth = PasswordAuthenticator(username, pw) # Prior to PYCBC-1521, this test would fail as each request would override the cluster level query_timeout. - # If a timeout was not provided in the request, the default 75s timeout would be used. PYCBC-1521 corrects - # this behavior so this test will pass as we are essentially forcing an AmbiguousTimeoutException because - # we are setting the cluster level query_timeout such a small value. - timeout_opts = ClusterTimeoutOptions(query_timeout=timedelta(milliseconds=1)) + # If a timeout was not provided in the request, the default 75s timeout would be used. + timeout_opts = ClusterTimeoutOptions(query_timeout=timedelta(seconds=1.5)) cluster = Cluster.connect(f'{conn_string}', ClusterOptions(auth, timeout_options=timeout_opts)) # don't need to do this except for older server versions _ = cluster.bucket(f'{cb_env.bucket.name}') - q_str = f'SELECT * FROM `{cb_env.bucket.name}` LIMIT 10;' + slow_query = ' '.join(['SELECT COUNT (1) AS c FROM', + 'ARRAY_RANGE(0,110) AS d1,' + 'ARRAY_RANGE(0,110) AS d2,' + 'ARRAY_RANGE(0,110) AS d3']) with pytest.raises(AmbiguousTimeoutException): - cluster.query(q_str).execute() + cluster.query(slow_query).execute() # If we override the timeout w/in the request the query should succeed. - rows = cluster.query(q_str, timeout=timedelta(seconds=10)).execute() + # NOTE: a timeout of < 10s is most likely acceptable, but for the Jenkins environment we give plenty of room. 
rows = cluster.query(slow_query, timeout=timedelta(seconds=30)).execute() assert len(rows) > 0 def test_query_ryow(self, cb_env): diff --git a/src/result.cxx b/src/result.cxx index 15d3fbbe..60f9f0ea 100644 --- a/src/result.cxx +++ b/src/result.cxx @@ -230,13 +230,7 @@ streamed_result_iternext(PyObject* self) Py_BEGIN_ALLOW_THREADS row = s_res->rows->get(s_res->timeout_ms); Py_END_ALLOW_THREADS } - - if (row != nullptr) { - return row; - } else { - PyErr_SetString(PyExc_StopIteration, "Timeout occurred waiting for next item in queue."); - return nullptr; - } + return row; } static PyObject* diff --git a/src/result.hxx b/src/result.hxx index c738d17c..dc6299b2 100644 --- a/src/result.hxx +++ b/src/result.hxx @@ -49,10 +49,21 @@ public: std::unique_lock<std::mutex> lock(mut_); while (rows_.empty()) { - auto now = std::chrono::system_clock::now(); - if (cv_.wait_until(lock, now + timeout_ms) == std::cv_status::timeout) { - // this will cause iternext to return nullptr, which stops iteration - return nullptr; + if (cv_.wait_for(lock, timeout_ms) == std::cv_status::timeout) { + // This timeout (e.g. timeout_ms) is the same timeout we pass to the C++ core. + // If we timeout on the Python side this means: + // - Edge case where the C++ core is about to timeout. We want to use the C++ core error + // details, + // so wait a little longer to get the C++ core timeout. + // - The result set is large and since we don't have streaming support yet, we have to + // wait for + // the entire result set to be returned. Again we should wait until we get the results. + // PYCBC-1685: Instead of trying to do some tricky error handling we instead wait for the + // C++ core results and log a message that can provide insight to users about the SDK + // behavior. + CB_LOG_DEBUG( + "PYCBC: No results received from C++ core after {}ms. Continue to wait for results.", + timeout_ms.count()); } }