diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index bc0724e4a..df790120a 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -33,23 +33,23 @@ jobs: - "3.0.2" - "3.5.2" - "3.9.0" + - "4.0.0" python: - - "3.12" + - "3.13" include: #- python: "pypy3.9" # kafka: "2.6.0" # experimental: true - #- python: "~3.13.0-0" - # kafka: "2.6.0" - # experimental: true - python: "3.8" - kafka: "3.9.0" + kafka: "4.0.0" - python: "3.9" - kafka: "3.9.0" + kafka: "4.0.0" - python: "3.10" - kafka: "3.9.0" + kafka: "4.0.0" - python: "3.11" - kafka: "3.9.0" + kafka: "4.0.0" + - python: "3.12" + kafka: "4.0.0" steps: - uses: actions/checkout@v4 @@ -65,7 +65,6 @@ jobs: sudo apt install -y libsnappy-dev libzstd-dev python -m pip install --upgrade pip pip install -r requirements-dev.txt - pip install tox-gh-actions - name: Pylint run: pylint --recursive=y --errors-only --exit-zero kafka test - name: Setup java @@ -73,8 +72,20 @@ jobs: with: distribution: temurin java-version: 23 - - name: Pull Kafka release + - name: Restore cached kafka releases + id: cache-servers-dist-restore + uses: actions/cache/restore@v4 + with: + path: servers/dist + key: servers-dist-${{ matrix.kafka }} + - name: Install Kafka release run: make servers/${{ matrix.kafka }}/kafka-bin + - name: Update kafka release cache + id: cache-servers-dist-save + uses: actions/cache/save@v4 + with: + path: servers/dist + key: ${{ steps.cache-servers-dist-restore.outputs.cache-primary-key }} - name: Pytest run: make test env: diff --git a/CHANGES.md b/CHANGES.md index ee28a84e7..372aebfc6 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,309 @@ +# 2.2.13 (June 20, 2025) + +Fixes +* Use client.await_ready() to simplify blocking wait and add timeout to admin client (#2648) +* Fixup import style in example.py + +Documentation +* update README kafka version badge to include 4.0 + +# 2.2.12 (June 18, 2025) + +Fixes +* Fix construction of final GSSAPI authentication message (#2647) +* Avoid RuntimeError on mutated `_completed_fetches` deque in consumer fetcher (#2646) +* Throw exception on invalid bucket type (#2642) + + +# 2.2.11 (June 5, 2025) + +Fixes +* Do not ignore metadata response for single topic with error (#2640) +* Fix decoding bug in AWS_MSK_IAM mechanism (#2639) +* Add synchronized decorator; add lock to subscription state (#2636) +* Update build links in documentation (#2634) + +# 2.2.10 (May 22, 2025) + +Fixes +* Set the current host in the SASL configs (#2633) +* Fix sasl gssapi plugin: do not rely on `client_ctx.complete` in `auth_bytes()` (#2631) + +# 2.2.9 (May 21, 2025) + +Fixes +* Do not reset fetch positions if offset commit fetch times out (#2629) + +Logging / Error Messages +* More / updated debug logging for coordinator / consumer (#2630) + +# 2.2.8 (May 20, 2025) + +Fixes +* Wait for next heartbeat in thread loop; check for connected coordinator (#2622) +* Acquire client lock in heartbeat thread before sending requests (#2620) + +Logging / Error Messages +* Log all SyncGroupResponse errors as info+ +* More coordinator / heartbeat logging (#2621) +* Fix timeout seconds error message in KafkaProducer (#2627) +* Update offset commit error handling; use RebalanceInProgressError if applicable (#2623) + +# 2.2.7 (May 13, 2025) + +Fixes +* Minor Heartbeat updates: catch more exceptions / log configuration / raise KafkaConfigurationError (#2618) + +# 2.2.6 (May 8, 2025) + +Fixes +* Only disable heartbeat thread once at beginning of join-group (#2617) + +# 
2.2.5 (May 8, 2025) + +Fixes +* Fix producer busy loop with no pending batches (#2616) +* Fixup py27 fetcher test failure + +# 2.2.4 (May 3, 2025) + +Fixes +* Do not `reset_generation` after RebalanceInProgressError; improve CommitFailed error messages (#2614) +* Fix KafkaConsumer.poll() with zero timeout (#2613) +* Fix Fetch._reset_offsets_async() KeyError when fetching from multiple nodes (#2612) + +# 2.2.3 (May 1, 2025) + +Fixes +* Ignore leading SECURITY_PROTOCOL:// in bootstrap_servers (#2608) +* Only create fetch requests for ready nodes (#2607) + +# 2.2.2 (Apr 30, 2025) + +Fixes +* Fix lint errors + +# 2.2.1 (Apr 29, 2025) + +Fixes +* Always try ApiVersionsRequest v0, even on broker disconnect (#2603) +* Fix SubscriptionState AttributeError in KafkaConsumer (#2599) + +Documentation +* Add transactional examples to docs + +# 2.2.0 (Apr 28, 2025) + +KafkaProducer +* KIP-98: Add idempotent producer support (#2569) +* KIP-98: Transactional Producer (#2587) +* KIP-98: Add offsets support to transactional KafkaProducer (#2590) +* Prefix producer logs w/ client id and transactional id (#2591) +* KAFKA-5429: Ignore produce response if batch was previously aborted +* KIP-91: KafkaProducer `delivery_timeout_ms` +* Default retries -> infinite +* Expand KafkaProducer docstring w/ idempotent and transactional notes +* RecordAccumulator: Use helper method to get/set `_tp_locks`; get dq with lock in reenqueue() + +KafkaConsumer +* KIP-98: Add Consumer support for `READ_COMMITTED` (#2582) +* KIP-394: handle `MEMBER_ID_REQUIRED` error w/ second join group request (#2598) +* KAFKA-5078: Defer fetch record exception if iterator has already moved across a valid record +* KAFKA-5075: Defer consumer fetcher exception if fetch position has already increased +* KAFKA-4937: Batch offset fetches in the Consumer +* KAFKA-4547: Avoid resetting paused partitions to committed offsets +* KAFKA-6397: Consumer should not block setting positions of unavailable partitions (#2593) + +Potentially Breaking Changes (internal) +* Rename CorruptRecordException -> CorruptRecordError +* Rename Coordinator errors to generic not group (#2585) +* Rename `ClusterMetadata.add_group_coordinator` -> `add_coordinator` + support txn type +* Use SaslAuthenticationFailedError in kafka.conn connection failure; Drop unused AuthenticationFailedError +* Remove old/unused errors; reorder; KafkaTimeout -> retriable +* Drop `log_start_offset` from producer RecordMetadata + +Internal +* MemoryRecords iterator; MemoryRecordsBuilder records() helper +* Convert `DefaultRecordsBuilder.size_in_bytes` to classmethod + +Fixes +* Resolve datetime deprecation warnings (#2589) +* Avoid self refcount in log messages; test thread close on all pythons +* Fix client.wakeup() race from producer/sender close +* Fix ElectionNotNeededError handling in admin client + +Tests +* Move integration tests and fixtures to test/integration/; simplify unit fixtures (#2588) +* Expand Sender test coverage (#2586) +* py2 test fixups +* Drop unused KafkaClient import from `test_fetcher` + +# 2.1.6 (May 2, 2025) + +Fixes +* Only create fetch requests for ready nodes (#2607) + +# 2.1.5 (Apr 4, 2025) + +Fixes +* Fix python2.7 errors (#2578) + +Improvements +* Move benchmark scripts to kafka.benchmarks module (#2584) +* Use __slots__ for metrics (#2583) +* Pass `metrics_enabled=False` to disable metrics (#2581) +* Drop unused kafka.producer.buffer / SimpleBufferPool (#2580) +* Raise UnsupportedVersionError from coordinator (#2579) + +# 2.1.4 (Mar 28, 2025) + +Fixes +* Dont block 
pending FetchRequests when Metadata update requested (#2576) +* Fix MetadataRequest for no topics (#2573) +* Send final error byte x01 on Sasl OAuth failure (#2572) +* Reset SASL state on disconnect (#2571) +* Try import new Sequence before old to avoid DeprecationWarning + +Improvements +* Update Makefile default to 4.0 broker; add make fixture +* Improve connection state logging (#2574) + +# 2.1.3 (Mar 25, 2025) + +Fixes +* Fix crash when switching to closest compatible api_version in KafkaClient (#2567) +* Fix maximum version to send an OffsetFetchRequest in KafkaAdminClient (#2563) +* Return empty set from consumer.partitions_for_topic when topic not found (#2556) + +Improvements +* KIP-511: Use ApiVersions v4 on initial connect w/ client_software_name + version (#2558) +* KIP-74: Manage assigned partition order in consumer (#2562) +* KIP-70: Auto-commit offsets on consumer.unsubscribe(), defer assignment changes to rejoin (#2560) +* Use SubscriptionType to track topics/pattern/user assignment (#2565) +* Add optional timeout_ms kwarg to consumer.close() (#2564) +* Move ensure_valid_topic_name to kafka.util; use in client and producer (#2561) + +Testing +* Support KRaft / 4.0 brokers in tests (#2559) +* Test older pythons against 4.0 broker + +Compatibility +* Add python 3.13 to compatibility list + +# 2.1.2 (Mar 17, 2025) + +Fixes +* Simplify consumer.poll send fetches logic +* Fix crc validation in consumer / fetcher +* Lazy `_unpack_records` in PartitionRecords to fix premature fetch offset advance in consumer.poll() (#2555) +* Debug log fetch records return; separate offsets update log +* Fix Fetcher retriable error handling (#2554) +* Use six.add_metaclass for py2/py3 compatible abc (#2551) + +Improvements +* Add FetchMetrics class; move topic_fetch_metrics inside aggregator +* DefaultRecordsBatchBuilder: support empty batch +* MemoryRecordsBuilder: support arbitrary offset, skipping offsets +* Add record.validate_crc() for v0/v1 crc checks +* Remove fetcher message_generator / iterator interface +* Add size_in_bytes to ABCRecordBatch and implement for Legacy and Default +* Add magic property to ABCRecord and implement for LegacyRecord + +# 2.1.1 (Mar 16, 2025) + +Fixes +* Fix packaging of 2.1.0 in Fedora: testing requires "pytest-timeout". 
(#2550) +* Improve connection error handling when try_api_versions_check fails all attempts (#2548) +* Add lock synchronization to Future success/failure (#2549) +* Fix StickyPartitionAssignor encode + +# 2.1.0 (Mar 15, 2025) + +Support Kafka Broker 2.1 API Baseline +* Add baseline leader_epoch support for ListOffsets v4 / FetchRequest v10 (#2511) +* Support OffsetFetch v5 / OffsetCommit v6 (2.1 baseline) (#2505) +* Support 2.1 baseline consumer group apis (#2503) +* Support FindCoordinatorRequest v2 in consumer and admin client (#2502) +* Support ListOffsets v3 in consumer (#2501) +* Support Fetch Request/Response v6 in consumer (#2500) +* Add support for Metadata Request/Response v7 (#2497) +* Implement Incremental Fetch Sessions / KIP-227 (#2508) +* Implement client-side connection throttling / KIP-219 (#2510) +* Add KafkaClient.api_version(operation) for best available from api_versions (#2495) + +Consumer +* Timeout coordinator poll / ensure_coordinator_ready / ensure_active_group (#2526) +* Add optional timeout_ms kwarg to remaining consumer/coordinator methods (#2544) +* Check for coordinator.poll failure in KafkaConsumer +* Only mark coordinator dead if connection_delay > 0 (#2530) +* Delay group coordinator until after bootstrap (#2539) +* KAFKA-4160: Ensure rebalance listener not called with coordinator lock (#1438) +* Call default_offset_commit_callback after `_maybe_auto_commit_offsets_async` (#2546) +* Remove legacy/v1 consumer message iterator (#2543) +* Log warning when attempting to list offsets for unknown topic/partition (#2540) +* Add heartbeat thread id to debug logs on start +* Add inner_timeout_ms handler to fetcher; add fallback (#2529) + +Producer +* KafkaProducer: Flush pending records before close() (#2537) +* Raise immediate error on producer.send after close (#2542) +* Limit producer close timeout to 1sec in __del__; use context managers to close in test_producer +* Use NullLogger in producer atexit cleanup +* Attempt to fix metadata race condition when partitioning in producer.send (#2523) +* Remove unused partial KIP-467 implementation (ProduceResponse batch error details) (#2524) + +AdminClient +* Implement perform leader election (#2536) +* Support delete_records (#2535) + +Networking +* Call ApiVersionsRequest during connection, prior to Sasl Handshake (#2493) +* Fake api_versions for old brokers, rename to ApiVersionsRequest, and handle error decoding (#2494) +* Debug log when skipping api_versions request with pre-configured api_version +* Only refresh metadata if connection fails all dns records (#2532) +* Support connections through SOCKS5 proxies (#2531) +* Fix OverflowError when connection_max_idle_ms is 0 or inf (#2538) +* socket.setblocking for eventlet/gevent compatibility +* Support custom per-request timeouts (#2498) +* Include request_timeout_ms in request debug log +* Support client.poll with future and timeout_ms +* mask unused afi var +* Debug log if check_version connection attempt fails + +SASL Modules +* Refactor Sasl authentication with SaslMechanism abstract base class; support SaslAuthenticate (#2515) +* Add SSPI (Kerberos for Windows) authentication mechanism (#2521) +* Support AWS_MSK_IAM authentication (#2519) +* Cleanup sasl mechanism configuration checks; fix gssapi bugs; add sasl_kerberos_name config (#2520) +* Move kafka.oauth.AbstractTokenProvider -> kafka.sasl.oauth.AbstractTokenProvider (#2525) + +Testing +* Bump default python to 3.13 in CI tests (#2541) +* Update pytest log_format: use logger instead of filename; add thread 
id +* Improve test_consumer_group::test_group logging before group stabilized (#2534) +* Limit test duration to 5mins w/ pytest-timeout +* Fix external kafka/zk fixtures for testing (#2533) +* Disable zookeeper admin server to avoid port conflicts +* Set default pytest log level to debug +* test_group: shorter timeout, more logging, more sleep +* Cache servers/dist in github actions workflow (#2527) +* Remove tox.ini; update testing docs +* Use thread-specific client_id in test_group +* Fix subprocess log warning; specify timeout_ms kwarg in consumer.poll tests +* Only set KAFKA_JVM_PERFORMANCE_OPTS in makefile if unset; add note re: 2.0-2.3 broker testing +* Add kafka command to test.fixtures; raise FileNotFoundError if version not installed + +Documentation +* Improve ClusterMetadata docs re: node_id/broker_id str/int types +* Document api_version_auto_timeout_ms default; override in group tests + +Fixes +* Signal close to metrics expire_loop +* Add kafka.util timeout_ms_fn +* fixup TopicAuthorizationFailedError construction +* Fix lint issues via ruff check (#2522) +* Make the "mock" dependency optional (only used in Python < 3.3). (#2518) + # 2.0.6 (Mar 4, 2025) Networking diff --git a/Makefile b/Makefile index 2df1c6696..30da9cf91 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ SHELL = bash -export KAFKA_VERSION ?= 2.4.0 +export KAFKA_VERSION ?= 4.0.0 DIST_BASE_URL ?= https://archive.apache.org/dist/kafka/ # Required to support testing old kafka versions on newer java releases @@ -21,7 +21,10 @@ lint: pylint --recursive=y --errors-only kafka test test: build-integration - pytest --durations=10 $(PYTESTS) + pytest $(PYTESTS) + +fixture: build-integration + python -m test.integration.fixtures kafka cov-local: build-integration pytest --pylint --pylint-rcfile=pylint.rc --pylint-error-types=EF --cov=kafka \ @@ -68,6 +71,7 @@ kafka_scala_0_9_0_1=2.11 kafka_scala_0_10_0_0=2.11 kafka_scala_0_10_0_1=2.11 kafka_scala_0_10_1_0=2.11 +kafka_scala_4_0_0=2.13 scala_version=$(if $(SCALA_VERSION),$(SCALA_VERSION),$(if $(kafka_scala_$(subst .,_,$(1))),$(kafka_scala_$(subst .,_,$(1))),2.12)) kafka_artifact_name=kafka_$(call scala_version,$(1))-$(1).$(if $(filter 0.8.0,$(1)),tar.gz,tgz) @@ -95,7 +99,7 @@ servers/%/kafka-bin: servers/dist/$$(call kafka_artifact_name,$$*) | servers/dis if [[ "$*" < "1" ]]; then make servers/patch-libs/$*; fi servers/%/api_versions: servers/$$*/kafka-bin - KAFKA_VERSION=$* python -m test.fixtures get_api_versions >$@ + KAFKA_VERSION=$* python -m test.integration.fixtures get_api_versions >$@ servers/%/messages: servers/$$*/kafka-bin cd servers/$*/ && jar xvf kafka-bin/libs/kafka-clients-$*.jar common/message/ diff --git a/README.rst b/README.rst index 2de04c673..b11868241 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ Kafka Python client ------------------------ -.. image:: https://img.shields.io/badge/kafka-3.9--0.8-brightgreen.svg +.. image:: https://img.shields.io/badge/kafka-4.0--0.8-brightgreen.svg :target: https://kafka-python.readthedocs.io/en/master/compatibility.html .. image:: https://img.shields.io/pypi/pyversions/kafka-python.svg :target: https://pypi.python.org/pypi/kafka-python @@ -94,6 +94,14 @@ that expose basic message attributes: topic, partition, offset, key, and value: for msg in consumer: print (msg.headers) +.. code-block:: python + + # Read only committed messages from transactional topic + consumer = KafkaConsumer(isolation_level='read_committed') + consumer.subscribe(['txn_topic']) + for msg in consumer: + print(msg) + .. 
code-block:: python # Get consumer metrics @@ -153,6 +161,21 @@ for more details. for i in range(1000): producer.send('foobar', b'msg %d' % i) +.. code-block:: python + + # Use transactions + producer = KafkaProducer(transactional_id='fizzbuzz') + producer.init_transactions() + producer.begin_transaction() + future = producer.send('txn_topic', value=b'yes') + future.get() # wait for successful produce + producer.commit_transaction() # commit the transaction + + producer.begin_transaction() + future = producer.send('txn_topic', value=b'no') + future.get() # wait for successful produce + producer.abort_transaction() # abort the transaction + .. code-block:: python # Include record headers. The format is list of tuples with string key diff --git a/benchmarks/load_example.py b/benchmarks/load_example.py deleted file mode 100755 index eef113e9a..000000000 --- a/benchmarks/load_example.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function -import threading, logging, time - -from kafka import KafkaConsumer, KafkaProducer - -msg_size = 524288 - -producer_stop = threading.Event() -consumer_stop = threading.Event() - -class Producer(threading.Thread): - big_msg = b'1' * msg_size - - def run(self): - producer = KafkaProducer(bootstrap_servers='localhost:9092') - self.sent = 0 - - while not producer_stop.is_set(): - producer.send('my-topic', self.big_msg) - self.sent += 1 - producer.flush() - - -class Consumer(threading.Thread): - - def run(self): - consumer = KafkaConsumer(bootstrap_servers='localhost:9092', - auto_offset_reset='earliest') - consumer.subscribe(['my-topic']) - self.valid = 0 - self.invalid = 0 - - for message in consumer: - if len(message.value) == msg_size: - self.valid += 1 - else: - self.invalid += 1 - - if consumer_stop.is_set(): - break - - consumer.close() - -def main(): - threads = [ - Producer(), - Consumer() - ] - - for t in threads: - t.start() - - time.sleep(10) - producer_stop.set() - consumer_stop.set() - print('Messages sent: %d' % threads[0].sent) - print('Messages recvd: %d' % threads[1].valid) - print('Messages invalid: %d' % threads[1].invalid) - -if __name__ == "__main__": - logging.basicConfig( - format='%(asctime)s.%(msecs)s:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s', - level=logging.INFO - ) - main() diff --git a/docs/changelog.rst b/docs/changelog.rst index 3216ad8ff..430f8a512 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,397 @@ Changelog ========= +2.2.13 (June 20, 2025) +###################### + +Fixes +----- +* Use client.await_ready() to simplify blocking wait and add timeout to admin client (#2648) +* Fixup import style in example.py + +Documentation +------------- +* update README kafka version badge to include 4.0 + + +2.2.12 (June 18, 2025) +###################### + +Fixes +----- +* Fix construction of final GSSAPI authentication message (#2647) +* Avoid RuntimeError on mutated `_completed_fetches` deque in consumer fetcher (#2646) +* Throw exception on invalid bucket type (#2642) + + +2.2.11 (June 5, 2025) +##################### + +Fixes +----- +* Do not ignore metadata response for single topic with error (#2640) +* Fix decoding bug in AWS_MSK_IAM mechanism (#2639) +* Add synchronized decorator; add lock to subscription state (#2636) +* Update build links in documentation (#2634) + + +2.2.10 (May 22, 2025) +##################### + +Fixes +----- +* Set the current host in the SASL configs (#2633) +* Fix sasl gssapi plugin: do not rely on `client_ctx.complete` in 
`auth_bytes()` (#2631) + + +2.2.9 (May 21, 2025) +#################### + +Fixes +----- +* Do not reset fetch positions if offset commit fetch times out (#2629) + +Logging / Error Messages +------------------------ +* More / updated debug logging for coordinator / consumer (#2630) + + +2.2.8 (May 20, 2025) +#################### + +Fixes +----- +* Wait for next heartbeat in thread loop; check for connected coordinator (#2622) +* Acquire client lock in heartbeat thread before sending requests (#2620) + +Logging / Error Messages +------------------------ +* Log all SyncGroupResponse errors as info+ +* More coordinator / heartbeat logging (#2621) +* Fix timeout seconds error message in KafkaProducer (#2627) +* Update offset commit error handling; use RebalanceInProgressError if applicable (#2623) + + +2.2.7 (May 13, 2025) +#################### + +Fixes +----- +* Minor Heartbeat updates: catch more exceptions / log configuration / raise KafkaConfigurationError (#2618) + + +2.2.6 (May 8, 2025) +################### + +Fixes +----- +* Only disable heartbeat thread once at beginning of join-group (#2617) + + +2.2.5 (May 8, 2025) +################### + +Fixes +----- +* Fix producer busy loop with no pending batches (#2616) +* Fixup py27 fetcher test failure + + +2.2.4 (May 3, 2025) +################### + +Fixes +----- +* Do not `reset_generation` after RebalanceInProgressError; improve CommitFailed error messages (#2614) +* Fix KafkaConsumer.poll() with zero timeout (#2613) +* Fix Fetch._reset_offsets_async() KeyError when fetching from multiple nodes (#2612) + + +2.2.3 (May 1, 2025) +################### + +Fixes +----- +* Ignore leading SECURITY_PROTOCOL:// in bootstrap_servers (#2608) +* Only create fetch requests for ready nodes (#2607) + + +2.2.2 (Apr 30, 2025) +#################### + +Fixes +----- +* Fix lint errors + + +2.2.1 (Apr 29, 2025) +#################### + +Fixes +----- +* Always try ApiVersionsRequest v0, even on broker disconnect (#2603) +* Fix SubscriptionState AttributeError in KafkaConsumer (#2599) + +Documentation +------------- +* Add transactional examples to docs + + +2.2.0 (Apr 28, 2025) +#################### + +KafkaProducer +------------- +* KIP-98: Add idempotent producer support (#2569) +* KIP-98: Transactional Producer (#2587) +* KIP-98: Add offsets support to transactional KafkaProducer (#2590) +* Prefix producer logs w/ client id and transactional id (#2591) +* KAFKA-5429: Ignore produce response if batch was previously aborted +* KIP-91: KafkaProducer `delivery_timeout_ms` +* Default retries -> infinite +* Expand KafkaProducer docstring w/ idempotent and transactional notes +* RecordAccumulator: Use helper method to get/set `_tp_locks`; get dq with lock in reenqueue() + +KafkaConsumer +------------- +* KIP-98: Add Consumer support for `READ_COMMITTED` (#2582) +* KIP-394: handle `MEMBER_ID_REQUIRED` error w/ second join group request (#2598) +* KAFKA-5078: Defer fetch record exception if iterator has already moved across a valid record +* KAFKA-5075: Defer consumer fetcher exception if fetch position has already increased +* KAFKA-4937: Batch offset fetches in the Consumer +* KAFKA-4547: Avoid resetting paused partitions to committed offsets +* KAFKA-6397: Consumer should not block setting positions of unavailable partitions (#2593) + +Potentially Breaking Changes (internal) +--------------------------------------- +* Rename CorruptRecordException -> CorruptRecordError +* Rename Coordinator errors to generic not group (#2585) +* Rename 
`ClusterMetadata.add_group_coordinator` -> `add_coordinator` + support txn type +* Use SaslAuthenticationFailedError in kafka.conn connection failure; Drop unused AuthenticationFailedError +* Remove old/unused errors; reorder; KafkaTimeout -> retriable +* Drop `log_start_offset` from producer RecordMetadata + +Internal +-------- +* MemoryRecords iterator; MemoryRecordsBuilder records() helper +* Convert `DefaultRecordsBuilder.size_in_bytes` to classmethod + +Fixes +----- +* Resolve datetime deprecation warnings (#2589) +* Avoid self refcount in log messages; test thread close on all pythons +* Fix client.wakeup() race from producer/sender close +* Fix ElectionNotNeededError handling in admin client + +Tests +----- +* Move integration tests and fixtures to test/integration/; simplify unit fixtures (#2588) +* Expand Sender test coverage (#2586) +* py2 test fixups +* Drop unused KafkaClient import from `test_fetcher` + + +2.1.6 (May 2, 2025) +################### + +Fixes +----- +* Only create fetch requests for ready nodes (#2607) + + +2.1.5 (Apr 4, 2025) +################### + +Fixes +------ +* Fix python2.7 errors (#2578) + +Improvements +------------ +* Move benchmark scripts to kafka.benchmarks module (#2584) +* Use __slots__ for metrics (#2583) +* Pass `metrics_enabled=False` to disable metrics (#2581) +* Drop unused kafka.producer.buffer / SimpleBufferPool (#2580) +* Raise UnsupportedVersionError from coordinator (#2579) + + +2.1.4 (Mar 28, 2025) +#################### + +Fixes +----- +* Dont block pending FetchRequests when Metadata update requested (#2576) +* Fix MetadataRequest for no topics (#2573) +* Send final error byte x01 on Sasl OAuth failure (#2572) +* Reset SASL state on disconnect (#2571) +* Try import new Sequence before old to avoid DeprecationWarning + +Improvements +------------ +* Update Makefile default to 4.0 broker; add make fixture +* Improve connection state logging (#2574) + + +2.1.3 (Mar 25, 2025) +#################### + +Fixes +----- +* Fix crash when switching to closest compatible api_version in KafkaClient (#2567) +* Fix maximum version to send an OffsetFetchRequest in KafkaAdminClient (#2563) +* Return empty set from consumer.partitions_for_topic when topic not found (#2556) + +Improvements +------------ +* KIP-511: Use ApiVersions v4 on initial connect w/ client_software_name + version (#2558) +* KIP-74: Manage assigned partition order in consumer (#2562) +* KIP-70: Auto-commit offsets on consumer.unsubscribe(), defer assignment changes to rejoin (#2560) +* Use SubscriptionType to track topics/pattern/user assignment (#2565) +* Add optional timeout_ms kwarg to consumer.close() (#2564) +* Move ensure_valid_topic_name to kafka.util; use in client and producer (#2561) + +Testing +------- +* Support KRaft / 4.0 brokers in tests (#2559) +* Test older pythons against 4.0 broker + +Compatibility +------------- +* Add python 3.13 to compatibility list + + +2.1.2 (Mar 17, 2025) +#################### + +Fixes +----- +* Simplify consumer.poll send fetches logic +* Fix crc validation in consumer / fetcher +* Lazy `_unpack_records` in PartitionRecords to fix premature fetch offset advance in consumer.poll() (#2555) +* Debug log fetch records return; separate offsets update log +* Fix Fetcher retriable error handling (#2554) +* Use six.add_metaclass for py2/py3 compatible abc (#2551) + +Improvements +------------ +* Add FetchMetrics class; move topic_fetch_metrics inside aggregator +* DefaultRecordsBatchBuilder: support empty batch +* MemoryRecordsBuilder: support 
arbitrary offset, skipping offsets +* Add record.validate_crc() for v0/v1 crc checks +* Remove fetcher message_generator / iterator interface +* Add size_in_bytes to ABCRecordBatch and implement for Legacy and Default +* Add magic property to ABCRecord and implement for LegacyRecord + + +2.1.1 (Mar 16, 2025) +#################### + +Fixes +----- +* Fix packaging of 2.1.0 in Fedora: testing requires "pytest-timeout". (#2550) +* Improve connection error handling when try_api_versions_check fails all attempts (#2548) +* Add lock synchronization to Future success/failure (#2549) +* Fix StickyPartitionAssignor encode + + +2.1.0 (Mar 15, 2025) +#################### + +Support Kafka Broker 2.1 API Baseline +------------------------------------- +* Add baseline leader_epoch support for ListOffsets v4 / FetchRequest v10 (#2511) +* Support OffsetFetch v5 / OffsetCommit v6 (2.1 baseline) (#2505) +* Support 2.1 baseline consumer group apis (#2503) +* Support FindCoordinatorRequest v2 in consumer and admin client (#2502) +* Support ListOffsets v3 in consumer (#2501) +* Support Fetch Request/Response v6 in consumer (#2500) +* Add support for Metadata Request/Response v7 (#2497) +* Implement Incremental Fetch Sessions / KIP-227 (#2508) +* Implement client-side connection throttling / KIP-219 (#2510) +* Add KafkaClient.api_version(operation) for best available from api_versions (#2495) + +Consumer +-------- +* Timeout coordinator poll / ensure_coordinator_ready / ensure_active_group (#2526) +* Add optional timeout_ms kwarg to remaining consumer/coordinator methods (#2544) +* Check for coordinator.poll failure in KafkaConsumer +* Only mark coordinator dead if connection_delay > 0 (#2530) +* Delay group coordinator until after bootstrap (#2539) +* KAFKA-4160: Ensure rebalance listener not called with coordinator lock (#1438) +* Call default_offset_commit_callback after `_maybe_auto_commit_offsets_async` (#2546) +* Remove legacy/v1 consumer message iterator (#2543) +* Log warning when attempting to list offsets for unknown topic/partition (#2540) +* Add heartbeat thread id to debug logs on start +* Add inner_timeout_ms handler to fetcher; add fallback (#2529) + +Producer +-------- +* KafkaProducer: Flush pending records before close() (#2537) +* Raise immediate error on producer.send after close (#2542) +* Limit producer close timeout to 1sec in __del__; use context managers to close in test_producer +* Use NullLogger in producer atexit cleanup +* Attempt to fix metadata race condition when partitioning in producer.send (#2523) +* Remove unused partial KIP-467 implementation (ProduceResponse batch error details) (#2524) + +AdminClient +----------- +* Implement perform leader election (#2536) +* Support delete_records (#2535) + +Networking +---------- +* Call ApiVersionsRequest during connection, prior to Sasl Handshake (#2493) +* Fake api_versions for old brokers, rename to ApiVersionsRequest, and handle error decoding (#2494) +* Debug log when skipping api_versions request with pre-configured api_version +* Only refresh metadata if connection fails all dns records (#2532) +* Support connections through SOCKS5 proxies (#2531) +* Fix OverflowError when connection_max_idle_ms is 0 or inf (#2538) +* socket.setblocking for eventlet/gevent compatibility +* Support custom per-request timeouts (#2498) +* Include request_timeout_ms in request debug log +* Support client.poll with future and timeout_ms +* mask unused afi var +* Debug log if check_version connection attempt fails + +SASL Modules +------------ +* 
Refactor Sasl authentication with SaslMechanism abstract base class; support SaslAuthenticate (#2515) +* Add SSPI (Kerberos for Windows) authentication mechanism (#2521) +* Support AWS_MSK_IAM authentication (#2519) +* Cleanup sasl mechanism configuration checks; fix gssapi bugs; add sasl_kerberos_name config (#2520) +* Move kafka.oauth.AbstractTokenProvider -> kafka.sasl.oauth.AbstractTokenProvider (#2525) + +Testing +------- +* Bump default python to 3.13 in CI tests (#2541) +* Update pytest log_format: use logger instead of filename; add thread id +* Improve test_consumer_group::test_group logging before group stabilized (#2534) +* Limit test duration to 5mins w/ pytest-timeout +* Fix external kafka/zk fixtures for testing (#2533) +* Disable zookeeper admin server to avoid port conflicts +* Set default pytest log level to debug +* test_group: shorter timeout, more logging, more sleep +* Cache servers/dist in github actions workflow (#2527) +* Remove tox.ini; update testing docs +* Use thread-specific client_id in test_group +* Fix subprocess log warning; specify timeout_ms kwarg in consumer.poll tests +* Only set KAFKA_JVM_PERFORMANCE_OPTS in makefile if unset; add note re: 2.0-2.3 broker testing +* Add kafka command to test.fixtures; raise FileNotFoundError if version not installed + +Documentation +------------- +* Improve ClusterMetadata docs re: node_id/broker_id str/int types +* Document api_version_auto_timeout_ms default; override in group tests + +Fixes +----- +* Signal close to metrics expire_loop +* Add kafka.util timeout_ms_fn +* fixup TopicAuthorizationFailedError construction +* Fix lint issues via ruff check (#2522) +* Make the "mock" dependency optional (only used in Python < 3.3). (#2518) + + 2.0.6 (Mar 4, 2025) ################### diff --git a/docs/compatibility.rst b/docs/compatibility.rst index d9e2ba957..353273114 100644 --- a/docs/compatibility.rst +++ b/docs/compatibility.rst @@ -1,21 +1,21 @@ Compatibility ------------- -.. image:: https://img.shields.io/badge/kafka-3.9--0.8-brightgreen.svg +.. image:: https://img.shields.io/badge/kafka-4.0--0.8-brightgreen.svg :target: https://kafka-python.readthedocs.io/compatibility.html .. image:: https://img.shields.io/pypi/pyversions/kafka-python.svg :target: https://pypi.python.org/pypi/kafka-python -kafka-python is compatible with (and tested against) broker versions 3.9 +kafka-python is compatible with (and tested against) broker versions 4.0 through 0.8.0 . kafka-python is not compatible with the 0.8.2-beta release. Because the kafka server protocol is backwards compatible, kafka-python is expected to work with newer broker releases as well. Although kafka-python is tested and expected to work on recent broker versions, -not all features are supported. Specifically, authentication codecs, and -transactional producer/consumer support are not fully implemented. PRs welcome! +not all features are supported. Specifically, transactional producer/consumer +support is not fully implemented. PRs welcome! -kafka-python is tested on python 2.7, and 3.8-3.12. +kafka-python is tested on python 2.7, and 3.8-3.13. Builds and tests via Github Actions Workflows. See https://github.com/dpkp/kafka-python/actions diff --git a/docs/index.rst b/docs/index.rst index 5dd4f183a..0e9aa5f7b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,14 +1,14 @@ kafka-python ############ -.. image:: https://img.shields.io/badge/kafka-3.9--0.8-brightgreen.svg +.. 
image:: https://img.shields.io/badge/kafka-4.0--0.8-brightgreen.svg :target: https://kafka-python.readthedocs.io/en/master/compatibility.html .. image:: https://img.shields.io/pypi/pyversions/kafka-python.svg :target: https://pypi.python.org/pypi/kafka-python .. image:: https://coveralls.io/repos/dpkp/kafka-python/badge.svg?branch=master&service=github :target: https://coveralls.io/github/dpkp/kafka-python?branch=master -.. image:: https://travis-ci.org/dpkp/kafka-python.svg?branch=master - :target: https://travis-ci.org/dpkp/kafka-python +.. image:: https://img.shields.io/github/actions/workflow/status/dpkp/kafka-python/python-package.yml + :target: https://github.com/dpkp/kafka-python/actions/workflows/python-package.yml .. image:: https://img.shields.io/badge/license-Apache%202-blue.svg :target: https://github.com/dpkp/kafka-python/blob/master/LICENSE @@ -80,6 +80,26 @@ that expose basic message attributes: topic, partition, offset, key, and value: for msg in consumer: assert isinstance(msg.value, dict) +.. code-block:: python + + # Access record headers. The returned value is a list of tuples + # with str, bytes for key and value + for msg in consumer: + print (msg.headers) + +.. code-block:: python + + # Read only committed messages from transactional topic + consumer = KafkaConsumer(isolation_level='read_committed') + consumer.subscribe(['txn_topic']) + for msg in consumer: + print(msg) + +.. code-block:: python + + # Get consumer metrics + metrics = consumer.metrics() + KafkaProducer ************* @@ -133,6 +153,32 @@ client. See `KafkaProducer `_ for more details. for i in range(1000): producer.send('foobar', b'msg %d' % i) +.. code-block:: python + + # Use transactions + producer = KafkaProducer(transactional_id='fizzbuzz') + producer.init_transactions() + producer.begin_transaction() + future = producer.send('txn_topic', value=b'yes') + future.get() # wait for successful produce + producer.commit_transaction() # commit the transaction + + producer.begin_transaction() + future = producer.send('txn_topic', value=b'no') + future.get() # wait for successful produce + producer.abort_transaction() # abort the transaction + +.. code-block:: python + + # Include record headers. The format is list of tuples with string key + # and bytes value. + producer.send('foobar', value=b'c29tZSB2YWx1ZQ==', headers=[('content-encoding', b'base64')]) + +.. code-block:: python + + # Get producer performance metrics + metrics = producer.metrics() + Thread safety ************* diff --git a/docs/tests.rst b/docs/tests.rst index 561179ca5..79409887e 100644 --- a/docs/tests.rst +++ b/docs/tests.rst @@ -3,15 +3,17 @@ Tests .. image:: https://coveralls.io/repos/dpkp/kafka-python/badge.svg?branch=master&service=github :target: https://coveralls.io/github/dpkp/kafka-python?branch=master -.. image:: https://travis-ci.org/dpkp/kafka-python.svg?branch=master - :target: https://travis-ci.org/dpkp/kafka-python +.. image:: https://img.shields.io/github/actions/workflow/status/dpkp/kafka-python/python-package.yml + :target: https://github.com/dpkp/kafka-python/actions/workflows/python-package.yml -Test environments are managed via tox. The test suite is run via pytest. +The test suite is run via pytest. -Linting is run via pylint, but is generally skipped on pypy due to pylint -compatibility / performance issues. +Linting is run via pylint, but is currently skipped during CI/CD due to +accumulated debt. We'd like to transition to ruff! 
For test coverage details, see https://coveralls.io/github/dpkp/kafka-python +Coverage reporting is currently disabled as we have transitioned from travis +to GH Actions and have not yet re-enabled coveralls integration. The test suite includes unit tests that mock network interfaces, as well as integration tests that setup and teardown kafka broker (and zookeeper) @@ -21,30 +23,21 @@ fixtures for client / consumer / producer testing. Unit tests ------------------ -To run the tests locally, install tox: +To run the tests locally, install test dependencies: .. code:: bash - pip install tox + pip install -r requirements-dev.txt -For more details, see https://tox.readthedocs.io/en/latest/install.html - -Then simply run tox, optionally setting the python environment. -If unset, tox will loop through all environments. +Then simply run pytest (or make test) from your preferred python + virtualenv. .. code:: bash - tox -e py27 - tox -e py35 - - # run protocol tests only - tox -- -v test.test_protocol - - # re-run the last failing test, dropping into pdb - tox -e py27 -- --lf --pdb + # run protocol tests only (via pytest) + pytest test/test_protocol.py - # see available (pytest) options - tox -e py27 -- --help + # Run conn tests only (via make) + PYTESTS=test/test_conn.py make test Integration tests @@ -52,35 +45,8 @@ Integration tests .. code:: bash - KAFKA_VERSION=0.8.2.2 tox -e py27 - KAFKA_VERSION=1.0.1 tox -e py36 - - -Integration tests start Kafka and Zookeeper fixtures. This requires downloading -kafka server binaries: - -.. code:: bash - - ./build_integration.sh - -By default, this will install the broker versions listed in build_integration.sh's `ALL_RELEASES` -into the servers/ directory. To install a specific version, set the `KAFKA_VERSION` variable: - -.. code:: bash - - KAFKA_VERSION=1.0.1 ./build_integration.sh + KAFKA_VERSION=4.0.0 make test -Then to run the tests against a specific Kafka version, simply set the `KAFKA_VERSION` -env variable to the server build you want to use for testing: - -.. code:: bash - - KAFKA_VERSION=1.0.1 tox -e py36 - -To test against the kafka source tree, set KAFKA_VERSION=trunk -[optionally set SCALA_VERSION (defaults to the value set in `build_integration.sh`)] - -.. code:: bash - SCALA_VERSION=2.12 KAFKA_VERSION=trunk ./build_integration.sh - KAFKA_VERSION=trunk tox -e py36 +Integration tests start Kafka and Zookeeper fixtures. Make will download +kafka server binaries automatically if needed. 
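The README.rst and docs/index.rst hunks above introduce the transactional producer and the ``read_committed`` consumer as separate snippets. A minimal end-to-end sketch combining the two (not part of this patch; it assumes a local broker on ``localhost:9092`` and an existing ``txn_topic`` topic, both illustrative) might look roughly like:

.. code-block:: python

    from kafka import KafkaConsumer, KafkaProducer

    # Produce one committed and one aborted transactional batch.
    producer = KafkaProducer(bootstrap_servers='localhost:9092',
                             transactional_id='fizzbuzz')
    producer.init_transactions()

    producer.begin_transaction()
    producer.send('txn_topic', value=b'committed message').get()  # wait for produce
    producer.commit_transaction()

    producer.begin_transaction()
    producer.send('txn_topic', value=b'aborted message').get()
    producer.abort_transaction()
    producer.close()

    # A read_committed consumer should only return the committed record;
    # consumer_timeout_ms lets the loop end once no more messages arrive.
    consumer = KafkaConsumer(bootstrap_servers='localhost:9092',
                             isolation_level='read_committed',
                             auto_offset_reset='earliest',
                             consumer_timeout_ms=10000)
    consumer.subscribe(['txn_topic'])
    for msg in consumer:
        print(msg.value)  # b'committed message'
    consumer.close()
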
diff --git a/example.py b/example.py index 9907450f6..ac89296b6 100755 --- a/example.py +++ b/example.py @@ -1,5 +1,6 @@ #!/usr/bin/env python -import threading, time +import threading +import time from kafka import KafkaAdminClient, KafkaConsumer, KafkaProducer from kafka.admin import NewTopic diff --git a/kafka/admin/client.py b/kafka/admin/client.py index 29ee6cd9a..8490fdb46 100644 --- a/kafka/admin/client.py +++ b/kafka/admin/client.py @@ -15,14 +15,14 @@ from kafka.coordinator.protocol import ConsumerProtocolMemberMetadata, ConsumerProtocolMemberAssignment, ConsumerProtocol import kafka.errors as Errors from kafka.errors import ( - IncompatibleBrokerVersion, KafkaConfigurationError, NotControllerError, + IncompatibleBrokerVersion, KafkaConfigurationError, UnknownTopicOrPartitionError, UnrecognizedBrokerVersion, IllegalArgumentError) +from kafka.future import Future from kafka.metrics import MetricConfig, Metrics from kafka.protocol.admin import ( CreateTopicsRequest, DeleteTopicsRequest, DescribeConfigsRequest, AlterConfigsRequest, CreatePartitionsRequest, ListGroupsRequest, DescribeGroupsRequest, DescribeAclsRequest, CreateAclsRequest, DeleteAclsRequest, - DeleteGroupsRequest, DescribeLogDirsRequest -) + DeleteGroupsRequest, DeleteRecordsRequest, DescribeLogDirsRequest, ElectLeadersRequest, ElectionType) from kafka.protocol.commit import OffsetFetchRequest from kafka.protocol.find_coordinator import FindCoordinatorRequest from kafka.protocol.metadata import MetadataRequest @@ -142,14 +142,17 @@ class KafkaAdminClient(object): Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms. sasl_plain_password (str): password for sasl PLAIN and SCRAM authentication. Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms. + sasl_kerberos_name (str or gssapi.Name): Constructed gssapi.Name for use with + sasl mechanism handshake. If provided, sasl_kerberos_service_name and + sasl_kerberos_domain name are ignored. Default: None. sasl_kerberos_service_name (str): Service name to include in GSSAPI sasl mechanism handshake. Default: 'kafka' sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI sasl mechanism handshake. Default: one of bootstrap servers - sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider - instance. (See kafka.oauth.abstract). Default: None + sasl_oauth_token_provider (kafka.sasl.oauth.AbstractTokenProvider): OAuthBearer + token provider instance. Default: None + socks5_proxy (str): Socks5 proxy url. Default: None kafka_client (callable): Custom class / callable for creating KafkaClient instances - """ DEFAULT_CONFIG = { # client configs @@ -181,9 +184,11 @@ class KafkaAdminClient(object): 'sasl_mechanism': None, 'sasl_plain_username': None, 'sasl_plain_password': None, + 'sasl_kerberos_name': None, 'sasl_kerberos_service_name': 'kafka', 'sasl_kerberos_domain_name': None, 'sasl_oauth_token_provider': None, + 'socks5_proxy': None, # metrics configs 'metric_reporters': [], @@ -270,7 +275,7 @@ def _refresh_controller_id(self, timeout_ms=30000): self._controller_id = controller_id return else: - raise Errors.NodeNotAvailableError('controller') + raise Errors.NodeNotReadyError('controller') else: raise UnrecognizedBrokerVersion( "Kafka Admin interface cannot determine the controller using MetadataRequest_v{}." @@ -354,14 +359,11 @@ def _send_request_to_node(self, node_id, request, wakeup=True): Returns: A future object that may be polled for status and results. 
- - Raises: - The exception if the message could not be sent. """ - while not self._client.ready(node_id): - # poll until the connection to broker is ready, otherwise send() - # will fail with NodeNotReadyError - self._client.poll(timeout_ms=200) + try: + self._client.await_ready(node_id) + except Errors.KafkaConnectionError as e: + return Future().failure(e) return self._client.send(node_id, request, wakeup) def _send_request_to_controller(self, request): @@ -389,27 +391,55 @@ def _send_request_to_controller(self, request): # So this is a little brittle in that it assumes all responses have # one of these attributes and that they always unpack into # (topic, error_code) tuples. - topic_error_tuples = (response.topic_errors if hasattr(response, 'topic_errors') - else response.topic_error_codes) - # Also small py2/py3 compatibility -- py3 can ignore extra values - # during unpack via: for x, y, *rest in list_of_values. py2 cannot. - # So for now we have to map across the list and explicitly drop any - # extra values (usually the error_message) - for topic, error_code in map(lambda e: e[:2], topic_error_tuples): + topic_error_tuples = getattr(response, 'topic_errors', getattr(response, 'topic_error_codes', None)) + if topic_error_tuples is not None: + success = self._parse_topic_request_response(topic_error_tuples, request, response, tries) + else: + # Leader Election request has a two layer error response (topic and partition) + success = self._parse_topic_partition_request_response(request, response, tries) + + if success: + return response + raise RuntimeError("This should never happen, please file a bug with full stacktrace if encountered") + + def _parse_topic_request_response(self, topic_error_tuples, request, response, tries): + # Also small py2/py3 compatibility -- py3 can ignore extra values + # during unpack via: for x, y, *rest in list_of_values. py2 cannot. + # So for now we have to map across the list and explicitly drop any + # extra values (usually the error_message) + for topic, error_code in map(lambda e: e[:2], topic_error_tuples): + error_type = Errors.for_code(error_code) + if tries and error_type is Errors.NotControllerError: + # No need to inspect the rest of the errors for + # non-retriable errors because NotControllerError should + # either be thrown for all errors or no errors. + self._refresh_controller_id() + return False + elif error_type is not Errors.NoError: + raise error_type( + "Request '{}' failed with response '{}'." + .format(request, response)) + return True + + def _parse_topic_partition_request_response(self, request, response, tries): + # Also small py2/py3 compatibility -- py3 can ignore extra values + # during unpack via: for x, y, *rest in list_of_values. py2 cannot. + # So for now we have to map across the list and explicitly drop any + # extra values (usually the error_message) + for topic, partition_results in response.replication_election_results: + for partition_id, error_code in map(lambda e: e[:2], partition_results): error_type = Errors.for_code(error_code) - if tries and error_type is NotControllerError: + if tries and error_type is Errors.NotControllerError: # No need to inspect the rest of the errors for # non-retriable errors because NotControllerError should # either be thrown for all errors or no errors. self._refresh_controller_id() - break - elif error_type is not Errors.NoError: + return False + elif error_type not in (Errors.NoError, Errors.ElectionNotNeededError): raise error_type( "Request '{}' failed with response '{}'." 
.format(request, response)) - else: - return response - raise RuntimeError("This should never happen, please file a bug with full stacktrace if encountered") + return True @staticmethod def _convert_new_topic_request(new_topic): @@ -1111,8 +1141,118 @@ def create_partitions(self, topic_partitions, timeout_ms=None, validate_only=Fal .format(version)) return self._send_request_to_controller(request) - # delete records protocol not yet implemented - # Note: send the request to the partition leaders + def _get_leader_for_partitions(self, partitions, timeout_ms=None): + """Finds ID of the leader node for every given topic partition. + + Will raise UnknownTopicOrPartitionError if for some partition no leader can be found. + + :param partitions: ``[TopicPartition]``: partitions for which to find leaders. + :param timeout_ms: ``float``: Timeout in milliseconds, if None (default), will be read from + config. + + :return: Dictionary with ``{leader_id -> {partitions}}`` + """ + timeout_ms = self._validate_timeout(timeout_ms) + + partitions = set(partitions) + topics = set(tp.topic for tp in partitions) + + response = self._get_cluster_metadata(topics=topics).to_object() + + leader2partitions = defaultdict(list) + valid_partitions = set() + for topic in response.get("topics", ()): + for partition in topic.get("partitions", ()): + t2p = TopicPartition(topic=topic["topic"], partition=partition["partition"]) + if t2p in partitions: + leader2partitions[partition["leader"]].append(t2p) + valid_partitions.add(t2p) + + if len(partitions) != len(valid_partitions): + unknown = set(partitions) - valid_partitions + raise UnknownTopicOrPartitionError( + "The following partitions are not known: %s" + % ", ".join(str(x) for x in unknown) + ) + + return leader2partitions + + def delete_records(self, records_to_delete, timeout_ms=None, partition_leader_id=None): + """Delete records whose offset is smaller than the given offset of the corresponding partition. + + :param records_to_delete: ``{TopicPartition: int}``: The earliest available offsets for the + given partitions. + :param timeout_ms: ``float``: Timeout in milliseconds, if None (default), will be read from + config. + :param partition_leader_id: ``str``: If specified, all deletion requests will be sent to + this node. No check is performed verifying that this is indeed the leader for all + listed partitions: use with caution. + + :return: Dictionary {topicPartition -> metadata}, where metadata is returned by the broker. + See DeleteRecordsResponse for possible fields. error_code for all partitions is + guaranteed to be zero, otherwise an exception is raised. + """ + timeout_ms = self._validate_timeout(timeout_ms) + responses = [] + version = self._client.api_version(DeleteRecordsRequest, max_version=0) + if version is None: + raise IncompatibleBrokerVersion("Broker does not support DeleteGroupsRequest") + + # We want to make as few requests as possible + # If a single node serves as a partition leader for multiple partitions (and/or + # topics), we can send all of those in a single request. 
+ # For that we store {leader -> {partitions for leader}}, and do 1 request per leader + if partition_leader_id is None: + leader2partitions = self._get_leader_for_partitions( + set(records_to_delete), timeout_ms + ) + else: + leader2partitions = {partition_leader_id: set(records_to_delete)} + + for leader, partitions in leader2partitions.items(): + topic2partitions = defaultdict(list) + for partition in partitions: + topic2partitions[partition.topic].append(partition) + + request = DeleteRecordsRequest[version]( + topics=[ + (topic, [(tp.partition, records_to_delete[tp]) for tp in partitions]) + for topic, partitions in topic2partitions.items() + ], + timeout_ms=timeout_ms + ) + future = self._send_request_to_node(leader, request) + self._wait_for_futures([future]) + + responses.append(future.value.to_object()) + + partition2result = {} + partition2error = {} + for response in responses: + for topic in response["topics"]: + for partition in topic["partitions"]: + tp = TopicPartition(topic["name"], partition["partition_index"]) + partition2result[tp] = partition + if partition["error_code"] != 0: + partition2error[tp] = partition["error_code"] + + if partition2error: + if len(partition2error) == 1: + key, error = next(iter(partition2error.items())) + raise Errors.for_code(error)( + "Error deleting records from topic %s partition %s" % (key.topic, key.partition) + ) + else: + raise Errors.BrokerResponseError( + "The following errors occured when trying to delete records: " + + ", ".join( + "%s(partition=%d): %s" % + (partition.topic, partition.partition, Errors.for_code(error).__name__) + for partition, error in partition2error.items() + ) + ) + + return partition2result # create delegation token protocol not yet implemented # Note: send the request to the least_loaded_node() @@ -1318,9 +1458,9 @@ def list_consumer_groups(self, broker_ids=None): list: List of tuples of Consumer Groups. Raises: - GroupCoordinatorNotAvailableError: The coordinator is not + CoordinatorNotAvailableError: The coordinator is not available, so cannot process requests. - GroupLoadInProgressError: The coordinator is loading and + CoordinatorLoadInProgressError: The coordinator is loading and hence can't process requests. """ # While we return a list, internally use a set to prevent duplicates @@ -1354,7 +1494,7 @@ def _list_consumer_group_offsets_send_request(self, group_id, A message future """ version = self._client.api_version(OffsetFetchRequest, max_version=5) - if version <= 3: + if version <= 5: if partitions is None: if version <= 1: raise ValueError( @@ -1537,6 +1677,51 @@ def _delete_consumer_groups_send_request(self, group_ids, group_coordinator_id): .format(version)) return self._send_request_to_node(group_coordinator_id, request) + @staticmethod + def _convert_topic_partitions(topic_partitions): + return [ + ( + topic, + partition_ids + ) + for topic, partition_ids in topic_partitions.items() + ] + + def _get_all_topic_partitions(self): + return [ + ( + topic, + [partition_info.partition for partition_info in self._client.cluster._partitions[topic].values()] + ) + for topic in self._client.cluster.topics() + ] + + def _get_topic_partitions(self, topic_partitions): + if topic_partitions is None: + return self._get_all_topic_partitions() + return self._convert_topic_partitions(topic_partitions) + + def perform_leader_election(self, election_type, topic_partitions=None, timeout_ms=None): + """Perform leader election on the topic partitions. + + :param election_type: Type of election to attempt. 
0 for Perferred, 1 for Unclean + :param topic_partitions: A map of topic name strings to partition ids list. + By default, will run on all topic partitions + :param timeout_ms: Milliseconds to wait for the leader election process to complete + before the broker returns. + + :return: Appropriate version of ElectLeadersResponse class. + """ + version = self._client.api_version(ElectLeadersRequest, max_version=1) + timeout_ms = self._validate_timeout(timeout_ms) + request = ElectLeadersRequest[version]( + election_type=ElectionType(election_type), + topic_partitions=self._get_topic_partitions(topic_partitions), + timeout=timeout_ms, + ) + # TODO convert structs to a more pythonic interface + return self._send_request_to_controller(request) + def _wait_for_futures(self, futures): """Block until all futures complete. If any fail, raise the encountered exception. diff --git a/benchmarks/README.md b/kafka/benchmarks/README.md similarity index 100% rename from benchmarks/README.md rename to kafka/benchmarks/README.md diff --git a/kafka/benchmarks/__init__.py b/kafka/benchmarks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/consumer_performance.py b/kafka/benchmarks/consumer_performance.py old mode 100755 new mode 100644 similarity index 67% rename from benchmarks/consumer_performance.py rename to kafka/benchmarks/consumer_performance.py index 9e3b6a919..c35a164c2 --- a/benchmarks/consumer_performance.py +++ b/kafka/benchmarks/consumer_performance.py @@ -4,43 +4,16 @@ from __future__ import absolute_import, print_function import argparse -import logging import pprint import sys import threading +import time import traceback -from kafka.vendor.six.moves import range - -from kafka import KafkaConsumer, KafkaProducer -from test.fixtures import KafkaFixture, ZookeeperFixture - -logging.basicConfig(level=logging.ERROR) - - -def start_brokers(n): - print('Starting {0} {1}-node cluster...'.format(KafkaFixture.kafka_version, n)) - print('-> 1 Zookeeper') - zk = ZookeeperFixture.instance() - print('---> {0}:{1}'.format(zk.host, zk.port)) - print() - - partitions = min(n, 3) - replicas = min(n, 3) - print('-> {0} Brokers [{1} partitions / {2} replicas]'.format(n, partitions, replicas)) - brokers = [ - KafkaFixture.instance(i, zk, zk_chroot='', - partitions=partitions, replicas=replicas) - for i in range(n) - ] - for broker in brokers: - print('---> {0}:{1}'.format(broker.host, broker.port)) - print() - return brokers +from kafka import KafkaConsumer class ConsumerPerformance(object): - @staticmethod def run(args): try: @@ -53,28 +26,17 @@ def run(args): pass if v == 'None': v = None + elif v == 'False': + v = False + elif v == 'True': + v = True props[k] = v - if args.brokers: - brokers = start_brokers(args.brokers) - props['bootstrap_servers'] = ['{0}:{1}'.format(broker.host, broker.port) - for broker in brokers] - print('---> bootstrap_servers={0}'.format(props['bootstrap_servers'])) - print() - - print('-> Producing records') - record = bytes(bytearray(args.record_size)) - producer = KafkaProducer(compression_type=args.fixture_compression, - **props) - for i in range(args.num_records): - producer.send(topic=args.topic, value=record) - producer.flush() - producer.close() - print('-> OK!') - print() - print('Initializing Consumer...') + props['bootstrap_servers'] = args.bootstrap_servers props['auto_offset_reset'] = 'earliest' + if 'group_id' not in props: + props['group_id'] = 'kafka-consumer-benchmark' if 'consumer_timeout_ms' not in props: props['consumer_timeout_ms'] 
= 10000 props['metrics_sample_window_ms'] = args.stats_interval * 1000 @@ -92,14 +54,18 @@ def run(args): print('-> OK!') print() + start_time = time.time() records = 0 for msg in consumer: records += 1 if records >= args.num_records: break - print('Consumed {0} records'.format(records)) + end_time = time.time() timer_stop.set() + timer.join() + print('Consumed {0} records'.format(records)) + print('Execution time:', end_time - start_time, 'secs') except Exception: exc_info = sys.exc_info() @@ -143,18 +109,17 @@ def get_args_parser(): parser = argparse.ArgumentParser( description='This tool is used to verify the consumer performance.') + parser.add_argument( + '--bootstrap-servers', type=str, nargs='+', default=(), + help='host:port for cluster bootstrap servers') parser.add_argument( '--topic', type=str, - help='Topic for consumer test', + help='Topic for consumer test (default: kafka-python-benchmark-test)', default='kafka-python-benchmark-test') parser.add_argument( '--num-records', type=int, - help='number of messages to consume', + help='number of messages to consume (default: 1000000)', default=1000000) - parser.add_argument( - '--record-size', type=int, - help='message size in bytes', - default=100) parser.add_argument( '--consumer-config', type=str, nargs='+', default=(), help='kafka consumer related configuration properties like ' @@ -162,13 +127,9 @@ def get_args_parser(): parser.add_argument( '--fixture-compression', type=str, help='specify a compression type for use with broker fixtures / producer') - parser.add_argument( - '--brokers', type=int, - help='Number of kafka brokers to start', - default=0) parser.add_argument( '--stats-interval', type=int, - help='Interval in seconds for stats reporting to console', + help='Interval in seconds for stats reporting to console (default: 5)', default=5) parser.add_argument( '--raw-metrics', action='store_true', diff --git a/kafka/benchmarks/load_example.py b/kafka/benchmarks/load_example.py new file mode 100644 index 000000000..29796a74c --- /dev/null +++ b/kafka/benchmarks/load_example.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import logging +import threading +import time + +from kafka import KafkaConsumer, KafkaProducer + + +class Producer(threading.Thread): + + def __init__(self, bootstrap_servers, topic, stop_event, msg_size): + super(Producer, self).__init__() + self.bootstrap_servers = bootstrap_servers + self.topic = topic + self.stop_event = stop_event + self.big_msg = b'1' * msg_size + + def run(self): + producer = KafkaProducer(bootstrap_servers=self.bootstrap_servers) + self.sent = 0 + + while not self.stop_event.is_set(): + producer.send(self.topic, self.big_msg) + self.sent += 1 + producer.flush() + producer.close() + + +class Consumer(threading.Thread): + def __init__(self, bootstrap_servers, topic, stop_event, msg_size): + super(Consumer, self).__init__() + self.bootstrap_servers = bootstrap_servers + self.topic = topic + self.stop_event = stop_event + self.msg_size = msg_size + + def run(self): + consumer = KafkaConsumer(bootstrap_servers=self.bootstrap_servers, + auto_offset_reset='earliest') + consumer.subscribe([self.topic]) + self.valid = 0 + self.invalid = 0 + + for message in consumer: + if len(message.value) == self.msg_size: + self.valid += 1 + else: + print('Invalid message:', len(message.value), self.msg_size) + self.invalid += 1 + + if self.stop_event.is_set(): + break + consumer.close() + + +def get_args_parser(): + parser = argparse.ArgumentParser( 
+ description='This tool is used to demonstrate consumer and producer load.') + + parser.add_argument( + '--bootstrap-servers', type=str, nargs='+', default=('localhost:9092'), + help='host:port for cluster bootstrap servers (default: localhost:9092)') + parser.add_argument( + '--topic', type=str, + help='Topic for load test (default: kafka-python-benchmark-load-example)', + default='kafka-python-benchmark-load-example') + parser.add_argument( + '--msg-size', type=int, + help='Message size, in bytes, for load test (default: 524288)', + default=524288) + parser.add_argument( + '--load-time', type=int, + help='number of seconds to run load test (default: 10)', + default=10) + parser.add_argument( + '--log-level', type=str, + help='Optional logging level for load test: ERROR|INFO|DEBUG etc', + default=None) + return parser + + +def main(args): + if args.log_level: + logging.basicConfig( + format='%(asctime)s.%(msecs)s:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s', + level=getattr(logging, args.log_level)) + producer_stop = threading.Event() + consumer_stop = threading.Event() + threads = [ + Producer(args.bootstrap_servers, args.topic, producer_stop, args.msg_size), + Consumer(args.bootstrap_servers, args.topic, consumer_stop, args.msg_size) + ] + + for t in threads: + t.start() + + time.sleep(args.load_time) + producer_stop.set() + consumer_stop.set() + print('Messages sent: %d' % threads[0].sent) + print('Messages recvd: %d' % threads[1].valid) + print('Messages invalid: %d' % threads[1].invalid) + + +if __name__ == "__main__": + args = get_args_parser().parse_args() + main(args) diff --git a/benchmarks/producer_performance.py b/kafka/benchmarks/producer_performance.py old mode 100755 new mode 100644 similarity index 71% rename from benchmarks/producer_performance.py rename to kafka/benchmarks/producer_performance.py index c0de6fd23..1a1092960 --- a/benchmarks/producer_performance.py +++ b/kafka/benchmarks/producer_performance.py @@ -7,37 +7,15 @@ import pprint import sys import threading +import time import traceback from kafka.vendor.six.moves import range from kafka import KafkaProducer -from test.fixtures import KafkaFixture, ZookeeperFixture - - -def start_brokers(n): - print('Starting {0} {1}-node cluster...'.format(KafkaFixture.kafka_version, n)) - print('-> 1 Zookeeper') - zk = ZookeeperFixture.instance() - print('---> {0}:{1}'.format(zk.host, zk.port)) - print() - - partitions = min(n, 3) - replicas = min(n, 3) - print('-> {0} Brokers [{1} partitions / {2} replicas]'.format(n, partitions, replicas)) - brokers = [ - KafkaFixture.instance(i, zk, zk_chroot='', - partitions=partitions, replicas=replicas) - for i in range(n) - ] - for broker in brokers: - print('---> {0}:{1}'.format(broker.host, broker.port)) - print() - return brokers class ProducerPerformance(object): - @staticmethod def run(args): try: @@ -50,18 +28,14 @@ def run(args): pass if v == 'None': v = None + elif v == 'False': + v = False + elif v == 'True': + v = True props[k] = v - if args.brokers: - brokers = start_brokers(args.brokers) - props['bootstrap_servers'] = ['{0}:{1}'.format(broker.host, broker.port) - for broker in brokers] - print("---> bootstrap_servers={0}".format(props['bootstrap_servers'])) - print() - print('-> OK!') - print() - print('Initializing producer...') + props['bootstrap_servers'] = args.bootstrap_servers record = bytes(bytearray(args.record_size)) props['metrics_sample_window_ms'] = args.stats_interval * 1000 @@ -79,11 +53,29 @@ def run(args): print('-> OK!') print() - for i in 
range(args.num_records): - producer.send(topic=args.topic, value=record) - producer.flush() - + def _benchmark(): + results = [] + for i in range(args.num_records): + results.append(producer.send(topic=args.topic, value=record)) + print("Send complete...") + producer.flush() + producer.close() + count_success, count_failure = 0, 0 + for r in results: + if r.succeeded(): + count_success += 1 + elif r.failed(): + count_failure += 1 + else: + raise ValueError(r) + print("%d succeeded, %d failed" % (count_success, count_failure)) + + start_time = time.time() + _benchmark() + end_time = time.time() timer_stop.set() + timer.join() + print('Execution time:', end_time - start_time, 'secs') except Exception: exc_info = sys.exc_info() @@ -101,6 +93,8 @@ def __init__(self, interval, producer, event=None, raw_metrics=False): def print_stats(self): metrics = self.producer.metrics() + if not metrics: + return if self.raw_metrics: pprint.pprint(metrics) else: @@ -125,29 +119,28 @@ def get_args_parser(): parser = argparse.ArgumentParser( description='This tool is used to verify the producer performance.') + parser.add_argument( + '--bootstrap-servers', type=str, nargs='+', default=(), + help='host:port for cluster bootstrap server') parser.add_argument( '--topic', type=str, - help='Topic name for test', + help='Topic name for test (default: kafka-python-benchmark-test)', default='kafka-python-benchmark-test') parser.add_argument( '--num-records', type=int, - help='number of messages to produce', + help='number of messages to produce (default: 1000000)', default=1000000) parser.add_argument( '--record-size', type=int, - help='message size in bytes', + help='message size in bytes (default: 100)', default=100) parser.add_argument( '--producer-config', type=str, nargs='+', default=(), help='kafka producer related configuration properties like ' 'bootstrap_servers,client_id etc..') - parser.add_argument( - '--brokers', type=int, - help='Number of kafka brokers to start', - default=0) parser.add_argument( '--stats-interval', type=int, - help='Interval in seconds for stats reporting to console', + help='Interval in seconds for stats reporting to console (default: 5)', default=5) parser.add_argument( '--raw-metrics', action='store_true', diff --git a/benchmarks/record_batch_compose.py b/kafka/benchmarks/record_batch_compose.py similarity index 89% rename from benchmarks/record_batch_compose.py rename to kafka/benchmarks/record_batch_compose.py index 5bdefa7af..5b07fd59a 100644 --- a/benchmarks/record_batch_compose.py +++ b/kafka/benchmarks/record_batch_compose.py @@ -71,7 +71,8 @@ def func(loops, magic): return res -runner = pyperf.Runner() -runner.bench_time_func('batch_append_v0', func, 0) -runner.bench_time_func('batch_append_v1', func, 1) -runner.bench_time_func('batch_append_v2', func, 2) +if __name__ == '__main__': + runner = pyperf.Runner() + runner.bench_time_func('batch_append_v0', func, 0) + runner.bench_time_func('batch_append_v1', func, 1) + runner.bench_time_func('batch_append_v2', func, 2) diff --git a/benchmarks/record_batch_read.py b/kafka/benchmarks/record_batch_read.py similarity index 90% rename from benchmarks/record_batch_read.py rename to kafka/benchmarks/record_batch_read.py index aa5e9c1e5..2ef32298d 100644 --- a/benchmarks/record_batch_read.py +++ b/kafka/benchmarks/record_batch_read.py @@ -76,7 +76,8 @@ def func(loops, magic): return res -runner = pyperf.Runner() -runner.bench_time_func('batch_read_v0', func, 0) -runner.bench_time_func('batch_read_v1', func, 1)
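Illustratively, with the __main__ guards introduced here and just below, importing record_batch_compose / record_batch_read from the new kafka.benchmarks package no longer kicks off a pyperf run; a benchmark has to be invoked explicitly. A minimal sketch of the assumed invocation after the move (module paths taken from the renames above; pyperf must be installed):

    # Importing is now side-effect free with respect to the pyperf runner; a run
    # must be explicit, e.g.:
    #   python -m kafka.benchmarks.record_batch_compose
    #   python -m kafka.benchmarks.record_batch_read
    import kafka.benchmarks.record_batch_read  # does not start the pyperf runner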
-runner.bench_time_func('batch_read_v2', func, 2) +if __name__ == '__main__': + runner = pyperf.Runner() + runner.bench_time_func('batch_read_v0', func, 0) + runner.bench_time_func('batch_read_v1', func, 1) + runner.bench_time_func('batch_read_v2', func, 2) diff --git a/benchmarks/varint_speed.py b/kafka/benchmarks/varint_speed.py similarity index 81% rename from benchmarks/varint_speed.py rename to kafka/benchmarks/varint_speed.py index fd63d0ac1..b2628a1b5 100644 --- a/benchmarks/varint_speed.py +++ b/kafka/benchmarks/varint_speed.py @@ -113,8 +113,6 @@ def encode_varint_1(num): raise ValueError("Out of double range") return buf[:i + 1] -_assert_valid_enc(encode_varint_1) - def encode_varint_2(value, int2byte=six.int2byte): value = (value << 1) ^ (value >> 63) @@ -128,8 +126,6 @@ def encode_varint_2(value, int2byte=six.int2byte): value >>= 7 return res + int2byte(bits) -_assert_valid_enc(encode_varint_2) - def encode_varint_3(value, buf): append = buf.append @@ -145,12 +141,6 @@ def encode_varint_3(value, buf): return value -for encoded, decoded in test_data: - res = bytearray() - encode_varint_3(decoded, res) - assert res == encoded - - def encode_varint_4(value, int2byte=six.int2byte): value = (value << 1) ^ (value >> 63) @@ -185,12 +175,6 @@ def encode_varint_4(value, int2byte=six.int2byte): return res + int2byte(bits) -_assert_valid_enc(encode_varint_4) - -# import dis -# dis.dis(encode_varint_4) - - def encode_varint_5(value, buf, pos=0): value = (value << 1) ^ (value >> 63) @@ -204,12 +188,6 @@ def encode_varint_5(value, buf, pos=0): buf[pos] = bits return pos + 1 -for encoded, decoded in test_data: - res = bytearray(10) - written = encode_varint_5(decoded, res) - assert res[:written] == encoded - - def encode_varint_6(value, buf): append = buf.append value = (value << 1) ^ (value >> 63) @@ -253,12 +231,6 @@ def encode_varint_6(value, buf): return i -for encoded, decoded in test_data: - res = bytearray() - encode_varint_6(decoded, res) - assert res == encoded - - def size_of_varint_1(value): """ Number of bytes needed to encode an integer in variable-length format. """ @@ -271,8 +243,6 @@ def size_of_varint_1(value): break return res -_assert_valid_size(size_of_varint_1) - def size_of_varint_2(value): """ Number of bytes needed to encode an integer in variable-length format. 
@@ -298,8 +268,6 @@ def size_of_varint_2(value): return 9 return 10 -_assert_valid_size(size_of_varint_2) - if six.PY3: def _read_byte(memview, pos): @@ -351,8 +319,6 @@ def decode_varint_1(buffer, pos=0): # Normalize sign return (value >> 1) ^ -(value & 1), i + 1 -_assert_valid_dec(decode_varint_1) - def decode_varint_2(buffer, pos=0): result = 0 @@ -369,9 +335,6 @@ def decode_varint_2(buffer, pos=0): raise ValueError("Out of int64 range") -_assert_valid_dec(decode_varint_2) - - def decode_varint_3(buffer, pos=0): result = buffer[pos] if not (result & 0x81): @@ -393,51 +356,79 @@ def decode_varint_3(buffer, pos=0): raise ValueError("Out of int64 range") -_assert_valid_dec(decode_varint_3) - -# import dis -# dis.dis(decode_varint_3) - -runner = pyperf.Runner() -# Encode algorithms returning a bytes result -for bench_func in [ - encode_varint_1, - encode_varint_2, - encode_varint_4]: - for i, value in enumerate(BENCH_VALUES_ENC): - runner.bench_func( - '{}_{}byte'.format(bench_func.__name__, i + 1), - bench_func, value) - -# Encode algorithms writing to the buffer -for bench_func in [ - encode_varint_3, - encode_varint_5, - encode_varint_6]: - for i, value in enumerate(BENCH_VALUES_ENC): - fname = bench_func.__name__ - runner.timeit( - '{}_{}byte'.format(fname, i + 1), - stmt="{}({}, buffer)".format(fname, value), - setup="from __main__ import {}; buffer = bytearray(10)".format( - fname) - ) - -# Size algorithms -for bench_func in [ - size_of_varint_1, - size_of_varint_2]: - for i, value in enumerate(BENCH_VALUES_ENC): - runner.bench_func( - '{}_{}byte'.format(bench_func.__name__, i + 1), - bench_func, value) - -# Decode algorithms -for bench_func in [ - decode_varint_1, - decode_varint_2, - decode_varint_3]: - for i, value in enumerate(BENCH_VALUES_DEC): - runner.bench_func( - '{}_{}byte'.format(bench_func.__name__, i + 1), - bench_func, value) +if __name__ == '__main__': + _assert_valid_enc(encode_varint_1) + _assert_valid_enc(encode_varint_2) + + for encoded, decoded in test_data: + res = bytearray() + encode_varint_3(decoded, res) + assert res == encoded + + _assert_valid_enc(encode_varint_4) + + # import dis + # dis.dis(encode_varint_4) + + for encoded, decoded in test_data: + res = bytearray(10) + written = encode_varint_5(decoded, res) + assert res[:written] == encoded + + for encoded, decoded in test_data: + res = bytearray() + encode_varint_6(decoded, res) + assert res == encoded + + _assert_valid_size(size_of_varint_1) + _assert_valid_size(size_of_varint_2) + _assert_valid_dec(decode_varint_1) + _assert_valid_dec(decode_varint_2) + _assert_valid_dec(decode_varint_3) + + # import dis + # dis.dis(decode_varint_3) + + runner = pyperf.Runner() + # Encode algorithms returning a bytes result + for bench_func in [ + encode_varint_1, + encode_varint_2, + encode_varint_4]: + for i, value in enumerate(BENCH_VALUES_ENC): + runner.bench_func( + '{}_{}byte'.format(bench_func.__name__, i + 1), + bench_func, value) + + # Encode algorithms writing to the buffer + for bench_func in [ + encode_varint_3, + encode_varint_5, + encode_varint_6]: + for i, value in enumerate(BENCH_VALUES_ENC): + fname = bench_func.__name__ + runner.timeit( + '{}_{}byte'.format(fname, i + 1), + stmt="{}({}, buffer)".format(fname, value), + setup="from __main__ import {}; buffer = bytearray(10)".format( + fname) + ) + + # Size algorithms + for bench_func in [ + size_of_varint_1, + size_of_varint_2]: + for i, value in enumerate(BENCH_VALUES_ENC): + runner.bench_func( + '{}_{}byte'.format(bench_func.__name__, i + 1), + 
bench_func, value) + + # Decode algorithms + for bench_func in [ + decode_varint_1, + decode_varint_2, + decode_varint_3]: + for i, value in enumerate(BENCH_VALUES_DEC): + runner.bench_func( + '{}_{}byte'.format(bench_func.__name__, i + 1), + bench_func, value) diff --git a/kafka/client_async.py b/kafka/client_async.py index 2597fff61..7d466574f 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -19,7 +19,7 @@ from kafka.vendor import six from kafka.cluster import ClusterMetadata -from kafka.conn import BrokerConnection, ConnectionStates, collect_hosts, get_ip_port_afi +from kafka.conn import BrokerConnection, ConnectionStates, get_ip_port_afi from kafka import errors as Errors from kafka.future import Future from kafka.metrics import AnonMeasurable @@ -27,10 +27,10 @@ from kafka.metrics.stats.rate import TimeUnit from kafka.protocol.broker_api_versions import BROKER_API_VERSIONS from kafka.protocol.metadata import MetadataRequest -from kafka.util import Dict, WeakMethod +from kafka.util import Dict, Timer, WeakMethod, ensure_valid_topic_name # Although this looks unused, it actually monkey-patches socket.socketpair() # and should be left in as long as we're using socket.socketpair() in this file -from kafka.vendor import socketpair +from kafka.vendor import socketpair # noqa: F401 from kafka.version import __version__ if six.PY2: @@ -150,6 +150,7 @@ class KafkaClient(object): api_version_auto_timeout_ms (int): number of milliseconds to throw a timeout exception from the constructor when checking the broker api version. Only applies if api_version set to None. + Default: 2000 selector (selectors.BaseSelector): Provide a specific selector implementation to use for I/O multiplexing. Default: selectors.DefaultSelector @@ -163,12 +164,16 @@ class KafkaClient(object): Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms. sasl_plain_password (str): password for sasl PLAIN and SCRAM authentication. Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms. + sasl_kerberos_name (str or gssapi.Name): Constructed gssapi.Name for use with + sasl mechanism handshake. If provided, sasl_kerberos_service_name and + sasl_kerberos_domain name are ignored. Default: None. sasl_kerberos_service_name (str): Service name to include in GSSAPI sasl mechanism handshake. Default: 'kafka' sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI sasl mechanism handshake. Default: one of bootstrap servers - sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider - instance. (See kafka.oauth.abstract). Default: None + sasl_oauth_token_provider (kafka.sasl.oauth.AbstractTokenProvider): OAuthBearer + token provider instance. Default: None + socks5_proxy (str): Socks5 proxy URL. 
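As a hedged illustration of the sasl_oauth_token_provider setting documented just above (class path taken from the docstring; the provider class and token value below are made up, and a real provider would fetch and refresh tokens from an OAuth server):

    from kafka.client_async import KafkaClient
    from kafka.sasl.oauth import AbstractTokenProvider

    class StaticTokenProvider(AbstractTokenProvider):
        def token(self):
            # Return the bearer token to present during OAUTHBEARER auth.
            return 'example-bearer-token'

    client = KafkaClient(
        bootstrap_servers='broker.example.com:9093',
        security_protocol='SASL_SSL',
        sasl_mechanism='OAUTHBEARER',
        sasl_oauth_token_provider=StaticTokenProvider(),
    )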
Default: None """ DEFAULT_CONFIG = { @@ -206,9 +211,11 @@ class KafkaClient(object): 'sasl_mechanism': None, 'sasl_plain_username': None, 'sasl_plain_password': None, + 'sasl_kerberos_name': None, 'sasl_kerberos_service_name': 'kafka', 'sasl_kerberos_domain_name': None, - 'sasl_oauth_token_provider': None + 'sasl_oauth_token_provider': None, + 'socks5_proxy': None, } def __init__(self, **configs): @@ -231,7 +238,6 @@ def __init__(self, **configs): self._api_versions = None self._connecting = set() self._sending = set() - self._refresh_on_disconnects = True # Not currently used, but data is collected internally self._last_bootstrap = 0 @@ -270,6 +276,7 @@ def __init__(self, **configs): if compatible_version: log.warning('Configured api_version %s not supported; using %s', self.config['api_version'], compatible_version) + self.config['api_version'] = compatible_version self._api_versions = BROKER_API_VERSIONS[compatible_version] else: raise Errors.UnrecognizedBrokerVersion(self.config['api_version']) @@ -303,7 +310,7 @@ def _can_connect(self, node_id): def _conn_state_change(self, node_id, sock, conn): with self._lock: - if conn.connecting(): + if conn.state is ConnectionStates.CONNECTING: # SSL connections can enter this state 2x (second during Handshake) if node_id not in self._connecting: self._connecting.add(node_id) @@ -315,7 +322,19 @@ def _conn_state_change(self, node_id, sock, conn): if self.cluster.is_bootstrap(node_id): self._last_bootstrap = time.time() - elif conn.connected(): + elif conn.state is ConnectionStates.API_VERSIONS_SEND: + try: + self._selector.register(sock, selectors.EVENT_WRITE, conn) + except KeyError: + self._selector.modify(sock, selectors.EVENT_WRITE, conn) + + elif conn.state in (ConnectionStates.API_VERSIONS_RECV, ConnectionStates.AUTHENTICATING): + try: + self._selector.register(sock, selectors.EVENT_READ, conn) + except KeyError: + self._selector.modify(sock, selectors.EVENT_READ, conn) + + elif conn.state is ConnectionStates.CONNECTED: log.debug("Node %s connected", node_id) if node_id in self._connecting: self._connecting.remove(node_id) @@ -332,6 +351,8 @@ def _conn_state_change(self, node_id, sock, conn): if self.cluster.is_bootstrap(node_id): self._bootstrap_fails = 0 + if self._api_versions is None: + self._api_versions = conn._api_versions else: for node_id in list(self._conns.keys()): @@ -344,7 +365,7 @@ def _conn_state_change(self, node_id, sock, conn): self._connecting.remove(node_id) try: self._selector.unregister(sock) - except KeyError: + except (KeyError, ValueError): pass if self._sensors: @@ -363,7 +384,7 @@ def _conn_state_change(self, node_id, sock, conn): elif self.cluster.is_bootstrap(node_id): self._bootstrap_fails += 1 - elif self._refresh_on_disconnects and not self._closed and not idle_disconnect: + elif conn.connect_failed() and not self._closed and not idle_disconnect: log.warning("Node %s connection failed -- refreshing metadata", node_id) self.cluster.request_update() @@ -379,6 +400,11 @@ def maybe_connect(self, node_id, wakeup=True): return True return False + def connection_failed(self, node_id): + if node_id not in self._conns: + return False + return self._conns[node_id].connect_failed() + def _should_recycle_connection(self, conn): # Never recycle unless disconnected if not conn.disconnected(): @@ -389,7 +415,7 @@ def _should_recycle_connection(self, conn): if broker is None: return False - host, _, afi = get_ip_port_afi(broker.host) + host, _, _ = get_ip_port_afi(broker.host) if conn.host != host or conn.port != 
broker.port: log.info("Broker metadata change detected for node %s" " from %s:%s to %s:%s", conn.node_id, conn.host, conn.port, @@ -617,10 +643,9 @@ def poll(self, timeout_ms=None, future=None): Returns: list: responses received (can be empty) """ - if timeout_ms is None: - timeout_ms = self.config['request_timeout_ms'] - elif not isinstance(timeout_ms, (int, float)): + if not isinstance(timeout_ms, (int, float, type(None))): raise TypeError('Invalid type for timeout: %s' % type(timeout_ms)) + timer = Timer(timeout_ms) # Loop for futures, break after first loop if None responses = [] @@ -646,11 +671,12 @@ def poll(self, timeout_ms=None, future=None): if future is not None and future.is_done: timeout = 0 else: + user_timeout_ms = timer.timeout_ms if timeout_ms is not None else self.config['request_timeout_ms'] idle_connection_timeout_ms = self._idle_expiry_manager.next_check_ms() request_timeout_ms = self._next_ifr_request_timeout_ms() - log.debug("Timeouts: user %f, metadata %f, idle connection %f, request %f", timeout_ms, metadata_timeout_ms, idle_connection_timeout_ms, request_timeout_ms) + log.debug("Timeouts: user %f, metadata %f, idle connection %f, request %f", user_timeout_ms, metadata_timeout_ms, idle_connection_timeout_ms, request_timeout_ms) timeout = min( - timeout_ms, + user_timeout_ms, metadata_timeout_ms, idle_connection_timeout_ms, request_timeout_ms) @@ -664,7 +690,11 @@ def poll(self, timeout_ms=None, future=None): # If all we had was a timeout (future is None) - only do one poll # If we do have a future, we keep looping until it is done - if future is None or future.is_done: + if future is None: + break + elif future.is_done: + break + elif timeout_ms is not None and timer.expired: break return responses @@ -881,7 +911,13 @@ def add_topic(self, topic): Returns: Future: resolves after metadata request/response + + Raises: + TypeError: if topic is not a string + ValueError: if topic is invalid: must be chars (a-zA-Z0-9._-), and less than 250 length """ + ensure_valid_topic_name(topic) + if topic in self._topics: return Future().success(set(self._topics)) @@ -943,8 +979,10 @@ def _maybe_refresh_metadata(self, wakeup=False): topics = list(self.config['bootstrap_topics_filter']) api_version = self.api_version(MetadataRequest, max_version=7) - if self.cluster.need_all_topic_metadata or not topics: + if self.cluster.need_all_topic_metadata: topics = MetadataRequest[api_version].ALL_TOPICS + elif not topics: + topics = MetadataRequest[api_version].NO_TOPICS if api_version >= 4: request = MetadataRequest[api_version](topics, self.config['allow_auto_create_topics']) else: @@ -970,15 +1008,14 @@ def refresh_done(val_or_error): def get_api_versions(self): """Return the ApiVersions map, if available. - Note: A call to check_version must previously have succeeded and returned - version 0.10.0 or later + Note: Only available after bootstrap; requires broker version 0.10.0 or later. Returns: a map of dict mapping {api_key : (min_version, max_version)}, or None if ApiVersion is not supported by the kafka cluster. """ return self._api_versions - def check_version(self, node_id=None, timeout=None, strict=False): + def check_version(self, node_id=None, timeout=None, **kwargs): """Attempt to guess the version of a Kafka broker. Keyword Arguments: @@ -994,50 +1031,47 @@ def check_version(self, node_id=None, timeout=None, strict=False): Raises: NodeNotReadyError (if node_id is provided) NoBrokersAvailable (if node_id is None) - UnrecognizedBrokerVersion: please file bug if seen! 
- AssertionError (if strict=True): please file bug if seen! """ timeout = timeout or (self.config['api_version_auto_timeout_ms'] / 1000) - self._lock.acquire() - end = time.time() + timeout - while time.time() < end: - - # It is possible that least_loaded_node falls back to bootstrap, - # which can block for an increasing backoff period - try_node = node_id or self.least_loaded_node() - if try_node is None: - self._lock.release() - raise Errors.NoBrokersAvailable() - if not self._init_connect(try_node): - if try_node == node_id: - raise Errors.NodeNotReadyError("Connection failed to %s" % node_id) - else: + with self._lock: + end = time.time() + timeout + while time.time() < end: + time_remaining = max(end - time.time(), 0) + if node_id is not None and self.connection_delay(node_id) > 0: + sleep_time = min(time_remaining, self.connection_delay(node_id) / 1000.0) + if sleep_time > 0: + time.sleep(sleep_time) continue + try_node = node_id or self.least_loaded_node() + if try_node is None: + sleep_time = min(time_remaining, self.least_loaded_node_refresh_ms() / 1000.0) + if sleep_time > 0: + log.warning('No node available during check_version; sleeping %.2f secs', sleep_time) + time.sleep(sleep_time) + continue + log.debug('Attempting to check version with node %s', try_node) + if not self._init_connect(try_node): + if try_node == node_id: + raise Errors.NodeNotReadyError("Connection failed to %s" % node_id) + else: + continue + conn = self._conns[try_node] + + while conn.connecting() and time.time() < end: + timeout_ms = min((end - time.time()) * 1000, 200) + self.poll(timeout_ms=timeout_ms) + + if conn._api_version is not None: + return conn._api_version + else: + log.debug('Failed to identify api_version after connection attempt to %s', conn) - conn = self._conns[try_node] - - # We will intentionally cause socket failures - # These should not trigger metadata refresh - self._refresh_on_disconnects = False - try: - remaining = end - time.time() - version = conn.check_version(timeout=remaining, strict=strict, topics=list(self.config['bootstrap_topics_filter'])) - if not self._api_versions: - self._api_versions = conn.get_api_versions() - self._lock.release() - return version - except Errors.NodeNotReadyError: - # Only raise to user if this is a node-specific request + # Timeout + else: if node_id is not None: - self._lock.release() - raise - finally: - self._refresh_on_disconnects = True - - # Timeout - else: - self._lock.release() - raise Errors.NoBrokersAvailable() + raise Errors.NodeNotReadyError(node_id) + else: + raise Errors.NoBrokersAvailable() def api_version(self, operation, max_version=None): """Find the latest version of the protocol operation supported by both @@ -1063,7 +1097,7 @@ def api_version(self, operation, max_version=None): broker_api_versions = self._api_versions api_key = operation[0].API_KEY if broker_api_versions is None or api_key not in broker_api_versions: - raise IncompatibleBrokerVersion( + raise Errors.IncompatibleBrokerVersion( "Kafka broker does not support the '{}' Kafka protocol." .format(operation[0].__name__)) broker_min_version, broker_max_version = broker_api_versions[api_key] @@ -1071,13 +1105,13 @@ def api_version(self, operation, max_version=None): if version < broker_min_version: # max library version is less than min broker version. Currently, # no Kafka versions specify a min msg version. Maybe in the future? 
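A rough usage sketch of the reworked version handling above (assuming a reachable broker; the request construction mirrors the _maybe_refresh_metadata change earlier in this patch):

    from kafka.client_async import KafkaClient
    from kafka.protocol.metadata import MetadataRequest

    client = KafkaClient(bootstrap_servers='localhost:9092')
    broker_version = client.check_version()                      # e.g. (2, 6, 0)
    version = client.api_version(MetadataRequest, max_version=7)
    topics = MetadataRequest[version].ALL_TOPICS
    if version >= 4:
        request = MetadataRequest[version](topics, True)         # allow_auto_create_topics
    else:
        request = MetadataRequest[version](topics)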
- raise IncompatibleBrokerVersion( + raise Errors.IncompatibleBrokerVersion( "No version of the '{}' Kafka protocol is supported by both the client and broker." .format(operation[0].__name__)) return version def wakeup(self): - if self._waking or self._wake_w is None: + if self._closed or self._waking or self._wake_w is None: return with self._wake_lock: try: @@ -1124,6 +1158,39 @@ def bootstrap_connected(self): else: return False + def await_ready(self, node_id, timeout_ms=30000): + """ + Invokes `poll` to discard pending disconnects, followed by `client.ready` and 0 or more `client.poll` + invocations until the connection to `node` is ready, the timeoutMs expires or the connection fails. + + It returns `true` if the call completes normally or `false` if the timeoutMs expires. If the connection fails, + an `IOException` is thrown instead. Note that if the `NetworkClient` has been configured with a positive + connection timeoutMs, it is possible for this method to raise an `IOException` for a previous connection which + has recently disconnected. + + This method is useful for implementing blocking behaviour on top of the non-blocking `NetworkClient`, use it with + care. + """ + timer = Timer(timeout_ms) + self.poll(timeout_ms=0) + if self.is_ready(node_id): + return True + + while not self.is_ready(node_id) and not timer.expired: + if self.connection_failed(node_id): + raise Errors.KafkaConnectionError("Connection to %s failed." % (node_id,)) + self.maybe_connect(node_id) + self.poll(timeout_ms=timer.timeout_ms) + return self.is_ready(node_id) + + def send_and_receive(self, node_id, request): + future = self.send(node_id, request) + self.poll(future=future) + assert future.is_done + if future.failed(): + raise future.exception + return future.value + # OrderedDict requires python2.7+ try: @@ -1160,7 +1227,7 @@ def is_expired(self, conn_id): def next_check_ms(self): now = time.time() - if not self.lru_connections: + if not self.lru_connections or self.next_idle_close_check_time == float('inf'): return float('inf') elif self.next_idle_close_check_time <= now: return 0 diff --git a/kafka/cluster.py b/kafka/cluster.py index c28d36d20..ded8c6f96 100644 --- a/kafka/cluster.py +++ b/kafka/cluster.py @@ -3,13 +3,15 @@ import collections import copy import logging +import random +import re import threading import time from kafka.vendor import six from kafka import errors as Errors -from kafka.conn import collect_hosts +from kafka.conn import get_ip_port_afi from kafka.future import Future from kafka.structs import BrokerMetadata, PartitionMetadata, TopicPartition @@ -47,7 +49,7 @@ def __init__(self, **configs): self._brokers = {} # node_id -> BrokerMetadata self._partitions = {} # topic -> partition -> PartitionMetadata self._broker_partitions = collections.defaultdict(set) # node_id -> {TopicPartition...} - self._groups = {} # group_name -> node_id + self._coordinators = {} # (coord_type, coord_key) -> node_id self._last_refresh_ms = 0 self._last_successful_refresh_ms = 0 self._need_update = True @@ -93,7 +95,7 @@ def broker_metadata(self, broker_id): """Get BrokerMetadata Arguments: - broker_id (int): node_id for a broker to check + broker_id (int or str): node_id for a broker to check Returns: BrokerMetadata or None if not found @@ -112,6 +114,7 @@ def partitions_for_topic(self, topic): Returns: set: {partition (int), ...} + None if topic not found. 
""" if topic not in self._partitions: return None @@ -148,7 +151,7 @@ def partitions_for_broker(self, broker_id): """Return TopicPartitions for which the broker is a leader. Arguments: - broker_id (int): node id for a broker + broker_id (int or str): node id for a broker Returns: set: {TopicPartition, ...} @@ -163,10 +166,10 @@ def coordinator_for_group(self, group): group (str): name of consumer group Returns: - int: node_id for group coordinator + node_id (int or str) for group coordinator, -1 if coordinator unknown None if the group does not exist. """ - return self._groups.get(group) + return self._coordinators.get(('group', group)) def ttl(self): """Milliseconds until metadata should be refreshed""" @@ -201,6 +204,10 @@ def request_update(self): self._future = Future() return self._future + @property + def need_update(self): + return self._need_update + def topics(self, exclude_internal_topics=True): """Get set of known topics. @@ -238,13 +245,6 @@ def update_metadata(self, metadata): Returns: None """ - # In the common case where we ask for a single topic and get back an - # error, we should fail the future - if len(metadata.topics) == 1 and metadata.topics[0][0] != Errors.NoError.errno: - error_code, topic = metadata.topics[0][:2] - error = Errors.for_code(error_code)(topic) - return self.failed_update(error) - if not metadata.brokers: log.warning("No broker metadata found in MetadataResponse -- ignoring.") return self.failed_update(Errors.MetadataEmptyBrokerList(metadata)) @@ -342,7 +342,15 @@ def update_metadata(self, metadata): self._last_successful_refresh_ms = now if f: - f.success(self) + # In the common case where we ask for a single topic and get back an + # error, we should fail the future + if len(metadata.topics) == 1 and metadata.topics[0][0] != Errors.NoError.errno: + error_code, topic = metadata.topics[0][:2] + error = Errors.for_code(error_code)(topic) + f.failure(error) + else: + f.success(self) + log.debug("Updated cluster metadata to %s", self) for listener in self._listeners: @@ -363,24 +371,25 @@ def remove_listener(self, listener): """Remove a previously added listener callback""" self._listeners.remove(listener) - def add_group_coordinator(self, group, response): - """Update with metadata for a group coordinator + def add_coordinator(self, response, coord_type, coord_key): + """Update with metadata for a group or txn coordinator Arguments: - group (str): name of group from FindCoordinatorRequest response (FindCoordinatorResponse): broker response + coord_type (str): 'group' or 'transaction' + coord_key (str): consumer_group or transactional_id Returns: string: coordinator node_id if metadata is updated, None on error """ - log.debug("Updating coordinator for %s: %s", group, response) + log.debug("Updating coordinator for %s/%s: %s", coord_type, coord_key, response) error_type = Errors.for_code(response.error_code) if error_type is not Errors.NoError: log.error("FindCoordinatorResponse error: %s", error_type) - self._groups[group] = -1 + self._coordinators[(coord_type, coord_key)] = -1 return - # Use a coordinator-specific node id so that group requests + # Use a coordinator-specific node id so that requests # get a dedicated connection node_id = 'coordinator-{}'.format(response.coordinator_id) coordinator = BrokerMetadata( @@ -389,9 +398,9 @@ def add_group_coordinator(self, group, response): response.port, None) - log.info("Group coordinator for %s is %s", group, coordinator) + log.info("Coordinator for %s/%s is %s", coord_type, coord_key, coordinator) 
self._coordinator_brokers[node_id] = coordinator - self._groups[group] = node_id + self._coordinators[(coord_type, coord_key)] = node_id return node_id def with_partitions(self, partitions_to_add): @@ -400,7 +409,7 @@ def with_partitions(self, partitions_to_add): new_metadata._brokers = copy.deepcopy(self._brokers) new_metadata._partitions = copy.deepcopy(self._partitions) new_metadata._broker_partitions = copy.deepcopy(self._broker_partitions) - new_metadata._groups = copy.deepcopy(self._groups) + new_metadata._coordinators = copy.deepcopy(self._coordinators) new_metadata.internal_topics = copy.deepcopy(self.internal_topics) new_metadata.unauthorized_topics = copy.deepcopy(self.unauthorized_topics) @@ -414,5 +423,26 @@ def with_partitions(self, partitions_to_add): return new_metadata def __str__(self): - return 'ClusterMetadata(brokers: %d, topics: %d, groups: %d)' % \ - (len(self._brokers), len(self._partitions), len(self._groups)) + return 'ClusterMetadata(brokers: %d, topics: %d, coordinators: %d)' % \ + (len(self._brokers), len(self._partitions), len(self._coordinators)) + + +def collect_hosts(hosts, randomize=True): + """ + Collects a comma-separated set of hosts (host:port) and optionally + randomize the returned list. + """ + + if isinstance(hosts, six.string_types): + hosts = hosts.strip().split(',') + + result = [] + for host_port in hosts: + # ignore leading SECURITY_PROTOCOL:// to mimic java client + host_port = re.sub('^.*://', '', host_port) + host, port, afi = get_ip_port_afi(host_port) + result.append((host, port, afi)) + + if randomize: + random.shuffle(result) + return result diff --git a/kafka/conn.py b/kafka/conn.py index 6aa20117e..64445fab0 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -4,7 +4,7 @@ import errno import io import logging -from random import shuffle, uniform +from random import uniform # selectors in stdlib as of py3.4 try: @@ -14,7 +14,6 @@ from kafka.vendor import selectors34 as selectors import socket -import struct import threading import time @@ -23,16 +22,21 @@ import kafka.errors as Errors from kafka.future import Future from kafka.metrics.stats import Avg, Count, Max, Rate -from kafka.oauth.abstract import AbstractTokenProvider -from kafka.protocol.admin import SaslHandShakeRequest, DescribeAclsRequest, DescribeClientQuotasRequest +from kafka.protocol.admin import DescribeAclsRequest, DescribeClientQuotasRequest, ListGroupsRequest +from kafka.protocol.api_versions import ApiVersionsRequest +from kafka.protocol.broker_api_versions import BROKER_API_VERSIONS from kafka.protocol.commit import OffsetFetchRequest +from kafka.protocol.fetch import FetchRequest +from kafka.protocol.find_coordinator import FindCoordinatorRequest from kafka.protocol.list_offsets import ListOffsetsRequest -from kafka.protocol.produce import ProduceRequest from kafka.protocol.metadata import MetadataRequest -from kafka.protocol.fetch import FetchRequest from kafka.protocol.parser import KafkaProtocol -from kafka.protocol.types import Int32, Int8 -from kafka.scram import ScramClient +from kafka.protocol.produce import ProduceRequest +from kafka.protocol.sasl_authenticate import SaslAuthenticateRequest +from kafka.protocol.sasl_handshake import SaslHandshakeRequest +from kafka.protocol.types import Int32 +from kafka.sasl import get_sasl_mechanism +from kafka.socks5_wrapper import Socks5Wrapper from kafka.version import __version__ @@ -45,10 +49,6 @@ DEFAULT_KAFKA_PORT = 9092 -SASL_QOP_AUTH = 1 -SASL_QOP_AUTH_INT = 2 -SASL_QOP_AUTH_CONF = 4 - try: import ssl ssl_available 
= True @@ -74,15 +74,6 @@ class SSLWantReadError(Exception): class SSLWantWriteError(Exception): pass -# needed for SASL_GSSAPI authentication: -try: - import gssapi - from gssapi.raw.misc import GSSError -except (ImportError, OSError): - #no gssapi available, will disable gssapi mechanism - gssapi = None - GSSError = None - AFI_NAMES = { socket.AF_UNSPEC: "unspecified", @@ -92,12 +83,13 @@ class SSLWantWriteError(Exception): class ConnectionStates(object): - DISCONNECTING = '' DISCONNECTED = '' CONNECTING = '' HANDSHAKE = '' CONNECTED = '' AUTHENTICATING = '' + API_VERSIONS_SEND = '' + API_VERSIONS_RECV = '' class BrokerConnection(object): @@ -109,6 +101,10 @@ class BrokerConnection(object): server-side log entries that correspond to this client. Also submitted to GroupCoordinator for logging with respect to consumer group administration. Default: 'kafka-python-{version}' + client_software_name (str): Sent to kafka broker for KIP-511. + Default: 'kafka-python' + client_software_version (str): Sent to kafka broker for KIP-511. + Default: The kafka-python version (via kafka.version). reconnect_backoff_ms (int): The amount of time in milliseconds to wait before attempting to reconnect to a given host. Default: 50. @@ -169,7 +165,7 @@ class BrokerConnection(object): Default: None api_version_auto_timeout_ms (int): number of milliseconds to throw a timeout exception from the constructor when checking the broker - api version. Only applies if api_version is None + api version. Only applies if api_version is None. Default: 2000. selector (selectors.BaseSelector): Provide a specific selector implementation to use for I/O multiplexing. Default: selectors.DefaultSelector @@ -185,16 +181,22 @@ class BrokerConnection(object): Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms. sasl_plain_password (str): password for sasl PLAIN and SCRAM authentication. Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms. + sasl_kerberos_name (str or gssapi.Name): Constructed gssapi.Name for use with + sasl mechanism handshake. If provided, sasl_kerberos_service_name and + sasl_kerberos_domain name are ignored. Default: None. sasl_kerberos_service_name (str): Service name to include in GSSAPI sasl mechanism handshake. Default: 'kafka' sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI sasl mechanism handshake. Default: one of bootstrap servers - sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider - instance. (See kafka.oauth.abstract). Default: None + sasl_oauth_token_provider (kafka.sasl.oauth.AbstractTokenProvider): OAuthBearer + token provider instance. Default: None + socks5_proxy (str): Socks5 proxy url. 
Default: None """ DEFAULT_CONFIG = { 'client_id': 'kafka-python-' + __version__, + 'client_software_name': 'kafka-python', + 'client_software_version': __version__, 'node_id': 0, 'request_timeout_ms': 30000, 'reconnect_backoff_ms': 50, @@ -215,6 +217,7 @@ class BrokerConnection(object): 'ssl_password': None, 'ssl_ciphers': None, 'api_version': None, + 'api_version_auto_timeout_ms': 2000, 'selector': selectors.DefaultSelector, 'state_change_callback': lambda node_id, sock, conn: True, 'metrics': None, @@ -222,12 +225,19 @@ class BrokerConnection(object): 'sasl_mechanism': None, 'sasl_plain_username': None, 'sasl_plain_password': None, + 'sasl_kerberos_name': None, 'sasl_kerberos_service_name': 'kafka', 'sasl_kerberos_domain_name': None, - 'sasl_oauth_token_provider': None + 'sasl_oauth_token_provider': None, + 'socks5_proxy': None, } SECURITY_PROTOCOLS = ('PLAINTEXT', 'SSL', 'SASL_PLAINTEXT', 'SASL_SSL') - SASL_MECHANISMS = ('PLAIN', 'GSSAPI', 'OAUTHBEARER', "SCRAM-SHA-256", "SCRAM-SHA-512") + VERSION_CHECKS = ( + ((0, 9), ListGroupsRequest[0]()), + ((0, 8, 2), FindCoordinatorRequest[0]('kafka-python-default-group')), + ((0, 8, 1), OffsetFetchRequest[0]('kafka-python-default-group', [])), + ((0, 8, 0), MetadataRequest[0]([])), + ) def __init__(self, host, port, afi, **configs): self.host = host @@ -236,7 +246,11 @@ def __init__(self, host, port, afi, **configs): self._sock_afi = afi self._sock_addr = None self._api_versions = None + self._api_version = None + self._check_version_idx = None + self._api_versions_idx = 4 # version of ApiVersionsRequest to try on first connect self._throttle_time = None + self._socks5_proxy = None self.config = copy.copy(self.DEFAULT_CONFIG) for key in self.config: @@ -260,23 +274,8 @@ def __init__(self, host, port, afi, **configs): if self.config['security_protocol'] in ('SSL', 'SASL_SSL'): assert ssl_available, "Python wasn't built with SSL support" - if self.config['security_protocol'] in ('SASL_PLAINTEXT', 'SASL_SSL'): - assert self.config['sasl_mechanism'] in self.SASL_MECHANISMS, ( - 'sasl_mechanism must be in ' + ', '.join(self.SASL_MECHANISMS)) - if self.config['sasl_mechanism'] in ('PLAIN', 'SCRAM-SHA-256', 'SCRAM-SHA-512'): - assert self.config['sasl_plain_username'] is not None, ( - 'sasl_plain_username required for PLAIN or SCRAM sasl' - ) - assert self.config['sasl_plain_password'] is not None, ( - 'sasl_plain_password required for PLAIN or SCRAM sasl' - ) - if self.config['sasl_mechanism'] == 'GSSAPI': - assert gssapi is not None, 'GSSAPI lib not available' - assert self.config['sasl_kerberos_service_name'] is not None, 'sasl_kerberos_service_name required for GSSAPI sasl' - if self.config['sasl_mechanism'] == 'OAUTHBEARER': - token_provider = self.config['sasl_oauth_token_provider'] - assert token_provider is not None, 'sasl_oauth_token_provider required for OAUTHBEARER sasl' - assert callable(getattr(token_provider, "token", None)), 'sasl_oauth_token_provider must implement method #token()' + self._init_sasl_mechanism() + # This is not a general lock / this class is not generally thread-safe yet # However, to avoid pushing responsibility for maintaining # per-connection locks to the upstream client, we will use this lock to @@ -301,6 +300,8 @@ def __init__(self, host, port, afi, **configs): self._ssl_context = None if self.config['ssl_context'] is not None: self._ssl_context = self.config['ssl_context'] + self._api_versions_future = None + self._api_versions_check_timeout = self.config['api_version_auto_timeout_ms'] self._sasl_auth_future = None 
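The socks5_proxy option added to the config dicts above is passed like any other client setting; a minimal sketch (proxy URL format assumed, host names are examples):

    from kafka.client_async import KafkaClient

    client = KafkaClient(
        bootstrap_servers='broker.internal:9092',
        socks5_proxy='socks5://proxy.internal:1080',  # traffic is routed via Socks5Wrapper
    )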
self.last_attempt = 0 self._gai = [] @@ -310,11 +311,17 @@ def __init__(self, host, port, afi, **configs): self.config['metric_group_prefix'], self.node_id) + def _init_sasl_mechanism(self): + if self.config['security_protocol'] in ('SASL_PLAINTEXT', 'SASL_SSL'): + self._sasl_mechanism = get_sasl_mechanism(self.config['sasl_mechanism'])(host=self.host, **self.config) + else: + self._sasl_mechanism = None + def _dns_lookup(self): self._gai = dns_lookup(self.host, self.port, self.afi) if not self._gai: - log.error('DNS lookup failed for %s:%i (%s)', - self.host, self.port, self.afi) + log.error('%s: DNS lookup failed for %s:%i (%s)', + self, self.host, self.port, self.afi) return False return True @@ -360,6 +367,7 @@ def connect_blocking(self, timeout=float('inf')): def connect(self): """Attempt to connect and return ConnectionState""" if self.state is ConnectionStates.DISCONNECTED and not self.blacked_out(): + self.state = ConnectionStates.CONNECTING self.last_attempt = time.time() next_lookup = self._next_afi_sockaddr() if not next_lookup: @@ -370,7 +378,11 @@ def connect(self): assert self._sock is None self._sock_afi, self._sock_addr = next_lookup try: - self._sock = socket.socket(self._sock_afi, socket.SOCK_STREAM) + if self.config["socks5_proxy"] is not None: + self._socks5_proxy = Socks5Wrapper(self.config["socks5_proxy"], self.afi) + self._sock = self._socks5_proxy.socket(self._sock_afi, socket.SOCK_STREAM) + else: + self._sock = socket.socket(self._sock_afi, socket.SOCK_STREAM) except (socket.error, OSError) as e: self.close(e) return self.state @@ -380,7 +392,6 @@ def connect(self): self._sock.setsockopt(*option) self._sock.setblocking(False) - self.state = ConnectionStates.CONNECTING self.config['state_change_callback'](self.node_id, self._sock, self) log.info('%s: connecting to %s:%d [%s %s]', self, self.host, self.port, self._sock_addr, AFI_NAMES[self._sock_afi]) @@ -390,7 +401,10 @@ def connect(self): # to check connection status ret = None try: - ret = self._sock.connect_ex(self._sock_addr) + if self._socks5_proxy: + ret = self._socks5_proxy.connect_ex(self._sock_addr) + else: + ret = self._sock.connect_ex(self._sock_addr) except socket.error as err: ret = err.errno @@ -399,28 +413,20 @@ def connect(self): log.debug('%s: established TCP connection', self) if self.config['security_protocol'] in ('SSL', 'SASL_SSL'): - log.debug('%s: initiating SSL handshake', self) self.state = ConnectionStates.HANDSHAKE + log.debug('%s: initiating SSL handshake', self) self.config['state_change_callback'](self.node_id, self._sock, self) # _wrap_ssl can alter the connection state -- disconnects on failure self._wrap_ssl() - - elif self.config['security_protocol'] == 'SASL_PLAINTEXT': - log.debug('%s: initiating SASL authentication', self) - self.state = ConnectionStates.AUTHENTICATING - self.config['state_change_callback'](self.node_id, self._sock, self) - else: - # security_protocol PLAINTEXT - log.info('%s: Connection complete.', self) - self.state = ConnectionStates.CONNECTED - self._reset_reconnect_backoff() + self.state = ConnectionStates.API_VERSIONS_SEND + log.debug('%s: checking broker Api Versions', self) self.config['state_change_callback'](self.node_id, self._sock, self) # Connection failed # WSAEINVAL == 10022, but errno.WSAEINVAL is not available on non-win systems elif ret not in (errno.EINPROGRESS, errno.EALREADY, errno.EWOULDBLOCK, 10022): - log.error('Connect attempt to %s returned error %s.' + log.error('%s: Connect attempt returned error %s.' 
' Disconnecting.', self, ret) errstr = errno.errorcode.get(ret, 'UNKNOWN') self.close(Errors.KafkaConnectionError('{} {}'.format(ret, errstr))) @@ -433,22 +439,32 @@ def connect(self): if self.state is ConnectionStates.HANDSHAKE: if self._try_handshake(): log.debug('%s: completed SSL handshake.', self) - if self.config['security_protocol'] == 'SASL_SSL': - log.debug('%s: initiating SASL authentication', self) - self.state = ConnectionStates.AUTHENTICATING - else: - log.info('%s: Connection complete.', self) - self.state = ConnectionStates.CONNECTED - self._reset_reconnect_backoff() + self.state = ConnectionStates.API_VERSIONS_SEND + log.debug('%s: checking broker Api Versions', self) self.config['state_change_callback'](self.node_id, self._sock, self) + if self.state in (ConnectionStates.API_VERSIONS_SEND, ConnectionStates.API_VERSIONS_RECV): + if self._try_api_versions_check(): + # _try_api_versions_check has side-effects: possibly disconnected on socket errors + if self.state in (ConnectionStates.API_VERSIONS_SEND, ConnectionStates.API_VERSIONS_RECV): + if self.config['security_protocol'] in ('SASL_PLAINTEXT', 'SASL_SSL'): + self.state = ConnectionStates.AUTHENTICATING + log.debug('%s: initiating SASL authentication', self) + self.config['state_change_callback'](self.node_id, self._sock, self) + else: + # security_protocol PLAINTEXT + self.state = ConnectionStates.CONNECTED + log.info('%s: Connection complete.', self) + self._reset_reconnect_backoff() + self.config['state_change_callback'](self.node_id, self._sock, self) + if self.state is ConnectionStates.AUTHENTICATING: assert self.config['security_protocol'] in ('SASL_PLAINTEXT', 'SASL_SSL') if self._try_authenticate(): # _try_authenticate has side-effects: possibly disconnected on socket errors if self.state is ConnectionStates.AUTHENTICATING: - log.info('%s: Connection complete.', self) self.state = ConnectionStates.CONNECTED + log.info('%s: Connection complete.', self) self._reset_reconnect_backoff() self.config['state_change_callback'](self.node_id, self._sock, self) @@ -457,7 +473,7 @@ def connect(self): # Connection timed out request_timeout = self.config['request_timeout_ms'] / 1000.0 if time.time() > request_timeout + self.last_attempt: - log.error('Connection attempt to %s timed out', self) + log.error('%s: Connection attempt timed out', self) self.close(Errors.KafkaConnectionError('timeout')) return self.state @@ -516,20 +532,136 @@ def _try_handshake(self): except (SSLWantReadError, SSLWantWriteError): pass except (SSLZeroReturnError, ConnectionError, TimeoutError, SSLEOFError): - log.warning('SSL connection closed by server during handshake.') + log.warning('%s: SSL connection closed by server during handshake.', self) self.close(Errors.KafkaConnectionError('SSL connection closed by server during handshake')) # Other SSLErrors will be raised to user return False - def _try_authenticate(self): - assert self.config['api_version'] is None or self.config['api_version'] >= (0, 10, 0) + def _try_api_versions_check(self): + if self._api_versions_future is None: + if self.config['api_version'] is not None: + self._api_version = self.config['api_version'] + # api_version will be normalized by KafkaClient, so this should not happen + if self._api_version not in BROKER_API_VERSIONS: + raise Errors.UnrecognizedBrokerVersion('api_version %s not found in kafka.protocol.broker_api_versions' % (self._api_version,)) + self._api_versions = BROKER_API_VERSIONS[self._api_version] + log.debug('%s: Using pre-configured api_version %s for 
ApiVersions', self, self._api_version) + return True + elif self._check_version_idx is None: + version = self._api_versions_idx + if version >= 3: + request = ApiVersionsRequest[version]( + client_software_name=self.config['client_software_name'], + client_software_version=self.config['client_software_version'], + _tagged_fields={}) + else: + request = ApiVersionsRequest[version]() + future = Future() + self._api_versions_check_timeout /= 2 + response = self._send(request, blocking=True, request_timeout_ms=self._api_versions_check_timeout) + response.add_callback(self._handle_api_versions_response, future) + response.add_errback(self._handle_api_versions_failure, future) + self._api_versions_future = future + self.state = ConnectionStates.API_VERSIONS_RECV + self.config['state_change_callback'](self.node_id, self._sock, self) + elif self._check_version_idx < len(self.VERSION_CHECKS): + version, request = self.VERSION_CHECKS[self._check_version_idx] + future = Future() + self._api_versions_check_timeout /= 2 + response = self._send(request, blocking=True, request_timeout_ms=self._api_versions_check_timeout) + response.add_callback(self._handle_check_version_response, future, version) + response.add_errback(self._handle_check_version_failure, future) + self._api_versions_future = future + self.state = ConnectionStates.API_VERSIONS_RECV + self.config['state_change_callback'](self.node_id, self._sock, self) + else: + self.close(Errors.KafkaConnectionError('Unable to determine broker version.')) + return False + + for r, f in self.recv(): + f.success(r) + # A connection error during blocking send could trigger close() which will reset the future + if self._api_versions_future is None: + return False + elif self._api_versions_future.failed(): + ex = self._api_versions_future.exception + if not isinstance(ex, Errors.KafkaConnectionError): + raise ex + return self._api_versions_future.succeeded() + + def _handle_api_versions_response(self, future, response): + error_type = Errors.for_code(response.error_code) + if error_type is not Errors.NoError: + future.failure(error_type()) + if error_type is Errors.UnsupportedVersionError: + self._api_versions_idx -= 1 + for api_version_data in response.api_versions: + api_key, min_version, max_version = api_version_data[:3] + # If broker provides a lower max_version, skip to that + if api_key == response.API_KEY: + self._api_versions_idx = min(self._api_versions_idx, max_version) + break + if self._api_versions_idx >= 0: + self._api_versions_future = None + self.state = ConnectionStates.API_VERSIONS_SEND + self.config['state_change_callback'](self.node_id, self._sock, self) + else: + self.close(error=error_type()) + return + self._api_versions = dict([ + (api_version_data[0], (api_version_data[1], api_version_data[2])) + for api_version_data in response.api_versions + ]) + self._api_version = self._infer_broker_version_from_api_versions(self._api_versions) + log.info('%s: Broker version identified as %s', self, '.'.join(map(str, self._api_version))) + future.success(self._api_version) + self.connect() + + def _handle_api_versions_failure(self, future, ex): + future.failure(ex) + # Modern brokers should not disconnect on unrecognized api-versions request, + # but in case they do we always want to try v0 as a fallback + # otherwise switch to check_version probe. 
+ if self._api_versions_idx > 0: + self._api_versions_idx = 0 + else: + self._check_version_idx = 0 + # after failure connection is closed, so state should already be DISCONNECTED + + def _handle_check_version_response(self, future, version, _response): + log.info('%s: Broker version identified as %s', self, '.'.join(map(str, version))) + log.info('Set configuration api_version=%s to skip auto' + ' check_version requests on startup', version) + self._api_versions = BROKER_API_VERSIONS[version] + self._api_version = version + future.success(version) + self.connect() + + def _handle_check_version_failure(self, future, ex): + future.failure(ex) + self._check_version_idx += 1 + # after failure connection is closed, so state should already be DISCONNECTED + + def _sasl_handshake_version(self): + if self._api_versions is None: + raise RuntimeError('_api_versions not set') + if SaslHandshakeRequest[0].API_KEY not in self._api_versions: + raise Errors.UnsupportedVersionError('SaslHandshake') + + # Build a SaslHandshakeRequest message + min_version, max_version = self._api_versions[SaslHandshakeRequest[0].API_KEY] + if min_version > 1: + raise Errors.UnsupportedVersionError('SaslHandshake %s' % min_version) + return min(max_version, 1) + + def _try_authenticate(self): if self._sasl_auth_future is None: - # Build a SaslHandShakeRequest message - request = SaslHandShakeRequest[0](self.config['sasl_mechanism']) + version = self._sasl_handshake_version() + request = SaslHandshakeRequest[version](self.config['sasl_mechanism']) future = Future() - sasl_response = self._send(request) + sasl_response = self._send(request, blocking=True) sasl_response.add_callback(self._handle_sasl_handshake_response, future) sasl_response.add_errback(lambda f, e: f.failure(e), future) self._sasl_auth_future = future @@ -554,23 +686,18 @@ def _handle_sasl_handshake_response(self, future, response): return future.failure(error_type(self)) if self.config['sasl_mechanism'] not in response.enabled_mechanisms: - return future.failure( + future.failure( Errors.UnsupportedSaslMechanismError( 'Kafka broker does not support %s sasl mechanism. Enabled mechanisms are: %s' % (self.config['sasl_mechanism'], response.enabled_mechanisms))) - elif self.config['sasl_mechanism'] == 'PLAIN': - return self._try_authenticate_plain(future) - elif self.config['sasl_mechanism'] == 'GSSAPI': - return self._try_authenticate_gssapi(future) - elif self.config['sasl_mechanism'] == 'OAUTHBEARER': - return self._try_authenticate_oauth(future) - elif self.config['sasl_mechanism'].startswith("SCRAM-SHA-"): - return self._try_authenticate_scram(future) else: - return future.failure( - Errors.UnsupportedSaslMechanismError( - 'kafka-python does not support SASL mechanism %s' % - self.config['sasl_mechanism'])) + self._sasl_authenticate(future) + + assert future.is_done, 'SASL future not complete after mechanism processing!' 
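For context, a toy mechanism written against the interface that _sasl_authenticate() drives in this patch (auth_bytes(), receive(), is_done(), is_authenticated(), auth_details()). It is only a sketch of that contract, not whatever object _init_sasl_mechanism() actually assigns to self._sasl_mechanism.

class ToyPlainMechanism(object):
    """Minimal PLAIN-style mechanism sketch: one client token, one server reply."""

    def __init__(self, username, password):
        self._username = username
        self._password = password
        self._server_response = None

    def auth_bytes(self):
        # RFC 4616 initial response: authzid NUL authcid NUL password
        return b'\0'.join([b'', self._username.encode('utf-8'),
                           self._password.encode('utf-8')])

    def receive(self, auth_bytes):
        self._server_response = auth_bytes

    def is_done(self):
        return self._server_response is not None

    def is_authenticated(self):
        # brokers reply to a successful PLAIN exchange with empty auth bytes
        return self._server_response == b''

    def auth_details(self):
        return 'Authenticated as %s via PLAIN (sketch)' % self._username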
+ if future.failed(): + self.close(error=future.exception) + else: + self.connect() def _send_bytes(self, data): """Send some data via non-blocking IO @@ -599,6 +726,7 @@ def _send_bytes(self, data): return total_sent def _send_bytes_blocking(self, data): + self._sock.setblocking(True) self._sock.settimeout(self.config['request_timeout_ms'] / 1000) total_sent = 0 try: @@ -610,8 +738,10 @@ def _send_bytes_blocking(self, data): return total_sent finally: self._sock.settimeout(0.0) + self._sock.setblocking(False) def _recv_bytes_blocking(self, n): + self._sock.setblocking(True) self._sock.settimeout(self.config['request_timeout_ms'] / 1000) try: data = b'' @@ -623,225 +753,76 @@ def _recv_bytes_blocking(self, n): return data finally: self._sock.settimeout(0.0) + self._sock.setblocking(False) - def _try_authenticate_plain(self, future): - if self.config['security_protocol'] == 'SASL_PLAINTEXT': - log.warning('%s: Sending username and password in the clear', self) - - data = b'' - # Send PLAIN credentials per RFC-4616 - msg = bytes('\0'.join([self.config['sasl_plain_username'], - self.config['sasl_plain_username'], - self.config['sasl_plain_password']]).encode('utf-8')) - size = Int32.encode(len(msg)) - - err = None - close = False - with self._lock: - if not self._can_send_recv(): - err = Errors.NodeNotReadyError(str(self)) - close = False - else: - try: - self._send_bytes_blocking(size + msg) - - # The server will send a zero sized message (that is Int32(0)) on success. - # The connection is closed on failure - data = self._recv_bytes_blocking(4) - - except (ConnectionError, TimeoutError) as e: - log.exception("%s: Error receiving reply from server", self) - err = Errors.KafkaConnectionError("%s: %s" % (self, e)) - close = True - - if err is not None: - if close: + def _send_sasl_authenticate(self, sasl_auth_bytes): + version = self._sasl_handshake_version() + if version == 1: + request = SaslAuthenticateRequest[0](sasl_auth_bytes) + self._send(request, blocking=True) + else: + log.debug('%s: Sending %d raw sasl auth bytes to server', self, len(sasl_auth_bytes)) + try: + self._send_bytes_blocking(Int32.encode(len(sasl_auth_bytes)) + sasl_auth_bytes) + except (ConnectionError, TimeoutError) as e: + log.exception("%s: Error sending sasl auth bytes to server", self) + err = Errors.KafkaConnectionError("%s: %s" % (self, e)) self.close(error=err) - return future.failure(err) - - if data != b'\x00\x00\x00\x00': - error = Errors.AuthenticationFailedError('Unrecognized response during authentication') - return future.failure(error) - - log.info('%s: Authenticated as %s via PLAIN', self, self.config['sasl_plain_username']) - return future.success(True) - - def _try_authenticate_scram(self, future): - if self.config['security_protocol'] == 'SASL_PLAINTEXT': - log.warning('%s: Exchanging credentials in the clear', self) - - scram_client = ScramClient( - self.config['sasl_plain_username'], self.config['sasl_plain_password'], self.config['sasl_mechanism'] - ) - - err = None - close = False - with self._lock: - if not self._can_send_recv(): - err = Errors.NodeNotReadyError(str(self)) - close = False - else: - try: - client_first = scram_client.first_message().encode('utf-8') - size = Int32.encode(len(client_first)) - self._send_bytes_blocking(size + client_first) - - (data_len,) = struct.unpack('>i', self._recv_bytes_blocking(4)) - server_first = self._recv_bytes_blocking(data_len).decode('utf-8') - scram_client.process_server_first_message(server_first) - client_final = 
scram_client.final_message().encode('utf-8') - size = Int32.encode(len(client_final)) - self._send_bytes_blocking(size + client_final) + def _recv_sasl_authenticate(self): + version = self._sasl_handshake_version() + # GSSAPI mechanism does not get a final recv in old non-framed mode + if version == 0 and self._sasl_mechanism.is_done(): + return b'' - (data_len,) = struct.unpack('>i', self._recv_bytes_blocking(4)) - server_final = self._recv_bytes_blocking(data_len).decode('utf-8') - scram_client.process_server_final_message(server_final) - - except (ConnectionError, TimeoutError) as e: - log.exception("%s: Error receiving reply from server", self) - err = Errors.KafkaConnectionError("%s: %s" % (self, e)) - close = True + try: + data = self._recv_bytes_blocking(4) + nbytes = Int32.decode(io.BytesIO(data)) + data += self._recv_bytes_blocking(nbytes) + except (ConnectionError, TimeoutError) as e: + log.exception("%s: Error receiving sasl auth bytes from server", self) + err = Errors.KafkaConnectionError("%s: %s" % (self, e)) + self.close(error=err) + return - if err is not None: - if close: - self.close(error=err) - return future.failure(err) - - log.info( - '%s: Authenticated as %s via %s', self, self.config['sasl_plain_username'], self.config['sasl_mechanism'] - ) - return future.success(True) - - def _try_authenticate_gssapi(self, future): - kerberos_damin_name = self.config['sasl_kerberos_domain_name'] or self.host - auth_id = self.config['sasl_kerberos_service_name'] + '@' + kerberos_damin_name - gssapi_name = gssapi.Name( - auth_id, - name_type=gssapi.NameType.hostbased_service - ).canonicalize(gssapi.MechType.kerberos) - log.debug('%s: GSSAPI name: %s', self, gssapi_name) + if version == 1: + ((correlation_id, response),) = self._protocol.receive_bytes(data) + (future, timestamp, _timeout) = self.in_flight_requests.pop(correlation_id) + latency_ms = (time.time() - timestamp) * 1000 + if self._sensors: + self._sensors.request_time.record(latency_ms) + log.debug('%s: Response %d (%s ms): %s', self, correlation_id, latency_ms, response) - err = None - close = False - with self._lock: + error_type = Errors.for_code(response.error_code) + if error_type is not Errors.NoError: + log.error("%s: SaslAuthenticate error: %s (%s)", + self, error_type.__name__, response.error_message) + self.close(error=error_type(response.error_message)) + return + return response.auth_bytes + else: + # unframed bytes w/ SaslHandhake v0 + log.debug('%s: Received %d raw sasl auth bytes from server', self, nbytes) + return data[4:] + + def _sasl_authenticate(self, future): + while not self._sasl_mechanism.is_done(): + send_token = self._sasl_mechanism.auth_bytes() + self._send_sasl_authenticate(send_token) if not self._can_send_recv(): - err = Errors.NodeNotReadyError(str(self)) - close = False - else: - # Establish security context and negotiate protection level - # For reference RFC 2222, section 7.2.1 - try: - # Exchange tokens until authentication either succeeds or fails - client_ctx = gssapi.SecurityContext(name=gssapi_name, usage='initiate') - received_token = None - while not client_ctx.complete: - # calculate an output token from kafka token (or None if first iteration) - output_token = client_ctx.step(received_token) - - # pass output token to kafka, or send empty response if the security - # context is complete (output token is None in that case) - if output_token is None: - self._send_bytes_blocking(Int32.encode(0)) - else: - msg = output_token - size = Int32.encode(len(msg)) - 
self._send_bytes_blocking(size + msg) - - # The server will send a token back. Processing of this token either - # establishes a security context, or it needs further token exchange. - # The gssapi will be able to identify the needed next step. - # The connection is closed on failure. - header = self._recv_bytes_blocking(4) - (token_size,) = struct.unpack('>i', header) - received_token = self._recv_bytes_blocking(token_size) - - # Process the security layer negotiation token, sent by the server - # once the security context is established. - - # unwraps message containing supported protection levels and msg size - msg = client_ctx.unwrap(received_token).message - # Kafka currently doesn't support integrity or confidentiality security layers, so we - # simply set QoP to 'auth' only (first octet). We reuse the max message size proposed - # by the server - msg = Int8.encode(SASL_QOP_AUTH & Int8.decode(io.BytesIO(msg[0:1]))) + msg[1:] - # add authorization identity to the response, GSS-wrap and send it - msg = client_ctx.wrap(msg + auth_id.encode(), False).message - size = Int32.encode(len(msg)) - self._send_bytes_blocking(size + msg) + return future.failure(Errors.KafkaConnectionError("%s: Connection failure during Sasl Authenticate" % self)) - except (ConnectionError, TimeoutError) as e: - log.exception("%s: Error receiving reply from server", self) - err = Errors.KafkaConnectionError("%s: %s" % (self, e)) - close = True - except Exception as e: - err = e - close = True - - if err is not None: - if close: - self.close(error=err) - return future.failure(err) - - log.info('%s: Authenticated as %s via GSSAPI', self, gssapi_name) - return future.success(True) - - def _try_authenticate_oauth(self, future): - data = b'' - - msg = bytes(self._build_oauth_client_request().encode("utf-8")) - size = Int32.encode(len(msg)) - - err = None - close = False - with self._lock: - if not self._can_send_recv(): - err = Errors.NodeNotReadyError(str(self)) - close = False + recv_token = self._recv_sasl_authenticate() + if recv_token is None: + return future.failure(Errors.KafkaConnectionError("%s: Connection failure during Sasl Authenticate" % self)) else: - try: - # Send SASL OAuthBearer request with OAuth token - self._send_bytes_blocking(size + msg) - - # The server will send a zero sized message (that is Int32(0)) on success. - # The connection is closed on failure - data = self._recv_bytes_blocking(4) - - except (ConnectionError, TimeoutError) as e: - log.exception("%s: Error receiving reply from server", self) - err = Errors.KafkaConnectionError("%s: %s" % (self, e)) - close = True - - if err is not None: - if close: - self.close(error=err) - return future.failure(err) - - if data != b'\x00\x00\x00\x00': - error = Errors.AuthenticationFailedError('Unrecognized response during authentication') - return future.failure(error) - - log.info('%s: Authenticated via OAuth', self) - return future.success(True) - - def _build_oauth_client_request(self): - token_provider = self.config['sasl_oauth_token_provider'] - return "n,,\x01auth=Bearer {}{}\x01\x01".format(token_provider.token(), self._token_extensions()) + self._sasl_mechanism.receive(recv_token) - def _token_extensions(self): - """ - Return a string representation of the OPTIONAL key-value pairs that can be sent with an OAUTHBEARER - initial request. 
- """ - token_provider = self.config['sasl_oauth_token_provider'] - - # Only run if the #extensions() method is implemented by the clients Token Provider class - # Builds up a string separated by \x01 via a dict of key value pairs - if callable(getattr(token_provider, "extensions", None)) and len(token_provider.extensions()) > 0: - msg = "\x01".join(["{}={}".format(k, v) for k, v in token_provider.extensions().items()]) - return "\x01" + msg + if self._sasl_mechanism.is_authenticated(): + log.info('%s: %s', self, self._sasl_mechanism.auth_details()) + return future.success(True) else: - return "" + return future.failure(Errors.SaslAuthenticationFailedError('Failed to authenticate via SASL %s' % self.config['sasl_mechanism'])) def blacked_out(self): """ @@ -901,12 +882,26 @@ def connecting(self): different states, such as SSL handshake, authorization, etc).""" return self.state in (ConnectionStates.CONNECTING, ConnectionStates.HANDSHAKE, - ConnectionStates.AUTHENTICATING) + ConnectionStates.AUTHENTICATING, + ConnectionStates.API_VERSIONS_SEND, + ConnectionStates.API_VERSIONS_RECV) + + def initializing(self): + """Returns True if socket is connected but full connection is not complete. + During this time the connection may send api requests to the broker to + check api versions and perform SASL authentication.""" + return self.state in (ConnectionStates.AUTHENTICATING, + ConnectionStates.API_VERSIONS_SEND, + ConnectionStates.API_VERSIONS_RECV) def disconnected(self): """Return True iff socket is closed""" return self.state is ConnectionStates.DISCONNECTED + def connect_failed(self): + """Return True iff connection attempt failed after attempting all dns records""" + return self.disconnected() and self.last_attempt >= 0 and len(self._gai) == 0 + def _reset_reconnect_backoff(self): self._failures = 0 self._reconnect_backoff = self.config['reconnect_backoff_ms'] / 1000.0 @@ -948,8 +943,11 @@ def close(self, error=None): if self.state is ConnectionStates.DISCONNECTED: return log.log(logging.ERROR if error else logging.INFO, '%s: Closing connection. 
%s', self, error or '') - self._update_reconnect_backoff() + if error: + self._update_reconnect_backoff() + self._api_versions_future = None self._sasl_auth_future = None + self._init_sasl_mechanism() self._protocol = KafkaProtocol( client_id=self.config['client_id'], api_version=self.config['api_version']) @@ -969,14 +967,14 @@ def close(self, error=None): # drop lock before state change callback and processing futures self.config['state_change_callback'](self.node_id, sock, self) - sock.close() + if sock: + sock.close() for (_correlation_id, (future, _timestamp, _timeout)) in ifrs: future.failure(error) def _can_send_recv(self): """Return True iff socket is ready for requests / responses""" - return self.state in (ConnectionStates.AUTHENTICATING, - ConnectionStates.CONNECTED) + return self.connected() or self.initializing() def send(self, request, blocking=True, request_timeout_ms=None): """Queue request for async network send, return Future() @@ -1005,6 +1003,7 @@ def send(self, request, blocking=True, request_timeout_ms=None): return self._send(request, blocking=blocking, request_timeout_ms=request_timeout_ms) def _send(self, request, blocking=True, request_timeout_ms=None): + request_timeout_ms = request_timeout_ms or self.config['request_timeout_ms'] future = Future() with self._lock: if not self._can_send_recv(): @@ -1015,11 +1014,10 @@ def _send(self, request, blocking=True, request_timeout_ms=None): correlation_id = self._protocol.send_request(request) - log.debug('%s Request %d: %s', self, correlation_id, request) + log.debug('%s: Request %d (timeout_ms %s): %s', self, correlation_id, request_timeout_ms, request) if request.expect_response(): assert correlation_id not in self.in_flight_requests, 'Correlation ID already in-flight!' sent_time = time.time() - request_timeout_ms = request_timeout_ms or self.config['request_timeout_ms'] timeout_at = sent_time + (request_timeout_ms / 1000) self.in_flight_requests[correlation_id] = (future, sent_time, timeout_at) else: @@ -1050,7 +1048,7 @@ def send_pending_requests(self): return True except (ConnectionError, TimeoutError) as e: - log.exception("Error sending request data to %s", self) + log.exception("%s: Error sending request data", self) error = Errors.KafkaConnectionError("%s: %s" % (self, e)) self.close(error=error) return False @@ -1083,7 +1081,7 @@ def send_pending_requests_v2(self): return len(self._send_buffer) == 0 except (ConnectionError, TimeoutError, Exception) as e: - log.exception("Error sending request data to %s", self) + log.exception("%s: Error sending request data", self) error = Errors.KafkaConnectionError("%s: %s" % (self, e)) self.close(error=error) return False @@ -1120,7 +1118,7 @@ def recv(self): if not responses and self.requests_timed_out(): timed_out = self.timed_out_ifrs() timeout_ms = (timed_out[0][2] - timed_out[0][1]) * 1000 - log.warning('%s timed out after %s ms. Closing connection.', + log.warning('%s: timed out after %s ms. 
Closing connection.', self, timeout_ms) self.close(error=Errors.RequestTimedOutError( 'Request timed out after %s ms' % @@ -1139,7 +1137,7 @@ def recv(self): if self._sensors: self._sensors.request_time.record(latency_ms) - log.debug('%s Response %d (%s ms): %s', self, correlation_id, latency_ms, response) + log.debug('%s: Response %d (%s ms): %s', self, correlation_id, latency_ms, response) self._maybe_throttle(response) responses[i] = (response, future) @@ -1151,7 +1149,7 @@ def _recv(self): err = None with self._lock: if not self._can_send_recv(): - log.warning('%s cannot recv: socket not connected', self) + log.warning('%s: cannot recv: socket not connected', self) return () while len(recvd) < self.config['sock_chunk_buffer_count']: @@ -1211,29 +1209,20 @@ def timed_out_ifrs(self): def next_ifr_request_timeout_ms(self): with self._lock: if self.in_flight_requests: - get_timeout = lambda v: v[2] + def get_timeout(v): + return v[2] next_timeout = min(map(get_timeout, self.in_flight_requests.values())) return max(0, (next_timeout - time.time()) * 1000) else: return float('inf') - def _handle_api_versions_response(self, response): - error_type = Errors.for_code(response.error_code) - if error_type is not Errors.NoError: - return False - self._api_versions = dict([ - (api_key, (min_version, max_version)) - for api_key, min_version, max_version in response.api_versions - ]) - return self._api_versions - def get_api_versions(self): - if self._api_versions is not None: - return self._api_versions - - version = self.check_version() - # _api_versions is set as a side effect of check_versions() + # _api_versions is set as a side effect of first connection + # which should typically be bootstrap, but call check_version + # if that hasn't happened yet + if self._api_versions is None: + self.check_version() return self._api_versions def _infer_broker_version_from_api_versions(self, api_versions): @@ -1242,6 +1231,20 @@ def _infer_broker_version_from_api_versions(self, api_versions): test_cases = [ # format (, ) # Make sure to update consumer_integration test check when adding newer versions. + # ((3, 9), FetchRequest[17]), + # ((3, 8), ProduceRequest[11]), + # ((3, 7), FetchRequest[16]), + # ((3, 6), AddPartitionsToTxnRequest[4]), + # ((3, 5), FetchRequest[15]), + # ((3, 4), StopReplicaRequest[3]), # broker-internal api... 
+ # ((3, 3), DescribeAclsRequest[3]), + # ((3, 2), JoinGroupRequest[9]), + # ((3, 1), FetchRequest[13]), + # ((3, 0), ListOffsetsRequest[7]), + # ((2, 8), ProduceRequest[9]), + # ((2, 7), FetchRequest[12]), + # ((2, 6), ListGroupsRequest[4]), + # ((2, 5), JoinGroupRequest[7]), ((2, 6), DescribeClientQuotasRequest[0]), ((2, 5), DescribeAclsRequest[2]), ((2, 4), ProduceRequest[8]), @@ -1257,132 +1260,35 @@ def _infer_broker_version_from_api_versions(self, api_versions): ] # Get the best match of test cases - for broker_version, struct in sorted(test_cases, reverse=True): - if struct.API_KEY not in api_versions: + for broker_version, proto_struct in sorted(test_cases, reverse=True): + if proto_struct.API_KEY not in api_versions: continue - min_version, max_version = api_versions[struct.API_KEY] - if min_version <= struct.API_VERSION <= max_version: + min_version, max_version = api_versions[proto_struct.API_KEY] + if min_version <= proto_struct.API_VERSION <= max_version: return broker_version # We know that ApiVersionsResponse is only supported in 0.10+ # so if all else fails, choose that return (0, 10, 0) - def check_version(self, timeout=2, strict=False, topics=[]): + def check_version(self, timeout=2, **kwargs): """Attempt to guess the broker version. + Keyword Arguments: + timeout (numeric, optional): Maximum number of seconds to block attempting + to connect and check version. Default 2 + Note: This is a blocking call. Returns: version tuple, i.e. (3, 9), (2, 4), etc ... + + Raises: NodeNotReadyError on timeout """ timeout_at = time.time() + timeout - log.info('Probing node %s broker version', self.node_id) - # Monkeypatch some connection configurations to avoid timeouts - override_config = { - 'request_timeout_ms': timeout * 1000, - 'max_in_flight_requests_per_connection': 5 - } - stashed = {} - for key in override_config: - stashed[key] = self.config[key] - self.config[key] = override_config[key] - - def reset_override_configs(): - for key in stashed: - self.config[key] = stashed[key] - - # kafka kills the connection when it doesn't recognize an API request - # so we can send a test request and then follow immediately with a - # vanilla MetadataRequest. If the server did not recognize the first - # request, both will be failed with a ConnectionError that wraps - # socket.error (32, 54, or 104) - from kafka.protocol.admin import ListGroupsRequest - from kafka.protocol.api_versions import ApiVersionsRequest - from kafka.protocol.broker_api_versions import BROKER_API_VERSIONS - from kafka.protocol.commit import OffsetFetchRequest - from kafka.protocol.find_coordinator import FindCoordinatorRequest - - test_cases = [ - # All cases starting from 0.10 will be based on ApiVersionsResponse - ((0, 11), ApiVersionsRequest[1]()), - ((0, 10, 0), ApiVersionsRequest[0]()), - ((0, 9), ListGroupsRequest[0]()), - ((0, 8, 2), FindCoordinatorRequest[0]('kafka-python-default-group')), - ((0, 8, 1), OffsetFetchRequest[0]('kafka-python-default-group', [])), - ((0, 8, 0), MetadataRequest[0](topics)), - ] - - for version, request in test_cases: - if not self.connect_blocking(timeout_at - time.time()): - reset_override_configs() - raise Errors.NodeNotReadyError() - f = self.send(request) - # HACK: sleeping to wait for socket to send bytes - time.sleep(0.1) - # when broker receives an unrecognized request API - # it abruptly closes our socket. 
- # so we attempt to send a second request immediately - # that we believe it will definitely recognize (metadata) - # the attempt to write to a disconnected socket should - # immediately fail and allow us to infer that the prior - # request was unrecognized - mr = self.send(MetadataRequest[0](topics)) - - if not (f.is_done and mr.is_done) and self._sock is not None: - selector = self.config['selector']() - selector.register(self._sock, selectors.EVENT_READ) - while not (f.is_done and mr.is_done): - selector.select(1) - for response, future in self.recv(): - future.success(response) - selector.close() - - if f.succeeded(): - if version >= (0, 10, 0): - # Starting from 0.10 kafka broker we determine version - # by looking at ApiVersionsResponse - api_versions = self._handle_api_versions_response(f.value) - if not api_versions: - continue - version = self._infer_broker_version_from_api_versions(api_versions) - else: - if version not in BROKER_API_VERSIONS: - raise Errors.UnrecognizedBrokerVersion(version) - self._api_versions = BROKER_API_VERSIONS[version] - log.info('Broker version identified as %s', '.'.join(map(str, version))) - log.info('Set configuration api_version=%s to skip auto' - ' check_version requests on startup', version) - break - - # Only enable strict checking to verify that we understand failure - # modes. For most users, the fact that the request failed should be - # enough to rule out a particular broker version. - if strict: - # If the socket flush hack did not work (which should force the - # connection to close and fail all pending requests), then we - # get a basic Request Timeout. This is not ideal, but we'll deal - if isinstance(f.exception, Errors.RequestTimedOutError): - pass - - # 0.9 brokers do not close the socket on unrecognized api - # requests (bug...). In this case we expect to see a correlation - # id mismatch - elif (isinstance(f.exception, Errors.CorrelationIdError) and - version > (0, 9)): - pass - elif six.PY2: - assert isinstance(f.exception.args[0], socket.error) - assert f.exception.args[0].errno in (32, 54, 104) - else: - assert isinstance(f.exception.args[0], ConnectionError) - log.info("Broker is not v%s -- it did not recognize %s", - version, request.__class__.__name__) + if not self.connect_blocking(timeout_at - time.time()): + raise Errors.NodeNotReadyError() else: - reset_override_configs() - raise Errors.UnrecognizedBrokerVersion() - - reset_override_configs() - return version + return self._api_version def __str__(self): return "" % ( @@ -1590,32 +1496,6 @@ def get_ip_port_afi(host_and_port_str): return host, port, af -def collect_hosts(hosts, randomize=True): - """ - Collects a comma-separated set of hosts (host:port) and optionally - randomize the returned list. 
- """ - - if isinstance(hosts, six.string_types): - hosts = hosts.strip().split(',') - - result = [] - afi = socket.AF_INET - for host_port in hosts: - - host, port, afi = get_ip_port_afi(host_port) - - if port < 0: - port = DEFAULT_KAFKA_PORT - - result.append((host, port, afi)) - - if randomize: - shuffle(result) - - return result - - def is_inet_4_or_6(gai): """Given a getaddrinfo struct, return True iff ipv4 or ipv6""" return gai[0] in (socket.AF_INET, socket.AF_INET6) diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py index eefac5ba7..1888d38bf 100644 --- a/kafka/consumer/fetcher.py +++ b/kafka/consumer/fetcher.py @@ -1,9 +1,9 @@ -from __future__ import absolute_import +from __future__ import absolute_import, division import collections import copy +import itertools import logging -import random import sys import time @@ -12,13 +12,14 @@ import kafka.errors as Errors from kafka.future import Future from kafka.metrics.stats import Avg, Count, Max, Rate -from kafka.protocol.fetch import FetchRequest +from kafka.protocol.fetch import FetchRequest, AbortedTransaction from kafka.protocol.list_offsets import ( ListOffsetsRequest, OffsetResetStrategy, UNKNOWN_OFFSET ) from kafka.record import MemoryRecords from kafka.serializer import Deserializer from kafka.structs import TopicPartition, OffsetAndMetadata, OffsetAndTimestamp +from kafka.util import Timer log = logging.getLogger(__name__) @@ -27,6 +28,11 @@ READ_UNCOMMITTED = 0 READ_COMMITTED = 1 +ISOLATION_LEVEL_CONFIG = { + 'read_uncommitted': READ_UNCOMMITTED, + 'read_committed': READ_COMMITTED, +} + ConsumerRecord = collections.namedtuple("ConsumerRecord", ["topic", "partition", "leader_epoch", "offset", "timestamp", "timestamp_type", "key", "value", "headers", "checksum", "serialized_key_size", "serialized_value_size", "serialized_header_size"]) @@ -37,6 +43,10 @@ "partition_data", "metric_aggregator"]) +ExceptionMetadata = collections.namedtuple("ExceptionMetadata", + ["partition", "fetched_offset", "exception"]) + + class NoOffsetForPartitionError(Errors.KafkaError): pass @@ -55,13 +65,15 @@ class Fetcher(six.Iterator): 'max_partition_fetch_bytes': 1048576, 'max_poll_records': sys.maxsize, 'check_crcs': True, - 'iterator_refetch_records': 1, # undocumented -- interface may change + 'metrics': None, 'metric_group_prefix': 'consumer', + 'request_timeout_ms': 30000, 'retry_backoff_ms': 100, 'enable_incremental_fetch_sessions': True, + 'isolation_level': 'read_uncommitted', } - def __init__(self, client, subscriptions, metrics, **configs): + def __init__(self, client, subscriptions, **configs): """Initialize a Kafka Message Fetcher. Keyword Arguments: @@ -99,21 +111,33 @@ def __init__(self, client, subscriptions, metrics, **configs): consumed. This ensures no on-the-wire or on-disk corruption to the messages occurred. This check adds some overhead, so it may be disabled in cases seeking extreme performance. Default: True + isolation_level (str): Configure KIP-98 transactional consumer by + setting to 'read_committed'. This will cause the consumer to + skip records from aborted tranactions. 
Default: 'read_uncommitted' """ self.config = copy.copy(self.DEFAULT_CONFIG) for key in self.config: if key in configs: self.config[key] = configs[key] + if self.config['isolation_level'] not in ISOLATION_LEVEL_CONFIG: + raise Errors.KafkaConfigurationError('Unrecognized isolation_level') + self._client = client self._subscriptions = subscriptions self._completed_fetches = collections.deque() # Unparsed responses self._next_partition_records = None # Holds a single PartitionRecords until fully consumed self._iterator = None self._fetch_futures = collections.deque() - self._sensors = FetchManagerMetrics(metrics, self.config['metric_group_prefix']) - self._isolation_level = READ_UNCOMMITTED + if self.config['metrics']: + self._sensors = FetchManagerMetrics(self.config['metrics'], self.config['metric_group_prefix']) + else: + self._sensors = None + self._isolation_level = ISOLATION_LEVEL_CONFIG[self.config['isolation_level']] self._session_handlers = {} + self._nodes_with_pending_fetch_requests = set() + self._cached_list_offsets_exception = None + self._next_in_line_exception_metadata = None def send_fetches(self): """Send FetchRequests for all assigned partitions that do not already have @@ -124,28 +148,17 @@ def send_fetches(self): """ futures = [] for node_id, (request, fetch_offsets) in six.iteritems(self._create_fetch_requests()): - if self._client.ready(node_id): - log.debug("Sending FetchRequest to node %s", node_id) - future = self._client.send(node_id, request, wakeup=False) - future.add_callback(self._handle_fetch_response, node_id, fetch_offsets, time.time()) - future.add_errback(self._handle_fetch_error, node_id) - futures.append(future) + log.debug("Sending FetchRequest to node %s", node_id) + self._nodes_with_pending_fetch_requests.add(node_id) + future = self._client.send(node_id, request, wakeup=False) + future.add_callback(self._handle_fetch_response, node_id, fetch_offsets, time.time()) + future.add_errback(self._handle_fetch_error, node_id) + future.add_both(self._clear_pending_fetch_request, node_id) + futures.append(future) self._fetch_futures.extend(futures) self._clean_done_fetch_futures() return futures - def reset_offsets_if_needed(self, partitions): - """Lookup and set offsets for any partitions which are awaiting an - explicit reset. - - Arguments: - partitions (set of TopicPartitions): the partitions to reset - """ - for tp in partitions: - # TODO: If there are several offsets to reset, we could submit offset requests in parallel - if self._subscriptions.is_assigned(tp) and self._subscriptions.is_offset_reset_needed(tp): - self._reset_offset(tp) - def _clean_done_fetch_futures(self): while True: if not self._fetch_futures: @@ -159,94 +172,39 @@ def in_flight_fetches(self): self._clean_done_fetch_futures() return bool(self._fetch_futures) - def update_fetch_positions(self, partitions): - """Update the fetch positions for the provided partitions. + def reset_offsets_if_needed(self): + """Reset offsets for the given partitions using the offset reset strategy. 
Arguments: - partitions (list of TopicPartitions): partitions to update - - Raises: - NoOffsetForPartitionError: if no offset is stored for a given - partition and no reset policy is available - """ - # reset the fetch position to the committed position - for tp in partitions: - if not self._subscriptions.is_assigned(tp): - log.warning("partition %s is not assigned - skipping offset" - " update", tp) - continue - elif self._subscriptions.is_fetchable(tp): - log.warning("partition %s is still fetchable -- skipping offset" - " update", tp) - continue - - if self._subscriptions.is_offset_reset_needed(tp): - self._reset_offset(tp) - elif self._subscriptions.assignment[tp].committed is None: - # there's no committed position, so we need to reset with the - # default strategy - self._subscriptions.need_offset_reset(tp) - self._reset_offset(tp) - else: - committed = self._subscriptions.assignment[tp].committed.offset - log.debug("Resetting offset for partition %s to the committed" - " offset %s", tp, committed) - self._subscriptions.seek(tp, committed) - - def get_offsets_by_times(self, timestamps, timeout_ms): - offsets = self._retrieve_offsets(timestamps, timeout_ms) - for tp in timestamps: - if tp not in offsets: - offsets[tp] = None - return offsets + partitions ([TopicPartition]): the partitions that need offsets reset - def beginning_offsets(self, partitions, timeout_ms): - return self.beginning_or_end_offset( - partitions, OffsetResetStrategy.EARLIEST, timeout_ms) - - def end_offsets(self, partitions, timeout_ms): - return self.beginning_or_end_offset( - partitions, OffsetResetStrategy.LATEST, timeout_ms) - - def beginning_or_end_offset(self, partitions, timestamp, timeout_ms): - timestamps = dict([(tp, timestamp) for tp in partitions]) - offsets = self._retrieve_offsets(timestamps, timeout_ms) - for tp in timestamps: - offsets[tp] = offsets[tp].offset - return offsets - - def _reset_offset(self, partition): - """Reset offsets for the given partition using the offset reset strategy. 
- - Arguments: - partition (TopicPartition): the partition that needs reset offset + Returns: + bool: True if any partitions need reset; otherwise False (no reset pending) Raises: NoOffsetForPartitionError: if no offset reset strategy is defined + KafkaTimeoutError if timeout_ms provided """ - timestamp = self._subscriptions.assignment[partition].reset_strategy - if timestamp is OffsetResetStrategy.EARLIEST: - strategy = 'earliest' - elif timestamp is OffsetResetStrategy.LATEST: - strategy = 'latest' - else: - raise NoOffsetForPartitionError(partition) + # Raise exception from previous offset fetch if there is one + exc, self._cached_list_offsets_exception = self._cached_list_offsets_exception, None + if exc: + raise exc - log.debug("Resetting offset for partition %s to %s offset.", - partition, strategy) - offsets = self._retrieve_offsets({partition: timestamp}) + partitions = self._subscriptions.partitions_needing_reset() + if not partitions: + return False + log.debug('Resetting offsets for %s', partitions) - if partition in offsets: - offset = offsets[partition].offset + offset_resets = dict() + for tp in partitions: + ts = self._subscriptions.assignment[tp].reset_strategy + if ts: + offset_resets[tp] = ts - # we might lose the assignment while fetching the offset, - # so check it is still active - if self._subscriptions.is_assigned(partition): - self._subscriptions.seek(partition, offset) - else: - log.debug("Could not find offset for partition %s since it is probably deleted" % (partition,)) + self._reset_offsets_async(offset_resets) + return True - def _retrieve_offsets(self, timestamps, timeout_ms=float("inf")): + def offsets_by_times(self, timestamps, timeout_ms=None): """Fetch offset for each partition passed in ``timestamps`` map. Blocks until offsets are obtained, a non-retriable exception is raised @@ -256,57 +214,83 @@ def _retrieve_offsets(self, timestamps, timeout_ms=float("inf")): timestamps: {TopicPartition: int} dict with timestamps to fetch offsets by. -1 for the latest available, -2 for the earliest available. Otherwise timestamp is treated as epoch milliseconds. + timeout_ms (int, optional): The maximum time in milliseconds to block. Returns: {TopicPartition: OffsetAndTimestamp}: Mapping of partition to retrieved offset, timestamp, and leader_epoch. If offset does not exist for the provided timestamp, that partition will be missing from this mapping. 
+ + Raises: + KafkaTimeoutError if timeout_ms provided """ + offsets = self._fetch_offsets_by_times(timestamps, timeout_ms) + for tp in timestamps: + if tp not in offsets: + offsets[tp] = None + return offsets + + def _fetch_offsets_by_times(self, timestamps, timeout_ms=None): if not timestamps: return {} - start_time = time.time() - remaining_ms = timeout_ms + timer = Timer(timeout_ms, "Failed to get offsets by timestamps in %s ms" % (timeout_ms,)) timestamps = copy.copy(timestamps) - while remaining_ms > 0: + fetched_offsets = dict() + while True: if not timestamps: return {} future = self._send_list_offsets_requests(timestamps) - self._client.poll(future=future, timeout_ms=remaining_ms) + self._client.poll(future=future, timeout_ms=timer.timeout_ms) + + # Timeout w/o future completion + if not future.is_done: + break if future.succeeded(): - return future.value - if not future.retriable(): - raise future.exception # pylint: disable-msg=raising-bad-type + fetched_offsets.update(future.value[0]) + if not future.value[1]: + return fetched_offsets - elapsed_ms = (time.time() - start_time) * 1000 - remaining_ms = timeout_ms - elapsed_ms - if remaining_ms < 0: - break + timestamps = {tp: timestamps[tp] for tp in future.value[1]} + + elif not future.retriable(): + raise future.exception # pylint: disable-msg=raising-bad-type - if future.exception.invalid_metadata: + if future.exception.invalid_metadata or self._client.cluster.need_update: refresh_future = self._client.cluster.request_update() - self._client.poll(future=refresh_future, timeout_ms=remaining_ms) - - # Issue #1780 - # Recheck partition existence after after a successful metadata refresh - if refresh_future.succeeded() and isinstance(future.exception, Errors.StaleMetadata): - log.debug("Stale metadata was raised, and we now have an updated metadata. Rechecking partition existence") - unknown_partition = future.exception.args[0] # TopicPartition from StaleMetadata - if self._client.cluster.leader_for_partition(unknown_partition) is None: - log.debug("Removed partition %s from offsets retrieval" % (unknown_partition, )) - timestamps.pop(unknown_partition) + self._client.poll(future=refresh_future, timeout_ms=timer.timeout_ms) + + if not future.is_done: + break else: - time.sleep(self.config['retry_backoff_ms'] / 1000.0) + if timer.timeout_ms is None or timer.timeout_ms > self.config['retry_backoff_ms']: + time.sleep(self.config['retry_backoff_ms'] / 1000) + else: + time.sleep(timer.timeout_ms / 1000) - elapsed_ms = (time.time() - start_time) * 1000 - remaining_ms = timeout_ms - elapsed_ms + timer.maybe_raise() raise Errors.KafkaTimeoutError( "Failed to get offsets by timestamps in %s ms" % (timeout_ms,)) + def beginning_offsets(self, partitions, timeout_ms): + return self.beginning_or_end_offset( + partitions, OffsetResetStrategy.EARLIEST, timeout_ms) + + def end_offsets(self, partitions, timeout_ms): + return self.beginning_or_end_offset( + partitions, OffsetResetStrategy.LATEST, timeout_ms) + + def beginning_or_end_offset(self, partitions, timestamp, timeout_ms): + timestamps = dict([(tp, timestamp) for tp in partitions]) + offsets = self._fetch_offsets_by_times(timestamps, timeout_ms) + for tp in timestamps: + offsets[tp] = offsets[tp].offset + return offsets + def fetched_records(self, max_records=None, update_offsets=True): """Returns previously fetched records and updates consumed offsets. 
@@ -316,7 +300,7 @@ def fetched_records(self, max_records=None, update_offsets=True): Raises: OffsetOutOfRangeError: if no subscription offset_reset_strategy - CorruptRecordException: if message crc validation fails (check_crcs + CorruptRecordError: if message crc validation fails (check_crcs must be set to True) RecordTooLargeError: if a message is larger than the currently configured max_partition_fetch_bytes @@ -333,20 +317,40 @@ def fetched_records(self, max_records=None, update_offsets=True): max_records = self.config['max_poll_records'] assert max_records > 0 + if self._next_in_line_exception_metadata is not None: + exc_meta = self._next_in_line_exception_metadata + self._next_in_line_exception_metadata = None + tp = exc_meta.partition + if self._subscriptions.is_fetchable(tp) and self._subscriptions.position(tp).offset == exc_meta.fetched_offset: + raise exc_meta.exception + drained = collections.defaultdict(list) records_remaining = max_records + # Needed to construct ExceptionMetadata if any exception is found when processing completed_fetch + fetched_partition = None + fetched_offset = -1 - while records_remaining > 0: - if not self._next_partition_records: - if not self._completed_fetches: - break - completion = self._completed_fetches.popleft() - self._next_partition_records = self._parse_fetched_data(completion) - else: - records_remaining -= self._append(drained, - self._next_partition_records, - records_remaining, - update_offsets) + try: + while records_remaining > 0: + if not self._next_partition_records: + if not self._completed_fetches: + break + completion = self._completed_fetches.popleft() + fetched_partition = completion.topic_partition + fetched_offset = completion.fetched_offset + self._next_partition_records = self._parse_fetched_data(completion) + else: + fetched_partition = self._next_partition_records.topic_partition + fetched_offset = self._next_partition_records.next_fetch_offset + records_remaining -= self._append(drained, + self._next_partition_records, + records_remaining, + update_offsets) + except Exception as e: + if not drained: + raise e + # To be thrown in the next call of this method + self._next_in_line_exception_metadata = ExceptionMetadata(fetched_partition, fetched_offset, e) return dict(drained), bool(self._completed_fetches) def _append(self, drained, part, max_records, update_offsets): @@ -354,175 +358,99 @@ def _append(self, drained, part, max_records, update_offsets): return 0 tp = part.topic_partition - fetch_offset = part.fetch_offset if not self._subscriptions.is_assigned(tp): # this can happen when a rebalance happened before # fetched records are returned to the consumer's poll call log.debug("Not returning fetched records for partition %s" " since it is no longer assigned", tp) + elif not self._subscriptions.is_fetchable(tp): + # this can happen when a partition is paused before + # fetched records are returned to the consumer's poll call + log.debug("Not returning fetched records for assigned partition" + " %s since it is no longer fetchable", tp) + else: # note that the position should always be available # as long as the partition is still assigned position = self._subscriptions.assignment[tp].position - if not self._subscriptions.is_fetchable(tp): - # this can happen when a partition is paused before - # fetched records are returned to the consumer's poll call - log.debug("Not returning fetched records for assigned partition" - " %s since it is no longer fetchable", tp) - - elif fetch_offset == position.offset: - # we are ensured 
to have at least one record since we already checked for emptiness + if part.next_fetch_offset == position.offset: + log.debug("Returning fetched records at offset %d for assigned" + " partition %s", position.offset, tp) part_records = part.take(max_records) - next_offset = part_records[-1].offset + 1 - leader_epoch = part_records[-1].leader_epoch - - log.log(0, "Returning fetched records at offset %d for assigned" - " partition %s and update position to %s (leader epoch %s)", position.offset, - tp, next_offset, leader_epoch) - - for record in part_records: - drained[tp].append(record) - - if update_offsets: + # list.extend([]) is a noop, but because drained is a defaultdict + # we should avoid initializing the default list unless there are records + if part_records: + drained[tp].extend(part_records) + # We want to increment subscription position if (1) we're using consumer.poll(), + # or (2) we didn't return any records (consumer iterator will update position + # when each message is yielded). There may be edge cases where we re-fetch records + # that we'll end up skipping, but for now we'll live with that. + highwater = self._subscriptions.assignment[tp].highwater + if highwater is not None and self._sensors: + self._sensors.records_fetch_lag.record(highwater - part.next_fetch_offset) + if update_offsets or not part_records: # TODO: save leader_epoch - self._subscriptions.assignment[tp].position = OffsetAndMetadata(next_offset, '', -1) + log.debug("Updating fetch position for assigned partition %s to %s (leader epoch %s)", + tp, part.next_fetch_offset, part.leader_epoch) + self._subscriptions.assignment[tp].position = OffsetAndMetadata(part.next_fetch_offset, '', -1) return len(part_records) else: # these records aren't next in line based on the last consumed # position, ignore them they must be from an obsolete request log.debug("Ignoring fetched records for %s at offset %s since" - " the current position is %d", tp, part.fetch_offset, + " the current position is %d", tp, part.next_fetch_offset, position.offset) - part.discard() + part.drain() return 0 - def _message_generator(self): - """Iterate over fetched_records""" - while self._next_partition_records or self._completed_fetches: - - if not self._next_partition_records: - completion = self._completed_fetches.popleft() - self._next_partition_records = self._parse_fetched_data(completion) - continue - - # Send additional FetchRequests when the internal queue is low - # this should enable moderate pipelining - if len(self._completed_fetches) <= self.config['iterator_refetch_records']: - self.send_fetches() - - tp = self._next_partition_records.topic_partition - - # We can ignore any prior signal to drop pending record batches - # because we are starting from a fresh one where fetch_offset == position - # i.e., the user seek()'d to this position - self._subscriptions.assignment[tp].drop_pending_record_batch = False - - for msg in self._next_partition_records.take(): - - # Because we are in a generator, it is possible for - # subscription state to change between yield calls - # so we need to re-check on each loop - # this should catch assignment changes, pauses - # and resets via seek_to_beginning / seek_to_end - if not self._subscriptions.is_fetchable(tp): - log.debug("Not returning fetched records for partition %s" - " since it is no longer fetchable", tp) - self._next_partition_records = None - break - - # If there is a seek during message iteration, - # we should stop unpacking this record batch and - # wait for a new fetch response 
that aligns with the - # new seek position - elif self._subscriptions.assignment[tp].drop_pending_record_batch: - log.debug("Skipping remainder of record batch for partition %s", tp) - self._subscriptions.assignment[tp].drop_pending_record_batch = False - self._next_partition_records = None - break - - # Compressed messagesets may include earlier messages - elif msg.offset < self._subscriptions.assignment[tp].position.offset: - log.debug("Skipping message offset: %s (expecting %s)", - msg.offset, - self._subscriptions.assignment[tp].position.offset) - continue - - self._subscriptions.assignment[tp].position = OffsetAndMetadata(msg.offset + 1, '', -1) - yield msg + def _reset_offset_if_needed(self, partition, timestamp, offset): + # we might lose the assignment while fetching the offset, or the user might seek to a different offset, + # so verify it is still assigned and still in need of the requested reset + if not self._subscriptions.is_assigned(partition): + log.debug("Skipping reset of partition %s since it is no longer assigned", partition) + elif not self._subscriptions.is_offset_reset_needed(partition): + log.debug("Skipping reset of partition %s since reset is no longer needed", partition) + elif timestamp and not timestamp == self._subscriptions.assignment[partition].reset_strategy: + log.debug("Skipping reset of partition %s since an alternative reset has been requested", partition) + else: + log.info("Resetting offset for partition %s to offset %s.", partition, offset) + self._subscriptions.seek(partition, offset) - self._next_partition_records = None + def _reset_offsets_async(self, timestamps): + timestamps_by_node = self._group_list_offset_requests(timestamps) - def _unpack_records(self, tp, records): - try: - batch = records.next_batch() - while batch is not None: - - # Try DefaultsRecordBatch / message log format v2 - # base_offset, last_offset_delta, and control batches - try: - batch_offset = batch.base_offset + batch.last_offset_delta - leader_epoch = batch.leader_epoch - self._subscriptions.assignment[tp].last_offset_from_record_batch = batch_offset - # Control batches have a single record indicating whether a transaction - # was aborted or committed. - # When isolation_level is READ_COMMITTED (currently unsupported) - # we should also skip all messages from aborted transactions - # For now we only support READ_UNCOMMITTED and so we ignore the - # abort/commit signal. 
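Part of the read_committed support in this patch is simply mapping the new isolation_level config string onto the integer carried by fetch and list-offsets requests; the standalone helper below mirrors the ISOLATION_LEVEL_CONFIG constants added earlier in the patch, but the function itself (and its ValueError, where the Fetcher raises KafkaConfigurationError) is illustrative only.

READ_UNCOMMITTED = 0
READ_COMMITTED = 1

ISOLATION_LEVEL_CONFIG = {
    'read_uncommitted': READ_UNCOMMITTED,
    'read_committed': READ_COMMITTED,
}

def isolation_level_for(config_value):
    """Map the consumer-facing config string to the protocol value."""
    try:
        return ISOLATION_LEVEL_CONFIG[config_value]
    except KeyError:
        raise ValueError('Unrecognized isolation_level: %r' % (config_value,))

# A consumer configured with isolation_level='read_committed' sends 1, asking
# brokers to withhold records from aborted or still-open transactions.
assert isolation_level_for('read_committed') == READ_COMMITTED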
- if batch.is_control_batch: - batch = records.next_batch() - continue - except AttributeError: - leader_epoch = -1 - pass - - for record in batch: - key_size = len(record.key) if record.key is not None else -1 - value_size = len(record.value) if record.value is not None else -1 - key = self._deserialize( - self.config['key_deserializer'], - tp.topic, record.key) - value = self._deserialize( - self.config['value_deserializer'], - tp.topic, record.value) - headers = record.headers - header_size = sum( - len(h_key.encode("utf-8")) + (len(h_val) if h_val is not None else 0) for h_key, h_val in - headers) if headers else -1 - yield ConsumerRecord( - tp.topic, tp.partition, leader_epoch, record.offset, record.timestamp, - record.timestamp_type, key, value, headers, record.checksum, - key_size, value_size, header_size) + for node_id, timestamps_and_epochs in six.iteritems(timestamps_by_node): + if not self._client.ready(node_id): + continue + partitions = set(timestamps_and_epochs.keys()) + expire_at = time.time() + self.config['request_timeout_ms'] / 1000 + self._subscriptions.set_reset_pending(partitions, expire_at) + + def on_success(timestamps_and_epochs, result): + fetched_offsets, partitions_to_retry = result + if partitions_to_retry: + self._subscriptions.reset_failed(partitions_to_retry, time.time() + self.config['retry_backoff_ms'] / 1000) + self._client.cluster.request_update() - batch = records.next_batch() + for partition, offset in six.iteritems(fetched_offsets): + ts, _epoch = timestamps_and_epochs[partition] + self._reset_offset_if_needed(partition, ts, offset.offset) - # If unpacking raises StopIteration, it is erroneously - # caught by the generator. We want all exceptions to be raised - # back to the user. See Issue 545 - except StopIteration as e: - log.exception('StopIteration raised unpacking messageset') - raise RuntimeError('StopIteration raised unpacking messageset') + def on_failure(partitions, error): + self._subscriptions.reset_failed(partitions, time.time() + self.config['retry_backoff_ms'] / 1000) + self._client.cluster.request_update() - def __iter__(self): # pylint: disable=non-iterator-returned - return self + if not getattr(error, 'retriable', False): + if not self._cached_list_offsets_exception: + self._cached_list_offsets_exception = error + else: + log.error("Discarding error in ListOffsetResponse because another error is pending: %s", error) - def __next__(self): - if not self._iterator: - self._iterator = self._message_generator() - try: - return next(self._iterator) - except StopIteration: - self._iterator = None - raise - - def _deserialize(self, f, topic, bytes_): - if not f: - return bytes_ - if isinstance(f, Deserializer): - return f.deserialize(topic, bytes_) - return f(bytes_) + future = self._send_list_offsets_request(node_id, timestamps_and_epochs) + future.add_callback(on_success, timestamps_and_epochs) + future.add_errback(on_failure, partitions) def _send_list_offsets_requests(self, timestamps): """Fetch offsets for each partition in timestamps dict. 
This may send @@ -535,35 +463,22 @@ def _send_list_offsets_requests(self, timestamps): Returns: Future: resolves to a mapping of retrieved offsets """ - timestamps_by_node = collections.defaultdict(dict) - for partition, timestamp in six.iteritems(timestamps): - node_id = self._client.cluster.leader_for_partition(partition) - if node_id is None: - self._client.add_topic(partition.topic) - log.debug("Partition %s is unknown for fetching offset," - " wait for metadata refresh", partition) - return Future().failure(Errors.StaleMetadata(partition)) - elif node_id == -1: - log.debug("Leader for partition %s unavailable for fetching " - "offset, wait for metadata refresh", partition) - return Future().failure( - Errors.LeaderNotAvailableError(partition)) - else: - leader_epoch = -1 - timestamps_by_node[node_id][partition] = (timestamp, leader_epoch) + timestamps_by_node = self._group_list_offset_requests(timestamps) + if not timestamps_by_node: + return Future().failure(Errors.StaleMetadata()) - # Aggregate results until we have all + # Aggregate results until we have all responses list_offsets_future = Future() - responses = [] - node_count = len(timestamps_by_node) + fetched_offsets = dict() + partitions_to_retry = set() + remaining_responses = [len(timestamps_by_node)] # list for mutable / 2.7 hack - def on_success(value): - responses.append(value) - if len(responses) == node_count: - offsets = {} - for r in responses: - offsets.update(r) - list_offsets_future.success(offsets) + def on_success(remaining_responses, value): + remaining_responses[0] -= 1 # noqa: F823 + fetched_offsets.update(value[0]) + partitions_to_retry.update(value[1]) + if not remaining_responses[0] and not list_offsets_future.is_done: + list_offsets_future.success((fetched_offsets, partitions_to_retry)) def on_fail(err): if not list_offsets_future.is_done: @@ -571,12 +486,31 @@ def on_fail(err): for node_id, timestamps in six.iteritems(timestamps_by_node): _f = self._send_list_offsets_request(node_id, timestamps) - _f.add_callback(on_success) + _f.add_callback(on_success, remaining_responses) _f.add_errback(on_fail) return list_offsets_future + def _group_list_offset_requests(self, timestamps): + timestamps_by_node = collections.defaultdict(dict) + for partition, timestamp in six.iteritems(timestamps): + node_id = self._client.cluster.leader_for_partition(partition) + if node_id is None: + self._client.add_topic(partition.topic) + log.debug("Partition %s is unknown for fetching offset", partition) + self._client.cluster.request_update() + elif node_id == -1: + log.debug("Leader for partition %s unavailable for fetching " + "offset, wait for metadata refresh", partition) + self._client.cluster.request_update() + else: + leader_epoch = -1 + timestamps_by_node[node_id][partition] = (timestamp, leader_epoch) + return dict(timestamps_by_node) + def _send_list_offsets_request(self, node_id, timestamps_and_epochs): version = self._client.api_version(ListOffsetsRequest, max_version=4) + if self.config['isolation_level'] == 'read_committed' and version < 2: + raise Errors.UnsupportedVersionError('read_committed isolation level requires ListOffsetsRequest >= v2') by_topic = collections.defaultdict(list) for tp, (timestamp, leader_epoch) in six.iteritems(timestamps_and_epochs): if version >= 4: @@ -597,12 +531,12 @@ def _send_list_offsets_request(self, node_id, timestamps_and_epochs): self._isolation_level, list(six.iteritems(by_topic))) - # Client returns a future that only fails on network issues # so create a separate future and 
attach a callback to update it # based on response error codes future = Future() + log.debug("Sending ListOffsetRequest %s to broker %s", request, node_id) _f = self._client.send(node_id, request) _f.add_callback(self._handle_list_offsets_response, future) _f.add_errback(lambda e: future.failure(e)) @@ -618,7 +552,9 @@ def _handle_list_offsets_response(self, future, response): Raises: AssertionError: if response does not match partition """ - timestamp_offset_map = {} + fetched_offsets = dict() + partitions_to_retry = set() + unauthorized_topics = set() for topic, part_data in response.topics: for partition_info in part_data: partition, error_code = partition_info[:2] @@ -643,10 +579,11 @@ def _handle_list_offsets_response(self, future, response): "Fetched offset %s, timestamp %s, leader_epoch %s", partition, offset, timestamp, leader_epoch) if offset != UNKNOWN_OFFSET: - timestamp_offset_map[partition] = OffsetAndTimestamp(offset, timestamp, leader_epoch) + fetched_offsets[partition] = OffsetAndTimestamp(offset, timestamp, leader_epoch) elif error_type is Errors.UnsupportedForMessageFormatError: - # The message format on the broker side is before 0.10.0, - # we simply put None in the response. + # The message format on the broker side is before 0.10.0, which means it does not + # support timestamps. We treat this case the same as if we weren't able to find an + # offset corresponding to the requested timestamp and leave it out of the result. log.debug("Cannot search by timestamp for partition %s because the" " message format version is before 0.10.0", partition) elif error_type in (Errors.NotLeaderForPartitionError, @@ -654,33 +591,33 @@ def _handle_list_offsets_response(self, future, response): Errors.KafkaStorageError): log.debug("Attempt to fetch offsets for partition %s failed due" " to %s, retrying.", error_type.__name__, partition) - future.failure(error_type(partition)) - return + partitions_to_retry.add(partition) elif error_type is Errors.UnknownTopicOrPartitionError: log.warning("Received unknown topic or partition error in ListOffsets " "request for partition %s. 
The topic/partition " + "may not exist or the user may not have Describe access " "to it.", partition) - future.failure(error_type(partition)) - return + partitions_to_retry.add(partition) + elif error_type is Errors.TopicAuthorizationFailedError: + unauthorized_topics.add(topic) else: log.warning("Attempt to fetch offsets for partition %s failed due to:" " %s", partition, error_type.__name__) - future.failure(error_type(partition)) - return - if not future.is_done: - future.success(timestamp_offset_map) + partitions_to_retry.add(partition) + if unauthorized_topics: + future.failure(Errors.TopicAuthorizationFailedError(unauthorized_topics)) + else: + future.success((fetched_offsets, partitions_to_retry)) def _fetchable_partitions(self): fetchable = self._subscriptions.fetchable_partitions() # do not fetch a partition if we have a pending fetch response to process + # use copy.copy to avoid runtimeerror on mutation from different thread + discard = {fetch.topic_partition for fetch in self._completed_fetches.copy()} current = self._next_partition_records - pending = copy.copy(self._completed_fetches) if current: - fetchable.discard(current.topic_partition) - for fetch in pending: - fetchable.discard(fetch.topic_partition) - return fetchable + discard.add(current.topic_partition) + return [tp for tp in fetchable if tp not in discard] def _create_fetch_requests(self): """Create fetch requests for all assigned partitions, grouped by node. @@ -693,21 +630,11 @@ def _create_fetch_requests(self): # create the fetch info as a dict of lists of partition info tuples # which can be passed to FetchRequest() via .items() version = self._client.api_version(FetchRequest, max_version=10) - fetchable = collections.defaultdict(dict) + fetchable = collections.defaultdict(collections.OrderedDict) for partition in self._fetchable_partitions(): node_id = self._client.cluster.leader_for_partition(partition) - # advance position for any deleted compacted messages if required - if self._subscriptions.assignment[partition].last_offset_from_record_batch: - next_offset_from_batch_header = self._subscriptions.assignment[partition].last_offset_from_record_batch + 1 - if next_offset_from_batch_header > self._subscriptions.assignment[partition].position.offset: - log.debug( - "Advance position for partition %s from %s to %s (last record batch location plus one)" - " to correct for deleted compacted messages and/or transactional control records", - partition, self._subscriptions.assignment[partition].position.offset, next_offset_from_batch_header) - self._subscriptions.assignment[partition].position = OffsetAndMetadata(next_offset_from_batch_header, '', -1) - position = self._subscriptions.assignment[partition].position # fetch if there is a leader and no in-flight requests @@ -716,36 +643,54 @@ def _create_fetch_requests(self): " Requesting metadata update", partition) self._client.cluster.request_update() - elif self._client.in_flight_request_count(node_id) > 0: - log.log(0, "Skipping fetch for partition %s because there is an inflight request to node %s", + elif not self._client.connected(node_id) and self._client.connection_delay(node_id) > 0: + # If we try to send during the reconnect backoff window, then the request is just + # going to be failed anyway before being sent, so skip the send for now + log.debug("Skipping fetch for partition %s because node %s is awaiting reconnect backoff", partition, node_id) - continue - if version < 5: - partition_info = ( - partition.partition, - position.offset, - 
self.config['max_partition_fetch_bytes'] - ) - elif version <= 8: - partition_info = ( - partition.partition, - position.offset, - -1, # log_start_offset is used internally by brokers / replicas only - self.config['max_partition_fetch_bytes'], - ) - else: - partition_info = ( - partition.partition, - position.leader_epoch, - position.offset, - -1, # log_start_offset is used internally by brokers / replicas only - self.config['max_partition_fetch_bytes'], - ) + elif self._client.throttle_delay(node_id) > 0: + # If we try to send while throttled, then the request is just + # going to be failed anyway before being sent, so skip the send for now + log.debug("Skipping fetch for partition %s because node %s is throttled", + partition, node_id) + + elif not self._client.ready(node_id): + # Until we support send request queues, any attempt to send to a not-ready node will be + # immediately failed with NodeNotReadyError. + log.debug("Skipping fetch for partition %s because connection to leader node is not ready yet") - fetchable[node_id][partition] = partition_info - log.debug("Adding fetch request for partition %s at offset %d", - partition, position.offset) + elif node_id in self._nodes_with_pending_fetch_requests: + log.debug("Skipping fetch for partition %s because there is a pending fetch request to node %s", + partition, node_id) + + else: + # Leader is connected and does not have a pending fetch request + if version < 5: + partition_info = ( + partition.partition, + position.offset, + self.config['max_partition_fetch_bytes'] + ) + elif version <= 8: + partition_info = ( + partition.partition, + position.offset, + -1, # log_start_offset is used internally by brokers / replicas only + self.config['max_partition_fetch_bytes'], + ) + else: + partition_info = ( + partition.partition, + position.leader_epoch, + position.offset, + -1, # log_start_offset is used internally by brokers / replicas only + self.config['max_partition_fetch_bytes'], + ) + + fetchable[node_id][partition] = partition_info + log.debug("Adding fetch request for partition %s at offset %d", + partition, position.offset) requests = {} for node_id, next_partitions in six.iteritems(fetchable): @@ -815,12 +760,12 @@ def _handle_fetch_response(self, node_id, fetch_offsets, send_time, response): partitions = set([TopicPartition(topic, partition_data[0]) for topic, partitions in response.topics for partition_data in partitions]) - metric_aggregator = FetchResponseMetricAggregator(self._sensors, partitions) + if self._sensors: + metric_aggregator = FetchResponseMetricAggregator(self._sensors, partitions) + else: + metric_aggregator = None - # randomized ordering should improve balance for short-lived consumers - random.shuffle(response.topics) for topic, partitions in response.topics: - random.shuffle(partitions) for partition_data in partitions: tp = TopicPartition(topic, partition_data[0]) fetch_offset = fetch_offsets[tp] @@ -832,7 +777,8 @@ def _handle_fetch_response(self, node_id, fetch_offsets, send_time, response): ) self._completed_fetches.append(completed_fetch) - self._sensors.fetch_latency.record((time.time() - send_time) * 1000) + if self._sensors: + self._sensors.fetch_latency.record((time.time() - send_time) * 1000) def _handle_fetch_error(self, node_id, exception): level = logging.INFO if isinstance(exception, Errors.Cancelled) else logging.ERROR @@ -840,15 +786,18 @@ def _handle_fetch_error(self, node_id, exception): if node_id in self._session_handlers: self._session_handlers[node_id].handle_error(exception) + def 
_clear_pending_fetch_request(self, node_id, _): + try: + self._nodes_with_pending_fetch_requests.remove(node_id) + except KeyError: + pass + def _parse_fetched_data(self, completed_fetch): tp = completed_fetch.topic_partition fetch_offset = completed_fetch.fetched_offset - num_bytes = 0 - records_count = 0 - parsed_records = None - error_code, highwater = completed_fetch.partition_data[:2] error_type = Errors.for_code(error_code) + parsed_records = None try: if not self._subscriptions.is_fetchable(tp): @@ -858,8 +807,6 @@ def _parse_fetched_data(self, completed_fetch): " since it is no longer fetchable", tp) elif error_type is Errors.NoError: - self._subscriptions.assignment[tp].highwater = highwater - # we are interested in this fetch only if the beginning # offset (of the *request*) matches the current consumed position # Note that the *response* may return a messageset that starts @@ -873,32 +820,42 @@ def _parse_fetched_data(self, completed_fetch): return None records = MemoryRecords(completed_fetch.partition_data[-1]) - if records.has_next(): - log.debug("Adding fetched record for partition %s with" - " offset %d to buffered record list", tp, - position.offset) - unpacked = list(self._unpack_records(tp, records)) - parsed_records = self.PartitionRecords(fetch_offset, tp, unpacked) - if unpacked: - last_offset = unpacked[-1].offset - self._sensors.records_fetch_lag.record(highwater - last_offset) - num_bytes = records.valid_bytes() - records_count = len(unpacked) - elif records.size_in_bytes() > 0: - # we did not read a single message from a non-empty - # buffer because that message's size is larger than - # fetch size, in this case record this exception - record_too_large_partitions = {tp: fetch_offset} - raise RecordTooLargeError( - "There are some messages at [Partition=Offset]: %s " - " whose size is larger than the fetch size %s" - " and hence cannot be ever returned." - " Increase the fetch size, or decrease the maximum message" - " size the broker will allow." % ( - record_too_large_partitions, - self.config['max_partition_fetch_bytes']), - record_too_large_partitions) - self._sensors.record_topic_fetch_metrics(tp.topic, num_bytes, records_count) + aborted_transactions = None + if completed_fetch.response_version >= 11: + aborted_transactions = completed_fetch.partition_data[-3] + elif completed_fetch.response_version >= 4: + aborted_transactions = completed_fetch.partition_data[-2] + log.debug("Preparing to read %s bytes of data for partition %s with offset %d", + records.size_in_bytes(), tp, fetch_offset) + parsed_records = self.PartitionRecords(fetch_offset, tp, records, + key_deserializer=self.config['key_deserializer'], + value_deserializer=self.config['value_deserializer'], + check_crcs=self.config['check_crcs'], + isolation_level=self._isolation_level, + aborted_transactions=aborted_transactions, + metric_aggregator=completed_fetch.metric_aggregator, + on_drain=self._on_partition_records_drain) + if not records.has_next() and records.size_in_bytes() > 0: + if completed_fetch.response_version < 3: + # Implement the pre KIP-74 behavior of throwing a RecordTooLargeException. + record_too_large_partitions = {tp: fetch_offset} + raise RecordTooLargeError( + "There are some messages at [Partition=Offset]: %s " + " whose size is larger than the fetch size %s" + " and hence cannot be ever returned. Please condier upgrading your broker to 0.10.1.0 or" + " newer to avoid this issue. 
Alternatively, increase the fetch size on the client (using" + " max_partition_fetch_bytes)" % ( + record_too_large_partitions, + self.config['max_partition_fetch_bytes']), + record_too_large_partitions) + else: + # This should not happen with brokers that support FetchRequest/Response V3 or higher (i.e. KIP-74) + raise Errors.KafkaError("Failed to make progress reading messages at %s=%s." + " Received a non-empty fetch response from the server, but no" + " complete records were found." % (tp, fetch_offset)) + + if highwater >= 0: + self._subscriptions.assignment[tp].highwater = highwater elif error_type in (Errors.NotLeaderForPartitionError, Errors.ReplicaNotAvailableError, @@ -914,67 +871,220 @@ def _parse_fetched_data(self, completed_fetch): " current offset %d", tp, fetch_offset, position.offset) elif self._subscriptions.has_default_offset_reset_policy(): log.info("Fetch offset %s is out of range for topic-partition %s", fetch_offset, tp) - self._subscriptions.need_offset_reset(tp) + self._subscriptions.request_offset_reset(tp) else: raise Errors.OffsetOutOfRangeError({tp: fetch_offset}) elif error_type is Errors.TopicAuthorizationFailedError: log.warning("Not authorized to read from topic %s.", tp.topic) - raise Errors.TopicAuthorizationFailedError(set(tp.topic)) - elif error_type.is_retriable: + raise Errors.TopicAuthorizationFailedError(set([tp.topic])) + elif getattr(error_type, 'retriable', False): log.debug("Retriable error fetching partition %s: %s", tp, error_type()) - if error_type.invalid_metadata: + if getattr(error_type, 'invalid_metadata', False): self._client.cluster.request_update() else: raise error_type('Unexpected error while fetching data') finally: - completed_fetch.metric_aggregator.record(tp, num_bytes, records_count) + if parsed_records is None and completed_fetch.metric_aggregator: + completed_fetch.metric_aggregator.record(tp, 0, 0) + + if error_type is not Errors.NoError: + # we move the partition to the end if there was an error. This way, it's more likely that partitions for + # the same topic can remain together (allowing for more efficient serialization). + self._subscriptions.move_partition_to_end(tp) return parsed_records + def _on_partition_records_drain(self, partition_records): + # we move the partition to the end if we received some bytes. This way, it's more likely that partitions + # for the same topic can remain together (allowing for more efficient serialization). 
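The fetcher above now rotates a partition to the back of the assignment order whenever it errors or its buffered records are fully drained, so partitions take turns at the front of the fetch order while partitions of the same topic stay adjacent. The `move_partition_to_end` helper itself is not shown in this hunk; a minimal sketch of the rotation idea over an ordered mapping, with an illustrative `TopicPartition` stand-in:

```python
from collections import OrderedDict, namedtuple

# Illustrative stand-in for kafka.structs.TopicPartition
TopicPartition = namedtuple("TopicPartition", ["topic", "partition"])

def move_partition_to_end(assignment, tp):
    """Re-insert tp at the tail of an ordered assignment mapping.

    Partitions that just errored or were fully drained go to the back, so the
    next fetch round starts with the partitions that have waited the longest.
    """
    assignment[tp] = assignment.pop(tp)

assignment = OrderedDict((TopicPartition("t", p), object()) for p in range(3))
move_partition_to_end(assignment, TopicPartition("t", 0))
print([tp.partition for tp in assignment])  # -> [1, 2, 0]
```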
+ if partition_records.bytes_read > 0: + self._subscriptions.move_partition_to_end(partition_records.topic_partition) + + def close(self): + if self._next_partition_records is not None: + self._next_partition_records.drain() + self._next_in_line_exception_metadata = None + class PartitionRecords(object): - def __init__(self, fetch_offset, tp, messages): + def __init__(self, fetch_offset, tp, records, + key_deserializer=None, value_deserializer=None, + check_crcs=True, isolation_level=READ_UNCOMMITTED, + aborted_transactions=None, # raw data from response / list of (producer_id, first_offset) tuples + metric_aggregator=None, on_drain=lambda x: None): self.fetch_offset = fetch_offset self.topic_partition = tp - self.messages = messages + self.leader_epoch = -1 + self.next_fetch_offset = fetch_offset + self.bytes_read = 0 + self.records_read = 0 + self.isolation_level = isolation_level + self.aborted_producer_ids = set() + self.aborted_transactions = collections.deque( + sorted([AbortedTransaction(*data) for data in aborted_transactions] if aborted_transactions else [], + key=lambda txn: txn.first_offset) + ) + self.metric_aggregator = metric_aggregator + self.check_crcs = check_crcs + self.record_iterator = itertools.dropwhile( + self._maybe_skip_record, + self._unpack_records(tp, records, key_deserializer, value_deserializer)) + self.on_drain = on_drain + self._next_inline_exception = None + + def _maybe_skip_record(self, record): # When fetching an offset that is in the middle of a # compressed batch, we will get all messages in the batch. # But we want to start 'take' at the fetch_offset # (or the next highest offset in case the message was compacted) - for i, msg in enumerate(messages): - if msg.offset < fetch_offset: - log.debug("Skipping message offset: %s (expecting %s)", - msg.offset, fetch_offset) - else: - self.message_idx = i - break - + if record.offset < self.fetch_offset: + log.debug("Skipping message offset: %s (expecting %s)", + record.offset, self.fetch_offset) + return True else: - self.message_idx = 0 - self.messages = None + return False + + # For truthiness evaluation + def __bool__(self): + return self.record_iterator is not None - # For truthiness evaluation we need to define __len__ or __nonzero__ - def __len__(self): - if self.messages is None or self.message_idx >= len(self.messages): - return 0 - return len(self.messages) - self.message_idx + # py2 + __nonzero__ = __bool__ - def discard(self): - self.messages = None + def drain(self): + if self.record_iterator is not None: + self.record_iterator = None + self._next_inline_exception = None + if self.metric_aggregator: + self.metric_aggregator.record(self.topic_partition, self.bytes_read, self.records_read) + self.on_drain(self) + + def _maybe_raise_next_inline_exception(self): + if self._next_inline_exception: + exc, self._next_inline_exception = self._next_inline_exception, None + raise exc def take(self, n=None): - if not len(self): - return [] - if n is None or n > len(self): - n = len(self) - next_idx = self.message_idx + n - res = self.messages[self.message_idx:next_idx] - self.message_idx = next_idx - # fetch_offset should be incremented by 1 to parallel the - # subscription position (also incremented by 1) - self.fetch_offset = max(self.fetch_offset, res[-1].offset + 1) - return res + self._maybe_raise_next_inline_exception() + records = [] + try: + # Note that records.extend(iter) will extend partially when exception raised mid-stream + records.extend(itertools.islice(self.record_iterator, 0, n)) + except 
Exception as e: + if not records: + raise e + # To be thrown in the next call of this method + self._next_inline_exception = e + return records + + def _unpack_records(self, tp, records, key_deserializer, value_deserializer): + try: + batch = records.next_batch() + last_batch = None + while batch is not None: + last_batch = batch + + if self.check_crcs and not batch.validate_crc(): + raise Errors.CorruptRecordError( + "Record batch for partition %s at offset %s failed crc check" % ( + self.topic_partition, batch.base_offset)) + + + # Try DefaultsRecordBatch / message log format v2 + # base_offset, last_offset_delta, aborted transactions, and control batches + if batch.magic == 2: + self.leader_epoch = batch.leader_epoch + if self.isolation_level == READ_COMMITTED and batch.has_producer_id(): + # remove from the aborted transaction queue all aborted transactions which have begun + # before the current batch's last offset and add the associated producerIds to the + # aborted producer set + self._consume_aborted_transactions_up_to(batch.last_offset) + + producer_id = batch.producer_id + if self._contains_abort_marker(batch): + try: + self.aborted_producer_ids.remove(producer_id) + except KeyError: + pass + elif self._is_batch_aborted(batch): + log.debug("Skipping aborted record batch from partition %s with producer_id %s and" + " offsets %s to %s", + self.topic_partition, producer_id, batch.base_offset, batch.last_offset) + self.next_fetch_offset = batch.next_offset + batch = records.next_batch() + continue + + # Control batches have a single record indicating whether a transaction + # was aborted or committed. These are not returned to the consumer. + if batch.is_control_batch: + self.next_fetch_offset = batch.next_offset + batch = records.next_batch() + continue + + for record in batch: + if self.check_crcs and not record.validate_crc(): + raise Errors.CorruptRecordError( + "Record for partition %s at offset %s failed crc check" % ( + self.topic_partition, record.offset)) + key_size = len(record.key) if record.key is not None else -1 + value_size = len(record.value) if record.value is not None else -1 + key = self._deserialize(key_deserializer, tp.topic, record.key) + value = self._deserialize(value_deserializer, tp.topic, record.value) + headers = record.headers + header_size = sum( + len(h_key.encode("utf-8")) + (len(h_val) if h_val is not None else 0) for h_key, h_val in + headers) if headers else -1 + self.records_read += 1 + self.bytes_read += record.size_in_bytes + self.next_fetch_offset = record.offset + 1 + yield ConsumerRecord( + tp.topic, tp.partition, self.leader_epoch, record.offset, record.timestamp, + record.timestamp_type, key, value, headers, record.checksum, + key_size, value_size, header_size) + + batch = records.next_batch() + else: + # Message format v2 preserves the last offset in a batch even if the last record is removed + # through compaction. By using the next offset computed from the last offset in the batch, + # we ensure that the offset of the next fetch will point to the next batch, which avoids + # unnecessary re-fetching of the same batch (in the worst case, the consumer could get stuck + # fetching the same batch repeatedly). + if last_batch and last_batch.magic == 2: + self.next_fetch_offset = last_batch.next_offset + self.drain() + + # If unpacking raises StopIteration, it is erroneously + # caught by the generator. We want all exceptions to be raised + # back to the user. 
See Issue 545 + except StopIteration: + log.exception('StopIteration raised unpacking messageset') + raise RuntimeError('StopIteration raised unpacking messageset') + + def _deserialize(self, f, topic, bytes_): + if not f: + return bytes_ + if isinstance(f, Deserializer): + return f.deserialize(topic, bytes_) + return f(bytes_) + + def _consume_aborted_transactions_up_to(self, offset): + if not self.aborted_transactions: + return + + while self.aborted_transactions and self.aborted_transactions[0].first_offset <= offset: + self.aborted_producer_ids.add(self.aborted_transactions.popleft().producer_id) + + def _is_batch_aborted(self, batch): + return batch.is_transactional and batch.producer_id in self.aborted_producer_ids + + def _contains_abort_marker(self, batch): + if not batch.is_control_batch: + return False + record = next(batch) + if not record: + return False + return record.abort class FetchSessionHandler(object): @@ -997,11 +1107,18 @@ def __init__(self, node_id): self.session_partitions = {} def build_next(self, next_partitions): + """ + Arguments: + next_partitions (dict): TopicPartition -> TopicPartitionState + + Returns: + FetchRequestData + """ if self.next_metadata.is_full: log.debug("Built full fetch %s for node %s with %s partition(s).", self.next_metadata, self.node_id, len(next_partitions)) self.session_partitions = next_partitions - return FetchRequestData(next_partitions, None, self.next_metadata); + return FetchRequestData(next_partitions, None, self.next_metadata) prev_tps = set(self.session_partitions.keys()) next_tps = set(next_partitions.keys()) @@ -1019,8 +1136,8 @@ def build_next(self, next_partitions): altered.add(tp) log.debug("Built incremental fetch %s for node %s. Added %s, altered %s, removed %s out of %s", - self.next_metadata, self.node_id, added, altered, removed, self.session_partitions.keys()) - to_send = {tp: next_partitions[tp] for tp in (added | altered)} + self.next_metadata, self.node_id, added, altered, removed, self.session_partitions.keys()) + to_send = collections.OrderedDict({tp: next_partitions[tp] for tp in next_partitions if tp in (added | altered)}) return FetchRequestData(to_send, removed, self.next_metadata) def handle_response(self, response): @@ -1160,18 +1277,11 @@ def epoch(self): @property def to_send(self): # Return as list of [(topic, [(partition, ...), ...]), ...] - # so it an be passed directly to encoder + # so it can be passed directly to encoder partition_data = collections.defaultdict(list) for tp, partition_info in six.iteritems(self._to_send): partition_data[tp.topic].append(partition_info) - # As of version == 3 partitions will be returned in order as - # they are requested, so to avoid starvation with - # `fetch_max_bytes` option we need this shuffle - # NOTE: we do have partition_data in random order due to usage - # of unordered structures like dicts, but that does not - # guarantee equal distribution, and starting in Python3.6 - # dicts retain insert order. 
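With `isolation_level='read_committed'`, the record iterator above folds the response's aborted-transaction list into a set of aborted producer ids as it walks the batches, drops transactional batches from those producers, and never returns control batches. A compressed, self-contained sketch of that bookkeeping, using hypothetical batch tuples in place of the real v2 record-batch objects:

```python
import collections

AbortedTransaction = collections.namedtuple("AbortedTransaction", ["producer_id", "first_offset"])

# Hypothetical stand-in for a magic-v2 record batch; the real code reads these
# attributes from the batches returned by MemoryRecords.
Batch = collections.namedtuple(
    "Batch", ["producer_id", "last_offset", "is_transactional",
              "is_control_batch", "is_abort_marker", "records"])

def read_committed(batches, aborted_transactions):
    """Yield records, skipping batches written by aborted transactions."""
    pending = collections.deque(sorted(aborted_transactions, key=lambda txn: txn.first_offset))
    aborted_producers = set()
    for batch in batches:
        # Activate every aborted transaction that began at or before this batch
        while pending and pending[0].first_offset <= batch.last_offset:
            aborted_producers.add(pending.popleft().producer_id)
        if batch.is_control_batch:
            if batch.is_abort_marker:
                aborted_producers.discard(batch.producer_id)
            continue  # control batches are never returned to the user
        if batch.is_transactional and batch.producer_id in aborted_producers:
            continue  # data from an aborted transaction: skip the whole batch
        for record in batch.records:
            yield record

batches = [
    Batch(7, 4, True, False, False, ["a0", "a1"]),  # producer 7, later aborted
    Batch(9, 9, True, False, False, ["c0"]),        # producer 9, committed
    Batch(7, 10, True, True, True, []),             # abort marker for producer 7
]
print(list(read_committed(batches, [AbortedTransaction(7, 0)])))  # -> ['c0']
```

The real implementation additionally validates CRCs and advances `next_fetch_offset` past skipped batches so the following fetch does not re-read them.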
- return random.sample(list(partition_data.items()), k=len(partition_data)) + return list(partition_data.items()) @property def to_forget(self): @@ -1183,6 +1293,14 @@ def to_forget(self): return list(partition_data.items()) +class FetchMetrics(object): + __slots__ = ('total_bytes', 'total_records') + + def __init__(self): + self.total_bytes = 0 + self.total_records = 0 + + class FetchResponseMetricAggregator(object): """ Since we parse the message data for each partition from each fetch @@ -1193,8 +1311,8 @@ class FetchResponseMetricAggregator(object): def __init__(self, sensors, partitions): self.sensors = sensors self.unrecorded_partitions = partitions - self.total_bytes = 0 - self.total_records = 0 + self.fetch_metrics = FetchMetrics() + self.topic_fetch_metrics = collections.defaultdict(FetchMetrics) def record(self, partition, num_bytes, num_records): """ @@ -1203,13 +1321,17 @@ def record(self, partition, num_bytes, num_records): have reported, we write the metric. """ self.unrecorded_partitions.remove(partition) - self.total_bytes += num_bytes - self.total_records += num_records + self.fetch_metrics.total_bytes += num_bytes + self.fetch_metrics.total_records += num_records + self.topic_fetch_metrics[partition.topic].total_bytes += num_bytes + self.topic_fetch_metrics[partition.topic].total_records += num_records # once all expected partitions from the fetch have reported in, record the metrics if not self.unrecorded_partitions: - self.sensors.bytes_fetched.record(self.total_bytes) - self.sensors.records_fetched.record(self.total_records) + self.sensors.bytes_fetched.record(self.fetch_metrics.total_bytes) + self.sensors.records_fetched.record(self.fetch_metrics.total_records) + for topic, metrics in six.iteritems(self.topic_fetch_metrics): + self.sensors.record_topic_fetch_metrics(topic, metrics.total_bytes, metrics.total_records) class FetchManagerMetrics(object): diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 6f23bec8a..4eb9e2ab4 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -5,7 +5,7 @@ import socket import time -from kafka.errors import KafkaConfigurationError, UnsupportedVersionError +from kafka.errors import KafkaConfigurationError, KafkaTimeoutError, UnsupportedVersionError from kafka.vendor import six @@ -18,6 +18,7 @@ from kafka.metrics import MetricConfig, Metrics from kafka.protocol.list_offsets import OffsetResetStrategy from kafka.structs import OffsetAndMetadata, TopicPartition +from kafka.util import Timer from kafka.version import __version__ log = logging.getLogger(__name__) @@ -120,6 +121,9 @@ class KafkaConsumer(six.Iterator): consumed. This ensures no on-the-wire or on-disk corruption to the messages occurred. This check adds some overhead, so it may be disabled in cases seeking extreme performance. Default: True + isolation_level (str): Configure KIP-98 transactional consumer by + setting to 'read_committed'. This will cause the consumer to + skip records from aborted tranactions. Default: 'read_uncommitted' allow_auto_create_topics (bool): Enable/disable auto topic creation on metadata request. Only available with api_version >= (0, 11). Default: True @@ -224,6 +228,7 @@ class KafkaConsumer(six.Iterator): api_version_auto_timeout_ms (int): number of milliseconds to throw a timeout exception from the constructor when checking the broker api version. Only applies if api_version set to None. + Default: 2000 connections_max_idle_ms: Close idle connections after the number of milliseconds specified by this config. 
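The reworked `FetchResponseMetricAggregator` above keeps one running `FetchMetrics` total plus one per topic, and only flushes to the sensors once every partition covered by the fetch response has reported. The same pattern reduced to a toy aggregator, with a callback standing in for the real sensors:

```python
import collections

class FetchMetrics(object):
    __slots__ = ('total_bytes', 'total_records')
    def __init__(self):
        self.total_bytes = 0
        self.total_records = 0

class ToyAggregator(object):
    """Record per-partition results; emit one aggregate after all partitions report."""
    def __init__(self, emit, partitions):
        self.emit = emit            # callable(total_bytes, total_records, per_topic)
        self.unrecorded = set(partitions)
        self.totals = FetchMetrics()
        self.per_topic = collections.defaultdict(FetchMetrics)

    def record(self, partition, num_bytes, num_records):
        topic, _ = partition
        self.unrecorded.remove(partition)
        self.totals.total_bytes += num_bytes
        self.totals.total_records += num_records
        self.per_topic[topic].total_bytes += num_bytes
        self.per_topic[topic].total_records += num_records
        if not self.unrecorded:  # last partition reported: flush once
            self.emit(self.totals.total_bytes, self.totals.total_records, dict(self.per_topic))

agg = ToyAggregator(lambda b, r, t: print('fetch:', b, 'bytes,', r, 'records'),
                    [('t', 0), ('t', 1)])
agg.record(('t', 0), 100, 2)  # nothing emitted yet
agg.record(('t', 1), 50, 1)   # -> fetch: 150 bytes, 3 records
```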
The broker closes idle connections after connections.max.idle.ms, so this avoids hitting @@ -232,6 +237,7 @@ class KafkaConsumer(six.Iterator): metric_reporters (list): A list of classes to use as metrics reporters. Implementing the AbstractMetricsReporter interface allows plugging in classes that will be notified of new metric creation. Default: [] + metrics_enabled (bool): Whether to track metrics on this instance. Default True. metrics_num_samples (int): The number of samples maintained to compute metrics. Default: 2 metrics_sample_window_ms (int): The maximum age in milliseconds of @@ -250,12 +256,16 @@ class KafkaConsumer(six.Iterator): Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms. sasl_plain_password (str): password for sasl PLAIN and SCRAM authentication. Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms. + sasl_kerberos_name (str or gssapi.Name): Constructed gssapi.Name for use with + sasl mechanism handshake. If provided, sasl_kerberos_service_name and + sasl_kerberos_domain name are ignored. Default: None. sasl_kerberos_service_name (str): Service name to include in GSSAPI sasl mechanism handshake. Default: 'kafka' sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI sasl mechanism handshake. Default: one of bootstrap servers - sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider - instance. (See kafka.oauth.abstract). Default: None + sasl_oauth_token_provider (kafka.sasl.oauth.AbstractTokenProvider): OAuthBearer + token provider instance. Default: None + socks5_proxy (str): Socks5 proxy URL. Default: None kafka_client (callable): Custom class / callable for creating KafkaClient instances Note: @@ -283,6 +293,7 @@ class KafkaConsumer(six.Iterator): 'auto_commit_interval_ms': 5000, 'default_offset_commit_callback': lambda offsets, response: True, 'check_crcs': True, + 'isolation_level': 'read_uncommitted', 'allow_auto_create_topics': True, 'metadata_max_age_ms': 5 * 60 * 1000, 'partition_assignment_strategy': (RangePartitionAssignor, RoundRobinPartitionAssignor), @@ -309,6 +320,7 @@ class KafkaConsumer(six.Iterator): 'api_version_auto_timeout_ms': 2000, 'connections_max_idle_ms': 9 * 60 * 1000, 'metric_reporters': [], + 'metrics_enabled': True, 'metrics_num_samples': 2, 'metrics_sample_window_ms': 30000, 'metric_group_prefix': 'consumer', @@ -317,10 +329,11 @@ class KafkaConsumer(six.Iterator): 'sasl_mechanism': None, 'sasl_plain_username': None, 'sasl_plain_password': None, + 'sasl_kerberos_name': None, 'sasl_kerberos_service_name': 'kafka', 'sasl_kerberos_domain_name': None, 'sasl_oauth_token_provider': None, - 'legacy_iterator': False, # enable to revert to < 1.4.7 iterator + 'socks5_proxy': None, 'kafka_client': KafkaClient, } DEFAULT_SESSION_TIMEOUT_MS_0_9 = 30000 @@ -351,13 +364,15 @@ def __init__(self, *topics, **configs): "fetch_max_wait_ms ({})." .format(connections_max_idle_ms, request_timeout_ms, fetch_max_wait_ms)) - metrics_tags = {'client-id': self.config['client_id']} - metric_config = MetricConfig(samples=self.config['metrics_num_samples'], - time_window_ms=self.config['metrics_sample_window_ms'], - tags=metrics_tags) - reporters = [reporter() for reporter in self.config['metric_reporters']] - self._metrics = Metrics(metric_config, reporters) - # TODO _metrics likely needs to be passed to KafkaClient, etc. 
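The consumer options introduced in this change (`isolation_level`, `metrics_enabled`) and the bounded `close()` that follows can be exercised together as below; the broker address, topic, and group names are placeholders and a reachable broker is assumed:

```python
from kafka import KafkaConsumer

consumer = KafkaConsumer(
    'my-topic',                          # placeholder topic
    bootstrap_servers='localhost:9092',  # placeholder broker
    group_id='my-group',
    isolation_level='read_committed',    # skip records from aborted transactions
    metrics_enabled=False,               # disable metric collection entirely
    consumer_timeout_ms=10000,           # stop iterating after 10s without records
)
try:
    for message in consumer:
        print(message.topic, message.partition, message.offset, message.value)
finally:
    # attempt any configured auto-commit, but give up after 5 seconds
    consumer.close(autocommit=True, timeout_ms=5000)
```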
+ if self.config['metrics_enabled']: + metrics_tags = {'client-id': self.config['client_id']} + metric_config = MetricConfig(samples=self.config['metrics_num_samples'], + time_window_ms=self.config['metrics_sample_window_ms'], + tags=metrics_tags) + reporters = [reporter() for reporter in self.config['metric_reporters']] + self._metrics = Metrics(metric_config, reporters) + else: + self._metrics = None # api_version was previously a str. Accept old format for now if isinstance(self.config['api_version'], str): @@ -395,9 +410,9 @@ def __init__(self, *topics, **configs): self._subscription = SubscriptionState(self.config['auto_offset_reset']) self._fetcher = Fetcher( - self._client, self._subscription, self._metrics, **self.config) + self._client, self._subscription, metrics=self._metrics, **self.config) self._coordinator = ConsumerCoordinator( - self._client, self._subscription, self._metrics, + self._client, self._subscription, metrics=self._metrics, assignors=self.config['partition_assignment_strategy'], **self.config) self._closed = False @@ -437,8 +452,15 @@ def assign(self, partitions): no rebalance operation triggered when group membership or cluster and topic metadata change. """ - self._subscription.assign_from_user(partitions) - self._client.set_topics([tp.topic for tp in partitions]) + if not partitions: + self.unsubscribe() + else: + # make sure the offsets of topic partitions the consumer is unsubscribing from + # are committed since there will be no following rebalance + self._coordinator.maybe_auto_commit_offsets_now() + self._subscription.assign_from_user(partitions) + self._client.set_topics([tp.topic for tp in partitions]) + log.debug("Subscribed to partition(s): %s", partitions) def assignment(self): """Get the TopicPartitions currently assigned to this consumer. @@ -456,20 +478,23 @@ def assignment(self): """ return self._subscription.assigned_partitions() - def close(self, autocommit=True): + def close(self, autocommit=True, timeout_ms=None): """Close the consumer, waiting indefinitely for any needed cleanup. Keyword Arguments: autocommit (bool): If auto-commit is configured for this consumer, this optional flag causes the consumer to attempt to commit any pending consumed offsets prior to close. Default: True + timeout_ms (num, optional): Milliseconds to wait for auto-commit. + Default: None """ if self._closed: return log.debug("Closing the KafkaConsumer.") self._closed = True - self._coordinator.close(autocommit=autocommit) - self._metrics.close() + self._coordinator.close(autocommit=autocommit, timeout_ms=timeout_ms) + if self._metrics: + self._metrics.close() self._client.close() try: self.config['key_deserializer'].close() @@ -515,7 +540,7 @@ def commit_async(self, offsets=None, callback=None): offsets, callback=callback) return future - def commit(self, offsets=None): + def commit(self, offsets=None, timeout_ms=None): """Commit offsets to kafka, blocking until success or error. This commits offsets only to Kafka. The offsets committed using this API @@ -539,17 +564,16 @@ def commit(self, offsets=None): assert self.config['group_id'] is not None, 'Requires group_id' if offsets is None: offsets = self._subscription.all_consumed_offsets() - self._coordinator.commit_offsets_sync(offsets) + self._coordinator.commit_offsets_sync(offsets, timeout_ms=timeout_ms) - def committed(self, partition, metadata=False): + def committed(self, partition, metadata=False, timeout_ms=None): """Get the last committed offset for the given partition. 
This offset will be used as the position for the consumer in the event of a failure. - This call may block to do a remote call if the partition in question - isn't assigned to this consumer or if the consumer hasn't yet - initialized its cache of committed offsets. + This call will block to do a remote call to get the latest committed + offsets from the server. Arguments: partition (TopicPartition): The partition to check. @@ -558,28 +582,19 @@ def committed(self, partition, metadata=False): Returns: The last committed offset (int or OffsetAndMetadata), or None if there was no prior commit. + + Raises: + KafkaTimeoutError if timeout_ms provided + BrokerResponseErrors if OffsetFetchRequest raises an error. """ assert self.config['api_version'] >= (0, 8, 1), 'Requires >= Kafka 0.8.1' assert self.config['group_id'] is not None, 'Requires group_id' if not isinstance(partition, TopicPartition): raise TypeError('partition must be a TopicPartition namedtuple') - if self._subscription.is_assigned(partition): - committed = self._subscription.assignment[partition].committed - if committed is None: - self._coordinator.refresh_committed_offsets_if_needed() - committed = self._subscription.assignment[partition].committed - else: - commit_map = self._coordinator.fetch_committed_offsets([partition]) - if partition in commit_map: - committed = commit_map[partition] - else: - committed = None - - if committed is not None: - if metadata: - return committed - else: - return committed.offset + committed = self._coordinator.fetch_committed_offsets([partition], timeout_ms=timeout_ms) + if partition not in committed: + return None + return committed[partition] if metadata else committed[partition].offset def _fetch_all_topic_metadata(self): """A blocking call that fetches topic metadata for all topics in the @@ -624,7 +639,7 @@ def partitions_for_topic(self, topic): if partitions is None: self._fetch_all_topic_metadata() partitions = cluster.partitions_for_topic(topic) - return partitions + return partitions or set() def poll(self, timeout_ms=0, max_records=None, update_offsets=True): """Fetch data from assigned topics / partitions. @@ -664,68 +679,66 @@ def poll(self, timeout_ms=0, max_records=None, update_offsets=True): assert not self._closed, 'KafkaConsumer is closed' # Poll for new data until the timeout expires - start = time.time() - remaining = timeout_ms + timer = Timer(timeout_ms) while not self._closed: - records = self._poll_once(remaining, max_records, update_offsets=update_offsets) + records = self._poll_once(timer, max_records, update_offsets=update_offsets) if records: return records - - elapsed_ms = (time.time() - start) * 1000 - remaining = timeout_ms - elapsed_ms - - if remaining <= 0: + elif timer.expired: break - return {} - def _poll_once(self, timeout_ms, max_records, update_offsets=True): + def _poll_once(self, timer, max_records, update_offsets=True): """Do one round of polling. In addition to checking for new data, this does any needed heart-beating, auto-commits, and offset updates. Arguments: - timeout_ms (int): The maximum time in milliseconds to block. + timer (Timer): The maximum time in milliseconds to block. Returns: dict: Map of topic to list of records (may be empty). 
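`poll()` above now hands a `Timer` (imported from `kafka.util`) to each `_poll_once` round instead of recomputing elapsed milliseconds by hand, and exits on records, timer expiry, or close. A simplified stand-in for that loop shape; only the `timeout_ms` and `expired` accessors that the loop relies on are assumed here:

```python
import time

class Timer(object):
    """Simplified stand-in for the kafka.util Timer used by KafkaConsumer.poll()."""
    def __init__(self, timeout_ms):
        self._deadline = time.time() + timeout_ms / 1000.0

    @property
    def timeout_ms(self):
        return max(0, int((self._deadline - time.time()) * 1000))

    @property
    def expired(self):
        return time.time() >= self._deadline

def poll(poll_once, timeout_ms=0, closed=lambda: False):
    """Poll until records arrive, the budget is spent, or the consumer closes."""
    timer = Timer(timeout_ms)
    while not closed():
        records = poll_once(timer)  # each round sees only the *remaining* budget
        if records:
            return records
        elif timer.expired:
            break
    return {}

# A poll_once that never finds data: the loop gives up once the 50ms budget is spent
print(poll(lambda timer: time.sleep(0.01) or {}, timeout_ms=50))  # -> {}
```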
""" - self._coordinator.poll() + if not self._coordinator.poll(timeout_ms=timer.timeout_ms): + log.debug('poll: timeout during coordinator.poll(); returning early') + return {} - # Fetch positions if we have partitions we're subscribed to that we - # don't know the offset for - if not self._subscription.has_all_fetch_positions(): - self._update_fetch_positions(self._subscription.missing_fetch_positions()) + has_all_fetch_positions = self._update_fetch_positions(timeout_ms=timer.timeout_ms) # If data is available already, e.g. from a previous network client # poll() call to commit, then just return it immediately records, partial = self._fetcher.fetched_records(max_records, update_offsets=update_offsets) + log.debug('poll: fetched records: %s, %s', records, partial) + # Before returning the fetched records, we can send off the + # next round of fetches and avoid block waiting for their + # responses to enable pipelining while the user is handling the + # fetched records. + if not partial: + log.debug("poll: Sending fetches") + futures = self._fetcher.send_fetches() + if len(futures): + self._client.poll(timeout_ms=0) + if records: - # Before returning the fetched records, we can send off the - # next round of fetches and avoid block waiting for their - # responses to enable pipelining while the user is handling the - # fetched records. - if not partial: - futures = self._fetcher.send_fetches() - if len(futures): - self._client.poll(timeout_ms=0) return records - # Send any new fetches (won't resend pending fetches) - futures = self._fetcher.send_fetches() - if len(futures): - self._client.poll(timeout_ms=0) + # We do not want to be stuck blocking in poll if we are missing some positions + # since the offset lookup may be backing off after a failure + poll_timeout_ms = min(timer.timeout_ms, self._coordinator.time_to_next_poll() * 1000) + if not has_all_fetch_positions: + log.debug('poll: do not have all fetch positions...') + poll_timeout_ms = min(poll_timeout_ms, self.config['retry_backoff_ms']) - timeout_ms = min(timeout_ms, self._coordinator.time_to_next_poll() * 1000) - self._client.poll(timeout_ms=timeout_ms) + self._client.poll(timeout_ms=poll_timeout_ms) # after the long poll, we should check whether the group needs to rebalance # prior to returning data so that the group can stabilize faster if self._coordinator.need_rejoin(): + log.debug('poll: coordinator needs rejoin; returning early') return {} records, _ = self._fetcher.fetched_records(max_records, update_offsets=update_offsets) return records - def position(self, partition): + def position(self, partition, timeout_ms=None): """Get the offset of the next record that will be fetched Arguments: @@ -737,11 +750,17 @@ def position(self, partition): if not isinstance(partition, TopicPartition): raise TypeError('partition must be a TopicPartition namedtuple') assert self._subscription.is_assigned(partition), 'Partition is not assigned' + + timer = Timer(timeout_ms) position = self._subscription.assignment[partition].position - if position is None: - self._update_fetch_positions([partition]) - position = self._subscription.assignment[partition].position - return position.offset if position else None + while position is None: + # batch update fetch positions for any partitions without a valid position + if self._update_fetch_positions(timeout_ms=timer.timeout_ms): + position = self._subscription.assignment[partition].position + elif timer.expired: + return None + else: + return position.offset def highwater(self, partition): """Last 
known highwater offset for a partition. @@ -835,8 +854,7 @@ def seek(self, partition, offset): assert partition in self._subscription.assigned_partitions(), 'Unassigned partition' log.debug("Seeking to offset %s for partition %s", offset, partition) self._subscription.assignment[partition].seek(offset) - if not self.config['legacy_iterator']: - self._iterator = None + self._iterator = None def seek_to_beginning(self, *partitions): """Seek to the oldest available offset for partitions. @@ -860,9 +878,8 @@ def seek_to_beginning(self, *partitions): for tp in partitions: log.debug("Seeking to beginning of partition %s", tp) - self._subscription.need_offset_reset(tp, OffsetResetStrategy.EARLIEST) - if not self.config['legacy_iterator']: - self._iterator = None + self._subscription.request_offset_reset(tp, OffsetResetStrategy.EARLIEST) + self._iterator = None def seek_to_end(self, *partitions): """Seek to the most recent available offset for partitions. @@ -886,9 +903,8 @@ def seek_to_end(self, *partitions): for tp in partitions: log.debug("Seeking to end of partition %s", tp) - self._subscription.need_offset_reset(tp, OffsetResetStrategy.LATEST) - if not self.config['legacy_iterator']: - self._iterator = None + self._subscription.request_offset_reset(tp, OffsetResetStrategy.LATEST) + self._iterator = None def subscribe(self, topics=(), pattern=None, listener=None): """Subscribe to a list of topics, or a topic regex pattern. @@ -959,13 +975,16 @@ def subscription(self): def unsubscribe(self): """Unsubscribe from all topics and clear all assigned partitions.""" + # make sure the offsets of topic partitions the consumer is unsubscribing from + # are committed since there will be no following rebalance + self._coordinator.maybe_auto_commit_offsets_now() self._subscription.unsubscribe() - self._coordinator.close() + if self.config['api_version'] >= (0, 9): + self._coordinator.maybe_leave_group() self._client.cluster.need_all_topic_metadata = False self._client.set_topics([]) log.debug("Unsubscribed all topics or patterns and assigned partitions") - if not self.config['legacy_iterator']: - self._iterator = None + self._iterator = None def metrics(self, raw=False): """Get metrics on consumer performance. @@ -977,6 +996,8 @@ def metrics(self, raw=False): This is an unstable interface. It may change in future releases without warning. """ + if not self._metrics: + return if raw: return self._metrics.metrics.copy() @@ -1032,7 +1053,7 @@ def offsets_for_times(self, timestamps): raise ValueError( "The target time for partition {} is {}. The target time " "cannot be negative.".format(tp, ts)) - return self._fetcher.get_offsets_by_times( + return self._fetcher.offsets_by_times( timestamps, self.config['request_timeout_ms']) def beginning_offsets(self, partitions): @@ -1098,7 +1119,7 @@ def _use_consumer_group(self): return False return True - def _update_fetch_positions(self, partitions): + def _update_fetch_positions(self, timeout_ms=None): """Set the fetch position to the committed position (if there is one) or reset it using the offset reset policy the user has configured. @@ -1106,30 +1127,36 @@ def _update_fetch_positions(self, partitions): partitions (List[TopicPartition]): The partitions that need updating fetch positions. + Returns True if fetch positions updated, False if timeout or async reset is pending + Raises: NoOffsetForPartitionError: If no offset is stored for a given partition and no offset reset policy is defined. 
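From the caller's perspective, the blocking lookups above are now bounded: `position()` returns None when a fetch position cannot be established within the timeout, and `committed()` always performs a remote offset fetch and raises `KafkaTimeoutError` if `timeout_ms` elapses. A usage sketch, assuming a reachable broker; broker, topic, and group names are placeholders:

```python
from kafka import KafkaConsumer
from kafka.errors import KafkaTimeoutError
from kafka.structs import TopicPartition

consumer = KafkaConsumer(bootstrap_servers='localhost:9092',  # placeholder broker
                         group_id='my-group')
tp = TopicPartition('my-topic', 0)
consumer.assign([tp])

# Returns None if fetch positions could not be resolved within 5 seconds
offset = consumer.position(tp, timeout_ms=5000)
print('next offset to fetch:', offset)

# committed() now always goes to the group coordinator; bound it and handle timeouts
try:
    committed = consumer.committed(tp, metadata=True, timeout_ms=5000)
    print('last committed:', committed)  # OffsetAndMetadata or None
except KafkaTimeoutError:
    print('offset fetch did not complete in time')
```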
""" - # Lookup any positions for partitions which are awaiting reset (which may be the - # case if the user called :meth:`seek_to_beginning` or :meth:`seek_to_end`. We do - # this check first to avoid an unnecessary lookup of committed offsets (which - # typically occurs when the user is manually assigning partitions and managing - # their own offsets). - self._fetcher.reset_offsets_if_needed(partitions) - - if not self._subscription.has_all_fetch_positions(): - # if we still don't have offsets for all partitions, then we should either seek - # to the last committed position or reset using the auto reset policy - if (self.config['api_version'] >= (0, 8, 1) and - self.config['group_id'] is not None): - # first refresh commits for all assigned partitions - self._coordinator.refresh_committed_offsets_if_needed() - - # Then, do any offset lookups in case some positions are not known - self._fetcher.update_fetch_positions(partitions) + if self._subscription.has_all_fetch_positions(): + return True + + if (self.config['api_version'] >= (0, 8, 1) and + self.config['group_id'] is not None): + # If there are any partitions which do not have a valid position and are not + # awaiting reset, then we need to fetch committed offsets. We will only do a + # coordinator lookup if there are partitions which have missing positions, so + # a consumer with manually assigned partitions can avoid a coordinator dependence + # by always ensuring that assigned partitions have an initial position. + if not self._coordinator.refresh_committed_offsets_if_needed(timeout_ms=timeout_ms): + return False + + # If there are partitions still needing a position and a reset policy is defined, + # request reset using the default policy. If no reset strategy is defined and there + # are partitions with a missing position, then we will raise an exception. + self._subscription.reset_missing_positions() + + # Finally send an asynchronous request to lookup and update the positions of any + # partitions which are awaiting reset. 
+ return not self._fetcher.reset_offsets_if_needed() def _message_generator_v2(self): - timeout_ms = 1000 * (self._consumer_timeout - time.time()) + timeout_ms = 1000 * max(0, self._consumer_timeout - time.time()) record_map = self.poll(timeout_ms=timeout_ms, update_offsets=False) for tp, records in six.iteritems(record_map): # Generators are stateful, and it is possible that the tp / records @@ -1147,69 +1174,12 @@ def _message_generator_v2(self): self._subscription.assignment[tp].position = OffsetAndMetadata(record.offset + 1, '', -1) yield record - def _message_generator(self): - assert self.assignment() or self.subscription() is not None, 'No topic subscription or manual partition assignment' - while time.time() < self._consumer_timeout: - - self._coordinator.poll() - - # Fetch offsets for any subscribed partitions that we arent tracking yet - if not self._subscription.has_all_fetch_positions(): - partitions = self._subscription.missing_fetch_positions() - self._update_fetch_positions(partitions) - - poll_ms = min((1000 * (self._consumer_timeout - time.time())), self.config['retry_backoff_ms']) - self._client.poll(timeout_ms=poll_ms) - - # after the long poll, we should check whether the group needs to rebalance - # prior to returning data so that the group can stabilize faster - if self._coordinator.need_rejoin(): - continue - - # We need to make sure we at least keep up with scheduled tasks, - # like heartbeats, auto-commits, and metadata refreshes - timeout_at = self._next_timeout() - - # Short-circuit the fetch iterator if we are already timed out - # to avoid any unintentional interaction with fetcher setup - if time.time() > timeout_at: - continue - - for msg in self._fetcher: - yield msg - if time.time() > timeout_at: - log.debug("internal iterator timeout - breaking for poll") - break - self._client.poll(timeout_ms=0) - - # An else block on a for loop only executes if there was no break - # so this should only be called on a StopIteration from the fetcher - # We assume that it is safe to init_fetches when fetcher is done - # i.e., there are no more records stored internally - else: - self._fetcher.send_fetches() - - def _next_timeout(self): - timeout = min(self._consumer_timeout, - self._client.cluster.ttl() / 1000.0 + time.time(), - self._coordinator.time_to_next_poll() + time.time()) - return timeout - def __iter__(self): # pylint: disable=non-iterator-returned return self def __next__(self): if self._closed: raise StopIteration('KafkaConsumer closed') - # Now that the heartbeat thread runs in the background - # there should be no reason to maintain a separate iterator - # but we'll keep it available for a few releases just in case - if self.config['legacy_iterator']: - return self.next_v1() - else: - return self.next_v2() - - def next_v2(self): self._set_consumer_timeout() while time.time() < self._consumer_timeout: if not self._iterator: @@ -1220,17 +1190,6 @@ def next_v2(self): self._iterator = None raise StopIteration() - def next_v1(self): - if not self._iterator: - self._iterator = self._message_generator() - - self._set_consumer_timeout() - try: - return next(self._iterator) - except StopIteration: - self._iterator = None - raise - def _set_consumer_timeout(self): # consumer_timeout_ms can be used to stop iteration early if self.config['consumer_timeout_ms'] >= 0: diff --git a/kafka/consumer/subscription_state.py b/kafka/consumer/subscription_state.py index b30922b3e..f99f01615 100644 --- a/kafka/consumer/subscription_state.py +++ b/kafka/consumer/subscription_state.py 
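The `SubscriptionState` changes below guard shared state with a `threading.RLock` and decorate their public methods with `synchronized` from `kafka.util`. That helper is not shown in this diff; a minimal sketch of the pattern it implements, assuming the instance keeps its lock on `self._lock`:

```python
import functools
import threading

def synchronized(func):
    """Run the wrapped method while holding the instance's re-entrant lock."""
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        with self._lock:
            return func(self, *args, **kwargs)
    return wrapper

class Subscriptions(object):
    def __init__(self):
        self._lock = threading.RLock()
        self._topics = set()

    @synchronized
    def subscribe(self, topic):
        self._topics.add(topic)

    @synchronized
    def topics(self):
        # RLock is re-entrant, so synchronized methods may call each other safely
        return set(self._topics)

s = Subscriptions()
s.subscribe('my-topic')
print(s.topics())
```

Using a re-entrant lock keeps nested calls between synchronized methods (for example, one public method delegating to another) from deadlocking on the same thread.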
@@ -1,18 +1,40 @@ from __future__ import absolute_import import abc +from collections import OrderedDict +try: + from collections.abc import Sequence +except ImportError: + from collections import Sequence +try: + # enum in stdlib as of py3.4 + from enum import IntEnum # pylint: disable=import-error +except ImportError: + # vendored backport module + from kafka.vendor.enum34 import IntEnum import logging +import random import re +import threading +import time from kafka.vendor import six -from kafka.errors import IllegalStateError +import kafka.errors as Errors from kafka.protocol.list_offsets import OffsetResetStrategy from kafka.structs import OffsetAndMetadata +from kafka.util import ensure_valid_topic_name, synchronized log = logging.getLogger(__name__) +class SubscriptionType(IntEnum): + NONE = 0 + AUTO_TOPICS = 1 + AUTO_PATTERN = 2 + USER_ASSIGNED = 3 + + class SubscriptionState(object): """ A class for tracking the topics, partitions, and offsets for the consumer. @@ -32,10 +54,6 @@ class SubscriptionState(object): Note that pause state as well as fetch/consumed positions are not preserved when partition assignment is changed whether directly by the user or through a group rebalance. - - This class also maintains a cache of the latest commit position for each of - the assigned partitions. This is updated through committed() and can be used - to set the initial fetch position (e.g. Fetcher._reset_offset() ). """ _SUBSCRIPTION_EXCEPTION_MESSAGE = ( "You must choose only one way to configure your consumer:" @@ -43,10 +61,6 @@ class SubscriptionState(object): " (2) subscribe to topics matching a regex pattern," " (3) assign itself specific topic-partitions.") - # Taken from: https://github.com/apache/kafka/blob/39eb31feaeebfb184d98cc5d94da9148c2319d81/clients/src/main/java/org/apache/kafka/common/internals/Topic.java#L29 - _MAX_NAME_LENGTH = 249 - _TOPIC_LEGAL_CHARS = re.compile('^[a-zA-Z0-9._-]+$') - def __init__(self, offset_reset_strategy='earliest'): """Initialize a SubscriptionState instance @@ -64,15 +78,24 @@ def __init__(self, offset_reset_strategy='earliest'): self._default_offset_reset_strategy = offset_reset_strategy self.subscription = None # set() or None + self.subscription_type = SubscriptionType.NONE self.subscribed_pattern = None # regex str or None self._group_subscription = set() self._user_assignment = set() - self.assignment = dict() - self.listener = None - - # initialize to true for the consumers to fetch offset upon starting up - self.needs_fetch_committed_offsets = True - + self.assignment = OrderedDict() + self.rebalance_listener = None + self.listeners = [] + self._lock = threading.RLock() + + def _set_subscription_type(self, subscription_type): + if not isinstance(subscription_type, SubscriptionType): + raise ValueError('SubscriptionType enum required') + if self.subscription_type == SubscriptionType.NONE: + self.subscription_type = subscription_type + elif self.subscription_type != subscription_type: + raise Errors.IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE) + + @synchronized def subscribe(self, topics=(), pattern=None, listener=None): """Subscribe to a list of topics, or a topic regex pattern. @@ -108,39 +131,26 @@ def subscribe(self, topics=(), pattern=None, listener=None): guaranteed, however, that the partitions revoked/assigned through this interface are from topics subscribed in this call. 
""" - if self._user_assignment or (topics and pattern): - raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE) assert topics or pattern, 'Must provide topics or pattern' + if (topics and pattern): + raise Errors.IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE) - if pattern: + elif pattern: + self._set_subscription_type(SubscriptionType.AUTO_PATTERN) log.info('Subscribing to pattern: /%s/', pattern) self.subscription = set() self.subscribed_pattern = re.compile(pattern) else: + if isinstance(topics, str) or not isinstance(topics, Sequence): + raise TypeError('Topics must be a list (or non-str sequence)') + self._set_subscription_type(SubscriptionType.AUTO_TOPICS) self.change_subscription(topics) if listener and not isinstance(listener, ConsumerRebalanceListener): raise TypeError('listener must be a ConsumerRebalanceListener') - self.listener = listener - - def _ensure_valid_topic_name(self, topic): - """ Ensures that the topic name is valid according to the kafka source. """ - - # See Kafka Source: - # https://github.com/apache/kafka/blob/39eb31feaeebfb184d98cc5d94da9148c2319d81/clients/src/main/java/org/apache/kafka/common/internals/Topic.java - if topic is None: - raise TypeError('All topics must not be None') - if not isinstance(topic, six.string_types): - raise TypeError('All topics must be strings') - if len(topic) == 0: - raise ValueError('All topics must be non-empty strings') - if topic == '.' or topic == '..': - raise ValueError('Topic name cannot be "." or ".."') - if len(topic) > self._MAX_NAME_LENGTH: - raise ValueError('Topic name is illegal, it can\'t be longer than {0} characters, topic: "{1}"'.format(self._MAX_NAME_LENGTH, topic)) - if not self._TOPIC_LEGAL_CHARS.match(topic): - raise ValueError('Topic name "{0}" is illegal, it contains a character other than ASCII alphanumerics, ".", "_" and "-"'.format(topic)) + self.rebalance_listener = listener + @synchronized def change_subscription(self, topics): """Change the topic subscription. @@ -154,8 +164,8 @@ def change_subscription(self, topics): - a topic name is '.' or '..' or - a topic name does not consist of ASCII-characters/'-'/'_'/'.' """ - if self._user_assignment: - raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE) + if not self.partitions_auto_assigned(): + raise Errors.IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE) if isinstance(topics, six.string_types): topics = [topics] @@ -166,17 +176,13 @@ def change_subscription(self, topics): return for t in topics: - self._ensure_valid_topic_name(t) + ensure_valid_topic_name(t) log.info('Updating subscribed topics to: %s', topics) self.subscription = set(topics) self._group_subscription.update(topics) - # Remove any assigned partitions which are no longer subscribed to - for tp in set(self.assignment.keys()): - if tp.topic not in self.subscription: - del self.assignment[tp] - + @synchronized def group_subscribe(self, topics): """Add topics to the current group subscription. 
@@ -186,17 +192,19 @@ def group_subscribe(self, topics): Arguments: topics (list of str): topics to add to the group subscription """ - if self._user_assignment: - raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE) + if not self.partitions_auto_assigned(): + raise Errors.IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE) self._group_subscription.update(topics) + @synchronized def reset_group_subscription(self): """Reset the group's subscription to only contain topics subscribed by this consumer.""" - if self._user_assignment: - raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE) + if not self.partitions_auto_assigned(): + raise Errors.IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE) assert self.subscription is not None, 'Subscription required' self._group_subscription.intersection_update(self.subscription) + @synchronized def assign_from_user(self, partitions): """Manually assign a list of TopicPartitions to this consumer. @@ -215,21 +223,13 @@ def assign_from_user(self, partitions): Raises: IllegalStateError: if consumer has already called subscribe() """ - if self.subscription is not None: - raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE) - + self._set_subscription_type(SubscriptionType.USER_ASSIGNED) if self._user_assignment != set(partitions): self._user_assignment = set(partitions) + self._set_assignment({partition: self.assignment.get(partition, TopicPartitionState()) + for partition in partitions}) - for partition in partitions: - if partition not in self.assignment: - self._add_assigned_partition(partition) - - for tp in set(self.assignment.keys()) - self._user_assignment: - del self.assignment[tp] - - self.needs_fetch_committed_offsets = True - + @synchronized def assign_from_subscribed(self, assignments): """Update the assignment to the specified partitions @@ -243,26 +243,39 @@ def assign_from_subscribed(self, assignments): consumer instance. """ if not self.partitions_auto_assigned(): - raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE) + raise Errors.IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE) for tp in assignments: if tp.topic not in self.subscription: raise ValueError("Assigned partition %s for non-subscribed topic." 
                                 % (tp,))
 
-        # after rebalancing, we always reinitialize the assignment state
-        self.assignment.clear()
-        for tp in assignments:
-            self._add_assigned_partition(tp)
-        self.needs_fetch_committed_offsets = True
+        # randomized ordering should improve balance for short-lived consumers
+        self._set_assignment({partition: TopicPartitionState() for partition in assignments}, randomize=True)
         log.info("Updated partition assignment: %s", assignments)
 
+    def _set_assignment(self, partition_states, randomize=False):
+        """Batch partition assignment by topic (self.assignment is OrderedDict)"""
+        self.assignment.clear()
+        topics = [tp.topic for tp in six.iterkeys(partition_states)]
+        if randomize:
+            random.shuffle(topics)
+        topic_partitions = OrderedDict({topic: [] for topic in topics})
+        for tp in six.iterkeys(partition_states):
+            topic_partitions[tp.topic].append(tp)
+        for topic in six.iterkeys(topic_partitions):
+            for tp in topic_partitions[topic]:
+                self.assignment[tp] = partition_states[tp]
+
+    @synchronized
     def unsubscribe(self):
         """Clear all topic subscriptions and partition assignments"""
         self.subscription = None
         self._user_assignment.clear()
         self.assignment.clear()
         self.subscribed_pattern = None
+        self.subscription_type = SubscriptionType.NONE
 
+    @synchronized
     def group_subscription(self):
         """Get the topic subscription for the group.
 
@@ -278,6 +291,7 @@ def group_subscription(self):
         """
         return self._group_subscription
 
+    @synchronized
     def seek(self, partition, offset):
         """Manually specify the fetch offset for a TopicPartition.
 
@@ -289,31 +303,38 @@ def seek(self, partition, offset):
 
         Arguments:
             partition (TopicPartition): partition for seek operation
-            offset (int): message offset in partition
+            offset (int or OffsetAndMetadata): message offset in partition
         """
+        if not isinstance(offset, (int, OffsetAndMetadata)):
+            raise TypeError("offset must be type int or OffsetAndMetadata")
         self.assignment[partition].seek(offset)
 
+    @synchronized
     def assigned_partitions(self):
         """Return set of TopicPartitions in current assignment."""
         return set(self.assignment.keys())
 
+    @synchronized
     def paused_partitions(self):
         """Return current set of paused TopicPartitions."""
         return set(partition for partition in self.assignment if self.is_paused(partition))
 
+    @synchronized
     def fetchable_partitions(self):
-        """Return set of TopicPartitions that should be Fetched."""
-        fetchable = set()
+        """Return ordered list of TopicPartitions that should be Fetched."""
+        fetchable = list()
         for partition, state in six.iteritems(self.assignment):
             if state.is_fetchable():
-                fetchable.add(partition)
+                fetchable.append(partition)
         return fetchable
 
+    @synchronized
     def partitions_auto_assigned(self):
         """Return True unless user supplied partitions manually."""
-        return self.subscription is not None
+        return self.subscription_type in (SubscriptionType.AUTO_TOPICS, SubscriptionType.AUTO_PATTERN)
 
+    @synchronized
     def all_consumed_offsets(self):
         """Returns consumed offsets as {TopicPartition: OffsetAndMetadata}"""
        all_consumed = {}
@@ -322,7 +343,8 @@ def all_consumed_offsets(self):
                 all_consumed[partition] = state.position
         return all_consumed
 
-    def need_offset_reset(self, partition, offset_reset_strategy=None):
+    @synchronized
+    def request_offset_reset(self, partition, offset_reset_strategy=None):
         """Mark partition for offset reset using specified or default strategy.
Arguments: @@ -331,60 +353,109 @@ def need_offset_reset(self, partition, offset_reset_strategy=None): """ if offset_reset_strategy is None: offset_reset_strategy = self._default_offset_reset_strategy - self.assignment[partition].await_reset(offset_reset_strategy) + self.assignment[partition].reset(offset_reset_strategy) + + @synchronized + def set_reset_pending(self, partitions, next_allowed_reset_time): + for partition in partitions: + self.assignment[partition].set_reset_pending(next_allowed_reset_time) + @synchronized def has_default_offset_reset_policy(self): """Return True if default offset reset policy is Earliest or Latest""" return self._default_offset_reset_strategy != OffsetResetStrategy.NONE + @synchronized def is_offset_reset_needed(self, partition): return self.assignment[partition].awaiting_reset + @synchronized def has_all_fetch_positions(self): - for state in self.assignment.values(): + for state in six.itervalues(self.assignment): if not state.has_valid_position: return False return True + @synchronized def missing_fetch_positions(self): missing = set() for partition, state in six.iteritems(self.assignment): - if not state.has_valid_position: + if state.is_missing_position(): missing.add(partition) return missing + @synchronized + def has_valid_position(self, partition): + return partition in self.assignment and self.assignment[partition].has_valid_position + + @synchronized + def reset_missing_positions(self): + partitions_with_no_offsets = set() + for tp, state in six.iteritems(self.assignment): + if state.is_missing_position(): + if self._default_offset_reset_strategy == OffsetResetStrategy.NONE: + partitions_with_no_offsets.add(tp) + else: + state.reset(self._default_offset_reset_strategy) + + if partitions_with_no_offsets: + raise Errors.NoOffsetForPartitionError(partitions_with_no_offsets) + + @synchronized + def partitions_needing_reset(self): + partitions = set() + for tp, state in six.iteritems(self.assignment): + if state.awaiting_reset and state.is_reset_allowed(): + partitions.add(tp) + return partitions + + @synchronized def is_assigned(self, partition): return partition in self.assignment + @synchronized def is_paused(self, partition): return partition in self.assignment and self.assignment[partition].paused + @synchronized def is_fetchable(self, partition): return partition in self.assignment and self.assignment[partition].is_fetchable() + @synchronized def pause(self, partition): self.assignment[partition].pause() + @synchronized def resume(self, partition): self.assignment[partition].resume() - def _add_assigned_partition(self, partition): - self.assignment[partition] = TopicPartitionState() + @synchronized + def reset_failed(self, partitions, next_retry_time): + for partition in partitions: + self.assignment[partition].reset_failed(next_retry_time) + + @synchronized + def move_partition_to_end(self, partition): + if partition in self.assignment: + try: + self.assignment.move_to_end(partition) + except AttributeError: + state = self.assignment.pop(partition) + self.assignment[partition] = state + + @synchronized + def position(self, partition): + return self.assignment[partition].position class TopicPartitionState(object): def __init__(self): - self.committed = None # last committed OffsetAndMetadata - self.has_valid_position = False # whether we have valid position self.paused = False # whether this partition has been paused by the user - self.awaiting_reset = False # whether we are awaiting reset - self.reset_strategy = None # the reset strategy if 
awaitingReset is set + self.reset_strategy = None # the reset strategy if awaiting_reset is set self._position = None # OffsetAndMetadata exposed to the user self.highwater = None self.drop_pending_record_batch = False - # The last message offset hint available from a record batch with - # magic=2 which includes deleted compacted messages - self.last_offset_from_record_batch = None + self.next_allowed_retry_time = None def _set_position(self, offset): assert self.has_valid_position, 'Valid position required' @@ -396,20 +467,37 @@ def _get_position(self): position = property(_get_position, _set_position, None, "last position") - def await_reset(self, strategy): - self.awaiting_reset = True + def reset(self, strategy): + assert strategy is not None self.reset_strategy = strategy self._position = None - self.last_offset_from_record_batch = None - self.has_valid_position = False + self.next_allowed_retry_time = None + + def is_reset_allowed(self): + return self.next_allowed_retry_time is None or self.next_allowed_retry_time < time.time() + + @property + def awaiting_reset(self): + return self.reset_strategy is not None + + def set_reset_pending(self, next_allowed_retry_time): + self.next_allowed_retry_time = next_allowed_retry_time + + def reset_failed(self, next_allowed_retry_time): + self.next_allowed_retry_time = next_allowed_retry_time + + @property + def has_valid_position(self): + return self._position is not None + + def is_missing_position(self): + return not self.has_valid_position and not self.awaiting_reset def seek(self, offset): - self._position = OffsetAndMetadata(offset, '', -1) - self.awaiting_reset = False + self._position = offset if isinstance(offset, OffsetAndMetadata) else OffsetAndMetadata(offset, '', -1) self.reset_strategy = None - self.has_valid_position = True self.drop_pending_record_batch = True - self.last_offset_from_record_batch = None + self.next_allowed_retry_time = None def pause(self): self.paused = True @@ -421,6 +509,7 @@ def is_fetchable(self): return not self.paused and self.has_valid_position +@six.add_metaclass(abc.ABCMeta) class ConsumerRebalanceListener(object): """ A callback interface that the user can implement to trigger custom actions @@ -462,8 +551,6 @@ class ConsumerRebalanceListener(object): taking over that partition has their on_partitions_assigned() callback called to load the state. 
""" - __metaclass__ = abc.ABCMeta - @abc.abstractmethod def on_partitions_revoked(self, revoked): """ diff --git a/kafka/coordinator/assignors/sticky/sticky_assignor.py b/kafka/coordinator/assignors/sticky/sticky_assignor.py index dce714f1a..69f68f564 100644 --- a/kafka/coordinator/assignors/sticky/sticky_assignor.py +++ b/kafka/coordinator/assignors/sticky/sticky_assignor.py @@ -2,7 +2,6 @@ from collections import defaultdict, namedtuple from copy import deepcopy -from kafka.cluster import ClusterMetadata from kafka.coordinator.assignors.abstract import AbstractPartitionAssignor from kafka.coordinator.assignors.sticky.partition_movements import PartitionMovements from kafka.coordinator.assignors.sticky.sorted_set import SortedSet @@ -660,7 +659,7 @@ def _metadata(cls, topics, member_assignment_partitions, generation=-1): partitions_by_topic = defaultdict(list) for topic_partition in member_assignment_partitions: partitions_by_topic[topic_partition.topic].append(topic_partition.partition) - data = StickyAssignorUserDataV1(six.viewitems(partitions_by_topic), generation) + data = StickyAssignorUserDataV1(list(partitions_by_topic.items()), generation) user_data = data.encode() return ConsumerProtocolMemberMetadata(cls.version, list(topics), user_data) diff --git a/kafka/coordinator/base.py b/kafka/coordinator/base.py index a30b5a9b8..0eb7f0eec 100644 --- a/kafka/coordinator/base.py +++ b/kafka/coordinator/base.py @@ -5,6 +5,7 @@ import logging import threading import time +import warnings import weakref from kafka.vendor import six @@ -16,8 +17,10 @@ from kafka.metrics.stats import Avg, Count, Max, Rate from kafka.protocol.find_coordinator import FindCoordinatorRequest from kafka.protocol.group import HeartbeatRequest, JoinGroupRequest, LeaveGroupRequest, SyncGroupRequest, DEFAULT_GENERATION_ID, UNKNOWN_MEMBER_ID +from kafka.util import Timer log = logging.getLogger('kafka.coordinator') +heartbeat_log = logging.getLogger('kafka.coordinator.heartbeat') class MemberState(object): @@ -32,6 +35,19 @@ def __init__(self, generation_id, member_id, protocol): self.member_id = member_id self.protocol = protocol + @property + def is_valid(self): + return self.generation_id != DEFAULT_GENERATION_ID + + def __eq__(self, other): + return (self.generation_id == other.generation_id and + self.member_id == other.member_id and + self.protocol == other.protocol) + + def __str__(self): + return "" % (self.generation_id, self.member_id, self.protocol) + + Generation.NO_GENERATION = Generation(DEFAULT_GENERATION_ID, UNKNOWN_MEMBER_ID, None) @@ -83,10 +99,11 @@ class BaseCoordinator(object): 'max_poll_interval_ms': 300000, 'retry_backoff_ms': 100, 'api_version': (0, 10, 1), + 'metrics': None, 'metric_group_prefix': '', } - def __init__(self, client, metrics, **configs): + def __init__(self, client, **configs): """ Keyword Arguments: group_id (str): name of the consumer group to join for dynamic @@ -129,8 +146,11 @@ def __init__(self, client, metrics, **configs): self.coordinator_id = None self._find_coordinator_future = None self._generation = Generation.NO_GENERATION - self.sensors = GroupCoordinatorMetrics(self.heartbeat, metrics, - self.config['metric_group_prefix']) + if self.config['metrics']: + self._sensors = GroupCoordinatorMetrics(self.heartbeat, self.config['metrics'], + self.config['metric_group_prefix']) + else: + self._sensors = None @abc.abstractmethod def protocol_type(self): @@ -162,7 +182,7 @@ def group_protocols(self): pass @abc.abstractmethod - def _on_join_prepare(self, generation, member_id): 
+ def _on_join_prepare(self, generation, member_id, timeout_ms=None): """Invoked prior to each group join or rejoin. This is typically used to perform any cleanup from the previous @@ -228,16 +248,27 @@ def coordinator(self): """ if self.coordinator_id is None: return None - elif self._client.is_disconnected(self.coordinator_id): + elif self._client.is_disconnected(self.coordinator_id) and self._client.connection_delay(self.coordinator_id) > 0: self.coordinator_dead('Node Disconnected') return None else: return self.coordinator_id - def ensure_coordinator_ready(self): - """Block until the coordinator for this group is known - (and we have an active connection -- java client uses unsent queue). + def connected(self): + """Return True iff the coordinator node is connected""" + with self._lock: + return self.coordinator_id is not None and self._client.connected(self.coordinator_id) + + def ensure_coordinator_ready(self, timeout_ms=None): + """Block until the coordinator for this group is known. + + Keyword Arguments: + timeout_ms (numeric, optional): Maximum number of milliseconds to + block waiting to find coordinator. Default: None. + + Returns: True is coordinator found before timeout_ms, else False """ + timer = Timer(timeout_ms) with self._client._lock, self._lock: while self.coordinator_unknown(): @@ -245,30 +276,49 @@ def ensure_coordinator_ready(self): # so we will just pick a node at random and treat # it as the "coordinator" if self.config['api_version'] < (0, 8, 2): - self.coordinator_id = self._client.least_loaded_node() - if self.coordinator_id is not None: + maybe_coordinator_id = self._client.least_loaded_node() + if maybe_coordinator_id is None or self._client.cluster.is_bootstrap(maybe_coordinator_id): + future = Future().failure(Errors.NoBrokersAvailable()) + else: + self.coordinator_id = maybe_coordinator_id self._client.maybe_connect(self.coordinator_id) - continue + if timer.expired: + return False + else: + continue + else: + future = self.lookup_coordinator() + + self._client.poll(future=future, timeout_ms=timer.timeout_ms) - future = self.lookup_coordinator() - self._client.poll(future=future) + if not future.is_done: + return False if future.failed(): if future.retriable(): if getattr(future.exception, 'invalid_metadata', False): log.debug('Requesting metadata for group coordinator request: %s', future.exception) metadata_update = self._client.cluster.request_update() - self._client.poll(future=metadata_update) + self._client.poll(future=metadata_update, timeout_ms=timer.timeout_ms) + if not metadata_update.is_done: + return False else: - time.sleep(self.config['retry_backoff_ms'] / 1000) + if timeout_ms is None or timer.timeout_ms > self.config['retry_backoff_ms']: + time.sleep(self.config['retry_backoff_ms'] / 1000) + else: + time.sleep(timer.timeout_ms / 1000) else: raise future.exception # pylint: disable-msg=raising-bad-type + if timer.expired: + return False + else: + return True def _reset_find_coordinator_future(self, result): self._find_coordinator_future = None def lookup_coordinator(self): - with self._lock: + with self._client._lock, self._lock: if self._find_coordinator_future is not None: return self._find_coordinator_future @@ -326,103 +376,139 @@ def time_to_next_heartbeat(self): return float('inf') return self.heartbeat.time_to_next_heartbeat() + def _reset_join_group_future(self): + with self._lock: + self.join_future = None + + def _initiate_join_group(self): + with self._lock: + # we store the join future in case we are woken up by the user + # 
after beginning the rebalance in the call to poll below. + # This ensures that we do not mistakenly attempt to rejoin + # before the pending rebalance has completed. + if self.join_future is None: + self.state = MemberState.REBALANCING + self.join_future = self._send_join_group_request() + + # handle join completion in the callback so that the + # callback will be invoked even if the consumer is woken up + # before finishing the rebalance + self.join_future.add_callback(self._handle_join_success) + + # we handle failures below after the request finishes. + # If the join completes after having been woken up, the + # exception is ignored and we will rejoin + self.join_future.add_errback(self._handle_join_failure) + + return self.join_future + def _handle_join_success(self, member_assignment_bytes): + # handle join completion in the callback so that the callback + # will be invoked even if the consumer is woken up before + # finishing the rebalance with self._lock: - log.info("Successfully joined group %s with generation %s", - self.group_id, self._generation.generation_id) self.state = MemberState.STABLE - self.rejoin_needed = False if self._heartbeat_thread: self._heartbeat_thread.enable() - def _handle_join_failure(self, _): + def _handle_join_failure(self, exception): + # we handle failures below after the request finishes. + # if the join completes after having been woken up, + # the exception is ignored and we will rejoin with self._lock: + log.info("Failed to join group %s: %s", self.group_id, exception) self.state = MemberState.UNJOINED - def ensure_active_group(self): - """Ensure that the group is active (i.e. joined and synced)""" - with self._client._lock, self._lock: - if self._heartbeat_thread is None: - self._start_heartbeat_thread() - - while self.need_rejoin() or self._rejoin_incomplete(): - self.ensure_coordinator_ready() - - # call on_join_prepare if needed. We set a flag - # to make sure that we do not call it a second - # time if the client is woken up before a pending - # rebalance completes. This must be called on each - # iteration of the loop because an event requiring - # a rebalance (such as a metadata refresh which - # changes the matched subscription set) can occur - # while another rebalance is still in progress. - if not self.rejoining: - self._on_join_prepare(self._generation.generation_id, - self._generation.member_id) - self.rejoining = True - - # ensure that there are no pending requests to the coordinator. - # This is important in particular to avoid resending a pending - # JoinGroup request. - while not self.coordinator_unknown(): - if not self._client.in_flight_request_count(self.coordinator_id): - break - self._client.poll(timeout_ms=200) - else: - continue - - # we store the join future in case we are woken up by the user - # after beginning the rebalance in the call to poll below. - # This ensures that we do not mistakenly attempt to rejoin - # before the pending rebalance has completed. - if self.join_future is None: - # Fence off the heartbeat thread explicitly so that it cannot - # interfere with the join group. Note that this must come after - # the call to _on_join_prepare since we must be able to continue - # sending heartbeats if that callback takes some time. - self._heartbeat_thread.disable() - - self.state = MemberState.REBALANCING - future = self._send_join_group_request() - - self.join_future = future # this should happen before adding callbacks + def ensure_active_group(self, timeout_ms=None): + """Ensure that the group is active (i.e. 
joined and synced) - # handle join completion in the callback so that the - # callback will be invoked even if the consumer is woken up - # before finishing the rebalance - future.add_callback(self._handle_join_success) - - # we handle failures below after the request finishes. - # If the join completes after having been woken up, the - # exception is ignored and we will rejoin - future.add_errback(self._handle_join_failure) - - else: - future = self.join_future - - self._client.poll(future=future) + Keyword Arguments: + timeout_ms (numeric, optional): Maximum number of milliseconds to + block waiting to join group. Default: None. - if future.succeeded(): - self._on_join_complete(self._generation.generation_id, - self._generation.member_id, - self._generation.protocol, - future.value) - self.join_future = None - self.rejoining = False + Returns: True if group initialized before timeout_ms, else False + """ + if self.config['api_version'] < (0, 9): + raise Errors.UnsupportedVersionError('Group Coordinator APIs require 0.9+ broker') + timer = Timer(timeout_ms) + if not self.ensure_coordinator_ready(timeout_ms=timer.timeout_ms): + return False + self._start_heartbeat_thread() + return self.join_group(timeout_ms=timer.timeout_ms) + + def join_group(self, timeout_ms=None): + if self.config['api_version'] < (0, 9): + raise Errors.UnsupportedVersionError('Group Coordinator APIs require 0.9+ broker') + timer = Timer(timeout_ms) + while self.need_rejoin(): + if not self.ensure_coordinator_ready(timeout_ms=timer.timeout_ms): + return False + + # call on_join_prepare if needed. We set a flag + # to make sure that we do not call it a second + # time if the client is woken up before a pending + # rebalance completes. This must be called on each + # iteration of the loop because an event requiring + # a rebalance (such as a metadata refresh which + # changes the matched subscription set) can occur + # while another rebalance is still in progress. + if not self.rejoining: + self._on_join_prepare(self._generation.generation_id, + self._generation.member_id, + timeout_ms=timer.timeout_ms) + self.rejoining = True + + # fence off the heartbeat thread explicitly so that it cannot + # interfere with the join group. # Note that this must come after + # the call to onJoinPrepare since we must be able to continue + # sending heartbeats if that callback takes some time. + log.debug("Disabling heartbeat thread during join-group") + self._disable_heartbeat_thread() + + # ensure that there are no pending requests to the coordinator. + # This is important in particular to avoid resending a pending + # JoinGroup request. 
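With this change, join_group() and ensure_active_group() accept an optional timeout_ms and report the outcome as a boolean instead of looping until the group is joined. A hypothetical caller sketch of that contract (the function name join_with_deadline and the deadline handling are illustrative, not library API; only ensure_active_group(timeout_ms=...) comes from the patch):

    import time

    def join_with_deadline(coordinator, deadline_seconds=30.0):
        # Keep retrying the bounded join until an overall deadline passes.
        # ensure_active_group() returns False on timeout rather than blocking
        # forever, so the caller stays in control between attempts.
        deadline = time.time() + deadline_seconds
        while time.time() < deadline:
            remaining_ms = max(0, (deadline - time.time()) * 1000)
            if coordinator.ensure_active_group(timeout_ms=remaining_ms):
                return True
            # timed out this round; the caller could service other work here
        return False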
+ while not self.coordinator_unknown(): + if not self._client.in_flight_request_count(self.coordinator_id): + break + poll_timeout_ms = 200 if timer.timeout_ms is None or timer.timeout_ms > 200 else timer.timeout_ms + self._client.poll(timeout_ms=poll_timeout_ms) + if timer.expired: + return False + else: + continue + future = self._initiate_join_group() + self._client.poll(future=future, timeout_ms=timer.timeout_ms) + if future.is_done: + self._reset_join_group_future() + else: + return False + + if future.succeeded(): + self.rejoining = False + self.rejoin_needed = False + self._on_join_complete(self._generation.generation_id, + self._generation.member_id, + self._generation.protocol, + future.value) + return True + else: + exception = future.exception + if isinstance(exception, (Errors.UnknownMemberIdError, + Errors.RebalanceInProgressError, + Errors.IllegalGenerationError, + Errors.MemberIdRequiredError)): + continue + elif not future.retriable(): + raise exception # pylint: disable-msg=raising-bad-type + elif timer.expired: + return False else: - self.join_future = None - exception = future.exception - if isinstance(exception, (Errors.UnknownMemberIdError, - Errors.RebalanceInProgressError, - Errors.IllegalGenerationError)): - continue - elif not future.retriable(): - raise exception # pylint: disable-msg=raising-bad-type - time.sleep(self.config['retry_backoff_ms'] / 1000) - - def _rejoin_incomplete(self): - return self.join_future is not None + if timer.timeout_ms is None or timer.timeout_ms > self.config['retry_backoff_ms']: + time.sleep(self.config['retry_backoff_ms'] / 1000) + else: + time.sleep(timer.timeout_ms / 1000) def _send_join_group_request(self): """Join the group and return the assignment for the next generation. @@ -435,7 +521,7 @@ def _send_join_group_request(self): group leader """ if self.coordinator_unknown(): - e = Errors.GroupCoordinatorNotAvailableError(self.coordinator_id) + e = Errors.CoordinatorNotAvailableError(self.coordinator_id) return Future().failure(e) elif not self._client.ready(self.coordinator_id, metadata_priority=False): @@ -448,7 +534,7 @@ def _send_join_group_request(self): (protocol, metadata if isinstance(metadata, bytes) else metadata.encode()) for protocol, metadata in self.group_protocols() ] - version = self._client.api_version(JoinGroupRequest, max_version=3) + version = self._client.api_version(JoinGroupRequest, max_version=4) if version == 0: request = JoinGroupRequest[version]( self.group_id, @@ -476,8 +562,9 @@ def _send_join_group_request(self): def _failed_request(self, node_id, request, future, error): # Marking coordinator dead - # unless the error is caused by internal client pipelining + # unless the error is caused by internal client pipelining or throttling if not isinstance(error, (Errors.NodeNotReadyError, + Errors.ThrottlingQuotaExceededError, Errors.TooManyInFlightRequests)): log.error('Error sending %s to node %s [%s]', request.__class__.__name__, node_id, error) @@ -488,11 +575,11 @@ def _failed_request(self, node_id, request, future, error): future.failure(error) def _handle_join_group_response(self, future, send_time, response): + log.debug("Received JoinGroup response: %s", response) error_type = Errors.for_code(response.error_code) if error_type is Errors.NoError: - log.debug("Received successful JoinGroup response for group %s: %s", - self.group_id, response) - self.sensors.join_latency.record((time.time() - send_time) * 1000) + if self._sensors: + self._sensors.join_latency.record((time.time() - send_time) * 1000) 
with self._lock: if self.state is not MemberState.REBALANCING: # if the consumer was woken up before a rebalance completes, @@ -504,6 +591,7 @@ def _handle_join_group_response(self, future, send_time, response): response.member_id, response.group_protocol) + log.info("Successfully joined group %s %s", self.group_id, self._generation) if response.leader_id == response.member_id: log.info("Elected group leader -- performing partition" " assignments using %s", self._generation.protocol) @@ -511,25 +599,25 @@ def _handle_join_group_response(self, future, send_time, response): else: self._on_join_follower().chain(future) - elif error_type is Errors.GroupLoadInProgressError: - log.debug("Attempt to join group %s rejected since coordinator %s" - " is loading the group.", self.group_id, self.coordinator_id) + elif error_type is Errors.CoordinatorLoadInProgressError: + log.info("Attempt to join group %s rejected since coordinator %s" + " is loading the group.", self.group_id, self.coordinator_id) # backoff and retry future.failure(error_type(response)) elif error_type is Errors.UnknownMemberIdError: # reset the member id and retry immediately error = error_type(self._generation.member_id) self.reset_generation() - log.debug("Attempt to join group %s failed due to unknown member id", - self.group_id) + log.info("Attempt to join group %s failed due to unknown member id", + self.group_id) future.failure(error) - elif error_type in (Errors.GroupCoordinatorNotAvailableError, - Errors.NotCoordinatorForGroupError): + elif error_type in (Errors.CoordinatorNotAvailableError, + Errors.NotCoordinatorError): # re-discover the coordinator and retry with backoff self.coordinator_dead(error_type()) - log.debug("Attempt to join group %s failed due to obsolete " - "coordinator information: %s", self.group_id, - error_type.__name__) + log.info("Attempt to join group %s failed due to obsolete " + "coordinator information: %s", self.group_id, + error_type.__name__) future.failure(error_type()) elif error_type in (Errors.InconsistentGroupProtocolError, Errors.InvalidSessionTimeoutError, @@ -540,7 +628,21 @@ def _handle_join_group_response(self, future, send_time, response): self.group_id, error) future.failure(error) elif error_type is Errors.GroupAuthorizationFailedError: + log.error("Attempt to join group %s failed due to group authorization error", + self.group_id) future.failure(error_type(self.group_id)) + elif error_type is Errors.MemberIdRequiredError: + # Broker requires a concrete member id to be allowed to join the group. Update member id + # and send another join group request in next cycle. + log.info("Received member id %s for group %s; will retry join-group", + response.member_id, self.group_id) + self.reset_generation(response.member_id) + future.failure(error_type()) + elif error_type is Errors.RebalanceInProgressError: + log.info("Attempt to join group %s failed due to RebalanceInProgressError," + " which could indicate a replication timeout on the broker. 
Will retry.", + self.group_id) + future.failure(error_type()) else: # unexpected error, throw the exception error = error_type() @@ -592,7 +694,7 @@ def _on_join_leader(self, response): def _send_sync_group_request(self, request): if self.coordinator_unknown(): - e = Errors.GroupCoordinatorNotAvailableError(self.coordinator_id) + e = Errors.CoordinatorNotAvailableError(self.coordinator_id) return Future().failure(e) # We assume that coordinator is ready if we're sending SyncGroup @@ -609,9 +711,11 @@ def _send_sync_group_request(self, request): return future def _handle_sync_group_response(self, future, send_time, response): + log.debug("Received SyncGroup response: %s", response) error_type = Errors.for_code(response.error_code) if error_type is Errors.NoError: - self.sensors.sync_latency.record((time.time() - send_time) * 1000) + if self._sensors: + self._sensors.sync_latency.record((time.time() - send_time) * 1000) future.success(response.member_assignment) return @@ -620,19 +724,19 @@ def _handle_sync_group_response(self, future, send_time, response): if error_type is Errors.GroupAuthorizationFailedError: future.failure(error_type(self.group_id)) elif error_type is Errors.RebalanceInProgressError: - log.debug("SyncGroup for group %s failed due to coordinator" - " rebalance", self.group_id) + log.info("SyncGroup for group %s failed due to coordinator" + " rebalance", self.group_id) future.failure(error_type(self.group_id)) elif error_type in (Errors.UnknownMemberIdError, Errors.IllegalGenerationError): error = error_type() - log.debug("SyncGroup for group %s failed due to %s", self.group_id, error) + log.info("SyncGroup for group %s failed due to %s", self.group_id, error) self.reset_generation() future.failure(error) - elif error_type in (Errors.GroupCoordinatorNotAvailableError, - Errors.NotCoordinatorForGroupError): + elif error_type in (Errors.CoordinatorNotAvailableError, + Errors.NotCoordinatorError): error = error_type() - log.debug("SyncGroup for group %s failed due to %s", self.group_id, error) + log.info("SyncGroup for group %s failed due to %s", self.group_id, error) self.coordinator_dead(error) future.failure(error) else: @@ -647,20 +751,20 @@ def _send_group_coordinator_request(self): Future: resolves to the node id of the coordinator """ node_id = self._client.least_loaded_node() - if node_id is None: + if node_id is None or self._client.cluster.is_bootstrap(node_id): return Future().failure(Errors.NoBrokersAvailable()) elif not self._client.ready(node_id, metadata_priority=False): e = Errors.NodeNotReadyError(node_id) return Future().failure(e) - log.debug("Sending group coordinator request for group %s to broker %s", - self.group_id, node_id) version = self._client.api_version(FindCoordinatorRequest, max_version=2) if version == 0: request = FindCoordinatorRequest[version](self.group_id) else: request = FindCoordinatorRequest[version](self.group_id, 0) + log.debug("Sending group coordinator request for group %s to broker %s: %s", + self.group_id, node_id, request) future = Future() _f = self._client.send(node_id, request) _f.add_callback(self._handle_group_coordinator_response, future) @@ -673,7 +777,7 @@ def _handle_group_coordinator_response(self, future, response): error_type = Errors.for_code(response.error_code) if error_type is Errors.NoError: with self._lock: - coordinator_id = self._client.cluster.add_group_coordinator(self.group_id, response) + coordinator_id = self._client.cluster.add_coordinator(response, 'group', self.group_id) if not coordinator_id: # This 
could happen if coordinator metadata is different # than broker metadata @@ -687,7 +791,7 @@ def _handle_group_coordinator_response(self, future, response): self.heartbeat.reset_timeouts() future.success(self.coordinator_id) - elif error_type is Errors.GroupCoordinatorNotAvailableError: + elif error_type is Errors.CoordinatorNotAvailableError: log.debug("Group Coordinator Not Available; retry") future.failure(error_type()) elif error_type is Errors.GroupAuthorizationFailedError: @@ -696,7 +800,7 @@ def _handle_group_coordinator_response(self, future, response): future.failure(error) else: error = error_type() - log.error("Group coordinator lookup for group %s failed: %s", + log.error("Group Coordinator lookup for group %s failed: %s", self.group_id, error) future.failure(error) @@ -707,7 +811,7 @@ def coordinator_dead(self, error): self.coordinator_id, self.group_id, error) self.coordinator_id = None - def generation(self): + def generation_if_stable(self): """Get the current generation state if the group is stable. Returns: the current generation or None if the group is unjoined/rebalancing @@ -717,10 +821,19 @@ def generation(self): return None return self._generation - def reset_generation(self): - """Reset the generation and memberId because we have fallen out of the group.""" + # deprecated + def generation(self): + warnings.warn("Function coordinator.generation() has been renamed to generation_if_stable()", + DeprecationWarning, stacklevel=2) + return self.generation_if_stable() + + def rebalance_in_progress(self): + return self.state is MemberState.REBALANCING + + def reset_generation(self, member_id=UNKNOWN_MEMBER_ID): + """Reset the generation and member_id because we have fallen out of the group.""" with self._lock: - self._generation = Generation.NO_GENERATION + self._generation = Generation(DEFAULT_GENERATION_ID, member_id, None) self.rejoin_needed = True self.state = MemberState.UNJOINED @@ -728,62 +841,81 @@ def request_rejoin(self): self.rejoin_needed = True def _start_heartbeat_thread(self): - if self._heartbeat_thread is None: - log.info('Starting new heartbeat thread') - self._heartbeat_thread = HeartbeatThread(weakref.proxy(self)) - self._heartbeat_thread.daemon = True - self._heartbeat_thread.start() - - def _close_heartbeat_thread(self): - if hasattr(self, '_heartbeat_thread') and self._heartbeat_thread is not None: - log.info('Stopping heartbeat thread') - try: - self._heartbeat_thread.close() - except ReferenceError: - pass - self._heartbeat_thread = None + if self.config['api_version'] < (0, 9): + raise Errors.UnsupportedVersionError('Heartbeat APIs require 0.9+ broker') + with self._lock: + if self._heartbeat_thread is None: + heartbeat_log.info('Starting new heartbeat thread') + self._heartbeat_thread = HeartbeatThread(weakref.proxy(self)) + self._heartbeat_thread.daemon = True + self._heartbeat_thread.start() + heartbeat_log.debug("Started heartbeat thread %s", self._heartbeat_thread.ident) + + def _disable_heartbeat_thread(self): + with self._lock: + if self._heartbeat_thread is not None: + self._heartbeat_thread.disable() + + def _close_heartbeat_thread(self, timeout_ms=None): + with self._lock: + if self._heartbeat_thread is not None: + heartbeat_log.info('Stopping heartbeat thread') + try: + self._heartbeat_thread.close(timeout_ms=timeout_ms) + except ReferenceError: + pass + self._heartbeat_thread = None def __del__(self): - self._close_heartbeat_thread() + try: + self._close_heartbeat_thread() + except (TypeError, AttributeError): + pass - def close(self): 
+ def close(self, timeout_ms=None): """Close the coordinator, leave the current group, and reset local generation / member_id""" - self._close_heartbeat_thread() - self.maybe_leave_group() + self._close_heartbeat_thread(timeout_ms=timeout_ms) + if self.config['api_version'] >= (0, 9): + self.maybe_leave_group(timeout_ms=timeout_ms) - def maybe_leave_group(self): + def maybe_leave_group(self, timeout_ms=None): """Leave the current group and reset local generation/memberId.""" + if self.config['api_version'] < (0, 9): + raise Errors.UnsupportedVersionError('Group Coordinator APIs require 0.9+ broker') with self._client._lock, self._lock: if (not self.coordinator_unknown() and self.state is not MemberState.UNJOINED - and self._generation is not Generation.NO_GENERATION): + and self._generation.is_valid): # this is a minimal effort attempt to leave the group. we do not # attempt any resending if the request fails or times out. log.info('Leaving consumer group (%s).', self.group_id) version = self._client.api_version(LeaveGroupRequest, max_version=2) request = LeaveGroupRequest[version](self.group_id, self._generation.member_id) + log.debug('Sending LeaveGroupRequest to %s: %s', self.coordinator_id, request) future = self._client.send(self.coordinator_id, request) future.add_callback(self._handle_leave_group_response) future.add_errback(log.error, "LeaveGroup request failed: %s") - self._client.poll(future=future) + self._client.poll(future=future, timeout_ms=timeout_ms) self.reset_generation() def _handle_leave_group_response(self, response): + log.debug("Received LeaveGroupResponse: %s", response) error_type = Errors.for_code(response.error_code) if error_type is Errors.NoError: - log.debug("LeaveGroup request for group %s returned successfully", - self.group_id) + log.info("LeaveGroup request for group %s returned successfully", + self.group_id) else: log.error("LeaveGroup request for group %s failed with error: %s", self.group_id, error_type()) def _send_heartbeat_request(self): """Send a heartbeat request""" + # Note: acquire both client + coordinator lock before calling if self.coordinator_unknown(): - e = Errors.GroupCoordinatorNotAvailableError(self.coordinator_id) + e = Errors.CoordinatorNotAvailableError(self.coordinator_id) return Future().failure(e) elif not self._client.ready(self.coordinator_id, metadata_priority=False): @@ -794,7 +926,7 @@ def _send_heartbeat_request(self): request = HeartbeatRequest[version](self.group_id, self._generation.generation_id, self._generation.member_id) - log.debug("Heartbeat: %s[%s] %s", request.group, request.generation_id, request.member_id) # pylint: disable-msg=no-member + heartbeat_log.debug("Sending HeartbeatRequest to %s: %s", self.coordinator_id, request) future = Future() _f = self._client.send(self.coordinator_id, request) _f.add_callback(self._handle_heartbeat_response, future, time.time()) @@ -803,41 +935,42 @@ def _send_heartbeat_request(self): return future def _handle_heartbeat_response(self, future, send_time, response): - self.sensors.heartbeat_latency.record((time.time() - send_time) * 1000) + if self._sensors: + self._sensors.heartbeat_latency.record((time.time() - send_time) * 1000) + heartbeat_log.debug("Received heartbeat response for group %s: %s", + self.group_id, response) error_type = Errors.for_code(response.error_code) if error_type is Errors.NoError: - log.debug("Received successful heartbeat response for group %s", - self.group_id) future.success(None) - elif error_type in (Errors.GroupCoordinatorNotAvailableError, - 
Errors.NotCoordinatorForGroupError): - log.warning("Heartbeat failed for group %s: coordinator (node %s)" - " is either not started or not valid", self.group_id, + elif error_type in (Errors.CoordinatorNotAvailableError, + Errors.NotCoordinatorError): + heartbeat_log.warning("Heartbeat failed for group %s: coordinator (node %s)" + " is either not started or not valid", self.group_id, self.coordinator()) self.coordinator_dead(error_type()) future.failure(error_type()) elif error_type is Errors.RebalanceInProgressError: - log.warning("Heartbeat failed for group %s because it is" - " rebalancing", self.group_id) + heartbeat_log.warning("Heartbeat failed for group %s because it is" + " rebalancing", self.group_id) self.request_rejoin() future.failure(error_type()) elif error_type is Errors.IllegalGenerationError: - log.warning("Heartbeat failed for group %s: generation id is not " - " current.", self.group_id) + heartbeat_log.warning("Heartbeat failed for group %s: generation id is not " + " current.", self.group_id) self.reset_generation() future.failure(error_type()) elif error_type is Errors.UnknownMemberIdError: - log.warning("Heartbeat: local member_id was not recognized;" - " this consumer needs to re-join") + heartbeat_log.warning("Heartbeat: local member_id was not recognized;" + " this consumer needs to re-join") self.reset_generation() future.failure(error_type) elif error_type is Errors.GroupAuthorizationFailedError: error = error_type(self.group_id) - log.error("Heartbeat failed: authorization error: %s", error) + heartbeat_log.error("Heartbeat failed: authorization error: %s", error) future.failure(error) else: error = error_type() - log.error("Heartbeat failed: Unhandled error: %s", error) + heartbeat_log.error("Heartbeat failed: Unhandled error: %s", error) future.failure(error) @@ -903,14 +1036,17 @@ def __init__(self, coordinator): def enable(self): with self.coordinator._lock: + heartbeat_log.debug('Enabling heartbeat thread') self.enabled = True self.coordinator.heartbeat.reset_timeouts() self.coordinator._lock.notify() def disable(self): - self.enabled = False + with self.coordinator._lock: + heartbeat_log.debug('Disabling heartbeat thread') + self.enabled = False - def close(self): + def close(self, timeout_ms=None): if self.closed: return self.closed = True @@ -925,93 +1061,111 @@ def close(self): self.coordinator._lock.notify() if self.is_alive(): - self.join(self.coordinator.config['heartbeat_interval_ms'] / 1000) + if timeout_ms is None: + timeout_ms = self.coordinator.config['heartbeat_interval_ms'] + self.join(timeout_ms / 1000) if self.is_alive(): - log.warning("Heartbeat thread did not fully terminate during close") + heartbeat_log.warning("Heartbeat thread did not fully terminate during close") def run(self): try: - log.debug('Heartbeat thread started') + heartbeat_log.debug('Heartbeat thread started: %s', self.coordinator.heartbeat) while not self.closed: self._run_once() except ReferenceError: - log.debug('Heartbeat thread closed due to coordinator gc') + heartbeat_log.debug('Heartbeat thread closed due to coordinator gc') - except RuntimeError as e: - log.error("Heartbeat thread for group %s failed due to unexpected error: %s", - self.coordinator.group_id, e) + except Exception as e: + heartbeat_log.exception("Heartbeat thread for group %s failed due to unexpected error: %s", + self.coordinator.group_id, e) self.failed = e finally: - log.debug('Heartbeat thread closed') + heartbeat_log.debug('Heartbeat thread closed') def _run_once(self): - with 
self.coordinator._client._lock, self.coordinator._lock: - if self.enabled and self.coordinator.state is MemberState.STABLE: - # TODO: When consumer.wakeup() is implemented, we need to - # disable here to prevent propagating an exception to this - # heartbeat thread - # must get client._lock, or maybe deadlock at heartbeat - # failure callback in consumer poll - self.coordinator._client.poll(timeout_ms=0) - - with self.coordinator._lock: + self.coordinator._client._lock.acquire() + self.coordinator._lock.acquire() + try: if not self.enabled: - log.debug('Heartbeat disabled. Waiting') + heartbeat_log.debug('Heartbeat disabled. Waiting') + self.coordinator._client._lock.release() self.coordinator._lock.wait() - log.debug('Heartbeat re-enabled.') + if self.enabled: + heartbeat_log.debug('Heartbeat re-enabled.') return if self.coordinator.state is not MemberState.STABLE: # the group is not stable (perhaps because we left the # group or because the coordinator kicked us out), so # disable heartbeats and wait for the main thread to rejoin. - log.debug('Group state is not stable, disabling heartbeats') + heartbeat_log.debug('Group state is not stable, disabling heartbeats') self.disable() return + # TODO: When consumer.wakeup() is implemented, we need to + # disable here to prevent propagating an exception to this + # heartbeat thread + self.coordinator._client.poll(timeout_ms=0) + if self.coordinator.coordinator_unknown(): future = self.coordinator.lookup_coordinator() if not future.is_done or future.failed(): # the immediate future check ensures that we backoff # properly in the case that no brokers are available # to connect to (and the future is automatically failed). + self.coordinator._client._lock.release() self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000) + elif not self.coordinator.connected(): + self.coordinator._client._lock.release() + self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000) + elif self.coordinator.heartbeat.session_timeout_expired(): # the session timeout has expired without seeing a # successful heartbeat, so we should probably make sure # the coordinator is still healthy. - log.warning('Heartbeat session expired, marking coordinator dead') + heartbeat_log.warning('Heartbeat session expired, marking coordinator dead') self.coordinator.coordinator_dead('Heartbeat session expired') elif self.coordinator.heartbeat.poll_timeout_expired(): # the poll timeout has expired, which means that the # foreground thread has stalled in between calls to # poll(), so we explicitly leave the group. - log.warning('Heartbeat poll expired, leaving group') - ### XXX - # maybe_leave_group acquires client + coordinator lock; - # if we hold coordinator lock before calling, we risk deadlock - # release() is safe here because this is the last code in the current context - self.coordinator._lock.release() + heartbeat_log.warning( + "Consumer poll timeout has expired. This means the time between subsequent calls to poll()" + " was longer than the configured max_poll_interval_ms, which typically implies that" + " the poll loop is spending too much time processing messages. You can address this" + " either by increasing max_poll_interval_ms or by reducing the maximum size of batches" + " returned in poll() with max_poll_records." 
+ ) self.coordinator.maybe_leave_group() elif not self.coordinator.heartbeat.should_heartbeat(): - # poll again after waiting for the retry backoff in case - # the heartbeat failed or the coordinator disconnected - log.log(0, 'Not ready to heartbeat, waiting') - self.coordinator._lock.wait(self.coordinator.config['retry_backoff_ms'] / 1000) + next_hb = self.coordinator.heartbeat.time_to_next_heartbeat() + heartbeat_log.debug('Waiting %0.1f secs to send next heartbeat', next_hb) + self.coordinator._client._lock.release() + self.coordinator._lock.wait(next_hb) else: + heartbeat_log.debug('Sending heartbeat for group %s %s', self.coordinator.group_id, self.coordinator._generation) self.coordinator.heartbeat.sent_heartbeat() future = self.coordinator._send_heartbeat_request() future.add_callback(self._handle_heartbeat_success) future.add_errback(self._handle_heartbeat_failure) + finally: + self.coordinator._lock.release() + try: + # Possibly released in block above to allow coordinator lock wait() + self.coordinator._client._lock.release() + except RuntimeError: + pass + def _handle_heartbeat_success(self, result): with self.coordinator._lock: + heartbeat_log.debug('Heartbeat success') self.coordinator.heartbeat.received_heartbeat() def _handle_heartbeat_failure(self, exception): @@ -1022,8 +1176,10 @@ def _handle_heartbeat_failure(self, exception): # member in the group for as long as the duration of the # rebalance timeout. If we stop sending heartbeats, however, # then the session timeout may expire before we can rejoin. + heartbeat_log.debug('Treating RebalanceInProgressError as successful heartbeat') self.coordinator.heartbeat.received_heartbeat() else: + heartbeat_log.debug('Heartbeat failure: %s', exception) self.coordinator.heartbeat.fail_heartbeat() # wake up the thread if it's sleeping to reschedule the heartbeat self.coordinator._lock.notify() diff --git a/kafka/coordinator/consumer.py b/kafka/coordinator/consumer.py index 36c91ee42..dca10ae1a 100644 --- a/kafka/coordinator/consumer.py +++ b/kafka/coordinator/consumer.py @@ -19,7 +19,7 @@ from kafka.metrics.stats import Avg, Count, Max, Rate from kafka.protocol.commit import OffsetCommitRequest, OffsetFetchRequest from kafka.structs import OffsetAndMetadata, TopicPartition -from kafka.util import WeakMethod +from kafka.util import Timer, WeakMethod log = logging.getLogger(__name__) @@ -39,10 +39,11 @@ class ConsumerCoordinator(BaseCoordinator): 'retry_backoff_ms': 100, 'api_version': (0, 10, 1), 'exclude_internal_topics': True, + 'metrics': None, 'metric_group_prefix': 'consumer' } - def __init__(self, client, subscription, metrics, **configs): + def __init__(self, client, subscription, **configs): """Initialize the coordination manager. Keyword Arguments: @@ -54,7 +55,7 @@ def __init__(self, client, subscription, metrics, **configs): auto_commit_interval_ms (int): milliseconds between automatic offset commits, if enable_auto_commit is True. Default: 5000. default_offset_commit_callback (callable): called as - callback(offsets, exception) response will be either an Exception + callback(offsets, response) response will be either an Exception or None. This callback can be used to trigger custom actions when a commit request completes. assignors (list): List of objects to use to distribute partition @@ -78,7 +79,7 @@ def __init__(self, client, subscription, metrics, **configs): True the only way to receive records from an internal topic is subscribing to it. Requires 0.10+. 
Default: True """ - super(ConsumerCoordinator, self).__init__(client, metrics, **configs) + super(ConsumerCoordinator, self).__init__(client, **configs) self.config = copy.copy(self.DEFAULT_CONFIG) for key in self.config: @@ -94,6 +95,7 @@ def __init__(self, client, subscription, metrics, **configs): self.auto_commit_interval = self.config['auto_commit_interval_ms'] / 1000 self.next_auto_commit_deadline = None self.completed_offset_commits = collections.deque() + self._offset_fetch_futures = dict() if self.config['default_offset_commit_callback'] is None: self.config['default_offset_commit_callback'] = self._default_offset_commit_callback @@ -120,8 +122,11 @@ def __init__(self, client, subscription, metrics, **configs): else: self.next_auto_commit_deadline = time.time() + self.auto_commit_interval - self.consumer_sensors = ConsumerCoordinatorMetrics( - metrics, self.config['metric_group_prefix'], self._subscription) + if self.config['metrics']: + self._consumer_sensors = ConsumerCoordinatorMetrics( + self.config['metrics'], self.config['metric_group_prefix'], self._subscription) + else: + self._consumer_sensors = None self._cluster.request_update() self._cluster.add_listener(WeakMethod(self._handle_metadata_update)) @@ -203,8 +208,8 @@ def _auto_assign_all_partitions(self): def _build_metadata_snapshot(self, subscription, cluster): metadata_snapshot = {} for topic in subscription.group_subscription(): - partitions = cluster.partitions_for_topic(topic) or [] - metadata_snapshot[topic] = set(partitions) + partitions = cluster.partitions_for_topic(topic) + metadata_snapshot[topic] = partitions or set() return metadata_snapshot def _lookup_assignor(self, name): @@ -225,10 +230,6 @@ def _on_join_complete(self, generation, member_id, protocol, assignment = ConsumerProtocol.ASSIGNMENT.decode(member_assignment_bytes) - # set the flag to refresh last committed offsets - self._subscription.needs_fetch_committed_offsets = True - - # update partition assignment try: self._subscription.assign_from_subscribed(assignment.partitions()) except ValueError as e: @@ -249,16 +250,16 @@ def _on_join_complete(self, generation, member_id, protocol, assigned, self.group_id) # execute the user's callback after rebalance - if self._subscription.listener: + if self._subscription.rebalance_listener: try: - self._subscription.listener.on_partitions_assigned(assigned) + self._subscription.rebalance_listener.on_partitions_assigned(assigned) except Exception: - log.exception("User provided listener %s for group %s" + log.exception("User provided rebalance listener %s for group %s" " failed on partition assignment: %s", - self._subscription.listener, self.group_id, + self._subscription.rebalance_listener, self.group_id, assigned) - def poll(self): + def poll(self, timeout_ms=None): """ Poll for coordinator events. Only applicable if group_id is set, and broker version supports GroupCoordinators. This ensures that the @@ -267,33 +268,46 @@ def poll(self): periodic offset commits if they are enabled. """ if self.group_id is None: - return + return True - self._invoke_completed_offset_commit_callbacks() - self.ensure_coordinator_ready() - - if self.config['api_version'] >= (0, 9) and self._subscription.partitions_auto_assigned(): - if self.need_rejoin(): - # due to a race condition between the initial metadata fetch and the - # initial rebalance, we need to ensure that the metadata is fresh - # before joining initially, and then request the metadata update. 
If - # metadata update arrives while the rebalance is still pending (for - # example, when the join group is still inflight), then we will lose - # track of the fact that we need to rebalance again to reflect the - # change to the topic subscription. Without ensuring that the - # metadata is fresh, any metadata update that changes the topic - # subscriptions and arrives while a rebalance is in progress will - # essentially be ignored. See KAFKA-3949 for the complete - # description of the problem. - if self._subscription.subscribed_pattern: - metadata_update = self._client.cluster.request_update() - self._client.poll(future=metadata_update) - - self.ensure_active_group() - - self.poll_heartbeat() - - self._maybe_auto_commit_offsets_async() + timer = Timer(timeout_ms) + try: + self._invoke_completed_offset_commit_callbacks() + if not self.ensure_coordinator_ready(timeout_ms=timer.timeout_ms): + log.debug('coordinator.poll: timeout in ensure_coordinator_ready; returning early') + return False + + if self.config['api_version'] >= (0, 9) and self._subscription.partitions_auto_assigned(): + if self.need_rejoin(): + # due to a race condition between the initial metadata fetch and the + # initial rebalance, we need to ensure that the metadata is fresh + # before joining initially, and then request the metadata update. If + # metadata update arrives while the rebalance is still pending (for + # example, when the join group is still inflight), then we will lose + # track of the fact that we need to rebalance again to reflect the + # change to the topic subscription. Without ensuring that the + # metadata is fresh, any metadata update that changes the topic + # subscriptions and arrives while a rebalance is in progress will + # essentially be ignored. See KAFKA-3949 for the complete + # description of the problem. 
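Pattern subscriptions are what make this metadata-freshness step necessary: the set of matched topics can change with any metadata update, and each such change must trigger another rebalance. For reference, a minimal pattern subscription with a rebalance listener, using the public KafkaConsumer.subscribe(pattern=..., listener=...) API and the ConsumerRebalanceListener base class shown earlier in this diff; the broker address, group id, topic pattern, and listener class name are placeholders:

    from kafka import KafkaConsumer, ConsumerRebalanceListener

    class LoggingRebalanceListener(ConsumerRebalanceListener):
        def on_partitions_revoked(self, revoked):
            print("revoked:", revoked)

        def on_partitions_assigned(self, assigned):
            print("assigned:", assigned)

    consumer = KafkaConsumer(bootstrap_servers="localhost:9092", group_id="demo-group")
    # Any topic created later that matches the regex is picked up on a
    # subsequent metadata refresh, which in turn triggers a rebalance.
    consumer.subscribe(pattern="metrics-.*", listener=LoggingRebalanceListener())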
+ if self._subscription.subscribed_pattern: + metadata_update = self._client.cluster.request_update() + self._client.poll(future=metadata_update, timeout_ms=timer.timeout_ms) + if not metadata_update.is_done: + log.debug('coordinator.poll: timeout updating metadata; returning early') + return False + + if not self.ensure_active_group(timeout_ms=timer.timeout_ms): + log.debug('coordinator.poll: timeout in ensure_active_group; returning early') + return False + + self.poll_heartbeat() + + self._maybe_auto_commit_offsets_async() + return True + + except Errors.KafkaTimeoutError: + return False def time_to_next_poll(self): """Return seconds (float) remaining until :meth:`.poll` should be called again""" @@ -343,21 +357,21 @@ def _perform_assignment(self, leader_id, assignment_strategy, members): group_assignment[member_id] = assignment return group_assignment - def _on_join_prepare(self, generation, member_id): + def _on_join_prepare(self, generation, member_id, timeout_ms=None): # commit offsets prior to rebalance if auto-commit enabled - self._maybe_auto_commit_offsets_sync() + self._maybe_auto_commit_offsets_sync(timeout_ms=timeout_ms) # execute the user's callback before rebalance log.info("Revoking previously assigned partitions %s for group %s", self._subscription.assigned_partitions(), self.group_id) - if self._subscription.listener: + if self._subscription.rebalance_listener: try: revoked = set(self._subscription.assigned_partitions()) - self._subscription.listener.on_partitions_revoked(revoked) + self._subscription.rebalance_listener.on_partitions_revoked(revoked) except Exception: - log.exception("User provided subscription listener %s" + log.exception("User provided subscription rebalance listener %s" " for group %s failed on_partitions_revoked", - self._subscription.listener, self.group_id) + self._subscription.rebalance_listener, self.group_id) self._is_leader = False self._subscription.reset_group_subscription() @@ -386,17 +400,19 @@ def need_rejoin(self): return super(ConsumerCoordinator, self).need_rejoin() - def refresh_committed_offsets_if_needed(self): + def refresh_committed_offsets_if_needed(self, timeout_ms=None): """Fetch committed offsets for assigned partitions.""" - if self._subscription.needs_fetch_committed_offsets: - offsets = self.fetch_committed_offsets(self._subscription.assigned_partitions()) - for partition, offset in six.iteritems(offsets): - # verify assignment is still active - if self._subscription.is_assigned(partition): - self._subscription.assignment[partition].committed = offset - self._subscription.needs_fetch_committed_offsets = False - - def fetch_committed_offsets(self, partitions): + missing_fetch_positions = set(self._subscription.missing_fetch_positions()) + try: + offsets = self.fetch_committed_offsets(missing_fetch_positions, timeout_ms=timeout_ms) + except Errors.KafkaTimeoutError: + return False + for partition, offset in six.iteritems(offsets): + log.debug("Setting offset for partition %s to the committed offset %s", partition, offset.offset) + self._subscription.seek(partition, offset.offset) + return True + + def fetch_committed_offsets(self, partitions, timeout_ms=None): """Fetch the current committed offsets for specified partitions Arguments: @@ -404,26 +420,45 @@ def fetch_committed_offsets(self, partitions): Returns: dict: {TopicPartition: OffsetAndMetadata} + + Raises: + KafkaTimeoutError if timeout_ms provided """ if not partitions: return {} + future_key = frozenset(partitions) + timer = Timer(timeout_ms) while True: - 
self.ensure_coordinator_ready() + if not self.ensure_coordinator_ready(timeout_ms=timer.timeout_ms): + timer.maybe_raise() # contact coordinator to fetch committed offsets - future = self._send_offset_fetch_request(partitions) - self._client.poll(future=future) + if future_key in self._offset_fetch_futures: + future = self._offset_fetch_futures[future_key] + else: + future = self._send_offset_fetch_request(partitions) + self._offset_fetch_futures[future_key] = future - if future.succeeded(): - return future.value + self._client.poll(future=future, timeout_ms=timer.timeout_ms) - if not future.retriable(): - raise future.exception # pylint: disable-msg=raising-bad-type + if future.is_done: + del self._offset_fetch_futures[future_key] - time.sleep(self.config['retry_backoff_ms'] / 1000) + if future.succeeded(): + return future.value - def close(self, autocommit=True): + elif not future.retriable(): + raise future.exception # pylint: disable-msg=raising-bad-type + + # future failed but is retriable, or is not done yet + if timer.timeout_ms is None or timer.timeout_ms > self.config['retry_backoff_ms']: + time.sleep(self.config['retry_backoff_ms'] / 1000) + else: + time.sleep(timer.timeout_ms / 1000) + timer.maybe_raise() + + def close(self, autocommit=True, timeout_ms=None): """Close the coordinator, leave the current group, and reset local generation / member_id. @@ -434,14 +469,14 @@ def close(self, autocommit=True): """ try: if autocommit: - self._maybe_auto_commit_offsets_sync() + self._maybe_auto_commit_offsets_sync(timeout_ms=timeout_ms) finally: - super(ConsumerCoordinator, self).close() + super(ConsumerCoordinator, self).close(timeout_ms=timeout_ms) def _invoke_completed_offset_commit_callbacks(self): while self.completed_offset_commits: - callback, offsets, exception = self.completed_offset_commits.popleft() - callback(offsets, exception) + callback, offsets, res_or_exc = self.completed_offset_commits.popleft() + callback(offsets, res_or_exc) def commit_offsets_async(self, offsets, callback=None): """Commit specific offsets asynchronously. @@ -481,18 +516,18 @@ def commit_offsets_async(self, offsets, callback=None): return future def _do_commit_offsets_async(self, offsets, callback=None): - assert self.config['api_version'] >= (0, 8, 1), 'Unsupported Broker API' + if self.config['api_version'] < (0, 8, 1): + raise Errors.UnsupportedVersionError('OffsetCommitRequest requires 0.8.1+ broker') assert all(map(lambda k: isinstance(k, TopicPartition), offsets)) assert all(map(lambda v: isinstance(v, OffsetAndMetadata), offsets.values())) if callback is None: callback = self.config['default_offset_commit_callback'] - self._subscription.needs_fetch_committed_offsets = True future = self._send_offset_commit_request(offsets) future.add_both(lambda res: self.completed_offset_commits.appendleft((callback, offsets, res))) return future - def commit_offsets_sync(self, offsets): + def commit_offsets_sync(self, offsets, timeout_ms=None): """Commit specific offsets synchronously. 
This method will retry until the commit completes successfully or an @@ -503,7 +538,8 @@ def commit_offsets_sync(self, offsets): Raises error on failure """ - assert self.config['api_version'] >= (0, 8, 1), 'Unsupported Broker API' + if self.config['api_version'] < (0, 8, 1): + raise Errors.UnsupportedVersionError('OffsetCommitRequest requires 0.8.1+ broker') assert all(map(lambda k: isinstance(k, TopicPartition), offsets)) assert all(map(lambda v: isinstance(v, OffsetAndMetadata), offsets.values())) @@ -511,24 +547,31 @@ def commit_offsets_sync(self, offsets): if not offsets: return + timer = Timer(timeout_ms) while True: - self.ensure_coordinator_ready() + self.ensure_coordinator_ready(timeout_ms=timer.timeout_ms) future = self._send_offset_commit_request(offsets) - self._client.poll(future=future) + self._client.poll(future=future, timeout_ms=timer.timeout_ms) - if future.succeeded(): - return future.value + if future.is_done: + if future.succeeded(): + return future.value - if not future.retriable(): - raise future.exception # pylint: disable-msg=raising-bad-type + elif not future.retriable(): + raise future.exception # pylint: disable-msg=raising-bad-type - time.sleep(self.config['retry_backoff_ms'] / 1000) + # future failed but is retriable, or it is still pending + if timer.timeout_ms is None or timer.timeout_ms > self.config['retry_backoff_ms']: + time.sleep(self.config['retry_backoff_ms'] / 1000) + else: + time.sleep(timer.timeout_ms / 1000) + timer.maybe_raise() - def _maybe_auto_commit_offsets_sync(self): + def _maybe_auto_commit_offsets_sync(self, timeout_ms=None): if self.config['enable_auto_commit']: try: - self.commit_offsets_sync(self._subscription.all_consumed_offsets()) + self.commit_offsets_sync(self._subscription.all_consumed_offsets(), timeout_ms=timeout_ms) # The three main group membership errors are known and should not # require a stacktrace -- just a warning @@ -556,7 +599,8 @@ def _send_offset_commit_request(self, offsets): Returns: Future: indicating whether the commit was successful or not """ - assert self.config['api_version'] >= (0, 8, 1), 'Unsupported Broker API' + if self.config['api_version'] < (0, 8, 1): + raise Errors.UnsupportedVersionError('OffsetCommitRequest requires 0.8.1+ broker') assert all(map(lambda k: isinstance(k, TopicPartition), offsets)) assert all(map(lambda v: isinstance(v, OffsetAndMetadata), offsets.values())) @@ -566,26 +610,43 @@ def _send_offset_commit_request(self, offsets): node_id = self.coordinator() if node_id is None: - return Future().failure(Errors.GroupCoordinatorNotAvailableError) + return Future().failure(Errors.CoordinatorNotAvailableError) + # Verify node is ready + if not self._client.ready(node_id, metadata_priority=False): + log.debug("Node %s not ready -- failing offset commit request", + node_id) + return Future().failure(Errors.NodeNotReadyError) # create the offset commit request offset_data = collections.defaultdict(dict) for tp, offset in six.iteritems(offsets): offset_data[tp.topic][tp.partition] = offset - if self._subscription.partitions_auto_assigned(): - generation = self.generation() or Generation.NO_GENERATION + version = self._client.api_version(OffsetCommitRequest, max_version=6) + if version > 1 and self._subscription.partitions_auto_assigned(): + generation = self.generation_if_stable() else: generation = Generation.NO_GENERATION # if the generation is None, we are not part of an active group # (and we expect to be). 
The only thing we can do is fail the commit # and let the user rejoin the group in poll() - if self.config['api_version'] >= (0, 9) and generation is None: - return Future().failure(Errors.CommitFailedError()) + if generation is None: + log.info("Failing OffsetCommit request since the consumer is not part of an active group") + if self.rebalance_in_progress(): + # if the client knows it is already rebalancing, we can use RebalanceInProgressError instead of + # CommitFailedError to indicate this is not a fatal error + return Future().failure(Errors.RebalanceInProgressError( + "Offset commit cannot be completed since the" + " consumer is undergoing a rebalance for auto partition assignment. You can try completing the rebalance" + " by calling poll() and then retry the operation.")) + else: + return Future().failure(Errors.CommitFailedError( + "Offset commit cannot be completed since the" + " consumer is not part of an active group for auto partition assignment; it is likely that the consumer" + " was kicked out of the group.")) - version = self._client.api_version(OffsetCommitRequest, max_version=6) if version == 0: request = OffsetCommitRequest[version]( self.group_id, @@ -665,8 +726,10 @@ def _send_offset_commit_request(self, offsets): return future def _handle_offset_commit_response(self, offsets, future, send_time, response): + log.debug("Received OffsetCommitResponse: %s", response) # TODO look at adding request_latency_ms to response (like java kafka) - self.consumer_sensors.commit_latency.record((time.time() - send_time) * 1000) + if self._consumer_sensors: + self._consumer_sensors.commit_latency.record((time.time() - send_time) * 1000) unauthorized_topics = set() for topic, partitions in response.topics: @@ -678,8 +741,6 @@ def _handle_offset_commit_response(self, offsets, future, send_time, response): if error_type is Errors.NoError: log.debug("Group %s committed offset %s for partition %s", self.group_id, offset, tp) - if self._subscription.is_assigned(tp): - self._subscription.assignment[tp].committed = offset elif error_type is Errors.GroupAuthorizationFailedError: log.error("Not authorized to commit offsets for group %s", self.group_id) @@ -694,29 +755,38 @@ def _handle_offset_commit_response(self, offsets, future, send_time, response): " %s", self.group_id, tp, error_type.__name__) future.failure(error_type()) return - elif error_type is Errors.GroupLoadInProgressError: + elif error_type is Errors.CoordinatorLoadInProgressError: # just retry log.debug("OffsetCommit for group %s failed: %s", self.group_id, error_type.__name__) future.failure(error_type(self.group_id)) return - elif error_type in (Errors.GroupCoordinatorNotAvailableError, - Errors.NotCoordinatorForGroupError, + elif error_type in (Errors.CoordinatorNotAvailableError, + Errors.NotCoordinatorError, Errors.RequestTimedOutError): log.debug("OffsetCommit for group %s failed: %s", self.group_id, error_type.__name__) self.coordinator_dead(error_type()) future.failure(error_type(self.group_id)) return + elif error_type is Errors.RebalanceInProgressError: + # Consumer never tries to commit offset in between join-group and sync-group, + # and hence on broker-side it is not expected to see a commit offset request + # during CompletingRebalance phase; if it ever happens then broker would return + # this error. In this case we should just treat as a fatal CommitFailed exception. 
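Editor's note: with the branch above, a commit attempted while the client knows it is rebalancing fails with `RebalanceInProgressError` (retry after re-joining) rather than the fatal `CommitFailedError` (kicked out of the group). A hedged sketch of application-level handling, assuming a `KafkaConsumer` named `consumer`; exactly which of the two exceptions propagates out of `KafkaConsumer.commit()` in a given situation is an assumption here:

```python
from kafka.errors import CommitFailedError, RebalanceInProgressError

def safe_commit(consumer):
    try:
        consumer.commit()                # synchronous commit of consumed offsets
        return True
    except RebalanceInProgressError:
        consumer.poll(timeout_ms=0)      # let the consumer finish re-joining, then retry later
        return False
    except CommitFailedError:
        return False                     # partitions were reassigned; reprocess as needed
```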
+ # However, we do not need to reset generations and just request re-join, such that + # if the caller decides to proceed and poll, it would still try to proceed and re-join normally. + self.request_rejoin() + future.failure(Errors.CommitFailedError(error_type())) + return elif error_type in (Errors.UnknownMemberIdError, - Errors.IllegalGenerationError, - Errors.RebalanceInProgressError): - # need to re-join group + Errors.IllegalGenerationError): + # need reset generation and re-join group error = error_type(self.group_id) - log.debug("OffsetCommit for group %s failed: %s", - self.group_id, error) + log.warning("OffsetCommit for group %s failed: %s", + self.group_id, error) self.reset_generation() - future.failure(Errors.CommitFailedError()) + future.failure(Errors.CommitFailedError(error_type())) return else: log.error("Group %s failed to commit partition %s at offset" @@ -744,17 +814,18 @@ def _send_offset_fetch_request(self, partitions): Returns: Future: resolves to dict of offsets: {TopicPartition: OffsetAndMetadata} """ - assert self.config['api_version'] >= (0, 8, 1), 'Unsupported Broker API' + if self.config['api_version'] < (0, 8, 1): + raise Errors.UnsupportedVersionError('OffsetFetchRequest requires 0.8.1+ broker') assert all(map(lambda k: isinstance(k, TopicPartition), partitions)) if not partitions: return Future().success({}) node_id = self.coordinator() if node_id is None: - return Future().failure(Errors.GroupCoordinatorNotAvailableError) + return Future().failure(Errors.CoordinatorNotAvailableError) # Verify node is ready - if not self._client.ready(node_id): + if not self._client.ready(node_id, metadata_priority=False): log.debug("Node %s not ready -- failing offset fetch request", node_id) return Future().failure(Errors.NodeNotReadyError) @@ -782,21 +853,22 @@ def _send_offset_fetch_request(self, partitions): return future def _handle_offset_fetch_response(self, future, response): + log.debug("Received OffsetFetchResponse: %s", response) if response.API_VERSION >= 2 and response.error_code != Errors.NoError.errno: error_type = Errors.for_code(response.error_code) log.debug("Offset fetch failed: %s", error_type.__name__) error = error_type() - if error_type is Errors.GroupLoadInProgressError: + if error_type is Errors.CoordinatorLoadInProgressError: # Retry future.failure(error) - elif error_type is Errors.NotCoordinatorForGroupError: + elif error_type is Errors.NotCoordinatorError: # re-discover the coordinator and retry self.coordinator_dead(error) future.failure(error) elif error_type is Errors.GroupAuthorizationFailedError: future.failure(error) else: - log.error("Unknown error fetching offsets for %s: %s", tp, error) + log.error("Unknown error fetching offsets: %s", error) future.failure(error) return @@ -808,17 +880,17 @@ def _handle_offset_fetch_response(self, future, response): leader_epoch, metadata, error_code = partition_data[2:] else: metadata, error_code = partition_data[2:] - leader_epoch = -1 + leader_epoch = -1 # noqa: F841 tp = TopicPartition(topic, partition) error_type = Errors.for_code(error_code) if error_type is not Errors.NoError: error = error_type() log.debug("Group %s failed to fetch offset for partition" " %s: %s", self.group_id, tp, error) - if error_type is Errors.GroupLoadInProgressError: + if error_type is Errors.CoordinatorLoadInProgressError: # just retry future.failure(error) - elif error_type is Errors.NotCoordinatorForGroupError: + elif error_type is Errors.NotCoordinatorError: # re-discover the coordinator and retry 
self.coordinator_dead(error) future.failure(error) @@ -842,28 +914,34 @@ def _handle_offset_fetch_response(self, future, response): " %s", self.group_id, tp) future.success(offsets) - def _default_offset_commit_callback(self, offsets, exception): - if exception is not None: - log.error("Offset commit failed: %s", exception) - - def _commit_offsets_async_on_complete(self, offsets, exception): - if exception is not None: + def _default_offset_commit_callback(self, offsets, res_or_exc): + if isinstance(res_or_exc, Exception): log.warning("Auto offset commit failed for group %s: %s", - self.group_id, exception) - if getattr(exception, 'retriable', False): - self.next_auto_commit_deadline = min(time.time() + self.config['retry_backoff_ms'] / 1000, self.next_auto_commit_deadline) + self.group_id, res_or_exc) else: log.debug("Completed autocommit of offsets %s for group %s", offsets, self.group_id) + def _commit_offsets_async_on_complete(self, offsets, res_or_exc): + if isinstance(res_or_exc, Exception) and getattr(res_or_exc, 'retriable', False): + self.next_auto_commit_deadline = min(time.time() + self.config['retry_backoff_ms'] / 1000, self.next_auto_commit_deadline) + self.config['default_offset_commit_callback'](offsets, res_or_exc) + def _maybe_auto_commit_offsets_async(self): if self.config['enable_auto_commit']: if self.coordinator_unknown(): self.next_auto_commit_deadline = time.time() + self.config['retry_backoff_ms'] / 1000 elif time.time() > self.next_auto_commit_deadline: self.next_auto_commit_deadline = time.time() + self.auto_commit_interval - self.commit_offsets_async(self._subscription.all_consumed_offsets(), - self._commit_offsets_async_on_complete) + self._do_auto_commit_offsets_async() + + def maybe_auto_commit_offsets_now(self): + if self.config['enable_auto_commit'] and not self.coordinator_unknown(): + self._do_auto_commit_offsets_async() + + def _do_auto_commit_offsets_async(self): + self.commit_offsets_async(self._subscription.all_consumed_offsets(), + self._commit_offsets_async_on_complete) class ConsumerCoordinatorMetrics(object): diff --git a/kafka/coordinator/heartbeat.py b/kafka/coordinator/heartbeat.py index 2f5930b63..edc9f4a36 100644 --- a/kafka/coordinator/heartbeat.py +++ b/kafka/coordinator/heartbeat.py @@ -1,8 +1,13 @@ from __future__ import absolute_import, division import copy +import logging import time +from kafka.errors import KafkaConfigurationError + +log = logging.getLogger(__name__) + class Heartbeat(object): DEFAULT_CONFIG = { @@ -20,9 +25,13 @@ def __init__(self, **configs): self.config[key] = configs[key] if self.config['group_id'] is not None: - assert (self.config['heartbeat_interval_ms'] - <= self.config['session_timeout_ms']), ( - 'Heartbeat interval must be lower than the session timeout') + if self.config['heartbeat_interval_ms'] >= self.config['session_timeout_ms']: + raise KafkaConfigurationError('Heartbeat interval must be lower than the session timeout (%s v %s)' % ( + self.config['heartbeat_interval_ms'], self.config['session_timeout_ms'])) + if self.config['heartbeat_interval_ms'] > (self.config['session_timeout_ms'] / 3): + log.warning('heartbeat_interval_ms is high relative to session_timeout_ms (%s v %s).' 
+ ' Recommend heartbeat interval less than 1/3rd of session timeout', + self.config['heartbeat_interval_ms'], self.config['session_timeout_ms']) self.last_send = -1 * float('inf') self.last_receive = -1 * float('inf') @@ -66,3 +75,10 @@ def reset_timeouts(self): def poll_timeout_expired(self): return (time.time() - self.last_poll) > (self.config['max_poll_interval_ms'] / 1000) + + def __str__(self): + return ("").format(**self.config) diff --git a/kafka/errors.py b/kafka/errors.py index aaba89d39..ac4eadfec 100644 --- a/kafka/errors.py +++ b/kafka/errors.py @@ -16,21 +16,37 @@ def __str__(self): super(KafkaError, self).__str__()) +class Cancelled(KafkaError): + retriable = True + + +class CommitFailedError(KafkaError): + def __init__(self, *args): + if not args: + args = ("Commit cannot be completed since the group has already" + " rebalanced and assigned the partitions to another member.",) + super(CommitFailedError, self).__init__(*args) + + +class IllegalArgumentError(KafkaError): + pass + + class IllegalStateError(KafkaError): pass -class IllegalArgumentError(KafkaError): +class IncompatibleBrokerVersion(KafkaError): pass -class NoBrokersAvailable(KafkaError): - retriable = True - invalid_metadata = True +class KafkaConfigurationError(KafkaError): + pass -class NodeNotReadyError(KafkaError): +class KafkaConnectionError(KafkaError): retriable = True + invalid_metadata = True class KafkaProtocolError(KafkaError): @@ -41,52 +57,46 @@ class CorrelationIdError(KafkaProtocolError): retriable = True -class Cancelled(KafkaError): +class KafkaTimeoutError(KafkaError): retriable = True -class TooManyInFlightRequests(KafkaError): +class MetadataEmptyBrokerList(KafkaError): retriable = True -class StaleMetadata(KafkaError): +class NoBrokersAvailable(KafkaError): retriable = True invalid_metadata = True -class MetadataEmptyBrokerList(KafkaError): +class NoOffsetForPartitionError(KafkaError): + pass + + +class NodeNotReadyError(KafkaError): retriable = True -class UnrecognizedBrokerVersion(KafkaError): +class QuotaViolationError(KafkaError): pass -class IncompatibleBrokerVersion(KafkaError): - pass +class StaleMetadata(KafkaError): + retriable = True + invalid_metadata = True -class CommitFailedError(KafkaError): - def __init__(self, *args, **kwargs): - super(CommitFailedError, self).__init__( - """Commit cannot be completed since the group has already - rebalanced and assigned the partitions to another member. - This means that the time between subsequent calls to poll() - was longer than the configured max_poll_interval_ms, which - typically implies that the poll loop is spending too much - time message processing. You can address this either by - increasing the rebalance timeout with max_poll_interval_ms, - or by reducing the maximum size of batches returned in poll() - with max_poll_records. 
- """, *args, **kwargs) - - -class AuthenticationMethodNotSupported(KafkaError): +class TooManyInFlightRequests(KafkaError): + retriable = True + + +class UnrecognizedBrokerVersion(KafkaError): pass -class AuthenticationFailedError(KafkaError): - retriable = False +class UnsupportedCodecError(KafkaError): + pass class BrokerResponseError(KafkaError): @@ -101,6 +111,10 @@ def __str__(self): super(BrokerResponseError, self).__str__()) +class AuthorizationError(BrokerResponseError): + pass + + class NoError(BrokerResponseError): errno = 0 message = 'NO_ERROR' @@ -120,14 +134,14 @@ class OffsetOutOfRangeError(BrokerResponseError): ' maintained by the server for the given topic/partition.') -class CorruptRecordException(BrokerResponseError): +class CorruptRecordError(BrokerResponseError): errno = 2 message = 'CORRUPT_MESSAGE' description = ('This message has failed its CRC checksum, exceeds the' ' valid size, or is otherwise corrupt.') # Backward compatibility -InvalidMessageError = CorruptRecordException +CorruptRecordException = CorruptRecordError class UnknownTopicOrPartitionError(BrokerResponseError): @@ -218,33 +232,28 @@ class NetworkExceptionError(BrokerResponseError): invalid_metadata = True -class GroupLoadInProgressError(BrokerResponseError): +class CoordinatorLoadInProgressError(BrokerResponseError): errno = 14 - message = 'OFFSETS_LOAD_IN_PROGRESS' - description = ('The broker returns this error code for an offset fetch' - ' request if it is still loading offsets (after a leader' - ' change for that offsets topic partition), or in response' - ' to group membership requests (such as heartbeats) when' - ' group metadata is being loaded by the coordinator.') + message = 'COORDINATOR_LOAD_IN_PROGRESS' + description = ('The broker returns this error code for txn or group requests,' + ' when the coordinator is loading and hence cant process requests') retriable = True -class GroupCoordinatorNotAvailableError(BrokerResponseError): +class CoordinatorNotAvailableError(BrokerResponseError): errno = 15 - message = 'CONSUMER_COORDINATOR_NOT_AVAILABLE' - description = ('The broker returns this error code for group coordinator' - ' requests, offset commits, and most group management' + message = 'COORDINATOR_NOT_AVAILABLE' + description = ('The broker returns this error code for consumer and transaction' ' requests if the offsets topic has not yet been created, or' - ' if the group coordinator is not active.') + ' if the group/txn coordinator is not active.') retriable = True -class NotCoordinatorForGroupError(BrokerResponseError): +class NotCoordinatorError(BrokerResponseError): errno = 16 - message = 'NOT_COORDINATOR_FOR_CONSUMER' - description = ('The broker returns this error code if it receives an offset' - ' fetch or commit request for a group that it is not a' - ' coordinator for.') + message = 'NOT_COORDINATOR' + description = ('The broker returns this error code if it is not the correct' + ' coordinator for the specified consumer or transaction group') retriable = True @@ -341,21 +350,21 @@ class InvalidCommitOffsetSizeError(BrokerResponseError): ' because of oversize metadata.') -class TopicAuthorizationFailedError(BrokerResponseError): +class TopicAuthorizationFailedError(AuthorizationError): errno = 29 message = 'TOPIC_AUTHORIZATION_FAILED' description = ('Returned by the broker when the client is not authorized to' ' access the requested topic.') -class GroupAuthorizationFailedError(BrokerResponseError): +class GroupAuthorizationFailedError(AuthorizationError): errno = 30 message = 
'GROUP_AUTHORIZATION_FAILED' description = ('Returned by the broker when the client is not authorized to' ' access a particular groupId.') -class ClusterAuthorizationFailedError(BrokerResponseError): +class ClusterAuthorizationFailedError(AuthorizationError): errno = 31 message = 'CLUSTER_AUTHORIZATION_FAILED' description = ('Returned by the broker when the client is not authorized to' @@ -502,7 +511,7 @@ class TransactionCoordinatorFencedError(BrokerResponseError): retriable = False -class TransactionalIdAuthorizationFailedError(BrokerResponseError): +class TransactionalIdAuthorizationFailedError(AuthorizationError): errno = 53 message = 'TRANSACTIONAL_ID_AUTHORIZATION_FAILED' description = 'Transactional Id authorization failed.' @@ -587,7 +596,7 @@ class DelegationTokenRequestNotAllowedError(BrokerResponseError): retriable = False -class DelegationTokenAuthorizationFailedError(BrokerResponseError): +class DelegationTokenAuthorizationFailedError(AuthorizationError): errno = 65 message = 'DELEGATION_TOKEN_AUTHORIZATION_FAILED' description = 'Delegation Token authorization failed.' @@ -1036,47 +1045,6 @@ class VoterNotFoundError(BrokerResponseError): retriable = False -class KafkaUnavailableError(KafkaError): - pass - - -class KafkaTimeoutError(KafkaError): - pass - - -class FailedPayloadsError(KafkaError): - def __init__(self, payload, *args): - super(FailedPayloadsError, self).__init__(*args) - self.payload = payload - - -class KafkaConnectionError(KafkaError): - retriable = True - invalid_metadata = True - - -class ProtocolError(KafkaError): - pass - - -class UnsupportedCodecError(KafkaError): - pass - - -class KafkaConfigurationError(KafkaError): - pass - - -class QuotaViolationError(KafkaError): - pass - - -class AsyncProducerQueueFull(KafkaError): - def __init__(self, failed_msgs, *args): - super(AsyncProducerQueueFull, self).__init__(*args) - self.failed_msgs = failed_msgs - - def _iter_broker_errors(): for name, obj in inspect.getmembers(sys.modules[__name__]): if inspect.isclass(obj) and issubclass(obj, BrokerResponseError) and obj != BrokerResponseError: diff --git a/kafka/future.py b/kafka/future.py index d0f3c6658..2af061ee7 100644 --- a/kafka/future.py +++ b/kafka/future.py @@ -2,6 +2,7 @@ import functools import logging +import threading log = logging.getLogger(__name__) @@ -15,6 +16,7 @@ def __init__(self): self.exception = None self._callbacks = [] self._errbacks = [] + self._lock = threading.Lock() def succeeded(self): return self.is_done and not bool(self.exception) @@ -30,37 +32,46 @@ def retriable(self): def success(self, value): assert not self.is_done, 'Future is already complete' - self.value = value - self.is_done = True + with self._lock: + self.value = value + self.is_done = True if self._callbacks: self._call_backs('callback', self._callbacks, self.value) return self def failure(self, e): assert not self.is_done, 'Future is already complete' - self.exception = e if type(e) is not type else e() - assert isinstance(self.exception, BaseException), ( + exception = e if type(e) is not type else e() + assert isinstance(exception, BaseException), ( 'future failed without an exception') - self.is_done = True + with self._lock: + self.exception = exception + self.is_done = True self._call_backs('errback', self._errbacks, self.exception) return self def add_callback(self, f, *args, **kwargs): if args or kwargs: f = functools.partial(f, *args, **kwargs) - if self.is_done and not self.exception: - self._call_backs('callback', [f], self.value) - else: - 
self._callbacks.append(f) + with self._lock: + if not self.is_done: + self._callbacks.append(f) + elif self.succeeded(): + self._lock.release() + self._call_backs('callback', [f], self.value) + self._lock.acquire() return self def add_errback(self, f, *args, **kwargs): if args or kwargs: f = functools.partial(f, *args, **kwargs) - if self.is_done and self.exception: - self._call_backs('errback', [f], self.exception) - else: - self._errbacks.append(f) + with self._lock: + if not self.is_done: + self._errbacks.append(f) + elif self.failed(): + self._lock.release() + self._call_backs('errback', [f], self.exception) + self._lock.acquire() return self def add_both(self, f, *args, **kwargs): diff --git a/kafka/metrics/compound_stat.py b/kafka/metrics/compound_stat.py index ac92480dc..f5b482da2 100644 --- a/kafka/metrics/compound_stat.py +++ b/kafka/metrics/compound_stat.py @@ -3,16 +3,16 @@ import abc from kafka.metrics.stat import AbstractStat +from kafka.vendor.six import add_metaclass +@add_metaclass(abc.ABCMeta) class AbstractCompoundStat(AbstractStat): """ A compound stat is a stat where a single measurement and associated data structure feeds many metrics. This is the example for a histogram which has many associated percentiles. """ - __metaclass__ = abc.ABCMeta - def stats(self): """ Return list of NamedMeasurable @@ -21,6 +21,8 @@ def stats(self): class NamedMeasurable(object): + __slots__ = ('_name', '_stat') + def __init__(self, metric_name, measurable_stat): self._name = metric_name self._stat = measurable_stat diff --git a/kafka/metrics/kafka_metric.py b/kafka/metrics/kafka_metric.py index 9fb8d89f1..fef684850 100644 --- a/kafka/metrics/kafka_metric.py +++ b/kafka/metrics/kafka_metric.py @@ -4,6 +4,8 @@ class KafkaMetric(object): + __slots__ = ('_metric_name', '_measurable', '_config') + # NOTE java constructor takes a lock instance def __init__(self, metric_name, measurable, config): if not metric_name: @@ -33,4 +35,4 @@ def config(self, config): def value(self, time_ms=None): if time_ms is None: time_ms = time.time() * 1000 - return self.measurable.measure(self.config, time_ms) + return self._measurable.measure(self._config, time_ms) diff --git a/kafka/metrics/measurable_stat.py b/kafka/metrics/measurable_stat.py index 4487adf6e..08222b144 100644 --- a/kafka/metrics/measurable_stat.py +++ b/kafka/metrics/measurable_stat.py @@ -4,8 +4,10 @@ from kafka.metrics.measurable import AbstractMeasurable from kafka.metrics.stat import AbstractStat +from kafka.vendor.six import add_metaclass +@add_metaclass(abc.ABCMeta) class AbstractMeasurableStat(AbstractStat, AbstractMeasurable): """ An AbstractMeasurableStat is an AbstractStat that is also @@ -13,4 +15,3 @@ class AbstractMeasurableStat(AbstractStat, AbstractMeasurable): This is the interface used for most of the simple statistics such as Avg, Max, Count, etc. 
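Editor's note: the `Future` changes above add a lock so callbacks registered after completion no longer race the completing thread. A small usage sketch of the public `kafka.future.Future` API:

```python
from kafka.future import Future

f = Future()
f.add_callback(lambda value: print("got", value))    # registered before completion
f.success(42)                                        # completes the future, fires the callback
f.add_callback(lambda value: print("late", value))   # already done: fires immediately
f.add_errback(lambda exc: print("error", exc))       # future succeeded, so this never fires
```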
""" - __metaclass__ = abc.ABCMeta diff --git a/kafka/metrics/metric_config.py b/kafka/metrics/metric_config.py index 2e55abfcb..7e5ead1fe 100644 --- a/kafka/metrics/metric_config.py +++ b/kafka/metrics/metric_config.py @@ -5,6 +5,8 @@ class MetricConfig(object): """Configuration values for metrics""" + __slots__ = ('quota', '_samples', 'event_window', 'time_window_ms', 'tags') + def __init__(self, quota=None, samples=2, event_window=sys.maxsize, time_window_ms=30 * 1000, tags=None): """ diff --git a/kafka/metrics/metric_name.py b/kafka/metrics/metric_name.py index b5acd1662..b8ab2a3ad 100644 --- a/kafka/metrics/metric_name.py +++ b/kafka/metrics/metric_name.py @@ -38,6 +38,7 @@ class MetricName(object): # as messages are sent we record the sizes sensor.record(message_size) """ + __slots__ = ('_name', '_group', '_description', '_tags', '_hash') def __init__(self, name, group, description=None, tags=None): """ @@ -93,7 +94,7 @@ def __eq__(self, other): return True if other is None: return False - return (type(self) == type(other) and + return (isinstance(self, type(other)) and self.group == other.group and self.name == other.name and self.tags == other.tags) diff --git a/kafka/metrics/metrics.py b/kafka/metrics/metrics.py index 2c53488ff..41a37db58 100644 --- a/kafka/metrics/metrics.py +++ b/kafka/metrics/metrics.py @@ -55,10 +55,11 @@ def __init__(self, default_config=None, reporters=None, self._reporters = reporters or [] for reporter in self._reporters: reporter.init([]) + self._closed = False if enable_expiration: def expire_loop(): - while True: + while not self._closed: # delay 30 seconds time.sleep(30) self.ExpireSensorTask.run(self) @@ -259,3 +260,4 @@ def close(self): reporter.close() self._metrics.clear() + self._closed = True diff --git a/kafka/metrics/metrics_reporter.py b/kafka/metrics/metrics_reporter.py index d8bd12b3b..8df2e9ea6 100644 --- a/kafka/metrics/metrics_reporter.py +++ b/kafka/metrics/metrics_reporter.py @@ -2,14 +2,15 @@ import abc +from kafka.vendor.six import add_metaclass + +@add_metaclass(abc.ABCMeta) class AbstractMetricsReporter(object): """ An abstract class to allow things to listen as new metrics are created so they can be reported. 
""" - __metaclass__ = abc.ABCMeta - @abc.abstractmethod def init(self, metrics): """ diff --git a/kafka/metrics/quota.py b/kafka/metrics/quota.py index 4d1b0d6cb..36a30c44e 100644 --- a/kafka/metrics/quota.py +++ b/kafka/metrics/quota.py @@ -3,6 +3,8 @@ class Quota(object): """An upper or lower bound for metrics""" + __slots__ = ('_bound', '_upper') + def __init__(self, bound, is_upper): self._bound = bound self._upper = is_upper @@ -34,7 +36,7 @@ def __hash__(self): def __eq__(self, other): if self is other: return True - return (type(self) == type(other) and + return (isinstance(self, type(other)) and self.bound == other.bound and self.is_upper_bound() == other.is_upper_bound()) diff --git a/kafka/metrics/stat.py b/kafka/metrics/stat.py index 9fd2f01ec..8825d2783 100644 --- a/kafka/metrics/stat.py +++ b/kafka/metrics/stat.py @@ -2,14 +2,15 @@ import abc +from kafka.vendor.six import add_metaclass + +@add_metaclass(abc.ABCMeta) class AbstractStat(object): """ An AbstractStat is a quantity such as average, max, etc that is computed off the stream of updates to a sensor """ - __metaclass__ = abc.ABCMeta - @abc.abstractmethod def record(self, config, value, time_ms): """ diff --git a/kafka/metrics/stats/avg.py b/kafka/metrics/stats/avg.py index cfbaec309..906d95573 100644 --- a/kafka/metrics/stats/avg.py +++ b/kafka/metrics/stats/avg.py @@ -7,6 +7,8 @@ class Avg(AbstractSampledStat): """ An AbstractSampledStat that maintains a simple average over its samples. """ + __slots__ = ('_initial_value', '_samples', '_current') + def __init__(self): super(Avg, self).__init__(0.0) diff --git a/kafka/metrics/stats/count.py b/kafka/metrics/stats/count.py index 6e0a2d545..6cd6d2abe 100644 --- a/kafka/metrics/stats/count.py +++ b/kafka/metrics/stats/count.py @@ -7,6 +7,8 @@ class Count(AbstractSampledStat): """ An AbstractSampledStat that maintains a simple count of what it has seen. 
""" + __slots__ = ('_initial_value', '_samples', '_current') + def __init__(self): super(Count, self).__init__(0.0) diff --git a/kafka/metrics/stats/histogram.py b/kafka/metrics/stats/histogram.py index ecc6c9db4..2c8afbfb3 100644 --- a/kafka/metrics/stats/histogram.py +++ b/kafka/metrics/stats/histogram.py @@ -4,6 +4,8 @@ class Histogram(object): + __slots__ = ('_hist', '_count', '_bin_scheme') + def __init__(self, bin_scheme): self._hist = [0.0] * bin_scheme.bins self._count = 0.0 @@ -40,6 +42,8 @@ def __str__(self): return '{%s}' % ','.join(values) class ConstantBinScheme(object): + __slots__ = ('_min', '_max', '_bins', '_bucket_width') + def __init__(self, bins, min_val, max_val): if bins < 2: raise ValueError('Must have at least 2 bins.') @@ -69,6 +73,8 @@ def to_bin(self, x): return int(((x - self._min) / self._bucket_width) + 1) class LinearBinScheme(object): + __slots__ = ('_bins', '_max', '_scale') + def __init__(self, num_bins, max_val): self._bins = num_bins self._max = max_val diff --git a/kafka/metrics/stats/max_stat.py b/kafka/metrics/stats/max_stat.py index 08aebddfd..9c5eeb6fd 100644 --- a/kafka/metrics/stats/max_stat.py +++ b/kafka/metrics/stats/max_stat.py @@ -5,6 +5,8 @@ class Max(AbstractSampledStat): """An AbstractSampledStat that gives the max over its samples.""" + __slots__ = ('_initial_value', '_samples', '_current') + def __init__(self): super(Max, self).__init__(float('-inf')) diff --git a/kafka/metrics/stats/min_stat.py b/kafka/metrics/stats/min_stat.py index 072106d8a..6bebe57e0 100644 --- a/kafka/metrics/stats/min_stat.py +++ b/kafka/metrics/stats/min_stat.py @@ -7,6 +7,8 @@ class Min(AbstractSampledStat): """An AbstractSampledStat that gives the min over its samples.""" + __slots__ = ('_initial_value', '_samples', '_current') + def __init__(self): super(Min, self).__init__(float(sys.maxsize)) diff --git a/kafka/metrics/stats/percentile.py b/kafka/metrics/stats/percentile.py index 3a86a84a9..75e64ce5e 100644 --- a/kafka/metrics/stats/percentile.py +++ b/kafka/metrics/stats/percentile.py @@ -2,6 +2,8 @@ class Percentile(object): + __slots__ = ('_metric_name', '_percentile') + def __init__(self, metric_name, percentile): self._metric_name = metric_name self._percentile = float(percentile) diff --git a/kafka/metrics/stats/percentiles.py b/kafka/metrics/stats/percentiles.py index 6d702e80f..2cb2d84de 100644 --- a/kafka/metrics/stats/percentiles.py +++ b/kafka/metrics/stats/percentiles.py @@ -13,6 +13,9 @@ class BucketSizing(object): class Percentiles(AbstractSampledStat, AbstractCompoundStat): """A compound stat that reports one or more percentiles""" + __slots__ = ('_initial_value', '_samples', '_current', + '_percentiles', '_buckets', '_bin_scheme') + def __init__(self, size_in_bytes, bucketing, max_val, min_val=0.0, percentiles=None): super(Percentiles, self).__init__(0.0) @@ -27,7 +30,7 @@ def __init__(self, size_in_bytes, bucketing, max_val, min_val=0.0, ' to be 0.0.') self.bin_scheme = Histogram.LinearBinScheme(self._buckets, max_val) else: - ValueError('Unknown bucket type: %s' % (bucketing,)) + raise ValueError('Unknown bucket type: %s' % (bucketing,)) def stats(self): measurables = [] diff --git a/kafka/metrics/stats/rate.py b/kafka/metrics/stats/rate.py index 68393fbf7..4d0ba0f27 100644 --- a/kafka/metrics/stats/rate.py +++ b/kafka/metrics/stats/rate.py @@ -37,6 +37,8 @@ class Rate(AbstractMeasurableStat): occurrences (e.g. the count of values measured over the time interval) or other such values. 
""" + __slots__ = ('_stat', '_unit') + def __init__(self, time_unit=TimeUnit.SECONDS, sampled_stat=None): self._stat = sampled_stat or SampledTotal() self._unit = time_unit @@ -105,6 +107,7 @@ def convert(self, time_ms): class SampledTotal(AbstractSampledStat): + __slots__ = ('_initial_value', '_samples', '_current') def __init__(self, initial_value=None): if initial_value is not None: raise ValueError('initial_value cannot be set on SampledTotal') diff --git a/kafka/metrics/stats/sampled_stat.py b/kafka/metrics/stats/sampled_stat.py index c41b14bbc..fe8970dbf 100644 --- a/kafka/metrics/stats/sampled_stat.py +++ b/kafka/metrics/stats/sampled_stat.py @@ -3,8 +3,10 @@ import abc from kafka.metrics.measurable_stat import AbstractMeasurableStat +from kafka.vendor.six import add_metaclass +@add_metaclass(abc.ABCMeta) class AbstractSampledStat(AbstractMeasurableStat): """ An AbstractSampledStat records a single scalar value measured over @@ -20,7 +22,7 @@ class AbstractSampledStat(AbstractMeasurableStat): Subclasses of this class define different statistics measured using this basic pattern. """ - __metaclass__ = abc.ABCMeta + __slots__ = ('_initial_value', '_samples', '_current') def __init__(self, initial_value): self._initial_value = initial_value diff --git a/kafka/metrics/stats/sensor.py b/kafka/metrics/stats/sensor.py index 571723f97..9f7ac45f5 100644 --- a/kafka/metrics/stats/sensor.py +++ b/kafka/metrics/stats/sensor.py @@ -15,6 +15,10 @@ class Sensor(object): the `record(double)` api and would maintain a set of metrics about request sizes such as the average or max. """ + __slots__ = ('_lock', '_registry', '_name', '_parents', '_metrics', + '_stats', '_config', '_inactive_sensor_expiration_time_ms', + '_last_record_time') + def __init__(self, registry, name, parents, config, inactive_sensor_expiration_time_seconds): if not name: diff --git a/kafka/metrics/stats/total.py b/kafka/metrics/stats/total.py index 5b3bb87fd..a78e99733 100644 --- a/kafka/metrics/stats/total.py +++ b/kafka/metrics/stats/total.py @@ -5,6 +5,8 @@ class Total(AbstractMeasurableStat): """An un-windowed cumulative total maintained over all time.""" + __slots__ = ('_total') + def __init__(self, value=0.0): self._total = value diff --git a/kafka/oauth/__init__.py b/kafka/oauth/__init__.py deleted file mode 100644 index 8c8349564..000000000 --- a/kafka/oauth/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from __future__ import absolute_import - -from kafka.oauth.abstract import AbstractTokenProvider diff --git a/kafka/oauth/abstract.py b/kafka/oauth/abstract.py deleted file mode 100644 index 8d89ff51d..000000000 --- a/kafka/oauth/abstract.py +++ /dev/null @@ -1,42 +0,0 @@ -from __future__ import absolute_import - -import abc - -# This statement is compatible with both Python 2.7 & 3+ -ABC = abc.ABCMeta('ABC', (object,), {'__slots__': ()}) - -class AbstractTokenProvider(ABC): - """ - A Token Provider must be used for the SASL OAuthBearer protocol. - - The implementation should ensure token reuse so that multiple - calls at connect time do not create multiple tokens. The implementation - should also periodically refresh the token in order to guarantee - that each call returns an unexpired token. A timeout error should - be returned after a short period of inactivity so that the - broker can log debugging info and retry. 
- - Token Providers MUST implement the token() method - """ - - def __init__(self, **config): - pass - - @abc.abstractmethod - def token(self): - """ - Returns a (str) ID/Access Token to be sent to the Kafka - client. - """ - pass - - def extensions(self): - """ - This is an OPTIONAL method that may be implemented. - - Returns a map of key-value pairs that can - be sent with the SASL/OAUTHBEARER initial client request. If - not implemented, the values are ignored. This feature is only available - in Kafka >= 2.1.0. - """ - return {} diff --git a/kafka/producer/buffer.py b/kafka/producer/buffer.py deleted file mode 100644 index 100801700..000000000 --- a/kafka/producer/buffer.py +++ /dev/null @@ -1,115 +0,0 @@ -from __future__ import absolute_import, division - -import collections -import io -import threading -import time - -from kafka.metrics.stats import Rate - -import kafka.errors as Errors - - -class SimpleBufferPool(object): - """A simple pool of BytesIO objects with a weak memory ceiling.""" - def __init__(self, memory, poolable_size, metrics=None, metric_group_prefix='producer-metrics'): - """Create a new buffer pool. - - Arguments: - memory (int): maximum memory that this buffer pool can allocate - poolable_size (int): memory size per buffer to cache in the free - list rather than deallocating - """ - self._poolable_size = poolable_size - self._lock = threading.RLock() - - buffers = int(memory / poolable_size) if poolable_size else 0 - self._free = collections.deque([io.BytesIO() for _ in range(buffers)]) - - self._waiters = collections.deque() - self.wait_time = None - if metrics: - self.wait_time = metrics.sensor('bufferpool-wait-time') - self.wait_time.add(metrics.metric_name( - 'bufferpool-wait-ratio', metric_group_prefix, - 'The fraction of time an appender waits for space allocation.'), - Rate()) - - def allocate(self, size, max_time_to_block_ms): - """ - Allocate a buffer of the given size. This method blocks if there is not - enough memory and the buffer pool is configured with blocking mode. - - Arguments: - size (int): The buffer size to allocate in bytes [ignored] - max_time_to_block_ms (int): The maximum time in milliseconds to - block for buffer memory to be available - - Returns: - io.BytesIO - """ - with self._lock: - # check if we have a free buffer of the right size pooled - if self._free: - return self._free.popleft() - - elif self._poolable_size == 0: - return io.BytesIO() - - else: - # we are out of buffers and will have to block - buf = None - more_memory = threading.Condition(self._lock) - self._waiters.append(more_memory) - # loop over and over until we have a buffer or have reserved - # enough memory to allocate one - while buf is None: - start_wait = time.time() - more_memory.wait(max_time_to_block_ms / 1000.0) - end_wait = time.time() - if self.wait_time: - self.wait_time.record(end_wait - start_wait) - - if self._free: - buf = self._free.popleft() - else: - self._waiters.remove(more_memory) - raise Errors.KafkaTimeoutError( - "Failed to allocate memory within the configured" - " max blocking time") - - # remove the condition for this thread to let the next thread - # in line start getting memory - removed = self._waiters.popleft() - assert removed is more_memory, 'Wrong condition' - - # signal any additional waiters if there is more memory left - # over for them - if self._free and self._waiters: - self._waiters[0].notify() - - # unlock and return the buffer - return buf - - def deallocate(self, buf): - """ - Return buffers to the pool. 
If they are of the poolable size add them - to the free list, otherwise just mark the memory as free. - - Arguments: - buffer_ (io.BytesIO): The buffer to return - """ - with self._lock: - # BytesIO.truncate here makes the pool somewhat pointless - # but we stick with the BufferPool API until migrating to - # bytesarray / memoryview. The buffer we return must not - # expose any prior data on read(). - buf.truncate(0) - self._free.append(buf) - if self._waiters: - self._waiters[0].notify() - - def queued(self): - """The number of threads blocked waiting on memory.""" - with self._lock: - return len(self._waiters) diff --git a/kafka/producer/future.py b/kafka/producer/future.py index 07fa4adb4..f67db0979 100644 --- a/kafka/producer/future.py +++ b/kafka/producer/future.py @@ -38,7 +38,7 @@ def __init__(self, produce_future, relative_offset, timestamp_ms, checksum, seri produce_future.add_errback(self.failure) def _produce_success(self, offset_and_timestamp): - offset, produce_timestamp_ms, log_start_offset = offset_and_timestamp + offset, produce_timestamp_ms = offset_and_timestamp # Unpacking from args tuple is minor speed optimization (relative_offset, timestamp_ms, checksum, @@ -51,7 +51,7 @@ def _produce_success(self, offset_and_timestamp): if offset != -1 and relative_offset is not None: offset += relative_offset tp = self._produce_future.topic_partition - metadata = RecordMetadata(tp[0], tp[1], tp, offset, timestamp_ms, log_start_offset, + metadata = RecordMetadata(tp[0], tp[1], tp, offset, timestamp_ms, checksum, serialized_key_size, serialized_value_size, serialized_header_size) self.success(metadata) @@ -67,5 +67,5 @@ def get(self, timeout=None): RecordMetadata = collections.namedtuple( - 'RecordMetadata', ['topic', 'partition', 'topic_partition', 'offset', 'timestamp', 'log_start_offset', + 'RecordMetadata', ['topic', 'partition', 'topic_partition', 'offset', 'timestamp', 'checksum', 'serialized_key_size', 'serialized_value_size', 'serialized_header_size']) diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py index 233bc3dce..9401bd814 100644 --- a/kafka/producer/kafka.py +++ b/kafka/producer/kafka.py @@ -1,11 +1,11 @@ -from __future__ import absolute_import +from __future__ import absolute_import, division import atexit import copy import logging import socket import threading -import time +import warnings import weakref from kafka.vendor import six @@ -18,10 +18,12 @@ from kafka.producer.future import FutureRecordMetadata, FutureProduceResult from kafka.producer.record_accumulator import AtomicInteger, RecordAccumulator from kafka.producer.sender import Sender +from kafka.producer.transaction_manager import TransactionManager from kafka.record.default_records import DefaultRecordBatchBuilder from kafka.record.legacy_records import LegacyRecordBatchBuilder from kafka.serializer import Serializer from kafka.structs import TopicPartition +from kafka.util import Timer, ensure_valid_topic_name log = logging.getLogger(__name__) @@ -34,8 +36,8 @@ class KafkaProducer(object): The producer is thread safe and sharing a single producer instance across threads will generally be faster than having multiple instances. 
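Editor's note: a minimal end-to-end usage sketch of the producer described above (the broker address and topic name are illustrative):

```python
from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers='localhost:9092')
future = producer.send('my-topic', key=b'k', value=b'hello')
record_metadata = future.get(timeout=10)   # block until acked (or raise on failure)
print(record_metadata.topic, record_metadata.partition, record_metadata.offset)
producer.flush()
producer.close()
```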
- The producer consists of a pool of buffer space that holds records that - haven't yet been transmitted to the server as well as a background I/O + The producer consists of a RecordAccumulator which holds records that + haven't yet been transmitted to the server, and a Sender background I/O thread that is responsible for turning these records into requests and transmitting them to the cluster. @@ -71,14 +73,50 @@ class KafkaProducer(object): can lead to fewer, more efficient requests when not under maximal load at the cost of a small amount of latency. - The buffer_memory controls the total amount of memory available to the - producer for buffering. If records are sent faster than they can be - transmitted to the server then this buffer space will be exhausted. When - the buffer space is exhausted additional send calls will block. - The key_serializer and value_serializer instruct how to turn the key and value objects the user provides into bytes. + From Kafka 0.11, the KafkaProducer supports two additional modes: + the idempotent producer and the transactional producer. + The idempotent producer strengthens Kafka's delivery semantics from + at least once to exactly once delivery. In particular, producer retries + will no longer introduce duplicates. The transactional producer allows an + application to send messages to multiple partitions (and topics!) + atomically. + + To enable idempotence, the `enable_idempotence` configuration must be set + to True. If set, the `retries` config will default to `float('inf')` and + the `acks` config will default to 'all'. There are no API changes for the + idempotent producer, so existing applications will not need to be modified + to take advantage of this feature. + + To take advantage of the idempotent producer, it is imperative to avoid + application level re-sends since these cannot be de-duplicated. As such, if + an application enables idempotence, it is recommended to leave the + `retries` config unset, as it will be defaulted to `float('inf')`. + Additionally, if a :meth:`~kafka.KafkaProducer.send` returns an error even + with infinite retries (for instance if the message expires in the buffer + before being sent), then it is recommended to shut down the producer and + check the contents of the last produced message to ensure that it is not + duplicated. Finally, the producer can only guarantee idempotence for + messages sent within a single session. + + To use the transactional producer and the attendant APIs, you must set the + `transactional_id` configuration property. If the `transactional_id` is + set, idempotence is automatically enabled along with the producer configs + which idempotence depends on. Further, topics which are included in + transactions should be configured for durability. In particular, the + `replication.factor` should be at least `3`, and the `min.insync.replicas` + for these topics should be set to 2. Finally, in order for transactional + guarantees to be realized from end-to-end, the consumers must be + configured to read only committed messages as well. + + The purpose of the `transactional_id` is to enable transaction recovery + across multiple sessions of a single producer instance. It would typically + be derived from the shard identifier in a partitioned, stateful, + application. As such, it should be unique to each producer instance running + within a partitioned application. 
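Editor's note: the two modes described above map onto the `enable_idempotence` and `transactional_id` configs added in this diff. A hedged sketch; the transactional method names shown are assumed to follow the Java producer API:

```python
from kafka import KafkaProducer

# Idempotent producer: broker-side retries can no longer introduce duplicates.
idempotent = KafkaProducer(bootstrap_servers='localhost:9092',
                           enable_idempotence=True)   # acks/retries defaults adjusted accordingly

# Transactional producer: setting transactional_id implies idempotence.
txn_producer = KafkaProducer(bootstrap_servers='localhost:9092',
                             transactional_id='order-processor-1')
txn_producer.init_transactions()
txn_producer.begin_transaction()
txn_producer.send('orders', b'payload')
txn_producer.commit_transaction()
```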
+ Keyword Arguments: bootstrap_servers: 'host[:port]' string (or list of 'host[:port]' strings) that the producer should contact to bootstrap initial @@ -96,6 +134,28 @@ class KafkaProducer(object): value_serializer (callable): used to convert user-supplied message values to bytes. If not None, called as f(value), should return bytes. Default: None. + enable_idempotence (bool): When set to True, the producer will ensure + that exactly one copy of each message is written in the stream. + If False, producer retries due to broker failures, etc., may write + duplicates of the retried message in the stream. Default: False. + + Note that enabling idempotence requires + `max_in_flight_requests_per_connection` to be set to 1 and `retries` + cannot be zero. Additionally, `acks` must be set to 'all'. If these + values are left at their defaults, the producer will override the + defaults to be suitable. If the values are set to something + incompatible with the idempotent producer, a KafkaConfigurationError + will be raised. + delivery_timeout_ms (float): An upper bound on the time to report success + or failure after producer.send() returns. This limits the total time + that a record will be delayed prior to sending, the time to await + acknowledgement from the broker (if expected), and the time allowed + for retriable send failures. The producer may report failure to send + a record earlier than this config if either an unrecoverable error is + encountered, the retries have been exhausted, or the record is added + to a batch which reached an earlier delivery expiration deadline. + The value of this config should be greater than or equal to the + sum of (request_timeout_ms + linger_ms). Default: 120000. acks (0, 1, 'all'): The number of acknowledgments the producer requires the leader to have received before considering a request complete. This controls the durability of records that are sent. The @@ -123,7 +183,7 @@ class KafkaProducer(object): Compression is of full batches of data, so the efficacy of batching will also impact the compression ratio (more batching means better compression). Default: None. - retries (int): Setting a value greater than zero will cause the client + retries (numeric): Setting a value greater than zero will cause the client to resend any record whose send fails with a potentially transient error. Note that this retry is no different than if the client resent the record upon receiving the error. Allowing retries @@ -131,8 +191,12 @@ class KafkaProducer(object): potentially change the ordering of records because if two batches are sent to a single partition, and the first fails and is retried but the second succeeds, then the records in the second batch may - appear first. - Default: 0. + appear first. Note additionally that produce requests will be + failed before the number of retries has been exhausted if the timeout + configured by delivery_timeout_ms expires first before successful + acknowledgement. Users should generally prefer to leave this config + unset and instead use delivery_timeout_ms to control retry behavior. + Default: float('inf') (infinite) batch_size (int): Requests sent to brokers will contain multiple batches, one for each partition with data available to be sent. A small batch size will make batching less common and may reduce @@ -165,12 +229,6 @@ class KafkaProducer(object): messages with the same key are assigned to the same partition. 
When a key is None, the message is delivered to a random partition (filtered to partitions with available leaders only, if possible). - buffer_memory (int): The total bytes of memory the producer should use - to buffer records waiting to be sent to the server. If records are - sent faster than they can be delivered to the server the producer - will block up to max_block_ms, raising an exception on timeout. - In the current implementation, this setting is an approximation. - Default: 33554432 (32MB) connections_max_idle_ms: Close idle connections after the number of milliseconds specified by this config. The broker closes idle connections after connections.max.idle.ms, so this avoids hitting @@ -272,9 +330,11 @@ class KafkaProducer(object): api_version_auto_timeout_ms (int): number of milliseconds to throw a timeout exception from the constructor when checking the broker api version. Only applies if api_version set to None. + Default: 2000 metric_reporters (list): A list of classes to use as metrics reporters. Implementing the AbstractMetricsReporter interface allows plugging in classes that will be notified of new metric creation. Default: [] + metrics_enabled (bool): Whether to track metrics on this instance. Default True. metrics_num_samples (int): The number of samples maintained to compute metrics. Default: 2 metrics_sample_window_ms (int): The maximum age in milliseconds of @@ -289,12 +349,16 @@ class KafkaProducer(object): Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms. sasl_plain_password (str): password for sasl PLAIN and SCRAM authentication. Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms. + sasl_kerberos_name (str or gssapi.Name): Constructed gssapi.Name for use with + sasl mechanism handshake. If provided, sasl_kerberos_service_name and + sasl_kerberos_domain name are ignored. Default: None. sasl_kerberos_service_name (str): Service name to include in GSSAPI sasl mechanism handshake. Default: 'kafka' sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI sasl mechanism handshake. Default: one of bootstrap servers - sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider - instance. (See kafka.oauth.abstract). Default: None + sasl_oauth_token_provider (kafka.sasl.oauth.AbstractTokenProvider): OAuthBearer + token provider instance. Default: None + socks5_proxy (str): Socks5 proxy URL. 
Default: None kafka_client (callable): Custom class / callable for creating KafkaClient instances Note: @@ -306,14 +370,17 @@ class KafkaProducer(object): 'client_id': None, 'key_serializer': None, 'value_serializer': None, + 'enable_idempotence': False, + 'transactional_id': None, + 'transaction_timeout_ms': 60000, + 'delivery_timeout_ms': 120000, 'acks': 1, 'bootstrap_topics_filter': set(), 'compression_type': None, - 'retries': 0, + 'retries': float('inf'), 'batch_size': 16384, 'linger_ms': 0, 'partitioner': DefaultPartitioner(), - 'buffer_memory': 33554432, 'connections_max_idle_ms': 9 * 60 * 1000, 'max_block_ms': 60000, 'max_request_size': 1048576, @@ -341,18 +408,23 @@ class KafkaProducer(object): 'api_version': None, 'api_version_auto_timeout_ms': 2000, 'metric_reporters': [], + 'metrics_enabled': True, 'metrics_num_samples': 2, 'metrics_sample_window_ms': 30000, 'selector': selectors.DefaultSelector, 'sasl_mechanism': None, 'sasl_plain_username': None, 'sasl_plain_password': None, + 'sasl_kerberos_name': None, 'sasl_kerberos_service_name': 'kafka', 'sasl_kerberos_domain_name': None, 'sasl_oauth_token_provider': None, + 'socks5_proxy': None, 'kafka_client': KafkaClient, } + DEPRECATED_CONFIGS = ('buffer_memory',) + _COMPRESSORS = { 'gzip': (has_gzip, LegacyRecordBatchBuilder.CODEC_GZIP), 'snappy': (has_snappy, LegacyRecordBatchBuilder.CODEC_SNAPPY), @@ -362,12 +434,17 @@ class KafkaProducer(object): } def __init__(self, **configs): - log.debug("Starting the Kafka producer") # trace self.config = copy.copy(self.DEFAULT_CONFIG) + user_provided_configs = set(configs.keys()) for key in self.config: if key in configs: self.config[key] = configs.pop(key) + for key in self.DEPRECATED_CONFIGS: + if key in configs: + configs.pop(key) + warnings.warn('Deprecated Producer config: %s' % (key,), DeprecationWarning) + # Only check for extra config keys in top-level class assert not configs, 'Unrecognized configs: %s' % (configs,) @@ -385,16 +462,21 @@ def __init__(self, **configs): self.config['api_version'] = None else: self.config['api_version'] = tuple(map(int, deprecated.split('.'))) - log.warning('use api_version=%s [tuple] -- "%s" as str is deprecated', - str(self.config['api_version']), deprecated) + log.warning('%s: use api_version=%s [tuple] -- "%s" as str is deprecated', + str(self), str(self.config['api_version']), deprecated) + + log.debug("%s: Starting Kafka producer", str(self)) # Configure metrics - metrics_tags = {'client-id': self.config['client_id']} - metric_config = MetricConfig(samples=self.config['metrics_num_samples'], - time_window_ms=self.config['metrics_sample_window_ms'], - tags=metrics_tags) - reporters = [reporter() for reporter in self.config['metric_reporters']] - self._metrics = Metrics(metric_config, reporters) + if self.config['metrics_enabled']: + metrics_tags = {'client-id': self.config['client_id']} + metric_config = MetricConfig(samples=self.config['metrics_num_samples'], + time_window_ms=self.config['metrics_sample_window_ms'], + tags=metrics_tags) + reporters = [reporter() for reporter in self.config['metric_reporters']] + self._metrics = Metrics(metric_config, reporters) + else: + self._metrics = None client = self.config['kafka_client']( metrics=self._metrics, metric_group_prefix='producer', @@ -419,12 +501,58 @@ def __init__(self, **configs): assert checker(), "Libraries for {} compression codec not found".format(ct) self.config['compression_attrs'] = compression_attrs - message_version = self._max_usable_produce_magic() - self._accumulator = 
RecordAccumulator(message_version=message_version, metrics=self._metrics, **self.config) self._metadata = client.cluster + self._transaction_manager = None + self._init_transactions_result = None + if 'enable_idempotence' in user_provided_configs and not self.config['enable_idempotence'] and self.config['transactional_id']: + raise Errors.KafkaConfigurationError("Cannot set transactional_id without enable_idempotence.") + + if self.config['transactional_id']: + self.config['enable_idempotence'] = True + + if self.config['enable_idempotence']: + assert self.config['api_version'] >= (0, 11), "Transactional/Idempotent producer requires >= Kafka 0.11 Brokers" + + self._transaction_manager = TransactionManager( + transactional_id=self.config['transactional_id'], + transaction_timeout_ms=self.config['transaction_timeout_ms'], + retry_backoff_ms=self.config['retry_backoff_ms'], + api_version=self.config['api_version'], + metadata=self._metadata, + ) + if self._transaction_manager.is_transactional(): + log.info("%s: Instantiated a transactional producer.", str(self)) + else: + log.info("%s: Instantiated an idempotent producer.", str(self)) + + if self.config['retries'] == 0: + raise Errors.KafkaConfigurationError("Must set 'retries' to non-zero when using the idempotent producer.") + + if 'max_in_flight_requests_per_connection' not in user_provided_configs: + log.info("%s: Overriding the default 'max_in_flight_requests_per_connection' to 1 since idempotence is enabled.", str(self)) + self.config['max_in_flight_requests_per_connection'] = 1 + elif self.config['max_in_flight_requests_per_connection'] != 1: + raise Errors.KafkaConfigurationError("Must set 'max_in_flight_requests_per_connection' to 1 in order" + " to use the idempotent producer." + " Otherwise we cannot guarantee idempotence.") + + if 'acks' not in user_provided_configs: + log.info("%s: Overriding the default 'acks' config to 'all' since idempotence is enabled", str(self)) + self.config['acks'] = -1 + elif self.config['acks'] != -1: + raise Errors.KafkaConfigurationError("Must set 'acks' config to 'all' in order to use the idempotent" + " producer.
Otherwise we cannot guarantee idempotence") + + message_version = self.max_usable_produce_magic(self.config['api_version']) + self._accumulator = RecordAccumulator( + transaction_manager=self._transaction_manager, + message_version=message_version, + **self.config) guarantee_message_order = bool(self.config['max_in_flight_requests_per_connection'] == 1) self._sender = Sender(client, self._metadata, - self._accumulator, self._metrics, + self._accumulator, + metrics=self._metrics, + transaction_manager=self._transaction_manager, guarantee_message_order=guarantee_message_order, **self.config) self._sender.daemon = True @@ -433,7 +561,7 @@ def __init__(self, **configs): self._cleanup = self._cleanup_factory() atexit.register(self._cleanup) - log.debug("Kafka producer started") + log.debug("%s: Kafka producer started", str(self)) def bootstrap_connected(self): """Return True if the bootstrap is connected.""" @@ -444,7 +572,7 @@ def _cleanup_factory(self): _self = weakref.proxy(self) def wrapper(): try: - _self.close(timeout=0) + _self.close(timeout=0, null_logger=True) except (ReferenceError, AttributeError): pass return wrapper @@ -467,28 +595,28 @@ def _unregister_cleanup(self): self._cleanup = None def __del__(self): - # Disable logger during destruction to avoid touching dangling references - class NullLogger(object): - def __getattr__(self, name): - return lambda *args: None - - global log - log = NullLogger() + self.close(timeout=1, null_logger=True) - self.close() - - def close(self, timeout=None): + def close(self, timeout=None, null_logger=False): """Close this producer. Arguments: timeout (float, optional): timeout in seconds to wait for completion. """ + if null_logger: + # Disable logger during destruction to avoid touching dangling references + class NullLogger(object): + def __getattr__(self, name): + return lambda *args: None + + global log + log = NullLogger() # drop our atexit handler now to avoid leaks self._unregister_cleanup() if not hasattr(self, '_closed') or self._closed: - log.info('Kafka producer closed') + log.info('%s: Kafka producer closed', str(self)) return if timeout is None: # threading.TIMEOUT_MAX is available in Python3.3+ @@ -498,15 +626,16 @@ def close(self, timeout=None): else: assert timeout >= 0 - log.info("Closing the Kafka producer with %s secs timeout.", timeout) + log.info("%s: Closing the Kafka producer with %s secs timeout.", str(self), timeout) + self.flush(timeout) invoked_from_callback = bool(threading.current_thread() is self._sender) if timeout > 0: if invoked_from_callback: - log.warning("Overriding close timeout %s secs to 0 in order to" + log.warning("%s: Overriding close timeout %s secs to 0 in order to" " prevent useless blocking due to self-join. This" " means you have incorrectly invoked close with a" " non-zero timeout from the producer call-back.", - timeout) + str(self), timeout) else: # Try to close gracefully. 
if self._sender is not None: @@ -514,12 +643,13 @@ def close(self, timeout=None): self._sender.join(timeout) if self._sender is not None and self._sender.is_alive(): - log.info("Proceeding to force close the producer since pending" + log.info("%s: Proceeding to force close the producer since pending" " requests could not be completed within timeout %s.", - timeout) + str(self), timeout) self._sender.force_close() - self._metrics.close() + if self._metrics: + self._metrics.close() try: self.config['key_serializer'].close() except AttributeError: @@ -529,23 +659,23 @@ def close(self, timeout=None): pass self._closed = True - log.debug("The Kafka producer has closed.") + log.debug("%s: The Kafka producer has closed.", str(self)) def partitions_for(self, topic): """Returns set of all known partitions for the topic.""" - max_wait = self.config['max_block_ms'] / 1000.0 - return self._wait_on_metadata(topic, max_wait) + return self._wait_on_metadata(topic, self.config['max_block_ms']) - def _max_usable_produce_magic(self): - if self.config['api_version'] >= (0, 11): + @classmethod + def max_usable_produce_magic(cls, api_version): + if api_version >= (0, 11): return 2 - elif self.config['api_version'] >= (0, 10, 0): + elif api_version >= (0, 10, 0): return 1 else: return 0 def _estimate_size_in_bytes(self, key, value, headers=[]): - magic = self._max_usable_produce_magic() + magic = self.max_usable_produce_magic(self.config['api_version']) if magic == 2: return DefaultRecordBatchBuilder.estimate_size_in_bytes( key, value, headers) @@ -553,6 +683,114 @@ def _estimate_size_in_bytes(self, key, value, headers=[]): return LegacyRecordBatchBuilder.estimate_size_in_bytes( magic, self.config['compression_type'], key, value) + def init_transactions(self): + """ + Needs to be called before any other methods when the transactional_id is set in the configuration. + + This method does the following: + 1. Ensures any transactions initiated by previous instances of the producer with the same + transactional_id are completed. If the previous instance had failed with a transaction in + progress, it will be aborted. If the last transaction had begun completion, + but not yet finished, this method awaits its completion. + 2. Gets the internal producer id and epoch, used in all future transactional + messages issued by the producer. + + Note that this method will raise KafkaTimeoutError if the transactional state cannot + be initialized before expiration of `max_block_ms`. + + Retrying after a KafkaTimeoutError will continue to wait for the prior request to succeed or fail. + Retrying after any other exception will start a new initialization attempt. + Retrying after a successful initialization will do nothing. + + Raises: + IllegalStateError: if no transactional_id has been configured + AuthorizationError: fatal error indicating that the configured + transactional_id is not authorized. + KafkaError: if the producer has encountered a previous fatal error or for any other unexpected error + KafkaTimeoutError: if the time taken to initialize the transaction has surpassed `max_block_ms`.
+ """ + if not self._transaction_manager: + raise Errors.IllegalStateError("Cannot call init_transactions without setting a transactional_id.") + if self._init_transactions_result is None: + self._init_transactions_result = self._transaction_manager.initialize_transactions() + self._sender.wakeup() + + try: + if not self._init_transactions_result.wait(timeout_ms=self.config['max_block_ms']): + raise Errors.KafkaTimeoutError("Timeout expired while initializing transactional state in %s ms." % (self.config['max_block_ms'],)) + finally: + if self._init_transactions_result.failed: + self._init_transactions_result = None + + def begin_transaction(self): + """ Should be called before the start of each new transaction. + + Note that prior to the first invocation of this method, + you must invoke `init_transactions()` exactly one time. + + Raises: + ProducerFencedError if another producer is with the same + transactional_id is active. + """ + # Set the transactional bit in the producer. + if not self._transaction_manager: + raise Errors.IllegalStateError("Cannot use transactional methods without enabling transactions") + self._transaction_manager.begin_transaction() + + def send_offsets_to_transaction(self, offsets, consumer_group_id): + """ + Sends a list of consumed offsets to the consumer group coordinator, and also marks + those offsets as part of the current transaction. These offsets will be considered + consumed only if the transaction is committed successfully. + + This method should be used when you need to batch consumed and produced messages + together, typically in a consume-transform-produce pattern. + + Arguments: + offsets ({TopicPartition: OffsetAndMetadata}): map of topic-partition -> offsets to commit + as part of current transaction. + consumer_group_id (str): Name of consumer group for offsets commit. + + Raises: + IllegalStateError: if no transactional_id, or transaction has not been started. + ProducerFencedError: fatal error indicating another producer with the same transactional_id is active. + UnsupportedVersionError: fatal error indicating the broker does not support transactions (i.e. if < 0.11). + UnsupportedForMessageFormatError: fatal error indicating the message format used for the offsets + topic on the broker does not support transactions. + AuthorizationError: fatal error indicating that the configured transactional_id is not authorized. + KafkaErro:r if the producer has encountered a previous fatal or abortable error, or for any + other unexpected error + """ + if not self._transaction_manager: + raise Errors.IllegalStateError("Cannot use transactional methods without enabling transactions") + result = self._transaction_manager.send_offsets_to_transaction(offsets, consumer_group_id) + self._sender.wakeup() + result.wait() + + def commit_transaction(self): + """ Commits the ongoing transaction. + + Raises: ProducerFencedError if another producer with the same + transactional_id is active. + """ + if not self._transaction_manager: + raise Errors.IllegalStateError("Cannot commit transaction since transactions are not enabled") + result = self._transaction_manager.begin_commit() + self._sender.wakeup() + result.wait() + + def abort_transaction(self): + """ Aborts the ongoing transaction. + + Raises: ProducerFencedError if another producer with the same + transactional_id is active. 
+ """ + if not self._transaction_manager: + raise Errors.IllegalStateError("Cannot abort transaction since transactions are not enabled.") + result = self._transaction_manager.begin_abort() + self._sender.wakeup() + result.wait() + def send(self, topic, value=None, key=None, headers=None, partition=None, timestamp_ms=None): """Publish a message to a topic. @@ -585,44 +823,58 @@ def send(self, topic, value=None, key=None, headers=None, partition=None, timest Raises: KafkaTimeoutError: if unable to fetch topic metadata, or unable to obtain memory buffer prior to configured max_block_ms + TypeError: if topic is not a string + ValueError: if topic is invalid: must be chars (a-zA-Z0-9._-), and less than 250 length + AssertionError: if KafkaProducer is closed, or key and value are both None """ + assert not self._closed, 'KafkaProducer already closed!' assert value is not None or self.config['api_version'] >= (0, 8, 1), ( 'Null messages require kafka >= 0.8.1') assert not (value is None and key is None), 'Need at least one: key or value' + ensure_valid_topic_name(topic) key_bytes = value_bytes = None + timer = Timer(self.config['max_block_ms'], "Failed to assign partition for message in max_block_ms.") try: - self._wait_on_metadata(topic, self.config['max_block_ms'] / 1000.0) - - key_bytes = self._serialize( - self.config['key_serializer'], - topic, key) - value_bytes = self._serialize( - self.config['value_serializer'], - topic, value) - assert type(key_bytes) in (bytes, bytearray, memoryview, type(None)) - assert type(value_bytes) in (bytes, bytearray, memoryview, type(None)) - - partition = self._partition(topic, partition, key, value, - key_bytes, value_bytes) + assigned_partition = None + while assigned_partition is None and not timer.expired: + self._wait_on_metadata(topic, timer.timeout_ms) + + key_bytes = self._serialize( + self.config['key_serializer'], + topic, key) + value_bytes = self._serialize( + self.config['value_serializer'], + topic, value) + assert type(key_bytes) in (bytes, bytearray, memoryview, type(None)) + assert type(value_bytes) in (bytes, bytearray, memoryview, type(None)) + + assigned_partition = self._partition(topic, partition, key, value, + key_bytes, value_bytes) + if assigned_partition is None: + raise Errors.KafkaTimeoutError("Failed to assign partition for message after %s secs." 
% (timer.elapsed_ms / 1000,)) + else: + partition = assigned_partition if headers is None: headers = [] - assert type(headers) == list - assert all(type(item) == tuple and len(item) == 2 and type(item[0]) == str and type(item[1]) == bytes for item in headers) + assert isinstance(headers, list) + assert all(isinstance(item, tuple) and len(item) == 2 and isinstance(item[0], str) and isinstance(item[1], bytes) for item in headers) message_size = self._estimate_size_in_bytes(key_bytes, value_bytes, headers) self._ensure_valid_record_size(message_size) tp = TopicPartition(topic, partition) - log.debug("Sending (key=%r value=%r headers=%r) to %s", key, value, headers, tp) + log.debug("%s: Sending (key=%r value=%r headers=%r) to %s", str(self), key, value, headers, tp) + + if self._transaction_manager and self._transaction_manager.is_transactional(): + self._transaction_manager.maybe_add_partition_to_transaction(tp) + result = self._accumulator.append(tp, timestamp_ms, - key_bytes, value_bytes, headers, - self.config['max_block_ms'], - estimated_size=message_size) + key_bytes, value_bytes, headers) future, batch_is_full, new_batch_created = result if batch_is_full or new_batch_created: - log.debug("Waking up the sender since %s is either full or" - " getting a new batch", tp) + log.debug("%s: Waking up the sender since %s is either full or" - " getting a new batch", str(self), tp) self._sender.wakeup() return future @@ -630,7 +882,7 @@ def send(self, topic, value=None, key=None, headers=None, partition=None, timest # for API exceptions return them in the future, # for other exceptions raise directly except Errors.BrokerResponseError as e: - log.error("Exception occurred during message send: %s", e) + log.error("%s: Exception occurred during message send: %s", str(self), e) return FutureRecordMetadata( FutureProduceResult(TopicPartition(topic, partition)), -1, None, None, @@ -661,7 +913,7 @@ def flush(self, timeout=None): KafkaTimeoutError: failure to flush buffered records within the provided timeout """ - log.debug("Flushing accumulated records in producer.") # trace + log.debug("%s: Flushing accumulated records in producer.", str(self)) self._accumulator.begin_flush() self._sender.wakeup() self._accumulator.await_flush_completion(timeout=timeout) @@ -673,13 +925,8 @@ def _ensure_valid_record_size(self, size): "The message is %d bytes when serialized which is larger than" " the maximum request size you have configured with the" " max_request_size configuration" % (size,)) - if size > self.config['buffer_memory']: - raise Errors.MessageSizeTooLargeError( - "The message is %d bytes when serialized which is larger than" - " the total memory buffer you have configured with the" - " buffer_memory configuration." % (size,)) - def _wait_on_metadata(self, topic, max_wait): + def _wait_on_metadata(self, topic, max_wait_ms): """ Wait for cluster metadata including partitions for the given topic to be available. @@ -697,32 +944,31 @@ def _wait_on_metadata(self, topic, max_wait): """ # add topic to metadata topic list if it is not there already. self._sender.add_topic(topic) - begin = time.time() - elapsed = 0.0 + timer = Timer(max_wait_ms, "Failed to update metadata after %.1f secs."
% (max_wait_ms / 1000,)) metadata_event = None while True: partitions = self._metadata.partitions_for_topic(topic) if partitions is not None: return partitions - + timer.maybe_raise() if not metadata_event: metadata_event = threading.Event() - log.debug("Requesting metadata update for topic %s", topic) - + log.debug("%s: Requesting metadata update for topic %s", str(self), topic) metadata_event.clear() future = self._metadata.request_update() future.add_both(lambda e, *args: e.set(), metadata_event) self._sender.wakeup() - metadata_event.wait(max_wait - elapsed) - elapsed = time.time() - begin - if not metadata_event.is_set(): + metadata_event.wait(timer.timeout_ms / 1000) + if not future.is_done: raise Errors.KafkaTimeoutError( - "Failed to update metadata after %.1f secs." % (max_wait,)) + "Failed to update metadata after %.1f secs." % (max_wait_ms / 1000,)) + elif future.failed() and not future.retriable(): + raise future.exception elif topic in self._metadata.unauthorized_topics: - raise Errors.TopicAuthorizationFailedError(topic) + raise Errors.TopicAuthorizationFailedError(set([topic])) else: - log.debug("_wait_on_metadata woke after %s secs.", elapsed) + log.debug("%s: _wait_on_metadata woke after %s secs.", str(self), timer.elapsed_ms / 1000) def _serialize(self, f, topic, data): if not f: @@ -733,16 +979,18 @@ def _serialize(self, f, topic, data): def _partition(self, topic, partition, key, value, serialized_key, serialized_value): + all_partitions = self._metadata.partitions_for_topic(topic) + available = self._metadata.available_partitions_for_topic(topic) + if all_partitions is None or available is None: + return None if partition is not None: assert partition >= 0 - assert partition in self._metadata.partitions_for_topic(topic), 'Unrecognized partition' + assert partition in all_partitions, 'Unrecognized partition' return partition - all_partitions = sorted(self._metadata.partitions_for_topic(topic)) - available = list(self._metadata.available_partitions_for_topic(topic)) return self.config['partitioner'](serialized_key, - all_partitions, - available) + sorted(all_partitions), + list(available)) def metrics(self, raw=False): """Get metrics on producer performance. @@ -754,6 +1002,8 @@ def metrics(self, raw=False): This is an unstable interface. It may change in future releases without warning. 
""" + if not self._metrics: + return if raw: return self._metrics.metrics.copy() @@ -765,3 +1015,6 @@ def metrics(self, raw=False): metrics[k.group][k.name] = {} metrics[k.group][k.name] = v.value() return metrics + + def __str__(self): + return "" % (self.config['client_id'], self.config['transactional_id']) diff --git a/kafka/producer/record_accumulator.py b/kafka/producer/record_accumulator.py index f13c21b9f..77d48d84f 100644 --- a/kafka/producer/record_accumulator.py +++ b/kafka/producer/record_accumulator.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import +from __future__ import absolute_import, division import collections import copy @@ -6,8 +6,14 @@ import threading import time +try: + # enum in stdlib as of py3.4 + from enum import IntEnum # pylint: disable=import-error +except ImportError: + # vendored backport module + from kafka.vendor.enum34 import IntEnum + import kafka.errors as Errors -from kafka.producer.buffer import SimpleBufferPool from kafka.producer.future import FutureRecordMetadata, FutureProduceResult from kafka.record.memory_records import MemoryRecordsBuilder from kafka.structs import TopicPartition @@ -35,10 +41,16 @@ def get(self): return self._val +class FinalState(IntEnum): + ABORTED = 0 + FAILED = 1 + SUCCEEDED = 2 + + class ProducerBatch(object): - def __init__(self, tp, records, buffer): + def __init__(self, tp, records, now=None): + now = time.time() if now is None else now self.max_record_size = 0 - now = time.time() self.created = now self.drained = None self.attempts = 0 @@ -48,81 +60,120 @@ def __init__(self, tp, records, buffer): self.topic_partition = tp self.produce_future = FutureProduceResult(tp) self._retry = False - self._buffer = buffer # We only save it, we don't write to it + self._final_state = None + + @property + def final_state(self): + return self._final_state @property def record_count(self): return self.records.next_offset() - def try_append(self, timestamp_ms, key, value, headers): + @property + def producer_id(self): + return self.records.producer_id if self.records else None + + @property + def producer_epoch(self): + return self.records.producer_epoch if self.records else None + + @property + def has_sequence(self): + return self.records.has_sequence if self.records else False + + def try_append(self, timestamp_ms, key, value, headers, now=None): metadata = self.records.append(timestamp_ms, key, value, headers) if metadata is None: return None + now = time.time() if now is None else now self.max_record_size = max(self.max_record_size, metadata.size) - self.last_append = time.time() - future = FutureRecordMetadata(self.produce_future, metadata.offset, - metadata.timestamp, metadata.crc, - len(key) if key is not None else -1, - len(value) if value is not None else -1, - sum(len(h_key.encode("utf-8")) + len(h_val) for h_key, h_val in headers) if headers else -1) + self.last_append = now + future = FutureRecordMetadata( + self.produce_future, + metadata.offset, + metadata.timestamp, + metadata.crc, + len(key) if key is not None else -1, + len(value) if value is not None else -1, + sum(len(h_key.encode("utf-8")) + len(h_val) for h_key, h_val in headers) if headers else -1) return future - def done(self, base_offset=None, timestamp_ms=None, exception=None, log_start_offset=None, global_error=None): - level = logging.DEBUG if exception is None else logging.WARNING - log.log(level, "Produced messages to topic-partition %s with base offset" - " %s log start offset %s and error %s.", self.topic_partition, base_offset, - 
log_start_offset, global_error) # trace - if self.produce_future.is_done: - log.warning('Batch is already closed -- ignoring batch.done()') - return - elif exception is None: - self.produce_future.success((base_offset, timestamp_ms, log_start_offset)) - else: - self.produce_future.failure(exception) - - def maybe_expire(self, request_timeout_ms, retry_backoff_ms, linger_ms, is_full): - """Expire batches if metadata is not available - - A batch whose metadata is not available should be expired if one - of the following is true: + def abort(self, exception): + """Abort the batch and complete the future and callbacks.""" + if self._final_state is not None: + raise Errors.IllegalStateError("Batch has already been completed in final state: %s" % self._final_state) + self._final_state = FinalState.ABORTED - * the batch is not in retry AND request timeout has elapsed after - it is ready (full or linger.ms has reached). + log.debug("Aborting batch for partition %s: %s", self.topic_partition, exception) + self._complete_future(-1, -1, exception) - * the batch is in retry AND request timeout has elapsed after the - backoff period ended. + def done(self, base_offset=None, timestamp_ms=None, exception=None): """ - now = time.time() - since_append = now - self.last_append - since_ready = now - (self.created + linger_ms / 1000.0) - since_backoff = now - (self.last_attempt + retry_backoff_ms / 1000.0) - timeout = request_timeout_ms / 1000.0 - - error = None - if not self.in_retry() and is_full and timeout < since_append: - error = "%d seconds have passed since last append" % (since_append,) - elif not self.in_retry() and timeout < since_ready: - error = "%d seconds have passed since batch creation plus linger time" % (since_ready,) - elif self.in_retry() and timeout < since_backoff: - error = "%d seconds have passed since last attempt plus backoff time" % (since_backoff,) - - if error: - self.records.close() - self.done(-1, None, Errors.KafkaTimeoutError( - "Batch for %s containing %s record(s) expired: %s" % ( - self.topic_partition, self.records.next_offset(), error))) + Finalize the state of a batch. Final state, once set, is immutable. This function may be called + once or twice on a batch. It may be called twice if + 1. An inflight batch expires before a response from the broker is received. The batch's final + state is set to FAILED. But it could succeed on the broker and second time around batch.done() may + try to set SUCCEEDED final state. + + 2. If a transaction abortion happens or if the producer is closed forcefully, the final state is + ABORTED but again it could succeed if broker responds with a success. + + Attempted transitions from [FAILED | ABORTED] --> SUCCEEDED are logged. + Attempted transitions from one failure state to the same or a different failed state are ignored. + Attempted transitions from SUCCEEDED to the same or a failed state throw an exception. 
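The transition policy spelled out above can be condensed into a small helper for clarity (a restatement of the rules only, not the code added by this patch; the function name and string states are invented for illustration):

    import kafka.errors as Errors

    def resolve_final_state(current, proposed):
        # First completion wins: None -> SUCCEEDED / FAILED / ABORTED is always allowed.
        if current is None:
            return proposed
        # A batch that already SUCCEEDED must never attempt another state change.
        if current == 'SUCCEEDED':
            raise Errors.IllegalStateError(
                'A SUCCEEDED batch must not attempt another state change to %s' % (proposed,))
        # FAILED/ABORTED -> SUCCEEDED (a late broker success) and failure -> failure
        # transitions keep the original state and are only logged.
        return current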
+ """ + final_state = FinalState.SUCCEEDED if exception is None else FinalState.FAILED + if self._final_state is None: + self._final_state = final_state + if final_state is FinalState.SUCCEEDED: + log.debug("Successfully produced messages to %s with base offset %s", self.topic_partition, base_offset) + else: + log.warning("Failed to produce messages to topic-partition %s with base offset %s: %s", + self.topic_partition, base_offset, exception) + self._complete_future(base_offset, timestamp_ms, exception) return True + + elif self._final_state is not FinalState.SUCCEEDED: + if final_state is FinalState.SUCCEEDED: + # Log if a previously unsuccessful batch succeeded later on. + log.debug("ProduceResponse returned %s for %s after batch with base offset %s had already been %s.", + final_state, self.topic_partition, base_offset, self._final_state) + else: + # FAILED --> FAILED and ABORTED --> FAILED transitions are ignored. + log.debug("Ignored state transition %s -> %s for %s batch with base offset %s", + self._final_state, final_state, self.topic_partition, base_offset) + else: + # A SUCCESSFUL batch must not attempt another state change. + raise Errors.IllegalStateError("A %s batch must not attempt another state change to %s" % (self._final_state, final_state)) return False + def _complete_future(self, base_offset, timestamp_ms, exception): + if self.produce_future.is_done: + raise Errors.IllegalStateError('Batch is already closed!') + elif exception is None: + self.produce_future.success((base_offset, timestamp_ms)) + else: + self.produce_future.failure(exception) + + def has_reached_delivery_timeout(self, delivery_timeout_ms, now=None): + now = time.time() if now is None else now + return delivery_timeout_ms / 1000 <= now - self.created + def in_retry(self): return self._retry - def set_retry(self): + def retry(self, now=None): + now = time.time() if now is None else now self._retry = True + self.attempts += 1 + self.last_attempt = now + self.last_append = now - def buffer(self): - return self._buffer + @property + def is_done(self): + return self.produce_future.is_done def __str__(self): return 'ProducerBatch(topic_partition=%s, record_count=%d)' % ( @@ -143,12 +194,6 @@ class RecordAccumulator(object): A small batch size will make batching less common and may reduce throughput (a batch size of zero will disable batching entirely). Default: 16384 - buffer_memory (int): The total bytes of memory the producer should use - to buffer records waiting to be sent to the server. If records are - sent faster than they can be delivered to the server the producer - will block up to max_block_ms, raising an exception on timeout. - In the current implementation, this setting is an approximation. - Default: 33554432 (32MB) compression_attrs (int): The compression type for all data generated by the producer. Valid values are gzip(1), snappy(2), lz4(3), or none(0). @@ -166,14 +211,14 @@ class RecordAccumulator(object): all retries in a short period of time. 
Default: 100 """ DEFAULT_CONFIG = { - 'buffer_memory': 33554432, 'batch_size': 16384, 'compression_attrs': 0, 'linger_ms': 0, + 'request_timeout_ms': 30000, + 'delivery_timeout_ms': 120000, 'retry_backoff_ms': 100, - 'message_version': 0, - 'metrics': None, - 'metric_group_prefix': 'producer-metrics', + 'transaction_manager': None, + 'message_version': 2, } def __init__(self, **configs): @@ -183,22 +228,37 @@ def __init__(self, **configs): self.config[key] = configs.pop(key) self._closed = False + self._transaction_manager = self.config['transaction_manager'] self._flushes_in_progress = AtomicInteger() self._appends_in_progress = AtomicInteger() self._batches = collections.defaultdict(collections.deque) # TopicPartition: [ProducerBatch] self._tp_locks = {None: threading.Lock()} # TopicPartition: Lock, plus a lock to add entries - self._free = SimpleBufferPool(self.config['buffer_memory'], - self.config['batch_size'], - metrics=self.config['metrics'], - metric_group_prefix=self.config['metric_group_prefix']) self._incomplete = IncompleteProducerBatches() # The following variables should only be accessed by the sender thread, # so we don't need to protect them w/ locking. self.muted = set() self._drain_index = 0 + self._next_batch_expiry_time_ms = float('inf') - def append(self, tp, timestamp_ms, key, value, headers, max_time_to_block_ms, - estimated_size=0): + if self.config['delivery_timeout_ms'] < self.config['linger_ms'] + self.config['request_timeout_ms']: + raise Errors.KafkaConfigurationError("Must set delivery_timeout_ms higher than linger_ms + request_timeout_ms") + + @property + def delivery_timeout_ms(self): + return self.config['delivery_timeout_ms'] + + @property + def next_expiry_time_ms(self): + return self._next_batch_expiry_time_ms + + def _tp_lock(self, tp): + if tp not in self._tp_locks: + with self._tp_locks[None]: + if tp not in self._tp_locks: + self._tp_locks[tp] = threading.Lock() + return self._tp_locks[tp] + + def append(self, tp, timestamp_ms, key, value, headers, now=None): """Add a record to the accumulator, return the append result. The append result will contain the future metadata, and flag for @@ -211,59 +271,53 @@ def append(self, tp, timestamp_ms, key, value, headers, max_time_to_block_ms, key (bytes): The key for the record value (bytes): The value for the record headers (List[Tuple[str, bytes]]): The header fields for the record - max_time_to_block_ms (int): The maximum time in milliseconds to - block for buffer memory to be available Returns: tuple: (future, batch_is_full, new_batch_created) """ assert isinstance(tp, TopicPartition), 'not TopicPartition' assert not self._closed, 'RecordAccumulator is closed' + now = time.time() if now is None else now # We keep track of the number of appending thread to make sure we do # not miss batches in abortIncompleteBatches(). 
self._appends_in_progress.increment() try: - if tp not in self._tp_locks: - with self._tp_locks[None]: - if tp not in self._tp_locks: - self._tp_locks[tp] = threading.Lock() - - with self._tp_locks[tp]: + with self._tp_lock(tp): # check if we have an in-progress batch dq = self._batches[tp] if dq: last = dq[-1] - future = last.try_append(timestamp_ms, key, value, headers) + future = last.try_append(timestamp_ms, key, value, headers, now=now) if future is not None: batch_is_full = len(dq) > 1 or last.records.is_full() return future, batch_is_full, False - size = max(self.config['batch_size'], estimated_size) - log.debug("Allocating a new %d byte message buffer for %s", size, tp) # trace - buf = self._free.allocate(size, max_time_to_block_ms) - with self._tp_locks[tp]: + with self._tp_lock(tp): # Need to check if producer is closed again after grabbing the # dequeue lock. assert not self._closed, 'RecordAccumulator is closed' if dq: last = dq[-1] - future = last.try_append(timestamp_ms, key, value, headers) + future = last.try_append(timestamp_ms, key, value, headers, now=now) if future is not None: # Somebody else found us a batch, return the one we # waited for! Hopefully this doesn't happen often... - self._free.deallocate(buf) batch_is_full = len(dq) > 1 or last.records.is_full() return future, batch_is_full, False + if self._transaction_manager and self.config['message_version'] < 2: + raise Errors.UnsupportedVersionError("Attempting to use idempotence with a broker which" + " does not support the required message format (v2)." + " The broker must be version 0.11 or later.") records = MemoryRecordsBuilder( self.config['message_version'], self.config['compression_attrs'], self.config['batch_size'] ) - batch = ProducerBatch(tp, records, buf) - future = batch.try_append(timestamp_ms, key, value, headers) + batch = ProducerBatch(tp, records, now=now) + future = batch.try_append(timestamp_ms, key, value, headers, now=now) if not future: raise Exception() @@ -274,79 +328,43 @@ def append(self, tp, timestamp_ms, key, value, headers, max_time_to_block_ms, finally: self._appends_in_progress.decrement() - def abort_expired_batches(self, request_timeout_ms, cluster): - """Abort the batches that have been sitting in RecordAccumulator for - more than the configured request_timeout due to metadata being - unavailable. + def reset_next_batch_expiry_time(self): + self._next_batch_expiry_time_ms = float('inf') - Arguments: - request_timeout_ms (int): milliseconds to timeout - cluster (ClusterMetadata): current metadata for kafka cluster + def maybe_update_next_batch_expiry_time(self, batch): + self._next_batch_expiry_time_ms = min(self._next_batch_expiry_time_ms, batch.created * 1000 + self.delivery_timeout_ms) - Returns: - list of ProducerBatch that were expired - """ + def expired_batches(self, now=None): + """Get a list of batches which have been sitting in the accumulator too long and need to be expired.""" expired_batches = [] - to_remove = [] - count = 0 for tp in list(self._batches.keys()): - assert tp in self._tp_locks, 'TopicPartition not in locks dict' - - # We only check if the batch should be expired if the partition - # does not have a batch in flight. This is to avoid the later - # batches get expired when an earlier batch is still in progress. - # This protection only takes effect when user sets - # max.in.flight.request.per.connection=1. Otherwise the expiration - # order is not guranteed. 
- if tp in self.muted: - continue - - with self._tp_locks[tp]: + with self._tp_lock(tp): # iterate over the batches and expire them if they have stayed # in accumulator for more than request_timeout_ms dq = self._batches[tp] - for batch in dq: - is_full = bool(bool(batch != dq[-1]) or batch.records.is_full()) - # check if the batch is expired - if batch.maybe_expire(request_timeout_ms, - self.config['retry_backoff_ms'], - self.config['linger_ms'], - is_full): + while dq: + batch = dq[0] + if batch.has_reached_delivery_timeout(self.delivery_timeout_ms, now=now): + dq.popleft() + batch.records.close() expired_batches.append(batch) - to_remove.append(batch) - count += 1 - self.deallocate(batch) else: # Stop at the first batch that has not expired. + self.maybe_update_next_batch_expiry_time(batch) break - - # Python does not allow us to mutate the dq during iteration - # Assuming expired batches are infrequent, this is better than - # creating a new copy of the deque for iteration on every loop - if to_remove: - for batch in to_remove: - dq.remove(batch) - to_remove = [] - - if expired_batches: - log.warning("Expired %d batches in accumulator", count) # trace - return expired_batches - def reenqueue(self, batch): - """Re-enqueue the given record batch in the accumulator to retry.""" - now = time.time() - batch.attempts += 1 - batch.last_attempt = now - batch.last_append = now - batch.set_retry() - assert batch.topic_partition in self._tp_locks, 'TopicPartition not in locks dict' - assert batch.topic_partition in self._batches, 'TopicPartition not in batches' - dq = self._batches[batch.topic_partition] - with self._tp_locks[batch.topic_partition]: + def reenqueue(self, batch, now=None): + """ + Re-enqueue the given record batch in the accumulator. In Sender._complete_batch method, we check + whether the batch has reached delivery_timeout_ms or not. Hence we do not do the delivery timeout check here. 
+ """ + batch.retry(now=now) + with self._tp_lock(batch.topic_partition): + dq = self._batches[batch.topic_partition] dq.appendleft(batch) - def ready(self, cluster): + def ready(self, cluster, now=None): """ Get a list of nodes whose partitions are ready to be sent, and the earliest time at which any non-sendable partition will be ready; @@ -380,9 +398,8 @@ def ready(self, cluster): ready_nodes = set() next_ready_check = 9999999.99 unknown_leaders_exist = False - now = time.time() + now = time.time() if now is None else now - exhausted = bool(self._free.queued() > 0) # several threads are accessing self._batches -- to simplify # concurrent access, we iterate over a snapshot of partitions # and lock each partition separately as needed @@ -397,22 +414,22 @@ def ready(self, cluster): elif tp in self.muted: continue - with self._tp_locks[tp]: + with self._tp_lock(tp): dq = self._batches[tp] if not dq: continue batch = dq[0] - retry_backoff = self.config['retry_backoff_ms'] / 1000.0 - linger = self.config['linger_ms'] / 1000.0 - backing_off = bool(batch.attempts > 0 and - batch.last_attempt + retry_backoff > now) + retry_backoff = self.config['retry_backoff_ms'] / 1000 + linger = self.config['linger_ms'] / 1000 + backing_off = bool(batch.attempts > 0 + and (batch.last_attempt + retry_backoff) > now) waited_time = now - batch.last_attempt time_to_wait = retry_backoff if backing_off else linger time_left = max(time_to_wait - waited_time, 0) full = bool(len(dq) > 1 or batch.records.is_full()) expired = bool(waited_time >= time_to_wait) - sendable = (full or expired or exhausted or self._closed or + sendable = (full or expired or self._closed or self._flush_in_progress()) if sendable and not backing_off: @@ -427,16 +444,98 @@ def ready(self, cluster): return ready_nodes, next_ready_check, unknown_leaders_exist - def has_unsent(self): - """Return whether there is any unsent record in the accumulator.""" + def has_undrained(self): + """Check whether there are any batches which haven't been drained""" for tp in list(self._batches.keys()): - with self._tp_locks[tp]: + with self._tp_lock(tp): dq = self._batches[tp] if len(dq): return True return False - def drain(self, cluster, nodes, max_size): + def _should_stop_drain_batches_for_partition(self, first, tp): + if self._transaction_manager: + if not self._transaction_manager.is_send_to_partition_allowed(tp): + return True + if not self._transaction_manager.producer_id_and_epoch.is_valid: + # we cannot send the batch until we have refreshed the PID + log.debug("Waiting to send ready batches because transaction producer id is not valid") + return True + return False + + def drain_batches_for_one_node(self, cluster, node_id, max_size, now=None): + now = time.time() if now is None else now + size = 0 + ready = [] + partitions = list(cluster.partitions_for_broker(node_id)) + if not partitions: + return ready + # to make starvation less likely this loop doesn't start at 0 + self._drain_index %= len(partitions) + start = None + while start != self._drain_index: + tp = partitions[self._drain_index] + if start is None: + start = self._drain_index + self._drain_index += 1 + self._drain_index %= len(partitions) + + # Only proceed if the partition has no in-flight batches. 
+ if tp in self.muted: + continue + + if tp not in self._batches: + continue + + with self._tp_lock(tp): + dq = self._batches[tp] + if len(dq) == 0: + continue + first = dq[0] + backoff = bool(first.attempts > 0 and + first.last_attempt + self.config['retry_backoff_ms'] / 1000 > now) + # Only drain the batch if it is not during backoff + if backoff: + continue + + if (size + first.records.size_in_bytes() > max_size + and len(ready) > 0): + # there is a rare case that a single batch + # size is larger than the request size due + # to compression; in this case we will + # still eventually send this batch in a + # single request + break + else: + if self._should_stop_drain_batches_for_partition(first, tp): + break + + batch = dq.popleft() + if self._transaction_manager and not batch.in_retry(): + # If the batch is in retry, then we should not change the pid and + # sequence number, since this may introduce duplicates. In particular, + # the previous attempt may actually have been accepted, and if we change + # the pid and sequence here, this attempt will also be accepted, causing + # a duplicate. + sequence_number = self._transaction_manager.sequence_number(batch.topic_partition) + log.debug("Dest: %s: %s producer_id=%s epoch=%s sequence=%s", + node_id, batch.topic_partition, + self._transaction_manager.producer_id_and_epoch.producer_id, + self._transaction_manager.producer_id_and_epoch.epoch, + sequence_number) + batch.records.set_producer_state( + self._transaction_manager.producer_id_and_epoch.producer_id, + self._transaction_manager.producer_id_and_epoch.epoch, + sequence_number, + self._transaction_manager.is_transactional() + ) + batch.records.close() + size += batch.records.size_in_bytes() + ready.append(batch) + batch.drained = now + return ready + + def drain(self, cluster, nodes, max_size, now=None): """ Drain all the data for the given nodes and collate them into a list of batches that will fit within the specified size on a per-node basis. 
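For context, the Sender thread drives ready()/drain() roughly as sketched below (simplified; accumulator, cluster, and client stand for the producer's internals, and send_produce_request is a hypothetical stand-in for the Sender's request building via _create_produce_requests and the network send):

    def send_produce_request(client, node_id, batches):
        # Hypothetical placeholder for Sender._create_produce_requests + client.send
        ...

    def run_once(accumulator, cluster, client, max_request_size=1048576):
        # One simplified Sender iteration: pick nodes with sendable data, then pack
        # at most max_request_size bytes of closed batches per node.
        ready_nodes, next_ready_check, unknown_leaders_exist = accumulator.ready(cluster)
        batches_by_node = accumulator.drain(cluster, ready_nodes, max_request_size)
        for node_id, batches in batches_by_node.items():
            send_produce_request(client, node_id, batches)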
@@ -454,57 +553,15 @@ def drain(self, cluster, nodes, max_size): if not nodes: return {} - now = time.time() + now = time.time() if now is None else now batches = {} for node_id in nodes: - size = 0 - partitions = list(cluster.partitions_for_broker(node_id)) - ready = [] - # to make starvation less likely this loop doesn't start at 0 - self._drain_index %= len(partitions) - start = self._drain_index - while True: - tp = partitions[self._drain_index] - if tp in self._batches and tp not in self.muted: - with self._tp_locks[tp]: - dq = self._batches[tp] - if dq: - first = dq[0] - backoff = ( - bool(first.attempts > 0) and - bool(first.last_attempt + - self.config['retry_backoff_ms'] / 1000.0 - > now) - ) - # Only drain the batch if it is not during backoff - if not backoff: - if (size + first.records.size_in_bytes() > max_size - and len(ready) > 0): - # there is a rare case that a single batch - # size is larger than the request size due - # to compression; in this case we will - # still eventually send this batch in a - # single request - break - else: - batch = dq.popleft() - batch.records.close() - size += batch.records.size_in_bytes() - ready.append(batch) - batch.drained = now - - self._drain_index += 1 - self._drain_index %= len(partitions) - if start == self._drain_index: - break - - batches[node_id] = ready + batches[node_id] = self.drain_batches_for_one_node(cluster, node_id, max_size, now=now) return batches def deallocate(self, batch): """Deallocate the record batch.""" self._incomplete.remove(batch) - self._free.deallocate(batch.buffer()) def _flush_in_progress(self): """Are there any threads currently waiting on a flush?""" @@ -535,6 +592,10 @@ def await_flush_completion(self, timeout=None): finally: self._flushes_in_progress.decrement() + @property + def has_incomplete(self): + return bool(self._incomplete) + def abort_incomplete_batches(self): """ This function is only called when sender is closed forcefully. It will fail all the @@ -544,27 +605,41 @@ def abort_incomplete_batches(self): # 1. Avoid losing batches. # 2. Free up memory in case appending threads are blocked on buffer full. # This is a tight loop but should be able to get through very quickly. + error = Errors.IllegalStateError("Producer is closed forcefully.") while True: - self._abort_batches() + self._abort_batches(error) if not self._appends_in_progress.get(): break # After this point, no thread will append any messages because they will see the close # flag set. We need to do the last abort after no thread was appending in case the there was a new # batch appended by the last appending thread. 
- self._abort_batches() + self._abort_batches(error) self._batches.clear() - def _abort_batches(self): + def _abort_batches(self, error): """Go through incomplete batches and abort them.""" - error = Errors.IllegalStateError("Producer is closed forcefully.") for batch in self._incomplete.all(): tp = batch.topic_partition # Close the batch before aborting - with self._tp_locks[tp]: + with self._tp_lock(tp): batch.records.close() - batch.done(exception=error) + self._batches[tp].remove(batch) + batch.abort(error) self.deallocate(batch) + def abort_undrained_batches(self, error): + for batch in self._incomplete.all(): + tp = batch.topic_partition + with self._tp_lock(tp): + aborted = False + if not batch.is_done: + aborted = True + batch.records.close() + self._batches[tp].remove(batch) + if aborted: + batch.abort(error) + self.deallocate(batch) + def close(self): """Close this accumulator and force all the record buffers to be drained.""" self._closed = True @@ -579,12 +654,21 @@ def __init__(self): def add(self, batch): with self._lock: - return self._incomplete.add(batch) + self._incomplete.add(batch) def remove(self, batch): with self._lock: - return self._incomplete.remove(batch) + try: + self._incomplete.remove(batch) + except KeyError: + pass def all(self): with self._lock: return list(self._incomplete) + + def __bool__(self): + return bool(self._incomplete) + + + __nonzero__ = __bool__ diff --git a/kafka/producer/sender.py b/kafka/producer/sender.py index 3dd52ba76..7a4c557c8 100644 --- a/kafka/producer/sender.py +++ b/kafka/producer/sender.py @@ -2,6 +2,7 @@ import collections import copy +import heapq import logging import threading import time @@ -11,6 +12,8 @@ from kafka import errors as Errors from kafka.metrics.measurable import AnonMeasurable from kafka.metrics.stats import Avg, Max, Rate +from kafka.producer.transaction_manager import ProducerIdAndEpoch +from kafka.protocol.init_producer_id import InitProducerIdRequest from kafka.protocol.produce import ProduceRequest from kafka.structs import TopicPartition from kafka.version import __version__ @@ -27,13 +30,18 @@ class Sender(threading.Thread): DEFAULT_CONFIG = { 'max_request_size': 1048576, 'acks': 1, - 'retries': 0, + 'retries': float('inf'), 'request_timeout_ms': 30000, + 'retry_backoff_ms': 100, + 'metrics': None, 'guarantee_message_order': False, + 'transaction_manager': None, + 'transactional_id': None, + 'transaction_timeout_ms': 60000, 'client_id': 'kafka-python-' + __version__, } - def __init__(self, client, metadata, accumulator, metrics, **configs): + def __init__(self, client, metadata, accumulator, **configs): super(Sender, self).__init__() self.config = copy.copy(self.DEFAULT_CONFIG) for key in self.config: @@ -47,32 +55,75 @@ def __init__(self, client, metadata, accumulator, metrics, **configs): self._running = True self._force_close = False self._topics_to_add = set() - self._sensors = SenderMetrics(metrics, self._client, self._metadata) + if self.config['metrics']: + self._sensors = SenderMetrics(self.config['metrics'], self._client, self._metadata) + else: + self._sensors = None + self._transaction_manager = self.config['transaction_manager'] + # A per-partition queue of batches ordered by creation time for tracking the in-flight batches + self._in_flight_batches = collections.defaultdict(list) + + def _maybe_remove_from_inflight_batches(self, batch): + try: + queue = self._in_flight_batches[batch.topic_partition] + except KeyError: + return + try: + idx = queue.index((batch.created, batch)) + except 
ValueError: + return + # https://stackoverflow.com/questions/10162679/python-delete-element-from-heap + queue[idx] = queue[-1] + queue.pop() + heapq.heapify(queue) + + def _get_expired_inflight_batches(self, now=None): + """Get the in-flight batches that has reached delivery timeout.""" + expired_batches = [] + to_remove = [] + for tp, queue in six.iteritems(self._in_flight_batches): + while queue: + _created_at, batch = queue[0] + if batch.has_reached_delivery_timeout(self._accumulator.delivery_timeout_ms): + heapq.heappop(queue) + if batch.final_state is None: + expired_batches.append(batch) + else: + raise Errors.IllegalStateError("%s batch created at %s gets unexpected final state %s" % (batch.topic_partition, batch.created, batch.final_state)) + else: + self._accumulator.maybe_update_next_batch_expiry_time(batch) + break + else: + # Avoid mutating in_flight_batches during iteration + to_remove.append(tp) + for tp in to_remove: + del self._in_flight_batches[tp] + return expired_batches def run(self): """The main run loop for the sender thread.""" - log.debug("Starting Kafka producer I/O thread.") + log.debug("%s: Starting Kafka producer I/O thread.", str(self)) # main loop, runs until close is called while self._running: try: self.run_once() except Exception: - log.exception("Uncaught error in kafka producer I/O thread") + log.exception("%s: Uncaught error in kafka producer I/O thread", str(self)) - log.debug("Beginning shutdown of Kafka producer I/O thread, sending" - " remaining records.") + log.debug("%s: Beginning shutdown of Kafka producer I/O thread, sending" + " remaining records.", str(self)) # okay we stopped accepting requests but there may still be # requests in the accumulator or waiting for acknowledgment, # wait until these are completed. while (not self._force_close - and (self._accumulator.has_unsent() + and (self._accumulator.has_undrained() or self._client.in_flight_request_count() > 0)): try: self.run_once() except Exception: - log.exception("Uncaught error in kafka producer I/O thread") + log.exception("%s: Uncaught error in kafka producer I/O thread", str(self)) if self._force_close: # We need to fail all the incomplete batches and wake up the @@ -82,23 +133,54 @@ def run(self): try: self._client.close() except Exception: - log.exception("Failed to close network client") + log.exception("%s: Failed to close network client", str(self)) - log.debug("Shutdown of Kafka producer I/O thread has completed.") + log.debug("%s: Shutdown of Kafka producer I/O thread has completed.", str(self)) def run_once(self): """Run a single iteration of sending.""" while self._topics_to_add: self._client.add_topic(self._topics_to_add.pop()) + if self._transaction_manager: + try: + if not self._transaction_manager.is_transactional(): + # this is an idempotent producer, so make sure we have a producer id + self._maybe_wait_for_producer_id() + elif self._transaction_manager.has_in_flight_transactional_request() or self._maybe_send_transactional_request(): + # as long as there are outstanding transactional requests, we simply wait for them to return + self._client.poll(timeout_ms=self.config['retry_backoff_ms']) + return + + # do not continue sending if the transaction manager is in a failed state or if there + # is no producer id (for the idempotent case). 
+ if self._transaction_manager.has_fatal_error() or not self._transaction_manager.has_producer_id(): + last_error = self._transaction_manager.last_error + if last_error is not None: + self._maybe_abort_batches(last_error) + self._client.poll(timeout_ms=self.config['retry_backoff_ms']) + return + elif self._transaction_manager.has_abortable_error(): + self._accumulator.abort_undrained_batches(self._transaction_manager.last_error) + + except Errors.SaslAuthenticationFailedError as e: + # This is already logged as error, but propagated here to perform any clean ups. + log.debug("%s: Authentication exception while processing transactional request: %s", str(self), e) + self._transaction_manager.authentication_failed(e) + + poll_timeout_ms = self._send_producer_data() + self._client.poll(timeout_ms=poll_timeout_ms) + + def _send_producer_data(self, now=None): + now = time.time() if now is None else now # get the list of partitions with data ready to send - result = self._accumulator.ready(self._metadata) + result = self._accumulator.ready(self._metadata, now=now) ready_nodes, next_ready_check_delay, unknown_leaders_exist = result # if there are any partitions whose leaders are not known yet, force # metadata update if unknown_leaders_exist: - log.debug('Unknown leaders exist, requesting metadata update') + log.debug('%s: Unknown leaders exist, requesting metadata update', str(self)) self._metadata.request_update() # remove any nodes we aren't ready to send to @@ -106,14 +188,20 @@ def run_once(self): for node in list(ready_nodes): if not self._client.is_ready(node): node_delay_ms = self._client.connection_delay(node) - log.debug('Node %s not ready; delaying produce of accumulated batch (%f ms)', node, node_delay_ms) + log.debug('%s: Node %s not ready; delaying produce of accumulated batch (%f ms)', str(self), node, node_delay_ms) self._client.maybe_connect(node, wakeup=False) ready_nodes.remove(node) not_ready_timeout_ms = min(not_ready_timeout_ms, node_delay_ms) # create produce requests batches_by_node = self._accumulator.drain( - self._metadata, ready_nodes, self.config['max_request_size']) + self._metadata, ready_nodes, self.config['max_request_size'], now=now) + + for batch_list in six.itervalues(batches_by_node): + for batch in batch_list: + item = (batch.created, batch) + queue = self._in_flight_batches[batch.topic_partition] + heapq.heappush(queue, item) if self.config['guarantee_message_order']: # Mute all the partitions drained @@ -121,42 +209,130 @@ def run_once(self): for batch in batch_list: self._accumulator.muted.add(batch.topic_partition) - expired_batches = self._accumulator.abort_expired_batches( - self.config['request_timeout_ms'], self._metadata) + self._accumulator.reset_next_batch_expiry_time() + expired_batches = self._accumulator.expired_batches(now=now) + expired_batches.extend(self._get_expired_inflight_batches(now=now)) + + if expired_batches: + log.debug("%s: Expired %s batches in accumulator", str(self), len(expired_batches)) + + # Reset the producer_id if an expired batch has previously been sent to the broker. + # See the documentation of `TransactionState.reset_producer_id` to understand why + # we need to reset the producer id here. 
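# Illustrative note, not part of the patch: batch.in_retry() is used here as a proxy for "this batch was handed to a broker at least once", so it may already sit in the partition log even though it expired locally; the idempotent producer id and all cached sequence numbers must then be discarded (see TransactionManager.reset_producer_id), otherwise later sends could fail with OutOfOrderSequenceNumberError.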
+ if self._transaction_manager and any([batch.in_retry() for batch in expired_batches]): + needs_transaction_state_reset = True + else: + needs_transaction_state_reset = False + for expired_batch in expired_batches: - self._sensors.record_errors(expired_batch.topic_partition.topic, expired_batch.record_count) + error = Errors.KafkaTimeoutError( + "Expiring %d record(s) for %s: %s ms has passed since batch creation" % ( + expired_batch.record_count, expired_batch.topic_partition, + int((time.time() - expired_batch.created) * 1000))) + self._fail_batch(expired_batch, error, base_offset=-1) + + if self._sensors: + self._sensors.update_produce_request_metrics(batches_by_node) + + if needs_transaction_state_reset: + self._transaction_manager.reset_producer_id() + return 0 - self._sensors.update_produce_request_metrics(batches_by_node) requests = self._create_produce_requests(batches_by_node) # If we have any nodes that are ready to send + have sendable data, # poll with 0 timeout so this can immediately loop and try sending more - # data. Otherwise, the timeout is determined by nodes that have - # partitions with data that isn't yet sendable (e.g. lingering, backing - # off). Note that this specifically does not include nodes with + # data. Otherwise, the timeout will be the smaller value between next + # batch expiry time, and the delay time for checking data availability. + # Note that the nodes may have data that isn't yet sendable due to + # lingering, backing off, etc. This specifically does not include nodes with # sendable data that aren't ready to send since they would cause busy # looping. - poll_timeout_ms = min(next_ready_check_delay * 1000, not_ready_timeout_ms) + poll_timeout_ms = min(next_ready_check_delay * 1000, + not_ready_timeout_ms, + self._accumulator.next_expiry_time_ms - now * 1000) + if poll_timeout_ms < 0: + poll_timeout_ms = 0 + if ready_nodes: - log.debug("Nodes with data ready to send: %s", ready_nodes) # trace - log.debug("Created %d produce requests: %s", len(requests), requests) # trace + log.debug("%s: Nodes with data ready to send: %s", str(self), ready_nodes) # trace + log.debug("%s: Created %d produce requests: %s", str(self), len(requests), requests) # trace + # if some partitions are already ready to be sent, the select time + # would be 0; otherwise if some partition already has some data + # accumulated but not ready yet, the select time will be the time + # difference between now and its linger expiry time; otherwise the + # select time will be the time difference between now and the + # metadata expiry time poll_timeout_ms = 0 for node_id, request in six.iteritems(requests): batches = batches_by_node[node_id] - log.debug('Sending Produce Request: %r', request) + log.debug('%s: Sending Produce Request: %r', str(self), request) (self._client.send(node_id, request, wakeup=False) .add_callback( self._handle_produce_response, node_id, time.time(), batches) .add_errback( self._failed_produce, batches, node_id)) + return poll_timeout_ms + + def _maybe_send_transactional_request(self): + if self._transaction_manager.is_completing() and self._accumulator.has_incomplete: + if self._transaction_manager.is_aborting(): + self._accumulator.abort_undrained_batches(Errors.KafkaError("Failing batch since transaction was aborted")) + # There may still be requests left which are being retried. Since we do not know whether they had + # been successfully appended to the broker log, we must resend them until their final status is clear. 
+ # If they had been appended and we did not receive the error, then our sequence number would no longer + # be correct which would lead to an OutOfSequenceNumberError. + if not self._accumulator.flush_in_progress(): + self._accumulator.begin_flush() + + next_request_handler = self._transaction_manager.next_request_handler(self._accumulator.has_incomplete) + if next_request_handler is None: + return False + + log.debug("%s: Sending transactional request %s", str(self), next_request_handler.request) + while not self._force_close: + target_node = None + try: + if next_request_handler.needs_coordinator(): + target_node = self._transaction_manager.coordinator(next_request_handler.coordinator_type) + if target_node is None: + self._transaction_manager.lookup_coordinator_for_request(next_request_handler) + break + elif not self._client.await_ready(target_node, timeout_ms=self.config['request_timeout_ms']): + self._transaction_manager.lookup_coordinator_for_request(next_request_handler) + target_node = None + break + else: + target_node = self._client.least_loaded_node() + if target_node is not None and not self._client.await_ready(target_node, timeout_ms=self.config['request_timeout_ms']): + target_node = None + + if target_node is not None: + if next_request_handler.is_retry: + time.sleep(self.config['retry_backoff_ms'] / 1000) + txn_correlation_id = self._transaction_manager.next_in_flight_request_correlation_id() + future = self._client.send(target_node, next_request_handler.request) + future.add_both(next_request_handler.on_complete, txn_correlation_id) + return True + + except Exception as e: + log.warn("%s: Got an exception when trying to find a node to send a transactional request to. Going to back off and retry: %s", str(self), e) + if next_request_handler.needs_coordinator(): + self._transaction_manager.lookup_coordinator_for_request(next_request_handler) + break + + time.sleep(self.config['retry_backoff_ms'] / 1000) + self._metadata.request_update() - # if some partitions are already ready to be sent, the select time - # would be 0; otherwise if some partition already has some data - # accumulated but not ready yet, the select time will be the time - # difference between now and its linger expiry time; otherwise the - # select time will be the time difference between now and the - # metadata expiry time - self._client.poll(timeout_ms=poll_timeout_ms) + if target_node is None: + self._transaction_manager.retry(next_request_handler) + + return True + + def _maybe_abort_batches(self, exc): + if self._accumulator.has_incomplete: + log.error("%s: Aborting producer batches due to fatal error: %s", str(self), exc) + self._accumulator.abort_batches(exc) def initiate_close(self): """Start closing the sender (won't complete until all data is sent).""" @@ -179,79 +355,164 @@ def add_topic(self, topic): self._topics_to_add.add(topic) self.wakeup() + def _maybe_wait_for_producer_id(self): + while not self._transaction_manager.has_producer_id(): + try: + node_id = self._client.least_loaded_node() + if node_id is None or not self._client.await_ready(node_id): + log.debug("%s, Could not find an available broker to send InitProducerIdRequest to." 
+ + " Will back off and try again.", str(self)) + time.sleep(self._client.least_loaded_node_refresh_ms() / 1000) + continue + version = self._client.api_version(InitProducerIdRequest, max_version=1) + request = InitProducerIdRequest[version]( + transactional_id=self.config['transactional_id'], + transaction_timeout_ms=self.config['transaction_timeout_ms'], + ) + response = self._client.send_and_receive(node_id, request) + error_type = Errors.for_code(response.error_code) + if error_type is Errors.NoError: + self._transaction_manager.set_producer_id_and_epoch(ProducerIdAndEpoch(response.producer_id, response.producer_epoch)) + break + elif getattr(error_type, 'retriable', False): + log.debug("%s: Retriable error from InitProducerId response: %s", str(self), error_type.__name__) + if getattr(error_type, 'invalid_metadata', False): + self._metadata.request_update() + else: + self._transaction_manager.transition_to_fatal_error(error_type()) + break + except Errors.KafkaConnectionError: + log.debug("%s: Broker %s disconnected while awaiting InitProducerId response", str(self), node_id) + except Errors.RequestTimedOutError: + log.debug("%s: InitProducerId request to node %s timed out", str(self), node_id) + log.debug("%s: Retry InitProducerIdRequest in %sms.", str(self), self.config['retry_backoff_ms']) + time.sleep(self.config['retry_backoff_ms'] / 1000) + def _failed_produce(self, batches, node_id, error): - log.error("Error sending produce request to node %d: %s", node_id, error) # trace + log.error("%s: Error sending produce request to node %d: %s", str(self), node_id, error) # trace for batch in batches: - self._complete_batch(batch, error, -1, None) + self._complete_batch(batch, error, -1) def _handle_produce_response(self, node_id, send_time, batches, response): """Handle a produce response.""" # if we have a response, parse it - log.debug('Parsing produce response: %r', response) + log.debug('%s: Parsing produce response: %r', str(self), response) if response: batches_by_partition = dict([(batch.topic_partition, batch) for batch in batches]) for topic, partitions in response.topics: for partition_info in partitions: - global_error = None - log_start_offset = None if response.API_VERSION < 2: partition, error_code, offset = partition_info ts = None elif 2 <= response.API_VERSION <= 4: partition, error_code, offset, ts = partition_info elif 5 <= response.API_VERSION <= 7: - partition, error_code, offset, ts, log_start_offset = partition_info + partition, error_code, offset, ts, _log_start_offset = partition_info else: - # the ignored parameter is record_error of type list[(batch_index: int, error_message: str)] - partition, error_code, offset, ts, log_start_offset, _, global_error = partition_info + # Currently unused / TODO: KIP-467 + partition, error_code, offset, ts, _log_start_offset, _record_errors, _global_error = partition_info tp = TopicPartition(topic, partition) error = Errors.for_code(error_code) batch = batches_by_partition[tp] - self._complete_batch(batch, error, offset, ts, log_start_offset, global_error) + self._complete_batch(batch, error, offset, timestamp_ms=ts) else: # this is the acks = 0 case, just complete all requests for batch in batches: - self._complete_batch(batch, None, -1, None) + self._complete_batch(batch, None, -1) + + def _fail_batch(self, batch, exception, base_offset=None, timestamp_ms=None): + exception = exception if type(exception) is not type else exception() + if self._transaction_manager: + if isinstance(exception, 
Errors.OutOfOrderSequenceNumberError) and \ + not self._transaction_manager.is_transactional() and \ + self._transaction_manager.has_producer_id(batch.producer_id): + log.error("%s: The broker received an out of order sequence number for topic-partition %s" + " at offset %s. This indicates data loss on the broker, and should be investigated.", + str(self), batch.topic_partition, base_offset) + + # Reset the transaction state since we have hit an irrecoverable exception and cannot make any guarantees + # about the previously committed message. Note that this will discard the producer id and sequence + # numbers for all existing partitions. + self._transaction_manager.reset_producer_id() + elif isinstance(exception, (Errors.ClusterAuthorizationFailedError, + Errors.TransactionalIdAuthorizationFailedError, + Errors.ProducerFencedError, + Errors.InvalidTxnStateError)): + self._transaction_manager.transition_to_fatal_error(exception) + elif self._transaction_manager.is_transactional(): + self._transaction_manager.transition_to_abortable_error(exception) + + if self._sensors: + self._sensors.record_errors(batch.topic_partition.topic, batch.record_count) + + if batch.done(base_offset=base_offset, timestamp_ms=timestamp_ms, exception=exception): + self._maybe_remove_from_inflight_batches(batch) + self._accumulator.deallocate(batch) - def _complete_batch(self, batch, error, base_offset, timestamp_ms=None, log_start_offset=None, global_error=None): + def _complete_batch(self, batch, error, base_offset, timestamp_ms=None): """Complete or retry the given batch of records. Arguments: - batch (RecordBatch): The record batch + batch (ProducerBatch): The record batch error (Exception): The error (or None if none) base_offset (int): The base offset assigned to the records if successful timestamp_ms (int, optional): The timestamp returned by the broker for this batch - log_start_offset (int): The start offset of the log at the time this produce response was created - global_error (str): The summarising error message """ # Standardize no-error to None if error is Errors.NoError: error = None - if error is not None and self._can_retry(batch, error): - # retry - log.warning("Got error produce response on topic-partition %s," - " retrying (%d attempts left). Error: %s", - batch.topic_partition, - self.config['retries'] - batch.attempts - 1, - global_error or error) - self._accumulator.reenqueue(batch) - self._sensors.record_retries(batch.topic_partition.topic, batch.record_count) + if error is not None: + if self._can_retry(batch, error): + # retry + log.warning("%s: Got error produce response on topic-partition %s," + " retrying (%s attempts left). Error: %s", + str(self), batch.topic_partition, + self.config['retries'] - batch.attempts - 1, + error) + + # If idempotence is enabled only retry the request if the batch matches our current producer id and epoch + if not self._transaction_manager or self._transaction_manager.producer_id_and_epoch.match(batch): + log.debug("%s: Retrying batch to topic-partition %s. Sequence number: %s", + str(self), batch.topic_partition, + self._transaction_manager.sequence_number(batch.topic_partition) if self._transaction_manager else None) + self._accumulator.reenqueue(batch) + self._maybe_remove_from_inflight_batches(batch) + if self._sensors: + self._sensors.record_retries(batch.topic_partition.topic, batch.record_count) + else: + log.warning("%s: Attempted to retry sending a batch but the producer id/epoch changed from %s/%s to %s/%s. 
This batch will be dropped", + str(self), batch.producer_id, batch.producer_epoch, + self._transaction_manager.producer_id_and_epoch.producer_id, + self._transaction_manager.producer_id_and_epoch.epoch) + self._fail_batch(batch, error, base_offset=base_offset, timestamp_ms=timestamp_ms) + else: + if error is Errors.TopicAuthorizationFailedError: + error = error(batch.topic_partition.topic) + + # tell the user the result of their request + self._fail_batch(batch, error, base_offset=base_offset, timestamp_ms=timestamp_ms) + + if error is Errors.UnknownTopicOrPartitionError: + log.warning("%s: Received unknown topic or partition error in produce request on partition %s." + " The topic/partition may not exist or the user may not have Describe access to it", + str(self), batch.topic_partition) + + if getattr(error, 'invalid_metadata', False): + self._metadata.request_update() + else: - if error is Errors.TopicAuthorizationFailedError: - error = error(batch.topic_partition.topic) + if batch.done(base_offset=base_offset, timestamp_ms=timestamp_ms): + self._maybe_remove_from_inflight_batches(batch) + self._accumulator.deallocate(batch) - # tell the user the result of their request - batch.done(base_offset, timestamp_ms, error, log_start_offset, global_error) - self._accumulator.deallocate(batch) - if error is not None: - self._sensors.record_errors(batch.topic_partition.topic, batch.record_count) - - if getattr(error, 'invalid_metadata', False): - self._metadata.request_update() + if self._transaction_manager and self._transaction_manager.producer_id_and_epoch.match(batch): + self._transaction_manager.increment_sequence_number(batch.topic_partition, batch.record_count) + log.debug("%s: Incremented sequence number for topic-partition %s to %s", str(self), batch.topic_partition, + self._transaction_manager.sequence_number(batch.topic_partition)) # Unmute the completed partition. if self.config['guarantee_message_order']: @@ -262,8 +523,10 @@ def _can_retry(self, batch, error): We can retry a send if the error is transient and the number of attempts taken is fewer than the maximum allowed """ - return (batch.attempts < self.config['retries'] - and getattr(error, 'retriable', False)) + return (not batch.has_reached_delivery_timeout(self._accumulator.delivery_timeout_ms) and + batch.attempts < self.config['retries'] and + batch.final_state is None and + getattr(error, 'retriable', False)) def _create_produce_requests(self, collated): """ @@ -271,16 +534,17 @@ def _create_produce_requests(self, collated): per-node basis. 
Arguments: - collated: {node_id: [RecordBatch]} + collated: {node_id: [ProducerBatch]} Returns: dict: {node_id: ProduceRequest} (version depends on client api_versions) """ requests = {} for node_id, batches in six.iteritems(collated): - requests[node_id] = self._produce_request( - node_id, self.config['acks'], - self.config['request_timeout_ms'], batches) + if batches: + requests[node_id] = self._produce_request( + node_id, self.config['acks'], + self.config['request_timeout_ms'], batches) return requests def _produce_request(self, node_id, acks, timeout, batches): @@ -298,14 +562,25 @@ def _produce_request(self, node_id, acks, timeout, batches): produce_records_by_partition[topic][partition] = buf version = self._client.api_version(ProduceRequest, max_version=7) - # TODO: support transactional_id - return ProduceRequest[version]( - required_acks=acks, - timeout=timeout, - topics=[(topic, list(partition_info.items())) - for topic, partition_info - in six.iteritems(produce_records_by_partition)], - ) + topic_partition_data = [ + (topic, list(partition_info.items())) + for topic, partition_info in six.iteritems(produce_records_by_partition)] + transactional_id = self._transaction_manager.transactional_id if self._transaction_manager else None + if version >= 3: + return ProduceRequest[version]( + transactional_id=transactional_id, + required_acks=acks, + timeout=timeout, + topics=topic_partition_data, + ) + else: + if transactional_id is not None: + log.warning('%s: Broker does not support ProduceRequest v3+, required for transactional_id', str(self)) + return ProduceRequest[version]( + required_acks=acks, + timeout=timeout, + topics=topic_partition_data, + ) def wakeup(self): """Wake up the selector associated with this send thread.""" @@ -314,6 +589,9 @@ def wakeup(self): def bootstrap_connected(self): return self._client.bootstrap_connected() + def __str__(self): + return "<Sender client_id=%s transactional_id=%s>" % (self.config['client_id'], self.config['transactional_id']) + class SenderMetrics(object): @@ -468,8 +746,9 @@ def update_produce_request_metrics(self, batches_map): records += batch.record_count total_bytes += batch.records.size_in_bytes() - self.records_per_request_sensor.record(records) - self.byte_rate_sensor.record(total_bytes) + if node_batch: + self.records_per_request_sensor.record(records) + self.byte_rate_sensor.record(total_bytes) def record_retries(self, topic, count): self.retry_sensor.record(count) diff --git a/kafka/producer/transaction_manager.py b/kafka/producer/transaction_manager.py new file mode 100644 index 000000000..7302eb00e --- /dev/null +++ b/kafka/producer/transaction_manager.py @@ -0,0 +1,981 @@ +from __future__ import absolute_import, division + +import abc +import collections +import heapq +import logging +import threading + +from kafka.vendor import six + +try: + # enum in stdlib as of py3.4 + from enum import IntEnum # pylint: disable=import-error +except ImportError: + # vendored backport module + from kafka.vendor.enum34 import IntEnum + +import kafka.errors as Errors +from kafka.protocol.add_offsets_to_txn import AddOffsetsToTxnRequest +from kafka.protocol.add_partitions_to_txn import AddPartitionsToTxnRequest +from kafka.protocol.end_txn import EndTxnRequest +from kafka.protocol.find_coordinator import FindCoordinatorRequest +from kafka.protocol.init_producer_id import InitProducerIdRequest +from kafka.protocol.txn_offset_commit import TxnOffsetCommitRequest +from kafka.structs import TopicPartition + + +log = logging.getLogger(__name__) + + +NO_PRODUCER_ID = -1
+NO_PRODUCER_EPOCH = -1 +NO_SEQUENCE = -1 + + +class ProducerIdAndEpoch(object): + __slots__ = ('producer_id', 'epoch') + + def __init__(self, producer_id, epoch): + self.producer_id = producer_id + self.epoch = epoch + + @property + def is_valid(self): + return NO_PRODUCER_ID < self.producer_id + + def match(self, batch): + return self.producer_id == batch.producer_id and self.epoch == batch.producer_epoch + + def __eq__(self, other): + return isinstance(other, ProducerIdAndEpoch) and self.producer_id == other.producer_id and self.epoch == other.epoch + + def __str__(self): + return "ProducerIdAndEpoch(producer_id={}, epoch={})".format(self.producer_id, self.epoch) + + +class TransactionState(IntEnum): + UNINITIALIZED = 0 + INITIALIZING = 1 + READY = 2 + IN_TRANSACTION = 3 + COMMITTING_TRANSACTION = 4 + ABORTING_TRANSACTION = 5 + ABORTABLE_ERROR = 6 + FATAL_ERROR = 7 + + @classmethod + def is_transition_valid(cls, source, target): + if target == cls.INITIALIZING: + return source == cls.UNINITIALIZED + elif target == cls.READY: + return source in (cls.INITIALIZING, cls.COMMITTING_TRANSACTION, cls.ABORTING_TRANSACTION) + elif target == cls.IN_TRANSACTION: + return source == cls.READY + elif target == cls.COMMITTING_TRANSACTION: + return source == cls.IN_TRANSACTION + elif target == cls.ABORTING_TRANSACTION: + return source in (cls.IN_TRANSACTION, cls.ABORTABLE_ERROR) + elif target == cls.ABORTABLE_ERROR: + return source in (cls.IN_TRANSACTION, cls.COMMITTING_TRANSACTION, cls.ABORTABLE_ERROR) + elif target == cls.UNINITIALIZED: + # Disallow transitions to UNITIALIZED + return False + elif target == cls.FATAL_ERROR: + # We can transition to FATAL_ERROR unconditionally. + # FATAL_ERROR is never a valid starting state for any transition. So the only option is to close the + # producer or do purely non transactional requests. + return True + + +class Priority(IntEnum): + # We use the priority to determine the order in which requests need to be sent out. For instance, if we have + # a pending FindCoordinator request, that must always go first. Next, If we need a producer id, that must go second. + # The endTxn request must always go last. + FIND_COORDINATOR = 0 + INIT_PRODUCER_ID = 1 + ADD_PARTITIONS_OR_OFFSETS = 2 + END_TXN = 3 + + +class TransactionManager(object): + """ + A class which maintains state for transactions. Also keeps the state necessary to ensure idempotent production. + """ + NO_INFLIGHT_REQUEST_CORRELATION_ID = -1 + # The retry_backoff_ms is overridden to the following value if the first AddPartitions receives a + # CONCURRENT_TRANSACTIONS error. 
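# Illustrative note, not part of the patch: the first AddPartitionsToTxn of a new transaction often races the completion of the previous transaction on the coordinator, which answers with CONCURRENT_TRANSACTIONS; AddPartitionsToTxnHandler.maybe_override_retry_backoff_ms then lowers the handler's backoff to this 20ms value instead of the configured retry_backoff_ms so the new transaction can start sooner.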
+ ADD_PARTITIONS_RETRY_BACKOFF_MS = 20 + + def __init__(self, transactional_id=None, transaction_timeout_ms=0, retry_backoff_ms=100, api_version=(0, 11), metadata=None): + self._api_version = api_version + self._metadata = metadata + + self._sequence_numbers = collections.defaultdict(lambda: 0) + + self.transactional_id = transactional_id + self.transaction_timeout_ms = transaction_timeout_ms + self._transaction_coordinator = None + self._consumer_group_coordinator = None + self._new_partitions_in_transaction = set() + self._pending_partitions_in_transaction = set() + self._partitions_in_transaction = set() + self._pending_txn_offset_commits = dict() + + self._current_state = TransactionState.UNINITIALIZED + self._last_error = None + self.producer_id_and_epoch = ProducerIdAndEpoch(NO_PRODUCER_ID, NO_PRODUCER_EPOCH) + + self._transaction_started = False + + self._pending_requests = [] # priority queue via heapq + self._pending_requests_sort_id = 0 + self._in_flight_request_correlation_id = self.NO_INFLIGHT_REQUEST_CORRELATION_ID + + # This is used by the TxnRequestHandlers to control how long to back off before a given request is retried. + # For instance, this value is lowered by the AddPartitionsToTxnHandler when it receives a CONCURRENT_TRANSACTIONS + # error for the first AddPartitionsRequest in a transaction. + self.retry_backoff_ms = retry_backoff_ms + self._lock = threading.Condition() + + def initialize_transactions(self): + with self._lock: + self._ensure_transactional() + self._transition_to(TransactionState.INITIALIZING) + self.set_producer_id_and_epoch(ProducerIdAndEpoch(NO_PRODUCER_ID, NO_PRODUCER_EPOCH)) + self._sequence_numbers.clear() + handler = InitProducerIdHandler(self, self.transaction_timeout_ms) + self._enqueue_request(handler) + return handler.result + + def begin_transaction(self): + with self._lock: + self._ensure_transactional() + self._maybe_fail_with_error() + self._transition_to(TransactionState.IN_TRANSACTION) + + def begin_commit(self): + with self._lock: + self._ensure_transactional() + self._maybe_fail_with_error() + self._transition_to(TransactionState.COMMITTING_TRANSACTION) + return self._begin_completing_transaction(True) + + def begin_abort(self): + with self._lock: + self._ensure_transactional() + if self._current_state != TransactionState.ABORTABLE_ERROR: + self._maybe_fail_with_error() + self._transition_to(TransactionState.ABORTING_TRANSACTION) + + # We're aborting the transaction, so there should be no need to add new partitions + self._new_partitions_in_transaction.clear() + return self._begin_completing_transaction(False) + + def _begin_completing_transaction(self, committed): + if self._new_partitions_in_transaction: + self._enqueue_request(self._add_partitions_to_transaction_handler()) + handler = EndTxnHandler(self, committed) + self._enqueue_request(handler) + return handler.result + + def send_offsets_to_transaction(self, offsets, consumer_group_id): + with self._lock: + self._ensure_transactional() + self._maybe_fail_with_error() + if self._current_state != TransactionState.IN_TRANSACTION: + raise Errors.KafkaError("Cannot send offsets to transaction because the producer is not in an active transaction") + + log.debug("Begin adding offsets %s for consumer group %s to transaction", offsets, consumer_group_id) + handler = AddOffsetsToTxnHandler(self, consumer_group_id, offsets) + self._enqueue_request(handler) + return handler.result + + def maybe_add_partition_to_transaction(self, topic_partition): + with self._lock: + 
self._fail_if_not_ready_for_send() + + if self.is_partition_added(topic_partition) or self.is_partition_pending_add(topic_partition): + return + + log.debug("Begin adding new partition %s to transaction", topic_partition) + self._new_partitions_in_transaction.add(topic_partition) + + def _fail_if_not_ready_for_send(self): + with self._lock: + if self.has_error(): + raise Errors.KafkaError( + "Cannot perform send because at least one previous transactional or" + " idempotent request has failed with errors.", self._last_error) + + if self.is_transactional(): + if not self.has_producer_id(): + raise Errors.IllegalStateError( + "Cannot perform a 'send' before completing a call to init_transactions" + " when transactions are enabled.") + + if self._current_state != TransactionState.IN_TRANSACTION: + raise Errors.IllegalStateError("Cannot call send in state %s" % (self._current_state.name,)) + + def is_send_to_partition_allowed(self, tp): + with self._lock: + if self.has_fatal_error(): + return False + return not self.is_transactional() or tp in self._partitions_in_transaction + + def has_producer_id(self, producer_id=None): + if producer_id is None: + return self.producer_id_and_epoch.is_valid + else: + return self.producer_id_and_epoch.producer_id == producer_id + + def is_transactional(self): + return self.transactional_id is not None + + def has_partitions_to_add(self): + with self._lock: + return bool(self._new_partitions_in_transaction) or bool(self._pending_partitions_in_transaction) + + def is_completing(self): + with self._lock: + return self._current_state in ( + TransactionState.COMMITTING_TRANSACTION, + TransactionState.ABORTING_TRANSACTION) + + @property + def last_error(self): + return self._last_error + + def has_error(self): + with self._lock: + return self._current_state in ( + TransactionState.ABORTABLE_ERROR, + TransactionState.FATAL_ERROR) + + def is_aborting(self): + with self._lock: + return self._current_state == TransactionState.ABORTING_TRANSACTION + + def transition_to_abortable_error(self, exc): + with self._lock: + if self._current_state == TransactionState.ABORTING_TRANSACTION: + log.debug("Skipping transition to abortable error state since the transaction is already being " + " aborted. Underlying exception: %s", exc) + return + self._transition_to(TransactionState.ABORTABLE_ERROR, error=exc) + + def transition_to_fatal_error(self, exc): + with self._lock: + self._transition_to(TransactionState.FATAL_ERROR, error=exc) + + def is_partition_added(self, partition): + with self._lock: + return partition in self._partitions_in_transaction + + def is_partition_pending_add(self, partition): + return partition in self._new_partitions_in_transaction or partition in self._pending_partitions_in_transaction + + def has_producer_id_and_epoch(self, producer_id, producer_epoch): + return ( + self.producer_id_and_epoch.producer_id == producer_id and + self.producer_id_and_epoch.epoch == producer_epoch + ) + + def set_producer_id_and_epoch(self, producer_id_and_epoch): + if not isinstance(producer_id_and_epoch, ProducerIdAndEpoch): + raise TypeError("ProducerAndIdEpoch type required") + log.info("ProducerId set to %s with epoch %s", + producer_id_and_epoch.producer_id, producer_id_and_epoch.epoch) + self.producer_id_and_epoch = producer_id_and_epoch + + def reset_producer_id(self): + """ + This method is used when the producer needs to reset its internal state because of an irrecoverable exception + from the broker. 
+ + We need to reset the producer id and associated state when we have sent a batch to the broker, but we either get + a non-retriable exception or we run out of retries, or the batch expired in the producer queue after it was already + sent to the broker. + + In all of these cases, we don't know whether batch was actually committed on the broker, and hence whether the + sequence number was actually updated. If we don't reset the producer state, we risk the chance that all future + messages will return an OutOfOrderSequenceNumberError. + + Note that we can't reset the producer state for the transactional producer as this would mean bumping the epoch + for the same producer id. This might involve aborting the ongoing transaction during the initProducerIdRequest, + and the user would not have any way of knowing this happened. So for the transactional producer, + it's best to return the produce error to the user and let them abort the transaction and close the producer explicitly. + """ + with self._lock: + if self.is_transactional(): + raise Errors.IllegalStateError( + "Cannot reset producer state for a transactional producer." + " You must either abort the ongoing transaction or" + " reinitialize the transactional producer instead") + self.set_producer_id_and_epoch(ProducerIdAndEpoch(NO_PRODUCER_ID, NO_PRODUCER_EPOCH)) + self._sequence_numbers.clear() + + def sequence_number(self, tp): + with self._lock: + return self._sequence_numbers[tp] + + def increment_sequence_number(self, tp, increment): + with self._lock: + if tp not in self._sequence_numbers: + raise Errors.IllegalStateError("Attempt to increment sequence number for a partition with no current sequence.") + # Sequence number wraps at java max int + base = self._sequence_numbers[tp] + if base > (2147483647 - increment): + self._sequence_numbers[tp] = increment - (2147483647 - base) - 1 + else: + self._sequence_numbers[tp] += increment + + def next_request_handler(self, has_incomplete_batches): + with self._lock: + if self._new_partitions_in_transaction: + self._enqueue_request(self._add_partitions_to_transaction_handler()) + + if not self._pending_requests: + return None + + _, _, next_request_handler = self._pending_requests[0] + # Do not send the EndTxn until all batches have been flushed + if isinstance(next_request_handler, EndTxnHandler) and has_incomplete_batches: + return None + + heapq.heappop(self._pending_requests) + if self._maybe_terminate_request_with_error(next_request_handler): + log.debug("Not sending transactional request %s because we are in an error state", + next_request_handler.request) + return None + + if isinstance(next_request_handler, EndTxnHandler) and not self._transaction_started: + next_request_handler.result.done() + if self._current_state != TransactionState.FATAL_ERROR: + log.debug("Not sending EndTxn for completed transaction since no partitions" + " or offsets were successfully added") + self._complete_transaction() + try: + _, _, next_request_handler = heapq.heappop(self._pending_requests) + except IndexError: + next_request_handler = None + + if next_request_handler: + log.debug("Request %s dequeued for sending", next_request_handler.request) + + return next_request_handler + + def retry(self, request): + with self._lock: + request.set_retry() + self._enqueue_request(request) + + def authentication_failed(self, exc): + with self._lock: + for _, _, request in self._pending_requests: + request.fatal_error(exc) + + def coordinator(self, coord_type): + if coord_type == 'group': + return 
self._consumer_group_coordinator + elif coord_type == 'transaction': + return self._transaction_coordinator + else: + raise Errors.IllegalStateError("Received an invalid coordinator type: %s" % (coord_type,)) + + def lookup_coordinator_for_request(self, request): + self._lookup_coordinator(request.coordinator_type, request.coordinator_key) + + def next_in_flight_request_correlation_id(self): + self._in_flight_request_correlation_id += 1 + return self._in_flight_request_correlation_id + + def clear_in_flight_transactional_request_correlation_id(self): + self._in_flight_request_correlation_id = self.NO_INFLIGHT_REQUEST_CORRELATION_ID + + def has_in_flight_transactional_request(self): + return self._in_flight_request_correlation_id != self.NO_INFLIGHT_REQUEST_CORRELATION_ID + + def has_fatal_error(self): + return self._current_state == TransactionState.FATAL_ERROR + + def has_abortable_error(self): + return self._current_state == TransactionState.ABORTABLE_ERROR + + # visible for testing + def _test_transaction_contains_partition(self, tp): + with self._lock: + return tp in self._partitions_in_transaction + + # visible for testing + def _test_has_pending_offset_commits(self): + return bool(self._pending_txn_offset_commits) + + # visible for testing + def _test_has_ongoing_transaction(self): + with self._lock: + # transactions are considered ongoing once started until completion or a fatal error + return self._current_state == TransactionState.IN_TRANSACTION or self.is_completing() or self.has_abortable_error() + + # visible for testing + def _test_is_ready(self): + with self._lock: + return self.is_transactional() and self._current_state == TransactionState.READY + + def _transition_to(self, target, error=None): + with self._lock: + if not self._current_state.is_transition_valid(self._current_state, target): + raise Errors.KafkaError("TransactionalId %s: Invalid transition attempted from state %s to state %s" % ( + self.transactional_id, self._current_state.name, target.name)) + + if target in (TransactionState.FATAL_ERROR, TransactionState.ABORTABLE_ERROR): + if error is None: + raise Errors.IllegalArgumentError("Cannot transition to %s with an None exception" % (target.name,)) + self._last_error = error + else: + self._last_error = None + + if self._last_error is not None: + log.debug("Transition from state %s to error state %s (%s)", self._current_state.name, target.name, self._last_error) + else: + log.debug("Transition from state %s to %s", self._current_state, target) + self._current_state = target + + def _ensure_transactional(self): + if not self.is_transactional(): + raise Errors.IllegalStateError("Transactional method invoked on a non-transactional producer.") + + def _maybe_fail_with_error(self): + if self.has_error(): + raise Errors.KafkaError("Cannot execute transactional method because we are in an error state: %s" % (self._last_error,)) + + def _maybe_terminate_request_with_error(self, request_handler): + if self.has_error(): + if self.has_abortable_error() and isinstance(request_handler, FindCoordinatorHandler): + # No harm letting the FindCoordinator request go through if we're expecting to abort + return False + request_handler.fail(self._last_error) + return True + return False + + def _next_pending_requests_sort_id(self): + self._pending_requests_sort_id += 1 + return self._pending_requests_sort_id + + def _enqueue_request(self, request_handler): + log.debug("Enqueuing transactional request %s", request_handler.request) + heapq.heappush( + self._pending_requests, + ( + 
request_handler.priority, # keep lowest priority at head of queue + self._next_pending_requests_sort_id(), # break ties + request_handler + ) + ) + + def _lookup_coordinator(self, coord_type, coord_key): + with self._lock: + if coord_type == 'group': + self._consumer_group_coordinator = None + elif coord_type == 'transaction': + self._transaction_coordinator = None + else: + raise Errors.IllegalStateError("Invalid coordinator type: %s" % (coord_type,)) + self._enqueue_request(FindCoordinatorHandler(self, coord_type, coord_key)) + + def _complete_transaction(self): + with self._lock: + self._transition_to(TransactionState.READY) + self._transaction_started = False + self._new_partitions_in_transaction.clear() + self._pending_partitions_in_transaction.clear() + self._partitions_in_transaction.clear() + + def _add_partitions_to_transaction_handler(self): + with self._lock: + self._pending_partitions_in_transaction.update(self._new_partitions_in_transaction) + self._new_partitions_in_transaction.clear() + return AddPartitionsToTxnHandler(self, self._pending_partitions_in_transaction) + + +class TransactionalRequestResult(object): + def __init__(self): + self._latch = threading.Event() + self._error = None + + def done(self, error=None): + self._error = error + self._latch.set() + + def wait(self, timeout_ms=None): + timeout = timeout_ms / 1000 if timeout_ms is not None else None + success = self._latch.wait(timeout) + if self._error: + raise self._error + return success + + @property + def is_done(self): + return self._latch.is_set() + + @property + def succeeded(self): + return self._latch.is_set() and self._error is None + + @property + def failed(self): + return self._latch.is_set() and self._error is not None + + @property + def exception(self): + return self._error + + +@six.add_metaclass(abc.ABCMeta) +class TxnRequestHandler(object): + def __init__(self, transaction_manager, result=None): + self.transaction_manager = transaction_manager + self.retry_backoff_ms = transaction_manager.retry_backoff_ms + self.request = None + self._result = result or TransactionalRequestResult() + self._is_retry = False + + @property + def transactional_id(self): + return self.transaction_manager.transactional_id + + @property + def producer_id(self): + return self.transaction_manager.producer_id_and_epoch.producer_id + + @property + def producer_epoch(self): + return self.transaction_manager.producer_id_and_epoch.epoch + + def fatal_error(self, exc): + self.transaction_manager._transition_to_fatal_error(exc) + self._result.done(error=exc) + + def abortable_error(self, exc): + self.transaction_manager._transition_to_abortable_error(exc) + self._result.done(error=exc) + + def fail(self, exc): + self._result.done(error=exc) + + def reenqueue(self): + with self.transaction_manager._lock: + self._is_retry = True + self.transaction_manager._enqueue_request(self) + + def on_complete(self, correlation_id, response_or_exc): + if correlation_id != self.transaction_manager._in_flight_request_correlation_id: + self.fatal_error(RuntimeError("Detected more than one in-flight transactional request.")) + else: + self.transaction_manager.clear_in_flight_transactional_request_correlation_id() + if isinstance(response_or_exc, Errors.KafkaConnectionError): + log.debug("Disconnected from node. 
Will retry.") + if self.needs_coordinator(): + self.transaction_manager._lookup_coordinator(self.coordinator_type, self.coordinator_key) + self.reenqueue() + elif isinstance(response_or_exc, Errors.UnsupportedVersionError): + self.fatal_error(response_or_exc) + elif not isinstance(response_or_exc, (Exception, type(None))): + log.debug("Received transactional response %s for request %s", response_or_exc, self.request) + with self.transaction_manager._lock: + self.handle_response(response_or_exc) + else: + self.fatal_error(Errors.KafkaError("Could not execute transactional request for unknown reasons: %s" % response_or_exc)) + + def needs_coordinator(self): + return self.coordinator_type is not None + + @property + def result(self): + return self._result + + @property + def coordinator_type(self): + return 'transaction' + + @property + def coordinator_key(self): + return self.transaction_manager.transactional_id + + def set_retry(self): + self._is_retry = True + + @property + def is_retry(self): + return self._is_retry + + @abc.abstractmethod + def handle_response(self, response): + pass + + @abc.abstractproperty + def priority(self): + pass + + +class InitProducerIdHandler(TxnRequestHandler): + def __init__(self, transaction_manager, transaction_timeout_ms): + super(InitProducerIdHandler, self).__init__(transaction_manager) + + if transaction_manager._api_version >= (2, 0): + version = 1 + else: + version = 0 + self.request = InitProducerIdRequest[version]( + transactional_id=self.transactional_id, + transaction_timeout_ms=transaction_timeout_ms) + + @property + def priority(self): + return Priority.INIT_PRODUCER_ID + + def handle_response(self, response): + error = Errors.for_code(response.error_code) + + if error is Errors.NoError: + self.transaction_manager.set_producer_id_and_epoch(ProducerIdAndEpoch(response.producer_id, response.producer_epoch)) + self.transaction_manager._transition_to(TransactionState.READY) + self._result.done() + elif error in (Errors.NotCoordinatorError, Errors.CoordinatorNotAvailableError): + self.transaction_manager._lookup_coordinator('transaction', self.transactional_id) + self.reenqueue() + elif error in (Errors.CoordinatorLoadInProgressError, Errors.ConcurrentTransactionsError): + self.reenqueue() + elif error is Errors.TransactionalIdAuthorizationFailedError: + self.fatal_error(error()) + else: + self.fatal_error(Errors.KafkaError("Unexpected error in InitProducerIdResponse: %s" % (error()))) + +class AddPartitionsToTxnHandler(TxnRequestHandler): + def __init__(self, transaction_manager, topic_partitions): + super(AddPartitionsToTxnHandler, self).__init__(transaction_manager) + + if transaction_manager._api_version >= (2, 7): + version = 2 + elif transaction_manager._api_version >= (2, 0): + version = 1 + else: + version = 0 + topic_data = collections.defaultdict(list) + for tp in topic_partitions: + topic_data[tp.topic].append(tp.partition) + self.request = AddPartitionsToTxnRequest[version]( + transactional_id=self.transactional_id, + producer_id=self.producer_id, + producer_epoch=self.producer_epoch, + topics=list(topic_data.items())) + + @property + def priority(self): + return Priority.ADD_PARTITIONS_OR_OFFSETS + + def handle_response(self, response): + has_partition_errors = False + unauthorized_topics = set() + self.retry_backoff_ms = self.transaction_manager.retry_backoff_ms + + results = {TopicPartition(topic, partition): Errors.for_code(error_code) + for topic, partition_data in response.results + for partition, error_code in partition_data} + + 
for tp, error in six.iteritems(results): + if error is Errors.NoError: + continue + elif error in (Errors.CoordinatorNotAvailableError, Errors.NotCoordinatorError): + self.transaction_manager._lookup_coordinator('transaction', self.transactional_id) + self.reenqueue() + return + elif error is Errors.ConcurrentTransactionsError: + self.maybe_override_retry_backoff_ms() + self.reenqueue() + return + elif error in (Errors.CoordinatorLoadInProgressError, Errors.UnknownTopicOrPartitionError): + self.reenqueue() + return + elif error is Errors.InvalidProducerEpochError: + self.fatal_error(error()) + return + elif error is Errors.TransactionalIdAuthorizationFailedError: + self.fatal_error(error()) + return + elif error in (Errors.InvalidProducerIdMappingError, Errors.InvalidTxnStateError): + self.fatal_error(Errors.KafkaError(error())) + return + elif error is Errors.TopicAuthorizationFailedError: + unauthorized_topics.add(tp.topic) + elif error is Errors.OperationNotAttemptedError: + log.debug("Did not attempt to add partition %s to transaction because other partitions in the" + " batch had errors.", tp) + has_partition_errors = True + else: + log.error("Could not add partition %s due to unexpected error %s", tp, error()) + has_partition_errors = True + + partitions = set(results) + + # Remove the partitions from the pending set regardless of the result. We use the presence + # of partitions in the pending set to know when it is not safe to send batches. However, if + # the partitions failed to be added and we enter an error state, we expect the batches to be + # aborted anyway. In this case, we must be able to continue sending the batches which are in + # retry for partitions that were successfully added. + self.transaction_manager._pending_partitions_in_transaction -= partitions + + if unauthorized_topics: + self.abortable_error(Errors.TopicAuthorizationFailedError(unauthorized_topics)) + elif has_partition_errors: + self.abortable_error(Errors.KafkaError("Could not add partitions to transaction due to errors: %s" % (results))) + else: + log.debug("Successfully added partitions %s to transaction", partitions) + self.transaction_manager._partitions_in_transaction.update(partitions) + self.transaction_manager._transaction_started = True + self._result.done() + + def maybe_override_retry_backoff_ms(self): + # We only want to reduce the backoff when retrying the first AddPartition which errored out due to a + # CONCURRENT_TRANSACTIONS error since this means that the previous transaction is still completing and + # we don't want to wait too long before trying to start the new one. 
+ # + # This is only a temporary fix, the long term solution is being tracked in + # https://issues.apache.org/jira/browse/KAFKA-5482 + if not self.transaction_manager._partitions_in_transaction: + self.retry_backoff_ms = min(self.transaction_manager.ADD_PARTITIONS_RETRY_BACKOFF_MS, self.retry_backoff_ms) + + +class FindCoordinatorHandler(TxnRequestHandler): + def __init__(self, transaction_manager, coord_type, coord_key): + super(FindCoordinatorHandler, self).__init__(transaction_manager) + + self._coord_type = coord_type + self._coord_key = coord_key + if transaction_manager._api_version >= (2, 0): + version = 2 + else: + version = 1 + if coord_type == 'group': + coord_type_int8 = 0 + elif coord_type == 'transaction': + coord_type_int8 = 1 + else: + raise ValueError("Unrecognized coordinator type: %s" % (coord_type,)) + self.request = FindCoordinatorRequest[version]( + coordinator_key=coord_key, + coordinator_type=coord_type_int8, + ) + + @property + def priority(self): + return Priority.FIND_COORDINATOR + + @property + def coordinator_type(self): + return None + + @property + def coordinator_key(self): + return None + + def handle_response(self, response): + error = Errors.for_code(response.error_code) + + if error is Errors.NoError: + coordinator_id = self.transaction_manager._metadata.add_coordinator( + response, self._coord_type, self._coord_key) + if self._coord_type == 'group': + self.transaction_manager._consumer_group_coordinator = coordinator_id + elif self._coord_type == 'transaction': + self.transaction_manager._transaction_coordinator = coordinator_id + self._result.done() + elif error is Errors.CoordinatorNotAvailableError: + self.reenqueue() + elif error is Errors.TransactionalIdAuthorizationFailedError: + self.fatal_error(error()) + elif error is Errors.GroupAuthorizationFailedError: + self.abortable_error(error(self._coord_key)) + else: + self.fatal_error(Errors.KafkaError( + "Could not find a coordinator with type %s with key %s due to" + " unexpected error: %s" % (self._coord_type, self._coord_key, error()))) + + +class EndTxnHandler(TxnRequestHandler): + def __init__(self, transaction_manager, committed): + super(EndTxnHandler, self).__init__(transaction_manager) + + if self.transaction_manager._api_version >= (2, 7): + version = 2 + elif self.transaction_manager._api_version >= (2, 0): + version = 1 + else: + version = 0 + self.request = EndTxnRequest[version]( + transactional_id=self.transactional_id, + producer_id=self.producer_id, + producer_epoch=self.producer_epoch, + committed=committed) + + @property + def priority(self): + return Priority.END_TXN + + def handle_response(self, response): + error = Errors.for_code(response.error_code) + + if error is Errors.NoError: + self.transaction_manager._complete_transaction() + self._result.done() + elif error in (Errors.CoordinatorNotAvailableError, Errors.NotCoordinatorError): + self.transaction_manager._lookup_coordinator('transaction', self.transactional_id) + self.reenqueue() + elif error in (Errors.CoordinatorLoadInProgressError, Errors.ConcurrentTransactionsError): + self.reenqueue() + elif error is Errors.InvalidProducerEpochError: + self.fatal_error(error()) + elif error is Errors.TransactionalIdAuthorizationFailedError: + self.fatal_error(error()) + elif error is Errors.InvalidTxnStateError: + self.fatal_error(error()) + else: + self.fatal_error(Errors.KafkaError("Unhandled error in EndTxnResponse: %s" % (error()))) + + +class AddOffsetsToTxnHandler(TxnRequestHandler): + def __init__(self, transaction_manager, 
consumer_group_id, offsets): + super(AddOffsetsToTxnHandler, self).__init__(transaction_manager) + + self.consumer_group_id = consumer_group_id + self.offsets = offsets + if self.transaction_manager._api_version >= (2, 7): + version = 2 + elif self.transaction_manager._api_version >= (2, 0): + version = 1 + else: + version = 0 + self.request = AddOffsetsToTxnRequest[version]( + transactional_id=self.transactional_id, + producer_id=self.producer_id, + producer_epoch=self.producer_epoch, + group_id=consumer_group_id) + + @property + def priority(self): + return Priority.ADD_PARTITIONS_OR_OFFSETS + + def handle_response(self, response): + error = Errors.for_code(response.error_code) + + if error is Errors.NoError: + log.debug("Successfully added partition for consumer group %s to transaction", self.consumer_group_id) + + # note the result is not completed until the TxnOffsetCommit returns + for tp, offset in six.iteritems(self.offsets): + self.transaction_manager._pending_txn_offset_commits[tp] = offset + handler = TxnOffsetCommitHandler(self.transaction_manager, self.consumer_group_id, + self.transaction_manager._pending_txn_offset_commits, self._result) + self.transaction_manager._enqueue_request(handler) + self.transaction_manager._transaction_started = True + elif error in (Errors.CoordinatorNotAvailableError, Errors.NotCoordinatorError): + self.transaction_manager._lookup_coordinator('transaction', self.transactional_id) + self.reenqueue() + elif error in (Errors.CoordinatorLoadInProgressError, Errors.ConcurrentTransactionsError): + self.reenqueue() + elif error is Errors.InvalidProducerEpochError: + self.fatal_error(error()) + elif error is Errors.TransactionalIdAuthorizationFailedError: + self.fatal_error(error()) + elif error is Errors.GroupAuthorizationFailedError: + self.abortable_error(error(self.consumer_group_id)) + else: + self.fatal_error(Errors.KafkaError("Unexpected error in AddOffsetsToTxnResponse: %s" % (error()))) + + +class TxnOffsetCommitHandler(TxnRequestHandler): + def __init__(self, transaction_manager, consumer_group_id, offsets, result): + super(TxnOffsetCommitHandler, self).__init__(transaction_manager, result=result) + + self.consumer_group_id = consumer_group_id + self.offsets = offsets + self.request = self._build_request() + + def _build_request(self): + if self.transaction_manager._api_version >= (2, 1): + version = 2 + elif self.transaction_manager._api_version >= (2, 0): + version = 1 + else: + version = 0 + + topic_data = collections.defaultdict(list) + for tp, offset in six.iteritems(self.offsets): + if version >= 2: + partition_data = (tp.partition, offset.offset, offset.leader_epoch, offset.metadata) + else: + partition_data = (tp.partition, offset.offset, offset.metadata) + topic_data[tp.topic].append(partition_data) + + return TxnOffsetCommitRequest[version]( + transactional_id=self.transactional_id, + group_id=self.consumer_group_id, + producer_id=self.producer_id, + producer_epoch=self.producer_epoch, + topics=list(topic_data.items())) + + @property + def priority(self): + return Priority.ADD_PARTITIONS_OR_OFFSETS + + @property + def coordinator_type(self): + return 'group' + + @property + def coordinator_key(self): + return self.consumer_group_id + + def handle_response(self, response): + lookup_coordinator = False + retriable_failure = False + + errors = {TopicPartition(topic, partition): Errors.for_code(error_code) + for topic, partition_data in response.topics + for partition, error_code in partition_data} + + for tp, error in 
six.iteritems(errors): + if error is Errors.NoError: + log.debug("Successfully added offsets for %s from consumer group %s to transaction.", + tp, self.consumer_group_id) + del self.transaction_manager._pending_txn_offset_commits[tp] + elif error in (Errors.CoordinatorNotAvailableError, Errors.NotCoordinatorError, Errors.RequestTimedOutError): + retriable_failure = True + lookup_coordinator = True + elif error is Errors.UnknownTopicOrPartitionError: + retriable_failure = True + elif error is Errors.GroupAuthorizationFailedError: + self.abortable_error(error(self.consumer_group_id)) + return + elif error in (Errors.TransactionalIdAuthorizationFailedError, + Errors.InvalidProducerEpochError, + Errors.UnsupportedForMessageFormatError): + self.fatal_error(error()) + return + else: + self.fatal_error(Errors.KafkaError("Unexpected error in TxnOffsetCommitResponse: %s" % (error()))) + return + + if lookup_coordinator: + self.transaction_manager._lookup_coordinator('group', self.consumer_group_id) + + if not retriable_failure: + # all attempted partitions were either successful, or there was a fatal failure. + # either way, we are not retrying, so complete the request. + self.result.done() + + # retry the commits which failed with a retriable error. + elif self.transaction_manager._pending_txn_offset_commits: + self.offsets = self.transaction_manager._pending_txn_offset_commits + self.request = self._build_request() + self.reenqueue() diff --git a/kafka/protocol/abstract.py b/kafka/protocol/abstract.py index 2de65c4bb..7ce5fc18f 100644 --- a/kafka/protocol/abstract.py +++ b/kafka/protocol/abstract.py @@ -2,10 +2,11 @@ import abc +from kafka.vendor.six import add_metaclass -class AbstractType(object): - __metaclass__ = abc.ABCMeta +@add_metaclass(abc.ABCMeta) +class AbstractType(object): @abc.abstractmethod def encode(cls, value): # pylint: disable=no-self-argument pass diff --git a/kafka/protocol/add_offsets_to_txn.py b/kafka/protocol/add_offsets_to_txn.py new file mode 100644 index 000000000..fa2509330 --- /dev/null +++ b/kafka/protocol/add_offsets_to_txn.py @@ -0,0 +1,59 @@ +from __future__ import absolute_import + +from kafka.protocol.api import Request, Response +from kafka.protocol.types import Int16, Int32, Int64, Schema, String + + +class AddOffsetsToTxnResponse_v0(Response): + API_KEY = 25 + API_VERSION = 0 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('error_code', Int16), + ) + + +class AddOffsetsToTxnResponse_v1(Response): + API_KEY = 25 + API_VERSION = 1 + SCHEMA = AddOffsetsToTxnResponse_v0.SCHEMA + + +class AddOffsetsToTxnResponse_v2(Response): + API_KEY = 25 + API_VERSION = 2 + SCHEMA = AddOffsetsToTxnResponse_v1.SCHEMA + + +class AddOffsetsToTxnRequest_v0(Request): + API_KEY = 25 + API_VERSION = 0 + RESPONSE_TYPE = AddOffsetsToTxnResponse_v0 + SCHEMA = Schema( + ('transactional_id', String('utf-8')), + ('producer_id', Int64), + ('producer_epoch', Int16), + ('group_id', String('utf-8')), + ) + + +class AddOffsetsToTxnRequest_v1(Request): + API_KEY = 25 + API_VERSION = 1 + RESPONSE_TYPE = AddOffsetsToTxnResponse_v1 + SCHEMA = AddOffsetsToTxnRequest_v0.SCHEMA + + +class AddOffsetsToTxnRequest_v2(Request): + API_KEY = 25 + API_VERSION = 2 + RESPONSE_TYPE = AddOffsetsToTxnResponse_v2 + SCHEMA = AddOffsetsToTxnRequest_v1.SCHEMA + + +AddOffsetsToTxnRequest = [ + AddOffsetsToTxnRequest_v0, AddOffsetsToTxnRequest_v1, AddOffsetsToTxnRequest_v2, +] +AddOffsetsToTxnResponse = [ + AddOffsetsToTxnResponse_v0, AddOffsetsToTxnResponse_v1, AddOffsetsToTxnResponse_v2, +] diff --git 
a/kafka/protocol/add_partitions_to_txn.py b/kafka/protocol/add_partitions_to_txn.py new file mode 100644 index 000000000..fdf28f4ae --- /dev/null +++ b/kafka/protocol/add_partitions_to_txn.py @@ -0,0 +1,63 @@ +from __future__ import absolute_import + +from kafka.protocol.api import Request, Response +from kafka.protocol.types import Array, Int16, Int32, Int64, Schema, String + + +class AddPartitionsToTxnResponse_v0(Response): + API_KEY = 24 + API_VERSION = 0 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('results', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('error_code', Int16)))))) + + +class AddPartitionsToTxnResponse_v1(Response): + API_KEY = 24 + API_VERSION = 1 + SCHEMA = AddPartitionsToTxnResponse_v0.SCHEMA + + +class AddPartitionsToTxnResponse_v2(Response): + API_KEY = 24 + API_VERSION = 2 + SCHEMA = AddPartitionsToTxnResponse_v1.SCHEMA + + +class AddPartitionsToTxnRequest_v0(Request): + API_KEY = 24 + API_VERSION = 0 + RESPONSE_TYPE = AddPartitionsToTxnResponse_v0 + SCHEMA = Schema( + ('transactional_id', String('utf-8')), + ('producer_id', Int64), + ('producer_epoch', Int16), + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array(Int32))))) + + +class AddPartitionsToTxnRequest_v1(Request): + API_KEY = 24 + API_VERSION = 1 + RESPONSE_TYPE = AddPartitionsToTxnResponse_v1 + SCHEMA = AddPartitionsToTxnRequest_v0.SCHEMA + + +class AddPartitionsToTxnRequest_v2(Request): + API_KEY = 24 + API_VERSION = 2 + RESPONSE_TYPE = AddPartitionsToTxnResponse_v2 + SCHEMA = AddPartitionsToTxnRequest_v1.SCHEMA + + +AddPartitionsToTxnRequest = [ + AddPartitionsToTxnRequest_v0, AddPartitionsToTxnRequest_v1, AddPartitionsToTxnRequest_v2, +] +AddPartitionsToTxnResponse = [ + AddPartitionsToTxnResponse_v0, AddPartitionsToTxnResponse_v1, AddPartitionsToTxnResponse_v2, +] diff --git a/kafka/protocol/admin.py b/kafka/protocol/admin.py index c237ef7e0..255166801 100644 --- a/kafka/protocol/admin.py +++ b/kafka/protocol/admin.py @@ -1,5 +1,12 @@ from __future__ import absolute_import +# enum in stdlib as of py3.4 +try: + from enum import IntEnum # pylint: disable=import-error +except ImportError: + # vendored backport module + from kafka.vendor.enum34 import IntEnum + from kafka.protocol.api import Request, Response from kafka.protocol.types import Array, Boolean, Bytes, Int8, Int16, Int32, Int64, Schema, String, Float64, CompactString, CompactArray, TaggedFields @@ -179,6 +186,38 @@ class DeleteTopicsRequest_v3(Request): ] +class DeleteRecordsResponse_v0(Response): + API_KEY = 21 + API_VERSION = 0 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('topics', Array( + ('name', String('utf-8')), + ('partitions', Array( + ('partition_index', Int32), + ('low_watermark', Int64), + ('error_code', Int16))))), + ) + + +class DeleteRecordsRequest_v0(Request): + API_KEY = 21 + API_VERSION = 0 + RESPONSE_TYPE = DeleteRecordsResponse_v0 + SCHEMA = Schema( + ('topics', Array( + ('name', String('utf-8')), + ('partitions', Array( + ('partition_index', Int32), + ('offset', Int64))))), + ('timeout_ms', Int32) + ) + + +DeleteRecordsResponse = [DeleteRecordsResponse_v0] +DeleteRecordsRequest = [DeleteRecordsRequest_v0] + + class ListGroupsResponse_v0(Response): API_KEY = 16 API_VERSION = 0 @@ -346,41 +385,6 @@ class DescribeGroupsRequest_v3(Request): ] -class SaslHandShakeResponse_v0(Response): - API_KEY = 17 - API_VERSION = 0 - SCHEMA = Schema( - ('error_code', Int16), - ('enabled_mechanisms', Array(String('utf-8'))) - ) - - -class 
SaslHandShakeResponse_v1(Response): - API_KEY = 17 - API_VERSION = 1 - SCHEMA = SaslHandShakeResponse_v0.SCHEMA - - -class SaslHandShakeRequest_v0(Request): - API_KEY = 17 - API_VERSION = 0 - RESPONSE_TYPE = SaslHandShakeResponse_v0 - SCHEMA = Schema( - ('mechanism', String('utf-8')) - ) - - -class SaslHandShakeRequest_v1(Request): - API_KEY = 17 - API_VERSION = 1 - RESPONSE_TYPE = SaslHandShakeResponse_v1 - SCHEMA = SaslHandShakeRequest_v0.SCHEMA - - -SaslHandShakeRequest = [SaslHandShakeRequest_v0, SaslHandShakeRequest_v1] -SaslHandShakeResponse = [SaslHandShakeResponse_v0, SaslHandShakeResponse_v1] - - class DescribeAclsResponse_v0(Response): API_KEY = 29 API_VERSION = 0 @@ -733,7 +737,6 @@ class DescribeConfigsRequest_v2(Request): class DescribeLogDirsResponse_v0(Response): API_KEY = 35 API_VERSION = 0 - FLEXIBLE_VERSION = True SCHEMA = Schema( ('throttle_time_ms', Int32), ('log_dirs', Array( @@ -966,6 +969,7 @@ class AlterPartitionReassignmentsResponse_v0(Response): )), ("tags", TaggedFields) ) + FLEXIBLE_VERSION = True class AlterPartitionReassignmentsRequest_v0(Request): @@ -1013,6 +1017,7 @@ class ListPartitionReassignmentsResponse_v0(Response): )), ("tags", TaggedFields) ) + FLEXIBLE_VERSION = True class ListPartitionReassignmentsRequest_v0(Request): @@ -1034,3 +1039,77 @@ class ListPartitionReassignmentsRequest_v0(Request): ListPartitionReassignmentsRequest = [ListPartitionReassignmentsRequest_v0] ListPartitionReassignmentsResponse = [ListPartitionReassignmentsResponse_v0] + + +class ElectLeadersResponse_v0(Response): + API_KEY = 43 + API_VERSION = 1 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('error_code', Int16), + ('replication_election_results', Array( + ('topic', String('utf-8')), + ('partition_result', Array( + ('partition_id', Int32), + ('error_code', Int16), + ('error_message', String('utf-8')) + )) + )) + ) + + +class ElectLeadersRequest_v0(Request): + API_KEY = 43 + API_VERSION = 1 + RESPONSE_TYPE = ElectLeadersResponse_v0 + SCHEMA = Schema( + ('election_type', Int8), + ('topic_partitions', Array( + ('topic', String('utf-8')), + ('partition_ids', Array(Int32)) + )), + ('timeout', Int32), + ) + + +class ElectLeadersResponse_v1(Response): + API_KEY = 43 + API_VERSION = 1 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('error_code', Int16), + ('replication_election_results', Array( + ('topic', String('utf-8')), + ('partition_result', Array( + ('partition_id', Int32), + ('error_code', Int16), + ('error_message', String('utf-8')) + )) + )) + ) + + +class ElectLeadersRequest_v1(Request): + API_KEY = 43 + API_VERSION = 1 + RESPONSE_TYPE = ElectLeadersResponse_v1 + SCHEMA = Schema( + ('election_type', Int8), + ('topic_partitions', Array( + ('topic', String('utf-8')), + ('partition_ids', Array(Int32)) + )), + ('timeout', Int32), + ) + + +class ElectionType(IntEnum): + """ Leader election type + """ + + PREFERRED = 0, + UNCLEAN = 1 + + +ElectLeadersRequest = [ElectLeadersRequest_v0, ElectLeadersRequest_v1] +ElectLeadersResponse = [ElectLeadersResponse_v0, ElectLeadersResponse_v1] diff --git a/kafka/protocol/api.py b/kafka/protocol/api.py index f12cb972b..9cd5767c1 100644 --- a/kafka/protocol/api.py +++ b/kafka/protocol/api.py @@ -5,6 +5,8 @@ from kafka.protocol.struct import Struct from kafka.protocol.types import Int16, Int32, String, Schema, Array, TaggedFields +from kafka.vendor.six import add_metaclass + class RequestHeader(Struct): SCHEMA = Schema( @@ -49,9 +51,8 @@ class ResponseHeaderV2(Struct): ) +@add_metaclass(abc.ABCMeta) class Request(Struct): - 
__metaclass__ = abc.ABCMeta - FLEXIBLE_VERSION = False @abc.abstractproperty @@ -81,19 +82,15 @@ def expect_response(self): def to_object(self): return _to_object(self.SCHEMA, self) - def build_request_header(self, correlation_id, client_id): + def build_header(self, correlation_id, client_id): if self.FLEXIBLE_VERSION: return RequestHeaderV2(self, correlation_id=correlation_id, client_id=client_id) return RequestHeader(self, correlation_id=correlation_id, client_id=client_id) - def parse_response_header(self, read_buffer): - if self.FLEXIBLE_VERSION: - return ResponseHeaderV2.decode(read_buffer) - return ResponseHeader.decode(read_buffer) - +@add_metaclass(abc.ABCMeta) class Response(Struct): - __metaclass__ = abc.ABCMeta + FLEXIBLE_VERSION = False @abc.abstractproperty def API_KEY(self): @@ -113,6 +110,12 @@ def SCHEMA(self): def to_object(self): return _to_object(self.SCHEMA, self) + @classmethod + def parse_header(cls, read_buffer): + if cls.FLEXIBLE_VERSION: + return ResponseHeaderV2.decode(read_buffer) + return ResponseHeader.decode(read_buffer) + def _to_object(schema, data): obj = {} diff --git a/kafka/protocol/api_versions.py b/kafka/protocol/api_versions.py index 9a782928b..e7cedd954 100644 --- a/kafka/protocol/api_versions.py +++ b/kafka/protocol/api_versions.py @@ -1,7 +1,9 @@ from __future__ import absolute_import +from io import BytesIO + from kafka.protocol.api import Request, Response -from kafka.protocol.types import Array, Int16, Int32, Schema +from kafka.protocol.types import Array, CompactArray, CompactString, Int16, Int32, Schema, TaggedFields class BaseApiVersionsResponse(Response): @@ -59,6 +61,28 @@ class ApiVersionsResponse_v2(BaseApiVersionsResponse): SCHEMA = ApiVersionsResponse_v1.SCHEMA +class ApiVersionsResponse_v3(BaseApiVersionsResponse): + API_KEY = 18 + API_VERSION = 3 + SCHEMA = Schema( + ('error_code', Int16), + ('api_versions', CompactArray( + ('api_key', Int16), + ('min_version', Int16), + ('max_version', Int16), + ('_tagged_fields', TaggedFields))), + ('throttle_time_ms', Int32), + ('_tagged_fields', TaggedFields) + ) + # Note: ApiVersions Response does not send FLEXIBLE_VERSION header! 
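+    # The broker always uses the old, non-flexible response header for
+    # ApiVersions, regardless of the request version, so that a client can
+    # still decode error_code before it knows which versions the broker
+    # supports. Hence the request classes below set FLEXIBLE_VERSION = True
+    # while the response classes do not. Illustrative sketch (not part of
+    # this change):
+    #     assert ApiVersionsRequest_v3.FLEXIBLE_VERSION
+    #     assert not ApiVersionsResponse_v3.FLEXIBLE_VERSION  # parse_header() uses ResponseHeader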
+ + +class ApiVersionsResponse_v4(BaseApiVersionsResponse): + API_KEY = 18 + API_VERSION = 4 + SCHEMA = ApiVersionsResponse_v3.SCHEMA + + class ApiVersionsRequest_v0(Request): API_KEY = 18 API_VERSION = 0 @@ -76,13 +100,35 @@ class ApiVersionsRequest_v1(Request): class ApiVersionsRequest_v2(Request): API_KEY = 18 API_VERSION = 2 - RESPONSE_TYPE = ApiVersionsResponse_v1 - SCHEMA = ApiVersionsRequest_v0.SCHEMA + RESPONSE_TYPE = ApiVersionsResponse_v2 + SCHEMA = ApiVersionsRequest_v1.SCHEMA + + +class ApiVersionsRequest_v3(Request): + API_KEY = 18 + API_VERSION = 3 + RESPONSE_TYPE = ApiVersionsResponse_v3 + SCHEMA = Schema( + ('client_software_name', CompactString('utf-8')), + ('client_software_version', CompactString('utf-8')), + ('_tagged_fields', TaggedFields) + ) + FLEXIBLE_VERSION = True + + +class ApiVersionsRequest_v4(Request): + API_KEY = 18 + API_VERSION = 4 + RESPONSE_TYPE = ApiVersionsResponse_v4 + SCHEMA = ApiVersionsRequest_v3.SCHEMA + FLEXIBLE_VERSION = True ApiVersionsRequest = [ ApiVersionsRequest_v0, ApiVersionsRequest_v1, ApiVersionsRequest_v2, + ApiVersionsRequest_v3, ApiVersionsRequest_v4, ] ApiVersionsResponse = [ ApiVersionsResponse_v0, ApiVersionsResponse_v1, ApiVersionsResponse_v2, + ApiVersionsResponse_v3, ApiVersionsResponse_v4, ] diff --git a/kafka/protocol/broker_api_versions.py b/kafka/protocol/broker_api_versions.py index db7567180..af142d07c 100644 --- a/kafka/protocol/broker_api_versions.py +++ b/kafka/protocol/broker_api_versions.py @@ -23,6 +23,8 @@ # Adds Sasl Authenticate, and additional admin apis (describe/alter log dirs, etc) (1, 0): {0: (0, 5), 1: (0, 6), 2: (0, 2), 3: (0, 5), 4: (0, 1), 5: (0, 0), 6: (0, 4), 7: (0, 1), 8: (0, 3), 9: (0, 3), 10: (0, 1), 11: (0, 2), 12: (0, 1), 13: (0, 1), 14: (0, 1), 15: (0, 1), 16: (0, 1), 17: (0, 1), 18: (0, 1), 19: (0, 2), 20: (0, 1), 21: (0, 0), 22: (0, 0), 23: (0, 0), 24: (0, 0), 25: (0, 0), 26: (0, 0), 27: (0, 0), 28: (0, 0), 29: (0, 0), 30: (0, 0), 31: (0, 0), 32: (0, 0), 33: (0, 0), 34: (0, 0), 35: (0, 0), 36: (0, 0), 37: (0, 0)}, + (1, 1): {0: (0, 5), 1: (0, 7), 2: (0, 2), 3: (0, 5), 4: (0, 1), 5: (0, 0), 6: (0, 4), 7: (0, 1), 8: (0, 3), 9: (0, 3), 10: (0, 1), 11: (0, 2), 12: (0, 1), 13: (0, 1), 14: (0, 1), 15: (0, 1), 16: (0, 1), 17: (0, 1), 18: (0, 1), 19: (0, 2), 20: (0, 1), 21: (0, 0), 22: (0, 0), 23: (0, 0), 24: (0, 0), 25: (0, 0), 26: (0, 0), 27: (0, 0), 28: (0, 0), 29: (0, 0), 30: (0, 0), 31: (0, 0), 32: (0, 1), 33: (0, 0), 34: (0, 0), 35: (0, 0), 36: (0, 0), 37: (0, 0), 38: (0, 0), 39: (0, 0), 40: (0, 0), 41: (0, 0), 42: (0, 0)}, + (2, 0): {0: (0, 6), 1: (0, 8), 2: (0, 3), 3: (0, 6), 4: (0, 1), 5: (0, 0), 6: (0, 4), 7: (0, 1), 8: (0, 4), 9: (0, 4), 10: (0, 2), 11: (0, 3), 12: (0, 2), 13: (0, 2), 14: (0, 2), 15: (0, 2), 16: (0, 2), 17: (0, 1), 18: (0, 2), 19: (0, 3), 20: (0, 2), 21: (0, 1), 22: (0, 1), 23: (0, 1), 24: (0, 1), 25: (0, 1), 26: (0, 1), 27: (0, 0), 28: (0, 1), 29: (0, 1), 30: (0, 1), 31: (0, 1), 32: (0, 2), 33: (0, 1), 34: (0, 1), 35: (0, 1), 36: (0, 0), 37: (0, 1), 38: (0, 1), 39: (0, 1), 40: (0, 1), 41: (0, 1), 42: (0, 1)}, (2, 1): {0: (0, 7), 1: (0, 10), 2: (0, 4), 3: (0, 7), 4: (0, 1), 5: (0, 0), 6: (0, 4), 7: (0, 1), 8: (0, 6), 9: (0, 5), 10: (0, 2), 11: (0, 3), 12: (0, 2), 13: (0, 2), 14: (0, 2), 15: (0, 2), 16: (0, 2), 17: (0, 1), 18: (0, 2), 19: (0, 3), 20: (0, 3), 21: (0, 1), 22: (0, 1), 23: (0, 2), 24: (0, 1), 25: (0, 1), 26: (0, 1), 27: (0, 0), 28: (0, 2), 29: (0, 1), 30: (0, 1), 31: (0, 1), 32: (0, 2), 33: (0, 1), 34: (0, 1), 35: (0, 1), 36: (0, 0), 37: (0, 1), 38: (0, 1), 39: 
(0, 1), 40: (0, 1), 41: (0, 1), 42: (0, 1)}, @@ -61,4 +63,6 @@ (3, 9): {0: (0, 11), 1: (0, 17), 2: (0, 9), 3: (0, 12), 4: (0, 7), 5: (0, 4), 6: (0, 8), 7: (0, 3), 8: (0, 9), 9: (0, 9), 10: (0, 6), 11: (0, 9), 12: (0, 4), 13: (0, 5), 14: (0, 5), 15: (0, 5), 16: (0, 5), 17: (0, 1), 18: (0, 4), 19: (0, 7), 20: (0, 6), 21: (0, 2), 22: (0, 5), 23: (0, 4), 24: (0, 5), 25: (0, 4), 26: (0, 4), 27: (0, 1), 28: (0, 4), 29: (0, 3), 30: (0, 3), 31: (0, 3), 32: (0, 4), 33: (0, 2), 34: (0, 2), 35: (0, 4), 36: (0, 2), 37: (0, 3), 38: (0, 3), 39: (0, 2), 40: (0, 2), 41: (0, 3), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 1), 49: (0, 1), 50: (0, 0), 51: (0, 0), 56: (0, 3), 57: (0, 1), 58: (0, 0), 60: (0, 1), 61: (0, 0), 65: (0, 0), 66: (0, 1), 67: (0, 0), 68: (0, 0), 69: (0, 0)}, + (4, 0): {0: (0, 12), 1: (4, 17), 2: (1, 10), 3: (0, 13), 8: (2, 9), 9: (1, 9), 10: (0, 6), 11: (2, 9), 12: (0, 4), 13: (0, 5), 14: (0, 5), 15: (0, 6), 16: (0, 5), 17: (0, 1), 18: (0, 4), 19: (2, 7), 20: (1, 6), 21: (0, 2), 22: (0, 5), 23: (2, 4), 24: (0, 5), 25: (0, 4), 26: (0, 5), 27: (1, 1), 28: (0, 5), 29: (1, 3), 30: (1, 3), 31: (1, 3), 32: (1, 4), 33: (0, 2), 34: (1, 2), 35: (1, 4), 36: (0, 2), 37: (0, 3), 38: (1, 3), 39: (1, 2), 40: (1, 2), 41: (1, 3), 42: (0, 2), 43: (0, 2), 44: (0, 1), 45: (0, 0), 46: (0, 0), 47: (0, 0), 48: (0, 1), 49: (0, 1), 50: (0, 0), 51: (0, 0), 55: (0, 2), 57: (0, 2), 60: (0, 2), 61: (0, 0), 64: (0, 0), 65: (0, 0), 66: (0, 1), 68: (0, 1), 69: (0, 1), 74: (0, 0), 75: (0, 0), 80: (0, 0), 81: (0, 0)}, + } diff --git a/kafka/protocol/commit.py b/kafka/protocol/commit.py index 53c2466fe..a0439e7ef 100644 --- a/kafka/protocol/commit.py +++ b/kafka/protocol/commit.py @@ -1,7 +1,7 @@ from __future__ import absolute_import from kafka.protocol.api import Request, Response -from kafka.protocol.types import Array, Int8, Int16, Int32, Int64, Schema, String +from kafka.protocol.types import Array, Int16, Int32, Int64, Schema, String class OffsetCommitResponse_v0(Response): diff --git a/kafka/protocol/end_txn.py b/kafka/protocol/end_txn.py new file mode 100644 index 000000000..96d6cc514 --- /dev/null +++ b/kafka/protocol/end_txn.py @@ -0,0 +1,58 @@ +from __future__ import absolute_import + +from kafka.protocol.api import Request, Response +from kafka.protocol.types import Boolean, Int16, Int32, Int64, Schema, String + + +class EndTxnResponse_v0(Response): + API_KEY = 26 + API_VERSION = 0 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('error_code', Int16), + ) + + +class EndTxnResponse_v1(Response): + API_KEY = 26 + API_VERSION = 1 + SCHEMA = EndTxnResponse_v0.SCHEMA + + +class EndTxnResponse_v2(Response): + API_KEY = 26 + API_VERSION = 2 + SCHEMA = EndTxnResponse_v1.SCHEMA + + +class EndTxnRequest_v0(Request): + API_KEY = 26 + API_VERSION = 0 + RESPONSE_TYPE = EndTxnResponse_v0 + SCHEMA = Schema( + ('transactional_id', String('utf-8')), + ('producer_id', Int64), + ('producer_epoch', Int16), + ('committed', Boolean)) + + +class EndTxnRequest_v1(Request): + API_KEY = 26 + API_VERSION = 1 + RESPONSE_TYPE = EndTxnResponse_v1 + SCHEMA = EndTxnRequest_v0.SCHEMA + + +class EndTxnRequest_v2(Request): + API_KEY = 26 + API_VERSION = 2 + RESPONSE_TYPE = EndTxnResponse_v2 + SCHEMA = EndTxnRequest_v1.SCHEMA + + +EndTxnRequest = [ + EndTxnRequest_v0, EndTxnRequest_v1, EndTxnRequest_v2, +] +EndTxnResponse = [ + EndTxnResponse_v0, EndTxnResponse_v1, EndTxnResponse_v2, +] diff --git a/kafka/protocol/fetch.py b/kafka/protocol/fetch.py index d193eafcf..036a37eb8 100644 --- 
a/kafka/protocol/fetch.py +++ b/kafka/protocol/fetch.py @@ -1,9 +1,15 @@ from __future__ import absolute_import +import collections + from kafka.protocol.api import Request, Response from kafka.protocol.types import Array, Int8, Int16, Int32, Int64, Schema, String, Bytes +AbortedTransaction = collections.namedtuple("AbortedTransaction", + ["producer_id", "first_offset"]) + + class FetchResponse_v0(Response): API_KEY = 1 API_VERSION = 0 diff --git a/kafka/protocol/find_coordinator.py b/kafka/protocol/find_coordinator.py index a68a23902..be5b45ded 100644 --- a/kafka/protocol/find_coordinator.py +++ b/kafka/protocol/find_coordinator.py @@ -1,7 +1,7 @@ from __future__ import absolute_import from kafka.protocol.api import Request, Response -from kafka.protocol.types import Array, Int8, Int16, Int32, Int64, Schema, String +from kafka.protocol.types import Int8, Int16, Int32, Schema, String class FindCoordinatorResponse_v0(Response): diff --git a/kafka/protocol/group.py b/kafka/protocol/group.py index 3b32590ec..74e19c94b 100644 --- a/kafka/protocol/group.py +++ b/kafka/protocol/group.py @@ -52,6 +52,12 @@ class JoinGroupResponse_v3(Response): SCHEMA = JoinGroupResponse_v2.SCHEMA +class JoinGroupResponse_v4(Response): + API_KEY = 11 + API_VERSION = 4 + SCHEMA = JoinGroupResponse_v3.SCHEMA + + class JoinGroupRequest_v0(Request): API_KEY = 11 API_VERSION = 0 @@ -95,14 +101,22 @@ class JoinGroupRequest_v3(Request): API_VERSION = 3 RESPONSE_TYPE = JoinGroupResponse_v3 SCHEMA = JoinGroupRequest_v2.SCHEMA - UNKNOWN_MEMBER_ID = '' + + +class JoinGroupRequest_v4(Request): + API_KEY = 11 + API_VERSION = 4 + RESPONSE_TYPE = JoinGroupResponse_v4 + SCHEMA = JoinGroupRequest_v3.SCHEMA JoinGroupRequest = [ - JoinGroupRequest_v0, JoinGroupRequest_v1, JoinGroupRequest_v2, JoinGroupRequest_v3 + JoinGroupRequest_v0, JoinGroupRequest_v1, JoinGroupRequest_v2, + JoinGroupRequest_v3, JoinGroupRequest_v4, ] JoinGroupResponse = [ - JoinGroupResponse_v0, JoinGroupResponse_v1, JoinGroupResponse_v2, JoinGroupResponse_v3 + JoinGroupResponse_v0, JoinGroupResponse_v1, JoinGroupResponse_v2, + JoinGroupResponse_v3, JoinGroupResponse_v4, ] diff --git a/kafka/protocol/init_producer_id.py b/kafka/protocol/init_producer_id.py new file mode 100644 index 000000000..8426fe00b --- /dev/null +++ b/kafka/protocol/init_producer_id.py @@ -0,0 +1,46 @@ +from __future__ import absolute_import + +from kafka.protocol.api import Request, Response +from kafka.protocol.types import Int16, Int32, Int64, Schema, String + + +class InitProducerIdResponse_v0(Response): + API_KEY = 22 + API_VERSION = 0 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('error_code', Int16), + ('producer_id', Int64), + ('producer_epoch', Int16), + ) + + +class InitProducerIdResponse_v1(Response): + API_KEY = 22 + API_VERSION = 1 + SCHEMA = InitProducerIdResponse_v0.SCHEMA + + +class InitProducerIdRequest_v0(Request): + API_KEY = 22 + API_VERSION = 0 + RESPONSE_TYPE = InitProducerIdResponse_v0 + SCHEMA = Schema( + ('transactional_id', String('utf-8')), + ('transaction_timeout_ms', Int32), + ) + + +class InitProducerIdRequest_v1(Request): + API_KEY = 22 + API_VERSION = 1 + RESPONSE_TYPE = InitProducerIdResponse_v1 + SCHEMA = InitProducerIdRequest_v0.SCHEMA + + +InitProducerIdRequest = [ + InitProducerIdRequest_v0, InitProducerIdRequest_v1, +] +InitProducerIdResponse = [ + InitProducerIdResponse_v0, InitProducerIdResponse_v1, +] diff --git a/kafka/protocol/metadata.py b/kafka/protocol/metadata.py index 3291be82d..bb22ba997 100644 --- a/kafka/protocol/metadata.py +++ 
b/kafka/protocol/metadata.py @@ -172,6 +172,7 @@ class MetadataRequest_v0(Request): ('topics', Array(String('utf-8'))) ) ALL_TOPICS = [] # Empty Array (len 0) for topics returns all topics + NO_TOPICS = [] # v0 does not support a 'no topics' request, so we'll just ask for ALL class MetadataRequest_v1(Request): diff --git a/kafka/protocol/offset_for_leader_epoch.py b/kafka/protocol/offset_for_leader_epoch.py index afe8284eb..8465588a3 100644 --- a/kafka/protocol/offset_for_leader_epoch.py +++ b/kafka/protocol/offset_for_leader_epoch.py @@ -4,7 +4,7 @@ from kafka.protocol.types import Array, CompactArray, CompactString, Int16, Int32, Int64, Schema, String, TaggedFields -class OffsetForLeaderEpochResponse_v0(Request): +class OffsetForLeaderEpochResponse_v0(Response): API_KEY = 23 API_VERSION = 0 SCHEMA = Schema( @@ -16,7 +16,7 @@ class OffsetForLeaderEpochResponse_v0(Request): ('end_offset', Int64)))))) -class OffsetForLeaderEpochResponse_v1(Request): +class OffsetForLeaderEpochResponse_v1(Response): API_KEY = 23 API_VERSION = 1 SCHEMA = Schema( @@ -29,7 +29,7 @@ class OffsetForLeaderEpochResponse_v1(Request): ('end_offset', Int64)))))) -class OffsetForLeaderEpochResponse_v2(Request): +class OffsetForLeaderEpochResponse_v2(Response): API_KEY = 23 API_VERSION = 2 SCHEMA = Schema( @@ -43,13 +43,13 @@ class OffsetForLeaderEpochResponse_v2(Request): ('end_offset', Int64)))))) -class OffsetForLeaderEpochResponse_v3(Request): +class OffsetForLeaderEpochResponse_v3(Response): API_KEY = 23 API_VERSION = 3 SCHEMA = OffsetForLeaderEpochResponse_v2.SCHEMA -class OffsetForLeaderEpochResponse_v4(Request): +class OffsetForLeaderEpochResponse_v4(Response): API_KEY = 23 API_VERSION = 4 SCHEMA = Schema( diff --git a/kafka/protocol/parser.py b/kafka/protocol/parser.py index e7799fce6..4bc427330 100644 --- a/kafka/protocol/parser.py +++ b/kafka/protocol/parser.py @@ -59,7 +59,7 @@ def send_request(self, request, correlation_id=None): if correlation_id is None: correlation_id = self._next_correlation_id() - header = request.build_request_header(correlation_id=correlation_id, client_id=self._client_id) + header = request.build_header(correlation_id=correlation_id, client_id=self._client_id) message = b''.join([header.encode(), request.encode()]) size = Int32.encode(len(message)) data = size + message @@ -136,13 +136,14 @@ def _process_response(self, read_buffer): if not self.in_flight_requests: raise Errors.CorrelationIdError('No in-flight-request found for server response') (correlation_id, request) = self.in_flight_requests.popleft() - response_header = request.parse_response_header(read_buffer) + response_type = request.RESPONSE_TYPE + response_header = response_type.parse_header(read_buffer) recv_correlation_id = response_header.correlation_id log.debug('Received correlation id: %d', recv_correlation_id) # 0.8.2 quirk if (recv_correlation_id == 0 and correlation_id != 0 and - request.RESPONSE_TYPE is FindCoordinatorResponse[0] and + response_type is FindCoordinatorResponse[0] and (self._api_version == (0, 8, 2) or self._api_version is None)): log.warning('Kafka 0.8.2 quirk -- GroupCoordinatorResponse' ' Correlation ID does not match request. 
This' @@ -156,15 +157,15 @@ def _process_response(self, read_buffer): % (correlation_id, recv_correlation_id)) # decode response - log.debug('Processing response %s', request.RESPONSE_TYPE.__name__) + log.debug('Processing response %s', response_type.__name__) try: - response = request.RESPONSE_TYPE.decode(read_buffer) + response = response_type.decode(read_buffer) except ValueError: read_buffer.seek(0) buf = read_buffer.read() log.error('Response %d [ResponseType: %s Request: %s]:' ' Unable to decode %d-byte buffer: %r', - correlation_id, request.RESPONSE_TYPE, + correlation_id, response_type, request, len(buf), buf) raise Errors.KafkaProtocolError('Unable to decode response') diff --git a/kafka/protocol/sasl_authenticate.py b/kafka/protocol/sasl_authenticate.py new file mode 100644 index 000000000..a2b9b1988 --- /dev/null +++ b/kafka/protocol/sasl_authenticate.py @@ -0,0 +1,42 @@ +from __future__ import absolute_import + +from kafka.protocol.api import Request, Response +from kafka.protocol.types import Bytes, Int16, Int64, Schema, String + + +class SaslAuthenticateResponse_v0(Response): + API_KEY = 36 + API_VERSION = 0 + SCHEMA = Schema( + ('error_code', Int16), + ('error_message', String('utf-8')), + ('auth_bytes', Bytes)) + + +class SaslAuthenticateResponse_v1(Response): + API_KEY = 36 + API_VERSION = 1 + SCHEMA = Schema( + ('error_code', Int16), + ('error_message', String('utf-8')), + ('auth_bytes', Bytes), + ('session_lifetime_ms', Int64)) + + +class SaslAuthenticateRequest_v0(Request): + API_KEY = 36 + API_VERSION = 0 + RESPONSE_TYPE = SaslAuthenticateResponse_v0 + SCHEMA = Schema( + ('auth_bytes', Bytes)) + + +class SaslAuthenticateRequest_v1(Request): + API_KEY = 36 + API_VERSION = 1 + RESPONSE_TYPE = SaslAuthenticateResponse_v1 + SCHEMA = SaslAuthenticateRequest_v0.SCHEMA + + +SaslAuthenticateRequest = [SaslAuthenticateRequest_v0, SaslAuthenticateRequest_v1] +SaslAuthenticateResponse = [SaslAuthenticateResponse_v0, SaslAuthenticateResponse_v1] diff --git a/kafka/protocol/sasl_handshake.py b/kafka/protocol/sasl_handshake.py new file mode 100644 index 000000000..e91c856ca --- /dev/null +++ b/kafka/protocol/sasl_handshake.py @@ -0,0 +1,39 @@ +from __future__ import absolute_import + +from kafka.protocol.api import Request, Response +from kafka.protocol.types import Array, Int16, Schema, String + + +class SaslHandshakeResponse_v0(Response): + API_KEY = 17 + API_VERSION = 0 + SCHEMA = Schema( + ('error_code', Int16), + ('enabled_mechanisms', Array(String('utf-8'))) + ) + + +class SaslHandshakeResponse_v1(Response): + API_KEY = 17 + API_VERSION = 1 + SCHEMA = SaslHandshakeResponse_v0.SCHEMA + + +class SaslHandshakeRequest_v0(Request): + API_KEY = 17 + API_VERSION = 0 + RESPONSE_TYPE = SaslHandshakeResponse_v0 + SCHEMA = Schema( + ('mechanism', String('utf-8')) + ) + + +class SaslHandshakeRequest_v1(Request): + API_KEY = 17 + API_VERSION = 1 + RESPONSE_TYPE = SaslHandshakeResponse_v1 + SCHEMA = SaslHandshakeRequest_v0.SCHEMA + + +SaslHandshakeRequest = [SaslHandshakeRequest_v0, SaslHandshakeRequest_v1] +SaslHandshakeResponse = [SaslHandshakeResponse_v0, SaslHandshakeResponse_v1] diff --git a/kafka/protocol/txn_offset_commit.py b/kafka/protocol/txn_offset_commit.py new file mode 100644 index 000000000..df1b1bd1e --- /dev/null +++ b/kafka/protocol/txn_offset_commit.py @@ -0,0 +1,78 @@ +from __future__ import absolute_import + +from kafka.protocol.api import Request, Response +from kafka.protocol.types import Array, Int16, Int32, Int64, Schema, String + + +class 
TxnOffsetCommitResponse_v0(Response): + API_KEY = 28 + API_VERSION = 0 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('error_code', Int16)))))) + + +class TxnOffsetCommitResponse_v1(Response): + API_KEY = 28 + API_VERSION = 1 + SCHEMA = TxnOffsetCommitResponse_v0.SCHEMA + + +class TxnOffsetCommitResponse_v2(Response): + API_KEY = 28 + API_VERSION = 2 + SCHEMA = TxnOffsetCommitResponse_v1.SCHEMA + + +class TxnOffsetCommitRequest_v0(Request): + API_KEY = 28 + API_VERSION = 0 + RESPONSE_TYPE = TxnOffsetCommitResponse_v0 + SCHEMA = Schema( + ('transactional_id', String('utf-8')), + ('group_id', String('utf-8')), + ('producer_id', Int64), + ('producer_epoch', Int16), + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('offset', Int64), + ('metadata', String('utf-8'))))))) + + +class TxnOffsetCommitRequest_v1(Request): + API_KEY = 28 + API_VERSION = 1 + RESPONSE_TYPE = TxnOffsetCommitResponse_v1 + SCHEMA = TxnOffsetCommitRequest_v0.SCHEMA + + +class TxnOffsetCommitRequest_v2(Request): + API_KEY = 28 + API_VERSION = 2 + RESPONSE_TYPE = TxnOffsetCommitResponse_v2 + SCHEMA = Schema( + ('transactional_id', String('utf-8')), + ('group_id', String('utf-8')), + ('producer_id', Int64), + ('producer_epoch', Int16), + ('topics', Array( + ('topic', String('utf-8')), + ('partitions', Array( + ('partition', Int32), + ('offset', Int64), + ('leader_epoch', Int32), + ('metadata', String('utf-8'))))))) + + +TxnOffsetCommitRequest = [ + TxnOffsetCommitRequest_v0, TxnOffsetCommitRequest_v1, TxnOffsetCommitRequest_v2, +] +TxnOffsetCommitResponse = [ + TxnOffsetCommitResponse_v0, TxnOffsetCommitResponse_v1, TxnOffsetCommitResponse_v2, +] diff --git a/kafka/record/abc.py b/kafka/record/abc.py index 8509e23e5..c78f0da69 100644 --- a/kafka/record/abc.py +++ b/kafka/record/abc.py @@ -1,11 +1,19 @@ from __future__ import absolute_import + import abc +from kafka.vendor.six import add_metaclass + +@add_metaclass(abc.ABCMeta) class ABCRecord(object): - __metaclass__ = abc.ABCMeta __slots__ = () + @abc.abstractproperty + def size_in_bytes(self): + """ Number of total bytes in record + """ + @abc.abstractproperty def offset(self): """ Absolute offset of record @@ -37,6 +45,11 @@ def checksum(self): be the checksum for v0 and v1 and None for v2 and above. """ + @abc.abstractmethod + def validate_crc(self): + """ Return True if v0/v1 record matches checksum. noop/True for v2 records + """ + @abc.abstractproperty def headers(self): """ If supported by version list of key-value tuples, or empty list if @@ -44,8 +57,8 @@ def headers(self): """ +@add_metaclass(abc.ABCMeta) class ABCRecordBatchBuilder(object): - __metaclass__ = abc.ABCMeta __slots__ = () @abc.abstractmethod @@ -84,11 +97,11 @@ def build(self): """ +@add_metaclass(abc.ABCMeta) class ABCRecordBatch(object): """ For v2 encapsulates a RecordBatch, for v0/v1 a single (maybe compressed) message. """ - __metaclass__ = abc.ABCMeta __slots__ = () @abc.abstractmethod @@ -97,9 +110,24 @@ def __iter__(self): if needed. """ + @abc.abstractproperty + def base_offset(self): + """ Return base offset for batch + """ + + @abc.abstractproperty + def size_in_bytes(self): + """ Return size of batch in bytes (includes header overhead) + """ + + @abc.abstractproperty + def magic(self): + """ Return magic value (0, 1, 2) for batch. 
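+        Magic 0 and 1 are the legacy message-set formats; magic 2 is the
+        v2 RecordBatch format (see default_records.py).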
+ """ + +@add_metaclass(abc.ABCMeta) class ABCRecords(object): - __metaclass__ = abc.ABCMeta __slots__ = () @abc.abstractmethod diff --git a/kafka/record/default_records.py b/kafka/record/default_records.py index 14732cb06..a3b9cd5d8 100644 --- a/kafka/record/default_records.py +++ b/kafka/record/default_records.py @@ -60,7 +60,7 @@ from kafka.record.util import ( decode_varint, encode_varint, calc_crc32c, size_of_varint ) -from kafka.errors import CorruptRecordException, UnsupportedCodecError +from kafka.errors import CorruptRecordError, UnsupportedCodecError from kafka.codec import ( gzip_encode, snappy_encode, lz4_encode, zstd_encode, gzip_decode, snappy_decode, lz4_decode, zstd_decode @@ -104,6 +104,9 @@ class DefaultRecordBase(object): LOG_APPEND_TIME = 1 CREATE_TIME = 0 + NO_PRODUCER_ID = -1 + NO_SEQUENCE = -1 + MAX_INT = 2147483647 def _assert_has_codec(self, compression_type): if compression_type == self.CODEC_GZIP: @@ -114,6 +117,8 @@ def _assert_has_codec(self, compression_type): checker, name = codecs.has_lz4, "lz4" elif compression_type == self.CODEC_ZSTD: checker, name = codecs.has_zstd, "zstd" + else: + raise UnsupportedCodecError("Unrecognized compression type: %s" % (compression_type,)) if not checker(): raise UnsupportedCodecError( "Libraries for {} compression codec not found".format(name)) @@ -136,6 +141,10 @@ def __init__(self, buffer): def base_offset(self): return self._header_data[0] + @property + def size_in_bytes(self): + return self._header_data[1] + self.AFTER_LEN_OFFSET + @property def leader_epoch(self): return self._header_data[2] @@ -156,6 +165,14 @@ def attributes(self): def last_offset_delta(self): return self._header_data[6] + @property + def last_offset(self): + return self.base_offset + self.last_offset_delta + + @property + def next_offset(self): + return self.last_offset + 1 + @property def compression_type(self): return self.attributes & self.CODEC_MASK @@ -180,6 +197,40 @@ def first_timestamp(self): def max_timestamp(self): return self._header_data[8] + @property + def producer_id(self): + return self._header_data[9] + + def has_producer_id(self): + return self.producer_id > self.NO_PRODUCER_ID + + @property + def producer_epoch(self): + return self._header_data[10] + + @property + def base_sequence(self): + return self._header_data[11] + + @property + def has_sequence(self): + return self._header_data[11] != -1 # NO_SEQUENCE + + @property + def last_sequence(self): + if self.base_sequence == self.NO_SEQUENCE: + return self.NO_SEQUENCE + return self._increment_sequence(self.base_sequence, self.last_offset_delta) + + def _increment_sequence(self, base, increment): + if base > (self.MAX_INT - increment): + return increment - (self.MAX_INT - base) - 1 + return base + increment + + @property + def records_count(self): + return self._header_data[12] + def _maybe_uncompress(self): if not self._decompressed: compression_type = self.compression_type @@ -243,14 +294,14 @@ def _read_msg( header_count, pos = decode_varint(buffer, pos) if header_count < 0: - raise CorruptRecordException("Found invalid number of record " + raise CorruptRecordError("Found invalid number of record " "headers {}".format(header_count)) headers = [] while header_count: # Header key is of type String, that can't be None h_key_len, pos = decode_varint(buffer, pos) if h_key_len < 0: - raise CorruptRecordException( + raise CorruptRecordError( "Invalid negative header key size {}".format(h_key_len)) h_key = buffer[pos: pos + h_key_len].decode("utf-8") pos += h_key_len @@ -268,17 +319,17 @@ 
def _read_msg( # validate whether we have read all header bytes in the current record if pos - start_pos != length: - raise CorruptRecordException( + raise CorruptRecordError( "Invalid record size: expected to read {} bytes in record " "payload, but instead read {}".format(length, pos - start_pos)) self._pos = pos if self.is_control_batch: return ControlRecord( - offset, timestamp, self.timestamp_type, key, value, headers) + length, offset, timestamp, self.timestamp_type, key, value, headers) else: return DefaultRecord( - offset, timestamp, self.timestamp_type, key, value, headers) + length, offset, timestamp, self.timestamp_type, key, value, headers) def __iter__(self): self._maybe_uncompress() @@ -287,14 +338,14 @@ def __iter__(self): def __next__(self): if self._next_record_index >= self._num_records: if self._pos != len(self._buffer): - raise CorruptRecordException( + raise CorruptRecordError( "{} unconsumed bytes after all records consumed".format( len(self._buffer) - self._pos)) raise StopIteration try: msg = self._read_msg() except (ValueError, IndexError) as err: - raise CorruptRecordException( + raise CorruptRecordError( "Found invalid record structure: {!r}".format(err)) else: self._next_record_index += 1 @@ -311,13 +362,25 @@ def validate_crc(self): verify_crc = calc_crc32c(data_view.tobytes()) return crc == verify_crc + def __str__(self): + return ( + "DefaultRecordBatch(magic={}, base_offset={}, last_offset_delta={}," + " first_timestamp={}, max_timestamp={}," + " is_transactional={}, producer_id={}, producer_epoch={}, base_sequence={}," + " records_count={})".format( + self.magic, self.base_offset, self.last_offset_delta, + self.first_timestamp, self.max_timestamp, + self.is_transactional, self.producer_id, self.producer_epoch, self.base_sequence, + self.records_count)) + class DefaultRecord(ABCRecord): - __slots__ = ("_offset", "_timestamp", "_timestamp_type", "_key", "_value", + __slots__ = ("_size_in_bytes", "_offset", "_timestamp", "_timestamp_type", "_key", "_value", "_headers") - def __init__(self, offset, timestamp, timestamp_type, key, value, headers): + def __init__(self, size_in_bytes, offset, timestamp, timestamp_type, key, value, headers): + self._size_in_bytes = size_in_bytes self._offset = offset self._timestamp = timestamp self._timestamp_type = timestamp_type @@ -325,6 +388,10 @@ def __init__(self, offset, timestamp, timestamp_type, key, value, headers): self._value = value self._headers = headers + @property + def size_in_bytes(self): + return self._size_in_bytes + @property def offset(self): return self._offset @@ -361,6 +428,9 @@ def headers(self): def checksum(self): return None + def validate_crc(self): + return True + def __repr__(self): return ( "DefaultRecord(offset={!r}, timestamp={!r}, timestamp_type={!r}," @@ -371,7 +441,7 @@ def __repr__(self): class ControlRecord(DefaultRecord): - __slots__ = ("_offset", "_timestamp", "_timestamp_type", "_key", "_value", + __slots__ = ("_size_in_bytes", "_offset", "_timestamp", "_timestamp_type", "_key", "_value", "_headers", "_version", "_type") KEY_STRUCT = struct.Struct( @@ -379,8 +449,8 @@ class ControlRecord(DefaultRecord): "h" # Type => Int16 (0 indicates an abort marker, 1 indicates a commit) ) - def __init__(self, offset, timestamp, timestamp_type, key, value, headers): - super(ControlRecord, self).__init__(offset, timestamp, timestamp_type, key, value, headers) + def __init__(self, size_in_bytes, offset, timestamp, timestamp_type, key, value, headers): + super(ControlRecord, self).__init__(size_in_bytes, 
offset, timestamp, timestamp_type, key, value, headers) (self._version, self._type) = self.KEY_STRUCT.unpack(self._key) # see https://kafka.apache.org/documentation/#controlbatch @@ -440,6 +510,23 @@ def __init__( self._buffer = bytearray(self.HEADER_STRUCT.size) + def set_producer_state(self, producer_id, producer_epoch, base_sequence, is_transactional): + assert not is_transactional or producer_id != -1, "Cannot write transactional messages without a valid producer ID" + assert producer_id == -1 or producer_epoch != -1, "Invalid negative producer epoch" + assert producer_id == -1 or base_sequence != -1, "Invalid negative sequence number" + self._producer_id = producer_id + self._producer_epoch = producer_epoch + self._base_sequence = base_sequence + self._is_transactional = is_transactional + + @property + def producer_id(self): + return self._producer_id + + @property + def producer_epoch(self): + return self._producer_epoch + def _get_attributes(self, include_compression_type=True): attrs = 0 if include_compression_type: @@ -548,8 +635,8 @@ def write_header(self, use_compression_type=True): 0, # CRC will be set below, as we need a filled buffer for it self._get_attributes(use_compression_type), self._last_offset, - self._first_timestamp, - self._max_timestamp, + self._first_timestamp or 0, + self._max_timestamp or 0, self._producer_id, self._producer_epoch, self._base_sequence, @@ -594,14 +681,15 @@ def size(self): """ return len(self._buffer) - def size_in_bytes(self, offset, timestamp, key, value, headers): - if self._first_timestamp is not None: - timestamp_delta = timestamp - self._first_timestamp - else: - timestamp_delta = 0 + @classmethod + def header_size_in_bytes(self): + return self.HEADER_STRUCT.size + + @classmethod + def size_in_bytes(self, offset_delta, timestamp_delta, key, value, headers): size_of_body = ( 1 + # Attrs - size_of_varint(offset) + + size_of_varint(offset_delta) + size_of_varint(timestamp_delta) + self.size_of(key, value, headers) ) @@ -644,6 +732,17 @@ def estimate_size_in_bytes(cls, key, value, headers): cls.size_of(key, value, headers) ) + def __str__(self): + return ( + "DefaultRecordBatchBuilder(magic={}, base_offset={}, last_offset_delta={}," + " first_timestamp={}, max_timestamp={}," + " is_transactional={}, producer_id={}, producer_epoch={}, base_sequence={}," + " records_count={})".format( + self._magic, 0, self._last_offset, + self._first_timestamp or 0, self._max_timestamp or 0, + self._is_transactional, self._producer_id, self._producer_epoch, self._base_sequence, + self._num_records)) + class DefaultRecordMetadata(object): diff --git a/kafka/record/legacy_records.py b/kafka/record/legacy_records.py index 2f8523fcb..f085978f0 100644 --- a/kafka/record/legacy_records.py +++ b/kafka/record/legacy_records.py @@ -52,7 +52,7 @@ gzip_decode, snappy_decode, lz4_decode, lz4_decode_old_kafka, ) import kafka.codec as codecs -from kafka.errors import CorruptRecordException, UnsupportedCodecError +from kafka.errors import CorruptRecordError, UnsupportedCodecError class LegacyRecordBase(object): @@ -129,7 +129,7 @@ def _assert_has_codec(self, compression_type): class LegacyRecordBatch(ABCRecordBatch, LegacyRecordBase): - __slots__ = ("_buffer", "_magic", "_offset", "_crc", "_timestamp", + __slots__ = ("_buffer", "_magic", "_offset", "_length", "_crc", "_timestamp", "_attributes", "_decompressed") def __init__(self, buffer, magic): @@ -141,11 +141,20 @@ def __init__(self, buffer, magic): assert magic == magic_ self._offset = offset + self._length = length 
self._crc = crc self._timestamp = timestamp self._attributes = attrs self._decompressed = False + @property + def base_offset(self): + return self._offset + + @property + def size_in_bytes(self): + return self._length + self.LOG_OVERHEAD + @property def timestamp_type(self): """0 for CreateTime; 1 for LogAppendTime; None if unsupported. @@ -164,6 +173,10 @@ def timestamp_type(self): def compression_type(self): return self._attributes & self.CODEC_MASK + @property + def magic(self): + return self._magic + def validate_crc(self): crc = calc_crc32(self._buffer[self.MAGIC_OFFSET:]) return self._crc == crc @@ -178,7 +191,7 @@ def _decompress(self, key_offset): value_size = struct.unpack_from(">i", self._buffer, pos)[0] pos += self.VALUE_LENGTH if value_size == -1: - raise CorruptRecordException("Value of compressed message is None") + raise CorruptRecordError("Value of compressed message is None") else: data = self._buffer[pos:pos + value_size] @@ -232,6 +245,9 @@ def _read_key_value(self, pos): value = self._buffer[pos:pos + value_size].tobytes() return key, value + def _crc_bytes(self, msg_pos, length): + return self._buffer[msg_pos + self.MAGIC_OFFSET:msg_pos + self.LOG_OVERHEAD + length] + def __iter__(self): if self._magic == 1: key_offset = self.KEY_OFFSET_V1 @@ -255,7 +271,7 @@ def __iter__(self): absolute_base_offset = -1 for header, msg_pos in headers: - offset, _, crc, _, attrs, timestamp = header + offset, length, crc, _, attrs, timestamp = header # There should only ever be a single layer of compression assert not attrs & self.CODEC_MASK, ( 'MessageSet at offset %d appears double-compressed. This ' @@ -271,28 +287,36 @@ def __iter__(self): offset += absolute_base_offset key, value = self._read_key_value(msg_pos + key_offset) + crc_bytes = self._crc_bytes(msg_pos, length) yield LegacyRecord( - offset, timestamp, timestamp_type, - key, value, crc) + self._magic, offset, timestamp, timestamp_type, + key, value, crc, crc_bytes) else: key, value = self._read_key_value(key_offset) + crc_bytes = self._crc_bytes(0, len(self._buffer) - self.LOG_OVERHEAD) yield LegacyRecord( - self._offset, self._timestamp, timestamp_type, - key, value, self._crc) + self._magic, self._offset, self._timestamp, timestamp_type, + key, value, self._crc, crc_bytes) class LegacyRecord(ABCRecord): - __slots__ = ("_offset", "_timestamp", "_timestamp_type", "_key", "_value", - "_crc") + __slots__ = ("_magic", "_offset", "_timestamp", "_timestamp_type", "_key", "_value", + "_crc", "_crc_bytes") - def __init__(self, offset, timestamp, timestamp_type, key, value, crc): + def __init__(self, magic, offset, timestamp, timestamp_type, key, value, crc, crc_bytes): + self._magic = magic self._offset = offset self._timestamp = timestamp self._timestamp_type = timestamp_type self._key = key self._value = value self._crc = crc + self._crc_bytes = crc_bytes + + @property + def magic(self): + return self._magic @property def offset(self): @@ -330,11 +354,19 @@ def headers(self): def checksum(self): return self._crc + def validate_crc(self): + crc = calc_crc32(self._crc_bytes) + return self._crc == crc + + @property + def size_in_bytes(self): + return LegacyRecordBatchBuilder.estimate_size_in_bytes(self._magic, None, self._key, self._value) + def __repr__(self): return ( - "LegacyRecord(offset={!r}, timestamp={!r}, timestamp_type={!r}," + "LegacyRecord(magic={!r} offset={!r}, timestamp={!r}, timestamp_type={!r}," " key={!r}, value={!r}, crc={!r})".format( - self._offset, self._timestamp, self._timestamp_type, + self._magic, 
self._offset, self._timestamp, self._timestamp_type, self._key, self._value, self._crc) ) diff --git a/kafka/record/memory_records.py b/kafka/record/memory_records.py index fc2ef2d6b..9df733059 100644 --- a/kafka/record/memory_records.py +++ b/kafka/record/memory_records.py @@ -22,7 +22,7 @@ import struct -from kafka.errors import CorruptRecordException +from kafka.errors import CorruptRecordError, IllegalStateError, UnsupportedVersionError from kafka.record.abc import ABCRecords from kafka.record.legacy_records import LegacyRecordBatch, LegacyRecordBatchBuilder from kafka.record.default_records import DefaultRecordBatch, DefaultRecordBatchBuilder @@ -99,7 +99,7 @@ def next_batch(self, _min_slice=MIN_SLICE, if next_slice is None: return None if len(next_slice) < _min_slice: - raise CorruptRecordException( + raise CorruptRecordError( "Record size is less than the minimum record overhead " "({})".format(_min_slice - self.LOG_OVERHEAD)) self._cache_next() @@ -109,31 +109,56 @@ def next_batch(self, _min_slice=MIN_SLICE, else: return DefaultRecordBatch(next_slice) + def __iter__(self): + return self + + def __next__(self): + if not self.has_next(): + raise StopIteration + return self.next_batch() + + next = __next__ + class MemoryRecordsBuilder(object): __slots__ = ("_builder", "_batch_size", "_buffer", "_next_offset", "_closed", - "_bytes_written") + "_magic", "_bytes_written", "_producer_id", "_producer_epoch") - def __init__(self, magic, compression_type, batch_size): + def __init__(self, magic, compression_type, batch_size, offset=0, + transactional=False, producer_id=-1, producer_epoch=-1, base_sequence=-1): assert magic in [0, 1, 2], "Not supported magic" assert compression_type in [0, 1, 2, 3, 4], "Not valid compression type" if magic >= 2: + assert not transactional or producer_id != -1, "Cannot write transactional messages without a valid producer ID" + assert producer_id == -1 or producer_epoch != -1, "Invalid negative producer epoch" + assert producer_id == -1 or base_sequence != -1, "Invalid negative sequence number used" + self._builder = DefaultRecordBatchBuilder( magic=magic, compression_type=compression_type, - is_transactional=False, producer_id=-1, producer_epoch=-1, - base_sequence=-1, batch_size=batch_size) + is_transactional=transactional, producer_id=producer_id, + producer_epoch=producer_epoch, base_sequence=base_sequence, + batch_size=batch_size) + self._producer_id = producer_id + self._producer_epoch = producer_epoch else: + assert not transactional and producer_id == -1, "Idempotent messages are not supported for magic %s" % (magic,) self._builder = LegacyRecordBatchBuilder( magic=magic, compression_type=compression_type, batch_size=batch_size) + self._producer_id = None self._batch_size = batch_size self._buffer = None - self._next_offset = 0 + self._next_offset = offset self._closed = False + self._magic = magic self._bytes_written = 0 + def skip(self, offsets_to_skip): + # Exposed for testing compacted records + self._next_offset += offsets_to_skip + def append(self, timestamp, key, value, headers=[]): """ Append a message to the buffer. @@ -151,6 +176,30 @@ def append(self, timestamp, key, value, headers=[]): self._next_offset += 1 return metadata + def set_producer_state(self, producer_id, producer_epoch, base_sequence, is_transactional): + if self._magic < 2: + raise UnsupportedVersionError('Producer State requires Message format v2+') + elif self._closed: + # Sequence numbers are assigned when the batch is closed while the accumulator is being drained. 
+ # If the resulting ProduceRequest to the partition leader failed for a retriable error, the batch will + # be re queued. In this case, we should not attempt to set the state again, since changing the pid and sequence + # once a batch has been sent to the broker risks introducing duplicates. + raise IllegalStateError("Trying to set producer state of an already closed batch. This indicates a bug on the client.") + self._builder.set_producer_state(producer_id, producer_epoch, base_sequence, is_transactional) + self._producer_id = producer_id + + @property + def producer_id(self): + return self._producer_id + + @property + def producer_epoch(self): + return self._producer_epoch + + def records(self): + assert self._closed + return MemoryRecords(self._buffer) + def close(self): # This method may be called multiple times on the same batch # i.e., on retries @@ -160,6 +209,9 @@ def close(self): if not self._closed: self._bytes_written = self._builder.size() self._buffer = bytes(self._builder.build()) + if self._magic == 2: + self._producer_id = self._builder.producer_id + self._producer_epoch = self._builder.producer_epoch self._builder = None self._closed = True diff --git a/kafka/sasl/__init__.py b/kafka/sasl/__init__.py new file mode 100644 index 000000000..90f05e733 --- /dev/null +++ b/kafka/sasl/__init__.py @@ -0,0 +1,34 @@ +from __future__ import absolute_import + +import platform + +from kafka.sasl.gssapi import SaslMechanismGSSAPI +from kafka.sasl.msk import SaslMechanismAwsMskIam +from kafka.sasl.oauth import SaslMechanismOAuth +from kafka.sasl.plain import SaslMechanismPlain +from kafka.sasl.scram import SaslMechanismScram +from kafka.sasl.sspi import SaslMechanismSSPI + + +SASL_MECHANISMS = {} + + +def register_sasl_mechanism(name, klass, overwrite=False): + if not overwrite and name in SASL_MECHANISMS: + raise ValueError('Sasl mechanism %s already defined!' 
% name) + SASL_MECHANISMS[name] = klass + + +def get_sasl_mechanism(name): + return SASL_MECHANISMS[name] + + +register_sasl_mechanism('AWS_MSK_IAM', SaslMechanismAwsMskIam) +if platform.system() == 'Windows': + register_sasl_mechanism('GSSAPI', SaslMechanismSSPI) +else: + register_sasl_mechanism('GSSAPI', SaslMechanismGSSAPI) +register_sasl_mechanism('OAUTHBEARER', SaslMechanismOAuth) +register_sasl_mechanism('PLAIN', SaslMechanismPlain) +register_sasl_mechanism('SCRAM-SHA-256', SaslMechanismScram) +register_sasl_mechanism('SCRAM-SHA-512', SaslMechanismScram) diff --git a/kafka/sasl/abc.py b/kafka/sasl/abc.py new file mode 100644 index 000000000..0577888a9 --- /dev/null +++ b/kafka/sasl/abc.py @@ -0,0 +1,33 @@ +from __future__ import absolute_import + +import abc + +from kafka.vendor.six import add_metaclass + + +@add_metaclass(abc.ABCMeta) +class SaslMechanism(object): + @abc.abstractmethod + def __init__(self, **config): + pass + + @abc.abstractmethod + def auth_bytes(self): + pass + + @abc.abstractmethod + def receive(self, auth_bytes): + pass + + @abc.abstractmethod + def is_done(self): + pass + + @abc.abstractmethod + def is_authenticated(self): + pass + + def auth_details(self): + if not self.is_authenticated: + raise RuntimeError('Not authenticated yet!') + return 'Authenticated via SASL' diff --git a/kafka/sasl/gssapi.py b/kafka/sasl/gssapi.py new file mode 100644 index 000000000..c8e4f7cac --- /dev/null +++ b/kafka/sasl/gssapi.py @@ -0,0 +1,96 @@ +from __future__ import absolute_import + +import struct + +# needed for SASL_GSSAPI authentication: +try: + import gssapi + from gssapi.raw.misc import GSSError +except (ImportError, OSError): + #no gssapi available, will disable gssapi mechanism + gssapi = None + GSSError = None + +from kafka.sasl.abc import SaslMechanism + + +class SaslMechanismGSSAPI(SaslMechanism): + # Establish security context and negotiate protection level + # For reference RFC 2222, section 7.2.1 + + SASL_QOP_AUTH = 1 + SASL_QOP_AUTH_INT = 2 + SASL_QOP_AUTH_CONF = 4 + + def __init__(self, **config): + assert gssapi is not None, 'GSSAPI lib not available' + if 'sasl_kerberos_name' not in config and 'sasl_kerberos_service_name' not in config: + raise ValueError('sasl_kerberos_service_name or sasl_kerberos_name required for GSSAPI sasl configuration') + self._is_done = False + self._is_authenticated = False + self.gssapi_name = None + if config.get('sasl_kerberos_name', None) is not None: + self.auth_id = str(config['sasl_kerberos_name']) + if isinstance(config['sasl_kerberos_name'], gssapi.Name): + self.gssapi_name = config['sasl_kerberos_name'] + else: + kerberos_domain_name = config.get('sasl_kerberos_domain_name', '') or config.get('host', '') + self.auth_id = config['sasl_kerberos_service_name'] + '@' + kerberos_domain_name + if self.gssapi_name is None: + self.gssapi_name = gssapi.Name(self.auth_id, name_type=gssapi.NameType.hostbased_service).canonicalize(gssapi.MechType.kerberos) + self._client_ctx = gssapi.SecurityContext(name=self.gssapi_name, usage='initiate') + self._next_token = self._client_ctx.step(None) + + def auth_bytes(self): + # GSSAPI Auth does not have a final broker->client message + # so mark is_done after the final auth_bytes are provided + # in practice we'll still receive a response when using SaslAuthenticate + # but not when using the prior unframed approach. 
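+        # The connection layer drives this interface roughly as follows
+        # (illustrative sketch only; real I/O and SaslAuthenticate framing
+        # live in the connection code):
+        #     while not mech.is_done():
+        #         send_to_broker(mech.auth_bytes())
+        #         mech.receive(read_from_broker())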
+ if self._is_authenticated: + self._is_done = True + return self._next_token or b'' + + def receive(self, auth_bytes): + if not self._client_ctx.complete: + # The server will send a token back. Processing of this token either + # establishes a security context, or it needs further token exchange. + # The gssapi will be able to identify the needed next step. + self._next_token = self._client_ctx.step(auth_bytes) + elif self._is_done: + # The final step of gssapi is send, so we do not expect any additional bytes + # however, allow an empty message to support SaslAuthenticate response + if auth_bytes != b'': + raise ValueError("Unexpected receive auth_bytes after sasl/gssapi completion") + else: + # unwraps message containing supported protection levels and msg size + msg = self._client_ctx.unwrap(auth_bytes).message + # Kafka currently doesn't support integrity or confidentiality security layers, so we + # simply set QoP to 'auth' only (first octet). We reuse the max message size proposed + # by the server + client_flags = self.SASL_QOP_AUTH + server_flags = msg[0] + message_parts = [ + struct.Struct('>b').pack(client_flags & server_flags), + msg[1:], + self.auth_id.encode('utf-8'), + ] + # add authorization identity to the response, and GSS-wrap + self._next_token = self._client_ctx.wrap(b''.join(message_parts), False).message + # We need to identify the last token in auth_bytes(); + # we can't rely on client_ctx.complete because it becomes True after generating + # the second-to-last token (after calling .step(auth_bytes) for the final time) + # We could introduce an additional state variable (i.e., self._final_token), + # but instead we just set _is_authenticated. Since the plugin interface does + # not read is_authenticated() until after is_done() is True, this should be fine. + self._is_authenticated = True + + def is_done(self): + return self._is_done + + def is_authenticated(self): + return self._is_authenticated + + def auth_details(self): + if not self.is_authenticated: + raise RuntimeError('Not authenticated yet!') + return 'Authenticated as %s to %s via SASL / GSSAPI' % (self._client_ctx.initiator_name, self._client_ctx.target_name) diff --git a/kafka/sasl/msk.py b/kafka/sasl/msk.py new file mode 100644 index 000000000..7ec03215d --- /dev/null +++ b/kafka/sasl/msk.py @@ -0,0 +1,244 @@ +from __future__ import absolute_import + +import datetime +import hashlib +import hmac +import json +import logging +import string + +# needed for AWS_MSK_IAM authentication: +try: + from botocore.session import Session as BotoSession +except ImportError: + # no botocore available, will disable AWS_MSK_IAM mechanism + BotoSession = None + +from kafka.errors import KafkaConfigurationError +from kafka.sasl.abc import SaslMechanism +from kafka.vendor.six.moves import urllib + + +log = logging.getLogger(__name__) + + +class SaslMechanismAwsMskIam(SaslMechanism): + def __init__(self, **config): + assert BotoSession is not None, 'AWS_MSK_IAM requires the "botocore" package' + assert config.get('security_protocol', '') == 'SASL_SSL', 'AWS_MSK_IAM requires SASL_SSL' + assert 'host' in config, 'AWS_MSK_IAM requires host configuration' + self.host = config['host'] + self._auth = None + self._is_done = False + self._is_authenticated = False + + def _build_client(self): + session = BotoSession() + credentials = session.get_credentials().get_frozen_credentials() + if not session.get_config_variable('region'): + raise KafkaConfigurationError('Unable to determine region for AWS MSK cluster. 
Is AWS_DEFAULT_REGION set?') + return AwsMskIamClient( + host=self.host, + access_key=credentials.access_key, + secret_key=credentials.secret_key, + region=session.get_config_variable('region'), + token=credentials.token, + ) + + def auth_bytes(self): + client = self._build_client() + log.debug("Generating auth token for MSK scope: %s", client._scope) + return client.first_message() + + def receive(self, auth_bytes): + self._is_done = True + self._is_authenticated = auth_bytes != b'' + self._auth = auth_bytes.decode('utf-8') + + def is_done(self): + return self._is_done + + def is_authenticated(self): + return self._is_authenticated + + def auth_details(self): + if not self.is_authenticated: + raise RuntimeError('Not authenticated yet!') + return 'Authenticated via SASL / AWS_MSK_IAM %s' % (self._auth,) + + +class AwsMskIamClient: + UNRESERVED_CHARS = string.ascii_letters + string.digits + '-._~' + + def __init__(self, host, access_key, secret_key, region, token=None): + """ + Arguments: + host (str): The hostname of the broker. + access_key (str): An AWS_ACCESS_KEY_ID. + secret_key (str): An AWS_SECRET_ACCESS_KEY. + region (str): An AWS_REGION. + token (Optional[str]): An AWS_SESSION_TOKEN if using temporary + credentials. + """ + self.algorithm = 'AWS4-HMAC-SHA256' + self.expires = '900' + self.hashfunc = hashlib.sha256 + self.headers = [ + ('host', host) + ] + self.version = '2020_10_22' + + self.service = 'kafka-cluster' + self.action = '{}:Connect'.format(self.service) + + now = datetime.datetime.utcnow() + self.datestamp = now.strftime('%Y%m%d') + self.timestamp = now.strftime('%Y%m%dT%H%M%SZ') + + self.host = host + self.access_key = access_key + self.secret_key = secret_key + self.region = region + self.token = token + + @property + def _credential(self): + return '{0.access_key}/{0._scope}'.format(self) + + @property + def _scope(self): + return '{0.datestamp}/{0.region}/{0.service}/aws4_request'.format(self) + + @property + def _signed_headers(self): + """ + Returns (str): + An alphabetically sorted, semicolon-delimited list of lowercase + request header names. + """ + return ';'.join(sorted(k.lower() for k, _ in self.headers)) + + @property + def _canonical_headers(self): + """ + Returns (str): + A newline-delited list of header names and values. + Header names are lowercased. + """ + return '\n'.join(map(':'.join, self.headers)) + '\n' + + @property + def _canonical_request(self): + """ + Returns (str): + An AWS Signature Version 4 canonical request in the format: + \n + \n + \n + \n + \n + + """ + # The hashed_payload is always an empty string for MSK. + hashed_payload = self.hashfunc(b'').hexdigest() + return '\n'.join(( + 'GET', + '/', + self._canonical_querystring, + self._canonical_headers, + self._signed_headers, + hashed_payload, + )) + + @property + def _canonical_querystring(self): + """ + Returns (str): + A '&'-separated list of URI-encoded key/value pairs. 
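+            Keys are appended in alphabetical order (Action, X-Amz-Algorithm,
+            ..., X-Amz-SignedHeaders), as required for a SigV4 canonical
+            request.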
+ """ + params = [] + params.append(('Action', self.action)) + params.append(('X-Amz-Algorithm', self.algorithm)) + params.append(('X-Amz-Credential', self._credential)) + params.append(('X-Amz-Date', self.timestamp)) + params.append(('X-Amz-Expires', self.expires)) + if self.token: + params.append(('X-Amz-Security-Token', self.token)) + params.append(('X-Amz-SignedHeaders', self._signed_headers)) + + return '&'.join(self._uriencode(k) + '=' + self._uriencode(v) for k, v in params) + + @property + def _signing_key(self): + """ + Returns (bytes): + An AWS Signature V4 signing key generated from the secret_key, date, + region, service, and request type. + """ + key = self._hmac(('AWS4' + self.secret_key).encode('utf-8'), self.datestamp) + key = self._hmac(key, self.region) + key = self._hmac(key, self.service) + key = self._hmac(key, 'aws4_request') + return key + + @property + def _signing_str(self): + """ + Returns (str): + A string used to sign the AWS Signature V4 payload in the format: + \n + \n + \n + + """ + canonical_request_hash = self.hashfunc(self._canonical_request.encode('utf-8')).hexdigest() + return '\n'.join((self.algorithm, self.timestamp, self._scope, canonical_request_hash)) + + def _uriencode(self, msg): + """ + Arguments: + msg (str): A string to URI-encode. + + Returns (str): + The URI-encoded version of the provided msg, following the encoding + rules specified: https://github.com/aws/aws-msk-iam-auth#uriencode + """ + return urllib.parse.quote(msg, safe=self.UNRESERVED_CHARS) + + def _hmac(self, key, msg): + """ + Arguments: + key (bytes): A key to use for the HMAC digest. + msg (str): A value to include in the HMAC digest. + Returns (bytes): + An HMAC digest of the given key and msg. + """ + return hmac.new(key, msg.encode('utf-8'), digestmod=self.hashfunc).digest() + + def first_message(self): + """ + Returns (bytes): + An encoded JSON authentication payload that can be sent to the + broker. 
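The `_signing_key`, `_signing_str` and `_hmac` helpers implement the standard AWS Signature Version 4 derivation: chain HMAC-SHA256 over the date, region, service and the literal `aws4_request`, then sign the string-to-sign with the resulting key. A standalone sketch of that chain; the secret key, date and string-to-sign below are illustrative only:

```python
import hashlib
import hmac

def _hmac(key, msg):
    # HMAC-SHA256 of a text msg under a bytes key, returning the raw digest
    return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()

def sigv4_signature(secret_key, datestamp, region, string_to_sign, service='kafka-cluster'):
    # kSecret -> kDate -> kRegion -> kService -> kSigning, then sign the string-to-sign
    key = _hmac(('AWS4' + secret_key).encode('utf-8'), datestamp)
    key = _hmac(key, region)
    key = _hmac(key, service)
    signing_key = _hmac(key, 'aws4_request')
    return hmac.new(signing_key, string_to_sign.encode('utf-8'), hashlib.sha256).hexdigest()

# Illustrative values only -- not real credentials:
sig = sigv4_signature('wJalrXUtnFEMI/EXAMPLEKEY', '20250620', 'us-east-1',
                      'AWS4-HMAC-SHA256\n20250620T000000Z\n'
                      '20250620/us-east-1/kafka-cluster/aws4_request\n<canonical-request-hash>')
```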
+ """ + signature = hmac.new( + self._signing_key, + self._signing_str.encode('utf-8'), + digestmod=self.hashfunc, + ).hexdigest() + msg = { + 'version': self.version, + 'host': self.host, + 'user-agent': 'kafka-python', + 'action': self.action, + 'x-amz-algorithm': self.algorithm, + 'x-amz-credential': self._credential, + 'x-amz-date': self.timestamp, + 'x-amz-signedheaders': self._signed_headers, + 'x-amz-expires': self.expires, + 'x-amz-signature': signature, + } + if self.token: + msg['x-amz-security-token'] = self.token + + return json.dumps(msg, separators=(',', ':')).encode('utf-8') diff --git a/kafka/sasl/oauth.py b/kafka/sasl/oauth.py new file mode 100644 index 000000000..f1e959cb6 --- /dev/null +++ b/kafka/sasl/oauth.py @@ -0,0 +1,100 @@ +from __future__ import absolute_import + +import abc +import logging + +from kafka.sasl.abc import SaslMechanism + + +log = logging.getLogger(__name__) + + +class SaslMechanismOAuth(SaslMechanism): + + def __init__(self, **config): + assert 'sasl_oauth_token_provider' in config, 'sasl_oauth_token_provider required for OAUTHBEARER sasl' + assert isinstance(config['sasl_oauth_token_provider'], AbstractTokenProvider), \ + 'sasl_oauth_token_provider must implement kafka.sasl.oauth.AbstractTokenProvider' + self.token_provider = config['sasl_oauth_token_provider'] + self._error = None + self._is_done = False + self._is_authenticated = False + + def auth_bytes(self): + if self._error: + # Server should respond to this with SaslAuthenticate failure, which ends the auth process + return self._error + token = self.token_provider.token() + extensions = self._token_extensions() + return "n,,\x01auth=Bearer {}{}\x01\x01".format(token, extensions).encode('utf-8') + + def receive(self, auth_bytes): + if auth_bytes != b'': + error = auth_bytes.decode('utf-8') + log.debug("Sending x01 response to server after receiving SASL OAuth error: %s", error) + self._error = b'\x01' + else: + self._is_done = True + self._is_authenticated = True + + def is_done(self): + return self._is_done + + def is_authenticated(self): + return self._is_authenticated + + def _token_extensions(self): + """ + Return a string representation of the OPTIONAL key-value pairs that can be sent with an OAUTHBEARER + initial request. + """ + # Builds up a string separated by \x01 via a dict of key value pairs + extensions = self.token_provider.extensions() + msg = '\x01'.join(['{}={}'.format(k, v) for k, v in extensions.items()]) + return '\x01' + msg if msg else '' + + def auth_details(self): + if not self.is_authenticated: + raise RuntimeError('Not authenticated yet!') + return 'Authenticated via SASL / OAuth' + +# This statement is compatible with both Python 2.7 & 3+ +ABC = abc.ABCMeta('ABC', (object,), {'__slots__': ()}) + +class AbstractTokenProvider(ABC): + """ + A Token Provider must be used for the SASL OAuthBearer protocol. + + The implementation should ensure token reuse so that multiple + calls at connect time do not create multiple tokens. The implementation + should also periodically refresh the token in order to guarantee + that each call returns an unexpired token. A timeout error should + be returned after a short period of inactivity so that the + broker can log debugging info and retry. + + Token Providers MUST implement the token() method + """ + + def __init__(self, **config): + pass + + @abc.abstractmethod + def token(self): + """ + Returns a (str) ID/Access Token to be sent to the Kafka + client. 
+ """ + pass + + def extensions(self): + """ + This is an OPTIONAL method that may be implemented. + + Returns a map of key-value pairs that can + be sent with the SASL/OAUTHBEARER initial client request. If + not implemented, the values are ignored. This feature is only available + in Kafka >= 2.1.0. + + All returned keys and values should be type str + """ + return {} diff --git a/kafka/sasl/plain.py b/kafka/sasl/plain.py new file mode 100644 index 000000000..81443f5fe --- /dev/null +++ b/kafka/sasl/plain.py @@ -0,0 +1,41 @@ +from __future__ import absolute_import + +import logging + +from kafka.sasl.abc import SaslMechanism + + +log = logging.getLogger(__name__) + + +class SaslMechanismPlain(SaslMechanism): + + def __init__(self, **config): + if config.get('security_protocol', '') == 'SASL_PLAINTEXT': + log.warning('Sending username and password in the clear') + assert 'sasl_plain_username' in config, 'sasl_plain_username required for PLAIN sasl' + assert 'sasl_plain_password' in config, 'sasl_plain_password required for PLAIN sasl' + + self.username = config['sasl_plain_username'] + self.password = config['sasl_plain_password'] + self._is_done = False + self._is_authenticated = False + + def auth_bytes(self): + # Send PLAIN credentials per RFC-4616 + return bytes('\0'.join([self.username, self.username, self.password]).encode('utf-8')) + + def receive(self, auth_bytes): + self._is_done = True + self._is_authenticated = auth_bytes == b'' + + def is_done(self): + return self._is_done + + def is_authenticated(self): + return self._is_authenticated + + def auth_details(self): + if not self.is_authenticated: + raise RuntimeError('Not authenticated yet!') + return 'Authenticated as %s via SASL / Plain' % self.username diff --git a/kafka/sasl/scram.py b/kafka/sasl/scram.py new file mode 100644 index 000000000..d8cd071a7 --- /dev/null +++ b/kafka/sasl/scram.py @@ -0,0 +1,133 @@ +from __future__ import absolute_import + +import base64 +import hashlib +import hmac +import logging +import uuid + + +from kafka.sasl.abc import SaslMechanism +from kafka.vendor import six + + +log = logging.getLogger(__name__) + + +if six.PY2: + def xor_bytes(left, right): + return bytearray(ord(lb) ^ ord(rb) for lb, rb in zip(left, right)) +else: + def xor_bytes(left, right): + return bytes(lb ^ rb for lb, rb in zip(left, right)) + + +class SaslMechanismScram(SaslMechanism): + def __init__(self, **config): + assert 'sasl_plain_username' in config, 'sasl_plain_username required for SCRAM sasl' + assert 'sasl_plain_password' in config, 'sasl_plain_password required for SCRAM sasl' + assert config.get('sasl_mechanism', '') in ScramClient.MECHANISMS, 'Unrecognized SCRAM mechanism' + if config.get('security_protocol', '') == 'SASL_PLAINTEXT': + log.warning('Exchanging credentials in the clear during Sasl Authentication') + + self.username = config['sasl_plain_username'] + self.mechanism = config['sasl_mechanism'] + self._scram_client = ScramClient( + config['sasl_plain_username'], + config['sasl_plain_password'], + config['sasl_mechanism'] + ) + self._state = 0 + + def auth_bytes(self): + if self._state == 0: + return self._scram_client.first_message() + elif self._state == 1: + return self._scram_client.final_message() + else: + raise ValueError('No auth_bytes for state: %s' % self._state) + + def receive(self, auth_bytes): + if self._state == 0: + self._scram_client.process_server_first_message(auth_bytes) + elif self._state == 1: + self._scram_client.process_server_final_message(auth_bytes) + else: + raise 
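A token provider can be as small as a class that returns a cached bearer token. A minimal sketch against the `AbstractTokenProvider` interface; the class name, the hard-coded token and the extension key are hypothetical, and a real implementation should fetch and refresh tokens from an identity provider:

```python
from kafka.sasl.oauth import AbstractTokenProvider

class StaticTokenProvider(AbstractTokenProvider):
    """Toy provider that always returns the same token."""

    def __init__(self, token):
        super(StaticTokenProvider, self).__init__()
        self._token = token

    def token(self):
        return self._token

    def extensions(self):
        # Optional SASL/OAUTHBEARER extensions (Kafka >= 2.1.0); keys and values must be str
        return {'traceId': 'example-123'}

# Hypothetical wiring -- the provider is passed via the sasl_oauth_token_provider config:
# KafkaProducer(security_protocol='SASL_SSL', sasl_mechanism='OAUTHBEARER',
#               sasl_oauth_token_provider=StaticTokenProvider('my-jwt'))
```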
ValueError('Cannot receive bytes in state: %s' % self._state) + self._state += 1 + return self.is_done() + + def is_done(self): + return self._state == 2 + + def is_authenticated(self): + # receive raises if authentication fails...? + return self._state == 2 + + def auth_details(self): + if not self.is_authenticated: + raise RuntimeError('Not authenticated yet!') + return 'Authenticated as %s via SASL / %s' % (self.username, self.mechanism) + + +class ScramClient: + MECHANISMS = { + 'SCRAM-SHA-256': hashlib.sha256, + 'SCRAM-SHA-512': hashlib.sha512 + } + + def __init__(self, user, password, mechanism): + self.nonce = str(uuid.uuid4()).replace('-', '').encode('utf-8') + self.auth_message = b'' + self.salted_password = None + self.user = user.encode('utf-8') + self.password = password.encode('utf-8') + self.hashfunc = self.MECHANISMS[mechanism] + self.hashname = ''.join(mechanism.lower().split('-')[1:3]) + self.stored_key = None + self.client_key = None + self.client_signature = None + self.client_proof = None + self.server_key = None + self.server_signature = None + + def first_message(self): + client_first_bare = b'n=' + self.user + b',r=' + self.nonce + self.auth_message += client_first_bare + return b'n,,' + client_first_bare + + def process_server_first_message(self, server_first_message): + self.auth_message += b',' + server_first_message + params = dict(pair.split('=', 1) for pair in server_first_message.decode('utf-8').split(',')) + server_nonce = params['r'].encode('utf-8') + if not server_nonce.startswith(self.nonce): + raise ValueError("Server nonce, did not start with client nonce!") + self.nonce = server_nonce + self.auth_message += b',c=biws,r=' + self.nonce + + salt = base64.b64decode(params['s'].encode('utf-8')) + iterations = int(params['i']) + self.create_salted_password(salt, iterations) + + self.client_key = self.hmac(self.salted_password, b'Client Key') + self.stored_key = self.hashfunc(self.client_key).digest() + self.client_signature = self.hmac(self.stored_key, self.auth_message) + self.client_proof = xor_bytes(self.client_key, self.client_signature) + self.server_key = self.hmac(self.salted_password, b'Server Key') + self.server_signature = self.hmac(self.server_key, self.auth_message) + + def hmac(self, key, msg): + return hmac.new(key, msg, digestmod=self.hashfunc).digest() + + def create_salted_password(self, salt, iterations): + self.salted_password = hashlib.pbkdf2_hmac( + self.hashname, self.password, salt, iterations + ) + + def final_message(self): + return b'c=biws,r=' + self.nonce + b',p=' + base64.b64encode(self.client_proof) + + def process_server_final_message(self, server_final_message): + params = dict(pair.split('=', 1) for pair in server_final_message.decode('utf-8').split(',')) + if self.server_signature != base64.b64decode(params['v'].encode('utf-8')): + raise ValueError("Server sent wrong signature!") diff --git a/kafka/sasl/sspi.py b/kafka/sasl/sspi.py new file mode 100644 index 000000000..f4c95d037 --- /dev/null +++ b/kafka/sasl/sspi.py @@ -0,0 +1,111 @@ +from __future__ import absolute_import + +import logging + +# Windows-only +try: + import sspi + import pywintypes + import sspicon + import win32security +except ImportError: + sspi = None + +from kafka.sasl.abc import SaslMechanism + + +log = logging.getLogger(__name__) + + +class SaslMechanismSSPI(SaslMechanism): + # Establish security context and negotiate protection level + # For reference see RFC 4752, section 3 + + SASL_QOP_AUTH = 1 + SASL_QOP_AUTH_INT = 2 + SASL_QOP_AUTH_CONF = 4 + + 
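The client proof computed by `ScramClient` follows RFC 5802: ClientKey = HMAC(SaltedPassword, "Client Key"), StoredKey = H(ClientKey), ClientProof = ClientKey XOR HMAC(StoredKey, AuthMessage). A standalone Python 3 sketch of that arithmetic with made-up inputs (the auth message is normally assembled from the actual SCRAM exchange):

```python
import base64
import hashlib
import hmac

def scram_client_proof(password, salt, iterations, auth_message, hashfunc=hashlib.sha256):
    # RFC 5802: SaltedPassword -> ClientKey -> StoredKey -> ClientSignature -> ClientProof
    salted_password = hashlib.pbkdf2_hmac(hashfunc().name, password, salt, iterations)
    client_key = hmac.new(salted_password, b'Client Key', hashfunc).digest()
    stored_key = hashfunc(client_key).digest()
    client_signature = hmac.new(stored_key, auth_message, hashfunc).digest()
    return bytes(ck ^ cs for ck, cs in zip(client_key, client_signature))

# Made-up example values:
proof = scram_client_proof(b'secret', b'somesalt', 4096,
                           b'n=alice,r=clientnonce,r=clientnonceservernonce,'
                           b's=c29tZXNhbHQ=,i=4096,c=biws,r=clientnonceservernonce')
print(base64.b64encode(proof).decode('utf-8'))
```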
def __init__(self, **config): + assert sspi is not None, 'No GSSAPI lib available (gssapi or sspi)' + if 'sasl_kerberos_name' not in config and 'sasl_kerberos_service_name' not in config: + raise ValueError('sasl_kerberos_service_name or sasl_kerberos_name required for GSSAPI sasl configuration') + self._is_done = False + self._is_authenticated = False + if config.get('sasl_kerberos_name', None) is not None: + self.auth_id = str(config['sasl_kerberos_name']) + else: + kerberos_domain_name = config.get('sasl_kerberos_domain_name', '') or config.get('host', '') + self.auth_id = config['sasl_kerberos_service_name'] + '/' + kerberos_domain_name + scheme = "Kerberos" # Do not try with Negotiate for SASL authentication. Tokens are different. + # https://docs.microsoft.com/en-us/windows/win32/secauthn/context-requirements + flags = ( + sspicon.ISC_REQ_MUTUAL_AUTH | # mutual authentication + sspicon.ISC_REQ_INTEGRITY | # check for integrity + sspicon.ISC_REQ_SEQUENCE_DETECT | # enable out-of-order messages + sspicon.ISC_REQ_CONFIDENTIALITY # request confidentiality + ) + self._client_ctx = sspi.ClientAuth(scheme, targetspn=self.auth_id, scflags=flags) + self._next_token = self._client_ctx.step(None) + + def auth_bytes(self): + # GSSAPI Auth does not have a final broker->client message + # so mark is_done after the final auth_bytes are provided + # in practice we'll still receive a response when using SaslAuthenticate + # but not when using the prior unframed approach. + if self._client_ctx.authenticated: + self._is_done = True + self._is_authenticated = True + return self._next_token or b'' + + def receive(self, auth_bytes): + log.debug("Received token from server (size %s)", len(auth_bytes)) + if not self._client_ctx.authenticated: + # calculate an output token from kafka token (or None on first iteration) + # https://docs.microsoft.com/en-us/windows/win32/api/sspi/nf-sspi-initializesecuritycontexta + # https://docs.microsoft.com/en-us/windows/win32/secauthn/initializesecuritycontext--kerberos + # authorize method will wrap for us our token in sspi structures + error, auth = self._client_ctx.authorize(auth_bytes) + if len(auth) > 0 and len(auth[0].Buffer): + log.debug("Got token from context") + # this buffer must be sent to the server whatever the result is + self._next_token = auth[0].Buffer + else: + log.debug("Got no token, exchange finished") + # seems to be the end of the loop + self._next_token = b'' + elif self._is_done: + # The final step of gssapi is send, so we do not expect any additional bytes + # however, allow an empty message to support SaslAuthenticate response + if auth_bytes != b'': + raise ValueError("Unexpected receive auth_bytes after sasl/gssapi completion") + else: + # Process the security layer negotiation token, sent by the server + # once the security context is established. + + # The following part is required by SASL, but not by classic Kerberos. + # See RFC 4752 + + # unwraps message containing supported protection levels and msg size + msg, _was_encrypted = self._client_ctx.unwrap(auth_bytes) + + # Kafka currently doesn't support integrity or confidentiality security layers, so we + # simply set QoP to 'auth' only (first octet). 
We reuse the max message size proposed + # by the server + client_flags = self.SASL_QOP_AUTH + server_flags = msg[0] + message_parts = [ + bytes(client_flags & server_flags), + msg[:1], + self.auth_id.encode('utf-8'), + ] + # add authorization identity to the response, and GSS-wrap + self._next_token = self._client_ctx.wrap(b''.join(message_parts), False) + + def is_done(self): + return self._is_done + + def is_authenticated(self): + return self._is_authenticated + + def auth_details(self): + return 'Authenticated as %s to %s via SASL / SSPI/GSSAPI \\o/' % (self._client_ctx.initiator_name, self._client_ctx.service_name) diff --git a/kafka/scram.py b/kafka/scram.py deleted file mode 100644 index 7f003750c..000000000 --- a/kafka/scram.py +++ /dev/null @@ -1,81 +0,0 @@ -from __future__ import absolute_import - -import base64 -import hashlib -import hmac -import uuid - -from kafka.vendor import six - - -if six.PY2: - def xor_bytes(left, right): - return bytearray(ord(lb) ^ ord(rb) for lb, rb in zip(left, right)) -else: - def xor_bytes(left, right): - return bytes(lb ^ rb for lb, rb in zip(left, right)) - - -class ScramClient: - MECHANISMS = { - 'SCRAM-SHA-256': hashlib.sha256, - 'SCRAM-SHA-512': hashlib.sha512 - } - - def __init__(self, user, password, mechanism): - self.nonce = str(uuid.uuid4()).replace('-', '') - self.auth_message = '' - self.salted_password = None - self.user = user - self.password = password.encode('utf-8') - self.hashfunc = self.MECHANISMS[mechanism] - self.hashname = ''.join(mechanism.lower().split('-')[1:3]) - self.stored_key = None - self.client_key = None - self.client_signature = None - self.client_proof = None - self.server_key = None - self.server_signature = None - - def first_message(self): - client_first_bare = 'n={},r={}'.format(self.user, self.nonce) - self.auth_message += client_first_bare - return 'n,,' + client_first_bare - - def process_server_first_message(self, server_first_message): - self.auth_message += ',' + server_first_message - params = dict(pair.split('=', 1) for pair in server_first_message.split(',')) - server_nonce = params['r'] - if not server_nonce.startswith(self.nonce): - raise ValueError("Server nonce, did not start with client nonce!") - self.nonce = server_nonce - self.auth_message += ',c=biws,r=' + self.nonce - - salt = base64.b64decode(params['s'].encode('utf-8')) - iterations = int(params['i']) - self.create_salted_password(salt, iterations) - - self.client_key = self.hmac(self.salted_password, b'Client Key') - self.stored_key = self.hashfunc(self.client_key).digest() - self.client_signature = self.hmac(self.stored_key, self.auth_message.encode('utf-8')) - self.client_proof = xor_bytes(self.client_key, self.client_signature) - self.server_key = self.hmac(self.salted_password, b'Server Key') - self.server_signature = self.hmac(self.server_key, self.auth_message.encode('utf-8')) - - def hmac(self, key, msg): - return hmac.new(key, msg, digestmod=self.hashfunc).digest() - - def create_salted_password(self, salt, iterations): - self.salted_password = hashlib.pbkdf2_hmac( - self.hashname, self.password, salt, iterations - ) - - def final_message(self): - return 'c=biws,r={},p={}'.format(self.nonce, base64.b64encode(self.client_proof).decode('utf-8')) - - def process_server_final_message(self, server_final_message): - params = dict(pair.split('=', 1) for pair in server_final_message.split(',')) - if self.server_signature != base64.b64decode(params['v'].encode('utf-8')): - raise ValueError("Server sent wrong signature!") - - diff --git 
a/kafka/socks5_wrapper.py b/kafka/socks5_wrapper.py new file mode 100644 index 000000000..18bea7c8d --- /dev/null +++ b/kafka/socks5_wrapper.py @@ -0,0 +1,248 @@ +try: + from urllib.parse import urlparse +except ImportError: + from urlparse import urlparse + +import errno +import logging +import random +import socket +import struct + +log = logging.getLogger(__name__) + + +class ProxyConnectionStates: + DISCONNECTED = '' + CONNECTING = '' + NEGOTIATE_PROPOSE = '' + NEGOTIATING = '' + AUTHENTICATING = '' + REQUEST_SUBMIT = '' + REQUESTING = '' + READ_ADDRESS = '' + COMPLETE = '' + + +class Socks5Wrapper: + """Socks5 proxy wrapper + + Manages connection through socks5 proxy with support for username/password + authentication. + """ + + def __init__(self, proxy_url, afi): + self._buffer_in = b'' + self._buffer_out = b'' + self._proxy_url = urlparse(proxy_url) + self._sock = None + self._state = ProxyConnectionStates.DISCONNECTED + self._target_afi = socket.AF_UNSPEC + + proxy_addrs = self.dns_lookup(self._proxy_url.hostname, self._proxy_url.port, afi) + # TODO raise error on lookup failure + self._proxy_addr = random.choice(proxy_addrs) + + @classmethod + def is_inet_4_or_6(cls, gai): + """Given a getaddrinfo struct, return True iff ipv4 or ipv6""" + return gai[0] in (socket.AF_INET, socket.AF_INET6) + + @classmethod + def dns_lookup(cls, host, port, afi=socket.AF_UNSPEC): + """Returns a list of getaddrinfo structs, optionally filtered to an afi (ipv4 / ipv6)""" + # XXX: all DNS functions in Python are blocking. If we really + # want to be non-blocking here, we need to use a 3rd-party + # library like python-adns, or move resolution onto its + # own thread. This will be subject to the default libc + # name resolution timeout (5s on most Linux boxes) + try: + return list(filter(cls.is_inet_4_or_6, + socket.getaddrinfo(host, port, afi, + socket.SOCK_STREAM))) + except socket.gaierror as ex: + log.warning("DNS lookup failed for proxy %s:%d, %r", host, port, ex) + return [] + + def socket(self, family, sock_type): + """Open and record a socket. + + Returns the actual underlying socket + object to ensure e.g. selects and ssl wrapping works as expected. + """ + self._target_afi = family # Store the address family of the target + afi, _, _, _, _ = self._proxy_addr + self._sock = socket.socket(afi, sock_type) + return self._sock + + def _flush_buf(self): + """Send out all data that is stored in the outgoing buffer. + + It is expected that the caller handles error handling, including non-blocking + as well as connection failure exceptions. + """ + while self._buffer_out: + sent_bytes = self._sock.send(self._buffer_out) + self._buffer_out = self._buffer_out[sent_bytes:] + + def _peek_buf(self, datalen): + """Ensure local inbound buffer has enough data, and return that data without + consuming the local buffer + + It's expected that the caller handles e.g. blocking exceptions""" + while True: + bytes_remaining = datalen - len(self._buffer_in) + if bytes_remaining <= 0: + break + data = self._sock.recv(bytes_remaining) + if not data: + break + self._buffer_in = self._buffer_in + data + + return self._buffer_in[:datalen] + + def _read_buf(self, datalen): + """Read and consume bytes from socket connection + + It's expected that the caller handles e.g. blocking exceptions""" + buf = self._peek_buf(datalen) + if buf: + self._buffer_in = self._buffer_in[len(buf):] + return buf + + def connect_ex(self, addr): + """Runs a state machine through connection to authentication to + proxy connection request. 
+ + The somewhat strange setup is to facilitate non-intrusive use from + BrokerConnection state machine. + + This function is called with a socket in non-blocking mode. Both + send and receive calls can return in EWOULDBLOCK/EAGAIN which we + specifically avoid handling here. These are handled in main + BrokerConnection connection loop, which then would retry calls + to this function.""" + + if self._state == ProxyConnectionStates.DISCONNECTED: + self._state = ProxyConnectionStates.CONNECTING + + if self._state == ProxyConnectionStates.CONNECTING: + _, _, _, _, sockaddr = self._proxy_addr + ret = self._sock.connect_ex(sockaddr) + if not ret or ret == errno.EISCONN: + self._state = ProxyConnectionStates.NEGOTIATE_PROPOSE + else: + return ret + + if self._state == ProxyConnectionStates.NEGOTIATE_PROPOSE: + if self._proxy_url.username and self._proxy_url.password: + # Propose username/password + self._buffer_out = b"\x05\x01\x02" + else: + # Propose no auth + self._buffer_out = b"\x05\x01\x00" + self._state = ProxyConnectionStates.NEGOTIATING + + if self._state == ProxyConnectionStates.NEGOTIATING: + self._flush_buf() + buf = self._read_buf(2) + if buf[0:1] != b"\x05": + log.error("Unrecognized SOCKS version") + self._state = ProxyConnectionStates.DISCONNECTED + self._sock.close() + return errno.ECONNREFUSED + + if buf[1:2] == b"\x00": + # No authentication required + self._state = ProxyConnectionStates.REQUEST_SUBMIT + elif buf[1:2] == b"\x02": + # Username/password authentication selected + userlen = len(self._proxy_url.username) + passlen = len(self._proxy_url.password) + self._buffer_out = struct.pack( + "!bb{}sb{}s".format(userlen, passlen), + 1, # version + userlen, + self._proxy_url.username.encode(), + passlen, + self._proxy_url.password.encode(), + ) + self._state = ProxyConnectionStates.AUTHENTICATING + else: + log.error("Unrecognized SOCKS authentication method") + self._state = ProxyConnectionStates.DISCONNECTED + self._sock.close() + return errno.ECONNREFUSED + + if self._state == ProxyConnectionStates.AUTHENTICATING: + self._flush_buf() + buf = self._read_buf(2) + if buf == b"\x01\x00": + # Authentication succesful + self._state = ProxyConnectionStates.REQUEST_SUBMIT + else: + log.error("Socks5 proxy authentication failure") + self._state = ProxyConnectionStates.DISCONNECTED + self._sock.close() + return errno.ECONNREFUSED + + if self._state == ProxyConnectionStates.REQUEST_SUBMIT: + if self._target_afi == socket.AF_INET: + addr_type = 1 + addr_len = 4 + elif self._target_afi == socket.AF_INET6: + addr_type = 4 + addr_len = 16 + else: + log.error("Unknown address family, %r", self._target_afi) + self._state = ProxyConnectionStates.DISCONNECTED + self._sock.close() + return errno.ECONNREFUSED + + self._buffer_out = struct.pack( + "!bbbb{}sh".format(addr_len), + 5, # version + 1, # command: connect + 0, # reserved + addr_type, # 1 for ipv4, 4 for ipv6 address + socket.inet_pton(self._target_afi, addr[0]), # either 4 or 16 bytes of actual address + addr[1], # port + ) + self._state = ProxyConnectionStates.REQUESTING + + if self._state == ProxyConnectionStates.REQUESTING: + self._flush_buf() + buf = self._read_buf(2) + if buf[0:2] == b"\x05\x00": + self._state = ProxyConnectionStates.READ_ADDRESS + else: + log.error("Proxy request failed: %r", buf[1:2]) + self._state = ProxyConnectionStates.DISCONNECTED + self._sock.close() + return errno.ECONNREFUSED + + if self._state == ProxyConnectionStates.READ_ADDRESS: + # we don't really care about the remote endpoint address, but need to 
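The CONNECT request assembled in the state machine is plain RFC 1928 framing: version, command, reserved byte, address type, packed address, and a big-endian port. A standalone sketch of the same packing for a hypothetical, already-resolved IPv4 target:

```python
import socket
import struct

def socks5_connect_request(host, port, afi=socket.AF_INET):
    """Pack a SOCKS5 CONNECT request (RFC 1928) for an already-resolved address."""
    addr_type, addr_len = (1, 4) if afi == socket.AF_INET else (4, 16)
    return struct.pack(
        "!bbbb{}sH".format(addr_len),   # 'H' (unsigned) so high port numbers pack cleanly
        5,                              # protocol version
        1,                              # command: CONNECT
        0,                              # reserved
        addr_type,                      # 1 = IPv4, 4 = IPv6
        socket.inet_pton(afi, host),    # packed network address
        port,                           # destination port, big-endian
    )

# Hypothetical target address and port:
request = socks5_connect_request('93.184.216.34', 9092)
```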
clear the stream + buf = self._peek_buf(2) + if buf[0:2] == b"\x00\x01": + _ = self._read_buf(2 + 4 + 2) # ipv4 address + port + elif buf[0:2] == b"\x00\x05": + _ = self._read_buf(2 + 16 + 2) # ipv6 address + port + else: + log.error("Unrecognized remote address type %r", buf[1:2]) + self._state = ProxyConnectionStates.DISCONNECTED + self._sock.close() + return errno.ECONNREFUSED + self._state = ProxyConnectionStates.COMPLETE + + if self._state == ProxyConnectionStates.COMPLETE: + return 0 + + # not reached; + # Send and recv will raise socket error on EWOULDBLOCK/EAGAIN that is assumed to be handled by + # the caller. The caller re-enters this state machine from retry logic with timer or via select & family + log.error("Internal error, state %r not handled correctly", self._state) + self._state = ProxyConnectionStates.DISCONNECTED + if self._sock: + self._sock.close() + return errno.ECONNREFUSED diff --git a/kafka/util.py b/kafka/util.py index e31d99305..658c17d59 100644 --- a/kafka/util.py +++ b/kafka/util.py @@ -1,8 +1,12 @@ -from __future__ import absolute_import +from __future__ import absolute_import, division import binascii +import functools +import re +import time import weakref +from kafka.errors import KafkaTimeoutError from kafka.vendor import six @@ -19,7 +23,69 @@ def crc32(data): crc -= TO_SIGNED return crc else: - from binascii import crc32 + from binascii import crc32 # noqa: F401 + + +class Timer: + __slots__ = ('_start_at', '_expire_at', '_timeout_ms', '_error_message') + + def __init__(self, timeout_ms, error_message=None, start_at=None): + self._timeout_ms = timeout_ms + self._start_at = start_at or time.time() + if timeout_ms is not None: + self._expire_at = self._start_at + timeout_ms / 1000 + else: + self._expire_at = float('inf') + self._error_message = error_message + + @property + def expired(self): + return time.time() >= self._expire_at + + @property + def timeout_ms(self): + if self._timeout_ms is None: + return None + elif self._expire_at == float('inf'): + return float('inf') + remaining = self._expire_at - time.time() + if remaining < 0: + return 0 + else: + return int(remaining * 1000) + + @property + def elapsed_ms(self): + return int(1000 * (time.time() - self._start_at)) + + def maybe_raise(self): + if self.expired: + raise KafkaTimeoutError(self._error_message) + + def __str__(self): + return "Timer(%s ms remaining)" % (self.timeout_ms) + +# Taken from: https://github.com/apache/kafka/blob/39eb31feaeebfb184d98cc5d94da9148c2319d81/clients/src/main/java/org/apache/kafka/common/internals/Topic.java#L29 +TOPIC_MAX_LENGTH = 249 +TOPIC_LEGAL_CHARS = re.compile('^[a-zA-Z0-9._-]+$') + +def ensure_valid_topic_name(topic): + """ Ensures that the topic name is valid according to the kafka source. """ + + # See Kafka Source: + # https://github.com/apache/kafka/blob/39eb31feaeebfb184d98cc5d94da9148c2319d81/clients/src/main/java/org/apache/kafka/common/internals/Topic.java + if topic is None: + raise TypeError('All topics must not be None') + if not isinstance(topic, six.string_types): + raise TypeError('All topics must be strings') + if len(topic) == 0: + raise ValueError('All topics must be non-empty strings') + if topic == '.' or topic == '..': + raise ValueError('Topic name cannot be "." 
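`Timer` is a small countdown helper: `timeout_ms` shrinks toward zero, `expired` flips once the deadline passes, and `maybe_raise()` turns expiry into a `KafkaTimeoutError`. A minimal usage sketch with a hypothetical `poll_once()` callable:

```python
import time

from kafka.util import Timer

def wait_for(poll_once, timeout_ms=5000):
    """Call poll_once() until it returns something truthy or the timer expires."""
    timer = Timer(timeout_ms, error_message='gave up waiting for poll_once()')
    while not timer.expired:
        result = poll_once()
        if result:
            return result
        # sleep for at most 100ms per iteration, never past the deadline
        time.sleep(min(0.1, timer.timeout_ms / 1000.0))
    timer.maybe_raise()  # raises KafkaTimeoutError with the message above
```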
or ".."') + if len(topic) > TOPIC_MAX_LENGTH: + raise ValueError('Topic name is illegal, it can\'t be longer than {0} characters, topic: "{1}"'.format(TOPIC_MAX_LENGTH, topic)) + if not TOPIC_LEGAL_CHARS.match(topic): + raise ValueError('Topic name "{0}" is illegal, it contains a character other than ASCII alphanumerics, ".", "_" and "-"'.format(topic)) class WeakMethod(object): @@ -64,3 +130,11 @@ class Dict(dict): See: https://docs.python.org/2/library/weakref.html """ pass + + +def synchronized(func): + def wrapper(self, *args, **kwargs): + with self._lock: + return func(self, *args, **kwargs) + functools.update_wrapper(wrapper, func) + return wrapper diff --git a/kafka/version.py b/kafka/version.py index 83d888e17..298979870 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1 @@ -__version__ = '2.1.0.dev' +__version__ = '2.2.13' diff --git a/pyproject.toml b/pyproject.toml index 48be87ffd..d575a8959 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Software Development :: Libraries :: Python Modules", @@ -37,7 +38,8 @@ crc32c = ["crc32c"] lz4 = ["lz4"] snappy = ["python-snappy"] zstd = ["zstandard"] -testing = ["pytest", "mock", "pytest-mock"] +testing = ["pytest", "mock; python_version < '3.3'", "pytest-mock", "pytest-timeout"] +benchmarks = ["pyperf"] [tool.setuptools] include-package-data = false diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..7fcb1f4a8 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +log_format = %(asctime)s.%(msecs)03d %(levelname)-8s %(thread)d:%(threadName)s %(name)-23s %(message)s +log_level = DEBUG +addopts = --durations=10 --timeout=300 diff --git a/requirements-dev.txt b/requirements-dev.txt index e272d1ff7..8de5e28d4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,16 +3,17 @@ crc32c docker-py flake8 lz4 -mock +mock; python_version < '3.3' py pylint +pyperf pytest pytest-cov pytest-mock pytest-pylint +pytest-timeout python-snappy Sphinx sphinx-rtd-theme -tox xxhash zstandard diff --git a/servers/2.4.0/resources/zookeeper.properties b/servers/2.4.0/resources/zookeeper.properties index e3fd09742..b146fac9e 100644 --- a/servers/2.4.0/resources/zookeeper.properties +++ b/servers/2.4.0/resources/zookeeper.properties @@ -19,3 +19,4 @@ clientPort={port} clientPortAddress={host} # disable the per-ip limit on the number of connections since this is a non-production config maxClientCnxns=0 +admin.enableServer=false diff --git a/servers/2.5.0/resources/zookeeper.properties b/servers/2.5.0/resources/zookeeper.properties index e3fd09742..b146fac9e 100644 --- a/servers/2.5.0/resources/zookeeper.properties +++ b/servers/2.5.0/resources/zookeeper.properties @@ -19,3 +19,4 @@ clientPort={port} clientPortAddress={host} # disable the per-ip limit on the number of connections since this is a non-production config maxClientCnxns=0 +admin.enableServer=false diff --git a/servers/2.6.0/resources/zookeeper.properties b/servers/2.6.0/resources/zookeeper.properties index e3fd09742..b146fac9e 100644 --- a/servers/2.6.0/resources/zookeeper.properties +++ b/servers/2.6.0/resources/zookeeper.properties @@ -19,3 +19,4 @@ clientPort={port} clientPortAddress={host} # disable the per-ip limit on the 
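The `synchronized` decorator simply wraps a method body in `with self._lock:`, so it only works on objects that define a `_lock` attribute; `ensure_valid_topic_name` raises on illegal names. A minimal sketch of both (the `Counter` class is hypothetical):

```python
import threading

from kafka.util import ensure_valid_topic_name, synchronized

class Counter(object):
    def __init__(self):
        self._lock = threading.Lock()   # required by @synchronized
        self.value = 0

    @synchronized
    def increment(self):
        # executes with self._lock held
        self.value += 1

counter = Counter()
counter.increment()

ensure_valid_topic_name('my-topic')   # passes
ensure_valid_topic_name('bad topic')  # raises ValueError: illegal character
```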
number of connections since this is a non-production config maxClientCnxns=0 +admin.enableServer=false diff --git a/servers/4.0.0/resources/kafka.properties b/servers/4.0.0/resources/kafka.properties new file mode 100644 index 000000000..3dba393ba --- /dev/null +++ b/servers/4.0.0/resources/kafka.properties @@ -0,0 +1,161 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +############################# Server Basics ############################# + +# The role of this server. Setting this puts us in KRaft mode +process.roles=broker,controller + +# The node id associated with this instance's roles +node.id={broker_id} + +# List of controller endpoints used connect to the controller cluster +controller.quorum.bootstrap.servers={controller_bootstrap_host}:{controller_port} + +############################# Socket Server Settings ############################# + +# The address the socket server listens on. +# Combined nodes (i.e. those with `process.roles=broker,controller`) must list the controller listener here at a minimum. +# If the broker listener is not defined, the default listener will use a host name that is equal to the value of java.net.InetAddress.getCanonicalHostName(), +# with PLAINTEXT listener name, and port 9092. +# FORMAT: +# listeners = listener_name://host_name:port +# EXAMPLE: +# listeners = PLAINTEXT://your.host.name:9092 +#listeners=PLAINTEXT://:9092,CONTROLLER://:9093 +listeners={transport}://{host}:{port},CONTROLLER://{host}:{controller_port} + +# Name of listener used for communication between brokers. +inter.broker.listener.name={transport} + +{sasl_config} + +authorizer.class.name=org.apache.kafka.metadata.authorizer.StandardAuthorizer +allow.everyone.if.no.acl.found=true + +# Listener name, hostname and port the broker or the controller will advertise to clients. +# If not set, it uses the value for "listeners". +advertised.listeners={transport}://{host}:{port},CONTROLLER://{host}:{controller_port} + +# A comma-separated list of the names of the listeners used by the controller. +# If no explicit mapping set in `listener.security.protocol.map`, default will be using PLAINTEXT protocol +# This is required if running in KRaft mode. +controller.listener.names=CONTROLLER + +# Maps listener names to security protocols, the default is for them to be the same. 
See the config documentation for more details +listener.security.protocol.map=CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL + +# The number of threads that the server uses for receiving requests from the network and sending responses to the network +num.network.threads=3 + +# The number of threads that the server uses for processing requests, which may include disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma separated list of directories under which to store log files +log.dirs={tmp_dir}/kraft-combined-logs + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +# The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. +# This value is recommended to be increased for installations with data dirs located in RAID array. +num.recovery.threads.per.data.dir=1 + +############################# Internal Topic Settings ############################# +# The replication factor for the group metadata internal topics "__consumer_offsets", "__share_group_state" and "__transaction_state" +# For anything other than development testing, a value greater than 1 is recommended to ensure availability such as 3. +offsets.topic.replication.factor=1 +share.coordinator.state.topic.replication.factor=1 +share.coordinator.state.topic.min.isr=1 +transaction.state.log.replication.factor=1 +transaction.state.log.min.isr=1 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. 
Deletion always happens +# from the end of the log. + +# The minimum age of a log file to be eligible for deletion due to age +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log unless the remaining +# segments drop below log.retention.bytes. Functions independently of log.retention.hours. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=1 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + +############################# Group Coordinator Settings ############################# + +# The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance. +# The rebalance will be further delayed by the value of group.initial.rebalance.delay.ms as new members join the group, up to a maximum of max.poll.interval.ms. +# The default value for this is 3 seconds. +# We override this to 0 here as it makes for a better out-of-the-box experience for development and testing. +# However, in production environments the default value of 3 seconds is more suitable as this will help to avoid unnecessary, and potentially expensive, rebalances during application startup. 
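The `{placeholder}` fields in this properties template (`{broker_id}`, `{host}`, `{port}`, `{controller_port}`, `{tmp_dir}`, ...) are substituted by the test fixture before the broker starts. A simplified sketch of that substitution using plain `str.format`; the fixture's actual `render_template` may differ:

```python
def render_properties(template_text, **values):
    """Fill {placeholder} fields in a server.properties template."""
    return template_text.format(**values)

template = (
    "node.id={broker_id}\n"
    "listeners={transport}://{host}:{port},CONTROLLER://{host}:{controller_port}\n"
    "log.dirs={tmp_dir}/kraft-combined-logs\n"
)
print(render_properties(template, broker_id=0, transport='PLAINTEXT',
                        host='localhost', port=9092, controller_port=9093,
                        tmp_dir='/tmp/kafka-test'))
```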
+group.initial.rebalance.delay.ms=0 diff --git a/servers/resources/default/zookeeper.properties b/servers/resources/default/zookeeper.properties index e3fd09742..b146fac9e 100644 --- a/servers/resources/default/zookeeper.properties +++ b/servers/resources/default/zookeeper.properties @@ -19,3 +19,4 @@ clientPort={port} clientPortAddress={host} # disable the per-ip limit on the number of connections since this is a non-production config maxClientCnxns=0 +admin.enableServer=false diff --git a/test/conftest.py b/test/conftest.py index d54a91243..b65593a86 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,127 +1,17 @@ from __future__ import absolute_import -import uuid - import pytest -from test.testutil import env_kafka_version, random_string -from test.fixtures import KafkaFixture, ZookeeperFixture - -@pytest.fixture(scope="module") -def zookeeper(): - """Return a Zookeeper fixture""" - zk_instance = ZookeeperFixture.instance() - yield zk_instance - zk_instance.close() - - -@pytest.fixture(scope="module") -def kafka_broker(kafka_broker_factory): - """Return a Kafka broker fixture""" - return kafka_broker_factory()[0] - - -@pytest.fixture(scope="module") -def kafka_broker_factory(zookeeper): - """Return a Kafka broker fixture factory""" - assert env_kafka_version(), 'KAFKA_VERSION must be specified to run integration tests' - - _brokers = [] - def factory(**broker_params): - params = {} if broker_params is None else broker_params.copy() - params.setdefault('partitions', 4) - num_brokers = params.pop('num_brokers', 1) - brokers = tuple(KafkaFixture.instance(x, zookeeper, **params) - for x in range(num_brokers)) - _brokers.extend(brokers) - return brokers - - yield factory - - for broker in _brokers: - broker.close() - - -@pytest.fixture -def kafka_client(kafka_broker, request): - """Return a KafkaClient fixture""" - (client,) = kafka_broker.get_clients(cnt=1, client_id='%s_client' % (request.node.name,)) - yield client - client.close() - - -@pytest.fixture -def kafka_consumer(kafka_consumer_factory): - """Return a KafkaConsumer fixture""" - return kafka_consumer_factory() - - -@pytest.fixture -def kafka_consumer_factory(kafka_broker, topic, request): - """Return a KafkaConsumer factory fixture""" - _consumer = [None] - - def factory(**kafka_consumer_params): - params = {} if kafka_consumer_params is None else kafka_consumer_params.copy() - params.setdefault('client_id', 'consumer_%s' % (request.node.name,)) - params.setdefault('auto_offset_reset', 'earliest') - _consumer[0] = next(kafka_broker.get_consumers(cnt=1, topics=[topic], **params)) - return _consumer[0] - - yield factory - - if _consumer[0]: - _consumer[0].close() - - -@pytest.fixture -def kafka_producer(kafka_producer_factory): - """Return a KafkaProducer fixture""" - yield kafka_producer_factory() - @pytest.fixture -def kafka_producer_factory(kafka_broker, request): - """Return a KafkaProduce factory fixture""" - _producer = [None] - - def factory(**kafka_producer_params): - params = {} if kafka_producer_params is None else kafka_producer_params.copy() - params.setdefault('client_id', 'producer_%s' % (request.node.name,)) - _producer[0] = next(kafka_broker.get_producers(cnt=1, **params)) - return _producer[0] +def metrics(): + from kafka.metrics import Metrics - yield factory - - if _producer[0]: - _producer[0].close() - -@pytest.fixture -def kafka_admin_client(kafka_admin_client_factory): - """Return a KafkaAdminClient fixture""" - yield kafka_admin_client_factory() - -@pytest.fixture -def 
kafka_admin_client_factory(kafka_broker): - """Return a KafkaAdminClient factory fixture""" - _admin_client = [None] - - def factory(**kafka_admin_client_params): - params = {} if kafka_admin_client_params is None else kafka_admin_client_params.copy() - _admin_client[0] = next(kafka_broker.get_admin_clients(cnt=1, **params)) - return _admin_client[0] - - yield factory - - if _admin_client[0]: - _admin_client[0].close() - -@pytest.fixture -def topic(kafka_broker, request): - """Return a topic fixture""" - topic_name = '%s_%s' % (request.node.name, random_string(10)) - kafka_broker.create_topics([topic_name]) - return topic_name + metrics = Metrics() + try: + yield metrics + finally: + metrics.close() @pytest.fixture @@ -153,25 +43,13 @@ def _set_conn_state(state): return conn -@pytest.fixture() -def send_messages(topic, kafka_producer, request): - """A factory that returns a send_messages function with a pre-populated - topic topic / producer.""" - - def _send_messages(number_range, partition=0, topic=topic, producer=kafka_producer, request=request): - """ - messages is typically `range(0,100)` - partition is an int - """ - messages_and_futures = [] # [(message, produce_future),] - for i in number_range: - # request.node.name provides the test name (including parametrized values) - encoded_msg = '{}-{}-{}'.format(i, request.node.name, uuid.uuid4()).encode('utf-8') - future = kafka_producer.send(topic, value=encoded_msg, partition=partition) - messages_and_futures.append((encoded_msg, future)) - kafka_producer.flush() - for (msg, f) in messages_and_futures: - assert f.succeeded() - return [msg for (msg, f) in messages_and_futures] +@pytest.fixture +def client(conn, mocker): + from kafka import KafkaClient - return _send_messages + cli = KafkaClient(api_version=(0, 9)) + mocker.patch.object(cli, '_init_connect', return_value=True) + try: + yield cli + finally: + cli._close() diff --git a/test/integration/__init__.py b/test/integration/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/test/integration/conftest.py b/test/integration/conftest.py new file mode 100644 index 000000000..8af729296 --- /dev/null +++ b/test/integration/conftest.py @@ -0,0 +1,168 @@ +from __future__ import absolute_import + +import os +import uuid + +import pytest + +from kafka.vendor.six.moves.urllib.parse import urlparse # pylint: disable=E0611,F0401 +from test.testutil import env_kafka_version, random_string +from test.integration.fixtures import KafkaFixture, ZookeeperFixture + + +@pytest.fixture(scope="module") +def zookeeper(): + """Return a Zookeeper fixture""" + if "ZOOKEEPER_URI" in os.environ: + parse = urlparse(os.environ["ZOOKEEPER_URI"]) + (host, port) = (parse.hostname, parse.port) + yield ZookeeperFixture.instance(host=host, port=port, external=True) + else: + zk_instance = ZookeeperFixture.instance() + yield zk_instance + zk_instance.close() + + +@pytest.fixture(scope="module") +def kafka_broker(kafka_broker_factory): + """Return a Kafka broker fixture""" + if "KAFKA_URI" in os.environ: + parse = urlparse(os.environ["KAFKA_URI"]) + (host, port) = (parse.hostname, parse.port) + return KafkaFixture.instance(0, host=host, port=port, external=True) + else: + return kafka_broker_factory() + + +@pytest.fixture(scope="module") +def kafka_broker_factory(): + """Return a Kafka broker fixture factory""" + assert env_kafka_version(), 'KAFKA_VERSION must be specified to run integration tests' + + _brokers = [] + def factory(**broker_params): + params = {} if broker_params is None else 
broker_params.copy() + params.setdefault('partitions', 4) + node_id = params.pop('node_id', 0) + broker = KafkaFixture.instance(node_id, **params) + _brokers.append(broker) + return broker + + yield factory + + zks = set() + for broker in _brokers: + zks.add(broker.zookeeper) + broker.close() + for zk in zks: + if zk: + zk.close() + + +@pytest.fixture +def kafka_client(kafka_broker, request): + """Return a KafkaClient fixture""" + (client,) = kafka_broker.get_clients(cnt=1, client_id='%s_client' % (request.node.name,)) + yield client + client.close() + + +@pytest.fixture +def kafka_consumer(kafka_consumer_factory): + """Return a KafkaConsumer fixture""" + return kafka_consumer_factory() + + +@pytest.fixture +def kafka_consumer_factory(kafka_broker, topic, request): + """Return a KafkaConsumer factory fixture""" + _consumer = [None] + + def factory(topics=(topic,), **kafka_consumer_params): + params = {} if kafka_consumer_params is None else kafka_consumer_params.copy() + params.setdefault('client_id', 'consumer_%s' % (request.node.name,)) + params.setdefault('auto_offset_reset', 'earliest') + _consumer[0] = next(kafka_broker.get_consumers(cnt=1, topics=list(topics), **params)) + return _consumer[0] + + yield factory + + if _consumer[0]: + _consumer[0].close() + + +@pytest.fixture +def kafka_producer(kafka_producer_factory): + """Return a KafkaProducer fixture""" + yield kafka_producer_factory() + + +@pytest.fixture +def kafka_producer_factory(kafka_broker, request): + """Return a KafkaProduce factory fixture""" + _producer = [None] + + def factory(**kafka_producer_params): + params = {} if kafka_producer_params is None else kafka_producer_params.copy() + params.setdefault('client_id', 'producer_%s' % (request.node.name,)) + _producer[0] = next(kafka_broker.get_producers(cnt=1, **params)) + return _producer[0] + + yield factory + + if _producer[0]: + _producer[0].close() + + +@pytest.fixture +def kafka_admin_client(kafka_admin_client_factory): + """Return a KafkaAdminClient fixture""" + yield kafka_admin_client_factory() + + +@pytest.fixture +def kafka_admin_client_factory(kafka_broker): + """Return a KafkaAdminClient factory fixture""" + _admin_client = [None] + + def factory(**kafka_admin_client_params): + params = {} if kafka_admin_client_params is None else kafka_admin_client_params.copy() + _admin_client[0] = next(kafka_broker.get_admin_clients(cnt=1, **params)) + return _admin_client[0] + + yield factory + + if _admin_client[0]: + _admin_client[0].close() + + +@pytest.fixture +def topic(kafka_broker, request): + """Return a topic fixture""" + topic_name = '%s_%s' % (request.node.name, random_string(10)) + kafka_broker.create_topics([topic_name]) + return topic_name + + +@pytest.fixture() +def send_messages(topic, kafka_producer, request): + """A factory that returns a send_messages function with a pre-populated + topic topic / producer.""" + + def _send_messages(number_range, partition=0, topic=topic, producer=kafka_producer, request=request): + """ + messages is typically `range(0,100)` + partition is an int + """ + messages_and_futures = [] # [(message, produce_future),] + for i in number_range: + # request.node.name provides the test name (including parametrized values) + encoded_msg = '{}-{}-{}'.format(i, request.node.name, uuid.uuid4()).encode('utf-8') + future = kafka_producer.send(topic, value=encoded_msg, partition=partition) + messages_and_futures.append((encoded_msg, future)) + kafka_producer.flush() + for (msg, f) in messages_and_futures: + assert f.succeeded() + return 
[msg for (msg, f) in messages_and_futures] + + return _send_messages diff --git a/test/fixtures.py b/test/integration/fixtures.py similarity index 82% rename from test/fixtures.py rename to test/integration/fixtures.py index f8e2aa746..b9baf5223 100644 --- a/test/fixtures.py +++ b/test/integration/fixtures.py @@ -1,6 +1,7 @@ from __future__ import absolute_import, division import atexit +import base64 import logging import os import os.path @@ -10,11 +11,11 @@ import uuid import py -from kafka.vendor.six.moves import urllib, range +from kafka.vendor.six.moves import range from kafka.vendor.six.moves.urllib.parse import urlparse # pylint: disable=E0611,F0401 from kafka import errors, KafkaAdminClient, KafkaClient, KafkaConsumer, KafkaProducer -from kafka.errors import InvalidReplicationFactorError +from kafka.errors import InvalidReplicationFactorError, KafkaTimeoutError from kafka.protocol.admin import CreateTopicsRequest from kafka.protocol.metadata import MetadataRequest from test.testutil import env_kafka_version, random_string @@ -65,7 +66,7 @@ class Fixture(object): kafka_version = os.environ.get('KAFKA_VERSION', '0.11.0.2') scala_version = os.environ.get("SCALA_VERSION", '2.8.0') project_root = os.environ.get('PROJECT_ROOT', - os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) + os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))) kafka_root = os.environ.get("KAFKA_ROOT", os.path.join(project_root, 'servers', kafka_version, "kafka-bin")) @@ -74,43 +75,6 @@ def __init__(self): if not os.path.isdir(self.kafka_root): raise FileNotFoundError(self.kafka_root) - @classmethod - def download_official_distribution(cls, - kafka_version=None, - scala_version=None, - output_dir=None): - if not kafka_version: - kafka_version = cls.kafka_version - if not scala_version: - scala_version = cls.scala_version - if not output_dir: - output_dir = os.path.join(cls.project_root, 'servers', 'dist') - - distfile = 'kafka_%s-%s' % (scala_version, kafka_version,) - url_base = 'https://archive.apache.org/dist/kafka/%s/' % (kafka_version,) - output_file = os.path.join(output_dir, distfile + '.tgz') - - if os.path.isfile(output_file): - log.info("Found file already on disk: %s", output_file) - return output_file - - # New tarballs are .tgz, older ones are sometimes .tar.gz - try: - url = url_base + distfile + '.tgz' - log.info("Attempting to download %s", url) - response = urllib.request.urlopen(url) - except urllib.error.HTTPError: - log.exception("HTTP Error") - url = url_base + distfile + '.tar.gz' - log.info("Attempting to download %s", url) - response = urllib.request.urlopen(url) - - log.info("Saving distribution file to %s", output_file) - with open(output_file, 'w') as output_file_fd: - output_file_fd.write(response.read()) - - return output_file - @classmethod def test_resource(cls, filename): path = os.path.join(cls.project_root, "servers", cls.kafka_version, "resources", filename) @@ -169,23 +133,18 @@ def dump_logs(self): class ZookeeperFixture(Fixture): @classmethod - def instance(cls): - if "ZOOKEEPER_URI" in os.environ: - parse = urlparse(os.environ["ZOOKEEPER_URI"]) - (host, port) = (parse.hostname, parse.port) - fixture = ExternalService(host, port) - else: - (host, port) = ("127.0.0.1", None) - fixture = cls(host, port) - + def instance(cls, host=None, port=None, external=False): + if host is None: + host = "127.0.0.1" + fixture = cls(host, port, external=external) fixture.open() return fixture - def __init__(self, host, port, tmp_dir=None): + def __init__(self, host, 
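Put together, these fixtures let an integration test produce and consume against the live broker fixture. A hypothetical test sketch (test name and message count chosen for illustration; requires KAFKA_VERSION and a running fixture broker):

```python
def test_round_trip(send_messages, kafka_consumer):
    # produce 10 messages to the auto-created topic via the send_messages fixture
    sent = send_messages(range(0, 10))

    # consume them back; the kafka_consumer fixture is already subscribed to the topic
    received = []
    for message in kafka_consumer:
        received.append(message.value)
        if len(received) == len(sent):
            break

    assert set(received) == set(sent)
```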
port, external=False, tmp_dir=None): super(ZookeeperFixture, self).__init__() self.host = host self.port = port - + self.running = external self.tmp_dir = tmp_dir def kafka_run_class_env(self): @@ -198,6 +157,8 @@ def out(self, message): log.info("*** Zookeeper [%s:%s]: %s", self.host, self.port or '(auto)', message) def open(self): + if self.running: + return if self.tmp_dir is None: self.tmp_dir = py.path.local.mkdtemp() #pylint: disable=no-member self.tmp_dir.ensure(dir=True) @@ -261,40 +222,52 @@ class KafkaFixture(Fixture): broker_password = 'alice-secret' @classmethod - def instance(cls, broker_id, zookeeper, zk_chroot=None, - host=None, port=None, - transport='PLAINTEXT', replicas=1, partitions=2, + def instance(cls, broker_id, zookeeper=None, zk_chroot=None, + host="localhost", port=None, external=False, + transport='PLAINTEXT', replicas=1, partitions=4, sasl_mechanism=None, auto_create_topic=True, tmp_dir=None): - if zk_chroot is None: - zk_chroot = "kafka-python_" + str(uuid.uuid4()).replace("-", "_") - if "KAFKA_URI" in os.environ: - parse = urlparse(os.environ["KAFKA_URI"]) - (host, port) = (parse.hostname, parse.port) - fixture = ExternalService(host, port) - else: - if host is None: - host = "localhost" - fixture = KafkaFixture(host, port, broker_id, - zookeeper, zk_chroot, - transport=transport, - replicas=replicas, partitions=partitions, - sasl_mechanism=sasl_mechanism, - auto_create_topic=auto_create_topic, - tmp_dir=tmp_dir) - - fixture.open() + # Kafka requries zookeeper prior to 4.0 release + if env_kafka_version() < (4, 0): + if zookeeper is None: + if "ZOOKEEPER_URI" in os.environ: + parse = urlparse(os.environ["ZOOKEEPER_URI"]) + (host, port) = (parse.hostname, parse.port) + zookeeper = ZookeeperFixture.instance(host=host, port=port, external=True) + elif not external: + zookeeper = ZookeeperFixture.instance() + if zk_chroot is None: + zk_chroot = "kafka-python_" + str(uuid.uuid4()).replace("-", "_") + + fixture = KafkaFixture(host, port, broker_id, + zookeeper=zookeeper, zk_chroot=zk_chroot, + external=external, + transport=transport, + replicas=replicas, partitions=partitions, + sasl_mechanism=sasl_mechanism, + auto_create_topic=auto_create_topic, + tmp_dir=tmp_dir) + + fixture.open() return fixture - def __init__(self, host, port, broker_id, zookeeper, zk_chroot, + def __init__(self, host, port, broker_id, zookeeper=None, zk_chroot=None, replicas=1, partitions=2, transport='PLAINTEXT', sasl_mechanism=None, auto_create_topic=True, - tmp_dir=None): + tmp_dir=None, external=False): super(KafkaFixture, self).__init__() self.host = host - self.port = port + self.controller_bootstrap_host = host + if port is None: + self.auto_port = True + self.port = get_open_port() + else: + self.auto_port = False + self.port = port + self.controller_port = get_open_port() + self.cluster_id = self._gen_cluster_id() self.broker_id = broker_id self.auto_create_topic = auto_create_topic self.transport = transport.upper() @@ -307,26 +280,40 @@ def __init__(self, host, port, broker_id, zookeeper, zk_chroot, # TODO: checking for port connection would be better than scanning logs # until then, we need the pattern to work across all supported broker versions # The logging format changed slightly in 1.0.0 - self.start_pattern = r"\[Kafka ?Server (id=)?%d\],? 
started" % (broker_id,) - # Need to wait until the broker has fetched user configs from zookeeper in case we use scram as sasl mechanism - self.scram_pattern = r"Removing Produce quota for user %s" % (self.broker_user) + if env_kafka_version() < (4, 0): + self.start_pattern = r"\[Kafka ?Server (id=)?%d\],? started" % (broker_id,) + # Need to wait until the broker has fetched user configs from zookeeper in case we use scram as sasl mechanism + self.scram_pattern = r"Removing Produce quota for user %s" % (self.broker_user) + else: + self.start_pattern = r"\[KafkaRaftServer nodeId=%d\] Kafka Server started" % (broker_id,) + self.scram_pattern = r"Replayed UserScramCredentialRecord creating new entry for %s" % (self.broker_user,) self.zookeeper = zookeeper self.zk_chroot = zk_chroot # Add the attributes below for the template binding - self.zk_host = self.zookeeper.host - self.zk_port = self.zookeeper.port + self.zk_host = self.zookeeper.host if self.zookeeper else None + self.zk_port = self.zookeeper.port if self.zookeeper else None self.replicas = replicas self.partitions = partitions self.tmp_dir = tmp_dir - self.running = False + self.external = external + + if self.external: + self.child = ExternalService(self.host, self.port) + (self._client,) = self.get_clients(1, client_id='_internal_client') + self.running = True + else: + self._client = None + self.running = False - self._client = None self.sasl_config = '' self.jaas_config = '' + def _gen_cluster_id(self): + return base64.b64encode(uuid.uuid4().bytes).decode('utf-8').rstrip('=') + def _sasl_config(self): if not self.sasl_enabled: return '' @@ -416,6 +403,8 @@ def _create_zk_chroot(self): self.out("Kafka chroot created in Zookeeper!") def start(self): + if self.running: + return True # Configure Kafka child process properties = self.tmp_dir.join("kafka.properties") jaas_conf = self.tmp_dir.join("kafka_server_jaas.conf") @@ -436,12 +425,11 @@ def start(self): backoff = 1 end_at = time.time() + max_timeout tries = 1 - auto_port = (self.port is None) while time.time() < end_at: # We have had problems with port conflicts on travis # so we will try a different port on each retry # unless the fixture was passed a specific port - if auto_port: + if self.auto_port: self.port = get_open_port() self.out('Attempting to start on port %d (try #%d)' % (self.port, tries)) self.render_template(properties_template, properties, vars(self)) @@ -487,6 +475,9 @@ def open(self): self.tmp_dir.ensure(dir=True) self.tmp_dir.ensure('logs', dir=True) self.tmp_dir.ensure('data', dir=True) + properties = self.tmp_dir.join('kafka.properties') + properties_template = self.test_resource('kafka.properties') + self.render_template(properties_template, properties, vars(self)) self.out("Running local instance...") log.info(" host = %s", self.host) @@ -494,19 +485,26 @@ def open(self): log.info(" transport = %s", self.transport) log.info(" sasl_mechanism = %s", self.sasl_mechanism) log.info(" broker_id = %s", self.broker_id) - log.info(" zk_host = %s", self.zookeeper.host) - log.info(" zk_port = %s", self.zookeeper.port) + log.info(" zk_host = %s", self.zk_host) + log.info(" zk_port = %s", self.zk_port) log.info(" zk_chroot = %s", self.zk_chroot) log.info(" replicas = %s", self.replicas) log.info(" partitions = %s", self.partitions) log.info(" tmp_dir = %s", self.tmp_dir.strpath) - self._create_zk_chroot() + if self.zookeeper: + if self.zk_chroot: + self._create_zk_chroot() + # add user to zookeeper for the first server + if self.sasl_enabled and 
self.sasl_mechanism.startswith("SCRAM-SHA") and self.broker_id == 0: + self._add_scram_user() + + else: + # running in KRaft mode + self._format_log_dirs() + self.sasl_config = self._sasl_config() self.jaas_config = self._jaas_config() - # add user to zookeeper for the first server - if self.sasl_enabled and self.sasl_mechanism.startswith("SCRAM-SHA") and self.broker_id == 0: - self._add_scram_user() self.start() atexit.register(self.close) @@ -515,6 +513,8 @@ def __del__(self): self.close() def stop(self): + if self.external: + return if not self.running: self.out("Instance already stopped") return @@ -536,6 +536,21 @@ def dump_logs(self): super(KafkaFixture, self).dump_logs() self.zookeeper.dump_logs() + def _format_log_dirs(self): + self.out("Formatting log dirs for kraft bootstrapping") + args = self.run_script('kafka-storage.sh', 'format', '--standalone', '-t', self.cluster_id, '-c', self.tmp_dir.join("kafka.properties")) + if self.sasl_enabled and self.sasl_mechanism.startswith("SCRAM-SHA"): + args.extend(['--add-scram', '{}=[name={},password={}]'.format(self.sasl_mechanism, self.broker_user, self.broker_password)]) + env = self.kafka_run_class_env() + proc = subprocess.Popen(args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = proc.communicate() + if proc.returncode != 0: + self.out("Failed to format log dirs for kraft bootstrap!") + self.out(stdout) + self.out(stderr) + raise RuntimeError("Failed to format log dirs!") + return True + def _send_request(self, request, timeout=None): def _failure(error): raise error @@ -555,6 +570,8 @@ def _failure(error): future.error_on_callbacks = True future.add_errback(_failure) self._client.poll(future=future, timeout_ms=timeout) + if not future.is_done: + raise KafkaTimeoutError() return future.value except Exception as exc: time.sleep(1) @@ -573,8 +590,9 @@ def _create_topic(self, topic_name, num_partitions=None, replication_factor=None # Try different methods to create a topic, from the fastest to the slowest if self.auto_create_topic and num_partitions == self.partitions and replication_factor == self.replicas: self._create_topic_via_metadata(topic_name, timeout_ms) - elif env_kafka_version() >= (0, 10, 1, 0): + elif env_kafka_version() >= (0, 10, 1, 0) and env_kafka_version() < (4, 0): try: + # 4.0 brokers dropped support for CreateTopicsRequest v0 (TODO: pick from api_versions) self._create_topic_via_admin_api(topic_name, num_partitions, replication_factor, timeout_ms) except InvalidReplicationFactorError: # wait and try again @@ -718,8 +736,8 @@ def get_api_versions(): def run_brokers(): logging.basicConfig(level=logging.ERROR) - zk = ZookeeperFixture.instance() - k = KafkaFixture.instance(0, zk) + k = KafkaFixture.instance(0) + zk = k.zookeeper print("Kafka", k.kafka_version, "running on port:", k.port) try: @@ -728,7 +746,8 @@ def run_brokers(): except KeyboardInterrupt: print("Bye!") k.close() - zk.close() + if zk: + zk.close() if __name__ == '__main__': diff --git a/test/test_admin_integration.py b/test/integration/test_admin_integration.py similarity index 76% rename from test/test_admin_integration.py rename to test/integration/test_admin_integration.py index bd2fd216e..f95f367e8 100644 --- a/test/test_admin_integration.py +++ b/test/integration/test_admin_integration.py @@ -1,3 +1,4 @@ +from kafka.structs import TopicPartition import pytest from logging import info @@ -7,7 +8,9 @@ from kafka.admin import ( ACLFilter, ACLOperation, ACLPermissionType, ResourcePattern, ResourceType, ACL, ConfigResource, 
ConfigResourceType) -from kafka.errors import (NoError, GroupCoordinatorNotAvailableError, NonEmptyGroupError, GroupIdNotFoundError) +from kafka.errors import ( + BrokerResponseError, KafkaError, NoError, CoordinatorNotAvailableError, NonEmptyGroupError, + GroupIdNotFoundError, OffsetOutOfRangeError, UnknownTopicOrPartitionError) @pytest.mark.skipif(env_kafka_version() < (0, 11), reason="ACL features require broker >=0.11") @@ -140,15 +143,15 @@ def test_describe_configs_invalid_broker_id_raises(kafka_admin_client): broker_id = "str" with pytest.raises(ValueError): - configs = kafka_admin_client.describe_configs([ConfigResource(ConfigResourceType.BROKER, broker_id)]) + kafka_admin_client.describe_configs([ConfigResource(ConfigResourceType.BROKER, broker_id)]) @pytest.mark.skipif(env_kafka_version() < (0, 11), reason='Describe consumer group requires broker >=0.11') def test_describe_consumer_group_does_not_exist(kafka_admin_client): """Tests that the describe consumer group call fails if the group coordinator is not available """ - with pytest.raises(GroupCoordinatorNotAvailableError): - group_description = kafka_admin_client.describe_consumer_groups(['test']) + with pytest.raises(CoordinatorNotAvailableError): + kafka_admin_client.describe_consumer_groups(['test']) @pytest.mark.skipif(env_kafka_version() < (0, 11), reason='Describe consumer group requires broker >=0.11') @@ -315,3 +318,71 @@ def test_delete_consumergroups_with_errors(kafka_admin_client, kafka_consumer_fa assert group1 not in consumergroups assert group2 in consumergroups assert group3 not in consumergroups + +@pytest.fixture(name="topic2") +def _topic2(kafka_broker, request): + """Same as `topic` fixture, but a different name if you need to topics.""" + topic_name = '%s_%s' % (request.node.name, random_string(10)) + kafka_broker.create_topics([topic_name]) + return topic_name + +@pytest.mark.skipif(env_kafka_version() < (0, 11), reason="Delete records requires broker >=0.11.0") +def test_delete_records(kafka_admin_client, kafka_consumer_factory, send_messages, topic, topic2): + t0p0 = TopicPartition(topic, 0) + t0p1 = TopicPartition(topic, 1) + t0p2 = TopicPartition(topic, 2) + t1p0 = TopicPartition(topic2, 0) + t1p1 = TopicPartition(topic2, 1) + t1p2 = TopicPartition(topic2, 2) + + partitions = (t0p0, t0p1, t0p2, t1p0, t1p1, t1p2) + + for p in partitions: + send_messages(range(0, 100), partition=p.partition, topic=p.topic) + + consumer1 = kafka_consumer_factory(group_id=None, topics=()) + consumer1.assign(partitions) + for _ in range(600): + next(consumer1) + + result = kafka_admin_client.delete_records({t0p0: -1, t0p1: 50, t1p0: 40, t1p2: 30}, timeout_ms=1000) + assert result[t0p0] == {"low_watermark": 100, "error_code": 0, "partition_index": t0p0.partition} + assert result[t0p1] == {"low_watermark": 50, "error_code": 0, "partition_index": t0p1.partition} + assert result[t1p0] == {"low_watermark": 40, "error_code": 0, "partition_index": t1p0.partition} + assert result[t1p2] == {"low_watermark": 30, "error_code": 0, "partition_index": t1p2.partition} + + consumer2 = kafka_consumer_factory(group_id=None, topics=()) + consumer2.assign(partitions) + all_messages = consumer2.poll(max_records=600, timeout_ms=2000) + assert sum(len(x) for x in all_messages.values()) == 600 - 100 - 50 - 40 - 30 + assert not consumer2.poll(max_records=1, timeout_ms=1000) # ensure there are no delayed messages + + assert not all_messages.get(t0p0, []) + assert [r.offset for r in all_messages[t0p1]] == list(range(50, 100)) + assert [r.offset for r 
in all_messages[t0p2]] == list(range(100)) + + assert [r.offset for r in all_messages[t1p0]] == list(range(40, 100)) + assert [r.offset for r in all_messages[t1p1]] == list(range(100)) + assert [r.offset for r in all_messages[t1p2]] == list(range(30, 100)) + + +@pytest.mark.skipif(env_kafka_version() < (0, 11), reason="Delete records requires broker >=0.11.0") +def test_delete_records_with_errors(kafka_admin_client, topic, send_messages): + sleep(1) # sometimes the topic is not created yet...? + p0 = TopicPartition(topic, 0) + p1 = TopicPartition(topic, 1) + p2 = TopicPartition(topic, 2) + # verify that topic has been created + send_messages(range(0, 1), partition=p2.partition, topic=p2.topic) + + with pytest.raises(UnknownTopicOrPartitionError): + kafka_admin_client.delete_records({TopicPartition(topic, 9999): -1}) + with pytest.raises(UnknownTopicOrPartitionError): + kafka_admin_client.delete_records({TopicPartition("doesntexist", 0): -1}) + with pytest.raises(OffsetOutOfRangeError): + kafka_admin_client.delete_records({p0: 1000}) + with pytest.raises(BrokerResponseError): + kafka_admin_client.delete_records({p0: 1000, p1: 1000}) + + + diff --git a/test/test_consumer_group.py b/test/integration/test_consumer_group.py similarity index 60% rename from test/test_consumer_group.py rename to test/integration/test_consumer_group.py index c1ef978e2..eed570074 100644 --- a/test/test_consumer_group.py +++ b/test/integration/test_consumer_group.py @@ -47,7 +47,7 @@ def test_group(kafka_broker, topic): consumers = {} stop = {} threads = {} - messages = collections.defaultdict(list) + messages = collections.defaultdict(lambda: collections.defaultdict(list)) group_id = 'test-group-' + random_string(6) def consumer_thread(i): assert i not in consumers @@ -56,58 +56,62 @@ def consumer_thread(i): consumers[i] = KafkaConsumer(topic, bootstrap_servers=connect_str, group_id=group_id, + client_id="consumer_thread-%s" % i, + api_version_auto_timeout_ms=5000, heartbeat_interval_ms=500) while not stop[i].is_set(): - for tp, records in six.itervalues(consumers[i].poll(timeout_ms=200)): + for tp, records in six.iteritems(consumers[i].poll(timeout_ms=200)): messages[i][tp].extend(records) - consumers[i].close() + consumers[i].close(timeout_ms=500) consumers[i] = None stop[i] = None num_consumers = 4 for i in range(num_consumers): t = threading.Thread(target=consumer_thread, args=(i,)) + t.daemon = True t.start() threads[i] = t try: - timeout = time.time() + 35 + timeout = time.time() + 15 while True: - for c in range(num_consumers): - - # Verify all consumers have been created - if c not in consumers: - break - - # Verify all consumers have an assignment - elif not consumers[c].assignment(): - break + assert time.time() < timeout, "timeout waiting for assignments" + # Verify all consumers have been created + missing_consumers = set(consumers.keys()) - set(range(num_consumers)) + if missing_consumers: + logging.info('Waiting on consumer threads: %s', missing_consumers) + time.sleep(1) + continue + + unassigned_consumers = {c for c, consumer in six.iteritems(consumers) if not consumer.assignment()} + if unassigned_consumers: + logging.info('Waiting for consumer assignments: %s', unassigned_consumers) + time.sleep(1) + continue # If all consumers exist and have an assignment + logging.info('All consumers have assignment... 
checking for stable group') + # Verify all consumers are in the same generation + # then log state and break while loop + generations = set([consumer._coordinator._generation.generation_id + for consumer in six.itervalues(consumers)]) + + # New generation assignment is not complete until + # coordinator.rejoining = False + rejoining = set([c for c, consumer in six.iteritems(consumers) if consumer._coordinator.rejoining]) + + if not rejoining and len(generations) == 1: + for c, consumer in six.iteritems(consumers): + logging.info("[%s] %s %s: %s", c, + consumer._coordinator._generation.generation_id, + consumer._coordinator._generation.member_id, + consumer.assignment()) + break else: - - logging.info('All consumers have assignment... checking for stable group') - # Verify all consumers are in the same generation - # then log state and break while loop - generations = set([consumer._coordinator._generation.generation_id - for consumer in list(consumers.values())]) - - # New generation assignment is not complete until - # coordinator.rejoining = False - rejoining = any([consumer._coordinator.rejoining - for consumer in list(consumers.values())]) - - if not rejoining and len(generations) == 1: - for c, consumer in list(consumers.items()): - logging.info("[%s] %s %s: %s", c, - consumer._coordinator._generation.generation_id, - consumer._coordinator._generation.member_id, - consumer.assignment()) - break - else: - logging.info('Rejoining: %s, generations: %s', rejoining, generations) - time.sleep(1) - assert time.time() < timeout, "timeout waiting for assignments" + logging.info('Rejoining: %s, generations: %s', rejoining, generations) + time.sleep(1) + continue logging.info('Group stabilized; verifying assignment') group_assignment = set() @@ -121,12 +125,27 @@ def consumer_thread(i): for partition in range(num_partitions)]) logging.info('Assignment looks good!') + logging.info('Verifying heartbeats') + while True: + for c in range(num_consumers): + heartbeat = consumers[c]._coordinator.heartbeat + last_hb = time.time() - 0.5 + if (heartbeat.heartbeat_failed or + heartbeat.last_receive < last_hb or + heartbeat.last_reset > last_hb): + time.sleep(0.1) + continue + else: + break + logging.info('Heartbeats look good') + finally: logging.info('Shutting down %s consumers', num_consumers) for c in range(num_consumers): logging.info('Stopping consumer %s', c) stop[c].set() - threads[c].join() + threads[c].join(timeout=5) + assert not threads[c].is_alive() threads[c] = None @@ -158,22 +177,32 @@ def test_heartbeat_thread(kafka_broker, topic): heartbeat_interval_ms=500) # poll until we have joined group / have assignment + start = time.time() while not consumer.assignment(): consumer.poll(timeout_ms=100) assert consumer._coordinator.state is MemberState.STABLE last_poll = consumer._coordinator.heartbeat.last_poll - last_beat = consumer._coordinator.heartbeat.last_send + + # wait until we receive first heartbeat + while consumer._coordinator.heartbeat.last_receive < start: + time.sleep(0.1) + + last_send = consumer._coordinator.heartbeat.last_send + last_recv = consumer._coordinator.heartbeat.last_receive + assert last_poll > start + assert last_send > start + assert last_recv > start timeout = time.time() + 30 while True: if time.time() > timeout: raise RuntimeError('timeout waiting for heartbeat') - if consumer._coordinator.heartbeat.last_send > last_beat: + if consumer._coordinator.heartbeat.last_receive > last_recv: break time.sleep(0.5) assert consumer._coordinator.heartbeat.last_poll == last_poll 
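The reworked test_group and heartbeat checks in this hunk all share one shape: poll, sleep briefly, re-check, and fail once a deadline passes. A minimal standalone sketch of that pattern, assuming only the standard library; the helper name and signature below are invented for illustration and are not part of this patch:

    import time

    def wait_until(condition, timeout=15.0, interval=0.5, message="condition"):
        # Poll `condition` until it returns a truthy value or the deadline passes.
        deadline = time.time() + timeout
        while time.time() < deadline:
            result = condition()
            if result:
                return result
            time.sleep(interval)
        raise AssertionError("timeout waiting for %s after %.1fs" % (message, timeout))

For example, something like wait_until(lambda: all(c.assignment() for c in consumers.values()), message="consumer assignments") would express the assignment-wait loop above as a single call; this is only a sketch of the idea, not code from the patch.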
consumer.poll(timeout_ms=100) assert consumer._coordinator.heartbeat.last_poll > last_poll - consumer.close() + consumer.close(timeout_ms=100) diff --git a/test/test_consumer_integration.py b/test/integration/test_consumer_integration.py similarity index 96% rename from test/test_consumer_integration.py rename to test/integration/test_consumer_integration.py index 5aeb63d1d..71cf2642d 100644 --- a/test/test_consumer_integration.py +++ b/test/integration/test_consumer_integration.py @@ -1,12 +1,15 @@ import logging import time -from mock import patch, ANY +try: + from unittest.mock import patch, ANY +except ImportError: + from mock import patch, ANY import pytest from kafka.vendor.six.moves import range import kafka.codec -from kafka.errors import UnsupportedCodecError, UnsupportedVersionError +from kafka.errors import KafkaTimeoutError, UnsupportedCodecError, UnsupportedVersionError from kafka.structs import TopicPartition, OffsetAndTimestamp from test.testutil import Timer, assert_message_count, env_kafka_version, random_string @@ -65,8 +68,8 @@ def test_kafka_consumer_unsupported_encoding( def test_kafka_consumer__blocking(kafka_consumer_factory, topic, send_messages): TIMEOUT_MS = 500 consumer = kafka_consumer_factory(auto_offset_reset='earliest', - enable_auto_commit=False, - consumer_timeout_ms=TIMEOUT_MS) + enable_auto_commit=False, + consumer_timeout_ms=TIMEOUT_MS) # Manual assignment avoids overhead of consumer group mgmt consumer.unsubscribe() @@ -297,4 +300,5 @@ def test_kafka_consumer_offsets_for_times_errors(kafka_consumer_factory, topic): with pytest.raises(ValueError): consumer.offsets_for_times({tp: -1}) - assert consumer.offsets_for_times({bad_tp: 0}) == {bad_tp: None} + with pytest.raises(KafkaTimeoutError): + consumer.offsets_for_times({bad_tp: 0}) diff --git a/test/integration/test_producer_integration.py b/test/integration/test_producer_integration.py new file mode 100644 index 000000000..037a82834 --- /dev/null +++ b/test/integration/test_producer_integration.py @@ -0,0 +1,207 @@ +from __future__ import absolute_import + +from contextlib import contextmanager +import platform +import time + +import pytest + +from kafka import KafkaAdminClient, KafkaConsumer, KafkaProducer, TopicPartition, OffsetAndMetadata +from test.testutil import env_kafka_version, random_string, maybe_skip_unsupported_compression + + +@contextmanager +def producer_factory(**kwargs): + producer = KafkaProducer(**kwargs) + try: + yield producer + finally: + producer.close(timeout=1) + + +@contextmanager +def consumer_factory(**kwargs): + consumer = KafkaConsumer(**kwargs) + try: + yield consumer + finally: + consumer.close(timeout_ms=100) + + +@contextmanager +def admin_factory(**kwargs): + admin = KafkaAdminClient(**kwargs) + try: + yield admin + finally: + admin.close() + + +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") +@pytest.mark.parametrize("compression", [None, 'gzip', 'snappy', 'lz4', 'zstd']) +def test_end_to_end(kafka_broker, compression): + maybe_skip_unsupported_compression(compression) + if compression == 'lz4': + if env_kafka_version() < (0, 8, 2): + pytest.skip('LZ4 requires 0.8.2') + elif platform.python_implementation() == 'PyPy': + pytest.skip('python-lz4 crashes on older versions of pypy') + + if compression == 'zstd' and env_kafka_version() < (2, 1, 0): + pytest.skip('zstd requires kafka 2.1.0 or newer') + + connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)]) + producer_args = { + 'bootstrap_servers': connect_str, + 'retries': 5, + 
'max_block_ms': 30000, + 'compression_type': compression, + 'value_serializer': str.encode, + } + consumer_args = { + 'bootstrap_servers': connect_str, + 'group_id': None, + 'consumer_timeout_ms': 30000, + 'auto_offset_reset': 'earliest', + 'value_deserializer': bytes.decode, + } + with producer_factory(**producer_args) as producer, consumer_factory(**consumer_args) as consumer: + topic = random_string(5) + + messages = 100 + futures = [] + for i in range(messages): + futures.append(producer.send(topic, 'msg %d' % i)) + ret = [f.get(timeout=30) for f in futures] + assert len(ret) == messages + + consumer.subscribe([topic]) + msgs = set() + for i in range(messages): + try: + msgs.add(next(consumer).value) + except StopIteration: + break + + assert msgs == set(['msg %d' % (i,) for i in range(messages)]) + + +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") +@pytest.mark.parametrize("compression", [None, 'gzip', 'snappy', 'lz4', 'zstd']) +def test_kafka_producer_proper_record_metadata(kafka_broker, compression): + maybe_skip_unsupported_compression(compression) + if compression == 'zstd' and env_kafka_version() < (2, 1, 0): + pytest.skip('zstd requires 2.1.0 or more') + connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)]) + with producer_factory(bootstrap_servers=connect_str, + retries=5, + max_block_ms=30000, + compression_type=compression) as producer: + magic = producer.max_usable_produce_magic(producer.config['api_version']) + + # record headers are supported in 0.11.0 + if env_kafka_version() < (0, 11, 0): + headers = None + else: + headers = [("Header Key", b"Header Value")] + + topic = random_string(5) + future = producer.send( + topic, + value=b"Simple value", key=b"Simple key", headers=headers, timestamp_ms=9999999, + partition=0) + record = future.get(timeout=5) + assert record is not None + assert record.topic == topic + assert record.partition == 0 + assert record.topic_partition == TopicPartition(topic, 0) + assert record.offset == 0 + if magic >= 1: + assert record.timestamp == 9999999 + else: + assert record.timestamp == -1 # NO_TIMESTAMP + + if magic >= 2: + assert record.checksum is None + elif magic == 1: + assert record.checksum == 1370034956 + else: + assert record.checksum == 3296137851 + + assert record.serialized_key_size == 10 + assert record.serialized_value_size == 12 + if headers: + assert record.serialized_header_size == 22 + + if magic == 0: + pytest.skip('generated timestamp case is skipped for broker 0.9 and below') + send_time = time.time() * 1000 + future = producer.send( + topic, + value=b"Simple value", key=b"Simple key", timestamp_ms=None, + partition=0) + record = future.get(timeout=5) + assert abs(record.timestamp - send_time) <= 1000 # Allow 1s deviation + + +@pytest.mark.skipif(env_kafka_version() < (0, 11), reason="Idempotent producer requires broker >=0.11") +def test_idempotent_producer(kafka_broker): + connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)]) + with producer_factory(bootstrap_servers=connect_str, enable_idempotence=True) as producer: + for _ in range(10): + producer.send('idempotent_test_topic', value=b'idempotent_msg').get(timeout=1) + + +@pytest.mark.skipif(env_kafka_version() < (0, 11), reason="Idempotent producer requires broker >=0.11") +def test_transactional_producer_messages(kafka_broker): + connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)]) + with producer_factory(bootstrap_servers=connect_str, transactional_id='testing') as producer: + 
producer.init_transactions() + producer.begin_transaction() + producer.send('transactional_test_topic', partition=0, value=b'msg1').get() + producer.send('transactional_test_topic', partition=0, value=b'msg2').get() + producer.abort_transaction() + producer.begin_transaction() + producer.send('transactional_test_topic', partition=0, value=b'msg3').get() + producer.send('transactional_test_topic', partition=0, value=b'msg4').get() + producer.commit_transaction() + + messages = set() + consumer_opts = { + 'bootstrap_servers': connect_str, + 'group_id': None, + 'consumer_timeout_ms': 10000, + 'auto_offset_reset': 'earliest', + 'isolation_level': 'read_committed', + } + with consumer_factory(**consumer_opts) as consumer: + consumer.assign([TopicPartition('transactional_test_topic', 0)]) + for msg in consumer: + assert msg.value in {b'msg3', b'msg4'} + messages.add(msg.value) + if messages == {b'msg3', b'msg4'}: + break + assert messages == {b'msg3', b'msg4'} + + +@pytest.mark.skipif(env_kafka_version() < (0, 11), reason="Idempotent producer requires broker >=0.11") +def test_transactional_producer_offsets(kafka_broker): + connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)]) + # Setting leader_epoch only supported in 2.1+ + if env_kafka_version() >= (2, 1): + leader_epoch = 0 + else: + leader_epoch = -1 + offsets = {TopicPartition('transactional_test_topic', 0): OffsetAndMetadata(0, 'metadata', leader_epoch)} + with producer_factory(bootstrap_servers=connect_str, transactional_id='testing') as producer: + producer.init_transactions() + producer.begin_transaction() + producer.send_offsets_to_transaction(offsets, 'txn-test-group') + producer.commit_transaction() + + producer.begin_transaction() + producer.send_offsets_to_transaction({TopicPartition('transactional_test_topic', 1): OffsetAndMetadata(1, 'bad', 1)}, 'txn-test-group') + producer.abort_transaction() + + with admin_factory(bootstrap_servers=connect_str) as admin: + assert admin.list_consumer_group_offsets('txn-test-group') == offsets diff --git a/test/test_sasl_integration.py b/test/integration/test_sasl_integration.py similarity index 98% rename from test/test_sasl_integration.py rename to test/integration/test_sasl_integration.py index 0f404da20..69323fb92 100644 --- a/test/test_sasl_integration.py +++ b/test/integration/test_sasl_integration.py @@ -25,7 +25,7 @@ ] ) def sasl_kafka(request, kafka_broker_factory): - sasl_kafka = kafka_broker_factory(transport="SASL_PLAINTEXT", sasl_mechanism=request.param)[0] + sasl_kafka = kafka_broker_factory(transport="SASL_PLAINTEXT", sasl_mechanism=request.param) yield sasl_kafka sasl_kafka.child.dump_logs() diff --git a/test/record/test_default_records.py b/test/record/test_default_records.py index c3a7b02c8..540705d50 100644 --- a/test/record/test_default_records.py +++ b/test/record/test_default_records.py @@ -1,13 +1,18 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals import pytest -from mock import patch +try: + from unittest.mock import patch +except ImportError: + from mock import patch import kafka.codec from kafka.record.default_records import ( DefaultRecordBatch, DefaultRecordBatchBuilder ) from kafka.errors import UnsupportedCodecError +from test.testutil import maybe_skip_unsupported_compression + @pytest.mark.parametrize("compression_type", [ DefaultRecordBatch.CODEC_NONE, @@ -16,6 +21,7 @@ DefaultRecordBatch.CODEC_LZ4 ]) def test_read_write_serde_v2(compression_type): + maybe_skip_unsupported_compression(compression_type) builder = 
DefaultRecordBatchBuilder( magic=2, compression_type=compression_type, is_transactional=1, producer_id=123456, producer_epoch=123, base_sequence=9999, @@ -51,8 +57,8 @@ def test_written_bytes_equals_size_in_bytes_v2(): producer_id=-1, producer_epoch=-1, base_sequence=-1, batch_size=999999) - size_in_bytes = builder.size_in_bytes( - 0, timestamp=9999999, key=key, value=value, headers=headers) + size_in_bytes = DefaultRecordBatchBuilder.size_in_bytes( + offset_delta=0, timestamp_delta=0, key=key, value=value, headers=headers) pos = builder.size() meta = builder.append( @@ -183,6 +189,8 @@ def test_default_batch_size_limit(): ]) @pytest.mark.parametrize("magic", [0, 1]) def test_unavailable_codec(magic, compression_type, name, checker_name): + if not getattr(kafka.codec, checker_name)(): + pytest.skip('%s compression_type not installed' % (compression_type,)) builder = DefaultRecordBatchBuilder( magic=2, compression_type=compression_type, is_transactional=0, producer_id=-1, producer_epoch=-1, base_sequence=-1, diff --git a/test/record/test_legacy_records.py b/test/record/test_legacy_records.py index 43970f7c9..c692d35a1 100644 --- a/test/record/test_legacy_records.py +++ b/test/record/test_legacy_records.py @@ -1,12 +1,17 @@ from __future__ import unicode_literals import pytest -from mock import patch +try: + from unittest.mock import patch +except ImportError: + from mock import patch from kafka.record.legacy_records import ( LegacyRecordBatch, LegacyRecordBatchBuilder ) import kafka.codec from kafka.errors import UnsupportedCodecError +from test.testutil import maybe_skip_unsupported_compression + @pytest.mark.parametrize("magic", [0, 1]) def test_read_write_serde_v0_v1_no_compression(magic): @@ -36,6 +41,7 @@ def test_read_write_serde_v0_v1_no_compression(magic): ]) @pytest.mark.parametrize("magic", [0, 1]) def test_read_write_serde_v0_v1_with_compression(compression_type, magic): + maybe_skip_unsupported_compression(compression_type) builder = LegacyRecordBatchBuilder( magic=magic, compression_type=compression_type, batch_size=9999999) for offset in range(10): @@ -176,6 +182,7 @@ def test_legacy_batch_size_limit(magic): ]) @pytest.mark.parametrize("magic", [0, 1]) def test_unavailable_codec(magic, compression_type, name, checker_name): + maybe_skip_unsupported_compression(compression_type) builder = LegacyRecordBatchBuilder( magic=magic, compression_type=compression_type, batch_size=1024) builder.append(0, timestamp=None, key=None, value=b"M") diff --git a/test/record/test_records.py b/test/record/test_records.py index cab95922d..65010d88f 100644 --- a/test/record/test_records.py +++ b/test/record/test_records.py @@ -2,7 +2,9 @@ from __future__ import unicode_literals import pytest from kafka.record import MemoryRecords, MemoryRecordsBuilder -from kafka.errors import CorruptRecordException +from kafka.errors import CorruptRecordError + +from test.testutil import maybe_skip_unsupported_compression # This is real live data from Kafka 11 broker record_batch_data_v2 = [ @@ -172,13 +174,14 @@ def test_memory_records_corrupt(): b"\x00\x00\x00\x03" # Length=3 b"\xfe\xb0\x1d", # Some random bytes ) - with pytest.raises(CorruptRecordException): + with pytest.raises(CorruptRecordError): records.next_batch() @pytest.mark.parametrize("compression_type", [0, 1, 2, 3]) @pytest.mark.parametrize("magic", [0, 1, 2]) def test_memory_records_builder(magic, compression_type): + maybe_skip_unsupported_compression(compression_type) builder = MemoryRecordsBuilder( magic=magic, compression_type=compression_type, 
batch_size=1024 * 10) base_size = builder.size_in_bytes() # V2 has a header before diff --git a/test/sasl/test_gssapi.py b/test/sasl/test_gssapi.py new file mode 100644 index 000000000..aa1d86b9e --- /dev/null +++ b/test/sasl/test_gssapi.py @@ -0,0 +1,42 @@ +from __future__ import absolute_import + +try: + from unittest import mock +except ImportError: + import mock + +from kafka.sasl import get_sasl_mechanism +import kafka.sasl.gssapi + + +def test_gssapi(): + config = { + 'sasl_kerberos_domain_name': 'foo', + 'sasl_kerberos_service_name': 'bar', + } + client_ctx = mock.Mock() + client_ctx.step.side_effect = [b'init', b'exchange', b'complete', b'xxxx'] + client_ctx.complete = False + def mocked_message_wrapper(msg, *args): + wrapped = mock.Mock() + type(wrapped).message = mock.PropertyMock(return_value=msg) + return wrapped + client_ctx.unwrap.side_effect = mocked_message_wrapper + client_ctx.wrap.side_effect = mocked_message_wrapper + kafka.sasl.gssapi.gssapi = mock.Mock() + kafka.sasl.gssapi.gssapi.SecurityContext.return_value = client_ctx + gssapi = get_sasl_mechanism('GSSAPI')(**config) + assert isinstance(gssapi, kafka.sasl.gssapi.SaslMechanismGSSAPI) + client_ctx.step.assert_called_with(None) + + while not gssapi.is_done(): + send_token = gssapi.auth_bytes() + receive_token = send_token # not realistic, but enough for testing + if send_token == b'\x01ompletebar@foo': # final wrapped message + receive_token = b'' # final message gets an empty response + gssapi.receive(receive_token) + if client_ctx.step.call_count == 3: + client_ctx.complete = True + + assert gssapi.is_done() + assert gssapi.is_authenticated() diff --git a/test/sasl/test_msk.py b/test/sasl/test_msk.py new file mode 100644 index 000000000..f3cc46ce8 --- /dev/null +++ b/test/sasl/test_msk.py @@ -0,0 +1,85 @@ +import datetime +import json +import sys + +from kafka.sasl.msk import AwsMskIamClient, SaslMechanismAwsMskIam + +try: + from unittest import mock +except ImportError: + import mock + + +def client_factory(token=None): + if sys.version_info >= (3, 3): + now = datetime.datetime.fromtimestamp(1629321911, datetime.timezone.utc) + else: + now = datetime.datetime.utcfromtimestamp(1629321911) + with mock.patch('kafka.sasl.msk.datetime') as mock_dt: + mock_dt.datetime.utcnow = mock.Mock(return_value=now) + return AwsMskIamClient( + host='localhost', + access_key='XXXXXXXXXXXXXXXXXXXX', + secret_key='XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX', + region='us-east-1', + token=token, + ) + + +def test_aws_msk_iam_client_permanent_credentials(): + client = client_factory(token=None) + msg = client.first_message() + assert msg + assert isinstance(msg, bytes) + actual = json.loads(msg) + + expected = { + 'version': '2020_10_22', + 'host': 'localhost', + 'user-agent': 'kafka-python', + 'action': 'kafka-cluster:Connect', + 'x-amz-algorithm': 'AWS4-HMAC-SHA256', + 'x-amz-credential': 'XXXXXXXXXXXXXXXXXXXX/20210818/us-east-1/kafka-cluster/aws4_request', + 'x-amz-date': '20210818T212511Z', + 'x-amz-signedheaders': 'host', + 'x-amz-expires': '900', + 'x-amz-signature': '0fa42ae3d5693777942a7a4028b564f0b372bafa2f71c1a19ad60680e6cb994b', + } + assert actual == expected + + +def test_aws_msk_iam_client_temporary_credentials(): + client = client_factory(token='XXXXX') + msg = client.first_message() + assert msg + assert isinstance(msg, bytes) + actual = json.loads(msg) + + expected = { + 'version': '2020_10_22', + 'host': 'localhost', + 'user-agent': 'kafka-python', + 'action': 'kafka-cluster:Connect', + 'x-amz-algorithm': 
'AWS4-HMAC-SHA256', + 'x-amz-credential': 'XXXXXXXXXXXXXXXXXXXX/20210818/us-east-1/kafka-cluster/aws4_request', + 'x-amz-date': '20210818T212511Z', + 'x-amz-signedheaders': 'host', + 'x-amz-expires': '900', + 'x-amz-signature': 'b0619c50b7ecb4a7f6f92bd5f733770df5710e97b25146f97015c0b1db783b05', + 'x-amz-security-token': 'XXXXX', + } + assert actual == expected + + +def test_aws_msk_iam_sasl_mechanism(): + with mock.patch('kafka.sasl.msk.BotoSession'): + sasl = SaslMechanismAwsMskIam(security_protocol='SASL_SSL', host='localhost') + with mock.patch.object(sasl, '_build_client', return_value=client_factory(token=None)): + assert sasl.auth_bytes() != b'' + assert not sasl.is_done() + assert not sasl.is_authenticated() + sasl.receive(b'foo') + assert sasl._auth == 'foo' + assert sasl.is_done() + assert sasl.is_authenticated() + assert sasl.auth_details() diff --git a/test/service.py b/test/service.py index e4e89f8fe..a53fab8da 100644 --- a/test/service.py +++ b/test/service.py @@ -29,6 +29,11 @@ def open(self): def close(self): pass + def dump_logs(self): + pass + + def wait_for(self, pattern, timeout=30): + pass class SpawnedService(threading.Thread): def __init__(self, args=None, env=None): @@ -52,8 +57,8 @@ def __init__(self, args=None, env=None): log.debug(" {key}={value}".format(key=key, value=value)) def _spawn(self): - if self.alive: return - if self.child and self.child.poll() is None: return + if self.alive or (self.child and self.child.poll() is None): + return self.child = subprocess.Popen( self.args, @@ -76,6 +81,7 @@ def _despawn(self): else: self.child.kill() + # via threading.Thread def run(self): self._spawn() while True: diff --git a/test/test_admin.py b/test/test_admin.py index 279f85abf..cdb74242e 100644 --- a/test/test_admin.py +++ b/test/test_admin.py @@ -6,7 +6,7 @@ def test_config_resource(): with pytest.raises(KeyError): - bad_resource = kafka.admin.ConfigResource('something', 'foo') + _bad_resource = kafka.admin.ConfigResource('something', 'foo') good_resource = kafka.admin.ConfigResource('broker', 'bar') assert good_resource.resource_type == kafka.admin.ConfigResourceType.BROKER assert good_resource.name == 'bar' @@ -59,11 +59,11 @@ def test_acl_resource(): def test_new_topic(): with pytest.raises(IllegalArgumentError): - bad_topic = kafka.admin.NewTopic('foo', -1, -1) + _bad_topic = kafka.admin.NewTopic('foo', -1, -1) with pytest.raises(IllegalArgumentError): - bad_topic = kafka.admin.NewTopic('foo', 1, -1) + _bad_topic = kafka.admin.NewTopic('foo', 1, -1) with pytest.raises(IllegalArgumentError): - bad_topic = kafka.admin.NewTopic('foo', 1, 1, {1: [1, 1, 1]}) + _bad_topic = kafka.admin.NewTopic('foo', 1, 1, {1: [1, 1, 1]}) good_topic = kafka.admin.NewTopic('foo', 1, 2) assert good_topic.name == 'foo' assert good_topic.num_partitions == 1 diff --git a/test/test_client_async.py b/test/test_client_async.py index 015f39365..acc400f9c 100644 --- a/test/test_client_async.py +++ b/test/test_client_async.py @@ -23,16 +23,33 @@ @pytest.fixture -def cli(mocker, conn): +def client_poll_mocked(mocker): + cli = KafkaClient(request_timeout_ms=9999999, + reconnect_backoff_ms=2222, + connections_max_idle_ms=float('inf'), + api_version=(0, 9)) + mocker.patch.object(cli, '_poll') + ttl = mocker.patch.object(cli.cluster, 'ttl') + ttl.return_value = 0 + try: + yield cli + finally: + cli._close() + + +@pytest.fixture +def client_selector_mocked(mocker, conn): client = KafkaClient(api_version=(0, 9)) mocker.patch.object(client, '_selector') 
client.poll(future=client.cluster.request_update()) - return client - + try: + yield client + finally: + client._close() def test_bootstrap(mocker, conn): conn.state = ConnectionStates.CONNECTED - cli = KafkaClient(api_version=(0, 9)) + cli = KafkaClient(api_version=(2, 1)) mocker.patch.object(cli, '_selector') future = cli.cluster.request_update() cli.poll(future=future) @@ -43,207 +60,206 @@ def test_bootstrap(mocker, conn): kwargs.pop('state_change_callback') kwargs.pop('node_id') assert kwargs == cli.config - conn.send.assert_called_once_with(MetadataRequest[0]([]), blocking=False, request_timeout_ms=None) + conn.send.assert_called_once_with(MetadataRequest[7]([], True), blocking=False, request_timeout_ms=None) assert cli._bootstrap_fails == 0 assert cli.cluster.brokers() == set([BrokerMetadata(0, 'foo', 12, None), BrokerMetadata(1, 'bar', 34, None)]) -def test_can_connect(cli, conn): +def test_can_connect(client_selector_mocked, conn): # Node is not in broker metadata - can't connect - assert not cli._can_connect(2) + assert not client_selector_mocked._can_connect(2) # Node is in broker metadata but not in _conns - assert 0 not in cli._conns - assert cli._can_connect(0) + assert 0 not in client_selector_mocked._conns + assert client_selector_mocked._can_connect(0) # Node is connected, can't reconnect - assert cli._init_connect(0) is True - assert not cli._can_connect(0) + assert client_selector_mocked._init_connect(0) is True + assert not client_selector_mocked._can_connect(0) # Node is disconnected, can connect - cli._conns[0].state = ConnectionStates.DISCONNECTED - assert cli._can_connect(0) + client_selector_mocked._conns[0].state = ConnectionStates.DISCONNECTED + assert client_selector_mocked._can_connect(0) # Node is disconnected, but blacked out conn.blacked_out.return_value = True - assert not cli._can_connect(0) + assert not client_selector_mocked._can_connect(0) -def test_init_connect(cli, conn): +def test_init_connect(client_selector_mocked, conn): # Node not in metadata, return False - assert not cli._init_connect(2) + assert not client_selector_mocked._init_connect(2) # New node_id creates a conn object - assert 0 not in cli._conns + assert 0 not in client_selector_mocked._conns conn.state = ConnectionStates.DISCONNECTED conn.connect.side_effect = lambda: conn._set_conn_state(ConnectionStates.CONNECTING) - assert cli._init_connect(0) is True - assert cli._conns[0] is conn + assert client_selector_mocked._init_connect(0) is True + assert client_selector_mocked._conns[0] is conn -def test_conn_state_change(mocker, cli, conn): - sel = cli._selector +def test_conn_state_change(client_selector_mocked, conn): + sel = client_selector_mocked._selector node_id = 0 - cli._conns[node_id] = conn + client_selector_mocked._conns[node_id] = conn conn.state = ConnectionStates.CONNECTING sock = conn._sock - cli._conn_state_change(node_id, sock, conn) - assert node_id in cli._connecting + client_selector_mocked._conn_state_change(node_id, sock, conn) + assert node_id in client_selector_mocked._connecting sel.register.assert_called_with(sock, selectors.EVENT_WRITE, conn) conn.state = ConnectionStates.CONNECTED - cli._conn_state_change(node_id, sock, conn) - assert node_id not in cli._connecting + client_selector_mocked._conn_state_change(node_id, sock, conn) + assert node_id not in client_selector_mocked._connecting sel.modify.assert_called_with(sock, selectors.EVENT_READ, conn) # Failure to connect should trigger metadata update - assert cli.cluster._need_update is False + assert 
client_selector_mocked.cluster._need_update is False conn.state = ConnectionStates.DISCONNECTED - cli._conn_state_change(node_id, sock, conn) - assert node_id not in cli._connecting - assert cli.cluster._need_update is True + client_selector_mocked._conn_state_change(node_id, sock, conn) + assert node_id not in client_selector_mocked._connecting + assert client_selector_mocked.cluster._need_update is True sel.unregister.assert_called_with(sock) conn.state = ConnectionStates.CONNECTING - cli._conn_state_change(node_id, sock, conn) - assert node_id in cli._connecting + client_selector_mocked._conn_state_change(node_id, sock, conn) + assert node_id in client_selector_mocked._connecting conn.state = ConnectionStates.DISCONNECTED - cli._conn_state_change(node_id, sock, conn) - assert node_id not in cli._connecting + client_selector_mocked._conn_state_change(node_id, sock, conn) + assert node_id not in client_selector_mocked._connecting -def test_ready(mocker, cli, conn): - maybe_connect = mocker.patch.object(cli, 'maybe_connect') +def test_ready(mocker, client_selector_mocked, conn): + maybe_connect = mocker.patch.object(client_selector_mocked, 'maybe_connect') node_id = 1 - cli.ready(node_id) + client_selector_mocked.ready(node_id) maybe_connect.assert_called_with(node_id) -def test_is_ready(mocker, cli, conn): - cli._init_connect(0) - cli._init_connect(1) +def test_is_ready(client_selector_mocked, conn): + client_selector_mocked._init_connect(0) + client_selector_mocked._init_connect(1) # metadata refresh blocks ready nodes - assert cli.is_ready(0) - assert cli.is_ready(1) - cli._metadata_refresh_in_progress = True - assert not cli.is_ready(0) - assert not cli.is_ready(1) + assert client_selector_mocked.is_ready(0) + assert client_selector_mocked.is_ready(1) + client_selector_mocked._metadata_refresh_in_progress = True + assert not client_selector_mocked.is_ready(0) + assert not client_selector_mocked.is_ready(1) # requesting metadata update also blocks ready nodes - cli._metadata_refresh_in_progress = False - assert cli.is_ready(0) - assert cli.is_ready(1) - cli.cluster.request_update() - cli.cluster.config['retry_backoff_ms'] = 0 - assert not cli._metadata_refresh_in_progress - assert not cli.is_ready(0) - assert not cli.is_ready(1) - cli.cluster._need_update = False + client_selector_mocked._metadata_refresh_in_progress = False + assert client_selector_mocked.is_ready(0) + assert client_selector_mocked.is_ready(1) + client_selector_mocked.cluster.request_update() + client_selector_mocked.cluster.config['retry_backoff_ms'] = 0 + assert not client_selector_mocked._metadata_refresh_in_progress + assert not client_selector_mocked.is_ready(0) + assert not client_selector_mocked.is_ready(1) + client_selector_mocked.cluster._need_update = False # if connection can't send more, not ready - assert cli.is_ready(0) + assert client_selector_mocked.is_ready(0) conn.can_send_more.return_value = False - assert not cli.is_ready(0) + assert not client_selector_mocked.is_ready(0) conn.can_send_more.return_value = True # disconnected nodes, not ready - assert cli.is_ready(0) + assert client_selector_mocked.is_ready(0) conn.state = ConnectionStates.DISCONNECTED - assert not cli.is_ready(0) + assert not client_selector_mocked.is_ready(0) -def test_close(mocker, cli, conn): - mocker.patch.object(cli, '_selector') - +def test_close(client_selector_mocked, conn): call_count = conn.close.call_count # Unknown node - silent - cli.close(2) + client_selector_mocked.close(2) call_count += 0 assert conn.close.call_count 
== call_count # Single node close - cli._init_connect(0) + client_selector_mocked._init_connect(0) assert conn.close.call_count == call_count - cli.close(0) + client_selector_mocked.close(0) call_count += 1 assert conn.close.call_count == call_count # All node close - cli._init_connect(1) - cli.close() + client_selector_mocked._init_connect(1) + client_selector_mocked.close() # +2 close: node 1, node bootstrap (node 0 already closed) call_count += 2 assert conn.close.call_count == call_count -def test_is_disconnected(cli, conn): +def test_is_disconnected(client_selector_mocked, conn): # False if not connected yet conn.state = ConnectionStates.DISCONNECTED - assert not cli.is_disconnected(0) + assert not client_selector_mocked.is_disconnected(0) - cli._init_connect(0) - assert cli.is_disconnected(0) + client_selector_mocked._init_connect(0) + assert client_selector_mocked.is_disconnected(0) conn.state = ConnectionStates.CONNECTING - assert not cli.is_disconnected(0) + assert not client_selector_mocked.is_disconnected(0) conn.state = ConnectionStates.CONNECTED - assert not cli.is_disconnected(0) + assert not client_selector_mocked.is_disconnected(0) -def test_send(cli, conn): +def test_send(client_selector_mocked, conn): # Send to unknown node => raises AssertionError try: - cli.send(2, None) + client_selector_mocked.send(2, None) assert False, 'Exception not raised' except AssertionError: pass # Send to disconnected node => NodeNotReady conn.state = ConnectionStates.DISCONNECTED - f = cli.send(0, None) + f = client_selector_mocked.send(0, None) assert f.failed() assert isinstance(f.exception, Errors.NodeNotReadyError) conn.state = ConnectionStates.CONNECTED - cli._init_connect(0) + client_selector_mocked._init_connect(0) # ProduceRequest w/ 0 required_acks -> no response request = ProduceRequest[0](0, 0, []) assert request.expect_response() is False - ret = cli.send(0, request) + ret = client_selector_mocked.send(0, request) conn.send.assert_called_with(request, blocking=False, request_timeout_ms=None) assert isinstance(ret, Future) request = MetadataRequest[0]([]) - cli.send(0, request) + client_selector_mocked.send(0, request) conn.send.assert_called_with(request, blocking=False, request_timeout_ms=None) -def test_poll(mocker): - metadata = mocker.patch.object(KafkaClient, '_maybe_refresh_metadata') - ifr_request_timeout = mocker.patch.object(KafkaClient, '_next_ifr_request_timeout_ms') - _poll = mocker.patch.object(KafkaClient, '_poll') - cli = KafkaClient(api_version=(0, 9)) +def test_poll(mocker, client_poll_mocked): + metadata = mocker.patch.object(client_poll_mocked, '_maybe_refresh_metadata') + ifr_request_timeout = mocker.patch.object(client_poll_mocked, '_next_ifr_request_timeout_ms') + now = time.time() + t = mocker.patch('time.time') + t.return_value = now # metadata timeout wins ifr_request_timeout.return_value = float('inf') metadata.return_value = 1000 - cli.poll() - _poll.assert_called_with(1.0) + client_poll_mocked.poll() + client_poll_mocked._poll.assert_called_with(1.0) # user timeout wins - cli.poll(timeout_ms=250) - _poll.assert_called_with(0.25) + client_poll_mocked.poll(timeout_ms=250) + client_poll_mocked._poll.assert_called_with(0.25) # ifr request timeout wins ifr_request_timeout.return_value = 30000 metadata.return_value = 1000000 - cli.poll() - _poll.assert_called_with(30.0) + client_poll_mocked.poll() + client_poll_mocked._poll.assert_called_with(30.0) def test__poll(): @@ -284,80 +300,66 @@ def test_set_topics(mocker): request_update.assert_not_called() 
-@pytest.fixture -def client(mocker): - _poll = mocker.patch.object(KafkaClient, '_poll') - - cli = KafkaClient(request_timeout_ms=9999999, - reconnect_backoff_ms=2222, - connections_max_idle_ms=float('inf'), - api_version=(0, 9)) - - ttl = mocker.patch.object(cli.cluster, 'ttl') - ttl.return_value = 0 - return cli - - -def test_maybe_refresh_metadata_ttl(mocker, client): - client.cluster.ttl.return_value = 1234 +def test_maybe_refresh_metadata_ttl(client_poll_mocked): + client_poll_mocked.cluster.ttl.return_value = 1234 - client.poll(timeout_ms=12345678) - client._poll.assert_called_with(1.234) + client_poll_mocked.poll(timeout_ms=12345678) + client_poll_mocked._poll.assert_called_with(1.234) -def test_maybe_refresh_metadata_backoff(mocker, client): - mocker.patch.object(client, 'least_loaded_node', return_value=None) - mocker.patch.object(client, 'least_loaded_node_refresh_ms', return_value=4321) +def test_maybe_refresh_metadata_backoff(mocker, client_poll_mocked): + mocker.patch.object(client_poll_mocked, 'least_loaded_node', return_value=None) + mocker.patch.object(client_poll_mocked, 'least_loaded_node_refresh_ms', return_value=4321) now = time.time() t = mocker.patch('time.time') t.return_value = now - client.poll(timeout_ms=12345678) - client._poll.assert_called_with(4.321) + client_poll_mocked.poll(timeout_ms=12345678) + client_poll_mocked._poll.assert_called_with(4.321) -def test_maybe_refresh_metadata_in_progress(mocker, client): - client._metadata_refresh_in_progress = True +def test_maybe_refresh_metadata_in_progress(client_poll_mocked): + client_poll_mocked._metadata_refresh_in_progress = True - client.poll(timeout_ms=12345678) - client._poll.assert_called_with(9999.999) # request_timeout_ms + client_poll_mocked.poll(timeout_ms=12345678) + client_poll_mocked._poll.assert_called_with(9999.999) # request_timeout_ms -def test_maybe_refresh_metadata_update(mocker, client): - mocker.patch.object(client, 'least_loaded_node', return_value='foobar') - mocker.patch.object(client, '_can_send_request', return_value=True) - send = mocker.patch.object(client, 'send') +def test_maybe_refresh_metadata_update(mocker, client_poll_mocked): + mocker.patch.object(client_poll_mocked, 'least_loaded_node', return_value='foobar') + mocker.patch.object(client_poll_mocked, '_can_send_request', return_value=True) + send = mocker.patch.object(client_poll_mocked, 'send') + client_poll_mocked.cluster.need_all_topic_metadata = True - client.poll(timeout_ms=12345678) - client._poll.assert_called_with(9999.999) # request_timeout_ms - assert client._metadata_refresh_in_progress + client_poll_mocked.poll(timeout_ms=12345678) + client_poll_mocked._poll.assert_called_with(9999.999) # request_timeout_ms + assert client_poll_mocked._metadata_refresh_in_progress request = MetadataRequest[0]([]) send.assert_called_once_with('foobar', request, wakeup=False) -def test_maybe_refresh_metadata_cant_send(mocker, client): - mocker.patch.object(client, 'least_loaded_node', return_value='foobar') - mocker.patch.object(client, '_can_send_request', return_value=False) - mocker.patch.object(client, '_can_connect', return_value=True) - mocker.patch.object(client, '_init_connect', return_value=True) +def test_maybe_refresh_metadata_cant_send(mocker, client_poll_mocked): + mocker.patch.object(client_poll_mocked, 'least_loaded_node', return_value='foobar') + mocker.patch.object(client_poll_mocked, '_can_send_request', return_value=False) + mocker.patch.object(client_poll_mocked, '_can_connect', return_value=True) + 
mocker.patch.object(client_poll_mocked, '_init_connect', return_value=True) now = time.time() t = mocker.patch('time.time') t.return_value = now # first poll attempts connection - client.poll(timeout_ms=12345678) - client._poll.assert_called_with(12345.678) - client._init_connect.assert_called_once_with('foobar') + client_poll_mocked.poll() + client_poll_mocked._poll.assert_called() + client_poll_mocked._init_connect.assert_called_once_with('foobar') # poll while connecting should not attempt a new connection - client._connecting.add('foobar') - client._can_connect.reset_mock() - client.poll(timeout_ms=12345678) - client._poll.assert_called_with(12345.678) - assert not client._can_connect.called - - assert not client._metadata_refresh_in_progress + client_poll_mocked._connecting.add('foobar') + client_poll_mocked._can_connect.reset_mock() + client_poll_mocked.poll() + client_poll_mocked._poll.assert_called() + assert not client_poll_mocked._can_connect.called + assert not client_poll_mocked._metadata_refresh_in_progress def test_schedule(): diff --git a/test/test_cluster.py b/test/test_cluster.py index b55bdc5ad..730b27cb6 100644 --- a/test/test_cluster.py +++ b/test/test_cluster.py @@ -1,9 +1,9 @@ # pylint: skip-file from __future__ import absolute_import -import pytest +import socket -from kafka.cluster import ClusterMetadata +from kafka.cluster import ClusterMetadata, collect_hosts from kafka.protocol.metadata import MetadataResponse @@ -134,3 +134,75 @@ def test_metadata_v7(): assert cluster.cluster_id == 'cluster-foo' assert cluster._partitions['topic-1'][0].offline_replicas == [12] assert cluster._partitions['topic-1'][0].leader_epoch == 0 + + +def test_unauthorized_topic(): + cluster = ClusterMetadata() + assert len(cluster.brokers()) == 0 + + cluster.update_metadata(MetadataResponse[0]( + [(0, 'foo', 12), (1, 'bar', 34)], + [(29, 'unauthorized-topic', [])])) # single topic w/ unauthorized error + + # broker metadata should get updated + assert len(cluster.brokers()) == 2 + + # topic should be added to unauthorized list + assert 'unauthorized-topic' in cluster.unauthorized_topics + + +def test_collect_hosts__happy_path(): + hosts = "127.0.0.1:1234,127.0.0.1" + results = collect_hosts(hosts) + assert set(results) == set([ + ('127.0.0.1', 1234, socket.AF_INET), + ('127.0.0.1', 9092, socket.AF_INET), + ]) + + +def test_collect_hosts__ipv6(): + hosts = "[localhost]:1234,[2001:1000:2000::1],[2001:1000:2000::1]:1234" + results = collect_hosts(hosts) + assert set(results) == set([ + ('localhost', 1234, socket.AF_INET6), + ('2001:1000:2000::1', 9092, socket.AF_INET6), + ('2001:1000:2000::1', 1234, socket.AF_INET6), + ]) + + +def test_collect_hosts__string_list(): + hosts = [ + 'localhost:1234', + 'localhost', + '[localhost]', + '2001::1', + '[2001::1]', + '[2001::1]:1234', + ] + results = collect_hosts(hosts) + assert set(results) == set([ + ('localhost', 1234, socket.AF_UNSPEC), + ('localhost', 9092, socket.AF_UNSPEC), + ('localhost', 9092, socket.AF_INET6), + ('2001::1', 9092, socket.AF_INET6), + ('2001::1', 9092, socket.AF_INET6), + ('2001::1', 1234, socket.AF_INET6), + ]) + + +def test_collect_hosts__with_spaces(): + hosts = "localhost:1234, localhost" + results = collect_hosts(hosts) + assert set(results) == set([ + ('localhost', 1234, socket.AF_UNSPEC), + ('localhost', 9092, socket.AF_UNSPEC), + ]) + + +def test_collect_hosts__protocol(): + hosts = "SASL_SSL://foo.bar:1234,SASL_SSL://fizz.buzz:5678" + results = collect_hosts(hosts) + assert set(results) == set([ + ('foo.bar', 1234, 
socket.AF_UNSPEC), + ('fizz.buzz', 5678, socket.AF_UNSPEC), + ]) diff --git a/test/test_conn.py b/test/test_conn.py index 47f5c428e..8d56668c5 100644 --- a/test/test_conn.py +++ b/test/test_conn.py @@ -4,10 +4,14 @@ from errno import EALREADY, EINPROGRESS, EISCONN, ECONNRESET import socket -import mock +try: + from unittest import mock +except ImportError: + import mock import pytest -from kafka.conn import BrokerConnection, ConnectionStates, collect_hosts +from kafka.conn import BrokerConnection, ConnectionStates +from kafka.future import Future from kafka.protocol.api import RequestHeader from kafka.protocol.group import HeartbeatResponse from kafka.protocol.metadata import MetadataRequest @@ -15,6 +19,13 @@ import kafka.errors as Errors +from kafka.vendor import six + +if six.PY2: + ConnectionError = socket.error + TimeoutError = socket.error + BlockingIOError = Exception + @pytest.fixture def dns_lookup(mocker): @@ -27,13 +38,16 @@ def dns_lookup(mocker): def _socket(mocker): socket = mocker.MagicMock() socket.connect_ex.return_value = 0 + socket.send.side_effect = lambda d: len(d) + socket.recv.side_effect = BlockingIOError("mocked recv") mocker.patch('socket.socket', return_value=socket) return socket @pytest.fixture -def conn(_socket, dns_lookup): +def conn(_socket, dns_lookup, mocker): conn = BrokerConnection('localhost', 9092, socket.AF_INET) + mocker.patch.object(conn, '_try_api_versions_check', return_value=True) return conn @@ -56,6 +70,38 @@ def test_connect(_socket, conn, states): assert conn.state is state +def test_api_versions_check(_socket, mocker): + conn = BrokerConnection('localhost', 9092, socket.AF_INET) + mocker.patch.object(conn, '_send', return_value=Future()) + mocker.patch.object(conn, 'recv', return_value=[]) + assert conn._api_versions_future is None + conn.connect() + assert conn._api_versions_future is not None + assert conn.connecting() is True + assert conn.state is ConnectionStates.API_VERSIONS_RECV + + assert conn._try_api_versions_check() is False + assert conn.connecting() is True + assert conn.state is ConnectionStates.API_VERSIONS_RECV + + conn._api_versions_future = None + conn._check_version_idx = 0 + assert conn._try_api_versions_check() is False + assert conn.connecting() is True + + conn._check_version_idx = len(conn.VERSION_CHECKS) + conn._api_versions_future = None + assert conn._try_api_versions_check() is False + assert conn.connecting() is False + assert conn.disconnected() is True + + +def test_api_versions_check_unrecognized(_socket): + conn = BrokerConnection('localhost', 9092, socket.AF_INET, api_version=(0, 0)) + with pytest.raises(Errors.UnrecognizedBrokerVersion): + conn.connect() + + def test_connect_timeout(_socket, conn): assert conn.state is ConnectionStates.DISCONNECTED @@ -217,12 +263,13 @@ def test_recv_disconnected(_socket, conn): conn.send(req) # Empty data on recv means the socket is disconnected + _socket.recv.side_effect = None _socket.recv.return_value = b'' # Attempt to receive should mark connection as disconnected - assert conn.connected() + assert conn.connected(), 'Not connected: %s' % conn.state conn.recv() - assert conn.disconnected() + assert conn.disconnected(), 'Not disconnected: %s' % conn.state def test_recv(_socket, conn): @@ -233,54 +280,6 @@ def test_close(conn): pass # TODO -def test_collect_hosts__happy_path(): - hosts = "127.0.0.1:1234,127.0.0.1" - results = collect_hosts(hosts) - assert set(results) == set([ - ('127.0.0.1', 1234, socket.AF_INET), - ('127.0.0.1', 9092, socket.AF_INET), - ]) - - -def 
test_collect_hosts__ipv6(): - hosts = "[localhost]:1234,[2001:1000:2000::1],[2001:1000:2000::1]:1234" - results = collect_hosts(hosts) - assert set(results) == set([ - ('localhost', 1234, socket.AF_INET6), - ('2001:1000:2000::1', 9092, socket.AF_INET6), - ('2001:1000:2000::1', 1234, socket.AF_INET6), - ]) - - -def test_collect_hosts__string_list(): - hosts = [ - 'localhost:1234', - 'localhost', - '[localhost]', - '2001::1', - '[2001::1]', - '[2001::1]:1234', - ] - results = collect_hosts(hosts) - assert set(results) == set([ - ('localhost', 1234, socket.AF_UNSPEC), - ('localhost', 9092, socket.AF_UNSPEC), - ('localhost', 9092, socket.AF_INET6), - ('2001::1', 9092, socket.AF_INET6), - ('2001::1', 9092, socket.AF_INET6), - ('2001::1', 1234, socket.AF_INET6), - ]) - - -def test_collect_hosts__with_spaces(): - hosts = "localhost:1234, localhost" - results = collect_hosts(hosts) - assert set(results) == set([ - ('localhost', 1234, socket.AF_UNSPEC), - ('localhost', 9092, socket.AF_UNSPEC), - ]) - - def test_lookup_on_connect(): hostname = 'example.org' port = 9092 @@ -387,3 +386,13 @@ def test_maybe_throttle(conn): time.return_value = 3000 assert not conn.throttled() + + +def test_host_in_sasl_config(): + hostname = 'example.org' + port = 9092 + for security_protocol in ('SASL_PLAINTEXT', 'SASL_SSL'): + with mock.patch("kafka.conn.get_sasl_mechanism") as get_sasl_mechanism: + BrokerConnection(hostname, port, socket.AF_UNSPEC, security_protocol=security_protocol) + call_config = get_sasl_mechanism.mock_calls[1].kwargs + assert call_config['host'] == hostname diff --git a/test/test_consumer.py b/test/test_consumer.py index 8186125df..0d9477729 100644 --- a/test/test_consumer.py +++ b/test/test_consumer.py @@ -1,26 +1,52 @@ +from __future__ import absolute_import + import pytest -from kafka import KafkaConsumer -from kafka.errors import KafkaConfigurationError +from kafka import KafkaConsumer, TopicPartition +from kafka.errors import KafkaConfigurationError, IllegalStateError + + +def test_session_timeout_larger_than_request_timeout_raises(): + with pytest.raises(KafkaConfigurationError): + KafkaConsumer(bootstrap_servers='localhost:9092', api_version=(0, 9), group_id='foo', session_timeout_ms=50000, request_timeout_ms=40000) + + +def test_fetch_max_wait_larger_than_request_timeout_raises(): + with pytest.raises(KafkaConfigurationError): + KafkaConsumer(bootstrap_servers='localhost:9092', fetch_max_wait_ms=50000, request_timeout_ms=40000) + + +def test_request_timeout_larger_than_connections_max_idle_ms_raises(): + with pytest.raises(KafkaConfigurationError): + KafkaConsumer(bootstrap_servers='localhost:9092', api_version=(0, 9), request_timeout_ms=50000, connections_max_idle_ms=40000) + +def test_subscription_copy(): + consumer = KafkaConsumer('foo', api_version=(0, 10, 0)) + sub = consumer.subscription() + assert sub is not consumer.subscription() + assert sub == set(['foo']) + sub.add('fizz') + assert consumer.subscription() == set(['foo']) -class TestKafkaConsumer: - def test_session_timeout_larger_than_request_timeout_raises(self): - with pytest.raises(KafkaConfigurationError): - KafkaConsumer(bootstrap_servers='localhost:9092', api_version=(0, 9), group_id='foo', session_timeout_ms=50000, request_timeout_ms=40000) - def test_fetch_max_wait_larger_than_request_timeout_raises(self): - with pytest.raises(KafkaConfigurationError): - KafkaConsumer(bootstrap_servers='localhost:9092', fetch_max_wait_ms=50000, request_timeout_ms=40000) +def test_assign(): + # Consumer w/ subscription to topic 'foo' 
+ consumer = KafkaConsumer('foo', api_version=(0, 10, 0)) + assert consumer.assignment() == set() + # Cannot assign manually + with pytest.raises(IllegalStateError): + consumer.assign([TopicPartition('foo', 0)]) - def test_request_timeout_larger_than_connections_max_idle_ms_raises(self): - with pytest.raises(KafkaConfigurationError): - KafkaConsumer(bootstrap_servers='localhost:9092', api_version=(0, 9), request_timeout_ms=50000, connections_max_idle_ms=40000) + assert 'foo' in consumer._client._topics - def test_subscription_copy(self): - consumer = KafkaConsumer('foo', api_version=(0, 10, 0)) - sub = consumer.subscription() - assert sub is not consumer.subscription() - assert sub == set(['foo']) - sub.add('fizz') - assert consumer.subscription() == set(['foo']) + consumer = KafkaConsumer(api_version=(0, 10, 0)) + assert consumer.assignment() == set() + consumer.assign([TopicPartition('foo', 0)]) + assert consumer.assignment() == set([TopicPartition('foo', 0)]) + assert 'foo' in consumer._client._topics + # Cannot subscribe + with pytest.raises(IllegalStateError): + consumer.subscribe(topics=['foo']) + consumer.assign([]) + assert consumer.assignment() == set() diff --git a/test/test_coordinator.py b/test/test_coordinator.py index 09422790e..4ffe1d28c 100644 --- a/test/test_coordinator.py +++ b/test/test_coordinator.py @@ -16,7 +16,6 @@ ConsumerProtocolMemberMetadata, ConsumerProtocolMemberAssignment) import kafka.errors as Errors from kafka.future import Future -from kafka.metrics import Metrics from kafka.protocol.broker_api_versions import BROKER_API_VERSIONS from kafka.protocol.commit import ( OffsetCommitRequest, OffsetCommitResponse, @@ -27,12 +26,13 @@ @pytest.fixture -def client(conn): - return KafkaClient(api_version=(0, 9)) - -@pytest.fixture -def coordinator(client): - return ConsumerCoordinator(client, SubscriptionState(), Metrics()) +def coordinator(client, metrics, mocker): + coord = ConsumerCoordinator(client, SubscriptionState(), metrics=metrics) + try: + yield coord + finally: + mocker.patch.object(coord, 'coordinator_unknown', return_value=True) # avoid attempting to leave group during close() + coord.close(timeout_ms=0) def test_init(client, coordinator): @@ -42,10 +42,10 @@ def test_init(client, coordinator): @pytest.mark.parametrize("api_version", [(0, 8, 0), (0, 8, 1), (0, 8, 2), (0, 9)]) -def test_autocommit_enable_api_version(conn, api_version): +def test_autocommit_enable_api_version(conn, metrics, api_version): coordinator = ConsumerCoordinator(KafkaClient(api_version=api_version), SubscriptionState(), - Metrics(), + metrics=metrics, enable_auto_commit=True, session_timeout_ms=30000, # session_timeout_ms and max_poll_interval_ms max_poll_interval_ms=30000, # should be the same to avoid KafkaConfigurationError @@ -55,6 +55,7 @@ def test_autocommit_enable_api_version(conn, api_version): assert coordinator.config['enable_auto_commit'] is False else: assert coordinator.config['enable_auto_commit'] is True + coordinator.close() def test_protocol_type(coordinator): @@ -88,10 +89,10 @@ def test_group_protocols(coordinator): @pytest.mark.parametrize('api_version', [(0, 8, 0), (0, 8, 1), (0, 8, 2), (0, 9)]) -def test_pattern_subscription(conn, api_version): +def test_pattern_subscription(conn, metrics, api_version): coordinator = ConsumerCoordinator(KafkaClient(api_version=api_version), SubscriptionState(), - Metrics(), + metrics=metrics, api_version=api_version, session_timeout_ms=10000, max_poll_interval_ms=10000) @@ -117,6 +118,7 @@ def test_pattern_subscription(conn, 
api_version): else: assert set(coordinator._subscription.assignment.keys()) == {TopicPartition('foo1', 0), TopicPartition('foo2', 0)} + coordinator.close() def test_lookup_assignor(coordinator): @@ -189,6 +191,7 @@ def test_subscription_listener_failure(mocker, coordinator): def test_perform_assignment(mocker, coordinator): + coordinator._subscription.subscribe(topics=['foo1']) member_metadata = { 'member-foo': ConsumerProtocolMemberMetadata(0, ['foo1'], b''), 'member-bar': ConsumerProtocolMemberMetadata(0, ['foo1'], b'') @@ -228,17 +231,23 @@ def test_need_rejoin(coordinator): def test_refresh_committed_offsets_if_needed(mocker, coordinator): + tp0 = TopicPartition('foobar', 0) + tp1 = TopicPartition('foobar', 1) mocker.patch.object(ConsumerCoordinator, 'fetch_committed_offsets', return_value = { - TopicPartition('foobar', 0): OffsetAndMetadata(123, '', -1), - TopicPartition('foobar', 1): OffsetAndMetadata(234, '', -1)}) - coordinator._subscription.assign_from_user([TopicPartition('foobar', 0)]) - assert coordinator._subscription.needs_fetch_committed_offsets is True + tp0: OffsetAndMetadata(123, '', -1), + tp1: OffsetAndMetadata(234, '', -1)}) + coordinator._subscription.assign_from_user([tp0, tp1]) + coordinator._subscription.request_offset_reset(tp0) + coordinator._subscription.request_offset_reset(tp1) + assert coordinator._subscription.is_offset_reset_needed(tp0) + assert coordinator._subscription.is_offset_reset_needed(tp1) coordinator.refresh_committed_offsets_if_needed() assignment = coordinator._subscription.assignment - assert assignment[TopicPartition('foobar', 0)].committed == OffsetAndMetadata(123, '', -1) - assert TopicPartition('foobar', 1) not in assignment - assert coordinator._subscription.needs_fetch_committed_offsets is False + assert assignment[tp0].position == OffsetAndMetadata(123, '', -1) + assert assignment[tp1].position == OffsetAndMetadata(234, '', -1) + assert not coordinator._subscription.is_offset_reset_needed(tp0) + assert not coordinator._subscription.is_offset_reset_needed(tp1) def test_fetch_committed_offsets(mocker, coordinator): @@ -295,7 +304,7 @@ def test_close(mocker, coordinator): coordinator._handle_leave_group_response.assert_called_with('foobar') assert coordinator.generation() is None - assert coordinator._generation is Generation.NO_GENERATION + assert coordinator._generation == Generation.NO_GENERATION assert coordinator.state is MemberState.UNJOINED assert coordinator.rejoin_needed is True @@ -376,7 +385,6 @@ def test_maybe_auto_commit_offsets_sync(mocker, api_version, group_id, enable, mock_exc = mocker.patch('kafka.coordinator.consumer.log.exception') client = KafkaClient(api_version=api_version) coordinator = ConsumerCoordinator(client, SubscriptionState(), - Metrics(), api_version=api_version, session_timeout_ms=30000, max_poll_interval_ms=30000, @@ -397,6 +405,7 @@ def test_maybe_auto_commit_offsets_sync(mocker, api_version, group_id, enable, assert commit_sync.call_count == (1 if commit_offsets else 0) assert mock_warn.call_count == (1 if warn else 0) assert mock_exc.call_count == (1 if exc else 0) + coordinator.close() @pytest.fixture @@ -433,7 +442,7 @@ def test_send_offset_commit_request_fail(mocker, patched_coord, offsets): # No coordinator ret = patched_coord._send_offset_commit_request(offsets) assert ret.failed() - assert isinstance(ret.exception, Errors.GroupCoordinatorNotAvailableError) + assert isinstance(ret.exception, Errors.CoordinatorNotAvailableError) @pytest.mark.parametrize('api_version,req_type', [ @@ -486,11 +495,11 
@@ def test_send_offset_commit_request_success(mocker, patched_coord, offsets): (OffsetCommitResponse[0]([('foobar', [(0, 28), (1, 28)])]), Errors.InvalidCommitOffsetSizeError, False), (OffsetCommitResponse[0]([('foobar', [(0, 14), (1, 14)])]), - Errors.GroupLoadInProgressError, False), + Errors.CoordinatorLoadInProgressError, False), (OffsetCommitResponse[0]([('foobar', [(0, 15), (1, 15)])]), - Errors.GroupCoordinatorNotAvailableError, True), + Errors.CoordinatorNotAvailableError, True), (OffsetCommitResponse[0]([('foobar', [(0, 16), (1, 16)])]), - Errors.NotCoordinatorForGroupError, True), + Errors.NotCoordinatorError, True), (OffsetCommitResponse[0]([('foobar', [(0, 7), (1, 7)])]), Errors.RequestTimedOutError, True), (OffsetCommitResponse[0]([('foobar', [(0, 25), (1, 25)])]), @@ -546,7 +555,7 @@ def test_send_offset_fetch_request_fail(mocker, patched_coord, partitions): # No coordinator ret = patched_coord._send_offset_fetch_request(partitions) assert ret.failed() - assert isinstance(ret.exception, Errors.GroupCoordinatorNotAvailableError) + assert isinstance(ret.exception, Errors.CoordinatorNotAvailableError) @pytest.mark.parametrize('api_version,req_type', [ @@ -595,9 +604,9 @@ def test_send_offset_fetch_request_success(patched_coord, partitions): @pytest.mark.parametrize('response,error,dead', [ (OffsetFetchResponse[0]([('foobar', [(0, 123, '', 14), (1, 234, '', 14)])]), - Errors.GroupLoadInProgressError, False), + Errors.CoordinatorLoadInProgressError, False), (OffsetFetchResponse[0]([('foobar', [(0, 123, '', 16), (1, 234, '', 16)])]), - Errors.NotCoordinatorForGroupError, True), + Errors.NotCoordinatorError, True), (OffsetFetchResponse[0]([('foobar', [(0, 123, '', 25), (1, 234, '', 25)])]), Errors.UnknownMemberIdError, False), (OffsetFetchResponse[0]([('foobar', [(0, 123, '', 22), (1, 234, '', 22)])]), @@ -649,6 +658,7 @@ def test_heartbeat(mocker, patched_coord): heartbeat.enable() patched_coord.state = MemberState.STABLE mocker.spy(patched_coord, '_send_heartbeat_request') + mocker.patch.object(patched_coord, 'connected', return_value=True) mocker.patch.object(patched_coord.heartbeat, 'should_heartbeat', return_value=True) heartbeat._run_once() assert patched_coord._send_heartbeat_request.call_count == 1 diff --git a/test/test_fetcher.py b/test/test_fetcher.py index 7e948e3cb..4794563ed 100644 --- a/test/test_fetcher.py +++ b/test/test_fetcher.py @@ -8,30 +8,24 @@ import itertools import time -from kafka.client_async import KafkaClient from kafka.consumer.fetcher import ( - CompletedFetch, ConsumerRecord, Fetcher, NoOffsetForPartitionError + CompletedFetch, ConsumerRecord, Fetcher ) from kafka.consumer.subscription_state import SubscriptionState import kafka.errors as Errors from kafka.future import Future -from kafka.metrics import Metrics from kafka.protocol.broker_api_versions import BROKER_API_VERSIONS from kafka.protocol.fetch import FetchRequest, FetchResponse -from kafka.protocol.list_offsets import ListOffsetsResponse +from kafka.protocol.list_offsets import ListOffsetsResponse, OffsetResetStrategy from kafka.errors import ( - StaleMetadata, LeaderNotAvailableError, NotLeaderForPartitionError, + StaleMetadata, NotLeaderForPartitionError, UnknownTopicOrPartitionError, OffsetOutOfRangeError ) +from kafka.future import Future from kafka.record.memory_records import MemoryRecordsBuilder, MemoryRecords from kafka.structs import OffsetAndMetadata, OffsetAndTimestamp, TopicPartition -@pytest.fixture -def client(): - return KafkaClient(bootstrap_servers=(), api_version=(0, 9)) - 
- @pytest.fixture def subscription_state(): return SubscriptionState() @@ -43,18 +37,18 @@ def topic(): @pytest.fixture -def fetcher(client, subscription_state, topic): +def fetcher(client, metrics, subscription_state, topic): subscription_state.subscribe(topics=[topic]) assignment = [TopicPartition(topic, i) for i in range(3)] subscription_state.assign_from_subscribed(assignment) for tp in assignment: subscription_state.seek(tp, 0) - return Fetcher(client, subscription_state, Metrics()) + return Fetcher(client, subscription_state, metrics=metrics) -def _build_record_batch(msgs, compression=0): +def _build_record_batch(msgs, compression=0, offset=0, magic=2): builder = MemoryRecordsBuilder( - magic=1, compression_type=0, batch_size=9999999) + magic=magic, compression_type=0, batch_size=9999999, offset=offset) for msg in msgs: key, value, timestamp = msg builder.append(key=key, value=value, timestamp=timestamp, headers=[]) @@ -109,54 +103,59 @@ def test_create_fetch_requests(fetcher, mocker, api_version, fetch_version): fetcher._client._api_versions = BROKER_API_VERSIONS[api_version] mocker.patch.object(fetcher._client.cluster, "leader_for_partition", return_value=0) mocker.patch.object(fetcher._client.cluster, "leader_epoch_for_partition", return_value=0) + mocker.patch.object(fetcher._client, "ready", return_value=True) by_node = fetcher._create_fetch_requests() requests_and_offsets = by_node.values() assert set([r.API_VERSION for (r, _offsets) in requests_and_offsets]) == set([fetch_version]) -def test_update_fetch_positions(fetcher, topic, mocker): - mocker.patch.object(fetcher, '_reset_offset') +def test_reset_offsets_if_needed(fetcher, topic, mocker): + mocker.patch.object(fetcher, '_reset_offsets_async') partition = TopicPartition(topic, 0) - # unassigned partition - fetcher.update_fetch_positions([TopicPartition('fizzbuzz', 0)]) - assert fetcher._reset_offset.call_count == 0 - # fetchable partition (has offset, not paused) - fetcher.update_fetch_positions([partition]) - assert fetcher._reset_offset.call_count == 0 - - # partition needs reset, no committed offset - fetcher._subscriptions.need_offset_reset(partition) - fetcher._subscriptions.assignment[partition].awaiting_reset = False - fetcher.update_fetch_positions([partition]) - fetcher._reset_offset.assert_called_with(partition) - assert fetcher._subscriptions.assignment[partition].awaiting_reset is True - fetcher.update_fetch_positions([partition]) - fetcher._reset_offset.assert_called_with(partition) + fetcher.reset_offsets_if_needed() + assert fetcher._reset_offsets_async.call_count == 0 - # partition needs reset, has committed offset - fetcher._reset_offset.reset_mock() - fetcher._subscriptions.need_offset_reset(partition) - fetcher._subscriptions.assignment[partition].awaiting_reset = False - fetcher._subscriptions.assignment[partition].committed = OffsetAndMetadata(123, '', -1) - mocker.patch.object(fetcher._subscriptions, 'seek') - fetcher.update_fetch_positions([partition]) - assert fetcher._reset_offset.call_count == 0 - fetcher._subscriptions.seek.assert_called_with(partition, 123) - - -def test__reset_offset(fetcher, mocker): - tp = TopicPartition("topic", 0) - fetcher._subscriptions.subscribe(topics="topic") - fetcher._subscriptions.assign_from_subscribed([tp]) - fetcher._subscriptions.need_offset_reset(tp) - mocked = mocker.patch.object(fetcher, '_retrieve_offsets') - - mocked.return_value = {tp: OffsetAndTimestamp(1001, None, -1)} - fetcher._reset_offset(tp) - assert not 
fetcher._subscriptions.assignment[tp].awaiting_reset - assert fetcher._subscriptions.assignment[tp].position.offset == 1001 + # partition needs reset, no valid position + fetcher._subscriptions.request_offset_reset(partition) + fetcher.reset_offsets_if_needed() + fetcher._reset_offsets_async.assert_called_with({partition: OffsetResetStrategy.EARLIEST}) + assert fetcher._subscriptions.assignment[partition].awaiting_reset is True + fetcher.reset_offsets_if_needed() + fetcher._reset_offsets_async.assert_called_with({partition: OffsetResetStrategy.EARLIEST}) + + # partition needs reset, has valid position + fetcher._reset_offsets_async.reset_mock() + fetcher._subscriptions.request_offset_reset(partition) + fetcher._subscriptions.seek(partition, 123) + fetcher.reset_offsets_if_needed() + assert fetcher._reset_offsets_async.call_count == 0 + + +def test__reset_offsets_async(fetcher, mocker): + tp0 = TopicPartition("topic", 0) + tp1 = TopicPartition("topic", 1) + fetcher._subscriptions.subscribe(topics=["topic"]) + fetcher._subscriptions.assign_from_subscribed([tp0, tp1]) + fetcher._subscriptions.request_offset_reset(tp0) + fetcher._subscriptions.request_offset_reset(tp1) + leaders = {tp0: 0, tp1: 1} + mocker.patch.object(fetcher._client.cluster, "leader_for_partition", side_effect=lambda tp: leaders[tp]) + mocker.patch.object(fetcher._client, 'ready', return_value=True) + future1 = Future() + future2 = Future() + mocker.patch.object(fetcher, '_send_list_offsets_request', side_effect=[future1, future2]) + fetcher._reset_offsets_async({ + tp0: OffsetResetStrategy.EARLIEST, + tp1: OffsetResetStrategy.EARLIEST, + }) + future1.success(({tp0: OffsetAndTimestamp(1001, None, -1)}, set())), + future2.success(({tp1: OffsetAndTimestamp(1002, None, -1)}, set())), + assert not fetcher._subscriptions.assignment[tp0].awaiting_reset + assert not fetcher._subscriptions.assignment[tp1].awaiting_reset + assert fetcher._subscriptions.assignment[tp0].position.offset == 1001 + assert fetcher._subscriptions.assignment[tp1].position.offset == 1002 def test__send_list_offsets_requests(fetcher, mocker): @@ -187,7 +186,7 @@ def send_side_effect(*args, **kw): # Leader == -1 fut = fetcher._send_list_offsets_requests({tp: 0}) assert fut.failed() - assert isinstance(fut.exception, LeaderNotAvailableError) + assert isinstance(fut.exception, StaleMetadata) assert not mocked_send.called # Leader == 0, send failed @@ -204,9 +203,9 @@ def send_side_effect(*args, **kw): assert not fut.is_done assert mocked_send.called # Check that we bound the futures correctly to chain success - send_futures.pop().success({tp: (10, 10000)}) + send_futures.pop().success(({tp: (10, 10000)}, set())) assert fut.succeeded() - assert fut.value == {tp: (10, 10000)} + assert fut.value == ({tp: (10, 10000)}, set()) def test__send_list_offsets_requests_multiple_nodes(fetcher, mocker): @@ -240,7 +239,7 @@ def send_side_effect(node_id, timestamps): req_by_node[node] = timestamps if node == 0: # Say tp3 does not have any messages so it's missing - f.success({tp1: (11, 1001)}) + f.success(({tp1: (11, 1001)}, set())) else: second_future = f assert req_by_node == { @@ -250,15 +249,15 @@ def send_side_effect(node_id, timestamps): # We only resolved 1 future so far, so result future is not yet ready assert not fut.is_done - second_future.success({tp2: (12, 1002), tp4: (14, 1004)}) + second_future.success(({tp2: (12, 1002), tp4: (14, 1004)}, set())) assert fut.succeeded() - assert fut.value == {tp1: (11, 1001), tp2: (12, 1002), tp4: (14, 1004)} + assert fut.value == 
({tp1: (11, 1001), tp2: (12, 1002), tp4: (14, 1004)}, set()) # -- First succeeded second not del send_futures[:] fut = fetcher._send_list_offsets_requests(tss) assert len(send_futures) == 2 - send_futures[0][2].success({tp1: (11, 1001)}) + send_futures[0][2].success(({tp1: (11, 1001)}, set())) send_futures[1][2].failure(UnknownTopicOrPartitionError(tp1)) assert fut.failed() assert isinstance(fut.exception, UnknownTopicOrPartitionError) @@ -268,7 +267,7 @@ def send_side_effect(node_id, timestamps): fut = fetcher._send_list_offsets_requests(tss) assert len(send_futures) == 2 send_futures[0][2].failure(UnknownTopicOrPartitionError(tp1)) - send_futures[1][2].success({tp1: (11, 1001)}) + send_futures[1][2].success(({tp1: (11, 1001)}, set())) assert fut.failed() assert isinstance(fut.exception, UnknownTopicOrPartitionError) @@ -282,7 +281,7 @@ def test__handle_list_offsets_response_v1(fetcher, mocker): ]) fetcher._handle_list_offsets_response(fut, res) assert fut.succeeded() - assert fut.value == {TopicPartition("topic", 1): OffsetAndTimestamp(9999, 1000, -1)} + assert fut.value == ({TopicPartition("topic", 1): OffsetAndTimestamp(9999, 1000, -1)}, set()) # Broker returns NotLeaderForPartitionError fut = Future() @@ -290,8 +289,8 @@ def test__handle_list_offsets_response_v1(fetcher, mocker): ("topic", [(0, 6, -1, -1)]), ]) fetcher._handle_list_offsets_response(fut, res) - assert fut.failed() - assert isinstance(fut.exception, NotLeaderForPartitionError) + assert fut.succeeded() + assert fut.value == ({}, set([TopicPartition("topic", 0)])) # Broker returns UnknownTopicOrPartitionError fut = Future() @@ -299,21 +298,21 @@ def test__handle_list_offsets_response_v1(fetcher, mocker): ("topic", [(0, 3, -1, -1)]), ]) fetcher._handle_list_offsets_response(fut, res) - assert fut.failed() - assert isinstance(fut.exception, UnknownTopicOrPartitionError) + assert fut.succeeded() + assert fut.value == ({}, set([TopicPartition("topic", 0)])) # Broker returns many errors and 1 result - # Will fail on 1st error and return fut = Future() res = ListOffsetsResponse[1]([ - ("topic", [(0, 43, -1, -1)]), - ("topic", [(1, 6, -1, -1)]), - ("topic", [(2, 3, -1, -1)]), + ("topic", [(0, 43, -1, -1)]), # not retriable + ("topic", [(1, 6, -1, -1)]), # retriable + ("topic", [(2, 3, -1, -1)]), # retriable ("topic", [(3, 0, 1000, 9999)]) ]) fetcher._handle_list_offsets_response(fut, res) - assert fut.failed() - assert isinstance(fut.exception, NotLeaderForPartitionError) + assert fut.succeeded() + assert fut.value == ({TopicPartition("topic", 3): OffsetAndTimestamp(9999, 1000, -1)}, + set([TopicPartition("topic", 1), TopicPartition("topic", 2)])) def test__handle_list_offsets_response_v2_v3(fetcher, mocker): @@ -325,7 +324,7 @@ def test__handle_list_offsets_response_v2_v3(fetcher, mocker): ]) fetcher._handle_list_offsets_response(fut, res) assert fut.succeeded() - assert fut.value == {TopicPartition("topic", 0): OffsetAndTimestamp(9999, 1000, -1)} + assert fut.value == ({TopicPartition("topic", 0): OffsetAndTimestamp(9999, 1000, -1)}, set()) # v3 response is the same format fut = Future() @@ -335,7 +334,7 @@ def test__handle_list_offsets_response_v2_v3(fetcher, mocker): ]) fetcher._handle_list_offsets_response(fut, res) assert fut.succeeded() - assert fut.value == {TopicPartition("topic", 0): OffsetAndTimestamp(9999, 1000, -1)} + assert fut.value == ({TopicPartition("topic", 0): OffsetAndTimestamp(9999, 1000, -1)}, set()) def test__handle_list_offsets_response_v4_v5(fetcher, mocker): @@ -347,7 +346,7 @@ def 
test__handle_list_offsets_response_v4_v5(fetcher, mocker): ]) fetcher._handle_list_offsets_response(fut, res) assert fut.succeeded() - assert fut.value == {TopicPartition("topic", 0): OffsetAndTimestamp(9999, 1000, 1234)} + assert fut.value == ({TopicPartition("topic", 0): OffsetAndTimestamp(9999, 1000, 1234)}, set()) # v5 response is the same format fut = Future() @@ -357,7 +356,7 @@ def test__handle_list_offsets_response_v4_v5(fetcher, mocker): ]) fetcher._handle_list_offsets_response(fut, res) assert fut.succeeded() - assert fut.value == {TopicPartition("topic", 0): OffsetAndTimestamp(9999, 1000, 1234)} + assert fut.value == ({TopicPartition("topic", 0): OffsetAndTimestamp(9999, 1000, 1234)}, set()) def test_fetched_records(fetcher, topic, mocker): @@ -423,6 +422,7 @@ def test_fetched_records(fetcher, topic, mocker): ), ]) def test__handle_fetch_response(fetcher, fetch_offsets, fetch_response, num_partitions): + fetcher._nodes_with_pending_fetch_requests.add(0) fetcher._handle_fetch_response(0, fetch_offsets, time.time(), fetch_response) assert len(fetcher._completed_fetches) == num_partitions @@ -438,13 +438,13 @@ def test__handle_fetch_response(fetcher, fetch_offsets, fetch_response, num_part ) ]) def test__handle_fetch_error(fetcher, caplog, exception, log_level): + fetcher._nodes_with_pending_fetch_requests.add(3) fetcher._handle_fetch_error(3, exception) assert len(caplog.records) == 1 assert caplog.records[0].levelname == logging.getLevelName(log_level) -def test__unpack_records(fetcher): - fetcher.config['check_crcs'] = False +def test__unpack_records(mocker): tp = TopicPartition('foo', 0) messages = [ (None, b"a", None), @@ -452,7 +452,8 @@ def test__unpack_records(fetcher): (None, b"c", None), ] memory_records = MemoryRecords(_build_record_batch(messages)) - records = list(fetcher._unpack_records(tp, memory_records)) + part_records = Fetcher.PartitionRecords(0, tp, memory_records) + records = list(part_records.record_iterator) assert len(records) == 3 assert all(map(lambda x: isinstance(x, ConsumerRecord), records)) assert records[0].value == b'a' @@ -463,22 +464,21 @@ def test__unpack_records(fetcher): assert records[2].offset == 2 -def test__message_generator(fetcher, topic, mocker): - fetcher.config['check_crcs'] = False - tp = TopicPartition(topic, 0) - msgs = [] - for i in range(10): - msgs.append((None, b"foo", None)) - completed_fetch = CompletedFetch( - tp, 0, 0, [0, 100, _build_record_batch(msgs)], - mocker.MagicMock() - ) - fetcher._completed_fetches.append(completed_fetch) - for i in range(10): - msg = next(fetcher) - assert isinstance(msg, ConsumerRecord) - assert msg.offset == i - assert msg.value == b'foo' +def test__unpack_records_corrupted(mocker): + tp = TopicPartition('foo', 0) + messages = [ + (None, b"a", None), + (None, b"b", None), + (None, b"c", None), + ] + memory_records = MemoryRecords(_build_record_batch(messages)) + from kafka.record.default_records import DefaultRecord + mocker.patch.object(DefaultRecord, 'validate_crc', side_effect=[True, True, False]) + part_records = Fetcher.PartitionRecords(0, tp, memory_records) + records = part_records.take(10) + assert len(records) == 2 + with pytest.raises(Errors.CorruptRecordError): + part_records.take(10) def test__parse_fetched_data(fetcher, topic, mocker): @@ -493,7 +493,8 @@ def test__parse_fetched_data(fetcher, topic, mocker): ) partition_record = fetcher._parse_fetched_data(completed_fetch) assert isinstance(partition_record, fetcher.PartitionRecords) - assert len(partition_record) == 10 + assert 
partition_record + assert len(partition_record.take()) == 10 def test__parse_fetched_data__paused(fetcher, topic, mocker): @@ -563,7 +564,7 @@ def test__parse_fetched_data__out_of_range(fetcher, topic, mocker): assert fetcher._subscriptions.assignment[tp].awaiting_reset is True -def test_partition_records_offset(): +def test_partition_records_offset(mocker): """Test that compressed messagesets are handle correctly when fetch offset is in the middle of the message list """ @@ -571,39 +572,45 @@ def test_partition_records_offset(): batch_end = 130 fetch_offset = 123 tp = TopicPartition('foo', 0) - messages = [ConsumerRecord(tp.topic, tp.partition, -1, i, - None, None, 'key', 'value', [], 'checksum', 0, 0, -1) - for i in range(batch_start, batch_end)] - records = Fetcher.PartitionRecords(fetch_offset, None, messages) - assert len(records) > 0 + messages = [(None, b'msg', None) for i in range(batch_start, batch_end)] + memory_records = MemoryRecords(_build_record_batch(messages, offset=batch_start)) + records = Fetcher.PartitionRecords(fetch_offset, tp, memory_records) + assert records + assert records.next_fetch_offset == fetch_offset msgs = records.take(1) assert msgs[0].offset == fetch_offset - assert records.fetch_offset == fetch_offset + 1 + assert records.next_fetch_offset == fetch_offset + 1 msgs = records.take(2) assert len(msgs) == 2 - assert len(records) > 0 - records.discard() - assert len(records) == 0 + assert records + assert records.next_fetch_offset == fetch_offset + 3 + records.drain() + assert not records -def test_partition_records_empty(): - records = Fetcher.PartitionRecords(0, None, []) - assert len(records) == 0 +def test_partition_records_empty(mocker): + tp = TopicPartition('foo', 0) + memory_records = MemoryRecords(_build_record_batch([])) + records = Fetcher.PartitionRecords(0, tp, memory_records) + msgs = records.take() + assert len(msgs) == 0 + assert not records -def test_partition_records_no_fetch_offset(): +def test_partition_records_no_fetch_offset(mocker): batch_start = 0 batch_end = 100 fetch_offset = 123 tp = TopicPartition('foo', 0) - messages = [ConsumerRecord(tp.topic, tp.partition, -1, i, - None, None, 'key', 'value', None, 'checksum', 0, 0, -1) - for i in range(batch_start, batch_end)] - records = Fetcher.PartitionRecords(fetch_offset, None, messages) - assert len(records) == 0 + messages = [(None, b'msg', None) for i in range(batch_start, batch_end)] + memory_records = MemoryRecords(_build_record_batch(messages, offset=batch_start)) + records = Fetcher.PartitionRecords(fetch_offset, tp, memory_records) + msgs = records.take() + assert len(msgs) == 0 + assert not records -def test_partition_records_compacted_offset(): +def test_partition_records_compacted_offset(mocker): """Test that messagesets are handle correctly when the fetch offset points to a message that has been compacted """ @@ -611,10 +618,155 @@ def test_partition_records_compacted_offset(): batch_end = 100 fetch_offset = 42 tp = TopicPartition('foo', 0) - messages = [ConsumerRecord(tp.topic, tp.partition, -1, i, - None, None, 'key', 'value', None, 'checksum', 0, 0, -1) - for i in range(batch_start, batch_end) if i != fetch_offset] - records = Fetcher.PartitionRecords(fetch_offset, None, messages) - assert len(records) == batch_end - fetch_offset - 1 - msgs = records.take(1) + builder = MemoryRecordsBuilder( + magic=2, compression_type=0, batch_size=9999999) + + for i in range(batch_start, batch_end): + if i == fetch_offset: + builder.skip(1) + else: + builder.append(key=None, value=b'msg', 
timestamp=None, headers=[]) + builder.close() + memory_records = MemoryRecords(builder.buffer()) + records = Fetcher.PartitionRecords(fetch_offset, tp, memory_records) + msgs = records.take() + assert len(msgs) == batch_end - fetch_offset - 1 assert msgs[0].offset == fetch_offset + 1 + + +def test_reset_offsets_paused(subscription_state, client, mocker): + fetcher = Fetcher(client, subscription_state) + tp = TopicPartition('foo', 0) + subscription_state.assign_from_user([tp]) + subscription_state.pause(tp) # paused partition does not have a valid position + subscription_state.request_offset_reset(tp, OffsetResetStrategy.LATEST) + + fetched_offsets = {tp: OffsetAndTimestamp(10, 1, -1)} + mocker.patch.object(fetcher._client, 'ready', return_value=True) + mocker.patch.object(fetcher, '_send_list_offsets_request', + return_value=Future().success((fetched_offsets, set()))) + mocker.patch.object(fetcher._client.cluster, "leader_for_partition", return_value=0) + fetcher.reset_offsets_if_needed() + + assert not subscription_state.is_offset_reset_needed(tp) + assert not subscription_state.is_fetchable(tp) # because tp is paused + assert subscription_state.has_valid_position(tp) + assert subscription_state.position(tp) == OffsetAndMetadata(10, '', -1) + + +def test_reset_offsets_paused_without_valid(subscription_state, client, mocker): + fetcher = Fetcher(client, subscription_state) + tp = TopicPartition('foo', 0) + subscription_state.assign_from_user([tp]) + subscription_state.pause(tp) # paused partition does not have a valid position + subscription_state.reset_missing_positions() + + fetched_offsets = {tp: OffsetAndTimestamp(0, 1, -1)} + mocker.patch.object(fetcher._client, 'ready', return_value=True) + mocker.patch.object(fetcher, '_send_list_offsets_request', + return_value=Future().success((fetched_offsets, set()))) + mocker.patch.object(fetcher._client.cluster, "leader_for_partition", return_value=0) + fetcher.reset_offsets_if_needed() + + assert not subscription_state.is_offset_reset_needed(tp) + assert not subscription_state.is_fetchable(tp) # because tp is paused + assert subscription_state.has_valid_position(tp) + assert subscription_state.position(tp) == OffsetAndMetadata(0, '', -1) + + +def test_reset_offsets_paused_with_valid(subscription_state, client, mocker): + fetcher = Fetcher(client, subscription_state) + tp = TopicPartition('foo', 0) + subscription_state.assign_from_user([tp]) + subscription_state.seek(tp, 0) + subscription_state.assignment[tp].position = OffsetAndMetadata(10, '', -1) + subscription_state.pause(tp) # paused partition already has a valid position + + mocker.patch.object(fetcher, '_fetch_offsets_by_times', return_value={tp: OffsetAndTimestamp(0, 1, -1)}) + fetcher.reset_offsets_if_needed() + + assert not subscription_state.is_offset_reset_needed(tp) + assert not subscription_state.is_fetchable(tp) # because tp is paused + assert subscription_state.has_valid_position(tp) + assert subscription_state.position(tp) == OffsetAndMetadata(10, '', -1) + + +def test_fetch_position_after_exception(client, mocker): + subscription_state = SubscriptionState(offset_reset_strategy='NONE') + fetcher = Fetcher(client, subscription_state) + + tp0 = TopicPartition('foo', 0) + tp1 = TopicPartition('foo', 1) + # verify the advancement in the next fetch offset equals to the number of fetched records when + # some fetched partitions cause Exception. 
This ensures that consumer won't lose record upon exception + subscription_state.assign_from_user([tp0, tp1]) + subscription_state.seek(tp0, 1) + subscription_state.seek(tp1, 1) + + assert len(fetcher._fetchable_partitions()) == 2 + + empty_records = _build_record_batch([], offset=1) + three_records = _build_record_batch([(None, b'msg', None) for _ in range(3)], offset=1) + fetcher._completed_fetches.append( + CompletedFetch(tp1, 1, 0, [0, 100, three_records], mocker.MagicMock())) + fetcher._completed_fetches.append( + CompletedFetch(tp0, 1, 0, [1, 100, empty_records], mocker.MagicMock())) + records, partial = fetcher.fetched_records() + + assert len(records) == 1 + assert tp1 in records + assert tp0 not in records + assert len(records[tp1]) == 3 + assert subscription_state.position(tp1).offset == 4 + + exceptions = [] + try: + records, partial = fetcher.fetched_records() + except Errors.OffsetOutOfRangeError as e: + exceptions.append(e) + + assert len(exceptions) == 1 + assert isinstance(exceptions[0], Errors.OffsetOutOfRangeError) + assert exceptions[0].args == ({tp0: 1},) + + +def test_seek_before_exception(client, mocker): + subscription_state = SubscriptionState(offset_reset_strategy='NONE') + fetcher = Fetcher(client, subscription_state, max_poll_records=2) + + tp0 = TopicPartition('foo', 0) + tp1 = TopicPartition('foo', 1) + subscription_state.assign_from_user([tp0]) + subscription_state.seek(tp0, 1) + + assert len(fetcher._fetchable_partitions()) == 1 + + three_records = _build_record_batch([(None, b'msg', None) for _ in range(3)], offset=1) + fetcher._completed_fetches.append( + CompletedFetch(tp0, 1, 0, [0, 100, three_records], mocker.MagicMock())) + records, partial = fetcher.fetched_records() + + assert len(records) == 1 + assert tp0 in records + assert len(records[tp0]) == 2 + assert subscription_state.position(tp0).offset == 3 + + subscription_state.assign_from_user([tp0, tp1]) + subscription_state.seek(tp1, 1) + + assert len(fetcher._fetchable_partitions()) == 1 + + empty_records = _build_record_batch([], offset=1) + fetcher._completed_fetches.append( + CompletedFetch(tp1, 1, 0, [1, 100, empty_records], mocker.MagicMock())) + records, partial = fetcher.fetched_records() + + assert len(records) == 1 + assert tp0 in records + assert len(records[tp0]) == 1 + assert subscription_state.position(tp0).offset == 4 + + subscription_state.seek(tp1, 10) + # Should not throw OffsetOutOfRangeError after the seek + records, partial = fetcher.fetched_records() + assert len(records) == 0 diff --git a/test/test_metrics.py b/test/test_metrics.py index 308ea5831..07c0e838a 100644 --- a/test/test_metrics.py +++ b/test/test_metrics.py @@ -19,23 +19,6 @@ def time_keeper(): return TimeKeeper() -@pytest.fixture -def config(): - return MetricConfig() - - -@pytest.fixture -def reporter(): - return DictReporter() - - -@pytest.fixture -def metrics(request, config, reporter): - metrics = Metrics(config, [reporter], enable_expiration=True) - yield metrics - metrics.close() - - def test_MetricName(): # The Java test only cover the differences between the deprecated # constructors, so I'm skipping them but doing some other basic testing. 
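The `config`, `reporter`, and `metrics` fixtures deleted in the hunk above are presumably consolidated into the shared test conftest (not shown in this part of the diff); the tests below simply declare `metrics` as an argument. A minimal sketch of what such a shared fixture might look like, mirroring the definition removed here (the conftest location and exact signature are assumptions, not part of this patch):

    import pytest

    from kafka.metrics import DictReporter, MetricConfig, Metrics


    @pytest.fixture
    def metrics():
        # Mirrors the per-module fixture removed above: an expiring metrics
        # registry with a single dict-backed reporter, closed after each test.
        metrics = Metrics(MetricConfig(), [DictReporter()], enable_expiration=True)
        try:
            yield metrics
        finally:
            metrics.close()

The `test_simple_stats` change in the next hunk then recovers the config via `metrics._config` instead of taking a separate `config` fixture.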
@@ -82,8 +65,9 @@ def test_MetricName(): assert name.tags == tags -def test_simple_stats(mocker, time_keeper, config, metrics): +def test_simple_stats(mocker, time_keeper, metrics): mocker.patch('time.time', side_effect=time_keeper.time) + config = metrics._config measurable = ConstantMeasurable() diff --git a/test/test_object_conversion.py b/test/test_object_conversion.py index 9b1ff2131..a48eb0601 100644 --- a/test/test_object_conversion.py +++ b/test/test_object_conversion.py @@ -207,7 +207,7 @@ def test_with_metadata_response(): assert len(obj['topics']) == 2 assert obj['topics'][0]['error_code'] == 0 assert obj['topics'][0]['topic'] == 'testtopic1' - assert obj['topics'][0]['is_internal'] == False + assert obj['topics'][0]['is_internal'] is False assert len(obj['topics'][0]['partitions']) == 2 assert obj['topics'][0]['partitions'][0]['error_code'] == 0 assert obj['topics'][0]['partitions'][0]['partition'] == 0 @@ -224,7 +224,7 @@ def test_with_metadata_response(): assert obj['topics'][1]['error_code'] == 0 assert obj['topics'][1]['topic'] == 'other-test-topic' - assert obj['topics'][1]['is_internal'] == True + assert obj['topics'][1]['is_internal'] is True assert len(obj['topics'][1]['partitions']) == 1 assert obj['topics'][1]['partitions'][0]['error_code'] == 0 assert obj['topics'][1]['partitions'][0]['partition'] == 0 diff --git a/test/test_producer.py b/test/test_producer.py index 7263130d1..e79c682a7 100644 --- a/test/test_producer.py +++ b/test/test_producer.py @@ -1,137 +1,35 @@ +from __future__ import absolute_import + import gc import platform -import time import threading import pytest -from kafka import KafkaConsumer, KafkaProducer, TopicPartition -from kafka.producer.buffer import SimpleBufferPool -from test.testutil import env_kafka_version, random_string - - -def test_buffer_pool(): - pool = SimpleBufferPool(1000, 1000) - - buf1 = pool.allocate(1000, 1000) - message = ''.join(map(str, range(100))) - buf1.write(message.encode('utf-8')) - pool.deallocate(buf1) - - buf2 = pool.allocate(1000, 1000) - assert buf2.read() == b'' - - -@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") -@pytest.mark.parametrize("compression", [None, 'gzip', 'snappy', 'lz4', 'zstd']) -def test_end_to_end(kafka_broker, compression): - if compression == 'lz4': - if env_kafka_version() < (0, 8, 2): - pytest.skip('LZ4 requires 0.8.2') - elif platform.python_implementation() == 'PyPy': - pytest.skip('python-lz4 crashes on older versions of pypy') - - if compression == 'zstd' and env_kafka_version() < (2, 1, 0): - pytest.skip('zstd requires kafka 2.1.0 or newer') - - connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)]) - producer = KafkaProducer(bootstrap_servers=connect_str, - retries=5, - max_block_ms=30000, - compression_type=compression, - value_serializer=str.encode) - consumer = KafkaConsumer(bootstrap_servers=connect_str, - group_id=None, - consumer_timeout_ms=30000, - auto_offset_reset='earliest', - value_deserializer=bytes.decode) - - topic = random_string(5) +from kafka import KafkaProducer +from kafka.cluster import ClusterMetadata +from kafka.producer.transaction_manager import TransactionManager, ProducerIdAndEpoch - messages = 100 - futures = [] - for i in range(messages): - futures.append(producer.send(topic, 'msg %d' % i)) - ret = [f.get(timeout=30) for f in futures] - assert len(ret) == messages - producer.close() - - consumer.subscribe([topic]) - msgs = set() - for i in range(messages): - try: - msgs.add(next(consumer).value) - except 
StopIteration: - break - - assert msgs == set(['msg %d' % (i,) for i in range(messages)]) - consumer.close() - -@pytest.mark.skipif(platform.python_implementation() != 'CPython', - reason='Test relies on CPython-specific gc policies') -def test_kafka_producer_gc_cleanup(): - gc.collect() +def test_kafka_producer_thread_close(): threads = threading.active_count() - producer = KafkaProducer(api_version='0.9') # set api_version explicitly to avoid auto-detection + producer = KafkaProducer(api_version=(2, 1)) # set api_version explicitly to avoid auto-detection assert threading.active_count() == threads + 1 - del(producer) - gc.collect() + producer.close() assert threading.active_count() == threads -@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") -@pytest.mark.parametrize("compression", [None, 'gzip', 'snappy', 'lz4', 'zstd']) -def test_kafka_producer_proper_record_metadata(kafka_broker, compression): - if compression == 'zstd' and env_kafka_version() < (2, 1, 0): - pytest.skip('zstd requires 2.1.0 or more') - connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)]) - producer = KafkaProducer(bootstrap_servers=connect_str, - retries=5, - max_block_ms=30000, - compression_type=compression) - magic = producer._max_usable_produce_magic() - - # record headers are supported in 0.11.0 - if env_kafka_version() < (0, 11, 0): - headers = None - else: - headers = [("Header Key", b"Header Value")] - - topic = random_string(5) - future = producer.send( - topic, - value=b"Simple value", key=b"Simple key", headers=headers, timestamp_ms=9999999, - partition=0) - record = future.get(timeout=5) - assert record is not None - assert record.topic == topic - assert record.partition == 0 - assert record.topic_partition == TopicPartition(topic, 0) - assert record.offset == 0 - if magic >= 1: - assert record.timestamp == 9999999 - else: - assert record.timestamp == -1 # NO_TIMESTAMP - - if magic >= 2: - assert record.checksum is None - elif magic == 1: - assert record.checksum == 1370034956 - else: - assert record.checksum == 3296137851 - - assert record.serialized_key_size == 10 - assert record.serialized_value_size == 12 - if headers: - assert record.serialized_header_size == 22 - - if magic == 0: - pytest.skip('generated timestamp case is skipped for broker 0.9 and below') - send_time = time.time() * 1000 - future = producer.send( - topic, - value=b"Simple value", key=b"Simple key", timestamp_ms=None, - partition=0) - record = future.get(timeout=5) - assert abs(record.timestamp - send_time) <= 1000 # Allow 1s deviation +def test_idempotent_producer_reset_producer_id(): + transaction_manager = TransactionManager( + transactional_id=None, + transaction_timeout_ms=1000, + retry_backoff_ms=100, + api_version=(0, 11), + metadata=ClusterMetadata(), + ) + + test_producer_id_and_epoch = ProducerIdAndEpoch(123, 456) + transaction_manager.set_producer_id_and_epoch(test_producer_id_and_epoch) + assert transaction_manager.producer_id_and_epoch == test_producer_id_and_epoch + transaction_manager.reset_producer_id() + assert transaction_manager.producer_id_and_epoch == ProducerIdAndEpoch(-1, -1) diff --git a/test/test_protocol.py b/test/test_protocol.py index 6f94c74e1..d0cc7ed0a 100644 --- a/test/test_protocol.py +++ b/test/test_protocol.py @@ -2,8 +2,6 @@ import io import struct -import pytest - from kafka.protocol.api import RequestHeader from kafka.protocol.fetch import FetchRequest, FetchResponse from kafka.protocol.find_coordinator import FindCoordinatorRequest @@ -273,7 +271,7 @@ 
def test_decode_fetch_response_partial(): def test_struct_unrecognized_kwargs(): try: - mr = MetadataRequest[0](topicz='foo') + _mr = MetadataRequest[0](topicz='foo') assert False, 'Structs should not allow unrecognized kwargs' except ValueError: pass @@ -331,6 +329,6 @@ def test_compact_data_structs(): assert CompactBytes.decode(io.BytesIO(b'\x00')) is None enc = CompactBytes.encode(b'') assert enc == b'\x01' - assert CompactBytes.decode(io.BytesIO(b'\x01')) is b'' + assert CompactBytes.decode(io.BytesIO(b'\x01')) == b'' enc = CompactBytes.encode(b'foo') assert CompactBytes.decode(io.BytesIO(enc)) == b'foo' diff --git a/test/test_record_accumulator.py b/test/test_record_accumulator.py new file mode 100644 index 000000000..5c7134e5c --- /dev/null +++ b/test/test_record_accumulator.py @@ -0,0 +1,266 @@ +# pylint: skip-file +from __future__ import absolute_import, division + +import pytest + +from kafka.cluster import ClusterMetadata +from kafka.errors import IllegalStateError, KafkaError +from kafka.producer.future import FutureRecordMetadata, RecordMetadata +from kafka.producer.record_accumulator import RecordAccumulator, ProducerBatch +from kafka.record.default_records import DefaultRecordBatchBuilder +from kafka.record.memory_records import MemoryRecordsBuilder +from kafka.structs import TopicPartition + + +@pytest.fixture +def tp(): + return TopicPartition('foo', 0) + +@pytest.fixture +def cluster(tp, mocker): + metadata = ClusterMetadata() + mocker.patch.object(metadata, 'leader_for_partition', return_value=0) + mocker.patch.object(metadata, 'partitions_for_broker', return_value=[tp]) + return metadata + +def test_producer_batch_producer_id(): + tp = TopicPartition('foo', 0) + records = MemoryRecordsBuilder( + magic=2, compression_type=0, batch_size=100000) + batch = ProducerBatch(tp, records) + assert batch.producer_id == -1 + batch.records.set_producer_state(123, 456, 789, False) + assert batch.producer_id == 123 + records.close() + assert batch.producer_id == 123 + +@pytest.mark.parametrize("magic", [0, 1, 2]) +def test_producer_batch_try_append(magic): + tp = TopicPartition('foo', 0) + records = MemoryRecordsBuilder( + magic=magic, compression_type=0, batch_size=100000) + batch = ProducerBatch(tp, records) + assert batch.record_count == 0 + future = batch.try_append(0, b'key', b'value', []) + assert isinstance(future, FutureRecordMetadata) + assert not future.is_done + batch.done(base_offset=123, timestamp_ms=456) + assert future.is_done + # record-level checksum only provided in v0/v1 formats; payload includes magic-byte + if magic == 0: + checksum = 592888119 + elif magic == 1: + checksum = 213653215 + else: + checksum = None + + expected_metadata = RecordMetadata( + topic=tp[0], partition=tp[1], topic_partition=tp, + offset=123, timestamp=456, checksum=checksum, + serialized_key_size=3, serialized_value_size=5, serialized_header_size=-1) + assert future.value == expected_metadata + +def test_producer_batch_retry(): + tp = TopicPartition('foo', 0) + records = MemoryRecordsBuilder( + magic=2, compression_type=0, batch_size=100000) + batch = ProducerBatch(tp, records) + assert not batch.in_retry() + batch.retry() + assert batch.in_retry() + +def test_batch_abort(): + tp = TopicPartition('foo', 0) + records = MemoryRecordsBuilder( + magic=2, compression_type=0, batch_size=100000) + batch = ProducerBatch(tp, records) + future = batch.try_append(123, None, b'msg', []) + + batch.abort(KafkaError()) + assert future.is_done + + # subsequent completion should be ignored + batch.done(500, 
2342342341) + batch.done(exception=KafkaError()) + + assert future.is_done + with pytest.raises(KafkaError): + future.get() + +def test_batch_cannot_abort_twice(): + tp = TopicPartition('foo', 0) + records = MemoryRecordsBuilder( + magic=2, compression_type=0, batch_size=100000) + batch = ProducerBatch(tp, records) + future = batch.try_append(123, None, b'msg', []) + + batch.abort(KafkaError()) + + with pytest.raises(IllegalStateError): + batch.abort(KafkaError()) + + assert future.is_done + with pytest.raises(KafkaError): + future.get() + +def test_batch_cannot_complete_twice(): + tp = TopicPartition('foo', 0) + records = MemoryRecordsBuilder( + magic=2, compression_type=0, batch_size=100000) + batch = ProducerBatch(tp, records) + future = batch.try_append(123, None, b'msg', []) + + batch.done(500, 10, None) + + with pytest.raises(IllegalStateError): + batch.done(1000, 20, None) + + record_metadata = future.get() + + assert record_metadata.offset == 500 + assert record_metadata.timestamp == 10 + +def test_linger(tp, cluster): + now = 0 + accum = RecordAccumulator(linger_ms=10) + accum.append(tp, 0, b'key', b'value', [], now=now) + ready, next_ready_check, _unknown_leaders_exist = accum.ready(cluster, now=now) + assert len(ready) == 0, 'No partitions should be ready' + assert next_ready_check == .01 # linger_ms in secs + now += .01 + ready, _next_ready_check, _unknown_leaders_exist = accum.ready(cluster, now=now) + assert ready == set([0]), "Our partitions leader should be ready" + batches = accum.drain(cluster, ready, 0, 2147483647)[0] + assert len(batches) == 1 + batch = batches[0] + assert batch.records.is_full() + + parsed = list(batch.records.records()) + assert len(parsed) == 1 + records = list(parsed[0]) + assert len(records) == 1 + assert records[0].key == b'key', 'Keys should match' + assert records[0].value == b'value', 'Values should match' + +def _advance_now_ms(now, ms): + return now + ms / 1000 + 1/10000 # add extra .1 ms to each advance to avoid rounding issues when converting back to seconds + +def _do_expire_batch_single(cluster, tp, delivery_timeout_ms): + now = 0 + linger_ms = 300 + accum = RecordAccumulator(linger_ms=linger_ms, delivery_timeout_ms=delivery_timeout_ms, request_timeout_ms=(delivery_timeout_ms-linger_ms-100)) + + # Make the batches ready due to linger. These batches are not in retry + for mute in [False, True]: + accum.append(tp, 0, b'key', b'value', [], now=now) + ready, next_ready_check, _unknown_leaders_exist = accum.ready(cluster, now=now) + assert len(ready) == 0, 'No partitions should be ready' + assert next_ready_check == linger_ms / 1000 + + now = _advance_now_ms(now, linger_ms) + ready, _next_ready_check, _unknown_leaders_exist = accum.ready(cluster, now=now) + assert ready == set([0]), "Our partitions leader should be ready" + + expired_batches = accum.expired_batches(now=now) + assert len(expired_batches) == 0, "The batch should not expire when just linger has passed" + + if mute: + accum.muted.add(tp) + else: + try: + accum.muted.remove(tp) + except KeyError: + pass + + # Advance the clock to expire the batch. + now = _advance_now_ms(now, delivery_timeout_ms - linger_ms) + expired_batches = accum.expired_batches(now=now) + assert len(expired_batches) == 1, "The batch may expire when the partition is muted" + ready, _next_ready_check, _unknown_leaders_exist = accum.ready(cluster, now=now) + assert len(ready) == 0, "No partitions should be ready." 
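+
+
+# Illustrative sketch only (not ported from the upstream Java test): the helper
+# above exercises the ordering of "ready" vs. "expired" for a single batch. A
+# batch becomes sendable once linger_ms has elapsed, but expired_batches() only
+# returns it after the full delivery_timeout_ms window, muted or not. The
+# function below is a condensed walk-through of that timeline using the same
+# fixtures and helpers; it is not collected by pytest.
+def _expiry_timeline_sketch(cluster, tp, linger_ms=300, delivery_timeout_ms=3200):
+    accum = RecordAccumulator(linger_ms=linger_ms,
+                              delivery_timeout_ms=delivery_timeout_ms,
+                              request_timeout_ms=delivery_timeout_ms - linger_ms - 100)
+    now = 0
+    accum.append(tp, 0, b'key', b'value', [], now=now)
+    assert accum.ready(cluster, now=now)[0] == set()       # still lingering
+    now = _advance_now_ms(now, linger_ms)
+    assert accum.ready(cluster, now=now)[0] == set([0])    # leader node 0 is ready
+    assert len(accum.expired_batches(now=now)) == 0        # linger alone does not expire
+    now = _advance_now_ms(now, delivery_timeout_ms - linger_ms)
+    assert len(accum.expired_batches(now=now)) == 1        # expired past delivery timeout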
+ +def test_expired_batch_single(cluster, tp): + _do_expire_batch_single(cluster, tp, 3200) + +def test_expired_batch_single_max_value(cluster, tp): + _do_expire_batch_single(cluster, tp, 2147483647) + +def _expected_num_appends(batch_size): + size = DefaultRecordBatchBuilder.header_size_in_bytes() + offset_delta = 0 + while True: + record_size = DefaultRecordBatchBuilder.size_in_bytes(offset_delta, 0, b'key', b'value', []) + if size + record_size > batch_size: + return offset_delta + offset_delta += 1 + size += record_size + +def test_expired_batches(cluster, tp): + now = 0 + retry_backoff_ms = 100 + linger_ms = 30 + request_timeout_ms = 60 + delivery_timeout_ms = 3200 + batch_size = 1024 + accum = RecordAccumulator(linger_ms=linger_ms, delivery_timeout_ms=delivery_timeout_ms, request_timeout_ms=request_timeout_ms, retry_backoff_ms=retry_backoff_ms, batch_size=batch_size) + appends = _expected_num_appends(batch_size) + + # Test batches not in retry + for i in range(appends): + accum.append(tp, 0, b'key', b'value', [], now=now) + ready, next_ready_check, _unknown_leaders_exist = accum.ready(cluster, now=now) + assert len(ready) == 0, 'No partitions should be ready' + assert next_ready_check == linger_ms / 1000 + + # Make the batches ready due to batch full + accum.append(tp, 0, b'key', b'value', [], now=now) + ready, _next_ready_check, _unknown_leaders_exist = accum.ready(cluster, now=now) + assert ready == set([0]), "Our partitions leader should be ready" + + # Advance the clock to expire the batch. + now = _advance_now_ms(now, delivery_timeout_ms + 1) + accum.muted.add(tp) + expired_batches = accum.expired_batches(now=now) + assert len(expired_batches) == 2, "The batches will be expired no matter if the partition is muted or not" + + accum.muted.remove(tp) + expired_batches = accum.expired_batches(now=now) + assert len(expired_batches) == 0, "All batches should have been expired earlier" + ready, _next_ready_check, _unknown_leaders_exist = accum.ready(cluster, now=now) + assert len(ready) == 0, "No partitions should be ready." + + # Test batches in retry. + # Create a retried batch + accum.append(tp, 0, b'key', b'value', [], now=now) + now = _advance_now_ms(now, linger_ms) + ready, _next_ready_check, _unknown_leaders_exist = accum.ready(cluster, now=now) + assert ready == set([0]), "Our partitions leader should be ready" + + drained = accum.drain(cluster, ready, 2147483647, now=now) + assert len(drained[0]) == 1, "There should be only one batch." + now = _advance_now_ms(now, 1000) + accum.reenqueue(drained[0][0], now=now) + + # test expiration. + now = _advance_now_ms(now, request_timeout_ms + retry_backoff_ms) + expired_batches = accum.expired_batches(now=now) + assert len(expired_batches) == 0, "The batch should not be expired." + now = _advance_now_ms(now, 1) + + accum.muted.add(tp) + expired_batches = accum.expired_batches(now=now) + assert len(expired_batches) == 0, "The batch should not be expired when the partition is muted" + + accum.muted.remove(tp) + expired_batches = accum.expired_batches(now=now) + assert len(expired_batches) == 0, "The batch should not be expired when the partition is unmuted" + + now = _advance_now_ms(now, linger_ms) + ready, _next_ready_check, _unknown_leaders_exist = accum.ready(cluster, now=now) + assert ready == set([0]), "Our partitions leader should be ready" + + # Advance the clock to expire the batch. 
+ now = _advance_now_ms(now, delivery_timeout_ms + 1) + accum.muted.add(tp) + expired_batches = accum.expired_batches(now=now) + assert len(expired_batches) == 1, "The batch should be expired even when the partition is muted" diff --git a/test/test_sender.py b/test/test_sender.py index 83a26cd39..6d29c1e44 100644 --- a/test/test_sender.py +++ b/test/test_sender.py @@ -1,52 +1,255 @@ # pylint: skip-file from __future__ import absolute_import -import pytest +import collections import io +import time + +import pytest +try: + from unittest.mock import call +except ImportError: + from mock import call + +from kafka.vendor import six from kafka.client_async import KafkaClient from kafka.cluster import ClusterMetadata -from kafka.metrics import Metrics +import kafka.errors as Errors from kafka.protocol.broker_api_versions import BROKER_API_VERSIONS +from kafka.producer.kafka import KafkaProducer from kafka.protocol.produce import ProduceRequest from kafka.producer.record_accumulator import RecordAccumulator, ProducerBatch from kafka.producer.sender import Sender +from kafka.producer.transaction_manager import TransactionManager from kafka.record.memory_records import MemoryRecordsBuilder from kafka.structs import TopicPartition -@pytest.fixture -def client(): - return KafkaClient(bootstrap_servers=(), api_version=(0, 9)) - - @pytest.fixture def accumulator(): return RecordAccumulator() @pytest.fixture -def metrics(): - return Metrics() +def sender(client, accumulator): + return Sender(client, client.cluster, accumulator) + + +def producer_batch(topic='foo', partition=0, magic=2): + tp = TopicPartition(topic, partition) + records = MemoryRecordsBuilder( + magic=magic, compression_type=0, batch_size=100000) + batch = ProducerBatch(tp, records) + batch.try_append(0, None, b'msg', []) + batch.records.close() + return batch @pytest.fixture -def sender(client, accumulator, metrics, mocker): - return Sender(client, client.cluster, accumulator, metrics) +def transaction_manager(): + return TransactionManager( + transactional_id=None, + transaction_timeout_ms=60000, + retry_backoff_ms=100, + api_version=(2, 1), + metadata=ClusterMetadata()) @pytest.mark.parametrize(("api_version", "produce_version"), [ + ((2, 1), 7), ((0, 10, 0), 2), ((0, 9), 1), ((0, 8, 0), 0) ]) -def test_produce_request(sender, mocker, api_version, produce_version): +def test_produce_request(sender, api_version, produce_version): sender._client._api_versions = BROKER_API_VERSIONS[api_version] - tp = TopicPartition('foo', 0) - buffer = io.BytesIO() - records = MemoryRecordsBuilder( - magic=1, compression_type=0, batch_size=100000) - batch = ProducerBatch(tp, records, buffer) - records.close() + magic = KafkaProducer.max_usable_produce_magic(api_version) + batch = producer_batch(magic=magic) produce_request = sender._produce_request(0, 0, 0, [batch]) assert isinstance(produce_request, ProduceRequest[produce_version]) + + +@pytest.mark.parametrize(("api_version", "produce_version"), [ + ((2, 1), 7), +]) +def test_create_produce_requests(sender, api_version, produce_version): + sender._client._api_versions = BROKER_API_VERSIONS[api_version] + tp = TopicPartition('foo', 0) + magic = KafkaProducer.max_usable_produce_magic(api_version) + batches_by_node = collections.defaultdict(list) + for node in range(3): + for _ in range(5): + batches_by_node[node].append(producer_batch(magic=magic)) + produce_requests_by_node = sender._create_produce_requests(batches_by_node) + assert len(produce_requests_by_node) == 3 + for node in range(3): + assert 
isinstance(produce_requests_by_node[node], ProduceRequest[produce_version]) + + +def test_complete_batch_success(sender): + batch = producer_batch() + assert not batch.produce_future.is_done + + # No error, base_offset 0 + sender._complete_batch(batch, None, 0, timestamp_ms=123) + assert batch.is_done + assert batch.produce_future.is_done + assert batch.produce_future.succeeded() + assert batch.produce_future.value == (0, 123) + + +def test_complete_batch_transaction(sender, transaction_manager): + sender._transaction_manager = transaction_manager + batch = producer_batch() + assert sender._transaction_manager.sequence_number(batch.topic_partition) == 0 + assert sender._transaction_manager.producer_id_and_epoch.producer_id == batch.producer_id + + # No error, base_offset 0 + sender._complete_batch(batch, None, 0) + assert batch.is_done + assert sender._transaction_manager.sequence_number(batch.topic_partition) == batch.record_count + + +@pytest.mark.parametrize(("error", "refresh_metadata"), [ + (Errors.KafkaConnectionError, True), + (Errors.CorruptRecordError, False), + (Errors.UnknownTopicOrPartitionError, True), + (Errors.NotLeaderForPartitionError, True), + (Errors.MessageSizeTooLargeError, False), + (Errors.InvalidTopicError, False), + (Errors.RecordListTooLargeError, False), + (Errors.NotEnoughReplicasError, False), + (Errors.NotEnoughReplicasAfterAppendError, False), + (Errors.InvalidRequiredAcksError, False), + (Errors.TopicAuthorizationFailedError, False), + (Errors.UnsupportedForMessageFormatError, False), + (Errors.InvalidProducerEpochError, False), + (Errors.ClusterAuthorizationFailedError, False), + (Errors.TransactionalIdAuthorizationFailedError, False), +]) +def test_complete_batch_error(sender, error, refresh_metadata): + sender._client.cluster._last_successful_refresh_ms = (time.time() - 10) * 1000 + sender._client.cluster._need_update = False + sender.config['retries'] = 0 + assert sender._client.cluster.ttl() > 0 + batch = producer_batch() + sender._complete_batch(batch, error, -1) + if refresh_metadata: + assert sender._client.cluster.ttl() == 0 + else: + assert sender._client.cluster.ttl() > 0 + assert batch.is_done + assert batch.produce_future.failed() + assert isinstance(batch.produce_future.exception, error) + + +@pytest.mark.parametrize(("error", "retry"), [ + (Errors.KafkaConnectionError, True), + (Errors.CorruptRecordError, False), + (Errors.UnknownTopicOrPartitionError, True), + (Errors.NotLeaderForPartitionError, True), + (Errors.MessageSizeTooLargeError, False), + (Errors.InvalidTopicError, False), + (Errors.RecordListTooLargeError, False), + (Errors.NotEnoughReplicasError, True), + (Errors.NotEnoughReplicasAfterAppendError, True), + (Errors.InvalidRequiredAcksError, False), + (Errors.TopicAuthorizationFailedError, False), + (Errors.UnsupportedForMessageFormatError, False), + (Errors.InvalidProducerEpochError, False), + (Errors.ClusterAuthorizationFailedError, False), + (Errors.TransactionalIdAuthorizationFailedError, False), +]) +def test_complete_batch_retry(sender, accumulator, mocker, error, retry): + sender.config['retries'] = 1 + mocker.spy(sender, '_fail_batch') + mocker.patch.object(accumulator, 'reenqueue') + batch = producer_batch() + sender._complete_batch(batch, error, -1) + if retry: + assert not batch.is_done + accumulator.reenqueue.assert_called_with(batch) + batch.attempts += 1 # normally handled by accumulator.reenqueue, but it's mocked + sender._complete_batch(batch, error, -1) + assert batch.is_done + assert 
isinstance(batch.produce_future.exception, error) + else: + assert batch.is_done + assert isinstance(batch.produce_future.exception, error) + + +def test_complete_batch_producer_id_changed_no_retry(sender, accumulator, transaction_manager, mocker): + sender._transaction_manager = transaction_manager + sender.config['retries'] = 1 + mocker.spy(sender, '_fail_batch') + mocker.patch.object(accumulator, 'reenqueue') + error = Errors.NotLeaderForPartitionError + batch = producer_batch() + sender._complete_batch(batch, error, -1) + assert not batch.is_done + accumulator.reenqueue.assert_called_with(batch) + batch.records._producer_id = 123 # simulate different producer_id + assert batch.producer_id != sender._transaction_manager.producer_id_and_epoch.producer_id + sender._complete_batch(batch, error, -1) + assert batch.is_done + assert isinstance(batch.produce_future.exception, error) + + +def test_fail_batch(sender, accumulator, transaction_manager, mocker): + sender._transaction_manager = transaction_manager + batch = producer_batch() + mocker.patch.object(batch, 'done') + assert sender._transaction_manager.producer_id_and_epoch.producer_id == batch.producer_id + error = Exception('error') + sender._fail_batch(batch, base_offset=0, timestamp_ms=None, exception=error) + batch.done.assert_called_with(base_offset=0, timestamp_ms=None, exception=error) + + +def test_out_of_order_sequence_number_reset_producer_id(sender, accumulator, transaction_manager, mocker): + sender._transaction_manager = transaction_manager + assert transaction_manager.transactional_id is None # this test is for idempotent producer only + mocker.patch.object(TransactionManager, 'reset_producer_id') + batch = producer_batch() + mocker.patch.object(batch, 'done') + assert sender._transaction_manager.producer_id_and_epoch.producer_id == batch.producer_id + error = Errors.OutOfOrderSequenceNumberError() + sender._fail_batch(batch, base_offset=0, timestamp_ms=None, exception=error) + sender._transaction_manager.reset_producer_id.assert_called_once() + batch.done.assert_called_with(base_offset=0, timestamp_ms=None, exception=error) + + +def test_handle_produce_response(): + pass + + +def test_failed_produce(sender, mocker): + mocker.patch.object(sender, '_complete_batch') + mock_batches = ['foo', 'bar', 'fizzbuzz'] + sender._failed_produce(mock_batches, 0, 'error') + sender._complete_batch.assert_has_calls([ + call('foo', 'error', -1), + call('bar', 'error', -1), + call('fizzbuzz', 'error', -1), + ]) + + +def test_maybe_wait_for_producer_id(): + pass + + +def test_run_once(): + pass + + +def test__send_producer_data_expiry_time_reset(sender, accumulator, mocker): + now = time.time() + tp = TopicPartition('foo', 0) + mocker.patch.object(sender, '_failed_produce') + result = accumulator.append(tp, 0, b'key', b'value', [], now=now) + poll_timeout_ms = sender._send_producer_data(now=now) + assert poll_timeout_ms == accumulator.config['delivery_timeout_ms'] + sender._failed_produce.assert_not_called() + now += accumulator.config['delivery_timeout_ms'] + poll_timeout_ms = sender._send_producer_data(now=now) + assert poll_timeout_ms > 0 diff --git a/test/test_subscription_state.py b/test/test_subscription_state.py index 9718f6af4..773606525 100644 --- a/test/test_subscription_state.py +++ b/test/test_subscription_state.py @@ -1,25 +1,57 @@ -# pylint: skip-file from __future__ import absolute_import import pytest -from kafka.consumer.subscription_state import SubscriptionState - -@pytest.mark.parametrize(('topic_name', 'expectation'), [ - 
(0, pytest.raises(TypeError)), - (None, pytest.raises(TypeError)), - ('', pytest.raises(ValueError)), - ('.', pytest.raises(ValueError)), - ('..', pytest.raises(ValueError)), - ('a' * 250, pytest.raises(ValueError)), - ('abc/123', pytest.raises(ValueError)), - ('/abc/123', pytest.raises(ValueError)), - ('/abc123', pytest.raises(ValueError)), - ('name with space', pytest.raises(ValueError)), - ('name*with*stars', pytest.raises(ValueError)), - ('name+with+plus', pytest.raises(ValueError)), -]) -def test_topic_name_validation(topic_name, expectation): - state = SubscriptionState() - with expectation: - state._ensure_valid_topic_name(topic_name) +from kafka import TopicPartition +from kafka.consumer.subscription_state import SubscriptionState, TopicPartitionState +from kafka.vendor import six + + +def test_type_error(): + s = SubscriptionState() + with pytest.raises(TypeError): + s.subscribe(topics='foo') + + s.subscribe(topics=['foo']) + + +def test_change_subscription(): + s = SubscriptionState() + s.subscribe(topics=['foo']) + assert s.subscription == set(['foo']) + s.change_subscription(['bar']) + assert s.subscription == set(['bar']) + + +def test_group_subscribe(): + s = SubscriptionState() + s.subscribe(topics=['foo']) + assert s.subscription == set(['foo']) + s.group_subscribe(['bar']) + assert s.subscription == set(['foo']) + assert s._group_subscription == set(['foo', 'bar']) + + s.reset_group_subscription() + assert s.subscription == set(['foo']) + assert s._group_subscription == set(['foo']) + + +def test_assign_from_subscribed(): + s = SubscriptionState() + s.subscribe(topics=['foo']) + with pytest.raises(ValueError): + s.assign_from_subscribed([TopicPartition('bar', 0)]) + + s.assign_from_subscribed([TopicPartition('foo', 0), TopicPartition('foo', 1)]) + assert set(s.assignment.keys()) == set([TopicPartition('foo', 0), TopicPartition('foo', 1)]) + assert all([isinstance(tps, TopicPartitionState) for tps in six.itervalues(s.assignment)]) + assert all([not tps.has_valid_position for tps in six.itervalues(s.assignment)]) + + +def test_change_subscription_after_assignment(): + s = SubscriptionState() + s.subscribe(topics=['foo']) + s.assign_from_subscribed([TopicPartition('foo', 0), TopicPartition('foo', 1)]) + # Changing subscription retains existing assignment until next rebalance + s.change_subscription(['bar']) + assert set(s.assignment.keys()) == set([TopicPartition('foo', 0), TopicPartition('foo', 1)]) diff --git a/test/test_util.py b/test/test_util.py new file mode 100644 index 000000000..875b252aa --- /dev/null +++ b/test/test_util.py @@ -0,0 +1,24 @@ +# pylint: skip-file +from __future__ import absolute_import + +import pytest + +from kafka.util import ensure_valid_topic_name + +@pytest.mark.parametrize(('topic_name', 'expectation'), [ + (0, pytest.raises(TypeError)), + (None, pytest.raises(TypeError)), + ('', pytest.raises(ValueError)), + ('.', pytest.raises(ValueError)), + ('..', pytest.raises(ValueError)), + ('a' * 250, pytest.raises(ValueError)), + ('abc/123', pytest.raises(ValueError)), + ('/abc/123', pytest.raises(ValueError)), + ('/abc123', pytest.raises(ValueError)), + ('name with space', pytest.raises(ValueError)), + ('name*with*stars', pytest.raises(ValueError)), + ('name+with+plus', pytest.raises(ValueError)), +]) +def test_topic_name_validation(topic_name, expectation): + with expectation: + ensure_valid_topic_name(topic_name) diff --git a/test/testutil.py b/test/testutil.py index dd4e267a8..b5dab1c02 100644 --- a/test/testutil.py +++ b/test/testutil.py @@ -6,6 
+6,10 @@ import string import time +import pytest + +import kafka.codec + def special_to_underscore(string, _matcher=re.compile(r'[^a-zA-Z0-9_]+')): return _matcher.sub('_', string) @@ -36,6 +40,18 @@ def assert_message_count(messages, num_messages): assert len(unique_messages) == num_messages, 'Expected %d unique messages, got %d' % (num_messages, len(unique_messages)) +def maybe_skip_unsupported_compression(compression_type): + codecs = {1: 'gzip', 2: 'snappy', 3: 'lz4', 4: 'zstd'} + if not compression_type: + return + elif compression_type in codecs: + compression_type = codecs[compression_type] + + checker = getattr(kafka.codec, 'has_' + compression_type, None) + if checker and not checker(): + pytest.skip("Compression libraries not installed for %s" % (compression_type,)) + + class Timer(object): def __enter__(self): self.start = time.time() diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 71e443dec..000000000 --- a/tox.ini +++ /dev/null @@ -1,49 +0,0 @@ -[tox] -envlist = py{38,39,310,311,312,py}, docs - -[pytest] -testpaths = kafka test -addopts = --durations=10 -log_format = %(created)f %(filename)-23s %(threadName)s %(message)s - -[gh-actions] -python = - 3.8: py38 - 3.9: py39 - 3.10: py310 - 3.11: py311 - 3.12: py312 - pypy-3.9: pypy - -[testenv] -deps = - pytest - pytest-cov - pylint - pytest-pylint - pytest-mock - mock - python-snappy - zstandard - lz4 - xxhash - crc32c -commands = - pytest {posargs:--cov=kafka --cov-config=.covrc} -setenv = - CRC32C_SW_MODE = auto - PROJECT_ROOT = {toxinidir} -passenv = KAFKA_* - -[testenv:pypy] -# pylint is super slow on pypy... -commands = pytest {posargs:--cov=kafka --cov-config=.covrc} - -[testenv:docs] -deps = - sphinx_rtd_theme - sphinx - -commands = - sphinx-apidoc -o docs/apidoc/ kafka/ - sphinx-build -b html docs/ docs/_build