From bc4cc434cddf403a35d0393d68ecfdbfad17c8e5 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Mon, 28 May 2018 15:58:26 -0700 Subject: [PATCH 01/28] Don't use `kafka.common` internally This finishes the split from `kafka.common` to `kafka.errors`/`kafka.structs`. --- kafka/__init__.py | 2 +- kafka/consumer/multiprocess.py | 4 ++-- kafka/consumer/simple.py | 6 +++--- kafka/coordinator/assignors/roundrobin.py | 2 +- kafka/coordinator/consumer.py | 2 +- kafka/producer/base.py | 4 ++-- kafka/producer/kafka.py | 8 ++++---- kafka/producer/record_accumulator.py | 4 ++-- kafka/protocol/legacy.py | 5 ++--- kafka/structs.py | 4 ---- test/test_client_async.py | 3 +-- test/test_conn.py | 2 +- test/test_coordinator.py | 6 +++--- test/test_fetcher.py | 4 ++-- test/test_util.py | 2 +- test/testutil.py | 10 ++++++---- 16 files changed, 32 insertions(+), 36 deletions(-) diff --git a/kafka/__init__.py b/kafka/__init__.py index f108eff1c..ff364d345 100644 --- a/kafka/__init__.py +++ b/kafka/__init__.py @@ -25,8 +25,8 @@ def emit(self, record): from kafka.protocol import ( create_message, create_gzip_message, create_snappy_message) from kafka.partitioner import RoundRobinPartitioner, HashedPartitioner, Murmur2Partitioner -from kafka.structs import TopicPartition, OffsetAndMetadata from kafka.serializer import Serializer, Deserializer +from kafka.structs import TopicPartition, OffsetAndMetadata # To be deprecated when KafkaProducer interface is released from kafka.client import SimpleClient diff --git a/kafka/consumer/multiprocess.py b/kafka/consumer/multiprocess.py index 1da4a3353..758bb92f8 100644 --- a/kafka/consumer/multiprocess.py +++ b/kafka/consumer/multiprocess.py @@ -8,7 +8,7 @@ from kafka.vendor.six.moves import queue # pylint: disable=import-error -from kafka.common import KafkaError +from kafka.errors import KafkaError from kafka.consumer.base import ( Consumer, AUTO_COMMIT_MSG_COUNT, AUTO_COMMIT_INTERVAL, @@ -92,7 +92,7 @@ def _mp_consume(client, group, topic, message_queue, size, events, **consumer_op except KafkaError as e: # Retry with exponential backoff - log.error("Problem communicating with Kafka (%s), retrying in %d seconds..." 
% (e, interval)) + log.exception("Problem communicating with Kafka, retrying in %d seconds...", interval) time.sleep(interval) interval = interval*2 if interval*2 < MAX_BACKOFF_SECONDS else MAX_BACKOFF_SECONDS diff --git a/kafka/consumer/simple.py b/kafka/consumer/simple.py index c0c1b1ed3..b60a5865b 100644 --- a/kafka/consumer/simple.py +++ b/kafka/consumer/simple.py @@ -24,13 +24,13 @@ ITER_TIMEOUT_SECONDS, NO_MESSAGES_WAIT_TIME_SECONDS ) -from kafka.common import ( - FetchRequestPayload, KafkaError, OffsetRequestPayload, - ConsumerFetchSizeTooSmall, +from kafka.errors import ( + KafkaError, ConsumerFetchSizeTooSmall, UnknownTopicOrPartitionError, NotLeaderForPartitionError, OffsetOutOfRangeError, FailedPayloadsError, check_error ) from kafka.protocol.message import PartialMessage +from kafka.structs import FetchRequestPayload, OffsetRequestPayload log = logging.getLogger(__name__) diff --git a/kafka/coordinator/assignors/roundrobin.py b/kafka/coordinator/assignors/roundrobin.py index a8310338c..2d24a5c8b 100644 --- a/kafka/coordinator/assignors/roundrobin.py +++ b/kafka/coordinator/assignors/roundrobin.py @@ -7,8 +7,8 @@ from kafka.vendor import six from kafka.coordinator.assignors.abstract import AbstractPartitionAssignor -from kafka.common import TopicPartition from kafka.coordinator.protocol import ConsumerProtocolMemberMetadata, ConsumerProtocolMemberAssignment +from kafka.structs import TopicPartition log = logging.getLogger(__name__) diff --git a/kafka/coordinator/consumer.py b/kafka/coordinator/consumer.py index cb1de0d2e..f90d1821d 100644 --- a/kafka/coordinator/consumer.py +++ b/kafka/coordinator/consumer.py @@ -11,7 +11,7 @@ from kafka.coordinator.assignors.range import RangePartitionAssignor from kafka.coordinator.assignors.roundrobin import RoundRobinPartitionAssignor from kafka.coordinator.protocol import ConsumerProtocol -from kafka import errors as Errors +import kafka.errors as Errors from kafka.future import Future from kafka.metrics import AnonMeasurable from kafka.metrics.stats import Avg, Count, Max, Rate diff --git a/kafka/producer/base.py b/kafka/producer/base.py index c9dd6c3a1..956cef6c5 100644 --- a/kafka/producer/base.py +++ b/kafka/producer/base.py @@ -14,13 +14,13 @@ from kafka.vendor import six -from kafka.structs import ( - ProduceRequestPayload, ProduceResponsePayload, TopicPartition, RetryOptions) from kafka.errors import ( kafka_errors, UnsupportedCodecError, FailedPayloadsError, RequestTimedOutError, AsyncProducerQueueFull, UnknownError, RETRY_ERROR_TYPES, RETRY_BACKOFF_ERROR_TYPES, RETRY_REFRESH_ERROR_TYPES) from kafka.protocol import CODEC_NONE, ALL_CODECS, create_message_set +from kafka.structs import ( + ProduceRequestPayload, ProduceResponsePayload, TopicPartition, RetryOptions) log = logging.getLogger('kafka.producer') diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py index f285ab474..7d52bdfa7 100644 --- a/kafka/producer/kafka.py +++ b/kafka/producer/kafka.py @@ -10,18 +10,18 @@ from kafka.vendor import six -from kafka import errors as Errors +import kafka.errors as Errors from kafka.client_async import KafkaClient, selectors from kafka.codec import has_gzip, has_snappy, has_lz4 from kafka.metrics import MetricConfig, Metrics from kafka.partitioner.default import DefaultPartitioner +from kafka.producer.future import FutureRecordMetadata, FutureProduceResult +from kafka.producer.record_accumulator import AtomicInteger, RecordAccumulator +from kafka.producer.sender import Sender from kafka.record.default_records import 
DefaultRecordBatchBuilder from kafka.record.legacy_records import LegacyRecordBatchBuilder from kafka.serializer import Serializer from kafka.structs import TopicPartition -from kafka.producer.future import FutureRecordMetadata, FutureProduceResult -from kafka.producer.record_accumulator import AtomicInteger, RecordAccumulator -from kafka.producer.sender import Sender log = logging.getLogger(__name__) diff --git a/kafka/producer/record_accumulator.py b/kafka/producer/record_accumulator.py index 61f1e0e2a..1cd541356 100644 --- a/kafka/producer/record_accumulator.py +++ b/kafka/producer/record_accumulator.py @@ -6,12 +6,12 @@ import threading import time -from kafka import errors as Errors +import kafka.errors as Errors from kafka.producer.buffer import SimpleBufferPool from kafka.producer.future import FutureRecordMetadata, FutureProduceResult -from kafka.structs import TopicPartition from kafka.record.memory_records import MemoryRecordsBuilder from kafka.record.legacy_records import LegacyRecordBatchBuilder +from kafka.structs import TopicPartition log = logging.getLogger(__name__) diff --git a/kafka/protocol/legacy.py b/kafka/protocol/legacy.py index b8f84e717..7dd258032 100644 --- a/kafka/protocol/legacy.py +++ b/kafka/protocol/legacy.py @@ -15,7 +15,6 @@ from kafka.codec import gzip_encode, snappy_encode from kafka.errors import ProtocolError, UnsupportedCodecError -from kafka.structs import ConsumerMetadataResponse from kafka.util import ( crc32, read_short_string, relative_unpack, write_int_string, group_by_topic_and_partition) @@ -322,7 +321,7 @@ def encode_consumer_metadata_request(cls, client_id, correlation_id, payloads): @classmethod def decode_consumer_metadata_response(cls, data): """ - Decode bytes to a ConsumerMetadataResponse + Decode bytes to a kafka.structs.ConsumerMetadataResponse Arguments: data: bytes to decode @@ -331,7 +330,7 @@ def decode_consumer_metadata_response(cls, data): (host, cur) = read_short_string(data, cur) ((port,), cur) = relative_unpack('>i', data, cur) - return ConsumerMetadataResponse(error, nodeId, host, port) + return kafka.structs.ConsumerMetadataResponse(error, nodeId, host, port) @classmethod def encode_offset_commit_request(cls, group, payloads): diff --git a/kafka/structs.py b/kafka/structs.py index 62f36dd4c..e15e92ed6 100644 --- a/kafka/structs.py +++ b/kafka/structs.py @@ -93,7 +93,3 @@ # Limit value: int >= 0, 0 means no retries RetryOptions = namedtuple("RetryOptions", ["limit", "backoff_ms", "retry_on_timeouts"]) - - -# Support legacy imports from kafka.common -from kafka.errors import * diff --git a/test/test_client_async.py b/test/test_client_async.py index eccb56421..09781ac2c 100644 --- a/test/test_client_async.py +++ b/test/test_client_async.py @@ -13,14 +13,13 @@ import pytest from kafka.client_async import KafkaClient, IdleConnectionManager +from kafka.cluster import ClusterMetadata from kafka.conn import ConnectionStates import kafka.errors as Errors from kafka.future import Future from kafka.protocol.metadata import MetadataResponse, MetadataRequest from kafka.protocol.produce import ProduceRequest from kafka.structs import BrokerMetadata -from kafka.cluster import ClusterMetadata -from kafka.future import Future @pytest.fixture diff --git a/test/test_conn.py b/test/test_conn.py index fbdeeb9e7..27d77beb3 100644 --- a/test/test_conn.py +++ b/test/test_conn.py @@ -13,7 +13,7 @@ from kafka.protocol.metadata import MetadataRequest from kafka.protocol.produce import ProduceRequest -import kafka.common as Errors +import kafka.errors 
as Errors @pytest.fixture diff --git a/test/test_coordinator.py b/test/test_coordinator.py index 7a2627ea0..4afdcd9ac 100644 --- a/test/test_coordinator.py +++ b/test/test_coordinator.py @@ -5,7 +5,6 @@ import pytest from kafka.client_async import KafkaClient -from kafka.structs import TopicPartition, OffsetAndMetadata from kafka.consumer.subscription_state import ( SubscriptionState, ConsumerRebalanceListener) from kafka.coordinator.assignors.range import RangePartitionAssignor @@ -21,6 +20,7 @@ OffsetCommitRequest, OffsetCommitResponse, OffsetFetchRequest, OffsetFetchResponse) from kafka.protocol.metadata import MetadataResponse +from kafka.structs import TopicPartition, OffsetAndMetadata from kafka.util import WeakMethod @@ -34,7 +34,7 @@ def coordinator(client): def test_init(client, coordinator): - # metadata update on init + # metadata update on init assert client.cluster._need_update is True assert WeakMethod(coordinator._handle_metadata_update) in client.cluster._listeners @@ -542,7 +542,7 @@ def test_send_offset_fetch_request_success(patched_coord, partitions): response = OffsetFetchResponse[0]([('foobar', [(0, 123, b'', 0), (1, 234, b'', 0)])]) _f.success(response) patched_coord._handle_offset_fetch_response.assert_called_with( - future, response) + future, response) @pytest.mark.parametrize('response,error,dead', [ diff --git a/test/test_fetcher.py b/test/test_fetcher.py index fc031f742..c82101818 100644 --- a/test/test_fetcher.py +++ b/test/test_fetcher.py @@ -12,16 +12,16 @@ CompletedFetch, ConsumerRecord, Fetcher, NoOffsetForPartitionError ) from kafka.consumer.subscription_state import SubscriptionState +from kafka.future import Future from kafka.metrics import Metrics from kafka.protocol.fetch import FetchRequest, FetchResponse from kafka.protocol.offset import OffsetResponse -from kafka.structs import TopicPartition -from kafka.future import Future from kafka.errors import ( StaleMetadata, LeaderNotAvailableError, NotLeaderForPartitionError, UnknownTopicOrPartitionError, OffsetOutOfRangeError ) from kafka.record.memory_records import MemoryRecordsBuilder, MemoryRecords +from kafka.structs import TopicPartition @pytest.fixture diff --git a/test/test_util.py b/test/test_util.py index 58e5ab840..fb592e8e6 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -5,8 +5,8 @@ from . import unittest import kafka.errors -import kafka.util import kafka.structs +import kafka.util class UtilTest(unittest.TestCase): diff --git a/test/testutil.py b/test/testutil.py index 365e47f3b..a1383a0a0 100644 --- a/test/testutil.py +++ b/test/testutil.py @@ -11,10 +11,12 @@ from . 
import unittest from kafka import SimpleClient, create_message -from kafka.errors import LeaderNotAvailableError, KafkaTimeoutError, InvalidTopicError -from kafka.structs import OffsetRequestPayload, ProduceRequestPayload, \ - NotLeaderForPartitionError, UnknownTopicOrPartitionError, \ - FailedPayloadsError +from kafka.errors import ( + LeaderNotAvailableError, KafkaTimeoutError, InvalidTopicError, + NotLeaderForPartitionError, UnknownTopicOrPartitionError, + FailedPayloadsError +) +from kafka.structs import OffsetRequestPayload, ProduceRequestPayload from test.fixtures import random_string, version_str_to_list, version as kafka_version #pylint: disable=wrong-import-order def kafka_versions(*versions): From 9ac3cb1ec220ff9968a8b003b02e98dd11cc486b Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Tue, 26 Jun 2018 12:47:12 -0700 Subject: [PATCH 02/28] Document connections_max_idle_ms This was added in #1068 but never documented. Fix #1497 --- kafka/client_async.py | 5 +++++ kafka/consumer/group.py | 5 +++++ kafka/producer/kafka.py | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/kafka/client_async.py b/kafka/client_async.py index a9704fafd..5a16f6bba 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -79,6 +79,11 @@ class KafkaClient(object): the computed value. Default: 1000. request_timeout_ms (int): Client request timeout in milliseconds. Default: 30000. + connections_max_idle_ms: Close idle connections after the number of + milliseconds specified by this config. The broker closes idle + connections after connections.max.idle.ms, so this avoids hitting + unexpected socket disconnected errors on the client. + Default: 540000 retry_backoff_ms (int): Milliseconds to backoff when retrying on errors. Default: 100. max_in_flight_requests_per_connection (int): Requests are pipelined diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 9abf15e9b..1c3ec6385 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -212,6 +212,11 @@ class KafkaConsumer(six.Iterator): api_version_auto_timeout_ms (int): number of milliseconds to throw a timeout exception from the constructor when checking the broker api version. Only applies if api_version set to 'auto' + connections_max_idle_ms: Close idle connections after the number of + milliseconds specified by this config. The broker closes idle + connections after connections.max.idle.ms, so this avoids hitting + unexpected socket disconnected errors on the client. + Default: 540000 metric_reporters (list): A list of classes to use as metrics reporters. Implementing the AbstractMetricsReporter interface allows plugging in classes that will be notified of new metric creation. Default: [] diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py index 7d52bdfa7..719acef59 100644 --- a/kafka/producer/kafka.py +++ b/kafka/producer/kafka.py @@ -171,6 +171,11 @@ class KafkaProducer(object): will block up to max_block_ms, raising an exception on timeout. In the current implementation, this setting is an approximation. Default: 33554432 (32MB) + connections_max_idle_ms: Close idle connections after the number of + milliseconds specified by this config. The broker closes idle + connections after connections.max.idle.ms, so this avoids hitting + unexpected socket disconnected errors on the client. + Default: 540000 max_block_ms (int): Number of milliseconds to block during :meth:`~kafka.KafkaProducer.send` and :meth:`~kafka.KafkaProducer.partitions_for`. 
These methods can be From a7d3063d5fa1c3cb2a76c16231bb3028a6f8cde9 Mon Sep 17 00:00:00 2001 From: Ning Xie Date: Fri, 31 Aug 2018 21:01:46 +0800 Subject: [PATCH 03/28] add support for smaller topic metadata fetch during bootstrap (#1541) --- kafka/client_async.py | 13 ++++++++++--- kafka/conn.py | 6 +++--- kafka/producer/kafka.py | 1 + 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/kafka/client_async.py b/kafka/client_async.py index 5a16f6bba..c0072aeda 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -149,6 +149,7 @@ class KafkaClient(object): DEFAULT_CONFIG = { 'bootstrap_servers': 'localhost', + 'bootstrap_topics_filter': set(), 'client_id': 'kafka-python-' + __version__, 'request_timeout_ms': 30000, 'connections_max_idle_ms': 9 * 60 * 1000, @@ -236,9 +237,15 @@ def _bootstrap(self, hosts): self._last_bootstrap = time.time() if self.config['api_version'] is None or self.config['api_version'] < (0, 10): - metadata_request = MetadataRequest[0]([]) + if self.config['bootstrap_topics_filter']: + metadata_request = MetadataRequest[0](list(self.config['bootstrap_topics_filter'])) + else: + metadata_request = MetadataRequest[0]([]) else: - metadata_request = MetadataRequest[1](None) + if self.config['bootstrap_topics_filter']: + metadata_request = MetadataRequest[1](list(self.config['bootstrap_topics_filter'])) + else: + metadata_request = MetadataRequest[1](None) for host, port, afi in hosts: log.debug("Attempting to bootstrap via node at %s:%s", host, port) @@ -830,7 +837,7 @@ def check_version(self, node_id=None, timeout=2, strict=False): self._refresh_on_disconnects = False try: remaining = end - time.time() - version = conn.check_version(timeout=remaining, strict=strict) + version = conn.check_version(timeout=remaining, strict=strict, topics=list(self.config['bootstrap_topics_filter'])) return version except Errors.NodeNotReadyError: # Only raise to user if this is a node-specific request diff --git a/kafka/conn.py b/kafka/conn.py index a2d5ee6cc..122297b4b 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -892,7 +892,7 @@ def _infer_broker_version_from_api_versions(self, api_versions): # so if all else fails, choose that return (0, 10, 0) - def check_version(self, timeout=2, strict=False): + def check_version(self, timeout=2, strict=False, topics=[]): """Attempt to guess the broker version. Note: This is a blocking call. 
@@ -925,7 +925,7 @@ def check_version(self, timeout=2, strict=False): ((0, 9), ListGroupsRequest[0]()), ((0, 8, 2), GroupCoordinatorRequest[0]('kafka-python-default-group')), ((0, 8, 1), OffsetFetchRequest[0]('kafka-python-default-group', [])), - ((0, 8, 0), MetadataRequest[0]([])), + ((0, 8, 0), MetadataRequest[0](topics)), ] for version, request in test_cases: @@ -941,7 +941,7 @@ def check_version(self, timeout=2, strict=False): # the attempt to write to a disconnected socket should # immediately fail and allow us to infer that the prior # request was unrecognized - mr = self.send(MetadataRequest[0]([])) + mr = self.send(MetadataRequest[0](topics)) selector = self.config['selector']() selector.register(self._sock, selectors.EVENT_READ) diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py index 719acef59..d8fb5dc2f 100644 --- a/kafka/producer/kafka.py +++ b/kafka/producer/kafka.py @@ -281,6 +281,7 @@ class KafkaProducer(object): 'key_serializer': None, 'value_serializer': None, 'acks': 1, + 'bootstrap_topics_filter': set(), 'compression_type': None, 'retries': 0, 'batch_size': 16384, From 36b53f487778e919dfe6a5940dc25c552444cc7c Mon Sep 17 00:00:00 2001 From: the-sea Date: Fri, 31 Aug 2018 21:03:26 +0800 Subject: [PATCH 04/28] add kerberos domain name config for gssapi sasl mechanism handshake (#1542) --- kafka/client_async.py | 3 +++ kafka/conn.py | 8 ++++++-- kafka/consumer/group.py | 5 ++++- kafka/producer/kafka.py | 5 ++++- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/kafka/client_async.py b/kafka/client_async.py index c0072aeda..5a161bb6a 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -145,6 +145,8 @@ class KafkaClient(object): Default: None sasl_kerberos_service_name (str): Service name to include in GSSAPI sasl mechanism handshake. Default: 'kafka' + sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI + sasl mechanism handshake. Default: one of bootstrap servers """ DEFAULT_CONFIG = { @@ -180,6 +182,7 @@ class KafkaClient(object): 'sasl_plain_username': None, 'sasl_plain_password': None, 'sasl_kerberos_service_name': 'kafka', + 'sasl_kerberos_domain_name': None } def __init__(self, **configs): diff --git a/kafka/conn.py b/kafka/conn.py index 122297b4b..ccaa2ed62 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -176,6 +176,8 @@ class BrokerConnection(object): Default: None sasl_kerberos_service_name (str): Service name to include in GSSAPI sasl mechanism handshake. Default: 'kafka' + sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI + sasl mechanism handshake. 
Default: one of bootstrap servers """ DEFAULT_CONFIG = { @@ -206,7 +208,8 @@ class BrokerConnection(object): 'sasl_mechanism': 'PLAIN', 'sasl_plain_username': None, 'sasl_plain_password': None, - 'sasl_kerberos_service_name': 'kafka' + 'sasl_kerberos_service_name': 'kafka', + 'sasl_kerberos_domain_name': None } SECURITY_PROTOCOLS = ('PLAINTEXT', 'SSL', 'SASL_PLAINTEXT', 'SASL_SSL') SASL_MECHANISMS = ('PLAIN', 'GSSAPI') @@ -567,7 +570,8 @@ def _try_authenticate_plain(self, future): return future.success(True) def _try_authenticate_gssapi(self, future): - auth_id = self.config['sasl_kerberos_service_name'] + '@' + self.host + kerberos_damin_name = self.config['sasl_kerberos_domain_name'] or self.host + auth_id = self.config['sasl_kerberos_service_name'] + '@' + kerberos_damin_name gssapi_name = gssapi.Name( auth_id, name_type=gssapi.NameType.hostbased_service diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 1c3ec6385..279cce033 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -240,6 +240,8 @@ class KafkaConsumer(six.Iterator): Default: None sasl_kerberos_service_name (str): Service name to include in GSSAPI sasl mechanism handshake. Default: 'kafka' + sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI + sasl mechanism handshake. Default: one of bootstrap servers Note: Configuration parameters are described in more detail at @@ -298,7 +300,8 @@ class KafkaConsumer(six.Iterator): 'sasl_mechanism': None, 'sasl_plain_username': None, 'sasl_plain_password': None, - 'sasl_kerberos_service_name': 'kafka' + 'sasl_kerberos_service_name': 'kafka', + 'sasl_kerberos_domain_name': None } DEFAULT_SESSION_TIMEOUT_MS_0_9 = 30000 diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py index d8fb5dc2f..24b58fe6d 100644 --- a/kafka/producer/kafka.py +++ b/kafka/producer/kafka.py @@ -270,6 +270,8 @@ class KafkaProducer(object): Default: None sasl_kerberos_service_name (str): Service name to include in GSSAPI sasl mechanism handshake. Default: 'kafka' + sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI + sasl mechanism handshake. Default: one of bootstrap servers Note: Configuration parameters are described in more detail at @@ -319,7 +321,8 @@ class KafkaProducer(object): 'sasl_mechanism': None, 'sasl_plain_username': None, 'sasl_plain_password': None, - 'sasl_kerberos_service_name': 'kafka' + 'sasl_kerberos_service_name': 'kafka', + 'sasl_kerberos_domain_name': None } _COMPRESSORS = { From 5a04bc78f3392038733d65fc1e4830c8b14cd6fd Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Fri, 31 Aug 2018 06:11:23 -0700 Subject: [PATCH 05/28] Return future from commit_offsets_async (#1560) --- kafka/coordinator/consumer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kafka/coordinator/consumer.py b/kafka/coordinator/consumer.py index f90d1821d..647a6b585 100644 --- a/kafka/coordinator/consumer.py +++ b/kafka/coordinator/consumer.py @@ -441,10 +441,13 @@ def commit_offsets_async(self, offsets, callback=None): response will be either an Exception or a OffsetCommitResponse struct. This callback can be used to trigger custom actions when a commit request completes. 
+ + Returns: + kafka.future.Future """ self._invoke_completed_offset_commit_callbacks() if not self.coordinator_unknown(): - self._do_commit_offsets_async(offsets, callback) + future = self._do_commit_offsets_async(offsets, callback) else: # we don't know the current coordinator, so try to find it and then # send the commit or fail (we don't want recursive retries which can @@ -464,6 +467,8 @@ def commit_offsets_async(self, offsets, callback=None): # through delayed task execution. self._client.poll(timeout_ms=0) # no wakeup if we add that feature + return future + def _do_commit_offsets_async(self, offsets, callback=None): assert self.config['api_version'] >= (0, 8, 1), 'Unsupported Broker API' assert all(map(lambda k: isinstance(k, TopicPartition), offsets)) From 506822906e20b713d1d06b8a3e9b10bb04d803dc Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Fri, 31 Aug 2018 19:12:44 +0530 Subject: [PATCH 06/28] Clear the metrics dictionary on close. (#1569) --- kafka/metrics/metrics.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kafka/metrics/metrics.py b/kafka/metrics/metrics.py index e9c465deb..f2e99edc9 100644 --- a/kafka/metrics/metrics.py +++ b/kafka/metrics/metrics.py @@ -257,3 +257,5 @@ def close(self): """Close this metrics repository.""" for reporter in self._reporters: reporter.close() + + self._metrics.clear() From ba7372e44ffa1ee49fb4d5efbd67534393e944db Mon Sep 17 00:00:00 2001 From: Jonathan Emord Date: Sat, 8 Sep 2018 14:06:42 -0400 Subject: [PATCH 07/28] Remove ConsumerTimeout --- kafka/errors.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kafka/errors.py b/kafka/errors.py index 47d228e48..fb9576c3f 100644 --- a/kafka/errors.py +++ b/kafka/errors.py @@ -476,10 +476,6 @@ class ConsumerNoMoreData(KafkaError): pass -class ConsumerTimeout(KafkaError): - pass - - class ProtocolError(KafkaError): pass From 5825c67cf9b90c9e8045fcfc064c562a2888725c Mon Sep 17 00:00:00 2001 From: Ben Harack Date: Thu, 20 Sep 2018 13:46:05 -0700 Subject: [PATCH 08/28] Expose ConsumerRebalanceListener in all This solves a warning in linters like PyCharm, which warns that a line like: from kafka import ConsumerRebalanceListener is actually accessing a protected member of a class or module. Adding it to __all__ should solve this. 
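As a rough illustration of the import pattern this change makes lint-clean (the listener class, topic name, and bootstrap server below are hypothetical, not part of this patch):

    from kafka import KafkaConsumer, ConsumerRebalanceListener

    class LogRebalanceListener(ConsumerRebalanceListener):
        # Invoked by the consumer before a rebalance completes
        def on_partitions_revoked(self, revoked):
            print('revoked: %s' % (revoked,))

        # Invoked once the new partition assignment is received
        def on_partitions_assigned(self, assigned):
            print('assigned: %s' % (assigned,))

    consumer = KafkaConsumer(bootstrap_servers='localhost:9092')
    consumer.subscribe(topics=['example-topic'], listener=LogRebalanceListener())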
--- kafka/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/__init__.py b/kafka/__init__.py index ff364d345..897ebb095 100644 --- a/kafka/__init__.py +++ b/kafka/__init__.py @@ -50,5 +50,5 @@ def __init__(self, *args, **kwargs): 'SimpleClient', 'SimpleProducer', 'KeyedProducer', 'RoundRobinPartitioner', 'HashedPartitioner', 'create_message', 'create_gzip_message', 'create_snappy_message', - 'SimpleConsumer', 'MultiProcessConsumer', + 'SimpleConsumer', 'MultiProcessConsumer', 'ConsumerRebalanceListener', ] From 9d30ab8bdbbd7e722ba4a96a6883a965d577d3cc Mon Sep 17 00:00:00 2001 From: Heikki Nousiainen Date: Wed, 29 Aug 2018 17:02:48 +0300 Subject: [PATCH 09/28] Add positive tests for headers in record encode/decode --- test/record/test_default_records.py | 6 +++++- test/record/test_records.py | 15 +++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/test/record/test_default_records.py b/test/record/test_default_records.py index 6e2f5e8ac..c3a7b02c8 100644 --- a/test/record/test_default_records.py +++ b/test/record/test_default_records.py @@ -119,8 +119,12 @@ def test_default_batch_builder_validates_arguments(): builder.append( 5, timestamp=9999999, key=b"123", value=None, headers=[]) + # Check record with headers + builder.append( + 6, timestamp=9999999, key=b"234", value=None, headers=[("hkey", b"hval")]) + # in case error handling code fails to fix inner buffer in builder - assert len(builder.build()) == 104 + assert len(builder.build()) == 124 def test_default_correct_metadata_response(): diff --git a/test/record/test_records.py b/test/record/test_records.py index 224989f38..f1b8baa40 100644 --- a/test/record/test_records.py +++ b/test/record/test_records.py @@ -22,6 +22,11 @@ b'\x85\xb7\x00\x00\x00\x00\x00\x00\x00\x00\x01]\xff|\xe7\x9d\x00\x00\x01]' b'\xff|\xe7\x9d\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff' b'\x00\x00\x00\x01\x12\x00\x00\x00\x01\x06123\x00' + # Fourth batch value = "hdr" with header hkey=hval + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00E\x00\x00\x00\x00\x02\\' + b'\xd8\xefR\x00\x00\x00\x00\x00\x00\x00\x00\x01e\x85\xb6\xf3\xc1\x00\x00' + b'\x01e\x85\xb6\xf3\xc1\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff' + b'\xff\xff\x00\x00\x00\x01&\x00\x00\x00\x01\x06hdr\x02\x08hkey\x08hval' ] record_batch_data_v1 = [ @@ -60,8 +65,8 @@ def test_memory_records_v2(): data_bytes = b"".join(record_batch_data_v2) + b"\x00" * 4 records = MemoryRecords(data_bytes) - assert records.size_in_bytes() == 222 - assert records.valid_bytes() == 218 + assert records.size_in_bytes() == 303 + assert records.valid_bytes() == 299 assert records.has_next() is True batch = records.next_batch() @@ -77,6 +82,12 @@ def test_memory_records_v2(): assert records.next_batch() is not None assert records.next_batch() is not None + batch = records.next_batch() + recs = list(batch) + assert len(recs) == 1 + assert recs[0].value == b"hdr" + assert recs[0].headers == [('hkey', b'hval')] + assert records.has_next() is False assert records.next_batch() is None assert records.next_batch() is None From 0ca4313170df2657456009af5550942ace9f1a81 Mon Sep 17 00:00:00 2001 From: Heikki Nousiainen Date: Tue, 14 Aug 2018 15:38:42 +0300 Subject: [PATCH 10/28] Expose record headers in ConsumerRecords --- README.rst | 5 +++++ kafka/consumer/fetcher.py | 8 +++++--- test/test_fetcher.py | 6 +++--- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index dcade4339..28cb7e77c 100644 --- a/README.rst +++ b/README.rst @@ 
-70,6 +70,11 @@ that expose basic message attributes: topic, partition, offset, key, and value: >>> for msg in consumer: ... assert isinstance(msg.value, dict) +>>> # Access record headers. The returned value is a list of tuples +>>> # with str, bytes for key and value +>>> for msg in consumer: +... print (msg.headers) + >>> # Get consumer metrics >>> metrics = consumer.metrics() diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py index 6ec1b71ed..7d58b7caa 100644 --- a/kafka/consumer/fetcher.py +++ b/kafka/consumer/fetcher.py @@ -29,7 +29,7 @@ ConsumerRecord = collections.namedtuple("ConsumerRecord", ["topic", "partition", "offset", "timestamp", "timestamp_type", - "key", "value", "checksum", "serialized_key_size", "serialized_value_size"]) + "key", "value", "headers", "checksum", "serialized_key_size", "serialized_value_size", "serialized_header_size"]) CompletedFetch = collections.namedtuple("CompletedFetch", @@ -456,10 +456,12 @@ def _unpack_message_set(self, tp, records): value = self._deserialize( self.config['value_deserializer'], tp.topic, record.value) + headers = record.headers + header_size = sum(len(h_key.encode("utf-8")) + len(h_val) for h_key, h_val in headers) if headers else -1 yield ConsumerRecord( tp.topic, tp.partition, record.offset, record.timestamp, - record.timestamp_type, key, value, record.checksum, - key_size, value_size) + record.timestamp_type, key, value, headers, record.checksum, + key_size, value_size, header_size) batch = records.next_batch() diff --git a/test/test_fetcher.py b/test/test_fetcher.py index c82101818..e37a70db5 100644 --- a/test/test_fetcher.py +++ b/test/test_fetcher.py @@ -509,7 +509,7 @@ def test_partition_records_offset(): fetch_offset = 123 tp = TopicPartition('foo', 0) messages = [ConsumerRecord(tp.topic, tp.partition, i, - None, None, 'key', 'value', 'checksum', 0, 0) + None, None, 'key', 'value', [], 'checksum', 0, 0, -1) for i in range(batch_start, batch_end)] records = Fetcher.PartitionRecords(fetch_offset, None, messages) assert len(records) > 0 @@ -534,7 +534,7 @@ def test_partition_records_no_fetch_offset(): fetch_offset = 123 tp = TopicPartition('foo', 0) messages = [ConsumerRecord(tp.topic, tp.partition, i, - None, None, 'key', 'value', 'checksum', 0, 0) + None, None, 'key', 'value', None, 'checksum', 0, 0, -1) for i in range(batch_start, batch_end)] records = Fetcher.PartitionRecords(fetch_offset, None, messages) assert len(records) == 0 @@ -549,7 +549,7 @@ def test_partition_records_compacted_offset(): fetch_offset = 42 tp = TopicPartition('foo', 0) messages = [ConsumerRecord(tp.topic, tp.partition, i, - None, None, 'key', 'value', 'checksum', 0, 0) + None, None, 'key', 'value', None, 'checksum', 0, 0, -1) for i in range(batch_start, batch_end) if i != fetch_offset] records = Fetcher.PartitionRecords(fetch_offset, None, messages) assert len(records) == batch_end - fetch_offset - 1 From 08c77499a2e8bc79d6788d70ef96d77752ed6325 Mon Sep 17 00:00:00 2001 From: Heikki Nousiainen Date: Tue, 14 Aug 2018 15:17:23 +0300 Subject: [PATCH 11/28] Support produce with Kafka record headers --- README.rst | 4 ++++ kafka/producer/future.py | 10 +++++----- kafka/producer/kafka.py | 18 +++++++++++++----- kafka/producer/record_accumulator.py | 16 +++++++++------- test/test_producer.py | 10 +++++++++- 5 files changed, 40 insertions(+), 18 deletions(-) diff --git a/README.rst b/README.rst index 28cb7e77c..a82573bbf 100644 --- a/README.rst +++ b/README.rst @@ -117,6 +117,10 @@ for more details. >>> for i in range(1000): ... 
producer.send('foobar', b'msg %d' % i) +>>> # Include record headers. The format is list of tuples with string key +>>> # and bytes value. +>>> producer.send('foobar', value=b'c29tZSB2YWx1ZQ==', headers=[('content-encoding', b'base64')]) + >>> # Get producer performance metrics >>> metrics = producer.metrics() diff --git a/kafka/producer/future.py b/kafka/producer/future.py index aa216c4e5..1c5d6d7bf 100644 --- a/kafka/producer/future.py +++ b/kafka/producer/future.py @@ -29,11 +29,11 @@ def wait(self, timeout=None): class FutureRecordMetadata(Future): - def __init__(self, produce_future, relative_offset, timestamp_ms, checksum, serialized_key_size, serialized_value_size): + def __init__(self, produce_future, relative_offset, timestamp_ms, checksum, serialized_key_size, serialized_value_size, serialized_header_size): super(FutureRecordMetadata, self).__init__() self._produce_future = produce_future # packing args as a tuple is a minor speed optimization - self.args = (relative_offset, timestamp_ms, checksum, serialized_key_size, serialized_value_size) + self.args = (relative_offset, timestamp_ms, checksum, serialized_key_size, serialized_value_size, serialized_header_size) produce_future.add_callback(self._produce_success) produce_future.add_errback(self.failure) @@ -42,7 +42,7 @@ def _produce_success(self, offset_and_timestamp): # Unpacking from args tuple is minor speed optimization (relative_offset, timestamp_ms, checksum, - serialized_key_size, serialized_value_size) = self.args + serialized_key_size, serialized_value_size, serialized_header_size) = self.args # None is when Broker does not support the API (<0.10) and # -1 is when the broker is configured for CREATE_TIME timestamps @@ -53,7 +53,7 @@ def _produce_success(self, offset_and_timestamp): tp = self._produce_future.topic_partition metadata = RecordMetadata(tp[0], tp[1], tp, offset, timestamp_ms, checksum, serialized_key_size, - serialized_value_size) + serialized_value_size, serialized_header_size) self.success(metadata) def get(self, timeout=None): @@ -68,4 +68,4 @@ def get(self, timeout=None): RecordMetadata = collections.namedtuple( 'RecordMetadata', ['topic', 'partition', 'topic_partition', 'offset', 'timestamp', - 'checksum', 'serialized_key_size', 'serialized_value_size']) + 'checksum', 'serialized_key_size', 'serialized_value_size', 'serialized_header_size']) diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py index 24b58fe6d..4fc7bc687 100644 --- a/kafka/producer/kafka.py +++ b/kafka/producer/kafka.py @@ -513,7 +513,7 @@ def _estimate_size_in_bytes(self, key, value, headers=[]): return LegacyRecordBatchBuilder.estimate_size_in_bytes( magic, self.config['compression_type'], key, value) - def send(self, topic, value=None, key=None, partition=None, timestamp_ms=None): + def send(self, topic, value=None, key=None, headers=None, partition=None, timestamp_ms=None): """Publish a message to a topic. Arguments: @@ -534,6 +534,8 @@ def send(self, topic, value=None, key=None, partition=None, timestamp_ms=None): partition (but if key is None, partition is chosen randomly). Must be type bytes, or be serializable to bytes via configured key_serializer. + headers (optional): a list of header key value pairs. List items + are tuples of str key and bytes value. timestamp_ms (int, optional): epoch milliseconds (from Jan 1 1970 UTC) to use as the message timestamp. Defaults to current time. 
@@ -563,13 +565,18 @@ def send(self, topic, value=None, key=None, partition=None, timestamp_ms=None): partition = self._partition(topic, partition, key, value, key_bytes, value_bytes) - message_size = self._estimate_size_in_bytes(key_bytes, value_bytes) + if headers is None: + headers = [] + assert type(headers) == list + assert all(type(item) == tuple and len(item) == 2 and type(item[0]) == str and type(item[1]) == bytes for item in headers) + + message_size = self._estimate_size_in_bytes(key_bytes, value_bytes, headers) self._ensure_valid_record_size(message_size) tp = TopicPartition(topic, partition) - log.debug("Sending (key=%r value=%r) to %s", key, value, tp) + log.debug("Sending (key=%r value=%r headers=%r) to %s", key, value, headers, tp) result = self._accumulator.append(tp, timestamp_ms, - key_bytes, value_bytes, + key_bytes, value_bytes, headers, self.config['max_block_ms'], estimated_size=message_size) future, batch_is_full, new_batch_created = result @@ -588,7 +595,8 @@ def send(self, topic, value=None, key=None, partition=None, timestamp_ms=None): FutureProduceResult(TopicPartition(topic, partition)), -1, None, None, len(key_bytes) if key_bytes is not None else -1, - len(value_bytes) if value_bytes is not None else -1 + len(value_bytes) if value_bytes is not None else -1, + sum(len(h_key.encode("utf-8")) + len(h_value) for h_key, h_value in headers) if headers else -1, ).failure(e) def flush(self, timeout=None): diff --git a/kafka/producer/record_accumulator.py b/kafka/producer/record_accumulator.py index 1cd541356..84b01d1b5 100644 --- a/kafka/producer/record_accumulator.py +++ b/kafka/producer/record_accumulator.py @@ -55,8 +55,8 @@ def __init__(self, tp, records, buffer): def record_count(self): return self.records.next_offset() - def try_append(self, timestamp_ms, key, value): - metadata = self.records.append(timestamp_ms, key, value) + def try_append(self, timestamp_ms, key, value, headers): + metadata = self.records.append(timestamp_ms, key, value, headers) if metadata is None: return None @@ -65,7 +65,8 @@ def try_append(self, timestamp_ms, key, value): future = FutureRecordMetadata(self.produce_future, metadata.offset, metadata.timestamp, metadata.crc, len(key) if key is not None else -1, - len(value) if value is not None else -1) + len(value) if value is not None else -1, + sum(len(h_key.encode("utf-8")) + len(h_val) for h_key, h_val in headers) if headers else -1) return future def done(self, base_offset=None, timestamp_ms=None, exception=None): @@ -196,7 +197,7 @@ def __init__(self, **configs): self.muted = set() self._drain_index = 0 - def append(self, tp, timestamp_ms, key, value, max_time_to_block_ms, + def append(self, tp, timestamp_ms, key, value, headers, max_time_to_block_ms, estimated_size=0): """Add a record to the accumulator, return the append result. 
@@ -209,6 +210,7 @@ def append(self, tp, timestamp_ms, key, value, max_time_to_block_ms, timestamp_ms (int): The timestamp of the record (epoch ms) key (bytes): The key for the record value (bytes): The value for the record + headers (List[Tuple[str, bytes]]): The header fields for the record max_time_to_block_ms (int): The maximum time in milliseconds to block for buffer memory to be available @@ -231,7 +233,7 @@ def append(self, tp, timestamp_ms, key, value, max_time_to_block_ms, dq = self._batches[tp] if dq: last = dq[-1] - future = last.try_append(timestamp_ms, key, value) + future = last.try_append(timestamp_ms, key, value, headers) if future is not None: batch_is_full = len(dq) > 1 or last.records.is_full() return future, batch_is_full, False @@ -246,7 +248,7 @@ def append(self, tp, timestamp_ms, key, value, max_time_to_block_ms, if dq: last = dq[-1] - future = last.try_append(timestamp_ms, key, value) + future = last.try_append(timestamp_ms, key, value, headers) if future is not None: # Somebody else found us a batch, return the one we # waited for! Hopefully this doesn't happen often... @@ -261,7 +263,7 @@ def append(self, tp, timestamp_ms, key, value, max_time_to_block_ms, ) batch = ProducerBatch(tp, records, buf) - future = batch.try_append(timestamp_ms, key, value) + future = batch.try_append(timestamp_ms, key, value, headers) if not future: raise Exception() diff --git a/test/test_producer.py b/test/test_producer.py index 09d184f34..176b23988 100644 --- a/test/test_producer.py +++ b/test/test_producer.py @@ -91,10 +91,16 @@ def test_kafka_producer_proper_record_metadata(kafka_broker, compression): compression_type=compression) magic = producer._max_usable_produce_magic() + # record headers are supported in 0.11.0 + if version() < (0, 11, 0): + headers = None + else: + headers = [("Header Key", b"Header Value")] + topic = random_string(5) future = producer.send( topic, - value=b"Simple value", key=b"Simple key", timestamp_ms=9999999, + value=b"Simple value", key=b"Simple key", headers=headers, timestamp_ms=9999999, partition=0) record = future.get(timeout=5) assert record is not None @@ -116,6 +122,8 @@ def test_kafka_producer_proper_record_metadata(kafka_broker, compression): assert record.serialized_key_size == 10 assert record.serialized_value_size == 12 + if headers: + assert record.serialized_header_size == 22 # generated timestamp case is skipped for broker 0.9 and below if magic == 0: From 0c3f2c176d6d4ae7bffa8d91795e915bc7b4952c Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Mon, 22 Oct 2018 03:01:48 -0700 Subject: [PATCH 12/28] Fix typo in file name --- test/{test_substription_state.py => test_subscription_state.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/{test_substription_state.py => test_subscription_state.py} (100%) diff --git a/test/test_substription_state.py b/test/test_subscription_state.py similarity index 100% rename from test/test_substription_state.py rename to test/test_subscription_state.py From 477ab740c0c105daef0e8411f95c06ad49f7f782 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Sun, 21 Oct 2018 22:36:54 -0700 Subject: [PATCH 13/28] Bump vendored `six` to `1.11.0` Bump `six` to `1.11.0`. Most changes do not affect us, but it's good to stay up to date. Also, we will likely start vendoring `enum34` in which case https://github.com/benjaminp/six/pull/178 is needed. 
Note that this preserves the `kafka-python` customization from https://github.com/dpkp/kafka-python/pull/979 which has been submitted upstream as https://github.com/benjaminp/six/pull/176 but not yet merged. --- kafka/vendor/six.py | 58 ++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/kafka/vendor/six.py b/kafka/vendor/six.py index a949b9539..3621a0ab4 100644 --- a/kafka/vendor/six.py +++ b/kafka/vendor/six.py @@ -1,7 +1,6 @@ # pylint: skip-file -"""Utilities for writing code that runs on Python 2 and 3""" -# Copyright (c) 2010-2015 Benjamin Peterson +# Copyright (c) 2010-2017 Benjamin Peterson # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,6 +20,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +"""Utilities for writing code that runs on Python 2 and 3""" + from __future__ import absolute_import import functools @@ -30,7 +31,7 @@ import types __author__ = "Benjamin Peterson " -__version__ = "1.10.0" +__version__ = "1.11.0" # Useful for very coarse version differentiation. @@ -71,7 +72,9 @@ def __len__(self): # 64-bit MAXSIZE = int((1 << 63) - 1) - # Don't del it here, cause with gc disabled this "leaks" to garbage + # Don't del it here, cause with gc disabled this "leaks" to garbage. + # Note: This is a kafka-python customization, details at: + # https://github.com/dpkp/kafka-python/pull/979#discussion_r100403389 # del X @@ -244,6 +247,7 @@ class _MovedItems(_LazyModule): MovedAttribute("map", "itertools", "builtins", "imap", "map"), MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"), MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"), + MovedAttribute("getoutput", "commands", "subprocess"), MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"), MovedAttribute("reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload"), MovedAttribute("reduce", "__builtin__", "functools"), @@ -265,10 +269,11 @@ class _MovedItems(_LazyModule): MovedModule("html_entities", "htmlentitydefs", "html.entities"), MovedModule("html_parser", "HTMLParser", "html.parser"), MovedModule("http_client", "httplib", "http.client"), + MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), + MovedModule("email_mime_image", "email.MIMEImage", "email.mime.image"), MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"), MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"), MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"), - MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), @@ -340,10 +345,12 @@ class Module_six_moves_urllib_parse(_LazyModule): MovedAttribute("quote_plus", "urllib", "urllib.parse"), MovedAttribute("unquote", "urllib", "urllib.parse"), MovedAttribute("unquote_plus", "urllib", "urllib.parse"), + MovedAttribute("unquote_to_bytes", "urllib", "urllib.parse", "unquote", "unquote_to_bytes"), MovedAttribute("urlencode", "urllib", "urllib.parse"), MovedAttribute("splitquery", "urllib", "urllib.parse"), MovedAttribute("splittag", "urllib", "urllib.parse"), MovedAttribute("splituser", "urllib", "urllib.parse"), + MovedAttribute("splitvalue", 
"urllib", "urllib.parse"), MovedAttribute("uses_fragment", "urlparse", "urllib.parse"), MovedAttribute("uses_netloc", "urlparse", "urllib.parse"), MovedAttribute("uses_params", "urlparse", "urllib.parse"), @@ -419,6 +426,8 @@ class Module_six_moves_urllib_request(_LazyModule): MovedAttribute("URLopener", "urllib", "urllib.request"), MovedAttribute("FancyURLopener", "urllib", "urllib.request"), MovedAttribute("proxy_bypass", "urllib", "urllib.request"), + MovedAttribute("parse_http_list", "urllib2", "urllib.request"), + MovedAttribute("parse_keqv_list", "urllib2", "urllib.request"), ] for attr in _urllib_request_moved_attributes: setattr(Module_six_moves_urllib_request, attr.name, attr) @@ -682,11 +691,15 @@ def assertRegex(self, *args, **kwargs): exec_ = getattr(moves.builtins, "exec") def reraise(tp, value, tb=None): - if value is None: - value = tp() - if value.__traceback__ is not tb: - raise value.with_traceback(tb) - raise value + try: + if value is None: + value = tp() + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + finally: + value = None + tb = None else: def exec_(_code_, _globs_=None, _locs_=None): @@ -702,19 +715,28 @@ def exec_(_code_, _globs_=None, _locs_=None): exec("""exec _code_ in _globs_, _locs_""") exec_("""def reraise(tp, value, tb=None): - raise tp, value, tb + try: + raise tp, value, tb + finally: + tb = None """) if sys.version_info[:2] == (3, 2): exec_("""def raise_from(value, from_value): - if from_value is None: - raise value - raise value from from_value + try: + if from_value is None: + raise value + raise value from from_value + finally: + value = None """) elif sys.version_info[:2] > (3, 2): exec_("""def raise_from(value, from_value): - raise value from from_value + try: + raise value from from_value + finally: + value = None """) else: def raise_from(value, from_value): @@ -805,10 +827,14 @@ def with_metaclass(meta, *bases): # This requires a bit of explanation: the basic idea is to make a dummy # metaclass for one level of class instantiation that replaces itself with # the actual metaclass. - class metaclass(meta): + class metaclass(type): def __new__(cls, name, this_bases, d): return meta(name, bases, d) + + @classmethod + def __prepare__(cls, name, this_bases): + return meta.__prepare__(name, bases) return type.__new__(metaclass, 'temporary_class', (), {}) From a6be21e7b3a20ce2e25ef26140c43b59cc356f38 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Mon, 22 Oct 2018 00:06:16 -0700 Subject: [PATCH 14/28] Update remote urls: snappy, https, etc Snappy URL was outdated. Similarly, many of these sites now support https. --- benchmarks/README | 2 +- docs/install.rst | 8 ++++---- kafka/producer/kafka.py | 4 ++-- kafka/record/_crc32c.py | 4 ++-- kafka/util.py | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/benchmarks/README b/benchmarks/README index 369e8b626..531b78940 100644 --- a/benchmarks/README +++ b/benchmarks/README @@ -1,4 +1,4 @@ The `record_batch_*` benchmarks in this section are written using ``perf`` library, created by Viktor Stinner. For more information on how to get reliable results of test runs please consult -http://perf.readthedocs.io/en/latest/run_benchmark.html. +https://perf.readthedocs.io/en/latest/run_benchmark.html. 
diff --git a/docs/install.rst b/docs/install.rst index fe740f660..d6473ecd4 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -37,7 +37,7 @@ Optional Snappy install Install Development Libraries ============================= -Download and build Snappy from http://code.google.com/p/snappy/downloads/list +Download and build Snappy from https://google.github.io/snappy/ Ubuntu: @@ -55,9 +55,9 @@ From Source: .. code:: bash - wget http://snappy.googlecode.com/files/snappy-1.0.5.tar.gz - tar xzvf snappy-1.0.5.tar.gz - cd snappy-1.0.5 + wget https://github.com/google/snappy/releases/download/1.1.3/snappy-1.1.3.tar.gz + tar xzvf snappy-1.1.3.tar.gz + cd snappy-1.1.3 ./configure make sudo make install diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py index 4fc7bc687..7878c0a57 100644 --- a/kafka/producer/kafka.py +++ b/kafka/producer/kafka.py @@ -51,7 +51,7 @@ class KafkaProducer(object): 'retries' is configured to 0. Enabling retries also opens up the possibility of duplicates (see the documentation on message delivery semantics for details: - http://kafka.apache.org/documentation.html#semantics + https://kafka.apache.org/documentation.html#semantics ). The producer maintains buffers of unsent records for each partition. These @@ -522,7 +522,7 @@ def send(self, topic, value=None, key=None, headers=None, partition=None, timest serializable to bytes via configured value_serializer. If value is None, key is required and message acts as a 'delete'. See kafka compaction documentation for more details: - http://kafka.apache.org/documentation.html#compaction + https://kafka.apache.org/documentation.html#compaction (compaction requires kafka >= 0.8.1) partition (int, optional): optionally specify a partition. If not set, the partition will be selected using the configured diff --git a/kafka/record/_crc32c.py b/kafka/record/_crc32c.py index 5704f8238..9db2d89af 100644 --- a/kafka/record/_crc32c.py +++ b/kafka/record/_crc32c.py @@ -18,9 +18,9 @@ # limitations under the License. # """Implementation of CRC-32C checksumming as in rfc3720 section B.4. -See http://en.wikipedia.org/wiki/Cyclic_redundancy_check for details on CRC-32C +See https://en.wikipedia.org/wiki/Cyclic_redundancy_check for details on CRC-32C This code is a manual python translation of c code generated by -pycrc 0.7.1 (http://www.tty1.net/pycrc/). Command line used: +pycrc 0.7.1 (https://pycrc.org/). Command line used: './pycrc.py --model=crc-32c --generate c --algorithm=table-driven' """ diff --git a/kafka/util.py b/kafka/util.py index 75538ddb4..9354bd936 100644 --- a/kafka/util.py +++ b/kafka/util.py @@ -134,7 +134,7 @@ def __del__(self): class WeakMethod(object): """ Callable that weakly references a method and the object it is bound to. It - is based on http://stackoverflow.com/a/24287465. + is based on https://stackoverflow.com/a/24287465. 
Arguments: From b83feeca2ec6f6ad745fb7ea47c6484304bb55d8 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Mon, 22 Oct 2018 01:24:50 -0700 Subject: [PATCH 15/28] Vendor `six` consistently Use vendored `six`, and also `six.moves.range` rather than `xrange` --- benchmarks/consumer_performance.py | 4 +++- benchmarks/producer_performance.py | 4 +++- benchmarks/varint_speed.py | 2 +- kafka/codec.py | 4 ++-- kafka/producer/simple.py | 4 ++-- test/fixtures.py | 6 +++--- test/test_client.py | 2 +- test/test_codec.py | 12 ++++++------ test/test_consumer_group.py | 2 +- test/test_consumer_integration.py | 10 +++++----- test/test_producer_integration.py | 2 +- test/test_producer_legacy.py | 6 +++--- test/test_protocol.py | 2 +- test/test_protocol_legacy.py | 2 +- test/test_util.py | 2 +- 15 files changed, 34 insertions(+), 30 deletions(-) diff --git a/benchmarks/consumer_performance.py b/benchmarks/consumer_performance.py index 3e879ae58..5ffd3f5f6 100755 --- a/benchmarks/consumer_performance.py +++ b/benchmarks/consumer_performance.py @@ -10,6 +10,8 @@ import threading import traceback +from kafka.vendor.six.moves import range + from kafka import KafkaConsumer, KafkaProducer from test.fixtures import KafkaFixture, ZookeeperFixture @@ -64,7 +66,7 @@ def run(args): record = bytes(bytearray(args.record_size)) producer = KafkaProducer(compression_type=args.fixture_compression, **props) - for i in xrange(args.num_records): + for i in range(args.num_records): producer.send(topic=args.topic, value=record) producer.flush() producer.close() diff --git a/benchmarks/producer_performance.py b/benchmarks/producer_performance.py index e9587358e..0c29cbc24 100755 --- a/benchmarks/producer_performance.py +++ b/benchmarks/producer_performance.py @@ -9,6 +9,8 @@ import threading import traceback +from kafka.vendor.six.moves import range + from kafka import KafkaProducer from test.fixtures import KafkaFixture, ZookeeperFixture @@ -77,7 +79,7 @@ def run(args): print('-> OK!') print() - for i in xrange(args.num_records): + for i in range(args.num_records): producer.send(topic=args.topic, value=record) producer.flush() diff --git a/benchmarks/varint_speed.py b/benchmarks/varint_speed.py index 2c5cd620d..624a12a42 100644 --- a/benchmarks/varint_speed.py +++ b/benchmarks/varint_speed.py @@ -1,7 +1,7 @@ #!/usr/bin/env python from __future__ import print_function import perf -import six +from kafka.vendor import six test_data = [ diff --git a/kafka/codec.py b/kafka/codec.py index 4d180ddd3..aa9fc8291 100644 --- a/kafka/codec.py +++ b/kafka/codec.py @@ -6,7 +6,7 @@ import struct from kafka.vendor import six -from kafka.vendor.six.moves import xrange # pylint: disable=import-error +from kafka.vendor.six.moves import range _XERIAL_V1_HEADER = (-126, b'S', b'N', b'A', b'P', b'P', b'Y', 0, 1, 1) _XERIAL_V1_FORMAT = 'bccccccBii' @@ -150,7 +150,7 @@ def snappy_encode(payload, xerial_compatible=True, xerial_blocksize=32*1024): chunker = lambda payload, i, size: memoryview(payload)[i:size+i].tobytes() for chunk in (chunker(payload, i, xerial_blocksize) - for i in xrange(0, len(payload), xerial_blocksize)): + for i in range(0, len(payload), xerial_blocksize)): block = snappy.compress(chunk) block_size = len(block) diff --git a/kafka/producer/simple.py b/kafka/producer/simple.py index 91e0abc4c..e06e65954 100644 --- a/kafka/producer/simple.py +++ b/kafka/producer/simple.py @@ -4,7 +4,7 @@ import logging import random -from kafka.vendor.six.moves import xrange # pylint: disable=import-error +from kafka.vendor.six.moves import range from 
kafka.producer.base import Producer @@ -39,7 +39,7 @@ def _next_partition(self, topic): # Randomize the initial partition that is returned if self.random_start: num_partitions = len(self.client.get_partition_ids_for_topic(topic)) - for _ in xrange(random.randint(0, num_partitions-1)): + for _ in range(random.randint(0, num_partitions-1)): next(self.partition_cycles[topic]) return next(self.partition_cycles[topic]) diff --git a/test/fixtures.py b/test/fixtures.py index 493a664a5..08cc951a2 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -12,8 +12,8 @@ import uuid import py -from six.moves import urllib, xrange -from six.moves.urllib.parse import urlparse # pylint: disable=E0611,F0401 +from kafka.vendor.six.moves import urllib, range +from kafka.vendor.six.moves.urllib.parse import urlparse # pylint: disable=E0611,F0401 from kafka import errors, KafkaConsumer, KafkaProducer, SimpleClient from kafka.client_async import KafkaClient @@ -24,7 +24,7 @@ log = logging.getLogger(__name__) def random_string(length): - return "".join(random.choice(string.ascii_letters) for i in xrange(length)) + return "".join(random.choice(string.ascii_letters) for i in range(length)) def version_str_to_list(version_str): return tuple(map(int, version_str.split('.'))) # e.g., (0, 8, 1, 1) diff --git a/test/test_client.py b/test/test_client.py index c53983c94..1c689789b 100644 --- a/test/test_client.py +++ b/test/test_client.py @@ -2,7 +2,7 @@ from mock import ANY, MagicMock, patch from operator import itemgetter -import six +from kafka.vendor import six from . import unittest from kafka import SimpleClient diff --git a/test/test_codec.py b/test/test_codec.py index d31fc8674..e132c1d47 100644 --- a/test/test_codec.py +++ b/test/test_codec.py @@ -4,7 +4,7 @@ import struct import pytest -from six.moves import xrange +from kafka.vendor.six.moves import range from kafka.codec import ( has_snappy, has_gzip, has_lz4, @@ -18,7 +18,7 @@ def test_gzip(): - for i in xrange(1000): + for i in range(1000): b1 = random_string(100).encode('utf-8') b2 = gzip_decode(gzip_encode(b1)) assert b1 == b2 @@ -26,7 +26,7 @@ def test_gzip(): @pytest.mark.skipif(not has_snappy(), reason="Snappy not available") def test_snappy(): - for i in xrange(1000): + for i in range(1000): b1 = random_string(100).encode('utf-8') b2 = snappy_decode(snappy_encode(b1)) assert b1 == b2 @@ -86,7 +86,7 @@ def test_snappy_encode_xerial(): @pytest.mark.skipif(not has_lz4() or platform.python_implementation() == 'PyPy', reason="python-lz4 crashes on old versions of pypy") def test_lz4(): - for i in xrange(1000): + for i in range(1000): b1 = random_string(100).encode('utf-8') b2 = lz4_decode(lz4_encode(b1)) assert len(b1) == len(b2) @@ -96,7 +96,7 @@ def test_lz4(): @pytest.mark.skipif(not has_lz4() or platform.python_implementation() == 'PyPy', reason="python-lz4 crashes on old versions of pypy") def test_lz4_old(): - for i in xrange(1000): + for i in range(1000): b1 = random_string(100).encode('utf-8') b2 = lz4_decode_old_kafka(lz4_encode_old_kafka(b1)) assert len(b1) == len(b2) @@ -106,7 +106,7 @@ def test_lz4_old(): @pytest.mark.skipif(not has_lz4() or platform.python_implementation() == 'PyPy', reason="python-lz4 crashes on old versions of pypy") def test_lz4_incremental(): - for i in xrange(1000): + for i in range(1000): # lz4 max single block size is 4MB # make sure we test with multiple-blocks b1 = random_string(100).encode('utf-8') * 50000 diff --git a/test/test_consumer_group.py b/test/test_consumer_group.py index f9a41a46a..55cf6625d 100644 --- 
a/test/test_consumer_group.py +++ b/test/test_consumer_group.py @@ -4,7 +4,7 @@ import time import pytest -import six +from kafka.vendor import six from kafka import SimpleClient from kafka.conn import ConnectionStates diff --git a/test/test_consumer_integration.py b/test/test_consumer_integration.py index e6f140598..ce934ea1c 100644 --- a/test/test_consumer_integration.py +++ b/test/test_consumer_integration.py @@ -6,8 +6,8 @@ import kafka.codec import pytest -from six.moves import xrange -import six +from kafka.vendor.six.moves import range +from kafka.vendor import six from . import unittest from kafka import ( @@ -473,7 +473,7 @@ def test_offset_behavior__resuming_behavior(self): ) # Grab the first 195 messages - output_msgs1 = [ consumer1.get_message().message.value for _ in xrange(195) ] + output_msgs1 = [ consumer1.get_message().message.value for _ in range(195) ] self.assert_message_count(output_msgs1, 195) # The total offset across both partitions should be at 180 @@ -603,7 +603,7 @@ def test_kafka_consumer__offset_commit_resume(self): # Grab the first 180 messages output_msgs1 = [] - for _ in xrange(180): + for _ in range(180): m = next(consumer1) output_msgs1.append(m) self.assert_message_count(output_msgs1, 180) @@ -619,7 +619,7 @@ def test_kafka_consumer__offset_commit_resume(self): # 181-200 output_msgs2 = [] - for _ in xrange(20): + for _ in range(20): m = next(consumer2) output_msgs2.append(m) self.assert_message_count(output_msgs2, 20) diff --git a/test/test_producer_integration.py b/test/test_producer_integration.py index 6533cfabb..35ce0d7a5 100644 --- a/test/test_producer_integration.py +++ b/test/test_producer_integration.py @@ -3,7 +3,7 @@ import uuid import pytest -from six.moves import range +from kafka.vendor.six.moves import range from kafka import ( SimpleProducer, KeyedProducer, diff --git a/test/test_producer_legacy.py b/test/test_producer_legacy.py index 6d00116c3..ab80ee707 100644 --- a/test/test_producer_legacy.py +++ b/test/test_producer_legacy.py @@ -16,7 +16,7 @@ from kafka.structs import ( ProduceResponsePayload, RetryOptions, TopicPartition) -from six.moves import queue, xrange +from kafka.vendor.six.moves import queue, range class TestKafkaProducer(unittest.TestCase): @@ -84,7 +84,7 @@ def test_producer_async_queue_overfilled(self, mock): message_list = [message] * (queue_size + 1) producer.send_messages(topic, partition, *message_list) self.assertEqual(producer.queue.qsize(), queue_size) - for _ in xrange(producer.queue.qsize()): + for _ in range(producer.queue.qsize()): producer.queue.get() def test_producer_sync_fail_on_error(self): @@ -253,5 +253,5 @@ def send_side_effect(reqs, *args, **kwargs): self.assertEqual(self.client.send_produce_request.call_count, 5) def tearDown(self): - for _ in xrange(self.queue.qsize()): + for _ in range(self.queue.qsize()): self.queue.get() diff --git a/test/test_protocol.py b/test/test_protocol.py index d96365026..7abcefb46 100644 --- a/test/test_protocol.py +++ b/test/test_protocol.py @@ -3,7 +3,7 @@ import struct import pytest -import six +from kafka.vendor import six from kafka.protocol.api import RequestHeader from kafka.protocol.commit import GroupCoordinatorRequest diff --git a/test/test_protocol_legacy.py b/test/test_protocol_legacy.py index d705e3a15..1341af003 100644 --- a/test/test_protocol_legacy.py +++ b/test/test_protocol_legacy.py @@ -2,7 +2,7 @@ from contextlib import contextmanager import struct -import six +from kafka.vendor import six from mock import patch, sentinel from . 
import unittest diff --git a/test/test_util.py b/test/test_util.py index fb592e8e6..a4dbaa5ab 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import struct -import six +from kafka.vendor import six from . import unittest import kafka.errors From a7e28aeacf6579720594bfe9201a8945d2935c3e Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Sun, 21 Oct 2018 22:55:58 -0700 Subject: [PATCH 16/28] Vendor enum34 This is needed for https://github.com/dpkp/kafka-python/pull/1540 While the usage there is trivial and could probably be worked around, I'd rather vendor it so that future code can use enums... since `enum` is already available in the python 3 stdlib, this will be easy enough to eventually stop vendoring whenever we finally drop python 2 support. --- kafka/vendor/enum34.py | 841 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 841 insertions(+) create mode 100644 kafka/vendor/enum34.py diff --git a/kafka/vendor/enum34.py b/kafka/vendor/enum34.py new file mode 100644 index 000000000..5f64bd2d8 --- /dev/null +++ b/kafka/vendor/enum34.py @@ -0,0 +1,841 @@ +# pylint: skip-file +# vendored from: +# https://bitbucket.org/stoneleaf/enum34/src/58c4cd7174ca35f164304c8a6f0a4d47b779c2a7/enum/__init__.py?at=1.1.6 + +"""Python Enumerations""" + +import sys as _sys + +__all__ = ['Enum', 'IntEnum', 'unique'] + +version = 1, 1, 6 + +pyver = float('%s.%s' % _sys.version_info[:2]) + +try: + any +except NameError: + def any(iterable): + for element in iterable: + if element: + return True + return False + +try: + from collections import OrderedDict +except ImportError: + OrderedDict = None + +try: + basestring +except NameError: + # In Python 2 basestring is the ancestor of both str and unicode + # in Python 3 it's just str, but was missing in 3.1 + basestring = str + +try: + unicode +except NameError: + # In Python 3 unicode no longer exists (it's just str) + unicode = str + +class _RouteClassAttributeToGetattr(object): + """Route attribute access on a class to __getattr__. + + This is a descriptor, used to define attributes that act differently when + accessed through an instance and through a class. Instance access remains + normal, but access to an attribute through a class will be routed to the + class's __getattr__ method; this is done by raising AttributeError. 
+ + """ + def __init__(self, fget=None): + self.fget = fget + + def __get__(self, instance, ownerclass=None): + if instance is None: + raise AttributeError() + return self.fget(instance) + + def __set__(self, instance, value): + raise AttributeError("can't set attribute") + + def __delete__(self, instance): + raise AttributeError("can't delete attribute") + + +def _is_descriptor(obj): + """Returns True if obj is a descriptor, False otherwise.""" + return ( + hasattr(obj, '__get__') or + hasattr(obj, '__set__') or + hasattr(obj, '__delete__')) + + +def _is_dunder(name): + """Returns True if a __dunder__ name, False otherwise.""" + return (name[:2] == name[-2:] == '__' and + name[2:3] != '_' and + name[-3:-2] != '_' and + len(name) > 4) + + +def _is_sunder(name): + """Returns True if a _sunder_ name, False otherwise.""" + return (name[0] == name[-1] == '_' and + name[1:2] != '_' and + name[-2:-1] != '_' and + len(name) > 2) + + +def _make_class_unpicklable(cls): + """Make the given class un-picklable.""" + def _break_on_call_reduce(self, protocol=None): + raise TypeError('%r cannot be pickled' % self) + cls.__reduce_ex__ = _break_on_call_reduce + cls.__module__ = '' + + +class _EnumDict(dict): + """Track enum member order and ensure member names are not reused. + + EnumMeta will use the names found in self._member_names as the + enumeration member names. + + """ + def __init__(self): + super(_EnumDict, self).__init__() + self._member_names = [] + + def __setitem__(self, key, value): + """Changes anything not dundered or not a descriptor. + + If a descriptor is added with the same name as an enum member, the name + is removed from _member_names (this may leave a hole in the numerical + sequence of values). + + If an enum member name is used twice, an error is raised; duplicate + values are not checked for. + + Single underscore (sunder) names are reserved. + + Note: in 3.x __order__ is simply discarded as a not necessary piece + leftover from 2.x + + """ + if pyver >= 3.0 and key in ('_order_', '__order__'): + return + elif key == '__order__': + key = '_order_' + if _is_sunder(key): + if key != '_order_': + raise ValueError('_names_ are reserved for future Enum use') + elif _is_dunder(key): + pass + elif key in self._member_names: + # descriptor overwriting an enum? + raise TypeError('Attempted to reuse key: %r' % key) + elif not _is_descriptor(value): + if key in self: + # enum overwriting a descriptor? + raise TypeError('Key already defined as: %r' % self[key]) + self._member_names.append(key) + super(_EnumDict, self).__setitem__(key, value) + + +# Dummy value for Enum as EnumMeta explicity checks for it, but of course until +# EnumMeta finishes running the first time the Enum class doesn't exist. This +# is also why there are checks in EnumMeta like `if Enum is not None` +Enum = None + + +class EnumMeta(type): + """Metaclass for Enum""" + @classmethod + def __prepare__(metacls, cls, bases): + return _EnumDict() + + def __new__(metacls, cls, bases, classdict): + # an Enum class is final once enumeration items have been defined; it + # cannot be mixed with other types (int, float, etc.) if it has an + # inherited __new__ unless a new __new__ is defined (or the resulting + # class will fail). 
+ if type(classdict) is dict: + original_dict = classdict + classdict = _EnumDict() + for k, v in original_dict.items(): + classdict[k] = v + + member_type, first_enum = metacls._get_mixins_(bases) + __new__, save_new, use_args = metacls._find_new_(classdict, member_type, + first_enum) + # save enum items into separate mapping so they don't get baked into + # the new class + members = dict((k, classdict[k]) for k in classdict._member_names) + for name in classdict._member_names: + del classdict[name] + + # py2 support for definition order + _order_ = classdict.get('_order_') + if _order_ is None: + if pyver < 3.0: + try: + _order_ = [name for (name, value) in sorted(members.items(), key=lambda item: item[1])] + except TypeError: + _order_ = [name for name in sorted(members.keys())] + else: + _order_ = classdict._member_names + else: + del classdict['_order_'] + if pyver < 3.0: + _order_ = _order_.replace(',', ' ').split() + aliases = [name for name in members if name not in _order_] + _order_ += aliases + + # check for illegal enum names (any others?) + invalid_names = set(members) & set(['mro']) + if invalid_names: + raise ValueError('Invalid enum member name(s): %s' % ( + ', '.join(invalid_names), )) + + # save attributes from super classes so we know if we can take + # the shortcut of storing members in the class dict + base_attributes = set([a for b in bases for a in b.__dict__]) + # create our new Enum type + enum_class = super(EnumMeta, metacls).__new__(metacls, cls, bases, classdict) + enum_class._member_names_ = [] # names in random order + if OrderedDict is not None: + enum_class._member_map_ = OrderedDict() + else: + enum_class._member_map_ = {} # name->value map + enum_class._member_type_ = member_type + + # Reverse value->name map for hashable values. + enum_class._value2member_map_ = {} + + # instantiate them, checking for duplicates as we go + # we instantiate first instead of checking for duplicates first in case + # a custom __new__ is doing something funky with the values -- such as + # auto-numbering ;) + if __new__ is None: + __new__ = enum_class.__new__ + for member_name in _order_: + value = members[member_name] + if not isinstance(value, tuple): + args = (value, ) + else: + args = value + if member_type is tuple: # special case for tuple enums + args = (args, ) # wrap it one more time + if not use_args or not args: + enum_member = __new__(enum_class) + if not hasattr(enum_member, '_value_'): + enum_member._value_ = value + else: + enum_member = __new__(enum_class, *args) + if not hasattr(enum_member, '_value_'): + enum_member._value_ = member_type(*args) + value = enum_member._value_ + enum_member._name_ = member_name + enum_member.__objclass__ = enum_class + enum_member.__init__(*args) + # If another member with the same value was already defined, the + # new member becomes an alias to the existing one. + for name, canonical_member in enum_class._member_map_.items(): + if canonical_member.value == enum_member._value_: + enum_member = canonical_member + break + else: + # Aliases don't appear in member names (only in __members__). + enum_class._member_names_.append(member_name) + # performance boost for any member that would not shadow + # a DynamicClassAttribute (aka _RouteClassAttributeToGetattr) + if member_name not in base_attributes: + setattr(enum_class, member_name, enum_member) + # now add to _member_map_ + enum_class._member_map_[member_name] = enum_member + try: + # This may fail if value is not hashable. 
We can't add the value + # to the map, and by-value lookups for this value will be + # linear. + enum_class._value2member_map_[value] = enum_member + except TypeError: + pass + + + # If a custom type is mixed into the Enum, and it does not know how + # to pickle itself, pickle.dumps will succeed but pickle.loads will + # fail. Rather than have the error show up later and possibly far + # from the source, sabotage the pickle protocol for this class so + # that pickle.dumps also fails. + # + # However, if the new class implements its own __reduce_ex__, do not + # sabotage -- it's on them to make sure it works correctly. We use + # __reduce_ex__ instead of any of the others as it is preferred by + # pickle over __reduce__, and it handles all pickle protocols. + unpicklable = False + if '__reduce_ex__' not in classdict: + if member_type is not object: + methods = ('__getnewargs_ex__', '__getnewargs__', + '__reduce_ex__', '__reduce__') + if not any(m in member_type.__dict__ for m in methods): + _make_class_unpicklable(enum_class) + unpicklable = True + + + # double check that repr and friends are not the mixin's or various + # things break (such as pickle) + for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'): + class_method = getattr(enum_class, name) + obj_method = getattr(member_type, name, None) + enum_method = getattr(first_enum, name, None) + if name not in classdict and class_method is not enum_method: + if name == '__reduce_ex__' and unpicklable: + continue + setattr(enum_class, name, enum_method) + + # method resolution and int's are not playing nice + # Python's less than 2.6 use __cmp__ + + if pyver < 2.6: + + if issubclass(enum_class, int): + setattr(enum_class, '__cmp__', getattr(int, '__cmp__')) + + elif pyver < 3.0: + + if issubclass(enum_class, int): + for method in ( + '__le__', + '__lt__', + '__gt__', + '__ge__', + '__eq__', + '__ne__', + '__hash__', + ): + setattr(enum_class, method, getattr(int, method)) + + # replace any other __new__ with our own (as long as Enum is not None, + # anyway) -- again, this is to support pickle + if Enum is not None: + # if the user defined their own __new__, save it before it gets + # clobbered in case they subclass later + if save_new: + setattr(enum_class, '__member_new__', enum_class.__dict__['__new__']) + setattr(enum_class, '__new__', Enum.__dict__['__new__']) + return enum_class + + def __bool__(cls): + """ + classes/types should always be True. + """ + return True + + def __call__(cls, value, names=None, module=None, type=None, start=1): + """Either returns an existing member, or creates a new enum class. + + This method is used both when an enum class is given a value to match + to an enumeration member (i.e. Color(3)) and for the functional API + (i.e. Color = Enum('Color', names='red green blue')). + + When used for the functional API: `module`, if set, will be stored in + the new class' __module__ attribute; `type`, if set, will be mixed in + as the first base class. + + Note: if `module` is not set this routine will attempt to discover the + calling module by walking the frame stack; if this is unsuccessful + the resulting class will not be pickleable. 
+ + """ + if names is None: # simple value lookup + return cls.__new__(cls, value) + # otherwise, functional API: we're creating a new Enum type + return cls._create_(value, names, module=module, type=type, start=start) + + def __contains__(cls, member): + return isinstance(member, cls) and member.name in cls._member_map_ + + def __delattr__(cls, attr): + # nicer error message when someone tries to delete an attribute + # (see issue19025). + if attr in cls._member_map_: + raise AttributeError( + "%s: cannot delete Enum member." % cls.__name__) + super(EnumMeta, cls).__delattr__(attr) + + def __dir__(self): + return (['__class__', '__doc__', '__members__', '__module__'] + + self._member_names_) + + @property + def __members__(cls): + """Returns a mapping of member name->value. + + This mapping lists all enum members, including aliases. Note that this + is a copy of the internal mapping. + + """ + return cls._member_map_.copy() + + def __getattr__(cls, name): + """Return the enum member matching `name` + + We use __getattr__ instead of descriptors or inserting into the enum + class' __dict__ in order to support `name` and `value` being both + properties for enum members (which live in the class' __dict__) and + enum members themselves. + + """ + if _is_dunder(name): + raise AttributeError(name) + try: + return cls._member_map_[name] + except KeyError: + raise AttributeError(name) + + def __getitem__(cls, name): + return cls._member_map_[name] + + def __iter__(cls): + return (cls._member_map_[name] for name in cls._member_names_) + + def __reversed__(cls): + return (cls._member_map_[name] for name in reversed(cls._member_names_)) + + def __len__(cls): + return len(cls._member_names_) + + __nonzero__ = __bool__ + + def __repr__(cls): + return "" % cls.__name__ + + def __setattr__(cls, name, value): + """Block attempts to reassign Enum members. + + A simple assignment to the class namespace only changes one of the + several possible ways to get an Enum member from the Enum class, + resulting in an inconsistent Enumeration. + + """ + member_map = cls.__dict__.get('_member_map_', {}) + if name in member_map: + raise AttributeError('Cannot reassign members.') + super(EnumMeta, cls).__setattr__(name, value) + + def _create_(cls, class_name, names=None, module=None, type=None, start=1): + """Convenience method to create a new Enum class. + + `names` can be: + + * A string containing member names, separated either with spaces or + commas. Values are auto-numbered from 1. + * An iterable of member names. Values are auto-numbered from 1. + * An iterable of (member name, value) pairs. + * A mapping of member name -> value. + + """ + if pyver < 3.0: + # if class_name is unicode, attempt a conversion to ASCII + if isinstance(class_name, unicode): + try: + class_name = class_name.encode('ascii') + except UnicodeEncodeError: + raise TypeError('%r is not representable in ASCII' % class_name) + metacls = cls.__class__ + if type is None: + bases = (cls, ) + else: + bases = (type, cls) + classdict = metacls.__prepare__(class_name, bases) + _order_ = [] + + # special processing needed for names? + if isinstance(names, basestring): + names = names.replace(',', ' ').split() + if isinstance(names, (tuple, list)) and isinstance(names[0], basestring): + names = [(e, i+start) for (i, e) in enumerate(names)] + + # Here, names is either an iterable of (name, value) or a mapping. 
+ item = None # in case names is empty + for item in names: + if isinstance(item, basestring): + member_name, member_value = item, names[item] + else: + member_name, member_value = item + classdict[member_name] = member_value + _order_.append(member_name) + # only set _order_ in classdict if name/value was not from a mapping + if not isinstance(item, basestring): + classdict['_order_'] = ' '.join(_order_) + enum_class = metacls.__new__(metacls, class_name, bases, classdict) + + # TODO: replace the frame hack if a blessed way to know the calling + # module is ever developed + if module is None: + try: + module = _sys._getframe(2).f_globals['__name__'] + except (AttributeError, ValueError): + pass + if module is None: + _make_class_unpicklable(enum_class) + else: + enum_class.__module__ = module + + return enum_class + + @staticmethod + def _get_mixins_(bases): + """Returns the type for creating enum members, and the first inherited + enum class. + + bases: the tuple of bases that was given to __new__ + + """ + if not bases or Enum is None: + return object, Enum + + + # double check that we are not subclassing a class with existing + # enumeration members; while we're at it, see if any other data + # type has been mixed in so we can use the correct __new__ + member_type = first_enum = None + for base in bases: + if (base is not Enum and + issubclass(base, Enum) and + base._member_names_): + raise TypeError("Cannot extend enumerations") + # base is now the last base in bases + if not issubclass(base, Enum): + raise TypeError("new enumerations must be created as " + "`ClassName([mixin_type,] enum_type)`") + + # get correct mix-in type (either mix-in type of Enum subclass, or + # first base if last base is Enum) + if not issubclass(bases[0], Enum): + member_type = bases[0] # first data type + first_enum = bases[-1] # enum type + else: + for base in bases[0].__mro__: + # most common: (IntEnum, int, Enum, object) + # possible: (, , + # , , + # ) + if issubclass(base, Enum): + if first_enum is None: + first_enum = base + else: + if member_type is None: + member_type = base + + return member_type, first_enum + + if pyver < 3.0: + @staticmethod + def _find_new_(classdict, member_type, first_enum): + """Returns the __new__ to be used for creating the enum members. 
+ + classdict: the class dictionary given to __new__ + member_type: the data type whose __new__ will be used by default + first_enum: enumeration to check for an overriding __new__ + + """ + # now find the correct __new__, checking to see of one was defined + # by the user; also check earlier enum classes in case a __new__ was + # saved as __member_new__ + __new__ = classdict.get('__new__', None) + if __new__: + return None, True, True # __new__, save_new, use_args + + N__new__ = getattr(None, '__new__') + O__new__ = getattr(object, '__new__') + if Enum is None: + E__new__ = N__new__ + else: + E__new__ = Enum.__dict__['__new__'] + # check all possibles for __member_new__ before falling back to + # __new__ + for method in ('__member_new__', '__new__'): + for possible in (member_type, first_enum): + try: + target = possible.__dict__[method] + except (AttributeError, KeyError): + target = getattr(possible, method, None) + if target not in [ + None, + N__new__, + O__new__, + E__new__, + ]: + if method == '__member_new__': + classdict['__new__'] = target + return None, False, True + if isinstance(target, staticmethod): + target = target.__get__(member_type) + __new__ = target + break + if __new__ is not None: + break + else: + __new__ = object.__new__ + + # if a non-object.__new__ is used then whatever value/tuple was + # assigned to the enum member name will be passed to __new__ and to the + # new enum member's __init__ + if __new__ is object.__new__: + use_args = False + else: + use_args = True + + return __new__, False, use_args + else: + @staticmethod + def _find_new_(classdict, member_type, first_enum): + """Returns the __new__ to be used for creating the enum members. + + classdict: the class dictionary given to __new__ + member_type: the data type whose __new__ will be used by default + first_enum: enumeration to check for an overriding __new__ + + """ + # now find the correct __new__, checking to see of one was defined + # by the user; also check earlier enum classes in case a __new__ was + # saved as __member_new__ + __new__ = classdict.get('__new__', None) + + # should __new__ be saved as __member_new__ later? + save_new = __new__ is not None + + if __new__ is None: + # check all possibles for __member_new__ before falling back to + # __new__ + for method in ('__member_new__', '__new__'): + for possible in (member_type, first_enum): + target = getattr(possible, method, None) + if target not in ( + None, + None.__new__, + object.__new__, + Enum.__new__, + ): + __new__ = target + break + if __new__ is not None: + break + else: + __new__ = object.__new__ + + # if a non-object.__new__ is used then whatever value/tuple was + # assigned to the enum member name will be passed to __new__ and to the + # new enum member's __init__ + if __new__ is object.__new__: + use_args = False + else: + use_args = True + + return __new__, save_new, use_args + + +######################################################## +# In order to support Python 2 and 3 with a single +# codebase we have to create the Enum methods separately +# and then use the `type(name, bases, dict)` method to +# create the class. +######################################################## +temp_enum_dict = {} +temp_enum_dict['__doc__'] = "Generic enumeration.\n\n Derive from this class to define new enumerations.\n\n" + +def __new__(cls, value): + # all enum instances are actually created during class construction + # without calling this method; this method is called by the metaclass' + # __call__ (i.e. 
Color(3) ), and by pickle + if type(value) is cls: + # For lookups like Color(Color.red) + value = value.value + #return value + # by-value search for a matching enum member + # see if it's in the reverse mapping (for hashable values) + try: + if value in cls._value2member_map_: + return cls._value2member_map_[value] + except TypeError: + # not there, now do long search -- O(n) behavior + for member in cls._member_map_.values(): + if member.value == value: + return member + raise ValueError("%s is not a valid %s" % (value, cls.__name__)) +temp_enum_dict['__new__'] = __new__ +del __new__ + +def __repr__(self): + return "<%s.%s: %r>" % ( + self.__class__.__name__, self._name_, self._value_) +temp_enum_dict['__repr__'] = __repr__ +del __repr__ + +def __str__(self): + return "%s.%s" % (self.__class__.__name__, self._name_) +temp_enum_dict['__str__'] = __str__ +del __str__ + +if pyver >= 3.0: + def __dir__(self): + added_behavior = [ + m + for cls in self.__class__.mro() + for m in cls.__dict__ + if m[0] != '_' and m not in self._member_map_ + ] + return (['__class__', '__doc__', '__module__', ] + added_behavior) + temp_enum_dict['__dir__'] = __dir__ + del __dir__ + +def __format__(self, format_spec): + # mixed-in Enums should use the mixed-in type's __format__, otherwise + # we can get strange results with the Enum name showing up instead of + # the value + + # pure Enum branch + if self._member_type_ is object: + cls = str + val = str(self) + # mix-in branch + else: + cls = self._member_type_ + val = self.value + return cls.__format__(val, format_spec) +temp_enum_dict['__format__'] = __format__ +del __format__ + + +#################################### +# Python's less than 2.6 use __cmp__ + +if pyver < 2.6: + + def __cmp__(self, other): + if type(other) is self.__class__: + if self is other: + return 0 + return -1 + return NotImplemented + raise TypeError("unorderable types: %s() and %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__cmp__'] = __cmp__ + del __cmp__ + +else: + + def __le__(self, other): + raise TypeError("unorderable types: %s() <= %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__le__'] = __le__ + del __le__ + + def __lt__(self, other): + raise TypeError("unorderable types: %s() < %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__lt__'] = __lt__ + del __lt__ + + def __ge__(self, other): + raise TypeError("unorderable types: %s() >= %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__ge__'] = __ge__ + del __ge__ + + def __gt__(self, other): + raise TypeError("unorderable types: %s() > %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__gt__'] = __gt__ + del __gt__ + + +def __eq__(self, other): + if type(other) is self.__class__: + return self is other + return NotImplemented +temp_enum_dict['__eq__'] = __eq__ +del __eq__ + +def __ne__(self, other): + if type(other) is self.__class__: + return self is not other + return NotImplemented +temp_enum_dict['__ne__'] = __ne__ +del __ne__ + +def __hash__(self): + return hash(self._name_) +temp_enum_dict['__hash__'] = __hash__ +del __hash__ + +def __reduce_ex__(self, proto): + return self.__class__, (self._value_, ) +temp_enum_dict['__reduce_ex__'] = __reduce_ex__ +del __reduce_ex__ + +# _RouteClassAttributeToGetattr is used to provide access to the `name` +# and `value` properties of enum members while keeping some measure of +# protection from modification, while still allowing for an 
enumeration +# to have members named `name` and `value`. This works because enumeration +# members are not set directly on the enum class -- __getattr__ is +# used to look them up. + +@_RouteClassAttributeToGetattr +def name(self): + return self._name_ +temp_enum_dict['name'] = name +del name + +@_RouteClassAttributeToGetattr +def value(self): + return self._value_ +temp_enum_dict['value'] = value +del value + +@classmethod +def _convert(cls, name, module, filter, source=None): + """ + Create a new Enum subclass that replaces a collection of global constants + """ + # convert all constants from source (or module) that pass filter() to + # a new Enum called name, and export the enum and its members back to + # module; + # also, replace the __reduce_ex__ method so unpickling works in + # previous Python versions + module_globals = vars(_sys.modules[module]) + if source: + source = vars(source) + else: + source = module_globals + members = dict((name, value) for name, value in source.items() if filter(name)) + cls = cls(name, members, module=module) + cls.__reduce_ex__ = _reduce_ex_by_name + module_globals.update(cls.__members__) + module_globals[name] = cls + return cls +temp_enum_dict['_convert'] = _convert +del _convert + +Enum = EnumMeta('Enum', (object, ), temp_enum_dict) +del temp_enum_dict + +# Enum has now been created +########################### + +class IntEnum(int, Enum): + """Enum where members are also (and must be) ints""" + +def _reduce_ex_by_name(self, proto): + return self.name + +def unique(enumeration): + """Class decorator that ensures only unique members exist in an enumeration.""" + duplicates = [] + for name, member in enumeration.__members__.items(): + if name != member.name: + duplicates.append((name, member.name)) + if duplicates: + duplicate_names = ', '.join( + ["%s -> %s" % (alias, name) for (alias, name) in duplicates] + ) + raise ValueError('duplicate names found in %r: %s' % + (enumeration, duplicate_names) + ) + return enumeration From 6380c1db52b6b4d06a189bbfad62e3393cde60c8 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Wed, 24 Oct 2018 12:58:41 -0700 Subject: [PATCH 17/28] Prevent `pylint` import errors on `six.moves` `six.moves` is a dynamically-created namespace that doesn't actually exist and therefore `pylint` can't statically analyze it. By default, `pylint` is smart enough to realize that and ignore the import errors. However, because we vendor it, the location changes to `kafka.vendor.six.moves` so `pylint` doesn't realize it should be ignored. So this explicitly ignores it. `pylint` documentation of this feature: http://pylint.pycqa.org/en/1.9/technical_reference/features.html?highlight=ignored-modules#id34 More background: * https://github.com/PyCQA/pylint/issues/1640 * https://github.com/PyCQA/pylint/issues/223 --- pylint.rc | 1 + 1 file changed, 1 insertion(+) diff --git a/pylint.rc b/pylint.rc index d22e523ec..851275bcc 100644 --- a/pylint.rc +++ b/pylint.rc @@ -1,5 +1,6 @@ [TYPECHECK] ignored-classes=SyncManager,_socketobject +ignored-modules=kafka.vendor.six.moves generated-members=py.* [MESSAGES CONTROL] From ac9d5623116a5754c57a8ecd95b2954ba0f30c14 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Wed, 24 Oct 2018 13:30:45 -0700 Subject: [PATCH 18/28] Fix sphinx url When I was fixing urls the other day, I noticed that sphinx hadn't added https but there was an open ticket: https://github.com/sphinx-doc/sphinx/issues/5522 Now that that is resolved, I'm updating it here. 
--- docs/Makefile | 2 +- docs/make.bat | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index 5751f68c6..b27cf7742 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -9,7 +9,7 @@ BUILDDIR = _build # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) -$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from https://www.sphinx-doc.org/) endif # Internal variables. diff --git a/docs/make.bat b/docs/make.bat index 2e9d7dc51..3332a3a1b 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -56,7 +56,7 @@ if errorlevel 9009 ( echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ + echo.https://www.sphinx-doc.org/ exit /b 1 ) From 481f88068bdf0a18f12fd7a811b795f889d35fc7 Mon Sep 17 00:00:00 2001 From: Richard Lee Date: Thu, 12 Jul 2018 11:39:29 -0700 Subject: [PATCH 19/28] Add KafkaAdmin class Requires cluster version > 0.10.0.0, and uses new wire protocol classes to do many things via broker connection that previously needed to be done directly in zookeeper. --- kafka/__init__.py | 2 + kafka/admin/__init__.py | 10 + kafka/admin/config_resource.py | 36 +++ kafka/admin/kafka.py | 505 +++++++++++++++++++++++++++++++++ kafka/admin/new_partitions.py | 19 ++ kafka/admin/new_topic.py | 34 +++ kafka/client_async.py | 16 ++ kafka/conn.py | 10 + kafka/protocol/__init__.py | 5 + test/test_admin.py | 47 +++ 10 files changed, 684 insertions(+) create mode 100644 kafka/admin/__init__.py create mode 100644 kafka/admin/config_resource.py create mode 100644 kafka/admin/kafka.py create mode 100644 kafka/admin/new_partitions.py create mode 100644 kafka/admin/new_topic.py create mode 100644 test/test_admin.py diff --git a/kafka/__init__.py b/kafka/__init__.py index 897ebb095..fa50bf61c 100644 --- a/kafka/__init__.py +++ b/kafka/__init__.py @@ -18,6 +18,7 @@ def emit(self, record): logging.getLogger(__name__).addHandler(NullHandler()) +from kafka.admin import KafkaAdmin from kafka.consumer import KafkaConsumer from kafka.consumer.subscription_state import ConsumerRebalanceListener from kafka.producer import KafkaProducer @@ -46,6 +47,7 @@ def __init__(self, *args, **kwargs): __all__ = [ + 'KafkaAdmin', 'KafkaConsumer', 'KafkaProducer', 'KafkaClient', 'BrokerConnection', 'SimpleClient', 'SimpleProducer', 'KeyedProducer', 'RoundRobinPartitioner', 'HashedPartitioner', diff --git a/kafka/admin/__init__.py b/kafka/admin/__init__.py new file mode 100644 index 000000000..069bc7c88 --- /dev/null +++ b/kafka/admin/__init__.py @@ -0,0 +1,10 @@ +from __future__ import absolute_import + +from kafka.admin.config_resource import ConfigResource, ConfigResourceType +from kafka.admin.kafka import KafkaAdmin +from kafka.admin.new_topic import NewTopic +from kafka.admin.new_partitions import NewPartitions + +__all__ = [ + 'ConfigResource', 'ConfigResourceType', 
'KafkaAdmin', 'NewTopic', 'NewPartitions' +] diff --git a/kafka/admin/config_resource.py b/kafka/admin/config_resource.py new file mode 100644 index 000000000..e3294c9c4 --- /dev/null +++ b/kafka/admin/config_resource.py @@ -0,0 +1,36 @@ +from __future__ import absolute_import + +# enum in stdlib as of py3.4 +try: + from enum import IntEnum # pylint: disable=import-error +except ImportError: + # vendored backport module + from kafka.vendor.enum34 import IntEnum + + +class ConfigResourceType(IntEnum): + """An enumerated type of config resources""" + + BROKER = 4, + TOPIC = 2 + + +class ConfigResource(object): + """A class for specifying config resources. + Arguments: + resource_type (ConfigResourceType): the type of kafka resource + name (string): The name of the kafka resource + configs ({key : value}): A maps of config keys to values. + """ + + def __init__( + self, + resource_type, + name, + configs=None + ): + if not isinstance(resource_type, (ConfigResourceType)): + resource_type = ConfigResourceType[str(resource_type).upper()] # pylint: disable-msg=unsubscriptable-object + self.resource_type = resource_type + self.name = name + self.configs = configs diff --git a/kafka/admin/kafka.py b/kafka/admin/kafka.py new file mode 100644 index 000000000..e78bdbfa7 --- /dev/null +++ b/kafka/admin/kafka.py @@ -0,0 +1,505 @@ +from __future__ import absolute_import + +import copy +import logging +import socket +from kafka.client_async import KafkaClient, selectors +from kafka.errors import ( + KafkaConfigurationError, UnsupportedVersionError, NodeNotReadyError, NotControllerError, KafkaConnectionError) +from kafka.metrics import MetricConfig, Metrics +from kafka.protocol.admin import ( + CreateTopicsRequest, DeleteTopicsRequest, DescribeConfigsRequest, AlterConfigsRequest, CreatePartitionsRequest, + ListGroupsRequest, DescribeGroupsRequest) +from kafka.protocol.metadata import MetadataRequest +from kafka.version import __version__ + +log = logging.getLogger(__name__) + +class KafkaAdmin(object): + """An class for administering the kafka cluster. + + The KafkaAdmin class will negotiate for the latest version of each message protocol format supported + by both the kafka-python client library and the kafka broker. Usage of optional fields from protocol + versions that are not supported by the broker will result in UnsupportedVersionError exceptions. + + Use of this class requires a minimum broker version >= 0.10.0.0. + + Keyword Arguments: + bootstrap_servers: 'host[:port]' string (or list of 'host[:port]' + strings) that the consumer should contact to bootstrap initial + cluster metadata. This does not have to be the full node list. + It just needs to have at least one broker that will respond to a + Metadata API Request. Default port is 9092. If no servers are + specified, will default to localhost:9092. + client_id (str): a name for this client. This string is passed in + each request to servers and can be used to identify specific + server-side log entries that correspond to this client. Also + submitted to GroupCoordinator for logging with respect to + consumer group administration. Default: 'kafka-python-{version}' + reconnect_backoff_ms (int): The amount of time in milliseconds to + wait before attempting to reconnect to a given host. + Default: 50. + reconnect_backoff_max_ms (int): The maximum amount of time in + milliseconds to wait when reconnecting to a broker that has + repeatedly failed to connect. 
If provided, the backoff per host + will increase exponentially for each consecutive connection + failure, up to this maximum. To avoid connection storms, a + randomization factor of 0.2 will be applied to the backoff + resulting in a random range between 20% below and 20% above + the computed value. Default: 1000. + request_timeout_ms (int): Client request timeout in milliseconds. + Default: 30000. + connections_max_idle_ms: Close idle connections after the number of + milliseconds specified by this config. The broker closes idle + connections after connections.max.idle.ms, so this avoids hitting + unexpected socket disconnected errors on the client. + Default: 540000 + retry_backoff_ms (int): Milliseconds to backoff when retrying on + errors. Default: 100. + max_in_flight_requests_per_connection (int): Requests are pipelined + to kafka brokers up to this number of maximum requests per + broker connection. Default: 5. + receive_buffer_bytes (int): The size of the TCP receive buffer + (SO_RCVBUF) to use when reading data. Default: None (relies on + system defaults). Java client defaults to 32768. + send_buffer_bytes (int): The size of the TCP send buffer + (SO_SNDBUF) to use when sending data. Default: None (relies on + system defaults). Java client defaults to 131072. + socket_options (list): List of tuple-arguments to socket.setsockopt + to apply to broker connection sockets. Default: + [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] + metadata_max_age_ms (int): The period of time in milliseconds after + which we force a refresh of metadata even if we haven't seen any + partition leadership changes to proactively discover any new + brokers or partitions. Default: 300000 + security_protocol (str): Protocol used to communicate with brokers. + Valid values are: PLAINTEXT, SSL. Default: PLAINTEXT. + ssl_context (ssl.SSLContext): Pre-configured SSLContext for wrapping + socket connections. If provided, all other ssl_* configurations + will be ignored. Default: None. + ssl_check_hostname (bool): Flag to configure whether SSL handshake + should verify that the certificate matches the broker's hostname. + Default: True. + ssl_cafile (str): Optional filename of CA file to use in certificate + veriication. Default: None. + ssl_certfile (str): Optional filename of file in PEM format containing + the client certificate, as well as any CA certificates needed to + establish the certificate's authenticity. Default: None. + ssl_keyfile (str): Optional filename containing the client private key. + Default: None. + ssl_password (str): Optional password to be used when loading the + certificate chain. Default: None. + ssl_crlfile (str): Optional filename containing the CRL to check for + certificate expiration. By default, no CRL check is done. When + providing a file, only the leaf certificate will be checked against + this CRL. The CRL can only be checked with Python 3.4+ or 2.7.9+. + Default: None. + api_version (tuple): Specify which Kafka API version to use. If set + to None, KafkaClient will attempt to infer the broker version by + probing various APIs. Example: (0, 10, 2). Default: None + api_version_auto_timeout_ms (int): number of milliseconds to throw a + timeout exception from the constructor when checking the broker + api version. Only applies if api_version is None + selector (selectors.BaseSelector): Provide a specific selector + implementation to use for I/O multiplexing. 
+ Default: selectors.DefaultSelector + metrics (kafka.metrics.Metrics): Optionally provide a metrics + instance for capturing network IO stats. Default: None. + metric_group_prefix (str): Prefix for metric names. Default: '' + sasl_mechanism (str): string picking sasl mechanism when security_protocol + is SASL_PLAINTEXT or SASL_SSL. Currently only PLAIN is supported. + Default: None + sasl_plain_username (str): username for sasl PLAIN authentication. + Default: None + sasl_plain_password (str): password for sasl PLAIN authentication. + Default: None + sasl_kerberos_service_name (str): Service name to include in GSSAPI + sasl mechanism handshake. Default: 'kafka' + + """ + DEFAULT_CONFIG = { + # client configs + 'bootstrap_servers': 'localhost', + 'client_id': 'kafka-python-' + __version__, + 'request_timeout_ms': 30000, + 'connections_max_idle_ms': 9 * 60 * 1000, + 'reconnect_backoff_ms': 50, + 'reconnect_backoff_max_ms': 1000, + 'max_in_flight_requests_per_connection': 5, + 'receive_buffer_bytes': None, + 'send_buffer_bytes': None, + 'socket_options': [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)], + 'sock_chunk_bytes': 4096, # undocumented experimental option + 'sock_chunk_buffer_count': 1000, # undocumented experimental option + 'retry_backoff_ms': 100, + 'metadata_max_age_ms': 300000, + 'security_protocol': 'PLAINTEXT', + 'ssl_context': None, + 'ssl_check_hostname': True, + 'ssl_cafile': None, + 'ssl_certfile': None, + 'ssl_keyfile': None, + 'ssl_password': None, + 'ssl_crlfile': None, + 'api_version': None, + 'api_version_auto_timeout_ms': 2000, + 'selector': selectors.DefaultSelector, + 'sasl_mechanism': None, + 'sasl_plain_username': None, + 'sasl_plain_password': None, + 'sasl_kerberos_service_name': 'kafka', + + # metrics configs + 'metric_reporters' : [], + 'metrics_num_samples': 2, + 'metrics_sample_window_ms': 30000, + } + + def __init__(self, **configs): + log.debug("Starting Kafka administration interface") + extra_configs = set(configs).difference(self.DEFAULT_CONFIG) + if extra_configs: + raise KafkaConfigurationError("Unrecognized configs: %s" % extra_configs) + + self.config = copy.copy(self.DEFAULT_CONFIG) + self.config.update(configs) + + # api_version was previously a str. 
accept old format for now + if isinstance(self.config['api_version'], str): + deprecated = self.config['api_version'] + if deprecated == 'auto': + self.config['api_version'] = None + else: + self.config['api_version'] = tuple(map(int, deprecated.split('.'))) + log.warning('use api_version=%s [tuple] -- "%s" as str is deprecated', + str(self.config['api_version']), deprecated) + + # Configure metrics + metrics_tags = {'client-id': self.config['client_id']} + metric_config = MetricConfig(samples=self.config['metrics_num_samples'], + time_window_ms=self.config['metrics_sample_window_ms'], + tags=metrics_tags) + reporters = [reporter() for reporter in self.config['metric_reporters']] + self._metrics = Metrics(metric_config, reporters) + + self._client = KafkaClient(metrics=self._metrics, metric_group_prefix='admin', + **self.config) + + # Get auto-discovered version from client if necessary + if self.config['api_version'] is None: + self.config['api_version'] = self._client.config['api_version'] + + self._closed = False + self._refresh_controller_id() + log.debug('Kafka administration interface started') + + def close(self): + """Close the administration connection to the kafka broker""" + if not hasattr(self, '_closed') or self._closed: + log.info('Kafka administration interface already closed') + return + + self._metrics.close() + self._client.close() + self._closed = True + log.debug('Kafka administartion interface has closed') + + def _matching_api_version(self, operation): + """Find matching api version, the lesser of either the latest api version the library supports, or + the max version supported by the broker + + :param operation: An operation array from kafka.protocol + :return: The max matching version number between client and broker + """ + version = min(len(operation) - 1, + self._client.get_api_versions()[operation[0].API_KEY][1]) + if version < self._client.get_api_versions()[operation[0].API_KEY][0]: + # max library version is less than min broker version. Not sure any brokers + # actually set a min version greater than 0 right now, tho. But maybe in the future? + raise UnsupportedVersionError( + "Could not find matching protocol version for {}" + .format(operation.__name__)) + return version + + def _validate_timeout(self, timeout_ms): + """Validate the timeout is set or use the configuration default + + :param timeout_ms: The timeout provided by api call, in milliseconds + :return: The timeout to use for the operation + """ + return timeout_ms or self.config['request_timeout_ms'] + + def _refresh_controller_id(self): + """Determine the kafka cluster controller + """ + response = self._send_request_to_node( + self._client.least_loaded_node(), + MetadataRequest[1]([]) + ) + self._controller_id = response.controller_id + version = self._client.check_version(self._controller_id) + if version < (0, 10, 0): + raise UnsupportedVersionError( + "Kafka Admin interface not supported for cluster controller version {} < 0.10.0.0" + .format(version)) + + def _send_request_to_node(self, node, request): + """Send a kafka protocol message to a specific broker. Will block until the message result is received. 
+ + :param node: The broker id to which to send the message + :param request: The message to send + :return: The kafka protocol response for the message + :exception: The exception if the message could not be sent + """ + while not self._client.ready(node): + # connection to broker not ready, poll until it is or send will fail with NodeNotReadyError + self._client.poll() + future = self._client.send(node, request) + self._client.poll(future=future) + if future.succeeded(): + return future.value + else: + raise future.exception # pylint: disable-msg=raising-bad-type + + def _send(self, request): + """Send a kafka protocol message to the cluster controller. Will block until the message result is received. + + :param request: The message to send + :return The kafka protocol response for the message + :exception NodeNotReadyError: If the controller connection can't be established + """ + remaining_tries = 2 + while remaining_tries > 0: + remaining_tries = remaining_tries - 1 + try: + return self._send_request_to_node(self._controller_id, request) + except (NotControllerError, KafkaConnectionError) as e: + # controller changed? refresh it + self._refresh_controller_id() + raise NodeNotReadyError(self._controller_id) + + @staticmethod + def _convert_new_topic_request(new_topic): + return ( + new_topic.name, + new_topic.num_partitions, + new_topic.replication_factor, + [ + (partition_id, replicas) for partition_id, replicas in new_topic.replica_assignments.items() + ], + [ + (config_key, config_value) for config_key, config_value in new_topic.topic_configs.items() + ] + ) + + def create_topics(self, new_topics, timeout_ms=None, validate_only=None): + """Create new topics in the cluster. + + :param new_topics: Array of NewTopic objects + :param timeout_ms: Milliseconds to wait for new topics to be created before broker returns + :param validate_only: If True, don't actually create new topics. Not supported by all versions. 
+ :return: Appropriate version of CreateTopicResponse class + """ + version = self._matching_api_version(CreateTopicsRequest) + timeout_ms = self._validate_timeout(timeout_ms) + if version == 0: + if validate_only: + raise UnsupportedVersionError( + "validate_only not supported on cluster version {}" + .format(self.config['api_version'])) + request = CreateTopicsRequest[version]( + create_topic_requests = [self._convert_new_topic_request(new_topic) for new_topic in new_topics], + timeout = timeout_ms + ) + elif version <= 2: + validate_only = validate_only or False + request = CreateTopicsRequest[version]( + create_topic_requests = [self._convert_new_topic_request(new_topic) for new_topic in new_topics], + timeout = timeout_ms, + validate_only = validate_only + ) + else: + raise UnsupportedVersionError( + "missing implementation of CreateTopics for library supported version {}" + .format(version) + ) + return self._send(request) + + def delete_topics(self, topics, timeout_ms=None): + """Delete topics from the cluster + + :param topics: Array of topic name strings + :param timeout_ms: Milliseconds to wait for topics to be deleted before broker returns + :return: Appropriate version of DeleteTopicsResponse class + """ + version = self._matching_api_version(DeleteTopicsRequest) + timeout_ms = self._validate_timeout(timeout_ms) + if version <= 1: + request = DeleteTopicsRequest[version]( + topics = topics, + timeout = timeout_ms + ) + else: + raise UnsupportedVersionError( + "missing implementation of DeleteTopics for library supported version {}" + .format(version)) + return self._send(request) + + # list topics functionality is in ClusterMetadata + + # describe topics functionality is in ClusterMetadata + + # describe cluster functionality is in ClusterMetadata + + # describe_acls protocol not implemented + + # create_acls protocol not implemented + + # delete_acls protocol not implemented + + @staticmethod + def _convert_describe_config_resource_request(config_resource): + return ( + config_resource.resource_type, + config_resource.name, + [ + config_key for config_key, config_value in config_resource.configs.items() + ] if config_resource.configs else None + ) + + def describe_configs(self, config_resources, include_synonyms=None): + """Fetch configuration parameters for one or more kafka resources. + + :param config_resources: An array of ConfigResource objects. + Any keys in ConfigResource.configs dict will be used to filter the result. The configs dict should be None + to get all values. An empty dict will get zero values (as per kafka protocol). + :param include_synonyms: If True, return synonyms in response. Not supported by all versions. 
+ :return: Appropriate version of DescribeConfigsResponse class + """ + version = self._matching_api_version(DescribeConfigsRequest) + if version == 0: + if include_synonyms: + raise UnsupportedVersionError( + "include_synonyms not supported on cluster version {}" + .format(self.config['api_version'])) + request = DescribeConfigsRequest[version]( + resources = [self._convert_describe_config_resource_request(config_resource) for config_resource in config_resources] + ) + elif version <= 1: + include_synonyms = include_synonyms or False + request = DescribeConfigsRequest[version]( + resources = [self._convert_describe_config_resource_request(config_resource) for config_resource in config_resources], + include_synonyms = include_synonyms + ) + else: + raise UnsupportedVersionError( + "missing implementation of DescribeConfigs for library supported version {}" + .format(version)) + return self._send(request) + + @staticmethod + def _convert_alter_config_resource_request(config_resource): + return ( + config_resource.resource_type, + config_resource.name, + [ + (config_key, config_value) for config_key, config_value in config_resource.configs.items() + ] + ) + + def alter_configs(self, config_resources): + """Alter configuration parameters of one or more kafka resources. + + :param config_resources: An array of ConfigResource objects. + :return: Appropriate version of AlterConfigsResponse class + """ + version = self._matching_api_version(AlterConfigsRequest) + if version == 0: + request = AlterConfigsRequest[version]( + resources = [self._convert_alter_config_resource_request(config_resource) for config_resource in config_resources] + ) + else: + raise UnsupportedVersionError( + "missing implementation of AlterConfigs for library supported version {}" + .format(version)) + return self._send(request) + + # alter replica logs dir protocol not implemented + + # describe log dirs protocol not implemented + + @staticmethod + def _convert_create_partitions_request(topic_name, new_partitions): + return ( + topic_name, + ( + new_partitions.total_count, + new_partitions.new_assignments + ) + ) + + def create_partitions(self, topic_partitions, timeout_ms=None, validate_only=None): + """Create additional partitions for an existing topic. + + :param topic_partitions: A map of topic name strings to NewPartition objects + :param timeout_ms: Milliseconds to wait for new partitions to be created before broker returns + :param validate_only: If True, don't actually create new partitions. + :return: Appropriate version of CreatePartitionsResponse class + """ + version = self._matching_api_version(CreatePartitionsRequest) + timeout_ms = self._validate_timeout(timeout_ms) + validate_only = validate_only or False + if version == 0: + request = CreatePartitionsRequest[version]( + topic_partitions = [self._convert_create_partitions_request(topic_name, new_partitions) for topic_name, new_partitions in topic_partitions.items()], + timeout = timeout_ms, + validate_only = validate_only + ) + else: + raise UnsupportedVersionError( + "missing implementation of CreatePartitions for library supported version {}" + .format(version)) + return self._send(request) + + # delete records protocol not implemented + + # create delegation token protocol not implemented + + # renew delegation token protocol not implemented + + # expire delegation_token protocol not implemented + + # describe delegation_token protocol not implemented + + def describe_consumer_groups(self, group_ids): + """Describe a set of consumer groups. 
+ + :param group_ids: A list of consumer group id names + :return: Appropriate version of DescribeGroupsResponse class + """ + version = self._matching_api_version(DescribeGroupsRequest) + if version <= 1: + request = DescribeGroupsRequest[version]( + groups = group_ids + ) + else: + raise UnsupportedVersionError( + "missing implementation of DescribeGroups for library supported version {}" + .format(version)) + return self._send(request) + + def list_consumer_groups(self): + """List all consumer groups known to the cluster. + + :return: Appropriate version of ListGroupsResponse class + """ + version = self._matching_api_version(ListGroupsRequest) + if version <= 1: + request = ListGroupsRequest[version]() + else: + raise UnsupportedVersionError( + "missing implementation of ListGroups for library supported version {}" + .format(version)) + return self._send(request) + + # delete groups protocol not implemented diff --git a/kafka/admin/new_partitions.py b/kafka/admin/new_partitions.py new file mode 100644 index 000000000..429b2e190 --- /dev/null +++ b/kafka/admin/new_partitions.py @@ -0,0 +1,19 @@ +from __future__ import absolute_import + + +class NewPartitions(object): + """A class for new partition creation on existing topics. Note that the length of new_assignments, if specified, + must be the difference between the new total number of partitions and the existing number of partitions. + Arguments: + total_count (int): the total number of partitions that should exist on the topic + new_assignments ([[int]]): an array of arrays of replica assignments for new partitions. + If not set, broker assigns replicas per an internal algorithm. + """ + + def __init__( + self, + total_count, + new_assignments=None + ): + self.total_count = total_count + self.new_assignments = new_assignments diff --git a/kafka/admin/new_topic.py b/kafka/admin/new_topic.py new file mode 100644 index 000000000..645ac383a --- /dev/null +++ b/kafka/admin/new_topic.py @@ -0,0 +1,34 @@ +from __future__ import absolute_import + +from kafka.errors import IllegalArgumentError + + +class NewTopic(object): + """ A class for new topic creation + Arguments: + name (string): name of the topic + num_partitions (int): number of partitions + or -1 if replica_assignment has been specified + replication_factor (int): replication factor or -1 if + replica assignment is specified + replica_assignment (dict of int: [int]): A mapping containing + partition id and replicas to assign to it. + topic_configs (dict of str: str): A mapping of config key + and value for the topic. 
+ """ + + def __init__( + self, + name, + num_partitions, + replication_factor, + replica_assignments=None, + topic_configs=None, + ): + if not (num_partitions == -1 or replication_factor == -1) ^ (replica_assignments is None): + raise IllegalArgumentError('either num_partitions/replication_factor or replica_assignment must be specified') + self.name = name + self.num_partitions = num_partitions + self.replication_factor = replication_factor + self.replica_assignments = replica_assignments or {} + self.topic_configs = topic_configs or {} diff --git a/kafka/client_async.py b/kafka/client_async.py index 5a161bb6a..ccf1e4b10 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -196,6 +196,7 @@ def __init__(self, **configs): self._metadata_refresh_in_progress = False self._selector = self.config['selector']() self._conns = Dict() # object to support weakrefs + self._api_versions = None self._connecting = set() self._refresh_on_disconnects = True self._last_bootstrap = 0 @@ -808,6 +809,17 @@ def refresh_done(val_or_error): # to let us know the selected connection might be usable again. return float('inf') + def get_api_versions(self): + """Return the ApiVersions map, if available. + + Note: A call to check_version must previously have succeeded and returned + version 0.10.0 or later + + Returns: a map of dict mapping {api_key : (min_version, max_version)}, + or None if ApiVersion is not supported by the kafka cluster. + """ + return self._api_versions + def check_version(self, node_id=None, timeout=2, strict=False): """Attempt to guess the version of a Kafka broker. @@ -841,6 +853,10 @@ def check_version(self, node_id=None, timeout=2, strict=False): try: remaining = end - time.time() version = conn.check_version(timeout=remaining, strict=strict, topics=list(self.config['bootstrap_topics_filter'])) + if version >= (0, 10, 0): + # cache the api versions map if it's available (starting + # in 0.10 cluster version) + self._api_versions = conn.get_api_versions() return version except Errors.NodeNotReadyError: # Only raise to user if this is a node-specific request diff --git a/kafka/conn.py b/kafka/conn.py index ccaa2ed62..5ec97575f 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -873,6 +873,16 @@ def _handle_api_version_response(self, response): ]) return self._api_versions + def get_api_versions(self): + version = self.check_version() + if version < (0, 10, 0): + raise Errors.UnsupportedVersionError( + "ApiVersion not supported by cluster version {} < 0.10.0" + .format(version)) + # _api_versions is set as a side effect of check_versions() on a cluster + # that supports 0.10.0 or later + return self._api_versions; + def _infer_broker_version_from_api_versions(self, api_versions): # The logic here is to check the list of supported request versions # in reverse order. 
As soon as we find one that works, return it diff --git a/kafka/protocol/__init__.py b/kafka/protocol/__init__.py index 050a0854f..8cf564033 100644 --- a/kafka/protocol/__init__.py +++ b/kafka/protocol/__init__.py @@ -44,4 +44,9 @@ 33: 'AlterConfigs', 36: 'SaslAuthenticate', 37: 'CreatePartitions', + 38: 'CreateDelegationToken', + 39: 'RenewDelegationToken', + 40: 'ExpireDelegationToken', + 41: 'DescribeDelegationToken', + 42: 'DeleteGroups', } diff --git a/test/test_admin.py b/test/test_admin.py new file mode 100644 index 000000000..fd9c54ddd --- /dev/null +++ b/test/test_admin.py @@ -0,0 +1,47 @@ +import pytest + +import kafka.admin +from kafka.errors import IllegalArgumentError + + +def test_config_resource(): + with pytest.raises(KeyError): + bad_resource = kafka.admin.ConfigResource('something', 'foo') + good_resource = kafka.admin.ConfigResource('broker', 'bar') + assert(good_resource.resource_type == kafka.admin.ConfigResourceType.BROKER) + assert(good_resource.name == 'bar') + assert(good_resource.configs is None) + good_resource = kafka.admin.ConfigResource(kafka.admin.ConfigResourceType.TOPIC, 'baz', {'frob' : 'nob'}) + assert(good_resource.resource_type == kafka.admin.ConfigResourceType.TOPIC) + assert(good_resource.name == 'baz') + assert(good_resource.configs == {'frob' : 'nob'}) + + +def test_new_partitions(): + good_partitions = kafka.admin.NewPartitions(6) + assert(good_partitions.total_count == 6) + assert(good_partitions.new_assignments is None) + good_partitions = kafka.admin.NewPartitions(7, [[1, 2, 3]]) + assert(good_partitions.total_count == 7) + assert(good_partitions.new_assignments == [[1, 2, 3]]) + + +def test_new_topic(): + with pytest.raises(IllegalArgumentError): + bad_topic = kafka.admin.NewTopic('foo', -1, -1) + with pytest.raises(IllegalArgumentError): + bad_topic = kafka.admin.NewTopic('foo', 1, -1) + with pytest.raises(IllegalArgumentError): + bad_topic = kafka.admin.NewTopic('foo', 1, 1, {1 : [1, 1, 1]}) + good_topic = kafka.admin.NewTopic('foo', 1, 2) + assert(good_topic.name == 'foo') + assert(good_topic.num_partitions == 1) + assert(good_topic.replication_factor == 2) + assert(good_topic.replica_assignments == {}) + assert(good_topic.topic_configs == {}) + good_topic = kafka.admin.NewTopic('bar', -1, -1, {1 : [1, 2, 3]}, {'key' : 'value'}) + assert(good_topic.name == 'bar') + assert(good_topic.num_partitions == -1) + assert(good_topic.replication_factor == -1) + assert(good_topic.replica_assignments == {1: [1, 2, 3]}) + assert(good_topic.topic_configs == {'key' : 'value'}) From cd0bd8e4db66a532f19a76908c677ebf004f642e Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Fri, 26 Oct 2018 14:09:54 -0700 Subject: [PATCH 20/28] Add temp workaround for upstream pylint bug Temporarily workaround https://github.com/PyCQA/pylint/issues/2571 so that we can stop pinning `pylint`. 
--- kafka/record/_crc32c.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kafka/record/_crc32c.py b/kafka/record/_crc32c.py index 9db2d89af..ecff48f5e 100644 --- a/kafka/record/_crc32c.py +++ b/kafka/record/_crc32c.py @@ -139,5 +139,7 @@ def crc(data): if __name__ == "__main__": import sys - data = sys.stdin.read() + # TODO remove the pylint disable once pylint fixes + # https://github.com/PyCQA/pylint/issues/2571 + data = sys.stdin.read() # pylint: disable=assignment-from-no-return print(hex(crc(data))) From b54607fbb88507e692f63c5c547742009f6bcaa8 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Wed, 24 Oct 2018 22:50:06 -0700 Subject: [PATCH 21/28] Stop pinning `pylint` We have many deprecation warnings in the travis logs for things that are fixed in newer versions of `pylint` or `pylint`'s dependencies. Note that `pylint` >= 2.0 does not support python 2, so this will result in different versions of pylint running for python 2 vs python 3. Personally, I am just fine with this. --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index ad95f9374..1760afffc 100644 --- a/tox.ini +++ b/tox.ini @@ -11,7 +11,7 @@ log_format = %(created)f %(filename)-23s %(threadName)s %(message)s deps = pytest pytest-cov - py{27,34,35,36,py}: pylint==1.8.2 + py{27,34,35,36,py}: pylint py{27,34,35,36,py}: pytest-pylint pytest-mock mock From 8791f0558fc103df43a2d7ab214904ad9a15f147 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Fri, 26 Oct 2018 22:53:03 -0700 Subject: [PATCH 22/28] Stop using deprecated log.warn() I missed this in my previous cleanup back in 9221fcf83528b5c3657e43636cb84c1d18025acd. --- kafka/producer/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kafka/producer/base.py b/kafka/producer/base.py index 956cef6c5..1da74c841 100644 --- a/kafka/producer/base.py +++ b/kafka/producer/base.py @@ -83,7 +83,7 @@ def _send_upstream(queue, client, codec, batch_time, batch_size, try: client.reinit() except Exception as e: - log.warn('Async producer failed to connect to brokers; backoff for %s(ms) before retrying', retry_options.backoff_ms) + log.warning('Async producer failed to connect to brokers; backoff for %s(ms) before retrying', retry_options.backoff_ms) time.sleep(float(retry_options.backoff_ms) / 1000) else: break @@ -189,12 +189,12 @@ def _handle_error(error_cls, request): # doing backoff before next retry if retry_state['do_backoff'] and retry_options.backoff_ms: - log.warn('Async producer backoff for %s(ms) before retrying', retry_options.backoff_ms) + log.warning('Async producer backoff for %s(ms) before retrying', retry_options.backoff_ms) time.sleep(float(retry_options.backoff_ms) / 1000) # refresh topic metadata before next retry if retry_state['do_refresh']: - log.warn('Async producer forcing metadata refresh metadata before retrying') + log.warning('Async producer forcing metadata refresh before retrying') try: client.load_metadata_for_topics() except Exception: From 4f4e4fb5da8861593d084fa8a9de223ed7439ccf Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Fri, 26 Oct 2018 22:14:42 -0700 Subject: [PATCH 23/28] Minor cleanup of testing doc Removed some of the hardcoded values as they are now outdated, and just pointed to where to find the current value in the code. Also some minor wordsmithing.
--- docs/tests.rst | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/docs/tests.rst b/docs/tests.rst index 74642c937..5983475e0 100644 --- a/docs/tests.rst +++ b/docs/tests.rst @@ -23,8 +23,13 @@ fixtures for client / consumer / producer testing. Unit tests ------------------ -To run the tests locally, install tox -- `pip install tox` -See https://tox.readthedocs.io/en/latest/install.html +To run the tests locally, install tox: + +.. code:: bash + + pip install tox + +For more details, see https://tox.readthedocs.io/en/latest/install.html Then simply run tox, optionally setting the python environment. If unset, tox will loop through all environments. @@ -49,8 +54,8 @@ Integration tests .. code:: bash - KAFKA_VERSION=0.10.1.1 tox -e py27 - KAFKA_VERSION=0.8.2.2 tox -e py35 + KAFKA_VERSION=0.8.2.2 tox -e py27 + KAFKA_VERSION=1.0.1 tox -e py36 Integration tests start Kafka and Zookeeper fixtures. This requires downloading @@ -60,25 +65,24 @@ kafka server binaries: ./build_integration.sh -By default, this will install 0.8.2.2, 0.9.0.1, 0.10.1.1, and -0.10.2.1 brokers into the servers/ directory. To install a specific version, - e.g., set `KAFKA_VERSION=0.10.2.1`: +By default, this will install the broker versions listed in build_integration.sh's `ALL_RELEASES` +into the servers/ directory. To install a specific version, set the `KAFKA_VERSION` variable: .. code:: bash - KAFKA_VERSION=0.10.2.1 ./build_integration.sh + KAFKA_VERSION=1.0.1 ./build_integration.sh -Then run the tests against supported Kafka versions, simply set the `KAFKA_VERSION` +Then to run the tests against a specific Kafka version, simply set the `KAFKA_VERSION` env variable to the server build you want to use for testing: .. code:: bash - KAFKA_VERSION=0.10.2.1 tox -e py27 + KAFKA_VERSION=1.0.1 tox -e py36 To test against the kafka source tree, set KAFKA_VERSION=trunk -[optionally set SCALA_VERSION (defaults to 2.10)] +[optionally set SCALA_VERSION (defaults to the value set in `build_integration.sh`)] .. 
code:: bash - SCALA_VERSION=2.11 KAFKA_VERSION=trunk ./build_integration.sh - KAFKA_VERSION=trunk tox -e py35 + SCALA_VERSION=2.12 KAFKA_VERSION=trunk ./build_integration.sh + KAFKA_VERSION=trunk tox -e py36 From 2b67493ff88aa3068bfc1be1aa089d95f4d60699 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Sat, 27 Oct 2018 01:17:29 -0700 Subject: [PATCH 24/28] Remove unused ivy_root variable This is no longer used anywhere in the codebase --- test/fixtures.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/fixtures.py b/test/fixtures.py index 08cc951a2..76e3071f3 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -48,7 +48,6 @@ class Fixture(object): os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) kafka_root = os.environ.get("KAFKA_ROOT", os.path.join(project_root, 'servers', kafka_version, "kafka-bin")) - ivy_root = os.environ.get('IVY_ROOT', os.path.expanduser("~/.ivy2/cache")) def __init__(self): self.child = None From 1945ad16a15f53a07fae489b20ac616bb184ca89 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Sat, 27 Oct 2018 02:01:08 -0700 Subject: [PATCH 25/28] Minor aesthetic cleanup of partitioner tests --- test/test_partitioner.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/test/test_partitioner.py b/test/test_partitioner.py index 47470e1bd..3a5264b7e 100644 --- a/test/test_partitioner.py +++ b/test/test_partitioner.py @@ -1,13 +1,14 @@ from __future__ import absolute_import +import pytest + from kafka.partitioner import DefaultPartitioner, Murmur2Partitioner, RoundRobinPartitioner from kafka.partitioner.hashed import murmur2 def test_default_partitioner(): partitioner = DefaultPartitioner() - all_partitions = list(range(100)) - available = all_partitions + all_partitions = available = list(range(100)) # partitioner should return the same partition for the same key p1 = partitioner(b'foo', all_partitions, available) p2 = partitioner(b'foo', all_partitions, available) @@ -23,8 +24,7 @@ def test_default_partitioner(): def test_roundrobin_partitioner(): partitioner = RoundRobinPartitioner() - all_partitions = list(range(100)) - available = all_partitions + all_partitions = available = list(range(100)) # partitioner should cycle between partitions i = 0 max_partition = all_partitions[len(all_partitions) - 1] @@ -53,15 +53,14 @@ def test_roundrobin_partitioner(): i += 1 -def test_murmur2_java_compatibility(): +@pytest.mark.parametrize("bytes_payload,partition_number", [ + (b'', 681), (b'a', 524), (b'ab', 434), (b'abc', 107), (b'123456789', 566), + (b'\x00 ', 742) +]) +def test_murmur2_java_compatibility(bytes_payload, partition_number): p = Murmur2Partitioner(range(1000)) # compare with output from Kafka's org.apache.kafka.clients.producer.Partitioner - assert p.partition(b'') == 681 - assert p.partition(b'a') == 524 - assert p.partition(b'ab') == 434 - assert p.partition(b'abc') == 107 - assert p.partition(b'123456789') == 566 - assert p.partition(b'\x00 ') == 742 + assert p.partition(bytes_payload) == partition_number def test_murmur2_not_ascii(): From f00016e7cec64cfc9697b233809cd37e0e19cc64 Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Sat, 27 Oct 2018 00:11:10 -0700 Subject: [PATCH 26/28] Cleanup fixture imports `random_string` now comes from `test.fixtures` and was being transparently imported via `test.testutil` so this bypasses the pointless indirect import. Similarly, `kafka_version` was transparently imported by `test.testutil` from `test.fixtures`. 
Also removed `random_port()` in `test.testutil` because it's unused, as it's been replaced by the one in `test.fixtures`. This is part of the pytest migration that was started back in a1869c4be5f47b4f6433610249aaf29af4ec95e5. --- test/conftest.py | 4 ++-- test/test_codec.py | 2 +- test/test_consumer_group.py | 2 +- test/test_consumer_integration.py | 4 ++-- test/test_failover_integration.py | 4 ++-- test/test_producer.py | 2 +- test/testutil.py | 7 +------ 7 files changed, 10 insertions(+), 15 deletions(-) diff --git a/test/conftest.py b/test/conftest.py index dbc2378d9..a751d9506 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -4,8 +4,8 @@ import pytest -from test.fixtures import KafkaFixture, ZookeeperFixture -from test.testutil import kafka_version, random_string +from test.fixtures import KafkaFixture, ZookeeperFixture, random_string, version as kafka_version + @pytest.fixture(scope="module") def version(): diff --git a/test/test_codec.py b/test/test_codec.py index e132c1d47..0fefe6faa 100644 --- a/test/test_codec.py +++ b/test/test_codec.py @@ -14,7 +14,7 @@ lz4_encode_old_kafka, lz4_decode_old_kafka, ) -from test.testutil import random_string +from test.fixtures import random_string def test_gzip(): diff --git a/test/test_consumer_group.py b/test/test_consumer_group.py index 55cf6625d..01eb39e1d 100644 --- a/test/test_consumer_group.py +++ b/test/test_consumer_group.py @@ -13,7 +13,7 @@ from kafka.structs import TopicPartition from test.conftest import version -from test.testutil import random_string +from test.fixtures import random_string def get_connect_str(kafka_broker): diff --git a/test/test_consumer_integration.py b/test/test_consumer_integration.py index ce934ea1c..9a7790eac 100644 --- a/test/test_consumer_integration.py +++ b/test/test_consumer_integration.py @@ -24,9 +24,9 @@ ) from test.conftest import version -from test.fixtures import ZookeeperFixture, KafkaFixture +from test.fixtures import ZookeeperFixture, KafkaFixture, random_string from test.testutil import ( - KafkaIntegrationTestCase, kafka_versions, random_string, Timer, + KafkaIntegrationTestCase, kafka_versions, Timer, send_messages ) diff --git a/test/test_failover_integration.py b/test/test_failover_integration.py index ad7dcb98b..48021a443 100644 --- a/test/test_failover_integration.py +++ b/test/test_failover_integration.py @@ -9,8 +9,8 @@ from kafka.producer.base import Producer from kafka.structs import TopicPartition -from test.fixtures import ZookeeperFixture, KafkaFixture -from test.testutil import KafkaIntegrationTestCase, random_string +from test.fixtures import ZookeeperFixture, KafkaFixture, random_string +from test.testutil import KafkaIntegrationTestCase log = logging.getLogger(__name__) diff --git a/test/test_producer.py b/test/test_producer.py index 176b23988..16da61898 100644 --- a/test/test_producer.py +++ b/test/test_producer.py @@ -8,7 +8,7 @@ from kafka import KafkaConsumer, KafkaProducer, TopicPartition from kafka.producer.buffer import SimpleBufferPool from test.conftest import version -from test.testutil import random_string +from test.fixtures import random_string def test_buffer_pool(): diff --git a/test/testutil.py b/test/testutil.py index a1383a0a0..feb6f6d5f 100644 --- a/test/testutil.py +++ b/test/testutil.py @@ -19,6 +19,7 @@ from kafka.structs import OffsetRequestPayload, ProduceRequestPayload from test.fixtures import random_string, version_str_to_list, version as kafka_version #pylint: disable=wrong-import-order + def kafka_versions(*versions): def construct_lambda(s): @@
-65,12 +66,6 @@ def wrapper(func, *args, **kwargs): return real_kafka_versions -def get_open_port(): - sock = socket.socket() - sock.bind(("", 0)) - port = sock.getsockname()[1] - sock.close() - return port _MESSAGES = {} def msg(message): From 4d13713c515796afa535e980b15fa0c2c86ba0eb Mon Sep 17 00:00:00 2001 From: Jeff Widman Date: Mon, 29 Oct 2018 00:45:40 -0700 Subject: [PATCH 27/28] Document KafkaAdmin class --- docs/apidoc/KafkaAdmin.rst | 5 +++++ docs/apidoc/modules.rst | 1 + kafka/admin/kafka.py | 7 +++++++ 3 files changed, 13 insertions(+) create mode 100644 docs/apidoc/KafkaAdmin.rst diff --git a/docs/apidoc/KafkaAdmin.rst b/docs/apidoc/KafkaAdmin.rst new file mode 100644 index 000000000..f8c80ab45 --- /dev/null +++ b/docs/apidoc/KafkaAdmin.rst @@ -0,0 +1,5 @@ +KafkaAdmin +=========== + +.. autoclass:: kafka.admin.KafkaAdmin + :members: diff --git a/docs/apidoc/modules.rst b/docs/apidoc/modules.rst index 947788713..1173cfeed 100644 --- a/docs/apidoc/modules.rst +++ b/docs/apidoc/modules.rst @@ -5,6 +5,7 @@ kafka-python API KafkaConsumer KafkaProducer + KafkaAdmin KafkaClient BrokerConnection ClusterMetadata diff --git a/kafka/admin/kafka.py b/kafka/admin/kafka.py index e78bdbfa7..37a80a70d 100644 --- a/kafka/admin/kafka.py +++ b/kafka/admin/kafka.py @@ -18,6 +18,13 @@ class KafkaAdmin(object): """An class for administering the kafka cluster. + Warning: + This is an unstable interface that was recently added and is subject to + change without warning. In particular, many methods currently return + raw protocol tuples. In future releases, we plan to make these into + nicer, more pythonic objects. Unfortunately, this will likely break + those interfaces. + The KafkaAdmin class will negotiate for the latest version of each message protocol format supported by both the kafka-python client library and the kafka broker. Usage of optional fields from protocol versions that are not supported by the broker will result in UnsupportedVersionError exceptions. From 3689da3d5c02e362d872cf1fb2d65201419c4b93 Mon Sep 17 00:00:00 2001 From: billyevans Date: Fri, 20 Jul 2018 14:11:41 -0700 Subject: [PATCH 28/28] Pre-compile pack/unpack function calls I noticed that the pack/unpack functions in https://github.com/dpkp/kafka-python/blob/master/kafka/protocol/types.py could be slightly improved, so I pre-compiled them. This gives about 10% better performance compared to the current implementation. Profile of consuming 100 messages: ``` 239884 0.187 0.000 0.287 0.000 types.py:18(_unpack) # new version 239884 0.192 0.000 0.323 0.000 types.py:17(_unpack) ``` I also profiled producers/consumers; the change gives roughly 1-1.5% overall time savings.
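To make the comparison easy to reproduce outside a full consumer profile, here is a minimal standalone sketch of the same technique. It is not part of the patch: the helper names (`pack_with_module`, `pack_precompiled`, `_int32`) are made up for illustration, and the timings it prints are machine-dependent.

```
# Micro-benchmark sketch: struct.pack with a format string vs. a pre-compiled
# struct.Struct, the technique this commit applies in kafka/protocol/types.py.
from __future__ import print_function

import struct
import timeit

_int32 = struct.Struct('>i')   # compiled once, like the new class-level _pack/_unpack


def pack_with_module(value):
    # struct.pack re-resolves the format string on every call
    return struct.pack('>i', value)


def pack_precompiled(value):
    # the pre-compiled Struct skips that lookup on every call
    return _int32.pack(value)


if __name__ == '__main__':
    # both paths must produce identical wire bytes
    assert pack_with_module(12345) == pack_precompiled(12345)

    n = 1000000
    print('struct.pack      : %.3fs' % timeit.timeit(lambda: pack_with_module(12345), number=n))
    print('precompiled pack : %.3fs' % timeit.timeit(lambda: pack_precompiled(12345), number=n))
```

The saving comes from resolving the format string once and reusing the bound pack/unpack methods, which is exactly what the class-level `_pack`/`_unpack` attributes in the diff below do.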
--- kafka/protocol/types.py | 42 ++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/kafka/protocol/types.py b/kafka/protocol/types.py index 5ccb83ea7..d508b2605 100644 --- a/kafka/protocol/types.py +++ b/kafka/protocol/types.py @@ -1,13 +1,14 @@ from __future__ import absolute_import -from struct import pack, unpack, error +import struct +from struct import error from kafka.protocol.abstract import AbstractType def _pack(f, value): try: - return pack(f, value) + return f(value) except error as e: raise ValueError("Error encountered when attempting to convert value: " "{!r} to struct format: '{}', hit error: {}" @@ -16,7 +17,7 @@ def _pack(f, value): def _unpack(f, data): try: - (value,) = unpack(f, data) + (value,) = f(data) return value except error as e: raise ValueError("Error encountered when attempting to convert value: " @@ -25,43 +26,55 @@ def _unpack(f, data): class Int8(AbstractType): + _pack = struct.Struct('>b').pack + _unpack = struct.Struct('>b').unpack + @classmethod def encode(cls, value): - return _pack('>b', value) + return _pack(cls._pack, value) @classmethod def decode(cls, data): - return _unpack('>b', data.read(1)) + return _unpack(cls._unpack, data.read(1)) class Int16(AbstractType): + _pack = struct.Struct('>h').pack + _unpack = struct.Struct('>h').unpack + @classmethod def encode(cls, value): - return _pack('>h', value) + return _pack(cls._pack, value) @classmethod def decode(cls, data): - return _unpack('>h', data.read(2)) + return _unpack(cls._unpack, data.read(2)) class Int32(AbstractType): + _pack = struct.Struct('>i').pack + _unpack = struct.Struct('>i').unpack + @classmethod def encode(cls, value): - return _pack('>i', value) + return _pack(cls._pack, value) @classmethod def decode(cls, data): - return _unpack('>i', data.read(4)) + return _unpack(cls._unpack, data.read(4)) class Int64(AbstractType): + _pack = struct.Struct('>q').pack + _unpack = struct.Struct('>q').unpack + @classmethod def encode(cls, value): - return _pack('>q', value) + return _pack(cls._pack, value) @classmethod def decode(cls, data): - return _unpack('>q', data.read(8)) + return _unpack(cls._unpack, data.read(8)) class String(AbstractType): @@ -108,13 +121,16 @@ def repr(cls, value): class Boolean(AbstractType): + _pack = struct.Struct('>?').pack + _unpack = struct.Struct('>?').unpack + @classmethod def encode(cls, value): - return _pack('>?', value) + return _pack(cls._pack, value) @classmethod def decode(cls, data): - return _unpack('>?', data.read(1)) + return _unpack(cls._unpack, data.read(1)) class Schema(AbstractType):
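As a quick round-trip sanity check that the pre-compiled structs leave the wire format unchanged, something along these lines (not part of the patch, and assuming the patched `kafka.protocol.types` module is importable) should still pass:

```
import io

from kafka.protocol.types import Boolean, Int16, Int32, Int64

# encode() returns raw bytes; decode() reads from a file-like object,
# so wrap the encoded bytes in BytesIO before decoding.
for field_type, value in [(Int16, -42), (Int32, 123456), (Int64, 2 ** 40), (Boolean, True)]:
    encoded = field_type.encode(value)
    assert field_type.decode(io.BytesIO(encoded)) == value
```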