diff --git a/README.rst b/README.rst index dcade4339..a82573bbf 100644 --- a/README.rst +++ b/README.rst @@ -70,6 +70,11 @@ that expose basic message attributes: topic, partition, offset, key, and value: >>> for msg in consumer: ... assert isinstance(msg.value, dict) +>>> # Access record headers. The returned value is a list of tuples +>>> # with str, bytes for key and value +>>> for msg in consumer: +... print (msg.headers) + >>> # Get consumer metrics >>> metrics = consumer.metrics() @@ -112,6 +117,10 @@ for more details. >>> for i in range(1000): ... producer.send('foobar', b'msg %d' % i) +>>> # Include record headers. The format is list of tuples with string key +>>> # and bytes value. +>>> producer.send('foobar', value=b'c29tZSB2YWx1ZQ==', headers=[('content-encoding', b'base64')]) + >>> # Get producer performance metrics >>> metrics = producer.metrics() diff --git a/benchmarks/README b/benchmarks/README index 369e8b626..531b78940 100644 --- a/benchmarks/README +++ b/benchmarks/README @@ -1,4 +1,4 @@ The `record_batch_*` benchmarks in this section are written using ``perf`` library, created by Viktor Stinner. For more information on how to get reliable results of test runs please consult -http://perf.readthedocs.io/en/latest/run_benchmark.html. +https://perf.readthedocs.io/en/latest/run_benchmark.html. diff --git a/benchmarks/consumer_performance.py b/benchmarks/consumer_performance.py index 3e879ae58..5ffd3f5f6 100755 --- a/benchmarks/consumer_performance.py +++ b/benchmarks/consumer_performance.py @@ -10,6 +10,8 @@ import threading import traceback +from kafka.vendor.six.moves import range + from kafka import KafkaConsumer, KafkaProducer from test.fixtures import KafkaFixture, ZookeeperFixture @@ -64,7 +66,7 @@ def run(args): record = bytes(bytearray(args.record_size)) producer = KafkaProducer(compression_type=args.fixture_compression, **props) - for i in xrange(args.num_records): + for i in range(args.num_records): producer.send(topic=args.topic, value=record) producer.flush() producer.close() diff --git a/benchmarks/producer_performance.py b/benchmarks/producer_performance.py index e9587358e..0c29cbc24 100755 --- a/benchmarks/producer_performance.py +++ b/benchmarks/producer_performance.py @@ -9,6 +9,8 @@ import threading import traceback +from kafka.vendor.six.moves import range + from kafka import KafkaProducer from test.fixtures import KafkaFixture, ZookeeperFixture @@ -77,7 +79,7 @@ def run(args): print('-> OK!') print() - for i in xrange(args.num_records): + for i in range(args.num_records): producer.send(topic=args.topic, value=record) producer.flush() diff --git a/benchmarks/varint_speed.py b/benchmarks/varint_speed.py index 2c5cd620d..624a12a42 100644 --- a/benchmarks/varint_speed.py +++ b/benchmarks/varint_speed.py @@ -1,7 +1,7 @@ #!/usr/bin/env python from __future__ import print_function import perf -import six +from kafka.vendor import six test_data = [ diff --git a/docs/Makefile b/docs/Makefile index 5751f68c6..b27cf7742 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -9,7 +9,7 @@ BUILDDIR = _build # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) -$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. 
If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from https://www.sphinx-doc.org/) endif # Internal variables. diff --git a/docs/apidoc/KafkaAdmin.rst b/docs/apidoc/KafkaAdmin.rst new file mode 100644 index 000000000..f8c80ab45 --- /dev/null +++ b/docs/apidoc/KafkaAdmin.rst @@ -0,0 +1,5 @@ +KafkaAdmin +=========== + +.. autoclass:: kafka.admin.KafkaAdmin + :members: diff --git a/docs/apidoc/modules.rst b/docs/apidoc/modules.rst index 947788713..1173cfeed 100644 --- a/docs/apidoc/modules.rst +++ b/docs/apidoc/modules.rst @@ -5,6 +5,7 @@ kafka-python API KafkaConsumer KafkaProducer + KafkaAdmin KafkaClient BrokerConnection ClusterMetadata diff --git a/docs/install.rst b/docs/install.rst index fe740f660..d6473ecd4 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -37,7 +37,7 @@ Optional Snappy install Install Development Libraries ============================= -Download and build Snappy from http://code.google.com/p/snappy/downloads/list +Download and build Snappy from https://google.github.io/snappy/ Ubuntu: @@ -55,9 +55,9 @@ From Source: .. code:: bash - wget http://snappy.googlecode.com/files/snappy-1.0.5.tar.gz - tar xzvf snappy-1.0.5.tar.gz - cd snappy-1.0.5 + wget https://github.com/google/snappy/releases/download/1.1.3/snappy-1.1.3.tar.gz + tar xzvf snappy-1.1.3.tar.gz + cd snappy-1.1.3 ./configure make sudo make install diff --git a/docs/make.bat b/docs/make.bat index 2e9d7dc51..3332a3a1b 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -56,7 +56,7 @@ if errorlevel 9009 ( echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ + echo.https://www.sphinx-doc.org/ exit /b 1 ) diff --git a/docs/tests.rst b/docs/tests.rst index 74642c937..5983475e0 100644 --- a/docs/tests.rst +++ b/docs/tests.rst @@ -23,8 +23,13 @@ fixtures for client / consumer / producer testing. Unit tests ------------------ -To run the tests locally, install tox -- `pip install tox` -See https://tox.readthedocs.io/en/latest/install.html +To run the tests locally, install tox: + +.. code:: bash + + pip install tox + +For more details, see https://tox.readthedocs.io/en/latest/install.html Then simply run tox, optionally setting the python environment. If unset, tox will loop through all environments. @@ -49,8 +54,8 @@ Integration tests .. code:: bash - KAFKA_VERSION=0.10.1.1 tox -e py27 - KAFKA_VERSION=0.8.2.2 tox -e py35 + KAFKA_VERSION=0.8.2.2 tox -e py27 + KAFKA_VERSION=1.0.1 tox -e py36 Integration tests start Kafka and Zookeeper fixtures. This requires downloading @@ -60,25 +65,24 @@ kafka server binaries: ./build_integration.sh -By default, this will install 0.8.2.2, 0.9.0.1, 0.10.1.1, and -0.10.2.1 brokers into the servers/ directory. To install a specific version, - e.g., set `KAFKA_VERSION=0.10.2.1`: +By default, this will install the broker versions listed in build_integration.sh's `ALL_RELEASES` +into the servers/ directory. To install a specific version, set the `KAFKA_VERSION` variable: .. 
code:: bash - KAFKA_VERSION=0.10.2.1 ./build_integration.sh + KAFKA_VERSION=1.0.1 ./build_integration.sh -Then run the tests against supported Kafka versions, simply set the `KAFKA_VERSION` +Then to run the tests against a specific Kafka version, simply set the `KAFKA_VERSION` env variable to the server build you want to use for testing: .. code:: bash - KAFKA_VERSION=0.10.2.1 tox -e py27 + KAFKA_VERSION=1.0.1 tox -e py36 To test against the kafka source tree, set KAFKA_VERSION=trunk -[optionally set SCALA_VERSION (defaults to 2.10)] +[optionally set SCALA_VERSION (defaults to the value set in `build_integration.sh`)] .. code:: bash - SCALA_VERSION=2.11 KAFKA_VERSION=trunk ./build_integration.sh - KAFKA_VERSION=trunk tox -e py35 + SCALA_VERSION=2.12 KAFKA_VERSION=trunk ./build_integration.sh + KAFKA_VERSION=trunk tox -e py36 diff --git a/kafka/__init__.py b/kafka/__init__.py index f108eff1c..fa50bf61c 100644 --- a/kafka/__init__.py +++ b/kafka/__init__.py @@ -18,6 +18,7 @@ def emit(self, record): logging.getLogger(__name__).addHandler(NullHandler()) +from kafka.admin import KafkaAdmin from kafka.consumer import KafkaConsumer from kafka.consumer.subscription_state import ConsumerRebalanceListener from kafka.producer import KafkaProducer @@ -25,8 +26,8 @@ def emit(self, record): from kafka.protocol import ( create_message, create_gzip_message, create_snappy_message) from kafka.partitioner import RoundRobinPartitioner, HashedPartitioner, Murmur2Partitioner -from kafka.structs import TopicPartition, OffsetAndMetadata from kafka.serializer import Serializer, Deserializer +from kafka.structs import TopicPartition, OffsetAndMetadata # To be deprecated when KafkaProducer interface is released from kafka.client import SimpleClient @@ -46,9 +47,10 @@ def __init__(self, *args, **kwargs): __all__ = [ + 'KafkaAdmin', 'KafkaConsumer', 'KafkaProducer', 'KafkaClient', 'BrokerConnection', 'SimpleClient', 'SimpleProducer', 'KeyedProducer', 'RoundRobinPartitioner', 'HashedPartitioner', 'create_message', 'create_gzip_message', 'create_snappy_message', - 'SimpleConsumer', 'MultiProcessConsumer', + 'SimpleConsumer', 'MultiProcessConsumer', 'ConsumerRebalanceListener', ] diff --git a/kafka/admin/__init__.py b/kafka/admin/__init__.py new file mode 100644 index 000000000..069bc7c88 --- /dev/null +++ b/kafka/admin/__init__.py @@ -0,0 +1,10 @@ +from __future__ import absolute_import + +from kafka.admin.config_resource import ConfigResource, ConfigResourceType +from kafka.admin.kafka import KafkaAdmin +from kafka.admin.new_topic import NewTopic +from kafka.admin.new_partitions import NewPartitions + +__all__ = [ + 'ConfigResource', 'ConfigResourceType', 'KafkaAdmin', 'NewTopic', 'NewPartitions' +] diff --git a/kafka/admin/config_resource.py b/kafka/admin/config_resource.py new file mode 100644 index 000000000..e3294c9c4 --- /dev/null +++ b/kafka/admin/config_resource.py @@ -0,0 +1,36 @@ +from __future__ import absolute_import + +# enum in stdlib as of py3.4 +try: + from enum import IntEnum # pylint: disable=import-error +except ImportError: + # vendored backport module + from kafka.vendor.enum34 import IntEnum + + +class ConfigResourceType(IntEnum): + """An enumerated type of config resources""" + + BROKER = 4, + TOPIC = 2 + + +class ConfigResource(object): + """A class for specifying config resources. + Arguments: + resource_type (ConfigResourceType): the type of kafka resource + name (string): The name of the kafka resource + configs ({key : value}): A maps of config keys to values. 
+ """ + + def __init__( + self, + resource_type, + name, + configs=None + ): + if not isinstance(resource_type, (ConfigResourceType)): + resource_type = ConfigResourceType[str(resource_type).upper()] # pylint: disable-msg=unsubscriptable-object + self.resource_type = resource_type + self.name = name + self.configs = configs diff --git a/kafka/admin/kafka.py b/kafka/admin/kafka.py new file mode 100644 index 000000000..37a80a70d --- /dev/null +++ b/kafka/admin/kafka.py @@ -0,0 +1,512 @@ +from __future__ import absolute_import + +import copy +import logging +import socket +from kafka.client_async import KafkaClient, selectors +from kafka.errors import ( + KafkaConfigurationError, UnsupportedVersionError, NodeNotReadyError, NotControllerError, KafkaConnectionError) +from kafka.metrics import MetricConfig, Metrics +from kafka.protocol.admin import ( + CreateTopicsRequest, DeleteTopicsRequest, DescribeConfigsRequest, AlterConfigsRequest, CreatePartitionsRequest, + ListGroupsRequest, DescribeGroupsRequest) +from kafka.protocol.metadata import MetadataRequest +from kafka.version import __version__ + +log = logging.getLogger(__name__) + +class KafkaAdmin(object): + """An class for administering the kafka cluster. + + Warning: + This is an unstable interface that was recently added and is subject to + change without warning. In particular, many methods currently return + raw protocol tuples. In future releases, we plan to make these into + nicer, more pythonic objects. Unfortunately, this will likely break + those interfaces. + + The KafkaAdmin class will negotiate for the latest version of each message protocol format supported + by both the kafka-python client library and the kafka broker. Usage of optional fields from protocol + versions that are not supported by the broker will result in UnsupportedVersionError exceptions. + + Use of this class requires a minimum broker version >= 0.10.0.0. + + Keyword Arguments: + bootstrap_servers: 'host[:port]' string (or list of 'host[:port]' + strings) that the consumer should contact to bootstrap initial + cluster metadata. This does not have to be the full node list. + It just needs to have at least one broker that will respond to a + Metadata API Request. Default port is 9092. If no servers are + specified, will default to localhost:9092. + client_id (str): a name for this client. This string is passed in + each request to servers and can be used to identify specific + server-side log entries that correspond to this client. Also + submitted to GroupCoordinator for logging with respect to + consumer group administration. Default: 'kafka-python-{version}' + reconnect_backoff_ms (int): The amount of time in milliseconds to + wait before attempting to reconnect to a given host. + Default: 50. + reconnect_backoff_max_ms (int): The maximum amount of time in + milliseconds to wait when reconnecting to a broker that has + repeatedly failed to connect. If provided, the backoff per host + will increase exponentially for each consecutive connection + failure, up to this maximum. To avoid connection storms, a + randomization factor of 0.2 will be applied to the backoff + resulting in a random range between 20% below and 20% above + the computed value. Default: 1000. + request_timeout_ms (int): Client request timeout in milliseconds. + Default: 30000. + connections_max_idle_ms: Close idle connections after the number of + milliseconds specified by this config. 
The broker closes idle + connections after connections.max.idle.ms, so this avoids hitting + unexpected socket disconnected errors on the client. + Default: 540000 + retry_backoff_ms (int): Milliseconds to backoff when retrying on + errors. Default: 100. + max_in_flight_requests_per_connection (int): Requests are pipelined + to kafka brokers up to this number of maximum requests per + broker connection. Default: 5. + receive_buffer_bytes (int): The size of the TCP receive buffer + (SO_RCVBUF) to use when reading data. Default: None (relies on + system defaults). Java client defaults to 32768. + send_buffer_bytes (int): The size of the TCP send buffer + (SO_SNDBUF) to use when sending data. Default: None (relies on + system defaults). Java client defaults to 131072. + socket_options (list): List of tuple-arguments to socket.setsockopt + to apply to broker connection sockets. Default: + [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] + metadata_max_age_ms (int): The period of time in milliseconds after + which we force a refresh of metadata even if we haven't seen any + partition leadership changes to proactively discover any new + brokers or partitions. Default: 300000 + security_protocol (str): Protocol used to communicate with brokers. + Valid values are: PLAINTEXT, SSL. Default: PLAINTEXT. + ssl_context (ssl.SSLContext): Pre-configured SSLContext for wrapping + socket connections. If provided, all other ssl_* configurations + will be ignored. Default: None. + ssl_check_hostname (bool): Flag to configure whether SSL handshake + should verify that the certificate matches the broker's hostname. + Default: True. + ssl_cafile (str): Optional filename of CA file to use in certificate + veriication. Default: None. + ssl_certfile (str): Optional filename of file in PEM format containing + the client certificate, as well as any CA certificates needed to + establish the certificate's authenticity. Default: None. + ssl_keyfile (str): Optional filename containing the client private key. + Default: None. + ssl_password (str): Optional password to be used when loading the + certificate chain. Default: None. + ssl_crlfile (str): Optional filename containing the CRL to check for + certificate expiration. By default, no CRL check is done. When + providing a file, only the leaf certificate will be checked against + this CRL. The CRL can only be checked with Python 3.4+ or 2.7.9+. + Default: None. + api_version (tuple): Specify which Kafka API version to use. If set + to None, KafkaClient will attempt to infer the broker version by + probing various APIs. Example: (0, 10, 2). Default: None + api_version_auto_timeout_ms (int): number of milliseconds to throw a + timeout exception from the constructor when checking the broker + api version. Only applies if api_version is None + selector (selectors.BaseSelector): Provide a specific selector + implementation to use for I/O multiplexing. + Default: selectors.DefaultSelector + metrics (kafka.metrics.Metrics): Optionally provide a metrics + instance for capturing network IO stats. Default: None. + metric_group_prefix (str): Prefix for metric names. Default: '' + sasl_mechanism (str): string picking sasl mechanism when security_protocol + is SASL_PLAINTEXT or SASL_SSL. Currently only PLAIN is supported. + Default: None + sasl_plain_username (str): username for sasl PLAIN authentication. + Default: None + sasl_plain_password (str): password for sasl PLAIN authentication. 
+ Default: None + sasl_kerberos_service_name (str): Service name to include in GSSAPI + sasl mechanism handshake. Default: 'kafka' + + """ + DEFAULT_CONFIG = { + # client configs + 'bootstrap_servers': 'localhost', + 'client_id': 'kafka-python-' + __version__, + 'request_timeout_ms': 30000, + 'connections_max_idle_ms': 9 * 60 * 1000, + 'reconnect_backoff_ms': 50, + 'reconnect_backoff_max_ms': 1000, + 'max_in_flight_requests_per_connection': 5, + 'receive_buffer_bytes': None, + 'send_buffer_bytes': None, + 'socket_options': [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)], + 'sock_chunk_bytes': 4096, # undocumented experimental option + 'sock_chunk_buffer_count': 1000, # undocumented experimental option + 'retry_backoff_ms': 100, + 'metadata_max_age_ms': 300000, + 'security_protocol': 'PLAINTEXT', + 'ssl_context': None, + 'ssl_check_hostname': True, + 'ssl_cafile': None, + 'ssl_certfile': None, + 'ssl_keyfile': None, + 'ssl_password': None, + 'ssl_crlfile': None, + 'api_version': None, + 'api_version_auto_timeout_ms': 2000, + 'selector': selectors.DefaultSelector, + 'sasl_mechanism': None, + 'sasl_plain_username': None, + 'sasl_plain_password': None, + 'sasl_kerberos_service_name': 'kafka', + + # metrics configs + 'metric_reporters' : [], + 'metrics_num_samples': 2, + 'metrics_sample_window_ms': 30000, + } + + def __init__(self, **configs): + log.debug("Starting Kafka administration interface") + extra_configs = set(configs).difference(self.DEFAULT_CONFIG) + if extra_configs: + raise KafkaConfigurationError("Unrecognized configs: %s" % extra_configs) + + self.config = copy.copy(self.DEFAULT_CONFIG) + self.config.update(configs) + + # api_version was previously a str. accept old format for now + if isinstance(self.config['api_version'], str): + deprecated = self.config['api_version'] + if deprecated == 'auto': + self.config['api_version'] = None + else: + self.config['api_version'] = tuple(map(int, deprecated.split('.'))) + log.warning('use api_version=%s [tuple] -- "%s" as str is deprecated', + str(self.config['api_version']), deprecated) + + # Configure metrics + metrics_tags = {'client-id': self.config['client_id']} + metric_config = MetricConfig(samples=self.config['metrics_num_samples'], + time_window_ms=self.config['metrics_sample_window_ms'], + tags=metrics_tags) + reporters = [reporter() for reporter in self.config['metric_reporters']] + self._metrics = Metrics(metric_config, reporters) + + self._client = KafkaClient(metrics=self._metrics, metric_group_prefix='admin', + **self.config) + + # Get auto-discovered version from client if necessary + if self.config['api_version'] is None: + self.config['api_version'] = self._client.config['api_version'] + + self._closed = False + self._refresh_controller_id() + log.debug('Kafka administration interface started') + + def close(self): + """Close the administration connection to the kafka broker""" + if not hasattr(self, '_closed') or self._closed: + log.info('Kafka administration interface already closed') + return + + self._metrics.close() + self._client.close() + self._closed = True + log.debug('Kafka administartion interface has closed') + + def _matching_api_version(self, operation): + """Find matching api version, the lesser of either the latest api version the library supports, or + the max version supported by the broker + + :param operation: An operation array from kafka.protocol + :return: The max matching version number between client and broker + """ + version = min(len(operation) - 1, + 
self._client.get_api_versions()[operation[0].API_KEY][1]) + if version < self._client.get_api_versions()[operation[0].API_KEY][0]: + # max library version is less than min broker version. Not sure any brokers + # actually set a min version greater than 0 right now, tho. But maybe in the future? + raise UnsupportedVersionError( + "Could not find matching protocol version for {}" + .format(operation.__name__)) + return version + + def _validate_timeout(self, timeout_ms): + """Validate the timeout is set or use the configuration default + + :param timeout_ms: The timeout provided by api call, in milliseconds + :return: The timeout to use for the operation + """ + return timeout_ms or self.config['request_timeout_ms'] + + def _refresh_controller_id(self): + """Determine the kafka cluster controller + """ + response = self._send_request_to_node( + self._client.least_loaded_node(), + MetadataRequest[1]([]) + ) + self._controller_id = response.controller_id + version = self._client.check_version(self._controller_id) + if version < (0, 10, 0): + raise UnsupportedVersionError( + "Kafka Admin interface not supported for cluster controller version {} < 0.10.0.0" + .format(version)) + + def _send_request_to_node(self, node, request): + """Send a kafka protocol message to a specific broker. Will block until the message result is received. + + :param node: The broker id to which to send the message + :param request: The message to send + :return: The kafka protocol response for the message + :exception: The exception if the message could not be sent + """ + while not self._client.ready(node): + # connection to broker not ready, poll until it is or send will fail with NodeNotReadyError + self._client.poll() + future = self._client.send(node, request) + self._client.poll(future=future) + if future.succeeded(): + return future.value + else: + raise future.exception # pylint: disable-msg=raising-bad-type + + def _send(self, request): + """Send a kafka protocol message to the cluster controller. Will block until the message result is received. + + :param request: The message to send + :return The kafka protocol response for the message + :exception NodeNotReadyError: If the controller connection can't be established + """ + remaining_tries = 2 + while remaining_tries > 0: + remaining_tries = remaining_tries - 1 + try: + return self._send_request_to_node(self._controller_id, request) + except (NotControllerError, KafkaConnectionError) as e: + # controller changed? refresh it + self._refresh_controller_id() + raise NodeNotReadyError(self._controller_id) + + @staticmethod + def _convert_new_topic_request(new_topic): + return ( + new_topic.name, + new_topic.num_partitions, + new_topic.replication_factor, + [ + (partition_id, replicas) for partition_id, replicas in new_topic.replica_assignments.items() + ], + [ + (config_key, config_value) for config_key, config_value in new_topic.topic_configs.items() + ] + ) + + def create_topics(self, new_topics, timeout_ms=None, validate_only=None): + """Create new topics in the cluster. + + :param new_topics: Array of NewTopic objects + :param timeout_ms: Milliseconds to wait for new topics to be created before broker returns + :param validate_only: If True, don't actually create new topics. Not supported by all versions. 
+ :return: Appropriate version of CreateTopicResponse class + """ + version = self._matching_api_version(CreateTopicsRequest) + timeout_ms = self._validate_timeout(timeout_ms) + if version == 0: + if validate_only: + raise UnsupportedVersionError( + "validate_only not supported on cluster version {}" + .format(self.config['api_version'])) + request = CreateTopicsRequest[version]( + create_topic_requests = [self._convert_new_topic_request(new_topic) for new_topic in new_topics], + timeout = timeout_ms + ) + elif version <= 2: + validate_only = validate_only or False + request = CreateTopicsRequest[version]( + create_topic_requests = [self._convert_new_topic_request(new_topic) for new_topic in new_topics], + timeout = timeout_ms, + validate_only = validate_only + ) + else: + raise UnsupportedVersionError( + "missing implementation of CreateTopics for library supported version {}" + .format(version) + ) + return self._send(request) + + def delete_topics(self, topics, timeout_ms=None): + """Delete topics from the cluster + + :param topics: Array of topic name strings + :param timeout_ms: Milliseconds to wait for topics to be deleted before broker returns + :return: Appropriate version of DeleteTopicsResponse class + """ + version = self._matching_api_version(DeleteTopicsRequest) + timeout_ms = self._validate_timeout(timeout_ms) + if version <= 1: + request = DeleteTopicsRequest[version]( + topics = topics, + timeout = timeout_ms + ) + else: + raise UnsupportedVersionError( + "missing implementation of DeleteTopics for library supported version {}" + .format(version)) + return self._send(request) + + # list topics functionality is in ClusterMetadata + + # describe topics functionality is in ClusterMetadata + + # describe cluster functionality is in ClusterMetadata + + # describe_acls protocol not implemented + + # create_acls protocol not implemented + + # delete_acls protocol not implemented + + @staticmethod + def _convert_describe_config_resource_request(config_resource): + return ( + config_resource.resource_type, + config_resource.name, + [ + config_key for config_key, config_value in config_resource.configs.items() + ] if config_resource.configs else None + ) + + def describe_configs(self, config_resources, include_synonyms=None): + """Fetch configuration parameters for one or more kafka resources. + + :param config_resources: An array of ConfigResource objects. + Any keys in ConfigResource.configs dict will be used to filter the result. The configs dict should be None + to get all values. An empty dict will get zero values (as per kafka protocol). + :param include_synonyms: If True, return synonyms in response. Not supported by all versions. 
+ :return: Appropriate version of DescribeConfigsResponse class + """ + version = self._matching_api_version(DescribeConfigsRequest) + if version == 0: + if include_synonyms: + raise UnsupportedVersionError( + "include_synonyms not supported on cluster version {}" + .format(self.config['api_version'])) + request = DescribeConfigsRequest[version]( + resources = [self._convert_describe_config_resource_request(config_resource) for config_resource in config_resources] + ) + elif version <= 1: + include_synonyms = include_synonyms or False + request = DescribeConfigsRequest[version]( + resources = [self._convert_describe_config_resource_request(config_resource) for config_resource in config_resources], + include_synonyms = include_synonyms + ) + else: + raise UnsupportedVersionError( + "missing implementation of DescribeConfigs for library supported version {}" + .format(version)) + return self._send(request) + + @staticmethod + def _convert_alter_config_resource_request(config_resource): + return ( + config_resource.resource_type, + config_resource.name, + [ + (config_key, config_value) for config_key, config_value in config_resource.configs.items() + ] + ) + + def alter_configs(self, config_resources): + """Alter configuration parameters of one or more kafka resources. + + :param config_resources: An array of ConfigResource objects. + :return: Appropriate version of AlterConfigsResponse class + """ + version = self._matching_api_version(AlterConfigsRequest) + if version == 0: + request = AlterConfigsRequest[version]( + resources = [self._convert_alter_config_resource_request(config_resource) for config_resource in config_resources] + ) + else: + raise UnsupportedVersionError( + "missing implementation of AlterConfigs for library supported version {}" + .format(version)) + return self._send(request) + + # alter replica logs dir protocol not implemented + + # describe log dirs protocol not implemented + + @staticmethod + def _convert_create_partitions_request(topic_name, new_partitions): + return ( + topic_name, + ( + new_partitions.total_count, + new_partitions.new_assignments + ) + ) + + def create_partitions(self, topic_partitions, timeout_ms=None, validate_only=None): + """Create additional partitions for an existing topic. + + :param topic_partitions: A map of topic name strings to NewPartition objects + :param timeout_ms: Milliseconds to wait for new partitions to be created before broker returns + :param validate_only: If True, don't actually create new partitions. + :return: Appropriate version of CreatePartitionsResponse class + """ + version = self._matching_api_version(CreatePartitionsRequest) + timeout_ms = self._validate_timeout(timeout_ms) + validate_only = validate_only or False + if version == 0: + request = CreatePartitionsRequest[version]( + topic_partitions = [self._convert_create_partitions_request(topic_name, new_partitions) for topic_name, new_partitions in topic_partitions.items()], + timeout = timeout_ms, + validate_only = validate_only + ) + else: + raise UnsupportedVersionError( + "missing implementation of CreatePartitions for library supported version {}" + .format(version)) + return self._send(request) + + # delete records protocol not implemented + + # create delegation token protocol not implemented + + # renew delegation token protocol not implemented + + # expire delegation_token protocol not implemented + + # describe delegation_token protocol not implemented + + def describe_consumer_groups(self, group_ids): + """Describe a set of consumer groups. 
+ + :param group_ids: A list of consumer group id names + :return: Appropriate version of DescribeGroupsResponse class + """ + version = self._matching_api_version(DescribeGroupsRequest) + if version <= 1: + request = DescribeGroupsRequest[version]( + groups = group_ids + ) + else: + raise UnsupportedVersionError( + "missing implementation of DescribeGroups for library supported version {}" + .format(version)) + return self._send(request) + + def list_consumer_groups(self): + """List all consumer groups known to the cluster. + + :return: Appropriate version of ListGroupsResponse class + """ + version = self._matching_api_version(ListGroupsRequest) + if version <= 1: + request = ListGroupsRequest[version]() + else: + raise UnsupportedVersionError( + "missing implementation of ListGroups for library supported version {}" + .format(version)) + return self._send(request) + + # delete groups protocol not implemented diff --git a/kafka/admin/new_partitions.py b/kafka/admin/new_partitions.py new file mode 100644 index 000000000..429b2e190 --- /dev/null +++ b/kafka/admin/new_partitions.py @@ -0,0 +1,19 @@ +from __future__ import absolute_import + + +class NewPartitions(object): + """A class for new partition creation on existing topics. Note that the length of new_assignments, if specified, + must be the difference between the new total number of partitions and the existing number of partitions. + Arguments: + total_count (int): the total number of partitions that should exist on the topic + new_assignments ([[int]]): an array of arrays of replica assignments for new partitions. + If not set, broker assigns replicas per an internal algorithm. + """ + + def __init__( + self, + total_count, + new_assignments=None + ): + self.total_count = total_count + self.new_assignments = new_assignments diff --git a/kafka/admin/new_topic.py b/kafka/admin/new_topic.py new file mode 100644 index 000000000..645ac383a --- /dev/null +++ b/kafka/admin/new_topic.py @@ -0,0 +1,34 @@ +from __future__ import absolute_import + +from kafka.errors import IllegalArgumentError + + +class NewTopic(object): + """ A class for new topic creation + Arguments: + name (string): name of the topic + num_partitions (int): number of partitions + or -1 if replica_assignment has been specified + replication_factor (int): replication factor or -1 if + replica assignment is specified + replica_assignment (dict of int: [int]): A mapping containing + partition id and replicas to assign to it. + topic_configs (dict of str: str): A mapping of config key + and value for the topic. + """ + + def __init__( + self, + name, + num_partitions, + replication_factor, + replica_assignments=None, + topic_configs=None, + ): + if not (num_partitions == -1 or replication_factor == -1) ^ (replica_assignments is None): + raise IllegalArgumentError('either num_partitions/replication_factor or replica_assignment must be specified') + self.name = name + self.num_partitions = num_partitions + self.replication_factor = replication_factor + self.replica_assignments = replica_assignments or {} + self.topic_configs = topic_configs or {} diff --git a/kafka/client_async.py b/kafka/client_async.py index a9704fafd..ccf1e4b10 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -79,6 +79,11 @@ class KafkaClient(object): the computed value. Default: 1000. request_timeout_ms (int): Client request timeout in milliseconds. Default: 30000. + connections_max_idle_ms: Close idle connections after the number of + milliseconds specified by this config. 
The broker closes idle + connections after connections.max.idle.ms, so this avoids hitting + unexpected socket disconnected errors on the client. + Default: 540000 retry_backoff_ms (int): Milliseconds to backoff when retrying on errors. Default: 100. max_in_flight_requests_per_connection (int): Requests are pipelined @@ -140,10 +145,13 @@ class KafkaClient(object): Default: None sasl_kerberos_service_name (str): Service name to include in GSSAPI sasl mechanism handshake. Default: 'kafka' + sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI + sasl mechanism handshake. Default: one of bootstrap servers """ DEFAULT_CONFIG = { 'bootstrap_servers': 'localhost', + 'bootstrap_topics_filter': set(), 'client_id': 'kafka-python-' + __version__, 'request_timeout_ms': 30000, 'connections_max_idle_ms': 9 * 60 * 1000, @@ -174,6 +182,7 @@ class KafkaClient(object): 'sasl_plain_username': None, 'sasl_plain_password': None, 'sasl_kerberos_service_name': 'kafka', + 'sasl_kerberos_domain_name': None } def __init__(self, **configs): @@ -187,6 +196,7 @@ def __init__(self, **configs): self._metadata_refresh_in_progress = False self._selector = self.config['selector']() self._conns = Dict() # object to support weakrefs + self._api_versions = None self._connecting = set() self._refresh_on_disconnects = True self._last_bootstrap = 0 @@ -231,9 +241,15 @@ def _bootstrap(self, hosts): self._last_bootstrap = time.time() if self.config['api_version'] is None or self.config['api_version'] < (0, 10): - metadata_request = MetadataRequest[0]([]) + if self.config['bootstrap_topics_filter']: + metadata_request = MetadataRequest[0](list(self.config['bootstrap_topics_filter'])) + else: + metadata_request = MetadataRequest[0]([]) else: - metadata_request = MetadataRequest[1](None) + if self.config['bootstrap_topics_filter']: + metadata_request = MetadataRequest[1](list(self.config['bootstrap_topics_filter'])) + else: + metadata_request = MetadataRequest[1](None) for host, port, afi in hosts: log.debug("Attempting to bootstrap via node at %s:%s", host, port) @@ -793,6 +809,17 @@ def refresh_done(val_or_error): # to let us know the selected connection might be usable again. return float('inf') + def get_api_versions(self): + """Return the ApiVersions map, if available. + + Note: A call to check_version must previously have succeeded and returned + version 0.10.0 or later + + Returns: a map of dict mapping {api_key : (min_version, max_version)}, + or None if ApiVersion is not supported by the kafka cluster. + """ + return self._api_versions + def check_version(self, node_id=None, timeout=2, strict=False): """Attempt to guess the version of a Kafka broker. 
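For illustration only, a hedged sketch of how the ApiVersions map cached above might be consulted; the broker address is an assumption, and check_version() must first succeed against a 0.10.0+ broker:

>>> from kafka.client_async import KafkaClient
>>> from kafka.protocol.admin import CreateTopicsRequest
>>> client = KafkaClient(bootstrap_servers='localhost:9092')
>>> broker_version = client.check_version()
>>> api_versions = client.get_api_versions()
>>> # min/max supported versions for the CreateTopics API key
>>> min_version, max_version = api_versions[CreateTopicsRequest[0].API_KEY]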
@@ -825,7 +852,11 @@ def check_version(self, node_id=None, timeout=2, strict=False): self._refresh_on_disconnects = False try: remaining = end - time.time() - version = conn.check_version(timeout=remaining, strict=strict) + version = conn.check_version(timeout=remaining, strict=strict, topics=list(self.config['bootstrap_topics_filter'])) + if version >= (0, 10, 0): + # cache the api versions map if it's available (starting + # in 0.10 cluster version) + self._api_versions = conn.get_api_versions() return version except Errors.NodeNotReadyError: # Only raise to user if this is a node-specific request
diff --git a/kafka/codec.py b/kafka/codec.py index 4d180ddd3..aa9fc8291 100644 --- a/kafka/codec.py +++ b/kafka/codec.py @@ -6,7 +6,7 @@ import struct from kafka.vendor import six -from kafka.vendor.six.moves import xrange # pylint: disable=import-error +from kafka.vendor.six.moves import range _XERIAL_V1_HEADER = (-126, b'S', b'N', b'A', b'P', b'P', b'Y', 0, 1, 1) _XERIAL_V1_FORMAT = 'bccccccBii' @@ -150,7 +150,7 @@ def snappy_encode(payload, xerial_compatible=True, xerial_blocksize=32*1024): chunker = lambda payload, i, size: memoryview(payload)[i:size+i].tobytes() for chunk in (chunker(payload, i, xerial_blocksize) - for i in xrange(0, len(payload), xerial_blocksize)): + for i in range(0, len(payload), xerial_blocksize)): block = snappy.compress(chunk) block_size = len(block)
diff --git a/kafka/conn.py b/kafka/conn.py index a2d5ee6cc..5ec97575f 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -176,6 +176,8 @@ class BrokerConnection(object): Default: None sasl_kerberos_service_name (str): Service name to include in GSSAPI sasl mechanism handshake. Default: 'kafka' + sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI + sasl mechanism handshake. Default: one of bootstrap servers """ DEFAULT_CONFIG = { @@ -206,7 +208,8 @@ class BrokerConnection(object): 'sasl_mechanism': 'PLAIN', 'sasl_plain_username': None, 'sasl_plain_password': None, - 'sasl_kerberos_service_name': 'kafka' + 'sasl_kerberos_service_name': 'kafka', + 'sasl_kerberos_domain_name': None } SECURITY_PROTOCOLS = ('PLAINTEXT', 'SSL', 'SASL_PLAINTEXT', 'SASL_SSL') SASL_MECHANISMS = ('PLAIN', 'GSSAPI') @@ -567,7 +570,8 @@ def _try_authenticate_plain(self, future): return future.success(True) def _try_authenticate_gssapi(self, future): - auth_id = self.config['sasl_kerberos_service_name'] + '@' + self.host + kerberos_domain_name = self.config['sasl_kerberos_domain_name'] or self.host + auth_id = self.config['sasl_kerberos_service_name'] + '@' + kerberos_domain_name gssapi_name = gssapi.Name( auth_id, name_type=gssapi.NameType.hostbased_service @@ -869,6 +873,16 @@ def _handle_api_version_response(self, response): ]) return self._api_versions + def get_api_versions(self): + version = self.check_version() + if version < (0, 10, 0): + raise Errors.UnsupportedVersionError( + "ApiVersion not supported by cluster version {} < 0.10.0" + .format(version)) + # _api_versions is set as a side effect of check_version() on a cluster + # that supports 0.10.0 or later + return self._api_versions + def _infer_broker_version_from_api_versions(self, api_versions): # The logic here is to check the list of supported request versions # in reverse order. 
As soon as we find one that works, return it @@ -892,7 +906,7 @@ def _infer_broker_version_from_api_versions(self, api_versions): # so if all else fails, choose that return (0, 10, 0) - def check_version(self, timeout=2, strict=False): + def check_version(self, timeout=2, strict=False, topics=[]): """Attempt to guess the broker version. Note: This is a blocking call. @@ -925,7 +939,7 @@ def check_version(self, timeout=2, strict=False): ((0, 9), ListGroupsRequest[0]()), ((0, 8, 2), GroupCoordinatorRequest[0]('kafka-python-default-group')), ((0, 8, 1), OffsetFetchRequest[0]('kafka-python-default-group', [])), - ((0, 8, 0), MetadataRequest[0]([])), + ((0, 8, 0), MetadataRequest[0](topics)), ] for version, request in test_cases: @@ -941,7 +955,7 @@ def check_version(self, timeout=2, strict=False): # the attempt to write to a disconnected socket should # immediately fail and allow us to infer that the prior # request was unrecognized - mr = self.send(MetadataRequest[0]([])) + mr = self.send(MetadataRequest[0](topics)) selector = self.config['selector']() selector.register(self._sock, selectors.EVENT_READ) diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py index 6ec1b71ed..7d58b7caa 100644 --- a/kafka/consumer/fetcher.py +++ b/kafka/consumer/fetcher.py @@ -29,7 +29,7 @@ ConsumerRecord = collections.namedtuple("ConsumerRecord", ["topic", "partition", "offset", "timestamp", "timestamp_type", - "key", "value", "checksum", "serialized_key_size", "serialized_value_size"]) + "key", "value", "headers", "checksum", "serialized_key_size", "serialized_value_size", "serialized_header_size"]) CompletedFetch = collections.namedtuple("CompletedFetch", @@ -456,10 +456,12 @@ def _unpack_message_set(self, tp, records): value = self._deserialize( self.config['value_deserializer'], tp.topic, record.value) + headers = record.headers + header_size = sum(len(h_key.encode("utf-8")) + len(h_val) for h_key, h_val in headers) if headers else -1 yield ConsumerRecord( tp.topic, tp.partition, record.offset, record.timestamp, - record.timestamp_type, key, value, record.checksum, - key_size, value_size) + record.timestamp_type, key, value, headers, record.checksum, + key_size, value_size, header_size) batch = records.next_batch() diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 9abf15e9b..279cce033 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -212,6 +212,11 @@ class KafkaConsumer(six.Iterator): api_version_auto_timeout_ms (int): number of milliseconds to throw a timeout exception from the constructor when checking the broker api version. Only applies if api_version set to 'auto' + connections_max_idle_ms: Close idle connections after the number of + milliseconds specified by this config. The broker closes idle + connections after connections.max.idle.ms, so this avoids hitting + unexpected socket disconnected errors on the client. + Default: 540000 metric_reporters (list): A list of classes to use as metrics reporters. Implementing the AbstractMetricsReporter interface allows plugging in classes that will be notified of new metric creation. Default: [] @@ -235,6 +240,8 @@ class KafkaConsumer(six.Iterator): Default: None sasl_kerberos_service_name (str): Service name to include in GSSAPI sasl mechanism handshake. Default: 'kafka' + sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI + sasl mechanism handshake. 
Default: one of bootstrap servers Note: Configuration parameters are described in more detail at @@ -293,7 +300,8 @@ class KafkaConsumer(six.Iterator): 'sasl_mechanism': None, 'sasl_plain_username': None, 'sasl_plain_password': None, - 'sasl_kerberos_service_name': 'kafka' + 'sasl_kerberos_service_name': 'kafka', + 'sasl_kerberos_domain_name': None } DEFAULT_SESSION_TIMEOUT_MS_0_9 = 30000 diff --git a/kafka/consumer/multiprocess.py b/kafka/consumer/multiprocess.py index 1da4a3353..758bb92f8 100644 --- a/kafka/consumer/multiprocess.py +++ b/kafka/consumer/multiprocess.py @@ -8,7 +8,7 @@ from kafka.vendor.six.moves import queue # pylint: disable=import-error -from kafka.common import KafkaError +from kafka.errors import KafkaError from kafka.consumer.base import ( Consumer, AUTO_COMMIT_MSG_COUNT, AUTO_COMMIT_INTERVAL, @@ -92,7 +92,7 @@ def _mp_consume(client, group, topic, message_queue, size, events, **consumer_op except KafkaError as e: # Retry with exponential backoff - log.error("Problem communicating with Kafka (%s), retrying in %d seconds..." % (e, interval)) + log.exception("Problem communicating with Kafka, retrying in %d seconds...", interval) time.sleep(interval) interval = interval*2 if interval*2 < MAX_BACKOFF_SECONDS else MAX_BACKOFF_SECONDS diff --git a/kafka/consumer/simple.py b/kafka/consumer/simple.py index c0c1b1ed3..b60a5865b 100644 --- a/kafka/consumer/simple.py +++ b/kafka/consumer/simple.py @@ -24,13 +24,13 @@ ITER_TIMEOUT_SECONDS, NO_MESSAGES_WAIT_TIME_SECONDS ) -from kafka.common import ( - FetchRequestPayload, KafkaError, OffsetRequestPayload, - ConsumerFetchSizeTooSmall, +from kafka.errors import ( + KafkaError, ConsumerFetchSizeTooSmall, UnknownTopicOrPartitionError, NotLeaderForPartitionError, OffsetOutOfRangeError, FailedPayloadsError, check_error ) from kafka.protocol.message import PartialMessage +from kafka.structs import FetchRequestPayload, OffsetRequestPayload log = logging.getLogger(__name__) diff --git a/kafka/coordinator/assignors/roundrobin.py b/kafka/coordinator/assignors/roundrobin.py index a8310338c..2d24a5c8b 100644 --- a/kafka/coordinator/assignors/roundrobin.py +++ b/kafka/coordinator/assignors/roundrobin.py @@ -7,8 +7,8 @@ from kafka.vendor import six from kafka.coordinator.assignors.abstract import AbstractPartitionAssignor -from kafka.common import TopicPartition from kafka.coordinator.protocol import ConsumerProtocolMemberMetadata, ConsumerProtocolMemberAssignment +from kafka.structs import TopicPartition log = logging.getLogger(__name__) diff --git a/kafka/coordinator/consumer.py b/kafka/coordinator/consumer.py index cb1de0d2e..647a6b585 100644 --- a/kafka/coordinator/consumer.py +++ b/kafka/coordinator/consumer.py @@ -11,7 +11,7 @@ from kafka.coordinator.assignors.range import RangePartitionAssignor from kafka.coordinator.assignors.roundrobin import RoundRobinPartitionAssignor from kafka.coordinator.protocol import ConsumerProtocol -from kafka import errors as Errors +import kafka.errors as Errors from kafka.future import Future from kafka.metrics import AnonMeasurable from kafka.metrics.stats import Avg, Count, Max, Rate @@ -441,10 +441,13 @@ def commit_offsets_async(self, offsets, callback=None): response will be either an Exception or a OffsetCommitResponse struct. This callback can be used to trigger custom actions when a commit request completes. 
+ + Returns: + kafka.future.Future """ self._invoke_completed_offset_commit_callbacks() if not self.coordinator_unknown(): - self._do_commit_offsets_async(offsets, callback) + future = self._do_commit_offsets_async(offsets, callback) else: # we don't know the current coordinator, so try to find it and then # send the commit or fail (we don't want recursive retries which can @@ -464,6 +467,8 @@ def commit_offsets_async(self, offsets, callback=None): # through delayed task execution. self._client.poll(timeout_ms=0) # no wakeup if we add that feature + return future + def _do_commit_offsets_async(self, offsets, callback=None): assert self.config['api_version'] >= (0, 8, 1), 'Unsupported Broker API' assert all(map(lambda k: isinstance(k, TopicPartition), offsets)) diff --git a/kafka/errors.py b/kafka/errors.py index 47d228e48..fb9576c3f 100644 --- a/kafka/errors.py +++ b/kafka/errors.py @@ -476,10 +476,6 @@ class ConsumerNoMoreData(KafkaError): pass -class ConsumerTimeout(KafkaError): - pass - - class ProtocolError(KafkaError): pass diff --git a/kafka/metrics/metrics.py b/kafka/metrics/metrics.py index e9c465deb..f2e99edc9 100644 --- a/kafka/metrics/metrics.py +++ b/kafka/metrics/metrics.py @@ -257,3 +257,5 @@ def close(self): """Close this metrics repository.""" for reporter in self._reporters: reporter.close() + + self._metrics.clear() diff --git a/kafka/producer/base.py b/kafka/producer/base.py index c9dd6c3a1..1da74c841 100644 --- a/kafka/producer/base.py +++ b/kafka/producer/base.py @@ -14,13 +14,13 @@ from kafka.vendor import six -from kafka.structs import ( - ProduceRequestPayload, ProduceResponsePayload, TopicPartition, RetryOptions) from kafka.errors import ( kafka_errors, UnsupportedCodecError, FailedPayloadsError, RequestTimedOutError, AsyncProducerQueueFull, UnknownError, RETRY_ERROR_TYPES, RETRY_BACKOFF_ERROR_TYPES, RETRY_REFRESH_ERROR_TYPES) from kafka.protocol import CODEC_NONE, ALL_CODECS, create_message_set +from kafka.structs import ( + ProduceRequestPayload, ProduceResponsePayload, TopicPartition, RetryOptions) log = logging.getLogger('kafka.producer') @@ -83,7 +83,7 @@ def _send_upstream(queue, client, codec, batch_time, batch_size, try: client.reinit() except Exception as e: - log.warn('Async producer failed to connect to brokers; backoff for %s(ms) before retrying', retry_options.backoff_ms) + log.warning('Async producer failed to connect to brokers; backoff for %s(ms) before retrying', retry_options.backoff_ms) time.sleep(float(retry_options.backoff_ms) / 1000) else: break @@ -189,12 +189,12 @@ def _handle_error(error_cls, request): # doing backoff before next retry if retry_state['do_backoff'] and retry_options.backoff_ms: - log.warn('Async producer backoff for %s(ms) before retrying', retry_options.backoff_ms) + log.warning('Async producer backoff for %s(ms) before retrying', retry_options.backoff_ms) time.sleep(float(retry_options.backoff_ms) / 1000) # refresh topic metadata before next retry if retry_state['do_refresh']: - log.warn('Async producer forcing metadata refresh metadata before retrying') + log.warning('Async producer forcing metadata refresh metadata before retrying') try: client.load_metadata_for_topics() except Exception: diff --git a/kafka/producer/future.py b/kafka/producer/future.py index aa216c4e5..1c5d6d7bf 100644 --- a/kafka/producer/future.py +++ b/kafka/producer/future.py @@ -29,11 +29,11 @@ def wait(self, timeout=None): class FutureRecordMetadata(Future): - def __init__(self, produce_future, relative_offset, timestamp_ms, checksum, 
serialized_key_size, serialized_value_size): + def __init__(self, produce_future, relative_offset, timestamp_ms, checksum, serialized_key_size, serialized_value_size, serialized_header_size): super(FutureRecordMetadata, self).__init__() self._produce_future = produce_future # packing args as a tuple is a minor speed optimization - self.args = (relative_offset, timestamp_ms, checksum, serialized_key_size, serialized_value_size) + self.args = (relative_offset, timestamp_ms, checksum, serialized_key_size, serialized_value_size, serialized_header_size) produce_future.add_callback(self._produce_success) produce_future.add_errback(self.failure) @@ -42,7 +42,7 @@ def _produce_success(self, offset_and_timestamp): # Unpacking from args tuple is minor speed optimization (relative_offset, timestamp_ms, checksum, - serialized_key_size, serialized_value_size) = self.args + serialized_key_size, serialized_value_size, serialized_header_size) = self.args # None is when Broker does not support the API (<0.10) and # -1 is when the broker is configured for CREATE_TIME timestamps @@ -53,7 +53,7 @@ def _produce_success(self, offset_and_timestamp): tp = self._produce_future.topic_partition metadata = RecordMetadata(tp[0], tp[1], tp, offset, timestamp_ms, checksum, serialized_key_size, - serialized_value_size) + serialized_value_size, serialized_header_size) self.success(metadata) def get(self, timeout=None): @@ -68,4 +68,4 @@ def get(self, timeout=None): RecordMetadata = collections.namedtuple( 'RecordMetadata', ['topic', 'partition', 'topic_partition', 'offset', 'timestamp', - 'checksum', 'serialized_key_size', 'serialized_value_size']) + 'checksum', 'serialized_key_size', 'serialized_value_size', 'serialized_header_size']) diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py index f285ab474..7878c0a57 100644 --- a/kafka/producer/kafka.py +++ b/kafka/producer/kafka.py @@ -10,18 +10,18 @@ from kafka.vendor import six -from kafka import errors as Errors +import kafka.errors as Errors from kafka.client_async import KafkaClient, selectors from kafka.codec import has_gzip, has_snappy, has_lz4 from kafka.metrics import MetricConfig, Metrics from kafka.partitioner.default import DefaultPartitioner +from kafka.producer.future import FutureRecordMetadata, FutureProduceResult +from kafka.producer.record_accumulator import AtomicInteger, RecordAccumulator +from kafka.producer.sender import Sender from kafka.record.default_records import DefaultRecordBatchBuilder from kafka.record.legacy_records import LegacyRecordBatchBuilder from kafka.serializer import Serializer from kafka.structs import TopicPartition -from kafka.producer.future import FutureRecordMetadata, FutureProduceResult -from kafka.producer.record_accumulator import AtomicInteger, RecordAccumulator -from kafka.producer.sender import Sender log = logging.getLogger(__name__) @@ -51,7 +51,7 @@ class KafkaProducer(object): 'retries' is configured to 0. Enabling retries also opens up the possibility of duplicates (see the documentation on message delivery semantics for details: - http://kafka.apache.org/documentation.html#semantics + https://kafka.apache.org/documentation.html#semantics ). The producer maintains buffers of unsent records for each partition. These @@ -171,6 +171,11 @@ class KafkaProducer(object): will block up to max_block_ms, raising an exception on timeout. In the current implementation, this setting is an approximation. 
Default: 33554432 (32MB) + connections_max_idle_ms: Close idle connections after the number of + milliseconds specified by this config. The broker closes idle + connections after connections.max.idle.ms, so this avoids hitting + unexpected socket disconnected errors on the client. + Default: 540000 max_block_ms (int): Number of milliseconds to block during :meth:`~kafka.KafkaProducer.send` and :meth:`~kafka.KafkaProducer.partitions_for`. These methods can be @@ -265,6 +270,8 @@ class KafkaProducer(object): Default: None sasl_kerberos_service_name (str): Service name to include in GSSAPI sasl mechanism handshake. Default: 'kafka' + sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI + sasl mechanism handshake. Default: one of bootstrap servers Note: Configuration parameters are described in more detail at @@ -276,6 +283,7 @@ class KafkaProducer(object): 'key_serializer': None, 'value_serializer': None, 'acks': 1, + 'bootstrap_topics_filter': set(), 'compression_type': None, 'retries': 0, 'batch_size': 16384, @@ -313,7 +321,8 @@ class KafkaProducer(object): 'sasl_mechanism': None, 'sasl_plain_username': None, 'sasl_plain_password': None, - 'sasl_kerberos_service_name': 'kafka' + 'sasl_kerberos_service_name': 'kafka', + 'sasl_kerberos_domain_name': None } _COMPRESSORS = { @@ -504,7 +513,7 @@ def _estimate_size_in_bytes(self, key, value, headers=[]): return LegacyRecordBatchBuilder.estimate_size_in_bytes( magic, self.config['compression_type'], key, value) - def send(self, topic, value=None, key=None, partition=None, timestamp_ms=None): + def send(self, topic, value=None, key=None, headers=None, partition=None, timestamp_ms=None): """Publish a message to a topic. Arguments: @@ -513,7 +522,7 @@ def send(self, topic, value=None, key=None, partition=None, timestamp_ms=None): serializable to bytes via configured value_serializer. If value is None, key is required and message acts as a 'delete'. See kafka compaction documentation for more details: - http://kafka.apache.org/documentation.html#compaction + https://kafka.apache.org/documentation.html#compaction (compaction requires kafka >= 0.8.1) partition (int, optional): optionally specify a partition. If not set, the partition will be selected using the configured @@ -525,6 +534,8 @@ def send(self, topic, value=None, key=None, partition=None, timestamp_ms=None): partition (but if key is None, partition is chosen randomly). Must be type bytes, or be serializable to bytes via configured key_serializer. + headers (optional): a list of header key value pairs. List items + are tuples of str key and bytes value. timestamp_ms (int, optional): epoch milliseconds (from Jan 1 1970 UTC) to use as the message timestamp. Defaults to current time. 
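A hedged usage sketch of the headers parameter documented above; the broker address and 'foobar' topic are assumptions, and record headers also require a broker/message format that supports them (Kafka 0.11+):

>>> from kafka import KafkaProducer
>>> producer = KafkaProducer(bootstrap_servers='localhost:9092')
>>> future = producer.send('foobar',
...                        value=b'c29tZSB2YWx1ZQ==',
...                        headers=[('content-encoding', b'base64')])
>>> record_metadata = future.get(timeout=10)
>>> header_bytes = record_metadata.serialized_header_size  # UTF-8 key bytes + value bytes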
@@ -554,13 +565,18 @@ def send(self, topic, value=None, key=None, partition=None, timestamp_ms=None): partition = self._partition(topic, partition, key, value, key_bytes, value_bytes) - message_size = self._estimate_size_in_bytes(key_bytes, value_bytes) + if headers is None: + headers = [] + assert type(headers) == list + assert all(type(item) == tuple and len(item) == 2 and type(item[0]) == str and type(item[1]) == bytes for item in headers) + + message_size = self._estimate_size_in_bytes(key_bytes, value_bytes, headers) self._ensure_valid_record_size(message_size) tp = TopicPartition(topic, partition) - log.debug("Sending (key=%r value=%r) to %s", key, value, tp) + log.debug("Sending (key=%r value=%r headers=%r) to %s", key, value, headers, tp) result = self._accumulator.append(tp, timestamp_ms, - key_bytes, value_bytes, + key_bytes, value_bytes, headers, self.config['max_block_ms'], estimated_size=message_size) future, batch_is_full, new_batch_created = result @@ -579,7 +595,8 @@ def send(self, topic, value=None, key=None, partition=None, timestamp_ms=None): FutureProduceResult(TopicPartition(topic, partition)), -1, None, None, len(key_bytes) if key_bytes is not None else -1, - len(value_bytes) if value_bytes is not None else -1 + len(value_bytes) if value_bytes is not None else -1, + sum(len(h_key.encode("utf-8")) + len(h_value) for h_key, h_value in headers) if headers else -1, ).failure(e) def flush(self, timeout=None): diff --git a/kafka/producer/record_accumulator.py b/kafka/producer/record_accumulator.py index 61f1e0e2a..84b01d1b5 100644 --- a/kafka/producer/record_accumulator.py +++ b/kafka/producer/record_accumulator.py @@ -6,12 +6,12 @@ import threading import time -from kafka import errors as Errors +import kafka.errors as Errors from kafka.producer.buffer import SimpleBufferPool from kafka.producer.future import FutureRecordMetadata, FutureProduceResult -from kafka.structs import TopicPartition from kafka.record.memory_records import MemoryRecordsBuilder from kafka.record.legacy_records import LegacyRecordBatchBuilder +from kafka.structs import TopicPartition log = logging.getLogger(__name__) @@ -55,8 +55,8 @@ def __init__(self, tp, records, buffer): def record_count(self): return self.records.next_offset() - def try_append(self, timestamp_ms, key, value): - metadata = self.records.append(timestamp_ms, key, value) + def try_append(self, timestamp_ms, key, value, headers): + metadata = self.records.append(timestamp_ms, key, value, headers) if metadata is None: return None @@ -65,7 +65,8 @@ def try_append(self, timestamp_ms, key, value): future = FutureRecordMetadata(self.produce_future, metadata.offset, metadata.timestamp, metadata.crc, len(key) if key is not None else -1, - len(value) if value is not None else -1) + len(value) if value is not None else -1, + sum(len(h_key.encode("utf-8")) + len(h_val) for h_key, h_val in headers) if headers else -1) return future def done(self, base_offset=None, timestamp_ms=None, exception=None): @@ -196,7 +197,7 @@ def __init__(self, **configs): self.muted = set() self._drain_index = 0 - def append(self, tp, timestamp_ms, key, value, max_time_to_block_ms, + def append(self, tp, timestamp_ms, key, value, headers, max_time_to_block_ms, estimated_size=0): """Add a record to the accumulator, return the append result. 
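The header size reported back through FutureRecordMetadata uses the same accounting as the key and value sizes: UTF-8 encoded key bytes plus raw value bytes, with -1 meaning no headers. A standalone sketch of that calculation (the helper name is hypothetical, not part of the patch):

def headers_size_in_bytes(headers):
    # Mirrors sum(len(h_key.encode("utf-8")) + len(h_val) ...) used above;
    # -1 signals "no headers", matching the key/value size conventions.
    if not headers:
        return -1
    return sum(len(key.encode('utf-8')) + len(value) for key, value in headers)

assert headers_size_in_bytes([('content-encoding', b'base64')]) == 22
assert headers_size_in_bytes(None) == -1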
@@ -209,6 +210,7 @@ def append(self, tp, timestamp_ms, key, value, max_time_to_block_ms, timestamp_ms (int): The timestamp of the record (epoch ms) key (bytes): The key for the record value (bytes): The value for the record + headers (List[Tuple[str, bytes]]): The header fields for the record max_time_to_block_ms (int): The maximum time in milliseconds to block for buffer memory to be available @@ -231,7 +233,7 @@ def append(self, tp, timestamp_ms, key, value, max_time_to_block_ms, dq = self._batches[tp] if dq: last = dq[-1] - future = last.try_append(timestamp_ms, key, value) + future = last.try_append(timestamp_ms, key, value, headers) if future is not None: batch_is_full = len(dq) > 1 or last.records.is_full() return future, batch_is_full, False @@ -246,7 +248,7 @@ def append(self, tp, timestamp_ms, key, value, max_time_to_block_ms, if dq: last = dq[-1] - future = last.try_append(timestamp_ms, key, value) + future = last.try_append(timestamp_ms, key, value, headers) if future is not None: # Somebody else found us a batch, return the one we # waited for! Hopefully this doesn't happen often... @@ -261,7 +263,7 @@ def append(self, tp, timestamp_ms, key, value, max_time_to_block_ms, ) batch = ProducerBatch(tp, records, buf) - future = batch.try_append(timestamp_ms, key, value) + future = batch.try_append(timestamp_ms, key, value, headers) if not future: raise Exception() diff --git a/kafka/producer/simple.py b/kafka/producer/simple.py index 91e0abc4c..e06e65954 100644 --- a/kafka/producer/simple.py +++ b/kafka/producer/simple.py @@ -4,7 +4,7 @@ import logging import random -from kafka.vendor.six.moves import xrange # pylint: disable=import-error +from kafka.vendor.six.moves import range from kafka.producer.base import Producer @@ -39,7 +39,7 @@ def _next_partition(self, topic): # Randomize the initial partition that is returned if self.random_start: num_partitions = len(self.client.get_partition_ids_for_topic(topic)) - for _ in xrange(random.randint(0, num_partitions-1)): + for _ in range(random.randint(0, num_partitions-1)): next(self.partition_cycles[topic]) return next(self.partition_cycles[topic]) diff --git a/kafka/protocol/__init__.py b/kafka/protocol/__init__.py index 050a0854f..8cf564033 100644 --- a/kafka/protocol/__init__.py +++ b/kafka/protocol/__init__.py @@ -44,4 +44,9 @@ 33: 'AlterConfigs', 36: 'SaslAuthenticate', 37: 'CreatePartitions', + 38: 'CreateDelegationToken', + 39: 'RenewDelegationToken', + 40: 'ExpireDelegationToken', + 41: 'DescribeDelegationToken', + 42: 'DeleteGroups', } diff --git a/kafka/protocol/legacy.py b/kafka/protocol/legacy.py index b8f84e717..7dd258032 100644 --- a/kafka/protocol/legacy.py +++ b/kafka/protocol/legacy.py @@ -15,7 +15,6 @@ from kafka.codec import gzip_encode, snappy_encode from kafka.errors import ProtocolError, UnsupportedCodecError -from kafka.structs import ConsumerMetadataResponse from kafka.util import ( crc32, read_short_string, relative_unpack, write_int_string, group_by_topic_and_partition) @@ -322,7 +321,7 @@ def encode_consumer_metadata_request(cls, client_id, correlation_id, payloads): @classmethod def decode_consumer_metadata_response(cls, data): """ - Decode bytes to a ConsumerMetadataResponse + Decode bytes to a kafka.structs.ConsumerMetadataResponse Arguments: data: bytes to decode @@ -331,7 +330,7 @@ def decode_consumer_metadata_response(cls, data): (host, cur) = read_short_string(data, cur) ((port,), cur) = relative_unpack('>i', data, cur) - return ConsumerMetadataResponse(error, nodeId, host, port) + return 
kafka.structs.ConsumerMetadataResponse(error, nodeId, host, port) @classmethod def encode_offset_commit_request(cls, group, payloads): diff --git a/kafka/protocol/types.py b/kafka/protocol/types.py index 5ccb83ea7..d508b2605 100644 --- a/kafka/protocol/types.py +++ b/kafka/protocol/types.py @@ -1,13 +1,14 @@ from __future__ import absolute_import -from struct import pack, unpack, error +import struct +from struct import error from kafka.protocol.abstract import AbstractType def _pack(f, value): try: - return pack(f, value) + return f(value) except error as e: raise ValueError("Error encountered when attempting to convert value: " "{!r} to struct format: '{}', hit error: {}" @@ -16,7 +17,7 @@ def _pack(f, value): def _unpack(f, data): try: - (value,) = unpack(f, data) + (value,) = f(data) return value except error as e: raise ValueError("Error encountered when attempting to convert value: " @@ -25,43 +26,55 @@ def _unpack(f, data): class Int8(AbstractType): + _pack = struct.Struct('>b').pack + _unpack = struct.Struct('>b').unpack + @classmethod def encode(cls, value): - return _pack('>b', value) + return _pack(cls._pack, value) @classmethod def decode(cls, data): - return _unpack('>b', data.read(1)) + return _unpack(cls._unpack, data.read(1)) class Int16(AbstractType): + _pack = struct.Struct('>h').pack + _unpack = struct.Struct('>h').unpack + @classmethod def encode(cls, value): - return _pack('>h', value) + return _pack(cls._pack, value) @classmethod def decode(cls, data): - return _unpack('>h', data.read(2)) + return _unpack(cls._unpack, data.read(2)) class Int32(AbstractType): + _pack = struct.Struct('>i').pack + _unpack = struct.Struct('>i').unpack + @classmethod def encode(cls, value): - return _pack('>i', value) + return _pack(cls._pack, value) @classmethod def decode(cls, data): - return _unpack('>i', data.read(4)) + return _unpack(cls._unpack, data.read(4)) class Int64(AbstractType): + _pack = struct.Struct('>q').pack + _unpack = struct.Struct('>q').unpack + @classmethod def encode(cls, value): - return _pack('>q', value) + return _pack(cls._pack, value) @classmethod def decode(cls, data): - return _unpack('>q', data.read(8)) + return _unpack(cls._unpack, data.read(8)) class String(AbstractType): @@ -108,13 +121,16 @@ def repr(cls, value): class Boolean(AbstractType): + _pack = struct.Struct('>?').pack + _unpack = struct.Struct('>?').unpack + @classmethod def encode(cls, value): - return _pack('>?', value) + return _pack(cls._pack, value) @classmethod def decode(cls, data): - return _unpack('>?', data.read(1)) + return _unpack(cls._unpack, data.read(1)) class Schema(AbstractType): diff --git a/kafka/record/_crc32c.py b/kafka/record/_crc32c.py index 5704f8238..ecff48f5e 100644 --- a/kafka/record/_crc32c.py +++ b/kafka/record/_crc32c.py @@ -18,9 +18,9 @@ # limitations under the License. # """Implementation of CRC-32C checksumming as in rfc3720 section B.4. -See http://en.wikipedia.org/wiki/Cyclic_redundancy_check for details on CRC-32C +See https://en.wikipedia.org/wiki/Cyclic_redundancy_check for details on CRC-32C This code is a manual python translation of c code generated by -pycrc 0.7.1 (http://www.tty1.net/pycrc/). Command line used: +pycrc 0.7.1 (https://pycrc.org/). 
Command line used: './pycrc.py --model=crc-32c --generate c --algorithm=table-driven' """ @@ -139,5 +139,7 @@ def crc(data): if __name__ == "__main__": import sys - data = sys.stdin.read() + # TODO remove the pylint disable once pylint fixes + # https://github.com/PyCQA/pylint/issues/2571 + data = sys.stdin.read() # pylint: disable=assignment-from-no-return print(hex(crc(data))) diff --git a/kafka/structs.py b/kafka/structs.py index 62f36dd4c..e15e92ed6 100644 --- a/kafka/structs.py +++ b/kafka/structs.py @@ -93,7 +93,3 @@ # Limit value: int >= 0, 0 means no retries RetryOptions = namedtuple("RetryOptions", ["limit", "backoff_ms", "retry_on_timeouts"]) - - -# Support legacy imports from kafka.common -from kafka.errors import * diff --git a/kafka/util.py b/kafka/util.py index 75538ddb4..9354bd936 100644 --- a/kafka/util.py +++ b/kafka/util.py @@ -134,7 +134,7 @@ def __del__(self): class WeakMethod(object): """ Callable that weakly references a method and the object it is bound to. It - is based on http://stackoverflow.com/a/24287465. + is based on https://stackoverflow.com/a/24287465. Arguments: diff --git a/kafka/vendor/enum34.py b/kafka/vendor/enum34.py new file mode 100644 index 000000000..5f64bd2d8 --- /dev/null +++ b/kafka/vendor/enum34.py @@ -0,0 +1,841 @@ +# pylint: skip-file +# vendored from: +# https://bitbucket.org/stoneleaf/enum34/src/58c4cd7174ca35f164304c8a6f0a4d47b779c2a7/enum/__init__.py?at=1.1.6 + +"""Python Enumerations""" + +import sys as _sys + +__all__ = ['Enum', 'IntEnum', 'unique'] + +version = 1, 1, 6 + +pyver = float('%s.%s' % _sys.version_info[:2]) + +try: + any +except NameError: + def any(iterable): + for element in iterable: + if element: + return True + return False + +try: + from collections import OrderedDict +except ImportError: + OrderedDict = None + +try: + basestring +except NameError: + # In Python 2 basestring is the ancestor of both str and unicode + # in Python 3 it's just str, but was missing in 3.1 + basestring = str + +try: + unicode +except NameError: + # In Python 3 unicode no longer exists (it's just str) + unicode = str + +class _RouteClassAttributeToGetattr(object): + """Route attribute access on a class to __getattr__. + + This is a descriptor, used to define attributes that act differently when + accessed through an instance and through a class. Instance access remains + normal, but access to an attribute through a class will be routed to the + class's __getattr__ method; this is done by raising AttributeError. 
+ + """ + def __init__(self, fget=None): + self.fget = fget + + def __get__(self, instance, ownerclass=None): + if instance is None: + raise AttributeError() + return self.fget(instance) + + def __set__(self, instance, value): + raise AttributeError("can't set attribute") + + def __delete__(self, instance): + raise AttributeError("can't delete attribute") + + +def _is_descriptor(obj): + """Returns True if obj is a descriptor, False otherwise.""" + return ( + hasattr(obj, '__get__') or + hasattr(obj, '__set__') or + hasattr(obj, '__delete__')) + + +def _is_dunder(name): + """Returns True if a __dunder__ name, False otherwise.""" + return (name[:2] == name[-2:] == '__' and + name[2:3] != '_' and + name[-3:-2] != '_' and + len(name) > 4) + + +def _is_sunder(name): + """Returns True if a _sunder_ name, False otherwise.""" + return (name[0] == name[-1] == '_' and + name[1:2] != '_' and + name[-2:-1] != '_' and + len(name) > 2) + + +def _make_class_unpicklable(cls): + """Make the given class un-picklable.""" + def _break_on_call_reduce(self, protocol=None): + raise TypeError('%r cannot be pickled' % self) + cls.__reduce_ex__ = _break_on_call_reduce + cls.__module__ = '' + + +class _EnumDict(dict): + """Track enum member order and ensure member names are not reused. + + EnumMeta will use the names found in self._member_names as the + enumeration member names. + + """ + def __init__(self): + super(_EnumDict, self).__init__() + self._member_names = [] + + def __setitem__(self, key, value): + """Changes anything not dundered or not a descriptor. + + If a descriptor is added with the same name as an enum member, the name + is removed from _member_names (this may leave a hole in the numerical + sequence of values). + + If an enum member name is used twice, an error is raised; duplicate + values are not checked for. + + Single underscore (sunder) names are reserved. + + Note: in 3.x __order__ is simply discarded as a not necessary piece + leftover from 2.x + + """ + if pyver >= 3.0 and key in ('_order_', '__order__'): + return + elif key == '__order__': + key = '_order_' + if _is_sunder(key): + if key != '_order_': + raise ValueError('_names_ are reserved for future Enum use') + elif _is_dunder(key): + pass + elif key in self._member_names: + # descriptor overwriting an enum? + raise TypeError('Attempted to reuse key: %r' % key) + elif not _is_descriptor(value): + if key in self: + # enum overwriting a descriptor? + raise TypeError('Key already defined as: %r' % self[key]) + self._member_names.append(key) + super(_EnumDict, self).__setitem__(key, value) + + +# Dummy value for Enum as EnumMeta explicity checks for it, but of course until +# EnumMeta finishes running the first time the Enum class doesn't exist. This +# is also why there are checks in EnumMeta like `if Enum is not None` +Enum = None + + +class EnumMeta(type): + """Metaclass for Enum""" + @classmethod + def __prepare__(metacls, cls, bases): + return _EnumDict() + + def __new__(metacls, cls, bases, classdict): + # an Enum class is final once enumeration items have been defined; it + # cannot be mixed with other types (int, float, etc.) if it has an + # inherited __new__ unless a new __new__ is defined (or the resulting + # class will fail). 
+ if type(classdict) is dict: + original_dict = classdict + classdict = _EnumDict() + for k, v in original_dict.items(): + classdict[k] = v + + member_type, first_enum = metacls._get_mixins_(bases) + __new__, save_new, use_args = metacls._find_new_(classdict, member_type, + first_enum) + # save enum items into separate mapping so they don't get baked into + # the new class + members = dict((k, classdict[k]) for k in classdict._member_names) + for name in classdict._member_names: + del classdict[name] + + # py2 support for definition order + _order_ = classdict.get('_order_') + if _order_ is None: + if pyver < 3.0: + try: + _order_ = [name for (name, value) in sorted(members.items(), key=lambda item: item[1])] + except TypeError: + _order_ = [name for name in sorted(members.keys())] + else: + _order_ = classdict._member_names + else: + del classdict['_order_'] + if pyver < 3.0: + _order_ = _order_.replace(',', ' ').split() + aliases = [name for name in members if name not in _order_] + _order_ += aliases + + # check for illegal enum names (any others?) + invalid_names = set(members) & set(['mro']) + if invalid_names: + raise ValueError('Invalid enum member name(s): %s' % ( + ', '.join(invalid_names), )) + + # save attributes from super classes so we know if we can take + # the shortcut of storing members in the class dict + base_attributes = set([a for b in bases for a in b.__dict__]) + # create our new Enum type + enum_class = super(EnumMeta, metacls).__new__(metacls, cls, bases, classdict) + enum_class._member_names_ = [] # names in random order + if OrderedDict is not None: + enum_class._member_map_ = OrderedDict() + else: + enum_class._member_map_ = {} # name->value map + enum_class._member_type_ = member_type + + # Reverse value->name map for hashable values. + enum_class._value2member_map_ = {} + + # instantiate them, checking for duplicates as we go + # we instantiate first instead of checking for duplicates first in case + # a custom __new__ is doing something funky with the values -- such as + # auto-numbering ;) + if __new__ is None: + __new__ = enum_class.__new__ + for member_name in _order_: + value = members[member_name] + if not isinstance(value, tuple): + args = (value, ) + else: + args = value + if member_type is tuple: # special case for tuple enums + args = (args, ) # wrap it one more time + if not use_args or not args: + enum_member = __new__(enum_class) + if not hasattr(enum_member, '_value_'): + enum_member._value_ = value + else: + enum_member = __new__(enum_class, *args) + if not hasattr(enum_member, '_value_'): + enum_member._value_ = member_type(*args) + value = enum_member._value_ + enum_member._name_ = member_name + enum_member.__objclass__ = enum_class + enum_member.__init__(*args) + # If another member with the same value was already defined, the + # new member becomes an alias to the existing one. + for name, canonical_member in enum_class._member_map_.items(): + if canonical_member.value == enum_member._value_: + enum_member = canonical_member + break + else: + # Aliases don't appear in member names (only in __members__). + enum_class._member_names_.append(member_name) + # performance boost for any member that would not shadow + # a DynamicClassAttribute (aka _RouteClassAttributeToGetattr) + if member_name not in base_attributes: + setattr(enum_class, member_name, enum_member) + # now add to _member_map_ + enum_class._member_map_[member_name] = enum_member + try: + # This may fail if value is not hashable. 
We can't add the value + # to the map, and by-value lookups for this value will be + # linear. + enum_class._value2member_map_[value] = enum_member + except TypeError: + pass + + + # If a custom type is mixed into the Enum, and it does not know how + # to pickle itself, pickle.dumps will succeed but pickle.loads will + # fail. Rather than have the error show up later and possibly far + # from the source, sabotage the pickle protocol for this class so + # that pickle.dumps also fails. + # + # However, if the new class implements its own __reduce_ex__, do not + # sabotage -- it's on them to make sure it works correctly. We use + # __reduce_ex__ instead of any of the others as it is preferred by + # pickle over __reduce__, and it handles all pickle protocols. + unpicklable = False + if '__reduce_ex__' not in classdict: + if member_type is not object: + methods = ('__getnewargs_ex__', '__getnewargs__', + '__reduce_ex__', '__reduce__') + if not any(m in member_type.__dict__ for m in methods): + _make_class_unpicklable(enum_class) + unpicklable = True + + + # double check that repr and friends are not the mixin's or various + # things break (such as pickle) + for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'): + class_method = getattr(enum_class, name) + obj_method = getattr(member_type, name, None) + enum_method = getattr(first_enum, name, None) + if name not in classdict and class_method is not enum_method: + if name == '__reduce_ex__' and unpicklable: + continue + setattr(enum_class, name, enum_method) + + # method resolution and int's are not playing nice + # Python's less than 2.6 use __cmp__ + + if pyver < 2.6: + + if issubclass(enum_class, int): + setattr(enum_class, '__cmp__', getattr(int, '__cmp__')) + + elif pyver < 3.0: + + if issubclass(enum_class, int): + for method in ( + '__le__', + '__lt__', + '__gt__', + '__ge__', + '__eq__', + '__ne__', + '__hash__', + ): + setattr(enum_class, method, getattr(int, method)) + + # replace any other __new__ with our own (as long as Enum is not None, + # anyway) -- again, this is to support pickle + if Enum is not None: + # if the user defined their own __new__, save it before it gets + # clobbered in case they subclass later + if save_new: + setattr(enum_class, '__member_new__', enum_class.__dict__['__new__']) + setattr(enum_class, '__new__', Enum.__dict__['__new__']) + return enum_class + + def __bool__(cls): + """ + classes/types should always be True. + """ + return True + + def __call__(cls, value, names=None, module=None, type=None, start=1): + """Either returns an existing member, or creates a new enum class. + + This method is used both when an enum class is given a value to match + to an enumeration member (i.e. Color(3)) and for the functional API + (i.e. Color = Enum('Color', names='red green blue')). + + When used for the functional API: `module`, if set, will be stored in + the new class' __module__ attribute; `type`, if set, will be mixed in + as the first base class. + + Note: if `module` is not set this routine will attempt to discover the + calling module by walking the frame stack; if this is unsuccessful + the resulting class will not be pickleable. 
+ + """ + if names is None: # simple value lookup + return cls.__new__(cls, value) + # otherwise, functional API: we're creating a new Enum type + return cls._create_(value, names, module=module, type=type, start=start) + + def __contains__(cls, member): + return isinstance(member, cls) and member.name in cls._member_map_ + + def __delattr__(cls, attr): + # nicer error message when someone tries to delete an attribute + # (see issue19025). + if attr in cls._member_map_: + raise AttributeError( + "%s: cannot delete Enum member." % cls.__name__) + super(EnumMeta, cls).__delattr__(attr) + + def __dir__(self): + return (['__class__', '__doc__', '__members__', '__module__'] + + self._member_names_) + + @property + def __members__(cls): + """Returns a mapping of member name->value. + + This mapping lists all enum members, including aliases. Note that this + is a copy of the internal mapping. + + """ + return cls._member_map_.copy() + + def __getattr__(cls, name): + """Return the enum member matching `name` + + We use __getattr__ instead of descriptors or inserting into the enum + class' __dict__ in order to support `name` and `value` being both + properties for enum members (which live in the class' __dict__) and + enum members themselves. + + """ + if _is_dunder(name): + raise AttributeError(name) + try: + return cls._member_map_[name] + except KeyError: + raise AttributeError(name) + + def __getitem__(cls, name): + return cls._member_map_[name] + + def __iter__(cls): + return (cls._member_map_[name] for name in cls._member_names_) + + def __reversed__(cls): + return (cls._member_map_[name] for name in reversed(cls._member_names_)) + + def __len__(cls): + return len(cls._member_names_) + + __nonzero__ = __bool__ + + def __repr__(cls): + return "" % cls.__name__ + + def __setattr__(cls, name, value): + """Block attempts to reassign Enum members. + + A simple assignment to the class namespace only changes one of the + several possible ways to get an Enum member from the Enum class, + resulting in an inconsistent Enumeration. + + """ + member_map = cls.__dict__.get('_member_map_', {}) + if name in member_map: + raise AttributeError('Cannot reassign members.') + super(EnumMeta, cls).__setattr__(name, value) + + def _create_(cls, class_name, names=None, module=None, type=None, start=1): + """Convenience method to create a new Enum class. + + `names` can be: + + * A string containing member names, separated either with spaces or + commas. Values are auto-numbered from 1. + * An iterable of member names. Values are auto-numbered from 1. + * An iterable of (member name, value) pairs. + * A mapping of member name -> value. + + """ + if pyver < 3.0: + # if class_name is unicode, attempt a conversion to ASCII + if isinstance(class_name, unicode): + try: + class_name = class_name.encode('ascii') + except UnicodeEncodeError: + raise TypeError('%r is not representable in ASCII' % class_name) + metacls = cls.__class__ + if type is None: + bases = (cls, ) + else: + bases = (type, cls) + classdict = metacls.__prepare__(class_name, bases) + _order_ = [] + + # special processing needed for names? + if isinstance(names, basestring): + names = names.replace(',', ' ').split() + if isinstance(names, (tuple, list)) and isinstance(names[0], basestring): + names = [(e, i+start) for (i, e) in enumerate(names)] + + # Here, names is either an iterable of (name, value) or a mapping. 
+ item = None # in case names is empty + for item in names: + if isinstance(item, basestring): + member_name, member_value = item, names[item] + else: + member_name, member_value = item + classdict[member_name] = member_value + _order_.append(member_name) + # only set _order_ in classdict if name/value was not from a mapping + if not isinstance(item, basestring): + classdict['_order_'] = ' '.join(_order_) + enum_class = metacls.__new__(metacls, class_name, bases, classdict) + + # TODO: replace the frame hack if a blessed way to know the calling + # module is ever developed + if module is None: + try: + module = _sys._getframe(2).f_globals['__name__'] + except (AttributeError, ValueError): + pass + if module is None: + _make_class_unpicklable(enum_class) + else: + enum_class.__module__ = module + + return enum_class + + @staticmethod + def _get_mixins_(bases): + """Returns the type for creating enum members, and the first inherited + enum class. + + bases: the tuple of bases that was given to __new__ + + """ + if not bases or Enum is None: + return object, Enum + + + # double check that we are not subclassing a class with existing + # enumeration members; while we're at it, see if any other data + # type has been mixed in so we can use the correct __new__ + member_type = first_enum = None + for base in bases: + if (base is not Enum and + issubclass(base, Enum) and + base._member_names_): + raise TypeError("Cannot extend enumerations") + # base is now the last base in bases + if not issubclass(base, Enum): + raise TypeError("new enumerations must be created as " + "`ClassName([mixin_type,] enum_type)`") + + # get correct mix-in type (either mix-in type of Enum subclass, or + # first base if last base is Enum) + if not issubclass(bases[0], Enum): + member_type = bases[0] # first data type + first_enum = bases[-1] # enum type + else: + for base in bases[0].__mro__: + # most common: (IntEnum, int, Enum, object) + # possible: (, , + # , , + # ) + if issubclass(base, Enum): + if first_enum is None: + first_enum = base + else: + if member_type is None: + member_type = base + + return member_type, first_enum + + if pyver < 3.0: + @staticmethod + def _find_new_(classdict, member_type, first_enum): + """Returns the __new__ to be used for creating the enum members. 
+ + classdict: the class dictionary given to __new__ + member_type: the data type whose __new__ will be used by default + first_enum: enumeration to check for an overriding __new__ + + """ + # now find the correct __new__, checking to see of one was defined + # by the user; also check earlier enum classes in case a __new__ was + # saved as __member_new__ + __new__ = classdict.get('__new__', None) + if __new__: + return None, True, True # __new__, save_new, use_args + + N__new__ = getattr(None, '__new__') + O__new__ = getattr(object, '__new__') + if Enum is None: + E__new__ = N__new__ + else: + E__new__ = Enum.__dict__['__new__'] + # check all possibles for __member_new__ before falling back to + # __new__ + for method in ('__member_new__', '__new__'): + for possible in (member_type, first_enum): + try: + target = possible.__dict__[method] + except (AttributeError, KeyError): + target = getattr(possible, method, None) + if target not in [ + None, + N__new__, + O__new__, + E__new__, + ]: + if method == '__member_new__': + classdict['__new__'] = target + return None, False, True + if isinstance(target, staticmethod): + target = target.__get__(member_type) + __new__ = target + break + if __new__ is not None: + break + else: + __new__ = object.__new__ + + # if a non-object.__new__ is used then whatever value/tuple was + # assigned to the enum member name will be passed to __new__ and to the + # new enum member's __init__ + if __new__ is object.__new__: + use_args = False + else: + use_args = True + + return __new__, False, use_args + else: + @staticmethod + def _find_new_(classdict, member_type, first_enum): + """Returns the __new__ to be used for creating the enum members. + + classdict: the class dictionary given to __new__ + member_type: the data type whose __new__ will be used by default + first_enum: enumeration to check for an overriding __new__ + + """ + # now find the correct __new__, checking to see of one was defined + # by the user; also check earlier enum classes in case a __new__ was + # saved as __member_new__ + __new__ = classdict.get('__new__', None) + + # should __new__ be saved as __member_new__ later? + save_new = __new__ is not None + + if __new__ is None: + # check all possibles for __member_new__ before falling back to + # __new__ + for method in ('__member_new__', '__new__'): + for possible in (member_type, first_enum): + target = getattr(possible, method, None) + if target not in ( + None, + None.__new__, + object.__new__, + Enum.__new__, + ): + __new__ = target + break + if __new__ is not None: + break + else: + __new__ = object.__new__ + + # if a non-object.__new__ is used then whatever value/tuple was + # assigned to the enum member name will be passed to __new__ and to the + # new enum member's __init__ + if __new__ is object.__new__: + use_args = False + else: + use_args = True + + return __new__, save_new, use_args + + +######################################################## +# In order to support Python 2 and 3 with a single +# codebase we have to create the Enum methods separately +# and then use the `type(name, bases, dict)` method to +# create the class. +######################################################## +temp_enum_dict = {} +temp_enum_dict['__doc__'] = "Generic enumeration.\n\n Derive from this class to define new enumerations.\n\n" + +def __new__(cls, value): + # all enum instances are actually created during class construction + # without calling this method; this method is called by the metaclass' + # __call__ (i.e. 
Color(3) ), and by pickle + if type(value) is cls: + # For lookups like Color(Color.red) + value = value.value + #return value + # by-value search for a matching enum member + # see if it's in the reverse mapping (for hashable values) + try: + if value in cls._value2member_map_: + return cls._value2member_map_[value] + except TypeError: + # not there, now do long search -- O(n) behavior + for member in cls._member_map_.values(): + if member.value == value: + return member + raise ValueError("%s is not a valid %s" % (value, cls.__name__)) +temp_enum_dict['__new__'] = __new__ +del __new__ + +def __repr__(self): + return "<%s.%s: %r>" % ( + self.__class__.__name__, self._name_, self._value_) +temp_enum_dict['__repr__'] = __repr__ +del __repr__ + +def __str__(self): + return "%s.%s" % (self.__class__.__name__, self._name_) +temp_enum_dict['__str__'] = __str__ +del __str__ + +if pyver >= 3.0: + def __dir__(self): + added_behavior = [ + m + for cls in self.__class__.mro() + for m in cls.__dict__ + if m[0] != '_' and m not in self._member_map_ + ] + return (['__class__', '__doc__', '__module__', ] + added_behavior) + temp_enum_dict['__dir__'] = __dir__ + del __dir__ + +def __format__(self, format_spec): + # mixed-in Enums should use the mixed-in type's __format__, otherwise + # we can get strange results with the Enum name showing up instead of + # the value + + # pure Enum branch + if self._member_type_ is object: + cls = str + val = str(self) + # mix-in branch + else: + cls = self._member_type_ + val = self.value + return cls.__format__(val, format_spec) +temp_enum_dict['__format__'] = __format__ +del __format__ + + +#################################### +# Python's less than 2.6 use __cmp__ + +if pyver < 2.6: + + def __cmp__(self, other): + if type(other) is self.__class__: + if self is other: + return 0 + return -1 + return NotImplemented + raise TypeError("unorderable types: %s() and %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__cmp__'] = __cmp__ + del __cmp__ + +else: + + def __le__(self, other): + raise TypeError("unorderable types: %s() <= %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__le__'] = __le__ + del __le__ + + def __lt__(self, other): + raise TypeError("unorderable types: %s() < %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__lt__'] = __lt__ + del __lt__ + + def __ge__(self, other): + raise TypeError("unorderable types: %s() >= %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__ge__'] = __ge__ + del __ge__ + + def __gt__(self, other): + raise TypeError("unorderable types: %s() > %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__gt__'] = __gt__ + del __gt__ + + +def __eq__(self, other): + if type(other) is self.__class__: + return self is other + return NotImplemented +temp_enum_dict['__eq__'] = __eq__ +del __eq__ + +def __ne__(self, other): + if type(other) is self.__class__: + return self is not other + return NotImplemented +temp_enum_dict['__ne__'] = __ne__ +del __ne__ + +def __hash__(self): + return hash(self._name_) +temp_enum_dict['__hash__'] = __hash__ +del __hash__ + +def __reduce_ex__(self, proto): + return self.__class__, (self._value_, ) +temp_enum_dict['__reduce_ex__'] = __reduce_ex__ +del __reduce_ex__ + +# _RouteClassAttributeToGetattr is used to provide access to the `name` +# and `value` properties of enum members while keeping some measure of +# protection from modification, while still allowing for an 
enumeration +# to have members named `name` and `value`. This works because enumeration +# members are not set directly on the enum class -- __getattr__ is +# used to look them up. + +@_RouteClassAttributeToGetattr +def name(self): + return self._name_ +temp_enum_dict['name'] = name +del name + +@_RouteClassAttributeToGetattr +def value(self): + return self._value_ +temp_enum_dict['value'] = value +del value + +@classmethod +def _convert(cls, name, module, filter, source=None): + """ + Create a new Enum subclass that replaces a collection of global constants + """ + # convert all constants from source (or module) that pass filter() to + # a new Enum called name, and export the enum and its members back to + # module; + # also, replace the __reduce_ex__ method so unpickling works in + # previous Python versions + module_globals = vars(_sys.modules[module]) + if source: + source = vars(source) + else: + source = module_globals + members = dict((name, value) for name, value in source.items() if filter(name)) + cls = cls(name, members, module=module) + cls.__reduce_ex__ = _reduce_ex_by_name + module_globals.update(cls.__members__) + module_globals[name] = cls + return cls +temp_enum_dict['_convert'] = _convert +del _convert + +Enum = EnumMeta('Enum', (object, ), temp_enum_dict) +del temp_enum_dict + +# Enum has now been created +########################### + +class IntEnum(int, Enum): + """Enum where members are also (and must be) ints""" + +def _reduce_ex_by_name(self, proto): + return self.name + +def unique(enumeration): + """Class decorator that ensures only unique members exist in an enumeration.""" + duplicates = [] + for name, member in enumeration.__members__.items(): + if name != member.name: + duplicates.append((name, member.name)) + if duplicates: + duplicate_names = ', '.join( + ["%s -> %s" % (alias, name) for (alias, name) in duplicates] + ) + raise ValueError('duplicate names found in %r: %s' % + (enumeration, duplicate_names) + ) + return enumeration diff --git a/kafka/vendor/six.py b/kafka/vendor/six.py index a949b9539..3621a0ab4 100644 --- a/kafka/vendor/six.py +++ b/kafka/vendor/six.py @@ -1,7 +1,6 @@ # pylint: skip-file -"""Utilities for writing code that runs on Python 2 and 3""" -# Copyright (c) 2010-2015 Benjamin Peterson +# Copyright (c) 2010-2017 Benjamin Peterson # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,6 +20,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +"""Utilities for writing code that runs on Python 2 and 3""" + from __future__ import absolute_import import functools @@ -30,7 +31,7 @@ import types __author__ = "Benjamin Peterson " -__version__ = "1.10.0" +__version__ = "1.11.0" # Useful for very coarse version differentiation. @@ -71,7 +72,9 @@ def __len__(self): # 64-bit MAXSIZE = int((1 << 63) - 1) - # Don't del it here, cause with gc disabled this "leaks" to garbage + # Don't del it here, cause with gc disabled this "leaks" to garbage. 
+ # Note: This is a kafka-python customization, details at: + # https://github.com/dpkp/kafka-python/pull/979#discussion_r100403389 # del X @@ -244,6 +247,7 @@ class _MovedItems(_LazyModule): MovedAttribute("map", "itertools", "builtins", "imap", "map"), MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"), MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"), + MovedAttribute("getoutput", "commands", "subprocess"), MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"), MovedAttribute("reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload"), MovedAttribute("reduce", "__builtin__", "functools"), @@ -265,10 +269,11 @@ class _MovedItems(_LazyModule): MovedModule("html_entities", "htmlentitydefs", "html.entities"), MovedModule("html_parser", "HTMLParser", "html.parser"), MovedModule("http_client", "httplib", "http.client"), + MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), + MovedModule("email_mime_image", "email.MIMEImage", "email.mime.image"), MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"), MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"), MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"), - MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), @@ -340,10 +345,12 @@ class Module_six_moves_urllib_parse(_LazyModule): MovedAttribute("quote_plus", "urllib", "urllib.parse"), MovedAttribute("unquote", "urllib", "urllib.parse"), MovedAttribute("unquote_plus", "urllib", "urllib.parse"), + MovedAttribute("unquote_to_bytes", "urllib", "urllib.parse", "unquote", "unquote_to_bytes"), MovedAttribute("urlencode", "urllib", "urllib.parse"), MovedAttribute("splitquery", "urllib", "urllib.parse"), MovedAttribute("splittag", "urllib", "urllib.parse"), MovedAttribute("splituser", "urllib", "urllib.parse"), + MovedAttribute("splitvalue", "urllib", "urllib.parse"), MovedAttribute("uses_fragment", "urlparse", "urllib.parse"), MovedAttribute("uses_netloc", "urlparse", "urllib.parse"), MovedAttribute("uses_params", "urlparse", "urllib.parse"), @@ -419,6 +426,8 @@ class Module_six_moves_urllib_request(_LazyModule): MovedAttribute("URLopener", "urllib", "urllib.request"), MovedAttribute("FancyURLopener", "urllib", "urllib.request"), MovedAttribute("proxy_bypass", "urllib", "urllib.request"), + MovedAttribute("parse_http_list", "urllib2", "urllib.request"), + MovedAttribute("parse_keqv_list", "urllib2", "urllib.request"), ] for attr in _urllib_request_moved_attributes: setattr(Module_six_moves_urllib_request, attr.name, attr) @@ -682,11 +691,15 @@ def assertRegex(self, *args, **kwargs): exec_ = getattr(moves.builtins, "exec") def reraise(tp, value, tb=None): - if value is None: - value = tp() - if value.__traceback__ is not tb: - raise value.with_traceback(tb) - raise value + try: + if value is None: + value = tp() + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + finally: + value = None + tb = None else: def exec_(_code_, _globs_=None, _locs_=None): @@ -702,19 +715,28 @@ def exec_(_code_, _globs_=None, _locs_=None): exec("""exec _code_ in _globs_, _locs_""") exec_("""def reraise(tp, value, tb=None): - raise tp, value, tb + try: + raise tp, value, tb + finally: + tb = None """) if sys.version_info[:2] == (3, 2): 
exec_("""def raise_from(value, from_value): - if from_value is None: - raise value - raise value from from_value + try: + if from_value is None: + raise value + raise value from from_value + finally: + value = None """) elif sys.version_info[:2] > (3, 2): exec_("""def raise_from(value, from_value): - raise value from from_value + try: + raise value from from_value + finally: + value = None """) else: def raise_from(value, from_value): @@ -805,10 +827,14 @@ def with_metaclass(meta, *bases): # This requires a bit of explanation: the basic idea is to make a dummy # metaclass for one level of class instantiation that replaces itself with # the actual metaclass. - class metaclass(meta): + class metaclass(type): def __new__(cls, name, this_bases, d): return meta(name, bases, d) + + @classmethod + def __prepare__(cls, name, this_bases): + return meta.__prepare__(name, bases) return type.__new__(metaclass, 'temporary_class', (), {}) diff --git a/pylint.rc b/pylint.rc index d22e523ec..851275bcc 100644 --- a/pylint.rc +++ b/pylint.rc @@ -1,5 +1,6 @@ [TYPECHECK] ignored-classes=SyncManager,_socketobject +ignored-modules=kafka.vendor.six.moves generated-members=py.* [MESSAGES CONTROL] diff --git a/test/conftest.py b/test/conftest.py index dbc2378d9..a751d9506 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -4,8 +4,8 @@ import pytest -from test.fixtures import KafkaFixture, ZookeeperFixture -from test.testutil import kafka_version, random_string +from test.fixtures import KafkaFixture, ZookeeperFixture, random_string, version as kafka_version + @pytest.fixture(scope="module") def version(): diff --git a/test/fixtures.py b/test/fixtures.py index 493a664a5..76e3071f3 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -12,8 +12,8 @@ import uuid import py -from six.moves import urllib, xrange -from six.moves.urllib.parse import urlparse # pylint: disable=E0611,F0401 +from kafka.vendor.six.moves import urllib, range +from kafka.vendor.six.moves.urllib.parse import urlparse # pylint: disable=E0611,F0401 from kafka import errors, KafkaConsumer, KafkaProducer, SimpleClient from kafka.client_async import KafkaClient @@ -24,7 +24,7 @@ log = logging.getLogger(__name__) def random_string(length): - return "".join(random.choice(string.ascii_letters) for i in xrange(length)) + return "".join(random.choice(string.ascii_letters) for i in range(length)) def version_str_to_list(version_str): return tuple(map(int, version_str.split('.'))) # e.g., (0, 8, 1, 1) @@ -48,7 +48,6 @@ class Fixture(object): os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) kafka_root = os.environ.get("KAFKA_ROOT", os.path.join(project_root, 'servers', kafka_version, "kafka-bin")) - ivy_root = os.environ.get('IVY_ROOT', os.path.expanduser("~/.ivy2/cache")) def __init__(self): self.child = None diff --git a/test/record/test_default_records.py b/test/record/test_default_records.py index 6e2f5e8ac..c3a7b02c8 100644 --- a/test/record/test_default_records.py +++ b/test/record/test_default_records.py @@ -119,8 +119,12 @@ def test_default_batch_builder_validates_arguments(): builder.append( 5, timestamp=9999999, key=b"123", value=None, headers=[]) + # Check record with headers + builder.append( + 6, timestamp=9999999, key=b"234", value=None, headers=[("hkey", b"hval")]) + # in case error handling code fails to fix inner buffer in builder - assert len(builder.build()) == 104 + assert len(builder.build()) == 124 def test_default_correct_metadata_response(): diff --git a/test/record/test_records.py b/test/record/test_records.py 
index 224989f38..f1b8baa40 100644 --- a/test/record/test_records.py +++ b/test/record/test_records.py @@ -22,6 +22,11 @@ b'\x85\xb7\x00\x00\x00\x00\x00\x00\x00\x00\x01]\xff|\xe7\x9d\x00\x00\x01]' b'\xff|\xe7\x9d\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff' b'\x00\x00\x00\x01\x12\x00\x00\x00\x01\x06123\x00' + # Fourth batch value = "hdr" with header hkey=hval + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00E\x00\x00\x00\x00\x02\\' + b'\xd8\xefR\x00\x00\x00\x00\x00\x00\x00\x00\x01e\x85\xb6\xf3\xc1\x00\x00' + b'\x01e\x85\xb6\xf3\xc1\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff' + b'\xff\xff\x00\x00\x00\x01&\x00\x00\x00\x01\x06hdr\x02\x08hkey\x08hval' ] record_batch_data_v1 = [ @@ -60,8 +65,8 @@ def test_memory_records_v2(): data_bytes = b"".join(record_batch_data_v2) + b"\x00" * 4 records = MemoryRecords(data_bytes) - assert records.size_in_bytes() == 222 - assert records.valid_bytes() == 218 + assert records.size_in_bytes() == 303 + assert records.valid_bytes() == 299 assert records.has_next() is True batch = records.next_batch() @@ -77,6 +82,12 @@ def test_memory_records_v2(): assert records.next_batch() is not None assert records.next_batch() is not None + batch = records.next_batch() + recs = list(batch) + assert len(recs) == 1 + assert recs[0].value == b"hdr" + assert recs[0].headers == [('hkey', b'hval')] + assert records.has_next() is False assert records.next_batch() is None assert records.next_batch() is None diff --git a/test/test_admin.py b/test/test_admin.py new file mode 100644 index 000000000..fd9c54ddd --- /dev/null +++ b/test/test_admin.py @@ -0,0 +1,47 @@ +import pytest + +import kafka.admin +from kafka.errors import IllegalArgumentError + + +def test_config_resource(): + with pytest.raises(KeyError): + bad_resource = kafka.admin.ConfigResource('something', 'foo') + good_resource = kafka.admin.ConfigResource('broker', 'bar') + assert(good_resource.resource_type == kafka.admin.ConfigResourceType.BROKER) + assert(good_resource.name == 'bar') + assert(good_resource.configs is None) + good_resource = kafka.admin.ConfigResource(kafka.admin.ConfigResourceType.TOPIC, 'baz', {'frob' : 'nob'}) + assert(good_resource.resource_type == kafka.admin.ConfigResourceType.TOPIC) + assert(good_resource.name == 'baz') + assert(good_resource.configs == {'frob' : 'nob'}) + + +def test_new_partitions(): + good_partitions = kafka.admin.NewPartitions(6) + assert(good_partitions.total_count == 6) + assert(good_partitions.new_assignments is None) + good_partitions = kafka.admin.NewPartitions(7, [[1, 2, 3]]) + assert(good_partitions.total_count == 7) + assert(good_partitions.new_assignments == [[1, 2, 3]]) + + +def test_new_topic(): + with pytest.raises(IllegalArgumentError): + bad_topic = kafka.admin.NewTopic('foo', -1, -1) + with pytest.raises(IllegalArgumentError): + bad_topic = kafka.admin.NewTopic('foo', 1, -1) + with pytest.raises(IllegalArgumentError): + bad_topic = kafka.admin.NewTopic('foo', 1, 1, {1 : [1, 1, 1]}) + good_topic = kafka.admin.NewTopic('foo', 1, 2) + assert(good_topic.name == 'foo') + assert(good_topic.num_partitions == 1) + assert(good_topic.replication_factor == 2) + assert(good_topic.replica_assignments == {}) + assert(good_topic.topic_configs == {}) + good_topic = kafka.admin.NewTopic('bar', -1, -1, {1 : [1, 2, 3]}, {'key' : 'value'}) + assert(good_topic.name == 'bar') + assert(good_topic.num_partitions == -1) + assert(good_topic.replication_factor == -1) + assert(good_topic.replica_assignments == {1: [1, 2, 3]}) + assert(good_topic.topic_configs == {'key' : 
'value'}) diff --git a/test/test_client.py b/test/test_client.py index c53983c94..1c689789b 100644 --- a/test/test_client.py +++ b/test/test_client.py @@ -2,7 +2,7 @@ from mock import ANY, MagicMock, patch from operator import itemgetter -import six +from kafka.vendor import six from . import unittest from kafka import SimpleClient diff --git a/test/test_client_async.py b/test/test_client_async.py index eccb56421..09781ac2c 100644 --- a/test/test_client_async.py +++ b/test/test_client_async.py @@ -13,14 +13,13 @@ import pytest from kafka.client_async import KafkaClient, IdleConnectionManager +from kafka.cluster import ClusterMetadata from kafka.conn import ConnectionStates import kafka.errors as Errors from kafka.future import Future from kafka.protocol.metadata import MetadataResponse, MetadataRequest from kafka.protocol.produce import ProduceRequest from kafka.structs import BrokerMetadata -from kafka.cluster import ClusterMetadata -from kafka.future import Future @pytest.fixture diff --git a/test/test_codec.py b/test/test_codec.py index d31fc8674..0fefe6faa 100644 --- a/test/test_codec.py +++ b/test/test_codec.py @@ -4,7 +4,7 @@ import struct import pytest -from six.moves import xrange +from kafka.vendor.six.moves import range from kafka.codec import ( has_snappy, has_gzip, has_lz4, @@ -14,11 +14,11 @@ lz4_encode_old_kafka, lz4_decode_old_kafka, ) -from test.testutil import random_string +from test.fixtures import random_string def test_gzip(): - for i in xrange(1000): + for i in range(1000): b1 = random_string(100).encode('utf-8') b2 = gzip_decode(gzip_encode(b1)) assert b1 == b2 @@ -26,7 +26,7 @@ def test_gzip(): @pytest.mark.skipif(not has_snappy(), reason="Snappy not available") def test_snappy(): - for i in xrange(1000): + for i in range(1000): b1 = random_string(100).encode('utf-8') b2 = snappy_decode(snappy_encode(b1)) assert b1 == b2 @@ -86,7 +86,7 @@ def test_snappy_encode_xerial(): @pytest.mark.skipif(not has_lz4() or platform.python_implementation() == 'PyPy', reason="python-lz4 crashes on old versions of pypy") def test_lz4(): - for i in xrange(1000): + for i in range(1000): b1 = random_string(100).encode('utf-8') b2 = lz4_decode(lz4_encode(b1)) assert len(b1) == len(b2) @@ -96,7 +96,7 @@ def test_lz4(): @pytest.mark.skipif(not has_lz4() or platform.python_implementation() == 'PyPy', reason="python-lz4 crashes on old versions of pypy") def test_lz4_old(): - for i in xrange(1000): + for i in range(1000): b1 = random_string(100).encode('utf-8') b2 = lz4_decode_old_kafka(lz4_encode_old_kafka(b1)) assert len(b1) == len(b2) @@ -106,7 +106,7 @@ def test_lz4_old(): @pytest.mark.skipif(not has_lz4() or platform.python_implementation() == 'PyPy', reason="python-lz4 crashes on old versions of pypy") def test_lz4_incremental(): - for i in xrange(1000): + for i in range(1000): # lz4 max single block size is 4MB # make sure we test with multiple-blocks b1 = random_string(100).encode('utf-8') * 50000 diff --git a/test/test_conn.py b/test/test_conn.py index fbdeeb9e7..27d77beb3 100644 --- a/test/test_conn.py +++ b/test/test_conn.py @@ -13,7 +13,7 @@ from kafka.protocol.metadata import MetadataRequest from kafka.protocol.produce import ProduceRequest -import kafka.common as Errors +import kafka.errors as Errors @pytest.fixture diff --git a/test/test_consumer_group.py b/test/test_consumer_group.py index f9a41a46a..01eb39e1d 100644 --- a/test/test_consumer_group.py +++ b/test/test_consumer_group.py @@ -4,7 +4,7 @@ import time import pytest -import six +from kafka.vendor import six from kafka 
import SimpleClient from kafka.conn import ConnectionStates @@ -13,7 +13,7 @@ from kafka.structs import TopicPartition from test.conftest import version -from test.testutil import random_string +from test.fixtures import random_string def get_connect_str(kafka_broker): diff --git a/test/test_consumer_integration.py b/test/test_consumer_integration.py index e6f140598..9a7790eac 100644 --- a/test/test_consumer_integration.py +++ b/test/test_consumer_integration.py @@ -6,8 +6,8 @@ import kafka.codec import pytest -from six.moves import xrange -import six +from kafka.vendor.six.moves import range +from kafka.vendor import six from . import unittest from kafka import ( @@ -24,9 +24,9 @@ ) from test.conftest import version -from test.fixtures import ZookeeperFixture, KafkaFixture +from test.fixtures import ZookeeperFixture, KafkaFixture, random_string from test.testutil import ( - KafkaIntegrationTestCase, kafka_versions, random_string, Timer, + KafkaIntegrationTestCase, kafka_versions, Timer, send_messages ) @@ -473,7 +473,7 @@ def test_offset_behavior__resuming_behavior(self): ) # Grab the first 195 messages - output_msgs1 = [ consumer1.get_message().message.value for _ in xrange(195) ] + output_msgs1 = [ consumer1.get_message().message.value for _ in range(195) ] self.assert_message_count(output_msgs1, 195) # The total offset across both partitions should be at 180 @@ -603,7 +603,7 @@ def test_kafka_consumer__offset_commit_resume(self): # Grab the first 180 messages output_msgs1 = [] - for _ in xrange(180): + for _ in range(180): m = next(consumer1) output_msgs1.append(m) self.assert_message_count(output_msgs1, 180) @@ -619,7 +619,7 @@ def test_kafka_consumer__offset_commit_resume(self): # 181-200 output_msgs2 = [] - for _ in xrange(20): + for _ in range(20): m = next(consumer2) output_msgs2.append(m) self.assert_message_count(output_msgs2, 20) diff --git a/test/test_coordinator.py b/test/test_coordinator.py index 7a2627ea0..4afdcd9ac 100644 --- a/test/test_coordinator.py +++ b/test/test_coordinator.py @@ -5,7 +5,6 @@ import pytest from kafka.client_async import KafkaClient -from kafka.structs import TopicPartition, OffsetAndMetadata from kafka.consumer.subscription_state import ( SubscriptionState, ConsumerRebalanceListener) from kafka.coordinator.assignors.range import RangePartitionAssignor @@ -21,6 +20,7 @@ OffsetCommitRequest, OffsetCommitResponse, OffsetFetchRequest, OffsetFetchResponse) from kafka.protocol.metadata import MetadataResponse +from kafka.structs import TopicPartition, OffsetAndMetadata from kafka.util import WeakMethod @@ -34,7 +34,7 @@ def coordinator(client): def test_init(client, coordinator): - # metadata update on init + # metadata update on init assert client.cluster._need_update is True assert WeakMethod(coordinator._handle_metadata_update) in client.cluster._listeners @@ -542,7 +542,7 @@ def test_send_offset_fetch_request_success(patched_coord, partitions): response = OffsetFetchResponse[0]([('foobar', [(0, 123, b'', 0), (1, 234, b'', 0)])]) _f.success(response) patched_coord._handle_offset_fetch_response.assert_called_with( - future, response) + future, response) @pytest.mark.parametrize('response,error,dead', [ diff --git a/test/test_failover_integration.py b/test/test_failover_integration.py index ad7dcb98b..48021a443 100644 --- a/test/test_failover_integration.py +++ b/test/test_failover_integration.py @@ -9,8 +9,8 @@ from kafka.producer.base import Producer from kafka.structs import TopicPartition -from test.fixtures import ZookeeperFixture, KafkaFixture -from 
test.testutil import KafkaIntegrationTestCase, random_string +from test.fixtures import ZookeeperFixture, KafkaFixture, random_string +from test.testutil import KafkaIntegrationTestCase log = logging.getLogger(__name__) diff --git a/test/test_fetcher.py b/test/test_fetcher.py index fc031f742..e37a70db5 100644 --- a/test/test_fetcher.py +++ b/test/test_fetcher.py @@ -12,16 +12,16 @@ CompletedFetch, ConsumerRecord, Fetcher, NoOffsetForPartitionError ) from kafka.consumer.subscription_state import SubscriptionState +from kafka.future import Future from kafka.metrics import Metrics from kafka.protocol.fetch import FetchRequest, FetchResponse from kafka.protocol.offset import OffsetResponse -from kafka.structs import TopicPartition -from kafka.future import Future from kafka.errors import ( StaleMetadata, LeaderNotAvailableError, NotLeaderForPartitionError, UnknownTopicOrPartitionError, OffsetOutOfRangeError ) from kafka.record.memory_records import MemoryRecordsBuilder, MemoryRecords +from kafka.structs import TopicPartition @pytest.fixture @@ -509,7 +509,7 @@ def test_partition_records_offset(): fetch_offset = 123 tp = TopicPartition('foo', 0) messages = [ConsumerRecord(tp.topic, tp.partition, i, - None, None, 'key', 'value', 'checksum', 0, 0) + None, None, 'key', 'value', [], 'checksum', 0, 0, -1) for i in range(batch_start, batch_end)] records = Fetcher.PartitionRecords(fetch_offset, None, messages) assert len(records) > 0 @@ -534,7 +534,7 @@ def test_partition_records_no_fetch_offset(): fetch_offset = 123 tp = TopicPartition('foo', 0) messages = [ConsumerRecord(tp.topic, tp.partition, i, - None, None, 'key', 'value', 'checksum', 0, 0) + None, None, 'key', 'value', None, 'checksum', 0, 0, -1) for i in range(batch_start, batch_end)] records = Fetcher.PartitionRecords(fetch_offset, None, messages) assert len(records) == 0 @@ -549,7 +549,7 @@ def test_partition_records_compacted_offset(): fetch_offset = 42 tp = TopicPartition('foo', 0) messages = [ConsumerRecord(tp.topic, tp.partition, i, - None, None, 'key', 'value', 'checksum', 0, 0) + None, None, 'key', 'value', None, 'checksum', 0, 0, -1) for i in range(batch_start, batch_end) if i != fetch_offset] records = Fetcher.PartitionRecords(fetch_offset, None, messages) assert len(records) == batch_end - fetch_offset - 1 diff --git a/test/test_partitioner.py b/test/test_partitioner.py index 47470e1bd..3a5264b7e 100644 --- a/test/test_partitioner.py +++ b/test/test_partitioner.py @@ -1,13 +1,14 @@ from __future__ import absolute_import +import pytest + from kafka.partitioner import DefaultPartitioner, Murmur2Partitioner, RoundRobinPartitioner from kafka.partitioner.hashed import murmur2 def test_default_partitioner(): partitioner = DefaultPartitioner() - all_partitions = list(range(100)) - available = all_partitions + all_partitions = available = list(range(100)) # partitioner should return the same partition for the same key p1 = partitioner(b'foo', all_partitions, available) p2 = partitioner(b'foo', all_partitions, available) @@ -23,8 +24,7 @@ def test_default_partitioner(): def test_roundrobin_partitioner(): partitioner = RoundRobinPartitioner() - all_partitions = list(range(100)) - available = all_partitions + all_partitions = available = list(range(100)) # partitioner should cycle between partitions i = 0 max_partition = all_partitions[len(all_partitions) - 1] @@ -53,15 +53,14 @@ def test_roundrobin_partitioner(): i += 1 -def test_murmur2_java_compatibility(): +@pytest.mark.parametrize("bytes_payload,partition_number", [ + (b'', 681), (b'a', 
524), (b'ab', 434), (b'abc', 107), (b'123456789', 566), + (b'\x00 ', 742) +]) +def test_murmur2_java_compatibility(bytes_payload, partition_number): p = Murmur2Partitioner(range(1000)) # compare with output from Kafka's org.apache.kafka.clients.producer.Partitioner - assert p.partition(b'') == 681 - assert p.partition(b'a') == 524 - assert p.partition(b'ab') == 434 - assert p.partition(b'abc') == 107 - assert p.partition(b'123456789') == 566 - assert p.partition(b'\x00 ') == 742 + assert p.partition(bytes_payload) == partition_number def test_murmur2_not_ascii(): diff --git a/test/test_producer.py b/test/test_producer.py index 09d184f34..16da61898 100644 --- a/test/test_producer.py +++ b/test/test_producer.py @@ -8,7 +8,7 @@ from kafka import KafkaConsumer, KafkaProducer, TopicPartition from kafka.producer.buffer import SimpleBufferPool from test.conftest import version -from test.testutil import random_string +from test.fixtures import random_string def test_buffer_pool(): @@ -91,10 +91,16 @@ def test_kafka_producer_proper_record_metadata(kafka_broker, compression): compression_type=compression) magic = producer._max_usable_produce_magic() + # record headers are supported in 0.11.0 + if version() < (0, 11, 0): + headers = None + else: + headers = [("Header Key", b"Header Value")] + topic = random_string(5) future = producer.send( topic, - value=b"Simple value", key=b"Simple key", timestamp_ms=9999999, + value=b"Simple value", key=b"Simple key", headers=headers, timestamp_ms=9999999, partition=0) record = future.get(timeout=5) assert record is not None @@ -116,6 +122,8 @@ def test_kafka_producer_proper_record_metadata(kafka_broker, compression): assert record.serialized_key_size == 10 assert record.serialized_value_size == 12 + if headers: + assert record.serialized_header_size == 22 # generated timestamp case is skipped for broker 0.9 and below if magic == 0: diff --git a/test/test_producer_integration.py b/test/test_producer_integration.py index 6533cfabb..35ce0d7a5 100644 --- a/test/test_producer_integration.py +++ b/test/test_producer_integration.py @@ -3,7 +3,7 @@ import uuid import pytest -from six.moves import range +from kafka.vendor.six.moves import range from kafka import ( SimpleProducer, KeyedProducer, diff --git a/test/test_producer_legacy.py b/test/test_producer_legacy.py index 6d00116c3..ab80ee707 100644 --- a/test/test_producer_legacy.py +++ b/test/test_producer_legacy.py @@ -16,7 +16,7 @@ from kafka.structs import ( ProduceResponsePayload, RetryOptions, TopicPartition) -from six.moves import queue, xrange +from kafka.vendor.six.moves import queue, range class TestKafkaProducer(unittest.TestCase): @@ -84,7 +84,7 @@ def test_producer_async_queue_overfilled(self, mock): message_list = [message] * (queue_size + 1) producer.send_messages(topic, partition, *message_list) self.assertEqual(producer.queue.qsize(), queue_size) - for _ in xrange(producer.queue.qsize()): + for _ in range(producer.queue.qsize()): producer.queue.get() def test_producer_sync_fail_on_error(self): @@ -253,5 +253,5 @@ def send_side_effect(reqs, *args, **kwargs): self.assertEqual(self.client.send_produce_request.call_count, 5) def tearDown(self): - for _ in xrange(self.queue.qsize()): + for _ in range(self.queue.qsize()): self.queue.get() diff --git a/test/test_protocol.py b/test/test_protocol.py index d96365026..7abcefb46 100644 --- a/test/test_protocol.py +++ b/test/test_protocol.py @@ -3,7 +3,7 @@ import struct import pytest -import six +from kafka.vendor import six from kafka.protocol.api import 
RequestHeader from kafka.protocol.commit import GroupCoordinatorRequest diff --git a/test/test_protocol_legacy.py b/test/test_protocol_legacy.py index d705e3a15..1341af003 100644 --- a/test/test_protocol_legacy.py +++ b/test/test_protocol_legacy.py @@ -2,7 +2,7 @@ from contextlib import contextmanager import struct -import six +from kafka.vendor import six from mock import patch, sentinel from . import unittest diff --git a/test/test_substription_state.py b/test/test_subscription_state.py similarity index 100% rename from test/test_substription_state.py rename to test/test_subscription_state.py diff --git a/test/test_util.py b/test/test_util.py index 58e5ab840..a4dbaa5ab 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- import struct -import six +from kafka.vendor import six from . import unittest import kafka.errors -import kafka.util import kafka.structs +import kafka.util class UtilTest(unittest.TestCase): diff --git a/test/testutil.py b/test/testutil.py index 365e47f3b..feb6f6d5f 100644 --- a/test/testutil.py +++ b/test/testutil.py @@ -11,12 +11,15 @@ from . import unittest from kafka import SimpleClient, create_message -from kafka.errors import LeaderNotAvailableError, KafkaTimeoutError, InvalidTopicError -from kafka.structs import OffsetRequestPayload, ProduceRequestPayload, \ - NotLeaderForPartitionError, UnknownTopicOrPartitionError, \ - FailedPayloadsError +from kafka.errors import ( + LeaderNotAvailableError, KafkaTimeoutError, InvalidTopicError, + NotLeaderForPartitionError, UnknownTopicOrPartitionError, + FailedPayloadsError +) +from kafka.structs import OffsetRequestPayload, ProduceRequestPayload from test.fixtures import random_string, version_str_to_list, version as kafka_version #pylint: disable=wrong-import-order + def kafka_versions(*versions): def construct_lambda(s): @@ -63,12 +66,6 @@ def wrapper(func, *args, **kwargs): return real_kafka_versions -def get_open_port(): - sock = socket.socket() - sock.bind(("", 0)) - port = sock.getsockname()[1] - sock.close() - return port _MESSAGES = {} def msg(message): diff --git a/tox.ini b/tox.ini index ad95f9374..1760afffc 100644 --- a/tox.ini +++ b/tox.ini @@ -11,7 +11,7 @@ log_format = %(created)f %(filename)-23s %(threadName)s %(message)s deps = pytest pytest-cov - py{27,34,35,36,py}: pylint==1.8.2 + py{27,34,35,36,py}: pylint py{27,34,35,36,py}: pytest-pylint pytest-mock mock
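Finally, the serialized_header_size == 22 expectation in the producer metadata test above follows directly from the header-size arithmetic introduced in the producer changes: the UTF-8 length of each header key plus the length of each header value. A quick check of that number:

# Quick check of the serialized_header_size expectation in
# test_kafka_producer_proper_record_metadata above.
headers = [("Header Key", b"Header Value")]

size = sum(len(k.encode("utf-8")) + len(v) for k, v in headers)
assert size == 22  # 10 bytes of key ("Header Key") + 12 bytes of value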