From f47dc10f920c1e104a5bed89c9855150c7cb1a2f Mon Sep 17 00:00:00 2001 From: Jim Lim Date: Thu, 12 Sep 2013 10:54:30 -0700 Subject: [PATCH 1/7] update meta --- setup.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index a24691407..73143d484 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ def run(self): setup( - name="kafka-python", + name="kafka-quixey", version="0.8.1-1", install_requires=["distribute", "tox"], @@ -27,12 +27,14 @@ def run(self): packages=["kafka"], - author="David Arthur", - author_email="mumrah@gmail.com", - url="https://github.com/mumrah/kafka-python", + author="David Arthur, Jim Lim", + author_email="mumrah@gmail.com, jim@quixey.com", + url="https://github.com/quixey/kafka-python", license="Copyright 2012, David Arthur under Apache License, v2.0", description="Pure Python client for Apache Kafka", long_description=""" +I (Jim Lim) am releasing this to pypi under quixey for personal convenience. + This module provides low-level protocol support for Apache Kafka as well as high-level consumer and producer classes. Request batching is supported by the protocol as well as broker-aware request routing. Gzip and Snappy compression From ec97a30aacbce1450b67b3b44ea3897e9fb64617 Mon Sep 17 00:00:00 2001 From: Jim Lim Date: Wed, 18 Sep 2013 15:18:45 -0700 Subject: [PATCH 2/7] update client.py to warn about missing partitions --- kafka/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/client.py b/kafka/client.py index 2ec1f1f5b..77a74ec0f 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -80,7 +80,7 @@ def _load_metadata_for_topics(self, *topics): self.topic_partitions.pop(topic, None) if not partitions: - log.info("Partition is unassigned, delay for 1s and retry") + log.warn("Partition is unassigned, delay for 1s and retry") time.sleep(1) self._load_metadata_for_topics(topic) break From 4ea295409ba7659c84f0ee0890c45bd526e5f326 Mon Sep 17 00:00:00 2001 From: Jim Lim Date: Wed, 18 Sep 2013 15:49:21 -0700 Subject: [PATCH 3/7] use more informative warning --- kafka/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/client.py b/kafka/client.py index 77a74ec0f..45ef33363 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -80,7 +80,7 @@ def _load_metadata_for_topics(self, *topics): self.topic_partitions.pop(topic, None) if not partitions: - log.warn("Partition is unassigned, delay for 1s and retry") + log.warn("Partition is unassigned, delay for 1s and retry. Have you created {} on zookeeper?".format(topic)) time.sleep(1) self._load_metadata_for_topics(topic) break From b477fc21ddc8ca3d7c2f01c9d8f8100fbba5aeb2 Mon Sep 17 00:00:00 2001 From: Jim Lim Date: Tue, 24 Sep 2013 16:17:47 -0700 Subject: [PATCH 4/7] allow a client id to be set --- kafka/client.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 45ef33363..559673c36 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -20,9 +20,10 @@ class KafkaClient(object): CLIENT_ID = "kafka-python" ID_GEN = count() - def __init__(self, host, port, bufsize=4096): + def __init__(self, host, port, bufsize=4096, client_id=CLIENT_ID): # We need one connection to bootstrap - self.bufsize = bufsize + self.bufsize = bufsize + self.client_id = client_id self.conns = { # (host, port) -> KafkaConnection (host, port): KafkaConnection(host, port, bufsize) } @@ -59,7 +60,7 @@ def _load_metadata_for_topics(self, *topics): recurse in the event of a retry. """ requestId = self._next_id() - request = KafkaProtocol.encode_metadata_request(KafkaClient.CLIENT_ID, + request = KafkaProtocol.encode_metadata_request(self.client_id, requestId, topics) response = self._send_broker_unaware_request(requestId, request) @@ -156,7 +157,7 @@ def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn): for broker, payloads in payloads_by_broker.items(): conn = self._get_conn_for_broker(broker) requestId = self._next_id() - request = encoder_fn(client_id=KafkaClient.CLIENT_ID, + request = encoder_fn(client_id=self.client_id, correlation_id=requestId, payloads=payloads) # Send the request, recv the response From 2aa8df0e08ae7101689ada2f0f4884838fb6262a Mon Sep 17 00:00:00 2001 From: Tal Levy Date: Fri, 27 Sep 2013 14:02:10 -0700 Subject: [PATCH 5/7] make changes to be more fault tolerant: clean up connections, brokers, failed_messages --- kafka/client.py | 30 +++++++++++++++++++++++++----- kafka/common.py | 3 +++ kafka/producer.py | 10 ++++++++-- kafka/util.py | 2 ++ 4 files changed, 38 insertions(+), 7 deletions(-) diff --git a/kafka/client.py b/kafka/client.py index 559673c36..7494b9198 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -4,6 +4,7 @@ from itertools import count, cycle import logging from operator import attrgetter +import socket import struct import time import zlib @@ -72,7 +73,7 @@ def _load_metadata_for_topics(self, *topics): log.debug("Broker metadata: %s", brokers) log.debug("Topic metadata: %s", topics) - self.brokers.update(brokers) + self.brokers = brokers self.topics_to_brokers = {} for topic, partitions in topics.items(): @@ -100,6 +101,12 @@ def _next_id(self): "Generate a new correlation id" return KafkaClient.ID_GEN.next() + def _safe_conn_reinit(self, conn): + try: + conn.reinit() + except socket.error, e: + log.error("unsuccessful reinit", e) + def _send_broker_unaware_request(self, requestId, request): """ Attempt to send a broker-agnostic request to one of the available @@ -113,6 +120,7 @@ def _send_broker_unaware_request(self, requestId, request): except Exception, e: log.warning("Could not send request [%r] to server %s, " "trying next server: %s" % (request, conn, e)) + self._safe_conn_reinit(conn) continue return None @@ -153,6 +161,9 @@ def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn): # Accumulate the responses in a dictionary acc = {} + # keep a list of payloads that were failed to be sent to brokers + failed_payloads = [] + # For each broker, send the list of request payloads for broker, payloads in payloads_by_broker.items(): conn = self._get_conn_for_broker(broker) @@ -161,15 +172,24 @@ def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn): correlation_id=requestId, payloads=payloads) # Send the request, recv the response - conn.send(requestId, request) - - if decoder_fn is None: + try: + conn.send(requestId, request) + if decoder_fn is None: + continue + response = conn.recv(requestId) + except Exception, e: + log.warning("Could not send request [%s] to server %s: %s" % (request, conn, e)) + failed_payloads += payloads + self._safe_conn_reinit(conn) + self.topics_to_brokers = {} continue - response = conn.recv(requestId) for response in decoder_fn(response): acc[(response.topic, response.partition)] = response + if failed_payloads: + raise FailedPayloadsException(failed_payloads) + # Order the accumulated responses by the original key order return (acc[k] for k in original_keys) if acc else () diff --git a/kafka/common.py b/kafka/common.py index 0a1d3143b..0658d8b8c 100644 --- a/kafka/common.py +++ b/kafka/common.py @@ -69,6 +69,9 @@ class ErrorMapping(object): # Exceptions # ################# +class FailedPayloadsException(Exception): + pass + class BufferUnderflowError(Exception): pass diff --git a/kafka/producer.py b/kafka/producer.py index 06e468dfc..3e1fd3386 100644 --- a/kafka/producer.py +++ b/kafka/producer.py @@ -113,6 +113,7 @@ def _send_upstream(self, queue): self.client.send_produce_request(reqs, acks=self.req_acks, timeout=self.ack_timeout) except Exception as exp: + self.client._load_metadata_for_topics log.error("Error sending message", exc_info=sys.exc_info()) def send_messages(self, partition, *msg): @@ -126,8 +127,13 @@ def send_messages(self, partition, *msg): else: messages = [create_message(m) for m in msg] req = ProduceRequest(self.topic, partition, messages) - resp = self.client.send_produce_request([req], acks=self.req_acks, - timeout=self.ack_timeout) + try: + resp = self.client.send_produce_request([req], acks=self.req_acks, + timeout=self.ack_timeout) + except Exception as exp: + self.client._load_metadata_for_topics(self.topic) + log.error("Error sending message", exc_info=sys.exc_info()) + raise exp return resp def stop(self, timeout=1): diff --git a/kafka/util.py b/kafka/util.py index d580ad710..c153d629c 100644 --- a/kafka/util.py +++ b/kafka/util.py @@ -3,6 +3,8 @@ import struct from threading import Thread, Event +from common import BufferUnderflowError + def write_int_string(s): if s is None: From 88559a54e3d1f8eff7c32256b96a5ef502938fb3 Mon Sep 17 00:00:00 2001 From: Jim Lim Date: Wed, 2 Oct 2013 13:34:57 -0700 Subject: [PATCH 6/7] actually load metadata --- kafka/producer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka/producer.py b/kafka/producer.py index 3e1fd3386..f7962da8a 100644 --- a/kafka/producer.py +++ b/kafka/producer.py @@ -113,7 +113,7 @@ def _send_upstream(self, queue): self.client.send_produce_request(reqs, acks=self.req_acks, timeout=self.ack_timeout) except Exception as exp: - self.client._load_metadata_for_topics + self.client._load_metadata_for_topics(self.topic) log.error("Error sending message", exc_info=sys.exc_info()) def send_messages(self, partition, *msg): From d6595c5daaeeda81b402600f3440a831d5d97632 Mon Sep 17 00:00:00 2001 From: Jim Lim Date: Mon, 5 Jan 2015 13:51:10 -0800 Subject: [PATCH 7/7] update version in setup.py to q3 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8c6299082..8928dd455 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ def run(self): setup( name="kafka-quixey", - version="0.9.0-q2", + version="0.9.0-q3", tests_require=["tox", "mock"], cmdclass={"test": Tox},