diff --git a/kafka/client.py b/kafka/client.py
index d0e07d072..dd3f8bb86 100644
--- a/kafka/client.py
+++ b/kafka/client.py
@@ -109,7 +109,7 @@ def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn):
         Params
         ======
         payloads: list of object-like entities with a topic and
-                  partition attribute
+                  partition attribute (ProduceRequests)
         encode_fn: a method to encode the list of payloads to a request body,
                    must accept client_id, correlation_id, and payloads as
                    keyword arguments
@@ -269,7 +269,7 @@ def load_metadata_for_topics(self, *topics):
     def send_produce_request(self, payloads=[], acks=1, timeout=1000,
                              fail_on_error=True, callback=None):
         """
-        Encode and send some ProduceRequests
+        Encode and send some ProduceRequests -> (topic, partition, messages)
 
         ProduceRequests will be grouped by (topic, partition) and then
         sent to a specific broker. Output is a list of responses in the
diff --git a/kafka/protocol.py b/kafka/protocol.py
index 58661c7fd..6d81ba0cd 100644
--- a/kafka/protocol.py
+++ b/kafka/protocol.py
@@ -171,6 +171,9 @@ def _decode_message(cls, data, offset):
     #   Public API   #
     ##################
 
+    # Used when sending messages: takes a list of ProduceRequests,
+    # generates a request message (which includes the header fields),
+    # and finally returns a byte array of len(msg), msg.
     @classmethod
     def encode_produce_request(cls, client_id, correlation_id,
                                payloads=None, acks=1, timeout=1000):
@@ -196,12 +199,15 @@ def encode_produce_request(cls, client_id, correlation_id,
         message = cls._encode_message_header(client_id, correlation_id,
                                              KafkaProtocol.PRODUCE_KEY)
 
+        # len(grouped_payloads) = number of distinct topics
         message += struct.pack('>hii', acks, timeout, len(grouped_payloads))
 
         for topic, topic_payloads in grouped_payloads.items():
+            # len(topic_payloads) = number of partitions for that topic
             message += struct.pack('>h%dsi' % len(topic), len(topic), topic,
                                    len(topic_payloads))
 
+            # payload is a ProduceRequest(topic, partition, messages)
             for partition, payload in topic_payloads.items():
                 msg_set = KafkaProtocol._encode_message_set(payload.messages)
                 message += struct.pack('>ii%ds' % len(msg_set), partition,
@@ -209,6 +215,10 @@ def encode_produce_request(cls, client_id, correlation_id,
         return struct.pack('>i%ds' % len(message), len(message), message)
 
+
+    # When a producer sends data, the server replies with a response that
+    # is received via conn.recv. This method decodes that response and
+    # returns a list of ProduceResponse(topic, partition, error, offset).
     @classmethod
     def decode_produce_response(cls, data):
         """
@@ -254,11 +264,13 @@ def encode_fetch_request(cls, client_id, correlation_id, payloads=None,
                                              KafkaProtocol.FETCH_KEY)
 
         # -1 is the replica id
+        # len(grouped_payloads) = number of distinct topics
         message += struct.pack('>iiii', -1, max_wait_time, min_bytes,
                                len(grouped_payloads))
 
         for topic, topic_payloads in grouped_payloads.items():
             message += write_short_string(topic)
+            # len(topic_payloads) = number of partitions for that topic
             message += struct.pack('>i', len(topic_payloads))
             for partition, payload in topic_payloads.items():
                 message += struct.pack('>iqi', partition, payload.offset,
diff --git a/kafka/util.py b/kafka/util.py
index a9182346b..a50f708b5 100644
--- a/kafka/util.py
+++ b/kafka/util.py
@@ -7,6 +7,10 @@
 
 
 def write_int_string(s):
+    """
+    Convert a string into a length-prefixed byte array:
+    returns len(s), s in bytes (or a length of -1 if s is None)
+    """
     if s is None:
         return struct.pack('>i', -1)
     else:
@@ -40,6 +44,10 @@ def read_short_string(data, cur):
 
 
 def read_int_string(data, cur):
+    """
+    Reads a length-prefixed byte array from 'data' starting at position cur:
+    the first 4 bytes denote the size of the string, followed by the string
+    """
     if len(data) < cur + 4:
         raise BufferUnderflowError(
             "Not enough data left to read string len (%d < %d)" %
@@ -69,7 +77,7 @@ def relative_unpack(fmt, data, cur):
 
 
 def group_by_topic_and_partition(tuples):
     out = collections.defaultdict(dict)
     for t in tuples:
-        out[t.topic][t.partition] = t
+        out[t.topic][t.partition] = t  # shouldn't this be append?
     return out
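Note: to make the encode_produce_request annotations above concrete, here is a
minimal standalone sketch of the produce-request body layout implied by the
struct.pack calls in the diff. The header and message-set encoders
(_encode_message_header, _encode_message_set) are private helpers not shown in
the diff, so the message-set encoding is stubbed out as a parameter here;
everything else mirrors the pack calls. Topics are assumed to be bytes.

import struct

def encode_produce_body(grouped_payloads, acks=1, timeout=1000,
                        encode_message_set=lambda msgs: b''):
    # grouped_payloads: {topic: {partition: ProduceRequest}}, as built by
    # group_by_topic_and_partition
    message = b''
    # acks, timeout, number of distinct topics
    message += struct.pack('>hii', acks, timeout, len(grouped_payloads))
    for topic, topic_payloads in grouped_payloads.items():
        # topic as a short string, then the number of partitions for it
        message += struct.pack('>h%dsi' % len(topic), len(topic), topic,
                               len(topic_payloads))
        for partition, payload in topic_payloads.items():
            msg_set = encode_message_set(payload.messages)
            # partition id, then the message set as an int string
            message += struct.pack('>ii%ds' % len(msg_set), partition,
                                   len(msg_set), msg_set)
    # the full request is itself length-prefixed: len(msg), msg
    return struct.pack('>i%ds' % len(message), len(message), message)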
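The write_int_string/read_int_string docstrings added above describe the same
length-prefixed framing from both sides: a 4-byte big-endian length followed
by the payload bytes, with a length of -1 encoding None. A minimal round-trip
sketch (function names are local to this sketch, not part of kafka-python):

import struct

def pack_int_string(s):
    if s is None:
        return struct.pack('>i', -1)                  # None -> length of -1
    return struct.pack('>i%ds' % len(s), len(s), s)   # len(s), then s

def unpack_int_string(data, cur):
    (strlen,) = struct.unpack('>i', data[cur:cur + 4])
    cur += 4
    if strlen == -1:                                  # -1 length means None
        return None, cur
    return data[cur:cur + strlen], cur + strlen

encoded = pack_int_string(b'hello')
value, cur = unpack_int_string(encoded, 0)
assert value == b'hello' and cur == len(encoded)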
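On the "shouldn't this be append?" note at the end of the diff: assignment
appears to be intentional, since each (topic, partition) pair is meant to map
to exactly one payload, whose .messages already holds all the messages for
that partition. A duplicate pair would silently overwrite the earlier payload
rather than accumulate, which is what makes the question worth asking. A quick
sketch of the behaviour, using the ProduceRequest fields named in the diff:

import collections

ProduceRequest = collections.namedtuple('ProduceRequest',
                                        ['topic', 'partition', 'messages'])

def group_by_topic_and_partition(tuples):
    out = collections.defaultdict(dict)
    for t in tuples:
        out[t.topic][t.partition] = t   # one payload per (topic, partition)
    return out

reqs = [ProduceRequest('t1', 0, ['m1']),
        ProduceRequest('t1', 1, ['m2']),
        ProduceRequest('t1', 0, ['m3'])]  # duplicate (topic, partition)

grouped = group_by_topic_and_partition(reqs)
assert grouped['t1'][0].messages == ['m3']  # last one wins; nothing appended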