Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Add baseline leader_epoch support for ListOffsets v4 / FetchRequest v10 #2511

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Mar 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions kafka/admin/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1353,7 +1353,7 @@ def _list_consumer_group_offsets_send_request(self, group_id,
Returns:
A message future
"""
version = self._client.api_version(OffsetFetchRequest, max_version=3)
version = self._client.api_version(OffsetFetchRequest, max_version=5)
if version <= 3:
if partitions is None:
if version <= 1:
Expand Down Expand Up @@ -1386,7 +1386,7 @@ def _list_consumer_group_offsets_process_response(self, response):
A dictionary composed of TopicPartition keys and
OffsetAndMetadata values.
"""
if response.API_VERSION <= 3:
if response.API_VERSION <= 5:

# OffsetFetchResponse_v1 lacks a top-level error_code
if response.API_VERSION > 1:
Expand All @@ -1401,13 +1401,18 @@ def _list_consumer_group_offsets_process_response(self, response):
# OffsetAndMetadata values--this is what the Java AdminClient returns
offsets = {}
for topic, partitions in response.topics:
for partition, offset, metadata, error_code in partitions:
for partition_data in partitions:
if response.API_VERSION <= 4:
partition, offset, metadata, error_code = partition_data
leader_epoch = -1
else:
partition, offset, leader_epoch, metadata, error_code = partition_data
error_type = Errors.for_code(error_code)
if error_type is not Errors.NoError:
raise error_type(
"Unable to fetch consumer group offsets for topic {}, partition {}"
.format(topic, partition))
offsets[TopicPartition(topic, partition)] = OffsetAndMetadata(offset, metadata)
offsets[TopicPartition(topic, partition)] = OffsetAndMetadata(offset, metadata, leader_epoch)
else:
raise NotImplementedError(
"Support for OffsetFetchResponse_v{} has not yet been added to KafkaAdminClient."
Expand Down Expand Up @@ -1439,7 +1444,7 @@ def list_consumer_group_offsets(self, group_id, group_coordinator_id=None,

Returns:
dictionary: A dictionary with TopicPartition keys and
OffsetAndMetada values. Partitions that are not specified and for
OffsetAndMetadata values. Partitions that are not specified and for
which the group_id does not have a recorded offset are omitted. An
offset value of `-1` indicates the group_id has no offset for that
TopicPartition. A `-1` can only happen for partitions that are
Expand Down
3 changes: 3 additions & 0 deletions kafka/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ def leader_for_partition(self, partition):
return None
return self._partitions[partition.topic][partition.partition].leader

def leader_epoch_for_partition(self, partition):
    """Return the last known leader epoch for a partition, if cached.

    Arguments:
        partition (TopicPartition): partition to look up.

    Returns:
        int or None: the leader epoch from the most recent cached metadata,
        or None if the topic/partition is not present in the metadata cache
        (consistent with leader_for_partition(), which also returns None
        rather than raising for unknown partitions).
    """
    try:
        return self._partitions[partition.topic][partition.partition].leader_epoch
    except KeyError:
        # Unknown topic or partition: mirror leader_for_partition() and
        # report "unknown" instead of propagating a KeyError to callers.
        return None

def partitions_for_broker(self, broker_id):
"""Return TopicPartitions for which the broker is a leader.

Expand Down
131 changes: 75 additions & 56 deletions kafka/consumer/fetcher.py

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions kafka/consumer/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from kafka.coordinator.assignors.roundrobin import RoundRobinPartitionAssignor
from kafka.metrics import MetricConfig, Metrics
from kafka.protocol.list_offsets import OffsetResetStrategy
from kafka.structs import TopicPartition
from kafka.structs import OffsetAndMetadata, TopicPartition
from kafka.version import __version__

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -732,16 +732,16 @@ def position(self, partition):
partition (TopicPartition): Partition to check

Returns:
int: Offset
int: Offset or None
"""
if not isinstance(partition, TopicPartition):
raise TypeError('partition must be a TopicPartition namedtuple')
assert self._subscription.is_assigned(partition), 'Partition is not assigned'
offset = self._subscription.assignment[partition].position
if offset is None:
position = self._subscription.assignment[partition].position
if position is None:
self._update_fetch_positions([partition])
offset = self._subscription.assignment[partition].position
return offset
position = self._subscription.assignment[partition].position
return position.offset if position else None

def highwater(self, partition):
"""Last known highwater offset for a partition.
Expand Down Expand Up @@ -1144,7 +1144,7 @@ def _message_generator_v2(self):
log.debug("Not returning fetched records for partition %s"
" since it is no longer fetchable", tp)
break
self._subscription.assignment[tp].position = record.offset + 1
self._subscription.assignment[tp].position = OffsetAndMetadata(record.offset + 1, '', -1)
yield record

def _message_generator(self):
Expand Down
7 changes: 4 additions & 3 deletions kafka/consumer/subscription_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def all_consumed_offsets(self):
all_consumed = {}
for partition, state in six.iteritems(self.assignment):
if state.has_valid_position:
all_consumed[partition] = OffsetAndMetadata(state.position, '')
all_consumed[partition] = state.position
return all_consumed

def need_offset_reset(self, partition, offset_reset_strategy=None):
Expand Down Expand Up @@ -379,7 +379,7 @@ def __init__(self):
self.paused = False # whether this partition has been paused by the user
self.awaiting_reset = False # whether we are awaiting reset
self.reset_strategy = None # the reset strategy if awaitingReset is set
self._position = None # offset exposed to the user
self._position = None # OffsetAndMetadata exposed to the user
self.highwater = None
self.drop_pending_record_batch = False
# The last message offset hint available from a record batch with
Expand All @@ -388,6 +388,7 @@ def __init__(self):

def _set_position(self, offset):
assert self.has_valid_position, 'Valid position required'
assert isinstance(offset, OffsetAndMetadata)
self._position = offset

def _get_position(self):
Expand All @@ -403,7 +404,7 @@ def await_reset(self, strategy):
self.has_valid_position = False

def seek(self, offset):
self._position = offset
self._position = OffsetAndMetadata(offset, '', -1)
self.awaiting_reset = False
self.reset_strategy = None
self.has_valid_position = True
Expand Down
6 changes: 3 additions & 3 deletions kafka/coordinator/consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,7 @@ def _send_offset_commit_request(self, offsets):
topic, [(
partition,
offset.offset,
-1, # leader_epoch
offset.leader_epoch,
offset.metadata
) for partition, offset in six.iteritems(partitions)]
) for topic, partitions in six.iteritems(offset_data)]
Expand Down Expand Up @@ -809,7 +809,6 @@ def _handle_offset_fetch_response(self, future, response):
else:
metadata, error_code = partition_data[2:]
leader_epoch = -1
# TODO: save leader_epoch!
tp = TopicPartition(topic, partition)
error_type = Errors.for_code(error_code)
if error_type is not Errors.NoError:
Expand All @@ -836,7 +835,8 @@ def _handle_offset_fetch_response(self, future, response):
elif offset >= 0:
# record the position with the offset
# (-1 indicates no committed offset to fetch)
offsets[tp] = OffsetAndMetadata(offset, metadata)
# TODO: save leader_epoch
offsets[tp] = OffsetAndMetadata(offset, metadata, -1)
else:
log.debug("Group %s has no committed offset for partition"
" %s", self.group_id, tp)
Expand Down
1 change: 1 addition & 0 deletions kafka/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,7 @@ class UnknownLeaderEpochError(BrokerResponseError):
message = 'UNKNOWN_LEADER_EPOCH'
description = 'The leader epoch in the request is newer than the epoch on the broker.'
retriable = True
invalid_metadata = True


class UnsupportedCompressionTypeError(BrokerResponseError):
Expand Down
2 changes: 1 addition & 1 deletion kafka/protocol/list_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ class ListOffsetsRequest_v4(Request):
('topic', String('utf-8')),
('partitions', Array(
('partition', Int32),
('current_leader_epoch', Int64),
('current_leader_epoch', Int32),
('timestamp', Int64)))))
)
DEFAULTS = {
Expand Down
140 changes: 140 additions & 0 deletions kafka/protocol/offset_for_leader_epoch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
from __future__ import absolute_import

from kafka.protocol.api import Request, Response
from kafka.protocol.types import Array, CompactArray, CompactString, Int16, Int32, Int64, Schema, String, TaggedFields


class OffsetForLeaderEpochResponse_v0(Response):
    """OffsetForLeaderEpoch response v0 (API key 23).

    Fixed base class: this is a response schema and must subclass Response,
    not Request (the module imports Response but the original never used it).
    """
    API_KEY = 23
    API_VERSION = 0
    SCHEMA = Schema(
        ('topics', Array(
            ('topic', String('utf-8')),
            ('partitions', Array(
                ('error_code', Int16),
                ('partition', Int32),
                ('end_offset', Int64))))))


class OffsetForLeaderEpochResponse_v1(Response):
    """OffsetForLeaderEpoch response v1 (API key 23).

    v1 adds a per-partition leader_epoch field.
    Fixed base class: response schemas must subclass Response, not Request.
    """
    API_KEY = 23
    API_VERSION = 1
    SCHEMA = Schema(
        ('topics', Array(
            ('topic', String('utf-8')),
            ('partitions', Array(
                ('error_code', Int16),
                ('partition', Int32),
                ('leader_epoch', Int32),
                ('end_offset', Int64))))))


class OffsetForLeaderEpochResponse_v2(Response):
    """OffsetForLeaderEpoch response v2 (API key 23).

    v2 adds a top-level throttle_time_ms field.
    Fixed base class: response schemas must subclass Response, not Request.
    """
    API_KEY = 23
    API_VERSION = 2
    SCHEMA = Schema(
        ('throttle_time_ms', Int32),
        ('topics', Array(
            ('topic', String('utf-8')),
            ('partitions', Array(
                ('error_code', Int16),
                ('partition', Int32),
                ('leader_epoch', Int32),
                ('end_offset', Int64))))))


class OffsetForLeaderEpochResponse_v3(Response):
    """OffsetForLeaderEpoch response v3 (API key 23); wire schema unchanged from v2.

    Fixed base class: response schemas must subclass Response, not Request.
    """
    API_KEY = 23
    API_VERSION = 3
    SCHEMA = OffsetForLeaderEpochResponse_v2.SCHEMA


class OffsetForLeaderEpochResponse_v4(Response):
    """OffsetForLeaderEpoch response v4 (API key 23).

    v4 switches to the flexible/compact wire format (CompactArray,
    CompactString, tagged fields).
    Fixed base class: response schemas must subclass Response, not Request.
    """
    API_KEY = 23
    API_VERSION = 4
    SCHEMA = Schema(
        ('throttle_time_ms', Int32),
        ('topics', CompactArray(
            ('topic', CompactString('utf-8')),
            ('partitions', CompactArray(
                ('error_code', Int16),
                ('partition', Int32),
                ('leader_epoch', Int32),
                ('end_offset', Int64),
                ('tags', TaggedFields))),
            ('tags', TaggedFields))),
        ('tags', TaggedFields))


class OffsetForLeaderEpochRequest_v0(Request):
    """OffsetForLeaderEpoch request v0 (API key 23).

    Per topic/partition, sends the leader_epoch the client wants the end
    offset for.
    """
    API_KEY = 23
    API_VERSION = 0
    RESPONSE_TYPE = OffsetForLeaderEpochResponse_v0
    SCHEMA = Schema(
        ('topics', Array(
            ('topic', String('utf-8')),
            ('partitions', Array(
                ('partition', Int32),
                ('leader_epoch', Int32))))))


class OffsetForLeaderEpochRequest_v1(Request):
    """OffsetForLeaderEpoch request v1 (API key 23); wire schema unchanged from v0."""
    API_KEY = 23
    API_VERSION = 1
    RESPONSE_TYPE = OffsetForLeaderEpochResponse_v1
    SCHEMA = OffsetForLeaderEpochRequest_v0.SCHEMA


class OffsetForLeaderEpochRequest_v2(Request):
    """OffsetForLeaderEpoch request v2 (API key 23).

    v2 adds a per-partition current_leader_epoch field alongside the queried
    leader_epoch.
    """
    API_KEY = 23
    API_VERSION = 2
    RESPONSE_TYPE = OffsetForLeaderEpochResponse_v2
    SCHEMA = Schema(
        ('topics', Array(
            ('topic', String('utf-8')),
            ('partitions', Array(
                ('partition', Int32),
                ('current_leader_epoch', Int32),
                ('leader_epoch', Int32))))))


class OffsetForLeaderEpochRequest_v3(Request):
    """OffsetForLeaderEpoch request v3 (API key 23).

    v3 adds a top-level replica_id field (presumably -1 for normal consumer
    clients, as with fetch/list-offsets requests -- confirm against callers).
    """
    API_KEY = 23
    API_VERSION = 3
    RESPONSE_TYPE = OffsetForLeaderEpochResponse_v3
    SCHEMA = Schema(
        ('replica_id', Int32),
        ('topics', Array(
            ('topic', String('utf-8')),
            ('partitions', Array(
                ('partition', Int32),
                ('current_leader_epoch', Int32),
                ('leader_epoch', Int32))))))


class OffsetForLeaderEpochRequest_v4(Request):
    """OffsetForLeaderEpoch request v4 (API key 23).

    v4 switches to the flexible/compact wire format (CompactArray,
    CompactString, tagged fields).
    """
    API_KEY = 23
    API_VERSION = 4
    RESPONSE_TYPE = OffsetForLeaderEpochResponse_v4
    SCHEMA = Schema(
        ('replica_id', Int32),
        ('topics', CompactArray(
            ('topic', CompactString('utf-8')),
            ('partitions', CompactArray(
                ('partition', Int32),
                ('current_leader_epoch', Int32),
                ('leader_epoch', Int32),
                ('tags', TaggedFields))),
            ('tags', TaggedFields))),
        ('tags', TaggedFields))

# Version-indexed schema lists: element i is the class for API version i
# (0-4), matching the lookup convention used by the other protocol modules.
OffsetForLeaderEpochRequest = [
    OffsetForLeaderEpochRequest_v0, OffsetForLeaderEpochRequest_v1,
    OffsetForLeaderEpochRequest_v2, OffsetForLeaderEpochRequest_v3,
    OffsetForLeaderEpochRequest_v4,
]
OffsetForLeaderEpochResponse = [
    OffsetForLeaderEpochResponse_v0, OffsetForLeaderEpochResponse_v1,
    OffsetForLeaderEpochResponse_v2, OffsetForLeaderEpochResponse_v3,
    OffsetForLeaderEpochResponse_v4,
]
4 changes: 4 additions & 0 deletions kafka/record/default_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@ def __init__(self, buffer):
def base_offset(self):
return self._header_data[0]

@property
def leader_epoch(self):
return self._header_data[2]

@property
def magic(self):
return self._header_data[3]
Expand Down
7 changes: 4 additions & 3 deletions kafka/structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,20 +55,21 @@
Keyword Arguments:
offset (int): The offset to be committed
metadata (str): Non-null metadata
leader_epoch (int): The last known epoch from the leader / broker
"""
OffsetAndMetadata = namedtuple("OffsetAndMetadata",
# TODO add leaderEpoch: OffsetAndMetadata(offset, leaderEpoch, metadata)
["offset", "metadata"])
["offset", "metadata", "leader_epoch"])


"""An offset and timestamp tuple

Keyword Arguments:
offset (int): An offset
timestamp (int): The timestamp associated to the offset
leader_epoch (int): The last known epoch from the leader / broker
"""
OffsetAndTimestamp = namedtuple("OffsetAndTimestamp",
["offset", "timestamp"])
["offset", "timestamp", "leader_epoch"])

MemberInformation = namedtuple("MemberInformation",
["member_id", "client_id", "client_host", "member_metadata", "member_assignment"])
Expand Down
7 changes: 4 additions & 3 deletions test/test_consumer_integration.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import time

from mock import patch
from mock import patch, ANY
import pytest
from kafka.vendor.six.moves import range

Expand Down Expand Up @@ -258,9 +258,10 @@ def test_kafka_consumer_offsets_search_many_partitions(kafka_consumer, kafka_pro
tp1: send_time
})

leader_epoch = ANY if env_kafka_version() >= (2, 1) else -1
assert offsets == {
tp0: OffsetAndTimestamp(p0msg.offset, send_time),
tp1: OffsetAndTimestamp(p1msg.offset, send_time)
tp0: OffsetAndTimestamp(p0msg.offset, send_time, leader_epoch),
tp1: OffsetAndTimestamp(p1msg.offset, send_time, leader_epoch)
}

offsets = consumer.beginning_offsets([tp0, tp1])
Expand Down
Loading