From 5b9e0136e6cbfb6c2875117540313d1c1a732db3 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Thu, 9 Jan 2025 11:41:44 +0100
Subject: [PATCH 1/3] DRA API: bump maximum size of ReservedFor to 256

The original limit of 32 seemed sufficient for a single GPU on a node.
But for shared non-local resources it is too low. For example, a
ResourceClaim might be used to allocate an interconnect channel that
connects all pods of a workload running on several different nodes, in
which case the number of pods can be considerably larger.

256 is high enough for currently planned systems. If we need something
even higher in the future, an alternative approach might be needed to
avoid scalability problems.

Normally, increasing such a limit would have to be done incrementally
over two releases. In this case we decided on Slack
(https://kubernetes.slack.com/archives/CJUQN3E4T/p1734593174791519) to
make an exception and apply this change to current master for 1.33 and
backport it to the next 1.32.x patch release for production usage.

This breaks downgrades to a 1.32 release without this change if there
are ResourceClaims with a number of consumers > 32 in ReservedFor. In
practice, this breakage is very unlikely because there are no workloads
yet which need so many consumers and such downgrades to a previous
patch release are also unlikely. Downgrades to 1.31 already weren't
supported when using DRA v1beta1.

Kubernetes-commit: a5de75458e223ef44cf133ca623263fc2b871ddc
---
 resource/v1alpha3/generated.proto                | 2 +-
 resource/v1alpha3/types.go                       | 6 +++---
 resource/v1alpha3/types_swagger_doc_generated.go | 2 +-
 resource/v1beta1/generated.proto                 | 2 +-
 resource/v1beta1/types.go                        | 6 +++---
 resource/v1beta1/types_swagger_doc_generated.go  | 2 +-
 6 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/resource/v1alpha3/generated.proto b/resource/v1alpha3/generated.proto
index 13be7cbd8..e802a0143 100644
--- a/resource/v1alpha3/generated.proto
+++ b/resource/v1alpha3/generated.proto
@@ -675,7 +675,7 @@ message ResourceClaimStatus {
   // which issued it knows that it must put the pod back into the queue,
   // waiting for the ResourceClaim to become usable again.
   //
-  // There can be at most 32 such reservations. This may get increased in
+  // There can be at most 256 such reservations. This may get increased in
   // the future, but not reduced.
   //
   // +optional
diff --git a/resource/v1alpha3/types.go b/resource/v1alpha3/types.go
index e3d7fd894..fb4d7041d 100644
--- a/resource/v1alpha3/types.go
+++ b/resource/v1alpha3/types.go
@@ -687,7 +687,7 @@ type ResourceClaimStatus struct {
 	// which issued it knows that it must put the pod back into the queue,
 	// waiting for the ResourceClaim to become usable again.
 	//
-	// There can be at most 32 such reservations. This may get increased in
+	// There can be at most 256 such reservations. This may get increased in
 	// the future, but not reduced.
 	//
 	// +optional
@@ -715,9 +715,9 @@ type ResourceClaimStatus struct {
 	Devices []AllocatedDeviceStatus `json:"devices,omitempty" protobuf:"bytes,4,opt,name=devices"`
 }
 
-// ReservedForMaxSize is the maximum number of entries in
+// ResourceClaimReservedForMaxSize is the maximum number of entries in
 // claim.status.reservedFor.
-const ResourceClaimReservedForMaxSize = 32
+const ResourceClaimReservedForMaxSize = 256
 
 // ResourceClaimConsumerReference contains enough information to let you
 // locate the consumer of a ResourceClaim. The user must be a resource in the same
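The reservedFor contract documented above is easiest to see from the consumer
side. Below is a minimal sketch, not part of this patch, of how a
scheduler-like client might add a pod to claim.status.reservedFor while
respecting ResourceClaimReservedForMaxSize. It assumes a standard client-go
clientset; the function name reserveForPod and the error handling are
illustrative only.

package reservation

import (
	"context"
	"fmt"

	resourceapi "k8s.io/api/resource/v1alpha3"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
)

// reserveForPod appends a pod to claim.status.reservedFor, enforcing the
// size limit client-side before attempting the write.
func reserveForPod(ctx context.Context, cs kubernetes.Interface, claim *resourceapi.ResourceClaim, podName string, podUID types.UID) error {
	if len(claim.Status.ReservedFor) >= resourceapi.ResourceClaimReservedForMaxSize {
		return fmt.Errorf("claim %s already has the maximum of %d consumers", claim.Name, resourceapi.ResourceClaimReservedForMaxSize)
	}
	claim = claim.DeepCopy()
	claim.Status.ReservedFor = append(claim.Status.ReservedFor, resourceapi.ResourceClaimConsumerReference{
		Resource: "pods", // core API group, so APIGroup stays empty
		Name:     podName,
		UID:      podUID,
	})
	// The status update carries the claim's resourceVersion, so when two
	// schedulers race, only the first write is stored.
	_, err := cs.ResourceV1alpha3().ResourceClaims(claim.Namespace).UpdateStatus(ctx, claim, metav1.UpdateOptions{})
	if apierrors.IsConflict(err) {
		// Another client won the race; the caller should re-queue the pod
		// and retry with a freshly read claim, as the field docs describe.
		return fmt.Errorf("reservedFor update conflict for pod %s: %w", podName, err)
	}
	return err
}

The server-side resourceVersion check is what makes the "only the update that
reaches the API server first gets stored" guarantee in the documentation hold.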
diff --git a/resource/v1alpha3/types_swagger_doc_generated.go b/resource/v1alpha3/types_swagger_doc_generated.go
index 1a71d64c1..b41609d11 100644
--- a/resource/v1alpha3/types_swagger_doc_generated.go
+++ b/resource/v1alpha3/types_swagger_doc_generated.go
@@ -291,7 +291,7 @@ func (ResourceClaimSpec) SwaggerDoc() map[string]string {
 var map_ResourceClaimStatus = map[string]string{
 	"":            "ResourceClaimStatus tracks whether the resource has been allocated and what the result of that was.",
 	"allocation":  "Allocation is set once the claim has been allocated successfully.",
-	"reservedFor": "ReservedFor indicates which entities are currently allowed to use the claim. A Pod which references a ResourceClaim which is not reserved for that Pod will not be started. A claim that is in use or might be in use because it has been reserved must not get deallocated.\n\nIn a cluster with multiple scheduler instances, two pods might get scheduled concurrently by different schedulers. When they reference the same ResourceClaim which already has reached its maximum number of consumers, only one pod can be scheduled.\n\nBoth schedulers try to add their pod to the claim.status.reservedFor field, but only the update that reaches the API server first gets stored. The other one fails with an error and the scheduler which issued it knows that it must put the pod back into the queue, waiting for the ResourceClaim to become usable again.\n\nThere can be at most 32 such reservations. This may get increased in the future, but not reduced.",
+	"reservedFor": "ReservedFor indicates which entities are currently allowed to use the claim. A Pod which references a ResourceClaim which is not reserved for that Pod will not be started. A claim that is in use or might be in use because it has been reserved must not get deallocated.\n\nIn a cluster with multiple scheduler instances, two pods might get scheduled concurrently by different schedulers. When they reference the same ResourceClaim which already has reached its maximum number of consumers, only one pod can be scheduled.\n\nBoth schedulers try to add their pod to the claim.status.reservedFor field, but only the update that reaches the API server first gets stored. The other one fails with an error and the scheduler which issued it knows that it must put the pod back into the queue, waiting for the ResourceClaim to become usable again.\n\nThere can be at most 256 such reservations. This may get increased in the future, but not reduced.",
 	"devices":     "Devices contains the status of each device allocated for this claim, as reported by the driver. This can include driver-specific information. Entries are owned by their respective drivers.",
 }
 
diff --git a/resource/v1beta1/generated.proto b/resource/v1beta1/generated.proto
index 6d525d5b8..4ea13e033 100644
--- a/resource/v1beta1/generated.proto
+++ b/resource/v1beta1/generated.proto
@@ -683,7 +683,7 @@ message ResourceClaimStatus {
   // which issued it knows that it must put the pod back into the queue,
   // waiting for the ResourceClaim to become usable again.
   //
-  // There can be at most 32 such reservations. This may get increased in
+  // There can be at most 256 such reservations. This may get increased in
   // the future, but not reduced.
   //
   // +optional
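The limit documented in these generated docs is enforced when the status is
written. A simplified, illustrative sketch of that kind of upper-bound check
follows; the real validation lives in the main kubernetes/kubernetes
repository (pkg/apis/resource/validation), not in this package, and this
function name is hypothetical.

package validation

import (
	"fmt"

	resourceapi "k8s.io/api/resource/v1beta1"
)

// validateReservedFor mimics the upper-bound check the apiserver applies to
// claim.status.reservedFor: writes with more entries than
// ResourceClaimReservedForMaxSize are rejected.
func validateReservedFor(reservedFor []resourceapi.ResourceClaimConsumerReference) error {
	if len(reservedFor) > resourceapi.ResourceClaimReservedForMaxSize {
		return fmt.Errorf("status.reservedFor may not have more than %d entries, got %d", resourceapi.ResourceClaimReservedForMaxSize, len(reservedFor))
	}
	return nil
}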
diff --git a/resource/v1beta1/types.go b/resource/v1beta1/types.go
index a7f1ee7b5..ca79c5a66 100644
--- a/resource/v1beta1/types.go
+++ b/resource/v1beta1/types.go
@@ -695,7 +695,7 @@ type ResourceClaimStatus struct {
 	// which issued it knows that it must put the pod back into the queue,
 	// waiting for the ResourceClaim to become usable again.
 	//
-	// There can be at most 32 such reservations. This may get increased in
+	// There can be at most 256 such reservations. This may get increased in
 	// the future, but not reduced.
 	//
 	// +optional
@@ -723,9 +723,9 @@ type ResourceClaimStatus struct {
 	Devices []AllocatedDeviceStatus `json:"devices,omitempty" protobuf:"bytes,4,opt,name=devices"`
 }
 
-// ReservedForMaxSize is the maximum number of entries in
+// ResourceClaimReservedForMaxSize is the maximum number of entries in
 // claim.status.reservedFor.
-const ResourceClaimReservedForMaxSize = 32
+const ResourceClaimReservedForMaxSize = 256
 
 // ResourceClaimConsumerReference contains enough information to let you
 // locate the consumer of a ResourceClaim. The user must be a resource in the same
diff --git a/resource/v1beta1/types_swagger_doc_generated.go b/resource/v1beta1/types_swagger_doc_generated.go
index 1d0176cbc..4ecc35d08 100644
--- a/resource/v1beta1/types_swagger_doc_generated.go
+++ b/resource/v1beta1/types_swagger_doc_generated.go
@@ -300,7 +300,7 @@ func (ResourceClaimSpec) SwaggerDoc() map[string]string {
 var map_ResourceClaimStatus = map[string]string{
 	"":            "ResourceClaimStatus tracks whether the resource has been allocated and what the result of that was.",
 	"allocation":  "Allocation is set once the claim has been allocated successfully.",
-	"reservedFor": "ReservedFor indicates which entities are currently allowed to use the claim. A Pod which references a ResourceClaim which is not reserved for that Pod will not be started. A claim that is in use or might be in use because it has been reserved must not get deallocated.\n\nIn a cluster with multiple scheduler instances, two pods might get scheduled concurrently by different schedulers. When they reference the same ResourceClaim which already has reached its maximum number of consumers, only one pod can be scheduled.\n\nBoth schedulers try to add their pod to the claim.status.reservedFor field, but only the update that reaches the API server first gets stored. The other one fails with an error and the scheduler which issued it knows that it must put the pod back into the queue, waiting for the ResourceClaim to become usable again.\n\nThere can be at most 32 such reservations. This may get increased in the future, but not reduced.",
+	"reservedFor": "ReservedFor indicates which entities are currently allowed to use the claim. A Pod which references a ResourceClaim which is not reserved for that Pod will not be started. A claim that is in use or might be in use because it has been reserved must not get deallocated.\n\nIn a cluster with multiple scheduler instances, two pods might get scheduled concurrently by different schedulers. When they reference the same ResourceClaim which already has reached its maximum number of consumers, only one pod can be scheduled.\n\nBoth schedulers try to add their pod to the claim.status.reservedFor field, but only the update that reaches the API server first gets stored. The other one fails with an error and the scheduler which issued it knows that it must put the pod back into the queue, waiting for the ResourceClaim to become usable again.\n\nThere can be at most 256 such reservations. This may get increased in the future, but not reduced.",
 	"devices":     "Devices contains the status of each device allocated for this claim, as reported by the driver. This can include driver-specific information. Entries are owned by their respective drivers.",
 }
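Since a losing scheduler is expected to re-queue the pod and try again, the
reservation flow pairs naturally with client-go's conflict-retry helper. A
hedged sketch follows, assuming ns, claimName, and ref are supplied by the
caller; reserveWithRetry is an illustrative name, not an API from this patch.

package reservation

import (
	"context"

	resourceapi "k8s.io/api/resource/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/util/retry"
)

// reserveWithRetry re-reads the claim and retries the status update whenever
// the API server reports a conflict, i.e. another scheduler won the race.
func reserveWithRetry(ctx context.Context, cs kubernetes.Interface, ns, claimName string, ref resourceapi.ResourceClaimConsumerReference) error {
	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
		// Always work on a fresh copy so the resourceVersion is current.
		claim, err := cs.ResourceV1beta1().ResourceClaims(ns).Get(ctx, claimName, metav1.GetOptions{})
		if err != nil {
			return err
		}
		claim.Status.ReservedFor = append(claim.Status.ReservedFor, ref)
		_, err = cs.ResourceV1beta1().ResourceClaims(ns).UpdateStatus(ctx, claim, metav1.UpdateOptions{})
		return err
	})
}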
From 25d8df3c710cbfd87f34e6fa06ef28b7ef25ffac Mon Sep 17 00:00:00 2001
From: Kubernetes Publisher
Date: Fri, 10 Jan 2025 11:14:31 -0800
Subject: [PATCH 2/3] Merge pull request #129544 from
 pohly/automated-cherry-pick-of-#129543-origin-release-1.32

Automated cherry pick of #129543: DRA API: bump maximum size of
ReservedFor to 256

Kubernetes-commit: e616858316df68aea76390062becc70957f24f66

From bc0564c95e7494dc33c8f4c149f4daad616066eb Mon Sep 17 00:00:00 2001
From: Kubernetes Publisher
Date: Thu, 16 Jan 2025 00:35:39 +0000
Subject: [PATCH 3/3] Update dependencies to v0.32.1 tag

---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 0a8732ce0..780aa4754 100644
--- a/go.mod
+++ b/go.mod
@@ -10,7 +10,7 @@ godebug winsymlink=0
 
 require (
 	github.com/gogo/protobuf v1.3.2
-	k8s.io/apimachinery v0.0.0-20241206181855-59e9003f02d6
+	k8s.io/apimachinery v0.32.1
 )
 
 require (
diff --git a/go.sum b/go.sum
index a18029ed4..2ebba205a 100644
--- a/go.sum
+++ b/go.sum
@@ -88,8 +88,8 @@ gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
 gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
-k8s.io/apimachinery v0.0.0-20241206181855-59e9003f02d6 h1:8ccyzZdQXEqpO3sboLCV3yt9bm6VFoO/jYOGS7gCAQs=
-k8s.io/apimachinery v0.0.0-20241206181855-59e9003f02d6/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE=
+k8s.io/apimachinery v0.32.1 h1:683ENpaCBjma4CYqsmZyhEzrGz6cjn1MY/X2jB2hkZs=
+k8s.io/apimachinery v0.32.1/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE=
 k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
 k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
 k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro=
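For downstream consumers of this module, the equivalent of patch 3 on their
side is running `go get k8s.io/apimachinery@v0.32.1` followed by `go mod
tidy`, which replaces the pseudo-version pin with the tagged release shown in
the go.mod and go.sum hunks above.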