Thanks to visit codestin.com
Credit goes to github.com

Skip to content
This repository was archived by the owner on Dec 9, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ require (
github.com/josharian/native v1.1.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/knqyf263/go-plugin v0.9.0 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/mailru/easyjson v0.9.0 // indirect
github.com/mdlayher/packet v1.1.2 // indirect
github.com/mdlayher/socket v0.5.1 // indirect
Expand Down
67 changes: 65 additions & 2 deletions pkg/driver/dra_hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ func (np *NetworkDriver) PublishResources(ctx context.Context) {
klog.V(4).Infof("Received %d devices", len(devices))
devices = filter.FilterDevices(np.celProgram, devices)
klog.V(4).Infof("After filtering %d devices", len(devices))

np.publishResourcesPrometheusMetrics(devices)

resources := resourceslice.DriverResources{
Pools: map[string]resourceslice.Pool{
np.nodeName: {Slices: []resourceslice.Slice{{Devices: devices}}}},
Expand All @@ -75,11 +78,45 @@ func (np *NetworkDriver) PublishResources(ctx context.Context) {
}
}

func (np *NetworkDriver) publishResourcesPrometheusMetrics(devices []resourceapi.Device) {
rdmaCount := 0
for _, device := range devices {
if attr, ok := device.Attributes[apis.AttrRDMA]; ok && attr.BoolValue != nil && *attr.BoolValue {
rdmaCount++
}
}
publishedDevicesTotal.WithLabelValues("rdma").Set(float64(rdmaCount))
publishedDevicesTotal.WithLabelValues("total").Set(float64(len(devices)))
}

func (np *NetworkDriver) PrepareResourceClaims(ctx context.Context, claims []*resourceapi.ResourceClaim) (map[types.UID]kubeletplugin.PrepareResult, error) {
klog.V(2).Infof("PrepareResourceClaims is called: number of claims: %d", len(claims))
start := time.Now()
defer func() {
draPluginRequestsLatencySeconds.WithLabelValues(methodPrepareResourceClaims).Observe(time.Since(start).Seconds())
}()
result, err := np.prepareResourceClaims(ctx, claims)
if err != nil {
draPluginRequestsTotal.WithLabelValues(methodPrepareResourceClaims, statusFailed).Inc()
return result, err
}
// identify errors and log metrics
isError := false
for _, res := range result {
if res.Err != nil {
isError = true
break
}
}
if isError {
draPluginRequestsTotal.WithLabelValues(methodPrepareResourceClaims, statusFailed).Inc()
} else {
draPluginRequestsTotal.WithLabelValues(methodPrepareResourceClaims, statusSuccess).Inc()
}
return result, err
}

nodePrepareRequestsTotal.Inc()

func (np *NetworkDriver) prepareResourceClaims(ctx context.Context, claims []*resourceapi.ResourceClaim) (map[types.UID]kubeletplugin.PrepareResult, error) {
if len(claims) == 0 {
return nil, nil
}
Expand Down Expand Up @@ -323,6 +360,32 @@ func (np *NetworkDriver) prepareResourceClaim(ctx context.Context, claim *resour

func (np *NetworkDriver) UnprepareResourceClaims(ctx context.Context, claims []kubeletplugin.NamespacedObject) (map[types.UID]error, error) {
klog.V(2).Infof("UnprepareResourceClaims is called: number of claims: %d", len(claims))
start := time.Now()
defer func() {
draPluginRequestsLatencySeconds.WithLabelValues(methodUnprepareResourceClaims).Observe(time.Since(start).Seconds())
}()
result, err := np.unprepareResourceClaims(ctx, claims)
if err != nil {
draPluginRequestsTotal.WithLabelValues(methodUnprepareResourceClaims, statusFailed).Inc()
return result, err
}
// identify errors and log metrics
isError := false
for _, res := range result {
if res != nil {
isError = true
break
}
}
if isError {
draPluginRequestsTotal.WithLabelValues(methodUnprepareResourceClaims, statusFailed).Inc()
} else {
draPluginRequestsTotal.WithLabelValues(methodUnprepareResourceClaims, statusSuccess).Inc()
}
return result, err
}

func (np *NetworkDriver) unprepareResourceClaims(ctx context.Context, claims []kubeletplugin.NamespacedObject) (map[types.UID]error, error) {
if len(claims) == 0 {
return nil, nil
}
Expand Down
241 changes: 241 additions & 0 deletions pkg/driver/dra_hooks_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
/*
Copyright 2024 Google LLC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package driver

import (
"context"
"strings"
"testing"

"github.com/google/dranet/pkg/apis"
"github.com/google/dranet/pkg/inventory"
"github.com/prometheus/client_golang/prometheus/testutil"
resourcev1 "k8s.io/api/resource/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/dynamic-resource-allocation/kubeletplugin"
)

func TestPublishResourcesPrometheusMetrics(t *testing.T) {
testCases := []struct {
name string
devices []resourcev1.Device
expectedRdma float64
expectedTotal float64
}{
{
name: "No devices",
devices: []resourcev1.Device{},
expectedRdma: 0,
expectedTotal: 0,
},
{
name: "Only RDMA devices",
devices: []resourcev1.Device{
{Attributes: map[resourcev1.QualifiedName]resourcev1.DeviceAttribute{
apis.AttrRDMA: {BoolValue: func() *bool { b := true; return &b }()},
}},
},
expectedRdma: 1,
expectedTotal: 1,
},
{
name: "Only non-RDMA devices",
devices: []resourcev1.Device{
{Attributes: map[resourcev1.QualifiedName]resourcev1.DeviceAttribute{
apis.AttrRDMA: {BoolValue: func() *bool { b := false; return &b }()},
}},
},
expectedRdma: 0,
expectedTotal: 1,
},
{
name: "Mixed RDMA and non-RDMA devices",
devices: []resourcev1.Device{
{Attributes: map[resourcev1.QualifiedName]resourcev1.DeviceAttribute{
apis.AttrRDMA: {BoolValue: func() *bool { b := true; return &b }()},
}},
{Attributes: map[resourcev1.QualifiedName]resourcev1.DeviceAttribute{
apis.AttrRDMA: {BoolValue: func() *bool { b := true; return &b }()},
}},
{Attributes: map[resourcev1.QualifiedName]resourcev1.DeviceAttribute{
apis.AttrRDMA: {BoolValue: func() *bool { b := false; return &b }()},
}},
},
expectedRdma: 2,
expectedTotal: 3,
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
publishedDevicesTotal.Reset()
np := &NetworkDriver{}
np.publishResourcesPrometheusMetrics(tc.devices)

if got := testutil.ToFloat64(publishedDevicesTotal.WithLabelValues("rdma")); got != tc.expectedRdma {
t.Errorf("Expected %f for RDMA devices, got %f", tc.expectedRdma, got)
}
if got := testutil.ToFloat64(publishedDevicesTotal.WithLabelValues("total")); got != tc.expectedTotal {
t.Errorf("Expected %f for Total devices, got %f", tc.expectedTotal, got)
}
})
}
}

func TestPrepareResourceClaimsMetrics(t *testing.T) {
ctx := context.Background()

t.Run("Success Case", func(t *testing.T) {
draPluginRequestsTotal.Reset()
draPluginRequestsLatencySeconds.Reset()

np := &NetworkDriver{}
if _, err := np.PrepareResourceClaims(ctx, []*resourcev1.ResourceClaim{}); err != nil {
t.Fatalf("PrepareResourceClaims failed: %v", err)
}

if got := testutil.ToFloat64(draPluginRequestsTotal.WithLabelValues(methodPrepareResourceClaims, statusSuccess)); got != float64(1) {
t.Errorf("Expected 1 success, got %f", got)
}
if got := testutil.ToFloat64(draPluginRequestsTotal.WithLabelValues(methodPrepareResourceClaims, statusFailed)); got != float64(0) {
t.Errorf("Expected 0 failures, got %f", got)
}

expected := `
# HELP dranet_driver_dra_plugin_requests_latency_seconds DRA plugin request latency in seconds.
# TYPE dranet_driver_dra_plugin_requests_latency_seconds histogram
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="PrepareResourceClaims",le="0.005"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="PrepareResourceClaims",le="0.01"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="PrepareResourceClaims",le="0.025"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="PrepareResourceClaims",le="0.05"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="PrepareResourceClaims",le="0.1"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="PrepareResourceClaims",le="0.25"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="PrepareResourceClaims",le="0.5"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="PrepareResourceClaims",le="1"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="PrepareResourceClaims",le="2.5"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="PrepareResourceClaims",le="5"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="PrepareResourceClaims",le="10"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="PrepareResourceClaims",le="+Inf"} 1
`
if err := testutil.CollectAndCompare(draPluginRequestsLatencySeconds, strings.NewReader(expected), "dranet_driver_dra_plugin_requests_latency_seconds_bucket"); err != nil {
t.Fatalf("CollectAndCompare failed: %v", err)
}
})

t.Run("Failure Case", func(t *testing.T) {
draPluginRequestsTotal.Reset()
draPluginRequestsLatencySeconds.Reset()

np := &NetworkDriver{
netdb: inventory.New(),
driverName: "test.driver",
}

claims := []*resourcev1.ResourceClaim{
{
ObjectMeta: metav1.ObjectMeta{UID: "claim-uid-1"},
Status: resourcev1.ResourceClaimStatus{
ReservedFor: []resourcev1.ResourceClaimConsumerReference{
{APIGroup: "", Resource: "pods", Name: "test-pod", UID: "pod-uid-1"},
},
Allocation: &resourcev1.AllocationResult{
Devices: resourcev1.DeviceAllocationResult{
Results: []resourcev1.DeviceRequestAllocationResult{
{Driver: "test.driver", Device: "device-does-not-exist"},
},
},
},
},
},
}

res, err := np.PrepareResourceClaims(ctx, claims)
if err != nil {
t.Fatalf("PrepareResourceClaims failed: %v", err)
}
if res["claim-uid-1"].Err == nil {
t.Errorf("Expected an error for claim-uid-1, but got none")
}

if got := testutil.ToFloat64(draPluginRequestsTotal.WithLabelValues(methodPrepareResourceClaims, statusSuccess)); got != float64(0) {
t.Errorf("Expected 0 successes, got %f", got)
}
if got := testutil.ToFloat64(draPluginRequestsTotal.WithLabelValues(methodPrepareResourceClaims, statusFailed)); got != float64(1) {
t.Errorf("Expected 1 failure, got %f", got)
}

if count := testutil.CollectAndCount(draPluginRequestsLatencySeconds); count != 1 {
t.Errorf("Expected 1 latency metric, got %d", count)
}
})
}

func TestUnprepareResourceClaimsMetrics(t *testing.T) {
ctx := context.Background()

t.Run("Success Case", func(t *testing.T) {
draPluginRequestsTotal.Reset()
draPluginRequestsLatencySeconds.Reset()

np := &NetworkDriver{
podConfigStore: NewPodConfigStore(),
}
claimName := types.NamespacedName{Name: "test-claim", Namespace: "test-ns"}
np.podConfigStore.Set("pod-uid-1", "device-a", PodConfig{Claim: claimName})

claims := []kubeletplugin.NamespacedObject{
{NamespacedName: claimName, UID: "claim-uid-1"},
}

if _, err := np.UnprepareResourceClaims(ctx, claims); err != nil {
t.Fatalf("UnprepareResourceClaims failed: %v", err)
}

// Verify the claim was removed from the store
if _, ok := np.podConfigStore.GetPodConfigs("pod-uid-1"); ok {
t.Errorf("Pod config should have been removed, but was found")
}

if got := testutil.ToFloat64(draPluginRequestsTotal.WithLabelValues(methodUnprepareResourceClaims, statusSuccess)); got != float64(1) {
t.Errorf("Expected 1 success, got %f", got)
}
if got := testutil.ToFloat64(draPluginRequestsTotal.WithLabelValues(methodUnprepareResourceClaims, statusFailed)); got != float64(0) {
t.Errorf("Expected 0 failures, got %f", got)
}

expected := `
# HELP dranet_driver_dra_plugin_requests_latency_seconds DRA plugin request latency in seconds.
# TYPE dranet_driver_dra_plugin_requests_latency_seconds histogram
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="UnprepareResourceClaims",le="0.005"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="UnprepareResourceClaims",le="0.01"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="UnprepareResourceClaims",le="0.025"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="UnprepareResourceClaims",le="0.05"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="UnprepareResourceClaims",le="0.1"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="UnprepareResourceClaims",le="0.25"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="UnprepareResourceClaims",le="0.5"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="UnprepareResourceClaims",le="1"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="UnprepareResourceClaims",le="2.5"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="UnprepareResourceClaims",le="5"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="UnprepareResourceClaims",le="10"} 1
dranet_driver_dra_plugin_requests_latency_seconds_bucket{method="UnprepareResourceClaims",le="+Inf"} 1
`
if err := testutil.CollectAndCompare(draPluginRequestsLatencySeconds, strings.NewReader(expected), "dranet_driver_dra_plugin_requests_latency_seconds_bucket"); err != nil {
t.Fatalf("CollectAndCompare failed: %v", err)
}
})
}
Loading
Loading