Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c7e53cd

Browse files
committed
add another metric for retries
1 parent 179ee27 commit c7e53cd

File tree

3 files changed

+56
-22
lines changed

3 files changed

+56
-22
lines changed

coderd/coderdtest/promhelp/metrics.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,26 +39,34 @@ func Compare(reg prometheus.Gatherer, compare string, metricNames ...string) err
3939

4040
// HistogramValue returns the value of a histogram metric with the given name and labels.
4141
func HistogramValue(t testing.TB, reg prometheus.Gatherer, metricName string, labels prometheus.Labels) *io_prometheus_client.Histogram {
42+
t.Helper()
43+
4244
labeled := MetricValue(t, reg, metricName, labels)
4345
require.NotNilf(t, labeled, "metric %q with labels %v not found", metricName, labels)
4446
return labeled.GetHistogram()
4547
}
4648

4749
// GaugeValue returns the value, truncated to an int, of the gauge metric with the given name and labels.
4850
func GaugeValue(t testing.TB, reg prometheus.Gatherer, metricName string, labels prometheus.Labels) int {
51+
t.Helper()
52+
4953
labeled := MetricValue(t, reg, metricName, labels)
5054
require.NotNilf(t, labeled, "metric %q with labels %v not found", metricName, labels)
5155
return int(labeled.GetGauge().GetValue())
5256
}
5357

5458
// CounterValue returns the value, truncated to an int, of the counter metric with the given name and labels.
5559
func CounterValue(t testing.TB, reg prometheus.Gatherer, metricName string, labels prometheus.Labels) int {
60+
t.Helper()
61+
5662
labeled := MetricValue(t, reg, metricName, labels)
5763
require.NotNilf(t, labeled, "metric %q with labels %v not found", metricName, labels)
5864
return int(labeled.GetCounter().GetValue())
5965
}
6066

6167
func MetricValue(t testing.TB, reg prometheus.Gatherer, metricName string, labels prometheus.Labels) *io_prometheus_client.Metric {
68+
t.Helper()
69+
6270
metrics, err := reg.Gather()
6371
require.NoError(t, err)
6472

coderd/database/dbmetrics/dbmetrics.go

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,11 @@ import (
1414

1515
type metricsStore struct {
1616
database.Store
17-
logger slog.Logger
17+
logger slog.Logger
18+
// txDuration is how long transactions take to execute.
1819
txDuration *prometheus.HistogramVec
20+
// txRetries is how many retries we are seeing for a given tx.
21+
txRetries *prometheus.CounterVec
1922
}
2023

2124
// NewDBMetrics returns a database.Store that registers metrics for the database
@@ -27,6 +30,21 @@ func NewDBMetrics(s database.Store, logger slog.Logger, reg prometheus.Registere
2730
if slices.Contains(s.Wrappers(), wrapname) {
2831
return s
2932
}
33+
txRetries := prometheus.NewCounterVec(prometheus.CounterOpts{
34+
Namespace: "coderd",
35+
Subsystem: "db",
36+
Name: "tx_executions_count",
37+
Help: "Total count of transactions executed. 'retries' is expected to be 0 for a successful transaction.",
38+
}, []string{
39+
"success", // "true" when the InTx function returned no error.
40+
// Number of retries, since we have retry logic on serialization errors.
41+
// retries = executions - 1 (as 1 execution is always expected)
42+
"retries",
43+
// Uniquely naming some transactions can help debug reoccurring errors.
44+
"id",
45+
})
46+
reg.MustRegister(txRetries)
47+
3048
txDuration := prometheus.NewHistogramVec(prometheus.HistogramOpts{
3149
Namespace: "coderd",
3250
Subsystem: "db",
@@ -35,15 +53,14 @@ func NewDBMetrics(s database.Store, logger slog.Logger, reg prometheus.Registere
3553
Buckets: prometheus.DefBuckets,
3654
}, []string{
3755
"success", // "true" when the InTx function returned no error.
38-
// Number of executions, since we have retry logic on serialization errors.
39-
"executions",
4056
// Uniquely naming some transactions can help debug reoccurring errors.
4157
"id",
4258
})
4359
reg.MustRegister(txDuration)
4460
return &metricsStore{
4561
Store: s,
4662
txDuration: txDuration,
63+
txRetries: txRetries,
4764
logger: logger,
4865
}
4966
}
@@ -61,14 +78,19 @@ func (m metricsStore) InTx(f func(database.Store) error, options *database.TxOpt
6178
err := m.Store.InTx(f, options)
6279
dur := time.Since(start)
6380
// The number of unique label combinations is
64-
// 2 x 3 (retry count) x #IDs
81+
// 2 (success values) x #IDs x #buckets
6582
// So IDs should be used sparingly to prevent too much bloat.
6683
m.txDuration.With(prometheus.Labels{
67-
"success": strconv.FormatBool(err == nil),
68-
"executions": strconv.FormatInt(int64(options.ExecutionCount()), 10),
69-
"id": options.TxIdentifier, // Can be empty string for unlabeled
84+
"success": strconv.FormatBool(err == nil),
85+
"id": options.TxIdentifier, // Can be empty string for unlabeled
7086
}).Observe(dur.Seconds())
7187

88+
m.txRetries.With(prometheus.Labels{
89+
"success": strconv.FormatBool(err == nil),
90+
"retries": strconv.FormatInt(int64(options.ExecutionCount()-1), 10),
91+
"id": options.TxIdentifier, // Can be empty string for unlabeled
92+
}).Inc()
93+
7294
// Log all serializable transactions that are retried.
7395
// This is expected to happen in production, but should be kept
7496
// to a minimum. If these logs happen frequently, something is wrong.

coderd/database/dbmetrics/dbmetrics_test.go

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package dbmetrics_test
22

33
import (
44
"bytes"
5-
"fmt"
65
"testing"
76

87
"github.com/prometheus/client_golang/prometheus"
@@ -22,11 +21,11 @@ func TestInTxMetrics(t *testing.T) {
2221
t.Parallel()
2322

2423
successLabels := prometheus.Labels{
25-
"success": "true",
26-
"executions": "1",
27-
"id": "",
24+
"success": "true",
25+
"id": "",
2826
}
29-
const inTxMetricName = "coderd_db_tx_duration_seconds"
27+
const inTxHistMetricName = "coderd_db_tx_duration_seconds"
28+
const inTxCountMetricName = "coderd_db_tx_executions_count"
3029
t.Run("QueryMetrics", func(t *testing.T) {
3130
t.Parallel()
3231

@@ -40,7 +39,7 @@ func TestInTxMetrics(t *testing.T) {
4039
require.NoError(t, err)
4140

4241
// Check that the metrics are registered
43-
inTxMetric := promhelp.HistogramValue(t, reg, inTxMetricName, successLabels)
42+
inTxMetric := promhelp.HistogramValue(t, reg, inTxHistMetricName, successLabels)
4443
require.NotNil(t, inTxMetric)
4544
require.Equal(t, uint64(1), inTxMetric.GetSampleCount())
4645
})
@@ -58,7 +57,7 @@ func TestInTxMetrics(t *testing.T) {
5857
require.NoError(t, err)
5958

6059
// Check that the metrics are registered
61-
inTxMetric := promhelp.HistogramValue(t, reg, inTxMetricName, successLabels)
60+
inTxMetric := promhelp.HistogramValue(t, reg, inTxHistMetricName, successLabels)
6261
require.NotNil(t, inTxMetric)
6362
require.Equal(t, uint64(1), inTxMetric.GetSampleCount())
6463
})
@@ -85,21 +84,26 @@ func TestInTxMetrics(t *testing.T) {
8584
require.Error(t, err)
8685

8786
// Check that the metrics are registered
88-
inTxMetric := promhelp.HistogramValue(t, reg, inTxMetricName, prometheus.Labels{
89-
"success": "false",
90-
"executions": "2",
91-
"id": id,
87+
inTxHistMetric := promhelp.HistogramValue(t, reg, inTxHistMetricName, prometheus.Labels{
88+
"success": "false",
89+
"id": id,
9290
})
93-
require.NotNil(t, inTxMetric)
94-
require.Equal(t, uint64(1), inTxMetric.GetSampleCount())
91+
require.NotNil(t, inTxHistMetric)
92+
require.Equal(t, uint64(1), inTxHistMetric.GetSampleCount())
93+
94+
inTxCountMetric := promhelp.CounterValue(t, reg, inTxCountMetricName, prometheus.Labels{
95+
"success": "false",
96+
"retries": "1",
97+
"id": id,
98+
})
99+
require.NotNil(t, inTxCountMetric)
100+
require.Equal(t, 1, inTxCountMetric)
95101

96102
// Also check the logs
97103
require.Contains(t, output.String(), "some dumb error")
98104
require.Contains(t, output.String(), "database transaction hit serialization error and had to retry")
99105
require.Contains(t, output.String(), "success=false")
100106
require.Contains(t, output.String(), "executions=2")
101107
require.Contains(t, output.String(), "id="+id)
102-
103-
fmt.Println(promhelp.RegistryDump(reg))
104108
})
105109
}

0 commit comments

Comments
 (0)