Latency compensation feature.

wlame · wlame · commit f126219a5d07 · 2026-03-09T18:50:16.000+01:00
diff --git a/README.md b/README.md
@@ -180,7 +180,7 @@ Every successful request returns JSON with the **full resolved configuration** 
 }
 ```
 
-Optional fields appear only when non-zero: `burst_size` (token_bucket), `window_seconds` (sliding_window), `queue_timeout`, `dynamic`.
+Optional fields appear only when non-zero: `burst_size` (token_bucket), `window_seconds` (sliding_window), `queue_timeout`, `latency_compensation`, `dynamic`. `network_latency_ms` appears whenever a valid `X-Sent-At` header is sent, even when its value is `0`.
 
 | Field           | Description |
 |-----------------|-------------|
@@ -190,6 +190,8 @@ Optional fields appear only when non-zero: `burst_size` (token_bucket), `window_
 | `max_queue_size`| Maximum queue capacity |
 | `overflow`      | What happens when queue is full (`reject` or `block`) |
 | `dynamic`       | `true` if this endpoint was auto-created from an unconfigured path |
+| `latency_compensation` | Configured latency compensation in ms |
+| `network_latency_ms` | One-way network latency in ms, computed from the `X-Sent-At` header (present only when the header is sent and parses as an integer) |
 
 When the queue is full (`overflow: reject`) or the estimated wait exceeds `queue_timeout`, rls returns HTTP 429:
 
@@ -244,6 +246,33 @@ Set `queue_timeout` (seconds) to reject requests upfront when the predicted wait
 
 Clients can override per-request with the `?timeout=N` query parameter (e.g. `?timeout=999` to effectively disable). A value of `0` (default) disables the check entirely. The timeout prediction is skipped for `lifo` and `random` schedulers where wait time is unpredictable.
 
+### Latency compensation
+
+When a client calls rls and then the target API, the total delay includes the network round-trip to rls. Set `latency_compensation` (ms) to release tickets early, so the actual API call hits the target closer to the ideal rate interval:
+
+```yaml
+defaults:
+  latency_compensation: 20  # compensate for 20ms one-way network latency
+
+endpoints:
+- path: "/api"
+  rate: 10
+  latency_compensation: 15  # per-endpoint override
+```
+
+Formula: `effective_interval = max(1ms, 1/rate - compensation_ms/1000)`. At 10 RPS (100ms interval) with 20ms compensation, the effective interval becomes 80ms (12.5 effective RPS). Defaults to `0` (no compensation, identical behavior to before).
+
+### `X-Sent-At` header
+
+Clients can send `X-Sent-At: <unix_milliseconds>` to measure one-way network latency. The server computes `network_latency_ms = now - sent_at` and includes it in the response for observability:
+
+```bash
+curl -H "X-Sent-At: $(date +%s%3N)" http://localhost:8080/
+# Response: {..., "network_latency_ms": 23}
+```
+
+If the header is missing or unparseable, the field is omitted. If the timestamp is in the future (e.g. due to clock skew between client and server), the value is clamped to 0.
+
 ## Client example (Python)
 
 ```python
diff --git a/config/config.go b/config/config.go
@@ -21,6 +21,7 @@ type Defaults struct {
 	MaxQueueSize        int     `yaml:"max_queue_size" json:"max_queue_size"`
 	Overflow            string  `yaml:"overflow" json:"overflow"`
 	QueueTimeout        float64 `yaml:"queue_timeout" json:"queue_timeout"`
+	LatencyCompensation float64 `yaml:"latency_compensation" json:"latency_compensation"`
 	MaxDynamicEndpoints int     `yaml:"max_dynamic_endpoints" json:"max_dynamic_endpoints"`
 }
 
@@ -35,8 +36,9 @@ type EndpointConfig struct {
 	Overflow      string  `yaml:"overflow" json:"overflow"`
 	BurstSize     int     `yaml:"burst_size" json:"burst_size"`
 	WindowSeconds int     `yaml:"window_seconds" json:"window_seconds"`
-	QueueTimeout  float64 `yaml:"queue_timeout" json:"queue_timeout"`
-	Dynamic       bool    `yaml:"-" json:"-"`
+	QueueTimeout        float64 `yaml:"queue_timeout" json:"queue_timeout"`
+	LatencyCompensation float64 `yaml:"latency_compensation" json:"latency_compensation"`
+	Dynamic             bool    `yaml:"-" json:"-"`
 }
 
 // Config is the top-level configuration.
@@ -132,6 +134,9 @@ func ApplyDefaults(cfg *Config) {
 		if ep.Overflow == "" {
 			ep.Overflow = d.Overflow
 		}
+		if ep.LatencyCompensation == 0 {
+			ep.LatencyCompensation = d.LatencyCompensation
+		}
 	}
 }
 
@@ -165,6 +170,9 @@ func InheritFrom(child, parent EndpointConfig) EndpointConfig {
 	if child.QueueTimeout == 0 {
 		child.QueueTimeout = parent.QueueTimeout
 	}
+	if child.LatencyCompensation == 0 {
+		child.LatencyCompensation = parent.LatencyCompensation
+	}
 	return child
 }
 
diff --git a/config/config_test.go b/config/config_test.go
@@ -401,6 +401,43 @@ func TestInheritFrom_DynamicAndPathPreserved(t *testing.T) {
 	}
 }
 
+func TestInheritFrom_LatencyCompensation(t *testing.T) {
+	parent := EndpointConfig{Path: "/", Rate: 10, LatencyCompensation: 20}
+	child := EndpointConfig{Path: "/child"}
+	got := InheritFrom(child, parent)
+	if got.LatencyCompensation != 20 {
+		t.Errorf("latency_compensation: got %f, want 20", got.LatencyCompensation)
+	}
+
+	// Child with own value keeps it.
+	child2 := EndpointConfig{Path: "/child2", LatencyCompensation: 5}
+	got2 := InheritFrom(child2, parent)
+	if got2.LatencyCompensation != 5 {
+		t.Errorf("latency_compensation: got %f, want 5", got2.LatencyCompensation)
+	}
+}
+
+func TestApplyDefaults_LatencyCompensation(t *testing.T) {
+	cfg := &Config{
+		Defaults: Defaults{LatencyCompensation: 15},
+		Endpoints: []EndpointConfig{
+			{Path: "/", Rate: 1},
+			{Path: "/api", Rate: 2, LatencyCompensation: 10},
+		},
+	}
+	ApplyDefaults(cfg)
+
+	// Root should inherit from defaults.
+	for _, ep := range cfg.Endpoints {
+		if ep.Path == "/" && ep.LatencyCompensation != 15 {
+			t.Errorf("/: latency_compensation: got %f, want 15", ep.LatencyCompensation)
+		}
+		if ep.Path == "/api" && ep.LatencyCompensation != 10 {
+			t.Errorf("/api: latency_compensation: got %f, want 10 (own value)", ep.LatencyCompensation)
+		}
+	}
+}
+
 func TestMergeOverrides_Empty(t *testing.T) {
 	cfg := &Config{Server: ServerConfig{Host: "1.2.3.4", Port: 1234}}
 	if err := MergeOverrides(cfg, map[string]string{}); err != nil {
diff --git a/endpoint/endpoint.go b/endpoint/endpoint.go
@@ -34,8 +34,9 @@ func New(cfg config.EndpointConfig, opts ...Option) (*Endpoint, error) {
 	}
 
 	l, err := limiter.New(cfg.Algorithm, cfg.Rate, cfg.Unit, limiter.LimiterOptions{
-		BurstSize:     cfg.BurstSize,
-		WindowSeconds: cfg.WindowSeconds,
+		BurstSize:      cfg.BurstSize,
+		WindowSeconds:  cfg.WindowSeconds,
+		CompensationMs: cfg.LatencyCompensation,
 	})
 	if err != nil {
 		return nil, err
@@ -155,7 +156,18 @@ func (e *Endpoint) Handle(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	resp := buildResponse(e.cfg, e.queue.Len(), ticket.EnqueuedAt)
+	var networkLatencyMs *int64
+	if sentAt := r.Header.Get("X-Sent-At"); sentAt != "" {
+		if ms, err := strconv.ParseInt(sentAt, 10, 64); err == nil {
+			latency := time.Now().UnixMilli() - ms
+			if latency < 0 {
+				latency = 0
+			}
+			networkLatencyMs = &latency
+		}
+	}
+
+	resp := buildResponse(e.cfg, e.queue.Len(), ticket.EnqueuedAt, networkLatencyMs)
 	e.emit(Event{Kind: EventServed, Path: e.cfg.Path, WaitedMs: resp.QueuedForMs, QueueDepth: resp.QueueDepth})
 	req := r.URL.RawQuery
 	if req == "" {
diff --git a/endpoint/endpoint_test.go b/endpoint/endpoint_test.go
@@ -6,6 +6,7 @@ import (
 	"net/http"
 	"net/http/httptest"
 	"sort"
+	"strconv"
 	"strings"
 	"sync"
 	"testing"
@@ -147,7 +148,7 @@ func TestEndpoint_PriorityHeader_Invalid(t *testing.T) {
 func TestBuildResponse_Fields(t *testing.T) {
 	cfg := baseConfig("/test", 5)
 	now := time.Now().Add(-50 * time.Millisecond) // simulated 50ms wait
-	resp := buildResponse(cfg, 3, now)
+	resp := buildResponse(cfg, 3, now, nil)
 
 	if !resp.OK {
 		t.Error("ok: want true")
@@ -189,7 +190,7 @@ func TestBuildResponse_AllConfigFields(t *testing.T) {
 		QueueTimeout:  5.5,
 		Dynamic:       false,
 	}
-	resp := buildResponse(cfg, 0, time.Now())
+	resp := buildResponse(cfg, 0, time.Now(), nil)
 
 	if resp.Algorithm != "token_bucket" {
 		t.Errorf("algorithm: got %q", resp.Algorithm)
@@ -222,7 +223,7 @@ func TestBuildResponse_DynamicEndpoint(t *testing.T) {
 		Overflow:     "reject",
 		Dynamic:      true,
 	}
-	resp := buildResponse(cfg, 2, time.Now().Add(-100*time.Millisecond))
+	resp := buildResponse(cfg, 2, time.Now().Add(-100*time.Millisecond), nil)
 
 	if !resp.Dynamic {
 		t.Error("dynamic: want true for dynamic endpoint")
@@ -244,7 +245,7 @@ func TestBuildResponse_JSONContainsAllFields(t *testing.T) {
 		Algorithm: "token_bucket", MaxQueueSize: 500, Overflow: "reject",
 		BurstSize: 20, WindowSeconds: 60, QueueTimeout: 3, Dynamic: true,
 	}
-	resp := buildResponse(cfg, 5, time.Now().Add(-200*time.Millisecond))
+	resp := buildResponse(cfg, 5, time.Now().Add(-200*time.Millisecond), nil)
 
 	data, err := json.Marshal(resp)
 	if err != nil {
@@ -275,7 +276,7 @@ func TestBuildResponse_OmitsZeroOptionalFields(t *testing.T) {
 		Overflow:     "reject",
 		// BurstSize, WindowSeconds, QueueTimeout all zero
 	}
-	resp := buildResponse(cfg, 0, time.Now())
+	resp := buildResponse(cfg, 0, time.Now(), nil)
 
 	if resp.BurstSize != 0 {
 		t.Errorf("burst_size: got %d, want 0", resp.BurstSize)
@@ -926,6 +927,89 @@ func TestEndpoint_ClientDisconnect_HandlerReturns(t *testing.T) {
 	wg.Wait()
 }
 
+func TestEndpoint_XSentAt_ReturnsNetworkLatency(t *testing.T) {
+	ep, err := New(baseConfig("/", 100))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer ep.Stop()
+
+	sentAt := time.Now().Add(-50 * time.Millisecond).UnixMilli()
+	req := httptest.NewRequest(http.MethodGet, "/", nil)
+	req.Header.Set("X-Sent-At", strconv.FormatInt(sentAt, 10))
+	rr := httptest.NewRecorder()
+	ep.Handle(rr, req)
+
+	var resp Response
+	if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil {
+		t.Fatal(err)
+	}
+	if resp.NetworkLatencyMs == nil {
+		t.Fatal("expected network_latency_ms in response")
+	}
+	if *resp.NetworkLatencyMs < 40 || *resp.NetworkLatencyMs > 200 {
+		t.Errorf("network_latency_ms: got %d, want ~50", *resp.NetworkLatencyMs)
+	}
+}
+
+func TestEndpoint_XSentAt_MissingHeader_OmitsField(t *testing.T) {
+	ep, err := New(baseConfig("/", 100))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer ep.Stop()
+
+	req := httptest.NewRequest(http.MethodGet, "/", nil)
+	rr := httptest.NewRecorder()
+	ep.Handle(rr, req)
+
+	if strings.Contains(rr.Body.String(), "network_latency_ms") {
+		t.Error("network_latency_ms should be omitted when X-Sent-At is absent")
+	}
+}
+
+func TestEndpoint_XSentAt_FutureTimestamp_ClampsToZero(t *testing.T) {
+	ep, err := New(baseConfig("/", 100))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer ep.Stop()
+
+	sentAt := time.Now().Add(10 * time.Second).UnixMilli()
+	req := httptest.NewRequest(http.MethodGet, "/", nil)
+	req.Header.Set("X-Sent-At", strconv.FormatInt(sentAt, 10))
+	rr := httptest.NewRecorder()
+	ep.Handle(rr, req)
+
+	var resp Response
+	if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil {
+		t.Fatal(err)
+	}
+	if resp.NetworkLatencyMs == nil {
+		t.Fatal("expected network_latency_ms in response")
+	}
+	if *resp.NetworkLatencyMs != 0 {
+		t.Errorf("network_latency_ms: got %d, want 0 (clamped)", *resp.NetworkLatencyMs)
+	}
+}
+
+func TestEndpoint_XSentAt_InvalidValue_OmitsField(t *testing.T) {
+	ep, err := New(baseConfig("/", 100))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer ep.Stop()
+
+	req := httptest.NewRequest(http.MethodGet, "/", nil)
+	req.Header.Set("X-Sent-At", "not-a-number")
+	rr := httptest.NewRecorder()
+	ep.Handle(rr, req)
+
+	if strings.Contains(rr.Body.String(), "network_latency_ms") {
+		t.Error("network_latency_ms should be omitted when X-Sent-At is invalid")
+	}
+}
+
 func TestRegistry_LongestPrefixWins(t *testing.T) {
 	cfgs := []config.EndpointConfig{
 		baseConfig("/api", 10),
diff --git a/endpoint/response.go b/endpoint/response.go
@@ -21,28 +21,32 @@ type Response struct {
 	Overflow      string  `json:"overflow"`
 	BurstSize     int     `json:"burst_size,omitempty"`
 	WindowSeconds int     `json:"window_seconds,omitempty"`
-	QueueTimeout  float64 `json:"queue_timeout,omitempty"`
-	Dynamic       bool    `json:"dynamic,omitempty"`
+	QueueTimeout        float64 `json:"queue_timeout,omitempty"`
+	LatencyCompensation float64 `json:"latency_compensation,omitempty"`
+	NetworkLatencyMs    *int64  `json:"network_latency_ms,omitempty"`
+	Dynamic             bool    `json:"dynamic,omitempty"`
 }
 
 // buildResponse constructs a Response from the endpoint config, current queue depth,
 // and the time the ticket was enqueued. All config fields are included — for dynamic
 // endpoints this reflects the fully resolved inherited values.
-func buildResponse(cfg config.EndpointConfig, queueDepth int, enqueuedAt time.Time) Response {
+func buildResponse(cfg config.EndpointConfig, queueDepth int, enqueuedAt time.Time, networkLatencyMs *int64) Response {
 	return Response{
-		OK:            true,
-		Endpoint:      cfg.Path,
-		QueuedForMs:   time.Since(enqueuedAt).Milliseconds(),
-		QueueDepth:    queueDepth,
-		Rate:          cfg.Rate,
-		Unit:          cfg.Unit,
-		Scheduler:     cfg.Scheduler,
-		Algorithm:     cfg.Algorithm,
-		MaxQueueSize:  cfg.MaxQueueSize,
-		Overflow:      cfg.Overflow,
-		BurstSize:     cfg.BurstSize,
-		WindowSeconds: cfg.WindowSeconds,
-		QueueTimeout:  cfg.QueueTimeout,
-		Dynamic:       cfg.Dynamic,
+		OK:                  true,
+		Endpoint:            cfg.Path,
+		QueuedForMs:         time.Since(enqueuedAt).Milliseconds(),
+		QueueDepth:          queueDepth,
+		Rate:                cfg.Rate,
+		Unit:                cfg.Unit,
+		Scheduler:           cfg.Scheduler,
+		Algorithm:           cfg.Algorithm,
+		MaxQueueSize:        cfg.MaxQueueSize,
+		Overflow:            cfg.Overflow,
+		BurstSize:           cfg.BurstSize,
+		WindowSeconds:       cfg.WindowSeconds,
+		QueueTimeout:        cfg.QueueTimeout,
+		LatencyCompensation: cfg.LatencyCompensation,
+		NetworkLatencyMs:    networkLatencyMs,
+		Dynamic:             cfg.Dynamic,
 	}
 }
diff --git a/limiter/limiter.go b/limiter/limiter.go
@@ -3,6 +3,7 @@ package limiter
 import (
 	"context"
 	"fmt"
+	"math"
 )
 
 // Limiter controls when rate-limited slots are made available.
@@ -21,8 +22,9 @@ type BurstQuerier interface {
 
 // LimiterOptions carries algorithm-specific configuration.
 type LimiterOptions struct {
-	BurstSize     int // token_bucket: max accumulated tokens
-	WindowSeconds int // sliding_window: observation window length
+	BurstSize      int     // token_bucket: max accumulated tokens
+	WindowSeconds  int     // sliding_window: observation window length
+	CompensationMs float64 // latency compensation: release tickets early by this many ms
 }
 
 // New creates a Limiter for the given algorithm, rate, and unit.
@@ -34,6 +36,11 @@ func New(algorithm string, rate float64, unit string, opts LimiterOptions) (Limi
 		return nil, fmt.Errorf("rate must be > 0, got %f %s", rate, unit)
 	}
 
+	if opts.CompensationMs > 0 {
+		interval := 1.0/rps - opts.CompensationMs/1000.0
+		rps = 1.0 / math.Max(0.001, interval)
+	}
+
 	switch algorithm {
 	case "strict":
 		return NewStrict(rps), nil
diff --git a/limiter/limiter_test.go b/limiter/limiter_test.go

Original file line number	Diff line number	Diff line change
`@@ -21,6 +21,7 @@ type Defaults struct {`
`21`	`21`	MaxQueueSize int `yaml:"max_queue_size" json:"max_queue_size"`
`22`	`22`	Overflow string `yaml:"overflow" json:"overflow"`
`23`	`23`	QueueTimeout float64 `yaml:"queue_timeout" json:"queue_timeout"`
	`24`	+ LatencyCompensation float64 `yaml:"latency_compensation" json:"latency_compensation"`
`24`	`25`	MaxDynamicEndpoints int `yaml:"max_dynamic_endpoints" json:"max_dynamic_endpoints"`
`25`	`26`	`}`
`26`	`27`
`@@ -35,8 +36,9 @@ type EndpointConfig struct {`
`35`	`36`	Overflow string `yaml:"overflow" json:"overflow"`
`36`	`37`	BurstSize int `yaml:"burst_size" json:"burst_size"`
`37`	`38`	WindowSeconds int `yaml:"window_seconds" json:"window_seconds"`
`38`		- QueueTimeout float64 `yaml:"queue_timeout" json:"queue_timeout"`
`39`		- Dynamic bool `yaml:"-" json:"-"`
	`39`	+ QueueTimeout float64 `yaml:"queue_timeout" json:"queue_timeout"`
	`40`	+ LatencyCompensation float64 `yaml:"latency_compensation" json:"latency_compensation"`
	`41`	+ Dynamic bool `yaml:"-" json:"-"`
`40`	`42`	`}`
`41`	`43`
`42`	`44`	`// Config is the top-level configuration.`
`@@ -132,6 +134,9 @@ func ApplyDefaults(cfg *Config) {`
`132`	`134`	`if ep.Overflow == "" {`
`133`	`135`	`ep.Overflow = d.Overflow`
`134`	`136`	`}`
	`137`	`+ if ep.LatencyCompensation == 0 {`
	`138`	`+ ep.LatencyCompensation = d.LatencyCompensation`
	`139`	`+ }`
`135`	`140`	`}`
`136`	`141`	`}`
`137`	`142`
`@@ -165,6 +170,9 @@ func InheritFrom(child, parent EndpointConfig) EndpointConfig {`
`165`	`170`	`if child.QueueTimeout == 0 {`
`166`	`171`	`child.QueueTimeout = parent.QueueTimeout`
`167`	`172`	`}`
	`173`	`+ if child.LatencyCompensation == 0 {`
	`174`	`+ child.LatencyCompensation = parent.LatencyCompensation`
	`175`	`+ }`
`168`	`176`	`return child`
`169`	`177`	`}`
`170`	`178`