Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 2af0c87

Browse files
milan-zededa and claude
authored and committed
Add Signal handler for low-latency LPS config notifications
Open a long-lived GET /api/v1/signal stream to the Local Profile Server and, upon each incoming Signal message, immediately trigger the listed endpoints' pollers -- bypassing the ~1-minute periodic cadence while preserving it as the correctness fallback. This removes the minute-scale delay that operators previously saw between entering a config change in the LPS UI and EVE picking it up. The Signal handler runs as an additional LocalCmdAgent goroutine. Connection open is guarded by the existing startTask/runInterruptible/endTask pattern used by the other pollers; the long body read runs without the task lock so it cannot block pause(). On URL change, UpdateLpsConfig cancels the in-flight stream and wakes the goroutine, which reconnects against the current LPS address. Dispatches are rate-limited (1 signal / 3s, burst 3). LPS 404 throttles reconnect attempts to once per hour. No watchdog is registered -- a legitimately long blocking Read must not trigger a device reboot. A new controllerconn.Client.OpenLocalStream helper provides the streaming HTTP client (reuses DialerWithResolverCache, adds TCP keepalive for dead-peer detection, disables HTTP keep-alive for clean connection teardown, and drops the per-request timeout that SendLocal applies). The existing triggerProfileGET is exported as TriggerProfileGET for symmetry with the other Trigger*POST helpers. Co-Authored-By: Claude Opus 4.7 (1M context) <[email protected]> Signed-off-by: Milan Lenco <[email protected]>
1 parent caca29d commit 2af0c87

4 files changed

Lines changed: 435 additions & 5 deletions

File tree

pkg/pillar/controllerconn/send.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1391,6 +1391,81 @@ func (c *Client) SendLocalProto(destURL string, intf string, ipSrc net.IP,
13911391
return httpResp, nil
13921392
}
13931393

1394+
// OpenLocalStream initiates a long-lived HTTP GET request towards an application
1395+
// deployed on the edge device and returns the response so the caller can stream
1396+
// the body. Unlike SendLocal, no request-level timeout is applied — the caller
1397+
// is responsible for reading the streaming body (typically for the lifetime
1398+
// of the connection) and MUST close resp.Body when done.
1399+
//
1400+
// TCP keepalive is enabled on the underlying connection with the given period
1401+
// to detect silently broken peers; pass 0 to leave keepalive at the OS default.
1402+
// HTTP keep-alive is disabled so that closing resp.Body cleanly shuts down
1403+
// the underlying TCP connection without leaving idle connections in the pool.
1404+
//
1405+
// The request is bound to ctx; cancel ctx to abort an in-flight read.
1406+
func (c *Client) OpenLocalStream(
1407+
ctx context.Context, destURL string, intf string, ipSrc net.IP,
1408+
keepAlive time.Duration) (*http.Response, error) {
1409+
1410+
var reqURL string
1411+
if strings.HasPrefix(destURL, "http:") || strings.HasPrefix(destURL, "https:") {
1412+
reqURL = destURL
1413+
} else {
1414+
reqURL = "https://" + destURL
1415+
}
1416+
1417+
dialer := &DialerWithResolverCache{
1418+
log: c.log,
1419+
ifName: intf,
1420+
localIP: ipSrc,
1421+
timeout: c.NetworkDialTimeout,
1422+
resolverCache: c.ResolverCacheFunc,
1423+
}
1424+
dialContext := func(ctx context.Context, network, address string) (net.Conn, error) {
1425+
conn, err := dialer.DialContext(ctx, network, address)
1426+
if err != nil {
1427+
return nil, err
1428+
}
1429+
if keepAlive > 0 {
1430+
if tcpConn, ok := conn.(*net.TCPConn); ok {
1431+
_ = tcpConn.SetKeepAlive(true)
1432+
_ = tcpConn.SetKeepAlivePeriod(keepAlive)
1433+
} else {
1434+
c.log.Warnf("OpenLocalStream: cannot set TCP keepalive: unexpected conn type %T", conn)
1435+
}
1436+
}
1437+
return conn, nil
1438+
}
1439+
1440+
transport := &http.Transport{
1441+
TLSClientConfig: c.TLSConfig,
1442+
DialContext: dialContext,
1443+
DisableKeepAlives: true,
1444+
}
1445+
// No top-level timeout — the response body is expected to be read over
1446+
// a long period. Cancellation is via ctx.
1447+
client := &http.Client{Transport: transport}
1448+
1449+
req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
1450+
if err != nil {
1451+
return nil, fmt.Errorf("NewRequestWithContext failed: %w", err)
1452+
}
1453+
1454+
// Add a per-request UUID to the HTTP Header for traceability in the receiver.
1455+
id, err := uuid.NewV4()
1456+
if err != nil {
1457+
return nil, fmt.Errorf("uuid.NewV4 failed: %w", err)
1458+
}
1459+
req.Header.Add("X-Request-Id", id.String())
1460+
1461+
c.log.Tracef("OpenLocalStream: url %s", reqURL)
1462+
resp, err := client.Do(req)
1463+
if err != nil {
1464+
return nil, fmt.Errorf("client.Do: %w", err)
1465+
}
1466+
return resp, nil
1467+
}
1468+
13941469
// Describe send attempts in a concise and readable form.
13951470
func (c *Client) describeSendAttempts(attempts []SendAttempt) string {
13961471
var attemptDescriptions []string

pkg/pillar/localcommand/agent.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
package localcommand
55

66
import (
7+
"context"
78
"fmt"
89
"net"
910
"net/url"
@@ -16,6 +17,7 @@ import (
1617
"github.com/lf-edge/eve/pkg/pillar/flextimer"
1718
"github.com/lf-edge/eve/pkg/pillar/types"
1819
uuid "github.com/satori/go.uuid"
20+
"golang.org/x/time/rate"
1921
)
2022

2123
const (
@@ -99,6 +101,12 @@ type LocalCmdAgent struct {
99101

100102
// LPS app boot info posting and boot config receiving
101103
appBootInfoTicker *taskTicker
104+
105+
// Signal handler (streams pending-change notifications from LPS)
106+
sigHandlerMx sync.Mutex
107+
sigHandlerCancel context.CancelFunc // non-nil while a stream is active
108+
sigHandlerLimiter *rate.Limiter
109+
restartSigHandlerCh chan struct{} // buffered; used to trigger restart of the Signal handler
102110
}
103111

104112
// ConstructorArgs are required input arguments for creating a LocalCmdAgent.
@@ -357,6 +365,7 @@ func NewLocalCmdAgent(args ConstructorArgs) *LocalCmdAgent {
357365
lc.initializeDevCommands()
358366
lc.initializeNetworkConfig()
359367
lc.initializeAppBootInfo()
368+
lc.initializeSigHandler()
360369
return lc
361370
}
362371

@@ -375,6 +384,7 @@ func (lc *LocalCmdAgent) RunTasks(args RunArgs) {
375384
go lc.runDevInfoTask()
376385
go lc.runNetworkTask()
377386
go lc.runAppBootInfoTask()
387+
go lc.runSigHandlerTask()
378388
}
379389

380390
// Pause temporarily suspends all tasks, blocking the processing of
@@ -482,7 +492,7 @@ func (lc *LocalCmdAgent) UpdateLpsConfig(globalProfile, lpsAddr, lpsToken string
482492
// If LPS address changed, disable throttling and trigger immediate LPS GET/POST
483493
// requests (from the possibly new LPS instance).
484494
if lpsAddrChanged {
485-
lc.triggerProfileGET()
495+
lc.TriggerProfileGET()
486496
lc.updateRadioTicker(false)
487497
lc.TriggerRadioPOST()
488498
lc.updateAppInfoTicker(false)
@@ -494,6 +504,7 @@ func (lc *LocalCmdAgent) UpdateLpsConfig(globalProfile, lpsAddr, lpsToken string
494504
lc.updateAppBootInfoTicker(false)
495505
lc.TriggerAppBootInfoPOST()
496506
lc.throttledLocation = false
507+
lc.restartSigHandler()
497508
}
498509
return nil
499510
}

pkg/pillar/localcommand/profile.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ func (lc *LocalCmdAgent) runProfileTask() {
4848
<-lc.profileTicker.tickerChan()
4949
lc.Log.Functionf("%s: profileTask: received the first trigger", logPrefix)
5050
// Trigger again to pass into loop.
51-
lc.triggerProfileGET()
51+
lc.TriggerProfileGET()
5252

5353
wdName := watchdogPrefix + "profile"
5454

@@ -167,7 +167,7 @@ func (lc *LocalCmdAgent) getLocalProfileConfig() (
167167
lc.Log.Functionf("%s: getLocalProfileConfig: LPS response discarded "+
168168
"due to task pause", logPrefix)
169169
// Retry ASAP to minimize delay in fetching the latest profile.
170-
lc.triggerProfileGET()
170+
lc.TriggerProfileGET()
171171
return nil, true
172172
}
173173
if err != nil {
@@ -214,8 +214,8 @@ func (lc *LocalCmdAgent) saveOrTouchReceivedLocalProfile(
214214
return
215215
}
216216

217-
// triggerProfileGET notifies task to reload local profile from LPS.
218-
func (lc *LocalCmdAgent) triggerProfileGET() {
217+
// TriggerProfileGET forces an immediate tick of the profileTicker.
218+
func (lc *LocalCmdAgent) TriggerProfileGET() {
219219
lc.profileTicker.tickNow()
220220
}
221221

0 commit comments

Comments
 (0)