@@ -18,56 +18,149 @@ import (
1818)
1919
// ansi is the regular expression source used to strip ANSI escape sequences
// from log text. It has three alternatives: ESC '[' followed by digits and
// semicolons and a final letter; ESC followed by any run of ()[]#;? and a
// final letter or digit; and U+009B followed by digits and semicolons and a
// final letter.
const ansi = "\u001B\\[[0-9;]*[A-Za-z]|\u001B[\\(\\)\\[\\]#;?]*[A-Za-z0-9]|\u009B[0-9;]*[A-Za-z]"

// memlogdSocket is the path of the unix socket dialed to reach the memlogd
// queue. It is a variable rather than a constant so it can be reassigned.
var memlogdSocket = "/run/memlogdq.sock"
2327
2428// getMemlogMsg - goroutine to get messages from memlogd queue
2529func getMemlogMsg (logChan chan inputEntry , panicFileChan chan []byte ) {
26- sockName := fmt .Sprintf ("/run/%s.sock" , "memlogdq" )
27- s , err := net .Dial ("unix" , sockName )
30+ s , err := net .Dial ("unix" , memlogdSocket )
2831 if err != nil {
2932 log .Fatal ("getMemlogMsg: Dial:" , err )
3033 }
3134 defer s .Close ()
3235 log .Functionf ("getMemlogMsg: got socket for memlogdq" )
3336
37+ processMemlogStream (s , logChan , panicFileChan )
38+ }
39+
40+ // processMemlogStream processes the memlogd stream from the provided connection.
41+ // This function is extracted to enable better testing.
42+ func processMemlogStream (conn net.Conn , logChan chan inputEntry , panicFileChan chan []byte ) {
3443 var writeByte byte = 2
3544 readTimeout := 30 * time .Second
3645
3746 // have to write byte value 2 to trigger memlogd queue streaming
38- _ , err = s .Write ([]byte {writeByte })
47+ _ , err := conn .Write ([]byte {writeByte })
3948 if err != nil {
40- log .Fatal ("getMemlogMsg : write to memlogd failed:" , err )
49+ log .Fatal ("processMemlogStream : write to memlogd failed:" , err )
4150 }
4251
4352 var panicStackCount int
44- bufReader := bufio .NewReader (s )
53+ bufReader := bufio .NewReader (conn )
4554 for {
46- if err = s .SetDeadline (time .Now ().Add (readTimeout )); err != nil {
47- log .Fatal ("getMemlogMsg : SetDeadline:" , err )
55+ if err = conn .SetDeadline (time .Now ().Add (readTimeout )); err != nil {
56+ log .Fatal ("processMemlogStream : SetDeadline:" , err )
4857 }
4958
5059 bytes , err := bufReader .ReadBytes ('\n' )
5160 if err != nil {
5261 if err != io .EOF && ! strings .HasSuffix (err .Error (), "i/o timeout" ) {
53- log .Fatal ("getMemlogMsg : bufRead Read:" , err )
62+ log .Fatal ("processMemlogStream : bufRead Read:" , err )
5463 }
5564 }
5665 if len (bytes ) == 0 {
5766 time .Sleep (5 * time .Second )
5867 continue
5968 }
60- var pidStr string
61- // Everything is json, in some cases with an embedded json Msg
62- var logEntry MemlogLogEntry
63- if err := json .Unmarshal (bytes , & logEntry ); err != nil {
64- log .Warnf ("Received non-json from memlogd: %s\n " ,
65- string (bytes ))
69+
70+ // Parse and convert the memlog entry
71+ entry , err := parseMemlogEntry (bytes )
72+ if err != nil {
73+ log .Warn (err )
74+ continue
75+ } else if entry == (inputEntry {}) {
6676 continue
6777 }
6878
69- // Is the Msg itself json?
70- var logInfo Loginfo
79+ // if we are in watchdog going down. fsync often
80+ checkWatchdogRestart (& entry , & panicStackCount , string (bytes ), panicFileChan )
81+
82+ logChan <- entry
83+ }
84+ }
85+
86+ // parseMemlogEntry parses a raw memlogd entry and converts it to an inputEntry.
87+ // Returns the parsed entry and any error encountered during parsing.
88+ func parseMemlogEntry (rawBytes []byte ) (inputEntry , error ) {
89+ var logEntry MemlogLogEntry
90+ if err := json .Unmarshal (rawBytes , & logEntry ); err != nil {
91+ return inputEntry {}, fmt .Errorf ("received non-json from memlogd: %s" , string (rawBytes ))
92+ }
93+
94+ // Parse the log info from the memlog entry
95+ logInfo := parseLogInfo (logEntry )
96+
97+ // don't process kube logs, since they are handled separately in /persist/kubelog
98+ if logInfo .Source == "kube" {
99+ return inputEntry {}, nil
100+ }
101+
102+ // all logs must have the level field
103+ if logInfo .Level == "" {
104+ logInfo .Level = logrus .InfoLevel .String ()
105+ }
106+
107+ logFromApp := isAppLog (logInfo )
108+
109+ // Update metrics
110+ if logFromApp {
111+ logmetrics .AppMetrics .NumInputEvent ++
112+ } else {
113+ logmetrics .DevMetrics .NumInputEvent ++
114+ }
115+
116+ var pidStr string
117+ if logInfo .Pid != 0 {
118+ pidStr = strconv .Itoa (logInfo .Pid )
119+ }
120+
121+ sendToRemote := shouldSendToRemote (logInfo , logFromApp )
122+
123+ entry := inputEntry {
124+ source : logInfo .Source ,
125+ content : logInfo .Msg ,
126+ pid : pidStr ,
127+ timestamp : logInfo .Time ,
128+ function : logInfo .Function ,
129+ filename : logInfo .Filename ,
130+ severity : logInfo .Level ,
131+ appUUID : logInfo .Appuuid ,
132+ acName : logInfo .Containername ,
133+ acLogTime : logInfo .Eventtime ,
134+ sendToRemote : sendToRemote ,
135+ }
136+
137+ return entry , nil
138+ }
139+
140+ // parseLogInfo extracts structured log information from a MemlogLogEntry.
141+ // It handles different log formats including JSON, key=value pairs, and plain text.
142+ func parseLogInfo (logEntry MemlogLogEntry ) Loginfo {
143+ var logInfo Loginfo
144+ // Start with the envelope - if there is no additional info inside msg, then just use the envelope info
145+ logInfo .Source = logEntry .Source
146+ logInfo .Time = logEntry .Time
147+ logInfo .Msg = logEntry .Msg
148+
149+ switch logEntry .Source {
150+ // most logs coming from our services have one of these three formats:
151+ // 1. JSON with logrus fields
152+ // 2. key=value pairs (logrus's standard text format)
153+ // 3. plain text (watchdog, debug and other non-go services as well as guest_vm)
154+ // We handle those three cases in the default case below
155+ // Some services use other logging libraries and formats and need to be added
156+ // as exceptions to ensure proper handling. Those are added as special cases:
157+ case "vector" , "vector.err" , "vector.out" :
158+ // These messages come from vector in different format
159+ // Treat them as plain text for now
160+ // (Vector's JSON format doesn't produce valid JSON (key collision), so we're not using it)
161+
162+ default :
163+ // These messages come from golang's logrus package
71164 if err := json .Unmarshal ([]byte (logEntry .Msg ), & logInfo ); err == nil {
72165 // Use the inner JSON struct
73166 // Go back to the envelope for anything not in the inner JSON
@@ -80,11 +173,6 @@ func getMemlogMsg(logChan chan inputEntry, panicFileChan chan []byte) {
80173 // and keep the original message text and fields
81174 logInfo .Msg = logEntry .Msg
82175 } else {
83- // Start with the envelope
84- logInfo .Source = logEntry .Source
85- logInfo .Time = logEntry .Time
86- logInfo .Msg = logEntry .Msg
87-
88176 // Some messages have attr=val syntax
89177 // If the inner message has Level, Time or Msg set they take
90178 // precedence over the envelope
@@ -99,99 +187,76 @@ func getMemlogMsg(logChan chan inputEntry, panicFileChan chan []byte) {
99187 logInfo .Msg = msg
100188 }
101189 }
190+ }
102191
103- // all logs must have the level field
104- if logInfo .Level == "" {
105- logInfo .Level = logrus .InfoLevel .String ()
106- }
107-
108- logFromApp := strings .Contains (logInfo .Source , "guest_vm" ) || logInfo .Containername != ""
109-
110- if logFromApp {
111- logmetrics .AppMetrics .NumInputEvent ++
112- } else {
113- logmetrics .DevMetrics .NumInputEvent ++
114- }
115-
116- if logInfo .Pid != 0 {
117- pidStr = strconv .Itoa (logInfo .Pid )
118- }
119-
120- // not to upload 'kube' container logs, one can find in /persist/kubelog for detail
121- if logInfo .Source == "kube" {
122- continue
123- }
192+ return logInfo
193+ }
124194
125- sendToRemote := false
126- if ! logFromApp { // there are no granularity nobs for the edge apps' log levels
127- loglevel , err := logrus .ParseLevel (logInfo .Level )
128- if err != nil {
129- log .Errorf ("getMemlogMsg: found invalid log level %s in message from %s" , logInfo .Level , logInfo .Source )
130- } else {
131- // see if we have an agent specific log level
132- if remoteLogLevel , ok := agentsRemoteLogLevel .Load (logInfo .Source ); ok {
133- sendToRemote = loglevel <= remoteLogLevel .(logrus.Level )
134- } else {
135- sendToRemote = loglevel <= agentDefaultRemoteLogLevel .Load ().(logrus.Level )
136- }
137- }
138- }
195+ // isAppLog determines if a log entry is from an application (as opposed to device/system).
196+ func isAppLog (logInfo Loginfo ) bool {
197+ return strings .Contains (logInfo .Source , "guest_vm" ) || logInfo .Containername != ""
198+ }
139199
140- entry := inputEntry {
141- source : logInfo .Source ,
142- content : logInfo .Msg ,
143- pid : pidStr ,
144- timestamp : logInfo .Time ,
145- function : logInfo .Function ,
146- filename : logInfo .Filename ,
147- severity : logInfo .Level ,
148- appUUID : logInfo .Appuuid ,
149- acName : logInfo .Containername ,
150- acLogTime : logInfo .Eventtime ,
151- sendToRemote : sendToRemote ,
152- }
200+ // shouldSendToRemote determines if a log should be sent to the remote endpoint
201+ // based on the configured log levels.
202+ func shouldSendToRemote (logInfo Loginfo , logFromApp bool ) bool {
203+ if logFromApp {
204+ // there are no granularity knobs for the edge apps' log levels
205+ return false
206+ }
153207
154- // if we are in watchdog going down. fsync often
155- checkWatchdogRestart (& entry , & panicStackCount , string (bytes ), panicFileChan )
208+ loglevel , err := logrus .ParseLevel (logInfo .Level )
209+ if err != nil {
210+ log .Errorf ("shouldSendToRemote: found invalid log level %s in message from %s" ,
211+ logInfo .Level , logInfo .Source )
212+ return false
213+ }
156214
157- logChan <- entry
215+ // see if we have an agent specific log level
216+ if remoteLogLevel , ok := agentsRemoteLogLevel .Load (logInfo .Source ); ok {
217+ return loglevel <= remoteLogLevel .(logrus.Level )
158218 }
219+ return loglevel <= agentDefaultRemoteLogLevel .Load ().(logrus.Level )
159220}
160221
161222// Returns level, time and msg if the string contains those attr=val
162223func parseLevelTimeMsg (content string ) (level string , timeStr string , msg string ) {
163- content = remNonPrintable (content )
224+ content = cleanForLogParsing (content )
164225 if strings .Contains (content , ",\" msg\" :" ) {
165226 // Json or something - bail
166227 return
167228 }
168229 level1 := strings .SplitN (content , "level=" , 2 )
169230 if len (level1 ) == 2 {
170231 level2 := strings .Split (level1 [1 ], " " )
171- level = level2 [0 ]
232+ level = strings . ToLower ( level2 [0 ])
172233 }
173234 time1 := strings .SplitN (content , "time=" , 2 )
174- if len (time1 ) == 2 {
235+ if len (time1 ) == 2 && strings . HasPrefix ( time1 [ 1 ], " \" " ) {
175236 time2 := strings .Split (time1 [1 ], "\" " )
176- if len (time2 ) = = 3 {
237+ if len (time2 ) > = 3 {
177238 timeStr = time2 [1 ]
178239 }
179240 }
180241 msg1 := strings .SplitN (content , "msg=" , 2 )
181- if len (msg1 ) == 2 {
242+ if len (msg1 ) == 2 && strings . HasPrefix ( msg1 [ 1 ], " \" " ) {
182243 msg2 := strings .Split (msg1 [1 ], "\" " )
183- if len (msg2 ) = = 3 {
244+ if len (msg2 ) > = 3 {
184245 msg = msg2 [1 ]
185246 }
186247 }
187248 return
188249}
189250
190- func remNonPrintable (str string ) string {
251+ func cleanForLogParsing (str string ) string {
252+ // Remove ANSI escape sequences (colors, cursor movement, etc.)
191253 var re = regexp .MustCompile (ansi )
192- myStr := re .ReplaceAllString (str , "" )
193- myStr = strings .Trim (myStr , "\r " )
194- return strings .Trim (myStr , "\n " )
254+ cleaned := re .ReplaceAllString (str , "" )
255+
256+ // Remove leading/trailing whitespace that interferes with parsing
257+ cleaned = strings .Trim (cleaned , "\r \n " )
258+
259+ return cleaned
195260}
196261
197262// flush more often when we are going down by reading from watchdog log message itself
0 commit comments