@@ -25,60 +25,61 @@ type FileAcquirer interface {
25
25
26
26
// New returns a file cache that will fetch files from a database
27
27
func New (registerer prometheus.Registerer , authz rbac.Authorizer ) * Cache {
28
- return (& Cache {
29
- lock : sync.Mutex {},
30
- data : make (map [uuid.UUID ]* cacheEntry ),
31
- authz : authz ,
32
- }).registerMetrics (registerer )
28
+ return & Cache {
29
+ lock : sync.Mutex {},
30
+ data : make (map [uuid.UUID ]* cacheEntry ),
31
+ authz : authz ,
32
+ cacheMetrics : newCacheMetrics (registerer ),
33
+ }
33
34
}
34
35
35
- func ( c * Cache ) registerMetrics ( registerer prometheus.Registerer ) * Cache {
36
+ func newCacheMetrics ( registerer prometheus.Registerer ) cacheMetrics {
36
37
subsystem := "file_cache"
37
38
f := promauto .With (registerer )
38
39
39
- c . currentCacheSize = f . NewGauge (prometheus. GaugeOpts {
40
- Namespace : "coderd" ,
41
- Subsystem : subsystem ,
42
- Name : "open_files_size_bytes_current" ,
43
- Help : "The current amount of memory of all files currently open in the file cache. " ,
44
- })
45
-
46
- c . totalCacheSize = f . NewCounter (prometheus. CounterOpts {
47
- Namespace : "coderd" ,
48
- Subsystem : subsystem ,
49
- Name : "open_files_size_bytes_total" ,
50
- Help : "The total amount of memory ever opened in the file cache. This number never decrements. " ,
51
- })
52
-
53
- c . currentOpenFiles = f . NewGauge (prometheus. GaugeOpts {
54
- Namespace : "coderd" ,
55
- Subsystem : subsystem ,
56
- Name : "open_files_current" ,
57
- Help : "The count of unique files currently open in the file cache. " ,
58
- })
59
-
60
- c . totalOpenedFiles = f . NewCounter (prometheus. CounterOpts {
61
- Namespace : "coderd" ,
62
- Subsystem : subsystem ,
63
- Name : "open_files_total" ,
64
- Help : "The total count of unique files ever opened in the file cache. " ,
65
- })
66
-
67
- c . currentOpenFileReferences = f . NewGauge (prometheus. GaugeOpts {
68
- Namespace : "coderd" ,
69
- Subsystem : subsystem ,
70
- Name : "open_file_refs_current" ,
71
- Help : "The count of file references currently open in the file cache. Multiple references can be held for the same file. " ,
72
- })
73
-
74
- c . totalOpenFileReferences = f . NewCounterVec (prometheus. CounterOpts {
75
- Namespace : "coderd" ,
76
- Subsystem : subsystem ,
77
- Name : "open_file_refs_total" ,
78
- Help : "The total number of file references ever opened in the file cache. The 'hit' label indicates if the file was loaded from the cache. " ,
79
- }, [] string { " hit" })
80
-
81
- return c
40
+ return cacheMetrics {
41
+ currentCacheSize : f . NewGauge (prometheus. GaugeOpts {
42
+ Namespace : "coderd" ,
43
+ Subsystem : subsystem ,
44
+ Name : "open_files_size_bytes_current " ,
45
+ Help : "The current amount of memory of all files currently open in the file cache." ,
46
+ }),
47
+
48
+ totalCacheSize : f . NewCounter (prometheus. CounterOpts {
49
+ Namespace : "coderd" ,
50
+ Subsystem : subsystem ,
51
+ Name : "open_files_size_bytes_total " ,
52
+ Help : "The total amount of memory ever opened in the file cache. This number never decrements." ,
53
+ }),
54
+
55
+ currentOpenFiles : f . NewGauge (prometheus. GaugeOpts {
56
+ Namespace : "coderd" ,
57
+ Subsystem : subsystem ,
58
+ Name : "open_files_current " ,
59
+ Help : "The count of unique files currently open in the file cache." ,
60
+ }),
61
+
62
+ totalOpenedFiles : f . NewCounter (prometheus. CounterOpts {
63
+ Namespace : "coderd" ,
64
+ Subsystem : subsystem ,
65
+ Name : "open_files_total " ,
66
+ Help : "The total count of unique files ever opened in the file cache." ,
67
+ }),
68
+
69
+ currentOpenFileReferences : f . NewGauge (prometheus. GaugeOpts {
70
+ Namespace : "coderd" ,
71
+ Subsystem : subsystem ,
72
+ Name : "open_file_refs_current " ,
73
+ Help : "The count of file references currently open in the file cache. Multiple references can be held for the same file." ,
74
+ }),
75
+
76
+ totalOpenFileReferences : f . NewCounterVec (prometheus. CounterOpts {
77
+ Namespace : "coderd" ,
78
+ Subsystem : subsystem ,
79
+ Name : "open_file_refs_total " ,
80
+ Help : "The total number of file references ever opened in the file cache. The ' hit' label indicates if the file was loaded from the cache." ,
81
+ }, [] string { "hit" }),
82
+ }
82
83
}
83
84
84
85
// Cache persists the files for template versions, and is used by dynamic
@@ -106,18 +107,21 @@ type cacheMetrics struct {
106
107
totalCacheSize prometheus.Counter
107
108
}
108
109
110
+ type cacheEntry struct {
111
+ // refCount must only be accessed while the cacheEntry lock is held.
112
+ lock sync.Mutex
113
+ refCount int
114
+ value * lazy.ValueWithError [CacheEntryValue ]
115
+
116
+ close func ()
117
+ }
118
+
109
119
type CacheEntryValue struct {
110
120
fs.FS
111
121
Object rbac.Object
112
122
Size int64
113
123
}
114
124
115
- type cacheEntry struct {
116
- // refCount must only be accessed while the Cache lock is held.
117
- refCount int
118
- value * lazy.ValueWithError [CacheEntryValue ]
119
- }
120
-
121
125
var _ fs.FS = (* CloseFS )(nil )
122
126
123
127
// CloseFS is a wrapper around fs.FS that implements io.Closer. The Close()
@@ -142,93 +146,116 @@ func (c *Cache) Acquire(ctx context.Context, db database.Store, fileID uuid.UUID
142
146
// mutex has been released, or we would continue to hold the lock until the
143
147
// entire file has been fetched, which may be slow, and would prevent other
144
148
// files from being fetched in parallel.
145
- it , err := c .prepare (ctx , db , fileID ).Load ()
149
+ e := c .prepare (ctx , db , fileID )
150
+ ev , err := e .value .Load ()
146
151
if err != nil {
147
- c .release (fileID )
152
+ c .purge (fileID )
153
+ return nil , err
154
+ }
155
+
156
+ // We always run the fetch under a system context and actor, so we need to check the caller's
157
+ // context manually before returning.
158
+
159
+ // Check if the caller's context was canceled
160
+ if err := ctx .Err (); err != nil {
148
161
return nil , err
149
162
}
150
163
164
+ // Check that the caller is authorized to access the file
151
165
subject , ok := dbauthz .ActorFromContext (ctx )
152
166
if ! ok {
153
167
return nil , dbauthz .ErrNoActor
154
168
}
155
- // Always check the caller can actually read the file.
156
- if err := c .authz .Authorize (ctx , subject , policy .ActionRead , it .Object ); err != nil {
157
- c .release (fileID )
169
+ if err := c .authz .Authorize (ctx , subject , policy .ActionRead , ev .Object ); err != nil {
170
+ e .close ()
158
171
return nil , err
159
172
}
160
173
161
- var once sync.Once
174
+ var closeOnce sync.Once
162
175
return & CloseFS {
163
- FS : it .FS ,
176
+ FS : ev .FS ,
164
177
close : func () {
165
178
// sync.Once makes the Close() idempotent, so we can call it
166
179
// multiple times without worrying about double-releasing.
167
- once .Do (func () { c .release (fileID ) })
180
+ closeOnce .Do (func () {
181
+ e .close ()
182
+ })
168
183
},
169
184
}, nil
170
185
}
171
186
172
- func (c * Cache ) prepare (ctx context.Context , db database.Store , fileID uuid.UUID ) * lazy. ValueWithError [ CacheEntryValue ] {
187
+ func (c * Cache ) prepare (ctx context.Context , db database.Store , fileID uuid.UUID ) * cacheEntry {
173
188
c .lock .Lock ()
174
189
defer c .lock .Unlock ()
175
190
176
191
hitLabel := "true"
177
192
entry , ok := c .data [fileID ]
178
193
if ! ok {
179
- value := lazy .NewWithError (func () (CacheEntryValue , error ) {
180
- val , err := fetch (ctx , db , fileID )
194
+ hitLabel = "false"
181
195
182
- // Always add to the cache size the bytes of the file loaded.
183
- if err == nil {
196
+ var releaseOnce sync.Once
197
+ entry = & cacheEntry {
198
+ refCount : 0 ,
199
+ value : lazy .NewWithError (func () (CacheEntryValue , error ) {
200
+ val , err := fetch (db , fileID )
201
+ if err != nil {
202
+ // Force future calls to Acquire to trigger a new fetch as soon as
203
+ // a fetch has failed, even if references are still held.
204
+ delete (c .data , fileID )
205
+ return val , err
206
+ }
207
+
208
+ // Add the size of the file to the cache size metrics.
184
209
c .currentCacheSize .Add (float64 (val .Size ))
185
210
c .totalCacheSize .Add (float64 (val .Size ))
186
- }
187
211
188
- return val , err
189
- })
212
+ return val , err
213
+ }),
190
214
191
- entry = & cacheEntry {
192
- value : value ,
193
- refCount : 0 ,
215
+ close : func () {
216
+ entry .lock .Lock ()
217
+ defer entry .lock .Unlock ()
218
+
219
+ entry .refCount --
220
+ c .currentOpenFileReferences .Dec ()
221
+
222
+ if entry .refCount == 0 {
223
+ releaseOnce .Do (func () {
224
+ c .purge (fileID )
225
+ })
226
+ }
227
+ },
194
228
}
195
229
c .data [fileID ] = entry
230
+
196
231
c .currentOpenFiles .Inc ()
197
232
c .totalOpenedFiles .Inc ()
198
- hitLabel = "false"
199
233
}
200
234
235
+ entry .lock .Lock ()
236
+ defer entry .lock .Unlock ()
201
237
c .currentOpenFileReferences .Inc ()
202
238
c .totalOpenFileReferences .WithLabelValues (hitLabel ).Inc ()
203
239
entry .refCount ++
204
- return entry . value
240
+ return entry
205
241
}
206
242
207
- // release decrements the reference count for the given fileID, and frees the
208
- // backing data if there are no further references being held.
209
- //
210
- // release should only be called after a successful call to Acquire using the Release()
211
- // method on the returned *CloseFS.
212
- func (c * Cache ) release (fileID uuid.UUID ) {
243
+ // purge immediately removes an entry from the cache. It should be called when the last reference to an entry is closed, or when loading the entry's value has failed.
244
+ func (c * Cache ) purge (fileID uuid.UUID ) {
213
245
c .lock .Lock ()
214
246
defer c .lock .Unlock ()
215
247
216
248
entry , ok := c .data [fileID ]
217
249
if ! ok {
218
- // If we land here, it's almost certainly because a bug already happened,
219
- // and we're freeing something that's already been freed, or we're calling
220
- // this function with an incorrect ID. Should this function return an error?
221
- return
222
- }
223
-
224
- c .currentOpenFileReferences .Dec ()
225
- entry .refCount --
226
- if entry .refCount > 0 {
250
+ // If we land here, it's probably because of a fetch attempt that
251
+ // resulted in an error, and got purged already. It may also be an
252
+ // erroneous extra close, but we can't really distinguish between those
253
+ // two cases currently.
227
254
return
228
255
}
229
256
257
+ // Purge the file from the cache.
230
258
c .currentOpenFiles .Dec ()
231
-
232
259
ev , err := entry .value .Load ()
233
260
if err == nil {
234
261
c .currentCacheSize .Add (- 1 * float64 (ev .Size ))
@@ -246,11 +273,18 @@ func (c *Cache) Count() int {
246
273
return len (c .data )
247
274
}
248
275
249
- func fetch (ctx context.Context , store database.Store , fileID uuid.UUID ) (CacheEntryValue , error ) {
250
- // Make sure the read does not fail due to authorization issues.
251
- // Authz is checked on the Acquire call, so this is safe.
276
+ func fetch (store database.Store , fileID uuid.UUID ) (CacheEntryValue , error ) {
277
+ // Because many callers can be waiting on the same file fetch concurrently, we
278
+ // want to prevent any failures that would cause them all to receive errors
279
+ // because the caller who initiated the fetch would fail.
280
+ // - We always run the fetch with an uncancelable context, and then check
281
+ // context cancellation for each acquirer afterwards.
282
+ // - We always run the fetch as a system user, and then check authorization
283
+ // for each acquirer afterwards.
284
+ // This prevents a canceled context or an unauthorized user from "holding up
285
+ // the queue".
252
286
//nolint:gocritic
253
- file , err := store .GetFileByID (dbauthz .AsFileReader (ctx ), fileID )
287
+ file , err := store .GetFileByID (dbauthz .AsFileReader (context . Background () ), fileID )
254
288
if err != nil {
255
289
return CacheEntryValue {}, xerrors .Errorf ("failed to read file from database: %w" , err )
256
290
}
0 commit comments