From 2d38a7989ae2b7c58347cb88b8d23a66b6c559e8 Mon Sep 17 00:00:00 2001 From: Sergey Shinderuk Date: Wed, 31 Aug 2022 15:39:12 +0300 Subject: [PATCH] PGPRO-6599: Avoid race when accessing the request shared variable. Consider the following sequence of events: 1. Session 1 calls pg_wait_sampling_reset_profile and sets the request shared variable to PROFILE_RESET. 2. The collector reads request and saves PROFILE_RESET to a local variable. 3. Session 2 queries pg_wait_sampling_profile, which sets request to PROFILE_REQUEST and waits for the collector in shm_mq_receive. 4. The collector continues and clears shared request, thus dropping PROFILE_REQUEST from Session 2. 5. Session 2 waits indefinitely in shm_mq_receive. A similar example with query cancellation: 1. Session 1 queries pg_wait_sampling_history and sets request to HISTORY_REQUEST. 2. Session 1 cancels the query while waiting for the collector. 3. The collector reads request and saves HISTORY_REQUEST to a local variable. 4. Session 2 queries pg_wait_sampling_profile, sets request to PROFILE_REQUEST and waits for the collector. 5. The collector continues and responds to HISTORY_REQUEST. 6. Session 2 receives history data and renders them as profile data returning invalid counts. These interleavings are avoided by acquiring the collector lock before reading request from shared memory in the collector. But we also need to hold the collector lock when we set request in receive_array in a backend. Otherwise, the following interleaving is possible: 1. Session 1 calls pg_wait_sampling_reset_profile and sets request to PROFILE_RESET. 2. Session 2 queries pg_wait_sampling_profile, acquires and releases the collector lock. 3. The collector acquires the lock, reads request and saves PROFILE_RESET to a local variable. 4. Session 2 sets request to PROFILE_REQUEST. 5. The collector clears request, and PROFILE_REQUEST is lost. 6. Session 2 waits indefinitely in shm_mq_receive. Same for the second example above. 
This patch, however, doesn't prevent losing PROFILE_RESET requests: 1. Session 1 calls pg_wait_sampling_reset_profile and sets request to PROFILE_RESET. 2. Session 2 queries pg_wait_sampling_profile before the collector reads request. 3. The collector reads PROFILE_REQUEST, while PROFILE_RESET is lost. To fix this, we could make pg_wait_sampling_reset_profile wait for the collector, but we decided not to, as losing a PROFILE_RESET isn't critical. Resolves #48. Author: Roman Zharkov Reported-By: Alexander Lakhin Reviewed-By: Maksim Milyutin, Sergey Shinderuk --- collector.c | 3 ++- pg_wait_sampling.c | 6 ++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/collector.c b/collector.c index 9bb8cfb..80a559a 100644 --- a/collector.c +++ b/collector.c @@ -441,11 +441,12 @@ collector_main(Datum main_arg) if (collector_hdr->request != NO_REQUEST) { LOCKTAG tag; - SHMRequest request = collector_hdr->request; + SHMRequest request; init_lock_tag(&tag, PGWS_COLLECTOR_LOCK); LockAcquire(&tag, ExclusiveLock, false, false); + request = collector_hdr->request; collector_hdr->request = NO_REQUEST; if (request == HISTORY_REQUEST || request == PROFILE_REQUEST) diff --git a/pg_wait_sampling.c b/pg_wait_sampling.c index 8c45f10..6877a12 100644 --- a/pg_wait_sampling.c +++ b/pg_wait_sampling.c @@ -594,13 +594,11 @@ receive_array(SHMRequest request, Size item_size, Size *count) init_lock_tag(&queueTag, PGWS_QUEUE_LOCK); LockAcquire(&queueTag, ExclusiveLock, false, false); - /* Ensure collector has processed previous request */ init_lock_tag(&collectorTag, PGWS_COLLECTOR_LOCK); LockAcquire(&collectorTag, ExclusiveLock, false, false); - LockRelease(&collectorTag, ExclusiveLock, false); - recv_mq = shm_mq_create(collector_mq, COLLECTOR_QUEUE_SIZE); collector_hdr->request = request; + LockRelease(&collectorTag, ExclusiveLock, false); if (!collector_hdr->latch) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), @@ -770,9 +768,9 @@ 
pg_wait_sampling_reset_profile(PG_FUNCTION_ARGS) init_lock_tag(&tagCollector, PGWS_COLLECTOR_LOCK); LockAcquire(&tagCollector, ExclusiveLock, false, false); + collector_hdr->request = PROFILE_RESET; LockRelease(&tagCollector, ExclusiveLock, false); - collector_hdr->request = PROFILE_RESET; SetLatch(collector_hdr->latch); LockRelease(&tag, ExclusiveLock, false);