diff --git a/README.md b/README.md
index f4bd9bd..fef370e 100644
--- a/README.md
+++ b/README.md
@@ -27,19 +27,27 @@ When `pg_wait_sampling` is enabled, it collects two kinds of statistics.
    recent samples depending on history size (configurable).  Assuming there is
    a client who periodically read this history and dump it somewhere, user
    can have continuous history.
- * Waits profile.  It's implemented as in-memory hash table where count
-   of samples are accumulated per each process and each wait event
-   (and each query with `pg_stat_statements`).  This hash
-   table can be reset by user request.  Assuming there is a client who
-   periodically dumps profile and resets it, user can have statistics of
-   intensivity of wait events among time.
-
-In combination with `pg_stat_statements` this extension can also provide
-per query statistics.
+ * Waits profile.  It's implemented as a bounded in-memory hash table where counts
+   of samples are accumulated per triple of process pid, wait event and query id
+   (when query id computation is enabled on the PG server; on versions below 14
+   this requires the `pg_stat_statements` extension). The least used entries are
+   evicted when the hash table overflows. The hash table can also be reset by
+   user request. Assuming there is a client who periodically dumps the profile and
+   computes differential counters from adjacent dumps, the user can have statistics
+   of the intensity of wait events over time.
+
+Starting from PG14 this extension can activate query id computation on the server
+side to enable per query id statistics. Older PG versions require installing the
+`pg_stat_statements` extension for this purpose.
 
 `pg_wait_sampling` launches special background worker for gathering the
 statistics above.
 
+The profile statistics as well as history items are not persisted to disk, so
+a server restart resets all accumulated data. This is not crucial for
+profile counters because we are primarily interested in differential values, not
+absolute values of these counters.
+
 Availability
 ------------
 
@@ -125,24 +133,28 @@ in-memory hash table.
 The work of wait event statistics collector worker is controlled by following
 GUCs.
 
-|         Parameter name              | Data type |                  Description                | Default value |
-| ----------------------------------- | --------- | ------------------------------------------- | ------------: |
-| pg_wait_sampling.history_size       | int4      | Size of history in-memory ring buffer       |          5000 |
-| pg_wait_sampling.history_period     | int4      | Period for history sampling in milliseconds |            10 |
-| pg_wait_sampling.profile_period     | int4      | Period for profile sampling in milliseconds |            10 |
-| pg_wait_sampling.profile_pid        | bool      | Whether profile should be per pid           |          true |
-| pg_wait_sampling.profile_queries    | bool      | Whether profile should be per query			|          true |
+|         Parameter name               | Data type |                  Description                                                        | Default value | Change policy |
+| ------------------------------------ | --------- | ----------------------------------------------------------------------------------- | ------------- | ------------- |
+| pg_wait_sampling.max_profile_entries | int4      | Maximum number of entries in profile hash table                                     |          5000 |       restart |
+| pg_wait_sampling.history_size        | int4      | Size of history in-memory ring buffer                                               |          5000 |       restart |
+| pg_wait_sampling.profile_period      | int4      | Period for profile sampling in milliseconds (zero value disables profile gathering) |            10 |        reload |
+| pg_wait_sampling.history_period      | int4      | Period for history sampling in milliseconds (zero value disables history gathering) |             0 |        reload |
+| pg_wait_sampling.profile_pid         | bool      | Whether profile should be per pid                                                   |          true |       restart |
+| pg_wait_sampling.profile_queries     | bool      | Whether profile should be per query                                                 |          true |       restart |
 
 If `pg_wait_sampling.profile_pid` is set to false, sampling profile wouldn't be
-collected in per-process manner.  In this case the value of pid could would
-be always zero and corresponding row contain samples among all the processes.
+collected in per-process manner.  In this case the value of pid will be NULL and
+corresponding rows contain samples among all the processes.
 
-While `pg_wait_sampling.profile_queries` is set to false `queryid` field in
-views will be zero.
+__Caution__:
+When sampling per pid is enabled, all profile entries for already completed
+processes are left in the hash table. Therefore, it's necessary to reset the
+profile periodically: otherwise, once 32-bit pid values are recycled, samples
+of current processes may be added to profile entries that belong to old
+processes with the same pid values.
 
-These GUCs are allowed to be changed by superuser.  Also, they are placed into
-shared memory.  Thus, they could be changed from any backend and affects worker
-runtime.
+While `pg_wait_sampling.profile_queries` is set to false, the `queryid` field in
+views will be NULL.
 
 See
 [PostgreSQL documentation](http://www.postgresql.org/docs/devel/static/monitoring-stats.html#WAIT-EVENT-TABLE)
diff --git a/collector.c b/collector.c
index dcb9695..af3337f 100644
--- a/collector.c
+++ b/collector.c
@@ -5,301 +5,221 @@
  * Copyright (c) 2015-2016, Postgres Professional
  *
  * IDENTIFICATION
- *	  contrib/pg_wait_sampling/pg_wait_sampling.c
+ *	  contrib/pg_wait_sampling/collector.c
  */
 #include "postgres.h"
 
-#include "catalog/pg_type.h"
 #if PG_VERSION_NUM >= 130000
 #include "common/hashfn.h"
 #endif
-#include "funcapi.h"
-#include "miscadmin.h"
+#include "pgstat.h"
 #include "postmaster/bgworker.h"
+#if PG_VERSION_NUM >= 130000
+#include "postmaster/interrupt.h"
+#endif
 #include "storage/ipc.h"
-#include "storage/procarray.h"
+#include "storage/proc.h"
 #include "storage/procsignal.h"
-#include "storage/shm_mq.h"
-#include "storage/shm_toc.h"
-#include "storage/spin.h"
-#include "utils/memutils.h"
-#include "utils/resowner.h"
-#include "pgstat.h"
+#include "utils/guc.h"
 
 #include "compat.h"
 #include "pg_wait_sampling.h"
 
+static const double USAGE_INIT = 1.0;
+static const double USAGE_INCREASE = 1.0;
+static const double USAGE_DECREASE_FACTOR = 0.99;
+static const int USAGE_DEALLOC_PERCENT = 5;
+static const int USAGE_DEALLOC_MIN_NUM = 10;
 static volatile sig_atomic_t shutdown_requested = false;
 
 static void handle_sigterm(SIGNAL_ARGS);
 
-/*
- * Register background worker for collecting waits history.
- */
-void
-pgws_register_wait_collector(void)
+static void
+handle_sigterm(SIGNAL_ARGS)
 {
-	BackgroundWorker worker;
-
-	/* Set up background worker parameters */
-	memset(&worker, 0, sizeof(worker));
-	worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
-	worker.bgw_start_time = BgWorkerStart_ConsistentState;
-	worker.bgw_restart_time = 1;
-	worker.bgw_notify_pid = 0;
-	snprintf(worker.bgw_library_name, BGW_MAXLEN, "pg_wait_sampling");
-	snprintf(worker.bgw_function_name, BGW_MAXLEN, CppAsString(pgws_collector_main));
-	snprintf(worker.bgw_name, BGW_MAXLEN, "pg_wait_sampling collector");
-	worker.bgw_main_arg = (Datum) 0;
-	RegisterBackgroundWorker(&worker);
+	int save_errno = errno;
+	shutdown_requested = true;
+	if (MyProc)
+		SetLatch(&MyProc->procLatch);
+	errno = save_errno;
 }
 
 /*
- * Allocate memory for waits history.
+ * qsort comparator for sorting into increasing usage order
  */
-static void
-alloc_history(History *observations, int count)
+static int
+entry_cmp(const void *lhs, const void *rhs)
 {
-	observations->items = (HistoryItem *) palloc0(sizeof(HistoryItem) * count);
-	observations->index = 0;
-	observations->count = count;
-	observations->wraparound = false;
+	double l_usage = (*(ProfileHashEntry *const *) lhs)->usage;
+	double r_usage = (*(ProfileHashEntry *const *) rhs)->usage;
+
+	if (l_usage < r_usage)
+		return -1;
+	else if (l_usage > r_usage)
+		return +1;
+	else
+		return 0;
 }
 
 /*
- * Reallocate memory for changed number of history items.
+ * Deallocate least used entries in profile hashtable.
+ * Caller must hold an exclusive lock.
  */
 static void
-realloc_history(History *observations, int count)
+pgws_entry_dealloc()
 {
-	HistoryItem	   *newitems;
-	int				copyCount,
-					i,
-					j;
-
-	/* Allocate new array for history */
-	newitems = (HistoryItem *) palloc0(sizeof(HistoryItem) * count);
-
-	/* Copy entries from old array to the new */
-	if (observations->wraparound)
-		copyCount = observations->count;
-	else
-		copyCount = observations->index;
+	HASH_SEQ_STATUS hash_seq;
+	ProfileHashEntry **entries;
+	ProfileHashEntry  *entry;
+	int			nvictims;
+	int			i;
 
-	copyCount = Min(copyCount, count);
+	/*
+	 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
+	 * While we're scanning the table, apply the decay factor to the usage
+	 * values.
+	 */
+	entries = palloc(
+		hash_get_num_entries(pgws_profile_hash) * sizeof(ProfileHashEntry *)
+	);
 
 	i = 0;
-	if (observations->wraparound)
-		j = observations->index + 1;
-	else
-		j = 0;
-	while (i < copyCount)
+	hash_seq_init(&hash_seq, pgws_profile_hash);
+	while ((entry = hash_seq_search(&hash_seq)) != NULL)
 	{
-		if (j >= observations->count)
-			j = 0;
-		memcpy(&newitems[i], &observations->items[j], sizeof(HistoryItem));
-		i++;
-		j++;
+		entries[i++] = entry;
+		entry->usage *= USAGE_DECREASE_FACTOR;
 	}
 
-	/* Switch to new history array */
-	pfree(observations->items);
-	observations->items = newitems;
-	observations->index = copyCount;
-	observations->count = count;
-	observations->wraparound = false;
-}
-
-static void
-handle_sigterm(SIGNAL_ARGS)
-{
-	int save_errno = errno;
-	shutdown_requested = true;
-	if (MyProc)
-		SetLatch(&MyProc->procLatch);
-	errno = save_errno;
-}
+	qsort(entries, i, sizeof(ProfileHashEntry *), entry_cmp);
 
-/*
- * Get next item of history with rotation.
- */
-static HistoryItem *
-get_next_observation(History *observations)
-{
-	HistoryItem *result;
+	/*
+	 * Remove USAGE_DEALLOC_PERCENT percent of the entries, but at least
+	 * USAGE_DEALLOC_MIN_NUM of them (or all entries if fewer than that
+	 * exist).
+	 */
+	nvictims = Max(USAGE_DEALLOC_MIN_NUM, i * USAGE_DEALLOC_PERCENT / 100);
+	nvictims = Min(nvictims, i);
 
-	if (observations->index >= observations->count)
+	for (i = 0; i < nvictims; i++)
 	{
-		observations->index = 0;
-		observations->wraparound = true;
+		hash_search(pgws_profile_hash, &entries[i]->key, HASH_REMOVE, NULL);
 	}
-	result = &observations->items[observations->index];
-	observations->index++;
-	return result;
+
+	pfree(entries);
 }
 
 /*
- * Read current waits from backends and write them to history array
- * and/or profile hash.
+ * Read current waits from backends and write them to shared structures
  */
 static void
-probe_waits(History *observations, HTAB *profile_hash,
-			bool write_history, bool write_profile, bool profile_pid)
+probe_waits(const bool write_history, const bool write_profile)
 {
-	int			i,
-				newSize;
-	TimestampTz	ts = GetCurrentTimestamp();
-
-	/* Realloc waits history if needed */
-	newSize = pgws_collector_hdr->historySize;
-	if (observations->count != newSize)
-		realloc_history(observations, newSize);
+	if (write_profile)
+		LWLockAcquire(pgws_profile_lock, LW_EXCLUSIVE);
+	if (write_history)
+		LWLockAcquire(pgws_history_lock, LW_EXCLUSIVE);
 
-	/* Iterate PGPROCs under shared lock */
+	/*
+	 * Iterate PGPROCs under shared lock.
+	 *
+	 * TODO:
+	 * ProcArrayLock is heavy enough and in current case we might perform the
+	 * non-trivial deallocation routine for profile hash table under this lock.
+	 * Therefore to reduce possible contention it's worth to segregate the logic
+	 * of PGPROCs iteration under ProcArrayLock and storing results to profile
+	 * and/or history under corresponding another lock.
+	 */
 	LWLockAcquire(ProcArrayLock, LW_SHARED);
-	for (i = 0; i < ProcGlobal->allProcCount; i++)
+	for (int i = 0; i < ProcGlobal->allProcCount; i++)
 	{
-		HistoryItem		item,
-					   *observation;
-		PGPROC		   *proc = &ProcGlobal->allProcs[i];
+		PGPROC	   *proc = GetPGProcByNumber(i);
+		pgwsQueryId queryId = WhetherProfileQueryId ? pgws_proc_queryids[i] : 0;
+		int32 	 	wait_event_info = proc->wait_event_info,
+					pid = proc->pid;
 
-		if (proc->pid == 0)
+		/*
+		 * FIXME: zero pid actually doesn't indicate that process slot is freed.
+		 * After process termination this field remains unchanged and thereby
+		 * stores the pid of the previous process. A possible indicator of process
+		 * termination might be the condition `proc->procLatch.owner_pid == 0`.
+		 * But even in this case ProcArrayLock doesn't protect the `owner_pid`
+		 * field from concurrent modifications that might cause race conditions.
+		 *
+		 * Another option is to use the lists of freed PGPROCs from ProcGlobal:
+		 * freeProcs, walsenderFreeProcs, bgworkerFreeProcs and autovacFreeProcs
+		 * to define indexes of all freed slots in allProcs. But this requires
+		 * acquiring ProcStructLock spinlock that is impractical for iteration
+		 * over so long lists.
+		 *
+		 * The most appropriate solution here is to iterate over ProcArray items
+		 * under ProcArrayLock and over AuxiliaryProcs under ProcStructLock
+		 * spinlock (AuxiliaryProcs contains just NUM_AUXILIARY_PROCS=5 slots)
+		 * or without any locks as it's done in pg_stat_get_activity() function.
+		 * These arrays are not accessible externally and require to add some
+		 * iterator object into corresponding containing modules.
+		 */
+		if (pid == 0)
 			continue;
 
-		if (proc->wait_event_info == 0)
+		// TODO: take into account the state without waiting as CPU time
+		if (wait_event_info == 0)
 			continue;
 
-		/* Collect next wait event sample */
-		item.pid = proc->pid;
-		item.wait_event_info = proc->wait_event_info;
-
-		if (pgws_collector_hdr->profileQueries)
-			item.queryId = pgws_proc_queryids[i];
-		else
-			item.queryId = 0;
-
-		item.ts = ts;
-
 		/* Write to the history if needed */
 		if (write_history)
 		{
-			observation = get_next_observation(observations);
-			*observation = item;
+			int index = pgws_history_ring->index % HistoryBufferSize;
+
+			pgws_history_ring->items[index] = (HistoryItem) {
+				pid, wait_event_info, queryId, GetCurrentTimestamp()
+			};
+			pgws_history_ring->index++;
 		}
 
 		/* Write to the profile if needed */
 		if (write_profile)
 		{
-			ProfileItem	   *profileItem;
-			bool			found;
+			ProfileHashKey		key;
+			ProfileHashEntry   *entry;
 
-			if (!profile_pid)
-				item.pid = 0;
+			/* Set up key for hashtable search */
+			key.pid = WhetherProfilePid ? pid : 0;
+			key.wait_event_info = wait_event_info;
+			key.queryid = queryId;
 
-			profileItem = (ProfileItem *) hash_search(profile_hash, &item, HASH_ENTER, &found);
-			if (found)
-				profileItem->count++;
-			else
-				profileItem->count = 1;
-		}
-	}
-	LWLockRelease(ProcArrayLock);
-}
+			/* Lookup the hash table entry with exclusive lock */
+			entry = (ProfileHashEntry *)
+				hash_search(pgws_profile_hash, &key, HASH_FIND, NULL);
 
-/*
- * Send waits history to shared memory queue.
- */
-static void
-send_history(History *observations, shm_mq_handle *mqh)
-{
-	Size	count,
-			i;
-	shm_mq_result	mq_result;
-
-	if (observations->wraparound)
-		count = observations->count;
-	else
-		count = observations->index;
+			/* Create new entry, if not present */
+			if (!entry)
+			{
 
-	mq_result = shm_mq_send_compat(mqh, sizeof(count), &count, false, true);
-	if (mq_result == SHM_MQ_DETACHED)
-	{
-		ereport(WARNING,
-				(errmsg("pg_wait_sampling collector: "
-						"receiver of message queue has been detached")));
-		return;
-	}
-	for (i = 0; i < count; i++)
-	{
-		mq_result = shm_mq_send_compat(mqh,
-								sizeof(HistoryItem),
-								&observations->items[i],
-								false,
-								true);
-		if (mq_result == SHM_MQ_DETACHED)
-		{
-			ereport(WARNING,
-					(errmsg("pg_wait_sampling collector: "
-							"receiver of message queue has been detached")));
-			return;
-		}
-	}
-}
+				/* Make space if needed */
+				while (hash_get_num_entries(pgws_profile_hash) >= MaxProfileEntries)
+					pgws_entry_dealloc();
 
-/*
- * Send profile to shared memory queue.
- */
-static void
-send_profile(HTAB *profile_hash, shm_mq_handle *mqh)
-{
-	HASH_SEQ_STATUS	scan_status;
-	ProfileItem	   *item;
-	Size			count = hash_get_num_entries(profile_hash);
-	shm_mq_result	mq_result;
+				entry = (ProfileHashEntry *)
+					hash_search(pgws_profile_hash, &key, HASH_ENTER_NULL, NULL);
+				Assert(entry);
 
-	mq_result = shm_mq_send_compat(mqh, sizeof(count), &count, false, true);
-	if (mq_result == SHM_MQ_DETACHED)
-	{
-		ereport(WARNING,
-				(errmsg("pg_wait_sampling collector: "
-						"receiver of message queue has been detached")));
-		return;
-	}
-	hash_seq_init(&scan_status, profile_hash);
-	while ((item = (ProfileItem *) hash_seq_search(&scan_status)) != NULL)
-	{
-		mq_result = shm_mq_send_compat(mqh, sizeof(ProfileItem), item, false,
-									   true);
-		if (mq_result == SHM_MQ_DETACHED)
-		{
-			hash_seq_term(&scan_status);
-			ereport(WARNING,
-					(errmsg("pg_wait_sampling collector: "
-							"receiver of message queue has been detached")));
-			return;
+				entry->counter = 1;
+				entry->usage = USAGE_INIT;
+			}
+			else
+			{
+				entry->counter++;
+				entry->usage += USAGE_INCREASE;
+			}
 		}
 	}
-}
-
-/*
- * Make hash table for wait profile.
- */
-static HTAB *
-make_profile_hash()
-{
-	HASHCTL hash_ctl;
-
-	hash_ctl.hash = tag_hash;
-	hash_ctl.hcxt = TopMemoryContext;
-
-	if (pgws_collector_hdr->profileQueries)
-		hash_ctl.keysize = offsetof(ProfileItem, count);
-	else
-		hash_ctl.keysize = offsetof(ProfileItem, queryId);
+	LWLockRelease(ProcArrayLock);
 
-	hash_ctl.entrysize = sizeof(ProfileItem);
-	return hash_create("Waits profile hash", 1024, &hash_ctl,
-					   HASH_FUNCTION | HASH_ELEM);
+	if (write_history)
+		LWLockRelease(pgws_history_lock);
+	if (write_profile)
+		LWLockRelease(pgws_profile_lock);
 }
 
 /*
@@ -323,10 +243,6 @@ millisecs_diff(TimestampTz tz1, TimestampTz tz2)
 void
 pgws_collector_main(Datum main_arg)
 {
-	HTAB		   *profile_hash = NULL;
-	History			observations;
-	MemoryContext	old_context,
-					collector_context;
 	TimestampTz		current_ts,
 					history_ts,
 					profile_ts;
@@ -350,6 +266,13 @@ pgws_collector_main(Datum main_arg)
 	 */
 	pqsignal(SIGTERM, handle_sigterm);
 	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
+	pqsignal(SIGHUP,
+#if PG_VERSION_NUM >= 130000
+			SignalHandlerForConfigReload
+#else
+			PostgresSigHupHandler
+#endif
+			);
 	BackgroundWorkerUnblockSignals();
 	InitPostgresCompat(NULL, InvalidOid, NULL, InvalidOid, false, false, NULL);
 	SetProcessingMode(NormalProcessing);
@@ -357,16 +280,6 @@ pgws_collector_main(Datum main_arg)
 	/* Make pg_wait_sampling recognisable in pg_stat_activity */
 	pgstat_report_appname("pg_wait_sampling collector");
 
-	profile_hash = make_profile_hash();
-	pgws_collector_hdr->latch = &MyProc->procLatch;
-
-	CurrentResourceOwner = ResourceOwnerCreate(NULL, "pg_wait_sampling collector");
-	collector_context = AllocSetContextCreate(TopMemoryContext,
-			"pg_wait_sampling context", ALLOCSET_DEFAULT_SIZES);
-	old_context = MemoryContextSwitchTo(collector_context);
-	alloc_history(&observations, pgws_collector_hdr->historySize);
-	MemoryContextSwitchTo(old_context);
-
 	ereport(LOG, (errmsg("pg_wait_sampling collector started")));
 
 	/* Start counting time for history and profile samples */
@@ -374,33 +287,45 @@ pgws_collector_main(Datum main_arg)
 
 	while (1)
 	{
-		int				rc;
-		shm_mq_handle  *mqh;
-		int64			history_diff,
-						profile_diff;
-		int				history_period,
-						profile_period;
-		bool			write_history,
-						write_profile;
+		int		rc;
+		int64	history_diff,
+				profile_diff;
+		bool	write_history,
+				write_profile;
+		int		history_timeout,
+				profile_timeout,
+				actual_timeout;
+
+		/* Clear any already-pending wakeups */
+		ResetLatch(MyLatch);
 
 		/* We need an explicit call for at least ProcSignal notifications. */
 		CHECK_FOR_INTERRUPTS();
 
-		/* Wait calculate time to next sample for history or profile */
-		current_ts = GetCurrentTimestamp();
+		/* Process any requests or signals received recently */
+		if (ConfigReloadPending)
+		{
+			ConfigReloadPending = false;
+			ProcessConfigFile(PGC_SIGHUP);
+		}
+
+		/* Shutdown if requested */
+		if (shutdown_requested)
+			break;
 
+		/* Calculate time for the next sample of history or profile */
+		current_ts = GetCurrentTimestamp();
 		history_diff = millisecs_diff(history_ts, current_ts);
 		profile_diff = millisecs_diff(profile_ts, current_ts);
-		history_period = pgws_collector_hdr->historyPeriod;
-		profile_period = pgws_collector_hdr->profilePeriod;
-
-		write_history = (history_diff >= (int64)history_period);
-		write_profile = (profile_diff >= (int64)profile_period);
 
+		/* Write profile or history */
+		write_history = HistoryPeriod &&
+			(history_diff >= (int64) HistoryPeriod);
+		write_profile = ProfilePeriod &&
+			(profile_diff >= (int64) ProfilePeriod);
 		if (write_history || write_profile)
 		{
-			probe_waits(&observations, profile_hash,
-						write_history, write_profile, pgws_collector_hdr->profilePid);
+			probe_waits(write_history, write_profile);
 
 			if (write_history)
 			{
@@ -415,87 +340,29 @@ pgws_collector_main(Datum main_arg)
 			}
 		}
 
-		/* Shutdown if requested */
-		if (shutdown_requested)
-			break;
-
-		/*
-		 * Wait until next sample time or request to do something through
-		 * shared memory.
-		 */
-#if PG_VERSION_NUM >= 100000
-		rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
-				Min(history_period - (int)history_diff,
-					profile_period - (int)profile_diff), PG_WAIT_EXTENSION);
-#else
-		rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
-				Min(history_period - (int)history_diff,
-					profile_period - (int)profile_diff));
-#endif
+		/* Wait until next sample time */
+		history_timeout = HistoryPeriod >= (int) history_diff ?
+			HistoryPeriod - (int) history_diff : 0;
+		profile_timeout = ProfilePeriod >= (int) profile_diff ?
+			ProfilePeriod - (int) profile_diff : 0;
+
+		actual_timeout = 0;
+		if (ProfilePeriod && !HistoryPeriod)
+			actual_timeout = profile_timeout;
+		else if (HistoryPeriod && !ProfilePeriod)
+			actual_timeout = history_timeout;
+		else if (HistoryPeriod && ProfilePeriod)
+			actual_timeout = Min(history_timeout, profile_timeout);
+
+		rc = WaitLatchCompat(MyLatch,
+				WL_LATCH_SET | WL_POSTMASTER_DEATH |
+					(HistoryPeriod || ProfilePeriod ? WL_TIMEOUT : 0),
+				actual_timeout, PG_WAIT_EXTENSION);
 
 		if (rc & WL_POSTMASTER_DEATH)
 			proc_exit(1);
-
-		ResetLatch(&MyProc->procLatch);
-
-		/* Handle request if any */
-		if (pgws_collector_hdr->request != NO_REQUEST)
-		{
-			LOCKTAG		tag;
-			SHMRequest	request;
-
-			pgws_init_lock_tag(&tag, PGWS_COLLECTOR_LOCK);
-
-			LockAcquire(&tag, ExclusiveLock, false, false);
-			request = pgws_collector_hdr->request;
-			pgws_collector_hdr->request = NO_REQUEST;
-
-			if (request == HISTORY_REQUEST || request == PROFILE_REQUEST)
-			{
-				shm_mq_result	mq_result;
-
-				/* Send history or profile */
-				shm_mq_set_sender(pgws_collector_mq, MyProc);
-				mqh = shm_mq_attach(pgws_collector_mq, NULL, NULL);
-				mq_result = shm_mq_wait_for_attach(mqh);
-				switch (mq_result)
-				{
-					case SHM_MQ_SUCCESS:
-						switch (request)
-						{
-							case HISTORY_REQUEST:
-								send_history(&observations, mqh);
-								break;
-							case PROFILE_REQUEST:
-								send_profile(profile_hash, mqh);
-								break;
-							default:
-								Assert(false);
-						}
-						break;
-					case SHM_MQ_DETACHED:
-						ereport(WARNING,
-								(errmsg("pg_wait_sampling collector: "
-										"receiver of message queue have been "
-										"detached")));
-						break;
-					default:
-						Assert(false);
-				}
-				shm_mq_detach_compat(mqh, pgws_collector_mq);
-			}
-			else if (request == PROFILE_RESET)
-			{
-				/* Reset profile hash */
-				hash_destroy(profile_hash);
-				profile_hash = make_profile_hash();
-			}
-			LockRelease(&tag, ExclusiveLock, false);
-		}
 	}
 
-	MemoryContextReset(collector_context);
-
 	/*
 	 * We're done.  Explicitly detach the shared memory segment so that we
 	 * don't get a resource leak warning at commit time.  This will fire any
diff --git a/compat.h b/compat.h
index 32874f7..76aa874 100644
--- a/compat.h
+++ b/compat.h
@@ -14,8 +14,13 @@
 
 #include "access/tupdesc.h"
 #include "miscadmin.h"
-#include "storage/shm_mq.h"
-#include "utils/guc_tables.h"
+#include "storage/latch.h"
+
+#if PG_VERSION_NUM >= 110000
+typedef uint64 pgwsQueryId;
+#else
+typedef uint32 pgwsQueryId;
+#endif
 
 static inline TupleDesc
 CreateTemplateTupleDescCompat(int nattrs, bool hasoid)
@@ -27,27 +32,6 @@ CreateTemplateTupleDescCompat(int nattrs, bool hasoid)
 #endif
 }
 
-static inline void
-shm_mq_detach_compat(shm_mq_handle *mqh, shm_mq *mq)
-{
-#if PG_VERSION_NUM >= 100000
-	shm_mq_detach(mqh);
-#else
-	shm_mq_detach(mq);
-#endif
-}
-
-static inline shm_mq_result
-shm_mq_send_compat(shm_mq_handle *mqh, Size nbytes, const void *data,
-				   bool nowait, bool force_flush)
-{
-#if PG_VERSION_NUM >= 150000
-	return shm_mq_send(mqh, nbytes, data, nowait, force_flush);
-#else
-	return shm_mq_send(mqh, nbytes, data, nowait);
-#endif
-}
-
 static inline void
 InitPostgresCompat(const char *in_dbname, Oid dboid,
 				   const char *username, Oid useroid,
@@ -66,18 +50,20 @@ InitPostgresCompat(const char *in_dbname, Oid dboid,
 #endif
 }
 
-static inline void
-get_guc_variables_compat(struct config_generic ***vars, int *num_vars)
+static inline int
+WaitLatchCompat(Latch *latch, int wakeEvents, long timeout,
+				uint32 wait_event_info)
 {
-	Assert(vars != NULL);
-	Assert(num_vars != NULL);
-
-#if PG_VERSION_NUM >= 160000
-	*vars = get_guc_variables(num_vars);
+#if PG_VERSION_NUM >= 100000
+	return WaitLatch(latch, wakeEvents, timeout, wait_event_info);
 #else
-	*vars = get_guc_variables();
-	*num_vars = GetNumConfigOptions();
+#define PG_WAIT_EXTENSION -1
+	return WaitLatch(latch, wakeEvents, timeout);
 #endif
 }
 
+#if PG_VERSION_NUM < 100000
+#define GetPGProcByNumber(n) (&ProcGlobal->allProcs[(n)])
+#endif
+
 #endif
diff --git a/pg_wait_sampling.c b/pg_wait_sampling.c
index eaa0327..8c7e781 100644
--- a/pg_wait_sampling.c
+++ b/pg_wait_sampling.c
@@ -10,27 +10,22 @@
 #include "postgres.h"
 
 #include "access/htup_details.h"
-#include "access/twophase.h"
 #include "catalog/pg_type.h"
-#include "fmgr.h"
 #include "funcapi.h"
-#include "miscadmin.h"
 #include "optimizer/planner.h"
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
+#include "postmaster/bgworker.h"
 #if PG_VERSION_NUM >= 120000
 #include "replication/walsender.h"
 #endif
 #include "storage/ipc.h"
-#include "storage/pg_shmem.h"
-#include "storage/procarray.h"
-#include "storage/shm_mq.h"
-#include "storage/shm_toc.h"
-#include "storage/spin.h"
+#include "storage/proc.h"
 #include "utils/builtins.h"
-#include "utils/datetime.h"
-#include "utils/guc_tables.h"
 #include "utils/guc.h"
+#if PG_VERSION_NUM >= 140000
+#include "utils/queryjumble.h"
+#endif
 #include "utils/memutils.h" /* TopMemoryContext.  Actually for PG 9.6 only,
 							 * but there should be no harm for others. */
 
@@ -39,29 +34,28 @@
 
 PG_MODULE_MAGIC;
 
-void		_PG_init(void);
-
+/* Marker whether extension is setup in shared mode */
 static bool shmem_initialized = false;
 
+/* Global settings */
+int MaxProfileEntries = 5000;
+int HistoryBufferSize = 5000;
+int HistoryPeriod = 0;
+int ProfilePeriod = 10;
+bool WhetherProfilePid = true;
+bool WhetherProfileQueryId = true;
+
+/* Function declarations */
+void _PG_init(void);
+// TODO: add void _PG_fini(void);
+
 /* Hooks */
 static ExecutorEnd_hook_type	prev_ExecutorEnd = NULL;
 static planner_hook_type		planner_hook_next = NULL;
-
-/* Pointers to shared memory objects */
-shm_mq				   *pgws_collector_mq = NULL;
-uint64				   *pgws_proc_queryids = NULL;
-CollectorShmqHeader	   *pgws_collector_hdr = NULL;
-
-/* Receiver (backend) local shm_mq pointers and lock */
-static shm_mq *recv_mq = NULL;
-static shm_mq_handle *recv_mqh = NULL;
-static LOCKTAG queueTag;
-
 #if PG_VERSION_NUM >= 150000
-static shmem_request_hook_type prev_shmem_request_hook = NULL;
+static shmem_request_hook_type 	prev_shmem_request_hook = NULL;
 #endif
-static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
-static PGPROC * search_proc(int backendPid);
+static shmem_startup_hook_type	prev_shmem_startup_hook = NULL;
 static PlannedStmt *pgws_planner_hook(Query *parse,
 #if PG_VERSION_NUM >= 130000
 		const char *query_string,
@@ -69,6 +63,13 @@ static PlannedStmt *pgws_planner_hook(Query *parse,
 		int cursorOptions, ParamListInfo boundParams);
 static void pgws_ExecutorEnd(QueryDesc *queryDesc);
 
+/* Pointers to shared memory objects */
+pgwsQueryId *pgws_proc_queryids = NULL;
+HTAB		*pgws_profile_hash = NULL;
+LWLock		*pgws_profile_lock = NULL;
+History		*pgws_history_ring = NULL;
+LWLock		*pgws_history_lock = NULL;
+
 /*
  * Calculate max processes count.
  *
@@ -135,149 +136,63 @@ get_max_procs_count(void)
 static Size
 pgws_shmem_size(void)
 {
-	shm_toc_estimator	e;
-	Size				size;
-	int					nkeys;
-
-	shm_toc_initialize_estimator(&e);
+	Size size = 0;	/* running total; add_size/mul_size raise ERROR on overflow */
 
-	nkeys = 3;
-
-	shm_toc_estimate_chunk(&e, sizeof(CollectorShmqHeader));
-	shm_toc_estimate_chunk(&e, (Size) COLLECTOR_QUEUE_SIZE);
-	shm_toc_estimate_chunk(&e, sizeof(uint64) * get_max_procs_count());
-
-	shm_toc_estimate_keys(&e, nkeys);
-	size = shm_toc_estimate(&e);
+	size = add_size(size, mul_size(sizeof(pgwsQueryId), get_max_procs_count()));
+	size = add_size(size, hash_estimate_size(MaxProfileEntries,
+											 sizeof(ProfileHashEntry)));
+	size = add_size(size, sizeof(History));
+	size = add_size(size, mul_size(sizeof(HistoryItem), HistoryBufferSize));
 
 	return size;
 }
 
-static bool
-shmem_int_guc_check_hook(int *newval, void **extra, GucSource source)
-{
-	if (UsedShmemSegAddr == NULL)
-		return false;
-	return true;
-}
-
-static bool
-shmem_bool_guc_check_hook(bool *newval, void **extra, GucSource source)
+static void		/* assign hook of pg_wait_sampling.profile_queries */
+pgwsEnableQueryId(bool newval, void *extra)
 {
-	if (UsedShmemSegAddr == NULL)
-		return false;
-	return true;
+#if PG_VERSION_NUM >= 140000	/* on older versions this hook does nothing */
+	if (newval)
+		EnableQueryId();	/* request query id computation from the server */
+#endif
 }
 
-/*
- * This union allows us to mix the numerous different types of structs
- * that we are organizing.
- */
-typedef union
-{
-	struct config_generic generic;
-	struct config_bool _bool;
-	struct config_real real;
-	struct config_int integer;
-	struct config_string string;
-	struct config_enum _enum;
-} mixedStruct;
-
 /*
  * Setup new GUCs or modify existsing.
  */
 static void
 setup_gucs()
 {
-	struct config_generic **guc_vars;
-	int			numOpts,
-				i;
-	bool		history_size_found = false,
-				history_period_found = false,
-				profile_period_found = false,
-				profile_pid_found = false,
-				profile_queries_found = false;
-
-	get_guc_variables_compat(&guc_vars, &numOpts);
-
-	for (i = 0; i < numOpts; i++)
-	{
-		mixedStruct *var = (mixedStruct *) guc_vars[i];
-		const char *name = var->generic.name;
-
-		if (var->generic.flags & GUC_CUSTOM_PLACEHOLDER)
-			continue;
-
-		if (!strcmp(name, "pg_wait_sampling.history_size"))
-		{
-			history_size_found = true;
-			var->integer.variable = &pgws_collector_hdr->historySize;
-			pgws_collector_hdr->historySize = 5000;
-		}
-		else if (!strcmp(name, "pg_wait_sampling.history_period"))
-		{
-			history_period_found = true;
-			var->integer.variable = &pgws_collector_hdr->historyPeriod;
-			pgws_collector_hdr->historyPeriod = 10;
-		}
-		else if (!strcmp(name, "pg_wait_sampling.profile_period"))
-		{
-			profile_period_found = true;
-			var->integer.variable = &pgws_collector_hdr->profilePeriod;
-			pgws_collector_hdr->profilePeriod = 10;
-		}
-		else if (!strcmp(name, "pg_wait_sampling.profile_pid"))
-		{
-			profile_pid_found = true;
-			var->_bool.variable = &pgws_collector_hdr->profilePid;
-			pgws_collector_hdr->profilePid = true;
-		}
-		else if (!strcmp(name, "pg_wait_sampling.profile_queries"))
-		{
-			profile_queries_found = true;
-			var->_bool.variable = &pgws_collector_hdr->profileQueries;
-			pgws_collector_hdr->profileQueries = true;
-		}
-	}
-
-	if (!history_size_found)
-		DefineCustomIntVariable("pg_wait_sampling.history_size",
-				"Sets size of waits history.", NULL,
-				&pgws_collector_hdr->historySize, 5000, 100, INT_MAX,
-				PGC_SUSET, 0, shmem_int_guc_check_hook, NULL, NULL);
-
-	if (!history_period_found)
-		DefineCustomIntVariable("pg_wait_sampling.history_period",
-				"Sets period of waits history sampling.", NULL,
-				&pgws_collector_hdr->historyPeriod, 10, 1, INT_MAX,
-				PGC_SUSET, 0, shmem_int_guc_check_hook, NULL, NULL);
-
-	if (!profile_period_found)
-		DefineCustomIntVariable("pg_wait_sampling.profile_period",
-				"Sets period of waits profile sampling.", NULL,
-				&pgws_collector_hdr->profilePeriod, 10, 1, INT_MAX,
-				PGC_SUSET, 0, shmem_int_guc_check_hook, NULL, NULL);
-
-	if (!profile_pid_found)
-		DefineCustomBoolVariable("pg_wait_sampling.profile_pid",
-				"Sets whether profile should be collected per pid.", NULL,
-				&pgws_collector_hdr->profilePid, true,
-				PGC_SUSET, 0, shmem_bool_guc_check_hook, NULL, NULL);
-
-	if (!profile_queries_found)
-		DefineCustomBoolVariable("pg_wait_sampling.profile_queries",
-				"Sets whether profile should be collected per query.", NULL,
-				&pgws_collector_hdr->profileQueries, true,
-				PGC_SUSET, 0, shmem_bool_guc_check_hook, NULL, NULL);
-
-	if (history_size_found
-		|| history_period_found
-		|| profile_period_found
-		|| profile_pid_found
-		|| profile_queries_found)
-	{
-		ProcessConfigFile(PGC_SIGHUP);
-	}
+	DefineCustomIntVariable("pg_wait_sampling.max_profile_entries",
+			"Sets maximum number of entries in bounded profile table.", NULL,
+			&MaxProfileEntries, 5000, 100, INT_MAX,
+			PGC_POSTMASTER, 0, NULL, NULL, NULL);	/* sizes shared memory */
+
+	DefineCustomIntVariable("pg_wait_sampling.history_size",
+			"Sets size of ring buffer for waits history in items.", NULL,
+			&HistoryBufferSize, 5000, 100, INT_MAX,
+			PGC_POSTMASTER, 0, NULL, NULL, NULL);	/* sizes shared memory */
+
+	DefineCustomIntVariable("pg_wait_sampling.history_period",
+			"Sets period of waits history sampling in milliseconds.",
+			"0 disables history populating.",
+			&HistoryPeriod, 0, 0, INT_MAX,
+			PGC_SIGHUP, 0, NULL, NULL, NULL);
+
+	DefineCustomIntVariable("pg_wait_sampling.profile_period",
+			"Sets period of waits profile sampling in milliseconds.",
+			"0 disables profiling.",
+			&ProfilePeriod, 10, 0, INT_MAX,
+			PGC_SIGHUP, 0, NULL, NULL, NULL);
+
+	DefineCustomBoolVariable("pg_wait_sampling.profile_pid",
+			"Sets whether profile should be collected per pid.", NULL,
+			&WhetherProfilePid, true,
+			PGC_POSTMASTER, 0, NULL, NULL, NULL);
+
+	DefineCustomBoolVariable("pg_wait_sampling.profile_queries",
+			"Sets whether profile should be collected per query.", NULL,
+			&WhetherProfileQueryId, true,
+			PGC_POSTMASTER, 0, NULL, pgwsEnableQueryId, NULL);	/* assign hook */
 }
 
 #if PG_VERSION_NUM >= 150000
@@ -294,6 +209,7 @@ pgws_shmem_request(void)
 		prev_shmem_request_hook();
 
 	RequestAddinShmemSpace(pgws_shmem_size());
+	RequestNamedLWLockTranche("pg_wait_sampling", 2);
 }
 #endif
 
@@ -303,48 +219,45 @@ pgws_shmem_request(void)
 static void
 pgws_shmem_startup(void)
 {
-	bool		found;
-	Size		segsize = pgws_shmem_size();
-	void	   *pgws;
-	shm_toc	   *toc;
+	bool			found;
+	HASHCTL			info;
+	LWLockPadded   *locks;
+
+	if (prev_shmem_startup_hook)
+		prev_shmem_startup_hook();
 
-	pgws = ShmemInitStruct("pg_wait_sampling", segsize, &found);
+	/* Create or attach to the shared memory state */
+	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
 
+	pgws_proc_queryids = ShmemInitStruct("pg_wait_sampling queryids",
+			sizeof(pgwsQueryId) * get_max_procs_count(), &found);
 	if (!found)
 	{
-		toc = shm_toc_create(PG_WAIT_SAMPLING_MAGIC, pgws, segsize);
-
-		pgws_collector_hdr = shm_toc_allocate(toc, sizeof(CollectorShmqHeader));
-		shm_toc_insert(toc, 0, pgws_collector_hdr);
-		pgws_collector_mq = shm_toc_allocate(toc, COLLECTOR_QUEUE_SIZE);
-		shm_toc_insert(toc, 1, pgws_collector_mq);
-		pgws_proc_queryids = shm_toc_allocate(toc,
-									sizeof(uint64) * get_max_procs_count());
-		shm_toc_insert(toc, 2, pgws_proc_queryids);
-		MemSet(pgws_proc_queryids, 0, sizeof(uint64) * get_max_procs_count());
-
-		/* Initialize GUC variables in shared memory */
-		setup_gucs();
-	}
-	else
-	{
-		toc = shm_toc_attach(PG_WAIT_SAMPLING_MAGIC, pgws);
+		Size	queryids_size = sizeof(pgwsQueryId) * get_max_procs_count();
 
-#if PG_VERSION_NUM >= 100000
-		pgws_collector_hdr = shm_toc_lookup(toc, 0, false);
-		pgws_collector_mq = shm_toc_lookup(toc, 1, false);
-		pgws_proc_queryids = shm_toc_lookup(toc, 2, false);
-#else
-		pgws_collector_hdr = shm_toc_lookup(toc, 0);
-		pgws_collector_mq = shm_toc_lookup(toc, 1);
-		pgws_proc_queryids = shm_toc_lookup(toc, 2);
-#endif
+		/* First time through: no backend has published a query id yet */
+		MemSet(pgws_proc_queryids, 0, queryids_size);
 	}
 
-	shmem_initialized = true;
+	pgws_history_ring = ShmemInitStruct("pg_wait_sampling history ring",
+			sizeof(History) + sizeof(HistoryItem) * HistoryBufferSize, &found);
+	if (!found)
+		pgws_history_ring->index = 0;	/* start with an empty ring */
 
-	if (prev_shmem_startup_hook)
-		prev_shmem_startup_hook();
+	/* Lock pointers are per-process: resolve them even when only attaching */
+	locks = GetNamedLWLockTranche("pg_wait_sampling");
+	pgws_profile_lock = &(locks[0]).lock;
+	pgws_history_lock = &(locks[1]).lock;
+
+	memset(&info, 0, sizeof(info));
+	info.keysize = sizeof(ProfileHashKey);
+	info.entrysize = sizeof(ProfileHashEntry);
+	pgws_profile_hash = ShmemInitHash("pg_wait_sampling hash",
+									  MaxProfileEntries, MaxProfileEntries,
+									  &info, HASH_ELEM | HASH_BLOBS);
+
+	LWLockRelease(AddinShmemInitLock);
+	shmem_initialized = true;
 }
 
 /*
@@ -360,14 +273,28 @@ check_shmem(void)
 	}
 }
 
+/*
+ * Register the "pg_wait_sampling collector" background worker (pgws_collector_main).
+ */
 static void
-pgws_cleanup_callback(int code, Datum arg)
+pgws_register_wait_collector(void)
 {
-	elog(DEBUG3, "pg_wait_sampling cleanup: detaching shm_mq and releasing queue lock");
-	shm_mq_detach_compat(recv_mqh, recv_mq);
-	LockRelease(&queueTag, ExclusiveLock, false);
+	BackgroundWorker worker;
+
+	/* Describe the worker: it needs shared memory access but no database connection flag */
+	memset(&worker, 0, sizeof(worker));
+	worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
+	worker.bgw_start_time = BgWorkerStart_ConsistentState;
+	worker.bgw_restart_time = 1;	/* restart interval, in seconds */
+	worker.bgw_notify_pid = 0;		/* no process is notified about start/stop */
+	snprintf(worker.bgw_library_name, BGW_MAXLEN, "pg_wait_sampling");
+	snprintf(worker.bgw_function_name, BGW_MAXLEN, CppAsString(pgws_collector_main));
+	snprintf(worker.bgw_name, BGW_MAXLEN, "pg_wait_sampling collector");
+	worker.bgw_main_arg = (Datum) 0;	/* no argument is passed to the entry point */
+	RegisterBackgroundWorker(&worker);
 }
 
+
 /*
  * Module load callback
  */
@@ -377,6 +304,8 @@ _PG_init(void)
 	if (!process_shared_preload_libraries_in_progress)
 		return;
 
+	setup_gucs();
+
 #if PG_VERSION_NUM < 150000
 	/*
 	 * Request additional shared resources.  (These are no-ops if we're not in
@@ -387,6 +316,7 @@ _PG_init(void)
 	 * in pgsp_shmem_request() for pg15 and later.
 	 */
 	RequestAddinShmemSpace(pgws_shmem_size());
+	RequestNamedLWLockTranche("pg_wait_sampling", 2);
 #endif
 
 	pgws_register_wait_collector();
@@ -555,111 +485,13 @@ pg_wait_sampling_get_current(PG_FUNCTION_ARGS)
 	}
 }
 
-typedef struct
-{
-	Size			count;
-	ProfileItem	   *items;
-} Profile;
-
-void
-pgws_init_lock_tag(LOCKTAG *tag, uint32 lock)
-{
-	tag->locktag_field1 = PG_WAIT_SAMPLING_MAGIC;
-	tag->locktag_field2 = lock;
-	tag->locktag_field3 = 0;
-	tag->locktag_field4 = 0;
-	tag->locktag_type = LOCKTAG_USERLOCK;
-	tag->locktag_lockmethodid = USER_LOCKMETHOD;
-}
-
-static void *
-receive_array(SHMRequest request, Size item_size, Size *count)
-{
-	LOCKTAG			collectorTag;
-	shm_mq_result	res;
-	Size			len,
-					i;
-	void		   *data;
-	Pointer			result,
-					ptr;
-	MemoryContext	oldctx;
-
-	/* Ensure nobody else trying to send request to queue */
-	pgws_init_lock_tag(&queueTag, PGWS_QUEUE_LOCK);
-	LockAcquire(&queueTag, ExclusiveLock, false, false);
-
-	pgws_init_lock_tag(&collectorTag, PGWS_COLLECTOR_LOCK);
-	LockAcquire(&collectorTag, ExclusiveLock, false, false);
-	recv_mq = shm_mq_create(pgws_collector_mq, COLLECTOR_QUEUE_SIZE);
-	pgws_collector_hdr->request = request;
-	LockRelease(&collectorTag, ExclusiveLock, false);
-
-	if (!pgws_collector_hdr->latch)
-		ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR),
-						errmsg("pg_wait_sampling collector wasn't started")));
-
-	SetLatch(pgws_collector_hdr->latch);
-
-	shm_mq_set_receiver(recv_mq, MyProc);
-
-	/*
-	 * We switch to TopMemoryContext, so that recv_mqh is allocated there
-	 * and is guaranteed to survive until before_shmem_exit callbacks are
-	 * fired.  Anyway, shm_mq_detach() will free handler on its own.
-	 *
-	 * NB: we do not pass `seg` to shm_mq_attach(), so it won't set its own
-	 * callback, i.e. we do not interfere here with shm_mq_detach_callback().
-	 */
-	oldctx = MemoryContextSwitchTo(TopMemoryContext);
-	recv_mqh = shm_mq_attach(recv_mq, NULL, NULL);
-	MemoryContextSwitchTo(oldctx);
-
-	/*
-	 * Now we surely attached to the shm_mq and got collector's attention.
-	 * If anything went wrong (e.g. Ctrl+C received from the client) we have
-	 * to cleanup some things, i.e. detach from the shm_mq, so collector was
-	 * able to continue responding to other requests.
-	 *
-	 * PG_ENSURE_ERROR_CLEANUP() guaranties that cleanup callback will be
-	 * fired for both ERROR and FATAL.
-	 */
-	PG_ENSURE_ERROR_CLEANUP(pgws_cleanup_callback, 0);
-	{
-		res = shm_mq_receive(recv_mqh, &len, &data, false);
-		if (res != SHM_MQ_SUCCESS || len != sizeof(*count))
-			elog(ERROR, "error reading mq");
-
-		memcpy(count, data, sizeof(*count));
-
-		result = palloc(item_size * (*count));
-		ptr = result;
-
-		for (i = 0; i < *count; i++)
-		{
-			res = shm_mq_receive(recv_mqh, &len, &data, false);
-			if (res != SHM_MQ_SUCCESS || len != item_size)
-				elog(ERROR, "error reading mq");
-
-			memcpy(ptr, data, item_size);
-			ptr += item_size;
-		}
-	}
-	PG_END_ENSURE_ERROR_CLEANUP(pgws_cleanup_callback, 0);
-
-	/* We still have to detach and release lock during normal operation. */
-	shm_mq_detach_compat(recv_mqh, recv_mq);
-	LockRelease(&queueTag, ExclusiveLock, false);
-
-	return result;
-}
-
 
 PG_FUNCTION_INFO_V1(pg_wait_sampling_get_profile);
 Datum
 pg_wait_sampling_get_profile(PG_FUNCTION_ARGS)
 {
-	Profile			   *profile;
-	FuncCallContext	   *funcctx;
+	ProfileHashEntry	*profile;
+	FuncCallContext		*funcctx;
 
 	check_shmem();
 
@@ -667,17 +499,31 @@ pg_wait_sampling_get_profile(PG_FUNCTION_ARGS)
 	{
 		MemoryContext		oldcontext;
 		TupleDesc			tupdesc;
+		HASH_SEQ_STATUS		hash_seq;
+		ProfileHashEntry   *entry;
+		int					profile_count,	/* entries present under the lock */
+							entry_index;	/* entries copied so far */
 
 		funcctx = SRF_FIRSTCALL_INIT();
 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
 
-		/* Receive profile from shmq */
-		profile = (Profile *) palloc0(sizeof(Profile));
-		profile->items = (ProfileItem *) receive_array(PROFILE_REQUEST,
-										sizeof(ProfileItem), &profile->count);
+		/*
+		 * Take the lock before counting: entries added between the count and
+		 * the scan would otherwise be written past the end of the snapshot.
+		 */
+		LWLockAcquire(pgws_profile_lock, LW_SHARED);
+		profile_count = hash_get_num_entries(pgws_profile_hash);
+		profile = (ProfileHashEntry *)
+			palloc(sizeof(ProfileHashEntry) * profile_count);
 
+		entry_index = 0;
+		hash_seq_init(&hash_seq, pgws_profile_hash);
+		while ((entry = hash_seq_search(&hash_seq)) != NULL)
+			profile[entry_index++] = *entry;
+		LWLockRelease(pgws_profile_lock);
+
 		funcctx->user_fctx = profile;
-		funcctx->max_calls = profile->count;
+		funcctx->max_calls = entry_index;	/* rows actually captured */
 
 		/* Make tuple descriptor */
 		tupdesc = CreateTemplateTupleDescCompat(5, false);
@@ -699,7 +545,7 @@ pg_wait_sampling_get_profile(PG_FUNCTION_ARGS)
 	/* stuff done on every call of the function */
 	funcctx = SRF_PERCALL_SETUP();
 
-	profile = (Profile *) funcctx->user_fctx;
+	profile = (ProfileHashEntry *) funcctx->user_fctx;
 
 	if (funcctx->call_cntr < funcctx->max_calls)
 	{
@@ -707,19 +553,22 @@ pg_wait_sampling_get_profile(PG_FUNCTION_ARGS)
 		Datum		values[5];
 		bool		nulls[5];
 		HeapTuple	tuple;
-		ProfileItem *item;
+		ProfileHashEntry *item;
 		const char *event_type,
 				   *event;
 
-		item = &profile->items[funcctx->call_cntr];
+		item = &profile[funcctx->call_cntr];
 
 		MemSet(values, 0, sizeof(values));
 		MemSet(nulls, 0, sizeof(nulls));
 
 		/* Make and return next tuple to caller */
-		event_type = pgstat_get_wait_event_type(item->wait_event_info);
-		event = pgstat_get_wait_event(item->wait_event_info);
-		values[0] = Int32GetDatum(item->pid);
+		event_type = pgstat_get_wait_event_type(item->key.wait_event_info);
+		event = pgstat_get_wait_event(item->key.wait_event_info);
+		if (WhetherProfilePid)
+			values[0] = Int32GetDatum(item->key.pid);
+		else
+			nulls[0] = true;
 		if (event_type)
 			values[1] = PointerGetDatum(cstring_to_text(event_type));
 		else
@@ -729,12 +578,12 @@ pg_wait_sampling_get_profile(PG_FUNCTION_ARGS)
 		else
 			nulls[2] = true;
 
-		if (pgws_collector_hdr->profileQueries)
-			values[3] = UInt64GetDatum(item->queryId);
+		if (WhetherProfileQueryId)
+			values[3] = UInt64GetDatum(item->key.queryid);
 		else
-			values[3] = (Datum) 0;
+			nulls[3] = true;
 
-		values[4] = UInt64GetDatum(item->count);
+		values[4] = UInt64GetDatum(item->counter);
 
 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
 
@@ -751,22 +600,29 @@ PG_FUNCTION_INFO_V1(pg_wait_sampling_reset_profile);
 Datum
 pg_wait_sampling_reset_profile(PG_FUNCTION_ARGS)
 {
-	LOCKTAG		collectorTag;
+	HASH_SEQ_STATUS		hash_seq;
+	ProfileHashEntry   *entry;
 
 	check_shmem();
 
-	pgws_init_lock_tag(&queueTag, PGWS_QUEUE_LOCK);
-
-	LockAcquire(&queueTag, ExclusiveLock, false, false);
+	LWLockAcquire(pgws_profile_lock, LW_EXCLUSIVE);
 
-	pgws_init_lock_tag(&collectorTag, PGWS_COLLECTOR_LOCK);
-	LockAcquire(&collectorTag, ExclusiveLock, false, false);
-	pgws_collector_hdr->request = PROFILE_RESET;
-	LockRelease(&collectorTag, ExclusiveLock, false);
+	/* Drop every entry; dynahash allows deleting the one just returned. */
+	hash_seq_init(&hash_seq, pgws_profile_hash);
+	while ((entry = hash_seq_search(&hash_seq)) != NULL)
+	{
+		hash_search(pgws_profile_hash, &entry->key, HASH_REMOVE, NULL);
+	}
 
-	SetLatch(pgws_collector_hdr->latch);
+	LWLockRelease(pgws_profile_lock);
 
-	LockRelease(&queueTag, ExclusiveLock, false);
+	/*
+	 * TODO: consider saving the time of the statistics reset to make
+	 * computing differential counters easier. It could be a global timestamp
+	 * accessible via a separate function, as done in pg_stat_statements, or
+	 * finer-grained per-entry timestamps that also account for entries being
+	 * evicted from the size-bounded hash table.
+	 */
 
 	PG_RETURN_VOID();
 }
@@ -775,7 +631,7 @@ PG_FUNCTION_INFO_V1(pg_wait_sampling_get_history);
 Datum
 pg_wait_sampling_get_history(PG_FUNCTION_ARGS)
 {
-	History				*history;
+	HistoryItem			*history;
 	FuncCallContext		*funcctx;
 
 	check_shmem();
@@ -784,17 +640,25 @@ pg_wait_sampling_get_history(PG_FUNCTION_ARGS)
 	{
 		MemoryContext	oldcontext;
 		TupleDesc		tupdesc;
+		int				history_size;
 
 		funcctx = SRF_FIRSTCALL_INIT();
 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
 
-		/* Receive history from shmq */
-		history = (History *) palloc0(sizeof(History));
-		history->items = (HistoryItem *) receive_array(HISTORY_REQUEST,
-										sizeof(HistoryItem), &history->count);
+		/* Extract history from shared ring buffer */
+		LWLockAcquire(pgws_history_lock, LW_SHARED);
+
+		history_size = pgws_history_ring->index < HistoryBufferSize ?
+			pgws_history_ring->index : HistoryBufferSize;
+		history = (HistoryItem *) palloc(history_size * sizeof(HistoryItem));
+		memcpy(history, pgws_history_ring->items,
+			   history_size * sizeof(HistoryItem));
 
+		LWLockRelease(pgws_history_lock);
+
+		/* Save function context */
 		funcctx->user_fctx = history;
-		funcctx->max_calls = history->count;
+		funcctx->max_calls = history_size;
 
 		/* Make tuple descriptor */
 		tupdesc = CreateTemplateTupleDescCompat(5, false);
@@ -816,9 +680,9 @@ pg_wait_sampling_get_history(PG_FUNCTION_ARGS)
 	/* stuff done on every call of the function */
 	funcctx = SRF_PERCALL_SETUP();
 
-	history = (History *) funcctx->user_fctx;
+	history = (HistoryItem *) funcctx->user_fctx;
 
-	if (history->index < history->count)
+	if (funcctx->call_cntr < funcctx->max_calls)
 	{
 		HeapTuple	tuple;
 		HistoryItem *item;
@@ -827,7 +691,7 @@ pg_wait_sampling_get_history(PG_FUNCTION_ARGS)
 		const char *event_type,
 				   *event;
 
-		item = &history->items[history->index];
+		item = &history[funcctx->call_cntr];
 
 		/* Make and return next tuple to caller */
 		MemSet(values, 0, sizeof(values));
@@ -849,7 +713,6 @@ pg_wait_sampling_get_history(PG_FUNCTION_ARGS)
 		values[4] = UInt64GetDatum(item->queryId);
 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
 
-		history->index++;
 		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
 	}
 	else
@@ -875,20 +738,11 @@ pgws_planner_hook(Query *parse,
 	if (MyProc)
 	{
 		int i = MyProc - ProcGlobal->allProcs;
-#if PG_VERSION_NUM >= 110000
-		/*
-		 * since we depend on queryId we need to check that its size
-		 * is uint64 as we coded in pg_wait_sampling
-		 */
-		StaticAssertExpr(sizeof(parse->queryId) == sizeof(uint64),
-				"queryId size is not uint64");
-#else
-		StaticAssertExpr(sizeof(parse->queryId) == sizeof(uint32),
-				"queryId size is not uint32");
-#endif
+
+		StaticAssertExpr(sizeof(parse->queryId) == sizeof(pgwsQueryId),
+						 "queryId size is not correct");
 		if (!pgws_proc_queryids[i])
 			pgws_proc_queryids[i] = parse->queryId;
-
 	}
 
 	/* Invoke original hook if needed */
diff --git a/pg_wait_sampling.h b/pg_wait_sampling.h
index 29425fc..56d1f91 100644
--- a/pg_wait_sampling.h
+++ b/pg_wait_sampling.h
@@ -17,67 +17,60 @@
 	#error "You are trying to build pg_wait_sampling with PostgreSQL version lower than 9.6.  Please, check you environment."
 #endif
 
-#include "storage/proc.h"
-#include "storage/shm_mq.h"
 #include "utils/timestamp.h"
 
 #define	PG_WAIT_SAMPLING_MAGIC		0xCA94B107
-#define COLLECTOR_QUEUE_SIZE		(16 * 1024)
-#define HISTORY_TIME_MULTIPLIER		10
-#define PGWS_QUEUE_LOCK				0
-#define PGWS_COLLECTOR_LOCK			1
 
 typedef struct
 {
 	uint32			pid;
 	uint32			wait_event_info;
-	uint64			queryId;
-	uint64			count;
-} ProfileItem;
-
-typedef struct
-{
-	uint32			pid;
-	uint32			wait_event_info;
-	uint64			queryId;
+	pgwsQueryId		queryId;
 	TimestampTz		ts;
 } HistoryItem;
 
 typedef struct
 {
-	bool			wraparound;
-	Size			index;
-	Size			count;
-	HistoryItem	   *items;
+	Size		index;	/* readers use min(index, HistoryBufferSize) as fill level */
+	HistoryItem	items[FLEXIBLE_ARRAY_MEMBER];	/* allocated with HistoryBufferSize slots */
 } History;
 
-typedef enum
+/*
+ * Profile hash key; hashed bytewise (HASH_BLOBS), so any padding must be zeroed
+ */
+typedef struct
 {
-	NO_REQUEST,
-	HISTORY_REQUEST,
-	PROFILE_REQUEST,
-	PROFILE_RESET
-} SHMRequest;
+	int32		pid;			/* pid of the observed process */
+	uint32		wait_event_info;/* wait event the process was sampled in */
+	pgwsQueryId	queryid;		/* query identifier */
+} ProfileHashKey;
 
+/*
+ * Wait statistics entry stored in pgws_profile_hash
+ */
 typedef struct
 {
-	Latch		   *latch;
-	SHMRequest		request;
-	int				historySize;
-	int				historyPeriod;
-	int				profilePeriod;
-	bool			profilePid;
-	bool			profileQueries;
-} CollectorShmqHeader;
+	ProfileHashKey	key;		/* hash key of entry - MUST BE FIRST */
+	int64			counter;	/* cumulative sample counter for this entry */
+	double			usage;		/* usage factor; presumably ranks entries for eviction - confirm in collector.c */
+} ProfileHashEntry;
 
 /* pg_wait_sampling.c */
-extern CollectorShmqHeader *pgws_collector_hdr;
-extern shm_mq			   *pgws_collector_mq;
-extern uint64			   *pgws_proc_queryids;
-extern void pgws_init_lock_tag(LOCKTAG *tag, uint32 lock);
+extern pgwsQueryId	*pgws_proc_queryids;
+extern HTAB 		*pgws_profile_hash;
+extern LWLock 		*pgws_profile_lock;
+extern History		*pgws_history_ring;
+extern LWLock		*pgws_history_lock;
+
+/* global settings */
+extern int MaxProfileEntries;
+extern int HistoryBufferSize;
+extern int HistoryPeriod;
+extern int ProfilePeriod;
+extern bool WhetherProfilePid;
+extern bool WhetherProfileQueryId;
 
 /* collector.c */
-extern void pgws_register_wait_collector(void);
 extern PGDLLEXPORT void pgws_collector_main(Datum main_arg);
 
 #endif