Thanks to visit codestin.com
Credit goes to doxygen.postgresql.org

PostgreSQL Source Code git master
slotsync.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 * slotsync.c
3 * Functionality for synchronizing slots to a standby server from the
4 * primary server.
5 *
6 * Copyright (c) 2024-2025, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/replication/logical/slotsync.c
10 *
11 * This file contains the code for slot synchronization on a physical standby
12 * to fetch logical failover slots information from the primary server, create
13 * the slots on the standby and synchronize them periodically.
14 *
15 * Slot synchronization can be performed either automatically by enabling slot
16 * sync worker or manually by calling SQL function pg_sync_replication_slots().
17 *
18 * If the WAL corresponding to the remote's restart_lsn is not available on the
19 * physical standby, or the remote's catalog_xmin precedes the oldest xid for
20 * which it is guaranteed that rows wouldn't have been removed, then we cannot
21 * make the local standby slot sync-ready, because that would mean moving the
22 * local slot backward, and decoding won't be possible via such a slot. In this
23 * case, the slot will be marked as RS_TEMPORARY. Once the primary server
24 * catches up, the slot will be marked as RS_PERSISTENT (which means
25 * sync-ready), after which the slot sync worker can perform the sync
26 * periodically, or the user can call pg_sync_replication_slots() periodically.
27 *
28 * If synchronized slots fail to build a consistent snapshot from the
29 * restart_lsn before reaching confirmed_flush_lsn, they would become
30 * unreliable after promotion due to potential data loss from changes
31 * before reaching a consistent point. This can happen because the slots can
32 * be synced at some random time and we may not reach the consistent point
33 * at the same WAL location as the primary. So, we mark such slots as
34 * RS_TEMPORARY. Once the decoding from corresponding LSNs can reach a
35 * consistent point, they will be marked as RS_PERSISTENT.
36 *
37 * The slot sync worker waits for some time before the next synchronization,
38 * with the duration varying based on whether any slots were updated during
39 * the last cycle. Refer to the comments above wait_for_slot_activity() for
40 * more details.
41 *
42 * Any standby synchronized slots will be dropped if they no longer need
43 * to be synchronized. See comment atop drop_local_obsolete_slots() for more
44 * details.
45 *---------------------------------------------------------------------------
46 */
47
48#include "postgres.h"
49
50#include <time.h>
51
53#include "access/xlogrecovery.h"
54#include "catalog/pg_database.h"
55#include "libpq/pqsignal.h"
56#include "pgstat.h"
58#include "replication/logical.h"
61#include "storage/ipc.h"
62#include "storage/lmgr.h"
63#include "storage/proc.h"
64#include "storage/procarray.h"
65#include "tcop/tcopprot.h"
66#include "utils/builtins.h"
67#include "utils/pg_lsn.h"
68#include "utils/ps_status.h"
69#include "utils/timeout.h"
70
71/*
72 * Struct for sharing information to control slot synchronization.
73 *
74 * The slot sync worker's pid is needed by the startup process to shut the
75 * worker down during promotion. The startup process shuts down the slot sync
76 * worker and also sets stopSignaled=true to handle the race condition where
77 * the postmaster has not noticed the promotion yet and thus may end up
78 * restarting the slot sync worker. If stopSignaled is set, the worker will exit
79 * in such a case. The SQL function pg_sync_replication_slots() will also error
80 * out if this flag is set. Note that we don't need to reset this variable:
81 * after promotion the slot sync worker won't be restarted, because the pmState
82 * changes to PM_RUN from PM_HOT_STANDBY and we don't support demoting a
83 * primary without restarting the server. See LaunchMissingBackgroundProcesses.
84 *
85 * The 'syncing' flag is needed to prevent concurrent slot syncs, and thereby
86 * to avoid slot overwrites.
87 *
88 * The 'last_start_time' is needed by postmaster to start the slot sync worker
89 * once per SLOTSYNC_RESTART_INTERVAL_SEC. In cases where an immediate restart
90 * is expected (e.g., slot sync GUCs change), the slot sync worker will reset
91 * last_start_time before exiting, so that postmaster can start the worker
92 * without waiting for SLOTSYNC_RESTART_INTERVAL_SEC.
93 */
94typedef struct SlotSyncCtxStruct
95{
96 pid_t pid; /* pid of the slot sync worker */
98 bool syncing; /* guards against concurrent slot syncs */
100 slock_t mutex;
102
104
105/* GUC variable */
107
108/*
109 * The sleep time (ms) between slot-sync cycles varies dynamically
110 * (within this MIN/MAX range) according to slot activity. See
111 * wait_for_slot_activity() for details.
112 */
113#define MIN_SLOTSYNC_WORKER_NAPTIME_MS 200
114#define MAX_SLOTSYNC_WORKER_NAPTIME_MS 30000 /* 30s */
115
117
118/* The restart interval for the slot sync worker, used by postmaster */
119#define SLOTSYNC_RESTART_INTERVAL_SEC 10
120
121/*
122 * Flag to tell if we are syncing replication slots. Unlike the 'syncing' flag
123 * in SlotSyncCtxStruct, this flag is process-local: it is true only if the
124 * current process is the one performing slot synchronization.
125 */
126static bool syncing_slots = false;
127
128/*
129 * Structure to hold the information fetched from the primary server about one
130 * logical replication slot.
131 */
132typedef struct RemoteSlot
133{
134 char *name; /* slot name on the primary */
135 char *plugin; /* output plugin name */
136 char *database; /* database name (resolved to an OID before syncing) */
143
144 /* RS_INVAL_NONE if valid, or the reason for invalidation */
147
148static void slotsync_failure_callback(int code, Datum arg);
149static void update_synced_slots_inactive_since(void);
150
151/*
152 * If necessary, update the local synced slot's metadata based on the data
153 * from the remote slot.
154 *
155 * If no update was needed (the data of the remote slot is the same as the
156 * local slot) return false, otherwise true.
157 *
158 * *found_consistent_snapshot is set to true iff the remote slot's LSN or xmin
159 * was modified and decoding from the corresponding LSNs can reach a
160 * consistent snapshot.
161 *
162 * *remote_slot_precedes is set to true if the remote slot's LSN or xmin
163 * precedes the locally reserved position. Either output pointer may be NULL.
164 */
165static bool
166update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
167 bool *found_consistent_snapshot,
168 bool *remote_slot_precedes)
169{
171 bool updated_xmin_or_lsn = false;
172 bool updated_config = false;
173
175
176 if (found_consistent_snapshot)
177 *found_consistent_snapshot = false;
178
179 if (remote_slot_precedes)
180 *remote_slot_precedes = false;
181
182 /*
183 * Don't overwrite if we already have a newer catalog_xmin and
184 * restart_lsn.
185 */
186 if (remote_slot->restart_lsn < slot->data.restart_lsn ||
188 slot->data.catalog_xmin))
189 {
190 /*
191 * This can happen in the following situations:
192 *
193 * If the slot is temporary, it means either the initial WAL location
194 * reserved for the local slot is ahead of the remote slot's
195 * restart_lsn or the initial xmin_horizon computed for the local slot
196 * is ahead of the remote slot.
197 *
198 * If the slot is persistent, both restart_lsn and catalog_xmin of the
199 * synced slot could still be ahead of the remote slot. Since we use
200 * slot advance functionality to keep snapbuild/slot updated, it is
201 * possible that the restart_lsn and catalog_xmin are advanced to a
202 * later position than they have on the primary. This can happen when
203 * slot advancing machinery finds running xacts record after reaching
204 * the consistent state at a later point than the primary where it
205 * serializes the snapshot and updates the restart_lsn.
206 *
207 * We LOG the message if the slot is temporary as it can help the user
208 * to understand why the slot is not sync-ready. In the case of a
209 * persistent slot, it would be a more common case and won't directly
210 * impact the users, so we use the DEBUG1 level to log the message.
211 */
213 errmsg("could not synchronize replication slot \"%s\"",
214 remote_slot->name),
215 errdetail("Synchronization could lead to data loss, because the remote slot needs WAL at LSN %X/%08X and catalog xmin %u, but the standby has LSN %X/%08X and catalog xmin %u.",
216 LSN_FORMAT_ARGS(remote_slot->restart_lsn),
217 remote_slot->catalog_xmin,
219 slot->data.catalog_xmin));
220
221 if (remote_slot_precedes)
222 *remote_slot_precedes = true;
223
224 /*
225 * Skip updating the configuration. This is required to avoid syncing
226 * two_phase_at without syncing confirmed_lsn. Otherwise, the prepared
227 * transaction between old confirmed_lsn and two_phase_at will
228 * unexpectedly get decoded and sent to the downstream after
229 * promotion. See comments in ReorderBufferFinishPrepared.
230 */
231 return false;
232 }
233
234 /*
235 * Attempt to sync LSNs and xmins only if the remote slot is ahead of the
236 * local slot.
237 */
238 if (remote_slot->confirmed_lsn > slot->data.confirmed_flush ||
239 remote_slot->restart_lsn > slot->data.restart_lsn ||
241 slot->data.catalog_xmin))
242 {
243 /*
244 * We can't directly copy the remote slot's LSN or xmin unless there
245 * exists a consistent snapshot at that point. Otherwise, after
246 * promotion, the slots may not reach a consistent point before the
247 * confirmed_flush_lsn, which can lead to data loss. To avoid data
248 * loss, we let slot machinery advance the slot which ensures that
249 * snapbuilder/slot statuses are updated properly.
250 */
251 if (SnapBuildSnapshotExists(remote_slot->restart_lsn))
252 {
253 /*
254 * Update the slot info directly if there is a serialized snapshot
255 * at the restart_lsn, as the slot can quickly reach consistency
256 * at restart_lsn by restoring the snapshot.
257 */
258 SpinLockAcquire(&slot->mutex);
259 slot->data.restart_lsn = remote_slot->restart_lsn;
260 slot->data.confirmed_flush = remote_slot->confirmed_lsn;
261 slot->data.catalog_xmin = remote_slot->catalog_xmin;
262 SpinLockRelease(&slot->mutex);
263
264 if (found_consistent_snapshot)
265 *found_consistent_snapshot = true;
266 }
267 else
268 {
270 found_consistent_snapshot);
271
272 /* Sanity check: the local confirmed_flush must now match the remote */
273 if (slot->data.confirmed_flush != remote_slot->confirmed_lsn)
275 errmsg_internal("synchronized confirmed_flush for slot \"%s\" differs from remote slot",
276 remote_slot->name),
277 errdetail_internal("Remote slot has LSN %X/%08X but local slot has LSN %X/%08X.",
278 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
280 }
281
282 updated_xmin_or_lsn = true;
283 }
284
285 if (remote_dbid != slot->data.database ||
286 remote_slot->two_phase != slot->data.two_phase ||
287 remote_slot->failover != slot->data.failover ||
288 strcmp(remote_slot->plugin, NameStr(slot->data.plugin)) != 0 ||
289 remote_slot->two_phase_at != slot->data.two_phase_at)
290 {
291 NameData plugin_name;
292
293 /* Avoid expensive operations while holding a spinlock. */
294 namestrcpy(&plugin_name, remote_slot->plugin);
295
296 SpinLockAcquire(&slot->mutex);
297 slot->data.plugin = plugin_name;
298 slot->data.database = remote_dbid;
299 slot->data.two_phase = remote_slot->two_phase;
300 slot->data.two_phase_at = remote_slot->two_phase_at;
301 slot->data.failover = remote_slot->failover;
302 SpinLockRelease(&slot->mutex);
303
304 updated_config = true;
305
306 /*
307 * Ensure that there is no risk of sending prepared transactions
308 * unexpectedly after the promotion.
309 */
311 }
312
313 /*
314 * We have to write the changed xmin to disk *before* we change the
315 * in-memory value, otherwise after a crash we wouldn't know that some
316 * catalog tuples might have been removed already.
317 */
318 if (updated_config || updated_xmin_or_lsn)
319 {
322 }
323
324 /*
325 * Now that the new xmin is safely on disk, we can let the global value
326 * advance. We do not take ProcArrayLock or similar since we only advance
327 * xmin here and there's not much harm done by a concurrent computation
328 * missing that.
329 */
330 if (updated_xmin_or_lsn)
331 {
332 SpinLockAcquire(&slot->mutex);
333 slot->effective_catalog_xmin = remote_slot->catalog_xmin;
334 SpinLockRelease(&slot->mutex);
335
338 }
339
340 return updated_config || updated_xmin_or_lsn;
341}
342
343/*
344 * Get the list of local logical slots that are synchronized from the
345 * primary server, i.e. the slots that are in use and marked as synced.
346 */
347static List *
349{
350 List *local_slots = NIL;
351
352 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
353
354 for (int i = 0; i < max_replication_slots; i++)
355 {
357
358 /* Collect the slot only if it is an in-use, synchronized slot */
359 if (s->in_use && s->data.synced)
360 {
362 local_slots = lappend(local_slots, s);
363 }
364 }
365
366 LWLockRelease(ReplicationSlotControlLock);
367
368 return local_slots;
369}
370
371/*
372 * Helper function to check if local_slot is required to be retained.
373 *
374 * Return false if local_slot either has no same-named entry in the
375 * remote_slots list or is invalidated locally while the corresponding remote
376 * slot is still valid; otherwise return true.
377 */
378static bool
380{
381 bool remote_exists = false;
382 bool locally_invalidated = false;
383
384 foreach_ptr(RemoteSlot, remote_slot, remote_slots)
385 {
386 if (strcmp(remote_slot->name, NameStr(local_slot->data.name)) == 0)
387 {
388 remote_exists = true;
389
390 /*
391 * If the remote slot is not invalidated but the local slot is marked
392 * as invalidated, then set the locally_invalidated flag.
393 */
394 SpinLockAcquire(&local_slot->mutex);
395 locally_invalidated =
396 (remote_slot->invalidated == RS_INVAL_NONE) &&
397 (local_slot->data.invalidated != RS_INVAL_NONE);
398 SpinLockRelease(&local_slot->mutex);
399
400 break;
401 }
402 }
403
404 return (remote_exists && !locally_invalidated);
405}
406
407/*
408 * Drop local obsolete slots.
409 *
410 * Drop the local slots that no longer need to be synced, i.e., slots that
411 * either do not exist on the primary or are no longer enabled for failover.
412 *
413 * Additionally, drop any slots that are valid on the primary but got
414 * invalidated on the standby. This situation may occur due to the following
415 * reasons:
416 * - The 'max_slot_wal_keep_size' on the standby is insufficient to retain WAL
417 * records from the restart_lsn of the slot.
418 * - 'primary_slot_name' is temporarily reset to null and the physical slot is
419 * removed.
420 * These dropped slots will get recreated in the next sync-cycle, and it is okay
421 * to drop and recreate such slots as long as these are not consumable on the
422 * standby (which is the case currently).
423 *
424 * Note: Change of 'wal_level' on the primary server to a level lower than
425 * logical may also result in slot invalidation and removal on the standby.
426 * This is because such a 'wal_level' change is only possible if the logical
427 * slots are removed on the primary server, so it's expected to see the
428 * slots being invalidated and removed on the standby too (and re-created
429 * if they are re-created on the primary server).
430 */
431static void
433{
434 List *local_slots = get_local_synced_slots();
435
436 foreach_ptr(ReplicationSlot, local_slot, local_slots)
437 {
438 /* Drop the local slot if it is not required to be retained. */
439 if (!local_sync_slot_required(local_slot, remote_slot_list))
440 {
441 bool synced_slot;
442
443 /*
444 * Use shared lock to prevent a conflict with
445 * ReplicationSlotsDropDBSlots(), trying to drop the same slot
446 * during a drop-database operation.
447 */
448 LockSharedObject(DatabaseRelationId, local_slot->data.database,
449 0, AccessShareLock);
450
451 /*
452 * In the small window between getting the slot to drop and
453 * locking the database, there is a possibility of a parallel
454 * database drop by the startup process and the creation of a new
455 * slot by the user. This new user-created slot may end up using
456 * the same shared memory as that of 'local_slot'. Thus, check that
457 * local_slot is still the synced one before performing the actual
458 * drop.
459 */
460 SpinLockAcquire(&local_slot->mutex);
461 synced_slot = local_slot->in_use && local_slot->data.synced;
462 SpinLockRelease(&local_slot->mutex);
463
464 if (synced_slot)
465 {
466 ReplicationSlotAcquire(NameStr(local_slot->data.name), true, false);
468 }
469
470 UnlockSharedObject(DatabaseRelationId, local_slot->data.database,
471 0, AccessShareLock);
472 /* NOTE(review): this LOG is emitted even when synced_slot is false, i.e. when the drop was skipped -- confirm this is intended. */
473 ereport(LOG,
474 errmsg("dropped replication slot \"%s\" of database with OID %u",
475 NameStr(local_slot->data.name),
476 local_slot->data.database));
477 }
478 }
479}
480
481/*
482 * Reserve WAL for the currently active local slot using the specified WAL
483 * location (restart_lsn).
484 *
485 * If the given WAL location has already been removed, reserve WAL starting
486 * from the oldest existing WAL segment instead.
487 */
488static void
490{
491 XLogSegNo oldest_segno;
492 XLogSegNo segno;
494
495 Assert(slot != NULL);
497
498 while (true)
499 {
500 SpinLockAcquire(&slot->mutex);
501 slot->data.restart_lsn = restart_lsn;
502 SpinLockRelease(&slot->mutex);
503
504 /* Prevent WAL removal as fast as possible */
506
508
509 /*
510 * Find the oldest existing WAL segment file.
511 *
512 * Normally, we can determine it by using the last removed segment
513 * number. However, if no WAL segment files have been removed by a
514 * checkpoint since startup, we need to search for the oldest segment
515 * file from the current timeline existing in XLOGDIR.
516 *
517 * XXX: Currently, we are searching for the oldest segment in the
518 * current timeline, as there is little chance of the slot's restart_lsn
519 * being on some prior timeline, and even if that happens, in the
520 * worst case, we will wait to sync till the slot's restart_lsn has moved
521 * to the current timeline.
522 */
523 oldest_segno = XLogGetLastRemovedSegno() + 1;
524
525 if (oldest_segno == 1)
526 {
527 TimeLineID cur_timeline;
528
529 GetWalRcvFlushRecPtr(NULL, &cur_timeline);
530 oldest_segno = XLogGetOldestSegno(cur_timeline);
531 }
532
533 elog(DEBUG1, "segno: " UINT64_FORMAT " of purposed restart_lsn for the synced slot, oldest_segno: " UINT64_FORMAT " available",
534 segno, oldest_segno);
535
536 /*
537 * If all required WAL is still there, great, otherwise retry. The
538 * slot should prevent further removal of WAL, unless there's a
539 * concurrent ReplicationSlotsComputeRequiredLSN() after we've written
540 * the new restart_lsn above, so normally we should never need to loop
541 * more than twice.
542 */
543 if (segno >= oldest_segno)
544 break;
545
546 /* Retry using the start location of the oldest WAL segment */
547 XLogSegNoOffsetToRecPtr(oldest_segno, 0, wal_segment_size, restart_lsn);
548 }
549}
550
551/*
552 * If the remote restart_lsn and catalog_xmin have caught up with the
553 * local ones, then update the LSNs and persist the local synced slot for
554 * future synchronization; otherwise, do nothing.
555 *
556 * Return true if the slot gets marked as RS_PERSISTENT (sync-ready);
557 * otherwise return false.
558 */
559static bool
561{
563 bool found_consistent_snapshot = false;
564 bool remote_slot_precedes = false;
565
566 (void) update_local_synced_slot(remote_slot, remote_dbid,
567 &found_consistent_snapshot,
568 &remote_slot_precedes);
569
570 /*
571 * Check if the primary server has caught up. Refer to the comment atop
572 * the file for details on this check.
573 */
574 if (remote_slot_precedes)
575 {
576 /*
577 * The remote slot has not caught up to the locally reserved position.
578 *
579 * We do not drop the slot, because when it is recreated in the next
580 * cycle the reserved restart_lsn can be ahead of the current location,
581 * and it may then take more time to create such a slot. Therefore, we
582 * keep this slot and attempt the synchronization in the next cycle.
583 */
584 return false;
585 }
586
587 /*
588 * Don't persist the slot if it cannot reach the consistent point from the
589 * restart_lsn. See comments atop this file.
590 */
591 if (!found_consistent_snapshot)
592 {
593 ereport(LOG,
594 errmsg("could not synchronize replication slot \"%s\"", remote_slot->name),
595 errdetail("Synchronization could lead to data loss, because the standby could not build a consistent snapshot to decode WALs at LSN %X/%08X.",
597
598 return false;
599 }
600
602
603 ereport(LOG,
604 errmsg("newly created replication slot \"%s\" is sync-ready now",
605 remote_slot->name));
606
607 return true;
608}
609
610/*
611 * Synchronize a single slot to the given position.
612 *
613 * This creates a new slot if there is no existing one and updates the
614 * metadata of the slot as per the data received from the primary server.
615 *
616 * The slot is created as a temporary slot and stays in that state until the
617 * remote_slot catches up with the locally reserved position and the local slot
618 * is updated. The slot is then persisted and is considered sync-ready for
619 * periodic syncs.
620 *
621 * Returns true if the local slot is updated.
622 */
623static bool
624synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
625{
626 ReplicationSlot *slot;
627 XLogRecPtr latestFlushPtr;
628 bool slot_updated = false;
629
630 /*
631 * Make sure that the concerned WAL is received and flushed before syncing
632 * the slot to the target lsn received from the primary server.
633 */
634 latestFlushPtr = GetStandbyFlushRecPtr(NULL);
635 if (remote_slot->confirmed_lsn > latestFlushPtr)
636 {
637 /*
638 * Can get here only if GUC 'synchronized_standby_slots' on the
639 * primary server was not configured correctly.
640 */
642 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
643 errmsg("skipping slot synchronization because the received slot sync"
644 " LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X",
645 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
646 remote_slot->name,
647 LSN_FORMAT_ARGS(latestFlushPtr)));
648
649 return false;
650 }
651
652 /* Search for the named slot */
653 if ((slot = SearchNamedReplicationSlot(remote_slot->name, true)))
654 {
655 bool synced;
656
657 SpinLockAcquire(&slot->mutex);
658 synced = slot->data.synced;
659 SpinLockRelease(&slot->mutex);
660
661 /* User-created slot with the same name exists, raise ERROR. */
662 if (!synced)
664 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
665 errmsg("exiting from slot synchronization because same"
666 " name slot \"%s\" already exists on the standby",
667 remote_slot->name));
668
669 /*
670 * The slot has been synchronized before.
671 *
672 * It is important to acquire the slot here before checking
673 * invalidation. If we don't acquire the slot first, there is a race
674 * in which the local slot could be invalidated just after we check
675 * the 'invalidated' flag here, and we could end up overwriting the
676 * 'invalidated' flag with remote_slot's value. See
677 * InvalidatePossiblyObsoleteSlot() where it invalidates slot directly
678 * if the slot is not acquired by other processes.
679 *
680 * XXX: If it ever turns out that slot acquire/release is costly for
681 * cases when none of the slot properties is changed then we can do a
682 * pre-check to ensure that at least one of the slot properties is
683 * changed before acquiring the slot.
684 */
685 ReplicationSlotAcquire(remote_slot->name, true, false);
686
687 Assert(slot == MyReplicationSlot);
688
689 /*
690 * Copy the invalidation cause from the remote slot only if the local
691 * slot is not already invalidated; we don't overwrite an existing cause.
692 */
693 if (slot->data.invalidated == RS_INVAL_NONE &&
694 remote_slot->invalidated != RS_INVAL_NONE)
695 {
696 SpinLockAcquire(&slot->mutex);
697 slot->data.invalidated = remote_slot->invalidated;
698 SpinLockRelease(&slot->mutex);
699
700 /* Make sure the invalidated state persists across server restart */
703
704 slot_updated = true;
705 }
706
707 /* Skip the sync of an invalidated slot */
708 if (slot->data.invalidated != RS_INVAL_NONE)
709 {
711 return slot_updated;
712 }
713
714 /* Slot not ready yet, let's attempt to make it sync-ready now. */
715 if (slot->data.persistency == RS_TEMPORARY)
716 {
717 slot_updated = update_and_persist_local_synced_slot(remote_slot,
718 remote_dbid);
719 }
720
721 /* Slot ready for sync, so sync it. */
722 else
723 {
724 /*
725 * Sanity check: As long as the invalidations are handled
726 * appropriately as above, this should never happen.
727 *
728 * We don't need to check restart_lsn here. See the comments in
729 * update_local_synced_slot() for details.
730 */
731 if (remote_slot->confirmed_lsn < slot->data.confirmed_flush)
733 errmsg_internal("cannot synchronize local slot \"%s\"",
734 remote_slot->name),
735 errdetail_internal("Local slot's start streaming location LSN(%X/%08X) is ahead of remote slot's LSN(%X/%08X).",
737 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn)));
738
739 slot_updated = update_local_synced_slot(remote_slot, remote_dbid,
740 NULL, NULL);
741 }
742 }
743 /* Otherwise create the slot first. */
744 else
745 {
746 NameData plugin_name;
747 TransactionId xmin_horizon = InvalidTransactionId;
748
749 /* Skip creating the local slot if remote_slot is invalidated already */
750 if (remote_slot->invalidated != RS_INVAL_NONE)
751 return false;
752
753 /*
754 * We create temporary slots instead of ephemeral slots here because
755 * we want the slots to survive after releasing them. This is done to
756 * avoid dropping and re-creating the slots in each synchronization
757 * cycle if the restart_lsn or catalog_xmin of the remote slot has not
758 * caught up.
759 */
760 ReplicationSlotCreate(remote_slot->name, true, RS_TEMPORARY,
761 remote_slot->two_phase,
762 remote_slot->failover,
763 true);
764
765 /* For shorter lines. */
766 slot = MyReplicationSlot;
767
768 /* Avoid expensive operations while holding a spinlock. */
769 namestrcpy(&plugin_name, remote_slot->plugin);
770
771 SpinLockAcquire(&slot->mutex);
772 slot->data.database = remote_dbid;
773 slot->data.plugin = plugin_name;
774 SpinLockRelease(&slot->mutex);
775
777
778 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
779 xmin_horizon = GetOldestSafeDecodingTransactionId(true);
780 SpinLockAcquire(&slot->mutex);
781 slot->effective_catalog_xmin = xmin_horizon;
782 slot->data.catalog_xmin = xmin_horizon;
783 SpinLockRelease(&slot->mutex);
785 LWLockRelease(ProcArrayLock);
786
787 update_and_persist_local_synced_slot(remote_slot, remote_dbid);
788
789 slot_updated = true;
790 }
791
793
794 return slot_updated;
795}
796
797/*
798 * Synchronize slots.
799 *
800 * Gets the failover logical slots info from the primary server and updates
801 * the slots locally. Creates the slots if not present on the standby.
802 *
803 * Returns true if any of the slots gets updated in this sync-cycle.
804 */
805static bool
807{
808#define SLOTSYNC_COLUMN_COUNT 10
809 Oid slotRow[SLOTSYNC_COLUMN_COUNT] = {TEXTOID, TEXTOID, LSNOID,
810 LSNOID, XIDOID, BOOLOID, LSNOID, BOOLOID, TEXTOID, TEXTOID};
811
812 WalRcvExecResult *res;
813 TupleTableSlot *tupslot;
814 List *remote_slot_list = NIL;
815 bool some_slot_updated = false;
816 bool started_tx = false;
817 const char *query = "SELECT slot_name, plugin, confirmed_flush_lsn,"
818 " restart_lsn, catalog_xmin, two_phase, two_phase_at, failover,"
819 " database, invalidation_reason"
820 " FROM pg_catalog.pg_replication_slots"
821 " WHERE failover and NOT temporary";
822
823 /* The syscache access in walrcv_exec() needs a transaction env. */
824 if (!IsTransactionState())
825 {
827 started_tx = true;
828 }
829
830 /* Execute the query */
831 res = walrcv_exec(wrconn, query, SLOTSYNC_COLUMN_COUNT, slotRow);
832 if (res->status != WALRCV_OK_TUPLES)
834 errmsg("could not fetch failover logical slots info from the primary server: %s",
835 res->err));
836
837 /* Build a RemoteSlot from each result tuple; columns are read in query order */
839 while (tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
840 {
841 bool isnull;
842 RemoteSlot *remote_slot = palloc0(sizeof(RemoteSlot));
843 Datum d;
844 int col = 0;
845
846 remote_slot->name = TextDatumGetCString(slot_getattr(tupslot, ++col,
847 &isnull));
848 Assert(!isnull);
849
850 remote_slot->plugin = TextDatumGetCString(slot_getattr(tupslot, ++col,
851 &isnull));
852 Assert(!isnull);
853
854 /*
855 * It is possible to get null values for LSN and Xmin if the slot is
856 * invalidated on the primary server, so handle accordingly.
857 */
858 d = slot_getattr(tupslot, ++col, &isnull);
859 remote_slot->confirmed_lsn = isnull ? InvalidXLogRecPtr :
860 DatumGetLSN(d);
861
862 d = slot_getattr(tupslot, ++col, &isnull);
863 remote_slot->restart_lsn = isnull ? InvalidXLogRecPtr : DatumGetLSN(d);
864
865 d = slot_getattr(tupslot, ++col, &isnull);
866 remote_slot->catalog_xmin = isnull ? InvalidTransactionId :
868
869 remote_slot->two_phase = DatumGetBool(slot_getattr(tupslot, ++col,
870 &isnull));
871 Assert(!isnull);
872
873 d = slot_getattr(tupslot, ++col, &isnull);
874 remote_slot->two_phase_at = isnull ? InvalidXLogRecPtr : DatumGetLSN(d);
875
876 remote_slot->failover = DatumGetBool(slot_getattr(tupslot, ++col,
877 &isnull));
878 Assert(!isnull);
879
880 remote_slot->database = TextDatumGetCString(slot_getattr(tupslot,
881 ++col, &isnull));
882 Assert(!isnull);
883
884 d = slot_getattr(tupslot, ++col, &isnull);
885 remote_slot->invalidated = isnull ? RS_INVAL_NONE :
887
888 /* Sanity check */
890
891 /*
892 * If restart_lsn, confirmed_lsn or catalog_xmin is invalid but the
893 * slot is valid, that means we have fetched the remote_slot in its
894 * RS_EPHEMERAL state. In such a case, don't sync it; we can always
895 * sync it in the next sync cycle when the remote_slot is persisted
896 * and has valid lsn(s) and xmin values.
897 *
898 * XXX: In future, if we plan to expose 'slot->data.persistency' in
899 * pg_replication_slots view, then we can avoid fetching RS_EPHEMERAL
900 * slots in the first place.
901 */
902 if ((XLogRecPtrIsInvalid(remote_slot->restart_lsn) ||
903 XLogRecPtrIsInvalid(remote_slot->confirmed_lsn) ||
904 !TransactionIdIsValid(remote_slot->catalog_xmin)) &&
905 remote_slot->invalidated == RS_INVAL_NONE)
906 pfree(remote_slot);
907 else
908 /* Add the slot to the list of remote slots to be synced */
909 remote_slot_list = lappend(remote_slot_list, remote_slot);
910
911 ExecClearTuple(tupslot);
912 }
913
914 /* Drop local slots that no longer need to be synced. */
915 drop_local_obsolete_slots(remote_slot_list);
916
917 /* Now sync the slots locally */
918 foreach_ptr(RemoteSlot, remote_slot, remote_slot_list)
919 {
920 Oid remote_dbid = get_database_oid(remote_slot->database, false);
921
922 /*
923 * Use shared lock to prevent a conflict with
924 * ReplicationSlotsDropDBSlots(), trying to drop the same slot during
925 * a drop-database operation.
926 */
927 LockSharedObject(DatabaseRelationId, remote_dbid, 0, AccessShareLock);
928
929 some_slot_updated |= synchronize_one_slot(remote_slot, remote_dbid);
930
931 UnlockSharedObject(DatabaseRelationId, remote_dbid, 0, AccessShareLock);
932 }
933
934 /* We are done, free remote_slot_list elements */
935 list_free_deep(remote_slot_list);
936
938
939 if (started_tx)
941
942 return some_slot_updated;
943}
944
945/*
946 * Checks the remote server info.
947 *
948 * We ensure that the slot named by 'primary_slot_name' exists on the remote
949 * server and that the remote server is not itself a standby; else, ERROR out.
950 */
951static void
953{
954#define PRIMARY_INFO_OUTPUT_COL_COUNT 2
955 WalRcvExecResult *res;
956 Oid slotRow[PRIMARY_INFO_OUTPUT_COL_COUNT] = {BOOLOID, BOOLOID};
957 StringInfoData cmd;
958 bool isnull;
959 TupleTableSlot *tupslot;
960 bool remote_in_recovery;
961 bool primary_slot_valid;
962 bool started_tx = false;
963
964 initStringInfo(&cmd);
965 appendStringInfo(&cmd,
966 "SELECT pg_is_in_recovery(), count(*) = 1"
967 " FROM pg_catalog.pg_replication_slots"
968 " WHERE slot_type='physical' AND slot_name=%s",
970
971 /* The syscache access in walrcv_exec() needs a transaction env. */
972 if (!IsTransactionState())
973 {
975 started_tx = true;
976 }
977
979 pfree(cmd.data);
980
981 if (res->status != WALRCV_OK_TUPLES)
983 errmsg("could not fetch primary slot name \"%s\" info from the primary server: %s",
984 PrimarySlotName, res->err),
985 errhint("Check if \"primary_slot_name\" is configured correctly."));
986
988 if (!tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
989 elog(ERROR,
990 "failed to fetch tuple for the primary server slot specified by \"primary_slot_name\"");
991
992 remote_in_recovery = DatumGetBool(slot_getattr(tupslot, 1, &isnull));
993 Assert(!isnull);
994
995 /*
996 * Slot sync is currently not supported on a cascading standby. This is
997 * because, if we allowed it, the primary server would need to wait for all
998 * the cascading standbys; otherwise, logical subscribers could still be
999 * ahead of one of the cascading standbys which we plan to promote. Thus, to
1000 * avoid this additional complexity, we restrict it for the time being.
1001 */
1002 if (remote_in_recovery)
1003 ereport(ERROR,
1004 errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1005 errmsg("cannot synchronize replication slots from a standby server"));
1006
1007 primary_slot_valid = DatumGetBool(slot_getattr(tupslot, 2, &isnull));
1008 Assert(!isnull);
1009
1010 if (!primary_slot_valid)
1011 ereport(ERROR,
1012 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1013 /* translator: second %s is a GUC variable name */
1014 errmsg("replication slot \"%s\" specified by \"%s\" does not exist on primary server",
1015 PrimarySlotName, "primary_slot_name"));
1016
1017 ExecClearTuple(tupslot);
1019
1020 if (started_tx)
1022}
1023
1024/*
1025 * Checks if dbname is specified in 'primary_conninfo'.
1026 *
1027 * Raises an ERROR (ERRCODE_INVALID_PARAMETER_VALUE) if it is not specified;
 * otherwise returns the database name.
1028 */
1029char *
1031{
1032 char *dbname;
1033
1034 /*
1035 * The slot synchronization needs a database connection for walrcv_exec to
1036 * work.
1037 */
1039 if (dbname == NULL)
1040 ereport(ERROR,
1041 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1042
1043 /*
1044 * translator: first %s is a connection option; second %s is a GUC
1045 * variable name
1046 */
1047 errmsg("replication slot synchronization requires \"%s\" to be specified in \"%s\"",
1048 "dbname", "primary_conninfo"));
1049 return dbname;
1050}
1051
1052/*
1053 * Return true if all necessary GUCs for slot synchronization are set
1054 * appropriately, otherwise, return false.
 *
 * The checks are made in this order: wal_level, primary_slot_name,
 * hot_standby_feedback, primary_conninfo. The first failing check is
 * reported with ereport() at the caller-supplied 'elevel'; false is returned
 * if that call returns.
1055 */
1056bool
1058{
1059 /*
1060 * Logical slot sync/creation requires wal_level >= logical.
1061 */
1063 {
1064 ereport(elevel,
1065 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1066 errmsg("replication slot synchronization requires \"wal_level\" >= \"logical\""));
1067 return false;
1068 }
1069
1070 /*
1071 * A physical replication slot (primary_slot_name) is required on the
1072 * primary to ensure that the rows needed by the standby are not removed
1073 * after restarting, so that the synchronized slot on the standby will not
1074 * be invalidated.
1075 */
1076 if (PrimarySlotName == NULL || *PrimarySlotName == '\0')
1077 {
1078 ereport(elevel,
1079 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1080 /* translator: %s is a GUC variable name */
1081 errmsg("replication slot synchronization requires \"%s\" to be set", "primary_slot_name"));
1082 return false;
1083 }
1084
1085 /*
1086 * hot_standby_feedback must be enabled to cooperate with the physical
1087 * replication slot, which allows informing the primary about the xmin and
1088 * catalog_xmin values on the standby.
1089 */
1091 {
1092 ereport(elevel,
1093 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1094 /* translator: %s is a GUC variable name */
1095 errmsg("replication slot synchronization requires \"%s\" to be enabled",
1096 "hot_standby_feedback"));
1097 return false;
1098 }
1099
1100 /*
1101 * The primary_conninfo is required to make a connection to the primary
1102 * for getting slots information.
1103 */
1104 if (PrimaryConnInfo == NULL || *PrimaryConnInfo == '\0')
1105 {
1106 ereport(elevel,
1107 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1108 /* translator: %s is a GUC variable name */
1109 errmsg("replication slot synchronization requires \"%s\" to be set",
1110 "primary_conninfo"));
1111 return false;
1112 }
1113
1114 return true;
1115}
1116
1117/*
1118 * Re-read the config file.
1119 *
1120 * Exit if any of the slot sync GUCs have changed. If sync_replication_slots
1121 * changed, the worker logs that it is shutting down. If primary_conninfo,
 * primary_slot_name or hot_standby_feedback changed, the worker logs that
 * it will restart and exits so that the postmaster can start it again.
 * Both paths call proc_exit(0).
1122 */
1123static void
1125{
 /* Remember the current settings so that changes can be detected below. */
1126 char *old_primary_conninfo = pstrdup(PrimaryConnInfo);
1127 char *old_primary_slotname = pstrdup(PrimarySlotName);
1128 bool old_sync_replication_slots = sync_replication_slots;
1129 bool old_hot_standby_feedback = hot_standby_feedback;
1130 bool conninfo_changed;
1131 bool primary_slotname_changed;
1132
1134
1135 ConfigReloadPending = false;
1137
 /* Compare the string GUCs by value; the saved copies are then released. */
1138 conninfo_changed = strcmp(old_primary_conninfo, PrimaryConnInfo) != 0;
1139 primary_slotname_changed = strcmp(old_primary_slotname, PrimarySlotName) != 0;
1140 pfree(old_primary_conninfo);
1141 pfree(old_primary_slotname);
1142
 /* A change of sync_replication_slots is checked first and ends the worker. */
1143 if (old_sync_replication_slots != sync_replication_slots)
1144 {
1145 ereport(LOG,
1146 /* translator: %s is a GUC variable name */
1147 errmsg("replication slot synchronization worker will shut down because \"%s\" is disabled", "sync_replication_slots"));
1148 proc_exit(0);
1149 }
1150
1151 if (conninfo_changed ||
1152 primary_slotname_changed ||
1153 (old_hot_standby_feedback != hot_standby_feedback))
1154 {
1155 ereport(LOG,
1156 errmsg("replication slot synchronization worker will restart because of a parameter change"));
1157
1158 /*
1159 * Reset the last-start time for this worker so that the postmaster
1160 * can restart it without waiting for SLOTSYNC_RESTART_INTERVAL_SEC.
1161 */
1163
1164 proc_exit(0);
1165 }
1166
1167}
1168
1169/*
1170 * Interrupt handler for main loop of slot sync worker.
 *
 * When the worker has been asked to stop via SIGINT, this logs the shutdown
 * at LOG level and exits with proc_exit(0).
1171 */
1172static void
1174{
1176
1178 {
1179 ereport(LOG,
1180 errmsg("replication slot synchronization worker is shutting down on receiving SIGINT"));
1181
1182 proc_exit(0);
1183 }
1184
1187}
1188
1189/*
1190 * Connection cleanup function for slotsync worker.
1191 *
1192 * Called on slotsync worker exit. It is registered separately from
 * slotsync_worker_onexit() so that the primary server connection does not
 * have to be made global (see the registration in ReplSlotSyncWorkerMain()).
 *
 * NOTE(review): 'arg' presumably carries the connection to close -- confirm.
1193 */
1194static void
1196{
1198
1200}
1201
1202/*
1203 * Cleanup function for slotsync worker.
1204 *
1205 * Called on slotsync worker exit. Slot release and temporary-slot cleanup
 * come first; the syncing flags are reset only afterwards, for the reason
 * given in the comment at the top of the function body.
1206 */
1207static void
1209{
1210 /*
1211 * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1212 *
1213 * The startup process during promotion invokes ShutDownSlotSync() which
1214 * waits for slot sync to finish and it does that by checking the
1215 * 'syncing' flag. Thus the slot sync worker must be done with slots'
1216 * release and cleanup to avoid any dangling temporary slots or active
1217 * slots before it marks itself as finished syncing.
1218 */
1219
1220 /* Make sure active replication slots are released */
1221 if (MyReplicationSlot != NULL)
1223
1224 /* Also cleanup the temporary slots. */
1226
1228
1230
1231 /*
1232 * If syncing_slots is true, it indicates that the process errored out
1233 * without resetting the flag. So, we need to clean up shared memory and
1234 * reset the flag here.
1235 */
1236 if (syncing_slots)
1237 {
 /* Clear the shared flag first, then the process-local one. */
1238 SlotSyncCtx->syncing = false;
1239 syncing_slots = false;
1240 }
1241
1243}
1244
1245/*
1246 * Sleep for long enough that we believe it's likely that the slots on primary
1247 * get updated.
1248 *
1249 * If there is no slot activity the wait time between sync-cycles will double
1250 * (to a maximum of 30s). If there is some slot activity the wait time between
1251 * sync-cycles is reset to the minimum (200ms).
 *
 * some_slot_updated: the value returned by the preceding synchronize_slots()
 * call in the worker's main loop.
1252 */
1253static void
1254wait_for_slot_activity(bool some_slot_updated)
1255{
1256 int rc;
1257
1258 if (!some_slot_updated)
1259 {
1260 /*
1261 * No slots were updated, so double the sleep time, but not beyond the
1262 * maximum allowable value.
1263 */
1265 }
1266 else
1267 {
1268 /*
1269 * Some slots were updated since the last sleep, so reset the sleep
1270 * time.
1271 */
1273 }
1274
 /* Wait on our latch, using sleep_ms as the timeout. */
1275 rc = WaitLatch(MyLatch,
1277 sleep_ms,
1278 WAIT_EVENT_REPLICATION_SLOTSYNC_MAIN);
1279
 /* The wait ended because the latch was set. */
1280 if (rc & WL_LATCH_SET)
1282}
1283
1284/*
1285 * Emit an error if a promotion or a concurrent sync call is in progress.
1286 * Otherwise, advertise that a sync is in progress.
 *
 * worker_pid is stored in SlotSyncCtx->pid. On success, both the shared
 * SlotSyncCtx->syncing flag and the process-local syncing_slots flag are set.
 *
 * NOTE(review): callers other than the slot sync worker presumably pass
 * InvalidPid, which the assertion below permits -- confirm.
1287 */
1288static void
1290{
1292
1293 /* The worker pid must not be already assigned in SlotSyncCtx */
1294 Assert(worker_pid == InvalidPid || SlotSyncCtx->pid == InvalidPid);
1295
1296 /*
1297 * Emit an error if startup process signaled the slot sync machinery to
1298 * stop. See comments atop SlotSyncCtxStruct.
1299 */
1301 {
1303 ereport(ERROR,
1304 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1305 errmsg("cannot synchronize replication slots when standby promotion is ongoing"));
1306 }
1307
 /* Only one process may be syncing slots at a time. */
1308 if (SlotSyncCtx->syncing)
1309 {
1311 ereport(ERROR,
1312 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1313 errmsg("cannot synchronize replication slots concurrently"));
1314 }
1315
1316 SlotSyncCtx->syncing = true;
1317
1318 /*
1319 * Advertise the required PID so that the startup process can kill the
1320 * slot sync worker on promotion.
1321 */
1322 SlotSyncCtx->pid = worker_pid;
1323
1325
 /* Remember locally that this process set the shared flag. */
1326 syncing_slots = true;
1327}
1328
1329/*
1330 * Reset syncing flag.
 *
 * Clears both the shared SlotSyncCtx->syncing flag and the process-local
 * syncing_slots flag.
1331 */
1332static void
1334{
1336 SlotSyncCtx->syncing = false;
1338
1339 syncing_slots = false;
1340}
1341
1342/*
1343 * The main loop of our worker process.
1344 *
1345 * It connects to the primary server, fetches logical failover slots
1346 * information periodically in order to create and sync the slots.
 *
 * No startup data is expected: startup_data_len is asserted to be 0.
 *
 * This function does not return. The worker leaves only through proc_exit(),
 * either from the error handler set up below or from code it calls.
1347 */
1348void
1349ReplSlotSyncWorkerMain(const void *startup_data, size_t startup_data_len)
1350{
1351 WalReceiverConn *wrconn = NULL;
1352 char *dbname;
1353 char *err;
1354 sigjmp_buf local_sigjmp_buf;
1355 StringInfoData app_name;
1356
1357 Assert(startup_data_len == 0);
1358
1360
1361 init_ps_display(NULL);
1362
1364
1365 /*
1366 * Create a per-backend PGPROC struct in shared memory. We must do this
1367 * before we access any shared memory.
1368 */
1369 InitProcess();
1370
1371 /*
1372 * Early initialization.
1373 */
1374 BaseInit();
1375
1376 Assert(SlotSyncCtx != NULL);
1377
1378 /*
1379 * If an exception is encountered, processing resumes here.
1380 *
1381 * We just need to clean up, report the error, and go away.
1382 *
1383 * If we do not have this handling here, then since this worker process
1384 * operates at the bottom of the exception stack, ERRORs turn into FATALs.
1385 * Therefore, we create our own exception handler to catch ERRORs.
1386 */
1387 if (sigsetjmp(local_sigjmp_buf, 1) != 0)
1388 {
1389 /* since not using PG_TRY, must reset error stack by hand */
1390 error_context_stack = NULL;
1391
1392 /* Prevents interrupts while cleaning up */
1394
1395 /* Report the error to the server log */
1397
1398 /*
1399 * We can now go away. Note that because we called InitProcess, a
1400 * callback was registered to do ProcKill, which will clean up
1401 * necessary state.
1402 */
1403 proc_exit(0);
1404 }
1405
1406 /* We can now handle ereport(ERROR) */
1407 PG_exception_stack = &local_sigjmp_buf;
1408
1409 /* Setup signal handling */
1412 pqsignal(SIGTERM, die);
1415 pqsignal(SIGUSR2, SIG_IGN);
1416 pqsignal(SIGPIPE, SIG_IGN);
1417 pqsignal(SIGCHLD, SIG_DFL);
1418
1420
1421 ereport(LOG, errmsg("slot sync worker started"));
1422
1423 /* Register it as soon as SlotSyncCtx->pid is initialized. */
1425
1426 /*
1427 * Establish the SIGALRM handler and initialize the timeout module. It is
1428 * needed by InitPostgres to register different timeouts.
1429 */
1431
1432 /* Load the libpq-specific functions */
1433 load_file("libpqwalreceiver", false);
1434
1435 /*
1436 * Unblock signals (they were blocked when the postmaster forked us)
1437 */
1438 sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
1439
1440 /*
1441 * Set always-secure search path, so malicious users can't redirect user
1442 * code (e.g. operators).
1443 *
1444 * It's not strictly necessary since we won't be scanning or writing to
1445 * any user table locally, but it's good to retain it here for added
1446 * precaution.
1447 */
1448 SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);
1449
1451
1452 /*
1453 * Connect to the database specified by the user in primary_conninfo. We
1454 * need a database connection for walrcv_exec to work which we use to
1455 * fetch slot information from the remote node. See comments atop
1456 * libpqrcv_exec.
1457 *
1458 * We do not specify a specific user here since the slot sync worker will
1459 * operate as a superuser. This is safe because the slot sync worker does
1460 * not interact with user tables, eliminating the risk of executing
1461 * arbitrary code within triggers.
1462 */
1463 InitPostgres(dbname, InvalidOid, NULL, InvalidOid, 0, NULL);
1464
1466
 /* Build the application name, prefixed with cluster_name when it is set. */
1467 initStringInfo(&app_name);
1468 if (cluster_name[0])
1469 appendStringInfo(&app_name, "%s_%s", cluster_name, "slotsync worker");
1470 else
1471 appendStringInfoString(&app_name, "slotsync worker");
1472
1473 /*
1474 * Establish the connection to the primary server for slot
1475 * synchronization.
1476 */
1477 wrconn = walrcv_connect(PrimaryConnInfo, false, false, false,
1478 app_name.data, &err);
1479
1480 if (!wrconn)
1481 ereport(ERROR,
1482 errcode(ERRCODE_CONNECTION_FAILURE),
1483 errmsg("synchronization worker \"%s\" could not connect to the primary server: %s",
1484 app_name.data, err));
1485
 /* The name is only needed for the connection attempt and its error message. */
1486 pfree(app_name.data);
1487
1488 /*
1489 * Register the disconnection callback.
1490 *
1491 * XXX: This can be combined with previous cleanup registration of
1492 * slotsync_worker_onexit() but that will need the connection to be made
1493 * global and we want to avoid introducing global for this purpose.
1494 */
1496
1497 /*
1498 * Using the specified primary server connection, check that we are not a
1499 * cascading standby and that the slot configured in 'primary_slot_name'
1500 * exists on the primary server.
1501 */
1503
1504 /* Main loop to synchronize slots */
1505 for (;;)
1506 {
1507 bool some_slot_updated = false;
1508
1510
1511 some_slot_updated = synchronize_slots(wrconn);
1512
 /* Nap; the duration depends on whether this cycle updated any slot. */
1513 wait_for_slot_activity(some_slot_updated);
1514 }
1515
1516 /*
1517 * The slot sync worker can't get here because it will only stop when it
1518 * receives a SIGINT from the startup process, or when there is an error.
1519 */
1520 Assert(false);
1521}
1522
1523/*
1524 * Update the inactive_since property for synced slots.
1525 *
1526 * Note that this function is currently called when we shut down the slot
1527 * sync machinery.
 *
 * Does nothing unless StandbyMode is set. Otherwise it scans all
 * max_replication_slots entries while holding ReplicationSlotControlLock in
 * shared mode and handles each in-use slot that is marked as synced.
1528 */
1529static void
1531{
1532 TimestampTz now = 0;
1533
1534 /*
1535 * We need to update inactive_since only when we are promoting standby to
1536 * correctly interpret the inactive_since if the standby gets promoted
1537 * without a restart. We don't want the slots to appear inactive for a
1538 * long time after promotion if they haven't been synchronized recently.
1539 * Whoever acquires the slot, i.e., makes the slot active, will reset it.
1540 */
1541 if (!StandbyMode)
1542 return;
1543
1544 /* The slot sync worker or SQL function mustn't be running by now */
1546
1547 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1548
1549 for (int i = 0; i < max_replication_slots; i++)
1550 {
1552
1553 /* Check if it is a synchronized slot */
1554 if (s->in_use && s->data.synced)
1555 {
1557
1558 /* The slot must not be acquired by any process */
1559 Assert(s->active_pid == 0);
1560
1561 /* Use the same inactive_since time for all the slots. */
1562 if (now == 0)
1564
1566 }
1567 }
1568
1569 LWLockRelease(ReplicationSlotControlLock);
1570}
1571
1572/*
1573 * Shut down the slot sync worker.
1574 *
1575 * This function sends a signal to shut down the slot sync worker, if
1576 * required. It also waits till the slot sync worker has exited or
1577 * pg_sync_replication_slots() has finished.
 *
 * SlotSyncCtx->stopSignaled is set before anything else. SIGINT is sent only
 * when a worker PID is advertised in SlotSyncCtx->pid. The function then
 * polls SlotSyncCtx->syncing, waiting on its latch with a short timeout
 * between checks, until the flag is cleared.
1578 */
1579void
1581{
1582 pid_t worker_pid;
1583
1585
1586 SlotSyncCtx->stopSignaled = true;
1587
1588 /*
1589 * Return if neither the slot sync worker is running nor the function
1590 * pg_sync_replication_slots() is executing.
1591 */
1592 if (!SlotSyncCtx->syncing)
1593 {
1596 return;
1597 }
1598
1599 worker_pid = SlotSyncCtx->pid;
1600
1602
 /* Signal the worker only if a PID has been advertised. */
1603 if (worker_pid != InvalidPid)
1604 kill(worker_pid, SIGINT);
1605
1606 /* Wait for slot sync to end */
1607 for (;;)
1608 {
1609 int rc;
1610
1611 /* Wait a bit, we don't expect to have to wait long */
1612 rc = WaitLatch(MyLatch,
1614 10L, WAIT_EVENT_REPLICATION_SLOTSYNC_SHUTDOWN);
1615
1616 if (rc & WL_LATCH_SET)
1617 {
1620 }
1621
1623
1624 /* Ensure that no process is syncing the slots. */
1625 if (!SlotSyncCtx->syncing)
1626 break;
1627
1629 }
1630
1632
1634}
1635
1636/*
1637 * SlotSyncWorkerCanRestart
1638 *
1639 * Returns true if enough time (SLOTSYNC_RESTART_INTERVAL_SEC) has passed
1640 * since the worker was last launched. Otherwise returns false.
1641 *
1642 * This is a safety valve to protect against continuous respawn attempts if the
1643 * worker is dying immediately at launch. Note that since we will retry to
1644 * launch the worker from the postmaster main loop, we will get another
1645 * chance later.
 *
 * Side effect: when true is returned, SlotSyncCtx->last_start_time is set to
 * the current time, so the call itself counts as the new launch time.
1646 */
1647bool
1649{
1650 time_t curtime = time(NULL);
1651
1652 /*
 * Return false if too soon since last start. The difference is compared
 * as unsigned int, so a negative difference wraps to a large value and
 * does not block the restart.
 */
1653 if ((unsigned int) (curtime - SlotSyncCtx->last_start_time) <
1654 (unsigned int) SLOTSYNC_RESTART_INTERVAL_SEC)
1655 return false;
1656
1657 SlotSyncCtx->last_start_time = curtime;
1658
1659 return true;
1660}
1661
1662/*
1663 * Is current process syncing replication slots?
1664 *
1665 * Could be either backend executing SQL function or slot sync worker.
 *
 * Returns the process-local syncing_slots flag; the shared
 * SlotSyncCtx->syncing flag is not consulted.
1666 */
1667bool
1669{
1670 return syncing_slots;
1671}
1672
1673/*
1674 * Amount of shared memory required for slot synchronization.
 *
 * This is the size of a single SlotSyncCtxStruct.
1675 */
1676Size
1678{
1679 return sizeof(SlotSyncCtxStruct);
1680}
1681
1682/*
1683 * Allocate and initialize the shared memory of slot synchronization.
 *
 * The area is registered under the name "Slot Sync Data". It is initialized
 * only when ShmemInitStruct() reports that it did not already exist.
1684 */
1685void
1687{
1688 Size size = SlotSyncShmemSize();
1689 bool found;
1690
1692 ShmemInitStruct("Slot Sync Data", size, &found);
1693
1694 if (!found)
1695 {
 /* First-time setup: start from an all-zero struct. */
1696 memset(SlotSyncCtx, 0, size);
1699 }
1700}
1701
1702/*
1703 * Error cleanup callback for slot sync SQL function.
 *
 * Slot release and temporary-slot cleanup come before the check of the
 * syncing flag, for the reason given in the comment inside the function.
1704 */
1705static void
1707{
1709
1710 /*
1711 * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1712 *
1713 * The startup process during promotion invokes ShutDownSlotSync() which
1714 * waits for slot sync to finish and it does that by checking the
1715 * 'syncing' flag. Thus the SQL function must be done with slots' release
1716 * and cleanup to avoid any dangling temporary slots or active slots
1717 * before it marks itself as finished syncing.
1718 */
1719
1720 /* Make sure active replication slots are released */
1721 if (MyReplicationSlot != NULL)
1723
1724 /* Also cleanup the synced temporary slots. */
1726
1727 /*
1728 * If syncing_slots is still set, the process errored out without
1729 * resetting the flag. So, we need to clean up shared memory and reset the
1730 * flag here.
1731 */
1732 if (syncing_slots)
1734
1736}
1737
1738/*
1739 * Synchronize the failover enabled replication slots using the specified
1740 * primary server connection.
 *
 * After the sync, the synced temporary slots are cleaned up and the syncing
 * flag is reset.
 *
 * NOTE(review): slotsync_failure_callback() is presumably the error-cleanup
 * handler guarding this sequence -- confirm.
1741 */
1742void
1744{
1746 {
1748
1750
1752
1753 /* Cleanup the synced temporary slots */
1755
1756 /* We are done with sync, so reset sync flag */
1758 }
1760}
sigset_t UnBlockSig
Definition: pqsignal.c:22
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1645
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1609
#define TextDatumGetCString(d)
Definition: builtins.h:98
#define NameStr(name)
Definition: c.h:752
#define Min(x, y)
Definition: c.h:1004
#define UINT64_FORMAT
Definition: c.h:558
uint32 TransactionId
Definition: c.h:658
size_t Size
Definition: c.h:611
int64 TimestampTz
Definition: timestamp.h:39
Oid get_database_oid(const char *dbname, bool missing_ok)
Definition: dbcommands.c:3167
void load_file(const char *filename, bool restricted)
Definition: dfmgr.c:149
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1161
void EmitErrorReport(void)
Definition: elog.c:1695
int errdetail_internal(const char *fmt,...)
Definition: elog.c:1234
int errdetail(const char *fmt,...)
Definition: elog.c:1207
ErrorContextCallback * error_context_stack
Definition: elog.c:95
int errhint(const char *fmt,...)
Definition: elog.c:1321
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
sigjmp_buf * PG_exception_stack
Definition: elog.c:97
#define LOG
Definition: elog.h:31
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:150
void err(int eval, const char *fmt,...)
Definition: err.c:43
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1427
const TupleTableSlotOps TTSOpsMinimalTuple
Definition: execTuples.c:86
int MyProcPid
Definition: globals.c:47
struct Latch * MyLatch
Definition: globals.c:63
void ProcessConfigFile(GucContext context)
Definition: guc-file.l:120
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition: guc.c:4337
@ PGC_S_OVERRIDE
Definition: guc.h:123
@ PGC_SUSET
Definition: guc.h:78
@ PGC_SIGHUP
Definition: guc.h:75
char * cluster_name
Definition: guc_tables.c:555
Assert(PointerIsAligned(start, uint64))
void SignalHandlerForShutdownRequest(SIGNAL_ARGS)
Definition: interrupt.c:104
volatile sig_atomic_t ShutdownRequestPending
Definition: interrupt.c:28
volatile sig_atomic_t ConfigReloadPending
Definition: interrupt.c:27
void SignalHandlerForConfigReload(SIGNAL_ARGS)
Definition: interrupt.c:61
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337
void proc_exit(int code)
Definition: ipc.c:104
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
int i
Definition: isn.c:77
void ResetLatch(Latch *latch)
Definition: latch.c:374
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:172
List * lappend(List *list, void *datum)
Definition: list.c:339
void list_free_deep(List *list)
Definition: list.c:1560
void LockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition: lmgr.c:1088
void UnlockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition: lmgr.c:1148
#define AccessShareLock
Definition: lockdefs.h:36
XLogRecPtr LogicalSlotAdvanceAndCheckSnapState(XLogRecPtr moveto, bool *found_consistent_snapshot)
Definition: logical.c:2081
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1174
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1894
@ LW_SHARED
Definition: lwlock.h:113
@ LW_EXCLUSIVE
Definition: lwlock.h:112
char * pstrdup(const char *in)
Definition: mcxt.c:1759
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc0(Size size)
Definition: mcxt.c:1395
@ NormalProcessing
Definition: miscadmin.h:471
@ InitProcessing
Definition: miscadmin.h:470
#define GetProcessingMode()
Definition: miscadmin.h:480
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define AmLogicalSlotSyncWorkerProcess()
Definition: miscadmin.h:385
#define HOLD_INTERRUPTS()
Definition: miscadmin.h:133
#define SetProcessingMode(mode)
Definition: miscadmin.h:482
@ B_SLOTSYNC_WORKER
Definition: miscadmin.h:347
#define InvalidPid
Definition: miscadmin.h:32
BackendType MyBackendType
Definition: miscinit.c:64
void namestrcpy(Name name, const char *str)
Definition: name.c:233
void * arg
#define NIL
Definition: pg_list.h:68
#define foreach_ptr(type, var, lst)
Definition: pg_list.h:469
static XLogRecPtr DatumGetLSN(Datum X)
Definition: pg_lsn.h:25
#define die(msg)
#define pqsignal
Definition: port.h:531
void FloatExceptionHandler(SIGNAL_ARGS)
Definition: postgres.c:3078
static bool DatumGetBool(Datum X)
Definition: postgres.h:100
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:332
uint64_t Datum
Definition: postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:322
static TransactionId DatumGetTransactionId(Datum X)
Definition: postgres.h:272
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
void BaseInit(void)
Definition: postinit.c:611
void InitPostgres(const char *in_dbname, Oid dboid, const char *username, Oid useroid, bits32 flags, char *out_dbname)
Definition: postinit.c:711
TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly)
Definition: procarray.c:2907
void procsignal_sigusr1_handler(SIGNAL_ARGS)
Definition: procsignal.c:674
void init_ps_display(const char *fixed_part)
Definition: ps_status.c:285
char * quote_literal_cstr(const char *rawstr)
Definition: quote.c:103
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
void ReplicationSlotAcquire(const char *name, bool nowait, bool error_if_invalid)
Definition: slot.c:593
void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, bool two_phase, bool failover, bool synced)
Definition: slot.c:352
void ReplicationSlotDropAcquired(void)
Definition: slot.c:964
void ReplicationSlotMarkDirty(void)
Definition: slot.c:1106
ReplicationSlotInvalidationCause GetSlotInvalidationCause(const char *cause_name)
Definition: slot.c:2707
void ReplicationSlotsComputeRequiredXmin(bool already_locked)
Definition: slot.c:1145
void ReplicationSlotPersist(void)
Definition: slot.c:1123
ReplicationSlot * MyReplicationSlot
Definition: slot.c:148
void ReplicationSlotSave(void)
Definition: slot.c:1088
ReplicationSlot * SearchNamedReplicationSlot(const char *name, bool need_lock)
Definition: slot.c:513
void ReplicationSlotRelease(void)
Definition: slot.c:731
int max_replication_slots
Definition: slot.c:151
ReplicationSlotCtlData * ReplicationSlotCtl
Definition: slot.c:145
void ReplicationSlotsComputeRequiredLSN(void)
Definition: slot.c:1201
void ReplicationSlotCleanup(bool synced_only)
Definition: slot.c:820
@ RS_TEMPORARY
Definition: slot.h:47
ReplicationSlotInvalidationCause
Definition: slot.h:59
@ RS_INVAL_NONE
Definition: slot.h:60
#define SlotIsLogical(slot)
Definition: slot.h:255
static void ReplicationSlotSetInactiveSince(ReplicationSlot *s, TimestampTz ts, bool acquire_lock)
Definition: slot.h:273
static List * get_local_synced_slots(void)
Definition: slotsync.c:348
#define MIN_SLOTSYNC_WORKER_NAPTIME_MS
Definition: slotsync.c:113
#define PRIMARY_INFO_OUTPUT_COL_COUNT
static void slotsync_worker_disconnect(int code, Datum arg)
Definition: slotsync.c:1195
void SyncReplicationSlots(WalReceiverConn *wrconn)
Definition: slotsync.c:1743
static bool local_sync_slot_required(ReplicationSlot *local_slot, List *remote_slots)
Definition: slotsync.c:379
static void drop_local_obsolete_slots(List *remote_slot_list)
Definition: slotsync.c:432
static void reserve_wal_for_local_slot(XLogRecPtr restart_lsn)
Definition: slotsync.c:489
void ShutDownSlotSync(void)
Definition: slotsync.c:1580
static bool update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
Definition: slotsync.c:560
bool sync_replication_slots
Definition: slotsync.c:106
static SlotSyncCtxStruct * SlotSyncCtx
Definition: slotsync.c:103
static void slotsync_failure_callback(int code, Datum arg)
Definition: slotsync.c:1706
#define SLOTSYNC_COLUMN_COUNT
static long sleep_ms
Definition: slotsync.c:116
#define SLOTSYNC_RESTART_INTERVAL_SEC
Definition: slotsync.c:119
static void reset_syncing_flag()
Definition: slotsync.c:1333
char * CheckAndGetDbnameFromConninfo(void)
Definition: slotsync.c:1030
static bool syncing_slots
Definition: slotsync.c:126
struct RemoteSlot RemoteSlot
static void ProcessSlotSyncInterrupts(void)
Definition: slotsync.c:1173
struct SlotSyncCtxStruct SlotSyncCtxStruct
#define MAX_SLOTSYNC_WORKER_NAPTIME_MS
Definition: slotsync.c:114
static bool synchronize_slots(WalReceiverConn *wrconn)
Definition: slotsync.c:806
bool SlotSyncWorkerCanRestart(void)
Definition: slotsync.c:1648
static bool synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
Definition: slotsync.c:624
static void wait_for_slot_activity(bool some_slot_updated)
Definition: slotsync.c:1254
static void slotsync_reread_config(void)
Definition: slotsync.c:1124
void SlotSyncShmemInit(void)
Definition: slotsync.c:1686
static bool update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid, bool *found_consistent_snapshot, bool *remote_slot_precedes)
Definition: slotsync.c:166
static void slotsync_worker_onexit(int code, Datum arg)
Definition: slotsync.c:1208
static void check_and_set_sync_info(pid_t worker_pid)
Definition: slotsync.c:1289
static void update_synced_slots_inactive_since(void)
Definition: slotsync.c:1530
bool ValidateSlotSyncParams(int elevel)
Definition: slotsync.c:1057
static void validate_remote_info(WalReceiverConn *wrconn)
Definition: slotsync.c:952
bool IsSyncingReplicationSlots(void)
Definition: slotsync.c:1668
void ReplSlotSyncWorkerMain(const void *startup_data, size_t startup_data_len)
Definition: slotsync.c:1349
Size SlotSyncShmemSize(void)
Definition: slotsync.c:1677
bool SnapBuildSnapshotExists(XLogRecPtr lsn)
Definition: snapbuild.c:2058
#define SpinLockInit(lock)
Definition: spin.h:57
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
void InitProcess(void)
Definition: proc.c:390
char * dbname
Definition: streamutil.c:49
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:230
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
Definition: pg_list.h:54
bool two_phase
Definition: slotsync.c:137
char * plugin
Definition: slotsync.c:135
char * name
Definition: slotsync.c:134
char * database
Definition: slotsync.c:136
bool failover
Definition: slotsync.c:138
ReplicationSlotInvalidationCause invalidated
Definition: slotsync.c:145
XLogRecPtr confirmed_lsn
Definition: slotsync.c:140
XLogRecPtr restart_lsn
Definition: slotsync.c:139
XLogRecPtr two_phase_at
Definition: slotsync.c:141
TransactionId catalog_xmin
Definition: slotsync.c:142
ReplicationSlot replication_slots[1]
Definition: slot.h:266
TransactionId catalog_xmin
Definition: slot.h:104
XLogRecPtr confirmed_flush
Definition: slot.h:118
ReplicationSlotPersistency persistency
Definition: slot.h:88
ReplicationSlotInvalidationCause invalidated
Definition: slot.h:110
TransactionId effective_catalog_xmin
Definition: slot.h:189
slock_t mutex
Definition: slot.h:165
pid_t active_pid
Definition: slot.h:171
bool in_use
Definition: slot.h:168
ReplicationSlotPersistentData data
Definition: slot.h:192
time_t last_start_time
Definition: slotsync.c:99
Tuplestorestate * tuplestore
Definition: walreceiver.h:223
TupleDesc tupledesc
Definition: walreceiver.h:224
WalRcvExecStatus status
Definition: walreceiver.h:220
Definition: c.h:747
void InitializeTimeouts(void)
Definition: timeout.c:470
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:314
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot)
Definition: tuplestore.c:1130
static Datum slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull)
Definition: tuptable.h:399
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:458
#define WL_TIMEOUT
Definition: waiteventset.h:37
#define WL_EXIT_ON_PM_DEATH
Definition: waiteventset.h:39
#define WL_LATCH_SET
Definition: waiteventset.h:34
static WalReceiverConn * wrconn
Definition: walreceiver.c:93
bool hot_standby_feedback
Definition: walreceiver.c:90
#define walrcv_connect(conninfo, replication, logical, must_use_password, appname, err)
Definition: walreceiver.h:435
@ WALRCV_OK_TUPLES
Definition: walreceiver.h:207
static void walrcv_clear_result(WalRcvExecResult *walres)
Definition: walreceiver.h:471
#define walrcv_get_dbname_from_conninfo(conninfo)
Definition: walreceiver.h:445
#define walrcv_exec(conn, exec, nRetTypes, retTypes)
Definition: walreceiver.h:465
#define walrcv_disconnect(conn)
Definition: walreceiver.h:467
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
XLogRecPtr GetStandbyFlushRecPtr(TimeLineID *tli)
Definition: walsender.c:3617
#define SIGCHLD
Definition: win32_port.h:168
#define SIGHUP
Definition: win32_port.h:158
#define SIGPIPE
Definition: win32_port.h:163
#define kill(pid, sig)
Definition: win32_port.h:493
#define SIGUSR1
Definition: win32_port.h:170
#define SIGUSR2
Definition: win32_port.h:171
bool IsTransactionState(void)
Definition: xact.c:387
void StartTransactionCommand(void)
Definition: xact.c:3071
void CommitTransactionCommand(void)
Definition: xact.c:3169
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3771
int wal_level
Definition: xlog.c:132
int wal_segment_size
Definition: xlog.c:144
XLogSegNo XLogGetOldestSegno(TimeLineID tli)
Definition: xlog.c:3787
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:76
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:46
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint32 TimeLineID
Definition: xlogdefs.h:62
uint64 XLogSegNo
Definition: xlogdefs.h:51
char * PrimarySlotName
Definition: xlogrecovery.c:99
bool StandbyMode
Definition: xlogrecovery.c:149
char * PrimaryConnInfo
Definition: xlogrecovery.c:98