Thanks to visit codestin.com
Credit goes to doxygen.postgresql.org

PostgreSQL Source Code git master
slot.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * slot.c
4 * Replication slot management.
5 *
6 *
7 * Copyright (c) 2012-2025, PostgreSQL Global Development Group
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/replication/slot.c
12 *
13 * NOTES
14 *
15 * Replication slots are used to keep state about replication streams
16 * originating from this cluster. Their primary purpose is to prevent the
17 * premature removal of WAL or of old tuple versions in a manner that would
18 * interfere with replication; they are also useful for monitoring purposes.
19 * Slots need to be permanent (to allow restarts), crash-safe, and allocatable
20 * on standbys (to support cascading setups). The requirement that slots be
21 * usable on standbys precludes storing them in the system catalogs.
22 *
23 * Each replication slot gets its own directory inside the directory
24 * $PGDATA / PG_REPLSLOT_DIR. Inside that directory the state file will
25 * contain the slot's own data. Additional data can be stored alongside that
26 * file if required. While the server is running, the state data is also
27 * cached in memory for efficiency.
28 *
29 * ReplicationSlotAllocationLock must be taken in exclusive mode to allocate
30 * or free a slot. ReplicationSlotControlLock must be taken in shared mode
31 * to iterate over the slots, and in exclusive mode to change the in_use flag
32 * of a slot. The remaining data in each slot is protected by its mutex.
33 *
34 *-------------------------------------------------------------------------
35 */
36
37#include "postgres.h"
38
39#include <unistd.h>
40#include <sys/stat.h>
41
42#include "access/transam.h"
44#include "access/xlogrecovery.h"
45#include "common/file_utils.h"
46#include "common/string.h"
47#include "miscadmin.h"
48#include "pgstat.h"
52#include "replication/slot.h"
54#include "storage/fd.h"
55#include "storage/ipc.h"
56#include "storage/proc.h"
57#include "storage/procarray.h"
58#include "utils/builtins.h"
59#include "utils/guc_hooks.h"
61#include "utils/varlena.h"
62
63/*
64 * Replication slot on-disk data structure.
65 */
67{
68 /* first part of this struct needs to be version independent */
69
70 /* data not covered by checksum */
73
74 /* data covered by checksum */
77
78 /*
79 * The actual data in the slot that follows can differ based on the above
80 * 'version'.
81 */
82
85
86/*
87 * Struct for the configuration of synchronized_standby_slots.
88 *
89 * Note: this must be a flat representation that can be held in a single chunk
90 * of guc_malloc'd memory, so that it can be stored as the "extra" data for the
91 * synchronized_standby_slots GUC.
92 */
93typedef struct
94{
95 /* Number of slot names in the slot_names[] */
97
98 /*
99 * slot_names contains 'nslotnames' consecutive null-terminated C strings.
100 */
101 char slot_names[FLEXIBLE_ARRAY_MEMBER];
103
104/*
105 * Lookup table for slot invalidation causes.
106 */
108{
110 const char *cause_name;
112
114 {RS_INVAL_NONE, "none"},
115 {RS_INVAL_WAL_REMOVED, "wal_removed"},
116 {RS_INVAL_HORIZON, "rows_removed"},
117 {RS_INVAL_WAL_LEVEL, "wal_level_insufficient"},
118 {RS_INVAL_IDLE_TIMEOUT, "idle_timeout"},
119};
120
121/*
122 * Ensure that the lookup table is up-to-date with the enums defined in
123 * ReplicationSlotInvalidationCause.
124 */
126 "array length mismatch");
127
/*
 * Size helpers for the on-disk slot layout (ReplicationSlotOnDisk).
 *
 * Every replacement list is fully parenthesized so that each macro behaves
 * as a single operand wherever it is used.  Without the parentheses the two
 * "sizeof(...) - offset" macros would silently mis-evaluate inside a larger
 * expression, e.g. "2 * ReplicationSlotOnDiskV2Size" or
 * "len - ReplicationSlotOnDiskChecksummedSize".
 */

/* size of version independent data */
#define ReplicationSlotOnDiskConstantSize \
	(offsetof(ReplicationSlotOnDisk, slotdata))
/* size of the part of the slot not covered by the checksum */
#define ReplicationSlotOnDiskNotChecksummedSize \
	(offsetof(ReplicationSlotOnDisk, version))
/* size of the part covered by the checksum */
#define ReplicationSlotOnDiskChecksummedSize \
	(sizeof(ReplicationSlotOnDisk) - ReplicationSlotOnDiskNotChecksummedSize)
/* size of the slot data that is version dependent */
#define ReplicationSlotOnDiskV2Size \
	(sizeof(ReplicationSlotOnDisk) - ReplicationSlotOnDiskConstantSize)
140
141#define SLOT_MAGIC 0x1051CA1 /* format identifier */
142#define SLOT_VERSION 5 /* version for new files */
143
144/* Control array for replication slot management */
146
147/* My backend's replication slot in the shared memory array */
149
150/* GUC variables */
151int max_replication_slots = 10; /* the maximum number of replication
152 * slots */
153
154/*
155 * Invalidate replication slots that have remained idle longer than this
156 * duration; '0' disables it.
157 */
159
160/*
161 * This GUC lists streaming replication standby server slot names that
162 * logical WAL sender processes will wait for.
163 */
165
166/* This is the parsed and cached configuration for synchronized_standby_slots */
168
169/*
170 * Oldest LSN that has been confirmed to be flushed to the standbys
171 * corresponding to the physical slots specified in the synchronized_standby_slots GUC.
172 */
174
175static void ReplicationSlotShmemExit(int code, Datum arg);
176static bool IsSlotForConflictCheck(const char *name);
177static void ReplicationSlotDropPtr(ReplicationSlot *slot);
178
179/* internal persistency functions */
180static void RestoreSlotFromDisk(const char *name);
181static void CreateSlotOnDisk(ReplicationSlot *slot);
182static void SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel);
183
184/*
185 * Report shared-memory space needed by ReplicationSlotsShmemInit.
186 */
187Size
189{
190 Size size = 0;
191
192 if (max_replication_slots == 0)
193 return size;
194
195 size = offsetof(ReplicationSlotCtlData, replication_slots);
196 size = add_size(size,
198
199 return size;
200}
201
202/*
203 * Allocate and initialize shared memory for replication slots.
204 */
205void
207{
208 bool found;
209
210 if (max_replication_slots == 0)
211 return;
212
214 ShmemInitStruct("ReplicationSlot Ctl", ReplicationSlotsShmemSize(),
215 &found);
216
217 if (!found)
218 {
219 int i;
220
221 /* First time through, so initialize */
223
224 for (i = 0; i < max_replication_slots; i++)
225 {
227
228 /* everything else is zeroed by the memset above */
229 SpinLockInit(&slot->mutex);
231 LWTRANCHE_REPLICATION_SLOT_IO);
233 }
234 }
235}
236
237/*
238 * Register the callback for replication slot cleanup and releasing.
239 */
240void
242{
244}
245
246/*
247 * Release and cleanup replication slots.
248 */
249static void
251{
252 /* Make sure active replication slots are released */
253 if (MyReplicationSlot != NULL)
255
256 /* Also cleanup all the temporary slots. */
258}
259
260/*
261 * Check whether the passed slot name is valid and report errors at elevel.
262 *
263 * An error will be reported for a reserved replication slot name if
264 * allow_reserved_name is set to false.
265 *
266 * Slot names may consist of [a-z0-9_]{1,NAMEDATALEN-1} which should allow
267 * the name to be used as a directory name on every supported OS.
268 *
269 * Returns whether the directory name is valid or not if elevel < ERROR.
270 */
271bool
272ReplicationSlotValidateName(const char *name, bool allow_reserved_name,
273 int elevel)
274{
275 const char *cp;
276
 /*
  * Every failed check below reports through ereport(elevel, ...) and is
  * then followed by "return false", so a caller that passes an elevel
  * below ERROR learns about the failure from the return value.
  */

 /* Reject the empty name. */
277 if (strlen(name) == 0)
278 {
279 ereport(elevel,
280 (errcode(ERRCODE_INVALID_NAME),
281 errmsg("replication slot name \"%s\" is too short",
282 name)));
283 return false;
284 }
285
 /* The name must be shorter than NAMEDATALEN bytes. */
286 if (strlen(name) >= NAMEDATALEN)
287 {
288 ereport(elevel,
289 (errcode(ERRCODE_NAME_TOO_LONG),
290 errmsg("replication slot name \"%s\" is too long",
291 name)));
292 return false;
293 }
294
 /*
  * Accept only 'a'-'z', '0'-'9' and '_'.  The first offending character
  * ends the scan; upper-case letters are rejected as well.
  */
295 for (cp = name; *cp; cp++)
296 {
297 if (!((*cp >= 'a' && *cp <= 'z')
298 || (*cp >= '0' && *cp <= '9')
299 || (*cp == '_')))
300 {
301 ereport(elevel,
302 (errcode(ERRCODE_INVALID_NAME),
303 errmsg("replication slot name \"%s\" contains invalid character",
304 name),
305 errhint("Replication slot names may only contain lower case letters, numbers, and the underscore character.")));
306 return false;
307 }
308 }
309
 /*
  * A syntactically valid name can still be refused when it matches the
  * reserved conflict-detection slot name and the caller did not pass
  * allow_reserved_name.
  *
  * NOTE(review): in this copy of the file the ereport() call below is
  * truncated: the argument for errdetail()'s "%s" and the closing
  * parentheses/semicolon are missing (one line was dropped after the
  * errdetail format string).  Restore it from upstream before compiling.
  */
310 if (!allow_reserved_name && IsSlotForConflictCheck(name))
311 {
312 ereport(elevel,
313 errcode(ERRCODE_RESERVED_NAME),
314 errmsg("replication slot name \"%s\" is reserved",
315 name),
316 errdetail("The name \"%s\" is reserved for the conflict detection slot.",
318
319 return false;
320 }
321
 /* All checks passed. */
322 return true;
323}
324
325/*
326 * Return true if the replication slot name is "pg_conflict_detection".
327 */
328static bool
330{
331 return (strcmp(name, CONFLICT_DETECTION_SLOT) == 0);
332}
333
334/*
335 * Create a new replication slot and mark it as used by this backend.
336 *
337 * name: Name of the slot
338 * db_specific: logical decoding is db specific; if the slot is going to
339 * be used for that pass true, otherwise false.
340 * two_phase: Allows decoding of prepared transactions. We allow this option
341 * to be enabled only at the slot creation time. If we allow this option
342 * to be changed during decoding then it is quite possible that we skip
343 * prepare first time because this option was not enabled. Now next time
344 * during getting changes, if the two_phase option is enabled it can skip
345 * prepare because by that time start decoding point has been moved. So the
346 * user will only get commit prepared.
347 * failover: If enabled, allows the slot to be synced to standbys so
348 * that logical replication can be resumed after failover.
349 * synced: True if the slot is synchronized from the primary server.
350 */
351void
352ReplicationSlotCreate(const char *name, bool db_specific,
353 ReplicationSlotPersistency persistency,
354 bool two_phase, bool failover, bool synced)
355{
356 ReplicationSlot *slot = NULL;
357 int i;
358
359 Assert(MyReplicationSlot == NULL);
360
361 /*
362 * The logical launcher or pg_upgrade may create or migrate an internal
363 * slot, so using a reserved name is allowed in these cases.
364 */
366 ERROR);
367
368 if (failover)
369 {
370 /*
371 * Do not allow users to create the failover enabled slots on the
372 * standby as we do not support sync to the cascading standby.
373 *
374 * However, failover enabled slots can be created during slot
375 * synchronization because we need to retain the same values as the
376 * remote slot.
377 */
380 errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
381 errmsg("cannot enable failover for a replication slot created on the standby"));
382
383 /*
384 * Do not allow users to create failover enabled temporary slots,
385 * because temporary slots will not be synced to the standby.
386 *
387 * However, failover enabled temporary slots can be created during
388 * slot synchronization. See the comments atop slotsync.c for details.
389 */
390 if (persistency == RS_TEMPORARY && !IsSyncingReplicationSlots())
392 errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
393 errmsg("cannot enable failover for a temporary replication slot"));
394 }
395
396 /*
397 * If some other backend ran this code concurrently with us, we'd likely
398 * both allocate the same slot, and that would be bad. We'd also be at
399 * risk of missing a name collision. Also, we don't want to try to create
400 * a new slot while somebody's busy cleaning up an old one, because we
401 * might both be monkeying with the same directory.
402 */
403 LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
404
405 /*
406 * Check for name collision, and identify an allocatable slot. We need to
407 * hold ReplicationSlotControlLock in shared mode for this, so that nobody
408 * else can change the in_use flags while we're looking at them.
409 */
410 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
411 for (i = 0; i < max_replication_slots; i++)
412 {
414
415 if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0)
418 errmsg("replication slot \"%s\" already exists", name)));
419 if (!s->in_use && slot == NULL)
420 slot = s;
421 }
422 LWLockRelease(ReplicationSlotControlLock);
423
424 /* If all slots are in use, we're out of luck. */
425 if (slot == NULL)
427 (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
428 errmsg("all replication slots are in use"),
429 errhint("Free one or increase \"max_replication_slots\".")));
430
431 /*
432 * Since this slot is not in use, nobody should be looking at any part of
433 * it other than the in_use field unless they're trying to allocate it.
434 * And since we hold ReplicationSlotAllocationLock, nobody except us can
435 * be doing that. So it's safe to initialize the slot.
436 */
437 Assert(!slot->in_use);
438 Assert(slot->active_pid == 0);
439
440 /* first initialize persistent data */
441 memset(&slot->data, 0, sizeof(ReplicationSlotPersistentData));
442 namestrcpy(&slot->data.name, name);
443 slot->data.database = db_specific ? MyDatabaseId : InvalidOid;
444 slot->data.persistency = persistency;
445 slot->data.two_phase = two_phase;
447 slot->data.failover = failover;
448 slot->data.synced = synced;
449
450 /* and then data only present in shared memory */
451 slot->just_dirtied = false;
452 slot->dirty = false;
461 slot->inactive_since = 0;
462
463 /*
464 * Create the slot on disk. We haven't actually marked the slot allocated
465 * yet, so no special cleanup is required if this errors out.
466 */
467 CreateSlotOnDisk(slot);
468
469 /*
470 * We need to briefly prevent any other backend from iterating over the
471 * slots while we flip the in_use flag. We also need to set the active
472 * flag while holding the ControlLock as otherwise a concurrent
473 * ReplicationSlotAcquire() could acquire the slot as well.
474 */
475 LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE);
476
477 slot->in_use = true;
478
479 /* We can now mark the slot active, and that makes it our slot. */
480 SpinLockAcquire(&slot->mutex);
481 Assert(slot->active_pid == 0);
482 slot->active_pid = MyProcPid;
483 SpinLockRelease(&slot->mutex);
484 MyReplicationSlot = slot;
485
486 LWLockRelease(ReplicationSlotControlLock);
487
488 /*
489 * Create statistics entry for the new logical slot. We don't collect any
490 * stats for physical slots, so no need to create an entry for the same.
491 * See ReplicationSlotDropPtr for why we need to do this before releasing
492 * ReplicationSlotAllocationLock.
493 */
494 if (SlotIsLogical(slot))
496
497 /*
498 * Now that the slot has been marked as in_use and active, it's safe to
499 * let somebody else try to allocate a slot.
500 */
501 LWLockRelease(ReplicationSlotAllocationLock);
502
503 /* Let everybody know we've modified this slot */
505}
506
507/*
508 * Search for the named replication slot.
509 *
510 * Return the replication slot if found, otherwise NULL.
511 */
513SearchNamedReplicationSlot(const char *name, bool need_lock)
514{
 /*
  * NOTE(review): the return-type line that should precede the function
  * name is missing from this copy; the function returns 'slot', which is
  * declared below as "ReplicationSlot *".
  */
515 int i;
516 ReplicationSlot *slot = NULL;
517
 /*
  * When need_lock is true the scan is bracketed by a shared acquisition of
  * ReplicationSlotControlLock.  When it is false no lock is taken here --
  * presumably the caller already holds it; verify against callers.
  */
518 if (need_lock)
519 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
520
 /*
  * Linear scan over all max_replication_slots entries; the first in-use
  * entry whose name compares equal to 'name' wins.
  *
  * NOTE(review): the declaration/initialization of the per-iteration
  * pointer 's' is missing from this copy (a line was dropped right after
  * the opening brace of the loop).
  */
521 for (i = 0; i < max_replication_slots; i++)
522 {
524
525 if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0)
526 {
527 slot = s;
528 break;
529 }
530 }
531
532 if (need_lock)
533 LWLockRelease(ReplicationSlotControlLock);
534
 /* NULL when no in-use slot carries the requested name. */
535 return slot;
536}
537
538/*
539 * Return the index of the replication slot in
540 * ReplicationSlotCtl->replication_slots.
541 *
542 * This is mainly useful to have an efficient key for storing replication slot
543 * stats.
544 */
545int
547{
549 slot < ReplicationSlotCtl->replication_slots + max_replication_slots);
550
552}
553
554/*
555 * If the slot at 'index' is unused, return false. Otherwise 'name' is set to
556 * the slot's name and true is returned.
557 *
558 * This likely is only useful for pgstat_replslot.c during shutdown, in other
559 * cases there are obvious TOCTOU issues.
560 */
561bool
563{
564 ReplicationSlot *slot;
565 bool found;
566
568
569 /*
570 * Ensure that the slot cannot be dropped while we copy the name. Don't
571 * need the spinlock as the name of an existing slot cannot change.
572 */
573 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
574 found = slot->in_use;
575 if (slot->in_use)
577 LWLockRelease(ReplicationSlotControlLock);
578
579 return found;
580}
581
582/*
583 * Find a previously created slot and mark it as used by this process.
584 *
585 * An error is raised if nowait is true and the slot is currently in use. If
586 * nowait is false, we sleep until the slot is released by the owning process.
587 *
588 * An error is raised if error_if_invalid is true and the slot is found to
589 * be invalid. It should always be set to true, except when we are temporarily
590 * acquiring the slot and don't intend to change it.
591 */
592void
593ReplicationSlotAcquire(const char *name, bool nowait, bool error_if_invalid)
594{
596 int active_pid;
597
598 Assert(name != NULL);
599
600retry:
601 Assert(MyReplicationSlot == NULL);
602
603 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
604
605 /* Check if the slot exists with the given name. */
607 if (s == NULL || !s->in_use)
608 {
609 LWLockRelease(ReplicationSlotControlLock);
610
612 (errcode(ERRCODE_UNDEFINED_OBJECT),
613 errmsg("replication slot \"%s\" does not exist",
614 name)));
615 }
616
617 /*
618 * Do not allow users to acquire the reserved slot. This scenario may
619 * occur if the launcher that owns the slot has terminated unexpectedly
620 * due to an error, and a backend process attempts to reuse the slot.
621 */
624 errcode(ERRCODE_UNDEFINED_OBJECT),
625 errmsg("cannot acquire replication slot \"%s\"", name),
626 errdetail("The slot is reserved for conflict detection and can only be acquired by logical replication launcher."));
627
628 /*
629 * This is the slot we want; check if it's active under some other
630 * process. In single user mode, we don't need this check.
631 */
633 {
634 /*
635 * Get ready to sleep on the slot in case it is active. (We may end
636 * up not sleeping, but we don't want to do this while holding the
637 * spinlock.)
638 */
639 if (!nowait)
641
642 /*
643 * It is important to reset the inactive_since under spinlock here to
644 * avoid race conditions with slot invalidation. See comments related
645 * to inactive_since in InvalidatePossiblyObsoleteSlot.
646 */
648 if (s->active_pid == 0)
650 active_pid = s->active_pid;
653 }
654 else
655 {
656 s->active_pid = active_pid = MyProcPid;
658 }
659 LWLockRelease(ReplicationSlotControlLock);
660
661 /*
662 * If we found the slot but it's already active in another process, we
663 * wait until the owning process signals us that it's been released, or
664 * error out.
665 */
666 if (active_pid != MyProcPid)
667 {
668 if (!nowait)
669 {
670 /* Wait here until we get signaled, and then restart */
672 WAIT_EVENT_REPLICATION_SLOT_DROP);
674 goto retry;
675 }
676
678 (errcode(ERRCODE_OBJECT_IN_USE),
679 errmsg("replication slot \"%s\" is active for PID %d",
680 NameStr(s->data.name), active_pid)));
681 }
682 else if (!nowait)
683 ConditionVariableCancelSleep(); /* no sleep needed after all */
684
685 /* We made this slot active, so it's ours now. */
687
688 /*
689 * We need to check for invalidation after making the slot ours to avoid
690 * the possible race condition with the checkpointer that can otherwise
691 * invalidate the slot immediately after the check.
692 */
693 if (error_if_invalid && s->data.invalidated != RS_INVAL_NONE)
695 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
696 errmsg("can no longer access replication slot \"%s\"",
697 NameStr(s->data.name)),
698 errdetail("This replication slot has been invalidated due to \"%s\".",
700
701 /* Let everybody know we've modified this slot */
703
704 /*
705 * The call to pgstat_acquire_replslot() protects against stats for a
706 * different slot, from before a restart or such, being present during
707 * pgstat_report_replslot().
708 */
709 if (SlotIsLogical(s))
711
712
713 if (am_walsender)
714 {
717 ? errmsg("acquired logical replication slot \"%s\"",
718 NameStr(s->data.name))
719 : errmsg("acquired physical replication slot \"%s\"",
720 NameStr(s->data.name)));
721 }
722}
723
724/*
725 * Release the replication slot that this backend considers itself to own.
726 *
727 * This or another backend can re-acquire the slot later.
728 * Resources this slot requires will be preserved.
729 */
730void
732{
734 char *slotname = NULL; /* keep compiler quiet */
735 bool is_logical = false; /* keep compiler quiet */
736 TimestampTz now = 0;
737
738 Assert(slot != NULL && slot->active_pid != 0);
739
740 if (am_walsender)
741 {
742 slotname = pstrdup(NameStr(slot->data.name));
743 is_logical = SlotIsLogical(slot);
744 }
745
746 if (slot->data.persistency == RS_EPHEMERAL)
747 {
748 /*
749 * Delete the slot. There is no !PANIC case where this is allowed to
750 * fail, all that may happen is an incomplete cleanup of the on-disk
751 * data.
752 */
754 }
755
756 /*
757 * If slot needed to temporarily restrain both data and catalog xmin to
758 * create the catalog snapshot, remove that temporary constraint.
759 * Snapshots can only be exported while the initial snapshot is still
760 * acquired.
761 */
762 if (!TransactionIdIsValid(slot->data.xmin) &&
764 {
765 SpinLockAcquire(&slot->mutex);
767 SpinLockRelease(&slot->mutex);
769 }
770
771 /*
772 * Set the time since the slot has become inactive. We get the current
773 * time beforehand to avoid system call while holding the spinlock.
774 */
776
777 if (slot->data.persistency == RS_PERSISTENT)
778 {
779 /*
780 * Mark persistent slot inactive. We're not freeing it, just
781 * disconnecting, but wake up others that may be waiting for it.
782 */
783 SpinLockAcquire(&slot->mutex);
784 slot->active_pid = 0;
786 SpinLockRelease(&slot->mutex);
788 }
789 else
791
792 MyReplicationSlot = NULL;
793
794 /* might not have been set when we've been a plain slot */
795 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
796 MyProc->statusFlags &= ~PROC_IN_LOGICAL_DECODING;
798 LWLockRelease(ProcArrayLock);
799
800 if (am_walsender)
801 {
803 is_logical
804 ? errmsg("released logical replication slot \"%s\"",
805 slotname)
806 : errmsg("released physical replication slot \"%s\"",
807 slotname));
808
809 pfree(slotname);
810 }
811}
812
813/*
814 * Cleanup temporary slots created in current session.
815 *
816 * Cleanup only synced temporary slots if 'synced_only' is true, else
817 * cleanup all temporary slots.
818 */
819void
820ReplicationSlotCleanup(bool synced_only)
821{
822 int i;
823
824 Assert(MyReplicationSlot == NULL);
825
826restart:
827 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
828 for (i = 0; i < max_replication_slots; i++)
829 {
831
832 if (!s->in_use)
833 continue;
834
836 if ((s->active_pid == MyProcPid &&
837 (!synced_only || s->data.synced)))
838 {
841 LWLockRelease(ReplicationSlotControlLock); /* avoid deadlock */
842
844
846 goto restart;
847 }
848 else
850 }
851
852 LWLockRelease(ReplicationSlotControlLock);
853}
854
855/*
856 * Permanently drop replication slot identified by the passed in name.
857 */
858void
859ReplicationSlotDrop(const char *name, bool nowait)
860{
861 Assert(MyReplicationSlot == NULL);
862
863 ReplicationSlotAcquire(name, nowait, false);
864
865 /*
866 * Do not allow users to drop the slots which are currently being synced
867 * from the primary to the standby.
868 */
871 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
872 errmsg("cannot drop replication slot \"%s\"", name),
873 errdetail("This replication slot is being synchronized from the primary server."));
874
876}
877
878/*
879 * Change the definition of the slot identified by the specified name.
880 */
881void
882ReplicationSlotAlter(const char *name, const bool *failover,
883 const bool *two_phase)
884{
885 bool update_slot = false;
886
887 Assert(MyReplicationSlot == NULL);
889
890 ReplicationSlotAcquire(name, false, true);
891
894 errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
895 errmsg("cannot use %s with a physical replication slot",
896 "ALTER_REPLICATION_SLOT"));
897
898 if (RecoveryInProgress())
899 {
900 /*
901 * Do not allow users to alter the slots which are currently being
902 * synced from the primary to the standby.
903 */
906 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
907 errmsg("cannot alter replication slot \"%s\"", name),
908 errdetail("This replication slot is being synchronized from the primary server."));
909
910 /*
911 * Do not allow users to enable failover on the standby as we do not
912 * support sync to the cascading standby.
913 */
914 if (failover && *failover)
916 errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
917 errmsg("cannot enable failover for a replication slot"
918 " on the standby"));
919 }
920
921 if (failover)
922 {
923 /*
924 * Do not allow users to enable failover for temporary slots as we do
925 * not support syncing temporary slots to the standby.
926 */
929 errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
930 errmsg("cannot enable failover for a temporary replication slot"));
931
933 {
937
938 update_slot = true;
939 }
940 }
941
943 {
947
948 update_slot = true;
949 }
950
951 if (update_slot)
952 {
955 }
956
958}
959
960/*
961 * Permanently drop the currently acquired replication slot.
962 */
963void
965{
967
968 Assert(MyReplicationSlot != NULL);
969
970 /* slot isn't acquired anymore */
971 MyReplicationSlot = NULL;
972
974}
975
976/*
977 * Permanently drop the replication slot which will be released by the point
978 * this function returns.
979 */
980static void
982{
983 char path[MAXPGPATH];
984 char tmppath[MAXPGPATH];
985
986 /*
987 * If some other backend ran this code concurrently with us, we might try
988 * to delete a slot with a certain name while someone else was trying to
989 * create a slot with the same name.
990 */
991 LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
992
993 /* Generate pathnames. */
994 sprintf(path, "%s/%s", PG_REPLSLOT_DIR, NameStr(slot->data.name));
995 sprintf(tmppath, "%s/%s.tmp", PG_REPLSLOT_DIR, NameStr(slot->data.name));
996
997 /*
998 * Rename the slot directory on disk, so that we'll no longer recognize
999 * this as a valid slot. Note that if this fails, we've got to mark the
1000 * slot inactive before bailing out. If we're dropping an ephemeral or a
1001 * temporary slot, we better never fail hard as the caller won't expect
1002 * the slot to survive and this might get called during error handling.
1003 */
1004 if (rename(path, tmppath) == 0)
1005 {
1006 /*
1007 * We need to fsync() the directory we just renamed and its parent to
1008 * make sure that our changes are on disk in a crash-safe fashion. If
1009 * fsync() fails, we can't be sure whether the changes are on disk or
1010 * not. For now, we handle that by panicking;
1011 * StartupReplicationSlots() will try to straighten it out after
1012 * restart.
1013 */
1015 fsync_fname(tmppath, true);
1018 }
1019 else
1020 {
1021 bool fail_softly = slot->data.persistency != RS_PERSISTENT;
1022
1023 SpinLockAcquire(&slot->mutex);
1024 slot->active_pid = 0;
1025 SpinLockRelease(&slot->mutex);
1026
1027 /* wake up anyone waiting on this slot */
1029
1030 ereport(fail_softly ? WARNING : ERROR,
1032 errmsg("could not rename file \"%s\" to \"%s\": %m",
1033 path, tmppath)));
1034 }
1035
1036 /*
1037 * The slot is definitely gone. Lock out concurrent scans of the array
1038 * long enough to kill it. It's OK to clear the active PID here without
1039 * grabbing the mutex because nobody else can be scanning the array here,
1040 * and nobody can be attached to this slot and thus access it without
1041 * scanning the array.
1042 *
1043 * Also wake up processes waiting for it.
1044 */
1045 LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE);
1046 slot->active_pid = 0;
1047 slot->in_use = false;
1048 LWLockRelease(ReplicationSlotControlLock);
1050
1051 /*
1052 * Slot is dead and doesn't prevent resource removal anymore, recompute
1053 * limits.
1054 */
1057
1058 /*
1059 * If removing the directory fails, the worst thing that will happen is
1060 * that the user won't be able to create a new slot with the same name
1061 * until the next server restart. We warn about it, but that's all.
1062 */
1063 if (!rmtree(tmppath, true))
1065 (errmsg("could not remove directory \"%s\"", tmppath)));
1066
1067 /*
1068 * Drop the statistics entry for the replication slot. Do this while
1069 * holding ReplicationSlotAllocationLock so that we don't drop a
1070 * statistics entry for another slot with the same name just created in
1071 * another session.
1072 */
1073 if (SlotIsLogical(slot))
1075
1076 /*
1077 * We release this at the very end, so that nobody starts trying to create
1078 * a slot while we're still cleaning up the detritus of the old one.
1079 */
1080 LWLockRelease(ReplicationSlotAllocationLock);
1081}
1082
1083/*
1084 * Serialize the currently acquired slot's state from memory to disk, thereby
1085 * guaranteeing the current state will survive a crash.
1086 */
1087void
1089{
1090 char path[MAXPGPATH];
1091
1092 Assert(MyReplicationSlot != NULL);
1093
1096}
1097
1098/*
1099 * Signal that it would be useful if the currently acquired slot would be
1100 * flushed out to disk.
1101 *
1102 * Note that the actual flush to disk can be delayed for a long time; if an
1103 * immediate flush is required for correctness, explicitly call ReplicationSlotSave().
1104 */
1105void
1107{
1109
1110 Assert(MyReplicationSlot != NULL);
1111
1112 SpinLockAcquire(&slot->mutex);
1114 MyReplicationSlot->dirty = true;
1115 SpinLockRelease(&slot->mutex);
1116}
1117
1118/*
1119 * Convert a slot that's marked as RS_EPHEMERAL or RS_TEMPORARY to a
1120 * RS_PERSISTENT slot, guaranteeing it will be there after an eventual crash.
1121 */
1122void
1124{
1126
1127 Assert(slot != NULL);
1129
1130 SpinLockAcquire(&slot->mutex);
1132 SpinLockRelease(&slot->mutex);
1133
1136}
1137
1138/*
1139 * Compute the oldest xmin across all slots and store it in the ProcArray.
1140 *
1141 * If already_locked is true, ProcArrayLock has already been acquired
1142 * exclusively.
1143 */
1144void
1146{
 /*
  * NOTE(review): several lines are missing from this copy of the function:
  * the line carrying the function name and parameter list (the body uses a
  * parameter named 'already_locked'), the declaration of 'agg_xmin', the
  * per-iteration declaration of 's', and one statement on each side of the
  * three per-slot reads inside the loop (presumably the slot mutex
  * acquire/release -- confirm against upstream).
  */
1147 int i;
1149 TransactionId agg_catalog_xmin = InvalidTransactionId;
1150
1151 Assert(ReplicationSlotCtl != NULL);
1152
 /* Hold the control lock in shared mode while iterating over the slots. */
1153 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1154
1155 for (i = 0; i < max_replication_slots; i++)
1156 {
1158 TransactionId effective_xmin;
1159 TransactionId effective_catalog_xmin;
1160 bool invalidated;
1161
 /* Unused array entries contribute nothing. */
1162 if (!s->in_use)
1163 continue;
1164
 /* Copy the fields of interest into locals before examining them. */
1166 effective_xmin = s->effective_xmin;
1167 effective_catalog_xmin = s->effective_catalog_xmin;
1168 invalidated = s->data.invalidated != RS_INVAL_NONE;
1170
1171 /* invalidated slots need not apply */
1172 if (invalidated)
1173 continue;
1174
 /*
  * For each of the two horizons, keep the candidate if it is valid
  * and either no aggregate has been recorded yet or the candidate
  * precedes the current aggregate according to
  * TransactionIdPrecedes().
  */
1175 /* check the data xmin */
1176 if (TransactionIdIsValid(effective_xmin) &&
1177 (!TransactionIdIsValid(agg_xmin) ||
1178 TransactionIdPrecedes(effective_xmin, agg_xmin)))
1179 agg_xmin = effective_xmin;
1180
1181 /* check the catalog xmin */
1182 if (TransactionIdIsValid(effective_catalog_xmin) &&
1183 (!TransactionIdIsValid(agg_catalog_xmin) ||
1184 TransactionIdPrecedes(effective_catalog_xmin, agg_catalog_xmin)))
1185 agg_catalog_xmin = effective_catalog_xmin;
1186 }
1187
1188 LWLockRelease(ReplicationSlotControlLock);
1189
 /*
  * Hand both aggregates to the ProcArray after the control lock has been
  * released; 'already_locked' is passed through unchanged.
  */
1190 ProcArraySetReplicationSlotXmin(agg_xmin, agg_catalog_xmin, already_locked);
1191}
1192
1193/*
1194 * Compute the oldest restart LSN across all slots and inform xlog module.
1195 *
1196 * Note: while max_slot_wal_keep_size is theoretically relevant for this
1197 * purpose, we don't try to account for that, because this module doesn't
1198 * know what to compare against.
1199 */
1200void
1202{
1203 int i;
1204 XLogRecPtr min_required = InvalidXLogRecPtr;
1205
1206 Assert(ReplicationSlotCtl != NULL);
1207
1208 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1209 for (i = 0; i < max_replication_slots; i++)
1210 {
1212 XLogRecPtr restart_lsn;
1213 XLogRecPtr last_saved_restart_lsn;
1214 bool invalidated;
1215 ReplicationSlotPersistency persistency;
1216
1217 if (!s->in_use)
1218 continue;
1219
1221 persistency = s->data.persistency;
1222 restart_lsn = s->data.restart_lsn;
1223 invalidated = s->data.invalidated != RS_INVAL_NONE;
1224 last_saved_restart_lsn = s->last_saved_restart_lsn;
1226
1227 /* invalidated slots need not apply */
1228 if (invalidated)
1229 continue;
1230
1231 /*
1232 * For persistent slots, use last_saved_restart_lsn to compute the
1233 * oldest LSN for removal of WAL segments. The segments between
1234 * last_saved_restart_lsn and restart_lsn might be needed by a
1235 * persistent slot in the case of database crash. Non-persistent
1236 * slots can't survive the database crash, so we don't care about
1237 * last_saved_restart_lsn for them.
1238 */
1239 if (persistency == RS_PERSISTENT)
1240 {
1241 if (last_saved_restart_lsn != InvalidXLogRecPtr &&
1242 restart_lsn > last_saved_restart_lsn)
1243 {
1244 restart_lsn = last_saved_restart_lsn;
1245 }
1246 }
1247
1248 if (restart_lsn != InvalidXLogRecPtr &&
1249 (min_required == InvalidXLogRecPtr ||
1250 restart_lsn < min_required))
1251 min_required = restart_lsn;
1252 }
1253 LWLockRelease(ReplicationSlotControlLock);
1254
1256}
1257
1258/*
1259 * Compute the oldest WAL LSN required by *logical* decoding slots.
1260 *
1261 * Returns InvalidXLogRecPtr if logical decoding is disabled or no logical
1262 * slots exist.
1263 *
1264 * NB: this returns a value >= ReplicationSlotsComputeRequiredLSN(), since it
1265 * ignores physical replication slots.
1266 *
1267 * The results aren't required frequently, so we don't maintain a precomputed
1268 * value like we do for ComputeRequiredLSN() and ComputeRequiredXmin().
1269 */
1272{
1274 int i;
1275
1276 if (max_replication_slots <= 0)
1277 return InvalidXLogRecPtr;
1278
1279 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1280
1281 for (i = 0; i < max_replication_slots; i++)
1282 {
1283 ReplicationSlot *s;
1284 XLogRecPtr restart_lsn;
1285 XLogRecPtr last_saved_restart_lsn;
1286 bool invalidated;
1287 ReplicationSlotPersistency persistency;
1288
1290
1291 /* cannot change while ReplicationSlotControlLock is held */
1292 if (!s->in_use)
1293 continue;
1294
1295 /* we're only interested in logical slots */
1296 if (!SlotIsLogical(s))
1297 continue;
1298
1299 /* read once, it's ok if it increases while we're checking */
1301 persistency = s->data.persistency;
1302 restart_lsn = s->data.restart_lsn;
1303 invalidated = s->data.invalidated != RS_INVAL_NONE;
1304 last_saved_restart_lsn = s->last_saved_restart_lsn;
1306
1307 /* invalidated slots need not apply */
1308 if (invalidated)
1309 continue;
1310
1311 /*
1312 * For persistent slots, use last_saved_restart_lsn to compute the
1313 * oldest LSN for removal of WAL segments. The segments between
1314 * last_saved_restart_lsn and restart_lsn might be needed by a
1315 * persistent slot in the case of database crash. Non-persistent
1316 * slots can't survive the database crash, so we don't care about
1317 * last_saved_restart_lsn for them.
1318 */
1319 if (persistency == RS_PERSISTENT)
1320 {
1321 if (last_saved_restart_lsn != InvalidXLogRecPtr &&
1322 restart_lsn > last_saved_restart_lsn)
1323 {
1324 restart_lsn = last_saved_restart_lsn;
1325 }
1326 }
1327
1328 if (restart_lsn == InvalidXLogRecPtr)
1329 continue;
1330
1331 if (result == InvalidXLogRecPtr ||
1332 restart_lsn < result)
1333 result = restart_lsn;
1334 }
1335
1336 LWLockRelease(ReplicationSlotControlLock);
1337
1338 return result;
1339}
1340
1341/*
1342 * ReplicationSlotsCountDBSlots -- count the number of slots that refer to the
1343 * passed database oid.
1344 *
1345 * Returns true if there are any slots referencing the database. *nslots will
1346 * be set to the absolute number of slots in the database, *nactive to ones
1347 * currently active.
 *
 * Both output counters are zeroed before anything else, so they are valid on
 * every return path; when max_replication_slots <= 0 the function returns
 * false without taking any lock.
1348 */
1349bool
1350ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive)
1351{
1352 int i;
1353
1354 *nslots = *nactive = 0;
1355
 /* no slots are configured, so there is nothing to scan */
1356 if (max_replication_slots <= 0)
1357 return false;
1358
 /* shared mode is what iterating over the slots requires */
1359 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1360 for (i = 0; i < max_replication_slots; i++)
1361 {
1362 ReplicationSlot *s;
1363
 /*
  * NOTE(review): no assignment to "s" is visible in this listing (listing
  * line 1364 is absent); presumably it is pointed at the i-th slot here --
  * confirm against the upstream file.
  */
1365
1366 /* cannot change while ReplicationSlotControlLock is held */
1367 if (!s->in_use)
1368 continue;
1369
1370 /* only logical slots are database specific, skip */
1371 if (!SlotIsLogical(s))
1372 continue;
1373
1374 /* not our database, skip */
1375 if (s->data.database != dboid)
1376 continue;
1377
1378 /* NB: intentionally counting invalidated slots */
1379
1380 /* count slots with spinlock held */
 /*
  * NOTE(review): the spinlock acquire/release that the comment above refers
  * to are not visible in this listing (listing lines 1381 and 1385 are
  * absent) -- confirm against the upstream file.
  */
1382 (*nslots)++;
 /* a nonzero active_pid is what this function counts as "active" */
1383 if (s->active_pid != 0)
1384 (*nactive)++;
1386 }
1387 LWLockRelease(ReplicationSlotControlLock);
1388
 /* report whether at least one matching slot was found */
1389 if (*nslots > 0)
1390 return true;
1391 return false;
1392}
1393
1394/*
1395 * ReplicationSlotsDropDBSlots -- Drop all db-specific slots relating to the
1396 * passed database oid. The caller should hold an exclusive lock on the
1397 * pg_database oid for the database to prevent creation of new slots on the db
1398 * or replay from existing slots.
1399 *
1400 * Another session that concurrently acquires an existing slot on the target DB
1401 * (most likely to drop it) may cause this function to ERROR. If that happens
1402 * it may have dropped some but not all slots.
1403 *
1404 * This routine isn't as efficient as it could be - but we don't drop
1405 * databases often, especially databases with lots of slots.
1406 */
1407void
1409{
1410 int i;
1411
1412 if (max_replication_slots <= 0)
1413 return;
1414
1415restart:
1416 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1417 for (i = 0; i < max_replication_slots; i++)
1418 {
1419 ReplicationSlot *s;
1420 char *slotname;
1421 int active_pid;
1422
1424
1425 /* cannot change while ReplicationSlotControlLock is held */
1426 if (!s->in_use)
1427 continue;
1428
1429 /* only logical slots are database specific, skip */
1430 if (!SlotIsLogical(s))
1431 continue;
1432
1433 /* not our database, skip */
1434 if (s->data.database != dboid)
1435 continue;
1436
1437 /* NB: intentionally including invalidated slots */
1438
1439 /* acquire slot, so ReplicationSlotDropAcquired can be reused */
1441 /* can't change while ReplicationSlotControlLock is held */
1442 slotname = NameStr(s->data.name);
1443 active_pid = s->active_pid;
1444 if (active_pid == 0)
1445 {
1447 s->active_pid = MyProcPid;
1448 }
1450
1451 /*
1452 * Even though we hold an exclusive lock on the database object a
1453 * logical slot for that DB can still be active, e.g. if it's
1454 * concurrently being dropped by a backend connected to another DB.
1455 *
1456 * That's fairly unlikely in practice, so we'll just bail out.
1457 *
1458 * The slot sync worker holds a shared lock on the database before
1459 * operating on synced logical slots to avoid conflict with the drop
1460 * happening here. The persistent synced slots are thus safe but there
1461 * is a possibility that the slot sync worker has created a temporary
1462 * slot (which stays active even on release) and we are trying to drop
1463 * that here. In practice, the chances of hitting this scenario are
1464 * low, since during slot synchronization the temporary slot is
1465 * immediately converted to persistent and thus is safe due to the
1466 * shared lock taken on the database. So, we'll just bail out in such
1467 * a case.
1468 *
1469 * XXX: We can consider shutting down the slot sync worker before
1470 * trying to drop synced temporary slots here.
1471 */
1472 if (active_pid)
1473 ereport(ERROR,
1474 (errcode(ERRCODE_OBJECT_IN_USE),
1475 errmsg("replication slot \"%s\" is active for PID %d",
1476 slotname, active_pid)));
1477
1478 /*
1479 * To avoid duplicating ReplicationSlotDropAcquired() and to avoid
1480 * holding ReplicationSlotControlLock over filesystem operations,
1481 * release ReplicationSlotControlLock and use
1482 * ReplicationSlotDropAcquired.
1483 *
1484 * As that means the set of slots could change, restart scan from the
1485 * beginning each time we release the lock.
1486 */
1487 LWLockRelease(ReplicationSlotControlLock);
1489 goto restart;
1490 }
1491 LWLockRelease(ReplicationSlotControlLock);
1492}
1493
1494
1495/*
1496 * Check whether the server's configuration supports using replication
1497 * slots.
1498 */
1499void
1501{
1502 /*
1503 * NB: Adding a new requirement likely means that RestoreSlotFromDisk()
1504 * needs the same check.
1505 */
1506
1507 if (max_replication_slots == 0)
1508 ereport(ERROR,
1509 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1510 errmsg("replication slots can only be used if \"max_replication_slots\" > 0")));
1511
1513 ereport(ERROR,
1514 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1515 errmsg("replication slots can only be used if \"wal_level\" >= \"replica\"")));
1516}
1517
1518/*
1519 * Check whether the user has privilege to use replication slots.
1520 */
1521void
1523{
1525 ereport(ERROR,
1526 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
1527 errmsg("permission denied to use replication slots"),
1528 errdetail("Only roles with the %s attribute may use replication slots.",
1529 "REPLICATION")));
1530}
1531
1532/*
1533 * Reserve WAL for the currently active slot.
1534 *
1535 * Compute and set restart_lsn in a manner that's appropriate for the type of
1536 * the slot and concurrency safe.
1537 */
1538void
1540{
1542
1543 Assert(slot != NULL);
1546
1547 /*
1548 * The replication slot mechanism is used to prevent removal of required
1549 * WAL. As there is no interlock between this routine and checkpoints, WAL
1550 * segments could concurrently be removed when a now stale return value of
1551 * ReplicationSlotsComputeRequiredLSN() is used. In the unlikely case that
1552 * this happens we'll just retry.
1553 */
1554 while (true)
1555 {
1556 XLogSegNo segno;
1557 XLogRecPtr restart_lsn;
1558
1559 /*
1560 * For logical slots log a standby snapshot and start logical decoding
1561 * at exactly that position. That allows the slot to start up more
1562 * quickly. But on a standby we cannot do WAL writes, so just use the
1563 * replay pointer; effectively, an attempt to create a logical slot on
1564 * standby will cause it to wait for an xl_running_xact record to be
1565 * logged independently on the primary, so that a snapshot can be
1566 * built using the record.
1567 *
1568 * None of this is needed (or indeed helpful) for physical slots as
1569 * they'll start replay at the last logged checkpoint anyway. Instead
1570 * return the location of the last redo LSN. While that slightly
1571 * increases the chance that we have to retry, it's where a base
1572 * backup has to start replay at.
1573 */
1574 if (SlotIsPhysical(slot))
1575 restart_lsn = GetRedoRecPtr();
1576 else if (RecoveryInProgress())
1577 restart_lsn = GetXLogReplayRecPtr(NULL);
1578 else
1579 restart_lsn = GetXLogInsertRecPtr();
1580
1581 SpinLockAcquire(&slot->mutex);
1582 slot->data.restart_lsn = restart_lsn;
1583 SpinLockRelease(&slot->mutex);
1584
1585 /* prevent WAL removal as fast as possible */
1587
1588 /*
1589 * If all required WAL is still there, great, otherwise retry. The
1590 * slot should prevent further removal of WAL, unless there's a
1591 * concurrent ReplicationSlotsComputeRequiredLSN() after we've written
1592 * the new restart_lsn above, so normally we should never need to loop
1593 * more than twice.
1594 */
1596 if (XLogGetLastRemovedSegno() < segno)
1597 break;
1598 }
1599
1600 if (!RecoveryInProgress() && SlotIsLogical(slot))
1601 {
1602 XLogRecPtr flushptr;
1603
1604 /* make sure we have enough information to start */
1605 flushptr = LogStandbySnapshot();
1606
1607 /* and make sure it's fsynced to disk */
1608 XLogFlush(flushptr);
1609 }
1610}
1611
1612/*
1613 * Report that replication slot needs to be invalidated
1614 */
1615static void
1617 bool terminating,
1618 int pid,
1619 NameData slotname,
1620 XLogRecPtr restart_lsn,
1621 XLogRecPtr oldestLSN,
1622 TransactionId snapshotConflictHorizon,
1623 long slot_idle_seconds)
1624{
1625 StringInfoData err_detail;
1626 StringInfoData err_hint;
1627
1628 initStringInfo(&err_detail);
1629 initStringInfo(&err_hint);
1630
1631 switch (cause)
1632 {
1634 {
1635 uint64 ex = oldestLSN - restart_lsn;
1636
1637 appendStringInfo(&err_detail,
1638 ngettext("The slot's restart_lsn %X/%08X exceeds the limit by %" PRIu64 " byte.",
1639 "The slot's restart_lsn %X/%08X exceeds the limit by %" PRIu64 " bytes.",
1640 ex),
1641 LSN_FORMAT_ARGS(restart_lsn),
1642 ex);
1643 /* translator: %s is a GUC variable name */
1644 appendStringInfo(&err_hint, _("You might need to increase \"%s\"."),
1645 "max_slot_wal_keep_size");
1646 break;
1647 }
1648 case RS_INVAL_HORIZON:
1649 appendStringInfo(&err_detail, _("The slot conflicted with xid horizon %u."),
1650 snapshotConflictHorizon);
1651 break;
1652
1653 case RS_INVAL_WAL_LEVEL:
1654 appendStringInfoString(&err_detail, _("Logical decoding on standby requires \"wal_level\" >= \"logical\" on the primary server."));
1655 break;
1656
1658 {
1659 /* translator: %s is a GUC variable name */
1660 appendStringInfo(&err_detail, _("The slot's idle time of %lds exceeds the configured \"%s\" duration of %ds."),
1661 slot_idle_seconds, "idle_replication_slot_timeout",
1663 /* translator: %s is a GUC variable name */
1664 appendStringInfo(&err_hint, _("You might need to increase \"%s\"."),
1665 "idle_replication_slot_timeout");
1666 break;
1667 }
1668 case RS_INVAL_NONE:
1670 }
1671
1672 ereport(LOG,
1673 terminating ?
1674 errmsg("terminating process %d to release replication slot \"%s\"",
1675 pid, NameStr(slotname)) :
1676 errmsg("invalidating obsolete replication slot \"%s\"",
1677 NameStr(slotname)),
1678 errdetail_internal("%s", err_detail.data),
1679 err_hint.len ? errhint("%s", err_hint.data) : 0);
1680
1681 pfree(err_detail.data);
1682 pfree(err_hint.data);
1683}
1684
1685/*
1686 * Can we invalidate an idle replication slot?
1687 *
1688 * Idle timeout invalidation is allowed only when:
1689 *
1690 * 1. Idle timeout is set
1691 * 2. Slot has reserved WAL
1692 * 3. Slot is inactive
1693 * 4. The slot is not being synced from the primary while the server is in
1694 * recovery. This is because synced slots are always considered to be
1695 * inactive because they don't perform logical decoding to produce changes.
1696 */
1697static inline bool
1699{
1702 s->inactive_since > 0 &&
1703 !(RecoveryInProgress() && s->data.synced));
1704}
1705
1706/*
1707 * DetermineSlotInvalidationCause - Determine the cause for which a slot
1708 * becomes invalid among the given possible causes.
1709 *
1710 * This function sequentially checks all possible invalidation causes and
1711 * returns the first one for which the slot is eligible for invalidation.
1712 */
1715 XLogRecPtr oldestLSN, Oid dboid,
1716 TransactionId snapshotConflictHorizon,
1717 TransactionId initial_effective_xmin,
1718 TransactionId initial_catalog_effective_xmin,
1719 XLogRecPtr initial_restart_lsn,
1720 TimestampTz *inactive_since, TimestampTz now)
1721{
1722 Assert(possible_causes != RS_INVAL_NONE);
1723
1724 if (possible_causes & RS_INVAL_WAL_REMOVED)
1725 {
1726 if (initial_restart_lsn != InvalidXLogRecPtr &&
1727 initial_restart_lsn < oldestLSN)
1728 return RS_INVAL_WAL_REMOVED;
1729 }
1730
1731 if (possible_causes & RS_INVAL_HORIZON)
1732 {
1733 /* invalid DB oid signals a shared relation */
1734 if (SlotIsLogical(s) &&
1735 (dboid == InvalidOid || dboid == s->data.database))
1736 {
1737 if (TransactionIdIsValid(initial_effective_xmin) &&
1738 TransactionIdPrecedesOrEquals(initial_effective_xmin,
1739 snapshotConflictHorizon))
1740 return RS_INVAL_HORIZON;
1741 else if (TransactionIdIsValid(initial_catalog_effective_xmin) &&
1742 TransactionIdPrecedesOrEquals(initial_catalog_effective_xmin,
1743 snapshotConflictHorizon))
1744 return RS_INVAL_HORIZON;
1745 }
1746 }
1747
1748 if (possible_causes & RS_INVAL_WAL_LEVEL)
1749 {
1750 if (SlotIsLogical(s))
1751 return RS_INVAL_WAL_LEVEL;
1752 }
1753
1754 if (possible_causes & RS_INVAL_IDLE_TIMEOUT)
1755 {
1756 Assert(now > 0);
1757
1758 if (CanInvalidateIdleSlot(s))
1759 {
1760 /*
1761 * Simulate the invalidation due to idle_timeout to test the
1762 * timeout behavior promptly, without waiting for it to trigger
1763 * naturally.
1764 */
1765#ifdef USE_INJECTION_POINTS
1766 if (IS_INJECTION_POINT_ATTACHED("slot-timeout-inval"))
1767 {
1768 *inactive_since = 0; /* since the beginning of time */
1769 return RS_INVAL_IDLE_TIMEOUT;
1770 }
1771#endif
1772
1773 /*
1774 * Check if the slot needs to be invalidated due to
1775 * idle_replication_slot_timeout GUC.
1776 */
1779 {
1780 *inactive_since = s->inactive_since;
1781 return RS_INVAL_IDLE_TIMEOUT;
1782 }
1783 }
1784 }
1785
1786 return RS_INVAL_NONE;
1787}
1788
1789/*
1790 * Helper for InvalidateObsoleteReplicationSlots
1791 *
1792 * Acquires the given slot and marks it invalid, if necessary and possible.
1793 *
1794 * Returns whether ReplicationSlotControlLock was released in the interim (and
1795 * in that case we're not holding the lock at return, otherwise we are).
1796 *
1797 * Sets *invalidated true if the slot was invalidated. (Untouched otherwise.)
1798 *
1799 * This is inherently racy, because we release the LWLock
1800 * for syscalls, so caller must restart if we return true.
1801 */
1802static bool
1804 ReplicationSlot *s,
1805 XLogRecPtr oldestLSN,
1806 Oid dboid, TransactionId snapshotConflictHorizon,
1807 bool *invalidated)
1808{
1809 int last_signaled_pid = 0;
1810 bool released_lock = false;
1811 bool terminated = false;
1812 TransactionId initial_effective_xmin = InvalidTransactionId;
1813 TransactionId initial_catalog_effective_xmin = InvalidTransactionId;
1814 XLogRecPtr initial_restart_lsn = InvalidXLogRecPtr;
1816 TimestampTz inactive_since = 0;
1817
1818 for (;;)
1819 {
1820 XLogRecPtr restart_lsn;
1821 NameData slotname;
1822 int active_pid = 0;
1824 TimestampTz now = 0;
1825 long slot_idle_secs = 0;
1826
1827 Assert(LWLockHeldByMeInMode(ReplicationSlotControlLock, LW_SHARED));
1828
1829 if (!s->in_use)
1830 {
1831 if (released_lock)
1832 LWLockRelease(ReplicationSlotControlLock);
1833 break;
1834 }
1835
1836 if (possible_causes & RS_INVAL_IDLE_TIMEOUT)
1837 {
1838 /*
1839 * Assign the current time here to avoid system call overhead
1840 * while holding the spinlock in subsequent code.
1841 */
1843 }
1844
1845 /*
1846 * Check if the slot needs to be invalidated. If it needs to be
1847 * invalidated, and is not currently acquired, acquire it and mark it
1848 * as having been invalidated. We do this with the spinlock held to
1849 * avoid race conditions -- for example the restart_lsn could move
1850 * forward, or the slot could be dropped.
1851 */
1853
1854 restart_lsn = s->data.restart_lsn;
1855
1856 /* we do nothing if the slot is already invalid */
1857 if (s->data.invalidated == RS_INVAL_NONE)
1858 {
1859 /*
1860 * The slot's mutex will be released soon, and it is possible that
1861 * those values change since the process holding the slot has been
1862 * terminated (if any), so record them here to ensure that we
1863 * would report the correct invalidation cause.
1864 *
1865 * Unlike other slot attributes, slot's inactive_since can't be
1866 * changed until the acquired slot is released or the owning
1867 * process is terminated. So, the inactive slot can only be
1868 * invalidated immediately without being terminated.
1869 */
1870 if (!terminated)
1871 {
1872 initial_restart_lsn = s->data.restart_lsn;
1873 initial_effective_xmin = s->effective_xmin;
1874 initial_catalog_effective_xmin = s->effective_catalog_xmin;
1875 }
1876
1877 invalidation_cause = DetermineSlotInvalidationCause(possible_causes,
1878 s, oldestLSN,
1879 dboid,
1880 snapshotConflictHorizon,
1881 initial_effective_xmin,
1882 initial_catalog_effective_xmin,
1883 initial_restart_lsn,
1884 &inactive_since,
1885 now);
1886 }
1887
1888 /*
1889 * The invalidation cause recorded previously should not change once
1890 * the process owning the slot (if any) has been terminated.
1891 */
1892 Assert(!(invalidation_cause_prev != RS_INVAL_NONE && terminated &&
1893 invalidation_cause_prev != invalidation_cause));
1894
1895 /* if there's no invalidation, we're done */
1896 if (invalidation_cause == RS_INVAL_NONE)
1897 {
1899 if (released_lock)
1900 LWLockRelease(ReplicationSlotControlLock);
1901 break;
1902 }
1903
1904 slotname = s->data.name;
1905 active_pid = s->active_pid;
1906
1907 /*
1908 * If the slot can be acquired, do so and mark it invalidated
1909 * immediately. Otherwise we'll signal the owning process, below, and
1910 * retry.
1911 */
1912 if (active_pid == 0)
1913 {
1915 s->active_pid = MyProcPid;
1916 s->data.invalidated = invalidation_cause;
1917
1918 /*
1919 * XXX: We should consider not overwriting restart_lsn and instead
1920 * just rely on .invalidated.
1921 */
1922 if (invalidation_cause == RS_INVAL_WAL_REMOVED)
1923 {
1926 }
1927
1928 /* Let caller know */
1929 *invalidated = true;
1930 }
1931
1933
1934 /*
1935 * Calculate the idle time duration of the slot if slot is marked
1936 * invalidated with RS_INVAL_IDLE_TIMEOUT.
1937 */
1938 if (invalidation_cause == RS_INVAL_IDLE_TIMEOUT)
1939 {
1940 int slot_idle_usecs;
1941
1942 TimestampDifference(inactive_since, now, &slot_idle_secs,
1943 &slot_idle_usecs);
1944 }
1945
1946 if (active_pid != 0)
1947 {
1948 /*
1949 * Prepare the sleep on the slot's condition variable before
1950 * releasing the lock, to close a possible race condition if the
1951 * slot is released before the sleep below.
1952 */
1954
1955 LWLockRelease(ReplicationSlotControlLock);
1956 released_lock = true;
1957
1958 /*
1959 * Signal to terminate the process that owns the slot, if we
1960 * haven't already signalled it. (Avoidance of repeated
1961 * signalling is the only reason for there to be a loop in this
1962 * routine; otherwise we could rely on caller's restart loop.)
1963 *
1964 * There is a race condition where another process may own the slot
1965 * after its current owner process is terminated and before this
1966 * process owns it. To handle that, we signal only if the PID of
1967 * the owning process has changed from the previous time. (This
1968 * logic assumes that the same PID is not reused very quickly.)
1969 */
1970 if (last_signaled_pid != active_pid)
1971 {
1972 ReportSlotInvalidation(invalidation_cause, true, active_pid,
1973 slotname, restart_lsn,
1974 oldestLSN, snapshotConflictHorizon,
1975 slot_idle_secs);
1976
1977 if (MyBackendType == B_STARTUP)
1978 (void) SendProcSignal(active_pid,
1981 else
1982 (void) kill(active_pid, SIGTERM);
1983
1984 last_signaled_pid = active_pid;
1985 terminated = true;
1986 invalidation_cause_prev = invalidation_cause;
1987 }
1988
1989 /* Wait until the slot is released. */
1991 WAIT_EVENT_REPLICATION_SLOT_DROP);
1992
1993 /*
1994 * Re-acquire lock and start over; we expect to invalidate the
1995 * slot next time (unless another process acquires the slot in the
1996 * meantime).
1997 */
1998 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1999 continue;
2000 }
2001 else
2002 {
2003 /*
2004 * We hold the slot now and have already invalidated it; flush it
2005 * to ensure that state persists.
2006 *
2007 * Don't want to hold ReplicationSlotControlLock across file
2008 * system operations, so release it now but be sure to tell caller
2009 * to restart from scratch.
2010 */
2011 LWLockRelease(ReplicationSlotControlLock);
2012 released_lock = true;
2013
2014 /* Make sure the invalidated state persists across server restart */
2018
2019 ReportSlotInvalidation(invalidation_cause, false, active_pid,
2020 slotname, restart_lsn,
2021 oldestLSN, snapshotConflictHorizon,
2022 slot_idle_secs);
2023
2024 /* done with this slot for now */
2025 break;
2026 }
2027 }
2028
2029 Assert(released_lock == !LWLockHeldByMe(ReplicationSlotControlLock));
2030
2031 return released_lock;
2032}
2033
2034/*
2035 * Invalidate slots that require resources about to be removed.
2036 *
2037 * Returns true if any slot was invalidated.
2038 *
2039 * Whether a slot needs to be invalidated depends on the invalidation cause.
2040 * A slot is invalidated if it:
2041 * - RS_INVAL_WAL_REMOVED: requires an LSN older than the given segment
2042 * - RS_INVAL_HORIZON: requires a snapshot <= the given horizon in the given
2043 * db; dboid may be InvalidOid for shared relations
2044 * - RS_INVAL_WAL_LEVEL: is logical and wal_level is insufficient
2045 * - RS_INVAL_IDLE_TIMEOUT: has been idle longer than the configured
2046 * "idle_replication_slot_timeout" duration.
2047 *
2048 * Note: This function attempts to invalidate the slot for multiple possible
2049 * causes in a single pass, minimizing redundant iterations. The
2050 * "possible_causes" parameter is a bitmask of one or more of the defined causes.
2051 *
2052 * NB - this runs as part of checkpoint, so avoid raising errors if possible.
2053 */
2054bool
2056 XLogSegNo oldestSegno, Oid dboid,
2057 TransactionId snapshotConflictHorizon)
2058{
2059 XLogRecPtr oldestLSN;
2060 bool invalidated = false;
2061
2062 Assert(!(possible_causes & RS_INVAL_HORIZON) || TransactionIdIsValid(snapshotConflictHorizon));
2063 Assert(!(possible_causes & RS_INVAL_WAL_REMOVED) || oldestSegno > 0);
2064 Assert(possible_causes != RS_INVAL_NONE);
2065
2066 if (max_replication_slots == 0)
2067 return invalidated;
2068
2069 XLogSegNoOffsetToRecPtr(oldestSegno, 0, wal_segment_size, oldestLSN);
2070
2071restart:
2072 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
2073 for (int i = 0; i < max_replication_slots; i++)
2074 {
2076
2077 if (!s->in_use)
2078 continue;
2079
2080 /* Prevent invalidation of logical slots during binary upgrade */
2082 continue;
2083
2084 if (InvalidatePossiblyObsoleteSlot(possible_causes, s, oldestLSN, dboid,
2085 snapshotConflictHorizon,
2086 &invalidated))
2087 {
2088 /* if the lock was released, start from scratch */
2089 goto restart;
2090 }
2091 }
2092 LWLockRelease(ReplicationSlotControlLock);
2093
2094 /*
2095 * If any slots have been invalidated, recalculate the resource limits.
2096 */
2097 if (invalidated)
2098 {
2101 }
2102
2103 return invalidated;
2104}
2105
2106/*
2107 * Flush all replication slots to disk.
2108 *
2109 * It is convenient to flush dirty replication slots at the time of checkpoint.
2110 * Additionally, in case of a shutdown checkpoint, we also identify the slots
2111 * for which the confirmed_flush LSN has been updated since the last time it
2112 * was saved and flush them.
2113 */
2114void
2116{
2117 int i;
2118 bool last_saved_restart_lsn_updated = false;
2119
2120 elog(DEBUG1, "performing replication slot checkpoint");
2121
2122 /*
2123 * Prevent any slot from being created/dropped while we're active. As we
2124 * explicitly do *not* want to block iterating over replication_slots or
2125 * acquiring a slot we cannot take the control lock - but that's OK,
2126 * because holding ReplicationSlotAllocationLock is strictly stronger, and
2127 * enough to guarantee that nobody can change the in_use bits on us.
2128 */
2129 LWLockAcquire(ReplicationSlotAllocationLock, LW_SHARED);
2130
2131 for (i = 0; i < max_replication_slots; i++)
2132 {
2134 char path[MAXPGPATH];
2135
2136 if (!s->in_use)
2137 continue;
2138
2139 /* save the slot to disk, locking is handled in SaveSlotToPath() */
2140 sprintf(path, "%s/%s", PG_REPLSLOT_DIR, NameStr(s->data.name));
2141
2142 /*
2143 * Slot's data is not flushed each time the confirmed_flush LSN is
2144 * updated as that could lead to frequent writes. However, we decide
2145 * to force a flush of all logical slot's data at the time of shutdown
2146 * if the confirmed_flush LSN is changed since we last flushed it to
2147 * disk. This helps in avoiding an unnecessary retreat of the
2148 * confirmed_flush LSN after restart.
2149 */
2150 if (is_shutdown && SlotIsLogical(s))
2151 {
2153
2154 if (s->data.invalidated == RS_INVAL_NONE &&
2156 {
2157 s->just_dirtied = true;
2158 s->dirty = true;
2159 }
2161 }
2162
2163 /*
2164 * Track if we're going to update slot's last_saved_restart_lsn. We
2165 * need this to know if we need to recompute the required LSN.
2166 */
2168 last_saved_restart_lsn_updated = true;
2169
2170 SaveSlotToPath(s, path, LOG);
2171 }
2172 LWLockRelease(ReplicationSlotAllocationLock);
2173
2174 /*
2175 * Recompute the required LSN if SaveSlotToPath() updated
2176 * last_saved_restart_lsn for any slot.
2177 */
2178 if (last_saved_restart_lsn_updated)
2180}
2181
2182/*
2183 * Load all replication slots from disk into memory at server startup. This
2184 * needs to be run before we start crash recovery.
2185 */
2186void
2188{
2189 DIR *replication_dir;
2190 struct dirent *replication_de;
2191
2192 elog(DEBUG1, "starting up replication slots");
2193
2194 /* restore all slots by iterating over all on-disk entries */
2195 replication_dir = AllocateDir(PG_REPLSLOT_DIR);
2196 while ((replication_de = ReadDir(replication_dir, PG_REPLSLOT_DIR)) != NULL)
2197 {
2198 char path[MAXPGPATH + sizeof(PG_REPLSLOT_DIR)];
2199 PGFileType de_type;
2200
2201 if (strcmp(replication_de->d_name, ".") == 0 ||
2202 strcmp(replication_de->d_name, "..") == 0)
2203 continue;
2204
2205 snprintf(path, sizeof(path), "%s/%s", PG_REPLSLOT_DIR, replication_de->d_name);
2206 de_type = get_dirent_type(path, replication_de, false, DEBUG1);
2207
2208 /* we're only creating directories here, skip if it's not ours */
2209 if (de_type != PGFILETYPE_ERROR && de_type != PGFILETYPE_DIR)
2210 continue;
2211
2212 /* we crashed while a slot was being set up or deleted, clean up */
2213 if (pg_str_endswith(replication_de->d_name, ".tmp"))
2214 {
2215 if (!rmtree(path, true))
2216 {
2218 (errmsg("could not remove directory \"%s\"",
2219 path)));
2220 continue;
2221 }
2223 continue;
2224 }
2225
2226 /* looks like a slot in a normal state, restore */
2227 RestoreSlotFromDisk(replication_de->d_name);
2228 }
2229 FreeDir(replication_dir);
2230
2231 /* currently no slots exist, we're done. */
2232 if (max_replication_slots <= 0)
2233 return;
2234
2235 /* Now that we have recovered all the data, compute replication xmin */
2238}
2239
2240/* ----
2241 * Manipulation of on-disk state of replication slots
2242 *
2243 * NB: none of the routines below should take any notice whether a slot is the
2244 * current one or not, that's all handled a layer above.
2245 * ----
2246 */
2247static void
2249{
2250 char tmppath[MAXPGPATH];
2251 char path[MAXPGPATH];
2252 struct stat st;
2253
2254 /*
2255 * No need to take out the io_in_progress_lock, nobody else can see this
2256 * slot yet, so nobody else will write. We're reusing SaveSlotToPath which
2257 * takes out the lock, if we'd take the lock here, we'd deadlock.
2258 */
2259
2260 sprintf(path, "%s/%s", PG_REPLSLOT_DIR, NameStr(slot->data.name));
2261 sprintf(tmppath, "%s/%s.tmp", PG_REPLSLOT_DIR, NameStr(slot->data.name));
2262
2263 /*
2264 * It's just barely possible that some previous effort to create or drop a
2265 * slot with this name left a temp directory lying around. If that seems
2266 * to be the case, try to remove it. If the rmtree() fails, we'll error
2267 * out at the MakePGDirectory() below, so we don't bother checking
2268 * success.
2269 */
2270 if (stat(tmppath, &st) == 0 && S_ISDIR(st.st_mode))
2271 rmtree(tmppath, true);
2272
2273 /* Create and fsync the temporary slot directory. */
2274 if (MakePGDirectory(tmppath) < 0)
2275 ereport(ERROR,
2277 errmsg("could not create directory \"%s\": %m",
2278 tmppath)));
2279 fsync_fname(tmppath, true);
2280
2281 /* Write the actual state file. */
2282 slot->dirty = true; /* signal that we really need to write */
2283 SaveSlotToPath(slot, tmppath, ERROR);
2284
2285 /* Rename the directory into place. */
2286 if (rename(tmppath, path) != 0)
2287 ereport(ERROR,
2289 errmsg("could not rename file \"%s\" to \"%s\": %m",
2290 tmppath, path)));
2291
2292 /*
2293 * If we'd now fail - really unlikely - we wouldn't know whether this slot
2294 * would persist after an OS crash or not - so, force a restart. The
2295 * restart would try to fsync this again till it works.
2296 */
2298
2299 fsync_fname(path, true);
2301
2303}
2304
2305/*
2306 * Shared functionality between saving and creating a replication slot.
 *
 * Writes the slot's persistent data to "<dir>/state". The data is first
 * written to "<dir>/state.tmp", fsynced and closed, and only then renamed
 * to its final name; afterwards the state file and 'dir' are fsynced.
 *
 * Returns without doing anything if the slot is not marked dirty. I/O
 * failures are reported with ereport() at level 'elevel'; if that call
 * returns, this function returns early and the dirty flag stays set.
 *
 * NOTE(review): this listing skips some source lines (the embedded line
 * numbers are not contiguous), e.g. the declaration of 'cp' and the lock
 * handling the comments below refer to. Confirm against the original file.
2307 */
2308static void
2309SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
2310{
2311 char tmppath[MAXPGPATH];
2312 char path[MAXPGPATH];
2313 int fd;
2315 bool was_dirty;
2316
2317 /* first check whether there's something to write out */
2318 SpinLockAcquire(&slot->mutex);
2319 was_dirty = slot->dirty;
2320 slot->just_dirtied = false;
2321 SpinLockRelease(&slot->mutex);
2322
2323 /* and don't do anything if there's nothing to write */
2324 if (!was_dirty)
2325 return;
2326
2328
2329 /* silence valgrind :( */
2330 memset(&cp, 0, sizeof(ReplicationSlotOnDisk));
2331
2332 sprintf(tmppath, "%s/state.tmp", dir);
2333 sprintf(path, "%s/state", dir);
2334
 /* O_EXCL: creation fails if a state.tmp file is already present */
2335 fd = OpenTransientFile(tmppath, O_CREAT | O_EXCL | O_WRONLY | PG_BINARY);
2336 if (fd < 0)
2337 {
2338 /*
2339 * If not an ERROR, then release the lock before returning. In case
2340 * of an ERROR, the error recovery path automatically releases the
2341 * lock, but no harm in explicitly releasing even in that case. Note
2342 * that LWLockRelease() could affect errno.
2343 */
2344 int save_errno = errno;
2345
2347 errno = save_errno;
2348 ereport(elevel,
2350 errmsg("could not create file \"%s\": %m",
2351 tmppath)));
2352 return;
2353 }
2354
2355 cp.magic = SLOT_MAGIC;
2357 cp.version = SLOT_VERSION;
2359
 /* copy the persistent data while holding the slot's mutex */
2360 SpinLockAcquire(&slot->mutex);
2361
2362 memcpy(&cp.slotdata, &slot->data, sizeof(ReplicationSlotPersistentData));
2363
2364 SpinLockRelease(&slot->mutex);
2365
2369 FIN_CRC32C(cp.checksum);
2370
 /* clear errno so that a short write which leaves it unset is detectable */
2371 errno = 0;
2372 pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_WRITE);
2373 if ((write(fd, &cp, sizeof(cp))) != sizeof(cp))
2374 {
2375 int save_errno = errno;
2376
2380
2381 /* if write didn't set errno, assume problem is no disk space */
2382 errno = save_errno ? save_errno : ENOSPC;
2383 ereport(elevel,
2385 errmsg("could not write to file \"%s\": %m",
2386 tmppath)));
2387 return;
2388 }
2390
2391 /* fsync the temporary file */
2392 pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_SYNC);
2393 if (pg_fsync(fd) != 0)
2394 {
2395 int save_errno = errno;
2396
2400 errno = save_errno;
2401 ereport(elevel,
2403 errmsg("could not fsync file \"%s\": %m",
2404 tmppath)));
2405 return;
2406 }
2408
2409 if (CloseTransientFile(fd) != 0)
2410 {
2411 int save_errno = errno;
2412
2414 errno = save_errno;
2415 ereport(elevel,
2417 errmsg("could not close file \"%s\": %m",
2418 tmppath)));
2419 return;
2420 }
2421
2422 /* rename to permanent file, fsync file and directory */
2423 if (rename(tmppath, path) != 0)
2424 {
2425 int save_errno = errno;
2426
2428 errno = save_errno;
2429 ereport(elevel,
2431 errmsg("could not rename file \"%s\" to \"%s\": %m",
2432 tmppath, path)));
2433 return;
2434 }
2435
2436 /*
2437 * Check CreateSlotOnDisk() for the reasoning of using a critical section.
2438 */
2440
2441 fsync_fname(path, false);
2442 fsync_fname(dir, true);
2444
2446
2447 /*
2448 * Successfully wrote, unset dirty bit, unless somebody dirtied again
2449 * already and remember the confirmed_flush LSN value.
2450 */
2451 SpinLockAcquire(&slot->mutex);
2452 if (!slot->just_dirtied)
2453 slot->dirty = false;
2456 SpinLockRelease(&slot->mutex);
2457
2459}
2460
2461/*
2462 * Load a single slot from disk into memory.
 *
 * 'name' is the slot's directory name below PG_REPLSLOT_DIR. A leftover
 * "state.tmp" file is removed, then "state" is fsynced, read and checked
 * (magic number, version, length, checksum). The persistent data is copied
 * into the first in-memory slot that is not in use.
 *
 * Failures while reading or validating the file are raised as PANIC; unmet
 * wal_level/hot_standby requirements and running out of free slots are
 * raised as FATAL.
 *
 * NOTE(review): this listing skips some source lines (the embedded line
 * numbers are not contiguous), including the function's signature line and
 * the declaration of 'cp'. Confirm against the original file.
2463 */
2464static void
2466{
2468 int i;
2469 char slotdir[MAXPGPATH + sizeof(PG_REPLSLOT_DIR)];
2470 char path[MAXPGPATH + sizeof(PG_REPLSLOT_DIR) + 10];
2471 int fd;
2472 bool restored = false;
2473 int readBytes;
2474 pg_crc32c checksum;
2475 TimestampTz now = 0;
2476
2477 /* no need to lock here, no concurrent access allowed yet */
2478
2479 /* delete temp file if it exists */
2480 sprintf(slotdir, "%s/%s", PG_REPLSLOT_DIR, name);
2481 sprintf(path, "%s/state.tmp", slotdir);
2482 if (unlink(path) < 0 && errno != ENOENT)
2483 ereport(PANIC,
2485 errmsg("could not remove file \"%s\": %m", path)));
2486
2487 sprintf(path, "%s/state", slotdir);
2488
2489 elog(DEBUG1, "restoring replication slot from \"%s\"", path);
2490
2491 /* on some operating systems fsyncing a file requires O_RDWR */
2492 fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
2493
2494 /*
2495 * A missing state file needs no special handling here: the slot directory
2496 * is rename()d into place only after its state file has been fsync()ed.
2497 */
2498 if (fd < 0)
2499 ereport(PANIC,
2501 errmsg("could not open file \"%s\": %m", path)));
2502
2503 /*
2504 * Sync state file before we're reading from it. We might have crashed
2505 * while it wasn't synced yet and we shouldn't continue on that basis.
2506 */
2507 pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC);
2508 if (pg_fsync(fd) != 0)
2509 ereport(PANIC,
2511 errmsg("could not fsync file \"%s\": %m",
2512 path)));
2514
2515 /* Also sync the parent directory */
2517 fsync_fname(slotdir, true);
2519
2520 /* read part of statefile that's guaranteed to be version independent */
2521 pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
2522 readBytes = read(fd, &cp, ReplicationSlotOnDiskConstantSize);
2524 if (readBytes != ReplicationSlotOnDiskConstantSize)
2525 {
2526 if (readBytes < 0)
2527 ereport(PANIC,
2529 errmsg("could not read file \"%s\": %m", path)));
2530 else
2531 ereport(PANIC,
2533 errmsg("could not read file \"%s\": read %d of %zu",
2534 path, readBytes,
2536 }
2537
2538 /* verify magic */
2539 if (cp.magic != SLOT_MAGIC)
2540 ereport(PANIC,
2542 errmsg("replication slot file \"%s\" has wrong magic number: %u instead of %u",
2543 path, cp.magic, SLOT_MAGIC)));
2544
2545 /* verify version */
2546 if (cp.version != SLOT_VERSION)
2547 ereport(PANIC,
2549 errmsg("replication slot file \"%s\" has unsupported version %u",
2550 path, cp.version)));
2551
2552 /* boundary check on length */
2554 ereport(PANIC,
2556 errmsg("replication slot file \"%s\" has corrupted length %u",
2557 path, cp.length)));
2558
2559 /* Now that we know the size, read the entire file */
2560 pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
2561 readBytes = read(fd,
2562 (char *) &cp + ReplicationSlotOnDiskConstantSize,
2563 cp.length);
2565 if (readBytes != cp.length)
2566 {
2567 if (readBytes < 0)
2568 ereport(PANIC,
2570 errmsg("could not read file \"%s\": %m", path)));
2571 else
2572 ereport(PANIC,
2574 errmsg("could not read file \"%s\": read %d of %zu",
2575 path, readBytes, (Size) cp.length)));
2576 }
2577
2578 if (CloseTransientFile(fd) != 0)
2579 ereport(PANIC,
2581 errmsg("could not close file \"%s\": %m", path)));
2582
2583 /* now verify the CRC */
2584 INIT_CRC32C(checksum);
2585 COMP_CRC32C(checksum,
2588 FIN_CRC32C(checksum);
2589
2590 if (!EQ_CRC32C(checksum, cp.checksum))
2591 ereport(PANIC,
2592 (errmsg("checksum mismatch for replication slot file \"%s\": is %u, should be %u",
2593 path, checksum, cp.checksum)));
2594
2595 /*
2596 * If we crashed with an ephemeral slot active, don't restore but delete
2597 * it.
2598 */
2600 {
2601 if (!rmtree(slotdir, true))
2602 {
2604 (errmsg("could not remove directory \"%s\"",
2605 slotdir)));
2606 }
2608 return;
2609 }
2610
2611 /*
2612 * Verify that requirements for the specific slot type are met. That's
2613 * important because if these aren't met we're not guaranteed to retain
2614 * all the necessary resources for the slot.
2615 *
2616 * NB: We have to do so *after* the above checks for ephemeral slots,
2617 * because otherwise a slot that shouldn't exist anymore could prevent
2618 * restarts.
2619 *
2620 * NB: Changing the requirements here also requires adapting
2621 * CheckSlotRequirements() and CheckLogicalDecodingRequirements().
2622 */
 /* a valid database OID marks the slot as a logical one (see messages below) */
2623 if (cp.slotdata.database != InvalidOid)
2624 {
2626 ereport(FATAL,
2627 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2628 errmsg("logical replication slot \"%s\" exists, but \"wal_level\" < \"logical\"",
2629 NameStr(cp.slotdata.name)),
2630 errhint("Change \"wal_level\" to be \"logical\" or higher.")));
2631
2632 /*
2633 * In standby mode, the hot standby must be enabled. This check is
2634 * necessary to ensure logical slots are invalidated when they become
2635 * incompatible due to insufficient wal_level. Otherwise, if the
2636 * primary reduces wal_level < logical while hot standby is disabled,
2637 * logical slots would remain valid even after promotion.
2638 */
2640 ereport(FATAL,
2641 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2642 errmsg("logical replication slot \"%s\" exists on the standby, but \"hot_standby\" = \"off\"",
2643 NameStr(cp.slotdata.name)),
2644 errhint("Change \"hot_standby\" to be \"on\".")));
2645 }
2646 else if (wal_level < WAL_LEVEL_REPLICA)
2647 ereport(FATAL,
2648 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2649 errmsg("physical replication slot \"%s\" exists, but \"wal_level\" < \"replica\"",
2650 NameStr(cp.slotdata.name)),
2651 errhint("Change \"wal_level\" to be \"replica\" or higher.")));
2652
2653 /* nothing can be active yet, don't lock anything */
2654 for (i = 0; i < max_replication_slots; i++)
2655 {
2656 ReplicationSlot *slot;
2657
2659
 /* skip entries already occupied; the first unused one receives the data */
2660 if (slot->in_use)
2661 continue;
2662
2663 /* restore the entire set of persistent data */
2664 memcpy(&slot->data, &cp.slotdata,
2666
2667 /* initialize in memory state */
2668 slot->effective_xmin = cp.slotdata.xmin;
2672
2677
2678 slot->in_use = true;
2679 slot->active_pid = 0;
2680
2681 /*
2682 * Set the time since the slot has become inactive after loading the
2683 * slot from the disk into memory. Whoever acquires the slot i.e.
2684 * makes the slot active will reset it. Use the same inactive_since
2685 * time for all the slots.
2686 */
2687 if (now == 0)
2689
2691
2692 restored = true;
2693 break;
2694 }
2695
 /* the loop found no unused entry */
2696 if (!restored)
2697 ereport(FATAL,
2698 (errmsg("too many replication slots active before shutdown"),
2699 errhint("Increase \"max_replication_slots\" and try again.")));
2700}
2701
2702/*
2703 * Maps an invalidation reason for a replication slot to
2704 * ReplicationSlotInvalidationCause.
2705 */
2707GetSlotInvalidationCause(const char *cause_name)
2708{
2709 Assert(cause_name);
2710
2711 /* Search lookup table for the cause having this name */
2712 for (int i = 0; i <= RS_INVAL_MAX_CAUSES; i++)
2713 {
2714 if (strcmp(SlotInvalidationCauses[i].cause_name, cause_name) == 0)
2716 }
2717
2718 Assert(false);
2719 return RS_INVAL_NONE; /* to keep compiler quiet */
2720}
2721
2722/*
2723 * Maps an ReplicationSlotInvalidationCause to the invalidation
2724 * reason for a replication slot.
2725 */
2726const char *
2728{
2729 /* Search lookup table for the name of this cause */
2730 for (int i = 0; i <= RS_INVAL_MAX_CAUSES; i++)
2731 {
2732 if (SlotInvalidationCauses[i].cause == cause)
2734 }
2735
2736 Assert(false);
2737 return "none"; /* to keep compiler quiet */
2738}
2739
2740/*
2741 * A helper function to validate slots specified in GUC synchronized_standby_slots.
2742 *
2743 * The rawname will be parsed, and the result will be saved into *elemlist.
2744 */
2745static bool
2746validate_sync_standby_slots(char *rawname, List **elemlist)
2747{
2748 bool ok;
2749
2750 /* Verify syntax and parse string into a list of identifiers */
2751 ok = SplitIdentifierString(rawname, ',', elemlist);
2752
2753 if (!ok)
2754 {
2755 GUC_check_errdetail("List syntax is invalid.");
2756 }
2757 else if (MyProc)
2758 {
2759 /*
2760 * Check that each specified slot exist and is physical.
2761 *
2762 * Because we need an LWLock, we cannot do this on processes without a
2763 * PGPROC, so we skip it there; but see comments in
2764 * StandbySlotsHaveCaughtup() as to why that's not a problem.
2765 */
2766 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
2767
2768 foreach_ptr(char, name, *elemlist)
2769 {
2770 ReplicationSlot *slot;
2771
2772 slot = SearchNamedReplicationSlot(name, false);
2773
2774 if (!slot)
2775 {
2776 GUC_check_errdetail("Replication slot \"%s\" does not exist.",
2777 name);
2778 ok = false;
2779 break;
2780 }
2781
2782 if (!SlotIsPhysical(slot))
2783 {
2784 GUC_check_errdetail("\"%s\" is not a physical replication slot.",
2785 name);
2786 ok = false;
2787 break;
2788 }
2789 }
2790
2791 LWLockRelease(ReplicationSlotControlLock);
2792 }
2793
2794 return ok;
2795}
2796
2797/*
2798 * GUC check_hook for synchronized_standby_slots
2799 */
2800bool
2802{
2803 char *rawname;
2804 char *ptr;
2805 List *elemlist;
2806 int size;
2807 bool ok;
2809
2810 if ((*newval)[0] == '\0')
2811 return true;
2812
2813 /* Need a modifiable copy of the GUC string */
2814 rawname = pstrdup(*newval);
2815
2816 /* Now verify if the specified slots exist and have correct type */
2817 ok = validate_sync_standby_slots(rawname, &elemlist);
2818
2819 if (!ok || elemlist == NIL)
2820 {
2821 pfree(rawname);
2822 list_free(elemlist);
2823 return ok;
2824 }
2825
2826 /* Compute the size required for the SyncStandbySlotsConfigData struct */
2827 size = offsetof(SyncStandbySlotsConfigData, slot_names);
2828 foreach_ptr(char, slot_name, elemlist)
2829 size += strlen(slot_name) + 1;
2830
2831 /* GUC extra value must be guc_malloc'd, not palloc'd */
2832 config = (SyncStandbySlotsConfigData *) guc_malloc(LOG, size);
2833 if (!config)
2834 return false;
2835
2836 /* Transform the data into SyncStandbySlotsConfigData */
2837 config->nslotnames = list_length(elemlist);
2838
2839 ptr = config->slot_names;
2840 foreach_ptr(char, slot_name, elemlist)
2841 {
2842 strcpy(ptr, slot_name);
2843 ptr += strlen(slot_name) + 1;
2844 }
2845
2846 *extra = config;
2847
2848 pfree(rawname);
2849 list_free(elemlist);
2850 return true;
2851}
2852
2853/*
2854 * GUC assign_hook for synchronized_standby_slots
 *
 * NOTE(review): the signature line and the statements of this function are
 * not present in this listing (the embedded line numbers skip 2857, 2863
 * and 2865). Presumably it resets the cached oldest LSN and installs the
 * configuration produced by the check hook; confirm against the original
 * file.
2855 */
2856void
2858{
2859 /*
2860 * The standby slots may have changed, so we must recompute the oldest
2861 * LSN.
2862 */
2864
2866}
2867
2868/*
2869 * Check if the passed slot_name is specified in the synchronized_standby_slots GUC.
2870 */
2871bool
2872SlotExistsInSyncStandbySlots(const char *slot_name)
2873{
2874 const char *standby_slot_name;
2875
2876 /* Return false if there is no value in synchronized_standby_slots */
2878 return false;
2879
2880 /*
2881 * XXX: We are not expecting this list to be long so a linear search
2882 * shouldn't hurt but if that turns out not to be true then we can cache
2883 * this information for each WalSender as well.
2884 */
2885 standby_slot_name = synchronized_standby_slots_config->slot_names;
2886 for (int i = 0; i < synchronized_standby_slots_config->nslotnames; i++)
2887 {
2888 if (strcmp(standby_slot_name, slot_name) == 0)
2889 return true;
2890
2891 standby_slot_name += strlen(standby_slot_name) + 1;
2892 }
2893
2894 return false;
2895}
2896
2897/*
2898 * Return true if the slots specified in synchronized_standby_slots have caught up to
2899 * the given WAL location, false otherwise.
2900 *
2901 * The elevel parameter specifies the error level used for logging messages
2902 * related to slots that do not exist, are invalidated, or are inactive.
 *
 * True is also returned when recovery is in progress. The scan stops at the
 * first listed slot that does not exist, is logical, has been invalidated,
 * or has a restart_lsn that is invalid or behind wait_for_lsn; in that case
 * the result is false. On success the smallest restart_lsn seen is stored
 * in ss_oldest_flush_lsn.
 *
 * NOTE(review): this listing skips some source lines (the embedded line
 * numbers are not contiguous), e.g. the conditions guarding the first and
 * third early returns and the initialization of 'name'. Confirm against the
 * original file.
2903 */
2904bool
2905StandbySlotsHaveCaughtup(XLogRecPtr wait_for_lsn, int elevel)
2906{
2907 const char *name;
2908 int caught_up_slot_num = 0;
2909 XLogRecPtr min_restart_lsn = InvalidXLogRecPtr;
2910
2911 /*
2912 * Don't need to wait for the standbys to catch up if there is no value in
2913 * synchronized_standby_slots.
2914 */
2916 return true;
2917
2918 /*
2919 * Don't need to wait for the standbys to catch up if we are on a standby
2920 * server, since we do not support syncing slots to cascading standbys.
2921 */
2922 if (RecoveryInProgress())
2923 return true;
2924
2925 /*
2926 * Don't need to wait for the standbys to catch up if they are already
2927 * beyond the specified WAL location.
2928 */
2930 ss_oldest_flush_lsn >= wait_for_lsn)
2931 return true;
2932
2933 /*
2934 * To prevent concurrent slot dropping and creation while filtering the
2935 * slots, take the ReplicationSlotControlLock outside of the loop.
2936 */
2937 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
2938
2940 for (int i = 0; i < synchronized_standby_slots_config->nslotnames; i++)
2941 {
2942 XLogRecPtr restart_lsn;
2943 bool invalidated;
2944 bool inactive;
2945 ReplicationSlot *slot;
2946
2947 slot = SearchNamedReplicationSlot(name, false);
2948
2949 /*
2950 * If a slot name provided in synchronized_standby_slots does not
2951 * exist, report a message and exit the loop.
2952 *
2953 * Though validate_sync_standby_slots (the GUC check_hook) tries to
2954 * avoid this, it can nonetheless happen because the user can specify
2955 * a nonexistent slot name before server startup. That function cannot
2956 * validate such a slot during startup, as ReplicationSlotCtl is not
2957 * initialized by then. Also, the user might have dropped one slot.
2958 */
2959 if (!slot)
2960 {
2961 ereport(elevel,
2962 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2963 errmsg("replication slot \"%s\" specified in parameter \"%s\" does not exist",
2964 name, "synchronized_standby_slots"),
2965 errdetail("Logical replication is waiting on the standby associated with replication slot \"%s\".",
2966 name),
2967 errhint("Create the replication slot \"%s\" or amend parameter \"%s\".",
2968 name, "synchronized_standby_slots"));
2969 break;
2970 }
2971
2972 /* Same as above: if a slot is not physical, exit the loop. */
2973 if (SlotIsLogical(slot))
2974 {
2975 ereport(elevel,
2976 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2977 errmsg("cannot specify logical replication slot \"%s\" in parameter \"%s\"",
2978 name, "synchronized_standby_slots"),
2979 errdetail("Logical replication is waiting for correction on replication slot \"%s\".",
2980 name),
2981 errhint("Remove the logical replication slot \"%s\" from parameter \"%s\".",
2982 name, "synchronized_standby_slots"));
2983 break;
2984 }
2985
 /* read the fields examined below in one go, under the slot's mutex */
2986 SpinLockAcquire(&slot->mutex);
2987 restart_lsn = slot->data.restart_lsn;
2988 invalidated = slot->data.invalidated != RS_INVAL_NONE;
2989 inactive = slot->active_pid == 0;
2990 SpinLockRelease(&slot->mutex);
2991
2992 if (invalidated)
2993 {
2994 /* Specified physical slot has been invalidated */
2995 ereport(elevel,
2996 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2997 errmsg("physical replication slot \"%s\" specified in parameter \"%s\" has been invalidated",
2998 name, "synchronized_standby_slots"),
2999 errdetail("Logical replication is waiting on the standby associated with replication slot \"%s\".",
3000 name),
3001 errhint("Drop and recreate the replication slot \"%s\", or amend parameter \"%s\".",
3002 name, "synchronized_standby_slots"));
3003 break;
3004 }
3005
3006 if (XLogRecPtrIsInvalid(restart_lsn) || restart_lsn < wait_for_lsn)
3007 {
3008 /* Log a message if no active_pid for this physical slot */
3009 if (inactive)
3010 ereport(elevel,
3011 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
3012 errmsg("replication slot \"%s\" specified in parameter \"%s\" does not have active_pid",
3013 name, "synchronized_standby_slots"),
3014 errdetail("Logical replication is waiting on the standby associated with replication slot \"%s\".",
3015 name),
3016 errhint("Start the standby associated with the replication slot \"%s\", or amend parameter \"%s\".",
3017 name, "synchronized_standby_slots"));
3018
3019 /* Stop checking: the current slot hasn't caught up yet. */
3020 break;
3021 }
3022
3023 Assert(restart_lsn >= wait_for_lsn);
3024
 /* track the smallest restart_lsn among the slots that have caught up */
3025 if (XLogRecPtrIsInvalid(min_restart_lsn) ||
3026 min_restart_lsn > restart_lsn)
3027 min_restart_lsn = restart_lsn;
3028
3029 caught_up_slot_num++;
3030
 /* names are stored back to back; step over this one and its NUL */
3031 name += strlen(name) + 1;
3032 }
3033
3034 LWLockRelease(ReplicationSlotControlLock);
3035
3036 /*
3037 * Return false if not all the standbys have caught up to the specified
3038 * WAL location.
3039 */
3040 if (caught_up_slot_num != synchronized_standby_slots_config->nslotnames)
3041 return false;
3042
3043 /* The ss_oldest_flush_lsn must not retreat. */
3045 min_restart_lsn >= ss_oldest_flush_lsn);
3046
3047 ss_oldest_flush_lsn = min_restart_lsn;
3048
3049 return true;
3050}
3051
3052/*
3053 * Wait for physical standbys to confirm receiving the given lsn.
3054 *
3055 * Used by logical decoding SQL functions. It waits for physical standbys
3056 * corresponding to the physical slots specified in the synchronized_standby_slots GUC.
 *
 * Loops until StandbySlotsHaveCaughtup() reports success for wait_for_lsn;
 * problems with individual slots are reported at WARNING level on each pass.
 *
 * NOTE(review): this listing skips some source lines (the embedded line
 * numbers are not contiguous), including the signature line, the condition
 * of the early return, and the sleep call itself. Confirm against the
 * original file.
3057 */
3058void
3060{
3061 /*
3062 * Don't need to wait for the standby to catch up if the current acquired
3063 * slot is not a logical failover slot, or there is no value in
3064 * synchronized_standby_slots.
3065 */
3067 return;
3068
3070
3071 for (;;)
3072 {
3074
3076 {
3077 ConfigReloadPending = false;
3079 }
3080
3081 /* Exit if done waiting for every slot. */
3082 if (StandbySlotsHaveCaughtup(wait_for_lsn, WARNING))
3083 break;
3084
3085 /*
3086 * Wait for the slots in the synchronized_standby_slots to catch up,
3087 * but use a timeout (1s) so we can also check if the
3088 * synchronized_standby_slots has been changed.
3089 */
3091 WAIT_EVENT_WAIT_FOR_STANDBY_CONFIRMATION);
3092 }
3093
3095}
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1721
bool TimestampDifferenceExceedsSeconds(TimestampTz start_time, TimestampTz stop_time, int threshold_sec)
Definition: timestamp.c:1795
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1645
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1609
#define NameStr(name)
Definition: c.h:752
#define ngettext(s, p, n)
Definition: c.h:1181
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:223
#define PG_BINARY
Definition: c.h:1273
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:471
uint64_t uint64
Definition: c.h:540
#define pg_unreachable()
Definition: c.h:331
uint32_t uint32
Definition: c.h:539
#define lengthof(array)
Definition: c.h:788
#define MemSet(start, val, len)
Definition: c.h:1020
uint32 TransactionId
Definition: c.h:658
size_t Size
Definition: c.h:611
bool ConditionVariableCancelSleep(void)
bool ConditionVariableTimedSleep(ConditionVariable *cv, long timeout, uint32 wait_event_info)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
int64 TimestampTz
Definition: timestamp.h:39
int errdetail_internal(const char *fmt,...)
Definition: elog.c:1234
int errcode_for_file_access(void)
Definition: elog.c:877
int errdetail(const char *fmt,...)
Definition: elog.c:1207
int errhint(const char *fmt,...)
Definition: elog.c:1321
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define _(x)
Definition: elog.c:91
#define LOG
Definition: elog.h:31
#define FATAL
Definition: elog.h:41
#define WARNING
Definition: elog.h:36
#define PANIC
Definition: elog.h:42
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:150
int MakePGDirectory(const char *directoryName)
Definition: fd.c:3975
int FreeDir(DIR *dir)
Definition: fd.c:3022
int CloseTransientFile(int fd)
Definition: fd.c:2868
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:753
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2904
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2970
int pg_fsync(int fd)
Definition: fd.c:386
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2691
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition: file_utils.c:547
PGFileType
Definition: file_utils.h:19
@ PGFILETYPE_DIR
Definition: file_utils.h:23
@ PGFILETYPE_ERROR
Definition: file_utils.h:20
bool IsBinaryUpgrade
Definition: globals.c:121
int MyProcPid
Definition: globals.c:47
bool IsUnderPostmaster
Definition: globals.c:120
Oid MyDatabaseId
Definition: globals.c:94
void ProcessConfigFile(GucContext context)
Definition: guc-file.l:120
void * guc_malloc(int elevel, size_t size)
Definition: guc.c:639
#define newval
#define GUC_check_errdetail
Definition: guc.h:505
GucSource
Definition: guc.h:112
@ PGC_SIGHUP
Definition: guc.h:75
Assert(PointerIsAligned(start, uint64))
#define IS_INJECTION_POINT_ATTACHED(name)
#define write(a, b, c)
Definition: win32.h:14
#define read(a, b, c)
Definition: win32.h:13
volatile sig_atomic_t ConfigReloadPending
Definition: interrupt.c:27
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337
int i
Definition: isn.c:77
bool IsLogicalLauncher(void)
Definition: launcher.c:1531
void list_free(List *list)
Definition: list.c:1546
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1977
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1174
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:2021
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1894
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:698
@ LW_SHARED
Definition: lwlock.h:113
@ LW_EXCLUSIVE
Definition: lwlock.h:112
char * pstrdup(const char *in)
Definition: mcxt.c:1759
void pfree(void *pointer)
Definition: mcxt.c:1594
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
@ B_STARTUP
Definition: miscadmin.h:364
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
Oid GetUserId(void)
Definition: miscinit.c:469
BackendType MyBackendType
Definition: miscinit.c:64
bool has_rolreplication(Oid roleid)
Definition: miscinit.c:688
void namestrcpy(Name name, const char *str)
Definition: name.c:233
void * arg
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:42
#define NAMEDATALEN
#define MAXPGPATH
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:153
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:158
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define foreach_ptr(type, var, lst)
Definition: pg_list.h:469
static bool two_phase
static bool failover
static rewind_source * source
Definition: pg_rewind.c:89
void pgstat_create_replslot(ReplicationSlot *slot)
void pgstat_acquire_replslot(ReplicationSlot *slot)
void pgstat_drop_replslot(ReplicationSlot *slot)
#define sprintf
Definition: port.h:241
#define snprintf
Definition: port.h:239
uint64_t Datum
Definition: postgres.h:70
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
static int fd(const char *x, int i)
Definition: preproc-init.c:105
void ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin, bool already_locked)
Definition: procarray.c:3905
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
int SendProcSignal(pid_t pid, ProcSignalReason reason, ProcNumber procNumber)
Definition: procsignal.c:284
@ PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT
Definition: procsignal.h:46
bool rmtree(const char *path, bool rmtopdir)
Definition: rmtree.c:50
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
int ReplicationSlotIndex(ReplicationSlot *slot)
Definition: slot.c:546
void ReplicationSlotAcquire(const char *name, bool nowait, bool error_if_invalid)
Definition: slot.c:593
static const SlotInvalidationCauseMap SlotInvalidationCauses[]
Definition: slot.c:113
char * synchronized_standby_slots
Definition: slot.c:164
void assign_synchronized_standby_slots(const char *newval, void *extra)
Definition: slot.c:2857
#define ReplicationSlotOnDiskChecksummedSize
Definition: slot.c:135
void CheckPointReplicationSlots(bool is_shutdown)
Definition: slot.c:2115
void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, bool two_phase, bool failover, bool synced)
Definition: slot.c:352
int idle_replication_slot_timeout_secs
Definition: slot.c:158
void ReplicationSlotDropAcquired(void)
Definition: slot.c:964
void ReplicationSlotMarkDirty(void)
Definition: slot.c:1106
void ReplicationSlotReserveWal(void)
Definition: slot.c:1539
bool ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive)
Definition: slot.c:1350
struct SlotInvalidationCauseMap SlotInvalidationCauseMap
static XLogRecPtr ss_oldest_flush_lsn
Definition: slot.c:173
static ReplicationSlotInvalidationCause DetermineSlotInvalidationCause(uint32 possible_causes, ReplicationSlot *s, XLogRecPtr oldestLSN, Oid dboid, TransactionId snapshotConflictHorizon, TransactionId initial_effective_xmin, TransactionId initial_catalog_effective_xmin, XLogRecPtr initial_restart_lsn, TimestampTz *inactive_since, TimestampTz now)
Definition: slot.c:1714
void ReplicationSlotsDropDBSlots(Oid dboid)
Definition: slot.c:1408
#define ReplicationSlotOnDiskNotChecksummedSize
Definition: slot.c:132
XLogRecPtr ReplicationSlotsComputeLogicalRestartLSN(void)
Definition: slot.c:1271
ReplicationSlotInvalidationCause GetSlotInvalidationCause(const char *cause_name)
Definition: slot.c:2707
void ReplicationSlotsComputeRequiredXmin(bool already_locked)
Definition: slot.c:1145
static void RestoreSlotFromDisk(const char *name)
Definition: slot.c:2465
void ReplicationSlotPersist(void)
Definition: slot.c:1123
static void ReportSlotInvalidation(ReplicationSlotInvalidationCause cause, bool terminating, int pid, NameData slotname, XLogRecPtr restart_lsn, XLogRecPtr oldestLSN, TransactionId snapshotConflictHorizon, long slot_idle_seconds)
Definition: slot.c:1616
ReplicationSlot * MyReplicationSlot
Definition: slot.c:148
static void SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
Definition: slot.c:2309
void ReplicationSlotDrop(const char *name, bool nowait)
Definition: slot.c:859
bool SlotExistsInSyncStandbySlots(const char *slot_name)
Definition: slot.c:2872
static bool validate_sync_standby_slots(char *rawname, List **elemlist)
Definition: slot.c:2746
void ReplicationSlotSave(void)
Definition: slot.c:1088
ReplicationSlot * SearchNamedReplicationSlot(const char *name, bool need_lock)
Definition: slot.c:513
static void CreateSlotOnDisk(ReplicationSlot *slot)
Definition: slot.c:2248
#define ReplicationSlotOnDiskV2Size
Definition: slot.c:138
void CheckSlotPermissions(void)
Definition: slot.c:1522
bool ReplicationSlotName(int index, Name name)
Definition: slot.c:562
bool check_synchronized_standby_slots(char **newval, void **extra, GucSource source)
Definition: slot.c:2801
void ReplicationSlotsShmemInit(void)
Definition: slot.c:206
bool ReplicationSlotValidateName(const char *name, bool allow_reserved_name, int elevel)
Definition: slot.c:272
void ReplicationSlotAlter(const char *name, const bool *failover, const bool *two_phase)
Definition: slot.c:882
void ReplicationSlotRelease(void)
Definition: slot.c:731
int max_replication_slots
Definition: slot.c:151
StaticAssertDecl(lengthof(SlotInvalidationCauses)==(RS_INVAL_MAX_CAUSES+1), "array length mismatch")
ReplicationSlotCtlData * ReplicationSlotCtl
Definition: slot.c:145
#define SLOT_VERSION
Definition: slot.c:142
struct ReplicationSlotOnDisk ReplicationSlotOnDisk
void WaitForStandbyConfirmation(XLogRecPtr wait_for_lsn)
Definition: slot.c:3059
bool StandbySlotsHaveCaughtup(XLogRecPtr wait_for_lsn, int elevel)
Definition: slot.c:2905
void ReplicationSlotsComputeRequiredLSN(void)
Definition: slot.c:1201
void ReplicationSlotCleanup(bool synced_only)
Definition: slot.c:820
void ReplicationSlotInitialize(void)
Definition: slot.c:241
static void ReplicationSlotDropPtr(ReplicationSlot *slot)
Definition: slot.c:981
void StartupReplicationSlots(void)
Definition: slot.c:2187
static bool InvalidatePossiblyObsoleteSlot(uint32 possible_causes, ReplicationSlot *s, XLogRecPtr oldestLSN, Oid dboid, TransactionId snapshotConflictHorizon, bool *invalidated)
Definition: slot.c:1803
static bool CanInvalidateIdleSlot(ReplicationSlot *s)
Definition: slot.c:1698
void CheckSlotRequirements(void)
Definition: slot.c:1500
#define SLOT_MAGIC
Definition: slot.c:141
bool InvalidateObsoleteReplicationSlots(uint32 possible_causes, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition: slot.c:2055
static SyncStandbySlotsConfigData * synchronized_standby_slots_config
Definition: slot.c:167
#define ReplicationSlotOnDiskConstantSize
Definition: slot.c:129
Size ReplicationSlotsShmemSize(void)
Definition: slot.c:188
const char * GetSlotInvalidationCauseName(ReplicationSlotInvalidationCause cause)
Definition: slot.c:2727
static void ReplicationSlotShmemExit(int code, Datum arg)
Definition: slot.c:250
static bool IsSlotForConflictCheck(const char *name)
Definition: slot.c:329
#define CONFLICT_DETECTION_SLOT
Definition: slot.h:28
#define RS_INVAL_MAX_CAUSES
Definition: slot.h:72
ReplicationSlotPersistency
Definition: slot.h:44
@ RS_PERSISTENT
Definition: slot.h:45
@ RS_EPHEMERAL
Definition: slot.h:46
@ RS_TEMPORARY
Definition: slot.h:47
#define SlotIsPhysical(slot)
Definition: slot.h:254
#define PG_REPLSLOT_DIR
Definition: slot.h:21
ReplicationSlotInvalidationCause
Definition: slot.h:59
@ RS_INVAL_WAL_REMOVED
Definition: slot.h:62
@ RS_INVAL_IDLE_TIMEOUT
Definition: slot.h:68
@ RS_INVAL_HORIZON
Definition: slot.h:64
@ RS_INVAL_WAL_LEVEL
Definition: slot.h:66
@ RS_INVAL_NONE
Definition: slot.h:60
#define SlotIsLogical(slot)
Definition: slot.h:255
static void ReplicationSlotSetInactiveSince(ReplicationSlot *s, TimestampTz ts, bool acquire_lock)
Definition: slot.h:273
bool IsSyncingReplicationSlots(void)
Definition: slotsync.c:1668
#define SpinLockInit(lock)
Definition: spin.h:57
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
PGPROC * MyProc
Definition: proc.c:66
PROC_HDR * ProcGlobal
Definition: proc.c:78
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1282
#define ERRCODE_DUPLICATE_OBJECT
Definition: streamutil.c:30
bool pg_str_endswith(const char *str, const char *end)
Definition: string.c:31
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:230
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
Definition: dirent.c:26
Definition: pg_list.h:54
uint8 statusFlags
Definition: proc.h:259
int pgxactoff
Definition: proc.h:201
uint8 * statusFlags
Definition: proc.h:403
ReplicationSlot replication_slots[1]
Definition: slot.h:266
uint32 version
Definition: slot.c:75
ReplicationSlotPersistentData slotdata
Definition: slot.c:83
pg_crc32c checksum
Definition: slot.c:72
TransactionId xmin
Definition: slot.h:96
TransactionId catalog_xmin
Definition: slot.h:104
XLogRecPtr confirmed_flush
Definition: slot.h:118
ReplicationSlotPersistency persistency
Definition: slot.h:88
ReplicationSlotInvalidationCause invalidated
Definition: slot.h:110
XLogRecPtr candidate_xmin_lsn
Definition: slot.h:208
TransactionId effective_catalog_xmin
Definition: slot.h:189
slock_t mutex
Definition: slot.h:165
XLogRecPtr candidate_restart_valid
Definition: slot.h:209
XLogRecPtr last_saved_confirmed_flush
Definition: slot.h:217
pid_t active_pid
Definition: slot.h:171
bool in_use
Definition: slot.h:168
TransactionId effective_xmin
Definition: slot.h:188
bool just_dirtied
Definition: slot.h:174
XLogRecPtr last_saved_restart_lsn
Definition: slot.h:250
XLogRecPtr candidate_restart_lsn
Definition: slot.h:210
LWLock io_in_progress_lock
Definition: slot.h:195
ConditionVariable active_cv
Definition: slot.h:198
TransactionId candidate_catalog_xmin
Definition: slot.h:207
bool dirty
Definition: slot.h:175
ReplicationSlotPersistentData data
Definition: slot.h:192
TimestampTz inactive_since
Definition: slot.h:224
const char * cause_name
Definition: slot.c:110
ReplicationSlotInvalidationCause cause
Definition: slot.c:109
char slot_names[FLEXIBLE_ARRAY_MEMBER]
Definition: slot.c:101
ConditionVariable wal_confirm_rcv_cv
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
Definition: type.h:96
Definition: c.h:747
unsigned short st_mode
Definition: win32_port.h:258
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:2744
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:69
static void pgstat_report_wait_end(void)
Definition: wait_event.h:85
const char * name
bool am_walsender
Definition: walsender.c:123
bool log_replication_commands
Definition: walsender.c:133
WalSndCtlData * WalSndCtl
Definition: walsender.c:117
#define stat
Definition: win32_port.h:274
#define S_ISDIR(m)
Definition: win32_port.h:315
#define kill(pid, sig)
Definition: win32_port.h:493
bool RecoveryInProgress(void)
Definition: xlog.c:6383
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3771
bool EnableHotStandby
Definition: xlog.c:122
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:6486
int wal_level
Definition: xlog.c:132
int wal_segment_size
Definition: xlog.c:144
void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn)
Definition: xlog.c:2666
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:9476
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2780
@ WAL_LEVEL_REPLICA
Definition: xlog.h:75
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:76
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:46
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint64 XLogSegNo
Definition: xlogdefs.h:51
bool StandbyMode
Definition: xlogrecovery.c:149
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)