Thanks for visiting codestin.com
Credit goes to doxygen.postgresql.org

PostgreSQL Source Code git master
xlog.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <math.h>
#include <time.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/heaptoast.h"
#include "access/multixact.h"
#include "access/rewriteheap.h"
#include "access/subtrans.h"
#include "access/timeline.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "access/xlogarchive.h"
#include "access/xloginsert.h"
#include "access/xlogreader.h"
#include "access/xlogrecovery.h"
#include "access/xlogutils.h"
#include "backup/basebackup.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "catalog/pg_database.h"
#include "common/controldata_utils.h"
#include "common/file_utils.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
#include "postmaster/walsummarizer.h"
#include "postmaster/walwriter.h"
#include "replication/origin.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/large_object.h"
#include "storage/latch.h"
#include "storage/predicate.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/reinit.h"
#include "storage/spin.h"
#include "storage/sync.h"
#include "utils/guc_hooks.h"
#include "utils/guc_tables.h"
#include "utils/injection_point.h"
#include "utils/pgstat_internal.h"
#include "utils/ps_status.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
#include "utils/varlena.h"
Include dependency graph for xlog.c:

Go to the source code of this file.

Data Structures

struct  XLogwrtRqst
 
struct  XLogwrtResult
 
struct  WALInsertLock
 
union  WALInsertLockPadded
 
struct  XLogCtlInsert
 
struct  XLogCtlData
 

Macros

#define BootstrapTimeLineID   1
 
#define NUM_XLOGINSERT_LOCKS   8
 
#define INSERT_FREESPACE(endptr)    (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
 
#define NextBufIdx(idx)    (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
 
#define XLogRecPtrToBufIdx(recptr)    (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
 
#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)
 
#define ConvertToXSegs(x, segsize)   XLogMBVarToSegs((x), (segsize))
 
#define RefreshXLogWriteResult(_target)
 

Typedefs

typedef struct XLogwrtRqst XLogwrtRqst
 
typedef struct XLogwrtResult XLogwrtResult
 
typedef union WALInsertLockPadded WALInsertLockPadded
 
typedef struct XLogCtlInsert XLogCtlInsert
 
typedef struct XLogCtlData XLogCtlData
 

Enumerations

enum  WalInsertClass { WALINSERT_NORMAL , WALINSERT_SPECIAL_SWITCH , WALINSERT_SPECIAL_CHECKPOINT }
 

Functions

static void CleanupAfterArchiveRecovery (TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
 
static void CheckRequiredParameterValues (void)
 
static void XLogReportParameters (void)
 
static int LocalSetXLogInsertAllowed (void)
 
static void CreateEndOfRecoveryRecord (void)
 
static XLogRecPtr CreateOverwriteContrecordRecord (XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
 
static void CheckPointGuts (XLogRecPtr checkPointRedo, int flags)
 
static void KeepLogSeg (XLogRecPtr recptr, XLogSegNo *logSegNo)
 
static XLogRecPtr XLogGetReplicationSlotMinimumLSN (void)
 
static void AdvanceXLInsertBuffer (XLogRecPtr upto, TimeLineID tli, bool opportunistic)
 
static void XLogWrite (XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 
static bool InstallXLogFileSegment (XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
 
static void XLogFileClose (void)
 
static void PreallocXlogFiles (XLogRecPtr endptr, TimeLineID tli)
 
static void RemoveTempXlogFiles (void)
 
static void RemoveOldXlogFiles (XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
 
static void RemoveXlogFile (const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
 
static void UpdateLastRemovedPtr (char *filename)
 
static void ValidateXLOGDirectoryStructure (void)
 
static void CleanupBackupHistory (void)
 
static void UpdateMinRecoveryPoint (XLogRecPtr lsn, bool force)
 
static bool PerformRecoveryXLogAction (void)
 
static void InitControlFile (uint64 sysidentifier, uint32 data_checksum_version)
 
static void WriteControlFile (void)
 
static void ReadControlFile (void)
 
static void UpdateControlFile (void)
 
static char * str_time (pg_time_t tnow, char *buf, size_t bufsize)
 
static int get_sync_bit (int method)
 
static void CopyXLogRecordToWAL (int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
 
static void ReserveXLogInsertLocation (int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static bool ReserveXLogSwitch (XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static XLogRecPtr WaitXLogInsertionsToFinish (XLogRecPtr upto)
 
static char * GetXLogBuffer (XLogRecPtr ptr, TimeLineID tli)
 
static XLogRecPtr XLogBytePosToRecPtr (uint64 bytepos)
 
static XLogRecPtr XLogBytePosToEndRecPtr (uint64 bytepos)
 
static uint64 XLogRecPtrToBytePos (XLogRecPtr ptr)
 
static void WALInsertLockAcquire (void)
 
static void WALInsertLockAcquireExclusive (void)
 
static void WALInsertLockRelease (void)
 
static void WALInsertLockUpdateInsertingAt (XLogRecPtr insertingAt)
 
XLogRecPtr XLogInsertRecord (XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags, int num_fpi, bool topxid_included)
 
Size WALReadFromBuffers (char *dstbuf, XLogRecPtr startptr, Size count, TimeLineID tli)
 
static void CalculateCheckpointSegments (void)
 
void assign_max_wal_size (int newval, void *extra)
 
void assign_checkpoint_completion_target (double newval, void *extra)
 
bool check_wal_segment_size (int *newval, void **extra, GucSource source)
 
static XLogSegNo XLOGfileslop (XLogRecPtr lastredoptr)
 
bool XLogCheckpointNeeded (XLogSegNo new_segno)
 
void XLogSetAsyncXactLSN (XLogRecPtr asyncXactLSN)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
void XLogFlush (XLogRecPtr record)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr record)
 
static int XLogFileInitInternal (XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
 
int XLogFileInit (XLogSegNo logsegno, TimeLineID logtli)
 
static void XLogFileCopy (TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
 
int XLogFileOpen (XLogSegNo segno, TimeLineID tli)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
XLogSegNo XLogGetOldestSegno (TimeLineID tli)
 
void RemoveNonParentXlogFiles (XLogRecPtr switchpoint, TimeLineID newTLI)
 
uint64 GetSystemIdentifier (void)
 
char * GetMockAuthenticationNonce (void)
 
bool DataChecksumsEnabled (void)
 
bool GetDefaultCharSignedness (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
static int XLOGChooseNumBuffers (void)
 
bool check_wal_buffers (int *newval, void **extra, GucSource source)
 
bool check_wal_consistency_checking (char **newval, void **extra, GucSource source)
 
void assign_wal_consistency_checking (const char *newval, void *extra)
 
void InitializeWalConsistencyChecking (void)
 
const char * show_archive_command (void)
 
const char * show_in_hot_standby (void)
 
void LocalProcessControlFile (bool reset)
 
WalLevel GetActiveWalLevelOnStandby (void)
 
Size XLOGShmemSize (void)
 
void XLOGShmemInit (void)
 
void BootStrapXLOG (uint32 data_checksum_version)
 
static void XLogInitNewTimeline (TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
 
void StartupXLOG (void)
 
void SwitchIntoArchiveRecovery (XLogRecPtr EndRecPtr, TimeLineID replayTLI)
 
void ReachedEndOfBackup (XLogRecPtr EndRecPtr, TimeLineID tli)
 
bool RecoveryInProgress (void)
 
RecoveryState GetRecoveryState (void)
 
bool XLogInsertAllowed (void)
 
XLogRecPtr GetRedoRecPtr (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (TimeLineID *insertTLI)
 
TimeLineID GetWALInsertionTimeLine (void)
 
TimeLineID GetWALInsertionTimeLineIfSet (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
pg_time_t GetLastSegSwitchData (XLogRecPtr *lastSwitchLSN)
 
void ShutdownXLOG (int code, Datum arg)
 
static void LogCheckpointStart (int flags, bool restartpoint)
 
static void LogCheckpointEnd (bool restartpoint)
 
static void UpdateCheckPointDistanceEstimate (uint64 nbytes)
 
static void update_checkpoint_display (int flags, bool restartpoint, bool reset)
 
bool CreateCheckPoint (int flags)
 
static void RecoveryRestartPoint (const CheckPoint *checkPoint, XLogReaderState *record)
 
bool CreateRestartPoint (int flags)
 
WALAvailability GetWALAvailability (XLogRecPtr targetLSN)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr RequestXLogSwitch (bool mark_unimportant)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
void UpdateFullPageWrites (void)
 
void xlog_redo (XLogReaderState *record)
 
void assign_wal_sync_method (int new_wal_sync_method, void *extra)
 
void issue_xlog_fsync (int fd, XLogSegNo segno, TimeLineID tli)
 
void do_pg_backup_start (const char *backupidstr, bool fast, List **tablespaces, BackupState *state, StringInfo tblspcmapfile)
 
SessionBackupState get_backup_status (void)
 
void do_pg_backup_stop (BackupState *state, bool waitforarchive)
 
void do_pg_abort_backup (int code, Datum arg)
 
void register_persistent_abort_backup_handler (void)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
void GetOldestRestartPoint (XLogRecPtr *oldrecptr, TimeLineID *oldtli)
 
void XLogShutdownWalRcv (void)
 
void SetInstallXLogFileSegmentActive (void)
 
bool IsInstallXLogFileSegmentActive (void)
 
void SetWalWriterSleeping (bool sleeping)
 

Variables

int max_wal_size_mb = 1024
 
int min_wal_size_mb = 80
 
int wal_keep_size_mb = 0
 
int XLOGbuffers = -1
 
int XLogArchiveTimeout = 0
 
int XLogArchiveMode = ARCHIVE_MODE_OFF
 
char * XLogArchiveCommand = NULL
 
bool EnableHotStandby = false
 
bool fullPageWrites = true
 
bool wal_log_hints = false
 
int wal_compression = WAL_COMPRESSION_NONE
 
char * wal_consistency_checking_string = NULL
 
bool * wal_consistency_checking = NULL
 
bool wal_init_zero = true
 
bool wal_recycle = true
 
bool log_checkpoints = true
 
int wal_sync_method = DEFAULT_WAL_SYNC_METHOD
 
int wal_level = WAL_LEVEL_REPLICA
 
int CommitDelay = 0
 
int CommitSiblings = 5
 
int wal_retrieve_retry_interval = 5000
 
int max_slot_wal_keep_size_mb = -1
 
int wal_decode_buffer_size = 512 * 1024
 
bool track_wal_io_timing = false
 
int wal_segment_size = DEFAULT_XLOG_SEG_SIZE
 
int CheckPointSegments
 
static double CheckPointDistanceEstimate = 0
 
static double PrevCheckPointDistance = 0
 
static bool check_wal_consistency_checking_deferred = false
 
const struct config_enum_entry wal_sync_method_options []
 
const struct config_enum_entry archive_mode_options []
 
CheckpointStatsData CheckpointStats
 
static bool lastFullPageWrites
 
static bool LocalRecoveryInProgress = true
 
static int LocalXLogInsertAllowed = -1
 
XLogRecPtr ProcLastRecPtr = InvalidXLogRecPtr
 
XLogRecPtr XactLastRecEnd = InvalidXLogRecPtr
 
XLogRecPtr XactLastCommitEnd = InvalidXLogRecPtr
 
static XLogRecPtr RedoRecPtr
 
static bool doPageWrites
 
static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
 
static XLogCtlData * XLogCtl = NULL
 
static WALInsertLockPadded * WALInsertLocks = NULL
 
static ControlFileData * ControlFile = NULL
 
static int UsableBytesInSegment
 
static XLogwrtResult LogwrtResult = {0, 0}
 
static int openLogFile = -1
 
static XLogSegNo openLogSegNo = 0
 
static TimeLineID openLogTLI = 0
 
static XLogRecPtr LocalMinRecoveryPoint
 
static TimeLineID LocalMinRecoveryPointTLI
 
static bool updateMinRecoveryPoint = true
 
static int MyLockNo = 0
 
static bool holdingAllLocks = false
 

Macro Definition Documentation

◆ BootstrapTimeLineID

#define BootstrapTimeLineID   1

Definition at line 112 of file xlog.c.

◆ ConvertToXSegs

#define ConvertToXSegs (   x,
  segsize 
)    XLogMBVarToSegs((x), (segsize))

Definition at line 603 of file xlog.c.

◆ INSERT_FREESPACE

#define INSERT_FREESPACE (   endptr)     (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))

Definition at line 580 of file xlog.c.

◆ NextBufIdx

#define NextBufIdx (   idx)     (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))

Definition at line 584 of file xlog.c.

◆ NUM_XLOGINSERT_LOCKS

#define NUM_XLOGINSERT_LOCKS   8

Definition at line 151 of file xlog.c.

◆ RefreshXLogWriteResult

#define RefreshXLogWriteResult (   _target)
Value:
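do { \
_target.Flush = pg_atomic_read_u64(&XLogCtl->logFlushResult); \
pg_read_barrier(); \
_target.Write = pg_atomic_read_u64(&XLogCtl->logWriteResult); \
} while (0)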
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:465
pg_atomic_uint64 logWriteResult
Definition: xlog.c:472
pg_atomic_uint64 logFlushResult
Definition: xlog.c:473
static XLogCtlData * XLogCtl
Definition: xlog.c:566

Definition at line 620 of file xlog.c.

◆ UsableBytesInPage

#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)

Definition at line 597 of file xlog.c.

◆ XLogRecPtrToBufIdx

#define XLogRecPtrToBufIdx (   recptr)     (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))

Definition at line 591 of file xlog.c.

Typedef Documentation

◆ WALInsertLockPadded

typedef union WALInsertLockPadded WALInsertLockPadded

◆ XLogCtlData

typedef struct XLogCtlData XLogCtlData

◆ XLogCtlInsert

typedef struct XLogCtlInsert XLogCtlInsert

◆ XLogwrtResult

typedef struct XLogwrtResult XLogwrtResult

◆ XLogwrtRqst

typedef struct XLogwrtRqst XLogwrtRqst

Enumeration Type Documentation

◆ WalInsertClass

Enumerator
WALINSERT_NORMAL 
WALINSERT_SPECIAL_SWITCH 
WALINSERT_SPECIAL_CHECKPOINT 

Definition at line 559 of file xlog.c.

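560{
561 WALINSERT_NORMAL,
562 WALINSERT_SPECIAL_SWITCH,
563 WALINSERT_SPECIAL_CHECKPOINT
564} WalInsertClass;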
WalInsertClass
Definition: xlog.c:560
@ WALINSERT_SPECIAL_SWITCH
Definition: xlog.c:562
@ WALINSERT_NORMAL
Definition: xlog.c:561
@ WALINSERT_SPECIAL_CHECKPOINT
Definition: xlog.c:563

Function Documentation

◆ AdvanceXLInsertBuffer()

static void AdvanceXLInsertBuffer ( XLogRecPtr  upto,
TimeLineID  tli,
bool  opportunistic 
)
static

Definition at line 1988 of file xlog.c.

1989{
1990 XLogCtlInsert *Insert = &XLogCtl->Insert;
1991 int nextidx;
1992 XLogRecPtr OldPageRqstPtr;
1993 XLogwrtRqst WriteRqst;
1994 XLogRecPtr NewPageEndPtr = InvalidXLogRecPtr;
1995 XLogRecPtr NewPageBeginPtr;
1996 XLogPageHeader NewPage;
1997 int npages pg_attribute_unused() = 0;
1998
1999 LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
2000
2001 /*
2002 * Now that we have the lock, check if someone initialized the page
2003 * already.
2004 */
2005 while (upto >= XLogCtl->InitializedUpTo || opportunistic)
2006 {
2007 nextidx = XLogRecPtrToBufIdx(XLogCtl->InitializedUpTo);
2008
2009 /*
2010 * Get ending-offset of the buffer page we need to replace (this may
2011 * be zero if the buffer hasn't been used yet). Fall through if it's
2012 * already written out.
2013 */
2014 OldPageRqstPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]);
2015 if (LogwrtResult.Write < OldPageRqstPtr)
2016 {
2017 /*
2018 * Nope, got work to do. If we just want to pre-initialize as much
2019 * as we can without flushing, give up now.
2020 */
2021 if (opportunistic)
2022 break;
2023
2024 /* Advance shared memory write request position */
2025 SpinLockAcquire(&XLogCtl->info_lck);
2026 if (XLogCtl->LogwrtRqst.Write < OldPageRqstPtr)
2027 XLogCtl->LogwrtRqst.Write = OldPageRqstPtr;
2028 SpinLockRelease(&XLogCtl->info_lck);
2029
2030 /*
2031 * Acquire an up-to-date LogwrtResult value and see if we still
2032 * need to write it or if someone else already did.
2033 */
2034 RefreshXLogWriteResult(LogwrtResult);
2035 if (LogwrtResult.Write < OldPageRqstPtr)
2036 {
2037 /*
2038 * Must acquire write lock. Release WALBufMappingLock first,
2039 * to make sure that all insertions that we need to wait for
2040 * can finish (up to this same position). Otherwise we risk
2041 * deadlock.
2042 */
2043 LWLockRelease(WALBufMappingLock);
2044
2045 WaitXLogInsertionsToFinish(OldPageRqstPtr);
2046
2047 LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
2048
2049 RefreshXLogWriteResult(LogwrtResult);
2050 if (LogwrtResult.Write >= OldPageRqstPtr)
2051 {
2052 /* OK, someone wrote it already */
2053 LWLockRelease(WALWriteLock);
2054 }
2055 else
2056 {
2057 /* Have to write it ourselves */
2058 TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START();
2059 WriteRqst.Write = OldPageRqstPtr;
2060 WriteRqst.Flush = 0;
2061 XLogWrite(WriteRqst, tli, false);
2062 LWLockRelease(WALWriteLock);
2063 pgWalUsage.wal_buffers_full++;
2064 TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
2065
2066 /*
2067 * Required for the flush of pending stats WAL data, per
2068 * update of pgWalUsage.
2069 */
2070 pgstat_report_fixed = true;
2071 }
2072 /* Re-acquire WALBufMappingLock and retry */
2073 LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
2074 continue;
2075 }
2076 }
2077
2078 /*
2079 * Now the next buffer slot is free and we can set it up to be the
2080 * next output page.
2081 */
2082 NewPageBeginPtr = XLogCtl->InitializedUpTo;
2083 NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
2084
2085 Assert(XLogRecPtrToBufIdx(NewPageBeginPtr) == nextidx);
2086
2087 NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
2088
2089 /*
2090 * Mark the xlblock with InvalidXLogRecPtr and issue a write barrier
2091 * before initializing. Otherwise, the old page may be partially
2092 * zeroed but look valid.
2093 */
2094 pg_atomic_write_u64(&XLogCtl->xlblocks[nextidx], InvalidXLogRecPtr);
2095 pg_write_barrier();
2096
2097 /*
2098 * Be sure to re-zero the buffer so that bytes beyond what we've
2099 * written will look like zeroes and not valid XLOG records...
2100 */
2101 MemSet(NewPage, 0, XLOG_BLCKSZ);
2102
2103 /*
2104 * Fill the new page's header
2105 */
2106 NewPage->xlp_magic = XLOG_PAGE_MAGIC;
2107
2108 /* NewPage->xlp_info = 0; */ /* done by memset */
2109 NewPage->xlp_tli = tli;
2110 NewPage->xlp_pageaddr = NewPageBeginPtr;
2111
2112 /* NewPage->xlp_rem_len = 0; */ /* done by memset */
2113
2114 /*
2115 * If online backup is not in progress, mark the header to indicate
2116 * that WAL records beginning in this page have removable backup
2117 * blocks. This allows the WAL archiver to know whether it is safe to
2118 * compress archived WAL data by transforming full-block records into
2119 * the non-full-block format. It is sufficient to record this at the
2120 * page level because we force a page switch (in fact a segment
2121 * switch) when starting a backup, so the flag will be off before any
2122 * records can be written during the backup. At the end of a backup,
2123 * the last page will be marked as all unsafe when perhaps only part
2124 * is unsafe, but at worst the archiver would miss the opportunity to
2125 * compress a few records.
2126 */
2127 if (Insert->runningBackups == 0)
2128 NewPage->xlp_info |= XLP_BKP_REMOVABLE;
2129
2130 /*
2131 * If first page of an XLOG segment file, make it a long header.
2132 */
2133 if ((XLogSegmentOffset(NewPage->xlp_pageaddr, wal_segment_size)) == 0)
2134 {
2135 XLogLongPageHeader NewLongPage = (XLogLongPageHeader) NewPage;
2136
2137 NewLongPage->xlp_sysid = ControlFile->system_identifier;
2138 NewLongPage->xlp_seg_size = wal_segment_size;
2139 NewLongPage->xlp_xlog_blcksz = XLOG_BLCKSZ;
2140 NewPage->xlp_info |= XLP_LONG_HEADER;
2141 }
2142
2143 /*
2144 * Make sure the initialization of the page becomes visible to others
2145 * before the xlblocks update. GetXLogBuffer() reads xlblocks without
2146 * holding a lock.
2147 */
2148 pg_write_barrier();
2149
2150 pg_atomic_write_u64(&XLogCtl->xlblocks[nextidx], NewPageEndPtr);
2151 XLogCtl->InitializedUpTo = NewPageEndPtr;
2152
2153 npages++;
2154 }
2155 LWLockRelease(WALBufMappingLock);
2156
2157#ifdef WAL_DEBUG
2158 if (XLOG_DEBUG && npages > 0)
2159 {
2160 elog(DEBUG1, "initialized %d pages, up to %X/%08X",
2161 npages, LSN_FORMAT_ARGS(NewPageEndPtr));
2162 }
2163#endif
2164}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:483
#define pg_write_barrier()
Definition: atomics.h:155
#define pg_attribute_unused()
Definition: c.h:132
#define MemSet(start, val, len)
Definition: c.h:1020
size_t Size
Definition: c.h:611
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:226
static void Insert(File file)
Definition: fd.c:1314
Assert(PointerIsAligned(start, uint64))
WalUsage pgWalUsage
Definition: instrument.c:22
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1174
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1894
@ LW_EXCLUSIVE
Definition: lwlock.h:112
bool pgstat_report_fixed
Definition: pgstat.c:218
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
uint64 system_identifier
Definition: pg_control.h:110
int64 wal_buffers_full
Definition: instrument.h:56
XLogwrtRqst LogwrtRqst
Definition: xlog.c:456
slock_t info_lck
Definition: xlog.c:553
XLogRecPtr InitializedUpTo
Definition: xlog.c:485
char * pages
Definition: xlog.c:492
pg_atomic_uint64 * xlblocks
Definition: xlog.c:493
XLogCtlInsert Insert
Definition: xlog.c:453
TimeLineID xlp_tli
Definition: xlog_internal.h:40
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
XLogRecPtr Write
Definition: xlog.c:328
XLogRecPtr Flush
Definition: xlog.c:323
XLogRecPtr Write
Definition: xlog.c:322
static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto)
Definition: xlog.c:1507
#define RefreshXLogWriteResult(_target)
Definition: xlog.c:620
int wal_segment_size
Definition: xlog.c:144
static XLogwrtResult LogwrtResult
Definition: xlog.c:612
#define XLogRecPtrToBufIdx(recptr)
Definition: xlog.c:591
static void XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
Definition: xlog.c:2304
static ControlFileData * ControlFile
Definition: xlog.c:574
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:71
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:54
#define XLP_LONG_HEADER
Definition: xlog_internal.h:76
#define XLP_BKP_REMOVABLE
Definition: xlog_internal.h:78
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:46
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert(), ControlFile, DEBUG1, elog, XLogwrtRqst::Flush, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, XLogCtlData::Insert, Insert(), InvalidXLogRecPtr, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, XLogCtlData::pages, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_attribute_unused, pg_write_barrier, pgstat_report_fixed, pgWalUsage, RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, ControlFileData::system_identifier, WaitXLogInsertionsToFinish(), WalUsage::wal_buffers_full, wal_segment_size, XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, XLogSegmentOffset, XLogWrite(), XLP_BKP_REMOVABLE, XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, and XLogLongPageHeaderData::xlp_xlog_blcksz.

Referenced by GetXLogBuffer(), and XLogBackgroundFlush().

◆ assign_checkpoint_completion_target()

void assign_checkpoint_completion_target ( double  newval,
void *  extra 
)

Definition at line 2207 of file xlog.c.

2208{
2209 CheckPointCompletionTarget = newval;
2210 CalculateCheckpointSegments();
2211}
double CheckPointCompletionTarget
Definition: checkpointer.c:159
#define newval
static void CalculateCheckpointSegments(void)
Definition: xlog.c:2171

References CalculateCheckpointSegments(), CheckPointCompletionTarget, and newval.

◆ assign_max_wal_size()

void assign_max_wal_size ( int  newval,
void *  extra 
)

Definition at line 2200 of file xlog.c.

2201{
2202 max_wal_size_mb = newval;
2203 CalculateCheckpointSegments();
2204}
int max_wal_size_mb
Definition: xlog.c:115

References CalculateCheckpointSegments(), max_wal_size_mb, and newval.

◆ assign_wal_consistency_checking()

void assign_wal_consistency_checking ( const char *  newval,
void *  extra 
)

Definition at line 4799 of file xlog.c.

4800{
4801 /*
4802 * If some checks were deferred, it's possible that the checks will fail
4803 * later during InitializeWalConsistencyChecking(). But in that case, the
4804 * postmaster will exit anyway, so it's safe to proceed with the
4805 * assignment.
4806 *
4807 * Any built-in resource managers specified are assigned immediately,
4808 * which affects WAL created before shared_preload_libraries are
4809 * processed. Any custom resource managers specified won't be assigned
4810 * until after shared_preload_libraries are processed, but that's OK
4811 * because WAL for a custom resource manager can't be written before the
4812 * module is loaded anyway.
4813 */
4814 wal_consistency_checking = (bool *) extra;
4815}
bool * wal_consistency_checking
Definition: xlog.c:127

References wal_consistency_checking.

◆ assign_wal_sync_method()

void assign_wal_sync_method ( int  new_wal_sync_method,
void *  extra 
)

Definition at line 8697 of file xlog.c.

8698{
8699 if (wal_sync_method != new_wal_sync_method)
8700 {
8701 /*
8702 * To ensure that no blocks escape unsynced, force an fsync on the
8703 * currently open log segment (if any). Also, if the open flag is
8704 * changing, close the log file so it will be reopened (with new flag
8705 * bit) at next use.
8706 */
8707 if (openLogFile >= 0)
8708 {
8709 pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN);
8710 if (pg_fsync(openLogFile) != 0)
8711 {
8712 char xlogfname[MAXFNAMELEN];
8713 int save_errno;
8714
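8715 save_errno = errno;
8716 XLogFileName(xlogfname, openLogTLI, openLogSegNo,
8717 wal_segment_size);
8718 errno = save_errno;
8719 ereport(PANIC,
8720 (errcode_for_file_access(),
8721 errmsg("could not fsync file \"%s\": %m", xlogfname)));
8722 }
8723
8724 pgstat_report_wait_end();
8725 if (get_sync_bit(wal_sync_method) != get_sync_bit(new_wal_sync_method))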
8726 XLogFileClose();
8727 }
8728 }
8729}
int errcode_for_file_access(void)
Definition: elog.c:877
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define PANIC
Definition: elog.h:42
#define ereport(elevel,...)
Definition: elog.h:150
int pg_fsync(int fd)
Definition: fd.c:386
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:69
static void pgstat_report_wait_end(void)
Definition: wait_event.h:85
static int openLogFile
Definition: xlog.c:635
static int get_sync_bit(int method)
Definition: xlog.c:8649
int wal_sync_method
Definition: xlog.c:131
static TimeLineID openLogTLI
Definition: xlog.c:637
static void XLogFileClose(void)
Definition: xlog.c:3655
static XLogSegNo openLogSegNo
Definition: xlog.c:636
#define MAXFNAMELEN
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)

References ereport, errcode_for_file_access(), errmsg(), get_sync_bit(), MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), wal_segment_size, wal_sync_method, XLogFileClose(), and XLogFileName().

◆ BootStrapXLOG()

void BootStrapXLOG ( uint32  data_checksum_version)

Definition at line 5075 of file xlog.c.

5076{
5077 CheckPoint checkPoint;
5078 char *buffer;
5079 XLogPageHeader page;
5080 XLogLongPageHeader longpage;
5081 XLogRecord *record;
5082 char *recptr;
5083 uint64 sysidentifier;
5084 struct timeval tv;
5085 pg_crc32c crc;
5086
5087 /* allow ordinary WAL segment creation, like StartupXLOG() would */
5089
5090 /*
5091 * Select a hopefully-unique system identifier code for this installation.
5092 * We use the result of gettimeofday(), including the fractional seconds
5093 * field, as being about as unique as we can easily get. (Think not to
5094 * use random(), since it hasn't been seeded and there's no portable way
5095 * to seed it other than the system clock value...) The upper half of the
5096 * uint64 value is just the tv_sec part, while the lower half contains the
5097 * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
5098 * PID for a little extra uniqueness. A person knowing this encoding can
5099 * determine the initialization time of the installation, which could
5100 * perhaps be useful sometimes.
5101 */
5102 gettimeofday(&tv, NULL);
5103 sysidentifier = ((uint64) tv.tv_sec) << 32;
5104 sysidentifier |= ((uint64) tv.tv_usec) << 12;
5105 sysidentifier |= getpid() & 0xFFF;
5106
5107 /* page buffer must be aligned suitably for O_DIRECT */
5108 buffer = (char *) palloc(XLOG_BLCKSZ + XLOG_BLCKSZ);
5109 page = (XLogPageHeader) TYPEALIGN(XLOG_BLCKSZ, buffer);
5110 memset(page, 0, XLOG_BLCKSZ);
5111
5112 /*
5113 * Set up information for the initial checkpoint record
5114 *
5115 * The initial checkpoint record is written to the beginning of the WAL
5116 * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
5117 * used, so that we can use 0/0 to mean "before any valid WAL segment".
5118 */
5122 checkPoint.fullPageWrites = fullPageWrites;
5123 checkPoint.wal_level = wal_level;
5124 checkPoint.nextXid =
5126 checkPoint.nextOid = FirstGenbkiObjectId;
5127 checkPoint.nextMulti = FirstMultiXactId;
5128 checkPoint.nextMultiOffset = 0;
5130 checkPoint.oldestXidDB = Template1DbOid;
5131 checkPoint.oldestMulti = FirstMultiXactId;
5132 checkPoint.oldestMultiDB = Template1DbOid;
5135 checkPoint.time = (pg_time_t) time(NULL);
5137
5138 TransamVariables->nextXid = checkPoint.nextXid;
5139 TransamVariables->nextOid = checkPoint.nextOid;
5141 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5142 AdvanceOldestClogXid(checkPoint.oldestXid);
5143 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5144 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5146
5147 /* Set up the XLOG page header */
5148 page->xlp_magic = XLOG_PAGE_MAGIC;
5149 page->xlp_info = XLP_LONG_HEADER;
5152 longpage = (XLogLongPageHeader) page;
5153 longpage->xlp_sysid = sysidentifier;
5154 longpage->xlp_seg_size = wal_segment_size;
5155 longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
5156
5157 /* Insert the initial checkpoint record */
5158 recptr = ((char *) page + SizeOfXLogLongPHD);
5159 record = (XLogRecord *) recptr;
5160 record->xl_prev = 0;
5161 record->xl_xid = InvalidTransactionId;
5162 record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
5164 record->xl_rmid = RM_XLOG_ID;
5165 recptr += SizeOfXLogRecord;
5166 /* fill the XLogRecordDataHeaderShort struct */
5167 *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
5168 *(recptr++) = sizeof(checkPoint);
5169 memcpy(recptr, &checkPoint, sizeof(checkPoint));
5170 recptr += sizeof(checkPoint);
5171 Assert(recptr - (char *) record == record->xl_tot_len);
5172
5174 COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5175 COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5176 FIN_CRC32C(crc);
5177 record->xl_crc = crc;
5178
5179 /* Create first XLOG segment file */
5182
5183 /*
5184 * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
5185 * close the file again in a moment.
5186 */
5187
5188 /* Write the first page with the initial record */
5189 errno = 0;
5190 pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_WRITE);
5191 if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5192 {
5193 /* if write didn't set errno, assume problem is no disk space */
5194 if (errno == 0)
5195 errno = ENOSPC;
5196 ereport(PANIC,
5198 errmsg("could not write bootstrap write-ahead log file: %m")));
5199 }
5201
5202 pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_SYNC);
5203 if (pg_fsync(openLogFile) != 0)
5204 ereport(PANIC,
5206 errmsg("could not fsync bootstrap write-ahead log file: %m")));
5208
5209 if (close(openLogFile) != 0)
5210 ereport(PANIC,
5212 errmsg("could not close bootstrap write-ahead log file: %m")));
5213
5214 openLogFile = -1;
5215
5216 /* Now create pg_control */
5217 InitControlFile(sysidentifier, data_checksum_version);
5218 ControlFile->time = checkPoint.time;
5219 ControlFile->checkPoint = checkPoint.redo;
5220 ControlFile->checkPointCopy = checkPoint;
5221
5222 /* some additional ControlFile fields are set in WriteControlFile() */
5224
5225 /* Bootstrap the commit log, too */
5226 BootStrapCLOG();
5230
5231 pfree(buffer);
5232
5233 /*
5234 * Force control file to be read - in contrast to normal processing we'd
5235 * otherwise never run the checks and GUC related initializations therein.
5236 */
5238}
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:804
uint64_t uint64
Definition: c.h:540
void BootStrapCLOG(void)
Definition: clog.c:831
void BootStrapCommitTs(void)
Definition: commit_ts.c:594
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition: commit_ts.c:887
#define close(a)
Definition: win32.h:12
#define write(a, b, c)
Definition: win32.h:14
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc(Size size)
Definition: mcxt.c:1365
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition: multixact.c:2258
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup)
Definition: multixact.c:2292
void BootStrapMultiXact(void)
Definition: multixact.c:2025
#define FirstMultiXactId
Definition: multixact.h:26
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:68
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:153
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:158
return crc
int64 pg_time_t
Definition: pgtime.h:23
Oid oldestMultiDB
Definition: pg_control.h:51
MultiXactId oldestMulti
Definition: pg_control.h:50
MultiXactOffset nextMultiOffset
Definition: pg_control.h:47
TransactionId newestCommitTsXid
Definition: pg_control.h:55
TransactionId oldestXid
Definition: pg_control.h:48
TimeLineID PrevTimeLineID
Definition: pg_control.h:40
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
Oid nextOid
Definition: pg_control.h:45
TransactionId oldestActiveXid
Definition: pg_control.h:64
bool fullPageWrites
Definition: pg_control.h:42
MultiXactId nextMulti
Definition: pg_control.h:46
FullTransactionId nextXid
Definition: pg_control.h:44
TransactionId oldestCommitTsXid
Definition: pg_control.h:53
pg_time_t time
Definition: pg_control.h:52
int wal_level
Definition: pg_control.h:43
XLogRecPtr redo
Definition: pg_control.h:37
Oid oldestXidDB
Definition: pg_control.h:49
CheckPoint checkPointCopy
Definition: pg_control.h:135
pg_time_t time
Definition: pg_control.h:132
XLogRecPtr checkPoint
Definition: pg_control.h:133
FullTransactionId nextXid
Definition: transam.h:220
XLogRecPtr xl_prev
Definition: xlogrecord.h:45
uint8 xl_info
Definition: xlogrecord.h:46
uint32 xl_tot_len
Definition: xlogrecord.h:43
TransactionId xl_xid
Definition: xlogrecord.h:44
RmgrId xl_rmid
Definition: xlogrecord.h:47
void BootStrapSUBTRANS(void)
Definition: subtrans.c:269
#define InvalidTransactionId
Definition: transam.h:31
#define FirstGenbkiObjectId
Definition: transam.h:195
#define FirstNormalTransactionId
Definition: transam.h:34
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition: transam.h:71
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition: varsup.c:372
void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
Definition: varsup.c:355
TransamVariablesData * TransamVariables
Definition: varsup.c:34
int gettimeofday(struct timeval *tp, void *tzp)
int XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
Definition: xlog.c:3396
bool fullPageWrites
Definition: xlog.c:123
static void InitControlFile(uint64 sysidentifier, uint32 data_checksum_version)
Definition: xlog.c:4220
void SetInstallXLogFileSegmentActive(void)
Definition: xlog.c:9528
int wal_level
Definition: xlog.c:132
static void WriteControlFile(void)
Definition: xlog.c:4255
#define BootstrapTimeLineID
Definition: xlog.c:112
static void ReadControlFile(void)
Definition: xlog.c:4364
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:69
#define SizeOfXLogRecordDataHeaderShort
Definition: xlogrecord.h:217
#define XLR_BLOCK_ID_DATA_SHORT
Definition: xlogrecord.h:241
#define SizeOfXLogRecord
Definition: xlogrecord.h:55

References AdvanceOldestClogXid(), Assert(), BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), BootstrapTimeLineID, ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ControlFile, crc, ereport, errcode_for_file_access(), errmsg(), FIN_CRC32C, FirstGenbkiObjectId, FirstMultiXactId, FirstNormalTransactionId, fullPageWrites, CheckPoint::fullPageWrites, FullTransactionIdFromEpochAndXid(), gettimeofday(), INIT_CRC32C, InitControlFile(), InvalidTransactionId, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, openLogTLI, palloc(), PANIC, pfree(), pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), CheckPoint::PrevTimeLineID, ReadControlFile(), CheckPoint::redo, SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, TransamVariables, TYPEALIGN, wal_level, CheckPoint::wal_level, wal_segment_size, write, WriteControlFile(), XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, XLogLongPageHeaderData::xlp_xlog_blcksz, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by BootstrapModeMain().

◆ CalculateCheckpointSegments()

static void CalculateCheckpointSegments ( void  )
static

Definition at line 2171 of file xlog.c.

2172{
2173 double target;
2174
2175 /*-------
2176 * Calculate the distance at which to trigger a checkpoint, to avoid
2177 * exceeding max_wal_size_mb. This is based on two assumptions:
2178 *
2179 * a) we keep WAL for only one checkpoint cycle (prior to PG11 we kept
2180 * WAL for two checkpoint cycles to allow us to recover from the
2181 * secondary checkpoint if the first checkpoint failed, though we
2182 * only did this on the primary anyway, not on standby. Keeping just
2183 * one checkpoint simplifies processing and reduces disk space in
2184 * many smaller databases.)
2185 * b) during checkpoint, we consume checkpoint_completion_target *
2186 * number of segments consumed between checkpoints.
2187 *-------
2188 */
2189 target = (double) ConvertToXSegs(max_wal_size_mb, wal_segment_size) /
2191
2192 /* round down */
2193 CheckPointSegments = (int) target;
2194
2195 if (CheckPointSegments < 1)
2197}
#define ConvertToXSegs(x, segsize)
Definition: xlog.c:603
int CheckPointSegments
Definition: xlog.c:157

References CheckPointCompletionTarget, CheckPointSegments, ConvertToXSegs, max_wal_size_mb, and wal_segment_size.

Referenced by assign_checkpoint_completion_target(), assign_max_wal_size(), and ReadControlFile().

◆ check_wal_buffers()

bool check_wal_buffers ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 4677 of file xlog.c.

4678{
4679 /*
4680 * -1 indicates a request for auto-tune.
4681 */
4682 if (*newval == -1)
4683 {
4684 /*
4685 * If we haven't yet changed the boot_val default of -1, just let it
4686 * be. We'll fix it when XLOGShmemSize is called.
4687 */
4688 if (XLOGbuffers == -1)
4689 return true;
4690
4691 /* Otherwise, substitute the auto-tune value */
4693 }
4694
4695 /*
4696 * We clamp manually-set values to at least 4 blocks. Prior to PostgreSQL
4697 * 9.1, a minimum of 4 was enforced by guc.c, but since that is no longer
4698 * the case, we just silently treat such values as a request for the
4699 * minimum. (We could throw an error instead, but that doesn't seem very
4700 * helpful.)
4701 */
4702 if (*newval < 4)
4703 *newval = 4;
4704
4705 return true;
4706}
static int XLOGChooseNumBuffers(void)
Definition: xlog.c:4661
int XLOGbuffers
Definition: xlog.c:118

References newval, XLOGbuffers, and XLOGChooseNumBuffers().

◆ check_wal_consistency_checking()

bool check_wal_consistency_checking ( char **  newval,
void **  extra,
GucSource  source 
)

Definition at line 4712 of file xlog.c.

4713{
4714 char *rawstring;
4715 List *elemlist;
4716 ListCell *l;
4717 bool newwalconsistency[RM_MAX_ID + 1];
4718
4719 /* Initialize the array */
4720 MemSet(newwalconsistency, 0, (RM_MAX_ID + 1) * sizeof(bool));
4721
4722 /* Need a modifiable copy of string */
4723 rawstring = pstrdup(*newval);
4724
4725 /* Parse string into list of identifiers */
4726 if (!SplitIdentifierString(rawstring, ',', &elemlist))
4727 {
4728 /* syntax error in list */
4729 GUC_check_errdetail("List syntax is invalid.");
4730 pfree(rawstring);
4731 list_free(elemlist);
4732 return false;
4733 }
4734
4735 foreach(l, elemlist)
4736 {
4737 char *tok = (char *) lfirst(l);
4738 int rmid;
4739
4740 /* Check for 'all'. */
4741 if (pg_strcasecmp(tok, "all") == 0)
4742 {
4743 for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4744 if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL)
4745 newwalconsistency[rmid] = true;
4746 }
4747 else
4748 {
4749 /* Check if the token matches any known resource manager. */
4750 bool found = false;
4751
4752 for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4753 {
4754 if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL &&
4755 pg_strcasecmp(tok, GetRmgr(rmid).rm_name) == 0)
4756 {
4757 newwalconsistency[rmid] = true;
4758 found = true;
4759 break;
4760 }
4761 }
4762 if (!found)
4763 {
4764 /*
4765 * During startup, it might be a not-yet-loaded custom
4766 * resource manager. Defer checking until
4767 * InitializeWalConsistencyChecking().
4768 */
4770 {
4772 }
4773 else
4774 {
4775 GUC_check_errdetail("Unrecognized key word: \"%s\".", tok);
4776 pfree(rawstring);
4777 list_free(elemlist);
4778 return false;
4779 }
4780 }
4781 }
4782 }
4783
4784 pfree(rawstring);
4785 list_free(elemlist);
4786
4787 /* assign new value */
4788 *extra = guc_malloc(LOG, (RM_MAX_ID + 1) * sizeof(bool));
4789 if (!*extra)
4790 return false;
4791 memcpy(*extra, newwalconsistency, (RM_MAX_ID + 1) * sizeof(bool));
4792 return true;
4793}
#define LOG
Definition: elog.h:31
void * guc_malloc(int elevel, size_t size)
Definition: guc.c:639
#define GUC_check_errdetail
Definition: guc.h:505
void list_free(List *list)
Definition: list.c:1546
char * pstrdup(const char *in)
Definition: mcxt.c:1759
bool process_shared_preload_libraries_done
Definition: miscinit.c:1787
#define lfirst(lc)
Definition: pg_list.h:172
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
#define RM_MAX_ID
Definition: rmgr.h:33
Definition: pg_list.h:54
void(* rm_mask)(char *pagedata, BlockNumber blkno)
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:2744
static bool check_wal_consistency_checking_deferred
Definition: xlog.c:167
static RmgrData GetRmgr(RmgrId rmid)
static bool RmgrIdExists(RmgrId rmid)

References check_wal_consistency_checking_deferred, GetRmgr(), GUC_check_errdetail, guc_malloc(), lfirst, list_free(), LOG, MemSet, newval, pfree(), pg_strcasecmp(), process_shared_preload_libraries_done, pstrdup(), RmgrData::rm_mask, RM_MAX_ID, RmgrIdExists(), and SplitIdentifierString().

◆ check_wal_segment_size()

bool check_wal_segment_size ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 2214 of file xlog.c.

2215{
2217 {
2218 GUC_check_errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
2219 return false;
2220 }
2221
2222 return true;
2223}
#define IsValidWalSegSize(size)
Definition: xlog_internal.h:96

References GUC_check_errdetail, IsValidWalSegSize, and newval.

◆ CheckPointGuts()

static void CheckPointGuts ( XLogRecPtr  checkPointRedo,
int  flags 
)
static

Definition at line 7559 of file xlog.c.

7560{
7566
7567 /* Write out all dirty data in SLRUs and the main buffer pool */
7568 TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
7575 CheckPointBuffers(flags);
7576
7577 /* Perform all queued up fsyncs */
7578 TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
7582 TRACE_POSTGRESQL_BUFFER_CHECKPOINT_DONE();
7583
7584 /* We deliberately delay 2PC checkpointing as long as possible */
7585 CheckPointTwoPhase(checkPointRedo);
7586}
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1645
void CheckPointBuffers(int flags)
Definition: bufmgr.c:4184
void CheckPointCLOG(void)
Definition: clog.c:902
void CheckPointCommitTs(void)
Definition: commit_ts.c:794
void CheckPointMultiXact(void)
Definition: multixact.c:2234
void CheckPointReplicationOrigin(void)
Definition: origin.c:596
void CheckPointPredicate(void)
Definition: predicate.c:1041
void CheckPointRelationMap(void)
Definition: relmapper.c:611
void CheckPointLogicalRewriteHeap(void)
Definition: rewriteheap.c:1155
void CheckPointReplicationSlots(bool is_shutdown)
Definition: slot.c:2115
void CheckPointSnapBuild(void)
Definition: snapbuild.c:1970
TimestampTz ckpt_write_t
Definition: xlog.h:162
TimestampTz ckpt_sync_end_t
Definition: xlog.h:164
TimestampTz ckpt_sync_t
Definition: xlog.h:163
void CheckPointSUBTRANS(void)
Definition: subtrans.c:329
void ProcessSyncRequests(void)
Definition: sync.c:286
void CheckPointTwoPhase(XLogRecPtr redo_horizon)
Definition: twophase.c:1822
CheckpointStatsData CheckpointStats
Definition: xlog.c:210
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:139

References CHECKPOINT_IS_SHUTDOWN, CheckPointBuffers(), CheckPointCLOG(), CheckPointCommitTs(), CheckPointLogicalRewriteHeap(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointRelationMap(), CheckPointReplicationOrigin(), CheckPointReplicationSlots(), CheckPointSnapBuild(), CheckpointStats, CheckPointSUBTRANS(), CheckPointTwoPhase(), CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, GetCurrentTimestamp(), and ProcessSyncRequests().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ CheckRequiredParameterValues()

static void CheckRequiredParameterValues ( void  )
static

Definition at line 5425 of file xlog.c.

5426{
5427 /*
5428 * For archive recovery, the WAL must be generated with at least 'replica'
5429 * wal_level.
5430 */
5432 {
5433 ereport(FATAL,
5434 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5435 errmsg("WAL was generated with \"wal_level=minimal\", cannot continue recovering"),
5436 errdetail("This happens if you temporarily set \"wal_level=minimal\" on the server."),
5437 errhint("Use a backup taken after setting \"wal_level\" to higher than \"minimal\".")));
5438 }
5439
5440 /*
5441 * For Hot Standby, the WAL must be generated with 'replica' mode, and we
5442 * must have at least as many backend slots as the primary.
5443 */
5445 {
5446 /* We ignore autovacuum_worker_slots when we make this test. */
5447 RecoveryRequiresIntParameter("max_connections",
5450 RecoveryRequiresIntParameter("max_worker_processes",
5453 RecoveryRequiresIntParameter("max_wal_senders",
5456 RecoveryRequiresIntParameter("max_prepared_transactions",
5459 RecoveryRequiresIntParameter("max_locks_per_transaction",
5462 }
5463}
int errdetail(const char *fmt,...)
Definition: elog.c:1207
int errhint(const char *fmt,...)
Definition: elog.c:1321
int errcode(int sqlerrcode)
Definition: elog.c:854
#define FATAL
Definition: elog.h:41
int MaxConnections
Definition: globals.c:143
int max_worker_processes
Definition: globals.c:144
int max_locks_per_xact
Definition: lock.c:53
int max_worker_processes
Definition: pg_control.h:181
int max_locks_per_xact
Definition: pg_control.h:184
int max_prepared_xacts
Definition: pg_control.h:183
int max_prepared_xacts
Definition: twophase.c:116
int max_wal_senders
Definition: walsender.c:129
bool EnableHotStandby
Definition: xlog.c:122
@ WAL_LEVEL_MINIMAL
Definition: xlog.h:74
bool ArchiveRecoveryRequested
Definition: xlogrecovery.c:139
void RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)

References ArchiveRecoveryRequested, ControlFile, EnableHotStandby, ereport, errcode(), errdetail(), errhint(), errmsg(), FATAL, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, RecoveryRequiresIntParameter(), ControlFileData::wal_level, and WAL_LEVEL_MINIMAL.

Referenced by StartupXLOG(), and xlog_redo().

◆ CheckXLogRemoved()

void CheckXLogRemoved ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3743 of file xlog.c.

3744{
3745 int save_errno = errno;
3746 XLogSegNo lastRemovedSegNo;
3747
3749 lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3751
3752 if (segno <= lastRemovedSegNo)
3753 {
3754 char filename[MAXFNAMELEN];
3755
3757 errno = save_errno;
3758 ereport(ERROR,
3760 errmsg("requested WAL segment %s has already been removed",
3761 filename)));
3762 }
3763 errno = save_errno;
3764}
#define ERROR
Definition: elog.h:39
static char * filename
Definition: pg_dumpall.c:120
XLogSegNo lastRemovedSegNo
Definition: xlog.c:461
uint64 XLogSegNo
Definition: xlogdefs.h:51

References ereport, errcode_for_file_access(), errmsg(), ERROR, filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFileName().

Referenced by logical_read_xlog_page(), perform_base_backup(), and XLogSendPhysical().

◆ CleanupAfterArchiveRecovery()

static void CleanupAfterArchiveRecovery ( TimeLineID  EndOfLogTLI,
XLogRecPtr  EndOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5329 of file xlog.c.

5331{
5332 /*
5333 * Execute the recovery_end_command, if any.
5334 */
5335 if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
5337 "recovery_end_command",
5338 true,
5339 WAIT_EVENT_RECOVERY_END_COMMAND);
5340
5341 /*
5342 * We switched to a new timeline. Clean up segments on the old timeline.
5343 *
5344 * If there are any higher-numbered segments on the old timeline, remove
5345 * them. They might contain valid WAL, but they might also be
5346 * pre-allocated files containing garbage. In any case, they are not part
5347 * of the new timeline's history so we don't need them.
5348 */
5349 RemoveNonParentXlogFiles(EndOfLog, newTLI);
5350
5351 /*
5352 * If the switch happened in the middle of a segment, what to do with the
5353 * last, partial segment on the old timeline? If we don't archive it, and
5354 * the server that created the WAL never archives it either (e.g. because
5355 * it was hit by a meteor), it will never make it to the archive. That's
5356 * OK from our point of view, because the new segment that we created with
5357 * the new TLI contains all the WAL from the old timeline up to the switch
5358 * point. But if you later try to do PITR to the "missing" WAL on the old
5359 * timeline, recovery won't find it in the archive. It's physically
5360 * present in the new file with new TLI, but recovery won't look there
5361 * when it's recovering to the older timeline. On the other hand, if we
5362 * archive the partial segment, and the original server on that timeline
5363 * is still running and archives the completed version of the same segment
5364 * later, it will fail. (We used to do that in 9.4 and below, and it
5365 * caused such problems).
5366 *
5367 * As a compromise, we rename the last segment with the .partial suffix,
5368 * and archive it. Archive recovery will never try to read .partial
5369 * segments, so they will normally go unused. But in the odd PITR case,
5370 * the administrator can copy them manually to the pg_wal directory
5371 * (removing the suffix). They can be useful in debugging, too.
5372 *
5373 * If a .done or .ready file already exists for the old timeline, however,
5374 * we had already determined that the segment is complete, so we can let
5375 * it be archived normally. (In particular, if it was restored from the
5376 * archive to begin with, it's expected to have a .done file).
5377 */
5378 if (XLogSegmentOffset(EndOfLog, wal_segment_size) != 0 &&
5380 {
5381 char origfname[MAXFNAMELEN];
5382 XLogSegNo endLogSegNo;
5383
5384 XLByteToPrevSeg(EndOfLog, endLogSegNo, wal_segment_size);
5385 XLogFileName(origfname, EndOfLogTLI, endLogSegNo, wal_segment_size);
5386
5387 if (!XLogArchiveIsReadyOrDone(origfname))
5388 {
5389 char origpath[MAXPGPATH];
5390 char partialfname[MAXFNAMELEN];
5391 char partialpath[MAXPGPATH];
5392
5393 /*
5394 * If we're summarizing WAL, we can't rename the partial file
5395 * until the summarizer finishes with it, else it will fail.
5396 */
5397 if (summarize_wal)
5398 WaitForWalSummarization(EndOfLog);
5399
5400 XLogFilePath(origpath, EndOfLogTLI, endLogSegNo, wal_segment_size);
5401 snprintf(partialfname, MAXFNAMELEN, "%s.partial", origfname);
5402 snprintf(partialpath, MAXPGPATH, "%s.partial", origpath);
5403
5404 /*
5405 * Make sure there's no .done or .ready file for the .partial
5406 * file.
5407 */
5408 XLogArchiveCleanup(partialfname);
5409
5410 durable_rename(origpath, partialpath, ERROR);
5411 XLogArchiveNotify(partialfname);
5412 }
5413 }
5414}
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:779
#define MAXPGPATH
#define snprintf
Definition: port.h:239
bool summarize_wal
void WaitForWalSummarization(XLogRecPtr lsn)
void RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
Definition: xlog.c:3956
#define XLogArchivingActive()
Definition: xlog.h:99
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
static void XLogFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
bool XLogArchiveIsReadyOrDone(const char *xlog)
Definition: xlogarchive.c:664
void ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal, uint32 wait_event_info)
Definition: xlogarchive.c:295
void XLogArchiveNotify(const char *xlog)
Definition: xlogarchive.c:444
void XLogArchiveCleanup(const char *xlog)
Definition: xlogarchive.c:712
char * recoveryEndCommand
Definition: xlogrecovery.c:85

References durable_rename(), ERROR, ExecuteRecoveryCommand(), MAXFNAMELEN, MAXPGPATH, recoveryEndCommand, RemoveNonParentXlogFiles(), snprintf, summarize_wal, WaitForWalSummarization(), wal_segment_size, XLByteToPrevSeg, XLogArchiveCleanup(), XLogArchiveIsReadyOrDone(), XLogArchiveNotify(), XLogArchivingActive, XLogFileName(), XLogFilePath(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ CleanupBackupHistory()

static void CleanupBackupHistory ( void  )
static

Definition at line 4177 of file xlog.c.

4178{
4179 DIR *xldir;
4180 struct dirent *xlde;
4181 char path[MAXPGPATH + sizeof(XLOGDIR)];
4182
4183 xldir = AllocateDir(XLOGDIR);
4184
4185 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4186 {
4188 {
4189 if (XLogArchiveCheckDone(xlde->d_name))
4190 {
4191 elog(DEBUG2, "removing WAL backup history file \"%s\"",
4192 xlde->d_name);
4193 snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name);
4194 unlink(path);
4196 }
4197 }
4198 }
4199
4200 FreeDir(xldir);
4201}
#define DEBUG2
Definition: elog.h:29
int FreeDir(DIR *dir)
Definition: fd.c:3022
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2904
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2970
Definition: dirent.c:26
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
#define XLOGDIR
static bool IsBackupHistoryFileName(const char *fname)
bool XLogArchiveCheckDone(const char *xlog)
Definition: xlogarchive.c:565

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsBackupHistoryFileName(), MAXPGPATH, ReadDir(), snprintf, XLogArchiveCheckDone(), XLogArchiveCleanup(), and XLOGDIR.

Referenced by do_pg_backup_stop().

◆ CopyXLogRecordToWAL()

static void CopyXLogRecordToWAL ( int  write_len,
bool  isLogSwitch,
XLogRecData rdata,
XLogRecPtr  StartPos,
XLogRecPtr  EndPos,
TimeLineID  tli 
)
static

Definition at line 1228 of file xlog.c.

1230{
1231 char *currpos;
1232 int freespace;
1233 int written;
1234 XLogRecPtr CurrPos;
1235 XLogPageHeader pagehdr;
1236
1237 /*
1238 * Get a pointer to the right place in the right WAL buffer to start
1239 * inserting to.
1240 */
1241 CurrPos = StartPos;
1242 currpos = GetXLogBuffer(CurrPos, tli);
1243 freespace = INSERT_FREESPACE(CurrPos);
1244
1245 /*
1246 * there should be enough space for at least the first field (xl_tot_len)
1247 * on this page.
1248 */
1249 Assert(freespace >= sizeof(uint32));
1250
1251 /* Copy record data */
1252 written = 0;
1253 while (rdata != NULL)
1254 {
1255 const char *rdata_data = rdata->data;
1256 int rdata_len = rdata->len;
1257
1258 while (rdata_len > freespace)
1259 {
1260 /*
1261 * Write what fits on this page, and continue on the next page.
1262 */
1263 Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || freespace == 0);
1264 memcpy(currpos, rdata_data, freespace);
1265 rdata_data += freespace;
1266 rdata_len -= freespace;
1267 written += freespace;
1268 CurrPos += freespace;
1269
1270 /*
1271 * Get pointer to beginning of next page, and set the xlp_rem_len
1272 * in the page header. Set XLP_FIRST_IS_CONTRECORD.
1273 *
1274 * It's safe to set the contrecord flag and xlp_rem_len without a
1275 * lock on the page. All the other flags were already set when the
1276 * page was initialized, in AdvanceXLInsertBuffer, and we're the
1277 * only backend that needs to set the contrecord flag.
1278 */
1279 currpos = GetXLogBuffer(CurrPos, tli);
1280 pagehdr = (XLogPageHeader) currpos;
1281 pagehdr->xlp_rem_len = write_len - written;
1283
1284 /* skip over the page header */
1285 if (XLogSegmentOffset(CurrPos, wal_segment_size) == 0)
1286 {
1287 CurrPos += SizeOfXLogLongPHD;
1288 currpos += SizeOfXLogLongPHD;
1289 }
1290 else
1291 {
1292 CurrPos += SizeOfXLogShortPHD;
1293 currpos += SizeOfXLogShortPHD;
1294 }
1295 freespace = INSERT_FREESPACE(CurrPos);
1296 }
1297
1298 Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || rdata_len == 0);
1299 memcpy(currpos, rdata_data, rdata_len);
1300 currpos += rdata_len;
1301 CurrPos += rdata_len;
1302 freespace -= rdata_len;
1303 written += rdata_len;
1304
1305 rdata = rdata->next;
1306 }
1307 Assert(written == write_len);
1308
1309 /*
1310 * If this was an xlog-switch, it's not enough to write the switch record,
1311 * we also have to consume all the remaining space in the WAL segment. We
1312 * have already reserved that space, but we need to actually fill it.
1313 */
1314 if (isLogSwitch && XLogSegmentOffset(CurrPos, wal_segment_size) != 0)
1315 {
1316 /* An xlog-switch record doesn't contain any data besides the header */
1317 Assert(write_len == SizeOfXLogRecord);
1318
1319 /* Assert that we did reserve the right amount of space */
1321
1322 /* Use up all the remaining space on the current page */
1323 CurrPos += freespace;
1324
1325 /*
1326 * Cause all remaining pages in the segment to be flushed, leaving the
1327 * XLog position where it should be, at the start of the next segment.
1328 * We do this one page at a time, to make sure we don't deadlock
1329 * against ourselves if wal_buffers < wal_segment_size.
1330 */
1331 while (CurrPos < EndPos)
1332 {
1333 /*
1334 * The minimal action to flush the page would be to call
1335 * WALInsertLockUpdateInsertingAt(CurrPos) followed by
1336 * AdvanceXLInsertBuffer(...). The page would be left initialized
1337 * mostly to zeros, except for the page header (always the short
1338 * variant, as this is never a segment's first page).
1339 *
1340 * The large vistas of zeros are good for compressibility, but the
1341 * headers interrupting them every XLOG_BLCKSZ (with values that
1342 * differ from page to page) are not. The effect varies with
1343 * compression tool, but bzip2 for instance compresses about an
1344 * order of magnitude worse if those headers are left in place.
1345 *
1346 * Rather than complicating AdvanceXLInsertBuffer itself (which is
1347 * called in heavily-loaded circumstances as well as this lightly-
1348 * loaded one) with variant behavior, we just use GetXLogBuffer
1349 * (which itself calls the two methods we need) to get the pointer
1350 * and zero most of the page. Then we just zero the page header.
1351 */
1352 currpos = GetXLogBuffer(CurrPos, tli);
1353 MemSet(currpos, 0, SizeOfXLogShortPHD);
1354
1355 CurrPos += XLOG_BLCKSZ;
1356 }
1357 }
1358 else
1359 {
1360 /* Align the end position, so that the next record starts aligned */
1361 CurrPos = MAXALIGN64(CurrPos);
1362 }
1363
1364 if (CurrPos != EndPos)
1365 ereport(PANIC,
1367 errmsg_internal("space reserved for WAL record does not match what was written"));
1368}
uint32_t uint32
Definition: c.h:539
#define MAXALIGN64(LEN)
Definition: c.h:836
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1161
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:42
const void * data
struct XLogRecData * next
#define INSERT_FREESPACE(endptr)
Definition: xlog.c:580
static char * GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli)
Definition: xlog.c:1635
#define XLP_FIRST_IS_CONTRECORD
Definition: xlog_internal.h:74
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:52

References Assert(), XLogRecData::data, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), GetXLogBuffer(), INSERT_FREESPACE, XLogRecData::len, MAXALIGN64, MemSet, XLogRecData::next, PANIC, SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, wal_segment_size, XLogSegmentOffset, XLP_FIRST_IS_CONTRECORD, XLogPageHeaderData::xlp_info, and XLogPageHeaderData::xlp_rem_len.

Referenced by XLogInsertRecord().

◆ CreateCheckPoint()

bool CreateCheckPoint ( int  flags)

Definition at line 6941 of file xlog.c.

6942{
6943 bool shutdown;
6944 CheckPoint checkPoint;
6945 XLogRecPtr recptr;
6946 XLogSegNo _logSegNo;
6948 uint32 freespace;
6949 XLogRecPtr PriorRedoPtr;
6950 XLogRecPtr last_important_lsn;
6951 VirtualTransactionId *vxids;
6952 int nvxids;
6953 int oldXLogAllowed = 0;
6954
6955 /*
6956 * An end-of-recovery checkpoint is really a shutdown checkpoint, just
6957 * issued at a different time.
6958 */
6960 shutdown = true;
6961 else
6962 shutdown = false;
6963
6964 /* sanity check */
6965 if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
6966 elog(ERROR, "can't create a checkpoint during recovery");
6967
6968 /*
6969 * Prepare to accumulate statistics.
6970 *
6971 * Note: because it is possible for log_checkpoints to change while a
6972 * checkpoint proceeds, we always accumulate stats, even if
6973 * log_checkpoints is currently off.
6974 */
6977
6978 /*
6979 * Let smgr prepare for checkpoint; this has to happen outside the
6980 * critical section and before we determine the REDO pointer. Note that
6981 * smgr must not do anything that'd have to be undone if we decide no
6982 * checkpoint is needed.
6983 */
6985
6986 /*
6987 * Use a critical section to force system panic if we have trouble.
6988 */
6990
6991 if (shutdown)
6992 {
6993 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6996 LWLockRelease(ControlFileLock);
6997 }
6998
6999 /* Begin filling in the checkpoint WAL record */
7000 MemSet(&checkPoint, 0, sizeof(checkPoint));
7001 checkPoint.time = (pg_time_t) time(NULL);
7002
7003 /*
7004 * For Hot Standby, derive the oldestActiveXid before we fix the redo
7005 * pointer. This allows us to begin accumulating changes to assemble our
7006 * starting snapshot of locks and transactions.
7007 */
7008 if (!shutdown && XLogStandbyInfoActive())
7009 checkPoint.oldestActiveXid = GetOldestActiveTransactionId(false, true);
7010 else
7012
7013 /*
7014 * Get location of last important record before acquiring insert locks (as
7015 * GetLastImportantRecPtr() also locks WAL locks).
7016 */
7017 last_important_lsn = GetLastImportantRecPtr();
7018
7019 /*
7020 * If this isn't a shutdown or forced checkpoint, and if there has been no
7021 * WAL activity requiring a checkpoint, skip it. The idea here is to
7022 * avoid inserting duplicate checkpoints when the system is idle.
7023 */
7025 CHECKPOINT_FORCE)) == 0)
7026 {
7027 if (last_important_lsn == ControlFile->checkPoint)
7028 {
7031 (errmsg_internal("checkpoint skipped because system is idle")));
7032 return false;
7033 }
7034 }
7035
7036 /*
7037 * An end-of-recovery checkpoint is created before anyone is allowed to
7038 * write WAL. To allow us to write the checkpoint record, temporarily
7039 * enable XLogInsertAllowed.
7040 */
7041 if (flags & CHECKPOINT_END_OF_RECOVERY)
7042 oldXLogAllowed = LocalSetXLogInsertAllowed();
7043
7045 if (flags & CHECKPOINT_END_OF_RECOVERY)
7047 else
7048 checkPoint.PrevTimeLineID = checkPoint.ThisTimeLineID;
7049
7050 /*
7051 * We must block concurrent insertions while examining insert state.
7052 */
7054
7055 checkPoint.fullPageWrites = Insert->fullPageWrites;
7056 checkPoint.wal_level = wal_level;
7057
7058 if (shutdown)
7059 {
7060 XLogRecPtr curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
7061
7062 /*
7063 * Compute new REDO record ptr = location of next XLOG record.
7064 *
7065 * Since this is a shutdown checkpoint, there can't be any concurrent
7066 * WAL insertion.
7067 */
7068 freespace = INSERT_FREESPACE(curInsert);
7069 if (freespace == 0)
7070 {
7071 if (XLogSegmentOffset(curInsert, wal_segment_size) == 0)
7072 curInsert += SizeOfXLogLongPHD;
7073 else
7074 curInsert += SizeOfXLogShortPHD;
7075 }
7076 checkPoint.redo = curInsert;
7077
7078 /*
7079 * Here we update the shared RedoRecPtr for future XLogInsert calls;
7080 * this must be done while holding all the insertion locks.
7081 *
7082 * Note: if we fail to complete the checkpoint, RedoRecPtr will be
7083 * left pointing past where it really needs to point. This is okay;
7084 * the only consequence is that XLogInsert might back up whole buffers
7085 * that it didn't really need to. We can't postpone advancing
7086 * RedoRecPtr because XLogInserts that happen while we are dumping
7087 * buffers must assume that their buffer changes are not included in
7088 * the checkpoint.
7089 */
7090 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
7091 }
7092
7093 /*
7094 * Now we can release the WAL insertion locks, allowing other xacts to
7095 * proceed while we are flushing disk buffers.
7096 */
7098
7099 /*
7100 * If this is an online checkpoint, we have not yet determined the redo
7101 * point. We do so now by inserting the special XLOG_CHECKPOINT_REDO
7102 * record; the LSN at which it starts becomes the new redo pointer. We
7103 * don't do this for a shutdown checkpoint, because in that case no WAL
7104 * can be written between the redo point and the insertion of the
7105 * checkpoint record itself, so the checkpoint record itself serves to
7106 * mark the redo point.
7107 */
7108 if (!shutdown)
7109 {
7110 /* Include WAL level in record for WAL summarizer's benefit. */
7113 (void) XLogInsert(RM_XLOG_ID, XLOG_CHECKPOINT_REDO);
7114
7115 /*
7116 * XLogInsertRecord will have updated XLogCtl->Insert.RedoRecPtr in
7117 * shared memory and RedoRecPtr in backend-local memory, but we need
7118 * to copy that into the record that will be inserted when the
7119 * checkpoint is complete.
7120 */
7121 checkPoint.redo = RedoRecPtr;
7122 }
7123
7124 /* Update the info_lck-protected copy of RedoRecPtr as well */
7126 XLogCtl->RedoRecPtr = checkPoint.redo;
7128
7129 /*
7130 * If enabled, log checkpoint start. We postpone this until now so as not
7131 * to log anything if we decided to skip the checkpoint.
7132 */
7133 if (log_checkpoints)
7134 LogCheckpointStart(flags, false);
7135
7136 /* Update the process title */
7137 update_checkpoint_display(flags, false, false);
7138
7139 TRACE_POSTGRESQL_CHECKPOINT_START(flags);
7140
7141 /*
7142 * Get the other info we need for the checkpoint record.
7143 *
7144 * We don't need to save oldestClogXid in the checkpoint, it only matters
7145 * for the short period in which clog is being truncated, and if we crash
7146 * during that we'll redo the clog truncation and fix up oldestClogXid
7147 * there.
7148 */
7149 LWLockAcquire(XidGenLock, LW_SHARED);
7150 checkPoint.nextXid = TransamVariables->nextXid;
7151 checkPoint.oldestXid = TransamVariables->oldestXid;
7153 LWLockRelease(XidGenLock);
7154
7155 LWLockAcquire(CommitTsLock, LW_SHARED);
7158 LWLockRelease(CommitTsLock);
7159
7160 LWLockAcquire(OidGenLock, LW_SHARED);
7161 checkPoint.nextOid = TransamVariables->nextOid;
7162 if (!shutdown)
7163 checkPoint.nextOid += TransamVariables->oidCount;
7164 LWLockRelease(OidGenLock);
7165
7166 MultiXactGetCheckptMulti(shutdown,
7167 &checkPoint.nextMulti,
7168 &checkPoint.nextMultiOffset,
7169 &checkPoint.oldestMulti,
7170 &checkPoint.oldestMultiDB);
7171
7172 /*
7173 * Having constructed the checkpoint record, ensure all shmem disk buffers
7174 * and commit-log buffers are flushed to disk.
7175 *
7176 * This I/O could fail for various reasons. If so, we will fail to
7177 * complete the checkpoint, but there is no reason to force a system
7178 * panic. Accordingly, exit critical section while doing it.
7179 */
7181
7182 /*
7183 * In some cases there are groups of actions that must all occur on one
7184 * side or the other of a checkpoint record. Before flushing the
7185 * checkpoint record we must explicitly wait for any backend currently
7186 * performing those groups of actions.
7187 *
7188 * One example is end of transaction, so we must wait for any transactions
7189 * that are currently in commit critical sections. If an xact inserted
7190 * its commit record into XLOG just before the REDO point, then a crash
7191 * restart from the REDO point would not replay that record, which means
7192 * that our flushing had better include the xact's update of pg_xact. So
7193 * we wait until that xact has left its commit critical section before proceeding.
7194 * See notes in RecordTransactionCommit().
7195 *
7196 * Because we've already released the insertion locks, this test is a bit
7197 * fuzzy: it is possible that we will wait for xacts we didn't really need
7198 * to wait for. But the delay should be short and it seems better to make
7199 * checkpoint take a bit longer than to hold off insertions longer than
7200 * necessary. (In fact, the whole reason we have this issue is that xact.c
7201 * does commit record XLOG insertion and clog update as two separate steps
7202 * protected by different locks, but again that seems best on grounds of
7203 * minimizing lock contention.)
7204 *
7205 * A transaction that has not yet set delayChkptFlags when we look cannot
7206 * be at risk, since it has not inserted its commit record yet; and one
7207 * that's already cleared it is not at risk either, since it's done fixing
7208 * clog and we will correctly flush the update below. So we cannot miss
7209 * any xacts we need to wait for.
7210 */
7212 if (nvxids > 0)
7213 {
7214 do
7215 {
7216 /*
7217 * Keep absorbing fsync requests while we wait. There could even
7218 * be a deadlock if we don't, if the process that prevents the
7219 * checkpoint is trying to add a request to the queue.
7220 */
7222
7223 pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_START);
7224 pg_usleep(10000L); /* wait for 10 msec */
7226 } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7228 }
7229 pfree(vxids);
7230
7231 CheckPointGuts(checkPoint.redo, flags);
7232
7234 if (nvxids > 0)
7235 {
7236 do
7237 {
7239
7240 pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_COMPLETE);
7241 pg_usleep(10000L); /* wait for 10 msec */
7243 } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7245 }
7246 pfree(vxids);
7247
7248 /*
7249 * Take a snapshot of running transactions and write this to WAL. This
7250 * allows us to reconstruct the state of running transactions during
7251 * archive recovery, if required. Skip this if standby info is not active.
7252 *
7253 * If we are shutting down, or the Startup process is completing crash
7254 * recovery, we don't need to write running xact data.
7255 */
7256 if (!shutdown && XLogStandbyInfoActive())
7258
7260
7261 /*
7262 * Now insert the checkpoint record into XLOG.
7263 */
7265 XLogRegisterData(&checkPoint, sizeof(checkPoint));
7266 recptr = XLogInsert(RM_XLOG_ID,
7267 shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
7269
7270 XLogFlush(recptr);
7271
7272 /*
7273 * We mustn't write any new WAL after a shutdown checkpoint, or it will be
7274 * overwritten at next startup. No one should even try; this just allows
7275 * sanity-checking. In the case of an end-of-recovery checkpoint, we want
7276 * to just temporarily disable writing until the system has exited
7277 * recovery.
7278 */
7279 if (shutdown)
7280 {
7281 if (flags & CHECKPOINT_END_OF_RECOVERY)
7282 LocalXLogInsertAllowed = oldXLogAllowed;
7283 else
7284 LocalXLogInsertAllowed = 0; /* never again write WAL */
7285 }
7286
7287 /*
7288 * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
7289 * = end of actual checkpoint record.
7290 */
7291 if (shutdown && checkPoint.redo != ProcLastRecPtr)
7292 ereport(PANIC,
7293 (errmsg("concurrent write-ahead log activity while database system is shutting down")));
7294
7295 /*
7296 * Remember the prior checkpoint's redo ptr for
7297 * UpdateCheckPointDistanceEstimate()
7298 */
7299 PriorRedoPtr = ControlFile->checkPointCopy.redo;
7300
7301 /*
7302 * Update the control file.
7303 */
7304 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7305 if (shutdown)
7308 ControlFile->checkPointCopy = checkPoint;
7309 /* crash recovery should always recover to the end of WAL */
7312
7313 /*
7314 * Persist unloggedLSN value. It's reset on crash recovery, so this goes
7315 * unused on non-shutdown checkpoints, but seems useful to store it always
7316 * for debugging purposes.
7317 */
7319
7321 LWLockRelease(ControlFileLock);
7322
7323 /*
7324 * We are now done with critical updates; no need for system panic if we
7325 * have trouble while fooling with old log segments.
7326 */
7328
7329 /*
7330 * WAL summaries end when the next XLOG_CHECKPOINT_REDO or
7331 * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point
7332 * where (a) we're not inside of a critical section and (b) we can be
7333 * certain that the relevant record has been flushed to disk, which must
7334 * happen before it can be summarized.
7335 *
7336 * If this is a shutdown checkpoint, then this happens reasonably
7337 * promptly: we've only just inserted and flushed the
7338 * XLOG_CHECKPOINT_SHUTDOWN record. If this is not a shutdown checkpoint,
7339 * then this might not be very prompt at all: the XLOG_CHECKPOINT_REDO
7340 * record was written before we began flushing data to disk, and that
7341 * could be many minutes ago at this point. However, we don't XLogFlush()
7342 * after inserting that record, so we're not guaranteed that it's on disk
7343 * until after the above call that flushes the XLOG_CHECKPOINT_ONLINE
7344 * record.
7345 */
7347
7348 /*
7349 * Let smgr do post-checkpoint cleanup (eg, deleting old files).
7350 */
7352
7353 /*
7354 * Update the average distance between checkpoints if the prior checkpoint
7355 * exists.
7356 */
7357 if (PriorRedoPtr != InvalidXLogRecPtr)
7359
7360 INJECTION_POINT("checkpoint-before-old-wal-removal", NULL);
7361
7362 /*
7363 * Delete old log files (those no longer needed for the last checkpoint)
7364 * to prevent the disk holding the xlog from filling up.
7365 */
7367 KeepLogSeg(recptr, &_logSegNo);
7369 _logSegNo, InvalidOid,
7371 {
7372 /*
7373 * Some slots have been invalidated; recalculate the old-segment
7374 * horizon, starting again from RedoRecPtr.
7375 */
7377 KeepLogSeg(recptr, &_logSegNo);
7378 }
7379 _logSegNo--;
7380 RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr,
7381 checkPoint.ThisTimeLineID);
7382
7383 /*
7384 * Make more log segments if needed. (Do this after recycling old log
7385 * segments, since that may supply some of the needed files.)
7386 */
7387 if (!shutdown)
7388 PreallocXlogFiles(recptr, checkPoint.ThisTimeLineID);
7389
7390 /*
7391 * Truncate pg_subtrans if possible. We can throw away all data before
7392 * the oldest XMIN of any running transaction. No future transaction will
7393 * attempt to reference any pg_subtrans entry older than that (see Asserts
7394 * in subtrans.c). During recovery, though, we mustn't do this because
7395 * StartupSUBTRANS hasn't been called yet.
7396 */
7397 if (!RecoveryInProgress())
7399
7400 /* Real work is done; log and update stats. */
7401 LogCheckpointEnd(false);
7402
7403 /* Reset the process title */
7404 update_checkpoint_display(flags, false, true);
7405
7406 TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
7407 NBuffers,
7411
7412 return true;
7413}
static uint64 pg_atomic_read_membarrier_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:474
void AbsorbSyncRequests(void)
int NBuffers
Definition: globals.c:142
#define INJECTION_POINT(name, arg)
@ LW_SHARED
Definition: lwlock.h:113
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition: multixact.c:2212
#define XLOG_CHECKPOINT_REDO
Definition: pg_control.h:82
@ DB_SHUTDOWNING
Definition: pg_control.h:94
@ DB_SHUTDOWNED
Definition: pg_control.h:92
#define XLOG_CHECKPOINT_ONLINE
Definition: pg_control.h:69
#define InvalidOid
Definition: postgres_ext.h:37
#define DELAY_CHKPT_START
Definition: proc.h:135
#define DELAY_CHKPT_COMPLETE
Definition: proc.h:136
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition: procarray.c:1982
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
Definition: procarray.c:3051
TransactionId GetOldestActiveTransactionId(bool inCommitOnly, bool allDbs)
Definition: procarray.c:2833
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
Definition: procarray.c:3005
void pg_usleep(long microsec)
Definition: signal.c:53
bool InvalidateObsoleteReplicationSlots(uint32 possible_causes, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition: slot.c:2055
@ RS_INVAL_WAL_REMOVED
Definition: slot.h:62
@ RS_INVAL_IDLE_TIMEOUT
Definition: slot.h:68
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1282
TimestampTz ckpt_start_t
Definition: xlog.h:161
int ckpt_segs_removed
Definition: xlog.h:171
int ckpt_segs_added
Definition: xlog.h:170
int ckpt_bufs_written
Definition: xlog.h:167
int ckpt_segs_recycled
Definition: xlog.h:172
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168
XLogRecPtr unloggedLSN
Definition: pg_control.h:137
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:169
TransactionId oldestCommitTsXid
Definition: transam.h:232
TransactionId newestCommitTsXid
Definition: transam.h:233
TransactionId oldestXid
Definition: transam.h:222
TimeLineID InsertTimeLineID
Definition: xlog.c:509
XLogRecPtr RedoRecPtr
Definition: xlog.c:457
TimeLineID PrevTimeLineID
Definition: xlog.c:510
pg_atomic_uint64 unloggedLSN
Definition: xlog.c:464
XLogRecPtr RedoRecPtr
Definition: xlog.c:431
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:385
void SyncPreCheckpoint(void)
Definition: sync.c:177
void SyncPostCheckpoint(void)
Definition: sync.c:202
void WakeupWalSummarizer(void)
XLogRecPtr ProcLastRecPtr
Definition: xlog.c:254
bool RecoveryInProgress(void)
Definition: xlog.c:6386
static void WALInsertLockRelease(void)
Definition: xlog.c:1448
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition: xlog.c:1861
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1419
static void UpdateControlFile(void)
Definition: xlog.c:4586
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
Definition: xlog.c:3881
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:6701
static XLogRecPtr RedoRecPtr
Definition: xlog.c:274
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:6733
static void PreallocXlogFiles(XLogRecPtr endptr, TimeLineID tli)
Definition: xlog.c:3706
bool log_checkpoints
Definition: xlog.c:130
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:8001
static int LocalSetXLogInsertAllowed(void)
Definition: xlog.c:6474
XLogRecPtr GetLastImportantRecPtr(void)
Definition: xlog.c:6608
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:6838
static int LocalXLogInsertAllowed
Definition: xlog.c:237
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2780
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:7559
static void update_checkpoint_display(int flags, bool restartpoint, bool reset)
Definition: xlog.c:6876
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:140
#define CHECKPOINT_FORCE
Definition: xlog.h:142
#define XLogStandbyInfoActive()
Definition: xlog.h:123
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterData(const void *data, uint32 len)
Definition: xloginsert.c:364
void XLogBeginInsert(void)
Definition: xloginsert.c:149

References AbsorbSyncRequests(), ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, ControlFile, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, DELAY_CHKPT_COMPLETE, DELAY_CHKPT_START, elog, END_CRIT_SECTION, ereport, errmsg(), errmsg_internal(), ERROR, CheckPoint::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestTransactionIdConsideredRunning(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, INJECTION_POINT, XLogCtlData::Insert, Insert(), INSERT_FREESPACE, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, TransamVariablesData::newestCommitTsXid, CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, TransamVariablesData::oldestCommitTsXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, TransamVariablesData::oldestXid, CheckPoint::oldestXid, TransamVariablesData::oldestXidDB, CheckPoint::oldestXidDB, PANIC, pfree(), pg_atomic_read_membarrier_u64(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), PreallocXlogFiles(), 
XLogCtlData::PrevTimeLineID, CheckPoint::PrevTimeLineID, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_IDLE_TIMEOUT, RS_INVAL_WAL_REMOVED, SizeOfXLogLongPHD, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, ControlFileData::state, SyncPostCheckpoint(), SyncPreCheckpoint(), CheckPoint::ThisTimeLineID, CheckPoint::time, TransamVariables, TruncateSUBTRANS(), XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), WakeupWalSummarizer(), wal_level, CheckPoint::wal_level, wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogCtl, XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), and ShutdownXLOG().

◆ CreateEndOfRecoveryRecord()

static void CreateEndOfRecoveryRecord ( void  )
static

Definition at line 7424 of file xlog.c.

7425{
7426 xl_end_of_recovery xlrec;
7427 XLogRecPtr recptr;
7428
7429 /* sanity check */
7430 if (!RecoveryInProgress())
7431 elog(ERROR, "can only be used to end recovery");
7432
7433 xlrec.end_time = GetCurrentTimestamp();
7434 xlrec.wal_level = wal_level;
7435
7440
7442
7444 XLogRegisterData(&xlrec, sizeof(xl_end_of_recovery));
7445 recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY);
7446
7447 XLogFlush(recptr);
7448
7449 /*
7450 * Update the control file so that crash recovery can follow the timeline
7451 * changes to this point.
7452 */
7453 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7454 ControlFile->minRecoveryPoint = recptr;
7457 LWLockRelease(ControlFileLock);
7458
7460}
#define XLOG_END_OF_RECOVERY
Definition: pg_control.h:77
TimeLineID PrevTimeLineID
TimestampTz end_time
TimeLineID ThisTimeLineID

References ControlFile, elog, END_CRIT_SECTION, xl_end_of_recovery::end_time, ERROR, GetCurrentTimestamp(), XLogCtlData::InsertTimeLineID, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, XLogCtlData::PrevTimeLineID, xl_end_of_recovery::PrevTimeLineID, RecoveryInProgress(), START_CRIT_SECTION, xl_end_of_recovery::ThisTimeLineID, UpdateControlFile(), wal_level, xl_end_of_recovery::wal_level, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_END_OF_RECOVERY, XLogBeginInsert(), XLogCtl, XLogFlush(), XLogInsert(), and XLogRegisterData().

Referenced by PerformRecoveryXLogAction().

◆ CreateOverwriteContrecordRecord()

static XLogRecPtr CreateOverwriteContrecordRecord ( XLogRecPtr  aborted_lsn,
XLogRecPtr  pagePtr,
TimeLineID  newTLI 
)
static

Definition at line 7489 of file xlog.c.

7491{
7493 XLogRecPtr recptr;
7494 XLogPageHeader pagehdr;
7495 XLogRecPtr startPos;
7496
7497 /* sanity checks */
7498 if (!RecoveryInProgress())
7499 elog(ERROR, "can only be used at end of recovery");
7500 if (pagePtr % XLOG_BLCKSZ != 0)
7501 elog(ERROR, "invalid position for missing continuation record %X/%08X",
7502 LSN_FORMAT_ARGS(pagePtr));
7503
7504 /* The current WAL insert position should be right after the page header */
7505 startPos = pagePtr;
7506 if (XLogSegmentOffset(startPos, wal_segment_size) == 0)
7507 startPos += SizeOfXLogLongPHD;
7508 else
7509 startPos += SizeOfXLogShortPHD;
7510 recptr = GetXLogInsertRecPtr();
7511 if (recptr != startPos)
7512 elog(ERROR, "invalid WAL insert position %X/%08X for OVERWRITE_CONTRECORD",
7513 LSN_FORMAT_ARGS(recptr));
7514
7516
7517 /*
7518 * Initialize the XLOG page header (by GetXLogBuffer), and set the
7519 * XLP_FIRST_IS_OVERWRITE_CONTRECORD flag.
7520 *
7521 * No other backend is allowed to write WAL yet, so acquiring the WAL
7522 * insertion lock is just pro forma.
7523 */
7525 pagehdr = (XLogPageHeader) GetXLogBuffer(pagePtr, newTLI);
7528
7529 /*
7530 * Insert the XLOG_OVERWRITE_CONTRECORD record as the first record on the
7531 * page. We know it becomes the first record, because no other backend is
7532 * allowed to write WAL yet.
7533 */
7535 xlrec.overwritten_lsn = aborted_lsn;
7538 recptr = XLogInsert(RM_XLOG_ID, XLOG_OVERWRITE_CONTRECORD);
7539
7540 /* check that the record was inserted to the right place */
7541 if (ProcLastRecPtr != startPos)
7542 elog(ERROR, "OVERWRITE_CONTRECORD was inserted to unexpected position %X/%08X",
7544
7545 XLogFlush(recptr);
7546
7548
7549 return recptr;
7550}
#define XLOG_OVERWRITE_CONTRECORD
Definition: pg_control.h:81
static void WALInsertLockAcquire(void)
Definition: xlog.c:1374
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:9479
#define XLP_FIRST_IS_OVERWRITE_CONTRECORD
Definition: xlog_internal.h:80

References elog, END_CRIT_SECTION, ERROR, GetCurrentTimestamp(), GetXLogBuffer(), GetXLogInsertRecPtr(), LSN_FORMAT_ARGS, xl_overwrite_contrecord::overwrite_time, xl_overwrite_contrecord::overwritten_lsn, ProcLastRecPtr, RecoveryInProgress(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, START_CRIT_SECTION, wal_segment_size, WALInsertLockAcquire(), WALInsertLockRelease(), XLOG_OVERWRITE_CONTRECORD, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, XLP_FIRST_IS_OVERWRITE_CONTRECORD, and XLogPageHeaderData::xlp_info.

Referenced by StartupXLOG().

◆ CreateRestartPoint()

bool CreateRestartPoint ( int  flags)

Definition at line 7639 of file xlog.c.

7640{
7641 XLogRecPtr lastCheckPointRecPtr;
7642 XLogRecPtr lastCheckPointEndPtr;
7643 CheckPoint lastCheckPoint;
7644 XLogRecPtr PriorRedoPtr;
7645 XLogRecPtr receivePtr;
7646 XLogRecPtr replayPtr;
7647 TimeLineID replayTLI;
7648 XLogRecPtr endptr;
7649 XLogSegNo _logSegNo;
7650 TimestampTz xtime;
7651
7652 /* Concurrent checkpoint/restartpoint cannot happen */
7654
7655 /* Get a local copy of the last safe checkpoint record. */
7657 lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
7658 lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
7659 lastCheckPoint = XLogCtl->lastCheckPoint;
7661
7662 /*
7663 * Check that we're still in recovery mode. It's ok if we exit recovery
7664 * mode after this check; the restartpoint is valid anyway.
7665 */
7666 if (!RecoveryInProgress())
7667 {
7669 (errmsg_internal("skipping restartpoint, recovery has already ended")));
7670 return false;
7671 }
7672
7673 /*
7674 * If the last checkpoint record we've replayed is already our last
7675 * restartpoint, we can't perform a new restart point. We still update
7676 * minRecoveryPoint in that case, so that if this is a shutdown restart
7677 * point, we won't start up earlier than before. That's not strictly
7678 * necessary, but when hot standby is enabled, it would be rather weird if
7679 * the database opened up for read-only connections at a point-in-time
7680 * before the last shutdown. Such time travel is still possible in case of
7681 * immediate shutdown, though.
7682 *
7683 * We don't explicitly advance minRecoveryPoint when we do create a
7684 * restartpoint. It's assumed that flushing the buffers will do that as a
7685 * side-effect.
7686 */
7687 if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
7688 lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
7689 {
7691 errmsg_internal("skipping restartpoint, already performed at %X/%08X",
7692 LSN_FORMAT_ARGS(lastCheckPoint.redo)));
7693
7695 if (flags & CHECKPOINT_IS_SHUTDOWN)
7696 {
7697 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7700 LWLockRelease(ControlFileLock);
7701 }
7702 return false;
7703 }
7704
7705 /*
7706 * Update the shared RedoRecPtr so that the startup process can calculate
7707 * the number of segments replayed since last restartpoint, and request a
7708 * restartpoint if it exceeds CheckPointSegments.
7709 *
7710 * Like in CreateCheckPoint(), hold off insertions to update it, although
7711 * during recovery this is just pro forma, because no WAL insertions are
7712 * happening.
7713 */
7715 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
7717
7718 /* Also update the info_lck-protected copy */
7720 XLogCtl->RedoRecPtr = lastCheckPoint.redo;
7722
7723 /*
7724 * Prepare to accumulate statistics.
7725 *
7726 * Note: because it is possible for log_checkpoints to change while a
7727 * checkpoint proceeds, we always accumulate stats, even if
7728 * log_checkpoints is currently off.
7729 */
7732
7733 if (log_checkpoints)
7734 LogCheckpointStart(flags, true);
7735
7736 /* Update the process title */
7737 update_checkpoint_display(flags, true, false);
7738
7739 CheckPointGuts(lastCheckPoint.redo, flags);
7740
7741 /*
7742 * This location needs to be after CheckPointGuts() to ensure that some
7743 * work has already happened during this checkpoint.
7744 */
7745 INJECTION_POINT("create-restart-point", NULL);
7746
7747 /*
7748 * Remember the prior checkpoint's redo ptr for
7749 * UpdateCheckPointDistanceEstimate()
7750 */
7751 PriorRedoPtr = ControlFile->checkPointCopy.redo;
7752
7753 /*
7754 * Update pg_control, using current time. Check that it still shows an
7755 * older checkpoint, else do nothing; this is a quick hack to make sure
7756 * nothing really bad happens if somehow we get here after the
7757 * end-of-recovery checkpoint.
7758 */
7759 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7760 if (ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
7761 {
7762 /*
7763 * Update the checkpoint information. We do this even if the cluster
7764 * does not show DB_IN_ARCHIVE_RECOVERY to match with the set of WAL
7765 * segments recycled below.
7766 */
7767 ControlFile->checkPoint = lastCheckPointRecPtr;
7768 ControlFile->checkPointCopy = lastCheckPoint;
7769
7770 /*
7771 * Ensure minRecoveryPoint is past the checkpoint record and update it
7772 * if the control file still shows DB_IN_ARCHIVE_RECOVERY. Normally,
7773 * this will have happened already while writing out dirty buffers,
7774 * but not necessarily - e.g. because no buffers were dirtied. We do
7775 * this because a backup performed in recovery uses minRecoveryPoint
7776 * to determine which WAL files must be included in the backup, and
7777 * the file (or files) containing the checkpoint record must be
7778 * included, at a minimum. Note that for an ordinary restart of
7779 * recovery there's no value in having the minimum recovery point any
7780 * earlier than this anyway, because redo will begin just after the
7781 * checkpoint record.
7782 */
7784 {
7785 if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
7786 {
7787 ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
7789
7790 /* update local copy */
7793 }
7794 if (flags & CHECKPOINT_IS_SHUTDOWN)
7796 }
7798 }
7799 LWLockRelease(ControlFileLock);
7800
7801 /*
7802 * Update the average distance between checkpoints/restartpoints if the
7803 * prior checkpoint exists.
7804 */
7805 if (PriorRedoPtr != InvalidXLogRecPtr)
7807
7808 /*
7809 * Delete old log files (those no longer needed for the last restartpoint)
7810 * to prevent the disk holding the xlog from filling up.
7811 */
7813
7814 /*
7815 * Retreat _logSegNo using the current end of xlog replayed or received,
7816 * whichever is later.
7817 */
7818 receivePtr = GetWalRcvFlushRecPtr(NULL, NULL);
7819 replayPtr = GetXLogReplayRecPtr(&replayTLI);
7820 endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
7821 KeepLogSeg(endptr, &_logSegNo);
7823 _logSegNo, InvalidOid,
7825 {
7826 /*
7827 * Some slots have been invalidated; recalculate the old-segment
7828 * horizon, starting again from RedoRecPtr.
7829 */
7831 KeepLogSeg(endptr, &_logSegNo);
7832 }
7833 _logSegNo--;
7834
7835 /*
7836 * Try to recycle segments on a useful timeline. If we've been promoted
7837 * since the beginning of this restartpoint, use the new timeline chosen
7838 * at end of recovery. If we're still in recovery, use the timeline we're
7839 * currently replaying.
7840 *
7841 * There is no guarantee that the WAL segments will be useful on the
7842 * current timeline; if recovery proceeds to a new timeline right after
7843 * this, the pre-allocated WAL segments on this timeline will not be used,
7844 * and will go to waste until recycled on the next restartpoint. We'll live
7845 * with that.
7846 */
7847 if (!RecoveryInProgress())
7848 replayTLI = XLogCtl->InsertTimeLineID;
7849
7850 RemoveOldXlogFiles(_logSegNo, RedoRecPtr, endptr, replayTLI);
7851
7852 /*
7853 * Make more log segments if needed. (Do this after recycling old log
7854 * segments, since that may supply some of the needed files.)
7855 */
7856 PreallocXlogFiles(endptr, replayTLI);
7857
7858 /*
7859 * Truncate pg_subtrans if possible. We can throw away all data before
7860 * the oldest XMIN of any running transaction. No future transaction will
7861 * attempt to reference any pg_subtrans entry older than that (see Asserts
7862 * in subtrans.c). When hot standby is disabled, though, we mustn't do
7863 * this because StartupSUBTRANS hasn't been called yet.
7864 */
7865 if (EnableHotStandby)
7867
7868 /* Real work is done; log and update stats. */
7869 LogCheckpointEnd(true);
7870
7871 /* Reset the process title */
7872 update_checkpoint_display(flags, true, true);
7873
7874 xtime = GetLatestXTime();
7876 errmsg("recovery restart point at %X/%08X",
7877 LSN_FORMAT_ARGS(lastCheckPoint.redo)),
7878 xtime ? errdetail("Last completed transaction was at log time %s.",
7879 timestamptz_to_str(xtime)) : 0);
7880
7881 /*
7882 * Finally, execute archive_cleanup_command, if any.
7883 */
7884 if (archiveCleanupCommand && strcmp(archiveCleanupCommand, "") != 0)
7886 "archive_cleanup_command",
7887 false,
7888 WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND);
7889
7890 return true;
7891}
const char * timestamptz_to_str(TimestampTz t)
Definition: timestamp.c:1862
int64 TimestampTz
Definition: timestamp.h:39
bool IsUnderPostmaster
Definition: globals.c:120
@ B_CHECKPOINTER
Definition: miscadmin.h:362
BackendType MyBackendType
Definition: miscinit.c:64
@ DB_IN_ARCHIVE_RECOVERY
Definition: pg_control.h:96
@ DB_SHUTDOWNED_IN_RECOVERY
Definition: pg_control.h:93
CheckPoint lastCheckPoint
Definition: xlog.c:545
XLogRecPtr lastCheckPointRecPtr
Definition: xlog.c:543
XLogRecPtr lastCheckPointEndPtr
Definition: xlog.c:544
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition: xlog.c:2700
static XLogRecPtr LocalMinRecoveryPoint
Definition: xlog.c:646
static TimeLineID LocalMinRecoveryPointTLI
Definition: xlog.c:647
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint32 TimeLineID
Definition: xlogdefs.h:62
char * archiveCleanupCommand
Definition: xlogrecovery.c:86
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
TimestampTz GetLatestXTime(void)

References archiveCleanupCommand, Assert(), B_CHECKPOINTER, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_start_t, ControlFile, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg(), errmsg_internal(), ExecuteRecoveryCommand(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestTransactionIdConsideredRunning(), GetWalRcvFlushRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, INJECTION_POINT, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsUnderPostmaster, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyBackendType, PreallocXlogFiles(), RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_IDLE_TIMEOUT, RS_INVAL_WAL_REMOVED, SpinLockAcquire, SpinLockRelease, ControlFileData::state, CheckPoint::ThisTimeLineID, timestamptz_to_str(), TruncateSUBTRANS(), update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLogCtl, and XLogRecPtrIsInvalid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

◆ DataChecksumsEnabled()

◆ do_pg_abort_backup()

void do_pg_abort_backup ( int  code,
Datum  arg 
)

Definition at line 9438 of file xlog.c.

/*
 * do_pg_abort_backup: undo the shared/session backup bookkeeping when a
 * backup is abandoned.
 *
 * code - not used in the visible lines.
 * arg  - Datum-encoded bool; true when the call happens while backup start
 *        is still in progress (in which case no session backup may already
 *        be marked as running, per the Assert below).
 *
 * Work is done only if during_backup_start is true or sessionBackupState is
 * something other than SESSION_BACKUP_NONE.
 *
 * NOTE(review): listing lines 9447-9449, 9451-9452 and 9455 are elided in
 * this rendering. Per the References list they involve
 * WALInsertLockAcquireExclusive(), XLogCtlInsert::runningBackups,
 * WALInsertLockRelease() and a WARNING-level ereport -- confirm against
 * xlog.c before relying on the exact statements.
 */
9439{
9440 bool during_backup_start = DatumGetBool(arg);
9441
9442 /* If called during backup start, there shouldn't be one already running */
9443 Assert(!during_backup_start || sessionBackupState == SESSION_BACKUP_NONE);
9444
9445 if (during_backup_start || sessionBackupState != SESSION_BACKUP_NONE)
9446 {
9450
9453
/* The message below is only produced when not called from backup start. */
9454 if (!during_backup_start)
9456 errmsg("aborting backup due to backend exiting before pg_backup_stop was called"));
9457 }
9458}
#define WARNING
Definition: elog.h:36
void * arg
static bool DatumGetBool(Datum X)
Definition: postgres.h:100
int runningBackups
Definition: xlog.c:439
static SessionBackupState sessionBackupState
Definition: xlog.c:392
@ SESSION_BACKUP_NONE
Definition: xlog.h:288

References arg, Assert(), DatumGetBool(), ereport, errmsg(), XLogCtlData::Insert, XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, and XLogCtl.

Referenced by do_pg_backup_start(), perform_base_backup(), and register_persistent_abort_backup_handler().

◆ do_pg_backup_start()

void do_pg_backup_start ( const char *  backupidstr,
bool  fast,
List **  tablespaces,
BackupState state,
StringInfo  tblspcmapfile 
)

Definition at line 8836 of file xlog.c.

/*
 * do_pg_backup_start: begin an online base backup for this session.
 *
 * backupidstr   - backup label; rejected with ERROR if longer than MAXPGPATH
 *                 bytes, otherwise copied into state->name.
 * fast          - when true, CHECKPOINT_FAST is added to the checkpoint
 *                 request flags.
 * tablespaces   - if non-NULL, one tablespaceinfo per usable entry found
 *                 under PG_TBLSPC_DIR is appended to *tablespaces.
 * state         - must not be NULL; receives name, checkpointloc, startpoint,
 *                 starttime and started_in_recovery.
 * tblspcmapfile - receives one "<dirname> <escaped link target>" line per
 *                 symlinked tablespace.
 *
 * NOTE(review): this is a rendered listing; numbered lines that carried
 * hyperlinks (for example 8839, 8848, 8883-8885, 8892, 8921, 8946, 9131 and
 * 9138) are elided here and must be taken from xlog.c itself.
 */
8838{
8840
8841 Assert(state != NULL);
8843
8844 /*
8845 * During recovery, we don't need to check WAL level. Because, if WAL
8846 * level is not sufficient, it's impossible to get here during recovery.
8847 */
8849 ereport(ERROR,
8850 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8851 errmsg("WAL level not sufficient for making an online backup"),
8852 errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
8853
8854 if (strlen(backupidstr) > MAXPGPATH)
8855 ereport(ERROR,
8856 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8857 errmsg("backup label too long (max %d bytes)",
8858 MAXPGPATH)));
8859
8860 strlcpy(state->name, backupidstr, sizeof(state->name));
8861
8862 /*
8863 * Mark backup active in shared memory. We must do full-page WAL writes
8864 * during an on-line backup even if not doing so at other times, because
8865 * it's quite possible for the backup dump to obtain a "torn" (partially
8866 * written) copy of a database page if it reads the page concurrently with
8867 * our write to the same page. This can be fixed as long as the first
8868 * write to the page in the WAL sequence is a full-page write. Hence, we
8869 * increment runningBackups then force a CHECKPOINT, to ensure there are
8870 * no dirty pages in shared memory that might get dumped while the backup
8871 * is in progress without having a corresponding WAL record. (Once the
8872 * backup is complete, we need not force full-page writes anymore, since
8873 * we expect that any pages not modified during the backup interval must
8874 * have been correctly captured by the backup.)
8875 *
8876 * Note that forcing full-page writes has no effect during an online
8877 * backup from the standby.
8878 *
8879 * We must hold all the insertion locks to change the value of
8880 * runningBackups, to ensure adequate interlocking against
8881 * XLogInsertRecord().
8882 */
8886
8887 /*
8888 * Ensure we decrement runningBackups if we fail below. NB -- for this to
8889 * work correctly, it is critical that sessionBackupState is only updated
8890 * after this block is over.
8891 */
8893 {
8894 bool gotUniqueStartpoint = false;
8895 DIR *tblspcdir;
8896 struct dirent *de;
8897 tablespaceinfo *ti;
8898 int datadirpathlen;
8899
8900 /*
8901 * Force an XLOG file switch before the checkpoint, to ensure that the
8902 * WAL segment the checkpoint is written to doesn't contain pages with
8903 * old timeline IDs. That would otherwise happen if you called
8904 * pg_backup_start() right after restoring from a PITR archive: the
8905 * first WAL segment containing the startup checkpoint has pages in
8906 * the beginning with the old timeline ID. That can cause trouble at
8907 * recovery: we won't have a history file covering the old timeline if
8908 * pg_wal directory was not included in the base backup and the WAL
8909 * archive was cleared too before starting the backup.
8910 *
8911 * This also ensures that we have emitted a WAL page header that has
8912 * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
8913 * Therefore, if a WAL archiver (such as pglesslog) is trying to
8914 * compress out removable backup blocks, it won't remove any that
8915 * occur after this point.
8916 *
8917 * During recovery, we skip forcing XLOG file switch, which means that
8918 * the backup taken during recovery is not available for the special
8919 * recovery case described above.
8920 */
8922 RequestXLogSwitch(false);
8923
8924 do
8925 {
8926 bool checkpointfpw;
8927
8928 /*
8929 * Force a CHECKPOINT. Aside from being necessary to prevent torn
8930 * page problems, this guarantees that two successive backup runs
8931 * will have different checkpoint positions and hence different
8932 * history file names, even if nothing happened in between.
8933 *
8934 * During recovery, establish a restartpoint if possible. We use
8935 * the last restartpoint as the backup starting checkpoint. This
8936 * means that two successive backup runs can have same checkpoint
8937 * positions.
8938 *
8939 * Since the fact that we are executing do_pg_backup_start()
8940 * during recovery means that checkpointer is running, we can use
8941 * RequestCheckpoint() to establish a restartpoint.
8942 *
8943 * We use CHECKPOINT_FAST only if requested by user (via passing
8944 * fast = true). Otherwise this can take awhile.
8945 */
8947 (fast ? CHECKPOINT_FAST : 0));
8948
8949 /*
8950 * Now we need to fetch the checkpoint record location, and also
8951 * its REDO pointer. The oldest point in WAL that would be needed
8952 * to restore starting from the checkpoint is precisely the REDO
8953 * pointer.
8954 */
8955 LWLockAcquire(ControlFileLock, LW_SHARED);
8956 state->checkpointloc = ControlFile->checkPoint;
8957 state->startpoint = ControlFile->checkPointCopy.redo;
8959 checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
8960 LWLockRelease(ControlFileLock);
8961
8963 {
8964 XLogRecPtr recptr;
8965
8966 /*
8967 * Check to see if all WAL replayed during online backup
8968 * (i.e., since last restartpoint used as backup starting
8969 * checkpoint) contain full-page writes.
8970 */
8972 recptr = XLogCtl->lastFpwDisableRecPtr;
8974
8975 if (!checkpointfpw || state->startpoint <= recptr)
8976 ereport(ERROR,
8977 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8978 errmsg("WAL generated with \"full_page_writes=off\" was replayed "
8979 "since last restartpoint"),
8980 errhint("This means that the backup being taken on the standby "
8981 "is corrupt and should not be used. "
8982 "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
8983 "and then try an online backup again.")));
8984
8985 /*
8986 * During recovery, since we don't use the end-of-backup WAL
8987 * record and don't write the backup history file, the
8988 * starting WAL location doesn't need to be unique. This means
8989 * that two base backups started at the same time might use
8990 * the same checkpoint as starting locations.
8991 */
8992 gotUniqueStartpoint = true;
8993 }
8994
8995 /*
8996 * If two base backups are started at the same time (in WAL sender
8997 * processes), we need to make sure that they use different
8998 * checkpoints as starting locations, because we use the starting
8999 * WAL location as a unique identifier for the base backup in the
9000 * end-of-backup WAL record and when we write the backup history
9001 * file. Perhaps it would be better to generate a separate unique ID
9002 * for each backup instead of forcing another checkpoint, but
9003 * taking a checkpoint right after another is not that expensive
9004 * either because only few buffers have been dirtied yet.
9005 */
9007 if (XLogCtl->Insert.lastBackupStart < state->startpoint)
9008 {
9009 XLogCtl->Insert.lastBackupStart = state->startpoint;
9010 gotUniqueStartpoint = true;
9011 }
9013 } while (!gotUniqueStartpoint);
9014
9015 /*
9016 * Construct tablespace_map file.
9017 */
9018 datadirpathlen = strlen(DataDir);
9019
9020 /* Collect information about all tablespaces */
9021 tblspcdir = AllocateDir(PG_TBLSPC_DIR);
9022 while ((de = ReadDir(tblspcdir, PG_TBLSPC_DIR)) != NULL)
9023 {
9024 char fullpath[MAXPGPATH + sizeof(PG_TBLSPC_DIR)];
9025 char linkpath[MAXPGPATH];
9026 char *relpath = NULL;
9027 char *s;
9028 PGFileType de_type;
9029 char *badp;
9030 Oid tsoid;
9031
9032 /*
9033 * Try to parse the directory name as an unsigned integer.
9034 *
9035 * Tablespace directories should be positive integers that can be
9036 * represented in 32 bits, with no leading zeroes or trailing
9037 * garbage. If we come across a name that doesn't meet those
9038 * criteria, skip it.
 *
 * The pre-screen below bounds the FIRST character on both sides
 * ('1'..'9'), which rejects empty names, leading zeroes, signs and
 * leading non-digits before strtoul() sees them; trailing garbage
 * and range errors are caught by the checks after strtoul().
9039 */
9040 if (de->d_name[0] < '1' || de->d_name[0] > '9')
9041 continue;
9042 errno = 0;
9043 tsoid = strtoul(de->d_name, &badp, 10);
9044 if (*badp != '\0' || errno == EINVAL || errno == ERANGE)
9045 continue;
9046
9047 snprintf(fullpath, sizeof(fullpath), "%s/%s", PG_TBLSPC_DIR, de->d_name);
9048
9049 de_type = get_dirent_type(fullpath, de, false, ERROR);
9050
9051 if (de_type == PGFILETYPE_LNK)
9052 {
9053 StringInfoData escapedpath;
9054 int rllen;
9055
9056 rllen = readlink(fullpath, linkpath, sizeof(linkpath));
9057 if (rllen < 0)
9058 {
9060 (errmsg("could not read symbolic link \"%s\": %m",
9061 fullpath)));
9062 continue;
9063 }
9064 else if (rllen >= sizeof(linkpath))
9065 {
9067 (errmsg("symbolic link \"%s\" target is too long",
9068 fullpath)));
9069 continue;
9070 }
9071 linkpath[rllen] = '\0';
9072
9073 /*
9074 * Relpath holds the relative path of the tablespace directory
9075 * when it's located within PGDATA, or NULL if it's located
9076 * elsewhere.
9077 */
9078 if (rllen > datadirpathlen &&
9079 strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
9080 IS_DIR_SEP(linkpath[datadirpathlen]))
9081 relpath = pstrdup(linkpath + datadirpathlen + 1);
9082
9083 /*
9084 * Add a backslash-escaped version of the link path to the
9085 * tablespace map file.
9086 */
9087 initStringInfo(&escapedpath);
9088 for (s = linkpath; *s; s++)
9089 {
9090 if (*s == '\n' || *s == '\r' || *s == '\\')
9091 appendStringInfoChar(&escapedpath, '\\');
9092 appendStringInfoChar(&escapedpath, *s);
9093 }
9094 appendStringInfo(tblspcmapfile, "%s %s\n",
9095 de->d_name, escapedpath.data);
9096 pfree(escapedpath.data);
9097 }
9098 else if (de_type == PGFILETYPE_DIR)
9099 {
9100 /*
9101 * It's possible to use allow_in_place_tablespaces to create
9102 * directories directly under pg_tblspc, for testing purposes
9103 * only.
9104 *
9105 * In this case, we store a relative path rather than an
9106 * absolute path into the tablespaceinfo.
9107 */
9108 snprintf(linkpath, sizeof(linkpath), "%s/%s",
9109 PG_TBLSPC_DIR, de->d_name);
9110 relpath = pstrdup(linkpath);
9111 }
9112 else
9113 {
9114 /* Skip any other file type that appears here. */
9115 continue;
9116 }
9117
9118 ti = palloc(sizeof(tablespaceinfo));
9119 ti->oid = tsoid;
9120 ti->path = pstrdup(linkpath);
9121 ti->rpath = relpath;
9122 ti->size = -1;
9123
9124 if (tablespaces)
9125 *tablespaces = lappend(*tablespaces, ti);
9126 }
9127 FreeDir(tblspcdir);
9128
9129 state->starttime = (pg_time_t) time(NULL);
9130 }
9132
9133 state->started_in_recovery = backup_started_in_recovery;
9134
9135 /*
9136 * Mark that the start phase has correctly finished for the backup.
9137 */
9139}
static bool backup_started_in_recovery
Definition: basebackup.c:123
void RequestCheckpoint(int flags)
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition: file_utils.c:547
PGFileType
Definition: file_utils.h:19
@ PGFILETYPE_LNK
Definition: file_utils.h:24
@ PGFILETYPE_DIR
Definition: file_utils.h:23
char * DataDir
Definition: globals.c:71
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
List * lappend(List *list, void *datum)
Definition: list.c:339
#define IS_DIR_SEP(ch)
Definition: port.h:103
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
static Datum BoolGetDatum(bool X)
Definition: postgres.h:112
unsigned int Oid
Definition: postgres_ext.h:32
#define relpath(rlocator, forknum)
Definition: relpath.h:150
#define PG_TBLSPC_DIR
Definition: relpath.h:41
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:242
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
XLogRecPtr lastFpwDisableRecPtr
Definition: xlog.c:551
XLogRecPtr lastBackupStart
Definition: xlog.c:440
Definition: regguts.h:323
char * rpath
Definition: basebackup.h:32
#define readlink(path, buf, size)
Definition: win32_port.h:226
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:8110
void do_pg_abort_backup(int code, Datum arg)
Definition: xlog.c:9438
@ SESSION_BACKUP_RUNNING
Definition: xlog.h:289
#define CHECKPOINT_WAIT
Definition: xlog.h:145
#define CHECKPOINT_FAST
Definition: xlog.h:141
#define XLogIsNeeded()
Definition: xlog.h:109

References AllocateDir(), appendStringInfo(), appendStringInfoChar(), Assert(), backup_started_in_recovery, BoolGetDatum(), ControlFileData::checkPoint, CHECKPOINT_FAST, CHECKPOINT_FORCE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, ControlFile, dirent::d_name, StringInfoData::data, DataDir, do_pg_abort_backup(), ereport, errcode(), errhint(), errmsg(), ERROR, FreeDir(), CheckPoint::fullPageWrites, get_dirent_type(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXPGPATH, tablespaceinfo::oid, palloc(), tablespaceinfo::path, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, PG_TBLSPC_DIR, PGFILETYPE_DIR, PGFILETYPE_LNK, pstrdup(), ReadDir(), readlink, RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), tablespaceinfo::rpath, XLogCtlInsert::runningBackups, SESSION_BACKUP_RUNNING, sessionBackupState, tablespaceinfo::size, snprintf, SpinLockAcquire, SpinLockRelease, strlcpy(), CheckPoint::ThisTimeLineID, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLogCtl, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_backup_start().

◆ do_pg_backup_stop()

void do_pg_backup_stop ( BackupState state,
bool  waitforarchive 
)

Definition at line 9164 of file xlog.c.

/*
 * do_pg_backup_stop: finish the online backup described by *state.
 *
 * state          - must not be NULL; startpoint and started_in_recovery are
 *                  read and stoppoint is filled in (stoptli and stoptime are
 *                  set on the visible non-recovery path).
 * waitforarchive - when true and WAL archiving applies to the current mode,
 *                  loop until neither the last WAL segment nor the backup
 *                  history file is reported busy by XLogArchiveIsBusy();
 *                  when true but archiving does not apply, a message saying
 *                  that WAL archiving is not enabled is emitted instead.
 *
 * Raises ERROR if the WAL level is insufficient (outside recovery), if the
 * backup was started in recovery but recovery is no longer in progress, if
 * state->startpoint <= lastFpwDisableRecPtr while in recovery, or if the
 * backup history file cannot be created or written.
 *
 * NOTE(review): this is a rendered listing; numbered lines that carried
 * hyperlinks (for example 9197, 9203-9204, 9215, 9217, 9284, 9294, 9343 and
 * the ereport() openers before several errmsg() calls) are elided here and
 * must be taken from xlog.c itself.
 */
9165{
9166 bool backup_stopped_in_recovery = false;
9167 char histfilepath[MAXPGPATH];
9168 char lastxlogfilename[MAXFNAMELEN];
9169 char histfilename[MAXFNAMELEN];
9170 XLogSegNo _logSegNo;
9171 FILE *fp;
9172 int seconds_before_warning;
9173 int waits = 0;
9174 bool reported_waiting = false;
9175
9176 Assert(state != NULL);
9177
/* Sampled once here; every later branch uses this saved value. */
9178 backup_stopped_in_recovery = RecoveryInProgress();
9179
9180 /*
9181 * During recovery, we don't need to check WAL level. Because, if WAL
9182 * level is not sufficient, it's impossible to get here during recovery.
9183 */
9184 if (!backup_stopped_in_recovery && !XLogIsNeeded())
9185 ereport(ERROR,
9186 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9187 errmsg("WAL level not sufficient for making an online backup"),
9188 errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
9189
9190 /*
9191 * OK to update backup counter and session-level lock.
9192 *
9193 * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them,
9194 * otherwise they can be updated inconsistently, which might cause
9195 * do_pg_abort_backup() to fail.
9196 */
9198
9199 /*
9200 * It is expected that each do_pg_backup_start() call is matched by
9201 * exactly one do_pg_backup_stop() call.
9202 */
9205
9206 /*
9207 * Clean up session-level lock.
9208 *
9209 * You might think that WALInsertLockRelease() can be called before
9210 * cleaning up session-level lock because session-level lock doesn't need
9211 * to be protected with WAL insertion lock. But since
9212 * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
9213 * cleaned up before it.
9214 */
9216
9218
9219 /*
9220 * If we are taking an online backup from the standby, we confirm that the
9221 * standby has not been promoted during the backup.
9222 */
9223 if (state->started_in_recovery && !backup_stopped_in_recovery)
9224 ereport(ERROR,
9225 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9226 errmsg("the standby was promoted during online backup"),
9227 errhint("This means that the backup being taken is corrupt "
9228 "and should not be used. "
9229 "Try taking another online backup.")));
9230
9231 /*
9232 * During recovery, we don't write an end-of-backup record. We assume that
9233 * pg_control was backed up last and its minimum recovery point can be
9234 * available as the backup end location. Since we don't have an
9235 * end-of-backup record, we use the pg_control value to check whether
9236 * we've reached the end of backup when starting recovery from this
9237 * backup. We have no way of checking if pg_control wasn't backed up last
9238 * however.
9239 *
9240 * We don't force a switch to new WAL file but it is still possible to
9241 * wait for all the required files to be archived if waitforarchive is
9242 * true. This is okay if we use the backup to start a standby and fetch
9243 * the missing WAL using streaming replication. But in the case of an
9244 * archive recovery, a user should set waitforarchive to true and wait for
9245 * them to be archived to ensure that all the required files are
9246 * available.
9247 *
9248 * We return the current minimum recovery point as the backup end
9249 * location. Note that it can be greater than the exact backup end
9250 * location if the minimum recovery point is updated after the backup of
9251 * pg_control. This is harmless for current uses.
9252 *
9253 * XXX currently a backup history file is for informational and debug
9254 * purposes only. It's not essential for an online backup. Furthermore,
9255 * even if it's created, it will not be archived during recovery because
9256 * an archiver is not invoked. So it doesn't seem worthwhile to write a
9257 * backup history file during recovery.
9258 */
9259 if (backup_stopped_in_recovery)
9260 {
9261 XLogRecPtr recptr;
9262
9263 /*
9264 * Check to see if all WAL replayed during online backup contain
9265 * full-page writes.
9266 */
9268 recptr = XLogCtl->lastFpwDisableRecPtr;
9270
9271 if (state->startpoint <= recptr)
9272 ereport(ERROR,
9273 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9274 errmsg("WAL generated with \"full_page_writes=off\" was replayed "
9275 "during online backup"),
9276 errhint("This means that the backup being taken on the standby "
9277 "is corrupt and should not be used. "
9278 "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
9279 "and then try an online backup again.")));
9280
9281
9282 LWLockAcquire(ControlFileLock, LW_SHARED);
9283 state->stoppoint = ControlFile->minRecoveryPoint;
9285 LWLockRelease(ControlFileLock);
9286 }
9287 else
9288 {
9289 char *history_file;
9290
9291 /*
9292 * Write the backup-end xlog record
9293 */
9295 XLogRegisterData(&state->startpoint,
9296 sizeof(state->startpoint));
9297 state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
9298
9299 /*
9300 * Given that we're not in recovery, InsertTimeLineID is set and can't
9301 * change, so we can read it without a lock.
9302 */
9303 state->stoptli = XLogCtl->InsertTimeLineID;
9304
9305 /*
9306 * Force a switch to a new xlog segment file, so that the backup is
9307 * valid as soon as archiver moves out the current segment file.
9308 */
9309 RequestXLogSwitch(false);
9310
9311 state->stoptime = (pg_time_t) time(NULL);
9312
9313 /*
9314 * Write the backup history file
9315 */
9316 XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9317 BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo,
9318 state->startpoint, wal_segment_size);
9319 fp = AllocateFile(histfilepath, "w");
9320 if (!fp)
9321 ereport(ERROR,
9323 errmsg("could not create file \"%s\": %m",
9324 histfilepath)));
9325
9326 /* Build and save the contents of the backup history file */
9327 history_file = build_backup_content(state, true);
9328 fprintf(fp, "%s", history_file);
9329 pfree(history_file);
9330
/* Any of flush failure, stream error or close failure is reported as a write error. */
9331 if (fflush(fp) || ferror(fp) || FreeFile(fp))
9332 ereport(ERROR,
9334 errmsg("could not write file \"%s\": %m",
9335 histfilepath)));
9336
9337 /*
9338 * Clean out any no-longer-needed history files. As a side effect,
9339 * this will post a .ready file for the newly created history file,
9340 * notifying the archiver that history file may be archived
9341 * immediately.
9342 */
9344 }
9345
9346 /*
9347 * If archiving is enabled, wait for all the required WAL files to be
9348 * archived before returning. If archiving isn't enabled, the required WAL
9349 * needs to be transported via streaming replication (hopefully with
9350 * wal_keep_size set high enough), or some more exotic mechanism like
9351 * polling and copying files from pg_wal with script. We have no knowledge
9352 * of those mechanisms, so it's up to the user to ensure that he gets all
9353 * the required WAL.
9354 *
9355 * We wait until both the last WAL file filled during backup and the
9356 * history file have been archived, and assume that the alphabetic sorting
9357 * property of the WAL files ensures any earlier WAL files are safely
9358 * archived as well.
9359 *
9360 * We wait forever, since archive_command is supposed to work and we
9361 * assume the admin wanted his backup to work completely. If you don't
9362 * wish to wait, then either waitforarchive should be passed in as false,
9363 * or you can set statement_timeout. Also, some notices are issued to
9364 * clue in anyone who might be doing this interactively.
9365 */
9366
9367 if (waitforarchive &&
9368 ((!backup_stopped_in_recovery && XLogArchivingActive()) ||
9369 (backup_stopped_in_recovery && XLogArchivingAlways())))
9370 {
9371 XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size);
9372 XLogFileName(lastxlogfilename, state->stoptli, _logSegNo,
9374
9375 XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9376 BackupHistoryFileName(histfilename, state->stoptli, _logSegNo,
9377 state->startpoint, wal_segment_size);
9378
9379 seconds_before_warning = 60;
9380 waits = 0;
9381
/*
 * Each iteration waits on the latch with a 1000L timeout and bumps
 * "waits", which the warning text reports as elapsed seconds. A first
 * progress message is emitted once waits exceeds 5; the "still waiting"
 * message fires when waits reaches seconds_before_warning, which then
 * doubles.
 */
9382 while (XLogArchiveIsBusy(lastxlogfilename) ||
9383 XLogArchiveIsBusy(histfilename))
9384 {
9386
9387 if (!reported_waiting && waits > 5)
9388 {
9390 (errmsg("base backup done, waiting for required WAL segments to be archived")));
9391 reported_waiting = true;
9392 }
9393
9394 (void) WaitLatch(MyLatch,
9396 1000L,
9397 WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE);
9399
9400 if (++waits >= seconds_before_warning)
9401 {
9402 seconds_before_warning *= 2; /* This wraps in >10 years... */
9404 (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
9405 waits),
9406 errhint("Check that your \"archive_command\" is executing properly. "
9407 "You can safely cancel this backup, "
9408 "but the database backup will not be usable without all the WAL segments.")));
9409 }
9410 }
9411
9413 (errmsg("all required WAL segments have been archived")));
9414 }
9415 else if (waitforarchive)
9417 (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
9418}
#define fprintf(file, fmt, msg)
Definition: cubescan.l:21
#define NOTICE
Definition: elog.h:35
int FreeFile(FILE *file)
Definition: fd.c:2840
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2641
struct Latch * MyLatch
Definition: globals.c:63
void ResetLatch(Latch *latch)
Definition: latch.c:374
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:172
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define XLOG_BACKUP_END
Definition: pg_control.h:73
#define WL_TIMEOUT
Definition: waiteventset.h:37
#define WL_EXIT_ON_PM_DEATH
Definition: waiteventset.h:39
#define WL_LATCH_SET
Definition: waiteventset.h:34
static void CleanupBackupHistory(void)
Definition: xlog.c:4177
#define XLogArchivingAlways()
Definition: xlog.h:102
static void BackupHistoryFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
static void BackupHistoryFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
bool XLogArchiveIsBusy(const char *xlog)
Definition: xlogarchive.c:619
char * build_backup_content(BackupState *state, bool ishistoryfile)
Definition: xlogbackup.c:29

References AllocateFile(), Assert(), BackupHistoryFileName(), BackupHistoryFilePath(), build_backup_content(), CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), ControlFile, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, fprintf, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyLatch, NOTICE, pfree(), RecoveryInProgress(), RequestXLogSwitch(), ResetLatch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, SpinLockAcquire, SpinLockRelease, WaitLatch(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, WL_TIMEOUT, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogArchivingAlways, XLogBeginInsert(), XLogCtl, XLogFileName(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by perform_base_backup(), and pg_backup_stop().

◆ get_backup_status()

SessionBackupState get_backup_status ( void  )

Definition at line 9145 of file xlog.c.

/*
 * get_backup_status: return the current value of the file-static
 * sessionBackupState variable. No locking or other side effects are
 * visible in the body.
 */
9146{
9147 return sessionBackupState;
9148}

References sessionBackupState.

Referenced by pg_backup_start(), pg_backup_stop(), and SendBaseBackup().

◆ get_sync_bit()

static int get_sync_bit ( int  method)
static

Definition at line 8649 of file xlog.c.

/*
 * get_sync_bit: compute the file-open flag bits implied by a
 * wal_sync_method value.
 *
 * method - the sync method to translate (switch selector below).
 *
 * Returns o_direct_flag alone when enableFsync is false or for the methods
 * grouped under the first (elided) case labels; O_SYNC | o_direct_flag or
 * O_DSYNC | o_direct_flag for the labels guarded by the corresponding
 * #ifdef; and raises ERROR via elog() for any other value.
 *
 * NOTE(review): the condition guarding the PG_O_DIRECT assignment (listing
 * line 8659) and all case labels (8674-8676, 8679, 8683) are elided in this
 * rendering. Per the References list they involve io_direct_flags,
 * IO_DIRECT_WAL, AmWalReceiverProcess and the WAL_SYNC_METHOD_* constants --
 * confirm the exact mapping against xlog.c.
 */
8650{
8651 int o_direct_flag = 0;
8652
8653 /*
8654 * Use O_DIRECT if requested, except in walreceiver process. The WAL
8655 * written by walreceiver is normally read by the startup process soon
8656 * after it's written. Also, walreceiver performs unaligned writes, which
8657 * don't work with O_DIRECT, so it is required for correctness too.
8658 */
8660 o_direct_flag = PG_O_DIRECT;
8661
8662 /* If fsync is disabled, never open in sync mode */
8663 if (!enableFsync)
8664 return o_direct_flag;
8665
8666 switch (method)
8667 {
8668 /*
8669 * enum values for all sync options are defined even if they are
8670 * not supported on the current platform. But if not, they are
8671 * not included in the enum option array, and therefore will never
8672 * be seen here.
8673 */
8677 return o_direct_flag;
8678#ifdef O_SYNC
8680 return O_SYNC | o_direct_flag;
8681#endif
8682#ifdef O_DSYNC
8684 return O_DSYNC | o_direct_flag;
8685#endif
8686 default:
8687 /* can't happen (unless we are out of sync with option array) */
8688 elog(ERROR, "unrecognized \"wal_sync_method\": %d", method);
8689 return 0; /* silence warning */
8690 }
8691}
int io_direct_flags
Definition: fd.c:168
#define IO_DIRECT_WAL
Definition: fd.h:55
#define PG_O_DIRECT
Definition: fd.h:97
bool enableFsync
Definition: globals.c:129
#define AmWalReceiverProcess()
Definition: miscadmin.h:390
#define O_DSYNC
Definition: win32_port.h:342
@ WAL_SYNC_METHOD_OPEN
Definition: xlog.h:26
@ WAL_SYNC_METHOD_FDATASYNC
Definition: xlog.h:25
@ WAL_SYNC_METHOD_FSYNC_WRITETHROUGH
Definition: xlog.h:27
@ WAL_SYNC_METHOD_OPEN_DSYNC
Definition: xlog.h:28
@ WAL_SYNC_METHOD_FSYNC
Definition: xlog.h:24

References AmWalReceiverProcess, elog, enableFsync, ERROR, io_direct_flags, IO_DIRECT_WAL, O_DSYNC, PG_O_DIRECT, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, and WAL_SYNC_METHOD_OPEN_DSYNC.

Referenced by assign_wal_sync_method(), XLogFileInit(), XLogFileInitInternal(), and XLogFileOpen().

◆ GetActiveWalLevelOnStandby()

WalLevel GetActiveWalLevelOnStandby ( void  )

Definition at line 4901 of file xlog.c.

/*
 * GetActiveWalLevelOnStandby: return the wal_level field stored in
 * ControlFile. The read is done without taking any lock in this body.
 */
4902{
4903 return ControlFile->wal_level;
4904}

References ControlFile, and ControlFileData::wal_level.

Referenced by CheckLogicalDecodingRequirements().

◆ GetDefaultCharSignedness()

bool GetDefaultCharSignedness ( void  )

Definition at line 4629 of file xlog.c.

/*
 * GetDefaultCharSignedness: returns a bool.
 *
 * NOTE(review): the only statement (listing line 4631) is elided in this
 * rendering. The References list names ControlFile and
 * ControlFileData::default_char_signedness, so the body presumably returns
 * ControlFile->default_char_signedness -- confirm against xlog.c.
 */
4630{
4632}
bool default_char_signedness
Definition: pg_control.h:228

References ControlFile, and ControlFileData::default_char_signedness.

Referenced by CMPTRGM_CHOOSE().

◆ GetFakeLSNForUnloggedRel()

XLogRecPtr GetFakeLSNForUnloggedRel ( void  )

Definition at line 4644 of file xlog.c.

/*
 * GetFakeLSNForUnloggedRel: returns an XLogRecPtr.
 *
 * NOTE(review): the only statement (listing line 4646) is elided in this
 * rendering. The References list names pg_atomic_fetch_add_u64(),
 * XLogCtlData::unloggedLSN and XLogCtl, so the body presumably performs an
 * atomic fetch-and-add on XLogCtl->unloggedLSN and returns the result --
 * confirm the operand and increment against xlog.c.
 */
4645{
4647}
static uint64 pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
Definition: atomics.h:520

References pg_atomic_fetch_add_u64(), XLogCtlData::unloggedLSN, and XLogCtl.

Referenced by gistGetFakeLSN().

◆ GetFlushRecPtr()

XLogRecPtr GetFlushRecPtr ( TimeLineID insertTLI)

◆ GetFullPageWriteInfo()

void GetFullPageWriteInfo ( XLogRecPtr RedoRecPtr_p,
bool *  doPageWrites_p 
)

Definition at line 6519 of file xlog.c.

6520{
6521 *RedoRecPtr_p = RedoRecPtr;
6522 *doPageWrites_p = doPageWrites;
6523}
static bool doPageWrites
Definition: xlog.c:287

References doPageWrites, and RedoRecPtr.

Referenced by XLogCheckBufferNeedsBackup(), and XLogInsert().

◆ GetInsertRecPtr()

XLogRecPtr GetInsertRecPtr ( void  )

◆ GetLastImportantRecPtr()

XLogRecPtr GetLastImportantRecPtr ( void  )

Definition at line 6608 of file xlog.c.

6609{
6611 int i;
6612
6613 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
6614 {
6615 XLogRecPtr last_important;
6616
6617 /*
6618 * Need to take a lock to prevent torn reads of the LSN, which are
6619 * possible on some of the supported platforms. WAL insert locks only
6620 * support exclusive mode, so we have to use that.
6621 */
6623 last_important = WALInsertLocks[i].l.lastImportantAt;
6624 LWLockRelease(&WALInsertLocks[i].l.lock);
6625
6626 if (res < last_important)
6627 res = last_important;
6628 }
6629
6630 return res;
6631}
int i
Definition: isn.c:77
XLogRecPtr lastImportantAt
Definition: xlog.c:372
WALInsertLock l
Definition: xlog.c:384
static WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:569
#define NUM_XLOGINSERT_LOCKS
Definition: xlog.c:151

References i, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by BackgroundWriterMain(), CheckArchiveTimeout(), and CreateCheckPoint().

◆ GetLastSegSwitchData()

pg_time_t GetLastSegSwitchData ( XLogRecPtr lastSwitchLSN)

Definition at line 6637 of file xlog.c.

6638{
6639 pg_time_t result;
6640
6641 /* Need WALWriteLock, but shared lock is sufficient */
6642 LWLockAcquire(WALWriteLock, LW_SHARED);
6643 result = XLogCtl->lastSegSwitchTime;
6644 *lastSwitchLSN = XLogCtl->lastSegSwitchLSN;
6645 LWLockRelease(WALWriteLock);
6646
6647 return result;
6648}
pg_time_t lastSegSwitchTime
Definition: xlog.c:467
XLogRecPtr lastSegSwitchLSN
Definition: xlog.c:468

References XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by CheckArchiveTimeout().

◆ GetMockAuthenticationNonce()

char * GetMockAuthenticationNonce ( void  )

Definition at line 4605 of file xlog.c.

4606{
4607 Assert(ControlFile != NULL);
4609}
char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]
Definition: pg_control.h:235

References Assert(), ControlFile, and ControlFileData::mock_authentication_nonce.

Referenced by scram_mock_salt().

◆ GetOldestRestartPoint()

void GetOldestRestartPoint ( XLogRecPtr oldrecptr,
TimeLineID oldtli 
)

◆ GetRecoveryState()

RecoveryState GetRecoveryState ( void  )

Definition at line 6422 of file xlog.c.

6423{
6424 RecoveryState retval;
6425
6427 retval = XLogCtl->SharedRecoveryState;
6429
6430 return retval;
6431}
RecoveryState
Definition: xlog.h:90

References XLogCtlData::info_lck, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by XLogArchiveCheckDone().

◆ GetRedoRecPtr()

XLogRecPtr GetRedoRecPtr ( void  )

Definition at line 6489 of file xlog.c.

6490{
6491 XLogRecPtr ptr;
6492
6493 /*
6494 * The possibly not up-to-date copy in XLogCtl is enough. Even if we
6495 * grabbed a WAL insertion lock to read the authoritative value in
6496 * Insert->RedoRecPtr, someone might update it just after we've released
6497 * the lock.
6498 */
6500 ptr = XLogCtl->RedoRecPtr;
6502
6503 if (RedoRecPtr < ptr)
6504 RedoRecPtr = ptr;
6505
6506 return RedoRecPtr;
6507}

References XLogCtlData::info_lck, RedoRecPtr, XLogCtlData::RedoRecPtr, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by CheckPointLogicalRewriteHeap(), CheckPointSnapBuild(), MaybeRemoveOldWalSummaries(), nextval_internal(), ReplicationSlotReserveWal(), smgr_bulk_finish(), smgr_bulk_start_smgr(), XLogPageRead(), XLogSaveBufferForHint(), and XLogWrite().

◆ GetSystemIdentifier()

◆ GetWALAvailability()

WALAvailability GetWALAvailability ( XLogRecPtr  targetLSN)

Definition at line 7917 of file xlog.c.

7918{
7919 XLogRecPtr currpos; /* current write LSN */
7920 XLogSegNo currSeg; /* segid of currpos */
7921 XLogSegNo targetSeg; /* segid of targetLSN */
7922 XLogSegNo oldestSeg; /* actual oldest segid */
7923 XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */
7924 XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */
7925 uint64 keepSegs;
7926
7927 /*
7928 * The slot does not reserve WAL: it is either deactivated or has never been active.
7929 */
7930 if (XLogRecPtrIsInvalid(targetLSN))
7931 return WALAVAIL_INVALID_LSN;
7932
7933 /*
7934 * Calculate the oldest segment currently reserved by all slots,
7935 * considering wal_keep_size and max_slot_wal_keep_size. Initialize
7936 * oldestSlotSeg to the current segment.
7937 */
7938 currpos = GetXLogWriteRecPtr();
7939 XLByteToSeg(currpos, oldestSlotSeg, wal_segment_size);
7940 KeepLogSeg(currpos, &oldestSlotSeg);
7941
7942 /*
7943 * Find the oldest extant segment file. We get 1 until checkpoint removes
7944 * the first WAL segment file since startup, which can make the status
7945 * wrong under certain abnormal conditions, but that does no actual harm.
7946 */
7947 oldestSeg = XLogGetLastRemovedSegno() + 1;
7948
7949 /* calculate oldest segment by max_wal_size */
7950 XLByteToSeg(currpos, currSeg, wal_segment_size);
7952
7953 if (currSeg > keepSegs)
7954 oldestSegMaxWalSize = currSeg - keepSegs;
7955 else
7956 oldestSegMaxWalSize = 1;
7957
7958 /* the segment we care about */
7959 XLByteToSeg(targetLSN, targetSeg, wal_segment_size);
7960
7961 /*
7962 * No point in returning reserved or extended status values if the
7963 * targetSeg is known to be lost.
7964 */
7965 if (targetSeg >= oldestSlotSeg)
7966 {
7967 /* show "reserved" when targetSeg is within max_wal_size */
7968 if (targetSeg >= oldestSegMaxWalSize)
7969 return WALAVAIL_RESERVED;
7970
7971 /* being retained by slots exceeding max_wal_size */
7972 return WALAVAIL_EXTENDED;
7973 }
7974
7975 /* WAL segments are no longer retained but haven't been removed yet */
7976 if (targetSeg >= oldestSeg)
7977 return WALAVAIL_UNRESERVED;
7978
7979 /* Definitely lost */
7980 return WALAVAIL_REMOVED;
7981}
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3774
XLogRecPtr GetXLogWriteRecPtr(void)
Definition: xlog.c:9495
@ WALAVAIL_REMOVED
Definition: xlog.h:194
@ WALAVAIL_RESERVED
Definition: xlog.h:190
@ WALAVAIL_UNRESERVED
Definition: xlog.h:193
@ WALAVAIL_EXTENDED
Definition: xlog.h:191
@ WALAVAIL_INVALID_LSN
Definition: xlog.h:189

References ConvertToXSegs, GetXLogWriteRecPtr(), KeepLogSeg(), max_wal_size_mb, wal_segment_size, WALAVAIL_EXTENDED, WALAVAIL_INVALID_LSN, WALAVAIL_REMOVED, WALAVAIL_RESERVED, WALAVAIL_UNRESERVED, XLByteToSeg, XLogGetLastRemovedSegno(), and XLogRecPtrIsInvalid.

Referenced by pg_get_replication_slots().

◆ GetWALInsertionTimeLine()

TimeLineID GetWALInsertionTimeLine ( void  )

◆ GetWALInsertionTimeLineIfSet()

TimeLineID GetWALInsertionTimeLineIfSet ( void  )

Definition at line 6588 of file xlog.c.

6589{
6590 TimeLineID insertTLI;
6591
6593 insertTLI = XLogCtl->InsertTimeLineID;
6595
6596 return insertTLI;
6597}

References XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by GetLatestLSN().

◆ GetXLogBuffer()

static char * GetXLogBuffer ( XLogRecPtr  ptr,
TimeLineID  tli 
)
static

Definition at line 1635 of file xlog.c.

1636{
1637 int idx;
1638 XLogRecPtr endptr;
1639 static uint64 cachedPage = 0;
1640 static char *cachedPos = NULL;
1641 XLogRecPtr expectedEndPtr;
1642
1643 /*
1644 * Fast path for the common case that we need to access again the same
1645 * page as last time.
1646 */
1647 if (ptr / XLOG_BLCKSZ == cachedPage)
1648 {
1649 Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
1650 Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1651 return cachedPos + ptr % XLOG_BLCKSZ;
1652 }
1653
1654 /*
1655 * The XLog buffer cache is organized so that a page is always loaded to a
1656 * particular buffer. That way we can easily calculate the buffer a given
1657 * page must be loaded into, from the XLogRecPtr alone.
1658 */
1659 idx = XLogRecPtrToBufIdx(ptr);
1660
1661 /*
1662 * See what page is loaded in the buffer at the moment. It could be the
1663 * page we're looking for, or something older. It can't be anything newer
1664 * - that would imply the page we're looking for has already been written
1665 * out to disk and evicted, and the caller is responsible for making sure
1666 * that doesn't happen.
1667 *
1668 * We don't hold a lock while we read the value. If someone is just about
1669 * to initialize or has just initialized the page, it's possible that we
1670 * get InvalidXLogRecPtr. That's ok, we'll grab the mapping lock (in
1671 * AdvanceXLInsertBuffer) and retry if we see anything other than the page
1672 * we're looking for.
1673 */
1674 expectedEndPtr = ptr;
1675 expectedEndPtr += XLOG_BLCKSZ - ptr % XLOG_BLCKSZ;
1676
1678 if (expectedEndPtr != endptr)
1679 {
1680 XLogRecPtr initializedUpto;
1681
1682 /*
1683 * Before calling AdvanceXLInsertBuffer(), which can block, let others
1684 * know how far we're finished with inserting the record.
1685 *
1686 * NB: If 'ptr' points to just after the page header, advertise a
1687 * position at the beginning of the page rather than 'ptr' itself. If
1688 * there are no other insertions running, someone might try to flush
1689 * up to our advertised location. If we advertised a position after
1690 * the page header, someone might try to flush the page header, even
1691 * though page might actually not be initialized yet. As the first
1692 * inserter on the page, we are effectively responsible for making
1693 * sure that it's initialized, before we let insertingAt to move past
1694 * the page header.
1695 */
1696 if (ptr % XLOG_BLCKSZ == SizeOfXLogShortPHD &&
1697 XLogSegmentOffset(ptr, wal_segment_size) > XLOG_BLCKSZ)
1698 initializedUpto = ptr - SizeOfXLogShortPHD;
1699 else if (ptr % XLOG_BLCKSZ == SizeOfXLogLongPHD &&
1700 XLogSegmentOffset(ptr, wal_segment_size) < XLOG_BLCKSZ)
1701 initializedUpto = ptr - SizeOfXLogLongPHD;
1702 else
1703 initializedUpto = ptr;
1704
1705 WALInsertLockUpdateInsertingAt(initializedUpto);
1706
1707 AdvanceXLInsertBuffer(ptr, tli, false);
1709
1710 if (expectedEndPtr != endptr)
1711 elog(PANIC, "could not find WAL buffer for %X/%08X",
1712 LSN_FORMAT_ARGS(ptr));
1713 }
1714 else
1715 {
1716 /*
1717 * Make sure the initialization of the page is visible to us, and
1718 * won't arrive later to overwrite the WAL data we write on the page.
1719 */
1721 }
1722
1723 /*
1724 * Found the buffer holding this page. Return a pointer to the right
1725 * offset within the page.
1726 */
1727 cachedPage = ptr / XLOG_BLCKSZ;
1728 cachedPos = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1729
1730 Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
1731 Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1732
1733 return cachedPos + ptr % XLOG_BLCKSZ;
1734}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:262
#define pg_memory_barrier()
Definition: atomics.h:141
static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt)
Definition: xlog.c:1474
static void AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
Definition: xlog.c:1988

References AdvanceXLInsertBuffer(), Assert(), elog, idx(), LSN_FORMAT_ARGS, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_memory_barrier, SizeOfXLogLongPHD, SizeOfXLogShortPHD, wal_segment_size, WALInsertLockUpdateInsertingAt(), XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by CopyXLogRecordToWAL(), and CreateOverwriteContrecordRecord().

◆ GetXLogInsertRecPtr()

XLogRecPtr GetXLogInsertRecPtr ( void  )

Definition at line 9479 of file xlog.c.

9480{
9482 uint64 current_bytepos;
9483
9484 SpinLockAcquire(&Insert->insertpos_lck);
9485 current_bytepos = Insert->CurrBytePos;
9486 SpinLockRelease(&Insert->insertpos_lck);
9487
9488 return XLogBytePosToRecPtr(current_bytepos);
9489}

References XLogCtlData::Insert, Insert(), SpinLockAcquire, SpinLockRelease, XLogBytePosToRecPtr(), and XLogCtl.

Referenced by CreateOverwriteContrecordRecord(), gistGetFakeLSN(), logical_begin_heap_rewrite(), pg_current_wal_insert_lsn(), and ReplicationSlotReserveWal().

◆ GetXLogWriteRecPtr()

◆ InitControlFile()

static void InitControlFile ( uint64  sysidentifier,
uint32  data_checksum_version 
)
static

Definition at line 4220 of file xlog.c.

4221{
4222 char mock_auth_nonce[MOCK_AUTH_NONCE_LEN];
4223
4224 /*
4225 * Generate a random nonce. This is used for authentication requests that
4226 * will fail because the user does not exist. The nonce is used to create
4227 * a genuine-looking password challenge for the non-existent user, in lieu
4228 * of an actual stored password.
4229 */
4230 if (!pg_strong_random(mock_auth_nonce, MOCK_AUTH_NONCE_LEN))
4231 ereport(PANIC,
4232 (errcode(ERRCODE_INTERNAL_ERROR),
4233 errmsg("could not generate secret authorization token")));
4234
4235 memset(ControlFile, 0, sizeof(ControlFileData));
4236 /* Initialize pg_control status fields */
4237 ControlFile->system_identifier = sysidentifier;
4241
4242 /* Set important parameter values for use when replaying WAL */
4251 ControlFile->data_checksum_version = data_checksum_version;
4252}
bool track_commit_timestamp
Definition: commit_ts.c:109
#define MOCK_AUTH_NONCE_LEN
Definition: pg_control.h:28
bool pg_strong_random(void *buf, size_t len)
bool track_commit_timestamp
Definition: pg_control.h:185
bool wal_log_hints
Definition: xlog.c:124
#define FirstNormalUnloggedLSN
Definition: xlogdefs.h:36

References ControlFile, ControlFileData::data_checksum_version, DB_SHUTDOWNED, ereport, errcode(), errmsg(), FirstNormalUnloggedLSN, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, MOCK_AUTH_NONCE_LEN, ControlFileData::mock_authentication_nonce, PANIC, pg_strong_random(), ControlFileData::state, ControlFileData::system_identifier, track_commit_timestamp, ControlFileData::track_commit_timestamp, ControlFileData::unloggedLSN, wal_level, ControlFileData::wal_level, wal_log_hints, and ControlFileData::wal_log_hints.

Referenced by BootStrapXLOG().

◆ InitializeWalConsistencyChecking()

void InitializeWalConsistencyChecking ( void  )

Definition at line 4826 of file xlog.c.

4827{
4829
4831 {
4832 struct config_generic *guc;
4833
4834 guc = find_option("wal_consistency_checking", false, false, ERROR);
4835
4837
4838 set_config_option_ext("wal_consistency_checking",
4840 guc->scontext, guc->source, guc->srole,
4841 GUC_ACTION_SET, true, ERROR, false);
4842
4843 /* checking should not be deferred again */
4845 }
4846}
int set_config_option_ext(const char *name, const char *value, GucContext context, GucSource source, Oid srole, GucAction action, bool changeVal, int elevel, bool is_reload)
Definition: guc.c:3387
struct config_generic * find_option(const char *name, bool create_placeholders, bool skip_errors, int elevel)
Definition: guc.c:1236
@ GUC_ACTION_SET
Definition: guc.h:203
GucContext scontext
Definition: guc_tables.h:185
GucSource source
Definition: guc_tables.h:183
char * wal_consistency_checking_string
Definition: xlog.c:126

References Assert(), check_wal_consistency_checking_deferred, ERROR, find_option(), GUC_ACTION_SET, process_shared_preload_libraries_done, config_generic::scontext, set_config_option_ext(), config_generic::source, config_generic::srole, and wal_consistency_checking_string.

Referenced by PostgresSingleUserMain(), and PostmasterMain().

◆ InstallXLogFileSegment()

static bool InstallXLogFileSegment ( XLogSegNo segno,
char *  tmppath,
bool  find_free,
XLogSegNo  max_segno,
TimeLineID  tli 
)
static

Definition at line 3579 of file xlog.c.

3581{
3582 char path[MAXPGPATH];
3583 struct stat stat_buf;
3584
3585 Assert(tli != 0);
3586
3587 XLogFilePath(path, tli, *segno, wal_segment_size);
3588
3589 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
3591 {
3592 LWLockRelease(ControlFileLock);
3593 return false;
3594 }
3595
3596 if (!find_free)
3597 {
3598 /* Force installation: get rid of any pre-existing segment file */
3599 durable_unlink(path, DEBUG1);
3600 }
3601 else
3602 {
3603 /* Find a free slot to put it in */
3604 while (stat(path, &stat_buf) == 0)
3605 {
3606 if ((*segno) >= max_segno)
3607 {
3608 /* Failed to find a free slot within specified range */
3609 LWLockRelease(ControlFileLock);
3610 return false;
3611 }
3612 (*segno)++;
3613 XLogFilePath(path, tli, *segno, wal_segment_size);
3614 }
3615 }
3616
3617 Assert(access(path, F_OK) != 0 && errno == ENOENT);
3618 if (durable_rename(tmppath, path, LOG) != 0)
3619 {
3620 LWLockRelease(ControlFileLock);
3621 /* durable_rename already emitted log message */
3622 return false;
3623 }
3624
3625 LWLockRelease(ControlFileLock);
3626
3627 return true;
3628}
int durable_unlink(const char *fname, int elevel)
Definition: fd.c:869
short access
Definition: preproc-type.c:36
bool InstallXLogFileSegmentActive
Definition: xlog.c:526
#define stat
Definition: win32_port.h:274

References Assert(), DEBUG1, durable_rename(), durable_unlink(), XLogCtlData::InstallXLogFileSegmentActive, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MAXPGPATH, stat, wal_segment_size, XLogCtl, and XLogFilePath().

Referenced by RemoveXlogFile(), XLogFileCopy(), and XLogFileInitInternal().

◆ IsInstallXLogFileSegmentActive()

bool IsInstallXLogFileSegmentActive ( void  )

Definition at line 9536 of file xlog.c.

9537{
9538 bool result;
9539
9540 LWLockAcquire(ControlFileLock, LW_SHARED);
9542 LWLockRelease(ControlFileLock);
9543
9544 return result;
9545}

References XLogCtlData::InstallXLogFileSegmentActive, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by XLogFileRead().

◆ issue_xlog_fsync()

void issue_xlog_fsync ( int  fd,
XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 8739 of file xlog.c.

8740{
8741 char *msg = NULL;
8743
8744 Assert(tli != 0);
8745
8746 /*
8747 * Quick exit if fsync is disabled or write() has already synced the WAL
8748 * file.
8749 */
8750 if (!enableFsync ||
8753 return;
8754
8755 /*
8756 * Measure I/O timing to sync the WAL file for pg_stat_io.
8757 */
8759
8760 pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
8761 switch (wal_sync_method)
8762 {
8764 if (pg_fsync_no_writethrough(fd) != 0)
8765 msg = _("could not fsync file \"%s\": %m");
8766 break;
8767#ifdef HAVE_FSYNC_WRITETHROUGH
8769 if (pg_fsync_writethrough(fd) != 0)
8770 msg = _("could not fsync write-through file \"%s\": %m");
8771 break;
8772#endif
8774 if (pg_fdatasync(fd) != 0)
8775 msg = _("could not fdatasync file \"%s\": %m");
8776 break;
8779 /* not reachable */
8780 Assert(false);
8781 break;
8782 default:
8783 ereport(PANIC,
8784 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8785 errmsg_internal("unrecognized \"wal_sync_method\": %d", wal_sync_method));
8786 break;
8787 }
8788
8789 /* PANIC if failed to fsync */
8790 if (msg)
8791 {
8792 char xlogfname[MAXFNAMELEN];
8793 int save_errno = errno;
8794
8795 XLogFileName(xlogfname, tli, segno, wal_segment_size);
8796 errno = save_errno;
8797 ereport(PANIC,
8799 errmsg(msg, xlogfname)));
8800 }
8801
8803
8805 start, 1, 0);
8806}
#define _(x)
Definition: elog.c:91
int pg_fsync_no_writethrough(int fd)
Definition: fd.c:438
int pg_fdatasync(int fd)
Definition: fd.c:477
int pg_fsync_writethrough(int fd)
Definition: fd.c:458
return str start
@ IOOBJECT_WAL
Definition: pgstat.h:277
@ IOCONTEXT_NORMAL
Definition: pgstat.h:287
@ IOOP_FSYNC
Definition: pgstat.h:306
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition: pgstat_io.c:91
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition: pgstat_io.c:122
static int fd(const char *x, int i)
Definition: preproc-init.c:105
bool track_wal_io_timing
Definition: xlog.c:138

References _, Assert(), enableFsync, ereport, errcode(), errcode_for_file_access(), errmsg(), errmsg_internal(), fd(), IOCONTEXT_NORMAL, IOOBJECT_WAL, IOOP_FSYNC, MAXFNAMELEN, PANIC, pg_fdatasync(), pg_fsync_no_writethrough(), pg_fsync_writethrough(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), start, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, and XLogFileName().

Referenced by XLogWalRcvFlush(), and XLogWrite().

◆ KeepLogSeg()

static void KeepLogSeg ( XLogRecPtr  recptr,
XLogSegNo logSegNo 
)
static

Definition at line 8001 of file xlog.c.

8002{
8003 XLogSegNo currSegNo;
8004 XLogSegNo segno;
8005 XLogRecPtr keep;
8006
8007 XLByteToSeg(recptr, currSegNo, wal_segment_size);
8008 segno = currSegNo;
8009
8010 /* Calculate how many segments are kept by slots. */
8012 if (keep != InvalidXLogRecPtr && keep < recptr)
8013 {
8014 XLByteToSeg(keep, segno, wal_segment_size);
8015
8016 /*
8017 * Account for max_slot_wal_keep_size to avoid keeping more than
8018 * configured. However, don't do that during a binary upgrade: if
8019 * slots were to be invalidated because of this, it would not be
8020 * possible to preserve logical ones during the upgrade.
8021 */
8023 {
8024 uint64 slot_keep_segs;
8025
8026 slot_keep_segs =
8028
8029 if (currSegNo - segno > slot_keep_segs)
8030 segno = currSegNo - slot_keep_segs;
8031 }
8032 }
8033
8034 /*
8035 * If WAL summarization is in use, don't remove WAL that has yet to be
8036 * summarized.
8037 */
8038 keep = GetOldestUnsummarizedLSN(NULL, NULL);
8039 if (keep != InvalidXLogRecPtr)
8040 {
8041 XLogSegNo unsummarized_segno;
8042
8043 XLByteToSeg(keep, unsummarized_segno, wal_segment_size);
8044 if (unsummarized_segno < segno)
8045 segno = unsummarized_segno;
8046 }
8047
8048 /* but, keep at least wal_keep_size if that's set */
8049 if (wal_keep_size_mb > 0)
8050 {
8051 uint64 keep_segs;
8052
8054 if (currSegNo - segno < keep_segs)
8055 {
8056 /* avoid underflow, don't go below 1 */
8057 if (currSegNo <= keep_segs)
8058 segno = 1;
8059 else
8060 segno = currSegNo - keep_segs;
8061 }
8062 }
8063
8064 /* don't delete WAL segments newer than the calculated segment */
8065 if (segno < *logSegNo)
8066 *logSegNo = segno;
8067}
bool IsBinaryUpgrade
Definition: globals.c:121
XLogRecPtr GetOldestUnsummarizedLSN(TimeLineID *tli, bool *lsn_is_exact)
int wal_keep_size_mb
Definition: xlog.c:117
static XLogRecPtr XLogGetReplicationSlotMinimumLSN(void)
Definition: xlog.c:2679
int max_slot_wal_keep_size_mb
Definition: xlog.c:136

References ConvertToXSegs, GetOldestUnsummarizedLSN(), InvalidXLogRecPtr, IsBinaryUpgrade, max_slot_wal_keep_size_mb, wal_keep_size_mb, wal_segment_size, XLByteToSeg, and XLogGetReplicationSlotMinimumLSN().

Referenced by CreateCheckPoint(), CreateRestartPoint(), and GetWALAvailability().

◆ LocalProcessControlFile()

void LocalProcessControlFile ( bool  reset)

Definition at line 4888 of file xlog.c.

4889{
4890 Assert(reset || ControlFile == NULL);
4893}
void reset(void)
Definition: sql-declare.c:600

References Assert(), ControlFile, palloc(), ReadControlFile(), and reset().

Referenced by PostgresSingleUserMain(), PostmasterMain(), and PostmasterStateMachine().

◆ LocalSetXLogInsertAllowed()

static int LocalSetXLogInsertAllowed ( void  )
static

Definition at line 6474 of file xlog.c.

6475{
6476 int oldXLogAllowed = LocalXLogInsertAllowed;
6477
6479
6480 return oldXLogAllowed;
6481}

References LocalXLogInsertAllowed.

Referenced by CreateCheckPoint(), and StartupXLOG().

◆ LogCheckpointEnd()

static void LogCheckpointEnd ( bool  restartpoint)
static

Definition at line 6733 of file xlog.c.

6734{
6735 long write_msecs,
6736 sync_msecs,
6737 total_msecs,
6738 longest_msecs,
6739 average_msecs;
6740 uint64 average_sync_time;
6741
6743
6746
6749
6750 /* Accumulate checkpoint timing summary data, in milliseconds. */
6751 PendingCheckpointerStats.write_time += write_msecs;
6752 PendingCheckpointerStats.sync_time += sync_msecs;
6753
6754 /*
6755 * All of the published timing statistics are accounted for. Only
6756 * continue if a log message is to be written.
6757 */
6758 if (!log_checkpoints)
6759 return;
6760
6763
6764 /*
6765 * Timing values returned from CheckpointStats are in microseconds.
6766 * Convert to milliseconds for consistent printing.
6767 */
6768 longest_msecs = (long) ((CheckpointStats.ckpt_longest_sync + 999) / 1000);
6769
6770 average_sync_time = 0;
6772 average_sync_time = CheckpointStats.ckpt_agg_sync_time /
6774 average_msecs = (long) ((average_sync_time + 999) / 1000);
6775
6776 /*
6777 * ControlFileLock is not required to see ControlFile->checkPoint and
6778 * ->checkPointCopy here as we are the only updater of those variables at
6779 * this moment.
6780 */
6781 if (restartpoint)
6782 ereport(LOG,
6783 (errmsg("restartpoint complete: wrote %d buffers (%.1f%%), "
6784 "wrote %d SLRU buffers; %d WAL file(s) added, "
6785 "%d removed, %d recycled; write=%ld.%03d s, "
6786 "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
6787 "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
6788 "estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
6795 write_msecs / 1000, (int) (write_msecs % 1000),
6796 sync_msecs / 1000, (int) (sync_msecs % 1000),
6797 total_msecs / 1000, (int) (total_msecs % 1000),
6799 longest_msecs / 1000, (int) (longest_msecs % 1000),
6800 average_msecs / 1000, (int) (average_msecs % 1000),
6801 (int) (PrevCheckPointDistance / 1024.0),
6802 (int) (CheckPointDistanceEstimate / 1024.0),
6805 else
6806 ereport(LOG,
6807 (errmsg("checkpoint complete: wrote %d buffers (%.1f%%), "
6808 "wrote %d SLRU buffers; %d WAL file(s) added, "
6809 "%d removed, %d recycled; write=%ld.%03d s, "
6810 "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
6811 "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
6812 "estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
6819 write_msecs / 1000, (int) (write_msecs % 1000),
6820 sync_msecs / 1000, (int) (sync_msecs % 1000),
6821 total_msecs / 1000, (int) (total_msecs % 1000),
6823 longest_msecs / 1000, (int) (longest_msecs % 1000),
6824 average_msecs / 1000, (int) (average_msecs % 1000),
6825 (int) (PrevCheckPointDistance / 1024.0),
6826 (int) (CheckPointDistanceEstimate / 1024.0),
6829}
long TimestampDifferenceMilliseconds(TimestampTz start_time, TimestampTz stop_time)
Definition: timestamp.c:1757
PgStat_CheckpointerStats PendingCheckpointerStats
uint64 ckpt_agg_sync_time
Definition: xlog.h:176
uint64 ckpt_longest_sync
Definition: xlog.h:175
TimestampTz ckpt_end_t
Definition: xlog.h:165
int ckpt_slru_written
Definition: xlog.h:168
int ckpt_sync_rels
Definition: xlog.h:174
PgStat_Counter sync_time
Definition: pgstat.h:263
PgStat_Counter write_time
Definition: pgstat.h:262
static double CheckPointDistanceEstimate
Definition: xlog.c:160
static double PrevCheckPointDistance
Definition: xlog.c:161

References ControlFileData::checkPoint, ControlFileData::checkPointCopy, CheckPointDistanceEstimate, CheckpointStats, CheckpointStatsData::ckpt_agg_sync_time, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_end_t, CheckpointStatsData::ckpt_longest_sync, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_slru_written, CheckpointStatsData::ckpt_start_t, CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_rels, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, ControlFile, ereport, errmsg(), GetCurrentTimestamp(), LOG, log_checkpoints, LSN_FORMAT_ARGS, NBuffers, PendingCheckpointerStats, PrevCheckPointDistance, CheckPoint::redo, PgStat_CheckpointerStats::sync_time, TimestampDifferenceMilliseconds(), and PgStat_CheckpointerStats::write_time.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ LogCheckpointStart()

static void LogCheckpointStart ( int  flags,
bool  restartpoint 
)
static

Definition at line 6701 of file xlog.c.

6702{
6703 if (restartpoint)
6704 ereport(LOG,
6705 /* translator: the placeholders show checkpoint options */
6706 (errmsg("restartpoint starting:%s%s%s%s%s%s%s%s",
6707 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6708 (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6709 (flags & CHECKPOINT_FAST) ? " fast" : "",
6710 (flags & CHECKPOINT_FORCE) ? " force" : "",
6711 (flags & CHECKPOINT_WAIT) ? " wait" : "",
6712 (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6713 (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6714 (flags & CHECKPOINT_FLUSH_UNLOGGED) ? " flush-unlogged" : "")));
6715 else
6716 ereport(LOG,
6717 /* translator: the placeholders show checkpoint options */
6718 (errmsg("checkpoint starting:%s%s%s%s%s%s%s%s",
6719 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6720 (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6721 (flags & CHECKPOINT_FAST) ? " fast" : "",
6722 (flags & CHECKPOINT_FORCE) ? " force" : "",
6723 (flags & CHECKPOINT_WAIT) ? " wait" : "",
6724 (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6725 (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6726 (flags & CHECKPOINT_FLUSH_UNLOGGED) ? " flush-unlogged" : "")));
6727}
#define CHECKPOINT_FLUSH_UNLOGGED
Definition: xlog.h:143
#define CHECKPOINT_CAUSE_XLOG
Definition: xlog.h:148
#define CHECKPOINT_CAUSE_TIME
Definition: xlog.h:149

References CHECKPOINT_CAUSE_TIME, CHECKPOINT_CAUSE_XLOG, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FAST, CHECKPOINT_FLUSH_UNLOGGED, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, CHECKPOINT_WAIT, ereport, errmsg(), and LOG.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ PerformRecoveryXLogAction()

static bool PerformRecoveryXLogAction ( void  )
static

Definition at line 6336 of file xlog.c.

6337{
6338 bool promoted = false;
6339
6340 /*
6341 * Perform a checkpoint to update all our recovery activity to disk.
6342 *
6343 * Note that we write a shutdown checkpoint rather than an on-line one.
6344 * This is not particularly critical, but since we may be assigning a new
6345 * TLI, using a shutdown checkpoint allows us to have the rule that TLI
6346 * only changes in shutdown checkpoints, which allows some extra error
6347 * checking in xlog_redo.
6348 *
6349 * In promotion, only create a lightweight end-of-recovery record instead
6350 * of a full checkpoint. A checkpoint is requested later, after we're
6351 * fully out of recovery mode and already accepting queries.
6352 */
6355 {
6356 promoted = true;
6357
6358 /*
6359 * Insert a special WAL record to mark the end of recovery, since we
6360 * aren't doing a checkpoint. That means that the checkpointer process
6361 * may well be in the middle of a time-smoothed restartpoint and
6362 * could continue to be for minutes after this. That sounds strange,
6363 * but the effect is roughly the same and it would be stranger to try
6364 * to come out of the restartpoint and then checkpoint. We request a
6365 * checkpoint later anyway, just for safety.
6366 */
6368 }
6369 else
6370 {
6374 }
6375
6376 return promoted;
6377}
static void CreateEndOfRecoveryRecord(void)
Definition: xlog.c:7424
bool PromoteIsTriggered(void)

References ArchiveRecoveryRequested, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FAST, CHECKPOINT_WAIT, CreateEndOfRecoveryRecord(), IsUnderPostmaster, PromoteIsTriggered(), and RequestCheckpoint().

Referenced by StartupXLOG().

◆ PreallocXlogFiles()

static void PreallocXlogFiles ( XLogRecPtr  endptr,
TimeLineID  tli 
)
static

Definition at line 3706 of file xlog.c.

3707{
3708 XLogSegNo _logSegNo;
3709 int lf;
3710 bool added;
3711 char path[MAXPGPATH];
3712 uint64 offset;
3713
3714 if (!XLogCtl->InstallXLogFileSegmentActive)
3715 return; /* unlocked check says no */
3716
3717 XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
3718 offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
3719 if (offset >= (uint32) (0.75 * wal_segment_size))
3720 {
3721 _logSegNo++;
3722 lf = XLogFileInitInternal(_logSegNo, tli, &added, path);
3723 if (lf >= 0)
3724 close(lf);
3725 if (added)
3726 CheckpointStats.ckpt_segs_added++;
3727 }
3728}
static int XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
Definition: xlog.c:3208

References CheckpointStats, CheckpointStatsData::ckpt_segs_added, close, XLogCtlData::InstallXLogFileSegmentActive, MAXPGPATH, wal_segment_size, XLByteToPrevSeg, XLogCtl, XLogFileInitInternal(), and XLogSegmentOffset.

Referenced by CreateCheckPoint(), CreateRestartPoint(), and StartupXLOG().

◆ ReachedEndOfBackup()

void ReachedEndOfBackup ( XLogRecPtr  EndRecPtr,
TimeLineID  tli 
)

Definition at line 6299 of file xlog.c.

6300{
6301 /*
6302 * We have reached the end of base backup, as indicated by pg_control. The
6303 * data on disk is now consistent (unless minRecoveryPoint is further
6304 * ahead, which can happen if we crashed during previous recovery). Reset
6305 * backupStartPoint and backupEndPoint, and update minRecoveryPoint to
6306 * make sure we don't allow starting up at an earlier point even if
6307 * recovery is stopped and restarted soon after this.
6308 */
6309 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6310
6311 if (ControlFile->minRecoveryPoint < EndRecPtr)
6312 {
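6313 ControlFile->minRecoveryPoint = EndRecPtr;
6314 ControlFile->minRecoveryPointTLI = tli;
6315 }
6316
6317 ControlFile->backupStartPoint = InvalidXLogRecPtr;
6318 ControlFile->backupEndPoint = InvalidXLogRecPtr;
6319 ControlFile->backupEndRequired = false;
6320 UpdateControlFile();
6321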
6322 LWLockRelease(ControlFileLock);
6323}
XLogRecPtr backupStartPoint
Definition: pg_control.h:170
bool backupEndRequired
Definition: pg_control.h:172
XLogRecPtr backupEndPoint
Definition: pg_control.h:171

References ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFile, InvalidXLogRecPtr, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, and UpdateControlFile().

Referenced by CheckRecoveryConsistency().

◆ ReadControlFile()

static void ReadControlFile ( void  )
static

Definition at line 4364 of file xlog.c.

4365{
4366 pg_crc32c crc;
4367 int fd;
4368 char wal_segsz_str[20];
4369 int r;
4370
4371 /*
4372 * Read data...
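4373 */
4374 fd = BasicOpenFile(XLOG_CONTROL_FILE,
4375 O_RDWR | PG_BINARY);
4376 if (fd < 0)
4377 ereport(PANIC,
4378 (errcode_for_file_access(),
4379 errmsg("could not open file \"%s\": %m",
4380 XLOG_CONTROL_FILE)));
4381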
4382 pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_READ);
4383 r = read(fd, ControlFile, sizeof(ControlFileData));
4384 if (r != sizeof(ControlFileData))
4385 {
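4386 if (r < 0)
4387 ereport(PANIC,
4388 (errcode_for_file_access(),
4389 errmsg("could not read file \"%s\": %m",
4390 XLOG_CONTROL_FILE)));
4391 else
4392 ereport(PANIC,
4393 (errcode(ERRCODE_DATA_CORRUPTED),
4394 errmsg("could not read file \"%s\": read %d of %zu",
4395 XLOG_CONTROL_FILE, r, sizeof(ControlFileData))));
4396 }
4397 pgstat_report_wait_end();
4398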
4399 close(fd);
4400
4401 /*
4402 * Check for expected pg_control format version. If this is wrong, the
4403 * CRC check will likely fail because we'll be checking the wrong number
4404 * of bytes. Complaining about wrong version will probably be more
4405 * enlightening than complaining about wrong CRC.
4406 */
4407
4409 ereport(FATAL,
4410 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4411 errmsg("database files are incompatible with server"),
4412 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d (0x%08x),"
4413 " but the server was compiled with PG_CONTROL_VERSION %d (0x%08x).",
4416 errhint("This could be a problem of mismatched byte ordering. It looks like you need to initdb.")));
4417
4419 ereport(FATAL,
4420 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4421 errmsg("database files are incompatible with server"),
4422 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d,"
4423 " but the server was compiled with PG_CONTROL_VERSION %d.",
4425 errhint("It looks like you need to initdb.")));
4426
4427 /* Now check the CRC. */
4431 offsetof(ControlFileData, crc));
4432 FIN_CRC32C(crc);
4433
4434 if (!EQ_CRC32C(crc, ControlFile->crc))
4435 ereport(FATAL,
4436 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4437 errmsg("incorrect checksum in control file")));
4438
4439 /*
4440 * Do compatibility checking immediately. If the database isn't
4441 * compatible with the backend executable, we want to abort before we can
4442 * possibly do any damage.
4443 */
4444 if (ControlFile->catalog_version_no != CATALOG_VERSION_NO)
4445 ereport(FATAL,
4446 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4447 errmsg("database files are incompatible with server"),
4448 /* translator: %s is a variable name and %d is its value */
4449 errdetail("The database cluster was initialized with %s %d,"
4450 " but the server was compiled with %s %d.",
4451 "CATALOG_VERSION_NO", ControlFile->catalog_version_no,
4452 "CATALOG_VERSION_NO", CATALOG_VERSION_NO),
4453 errhint("It looks like you need to initdb.")));
4454 if (ControlFile->maxAlign != MAXIMUM_ALIGNOF)
4455 ereport(FATAL,
4456 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4457 errmsg("database files are incompatible with server"),
4458 /* translator: %s is a variable name and %d is its value */
4459 errdetail("The database cluster was initialized with %s %d,"
4460 " but the server was compiled with %s %d.",
4461 "MAXALIGN", ControlFile->maxAlign,
4462 "MAXALIGN", MAXIMUM_ALIGNOF),
4463 errhint("It looks like you need to initdb.")));
4464 if (ControlFile->floatFormat != FLOATFORMAT_VALUE)
4465 ereport(FATAL,
4466 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4467 errmsg("database files are incompatible with server"),
4468 errdetail("The database cluster appears to use a different floating-point number format than the server executable."),
4469 errhint("It looks like you need to initdb.")));
4470 if (ControlFile->blcksz != BLCKSZ)
4471 ereport(FATAL,
4472 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4473 errmsg("database files are incompatible with server"),
4474 /* translator: %s is a variable name and %d is its value */
4475 errdetail("The database cluster was initialized with %s %d,"
4476 " but the server was compiled with %s %d.",
4477 "BLCKSZ", ControlFile->blcksz,
4478 "BLCKSZ", BLCKSZ),
4479 errhint("It looks like you need to recompile or initdb.")));
4480 if (ControlFile->relseg_size != RELSEG_SIZE)
4481 ereport(FATAL,
4482 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4483 errmsg("database files are incompatible with server"),
4484 /* translator: %s is a variable name and %d is its value */
4485 errdetail("The database cluster was initialized with %s %d,"
4486 " but the server was compiled with %s %d.",
4487 "RELSEG_SIZE", ControlFile->relseg_size,
4488 "RELSEG_SIZE", RELSEG_SIZE),
4489 errhint("It looks like you need to recompile or initdb.")));
4490 if (ControlFile->xlog_blcksz != XLOG_BLCKSZ)
4491 ereport(FATAL,
4492 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4493 errmsg("database files are incompatible with server"),
4494 /* translator: %s is a variable name and %d is its value */
4495 errdetail("The database cluster was initialized with %s %d,"
4496 " but the server was compiled with %s %d.",
4497 "XLOG_BLCKSZ", ControlFile->xlog_blcksz,
4498 "XLOG_BLCKSZ", XLOG_BLCKSZ),
4499 errhint("It looks like you need to recompile or initdb.")));
4500 if (ControlFile->nameDataLen != NAMEDATALEN)
4501 ereport(FATAL,
4502 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4503 errmsg("database files are incompatible with server"),
4504 /* translator: %s is a variable name and %d is its value */
4505 errdetail("The database cluster was initialized with %s %d,"
4506 " but the server was compiled with %s %d.",
4507 "NAMEDATALEN", ControlFile->nameDataLen,
4508 "NAMEDATALEN", NAMEDATALEN),
4509 errhint("It looks like you need to recompile or initdb.")));
4510 if (ControlFile->indexMaxKeys != INDEX_MAX_KEYS)
4511 ereport(FATAL,
4512 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4513 errmsg("database files are incompatible with server"),
4514 /* translator: %s is a variable name and %d is its value */
4515 errdetail("The database cluster was initialized with %s %d,"
4516 " but the server was compiled with %s %d.",
4517 "INDEX_MAX_KEYS", ControlFile->indexMaxKeys,
4518 "INDEX_MAX_KEYS", INDEX_MAX_KEYS),
4519 errhint("It looks like you need to recompile or initdb.")));
4520 if (ControlFile->toast_max_chunk_size != TOAST_MAX_CHUNK_SIZE)
4521 ereport(FATAL,
4522 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4523 errmsg("database files are incompatible with server"),
4524 /* translator: %s is a variable name and %d is its value */
4525 errdetail("The database cluster was initialized with %s %d,"
4526 " but the server was compiled with %s %d.",
4527 "TOAST_MAX_CHUNK_SIZE", ControlFile->toast_max_chunk_size,
4528 "TOAST_MAX_CHUNK_SIZE", (int) TOAST_MAX_CHUNK_SIZE),
4529 errhint("It looks like you need to recompile or initdb.")));
4530 if (ControlFile->loblksize != LOBLKSIZE)
4531 ereport(FATAL,
4532 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4533 errmsg("database files are incompatible with server"),
4534 /* translator: %s is a variable name and %d is its value */
4535 errdetail("The database cluster was initialized with %s %d,"
4536 " but the server was compiled with %s %d.",
4537 "LOBLKSIZE", ControlFile->loblksize,
4538 "LOBLKSIZE", (int) LOBLKSIZE),
4539 errhint("It looks like you need to recompile or initdb.")));
4540
4541 Assert(ControlFile->float8ByVal); /* vestigial, not worth an error msg */
4542
4544
4546 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4547 errmsg_plural("invalid WAL segment size in control file (%d byte)",
4548 "invalid WAL segment size in control file (%d bytes)",
4551 errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.")));
4552
4553 snprintf(wal_segsz_str, sizeof(wal_segsz_str), "%d", wal_segment_size);
4554 SetConfigOption("wal_segment_size", wal_segsz_str, PGC_INTERNAL,
4556
4557 /* check and update variables dependent on wal_segment_size */
4559 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4560 /* translator: both %s are GUC names */
4561 errmsg("\"%s\" must be at least twice \"%s\"",
4562 "min_wal_size", "wal_segment_size")));
4563
4565 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4566 /* translator: both %s are GUC names */
4567 errmsg("\"%s\" must be at least twice \"%s\"",
4568 "max_wal_size", "wal_segment_size")));
4569
4571 (wal_segment_size / XLOG_BLCKSZ * UsableBytesInPage) -
4573
4575
4576 /* Make the initdb settings visible as GUC variables, too */
4577 SetConfigOption("data_checksums", DataChecksumsEnabled() ? "yes" : "no",
4579}
#define PG_BINARY
Definition: c.h:1273
#define CATALOG_VERSION_NO
Definition: catversion.h:60
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1184
int BasicOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1086
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition: guc.c:4337
@ PGC_S_DYNAMIC_DEFAULT
Definition: guc.h:114
@ PGC_INTERNAL
Definition: guc.h:73
#define TOAST_MAX_CHUNK_SIZE
Definition: heaptoast.h:84
#define read(a, b, c)
Definition: win32.h:13
#define LOBLKSIZE
Definition: large_object.h:70
#define INDEX_MAX_KEYS
#define NAMEDATALEN
#define FLOATFORMAT_VALUE
Definition: pg_control.h:201
#define PG_CONTROL_VERSION
Definition: pg_control.h:25
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
uint32 pg_control_version
Definition: pg_control.h:125
uint32 xlog_seg_size
Definition: pg_control.h:211
uint32 nameDataLen
Definition: pg_control.h:213
uint32 indexMaxKeys
Definition: pg_control.h:214
uint32 relseg_size
Definition: pg_control.h:208
uint32 catalog_version_no
Definition: pg_control.h:126
double floatFormat
Definition: pg_control.h:200
uint32 xlog_blcksz
Definition: pg_control.h:210
uint32 loblksize
Definition: pg_control.h:217
pg_crc32c crc
Definition: pg_control.h:238
uint32 toast_max_chunk_size
Definition: pg_control.h:216
#define UsableBytesInPage
Definition: xlog.c:597
bool DataChecksumsEnabled(void)
Definition: xlog.c:4615
static int UsableBytesInSegment
Definition: xlog.c:606
int min_wal_size_mb
Definition: xlog.c:116
#define XLOG_CONTROL_FILE

References Assert(), BasicOpenFile(), ControlFileData::blcksz, CalculateCheckpointSegments(), CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ConvertToXSegs, ControlFileData::crc, crc, DataChecksumsEnabled(), EQ_CRC32C, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errdetail(), errhint(), errmsg(), errmsg_plural(), ERROR, FATAL, fd(), FIN_CRC32C, ControlFileData::float8ByVal, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, IsValidWalSegSize, ControlFileData::loblksize, LOBLKSIZE, max_wal_size_mb, ControlFileData::maxAlign, min_wal_size_mb, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_VERSION, ControlFileData::pg_control_version, PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT, pgstat_report_wait_end(), pgstat_report_wait_start(), read, ControlFileData::relseg_size, SetConfigOption(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, snprintf, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG(), and LocalProcessControlFile().

◆ RecoveryInProgress()

bool RecoveryInProgress ( void  )

Definition at line 6386 of file xlog.c.

6387{
6388 /*
6389 * We check shared state each time only until we leave recovery mode. We
6390 * can't re-enter recovery, so there's no need to keep checking after the
6391 * shared variable has once been seen false.
6392 */
6393 if (!LocalRecoveryInProgress)
6394 return false;
6395 else
6396 {
6397 /*
6398 * use volatile pointer to make sure we make a fresh read of the
6399 * shared variable.
6400 */
6401 volatile XLogCtlData *xlogctl = XLogCtl;
6402
6403 LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_DONE);
6404
6405 /*
6406 * Note: We don't need a memory barrier when we're still in recovery.
6407 * We might exit recovery immediately after return, so the caller
6408 * can't rely on 'true' meaning that we're still in recovery anyway.
6409 */
6410
6411 return LocalRecoveryInProgress;
6412 }
6413}
static bool LocalRecoveryInProgress
Definition: xlog.c:225

References LocalRecoveryInProgress, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by amcheck_index_mainfork_expected(), attribute_statistics_update(), BackgroundWriterMain(), BeginReportingGUCOptions(), brin_desummarize_range(), brin_summarize_range(), CanInvalidateIdleSlot(), check_transaction_isolation(), check_transaction_read_only(), CheckArchiveTimeout(), CheckLogicalDecodingRequirements(), CheckpointerMain(), ComputeXidHorizons(), CreateCheckPoint(), CreateDecodingContext(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_start(), do_pg_backup_stop(), error_commit_ts_disabled(), ExecCheckpoint(), get_relation_info(), GetCurrentLSN(), GetLatestLSN(), GetNewMultiXactId(), GetNewObjectId(), GetNewTransactionId(), GetOldestActiveTransactionId(), GetOldestSafeDecodingTransactionId(), GetRunningTransactionData(), GetSerializableTransactionSnapshot(), GetSerializableTransactionSnapshotInt(), GetSnapshotData(), GetStrictOldestNonRemovableTransactionId(), gin_clean_pending_list(), GlobalVisHorizonKindForRel(), heap_force_common(), heap_page_prune_opt(), IdentifySystem(), InitTempTableNamespace(), InitWalSender(), IsCheckpointOnSchedule(), LockAcquireExtended(), logical_read_xlog_page(), MaintainLatestCompletedXid(), MarkBufferDirtyHint(), perform_base_backup(), pg_clear_attribute_stats(), pg_create_restore_point(), pg_current_wal_flush_lsn(), pg_current_wal_insert_lsn(), pg_current_wal_lsn(), pg_get_sequence_data(), pg_get_wal_replay_pause_state(), pg_is_in_recovery(), pg_is_wal_replay_paused(), pg_log_standby_snapshot(), pg_logical_slot_get_changes_guts(), pg_promote(), pg_replication_slot_advance(), pg_sequence_last_value(), pg_switch_wal(), pg_sync_replication_slots(), pg_wal_replay_pause(), pg_wal_replay_resume(), pg_walfile_name(), pg_walfile_name_offset(), PhysicalWakeupLogicalWalSnd(), PrepareRedoAdd(), PrepareRedoRemoveFull(), PreventCommandDuringRecovery(), ProcessStandbyPSRequestMessage(), ProcSleep(), read_local_xlog_page_guts(), ReadReplicationSlot(), recovery_create_dbdir(), 
relation_statistics_update(), ReplicationSlotAlter(), ReplicationSlotCreate(), ReplicationSlotDrop(), ReplicationSlotReserveWal(), replorigin_check_prerequisites(), ReportChangedGUCOptions(), sendDir(), SerialSetActiveSerXmin(), show_in_hot_standby(), ShutdownXLOG(), SnapBuildWaitSnapshot(), StandbySlotsHaveCaughtup(), StartLogicalReplication(), StartReplication(), StartTransaction(), TransactionIdIsInProgress(), TruncateMultiXact(), UpdateFullPageWrites(), verify_heapam(), WALReadFromBuffers(), WalReceiverMain(), WalSndWaitForWal(), xlog_decode(), XLogBackgroundFlush(), XLogFlush(), XLogInsertAllowed(), and XLogSendPhysical().

◆ RecoveryRestartPoint()

static void RecoveryRestartPoint ( const CheckPoint checkPoint,
XLogReaderState record 
)
static

Definition at line 7599 of file xlog.c.

7600{
7601 /*
7602 * Also refrain from creating a restartpoint if we have seen any
7603 * references to non-existent pages. Restarting recovery from the
7604 * restartpoint would not see the references, so we would lose the
7605 * cross-check that the pages belonged to a relation that was dropped
7606 * later.
7607 */
7608 if (XLogHaveInvalidPages())
7609 {
7610 elog(DEBUG2,
7611 "could not record restart point at %X/%08X because there are unresolved references to invalid pages",
7612 LSN_FORMAT_ARGS(checkPoint->redo));
7613 return;
7614 }
7615
7616 /*
7617 * Copy the checkpoint record to shared memory, so that checkpointer can
7618 * work out the next time it wants to perform a restartpoint.
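7619 */
7620 SpinLockAcquire(&XLogCtl->info_lck);
7621 XLogCtl->lastCheckPointRecPtr = record->ReadRecPtr;
7622 XLogCtl->lastCheckPointEndPtr = record->EndRecPtr;
7623 XLogCtl->lastCheckPoint = *checkPoint;
7624 SpinLockRelease(&XLogCtl->info_lck);
7625}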
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
XLogRecPtr ReadRecPtr
Definition: xlogreader.h:206
bool XLogHaveInvalidPages(void)
Definition: xlogutils.c:224

References DEBUG2, elog, XLogReaderState::EndRecPtr, XLogCtlData::info_lck, XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LSN_FORMAT_ARGS, XLogReaderState::ReadRecPtr, CheckPoint::redo, SpinLockAcquire, SpinLockRelease, XLogCtl, and XLogHaveInvalidPages().

Referenced by xlog_redo().

◆ register_persistent_abort_backup_handler()

void register_persistent_abort_backup_handler ( void  )

Definition at line 9465 of file xlog.c.

9466{
9467 static bool already_done = false;
9468
9469 if (already_done)
9470 return;
9472 already_done = true;
9473}
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337

References before_shmem_exit(), BoolGetDatum(), and do_pg_abort_backup().

Referenced by pg_backup_start().

◆ RemoveNonParentXlogFiles()

void RemoveNonParentXlogFiles ( XLogRecPtr  switchpoint,
TimeLineID  newTLI 
)

Definition at line 3956 of file xlog.c.

3957{
3958 DIR *xldir;
3959 struct dirent *xlde;
3960 char switchseg[MAXFNAMELEN];
3961 XLogSegNo endLogSegNo;
3962 XLogSegNo switchLogSegNo;
3963 XLogSegNo recycleSegNo;
3964
3965 /*
3966 * Initialize info about where to begin the work. This will recycle,
3967 * somewhat arbitrarily, 10 future segments.
3968 */
3969 XLByteToPrevSeg(switchpoint, switchLogSegNo, wal_segment_size);
3970 XLByteToSeg(switchpoint, endLogSegNo, wal_segment_size);
3971 recycleSegNo = endLogSegNo + 10;
3972
3973 /*
3974 * Construct a filename of the last segment to be kept.
3975 */
3976 XLogFileName(switchseg, newTLI, switchLogSegNo, wal_segment_size);
3977
3978 elog(DEBUG2, "attempting to remove WAL segments newer than log file %s",
3979 switchseg);
3980
3981 xldir = AllocateDir(XLOGDIR);
3982
3983 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3984 {
3985 /* Ignore files that are not XLOG segments */
3986 if (!IsXLogFileName(xlde->d_name))
3987 continue;
3988
3989 /*
3990 * Remove files that are on a timeline older than the new one we're
3991 * switching to, but with a segment number >= the first segment on the
3992 * new timeline.
3993 */
3994 if (strncmp(xlde->d_name, switchseg, 8) < 0 &&
3995 strcmp(xlde->d_name + 8, switchseg + 8) > 0)
3996 {
3997 /*
3998 * If the file has already been marked as .ready, however, don't
3999 * remove it yet. It should be OK to remove it - files that are
4000 * not part of our timeline history are not required for recovery
4001 * - but seems safer to let them be archived and removed later.
4002 */
4003 if (!XLogArchiveIsReady(xlde->d_name))
4004 RemoveXlogFile(xlde, recycleSegNo, &endLogSegNo, newTLI);
4005 }
4006 }
4007
4008 FreeDir(xldir);
4009}
static void RemoveXlogFile(const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
Definition: xlog.c:4025
static bool IsXLogFileName(const char *fname)
bool XLogArchiveIsReady(const char *xlog)
Definition: xlogarchive.c:694

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveIsReady(), XLOGDIR, and XLogFileName().

Referenced by ApplyWalRecord(), and CleanupAfterArchiveRecovery().

◆ RemoveOldXlogFiles()

static void RemoveOldXlogFiles ( XLogSegNo  segno,
XLogRecPtr  lastredoptr,
XLogRecPtr  endptr,
TimeLineID  insertTLI 
)
static

Definition at line 3881 of file xlog.c.

3883{
3884 DIR *xldir;
3885 struct dirent *xlde;
3886 char lastoff[MAXFNAMELEN];
3887 XLogSegNo endlogSegNo;
3888 XLogSegNo recycleSegNo;
3889
3890 /* Initialize info about where to try to recycle to */
3891 XLByteToSeg(endptr, endlogSegNo, wal_segment_size);
3892 recycleSegNo = XLOGfileslop(lastredoptr);
3893
3894 /*
3895 * Construct a filename of the last segment to be kept. The timeline ID
3896 * doesn't matter, we ignore that in the comparison. (During recovery,
3897 * InsertTimeLineID isn't set, so we can't use that.)
3898 */
3899 XLogFileName(lastoff, 0, segno, wal_segment_size);
3900
3901 elog(DEBUG2, "attempting to remove WAL segments older than log file %s",
3902 lastoff);
3903
3904 xldir = AllocateDir(XLOGDIR);
3905
3906 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3907 {
3908 /* Ignore files that are not XLOG segments */
3909 if (!IsXLogFileName(xlde->d_name) &&
3910 !IsPartialXLogFileName(xlde->d_name))
3911 continue;
3912
3913 /*
3914 * We ignore the timeline part of the XLOG segment identifiers in
3915 * deciding whether a segment is still needed. This ensures that we
3916 * won't prematurely remove a segment from a parent timeline. We could
3917 * probably be a little more proactive about removing segments of
3918 * non-parent timelines, but that would be a whole lot more
3919 * complicated.
3920 *
3921 * We use the alphanumeric sorting property of the filenames to decide
3922 * which ones are earlier than the lastoff segment.
3923 */
3924 if (strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
3925 {
3926 if (XLogArchiveCheckDone(xlde->d_name))
3927 {
3928 /* Update the last removed location in shared memory first */
3929 UpdateLastRemovedPtr(xlde->d_name);
3930
3931 RemoveXlogFile(xlde, recycleSegNo, &endlogSegNo, insertTLI);
3932 }
3933 }
3934 }
3935
3936 FreeDir(xldir);
3937}
static XLogSegNo XLOGfileslop(XLogRecPtr lastredoptr)
Definition: xlog.c:2230
static void UpdateLastRemovedPtr(char *filename)
Definition: xlog.c:3828
static bool IsPartialXLogFileName(const char *fname)

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsPartialXLogFileName(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), UpdateLastRemovedPtr(), wal_segment_size, XLByteToSeg, XLogArchiveCheckDone(), XLOGDIR, XLogFileName(), and XLOGfileslop().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ RemoveTempXlogFiles()

static void RemoveTempXlogFiles ( void  )
static

Definition at line 3848 of file xlog.c.

3849{
3850 DIR *xldir;
3851 struct dirent *xlde;
3852
3853 elog(DEBUG2, "removing all temporary WAL segments");
3854
3855 xldir = AllocateDir(XLOGDIR);
3856 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3857 {
3858 char path[MAXPGPATH];
3859
3860 if (strncmp(xlde->d_name, "xlogtemp.", 9) != 0)
3861 continue;
3862
3863 snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlde->d_name);
3864 unlink(path);
3865 elog(DEBUG2, "removed temporary WAL segment \"%s\"", path);
3866 }
3867 FreeDir(xldir);
3868}

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), MAXPGPATH, ReadDir(), snprintf, and XLOGDIR.

Referenced by StartupXLOG().

◆ RemoveXlogFile()

static void RemoveXlogFile ( const struct dirent segment_de,
XLogSegNo  recycleSegNo,
XLogSegNo endlogSegNo,
TimeLineID  insertTLI 
)
static

Definition at line 4025 of file xlog.c.

4028{
4029 char path[MAXPGPATH];
4030#ifdef WIN32
4031 char newpath[MAXPGPATH];
4032#endif
4033 const char *segname = segment_de->d_name;
4034
4035 snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname);
4036
4037 /*
4038 * Before deleting the file, see if it can be recycled as a future log
4039 * segment. Only recycle normal files, because we don't want to recycle
4040 * symbolic links pointing to a separate archive directory.
4041 */
4042 if (wal_recycle &&
4043 *endlogSegNo <= recycleSegNo &&
4044 XLogCtl->InstallXLogFileSegmentActive && /* callee rechecks this */
4045 get_dirent_type(path, segment_de, false, DEBUG2) == PGFILETYPE_REG &&
4046 InstallXLogFileSegment(endlogSegNo, path,
4047 true, recycleSegNo, insertTLI))
4048 {
4049 ereport(DEBUG2,
4050 (errmsg_internal("recycled write-ahead log file \"%s\"",
4051 segname)));
4052 CheckpointStats.ckpt_segs_recycled++;
4053 /* Needn't recheck that slot on future iterations */
4054 (*endlogSegNo)++;
4055 }
4056 else
4057 {
4058 /* No need for any more future segments, or recycling failed ... */
4059 int rc;
4060
4061 ereport(DEBUG2,
4062 (errmsg_internal("removing write-ahead log file \"%s\"",
4063 segname)));
4064
4065#ifdef WIN32
4066
4067 /*
4068 * On Windows, if another process (e.g another backend) holds the file
4069 * open in FILE_SHARE_DELETE mode, unlink will succeed, but the file
4070 * will still show up in directory listing until the last handle is
4071 * closed. To avoid confusing the lingering deleted file for a live
4072 * WAL file that needs to be archived, rename it before deleting it.
4073 *
4074 * If another process holds the file open without FILE_SHARE_DELETE
4075 * flag, rename will fail. We'll try again at the next checkpoint.
4076 */
4077 snprintf(newpath, MAXPGPATH, "%s.deleted", path);
4078 if (rename(path, newpath) != 0)
4079 {
4080 ereport(LOG,
4081 (errcode_for_file_access(),
4082 errmsg("could not rename file \"%s\": %m",
4083 path)));
4084 return;
4085 }
4086 rc = durable_unlink(newpath, LOG);
4087#else
4088 rc = durable_unlink(path, LOG);
4089#endif
4090 if (rc != 0)
4091 {
4092 /* Message already logged by durable_unlink() */
4093 return;
4094 }
4095 CheckpointStats.ckpt_segs_removed++;
4096 }
4097
4098 XLogArchiveCleanup(segname);
4099}
@ PGFILETYPE_REG
Definition: file_utils.h:22
static bool InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
Definition: xlog.c:3579
bool wal_recycle
Definition: xlog.c:129

References CheckpointStats, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, dirent::d_name, DEBUG2, durable_unlink(), ereport, errcode_for_file_access(), errmsg(), errmsg_internal(), get_dirent_type(), InstallXLogFileSegment(), XLogCtlData::InstallXLogFileSegmentActive, LOG, MAXPGPATH, PGFILETYPE_REG, snprintf, wal_recycle, XLogArchiveCleanup(), XLogCtl, and XLOGDIR.

Referenced by RemoveNonParentXlogFiles(), and RemoveOldXlogFiles().

◆ RequestXLogSwitch()

XLogRecPtr RequestXLogSwitch ( bool  mark_unimportant)

Definition at line 8110 of file xlog.c.

8111{
8112 XLogRecPtr RecPtr;
8113
8114 /* XLOG SWITCH has no data */
8115 XLogBeginInsert();
8116
8117 if (mark_unimportant)
8118 XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
8119 RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH);
8120
8121 return RecPtr;
8122}
#define XLOG_SWITCH
Definition: pg_control.h:72
#define XLOG_MARK_UNIMPORTANT
Definition: xlog.h:155
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:456

References XLOG_MARK_UNIMPORTANT, XLOG_SWITCH, XLogBeginInsert(), XLogInsert(), and XLogSetRecordFlags().

Referenced by CheckArchiveTimeout(), do_pg_backup_start(), do_pg_backup_stop(), pg_switch_wal(), and ShutdownXLOG().

◆ ReserveXLogInsertLocation()

static pg_attribute_always_inline void ReserveXLogInsertLocation ( int  size,
XLogRecPtr StartPos,
XLogRecPtr EndPos,
XLogRecPtr PrevPtr 
)
static

Definition at line 1111 of file xlog.c.

1113{
1114 XLogCtlInsert *Insert = &XLogCtl->Insert;
1115 uint64 startbytepos;
1116 uint64 endbytepos;
1117 uint64 prevbytepos;
1118
1119 size = MAXALIGN(size);
1120
1121 /* All (non xlog-switch) records should contain data. */
1122 Assert(size > SizeOfXLogRecord);
1123
1124 /*
1125 * The duration the spinlock needs to be held is minimized by minimizing
1126 * the calculations that have to be done while holding the lock. The
1127 * current tip of reserved WAL is kept in CurrBytePos, as a byte position
1128 * that only counts "usable" bytes in WAL, that is, it excludes all WAL
1129 * page headers. The mapping between "usable" byte positions and physical
1130 * positions (XLogRecPtrs) can be done outside the locked region, and
1131 * because the usable byte position doesn't include any headers, reserving
1132 * X bytes from WAL is almost as simple as "CurrBytePos += X".
1133 */
1134 SpinLockAcquire(&Insert->insertpos_lck);
1135
1136 startbytepos = Insert->CurrBytePos;
1137 endbytepos = startbytepos + size;
1138 prevbytepos = Insert->PrevBytePos;
1139 Insert->CurrBytePos = endbytepos;
1140 Insert->PrevBytePos = startbytepos;
1141
1142 SpinLockRelease(&Insert->insertpos_lck);
1143
1144 *StartPos = XLogBytePosToRecPtr(startbytepos);
1145 *EndPos = XLogBytePosToEndRecPtr(endbytepos);
1146 *PrevPtr = XLogBytePosToRecPtr(prevbytepos);
1147
1148 /*
1149 * Check that the conversions between "usable byte positions" and
1150 * XLogRecPtrs work consistently in both directions.
1151 */
1152 Assert(XLogRecPtrToBytePos(*StartPos) == startbytepos);
1153 Assert(XLogRecPtrToBytePos(*EndPos) == endbytepos);
1154 Assert(XLogRecPtrToBytePos(*PrevPtr) == prevbytepos);
1155}
#define MAXALIGN(LEN)
Definition: c.h:811
static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos)
Definition: xlog.c:1901
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr)
Definition: xlog.c:1944

References Assert(), XLogCtlData::Insert, Insert(), MAXALIGN, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, and XLogRecPtrToBytePos().

Referenced by XLogInsertRecord().

◆ ReserveXLogSwitch()

static bool ReserveXLogSwitch ( XLogRecPtr StartPos,
XLogRecPtr EndPos,
XLogRecPtr PrevPtr 
)
static

Definition at line 1167 of file xlog.c.

1168{
1169 XLogCtlInsert *Insert = &XLogCtl->Insert;
1170 uint64 startbytepos;
1171 uint64 endbytepos;
1172 uint64 prevbytepos;
1173 uint32 size = MAXALIGN(SizeOfXLogRecord);
1174 XLogRecPtr ptr;
1175 uint32 segleft;
1176
1177 /*
1178 * These calculations are a bit heavy-weight to be done while holding a
1179 * spinlock, but since we're holding all the WAL insertion locks, there
1180 * are no other inserters competing for it. GetXLogInsertRecPtr() does
1181 * compete for it, but that's not called very frequently.
1182 */
1183 SpinLockAcquire(&Insert->insertpos_lck);
1184
1185 startbytepos = Insert->CurrBytePos;
1186
1187 ptr = XLogBytePosToEndRecPtr(startbytepos);
1188 if (XLogSegmentOffset(ptr, wal_segment_size) == 0)
1189 {
1190 SpinLockRelease(&Insert->insertpos_lck);
1191 *EndPos = *StartPos = ptr;
1192 return false;
1193 }
1194
1195 endbytepos = startbytepos + size;
1196 prevbytepos = Insert->PrevBytePos;
1197
1198 *StartPos = XLogBytePosToRecPtr(startbytepos);
1199 *EndPos = XLogBytePosToEndRecPtr(endbytepos);
1200
1201 segleft = wal_segment_size - XLogSegmentOffset(*EndPos, wal_segment_size);
1202 if (segleft != wal_segment_size)
1203 {
1204 /* consume the rest of the segment */
1205 *EndPos += segleft;
1206 endbytepos = XLogRecPtrToBytePos(*EndPos);
1207 }
1208 Insert->CurrBytePos = endbytepos;
1209 Insert->PrevBytePos = startbytepos;
1210
1211 SpinLockRelease(&Insert->insertpos_lck);
1212
1213 *PrevPtr = XLogBytePosToRecPtr(prevbytepos);
1214
1215 Assert(XLogSegmentOffset(*EndPos, wal_segment_size) == 0);
1216 Assert(XLogRecPtrToBytePos(*EndPos) == endbytepos);
1217 Assert(XLogRecPtrToBytePos(*StartPos) == startbytepos);
1218 Assert(XLogRecPtrToBytePos(*PrevPtr) == prevbytepos);
1219
1220 return true;
1221}

References Assert(), XLogCtlData::Insert, Insert(), MAXALIGN, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, XLogRecPtrToBytePos(), and XLogSegmentOffset.

Referenced by XLogInsertRecord().

◆ SetInstallXLogFileSegmentActive()

void SetInstallXLogFileSegmentActive ( void  )

Definition at line 9528 of file xlog.c.

9529{
9530 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9532 LWLockRelease(ControlFileLock);
9533}

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by BootStrapXLOG(), StartupXLOG(), and WaitForWALToBecomeAvailable().

◆ SetWalWriterSleeping()

void SetWalWriterSleeping ( bool  sleeping)

Definition at line 9551 of file xlog.c.

9552{
9554 XLogCtl->WalWriterSleeping = sleeping;
9556}
bool WalWriterSleeping
Definition: xlog.c:533

References XLogCtlData::info_lck, SpinLockAcquire, SpinLockRelease, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by WalWriterMain().

◆ show_archive_command()

const char * show_archive_command ( void  )

Definition at line 4852 of file xlog.c.

4853{
4854 if (XLogArchivingActive())
4855 return XLogArchiveCommand;
4856 else
4857 return "(disabled)";
4858}
char * XLogArchiveCommand
Definition: xlog.c:121

References XLogArchiveCommand, and XLogArchivingActive.

◆ show_in_hot_standby()

const char * show_in_hot_standby ( void  )

Definition at line 4864 of file xlog.c.

4865{
4866 /*
4867 * We display the actual state based on shared memory, so that this GUC
4868 * reports up-to-date state if examined intra-query. The underlying
4869 * variable (in_hot_standby_guc) changes only when we transmit a new value
4870 * to the client.
4871 */
4872 return RecoveryInProgress() ? "on" : "off";
4873}

References RecoveryInProgress().

◆ ShutdownXLOG()

void ShutdownXLOG ( int  code,
Datum  arg 
)

Definition at line 6654 of file xlog.c.

6655{
6656 /*
6657 * We should have an aux process resource owner to use, and we should not
6658 * be in a transaction that's installed some other resowner.
6659 */
6661 Assert(CurrentResourceOwner == NULL ||
6664
6665 /* Don't be chatty in standalone mode */
6667 (errmsg("shutting down")));
6668
6669 /*
6670 * Signal walsenders to move to stopping state.
6671 */
6673
6674 /*
6675 * Wait for WAL senders to be in stopping state. This prevents commands
6676 * from writing new WAL.
6677 */
6679
6680 if (RecoveryInProgress())
6682 else
6683 {
6684 /*
6685 * If archiving is enabled, rotate the last XLOG file so that all the
6686 * remaining records are archived (postmaster wakes up the archiver
6687 * process one more time at the end of shutdown). The checkpoint
6688 * record will go to the next XLOG file and won't be archived (yet).
6689 */
6690 if (XLogArchivingActive())
6691 RequestXLogSwitch(false);
6692
6694 }
6695}
bool IsPostmasterEnvironment
Definition: globals.c:119
ResourceOwner CurrentResourceOwner
Definition: resowner.c:173
ResourceOwner AuxProcessResourceOwner
Definition: resowner.c:176
void WalSndInitStopping(void)
Definition: walsender.c:3858
void WalSndWaitStopping(void)
Definition: walsender.c:3884
bool CreateRestartPoint(int flags)
Definition: xlog.c:7639
bool CreateCheckPoint(int flags)
Definition: xlog.c:6941

References Assert(), AuxProcessResourceOwner, CHECKPOINT_FAST, CHECKPOINT_IS_SHUTDOWN, CreateCheckPoint(), CreateRestartPoint(), CurrentResourceOwner, ereport, errmsg(), IsPostmasterEnvironment, LOG, NOTICE, RecoveryInProgress(), RequestXLogSwitch(), WalSndInitStopping(), WalSndWaitStopping(), and XLogArchivingActive.

Referenced by CheckpointerMain(), and InitPostgres().

◆ StartupXLOG()

void StartupXLOG ( void  )

Definition at line 5469 of file xlog.c.

5470{
5472 CheckPoint checkPoint;
5473 bool wasShutdown;
5474 bool didCrash;
5475 bool haveTblspcMap;
5476 bool haveBackupLabel;
5477 XLogRecPtr EndOfLog;
5478 TimeLineID EndOfLogTLI;
5479 TimeLineID newTLI;
5480 bool performedWalRecovery;
5481 EndOfWalRecoveryInfo *endOfRecoveryInfo;
5484 TransactionId oldestActiveXID;
5485 bool promoted = false;
5486 char timebuf[128];
5487
5488 /*
5489 * We should have an aux process resource owner to use, and we should not
5490 * be in a transaction that's installed some other resowner.
5491 */
5493 Assert(CurrentResourceOwner == NULL ||
5496
5497 /*
5498 * Check that contents look valid.
5499 */
5501 ereport(FATAL,
5503 errmsg("control file contains invalid checkpoint location")));
5504
5505 switch (ControlFile->state)
5506 {
5507 case DB_SHUTDOWNED:
5508
5509 /*
5510 * This is the expected case, so don't be chatty in standalone
5511 * mode
5512 */
5514 (errmsg("database system was shut down at %s",
5516 timebuf, sizeof(timebuf)))));
5517 break;
5518
5520 ereport(LOG,
5521 (errmsg("database system was shut down in recovery at %s",
5523 timebuf, sizeof(timebuf)))));
5524 break;
5525
5526 case DB_SHUTDOWNING:
5527 ereport(LOG,
5528 (errmsg("database system shutdown was interrupted; last known up at %s",
5530 timebuf, sizeof(timebuf)))));
5531 break;
5532
5534 ereport(LOG,
5535 (errmsg("database system was interrupted while in recovery at %s",
5537 timebuf, sizeof(timebuf))),
5538 errhint("This probably means that some data is corrupted and"
5539 " you will have to use the last backup for recovery.")));
5540 break;
5541
5543 ereport(LOG,
5544 (errmsg("database system was interrupted while in recovery at log time %s",
5546 timebuf, sizeof(timebuf))),
5547 errhint("If this has occurred more than once some data might be corrupted"
5548 " and you might need to choose an earlier recovery target.")));
5549 break;
5550
5551 case DB_IN_PRODUCTION:
5552 ereport(LOG,
5553 (errmsg("database system was interrupted; last known up at %s",
5555 timebuf, sizeof(timebuf)))));
5556 break;
5557
5558 default:
5559 ereport(FATAL,
5561 errmsg("control file contains invalid database cluster state")));
5562 }
5563
5564 /* This is just to allow attaching to startup process with a debugger */
5565#ifdef XLOG_REPLAY_DELAY
5567 pg_usleep(60000000L);
5568#endif
5569
5570 /*
5571 * Verify that pg_wal, pg_wal/archive_status, and pg_wal/summaries exist.
5572 * In cases where someone has performed a copy for PITR, these directories
5573 * may have been excluded and need to be re-created.
5574 */
5576
5577 /* Set up timeout handler needed to report startup progress. */
5581
5582 /*----------
5583 * If we previously crashed, perform a couple of actions:
5584 *
5585 * - The pg_wal directory may still include some temporary WAL segments
5586 * used when creating a new segment, so perform some clean up to not
5587 * bloat this path. This is done first as there is no point to sync
5588 * this temporary data.
5589 *
5590 * - There might be data which we had written, intending to fsync it, but
5591 * which we had not actually fsync'd yet. Therefore, a power failure in
5592 * the near future might cause earlier unflushed writes to be lost, even
5593 * though more recent data written to disk from here on would be
5594 * persisted. To avoid that, fsync the entire data directory.
5595 */
5598 {
5601 didCrash = true;
5602 }
5603 else
5604 didCrash = false;
5605
5606 /*
5607 * Prepare for WAL recovery if needed.
5608 *
5609 * InitWalRecovery analyzes the control file and the backup label file, if
5610 * any. It updates the in-memory ControlFile buffer according to the
5611 * starting checkpoint, and sets InRecovery and ArchiveRecoveryRequested.
5612 * It also applies the tablespace map file, if any.
5613 */
5614 InitWalRecovery(ControlFile, &wasShutdown,
5615 &haveBackupLabel, &haveTblspcMap);
5616 checkPoint = ControlFile->checkPointCopy;
5617
5618 /* initialize shared memory variables from the checkpoint record */
5619 TransamVariables->nextXid = checkPoint.nextXid;
5620 TransamVariables->nextOid = checkPoint.nextOid;
5622 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5623 AdvanceOldestClogXid(checkPoint.oldestXid);
5624 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5625 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5627 checkPoint.newestCommitTsXid);
5628
5629 /*
5630 * Clear out any old relcache cache files. This is *necessary* if we do
5631 * any WAL replay, since that would probably result in the cache files
5632 * being out of sync with database reality. In theory we could leave them
5633 * in place if the database had been cleanly shut down, but it seems
5634 * safest to just remove them always and let them be rebuilt during the
5635 * first backend startup. These files need to be removed from all
5636 * directories including pg_tblspc; however, the symlinks are created only
5637 * after reading the tablespace_map file in case of archive recovery from
5638 * backup, so we need to clear old relcache files here, after creating the
5639 * symlinks.
5640 */
5642
5643 /*
5644 * Initialize replication slots, before there's a chance to remove
5645 * required resources.
5646 */
5648
5649 /*
5650 * Startup logical state, needs to be setup now so we have proper data
5651 * during crash recovery.
5652 */
5654
5655 /*
5656 * Startup CLOG. This must be done after TransamVariables->nextXid has
5657 * been initialized and before we accept connections or begin WAL replay.
5658 */
5659 StartupCLOG();
5660
5661 /*
5662 * Startup MultiXact. We need to do this early to be able to replay
5663 * truncations.
5664 */
5666
5667 /*
5668 * Ditto for commit timestamps. Activate the facility if the setting is
5669 * enabled in the control file, as there should be no tracking of commit
5670 * timestamps done when the setting was disabled. This facility can be
5671 * started or stopped when replaying a XLOG_PARAMETER_CHANGE record.
5672 */
5675
5676 /*
5677 * Recover knowledge about replay progress of known replication partners.
5678 */
5680
5681 /*
5682 * Initialize unlogged LSN. On a clean shutdown, it's restored from the
5683 * control file. On recovery, all unlogged relations are blown away, so
5684 * the unlogged LSN counter can be reset too.
5685 */
5689 else
5692
5693 /*
5694 * Copy any missing timeline history files between 'now' and the recovery
5695 * target timeline from archive to pg_wal. While we don't need those files
5696 * ourselves - the history file of the recovery target timeline covers all
5697 * the previous timelines in the history too - a cascading standby server
5698 * might be interested in them. Or, if you archive the WAL from this
5699 * server to a different archive than the primary, it'd be good for all
5700 * the history files to get archived there after failover, so that you can
5701 * use one of the old timelines as a PITR target. Timeline history files
5702 * are small, so it's better to copy them unnecessarily than not copy them
5703 * and regret later.
5704 */
5706
5707 /*
5708 * Before running in recovery, scan pg_twophase and fill in its status to
5709 * be able to work on entries generated by redo. Doing a scan before
5710 * taking any recovery action has the merit of discarding any 2PC files that
5711 * are newer than the first record to replay, which avoids conflicts at
5712 * replay. This also avoids any subsequent scans when doing recovery
5713 * of the on-disk two-phase data.
5714 */
5716
5717 /*
5718 * When starting with crash recovery, reset pgstat data - it might not be
5719 * valid. Otherwise restore pgstat data. It's safe to do this here,
5720 * because postmaster will not yet have started any other processes.
5721 *
5722 * NB: Restoring replication slot stats relies on slot state to have
5723 * already been restored from disk.
5724 *
5725 * TODO: With a bit of extra work we could just start with a pgstat file
5726 * associated with the checkpoint redo location we're starting from.
5727 */
5728 if (didCrash)
5730 else
5732
5734
5737
5738 /* REDO */
5739 if (InRecovery)
5740 {
5741 /* Initialize state for RecoveryInProgress() */
5745 else
5748
5749 /*
5750 * Update pg_control to show that we are recovering and to show the
5751 * selected checkpoint as the place we are starting from. We also mark
5752 * pg_control with any minimum recovery stop point obtained from a
5753 * backup history file.
5754 *
5755 * No need to hold ControlFileLock yet, we aren't up far enough.
5756 */
5758
5759 /*
5760 * If there was a backup label file, it's done its job and the info
5761 * has now been propagated into pg_control. We must get rid of the
5762 * label file so that if we crash during recovery, we'll pick up at
5763 * the latest recovery restartpoint instead of going all the way back
5764 * to the backup start point. It seems prudent though to just rename
5765 * the file out of the way rather than delete it completely.
5766 */
5767 if (haveBackupLabel)
5768 {
5769 unlink(BACKUP_LABEL_OLD);
5771 }
5772
5773 /*
5774 * If there was a tablespace_map file, it's done its job and the
5775 * symlinks have been created. We must get rid of the map file so
5776 * that if we crash during recovery, we don't create symlinks again.
5777 * It seems prudent though to just rename the file out of the way
5778 * rather than delete it completely.
5779 */
5780 if (haveTblspcMap)
5781 {
5782 unlink(TABLESPACE_MAP_OLD);
5784 }
5785
5786 /*
5787 * Initialize our local copy of minRecoveryPoint. When doing crash
5788 * recovery we want to replay up to the end of WAL. Particularly, in
5789 * the case of a promoted standby minRecoveryPoint value in the
5790 * control file is only updated after the first checkpoint. However,
5791 * if the instance crashes before the first post-recovery checkpoint
5792 * is completed then recovery will use a stale location causing the
5793 * startup process to think that there are still invalid page
5794 * references when checking for data consistency.
5795 */
5797 {
5800 }
5801 else
5802 {
5805 }
5806
5807 /* Check that the GUCs used to generate the WAL allow recovery */
5809
5810 /*
5811 * We're in recovery, so unlogged relations may be trashed and must be
5812 * reset. This should be done BEFORE allowing Hot Standby
5813 * connections, so that read-only backends don't try to read whatever
5814 * garbage is left over from before.
5815 */
5817
5818 /*
5819 * Likewise, delete any saved transaction snapshot files that got left
5820 * behind by crashed backends.
5821 */
5823
5824 /*
5825 * Initialize for Hot Standby, if enabled. We won't let backends in
5826 * yet, not until we've reached the min recovery point specified in
5827 * control file and we've established a recovery snapshot from a
5828 * running-xacts WAL record.
5829 */
5831 {
5832 TransactionId *xids;
5833 int nxids;
5834
5836 (errmsg_internal("initializing for hot standby")));
5837
5839
5840 if (wasShutdown)
5841 oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
5842 else
5843 oldestActiveXID = checkPoint.oldestActiveXid;
5844 Assert(TransactionIdIsValid(oldestActiveXID));
5845
5846 /* Tell procarray about the range of xids it has to deal with */
5848
5849 /*
5850 * Startup subtrans only. CLOG, MultiXact and commit timestamp
5851 * have already been started up and other SLRUs are not maintained
5852 * during recovery and need not be started yet.
5853 */
5854 StartupSUBTRANS(oldestActiveXID);
5855
5856 /*
5857 * If we're beginning at a shutdown checkpoint, we know that
5858 * nothing was running on the primary at this point. So fake-up an
5859 * empty running-xacts record and use that here and now. Recover
5860 * additional standby state for prepared transactions.
5861 */
5862 if (wasShutdown)
5863 {
5865 TransactionId latestCompletedXid;
5866
5867 /* Update pg_subtrans entries for any prepared transactions */
5869
5870 /*
5871 * Construct a RunningTransactions snapshot representing a
5872 * shut down server, with only prepared transactions still
5873 * alive. We're never overflowed at this point because all
5874 * subxids are listed with their parent prepared transactions.
5875 */
5876 running.xcnt = nxids;
5877 running.subxcnt = 0;
5879 running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
5880 running.oldestRunningXid = oldestActiveXID;
5881 latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
5882 TransactionIdRetreat(latestCompletedXid);
5883 Assert(TransactionIdIsNormal(latestCompletedXid));
5884 running.latestCompletedXid = latestCompletedXid;
5885 running.xids = xids;
5886
5888 }
5889 }
5890
5891 /*
5892 * We're all set for replaying the WAL now. Do it.
5893 */
5895 performedWalRecovery = true;
5896 }
5897 else
5898 performedWalRecovery = false;
5899
5900 /*
5901 * Finish WAL recovery.
5902 */
5903 endOfRecoveryInfo = FinishWalRecovery();
5904 EndOfLog = endOfRecoveryInfo->endOfLog;
5905 EndOfLogTLI = endOfRecoveryInfo->endOfLogTLI;
5906 abortedRecPtr = endOfRecoveryInfo->abortedRecPtr;
5907 missingContrecPtr = endOfRecoveryInfo->missingContrecPtr;
5908
5909 /*
5910 * Reset ps status display, so that no information related to recovery shows
5911 * up.
5912 */
5913 set_ps_display("");
5914
5915 /*
5916 * When recovering from a backup (we are in recovery, and archive recovery
5917 * was requested), complain if we did not roll forward far enough to reach
5918 * the point where the database is consistent. For regular online
5919 * backup-from-primary, that means reaching the end-of-backup WAL record
5920 * (at which point we reset backupStartPoint to be Invalid), for
5921 * backup-from-replica (which can't inject records into the WAL stream),
5922 * that point is when we reach the minRecoveryPoint in pg_control (which
5923 * we purposefully copy last when backing up from a replica). For
5924 * pg_rewind (which creates a backup_label with a method of "pg_rewind")
5925 * or snapshot-style backups (which don't), backupEndRequired will be set
5926 * to false.
5927 *
5928 * Note: it is indeed okay to look at the local variable
5929 * LocalMinRecoveryPoint here, even though ControlFile->minRecoveryPoint
5930 * might be further ahead --- ControlFile->minRecoveryPoint cannot have
5931 * been advanced beyond the WAL we processed.
5932 */
5933 if (InRecovery &&
5934 (EndOfLog < LocalMinRecoveryPoint ||
5936 {
5937 /*
5938 * Ran off end of WAL before reaching end-of-backup WAL record, or
5939 * minRecoveryPoint. That's a bad sign, indicating that you tried to
5940 * recover from an online backup but never called pg_backup_stop(), or
5941 * you didn't archive all the WAL needed.
5942 */
5944 {
5946 ereport(FATAL,
5947 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5948 errmsg("WAL ends before end of online backup"),
5949 errhint("All WAL generated while online backup was taken must be available at recovery.")));
5950 else
5951 ereport(FATAL,
5952 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5953 errmsg("WAL ends before consistent recovery point")));
5954 }
5955 }
5956
5957 /*
5958 * Reset unlogged relations to the contents of their INIT fork. This is
5959 * done AFTER recovery is complete so as to include any unlogged relations
5960 * created during recovery, but BEFORE recovery is marked as having
5961 * completed successfully. Otherwise we'd not retry if any of the post
5962 * end-of-recovery steps fail.
5963 */
5964 if (InRecovery)
5966
5967 /*
5968 * Pre-scan prepared transactions to find out the range of XIDs present.
5969 * This information is not quite needed yet, but it is positioned here so
5970 * that potential problems are detected before any on-disk change is done.
5971 */
5972 oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
5973
5974 /*
5975 * Allow ordinary WAL segment creation before possibly switching to a new
5976 * timeline, which creates a new segment, and after the last ReadRecord().
5977 */
5979
5980 /*
5981 * Consider whether we need to assign a new timeline ID.
5982 *
5983 * If we did archive recovery, we always assign a new ID. This handles a
5984 * couple of issues. If we stopped short of the end of WAL during
5985 * recovery, then we are clearly generating a new timeline and must assign
5986 * it a unique new ID. Even if we ran to the end, modifying the current
5987 * last segment is problematic because it may result in trying to
5988 * overwrite an already-archived copy of that segment, and we encourage
5989 * DBAs to make their archive_commands reject that. We can dodge the
5990 * problem by making the new active segment have a new timeline ID.
5991 *
5992 * In a normal crash recovery, we can just extend the timeline we were in.
5993 */
5994 newTLI = endOfRecoveryInfo->lastRecTLI;
5996 {
5998 ereport(LOG,
5999 (errmsg("selected new timeline ID: %u", newTLI)));
6000
6001 /*
6002 * Make a writable copy of the last WAL segment. (Note that we also
6003 * have a copy of the last block of the old WAL in
6004 * endOfRecoveryInfo->lastPage; we will use that below.)
6005 */
6006 XLogInitNewTimeline(EndOfLogTLI, EndOfLog, newTLI);
6007
6008 /*
6009 * Remove the signal files out of the way, so that we don't
6010 * accidentally re-enter archive recovery mode in a subsequent crash.
6011 */
6012 if (endOfRecoveryInfo->standby_signal_file_found)
6014
6015 if (endOfRecoveryInfo->recovery_signal_file_found)
6017
6018 /*
6019 * Write the timeline history file, and have it archived. After this
6020 * point (or rather, as soon as the file is archived), the timeline
6021 * will appear as "taken" in the WAL archive and to any standby
6022 * servers. If we crash before actually switching to the new
6023 * timeline, standby servers will nevertheless think that we switched
6024 * to the new timeline, and will try to connect to the new timeline.
6025 * To minimize the window for that, try to do as little as possible
6026 * between here and writing the end-of-recovery record.
6027 */
6029 EndOfLog, endOfRecoveryInfo->recoveryStopReason);
6030
6031 ereport(LOG,
6032 (errmsg("archive recovery complete")));
6033 }
6034
6035 /* Save the selected TimeLineID in shared memory, too */
6037 XLogCtl->InsertTimeLineID = newTLI;
6038 XLogCtl->PrevTimeLineID = endOfRecoveryInfo->lastRecTLI;
6040
6041 /*
6042 * Actually, if WAL ended in an incomplete record, skip the parts that
6043 * made it through and start writing after the portion that persisted.
6044 * (It's critical to first write an OVERWRITE_CONTRECORD message, which
6045 * we'll do as soon as we're open for writing new WAL.)
6046 */
6048 {
6049 /*
6050 * We should only have a missingContrecPtr if we're not switching to a
6051 * new timeline. When a timeline switch occurs, WAL is copied from the
6052 * old timeline to the new only up to the end of the last complete
6053 * record, so there can't be an incomplete WAL record that we need to
6054 * disregard.
6055 */
6056 Assert(newTLI == endOfRecoveryInfo->lastRecTLI);
6058 EndOfLog = missingContrecPtr;
6059 }
6060
6061 /*
6062 * Prepare to write WAL starting at EndOfLog location, and init xlog
6063 * buffer cache using the block containing the last record from the
6064 * previous incarnation.
6065 */
6066 Insert = &XLogCtl->Insert;
6067 Insert->PrevBytePos = XLogRecPtrToBytePos(endOfRecoveryInfo->lastRec);
6068 Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog);
6069
6070 /*
6071 * Tricky point here: lastPage contains the *last* block that the LastRec
6072 * record spans, not the one it starts in. The last block is indeed the
6073 * one we want to use.
6074 */
6075 if (EndOfLog % XLOG_BLCKSZ != 0)
6076 {
6077 char *page;
6078 int len;
6079 int firstIdx;
6080
6081 firstIdx = XLogRecPtrToBufIdx(EndOfLog);
6082 len = EndOfLog - endOfRecoveryInfo->lastPageBeginPtr;
6083 Assert(len < XLOG_BLCKSZ);
6084
6085 /* Copy the valid part of the last block, and zero the rest */
6086 page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
6087 memcpy(page, endOfRecoveryInfo->lastPage, len);
6088 memset(page + len, 0, XLOG_BLCKSZ - len);
6089
6090 pg_atomic_write_u64(&XLogCtl->xlblocks[firstIdx], endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ);
6091 XLogCtl->InitializedUpTo = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ;
6092 }
6093 else
6094 {
6095 /*
6096 * There is no partial block to copy. Just set InitializedUpTo, and
6097 * let the first attempt to insert a log record to initialize the next
6098 * buffer.
6099 */
6100 XLogCtl->InitializedUpTo = EndOfLog;
6101 }
6102
6103 /*
6104 * Update local and shared status. This is OK to do without any locks
6105 * because no other process can be reading or writing WAL yet.
6106 */
6107 LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
6111 XLogCtl->LogwrtRqst.Write = EndOfLog;
6112 XLogCtl->LogwrtRqst.Flush = EndOfLog;
6113
6114 /*
6115 * Preallocate additional log files, if wanted.
6116 */
6117 PreallocXlogFiles(EndOfLog, newTLI);
6118
6119 /*
6120 * Okay, we're officially UP.
6121 */
6122 InRecovery = false;
6123
6124 /* start the archive_timeout timer and LSN running */
6125 XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
6126 XLogCtl->lastSegSwitchLSN = EndOfLog;
6127
6128 /* also initialize latestCompletedXid, to nextXid - 1 */
6129 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
6132 LWLockRelease(ProcArrayLock);
6133
6134 /*
6135 * Start up subtrans, if not already done for hot standby. (commit
6136 * timestamps are started below, if necessary.)
6137 */
6139 StartupSUBTRANS(oldestActiveXID);
6140
6141 /*
6142 * Perform end of recovery actions for any SLRUs that need it.
6143 */
6144 TrimCLOG();
6145 TrimMultiXact();
6146
6147 /*
6148 * Reload shared-memory state for prepared transactions. This needs to
6149 * happen before renaming the last partial segment of the old timeline as
6150 * it may be possible that we have to recover some transactions from it.
6151 */
6153
6154 /* Shut down xlogreader */
6156
6157 /* Enable WAL writes for this backend only. */
6159
6160 /* If necessary, write overwrite-contrecord before doing anything else */
6162 {
6165 }
6166
6167 /*
6168 * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE
6169 * record before resource manager writes cleanup WAL records or checkpoint
6170 * record is written.
6171 */
6172 Insert->fullPageWrites = lastFullPageWrites;
6174
6175 /*
6176 * Emit checkpoint or end-of-recovery record in XLOG, if required.
6177 */
6178 if (performedWalRecovery)
6179 promoted = PerformRecoveryXLogAction();
6180
6181 /*
6182 * If any of the critical GUCs have changed, log them before we allow
6183 * backends to write WAL.
6184 */
6186
6187 /* If this is archive recovery, perform post-recovery cleanup actions. */
6189 CleanupAfterArchiveRecovery(EndOfLogTLI, EndOfLog, newTLI);
6190
6191 /*
6192 * Local WAL inserts enabled, so it's time to finish initialization of
6193 * commit timestamp.
6194 */
6196
6197 /* Clean up EndOfWalRecoveryInfo data to appease Valgrind leak checking */
6198 if (endOfRecoveryInfo->lastPage)
6199 pfree(endOfRecoveryInfo->lastPage);
6200 pfree(endOfRecoveryInfo->recoveryStopReason);
6201 pfree(endOfRecoveryInfo);
6202
6203 /*
6204 * All done with end-of-recovery actions.
6205 *
6206 * Now allow backends to write WAL and update the control file status in
6207 * consequence. SharedRecoveryState, that controls if backends can write
6208 * WAL, is updated while holding ControlFileLock to prevent other backends
6209 * from looking at an inconsistent state of the control file in shared memory.
6210 * There is still a small window during which backends can write WAL and
6211 * the control file is still referring to a system not in DB_IN_PRODUCTION
6212 * state while looking at the on-disk control file.
6213 *
6214 * Also, we use info_lck to update SharedRecoveryState to ensure that
6215 * there are no race conditions concerning visibility of other recent
6216 * updates to shared memory.
6217 */
6218 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6220
6224
6226 LWLockRelease(ControlFileLock);
6227
6228 /*
6229 * Shutdown the recovery environment. This must occur after
6230 * RecoverPreparedTransactions() (see notes in lock_twophase_recover())
6231 * and after switching SharedRecoveryState to RECOVERY_STATE_DONE so that
6232 * any session building a snapshot will not rely on KnownAssignedXids, as
6233 * RecoveryInProgress() would return false at this stage. This is
6234 * particularly critical for prepared 2PC transactions, that would still
6235 * need to be included in snapshots once recovery has ended.
6236 */
6239
6240 /*
6241 * If there were cascading standby servers connected to us, nudge any wal
6242 * sender processes to notice that we've been promoted.
6243 */
6244 WalSndWakeup(true, true);
6245
6246 /*
6247 * If this was a promotion, request an (online) checkpoint now. This isn't
6248 * required for consistency, but the last restartpoint might be far back,
6249 * and in case of a crash, recovering from it might take longer than is
6250 * appropriate now that we're not in standby mode anymore.
6251 */
6252 if (promoted)
6254}
static void pg_atomic_write_membarrier_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:492
TimeLineID findNewestTimeLine(TimeLineID startTLI)
Definition: timeline.c:264
void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
Definition: timeline.c:50
void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, XLogRecPtr switchpoint, char *reason)
Definition: timeline.c:304
void startup_progress_timeout_handler(void)
Definition: startup.c:303
uint32 TransactionId
Definition: c.h:658
void StartupCLOG(void)
Definition: clog.c:842
void TrimCLOG(void)
Definition: clog.c:857
void StartupCommitTs(void)
Definition: commit_ts.c:608
void CompleteCommitTsInitialization(void)
Definition: commit_ts.c:618
void SyncDataDirectory(void)
Definition: fd.c:3606
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:476
void TrimMultiXact(void)
Definition: multixact.c:2108
void StartupMultiXact(void)
Definition: multixact.c:2083
void StartupReplicationOrigin(void)
Definition: origin.c:722
@ DB_IN_PRODUCTION
Definition: pg_control.h:97
@ DB_IN_CRASH_RECOVERY
Definition: pg_control.h:95
const void size_t len
void pgstat_restore_stats(void)
Definition: pgstat.c:505
void pgstat_discard_stats(void)
Definition: pgstat.c:517
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:1054
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition: procarray.c:1023
static void set_ps_display(const char *activity)
Definition: ps_status.h:40
void ResetUnloggedRelations(int op)
Definition: reinit.c:47
#define UNLOGGED_RELATION_INIT
Definition: reinit.h:28
#define UNLOGGED_RELATION_CLEANUP
Definition: reinit.h:27
void RelationCacheInitFileRemove(void)
Definition: relcache.c:6900
void StartupReorderBuffer(void)
void StartupReplicationSlots(void)
Definition: slot.c:2187
void DeleteAllExportedSnapshotFiles(void)
Definition: snapmgr.c:1585
void InitRecoveryTransactionEnvironment(void)
Definition: standby.c:95
void ShutdownRecoveryTransactionEnvironment(void)
Definition: standby.c:161
@ SUBXIDS_IN_SUBTRANS
Definition: standby.h:82
XLogRecPtr lastPageBeginPtr
Definition: xlogrecovery.h:121
XLogRecPtr abortedRecPtr
Definition: xlogrecovery.h:130
XLogRecPtr missingContrecPtr
Definition: xlogrecovery.h:131
TimeLineID endOfLogTLI
Definition: xlogrecovery.h:119
TransactionId oldestRunningXid
Definition: standby.h:92
TransactionId nextXid
Definition: standby.h:91
TransactionId latestCompletedXid
Definition: standby.h:95
subxids_array_status subxid_status
Definition: standby.h:90
TransactionId * xids
Definition: standby.h:97
FullTransactionId latestCompletedXid
Definition: transam.h:238
pg_atomic_uint64 logInsertResult
Definition: xlog.c:471
void StartupSUBTRANS(TransactionId oldestActiveXID)
Definition: subtrans.c:283
TimeoutId RegisterTimeout(TimeoutId id, timeout_handler_proc handler)
Definition: timeout.c:505
@ STARTUP_PROGRESS_TIMEOUT
Definition: timeout.h:38
#define TransactionIdRetreat(dest)
Definition: transam.h:141
static void FullTransactionIdRetreat(FullTransactionId *dest)
Definition: transam.h:103
#define XidFromFullTransactionId(x)
Definition: transam.h:48
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
void RecoverPreparedTransactions(void)
Definition: twophase.c:2083
void restoreTwoPhaseData(void)
Definition: twophase.c:1904
TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
Definition: twophase.c:1966
void StandbyRecoverPreparedTransactions(void)
Definition: twophase.c:2045
void WalSndWakeup(bool physical, bool logical)
Definition: walsender.c:3779
void UpdateFullPageWrites(void)
Definition: xlog.c:8216
static void ValidateXLOGDirectoryStructure(void)
Definition: xlog.c:4115
static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
Definition: xlog.c:7489
static void XLogReportParameters(void)
Definition: xlog.c:8153
static bool PerformRecoveryXLogAction(void)
Definition: xlog.c:6336
static void CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
Definition: xlog.c:5329
static bool lastFullPageWrites
Definition: xlog.c:218
static void XLogInitNewTimeline(TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
Definition: xlog.c:5254
static void CheckRequiredParameterValues(void)
Definition: xlog.c:5425
static void RemoveTempXlogFiles(void)
Definition: xlog.c:3848
static char * str_time(pg_time_t tnow, char *buf, size_t bufsize)
Definition: xlog.c:5241
#define TABLESPACE_MAP_OLD
Definition: xlog.h:307
#define TABLESPACE_MAP
Definition: xlog.h:306
#define STANDBY_SIGNAL_FILE
Definition: xlog.h:302
#define BACKUP_LABEL_OLD
Definition: xlog.h:304
#define BACKUP_LABEL_FILE
Definition: xlog.h:303
#define RECOVERY_SIGNAL_FILE
Definition: xlog.h:301
@ RECOVERY_STATE_CRASH
Definition: xlog.h:91
@ RECOVERY_STATE_ARCHIVE
Definition: xlog.h:92
#define XRecOffIsValid(xlrp)
void ShutdownWalRecovery(void)
bool InArchiveRecovery
Definition: xlogrecovery.c:140
void PerformWalRecovery(void)
static XLogRecPtr missingContrecPtr
Definition: xlogrecovery.c:380
static XLogRecPtr abortedRecPtr
Definition: xlogrecovery.c:379
EndOfWalRecoveryInfo * FinishWalRecovery(void)
void InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
Definition: xlogrecovery.c:519
TimeLineID recoveryTargetTLI
Definition: xlogrecovery.c:124
HotStandbyState standbyState
Definition: xlogutils.c:53
bool InRecovery
Definition: xlogutils.c:50
@ STANDBY_DISABLED
Definition: xlogutils.h:52

References abortedRecPtr, EndOfWalRecoveryInfo::abortedRecPtr, AdvanceOldestClogXid(), ArchiveRecoveryRequested, Assert(), AuxProcessResourceOwner, BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFileData::checkPoint, CHECKPOINT_FORCE, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), CleanupAfterArchiveRecovery(), CompleteCommitTsInitialization(), ControlFile, CreateOverwriteContrecordRecord(), CurrentResourceOwner, DB_IN_ARCHIVE_RECOVERY, DB_IN_CRASH_RECOVERY, DB_IN_PRODUCTION, DB_SHUTDOWNED, DB_SHUTDOWNED_IN_RECOVERY, DB_SHUTDOWNING, DEBUG1, DeleteAllExportedSnapshotFiles(), doPageWrites, durable_rename(), durable_unlink(), EnableHotStandby, EndOfWalRecoveryInfo::endOfLog, EndOfWalRecoveryInfo::endOfLogTLI, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg(), errmsg_internal(), FATAL, findNewestTimeLine(), FinishWalRecovery(), FirstNormalUnloggedLSN, XLogwrtRqst::Flush, XLogwrtResult::Flush, CheckPoint::fullPageWrites, FullTransactionIdRetreat(), InArchiveRecovery, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, InitRecoveryTransactionEnvironment(), InitWalRecovery(), InRecovery, XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, IsBootstrapProcessingMode, IsPostmasterEnvironment, lastFullPageWrites, EndOfWalRecoveryInfo::lastPage, EndOfWalRecoveryInfo::lastPageBeginPtr, EndOfWalRecoveryInfo::lastRec, EndOfWalRecoveryInfo::lastRecTLI, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, TransamVariablesData::latestCompletedXid, RunningTransactionsData::latestCompletedXid, len, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LocalSetXLogInsertAllowed(), LOG, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, missingContrecPtr, 
EndOfWalRecoveryInfo::missingContrecPtr, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, NOTICE, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, XLogCtlData::pages, PerformRecoveryXLogAction(), PerformWalRecovery(), pfree(), pg_atomic_write_membarrier_u64(), pg_atomic_write_u64(), pg_usleep(), pgstat_discard_stats(), pgstat_restore_stats(), PreallocXlogFiles(), PrescanPreparedTransactions(), XLogCtlData::PrevTimeLineID, ProcArrayApplyRecoveryInfo(), ProcArrayInitRecovery(), RecoverPreparedTransactions(), RECOVERY_SIGNAL_FILE, EndOfWalRecoveryInfo::recovery_signal_file_found, RECOVERY_STATE_ARCHIVE, RECOVERY_STATE_CRASH, RECOVERY_STATE_DONE, EndOfWalRecoveryInfo::recoveryStopReason, recoveryTargetTLI, CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RegisterTimeout(), RelationCacheInitFileRemove(), RemoveTempXlogFiles(), RequestCheckpoint(), ResetUnloggedRelations(), restoreTimeLineHistoryFiles(), restoreTwoPhaseData(), set_ps_display(), SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), XLogCtlData::SharedRecoveryState, ShutdownRecoveryTransactionEnvironment(), ShutdownWalRecovery(), SpinLockAcquire, SpinLockRelease, STANDBY_DISABLED, STANDBY_SIGNAL_FILE, EndOfWalRecoveryInfo::standby_signal_file_found, StandbyRecoverPreparedTransactions(), standbyState, STARTUP_PROGRESS_TIMEOUT, startup_progress_timeout_handler(), StartupCLOG(), StartupCommitTs(), StartupMultiXact(), StartupReorderBuffer(), StartupReplicationOrigin(), StartupReplicationSlots(), StartupSUBTRANS(), ControlFileData::state, str_time(), 
RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, SyncDataDirectory(), TABLESPACE_MAP, TABLESPACE_MAP_OLD, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdRetreat, TransamVariables, TrimCLOG(), TrimMultiXact(), UNLOGGED_RELATION_CLEANUP, UNLOGGED_RELATION_INIT, XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, UpdateControlFile(), UpdateFullPageWrites(), ValidateXLOGDirectoryStructure(), WalSndWakeup(), XLogwrtRqst::Write, XLogwrtResult::Write, writeTimeLineHistory(), RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLogCtlData::xlblocks, XLogCtl, XLogInitNewTimeline(), XLogRecPtrIsInvalid, XLogRecPtrToBufIdx, XLogRecPtrToBytePos(), XLogReportParameters(), and XRecOffIsValid.

Referenced by InitPostgres(), and StartupProcessMain().

◆ str_time()

static char * str_time ( pg_time_t  tnow,
char *  buf,
size_t  bufsize 
)
static

Definition at line 5241 of file xlog.c.

5242{
5243 pg_strftime(buf, bufsize,
5244 "%Y-%m-%d %H:%M:%S %Z",
5245 pg_localtime(&tnow, log_timezone));
5246
5247 return buf;
5248}
#define bufsize
Definition: indent_globs.h:36
static char * buf
Definition: pg_test_fsync.c:72
size_t pg_strftime(char *s, size_t maxsize, const char *format, const struct pg_tm *t)
Definition: strftime.c:128
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition: localtime.c:1344
PGDLLIMPORT pg_tz * log_timezone
Definition: pgtz.c:31

References buf, bufsize, log_timezone, pg_localtime(), and pg_strftime().

Referenced by StartupXLOG().

◆ SwitchIntoArchiveRecovery()

void SwitchIntoArchiveRecovery ( XLogRecPtr  EndRecPtr,
TimeLineID  replayTLI 
)

Definition at line 6261 of file xlog.c.

6262{
6263 /* initialize minRecoveryPoint to this record */
6264 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6266 if (ControlFile->minRecoveryPoint < EndRecPtr)
6267 {
6268 ControlFile->minRecoveryPoint = EndRecPtr;
6269 ControlFile->minRecoveryPointTLI = replayTLI;
6270 }
6271 /* update local copy */
6274
6275 /*
6276 * The startup process can update its local copy of minRecoveryPoint from
6277 * this point.
6278 */
6280
6282
6283 /*
6284 * We update SharedRecoveryState while holding the lock on ControlFileLock
6285 * so both states are consistent in shared memory.
6286 */
6290
6291 LWLockRelease(ControlFileLock);
6292}
static bool updateMinRecoveryPoint
Definition: xlog.c:648

References ControlFile, DB_IN_ARCHIVE_RECOVERY, XLogCtlData::info_lck, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RECOVERY_STATE_ARCHIVE, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, ControlFileData::state, UpdateControlFile(), updateMinRecoveryPoint, and XLogCtl.

Referenced by ReadRecord().

◆ update_checkpoint_display()

static void update_checkpoint_display ( int  flags,
bool  restartpoint,
bool  reset 
)
static

Definition at line 6876 of file xlog.c.

6877{
6878 /*
6879 * The status is reported only for end-of-recovery and shutdown
6880 * checkpoints or shutdown restartpoints. Updating the ps display is
6881 * useful in those situations as it may not be possible to rely on
6882 * pg_stat_activity to see the status of the checkpointer or the startup
6883 * process.
6884 */
6885 if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY)) == 0)
6886 return;
6887
6888 if (reset)
6889 set_ps_display("");
6890 else
6891 {
6892 char activitymsg[128];
6893
6894 snprintf(activitymsg, sizeof(activitymsg), "performing %s%s%s",
6895 (flags & CHECKPOINT_END_OF_RECOVERY) ? "end-of-recovery " : "",
6896 (flags & CHECKPOINT_IS_SHUTDOWN) ? "shutdown " : "",
6897 restartpoint ? "restartpoint" : "checkpoint");
6898 set_ps_display(activitymsg);
6899 }
6900}

References CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IS_SHUTDOWN, reset(), set_ps_display(), and snprintf.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateCheckPointDistanceEstimate()

static void UpdateCheckPointDistanceEstimate ( uint64  nbytes)
static

Definition at line 6838 of file xlog.c.

6839{
6840 /*
6841 * To estimate the number of segments consumed between checkpoints, keep a
6842 * moving average of the amount of WAL generated in previous checkpoint
6843 * cycles. However, if the load is bursty, with quiet periods and busy
6844 * periods, we want to cater for the peak load. So instead of a plain
6845 * moving average, let the average decline slowly if the previous cycle
6846 * used less WAL than estimated, but bump it up immediately if it used
6847 * more.
6848 *
6849 * When checkpoints are triggered by max_wal_size, this should converge to
6850 * CheckpointSegments * wal_segment_size.
6851 *
6852 * Note: This doesn't pay any attention to what caused the checkpoint.
6853 * Checkpoints triggered manually with CHECKPOINT command, or by e.g.
6854 * starting a base backup, are counted the same as those created
6855 * automatically. The slow-decline will largely mask them out, if they are
6856 * not frequent. If they are frequent, it seems reasonable to count them
6857 * in as any others; if you issue a manual checkpoint every 5 minutes and
6858 * never let a timed checkpoint happen, it makes sense to base the
6859 * preallocation on that 5 minute interval rather than whatever
6860 * checkpoint_timeout is set to.
6861 */
6862 PrevCheckPointDistance = nbytes;
6863 if (CheckPointDistanceEstimate < nbytes)
6864 CheckPointDistanceEstimate = nbytes;
6865 else
6866 CheckPointDistanceEstimate =
6867 (0.90 * CheckPointDistanceEstimate + 0.10 * (double) nbytes);
6868}

References CheckPointDistanceEstimate, and PrevCheckPointDistance.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateControlFile()

static void UpdateControlFile ( void  )
static

Definition at line 4586 of file xlog.c.

4587{
4588 update_controlfile(DataDir, ControlFile, true);
4589}
void update_controlfile(const char *DataDir, ControlFileData *ControlFile, bool do_sync)

References ControlFile, DataDir, and update_controlfile().

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), ReachedEndOfBackup(), StartupXLOG(), SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), xlog_redo(), and XLogReportParameters().

◆ UpdateFullPageWrites()

void UpdateFullPageWrites ( void  )

Definition at line 8216 of file xlog.c.

8217{
8219 bool recoveryInProgress;
8220
8221 /*
8222 * Do nothing if full_page_writes has not been changed.
8223 *
8224 * It's safe to check the shared full_page_writes without the lock,
8225 * because we assume that there is no concurrently running process which
8226 * can update it.
8227 */
8228 if (fullPageWrites == Insert->fullPageWrites)
8229 return;
8230
8231 /*
8232 * Perform this outside critical section so that the WAL insert
8233 * initialization done by RecoveryInProgress() doesn't trigger an
8234 * assertion failure.
8235 */
8236 recoveryInProgress = RecoveryInProgress();
8237
8239
8240 /*
8241 * It's always safe to take full page images, even when not strictly
8242 * required, but not the other way round. So if we're setting full_page_writes
8243 * to true, first set it true and then write the WAL record. If we're
8244 * setting it to false, first write the WAL record and then set the global
8245 * flag.
8246 */
8247 if (fullPageWrites)
8248 {
8250 Insert->fullPageWrites = true;
8252 }
8253
8254 /*
8255 * Write an XLOG_FPW_CHANGE record. This allows us to keep track of
8256 * full_page_writes during archive recovery, if required.
8257 */
8258 if (XLogStandbyInfoActive() && !recoveryInProgress)
8259 {
8261 XLogRegisterData(&fullPageWrites, sizeof(bool));
8262
8263 XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
8264 }
8265
8266 if (!fullPageWrites)
8267 {
8269 Insert->fullPageWrites = false;
8271 }
8273}
#define XLOG_FPW_CHANGE
Definition: pg_control.h:76

References END_CRIT_SECTION, fullPageWrites, XLogCtlData::Insert, Insert(), RecoveryInProgress(), START_CRIT_SECTION, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_FPW_CHANGE, XLogBeginInsert(), XLogCtl, XLogInsert(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by StartupXLOG(), and UpdateSharedMemoryConfig().

◆ UpdateLastRemovedPtr()

static void UpdateLastRemovedPtr ( char *  filename)
static

Definition at line 3828 of file xlog.c.

3829{
3830 uint32 tli;
3831 XLogSegNo segno;
3832
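3833 XLogFromFileName(filename, &tli, &segno, wal_segment_size);
3834
3835 SpinLockAcquire(&XLogCtl->info_lck);
3836 if (segno > XLogCtl->lastRemovedSegNo)
3837 XLogCtl->lastRemovedSegNo = segno;
3838 SpinLockRelease(&XLogCtl->info_lck);
3839}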
static void XLogFromFileName(const char *fname, TimeLineID *tli, XLogSegNo *logSegNo, int wal_segsz_bytes)

References filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFromFileName().

Referenced by RemoveOldXlogFiles().

◆ UpdateMinRecoveryPoint()

static void UpdateMinRecoveryPoint ( XLogRecPtr  lsn,
bool  force 
)
static

Definition at line 2700 of file xlog.c.

2701{
2702 /* Quick check using our local copy of the variable */
2703 if (!updateMinRecoveryPoint || (!force && lsn <= LocalMinRecoveryPoint))
2704 return;
2705
2706 /*
2707 * An invalid minRecoveryPoint means that we need to recover all the WAL,
2708 * i.e., we're doing crash recovery. We never modify the control file's
2709 * value in that case, so we can short-circuit future checks here too. The
2710 * local values of minRecoveryPoint and minRecoveryPointTLI should not be
2711 * updated until crash recovery finishes. We only do this for the startup
2712 * process as it should not update its own reference of minRecoveryPoint
2713 * until it has finished crash recovery to make sure that all WAL
2714 * available is replayed in this case. This also saves from extra locks
2715 * taken on the control file from the startup process.
2716 */
2718 {
2719 updateMinRecoveryPoint = false;
2720 return;
2721 }
2722
2723 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
2724
2725 /* update local copy */
2728
2730 updateMinRecoveryPoint = false;
2731 else if (force || LocalMinRecoveryPoint < lsn)
2732 {
2733 XLogRecPtr newMinRecoveryPoint;
2734 TimeLineID newMinRecoveryPointTLI;
2735
2736 /*
2737 * To avoid having to update the control file too often, we update it
2738 * all the way to the last record being replayed, even though 'lsn'
2739 * would suffice for correctness. This also allows the 'force' case
2740 * to not need a valid 'lsn' value.
2741 *
2742 * Another important reason for doing it this way is that the passed
2743 * 'lsn' value could be bogus, i.e., past the end of available WAL, if
2744 * the caller got it from a corrupted heap page. Accepting such a
2745 * value as the min recovery point would prevent us from coming up at
2746 * all. Instead, we just log a warning and continue with recovery.
2747 * (See also the comments about corrupt LSNs in XLogFlush.)
2748 */
2749 newMinRecoveryPoint = GetCurrentReplayRecPtr(&newMinRecoveryPointTLI);
2750 if (!force && newMinRecoveryPoint < lsn)
2751 elog(WARNING,
2752 "xlog min recovery request %X/%08X is past current point %X/%08X",
2753 LSN_FORMAT_ARGS(lsn), LSN_FORMAT_ARGS(newMinRecoveryPoint));
2754
2755 /* update control file */
2756 if (ControlFile->minRecoveryPoint < newMinRecoveryPoint)
2757 {
2758 ControlFile->minRecoveryPoint = newMinRecoveryPoint;
2759 ControlFile->minRecoveryPointTLI = newMinRecoveryPointTLI;
2761 LocalMinRecoveryPoint = newMinRecoveryPoint;
2762 LocalMinRecoveryPointTLI = newMinRecoveryPointTLI;
2763
2765 errmsg_internal("updated min recovery point to %X/%08X on timeline %u",
2766 LSN_FORMAT_ARGS(newMinRecoveryPoint),
2767 newMinRecoveryPointTLI));
2768 }
2769 }
2770 LWLockRelease(ControlFileLock);
2771}
XLogRecPtr GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)

References ControlFile, DEBUG2, elog, ereport, errmsg_internal(), GetCurrentReplayRecPtr(), InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, UpdateControlFile(), updateMinRecoveryPoint, WARNING, and XLogRecPtrIsInvalid.

Referenced by CreateRestartPoint(), XLogFlush(), and XLogInitNewTimeline().

◆ ValidateXLOGDirectoryStructure()

static void ValidateXLOGDirectoryStructure ( void  )
static

Definition at line 4115 of file xlog.c.

4116{
4117 char path[MAXPGPATH];
4118 struct stat stat_buf;
4119
4120 /* Check for pg_wal; if it doesn't exist, error out */
4121 if (stat(XLOGDIR, &stat_buf) != 0 ||
4122 !S_ISDIR(stat_buf.st_mode))
4123 ereport(FATAL,
4125 errmsg("required WAL directory \"%s\" does not exist",
4126 XLOGDIR)));
4127
4128 /* Check for archive_status */
4129 snprintf(path, MAXPGPATH, XLOGDIR "/archive_status");
4130 if (stat(path, &stat_buf) == 0)
4131 {
4132 /* Check for weird cases where it exists but isn't a directory */
4133 if (!S_ISDIR(stat_buf.st_mode))
4134 ereport(FATAL,
4136 errmsg("required WAL directory \"%s\" does not exist",
4137 path)));
4138 }
4139 else
4140 {
4141 ereport(LOG,
4142 (errmsg("creating missing WAL directory \"%s\"", path)));
4143 if (MakePGDirectory(path) < 0)
4144 ereport(FATAL,
4146 errmsg("could not create missing directory \"%s\": %m",
4147 path)));
4148 }
4149
4150 /* Check for summaries */
4151 snprintf(path, MAXPGPATH, XLOGDIR "/summaries");
4152 if (stat(path, &stat_buf) == 0)
4153 {
4154 /* Check for weird cases where it exists but isn't a directory */
4155 if (!S_ISDIR(stat_buf.st_mode))
4156 ereport(FATAL,
4157 (errmsg("required WAL directory \"%s\" does not exist",
4158 path)));
4159 }
4160 else
4161 {
4162 ereport(LOG,
4163 (errmsg("creating missing WAL directory \"%s\"", path)));
4164 if (MakePGDirectory(path) < 0)
4165 ereport(FATAL,
4166 (errmsg("could not create missing directory \"%s\": %m",
4167 path)));
4168 }
4169}
int MakePGDirectory(const char *directoryName)
Definition: fd.c:3975
#define S_ISDIR(m)
Definition: win32_port.h:315

References ereport, errcode_for_file_access(), errmsg(), FATAL, LOG, MakePGDirectory(), MAXPGPATH, S_ISDIR, snprintf, stat::st_mode, stat, and XLOGDIR.

Referenced by StartupXLOG().

◆ WaitXLogInsertionsToFinish()

static XLogRecPtr WaitXLogInsertionsToFinish ( XLogRecPtr  upto)
static

Definition at line 1507 of file xlog.c.

1508{
1509 uint64 bytepos;
1510 XLogRecPtr inserted;
1511 XLogRecPtr reservedUpto;
1512 XLogRecPtr finishedUpto;
1514 int i;
1515
1516 if (MyProc == NULL)
1517 elog(PANIC, "cannot wait without a PGPROC structure");
1518
1519 /*
1520 * Check if there's any work to do. Use a barrier to ensure we get the
1521 * freshest value.
1522 */
1524 if (upto <= inserted)
1525 return inserted;
1526
1527 /* Read the current insert position */
1528 SpinLockAcquire(&Insert->insertpos_lck);
1529 bytepos = Insert->CurrBytePos;
1530 SpinLockRelease(&Insert->insertpos_lck);
1531 reservedUpto = XLogBytePosToEndRecPtr(bytepos);
1532
1533 /*
1534 * No-one should request to flush a piece of WAL that hasn't even been
1535 * reserved yet. However, it can happen if there is a block with a bogus
1536 * LSN on disk, for example. XLogFlush checks for that situation and
1537 * complains, but only after the flush. Here we just assume that to mean
1538 * that all WAL that has been reserved needs to be finished. In this
1539 * corner-case, the return value can be smaller than 'upto' argument.
1540 */
1541 if (upto > reservedUpto)
1542 {
1543 ereport(LOG,
1544 errmsg("request to flush past end of generated WAL; request %X/%08X, current position %X/%08X",
1545 LSN_FORMAT_ARGS(upto), LSN_FORMAT_ARGS(reservedUpto)));
1546 upto = reservedUpto;
1547 }
1548
1549 /*
1550 * Loop through all the locks, sleeping on any in-progress insert older
1551 * than 'upto'.
1552 *
1553 * finishedUpto is our return value, indicating the point up to which all
1554 * the WAL insertions have been finished. Initialize it to the head of
1555 * reserved WAL, and as we iterate through the insertion locks, back it
1556 * out for any insertion that's still in progress.
1557 */
1558 finishedUpto = reservedUpto;
1559 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1560 {
1561 XLogRecPtr insertingat = InvalidXLogRecPtr;
1562
1563 do
1564 {
1565 /*
1566 * See if this insertion is in progress. LWLockWaitForVar will
1567 * wait for the lock to be released, or for the 'value' to be set
1568 * by a LWLockUpdateVar call. When a lock is initially acquired,
1569 * its value is 0 (InvalidXLogRecPtr), which means that we don't
1570 * know where it's inserting yet. We will have to wait for it. If
1571 * it's a small insertion, the record will most likely fit on the
1572 * same page and the inserter will release the lock without ever
1573 * calling LWLockUpdateVar. But if it has to sleep, it will
1574 * advertise the insertion point with LWLockUpdateVar before
1575 * sleeping.
1576 *
1577 * In this loop we are only waiting for insertions that started
1578 * before WaitXLogInsertionsToFinish was called. The lack of
1579 * memory barriers in the loop means that we might see locks as
1580 * "unused" that have since become used. This is fine because
1581 * they only can be used for later insertions that we would not
1582 * want to wait on anyway. Not taking a lock to acquire the
1583 * current insertingAt value means that we might see older
1584 * insertingAt values. This is also fine, because if we read a
1585 * value too old, we will add ourselves to the wait queue, which
1586 * contains atomic operations.
1587 */
1588 if (LWLockWaitForVar(&WALInsertLocks[i].l.lock,
1590 insertingat, &insertingat))
1591 {
1592 /* the lock was free, so no insertion in progress */
1593 insertingat = InvalidXLogRecPtr;
1594 break;
1595 }
1596
1597 /*
1598 * This insertion is still in progress. Have to wait, unless the
1599 * inserter has proceeded past 'upto'.
1600 */
1601 } while (insertingat < upto);
1602
1603 if (insertingat != InvalidXLogRecPtr && insertingat < finishedUpto)
1604 finishedUpto = insertingat;
1605 }
1606
1607 /*
1608 * Advance the limit we know to have been inserted and return the freshest
1609 * value we know of, which might be beyond what we requested if somebody
1610 * is concurrently doing this with an 'upto' pointer ahead of us.
1611 */
1613 finishedUpto);
1614
1615 return finishedUpto;
1616}
static uint64 pg_atomic_monotonic_advance_u64(volatile pg_atomic_uint64 *ptr, uint64 target)
Definition: atomics.h:583
bool LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval, uint64 *newval)
Definition: lwlock.c:1590
PGPROC * MyProc
Definition: proc.c:66
pg_atomic_uint64 insertingAt
Definition: xlog.c:371

References elog, ereport, errmsg(), i, XLogCtlData::Insert, Insert(), WALInsertLock::insertingAt, InvalidXLogRecPtr, WALInsertLockPadded::l, LOG, XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, LWLockWaitForVar(), MyProc, NUM_XLOGINSERT_LOCKS, PANIC, pg_atomic_monotonic_advance_u64(), pg_atomic_read_membarrier_u64(), SpinLockAcquire, SpinLockRelease, WALInsertLocks, XLogBytePosToEndRecPtr(), and XLogCtl.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

◆ WALInsertLockAcquire()

static void WALInsertLockAcquire ( void  )
static

Definition at line 1374 of file xlog.c.

1375{
1376 bool immed;
1377
1378 /*
1379 * It doesn't matter which of the WAL insertion locks we acquire, so try
1380 * the one we used last time. If the system isn't particularly busy, it's
1381 * a good bet that it's still available, and it's good to have some
1382 * affinity to a particular lock so that you don't unnecessarily bounce
1383 * cache lines between processes when there's no contention.
1384 *
1385 * If this is the first time through in this backend, pick a lock
1386 * (semi-)randomly. This allows the locks to be used evenly if you have a
1387 * lot of very short connections.
1388 */
1389 static int lockToTry = -1;
1390
1391 if (lockToTry == -1)
1392 lockToTry = MyProcNumber % NUM_XLOGINSERT_LOCKS;
1393 MyLockNo = lockToTry;
1394
1395 /*
1396 * The insertingAt value is initially set to 0, as we don't know our
1397 * insert location yet.
1398 */
1399 immed = LWLockAcquire(&WALInsertLocks[MyLockNo].l.lock, LW_EXCLUSIVE);
1400 if (!immed)
1401 {
1402 /*
1403 * If we couldn't get the lock immediately, try another lock next
1404 * time. On a system with more insertion locks than concurrent
1405 * inserters, this causes all the inserters to eventually migrate to a
1406 * lock that no-one else is using. On a system with more inserters
1407 * than locks, it still helps to distribute the inserters evenly
1408 * across the locks.
1409 */
1410 lockToTry = (lockToTry + 1) % NUM_XLOGINSERT_LOCKS;
1411 }
1412}
ProcNumber MyProcNumber
Definition: globals.c:90
static int MyLockNo
Definition: xlog.c:651

References LW_EXCLUSIVE, LWLockAcquire(), MyLockNo, MyProcNumber, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateOverwriteContrecordRecord(), and XLogInsertRecord().

◆ WALInsertLockAcquireExclusive()

static void WALInsertLockAcquireExclusive ( void  )
static

Definition at line 1419 of file xlog.c.

1420{
1421 int i;
1422
1423 /*
1424 * When holding all the locks, all but the last lock's insertingAt
1425 * indicator is set to 0xFFFFFFFFFFFFFFFF, which is higher than any real
1426 * XLogRecPtr value, to make sure that no-one blocks waiting on those.
1427 */
1428 for (i = 0; i < NUM_XLOGINSERT_LOCKS - 1; i++)
1429 {
1434 }
1435 /* Variable value reset to 0 at release */
1437
1438 holdingAllLocks = true;
1439}
#define PG_UINT64_MAX
Definition: c.h:599
void LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition: lwlock.c:1726
static bool holdingAllLocks
Definition: xlog.c:652

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LW_EXCLUSIVE, LWLockAcquire(), LWLockUpdateVar(), NUM_XLOGINSERT_LOCKS, PG_UINT64_MAX, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockRelease()

static void WALInsertLockRelease ( void  )
static

Definition at line 1448 of file xlog.c.

1449{
1450 if (holdingAllLocks)
1451 {
1452 int i;
1453
1454 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1457 0);
1458
1459 holdingAllLocks = false;
1460 }
1461 else
1462 {
1465 0);
1466 }
1467}
void LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition: lwlock.c:1923

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockReleaseClearVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockUpdateInsertingAt()

static void WALInsertLockUpdateInsertingAt ( XLogRecPtr  insertingAt)
static

Definition at line 1474 of file xlog.c.

1475{
1476 if (holdingAllLocks)
1477 {
1478 /*
1479 * We use the last lock to mark our actual position, see comments in
1480 * WALInsertLockAcquireExclusive.
1481 */
1484 insertingAt);
1485 }
1486 else
1489 insertingAt);
1490}

References holdingAllLocks, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockUpdateVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by GetXLogBuffer().

◆ WALReadFromBuffers()

Size WALReadFromBuffers ( char *  dstbuf,
XLogRecPtr  startptr,
Size  count,
TimeLineID  tli 
)

Definition at line 1751 of file xlog.c.

1753{
1754 char *pdst = dstbuf;
1755 XLogRecPtr recptr = startptr;
1756 XLogRecPtr inserted;
1757 Size nbytes = count;
1758
1760 return 0;
1761
1762 Assert(!XLogRecPtrIsInvalid(startptr));
1763
1764 /*
1765 * Caller should ensure that the requested data has been inserted into WAL
1766 * buffers before we try to read it.
1767 */
1769 if (startptr + count > inserted)
1770 ereport(ERROR,
1771 errmsg("cannot read past end of generated WAL: requested %X/%08X, current position %X/%08X",
1772 LSN_FORMAT_ARGS(startptr + count),
1773 LSN_FORMAT_ARGS(inserted)));
1774
1775 /*
1776 * Loop through the buffers without a lock. For each buffer, atomically
1777 * read and verify the end pointer, then copy the data out, and finally
1778 * re-read and re-verify the end pointer.
1779 *
1780 * Once a page is evicted, it never returns to the WAL buffers, so if the
1781 * end pointer matches the expected end pointer before and after we copy
1782 * the data, then the right page must have been present during the data
1783 * copy. Read barriers are necessary to ensure that the data copy actually
1784 * happens between the two verification steps.
1785 *
1786 * If either verification fails, we simply terminate the loop and return
1787 * with the data that had been already copied out successfully.
1788 */
1789 while (nbytes > 0)
1790 {
1791 uint32 offset = recptr % XLOG_BLCKSZ;
1792 int idx = XLogRecPtrToBufIdx(recptr);
1793 XLogRecPtr expectedEndPtr;
1794 XLogRecPtr endptr;
1795 const char *page;
1796 const char *psrc;
1797 Size npagebytes;
1798
1799 /*
1800 * Calculate the end pointer we expect in the xlblocks array if the
1801 * correct page is present.
1802 */
1803 expectedEndPtr = recptr + (XLOG_BLCKSZ - offset);
1804
1805 /*
1806 * First verification step: check that the correct page is present in
1807 * the WAL buffers.
1808 */
1810 if (expectedEndPtr != endptr)
1811 break;
1812
1813 /*
1814 * The correct page is present (or was at the time the endptr was
1815 * read; must re-verify later). Calculate pointer to source data and
1816 * determine how much data to read from this page.
1817 */
1818 page = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1819 psrc = page + offset;
1820 npagebytes = Min(nbytes, XLOG_BLCKSZ - offset);
1821
1822 /*
1823 * Ensure that the data copy and the first verification step are not
1824 * reordered.
1825 */
1827
1828 /* data copy */
1829 memcpy(pdst, psrc, npagebytes);
1830
1831 /*
1832 * Ensure that the data copy and the second verification step are not
1833 * reordered.
1834 */
1836
1837 /*
1838 * Second verification step: check that the page we read from wasn't
1839 * evicted while we were copying the data.
1840 */
1842 if (expectedEndPtr != endptr)
1843 break;
1844
1845 pdst += npagebytes;
1846 recptr += npagebytes;
1847 nbytes -= npagebytes;
1848 }
1849
1850 Assert(pdst - dstbuf <= count);
1851
1852 return pdst - dstbuf;
1853}
#define pg_read_barrier()
Definition: atomics.h:154
#define Min(x, y)
Definition: c.h:1004
TimeLineID GetWALInsertionTimeLine(void)
Definition: xlog.c:6572

References Assert(), ereport, errmsg(), ERROR, GetWALInsertionTimeLine(), idx(), XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, Min, XLogCtlData::pages, pg_atomic_read_u64(), pg_read_barrier, RecoveryInProgress(), XLogCtlData::xlblocks, XLogCtl, XLogRecPtrIsInvalid, and XLogRecPtrToBufIdx.

Referenced by XLogSendPhysical().

◆ WriteControlFile()

static void WriteControlFile ( void  )
static

Definition at line 4255 of file xlog.c.

4256{
4257 int fd;
4258 char buffer[PG_CONTROL_FILE_SIZE]; /* need not be aligned */
4259
4260 /*
4261 * Initialize version and compatibility-check fields
4262 */
4265
4266 ControlFile->maxAlign = MAXIMUM_ALIGNOF;
4268
4269 ControlFile->blcksz = BLCKSZ;
4270 ControlFile->relseg_size = RELSEG_SIZE;
4271 ControlFile->xlog_blcksz = XLOG_BLCKSZ;
4273
4276
4279
4280 ControlFile->float8ByVal = true; /* vestigial */
4281
4282 /*
4283 * Initialize the default 'char' signedness.
4284 *
4285 * The signedness of the char type is implementation-defined. For instance
4286 * on x86 architecture CPUs, the char data type is typically treated as
4287 * signed by default, whereas on aarch architecture CPUs, it is typically
4288 * treated as unsigned by default. In v17 or earlier, we accidentally let
4289 * C implementation signedness affect persistent data. This led to
4290 * inconsistent results when comparing char data across different
4291 * platforms.
4292 *
4293 * This flag can be used as a hint to ensure consistent behavior for
4294 * pre-v18 data files that store data sorted by the 'char' type on disk,
4295 * especially in cross-platform replication scenarios.
4296 *
4297 * Newly created database clusters unconditionally set the default char
4298 * signedness to true. pg_upgrade changes this flag for clusters that were
4299 * initialized on signedness=false platforms. As a result,
4300 * signedness=false setting will become rare over time. If we had known
4301 * about this problem during the last development cycle that forced initdb
4302 * (v8.3), we would have made all clusters signed or all clusters
4303 * unsigned. Making pg_upgrade the only source of signedness=false will
4304 * cause the population of database clusters to converge toward that
4305 * retrospective ideal.
4306 */
4308
4309 /* Contents are protected with a CRC */
4313 offsetof(ControlFileData, crc));
4315
4316 /*
4317 * We write out PG_CONTROL_FILE_SIZE bytes into pg_control, zero-padding
4318 * the excess over sizeof(ControlFileData). This reduces the odds of
4319 * premature-EOF errors when reading pg_control. We'll still fail when we
4320 * check the contents of the file, but hopefully with a more specific
4321 * error than "couldn't read pg_control".
4322 */
4323 memset(buffer, 0, PG_CONTROL_FILE_SIZE);
4324 memcpy(buffer, ControlFile, sizeof(ControlFileData));
4325
4327 O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
4328 if (fd < 0)
4329 ereport(PANIC,
4331 errmsg("could not create file \"%s\": %m",
4333
4334 errno = 0;
4335 pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE);
4337 {
4338 /* if write didn't set errno, assume problem is no disk space */
4339 if (errno == 0)
4340 errno = ENOSPC;
4341 ereport(PANIC,
4343 errmsg("could not write to file \"%s\": %m",
4345 }
4347
4348 pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC);
4349 if (pg_fsync(fd) != 0)
4350 ereport(PANIC,
4352 errmsg("could not fsync file \"%s\": %m",
4355
4356 if (close(fd) != 0)
4357 ereport(PANIC,
4359 errmsg("could not close file \"%s\": %m",
4361}
#define PG_CONTROL_FILE_SIZE
Definition: pg_control.h:256

References BasicOpenFile(), ControlFileData::blcksz, CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ControlFileData::crc, crc, ControlFileData::default_char_signedness, ereport, errcode_for_file_access(), errmsg(), fd(), FIN_CRC32C, ControlFileData::float8ByVal, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, ControlFileData::loblksize, LOBLKSIZE, ControlFileData::maxAlign, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_FILE_SIZE, PG_CONTROL_VERSION, ControlFileData::pg_control_version, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), ControlFileData::relseg_size, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, wal_segment_size, write, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG().

◆ xlog_redo()

void xlog_redo ( XLogReaderState record)

Definition at line 8285 of file xlog.c.

8286{
8287 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
8288 XLogRecPtr lsn = record->EndRecPtr;
8289
8290 /*
8291 * In XLOG rmgr, backup blocks are only used by XLOG_FPI and
8292 * XLOG_FPI_FOR_HINT records.
8293 */
8294 Assert(info == XLOG_FPI || info == XLOG_FPI_FOR_HINT ||
8295 !XLogRecHasAnyBlockRefs(record));
8296
8297 if (info == XLOG_NEXTOID)
8298 {
8299 Oid nextOid;
8300
8301 /*
8302 * We used to try to take the maximum of TransamVariables->nextOid and
8303 * the recorded nextOid, but that fails if the OID counter wraps
8304 * around. Since no OID allocation should be happening during replay
8305 * anyway, better to just believe the record exactly. We still take
8306 * OidGenLock while setting the variable, just in case.
8307 */
8308 memcpy(&nextOid, XLogRecGetData(record), sizeof(Oid));
8309 LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
8310 TransamVariables->nextOid = nextOid;
8312 LWLockRelease(OidGenLock);
8313 }
8314 else if (info == XLOG_CHECKPOINT_SHUTDOWN)
8315 {
8316 CheckPoint checkPoint;
8317 TimeLineID replayTLI;
8318
8319 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8320 /* In a SHUTDOWN checkpoint, believe the counters exactly */
8321 LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8322 TransamVariables->nextXid = checkPoint.nextXid;
8323 LWLockRelease(XidGenLock);
8324 LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
8325 TransamVariables->nextOid = checkPoint.nextOid;
8327 LWLockRelease(OidGenLock);
8329 checkPoint.nextMultiOffset);
8330
8332 checkPoint.oldestMultiDB);
8333
8334 /*
8335 * No need to set oldestClogXid here as well; it'll be set when we
8336 * redo an xl_clog_truncate if it changed since initialization.
8337 */
8338 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
8339
8340 /*
8341 * If we see a shutdown checkpoint while waiting for an end-of-backup
8342 * record, the backup was canceled and the end-of-backup record will
8343 * never arrive.
8344 */
8348 ereport(PANIC,
8349 (errmsg("online backup was canceled, recovery cannot continue")));
8350
8351 /*
8352 * If we see a shutdown checkpoint, we know that nothing was running
8353 * on the primary at this point. So fake-up an empty running-xacts
8354 * record and use that here and now. Recover additional standby state
8355 * for prepared transactions.
8356 */
8358 {
8359 TransactionId *xids;
8360 int nxids;
8361 TransactionId oldestActiveXID;
8362 TransactionId latestCompletedXid;
8364
8365 oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
8366
8367 /* Update pg_subtrans entries for any prepared transactions */
8369
8370 /*
8371 * Construct a RunningTransactions snapshot representing a shut
8372 * down server, with only prepared transactions still alive. We're
8373 * never overflowed at this point because all subxids are listed
8374 * with their parent prepared transactions.
8375 */
8376 running.xcnt = nxids;
8377 running.subxcnt = 0;
8379 running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
8380 running.oldestRunningXid = oldestActiveXID;
8381 latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
8382 TransactionIdRetreat(latestCompletedXid);
8383 Assert(TransactionIdIsNormal(latestCompletedXid));
8384 running.latestCompletedXid = latestCompletedXid;
8385 running.xids = xids;
8386
8388 }
8389
8390 /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8391 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8393 LWLockRelease(ControlFileLock);
8394
8395 /*
8396 * We should've already switched to the new TLI before replaying this
8397 * record.
8398 */
8399 (void) GetCurrentReplayRecPtr(&replayTLI);
8400 if (checkPoint.ThisTimeLineID != replayTLI)
8401 ereport(PANIC,
8402 (errmsg("unexpected timeline ID %u (should be %u) in shutdown checkpoint record",
8403 checkPoint.ThisTimeLineID, replayTLI)));
8404
8405 RecoveryRestartPoint(&checkPoint, record);
8406
8407 /*
8408 * After replaying a checkpoint record, free all smgr objects.
8409 * Otherwise we would never do so for dropped relations, as the
8410 * startup does not process shared invalidation messages or call
8411 * AtEOXact_SMgr().
8412 */
8414 }
8415 else if (info == XLOG_CHECKPOINT_ONLINE)
8416 {
8417 CheckPoint checkPoint;
8418 TimeLineID replayTLI;
8419
8420 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8421 /* In an ONLINE checkpoint, treat the XID counter as a minimum */
8422 LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8424 checkPoint.nextXid))
8425 TransamVariables->nextXid = checkPoint.nextXid;
8426 LWLockRelease(XidGenLock);
8427
8428 /*
8429 * We ignore the nextOid counter in an ONLINE checkpoint, preferring
8430 * to track OID assignment through XLOG_NEXTOID records. The nextOid
8431 * counter is from the start of the checkpoint and might well be stale
8432 * compared to later XLOG_NEXTOID records. We could try to take the
8433 * maximum of the nextOid counter and our latest value, but since
8434 * there's no particular guarantee about the speed with which the OID
8435 * counter wraps around, that's a risky thing to do. In any case,
8436 * users of the nextOid counter are required to avoid assignment of
8437 * duplicates, so that a somewhat out-of-date value should be safe.
8438 */
8439
8440 /* Handle multixact */
8442 checkPoint.nextMultiOffset);
8443
8444 /*
8445 * NB: This may perform multixact truncation when replaying WAL
8446 * generated by an older primary.
8447 */
8449 checkPoint.oldestMultiDB);
8451 checkPoint.oldestXid))
8453 checkPoint.oldestXidDB);
8454 /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8455 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8457 LWLockRelease(ControlFileLock);
8458
8459 /* TLI should not change in an on-line checkpoint */
8460 (void) GetCurrentReplayRecPtr(&replayTLI);
8461 if (checkPoint.ThisTimeLineID != replayTLI)
8462 ereport(PANIC,
8463 (errmsg("unexpected timeline ID %u (should be %u) in online checkpoint record",
8464 checkPoint.ThisTimeLineID, replayTLI)));
8465
8466 RecoveryRestartPoint(&checkPoint, record);
8467
8468 /*
8469 * After replaying a checkpoint record, free all smgr objects.
8470 * Otherwise we would never do so for dropped relations, as the
8471 * startup does not process shared invalidation messages or call
8472 * AtEOXact_SMgr().
8473 */
8475 }
8476 else if (info == XLOG_OVERWRITE_CONTRECORD)
8477 {
8478 /* nothing to do here, handled in xlogrecovery_redo() */
8479 }
8480 else if (info == XLOG_END_OF_RECOVERY)
8481 {
8482 xl_end_of_recovery xlrec;
8483 TimeLineID replayTLI;
8484
8485 memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
8486
8487 /*
8488 * For Hot Standby, we could treat this like a Shutdown Checkpoint,
8489 * but this case is rarer and harder to test, so the benefit doesn't
8490 * outweigh the potential extra cost of maintenance.
8491 */
8492
8493 /*
8494 * We should've already switched to the new TLI before replaying this
8495 * record.
8496 */
8497 (void) GetCurrentReplayRecPtr(&replayTLI);
8498 if (xlrec.ThisTimeLineID != replayTLI)
8499 ereport(PANIC,
8500 (errmsg("unexpected timeline ID %u (should be %u) in end-of-recovery record",
8501 xlrec.ThisTimeLineID, replayTLI)));
8502 }
8503 else if (info == XLOG_NOOP)
8504 {
8505 /* nothing to do here */
8506 }
8507 else if (info == XLOG_SWITCH)
8508 {
8509 /* nothing to do here */
8510 }
8511 else if (info == XLOG_RESTORE_POINT)
8512 {
8513 /* nothing to do here, handled in xlogrecovery.c */
8514 }
8515 else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
8516 {
8517 /*
8518 * XLOG_FPI records contain nothing else but one or more block
8519 * references. Every block reference must include a full-page image
8520 * even if full_page_writes was disabled when the record was generated
8521 * - otherwise there would be no point in this record.
8522 *
8523 * XLOG_FPI_FOR_HINT records are generated when a page needs to be
8524 * WAL-logged because of a hint bit update. They are only generated
8525 * when checksums and/or wal_log_hints are enabled. They may include
8526 * no full-page images if full_page_writes was disabled when they were
8527 * generated. In this case there is nothing to do here.
8528 *
8529 * No recovery conflicts are generated by these generic records - if a
8530 * resource manager needs to generate conflicts, it has to define a
8531 * separate WAL record type and redo routine.
8532 */
8533 for (uint8 block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
8534 {
8535 Buffer buffer;
8536
8537 if (!XLogRecHasBlockImage(record, block_id))
8538 {
8539 if (info == XLOG_FPI)
8540 elog(ERROR, "XLOG_FPI record did not contain a full-page image");
8541 continue;
8542 }
8543
8544 if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED)
8545 elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
8546 UnlockReleaseBuffer(buffer);
8547 }
8548 }
8549 else if (info == XLOG_BACKUP_END)
8550 {
8551 /* nothing to do here, handled in xlogrecovery_redo() */
8552 }
8553 else if (info == XLOG_PARAMETER_CHANGE)
8554 {
8555 xl_parameter_change xlrec;
8556
8557 /* Update our copy of the parameters in pg_control */
8558 memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_parameter_change));
8559
8560 /*
8561 * Invalidate logical slots if we are in hot standby and the primary
8562 * does not have a WAL level sufficient for logical decoding. No need
8563 * to search for potentially conflicting logically slots if standby is
8564 * running with wal_level lower than logical, because in that case, we
8565 * would have either disallowed creation of logical slots or
8566 * invalidated existing ones.
8567 */
8568 if (InRecovery && InHotStandby &&
8569 xlrec.wal_level < WAL_LEVEL_LOGICAL &&
8572 0, InvalidOid,
8574
8575 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8583
8584 /*
8585 * Update minRecoveryPoint to ensure that if recovery is aborted, we
8586 * recover back up to this point before allowing hot standby again.
8587 * This is important if the max_* settings are decreased, to ensure
8588 * you don't run queries against the WAL preceding the change. The
8589 * local copies cannot be updated as long as crash recovery is
8590 * happening and we expect all the WAL to be replayed.
8591 */
8593 {
8596 }
8598 {
8599 TimeLineID replayTLI;
8600
8601 (void) GetCurrentReplayRecPtr(&replayTLI);
8603 ControlFile->minRecoveryPointTLI = replayTLI;
8604 }
8605
8609
8611 LWLockRelease(ControlFileLock);
8612
8613 /* Check to see if any parameter change gives a problem on recovery */
8615 }
8616 else if (info == XLOG_FPW_CHANGE)
8617 {
8618 bool fpw;
8619
8620 memcpy(&fpw, XLogRecGetData(record), sizeof(bool));
8621
8622 /*
8623 * Update the LSN of the last replayed XLOG_FPW_CHANGE record so that
8624 * do_pg_backup_start() and do_pg_backup_stop() can check whether
8625 * full_page_writes has been disabled during online backup.
8626 */
8627 if (!fpw)
8628 {
8633 }
8634
8635 /* Keep track of full_page_writes */
8636 lastFullPageWrites = fpw;
8637 }
8638 else if (info == XLOG_CHECKPOINT_REDO)
8639 {
8640 /* nothing to do here, just for informational purposes */
8641 }
8642}
int Buffer
Definition: buf.h:23
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5355
uint8_t uint8
Definition: c.h:537
void CommitTsParameterChange(bool newvalue, bool oldvalue)
Definition: commit_ts.c:640
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition: multixact.c:2466
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition: multixact.c:2441
#define XLOG_RESTORE_POINT
Definition: pg_control.h:75
#define XLOG_FPI
Definition: pg_control.h:79
#define XLOG_FPI_FOR_HINT
Definition: pg_control.h:78
#define XLOG_NEXTOID
Definition: pg_control.h:71
#define XLOG_NOOP
Definition: pg_control.h:70
#define XLOG_PARAMETER_CHANGE
Definition: pg_control.h:74
@ RS_INVAL_WAL_LEVEL
Definition: slot.h:66
void smgrdestroyall(void)
Definition: smgr.c:386
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define FullTransactionIdPrecedes(a, b)
Definition: transam.h:51
static void RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record)
Definition: xlog.c:7599
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:76
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:410
#define XLogRecGetData(decoder)
Definition: xlogreader.h:415
#define XLogRecMaxBlockId(decoder)
Definition: xlogreader.h:418
#define XLogRecHasBlockImage(decoder, block_id)
Definition: xlogreader.h:423
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:417
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:303
@ STANDBY_INITIALIZED
Definition: xlogutils.h:53
#define InHotStandby
Definition: xlogutils.h:60
@ BLK_RESTORED
Definition: xlogutils.h:76

References ArchiveRecoveryRequested, Assert(), ControlFileData::backupEndPoint, ControlFileData::backupStartPoint, BLK_RESTORED, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), CommitTsParameterChange(), ControlFile, elog, XLogReaderState::EndRecPtr, ereport, errmsg(), ERROR, FullTransactionIdPrecedes, GetCurrentReplayRecPtr(), InArchiveRecovery, XLogCtlData::info_lck, InHotStandby, InRecovery, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, XLogCtlData::lastFpwDisableRecPtr, lastFullPageWrites, RunningTransactionsData::latestCompletedXid, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), xl_parameter_change::max_locks_per_xact, ControlFileData::max_locks_per_xact, xl_parameter_change::max_prepared_xacts, ControlFileData::max_prepared_xacts, xl_parameter_change::max_wal_senders, ControlFileData::max_wal_senders, xl_parameter_change::max_worker_processes, ControlFileData::max_worker_processes, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactAdvanceNextMXact(), MultiXactAdvanceOldest(), MultiXactSetNextMXact(), CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, TransamVariablesData::oldestXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, PANIC, PrescanPreparedTransactions(), ProcArrayApplyRecoveryInfo(), XLogReaderState::ReadRecPtr, RecoveryRestartPoint(), RS_INVAL_WAL_LEVEL, SetTransactionIdLimit(), smgrdestroyall(), SpinLockAcquire, SpinLockRelease, STANDBY_INITIALIZED, StandbyRecoverPreparedTransactions(), standbyState, RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, 
SUBXIDS_IN_SUBTRANS, xl_end_of_recovery::ThisTimeLineID, CheckPoint::ThisTimeLineID, xl_parameter_change::track_commit_timestamp, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdRetreat, TransamVariables, UnlockReleaseBuffer(), UpdateControlFile(), wal_level, xl_parameter_change::wal_level, ControlFileData::wal_level, WAL_LEVEL_LOGICAL, xl_parameter_change::wal_log_hints, ControlFileData::wal_log_hints, RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_NEXTOID, XLOG_NOOP, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLOG_SWITCH, XLogCtl, XLogReadBufferForRedo(), XLogRecGetData, XLogRecGetInfo, XLogRecHasAnyBlockRefs, XLogRecHasBlockImage, XLogRecMaxBlockId, and XLogRecPtrIsInvalid.

◆ XLogBackgroundFlush()

bool XLogBackgroundFlush ( void  )

Definition at line 2975 of file xlog.c.

2976{
2977 XLogwrtRqst WriteRqst;
2978 bool flexible = true;
2979 static TimestampTz lastflush;
2981 int flushblocks;
2982 TimeLineID insertTLI;
2983
2984 /* XLOG doesn't need flushing during recovery */
2985 if (RecoveryInProgress())
2986 return false;
2987
2988 /*
2989 * Since we're not in recovery, InsertTimeLineID is set and can't change,
2990 * so we can read it without a lock.
2991 */
2992 insertTLI = XLogCtl->InsertTimeLineID;
2993
2994 /* read updated LogwrtRqst */
2996 WriteRqst = XLogCtl->LogwrtRqst;
2998
2999 /* back off to last completed page boundary */
3000 WriteRqst.Write -= WriteRqst.Write % XLOG_BLCKSZ;
3001
3002 /* if we have already flushed that far, consider async commit records */
3004 if (WriteRqst.Write <= LogwrtResult.Flush)
3005 {
3007 WriteRqst.Write = XLogCtl->asyncXactLSN;
3009 flexible = false; /* ensure it all gets written */
3010 }
3011
3012 /*
3013 * If already known flushed, we're done. Just need to check if we are
3014 * holding an open file handle to a logfile that's no longer in use,
3015 * preventing the file from being deleted.
3016 */
3017 if (WriteRqst.Write <= LogwrtResult.Flush)
3018 {
3019 if (openLogFile >= 0)
3020 {
3023 {
3024 XLogFileClose();
3025 }
3026 }
3027 return false;
3028 }
3029
3030 /*
3031 * Determine how far to flush WAL, based on the wal_writer_delay and
3032 * wal_writer_flush_after GUCs.
3033 *
3034 * Note that XLogSetAsyncXactLSN() performs similar calculation based on
3035 * wal_writer_flush_after, to decide when to wake us up. Make sure the
3036 * logic is the same in both places if you change this.
3037 */
3039 flushblocks =
3040 WriteRqst.Write / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
3041
3042 if (WalWriterFlushAfter == 0 || lastflush == 0)
3043 {
3044 /* first call, or block based limits disabled */
3045 WriteRqst.Flush = WriteRqst.Write;
3046 lastflush = now;
3047 }
3048 else if (TimestampDifferenceExceeds(lastflush, now, WalWriterDelay))
3049 {
3050 /*
3051 * Flush the writes at least every WalWriterDelay ms. This is
3052 * important to bound the amount of time it takes for an asynchronous
3053 * commit to hit disk.
3054 */
3055 WriteRqst.Flush = WriteRqst.Write;
3056 lastflush = now;
3057 }
3058 else if (flushblocks >= WalWriterFlushAfter)
3059 {
3060 /* exceeded wal_writer_flush_after blocks, flush */
3061 WriteRqst.Flush = WriteRqst.Write;
3062 lastflush = now;
3063 }
3064 else
3065 {
3066 /* no flushing, this time round */
3067 WriteRqst.Flush = 0;
3068 }
3069
3070#ifdef WAL_DEBUG
3071 if (XLOG_DEBUG)
3072 elog(LOG, "xlog bg flush request write %X/%08X; flush: %X/%08X, current is write %X/%08X; flush %X/%08X",
3073 LSN_FORMAT_ARGS(WriteRqst.Write),
3074 LSN_FORMAT_ARGS(WriteRqst.Flush),
3077#endif
3078
3080
3081 /* now wait for any in-progress insertions to finish and get write lock */
3083 LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
3085 if (WriteRqst.Write > LogwrtResult.Write ||
3086 WriteRqst.Flush > LogwrtResult.Flush)
3087 {
3088 XLogWrite(WriteRqst, insertTLI, flexible);
3089 }
3090 LWLockRelease(WALWriteLock);
3091
3093
3094 /* wake up walsenders now that we've released heavily contended locks */
3096
3097 /*
3098 * Great, done. To take some work off the critical path, try to initialize
3099 * as many of the no-longer-needed WAL buffers for future use as we can.
3100 */
3101 AdvanceXLInsertBuffer(InvalidXLogRecPtr, insertTLI, true);
3102
3103 /*
3104 * If we determined that we need to write data, but somebody else
3105 * wrote/flushed already, it should be considered as being active, to
3106 * avoid hibernating too early.
3107 */
3108 return true;
3109}
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1781
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1609
XLogRecPtr asyncXactLSN
Definition: xlog.c:458
static void WalSndWakeupProcessRequests(bool physical, bool logical)
Definition: walsender.h:65
int WalWriterFlushAfter
Definition: walwriter.c:71
int WalWriterDelay
Definition: walwriter.c:70
#define XLByteInPrevSeg(xlrp, logSegNo, wal_segsz_bytes)

References AdvanceXLInsertBuffer(), XLogCtlData::asyncXactLSN, elog, END_CRIT_SECTION, XLogwrtRqst::Flush, XLogwrtResult::Flush, GetCurrentTimestamp(), XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), now(), openLogFile, openLogSegNo, RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, TimestampDifferenceExceeds(), WaitXLogInsertionsToFinish(), wal_segment_size, WalSndWakeupProcessRequests(), WalWriterDelay, WalWriterFlushAfter, XLogwrtRqst::Write, XLogwrtResult::Write, XLByteInPrevSeg, XLogCtl, XLogFileClose(), and XLogWrite().

Referenced by WalSndWaitForWal(), and WalWriterMain().

◆ XLogBytePosToEndRecPtr()

static XLogRecPtr XLogBytePosToEndRecPtr ( uint64  bytepos)
static

Definition at line 1901 of file xlog.c.

1902{
1903 uint64 fullsegs;
1904 uint64 fullpages;
1905 uint64 bytesleft;
1906 uint32 seg_offset;
1907 XLogRecPtr result;
1908
1909 fullsegs = bytepos / UsableBytesInSegment;
1910 bytesleft = bytepos % UsableBytesInSegment;
1911
1912 if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD)
1913 {
1914 /* fits on first page of segment */
1915 if (bytesleft == 0)
1916 seg_offset = 0;
1917 else
1918 seg_offset = bytesleft + SizeOfXLogLongPHD;
1919 }
1920 else
1921 {
1922 /* account for the first page on segment with long header */
1923 seg_offset = XLOG_BLCKSZ;
1924 bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
1925
1926 fullpages = bytesleft / UsableBytesInPage;
1927 bytesleft = bytesleft % UsableBytesInPage;
1928
1929 if (bytesleft == 0)
1930 seg_offset += fullpages * XLOG_BLCKSZ + bytesleft;
1931 else
1932 seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
1933 }
1934
1935 XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, wal_segment_size, result);
1936
1937 return result;
1938}
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)

References SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and WaitXLogInsertionsToFinish().

◆ XLogBytePosToRecPtr()

static XLogRecPtr XLogBytePosToRecPtr ( uint64  bytepos)
static

Definition at line 1861 of file xlog.c.

1862{
1863 uint64 fullsegs;
1864 uint64 fullpages;
1865 uint64 bytesleft;
1866 uint32 seg_offset;
1867 XLogRecPtr result;
1868
1869 fullsegs = bytepos / UsableBytesInSegment;
1870 bytesleft = bytepos % UsableBytesInSegment;
1871
1872 if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD)
1873 {
1874 /* fits on first page of segment */
1875 seg_offset = bytesleft + SizeOfXLogLongPHD;
1876 }
1877 else
1878 {
1879 /* account for the first page on segment with long header */
1880 seg_offset = XLOG_BLCKSZ;
1881 bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
1882
1883 fullpages = bytesleft / UsableBytesInPage;
1884 bytesleft = bytesleft % UsableBytesInPage;
1885
1886 seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
1887 }
1888
1889 XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, wal_segment_size, result);
1890
1891 return result;
1892}

References SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by CreateCheckPoint(), GetXLogInsertRecPtr(), ReserveXLogInsertLocation(), and ReserveXLogSwitch().

◆ XLogCheckpointNeeded()

bool XLogCheckpointNeeded ( XLogSegNo  new_segno)

Definition at line 2280 of file xlog.c.

2281{
2282 XLogSegNo old_segno;
2283
2285
2286 if (new_segno >= old_segno + (uint64) (CheckPointSegments - 1))
2287 return true;
2288 return false;
2289}

References CheckPointSegments, RedoRecPtr, wal_segment_size, and XLByteToSeg.

Referenced by XLogPageRead(), and XLogWrite().

◆ XLOGChooseNumBuffers()

static int XLOGChooseNumBuffers ( void  )
static

Definition at line 4661 of file xlog.c.

4662{
4663 int xbuffers;
4664
4665 xbuffers = NBuffers / 32;
4666 if (xbuffers > (wal_segment_size / XLOG_BLCKSZ))
4667 xbuffers = (wal_segment_size / XLOG_BLCKSZ);
4668 if (xbuffers < 8)
4669 xbuffers = 8;
4670 return xbuffers;
4671}

References NBuffers, and wal_segment_size.

Referenced by check_wal_buffers(), and XLOGShmemSize().

◆ XLogFileClose()

static void XLogFileClose ( void  )
static

Definition at line 3655 of file xlog.c.

3656{
3657 Assert(openLogFile >= 0);
3658
3659 /*
3660 * WAL segment files will not be re-read in normal operation, so we advise
3661 * the OS to release any cached pages. But do not do so if WAL archiving
3662 * or streaming is active, because archiver and walsender process could
3663 * use the cache to read the WAL segment.
3664 */
3665#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
3666 if (!XLogIsNeeded() && (io_direct_flags & IO_DIRECT_WAL) == 0)
3667 (void) posix_fadvise(openLogFile, 0, 0, POSIX_FADV_DONTNEED);
3668#endif
3669
3670 if (close(openLogFile) != 0)
3671 {
3672 char xlogfname[MAXFNAMELEN];
3673 int save_errno = errno;
3674
3676 errno = save_errno;
3677 ereport(PANIC,
3679 errmsg("could not close file \"%s\": %m", xlogfname)));
3680 }
3681
3682 openLogFile = -1;
3684}
void ReleaseExternalFD(void)
Definition: fd.c:1238

References Assert(), close, ereport, errcode_for_file_access(), errmsg(), io_direct_flags, IO_DIRECT_WAL, MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, ReleaseExternalFD(), wal_segment_size, XLogFileName(), and XLogIsNeeded.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), and XLogWrite().

◆ XLogFileCopy()

static void XLogFileCopy ( TimeLineID  destTLI,
XLogSegNo  destsegno,
TimeLineID  srcTLI,
XLogSegNo  srcsegno,
int  upto 
)
static

Definition at line 3434 of file xlog.c.

3437{
3438 char path[MAXPGPATH];
3439 char tmppath[MAXPGPATH];
3440 PGAlignedXLogBlock buffer;
3441 int srcfd;
3442 int fd;
3443 int nbytes;
3444
3445 /*
3446 * Open the source file
3447 */
3448 XLogFilePath(path, srcTLI, srcsegno, wal_segment_size);
3449 srcfd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
3450 if (srcfd < 0)
3451 ereport(ERROR,
3453 errmsg("could not open file \"%s\": %m", path)));
3454
3455 /*
3456 * Copy into a temp file name.
3457 */
3458 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3459
3460 unlink(tmppath);
3461
3462 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3463 fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
3464 if (fd < 0)
3465 ereport(ERROR,
3467 errmsg("could not create file \"%s\": %m", tmppath)));
3468
3469 /*
3470 * Do the data copying.
3471 */
3472 for (nbytes = 0; nbytes < wal_segment_size; nbytes += sizeof(buffer))
3473 {
3474 int nread;
3475
3476 nread = upto - nbytes;
3477
3478 /*
3479 * The part that is not read from the source file is filled with
3480 * zeros.
3481 */
3482 if (nread < sizeof(buffer))
3483 memset(buffer.data, 0, sizeof(buffer));
3484
3485 if (nread > 0)
3486 {
3487 int r;
3488
3489 if (nread > sizeof(buffer))
3490 nread = sizeof(buffer);
3491 pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
3492 r = read(srcfd, buffer.data, nread);
3493 if (r != nread)
3494 {
3495 if (r < 0)
3496 ereport(ERROR,
3498 errmsg("could not read file \"%s\": %m",
3499 path)));
3500 else
3501 ereport(ERROR,
3503 errmsg("could not read file \"%s\": read %d of %zu",
3504 path, r, (Size) nread)));
3505 }
3507 }
3508 errno = 0;
3509 pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_WRITE);
3510 if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
3511 {
3512 int save_errno = errno;
3513
3514 /*
3515 * If we fail to make the file, delete it to release disk space
3516 */
3517 unlink(tmppath);
3518 /* if write didn't set errno, assume problem is no disk space */
3519 errno = save_errno ? save_errno : ENOSPC;
3520
3521 ereport(ERROR,
3523 errmsg("could not write to file \"%s\": %m", tmppath)));
3524 }
3526 }
3527
3528 pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_SYNC);
3529 if (pg_fsync(fd) != 0)
3532 errmsg("could not fsync file \"%s\": %m", tmppath)));
3534
3535 if (CloseTransientFile(fd) != 0)
3536 ereport(ERROR,
3538 errmsg("could not close file \"%s\": %m", tmppath)));
3539
3540 if (CloseTransientFile(srcfd) != 0)
3541 ereport(ERROR,
3543 errmsg("could not close file \"%s\": %m", path)));
3544
3545 /*
3546 * Now move the segment into place with its final name.
3547 */
3548 if (!InstallXLogFileSegment(&destsegno, tmppath, false, 0, destTLI))
3549 elog(ERROR, "InstallXLogFileSegment should not have failed");
3550}
int CloseTransientFile(int fd)
Definition: fd.c:2868
int data_sync_elevel(int elevel)
Definition: fd.c:3998
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2691
char data[XLOG_BLCKSZ]
Definition: c.h:1148

References CloseTransientFile(), PGAlignedXLogBlock::data, data_sync_elevel(), elog, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, fd(), InstallXLogFileSegment(), MAXPGPATH, OpenTransientFile(), PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), read, snprintf, wal_segment_size, write, XLOGDIR, and XLogFilePath().

Referenced by XLogInitNewTimeline().

◆ XLogFileInit()

int XLogFileInit ( XLogSegNo  logsegno,
TimeLineID  logtli 
)

Definition at line 3396 of file xlog.c.

3397{
3398 bool ignore_added;
3399 char path[MAXPGPATH];
3400 int fd;
3401
3402 Assert(logtli != 0);
3403
3404 fd = XLogFileInitInternal(logsegno, logtli, &ignore_added, path);
3405 if (fd >= 0)
3406 return fd;
3407
3408 /* Now open original target segment (might not be file I just made) */
3409 fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3411 if (fd < 0)
3412 ereport(ERROR,
3414 errmsg("could not open file \"%s\": %m", path)));
3415 return fd;
3416}
#define O_CLOEXEC
Definition: win32_port.h:349

References Assert(), BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PG_BINARY, wal_sync_method, and XLogFileInitInternal().

Referenced by BootStrapXLOG(), XLogInitNewTimeline(), XLogWalRcvWrite(), and XLogWrite().

◆ XLogFileInitInternal()

static int XLogFileInitInternal ( XLogSegNo  logsegno,
TimeLineID  logtli,
bool *  added,
char *  path 
)
static

Definition at line 3208 of file xlog.c.

3210{
3211 char tmppath[MAXPGPATH];
3212 XLogSegNo installed_segno;
3213 XLogSegNo max_segno;
3214 int fd;
3215 int save_errno;
3216 int open_flags = O_RDWR | O_CREAT | O_EXCL | PG_BINARY;
3217 instr_time io_start;
3218
3219 Assert(logtli != 0);
3220
3221 XLogFilePath(path, logtli, logsegno, wal_segment_size);
3222
3223 /*
3224 * Try to use existent file (checkpoint maker may have created it already)
3225 */
3226 *added = false;
3227 fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3229 if (fd < 0)
3230 {
3231 if (errno != ENOENT)
3232 ereport(ERROR,
3234 errmsg("could not open file \"%s\": %m", path)));
3235 }
3236 else
3237 return fd;
3238
3239 /*
3240 * Initialize an empty (all zeroes) segment. NOTE: it is possible that
3241 * another process is doing the same thing. If so, we will end up
3242 * pre-creating an extra log segment. That seems OK, and better than
3243 * holding the lock throughout this lengthy process.
3244 */
3245 elog(DEBUG2, "creating and filling new WAL file");
3246
3247 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3248
3249 unlink(tmppath);
3250
3252 open_flags |= PG_O_DIRECT;
3253
3254 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3255 fd = BasicOpenFile(tmppath, open_flags);
3256 if (fd < 0)
3257 ereport(ERROR,
3259 errmsg("could not create file \"%s\": %m", tmppath)));
3260
3261 /* Measure I/O timing when initializing segment */
3263
3264 pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
3265 save_errno = 0;
3266 if (wal_init_zero)
3267 {
3268 ssize_t rc;
3269
3270 /*
3271 * Zero-fill the file. With this setting, we do this the hard way to
3272 * ensure that all the file space has really been allocated. On
3273 * platforms that allow "holes" in files, just seeking to the end
3274 * doesn't allocate intermediate space. This way, we know that we
3275 * have all the space and (after the fsync below) that all the
3276 * indirect blocks are down on disk. Therefore, fdatasync(2) or
3277 * O_DSYNC will be sufficient to sync future writes to the log file.
3278 */
3280
3281 if (rc < 0)
3282 save_errno = errno;
3283 }
3284 else
3285 {
3286 /*
3287 * Otherwise, seeking to the end and writing a solitary byte is
3288 * enough.
3289 */
3290 errno = 0;
3291 if (pg_pwrite(fd, "\0", 1, wal_segment_size - 1) != 1)
3292 {
3293 /* if write didn't set errno, assume no disk space */
3294 save_errno = errno ? errno : ENOSPC;
3295 }
3296 }
3298
3299 /*
3300 * A full segment worth of data is written when using wal_init_zero. One
3301 * byte is written when not using it.
3302 */
3304 io_start, 1,
3306
3307 if (save_errno)
3308 {
3309 /*
3310 * If we fail to make the file, delete it to release disk space
3311 */
3312 unlink(tmppath);
3313
3314 close(fd);
3315
3316 errno = save_errno;
3317
3318 ereport(ERROR,
3320 errmsg("could not write to file \"%s\": %m", tmppath)));
3321 }
3322
3323 /* Measure I/O timing when flushing segment */
3325
3326 pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
3327 if (pg_fsync(fd) != 0)
3328 {
3329 save_errno = errno;
3330 close(fd);
3331 errno = save_errno;
3332 ereport(ERROR,
3334 errmsg("could not fsync file \"%s\": %m", tmppath)));
3335 }
3337
3339 IOOP_FSYNC, io_start, 1, 0);
3340
3341 if (close(fd) != 0)
3342 ereport(ERROR,
3344 errmsg("could not close file \"%s\": %m", tmppath)));
3345
3346 /*
3347 * Now move the segment into place with its final name. Cope with
3348 * possibility that someone else has created the file while we were
3349 * filling ours: if so, use ours to pre-create a future log segment.
3350 */
3351 installed_segno = logsegno;
3352
3353 /*
3354 * XXX: What should we use as max_segno? We used to use XLOGfileslop when
3355 * that was a constant, but that was always a bit dubious: normally, at a
3356 * checkpoint, XLOGfileslop was the offset from the checkpoint record, but
3357 * here, it was the offset from the insert location. We can't do the
3358 * normal XLOGfileslop calculation here because we don't have access to
3359 * the prior checkpoint's redo location. So somewhat arbitrarily, just use
3360 * CheckPointSegments.
3361 */
3362 max_segno = logsegno + CheckPointSegments;
3363 if (InstallXLogFileSegment(&installed_segno, tmppath, true, max_segno,
3364 logtli))
3365 {
3366 *added = true;
3367 elog(DEBUG2, "done creating and filling new WAL file");
3368 }
3369 else
3370 {
3371 /*
3372 * No need for any more future segments, or InstallXLogFileSegment()
3373 * failed to rename the file into place. If the rename failed, a
3374 * caller opening the file may fail.
3375 */
3376 unlink(tmppath);
3377 elog(DEBUG2, "abandoned new WAL file");
3378 }
3379
3380 return -1;
3381}
#define IO_DIRECT_WAL_INIT
Definition: fd.h:56
ssize_t pg_pwrite_zeros(int fd, size_t size, off_t offset)
Definition: file_utils.c:709
@ IOCONTEXT_INIT
Definition: pgstat.h:286
@ IOOP_WRITE
Definition: pgstat.h:314
#define pg_pwrite
Definition: port.h:227
bool wal_init_zero
Definition: xlog.c:128

References Assert(), BasicOpenFile(), CheckPointSegments, close, DEBUG2, elog, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), get_sync_bit(), InstallXLogFileSegment(), io_direct_flags, IO_DIRECT_WAL_INIT, IOCONTEXT_INIT, IOOBJECT_WAL, IOOP_FSYNC, IOOP_WRITE, MAXPGPATH, O_CLOEXEC, PG_BINARY, pg_fsync(), PG_O_DIRECT, pg_pwrite, pg_pwrite_zeros(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), snprintf, track_wal_io_timing, wal_init_zero, wal_segment_size, wal_sync_method, XLOGDIR, and XLogFilePath().

Referenced by PreallocXlogFiles(), and XLogFileInit().

◆ XLogFileOpen()

int XLogFileOpen ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3634 of file xlog.c.

3635{
3636 char path[MAXPGPATH];
3637 int fd;
3638
3639 XLogFilePath(path, tli, segno, wal_segment_size);
3640
3641 fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3643 if (fd < 0)
3644 ereport(PANIC,
3646 errmsg("could not open file \"%s\": %m", path)));
3647
3648 return fd;
3649}

References BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PANIC, PG_BINARY, wal_segment_size, wal_sync_method, and XLogFilePath().

Referenced by XLogWrite().

◆ XLOGfileslop()

static XLogSegNo XLOGfileslop ( XLogRecPtr  lastredoptr)
static

Definition at line 2230 of file xlog.c.

2231{
2232 XLogSegNo minSegNo;
2233 XLogSegNo maxSegNo;
2234 double distance;
2235 XLogSegNo recycleSegNo;
2236
2237 /*
2238 * Calculate the segment numbers that min_wal_size_mb and max_wal_size_mb
2239 * correspond to. Always recycle enough segments to meet the minimum, and
2240 * remove enough segments to stay below the maximum.
2241 */
2242 minSegNo = lastredoptr / wal_segment_size +
2244 maxSegNo = lastredoptr / wal_segment_size +
2246
2247 /*
2248 * Between those limits, recycle enough segments to get us through to the
2249 * estimated end of next checkpoint.
2250 *
2251 * To estimate where the next checkpoint will finish, assume that the
2252 * system runs steadily consuming CheckPointDistanceEstimate bytes between
2253 * every checkpoint.
2254 */
2256 /* add 10% for good measure. */
2257 distance *= 1.10;
2258
2259 recycleSegNo = (XLogSegNo) ceil(((double) lastredoptr + distance) /
2261
2262 if (recycleSegNo < minSegNo)
2263 recycleSegNo = minSegNo;
2264 if (recycleSegNo > maxSegNo)
2265 recycleSegNo = maxSegNo;
2266
2267 return recycleSegNo;
2268}

References CheckPointCompletionTarget, CheckPointDistanceEstimate, ConvertToXSegs, max_wal_size_mb, min_wal_size_mb, and wal_segment_size.

Referenced by RemoveOldXlogFiles().

◆ XLogFlush()

void XLogFlush ( XLogRecPtr  record)

Definition at line 2780 of file xlog.c.

2781{
2782 XLogRecPtr WriteRqstPtr;
2783 XLogwrtRqst WriteRqst;
2784 TimeLineID insertTLI = XLogCtl->InsertTimeLineID;
2785
2786 /*
2787 * During REDO, we are reading not writing WAL. Therefore, instead of
2788 * trying to flush the WAL, we should update minRecoveryPoint instead. We
2789 * test XLogInsertAllowed(), not InRecovery, because we need checkpointer
2790 * to act this way too, and because when it tries to write the
2791 * end-of-recovery checkpoint, it should indeed flush.
2792 */
2793 if (!XLogInsertAllowed())
2794 {
2795 UpdateMinRecoveryPoint(record, false);
2796 return;
2797 }
2798
2799 /* Quick exit if already known flushed */
2800 if (record <= LogwrtResult.Flush)
2801 return;
2802
2803#ifdef WAL_DEBUG
2804 if (XLOG_DEBUG)
2805 elog(LOG, "xlog flush request %X/%08X; write %X/%08X; flush %X/%08X",
2806 LSN_FORMAT_ARGS(record),
2809#endif
2810
2812
2813 /*
2814 * Since fsync is usually a horribly expensive operation, we try to
2815 * piggyback as much data as we can on each fsync: if we see any more data
2816 * entered into the xlog buffer, we'll write and fsync that too, so that
2817 * the final value of LogwrtResult.Flush is as large as possible. This
2818 * gives us some chance of avoiding another fsync immediately after.
2819 */
2820
2821 /* initialize to given target; may increase below */
2822 WriteRqstPtr = record;
2823
2824 /*
2825 * Now wait until we get the write lock, or someone else does the flush
2826 * for us.
2827 */
2828 for (;;)
2829 {
2830 XLogRecPtr insertpos;
2831
2832 /* done already? */
2834 if (record <= LogwrtResult.Flush)
2835 break;
2836
2837 /*
2838 * Before actually performing the write, wait for all in-flight
2839 * insertions to the pages we're about to write to finish.
2840 */
2842 if (WriteRqstPtr < XLogCtl->LogwrtRqst.Write)
2843 WriteRqstPtr = XLogCtl->LogwrtRqst.Write;
2845 insertpos = WaitXLogInsertionsToFinish(WriteRqstPtr);
2846
2847 /*
2848 * Try to get the write lock. If we can't get it immediately, wait
2849 * until it's released, and recheck if we still need to do the flush
2850 * or if the backend that held the lock did it for us already. This
2851 * helps to maintain a good rate of group committing when the system
2852 * is bottlenecked by the speed of fsyncing.
2853 */
2854 if (!LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
2855 {
2856 /*
2857 * The lock is now free, but we didn't acquire it yet. Before we
2858 * do, loop back to check if someone else flushed the record for
2859 * us already.
2860 */
2861 continue;
2862 }
2863
2864 /* Got the lock; recheck whether request is satisfied */
2866 if (record <= LogwrtResult.Flush)
2867 {
2868 LWLockRelease(WALWriteLock);
2869 break;
2870 }
2871
2872 /*
2873 * Sleep before flush! By adding a delay here, we may give further
2874 * backends the opportunity to join the backlog of group commit
2875 * followers; this can significantly improve transaction throughput,
2876 * at the risk of increasing transaction latency.
2877 *
2878 * We do not sleep if enableFsync is not turned on, nor if there are
2879 * fewer than CommitSiblings other backends with active transactions.
2880 */
2881 if (CommitDelay > 0 && enableFsync &&
2883 {
2885
2886 /*
2887 * Re-check how far we can now flush the WAL. It's generally not
2888 * safe to call WaitXLogInsertionsToFinish while holding
2889 * WALWriteLock, because an in-progress insertion might need to
2890 * also grab WALWriteLock to make progress. But we know that all
2891 * the insertions up to insertpos have already finished, because
2892 * that's what the earlier WaitXLogInsertionsToFinish() returned.
2893 * We're only calling it again to allow insertpos to be moved
2894 * further forward, not to actually wait for anyone.
2895 */
2896 insertpos = WaitXLogInsertionsToFinish(insertpos);
2897 }
2898
2899 /* try to write/flush later additions to XLOG as well */
2900 WriteRqst.Write = insertpos;
2901 WriteRqst.Flush = insertpos;
2902
2903 XLogWrite(WriteRqst, insertTLI, false);
2904
2905 LWLockRelease(WALWriteLock);
2906 /* done */
2907 break;
2908 }
2909
2911
2912 /* wake up walsenders now that we've released heavily contended locks */
2914
2915 /*
2916 * If we still haven't flushed to the request point then we have a
2917 * problem; most likely, the requested flush point is past end of XLOG.
2918 * This has been seen to occur when a disk page has a corrupted LSN.
2919 *
2920 * Formerly we treated this as a PANIC condition, but that hurts the
2921 * system's robustness rather than helping it: we do not want to take down
2922 * the whole system due to corruption on one data page. In particular, if
2923 * the bad page is encountered again during recovery then we would be
2924 * unable to restart the database at all! (This scenario actually
2925 * happened in the field several times with 7.1 releases.) As of 8.4, bad
2926 * LSNs encountered during recovery are UpdateMinRecoveryPoint's problem;
2927 * the only time we can reach here during recovery is while flushing the
2928 * end-of-recovery checkpoint record, and we don't expect that to have a
2929 * bad LSN.
2930 *
2931 * Note that for calls from xact.c, the ERROR will be promoted to PANIC
2932 * since xact.c calls this routine inside a critical section. However,
2933 * calls from bufmgr.c are not within critical sections and so we will not
2934 * force a restart for a bad LSN on a data page.
2935 */
2936 if (LogwrtResult.Flush < record)
2937 elog(ERROR,
2938 "xlog flush request %X/%08X is not satisfied --- flushed only to %X/%08X",
2939 LSN_FORMAT_ARGS(record),
2941
2942 /*
2943 * Cross-check XLogNeedsFlush(). Some of the checks of XLogFlush() and
2944 * XLogNeedsFlush() are duplicated, and this assertion ensures that these
2945 * remain consistent.
2946 */
2947 Assert(!XLogNeedsFlush(record));
2948}
bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1402
bool MinimumActiveBackends(int min)
Definition: procarray.c:3508
int CommitDelay
Definition: xlog.c:133
int CommitSiblings
Definition: xlog.c:134
bool XLogNeedsFlush(XLogRecPtr record)
Definition: xlog.c:3124
bool XLogInsertAllowed(void)
Definition: xlog.c:6441

References Assert(), CommitDelay, CommitSiblings, elog, enableFsync, END_CRIT_SECTION, ERROR, XLogwrtRqst::Flush, XLogwrtResult::Flush, XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquireOrWait(), LWLockRelease(), MinimumActiveBackends(), pg_usleep(), RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, UpdateMinRecoveryPoint(), WaitXLogInsertionsToFinish(), WalSndWakeupProcessRequests(), XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtl, XLogInsertAllowed(), XLogNeedsFlush(), and XLogWrite().

Referenced by CheckPointReplicationOrigin(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), dropdb(), EndPrepare(), finish_sync_worker(), FlushBuffer(), LogLogicalMessage(), pg_truncate_visibility_map(), RecordTransactionAbortPrepared(), RecordTransactionCommit(), RecordTransactionCommitPrepared(), RelationTruncate(), ReplicationSlotReserveWal(), replorigin_get_progress(), replorigin_session_get_progress(), SlruPhysicalWritePage(), smgr_redo(), write_relmap_file(), WriteMTruncateXlogRec(), WriteTruncateXlogRec(), xact_redo_abort(), xact_redo_commit(), XLogInsertRecord(), and XLogReportParameters().

◆ XLogGetLastRemovedSegno()

XLogSegNo XLogGetLastRemovedSegno ( void  )

Definition at line 3774 of file xlog.c.

3775{
3776 XLogSegNo lastRemovedSegNo;
3777
3779 lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3781
3782 return lastRemovedSegNo;
3783}

References XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by copy_replication_slot(), GetWALAvailability(), ReplicationSlotReserveWal(), and reserve_wal_for_local_slot().

◆ XLogGetOldestSegno()

XLogSegNo XLogGetOldestSegno ( TimeLineID  tli)

Definition at line 3790 of file xlog.c.

3791{
3792 DIR *xldir;
3793 struct dirent *xlde;
3794 XLogSegNo oldest_segno = 0;
3795
3796 xldir = AllocateDir(XLOGDIR);
3797 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3798 {
3799 TimeLineID file_tli;
3800 XLogSegNo file_segno;
3801
3802 /* Ignore files that are not XLOG segments. */
3803 if (!IsXLogFileName(xlde->d_name))
3804 continue;
3805
3806 /* Parse filename to get TLI and segno. */
3807 XLogFromFileName(xlde->d_name, &file_tli, &file_segno,
3809
3810 /* Ignore anything that's not from the TLI of interest. */
3811 if (tli != file_tli)
3812 continue;
3813
3814 /* If it's the oldest so far, update oldest_segno. */
3815 if (oldest_segno == 0 || file_segno < oldest_segno)
3816 oldest_segno = file_segno;
3817 }
3818
3819 FreeDir(xldir);
3820 return oldest_segno;
3821}

References AllocateDir(), dirent::d_name, FreeDir(), IsXLogFileName(), ReadDir(), wal_segment_size, XLOGDIR, and XLogFromFileName().

Referenced by GetOldestUnsummarizedLSN(), MaybeRemoveOldWalSummaries(), and reserve_wal_for_local_slot().

◆ XLogGetReplicationSlotMinimumLSN()

static XLogRecPtr XLogGetReplicationSlotMinimumLSN ( void  )
static

Definition at line 2679 of file xlog.c.

2680{
2681 XLogRecPtr retval;
2682
2686
2687 return retval;
2688}
XLogRecPtr replicationSlotMinLSN
Definition: xlog.c:459

References XLogCtlData::info_lck, XLogCtlData::replicationSlotMinLSN, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by KeepLogSeg().

◆ XLogInitNewTimeline()

static void XLogInitNewTimeline ( TimeLineID  endTLI,
XLogRecPtr  endOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5254 of file xlog.c.

5255{
5256 char xlogfname[MAXFNAMELEN];
5257 XLogSegNo endLogSegNo;
5258 XLogSegNo startLogSegNo;
5259
5260 /* we always switch to a new timeline after archive recovery */
5261 Assert(endTLI != newTLI);
5262
5263 /*
5264 * Update min recovery point one last time.
5265 */
5267
5268 /*
5269 * Calculate the last segment on the old timeline, and the first segment
5270 * on the new timeline. If the switch happens in the middle of a segment,
5271 * they are the same, but if the switch happens exactly at a segment
5272 * boundary, startLogSegNo will be endLogSegNo + 1.
5273 */
5274 XLByteToPrevSeg(endOfLog, endLogSegNo, wal_segment_size);
5275 XLByteToSeg(endOfLog, startLogSegNo, wal_segment_size);
5276
5277 /*
5278 * Initialize the starting WAL segment for the new timeline. If the switch
5279 * happens in the middle of a segment, copy data from the last WAL segment
5280 * of the old timeline up to the switch point, to the starting WAL segment
5281 * on the new timeline.
5282 */
5283 if (endLogSegNo == startLogSegNo)
5284 {
5285 /*
5286 * Make a copy of the file on the new timeline.
5287 *
5288 * Writing WAL isn't allowed yet, so there are no locking
5289 * considerations. But we should be just as tense as XLogFileInit to
5290 * avoid emplacing a bogus file.
5291 */
5292 XLogFileCopy(newTLI, endLogSegNo, endTLI, endLogSegNo,
5294 }
5295 else
5296 {
5297 /*
5298 * The switch happened at a segment boundary, so just create the next
5299 * segment on the new timeline.
5300 */
5301 int fd;
5302
5303 fd = XLogFileInit(startLogSegNo, newTLI);
5304
5305 if (close(fd) != 0)
5306 {
5307 int save_errno = errno;
5308
5309 XLogFileName(xlogfname, newTLI, startLogSegNo, wal_segment_size);
5310 errno = save_errno;
5311 ereport(ERROR,
5313 errmsg("could not close file \"%s\": %m", xlogfname)));
5314 }
5315 }
5316
5317 /*
5318 * Let's just make real sure there are not .ready or .done flags posted
5319 * for the new segment.
5320 */
5321 XLogFileName(xlogfname, newTLI, startLogSegNo, wal_segment_size);
5322 XLogArchiveCleanup(xlogfname);
5323}
static void XLogFileCopy(TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
Definition: xlog.c:3434

References Assert(), close, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), InvalidXLogRecPtr, MAXFNAMELEN, UpdateMinRecoveryPoint(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveCleanup(), XLogFileCopy(), XLogFileInit(), XLogFileName(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ XLogInsertAllowed()

bool XLogInsertAllowed ( void  )

Definition at line 6441 of file xlog.c.

6442{
6443 /*
6444 * If value is "unconditionally true" or "unconditionally false", just
6445 * return it. This provides the normal fast path once recovery is known
6446 * done.
6447 */
6448 if (LocalXLogInsertAllowed >= 0)
6449 return (bool) LocalXLogInsertAllowed;
6450
6451 /*
6452 * Else, must check to see if we're still in recovery.
6453 */
6454 if (RecoveryInProgress())
6455 return false;
6456
6457 /*
6458 * On exit from recovery, reset to "unconditionally true", since there is
6459 * no need to keep checking.
6460 */
6462 return true;
6463}

References LocalXLogInsertAllowed, and RecoveryInProgress().

Referenced by XLogBeginInsert(), XLogFlush(), XLogInsertRecord(), and XLogNeedsFlush().

◆ XLogInsertRecord()

XLogRecPtr XLogInsertRecord ( XLogRecData rdata,
XLogRecPtr  fpw_lsn,
uint8  flags,
int  num_fpi,
bool  topxid_included 
)

Definition at line 748 of file xlog.c.

753{
755 pg_crc32c rdata_crc;
756 bool inserted;
757 XLogRecord *rechdr = (XLogRecord *) rdata->data;
758 uint8 info = rechdr->xl_info & ~XLR_INFO_MASK;
760 XLogRecPtr StartPos;
761 XLogRecPtr EndPos;
762 bool prevDoPageWrites = doPageWrites;
763 TimeLineID insertTLI;
764
765 /* Does this record type require special handling? */
766 if (unlikely(rechdr->xl_rmid == RM_XLOG_ID))
767 {
768 if (info == XLOG_SWITCH)
770 else if (info == XLOG_CHECKPOINT_REDO)
772 }
773
774 /* we assume that all of the record header is in the first chunk */
775 Assert(rdata->len >= SizeOfXLogRecord);
776
777 /* cross-check on whether we should be here or not */
778 if (!XLogInsertAllowed())
779 elog(ERROR, "cannot make new WAL entries during recovery");
780
781 /*
782 * Given that we're not in recovery, InsertTimeLineID is set and can't
783 * change, so we can read it without a lock.
784 */
785 insertTLI = XLogCtl->InsertTimeLineID;
786
787 /*----------
788 *
789 * We have now done all the preparatory work we can without holding a
790 * lock or modifying shared state. From here on, inserting the new WAL
791 * record to the shared WAL buffer cache is a two-step process:
792 *
793 * 1. Reserve the right amount of space from the WAL. The current head of
794 * reserved space is kept in Insert->CurrBytePos, and is protected by
795 * insertpos_lck.
796 *
797 * 2. Copy the record to the reserved WAL space. This involves finding the
798 * correct WAL buffer containing the reserved space, and copying the
799 * record in place. This can be done concurrently in multiple processes.
800 *
801 * To keep track of which insertions are still in-progress, each concurrent
802 * inserter acquires an insertion lock. In addition to just indicating that
803 * an insertion is in progress, the lock tells others how far the inserter
804 * has progressed. There is a small fixed number of insertion locks,
805 * determined by NUM_XLOGINSERT_LOCKS. When an inserter crosses a page
806 * boundary, it updates the value stored in the lock to the how far it has
807 * inserted, to allow the previous buffer to be flushed.
808 *
809 * Holding onto an insertion lock also protects RedoRecPtr and
810 * fullPageWrites from changing until the insertion is finished.
811 *
812 * Step 2 can usually be done completely in parallel. If the required WAL
813 * page is not initialized yet, you have to grab WALBufMappingLock to
814 * initialize it, but the WAL writer tries to do that ahead of insertions
815 * to avoid that from happening in the critical path.
816 *
817 *----------
818 */
820
821 if (likely(class == WALINSERT_NORMAL))
822 {
824
825 /*
826 * Check to see if my copy of RedoRecPtr is out of date. If so, may
827 * have to go back and have the caller recompute everything. This can
828 * only happen just after a checkpoint, so it's better to be slow in
829 * this case and fast otherwise.
830 *
831 * Also check to see if fullPageWrites was just turned on or there's a
832 * running backup (which forces full-page writes); if we weren't
833 * already doing full-page writes then go back and recompute.
834 *
835 * If we aren't doing full-page writes then RedoRecPtr doesn't
836 * actually affect the contents of the XLOG record, so we'll update
837 * our local copy but not force a recomputation. (If doPageWrites was
838 * just turned off, we could recompute the record without full pages,
839 * but we choose not to bother.)
840 */
841 if (RedoRecPtr != Insert->RedoRecPtr)
842 {
843 Assert(RedoRecPtr < Insert->RedoRecPtr);
844 RedoRecPtr = Insert->RedoRecPtr;
845 }
846 doPageWrites = (Insert->fullPageWrites || Insert->runningBackups > 0);
847
848 if (doPageWrites &&
849 (!prevDoPageWrites ||
850 (fpw_lsn != InvalidXLogRecPtr && fpw_lsn <= RedoRecPtr)))
851 {
852 /*
853 * Oops, some buffer now needs to be backed up that the caller
854 * didn't back up. Start over.
855 */
858 return InvalidXLogRecPtr;
859 }
860
861 /*
862 * Reserve space for the record in the WAL. This also sets the xl_prev
863 * pointer.
864 */
865 ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
866 &rechdr->xl_prev);
867
868 /* Normal records are always inserted. */
869 inserted = true;
870 }
871 else if (class == WALINSERT_SPECIAL_SWITCH)
872 {
873 /*
874 * In order to insert an XLOG_SWITCH record, we need to hold all of
875 * the WAL insertion locks, not just one, so that no one else can
876 * begin inserting a record until we've figured out how much space
877 * remains in the current WAL segment and claimed all of it.
878 *
879 * Nonetheless, this case is simpler than the normal cases handled
880 * below, which must check for changes in doPageWrites and RedoRecPtr.
881 * Those checks are only needed for records that can contain buffer
882 * references, and an XLOG_SWITCH record never does.
883 */
884 Assert(fpw_lsn == InvalidXLogRecPtr);
886 inserted = ReserveXLogSwitch(&StartPos, &EndPos, &rechdr->xl_prev);
887 }
888 else
889 {
891
892 /*
893 * We need to update both the local and shared copies of RedoRecPtr,
894 * which means that we need to hold all the WAL insertion locks.
895 * However, there can't be any buffer references, so as above, we need
896 * not check RedoRecPtr before inserting the record; we just need to
897 * update it afterwards.
898 */
899 Assert(fpw_lsn == InvalidXLogRecPtr);
901 ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
902 &rechdr->xl_prev);
903 RedoRecPtr = Insert->RedoRecPtr = StartPos;
904 inserted = true;
905 }
906
907 if (inserted)
908 {
909 /*
910 * Now that xl_prev has been filled in, calculate CRC of the record
911 * header.
912 */
913 rdata_crc = rechdr->xl_crc;
914 COMP_CRC32C(rdata_crc, rechdr, offsetof(XLogRecord, xl_crc));
915 FIN_CRC32C(rdata_crc);
916 rechdr->xl_crc = rdata_crc;
917
918 /*
919 * All the record data, including the header, is now ready to be
920 * inserted. Copy the record in the space reserved.
921 */
923 class == WALINSERT_SPECIAL_SWITCH, rdata,
924 StartPos, EndPos, insertTLI);
925
926 /*
927 * Unless record is flagged as not important, update LSN of last
928 * important record in the current slot. When holding all locks, just
929 * update the first one.
930 */
931 if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
932 {
933 int lockno = holdingAllLocks ? 0 : MyLockNo;
934
935 WALInsertLocks[lockno].l.lastImportantAt = StartPos;
936 }
937 }
938 else
939 {
940 /*
941 * This was an xlog-switch record, but the current insert location was
942 * already exactly at the beginning of a segment, so there was no need
943 * to do anything.
944 */
945 }
946
947 /*
948 * Done! Let others know that we're finished.
949 */
951
953
955
956 /*
957 * Mark top transaction id is logged (if needed) so that we should not try
958 * to log it again with the next WAL record in the current subtransaction.
959 */
960 if (topxid_included)
962
963 /*
964 * Update shared LogwrtRqst.Write, if we crossed page boundary.
965 */
966 if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
967 {
969 /* advance global request to include new block(s) */
970 if (XLogCtl->LogwrtRqst.Write < EndPos)
971 XLogCtl->LogwrtRqst.Write = EndPos;
974 }
975
976 /*
977 * If this was an XLOG_SWITCH record, flush the record and the empty
978 * padding space that fills the rest of the segment, and perform
979 * end-of-segment actions (eg, notifying archiver).
980 */
981 if (class == WALINSERT_SPECIAL_SWITCH)
982 {
983 TRACE_POSTGRESQL_WAL_SWITCH();
984 XLogFlush(EndPos);
985
986 /*
987 * Even though we reserved the rest of the segment for us, which is
988 * reflected in EndPos, we return a pointer to just the end of the
989 * xlog-switch record.
990 */
991 if (inserted)
992 {
993 EndPos = StartPos + SizeOfXLogRecord;
994 if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
995 {
996 uint64 offset = XLogSegmentOffset(EndPos, wal_segment_size);
997
998 if (offset == EndPos % XLOG_BLCKSZ)
999 EndPos += SizeOfXLogLongPHD;
1000 else
1001 EndPos += SizeOfXLogShortPHD;
1002 }
1003 }
1004 }
1005
1006#ifdef WAL_DEBUG
1007 if (XLOG_DEBUG)
1008 {
1009 static XLogReaderState *debug_reader = NULL;
1010 XLogRecord *record;
1011 DecodedXLogRecord *decoded;
1013 StringInfoData recordBuf;
1014 char *errormsg = NULL;
1015 MemoryContext oldCxt;
1016
1017 oldCxt = MemoryContextSwitchTo(walDebugCxt);
1018
1020 appendStringInfo(&buf, "INSERT @ %X/%08X: ", LSN_FORMAT_ARGS(EndPos));
1021
1022 /*
1023 * We have to piece together the WAL record data from the XLogRecData
1024 * entries, so that we can pass it to the rm_desc function as one
1025 * contiguous chunk.
1026 */
1027 initStringInfo(&recordBuf);
1028 for (; rdata != NULL; rdata = rdata->next)
1029 appendBinaryStringInfo(&recordBuf, rdata->data, rdata->len);
1030
1031 /* We also need temporary space to decode the record. */
1032 record = (XLogRecord *) recordBuf.data;
1033 decoded = (DecodedXLogRecord *)
1035
1036 if (!debug_reader)
1037 debug_reader = XLogReaderAllocate(wal_segment_size, NULL,
1038 XL_ROUTINE(.page_read = NULL,
1039 .segment_open = NULL,
1040 .segment_close = NULL),
1041 NULL);
1042 if (!debug_reader)
1043 {
1044 appendStringInfoString(&buf, "error decoding record: out of memory while allocating a WAL reading processor");
1045 }
1046 else if (!DecodeXLogRecord(debug_reader,
1047 decoded,
1048 record,
1049 EndPos,
1050 &errormsg))
1051 {
1052 appendStringInfo(&buf, "error decoding record: %s",
1053 errormsg ? errormsg : "no error message");
1054 }
1055 else
1056 {
1057 appendStringInfoString(&buf, " - ");
1058
1059 debug_reader->record = decoded;
1060 xlog_outdesc(&buf, debug_reader);
1061 debug_reader->record = NULL;
1062 }
1063 elog(LOG, "%s", buf.data);
1064
1065 pfree(decoded);
1066 pfree(buf.data);
1067 pfree(recordBuf.data);
1068 MemoryContextSwitchTo(oldCxt);
1069 }
1070#endif
1071
1072 /*
1073 * Update our global variables
1074 */
1075 ProcLastRecPtr = StartPos;
1076 XactLastRecEnd = EndPos;
1077
1078 /* Report WAL traffic to the instrumentation. */
1079 if (inserted)
1080 {
1081 pgWalUsage.wal_bytes += rechdr->xl_tot_len;
1083 pgWalUsage.wal_fpi += num_fpi;
1084
1085 /* Required for the flush of pending stats WAL data */
1086 pgstat_report_fixed = true;
1087 }
1088
1089 return EndPos;
1090}
#define likely(x)
Definition: c.h:402
#define unlikely(x)
Definition: c.h:403
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:81
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:281
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:230
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
int64 wal_records
Definition: instrument.h:53
DecodedXLogRecord * record
Definition: xlogreader.h:236
pg_crc32c xl_crc
Definition: xlogrecord.h:49
void MarkSubxactTopXidLogged(void)
Definition: xact.c:591
void MarkCurrentTransactionIdLoggedIfAny(void)
Definition: xact.c:541
XLogRecPtr XactLastRecEnd
Definition: xlog.c:255
static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
Definition: xlog.c:1228
static void ReserveXLogInsertLocation(int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1111
static bool ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1167
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition: xlogreader.c:107
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
Definition: xlogreader.c:1682
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
Definition: xlogreader.c:1649
#define XL_ROUTINE(...)
Definition: xlogreader.h:117
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
void xlog_outdesc(StringInfo buf, XLogReaderState *record)

References appendBinaryStringInfo(), appendStringInfo(), appendStringInfoString(), Assert(), buf, COMP_CRC32C, CopyXLogRecordToWAL(), XLogRecData::data, StringInfoData::data, DecodeXLogRecord(), DecodeXLogRecordRequiredSpace(), doPageWrites, elog, END_CRIT_SECTION, ERROR, FIN_CRC32C, holdingAllLocks, if(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogRecData::len, likely, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MarkCurrentTransactionIdLoggedIfAny(), MarkSubxactTopXidLogged(), MemoryContextSwitchTo(), MyLockNo, XLogRecData::next, palloc(), pfree(), pgstat_report_fixed, pgWalUsage, ProcLastRecPtr, XLogReaderState::record, RedoRecPtr, RefreshXLogWriteResult, ReserveXLogInsertLocation(), ReserveXLogSwitch(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, unlikely, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_records, wal_segment_size, WALINSERT_NORMAL, WALINSERT_SPECIAL_CHECKPOINT, WALINSERT_SPECIAL_SWITCH, WALInsertLockAcquire(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WALInsertLocks, XLogwrtRqst::Write, XactLastRecEnd, XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XL_ROUTINE, XLogRecord::xl_tot_len, XLOG_CHECKPOINT_REDO, XLOG_MARK_UNIMPORTANT, xlog_outdesc(), XLOG_SWITCH, XLogCtl, XLogFlush(), XLogInsertAllowed(), XLogReaderAllocate(), XLogSegmentOffset, and XLR_INFO_MASK.

Referenced by XLogInsert().

◆ XLogNeedsFlush()

bool XLogNeedsFlush ( XLogRecPtr  record)

Definition at line 3124 of file xlog.c.

3125{
3126 /*
3127 * During recovery, we don't flush WAL but update minRecoveryPoint
3128 * instead. So "needs flush" is taken to mean whether minRecoveryPoint
3129 * would need to be updated.
3130 *
3131 * Using XLogInsertAllowed() rather than RecoveryInProgress() matters for
3132 * the case of an end-of-recovery checkpoint, where WAL data is flushed.
3133 * This check should be consistent with the one in XLogFlush().
3134 */
3135 if (!XLogInsertAllowed())
3136 {
3137 /* Quick exit if already known to be updated or cannot be updated */
3138 if (record <= LocalMinRecoveryPoint || !updateMinRecoveryPoint)
3139 return false;
3140
3141 /*
3142 * An invalid minRecoveryPoint means that we need to recover all the
3143 * WAL, i.e., we're doing crash recovery. We never modify the control
3144 * file's value in that case, so we can short-circuit future checks
3145 * here too. This triggers a quick exit path for the startup process,
3146 * which cannot update its local copy of minRecoveryPoint as long as
3147 * it has not replayed all WAL available when doing crash recovery.
3148 */
3149 if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint) && InRecovery)
3150 {
3151 updateMinRecoveryPoint = false;
3152 return false;
3153 }
3154
3155 /*
3156 * Update local copy of minRecoveryPoint. But if the lock is busy,
3157 * just return a conservative guess.
3158 */
3159 if (!LWLockConditionalAcquire(ControlFileLock, LW_SHARED))
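3160 return true;
3161 LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
3162 LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;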
3163 LWLockRelease(ControlFileLock);
3164
3165 /*
3166 * Check minRecoveryPoint for any other process than the startup
3167 * process doing crash recovery, which should not update the control
3168 * file value if crash recovery is still running.
3169 */
3170 if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint))
3171 updateMinRecoveryPoint = false;
3172
3173 /* check again */
3174 if (record <= LocalMinRecoveryPoint || !updateMinRecoveryPoint)
3175 return false;
3176 else
3177 return true;
3178 }
3179
3180 /* Quick exit if already known flushed */
3181 if (record <= LogwrtResult.Flush)
3182 return false;
3183
3184 /* read LogwrtResult and update local state */
3185 RefreshXLogWriteResult(LogwrtResult);
3186
3187 /* check again */
3188 if (record <= LogwrtResult.Flush)
3189 return false;
3190
3191 return true;
3192}
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1345

References ControlFile, XLogwrtResult::Flush, InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LogwrtResult, LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RefreshXLogWriteResult, updateMinRecoveryPoint, XLogInsertAllowed(), and XLogRecPtrIsInvalid.

Referenced by GetVictimBuffer(), SetHintBits(), and XLogFlush().

◆ XLogPutNextOid()

void XLogPutNextOid ( Oid  nextOid)

Definition at line 8073 of file xlog.c.

8074{
8075 XLogBeginInsert();
8076 XLogRegisterData(&nextOid, sizeof(Oid));
8077 (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
8078
8079 /*
8080 * We need not flush the NEXTOID record immediately, because any of the
8081 * just-allocated OIDs could only reach disk as part of a tuple insert or
8082 * update that would have its own XLOG record that must follow the NEXTOID
8083 * record. Therefore, the standard buffer LSN interlock applied to those
8084 * records will ensure no such OID reaches disk before the NEXTOID record
8085 * does.
8086 *
8087 * Note, however, that the above statement only covers state "within" the
8088 * database. When we use a generated OID as a file or directory name, we
8089 * are in a sense violating the basic WAL rule, because that filesystem
8090 * change may reach disk before the NEXTOID WAL record does. The impact
8091 * of this is that if a database crash occurs immediately afterward, we
8092 * might after restart re-generate the same OID and find that it conflicts
8093 * with the leftover file or directory. But since for safety's sake we
8094 * always loop until finding a nonconflicting filename, this poses no real
8095 * problem in practice. See pgsql-hackers discussion 27-Sep-2006.
8096 */
8097}

References XLOG_NEXTOID, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by GetNewObjectId().

◆ XLogRecPtrToBytePos()

static uint64 XLogRecPtrToBytePos ( XLogRecPtr  ptr)
static

Definition at line 1944 of file xlog.c.

1945{
1946 uint64 fullsegs;
1947 uint32 fullpages;
1948 uint32 offset;
1949 uint64 result;
1950
1951 XLByteToSeg(ptr, fullsegs, wal_segment_size);
1952
1953 fullpages = (XLogSegmentOffset(ptr, wal_segment_size)) / XLOG_BLCKSZ;
1954 offset = ptr % XLOG_BLCKSZ;
1955
1956 if (fullpages == 0)
1957 {
1958 result = fullsegs * UsableBytesInSegment;
1959 if (offset > 0)
1960 {
1961 Assert(offset >= SizeOfXLogLongPHD);
1962 result += offset - SizeOfXLogLongPHD;
1963 }
1964 }
1965 else
1966 {
1967 result = fullsegs * UsableBytesInSegment +
1968 (XLOG_BLCKSZ - SizeOfXLogLongPHD) + /* account for first page */
1969 (fullpages - 1) * UsableBytesInPage; /* full pages */
1970 if (offset > 0)
1971 {
1972 Assert(offset >= SizeOfXLogShortPHD);
1973 result += offset - SizeOfXLogShortPHD;
1974 }
1975 }
1976
1977 return result;
1978}

References Assert(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, XLByteToSeg, and XLogSegmentOffset.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and StartupXLOG().

◆ XLogReportParameters()

static void XLogReportParameters ( void  )
static

Definition at line 8153 of file xlog.c.

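8154{
8155 if (wal_level != ControlFile->wal_level ||
8156 wal_log_hints != ControlFile->wal_log_hints ||
8157 MaxConnections != ControlFile->MaxConnections ||
8158 max_worker_processes != ControlFile->max_worker_processes ||
8159 max_wal_senders != ControlFile->max_wal_senders ||
8160 max_prepared_xacts != ControlFile->max_prepared_xacts ||
8161 max_locks_per_xact != ControlFile->max_locks_per_xact ||
8162 track_commit_timestamp != ControlFile->track_commit_timestamp)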
8163 {
8164 /*
8165 * The change in number of backend slots doesn't need to be WAL-logged
8166 * if archiving is not enabled, as you can't start archive recovery
8167 * with wal_level=minimal anyway. We don't really care about the
8168 * values in pg_control either if wal_level=minimal, but seems better
8169 * to keep them up-to-date to avoid confusion.
8170 */
8171 if (wal_level != ControlFile->wal_level || XLogIsNeeded())
8172 {
8173 xl_parameter_change xlrec;
8174 XLogRecPtr recptr;
8175
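8176 xlrec.MaxConnections = MaxConnections;
8177 xlrec.max_worker_processes = max_worker_processes;
8178 xlrec.max_wal_senders = max_wal_senders;
8179 xlrec.max_prepared_xacts = max_prepared_xacts;
8180 xlrec.max_locks_per_xact = max_locks_per_xact;
8181 xlrec.wal_level = wal_level;
8182 xlrec.wal_log_hints = wal_log_hints;
8183 xlrec.track_commit_timestamp = track_commit_timestamp;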
8184
8185 XLogBeginInsert();
8186 XLogRegisterData(&xlrec, sizeof(xlrec));
8187
8188 recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE);
8189 XLogFlush(recptr);
8190 }
8191
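8192 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8193
8194 ControlFile->MaxConnections = MaxConnections;
8195 ControlFile->max_worker_processes = max_worker_processes;
8196 ControlFile->max_wal_senders = max_wal_senders;
8197 ControlFile->max_prepared_xacts = max_prepared_xacts;
8198 ControlFile->max_locks_per_xact = max_locks_per_xact;
8199 ControlFile->wal_level = wal_level;
8200 ControlFile->wal_log_hints = wal_log_hints;
8201 ControlFile->track_commit_timestamp = track_commit_timestamp;
8202 UpdateControlFile();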
8203
8204 LWLockRelease(ControlFileLock);
8205 }
8206}

References ControlFile, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), max_locks_per_xact, xl_parameter_change::max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, xl_parameter_change::max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, xl_parameter_change::max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, xl_parameter_change::max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, track_commit_timestamp, xl_parameter_change::track_commit_timestamp, ControlFileData::track_commit_timestamp, UpdateControlFile(), wal_level, xl_parameter_change::wal_level, ControlFileData::wal_level, wal_log_hints, xl_parameter_change::wal_log_hints, ControlFileData::wal_log_hints, XLOG_PARAMETER_CHANGE, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by StartupXLOG().

◆ XLogRestorePoint()

XLogRecPtr XLogRestorePoint ( const char *  rpName)

Definition at line 8128 of file xlog.c.

8129{
8130 XLogRecPtr RecPtr;
8131 xl_restore_point xlrec;
8132
8133 xlrec.rp_time = GetCurrentTimestamp();
8134 strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
8135
8136 XLogBeginInsert();
8137 XLogRegisterData(&xlrec, sizeof(xl_restore_point));
8138
8139 RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
8140
8141 ereport(LOG,
8142 errmsg("restore point \"%s\" created at %X/%08X",
8143 rpName, LSN_FORMAT_ARGS(RecPtr)));
8144
8145 return RecPtr;
8146}
char rp_name[MAXFNAMELEN]
TimestampTz rp_time

References ereport, errmsg(), GetCurrentTimestamp(), LOG, LSN_FORMAT_ARGS, MAXFNAMELEN, xl_restore_point::rp_name, xl_restore_point::rp_time, strlcpy(), XLOG_RESTORE_POINT, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by pg_create_restore_point().

◆ XLogSetAsyncXactLSN()

void XLogSetAsyncXactLSN ( XLogRecPtr  asyncXactLSN)

Definition at line 2609 of file xlog.c.

2610{
2611 XLogRecPtr WriteRqstPtr = asyncXactLSN;
2612 bool sleeping;
2613 bool wakeup = false;
2614 XLogRecPtr prevAsyncXactLSN;
2615
2616 SpinLockAcquire(&XLogCtl->info_lck);
2617 sleeping = XLogCtl->WalWriterSleeping;
2618 prevAsyncXactLSN = XLogCtl->asyncXactLSN;
2619 if (XLogCtl->asyncXactLSN < asyncXactLSN)
2620 XLogCtl->asyncXactLSN = asyncXactLSN;
2621 SpinLockRelease(&XLogCtl->info_lck);
2622
2623 /*
2624 * If somebody else already called this function with a more aggressive
2625 * LSN, they will have done what we needed (and perhaps more).
2626 */
2627 if (asyncXactLSN <= prevAsyncXactLSN)
2628 return;
2629
2630 /*
2631 * If the WALWriter is sleeping, kick it to make it come out of low-power
2632 * mode, so that this async commit will reach disk within the expected
2633 * amount of time. Otherwise, determine whether it has enough WAL
2634 * available to flush, the same way that XLogBackgroundFlush() does.
2635 */
2636 if (sleeping)
2637 wakeup = true;
2638 else
2639 {
2640 int flushblocks;
2641
2642 RefreshXLogWriteResult(LogwrtResult);
2643
2644 flushblocks =
2645 WriteRqstPtr / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
2646
2647 if (WalWriterFlushAfter == 0 || flushblocks >= WalWriterFlushAfter)
2648 wakeup = true;
2649 }
2650
2651 if (wakeup)
2652 {
2653 volatile PROC_HDR *procglobal = ProcGlobal;
2654 ProcNumber walwriterProc = procglobal->walwriterProc;
2655
2656 if (walwriterProc != INVALID_PROC_NUMBER)
2657 SetLatch(&GetPGProcByNumber(walwriterProc)->procLatch);
2658 }
2659}
void SetLatch(Latch *latch)
Definition: latch.c:290
#define GetPGProcByNumber(n)
Definition: proc.h:440
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
int ProcNumber
Definition: procnumber.h:24
PROC_HDR * ProcGlobal
Definition: proc.c:78
Definition: proc.h:386
ProcNumber walwriterProc
Definition: proc.h:424
static TimestampTz wakeup[NUM_WALRCV_WAKEUPS]
Definition: walreceiver.c:130

References XLogCtlData::asyncXactLSN, XLogwrtResult::Flush, GetPGProcByNumber, XLogCtlData::info_lck, INVALID_PROC_NUMBER, LogwrtResult, ProcGlobal, RefreshXLogWriteResult, SetLatch(), SpinLockAcquire, SpinLockRelease, wakeup, WalWriterFlushAfter, PROC_HDR::walwriterProc, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by AbortTransaction(), LogCurrentRunningXacts(), RecordTransactionAbort(), and RecordTransactionCommit().

◆ XLogSetReplicationSlotMinimumLSN()

void XLogSetReplicationSlotMinimumLSN ( XLogRecPtr  lsn)

◆ XLOGShmemInit()

void XLOGShmemInit ( void  )

Definition at line 4960 of file xlog.c.

4961{
4962 bool foundCFile,
4963 foundXLog;
4964 char *allocptr;
4965 int i;
4966 ControlFileData *localControlFile;
4967
4968#ifdef WAL_DEBUG
4969
4970 /*
4971 * Create a memory context for WAL debugging that's exempt from the normal
4972 * "no pallocs in critical section" rule. Yes, that can lead to a PANIC if
4973 * an allocation fails, but wal_debug is not for production use anyway.
4974 */
4975 if (walDebugCxt == NULL)
4976 {
4977 walDebugCxt = AllocSetContextCreate(TopMemoryContext,
4978 "WAL Debug",
4979 ALLOCSET_DEFAULT_SIZES);
4980 MemoryContextAllowInCriticalSection(walDebugCxt, true);
4981 }
4982#endif
4983
4984
4985 XLogCtl = (XLogCtlData *)
4986 ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
4987
4988 localControlFile = ControlFile;
4989 ControlFile = (ControlFileData *)
4990 ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
4991
4992 if (foundCFile || foundXLog)
4993 {
4994 /* both should be present or neither */
4995 Assert(foundCFile && foundXLog);
4996
4997 /* Initialize local copy of WALInsertLocks */
4998 WALInsertLocks = XLogCtl->Insert.WALInsertLocks;
4999
5000 if (localControlFile)
5001 pfree(localControlFile);
5002 return;
5003 }
5004 memset(XLogCtl, 0, sizeof(XLogCtlData));
5005
5006 /*
5007 * Already have read control file locally, unless in bootstrap mode. Move
5008 * contents into shared memory.
5009 */
5010 if (localControlFile)
5011 {
5012 memcpy(ControlFile, localControlFile, sizeof(ControlFileData));
5013 pfree(localControlFile);
5014 }
5015
5016 /*
5017 * Since XLogCtlData contains XLogRecPtr fields, its sizeof should be a
5018 * multiple of the alignment for same, so no extra alignment padding is
5019 * needed here.
5020 */
5021 allocptr = ((char *) XLogCtl) + sizeof(XLogCtlData);
5022 XLogCtl->xlblocks = (pg_atomic_uint64 *) allocptr;
5023 allocptr += sizeof(pg_atomic_uint64) * XLOGbuffers;
5024
5025 for (i = 0; i < XLOGbuffers; i++)
5026 {
5027 pg_atomic_init_u64(&XLogCtl->xlblocks[i], InvalidXLogRecPtr);
5028 }
5029
5030 /* WAL insertion locks. Ensure they're aligned to the full padded size */
5031 allocptr += sizeof(WALInsertLockPadded) -
5032 ((uintptr_t) allocptr) % sizeof(WALInsertLockPadded);
5033 WALInsertLocks = XLogCtl->Insert.WALInsertLocks =
5034 (WALInsertLockPadded *) allocptr;
5035 allocptr += sizeof(WALInsertLockPadded) * NUM_XLOGINSERT_LOCKS;
5036
5037 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
5038 {
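5039 LWLockInitialize(&WALInsertLocks[i].l.lock, LWTRANCHE_WAL_INSERT);
5040 pg_atomic_init_u64(&WALInsertLocks[i].l.insertingAt, InvalidXLogRecPtr);
5041 WALInsertLocks[i].l.lastImportantAt = InvalidXLogRecPtr;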
5042 }
5043
5044 /*
5045 * Align the start of the page buffers to a full xlog block size boundary.
5046 * This simplifies some calculations in XLOG insertion. It is also
5047 * required for O_DIRECT.
5048 */
5049 allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
5050 XLogCtl->pages = allocptr;
5051 memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
5052
5053 /*
5054 * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
5055 * in additional info.)
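5056 */
5057 XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
5058 XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
5059 XLogCtl->InstallXLogFileSegmentActive = false;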
5060 XLogCtl->WalWriterSleeping = false;
5061
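5062 SpinLockInit(&XLogCtl->Insert.insertpos_lck);
5063 SpinLockInit(&XLogCtl->info_lck);
5064 pg_atomic_init_u64(&XLogCtl->logInsertResult, InvalidXLogRecPtr);
5065 pg_atomic_init_u64(&XLogCtl->logWriteResult, InvalidXLogRecPtr);
5066 pg_atomic_init_u64(&XLogCtl->logFlushResult, InvalidXLogRecPtr);
5067 pg_atomic_init_u64(&XLogCtl->unloggedLSN, InvalidXLogRecPtr);
5068}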
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:451
struct pg_atomic_uint64 pg_atomic_uint64
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:698
MemoryContext TopMemoryContext
Definition: mcxt.c:166
void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
Definition: mcxt.c:740
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
#define SpinLockInit(lock)
Definition: spin.h:57
int XLogCacheBlck
Definition: xlog.c:494
WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:445
slock_t insertpos_lck
Definition: xlog.c:399
union WALInsertLockPadded WALInsertLockPadded
Size XLOGShmemSize(void)
Definition: xlog.c:4910
struct XLogCtlData XLogCtlData

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert(), ControlFile, i, XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlInsert::insertpos_lck, XLogCtlData::InstallXLogFileSegmentActive, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LWLockInitialize(), MemoryContextAllowInCriticalSection(), NUM_XLOGINSERT_LOCKS, XLogCtlData::pages, pfree(), pg_atomic_init_u64(), RECOVERY_STATE_CRASH, XLogCtlData::SharedRecoveryState, ShmemInitStruct(), SpinLockInit, TopMemoryContext, TYPEALIGN, XLogCtlData::unloggedLSN, XLogCtlInsert::WALInsertLocks, WALInsertLocks, XLogCtlData::WalWriterSleeping, XLogCtlData::xlblocks, XLOGbuffers, XLogCtlData::XLogCacheBlck, XLogCtl, and XLOGShmemSize().

Referenced by CreateOrAttachShmemStructs().

◆ XLOGShmemSize()

Size XLOGShmemSize ( void  )

Definition at line 4910 of file xlog.c.

4911{
4912 Size size;
4913
4914 /*
4915 * If the value of wal_buffers is -1, use the preferred auto-tune value.
4916 * This isn't an amazingly clean place to do this, but we must wait till
4917 * NBuffers has received its final value, and must do it before using the
4918 * value of XLOGbuffers to do anything important.
4919 *
4920 * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
4921 * However, if the DBA explicitly set wal_buffers = -1 in the config file,
4922 * then PGC_S_DYNAMIC_DEFAULT will fail to override that and we must force
4923 * the matter with PGC_S_OVERRIDE.
4924 */
4925 if (XLOGbuffers == -1)
4926 {
4927 char buf[32];
4928
4929 snprintf(buf, sizeof(buf), "%d", XLOGChooseNumBuffers());
4930 SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4931 PGC_S_DYNAMIC_DEFAULT);
4932 if (XLOGbuffers == -1) /* failed to apply it? */
4933 SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4934 PGC_S_OVERRIDE);
4935 }
4936 Assert(XLOGbuffers > 0);
4937
4938 /* XLogCtl */
4939 size = sizeof(XLogCtlData);
4940
4941 /* WAL insertion locks, plus alignment */
4942 size = add_size(size, mul_size(sizeof(WALInsertLockPadded), NUM_XLOGINSERT_LOCKS + 1));
4943 /* xlblocks array */
4944 size = add_size(size, mul_size(sizeof(pg_atomic_uint64), XLOGbuffers));
4945 /* extra alignment padding for XLOG I/O buffers */
4946 size = add_size(size, Max(XLOG_BLCKSZ, PG_IO_ALIGN_SIZE));
4947 /* and the buffers themselves */
4948 size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
4949
4950 /*
4951 * Note: we don't count ControlFileData, it comes out of the "slop factor"
4952 * added by CreateSharedMemoryAndSemaphores. This lets us use this
4953 * routine again below to compute the actual allocation size.
4954 */
4955
4956 return size;
4957}
#define Max(x, y)
Definition: c.h:998
@ PGC_S_OVERRIDE
Definition: guc.h:123
@ PGC_POSTMASTER
Definition: guc.h:74
#define PG_IO_ALIGN_SIZE
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510

References add_size(), Assert(), buf, Max, mul_size(), NUM_XLOGINSERT_LOCKS, PG_IO_ALIGN_SIZE, PGC_POSTMASTER, PGC_S_DYNAMIC_DEFAULT, PGC_S_OVERRIDE, SetConfigOption(), snprintf, XLOGbuffers, and XLOGChooseNumBuffers().

Referenced by CalculateShmemSize(), and XLOGShmemInit().

◆ XLogShutdownWalRcv()

void XLogShutdownWalRcv ( void  )

Definition at line 9517 of file xlog.c.

9518{
9519 ShutdownWalRcv();
9520
9521 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9522 XLogCtl->InstallXLogFileSegmentActive = false;
9523 LWLockRelease(ControlFileLock);
9524}
void ShutdownWalRcv(void)

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ShutdownWalRcv(), and XLogCtl.

Referenced by FinishWalRecovery(), and WaitForWALToBecomeAvailable().

◆ XLogWrite()

static void XLogWrite ( XLogwrtRqst  WriteRqst,
TimeLineID  tli,
bool  flexible 
)
static

Definition at line 2304 of file xlog.c.

2305{
2306 bool ispartialpage;
2307 bool last_iteration;
2308 bool finishing_seg;
2309 int curridx;
2310 int npages;
2311 int startidx;
2312 uint32 startoffset;
2313
2314 /* We should always be inside a critical section here */
2315 Assert(CritSectionCount > 0);
2316
2317 /*
2318 * Update local LogwrtResult (caller probably did this already, but...)
2319 */
2320 RefreshXLogWriteResult(LogwrtResult);
2321
2322 /*
2323 * Since successive pages in the xlog cache are consecutively allocated,
2324 * we can usually gather multiple pages together and issue just one
2325 * write() call. npages is the number of pages we have determined can be
2326 * written together; startidx is the cache block index of the first one,
2327 * and startoffset is the file offset at which it should go. The latter
2328 * two variables are only valid when npages > 0, but we must initialize
2329 * all of them to keep the compiler quiet.
2330 */
2331 npages = 0;
2332 startidx = 0;
2333 startoffset = 0;
2334
2335 /*
2336 * Within the loop, curridx is the cache block index of the page to
2337 * consider writing. Begin at the buffer containing the next unwritten
2338 * page, or last partially written page.
2339 */
2340 curridx = XLogRecPtrToBufIdx(LogwrtResult.Write);
2341
2342 while (LogwrtResult.Write < WriteRqst.Write)
2343 {
2344 /*
2345 * Make sure we're not ahead of the insert process. This could happen
2346 * if we're passed a bogus WriteRqst.Write that is past the end of the
2347 * last page that's been initialized by AdvanceXLInsertBuffer.
2348 */
2349 XLogRecPtr EndPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[curridx]);
2350
2351 if (LogwrtResult.Write >= EndPtr)
2352 elog(PANIC, "xlog write request %X/%08X is past end of log %X/%08X",
2353 LSN_FORMAT_ARGS(LogwrtResult.Write),
2354 LSN_FORMAT_ARGS(EndPtr));
2355
2356 /* Advance LogwrtResult.Write to end of current buffer page */
2357 LogwrtResult.Write = EndPtr;
2358 ispartialpage = WriteRqst.Write < LogwrtResult.Write;
2359
2360 if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
2361 wal_segment_size))
2362 {
2363 /*
2364 * Switch to new logfile segment. We cannot have any pending
2365 * pages here (since we dump what we have at segment end).
2366 */
2367 Assert(npages == 0);
2368 if (openLogFile >= 0)
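2369 XLogFileClose();
2370 XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
2371 wal_segment_size);
2372 openLogTLI = tli;
2373
2374 /* create/use new log file */
2375 openLogFile = XLogFileInit(openLogSegNo, tli);
2376 ReserveExternalFD();
2377 }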
2378
2379 /* Make sure we have the current logfile open */
2380 if (openLogFile < 0)
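2381 {
2382 XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
2383 wal_segment_size);
2384 openLogTLI = tli;
2385 openLogFile = XLogFileOpen(openLogSegNo, tli);
2386 ReserveExternalFD();
2387 }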
2388
2389 /* Add current page to the set of pending pages-to-dump */
2390 if (npages == 0)
2391 {
2392 /* first of group */
2393 startidx = curridx;
2394 startoffset = XLogSegmentOffset(LogwrtResult.Write - XLOG_BLCKSZ,
2395 wal_segment_size);
2396 }
2397 npages++;
2398
2399 /*
2400 * Dump the set if this will be the last loop iteration, or if we are
2401 * at the last page of the cache area (since the next page won't be
2402 * contiguous in memory), or if we are at the end of the logfile
2403 * segment.
2404 */
2405 last_iteration = WriteRqst.Write <= LogwrtResult.Write;
2406
2407 finishing_seg = !ispartialpage &&
2408 (startoffset + npages * XLOG_BLCKSZ) >= wal_segment_size;
2409
2410 if (last_iteration ||
2411 curridx == XLogCtl->XLogCacheBlck ||
2412 finishing_seg)
2413 {
2414 char *from;
2415 Size nbytes;
2416 Size nleft;
2417 ssize_t written;
2418 instr_time start;
2419
2420 /* OK to write the page(s) */
2421 from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
2422 nbytes = npages * (Size) XLOG_BLCKSZ;
2423 nleft = nbytes;
2424 do
2425 {
2426 errno = 0;
2427
2428 /*
2429 * Measure I/O timing to write WAL data, for pg_stat_io.
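2430 */
2431 start = pgstat_prepare_io_time(track_wal_io_timing);
2432
2433 pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
2434 written = pg_pwrite(openLogFile, from, nleft, startoffset);
2435 pgstat_report_wait_end();
2436
2437 pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL,
2438 IOOP_WRITE, start, 1, written);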
2439
2440 if (written <= 0)
2441 {
2442 char xlogfname[MAXFNAMELEN];
2443 int save_errno;
2444
2445 if (errno == EINTR)
2446 continue;
2447
2448 save_errno = errno;
2449 XLogFileName(xlogfname, tli, openLogSegNo,
2450 wal_segment_size);
2451 errno = save_errno;
2452 ereport(PANIC,
2453 (errcode_for_file_access(),
2454 errmsg("could not write to log file \"%s\" at offset %u, length %zu: %m",
2455 xlogfname, startoffset, nleft)));
2456 }
2457 nleft -= written;
2458 from += written;
2459 startoffset += written;
2460 } while (nleft > 0);
2461
2462 npages = 0;
2463
2464 /*
2465 * If we just wrote the whole last page of a logfile segment,
2466 * fsync the segment immediately. This avoids having to go back
2467 * and re-open prior segments when an fsync request comes along
2468 * later. Doing it here ensures that one and only one backend will
2469 * perform this fsync.
2470 *
2471 * This is also the right place to notify the Archiver that the
2472 * segment is ready to copy to archival storage, and to update the
2473 * timer for archive_timeout, and to signal for a checkpoint if
2474 * too many logfile segments have been used since the last
2475 * checkpoint.
2476 */
2477 if (finishing_seg)
2478 {
2479 issue_xlog_fsync(openLogFile, openLogSegNo, tli);
2480
2481 /* signal that we need to wakeup walsenders later */
2482 WalSndWakeupRequest();
2483
2484 LogwrtResult.Flush = LogwrtResult.Write; /* end of page */
2485
2486 if (XLogArchivingActive())
2487 XLogArchiveNotifySeg(openLogSegNo, tli);
2488
2489 XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
2490 XLogCtl->lastSegSwitchLSN = LogwrtResult.Flush;
2491
2492 /*
2493 * Request a checkpoint if we've consumed too much xlog since
2494 * the last one. For speed, we first check using the local
2495 * copy of RedoRecPtr, which might be out of date; if it looks
2496 * like a checkpoint is needed, forcibly update RedoRecPtr and
2497 * recheck.
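2498 */
2499 if (IsUnderPostmaster && XLogCheckpointNeeded(openLogSegNo))
2500 {
2501 (void) GetRedoRecPtr();
2502 if (XLogCheckpointNeeded(openLogSegNo))
2503 RequestCheckpoint(CHECKPOINT_CAUSE_XLOG);
2504 }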
2505 }
2506 }
2507
2508 if (ispartialpage)
2509 {
2510 /* Only asked to write a partial page */
2511 LogwrtResult.Write = WriteRqst.Write;
2512 break;
2513 }
2514 curridx = NextBufIdx(curridx);
2515
2516 /* If flexible, break out of loop as soon as we wrote something */
2517 if (flexible && npages == 0)
2518 break;
2519 }
2520
2521 Assert(npages == 0);
2522
2523 /*
2524 * If asked to flush, do so
2525 */
2526 if (LogwrtResult.Flush < WriteRqst.Flush &&
2527 LogwrtResult.Flush < LogwrtResult.Write)
2528 {
2529 /*
2530 * Could get here without iterating above loop, in which case we might
2531 * have no open file or the wrong one. However, we do not need to
2532 * fsync more than one file.
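2533 */
2534 if (wal_sync_method != WAL_SYNC_METHOD_OPEN &&
2535 wal_sync_method != WAL_SYNC_METHOD_OPEN_DSYNC)
2536 {
2537 if (openLogFile >= 0 &&
2538 !XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
2539 wal_segment_size))
2540 XLogFileClose();
2541 if (openLogFile < 0)
2542 {
2543 XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
2544 wal_segment_size);
2545 openLogTLI = tli;
2546 openLogFile = XLogFileOpen(openLogSegNo, tli);
2547 ReserveExternalFD();
2548 }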
2549
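2550 issue_xlog_fsync(openLogFile, openLogSegNo, tli);
2551 }
2552
2553 /* signal that we need to wakeup walsenders later */
2554 WalSndWakeupRequest();
2555
2556 LogwrtResult.Flush = LogwrtResult.Write;
2557 }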
2558
2559 /*
2560 * Update shared-memory status
2561 *
2562 * We make sure that the shared 'request' values do not fall behind the
2563 * 'result' values. This is not absolutely essential, but it saves some
2564 * code in a couple of places.
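2565 */
2566 SpinLockAcquire(&XLogCtl->info_lck);
2567 if (XLogCtl->LogwrtRqst.Write < LogwrtResult.Write)
2568 XLogCtl->LogwrtRqst.Write = LogwrtResult.Write;
2569 if (XLogCtl->LogwrtRqst.Flush < LogwrtResult.Flush)
2570 XLogCtl->LogwrtRqst.Flush = LogwrtResult.Flush;
2571 SpinLockRelease(&XLogCtl->info_lck);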
2572
2573 /*
2574 * We write Write first, bar, then Flush. When reading, the opposite must
2575 * be done (with a matching barrier in between), so that we always see a
2576 * Flush value that trails behind the Write value seen.
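2577 */
2578 pg_atomic_write_u64(&XLogCtl->logWriteResult, LogwrtResult.Write);
2579 pg_write_barrier();
2580 pg_atomic_write_u64(&XLogCtl->logFlushResult, LogwrtResult.Flush);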
2581
2582#ifdef USE_ASSERT_CHECKING
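2583 {
2584 XLogRecPtr Flush;
2585 XLogRecPtr Write;
2586 XLogRecPtr Insert;
2587
2588 Flush = pg_atomic_read_u64(&XLogCtl->logFlushResult);
2589 pg_read_barrier();
2590 Write = pg_atomic_read_u64(&XLogCtl->logWriteResult);
2591 pg_read_barrier();
2592 Insert = pg_atomic_read_u64(&XLogCtl->logInsertResult);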
2593
2594 /* WAL written to disk is always ahead of WAL flushed */
2595 Assert(Write >= Flush);
2596
2597 /* WAL inserted to buffers is always ahead of WAL written */
2598 Assert(Insert >= Write);
2599 }
2600#endif
2601}
void ReserveExternalFD(void)
Definition: fd.c:1220
volatile uint32 CritSectionCount
Definition: globals.c:45
XLogRecPtr Flush
Definition: walreceiver.c:112
XLogRecPtr Write
Definition: walreceiver.c:111
#define WalSndWakeupRequest()
Definition: walsender.h:58
#define EINTR
Definition: win32_port.h:364
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:6489
int XLogFileOpen(XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:3634
#define NextBufIdx(idx)
Definition: xlog.c:584
void issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:8739
bool XLogCheckpointNeeded(XLogSegNo new_segno)
Definition: xlog.c:2280
void XLogArchiveNotifySeg(XLogSegNo segno, TimeLineID tli)
Definition: xlogarchive.c:492

References Assert(), CHECKPOINT_CAUSE_XLOG, CritSectionCount, EINTR, elog, ereport, errcode_for_file_access(), errmsg(), XLogwrtRqst::Flush, XLogwrtResult::Flush, Flush, GetRedoRecPtr(), XLogCtlData::info_lck, Insert(), IOCONTEXT_NORMAL, IOOBJECT_WAL, IOOP_WRITE, issue_xlog_fsync(), IsUnderPostmaster, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MAXFNAMELEN, NextBufIdx, openLogFile, openLogSegNo, openLogTLI, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_pwrite, pg_read_barrier, pg_write_barrier, pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), RefreshXLogWriteResult, RequestCheckpoint(), ReserveExternalFD(), SpinLockAcquire, SpinLockRelease, start, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, WalSndWakeupRequest, XLogwrtRqst::Write, XLogwrtResult::Write, Write, XLogCtlData::xlblocks, XLByteInPrevSeg, XLByteToPrevSeg, XLogArchiveNotifySeg(), XLogArchivingActive, XLogCtlData::XLogCacheBlck, XLogCheckpointNeeded(), XLogCtl, XLogFileClose(), XLogFileInit(), XLogFileName(), XLogFileOpen(), XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

Variable Documentation

◆ archive_mode_options

const struct config_enum_entry archive_mode_options[]
Initial value:
= {
{"always", ARCHIVE_MODE_ALWAYS, false},
{"on", ARCHIVE_MODE_ON, false},
{"off", ARCHIVE_MODE_OFF, false},
{"true", ARCHIVE_MODE_ON, true},
{"false", ARCHIVE_MODE_OFF, true},
{"yes", ARCHIVE_MODE_ON, true},
{"no", ARCHIVE_MODE_OFF, true},
{"1", ARCHIVE_MODE_ON, true},
{"0", ARCHIVE_MODE_OFF, true},
{NULL, 0, false}
}
@ ARCHIVE_MODE_ALWAYS
Definition: xlog.h:67
@ ARCHIVE_MODE_OFF
Definition: xlog.h:65
@ ARCHIVE_MODE_ON
Definition: xlog.h:66

Definition at line 192 of file xlog.c.

◆ check_wal_consistency_checking_deferred

bool check_wal_consistency_checking_deferred = false
static

Definition at line 167 of file xlog.c.

Referenced by check_wal_consistency_checking(), and InitializeWalConsistencyChecking().

◆ CheckPointDistanceEstimate

double CheckPointDistanceEstimate = 0
static

Definition at line 160 of file xlog.c.

Referenced by LogCheckpointEnd(), UpdateCheckPointDistanceEstimate(), and XLOGfileslop().

◆ CheckPointSegments

int CheckPointSegments

◆ CheckpointStats

◆ CommitDelay

int CommitDelay = 0

Definition at line 133 of file xlog.c.

Referenced by XLogFlush().

◆ CommitSiblings

int CommitSiblings = 5

Definition at line 134 of file xlog.c.

Referenced by XLogFlush().

◆ ControlFile

◆ doPageWrites

bool doPageWrites
static

◆ EnableHotStandby

◆ fullPageWrites

bool fullPageWrites = true

Definition at line 123 of file xlog.c.

Referenced by BootStrapXLOG(), and UpdateFullPageWrites().

◆ holdingAllLocks

bool holdingAllLocks = false
static

◆ lastFullPageWrites

bool lastFullPageWrites
static

Definition at line 218 of file xlog.c.

Referenced by StartupXLOG() and xlog_redo().

◆ LocalMinRecoveryPoint

XLogRecPtr LocalMinRecoveryPoint
static

◆ LocalMinRecoveryPointTLI

TimeLineID LocalMinRecoveryPointTLI
static

◆ LocalRecoveryInProgress

bool LocalRecoveryInProgress = true
static

Definition at line 225 of file xlog.c.

Referenced by RecoveryInProgress().

◆ LocalXLogInsertAllowed

int LocalXLogInsertAllowed = -1
static

Definition at line 237 of file xlog.c.

Referenced by CreateCheckPoint(), LocalSetXLogInsertAllowed(), and XLogInsertAllowed().

◆ log_checkpoints

bool log_checkpoints = true

◆ LogwrtResult

◆ max_slot_wal_keep_size_mb

int max_slot_wal_keep_size_mb = -1

Definition at line 136 of file xlog.c.

Referenced by KeepLogSeg() and pg_get_replication_slots().

◆ max_wal_size_mb

int max_wal_size_mb = 1024

◆ min_wal_size_mb

int min_wal_size_mb = 80

Definition at line 116 of file xlog.c.

Referenced by ReadControlFile() and XLOGfileslop().

◆ MyLockNo

int MyLockNo = 0
static

◆ openLogFile

int openLogFile = -1
static

◆ openLogSegNo

XLogSegNo openLogSegNo = 0
static

Definition at line 636 of file xlog.c.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), XLogFileClose(), and XLogWrite().

◆ openLogTLI

TimeLineID openLogTLI = 0
static

Definition at line 637 of file xlog.c.

Referenced by assign_wal_sync_method(), BootStrapXLOG(), XLogFileClose(), and XLogWrite().

◆ PrevCheckPointDistance

double PrevCheckPointDistance = 0
static

Definition at line 161 of file xlog.c.

Referenced by LogCheckpointEnd() and UpdateCheckPointDistanceEstimate().

◆ ProcLastRecPtr

◆ RedoRecPtr

◆ sessionBackupState

SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
static

◆ track_wal_io_timing

◆ updateMinRecoveryPoint

bool updateMinRecoveryPoint = true
static

Definition at line 648 of file xlog.c.

Referenced by SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), and XLogNeedsFlush().

◆ UsableBytesInSegment

int UsableBytesInSegment
static

◆ wal_compression

int wal_compression = WAL_COMPRESSION_NONE

Definition at line 125 of file xlog.c.

Referenced by XLogCompressBackupBlock() and XLogRecordAssemble().

◆ wal_consistency_checking

bool* wal_consistency_checking = NULL

Definition at line 127 of file xlog.c.

Referenced by assign_wal_consistency_checking() and XLogRecordAssemble().

◆ wal_consistency_checking_string

char* wal_consistency_checking_string = NULL

Definition at line 126 of file xlog.c.

Referenced by InitializeWalConsistencyChecking().

◆ wal_decode_buffer_size

int wal_decode_buffer_size = 512 * 1024

Definition at line 137 of file xlog.c.

Referenced by InitWalRecovery().

◆ wal_init_zero

bool wal_init_zero = true

Definition at line 128 of file xlog.c.

Referenced by XLogFileInitInternal().

◆ wal_keep_size_mb

int wal_keep_size_mb = 0

Definition at line 117 of file xlog.c.

Referenced by KeepLogSeg() and pg_get_replication_slots().

◆ wal_level

◆ wal_log_hints

bool wal_log_hints = false

Definition at line 124 of file xlog.c.

Referenced by InitControlFile() and XLogReportParameters().

◆ wal_recycle

bool wal_recycle = true

Definition at line 129 of file xlog.c.

Referenced by RemoveXlogFile().

◆ wal_retrieve_retry_interval

int wal_retrieve_retry_interval = 5000

◆ wal_segment_size

int wal_segment_size = DEFAULT_XLOG_SEG_SIZE

Definition at line 144 of file xlog.c.

Referenced by AdvanceXLInsertBuffer(), assign_wal_sync_method(), BootStrapXLOG(), build_backup_content(), CalculateCheckpointSegments(), CheckArchiveTimeout(), CheckXLogRemoved(), CleanupAfterArchiveRecovery(), copy_replication_slot(), CopyXLogRecordToWAL(), CreateCheckPoint(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_stop(), ExecuteRecoveryCommand(), FinishWalRecovery(), GetOldestUnsummarizedLSN(), GetWALAvailability(), GetXLogBuffer(), InitWalRecovery(), InitXLogReaderState(), InstallXLogFileSegment(), InvalidateObsoleteReplicationSlots(), IsCheckpointOnSchedule(), issue_xlog_fsync(), KeepLogSeg(), LogicalConfirmReceivedLocation(), MaybeRemoveOldWalSummaries(), perform_base_backup(), pg_control_checkpoint(), pg_get_replication_slots(), pg_split_walfile_name(), pg_walfile_name(), pg_walfile_name_offset(), PreallocXlogFiles(), ReadControlFile(), ReadRecord(), RemoveNonParentXlogFiles(), RemoveOldXlogFiles(), ReorderBufferRestoreChanges(), ReorderBufferRestoreCleanup(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), ReplicationSlotReserveWal(), RequestXLogStreaming(), reserve_wal_for_local_slot(), ReserveXLogSwitch(), RestoreArchivedFile(), StartReplication(), StartupDecodingContext(), SummarizeWAL(), UpdateLastRemovedPtr(), WALReadRaiseError(), WalReceiverMain(), WalSndSegmentOpen(), WriteControlFile(), XLogArchiveNotifySeg(), XLogBackgroundFlush(), XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCheckpointNeeded(), XLOGChooseNumBuffers(), XLogFileClose(), XLogFileCopy(), XLogFileInitInternal(), XLogFileOpen(), XLogFileRead(), XLogFileReadAnyTLI(), XLOGfileslop(), XLogGetOldestSegno(), XLogInitNewTimeline(), XLogInsertRecord(), XLogPageRead(), XLogReaderAllocate(), XlogReadTwoPhaseData(), XLogRecPtrToBytePos(), XLogWalRcvClose(), XLogWalRcvWrite(), and XLogWrite().

◆ wal_sync_method

◆ wal_sync_method_options

const struct config_enum_entry wal_sync_method_options[]
Initial value:
= {
{"fsync", WAL_SYNC_METHOD_FSYNC, false},
{"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
{NULL, 0, false}
}

Definition at line 172 of file xlog.c.

◆ WALInsertLocks

◆ XactLastCommitEnd

◆ XactLastRecEnd

◆ XLogArchiveCommand

char* XLogArchiveCommand = NULL

◆ XLogArchiveMode

◆ XLogArchiveTimeout

int XLogArchiveTimeout = 0

Definition at line 119 of file xlog.c.

Referenced by CheckArchiveTimeout() and CheckpointerMain().

◆ XLOGbuffers

int XLOGbuffers = -1

Definition at line 118 of file xlog.c.

Referenced by check_wal_buffers(), XLOGShmemInit(), and XLOGShmemSize().

◆ XLogCtl