Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 9e4f914

Browse files
committed
Fix replay of create database records on standby
Crash recovery on standby may encounter missing directories when replaying database-creation WAL records. Prior to this patch, the standby would fail to recover in such a case; however, the directories could be legitimately missing. Consider the following sequence of commands:

    CREATE DATABASE
    DROP DATABASE
    DROP TABLESPACE

If, after replaying the last WAL record and removing the tablespace directory, the standby crashes and has to replay the create database record again, crash recovery must be able to continue.

A fix for this problem was already attempted in 49d9cfc, but it was reverted because of design issues. This new version is based on Robert Haas' proposal: any missing tablespaces are created during recovery before reaching consistency. Tablespaces are created as real directories, and should be deleted by later replay. CheckRecoveryConsistency ensures they have disappeared.

The problems detected by this new code are reported as PANIC, except when allow_in_place_tablespaces is set to ON, in which case they are WARNING. Apart from making tests possible, this gives users an escape hatch in case things don't go as planned.

Author: Kyotaro Horiguchi <[email protected]>
Author: Asim R Praveen <[email protected]>
Author: Paul Guo <[email protected]>
Reviewed-by: Anastasia Lubennikova <[email protected]> (older versions)
Reviewed-by: Fujii Masao <[email protected]> (older versions)
Reviewed-by: Michaël Paquier <[email protected]>
Diagnosed-by: Paul Guo <[email protected]>
Discussion: https://postgr.es/m/CAEET0ZGx9AvioViLf7nbR_8tH9-=27DN5xWJ2P9-ROH16e4JUA@mail.gmail.com
1 parent d396606 commit 9e4f914

File tree

4 files changed

+305
-31
lines changed

4 files changed

+305
-31
lines changed

src/backend/access/transam/xlogrecovery.c

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "access/xlogutils.h"
4343
#include "catalog/pg_control.h"
4444
#include "commands/tablespace.h"
45+
#include "common/file_utils.h"
4546
#include "miscadmin.h"
4647
#include "pgstat.h"
4748
#include "postmaster/bgwriter.h"
@@ -2008,6 +2009,47 @@ xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI)
20082009
}
20092010
}
20102011

2012+
/*
2013+
* Verify that, in non-test mode, ./pg_tblspc doesn't contain any real
2014+
* directories.
2015+
*
2016+
* Replay of database creation XLOG records for databases that were later
2017+
* dropped can create fake directories in pg_tblspc. By the time consistency
2018+
* is reached these directories should have been removed; here we verify
2019+
* that this did indeed happen. This is to be called at the point where
2020+
* consistent state is reached.
2021+
*
2022+
* allow_in_place_tablespaces turns the PANIC into a WARNING, which is
2023+
* useful for testing purposes, and also allows for an escape hatch in case
2024+
* things go south.
2025+
*/
2026+
static void
2027+
CheckTablespaceDirectory(void)
2028+
{
2029+
DIR *dir;
2030+
struct dirent *de;
2031+
2032+
dir = AllocateDir("pg_tblspc");
2033+
while ((de = ReadDir(dir, "pg_tblspc")) != NULL)
2034+
{
2035+
char path[MAXPGPATH + 10];
2036+
2037+
/* Skip entries of non-oid names */
2038+
if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
2039+
continue;
2040+
2041+
snprintf(path, sizeof(path), "pg_tblspc/%s", de->d_name);
2042+
2043+
if (get_dirent_type(path, de, false, ERROR) != PGFILETYPE_LNK)
2044+
ereport(allow_in_place_tablespaces ? WARNING : PANIC,
2045+
(errcode(ERRCODE_DATA_CORRUPTED),
2046+
errmsg("unexpected directory entry \"%s\" found in %s",
2047+
de->d_name, "pg_tblspc/"),
2048+
errdetail("All directory entries in pg_tblspc/ should be symbolic links."),
2049+
errhint("Remove those directories, or set allow_in_place_tablespaces to ON transiently to let recovery complete.")));
2050+
}
2051+
}
2052+
20112053
/*
20122054
* Checks if recovery has reached a consistent state. When consistency is
20132055
* reached and we have a valid starting standby snapshot, tell postmaster
@@ -2068,6 +2110,14 @@ CheckRecoveryConsistency(void)
20682110
*/
20692111
XLogCheckInvalidPages();
20702112

2113+
/*
2114+
* Check that pg_tblspc doesn't contain any real directories. Replay
2115+
* of Database/CREATE_* records may have created fictitious tablespace
2116+
* directories that should have been removed by the time consistency
2117+
* was reached.
2118+
*/
2119+
CheckTablespaceDirectory();
2120+
20712121
reachedConsistency = true;
20722122
ereport(LOG,
20732123
(errmsg("consistent recovery state reached at %X/%X",

src/backend/commands/dbcommands.c

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "access/tableam.h"
3131
#include "access/xact.h"
3232
#include "access/xloginsert.h"
33+
#include "access/xlogrecovery.h"
3334
#include "access/xlogutils.h"
3435
#include "catalog/catalog.h"
3536
#include "catalog/dependency.h"
@@ -47,6 +48,7 @@
4748
#include "commands/defrem.h"
4849
#include "commands/seclabel.h"
4950
#include "commands/tablespace.h"
51+
#include "common/file_perm.h"
5052
#include "mb/pg_wchar.h"
5153
#include "miscadmin.h"
5254
#include "pgstat.h"
@@ -62,6 +64,7 @@
6264
#include "utils/acl.h"
6365
#include "utils/builtins.h"
6466
#include "utils/fmgroids.h"
67+
#include "utils/guc.h"
6568
#include "utils/pg_locale.h"
6669
#include "utils/relmapper.h"
6770
#include "utils/snapmgr.h"
@@ -135,6 +138,7 @@ static void CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid,
135138
bool isRedo);
136139
static void CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dboid, Oid src_tsid,
137140
Oid dst_tsid);
141+
static void recovery_create_dbdir(char *path, bool only_tblspc);
138142

139143
/*
140144
* Create a new database using the WAL_LOG strategy.
@@ -2995,6 +2999,45 @@ get_database_name(Oid dbid)
29952999
return result;
29963000
}
29973001

3002+
/*
3003+
* recovery_create_dbdir()
3004+
*
3005+
* During recovery, there's a case where we validly need to recover a missing
3006+
* tablespace directory so that recovery can continue. This happens when
3007+
* recovery wants to create a database but the holding tablespace has been
3008+
* removed before the server stopped. Since we expect that the directory will
3009+
* be gone before reaching recovery consistency, and we have no knowledge about
3010+
* the tablespace other than its OID here, we create a real directory under
3011+
* pg_tblspc here instead of restoring the symlink.
3012+
*
3013+
* If only_tblspc is true, then the requested directory must be in pg_tblspc/
3014+
*/
3015+
static void
3016+
recovery_create_dbdir(char *path, bool only_tblspc)
3017+
{
3018+
struct stat st;
3019+
3020+
Assert(RecoveryInProgress());
3021+
3022+
if (stat(path, &st) == 0)
3023+
return;
3024+
3025+
if (only_tblspc && strstr(path, "pg_tblspc/") == NULL)
3026+
elog(PANIC, "requested to created invalid directory: %s", path);
3027+
3028+
if (reachedConsistency && !allow_in_place_tablespaces)
3029+
ereport(PANIC,
3030+
errmsg("missing directory \"%s\"", path));
3031+
3032+
elog(reachedConsistency ? WARNING : DEBUG1,
3033+
"creating missing directory: %s", path);
3034+
3035+
if (pg_mkdir_p(path, pg_dir_create_mode) != 0)
3036+
ereport(PANIC,
3037+
errmsg("could not create missing directory \"%s\": %m", path));
3038+
}
3039+
3040+
29983041
/*
29993042
* DATABASE resource manager's routines
30003043
*/
@@ -3012,6 +3055,7 @@ dbase_redo(XLogReaderState *record)
30123055
(xl_dbase_create_file_copy_rec *) XLogRecGetData(record);
30133056
char *src_path;
30143057
char *dst_path;
3058+
char *parent_path;
30153059
struct stat st;
30163060

30173061
src_path = GetDatabasePath(xlrec->src_db_id, xlrec->src_tablespace_id);
@@ -3031,6 +3075,33 @@ dbase_redo(XLogReaderState *record)
30313075
dst_path)));
30323076
}
30333077

3078+
/*
3079+
* If the parent of the target path doesn't exist, create it now. This
3080+
* enables us to create the target underneath later.
3081+
*/
3082+
parent_path = pstrdup(dst_path);
3083+
get_parent_directory(parent_path);
3084+
if (stat(parent_path, &st) < 0)
3085+
{
3086+
if (errno != ENOENT)
3087+
ereport(FATAL,
3088+
errmsg("could not stat directory \"%s\": %m",
3089+
dst_path));
3090+
3091+
/* create the parent directory if needed and valid */
3092+
recovery_create_dbdir(parent_path, true);
3093+
}
3094+
pfree(parent_path);
3095+
3096+
/*
3097+
* There's a case where the copy source directory is missing for the
3098+
* same reason as above. Create the empty source directory so that
3099+
* copydir below doesn't fail. The directory will be dropped soon by
3100+
* recovery.
3101+
*/
3102+
if (stat(src_path, &st) < 0 && errno == ENOENT)
3103+
recovery_create_dbdir(src_path, false);
3104+
30343105
/*
30353106
* Force dirty buffers out to disk, to ensure source database is
30363107
* up-to-date for the copy.
@@ -3055,9 +3126,15 @@ dbase_redo(XLogReaderState *record)
30553126
xl_dbase_create_wal_log_rec *xlrec =
30563127
(xl_dbase_create_wal_log_rec *) XLogRecGetData(record);
30573128
char *dbpath;
3129+
char *parent_path;
30583130

30593131
dbpath = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);
30603132

3133+
/* create the parent directory if needed and valid */
3134+
parent_path = pstrdup(dbpath);
3135+
get_parent_directory(parent_path);
3136+
recovery_create_dbdir(parent_path, true);
3137+
30613138
/* Create the database directory with the version file. */
30623139
CreateDirAndVersionFile(dbpath, xlrec->db_id, xlrec->tablespace_id,
30633140
true);

src/backend/commands/tablespace.c

Lines changed: 9 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,6 @@ TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
156156
/* Directory creation failed? */
157157
if (MakePGDirectory(dir) < 0)
158158
{
159-
char *parentdir;
160-
161159
/* Failure other than not exists or not in WAL replay? */
162160
if (errno != ENOENT || !isRedo)
163161
ereport(ERROR,
@@ -166,36 +164,16 @@ TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
166164
dir)));
167165

168166
/*
169-
* Parent directories are missing during WAL replay, so
170-
* continue by creating simple parent directories rather
171-
* than a symlink.
167+
* During WAL replay, it's conceivable that several levels
168+
* of directories are missing if tablespaces are dropped
169+
* further ahead of the WAL stream than we're currently
170+
* replaying. An easy way forward is to create them as
171+
* plain directories and hope they are removed by further
172+
* WAL replay if necessary. If this also fails, there is
173+
* trouble we cannot get out of, so just report that and
174+
* bail out.
172175
*/
173-
174-
/* create two parents up if not exist */
175-
parentdir = pstrdup(dir);
176-
get_parent_directory(parentdir);
177-
get_parent_directory(parentdir);
178-
/* Can't create parent and it doesn't already exist? */
179-
if (MakePGDirectory(parentdir) < 0 && errno != EEXIST)
180-
ereport(ERROR,
181-
(errcode_for_file_access(),
182-
errmsg("could not create directory \"%s\": %m",
183-
parentdir)));
184-
pfree(parentdir);
185-
186-
/* create one parent up if not exist */
187-
parentdir = pstrdup(dir);
188-
get_parent_directory(parentdir);
189-
/* Can't create parent and it doesn't already exist? */
190-
if (MakePGDirectory(parentdir) < 0 && errno != EEXIST)
191-
ereport(ERROR,
192-
(errcode_for_file_access(),
193-
errmsg("could not create directory \"%s\": %m",
194-
parentdir)));
195-
pfree(parentdir);
196-
197-
/* Create database directory */
198-
if (MakePGDirectory(dir) < 0)
176+
if (pg_mkdir_p(dir, pg_dir_create_mode) < 0)
199177
ereport(ERROR,
200178
(errcode_for_file_access(),
201179
errmsg("could not create directory \"%s\": %m",

0 commit comments

Comments
 (0)