Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 969d7cd

Browse files
committed
Install a "dead man switch" to allow the postmaster to detect cases where a backend has done exit(0) or exit(1) without having disengaged itself from shared memory. We are at risk for this whenever third-party code is loaded into a backend, since such code might not know it's supposed to go through proc_exit() instead. Also, it is reported that under Windows there are ways to externally kill a process that cause the status code returned to the postmaster to be indistinguishable from a voluntary exit (thank you, Microsoft). If this does happen then the system is probably hosed --- for instance, the dead session might still be holding locks. So the best recovery method is to treat this like a backend crash.

The dead man switch is armed for a particular child process when it acquires a regular PGPROC, and disarmed when the PGPROC is released; these should be the first and last touches of shared memory resources in a backend, or close enough anyway. This choice means there is no coverage for auxiliary processes, but I doubt we need that, since they shouldn't be executing any user-provided code anyway.

This patch also improves the management of the EXEC_BACKEND ShmemBackendArray array a bit, by reducing search costs.

Although this problem is of long standing, the lack of field complaints seems to mean it's not critical enough to risk back-patching; at least not till we get some more testing of this mechanism.
1 parent 8f34811 commit 969d7cd

File tree

8 files changed

+280
-92
lines changed

8 files changed

+280
-92
lines changed

src/backend/postmaster/postmaster.c

+97-72
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
*
3838
*
3939
* IDENTIFICATION
40-
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.580 2009/05/04 02:46:36 tgl Exp $
40+
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.581 2009/05/05 19:59:00 tgl Exp $
4141
*
4242
* NOTES
4343
*
@@ -135,12 +135,14 @@
135135
* Also, "dead_end" children are in it: these are children launched just
136136
* for the purpose of sending a friendly rejection message to a would-be
137137
* client. We must track them because they are attached to shared memory,
138-
* but we know they will never become live backends.
138+
* but we know they will never become live backends. dead_end children are
139+
* not assigned a PMChildSlot.
139140
*/
140141
typedef struct bkend
141142
{
142143
pid_t pid; /* process id of backend */
143144
long cancel_key; /* cancel key for cancels for this backend */
145+
int child_slot; /* PMChildSlot for this backend, if any */
144146
bool is_autovacuum; /* is it an autovacuum process? */
145147
bool dead_end; /* is it going to send an error and quit? */
146148
Dlelem elem; /* list link in BackendList */
@@ -149,15 +151,6 @@ typedef struct bkend
149151
static Dllist *BackendList;
150152

151153
#ifdef EXEC_BACKEND
152-
/*
153-
* Number of entries in the shared-memory backend table. This table is used
154-
* only for sending cancels, and therefore only includes children we allow
155-
* cancels on: regular backends and autovac workers. In particular we exclude
156-
* dead_end children, allowing the table to have a known maximum size, to wit
157-
* the same too-many-children limit enforced by canAcceptConnections().
158-
*/
159-
#define NUM_BACKENDARRAY_ELEMS (2*MaxBackends)
160-
161154
static Backend *ShmemBackendArray;
162155
#endif
163156

@@ -404,6 +397,7 @@ typedef struct
404397
char DataDir[MAXPGPATH];
405398
int ListenSocket[MAXLISTEN];
406399
long MyCancelKey;
400+
int MyPMChildSlot;
407401
unsigned long UsedShmemSegID;
408402
void *UsedShmemSegAddr;
409403
slock_t *ShmemLock;
@@ -413,6 +407,7 @@ typedef struct
413407
slock_t *ProcStructLock;
414408
PROC_HDR *ProcGlobal;
415409
PGPROC *AuxiliaryProcs;
410+
PMSignalData *PMSignalState;
416411
InheritableSocket pgStatSock;
417412
pid_t PostmasterPid;
418413
TimestampTz PgStartTime;
@@ -443,7 +438,7 @@ static bool save_backend_variables(BackendParameters * param, Port *port,
443438
#endif
444439

445440
static void ShmemBackendArrayAdd(Backend *bn);
446-
static void ShmemBackendArrayRemove(pid_t pid);
441+
static void ShmemBackendArrayRemove(Backend *bn);
447442
#endif /* EXEC_BACKEND */
448443

449444
#define StartupDataBase() StartChildProcess(StartupProcess)
@@ -1771,7 +1766,7 @@ processCancelRequest(Port *port, void *pkt)
17711766
{
17721767
bp = (Backend *) DLE_VAL(curr);
17731768
#else
1774-
for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
1769+
for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
17751770
{
17761771
bp = (Backend *) &ShmemBackendArray[i];
17771772
#endif
@@ -1836,10 +1831,10 @@ canAcceptConnections(void)
18361831
* MaxBackends limit is enforced when a new backend tries to join the
18371832
* shared-inval backend array.
18381833
*
1839-
* In the EXEC_BACKEND case, the limit here must match the size of the
1840-
* ShmemBackendArray, since all these processes will have cancel codes.
1834+
* The limit here must match the sizes of the per-child-process arrays;
1835+
* see comments for MaxLivePostmasterChildren().
18411836
*/
1842-
if (CountChildren() >= 2 * MaxBackends)
1837+
if (CountChildren() >= MaxLivePostmasterChildren())
18431838
return CAC_TOOMANY;
18441839

18451840
return CAC_OK;
@@ -2439,8 +2434,8 @@ CleanupBackend(int pid,
24392434
/*
24402435
* If a backend dies in an ugly way then we must signal all other backends
24412436
* to quickdie. If exit status is zero (normal) or one (FATAL exit), we
2442-
* assume everything is all right and simply remove the backend from the
2443-
* active backend list.
2437+
* assume everything is all right and proceed to remove the backend from
2438+
* the active backend list.
24442439
*/
24452440
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
24462441
{
@@ -2454,10 +2449,21 @@ CleanupBackend(int pid,
24542449

24552450
if (bp->pid == pid)
24562451
{
2457-
#ifdef EXEC_BACKEND
24582452
if (!bp->dead_end)
2459-
ShmemBackendArrayRemove(pid);
2453+
{
2454+
if (!ReleasePostmasterChildSlot(bp->child_slot))
2455+
{
2456+
/*
2457+
* Uh-oh, the child failed to clean itself up. Treat
2458+
* as a crash after all.
2459+
*/
2460+
HandleChildCrash(pid, exitstatus, _("server process"));
2461+
return;
2462+
}
2463+
#ifdef EXEC_BACKEND
2464+
ShmemBackendArrayRemove(bp);
24602465
#endif
2466+
}
24612467
DLRemove(curr);
24622468
free(bp);
24632469
break;
@@ -2500,10 +2506,13 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
25002506
/*
25012507
* Found entry for freshly-dead backend, so remove it.
25022508
*/
2503-
#ifdef EXEC_BACKEND
25042509
if (!bp->dead_end)
2505-
ShmemBackendArrayRemove(pid);
2510+
{
2511+
(void) ReleasePostmasterChildSlot(bp->child_slot);
2512+
#ifdef EXEC_BACKEND
2513+
ShmemBackendArrayRemove(bp);
25062514
#endif
2515+
}
25072516
DLRemove(curr);
25082517
free(bp);
25092518
/* Keep looping so we can signal remaining backends */
@@ -2931,14 +2940,7 @@ BackendStartup(Port *port)
29312940
pid_t pid;
29322941

29332942
/*
2934-
* Compute the cancel key that will be assigned to this backend. The
2935-
* backend will have its own copy in the forked-off process' value of
2936-
* MyCancelKey, so that it can transmit the key to the frontend.
2937-
*/
2938-
MyCancelKey = PostmasterRandom();
2939-
2940-
/*
2941-
* Make room for backend data structure. Better before the fork() so we
2943+
* Create backend data structure. Better before the fork() so we
29422944
* can handle failure cleanly.
29432945
*/
29442946
bn = (Backend *) malloc(sizeof(Backend));
@@ -2950,8 +2952,26 @@ BackendStartup(Port *port)
29502952
return STATUS_ERROR;
29512953
}
29522954

2955+
/*
2956+
* Compute the cancel key that will be assigned to this backend. The
2957+
* backend will have its own copy in the forked-off process' value of
2958+
* MyCancelKey, so that it can transmit the key to the frontend.
2959+
*/
2960+
MyCancelKey = PostmasterRandom();
2961+
bn->cancel_key = MyCancelKey;
2962+
29532963
/* Pass down canAcceptConnections state */
29542964
port->canAcceptConnections = canAcceptConnections();
2965+
bn->dead_end = (port->canAcceptConnections != CAC_OK &&
2966+
port->canAcceptConnections != CAC_WAITBACKUP);
2967+
2968+
/*
2969+
* Unless it's a dead_end child, assign it a child slot number
2970+
*/
2971+
if (!bn->dead_end)
2972+
bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
2973+
else
2974+
bn->child_slot = 0;
29552975

29562976
#ifdef EXEC_BACKEND
29572977
pid = backend_forkexec(port);
@@ -3009,10 +3029,7 @@ BackendStartup(Port *port)
30093029
* of backends.
30103030
*/
30113031
bn->pid = pid;
3012-
bn->cancel_key = MyCancelKey;
30133032
bn->is_autovacuum = false;
3014-
bn->dead_end = (port->canAcceptConnections != CAC_OK &&
3015-
port->canAcceptConnections != CAC_WAITBACKUP);
30163033
DLInitElem(&bn->elem, bn);
30173034
DLAddHead(BackendList, &bn->elem);
30183035
#ifdef EXEC_BACKEND
@@ -4271,23 +4288,26 @@ StartAutovacuumWorker(void)
42714288
*/
42724289
if (canAcceptConnections() == CAC_OK)
42734290
{
4274-
/*
4275-
* Compute the cancel key that will be assigned to this session. We
4276-
* probably don't need cancel keys for autovac workers, but we'd
4277-
* better have something random in the field to prevent unfriendly
4278-
* people from sending cancels to them.
4279-
*/
4280-
MyCancelKey = PostmasterRandom();
4281-
42824291
bn = (Backend *) malloc(sizeof(Backend));
42834292
if (bn)
42844293
{
4294+
/*
4295+
* Compute the cancel key that will be assigned to this session. We
4296+
* probably don't need cancel keys for autovac workers, but we'd
4297+
* better have something random in the field to prevent unfriendly
4298+
* people from sending cancels to them.
4299+
*/
4300+
MyCancelKey = PostmasterRandom();
4301+
bn->cancel_key = MyCancelKey;
4302+
4303+
/* Autovac workers are not dead_end and need a child slot */
4304+
bn->dead_end = false;
4305+
bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
4306+
42854307
bn->pid = StartAutoVacWorker();
42864308
if (bn->pid > 0)
42874309
{
4288-
bn->cancel_key = MyCancelKey;
42894310
bn->is_autovacuum = true;
4290-
bn->dead_end = false;
42914311
DLInitElem(&bn->elem, bn);
42924312
DLAddHead(BackendList, &bn->elem);
42934313
#ifdef EXEC_BACKEND
@@ -4353,6 +4373,24 @@ CreateOptsFile(int argc, char *argv[], char *fullprogname)
43534373
}
43544374

43554375

4376+
/*
4377+
* MaxLivePostmasterChildren
4378+
*
4379+
* This reports the number of entries needed in per-child-process arrays
4380+
* (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
4381+
* These arrays include regular backends and autovac workers, but not special
4382+
* children nor dead_end children. This allows the arrays to have a fixed
4383+
* maximum size, to wit the same too-many-children limit enforced by
4384+
* canAcceptConnections(). The exact value isn't too critical as long as
4385+
* it's more than MaxBackends.
4386+
*/
4387+
int
4388+
MaxLivePostmasterChildren(void)
4389+
{
4390+
return 2 * MaxBackends;
4391+
}
4392+
4393+
43564394
#ifdef EXEC_BACKEND
43574395

43584396
/*
@@ -4364,6 +4402,7 @@ extern LWLock *LWLockArray;
43644402
extern slock_t *ProcStructLock;
43654403
extern PROC_HDR *ProcGlobal;
43664404
extern PGPROC *AuxiliaryProcs;
4405+
extern PMSignalData *PMSignalState;
43674406
extern int pgStatSock;
43684407

43694408
#ifndef WIN32
@@ -4395,6 +4434,7 @@ save_backend_variables(BackendParameters * param, Port *port,
43954434
memcpy(&param->ListenSocket, &ListenSocket, sizeof(ListenSocket));
43964435

43974436
param->MyCancelKey = MyCancelKey;
4437+
param->MyPMChildSlot = MyPMChildSlot;
43984438

43994439
param->UsedShmemSegID = UsedShmemSegID;
44004440
param->UsedShmemSegAddr = UsedShmemSegAddr;
@@ -4407,6 +4447,7 @@ save_backend_variables(BackendParameters * param, Port *port,
44074447
param->ProcStructLock = ProcStructLock;
44084448
param->ProcGlobal = ProcGlobal;
44094449
param->AuxiliaryProcs = AuxiliaryProcs;
4450+
param->PMSignalState = PMSignalState;
44104451
write_inheritable_socket(&param->pgStatSock, pgStatSock, childPid);
44114452

44124453
param->PostmasterPid = PostmasterPid;
@@ -4601,6 +4642,7 @@ restore_backend_variables(BackendParameters * param, Port *port)
46014642
memcpy(&ListenSocket, &param->ListenSocket, sizeof(ListenSocket));
46024643

46034644
MyCancelKey = param->MyCancelKey;
4645+
MyPMChildSlot = param->MyPMChildSlot;
46044646

46054647
UsedShmemSegID = param->UsedShmemSegID;
46064648
UsedShmemSegAddr = param->UsedShmemSegAddr;
@@ -4613,6 +4655,7 @@ restore_backend_variables(BackendParameters * param, Port *port)
46134655
ProcStructLock = param->ProcStructLock;
46144656
ProcGlobal = param->ProcGlobal;
46154657
AuxiliaryProcs = param->AuxiliaryProcs;
4658+
PMSignalState = param->PMSignalState;
46164659
read_inheritable_socket(&pgStatSock, &param->pgStatSock);
46174660

46184661
PostmasterPid = param->PostmasterPid;
@@ -4642,7 +4685,7 @@ restore_backend_variables(BackendParameters * param, Port *port)
46424685
Size
46434686
ShmemBackendArraySize(void)
46444687
{
4645-
return mul_size(NUM_BACKENDARRAY_ELEMS, sizeof(Backend));
4688+
return mul_size(MaxLivePostmasterChildren(), sizeof(Backend));
46464689
}
46474690

46484691
void
@@ -4658,41 +4701,23 @@ ShmemBackendArrayAllocation(void)
46584701
static void
46594702
ShmemBackendArrayAdd(Backend *bn)
46604703
{
4661-
int i;
4662-
4663-
/* Find an empty slot */
4664-
for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
4665-
{
4666-
if (ShmemBackendArray[i].pid == 0)
4667-
{
4668-
ShmemBackendArray[i] = *bn;
4669-
return;
4670-
}
4671-
}
4704+
/* The array slot corresponding to my PMChildSlot should be free */
4705+
int i = bn->child_slot - 1;
46724706

4673-
ereport(FATAL,
4674-
(errmsg_internal("no free slots in shmem backend array")));
4707+
Assert(ShmemBackendArray[i].pid == 0);
4708+
ShmemBackendArray[i] = *bn;
46754709
}
46764710

46774711
static void
4678-
ShmemBackendArrayRemove(pid_t pid)
4712+
ShmemBackendArrayRemove(Backend *bn)
46794713
{
4680-
int i;
4681-
4682-
for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
4683-
{
4684-
if (ShmemBackendArray[i].pid == pid)
4685-
{
4686-
/* Mark the slot as empty */
4687-
ShmemBackendArray[i].pid = 0;
4688-
return;
4689-
}
4690-
}
4714+
int i = bn->child_slot - 1;
46914715

4692-
ereport(WARNING,
4693-
(errmsg_internal("could not find backend entry with pid %d",
4694-
(int) pid)));
4716+
Assert(ShmemBackendArray[i].pid == bn->pid);
4717+
/* Mark the slot as empty */
4718+
ShmemBackendArray[i].pid = 0;
46954719
}
4720+
46964721
#endif /* EXEC_BACKEND */
46974722

46984723

src/backend/storage/ipc/ipci.c

+3-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.99 2009/01/03 17:08:39 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.100 2009/05/05 19:59:00 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -111,6 +111,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
111111
size = add_size(size, ProcArrayShmemSize());
112112
size = add_size(size, BackendStatusShmemSize());
113113
size = add_size(size, SInvalShmemSize());
114+
size = add_size(size, PMSignalShmemSize());
114115
size = add_size(size, BgWriterShmemSize());
115116
size = add_size(size, AutoVacuumShmemSize());
116117
size = add_size(size, BTreeShmemSize());
@@ -206,7 +207,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
206207
/*
207208
* Set up interprocess signaling mechanisms
208209
*/
209-
PMSignalInit();
210+
PMSignalShmemInit();
210211
BgWriterShmemInit();
211212
AutoVacuumShmemInit();
212213

0 commit comments

Comments
 (0)