Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 722acf5

Browse files
committed
Handle wraparound during truncation in multixact/members
In pg_multixact/members, relying on modulo-2^32 arithmetic for wraparound handling doesn't work all that well. Because we don't explicitly track wraparound of the allocation counter for members, it is possible that the "live" area exceeds 2^31 entries; trying to remove SLRU segments that are "old" according to the original logic might lead to removal of segments still in use. To fix, have the truncation routine use a tailored SlruScanDirectory callback that keeps track of the live area in actual use; that way, when the live range exceeds 2^31 entries, the oldest segments still live will not be removed prematurely. This new SlruScanDir callback needs to take care not to remove segments that are "in the future": if new SLRU segments appear while the truncation is ongoing, make sure we don't remove them. This requires examination of shared memory state to recheck for false positives, but testing suggests that this doesn't cause a problem. The original coding didn't suffer from this pitfall because segments created when truncation is running are never considered to be removable. Per Andres Freund's investigation of bug #8673 reported by Serge Negodyuck.
1 parent 3cff187 commit 722acf5

File tree

3 files changed

+104
-18
lines changed

3 files changed

+104
-18
lines changed

src/backend/access/transam/multixact.c

+85-5
Original file line numberDiff line numberDiff line change
@@ -577,8 +577,13 @@ MultiXactIdSetOldestMember(void)
577577
* another someone else could compute an OldestVisibleMXactId that
578578
* would be after the value we are going to store when we get control
579579
* back. Which would be wrong.
580+
*
581+
* Note that a shared lock is sufficient, because it's enough to stop
582+
* someone from advancing nextMXact; and nobody else could be trying to
583+
* write to our OldestMember entry, only reading (and we assume storing
584+
* it is atomic.)
580585
*/
581-
LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
586+
LWLockAcquire(MultiXactGenLock, LW_SHARED);
582587

583588
/*
584589
* We have to beware of the possibility that nextMXact is in the
@@ -1559,7 +1564,7 @@ AtEOXact_MultiXact(void)
15591564

15601565
/*
15611566
* AtPrepare_MultiXact
1562-
* Save multixact state at 2PC tranasction prepare
1567+
* Save multixact state at 2PC transaction prepare
15631568
*
15641569
* In this phase, we only store our OldestMemberMXactId value in the two-phase
15651570
* state file.
@@ -2335,6 +2340,65 @@ GetOldestMultiXactId(void)
23352340
return oldestMXact;
23362341
}
23372342

2343+
/*
2344+
* SlruScanDirectory callback.
2345+
* This callback deletes segments that are outside the range determined by
2346+
* the given page numbers.
2347+
*
2348+
* Both range endpoints are excluded from deletion (that is, segments
2349+
* containing either of those pages are kept.)
2350+
*/
2351+
/*
 * Page-number bounds of the part of the members SLRU that is still in use.
 * A pointer to this struct is handed to SlruScanDirCbRemoveMembers as its
 * opaque "data" argument.
 */
typedef struct MembersLiveRange
2352+
{
2353+
int rangeStart; /* first page of the segment containing the oldest offset */
2354+
int rangeEnd; /* page containing the next offset to use; refreshed by the scan callback */
2355+
} MembersLiveRange;
2356+
2357+
/*
 * Directory-scan callback used when truncating the multixact members SLRU.
 *
 * ctl and filename identify the segment file under consideration, segpage
 * is the number of its first page, and data points to the MembersLiveRange
 * describing the pages still in use.  The segment is deleted only if
 * segpage lies in the dead area both with the range as passed in and again
 * after rangeEnd has been refreshed from MultiXactState->nextOffset.
 *
 * Always returns false, so the directory scan keeps going.
 */
static bool
2358+
SlruScanDirCbRemoveMembers(SlruCtl ctl, char *filename, int segpage,
2359+
void *data)
2360+
{
2361+
MembersLiveRange *range = (MembersLiveRange *) data;
2362+
MultiXactOffset nextOffset;
2363+
2364+
if ((segpage == range->rangeStart) ||
2365+
(segpage == range->rangeEnd))
2366+
return false; /* easy case out */
2367+
2368+
/*
2369+
* To ensure that no segment is spuriously removed, we must keep track
2370+
* of new segments added since the start of the directory scan; to do this,
2371+
* we update our end-of-range point as we run.
2372+
*
2373+
* As an optimization, we can skip looking at shared memory if we know for
2374+
* certain that the current segment must be kept. This is so because
2375+
* nextOffset never decreases, and we never increase rangeStart during any
2376+
* one run.
*
* The dead area tested below is the gap strictly between rangeEnd and
* rangeStart when the live range has wrapped around (rangeStart >
* rangeEnd), and everything below rangeStart or above rangeEnd otherwise.
* When rangeStart == rangeEnd neither test matches, so the segment is kept.
2377+
*/
2378+
if (!((range->rangeStart > range->rangeEnd &&
2379+
segpage > range->rangeEnd && segpage < range->rangeStart) ||
2380+
(range->rangeStart < range->rangeEnd &&
2381+
(segpage < range->rangeStart || segpage > range->rangeEnd))))
2382+
return false;
2383+
2384+
/*
2385+
* Update our idea of the end of the live range.
2386+
*/
2387+
LWLockAcquire(MultiXactGenLock, LW_SHARED);
2388+
nextOffset = MultiXactState->nextOffset;
2389+
LWLockRelease(MultiXactGenLock);
2390+
range->rangeEnd = MXOffsetToMemberPage(nextOffset);
2391+
2392+
/* Recheck the deletion condition. If it still holds, perform deletion */
2393+
if ((range->rangeStart > range->rangeEnd &&
2394+
segpage > range->rangeEnd && segpage < range->rangeStart) ||
2395+
(range->rangeStart < range->rangeEnd &&
2396+
(segpage < range->rangeStart || segpage > range->rangeEnd)))
2397+
SlruDeleteSegment(ctl, filename);
2398+
2399+
return false; /* keep going */
2400+
}
2401+
23382402
typedef struct mxtruncinfo
23392403
{
23402404
int earliestExistingPage;
@@ -2376,8 +2440,10 @@ void
23762440
TruncateMultiXact(MultiXactId oldestMXact)
23772441
{
23782442
MultiXactOffset oldestOffset;
2443+
MultiXactOffset nextOffset;
23792444
mxtruncinfo trunc;
23802445
MultiXactId earliest;
2446+
MembersLiveRange range;
23812447

23822448
/*
23832449
* Note we can't just plow ahead with the truncation; it's possible that
@@ -2424,9 +2490,23 @@ TruncateMultiXact(MultiXactId oldestMXact)
24242490
SimpleLruTruncate(MultiXactOffsetCtl,
24252491
MultiXactIdToOffsetPage(oldestMXact));
24262492

2427-
/* truncate MultiXactMembers and we're done */
2428-
SimpleLruTruncate(MultiXactMemberCtl,
2429-
MXOffsetToMemberPage(oldestOffset));
2493+
/*
2494+
* To truncate MultiXactMembers, we need to figure out the active page
2495+
* range and delete all files outside that range. The start point is the
2496+
* start of the segment containing the oldest offset; an end point of the
2497+
* segment containing the next offset to use is enough. The end point is
2498+
* updated as MultiXactMember gets extended concurrently, elsewhere.
2499+
*/
2500+
range.rangeStart = MXOffsetToMemberPage(oldestOffset);
2501+
range.rangeStart -= range.rangeStart % SLRU_PAGES_PER_SEGMENT;
2502+
2503+
LWLockAcquire(MultiXactGenLock, LW_SHARED);
2504+
nextOffset = MultiXactState->nextOffset;
2505+
LWLockRelease(MultiXactGenLock);
2506+
2507+
range.rangeEnd = MXOffsetToMemberPage(nextOffset);
2508+
2509+
SlruScanDirectory(MultiXactMemberCtl, SlruScanDirCbRemoveMembers, &range);
24302510
}
24312511

24322512
/*

src/backend/access/transam/slru.c

+18-13
Original file line numberDiff line numberDiff line change
@@ -1210,6 +1210,17 @@ restart:;
12101210
(void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage);
12111211
}
12121212

1213+
/*
 * Remove the segment file named "filename" from the directory of the given
 * SLRU (ctl->Dir), reporting the removal at DEBUG2 level first.
 */
void
1214+
SlruDeleteSegment(SlruCtl ctl, char *filename)
1215+
{
1216+
char path[MAXPGPATH];
1217+
1218+
snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, filename);
1219+
ereport(DEBUG2,
1220+
(errmsg("removing file \"%s\"", path)));
1221+
unlink(path); /* result is not checked */
1222+
}
1223+
12131224
/*
12141225
* SlruScanDirectory callback
12151226
* This callback reports true if there's any segment prior to the one
@@ -1235,16 +1246,10 @@ SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data
12351246
static bool
12361247
SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
12371248
{
1238-
char path[MAXPGPATH];
12391249
int cutoffPage = *(int *) data;
12401250

12411251
if (ctl->PagePrecedes(segpage, cutoffPage))
1242-
{
1243-
snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, filename);
1244-
ereport(DEBUG2,
1245-
(errmsg("removing file \"%s\"", path)));
1246-
unlink(path);
1247-
}
1252+
SlruDeleteSegment(ctl, filename);
12481253

12491254
return false; /* keep going */
12501255
}
@@ -1256,12 +1261,7 @@ SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
12561261
bool
12571262
SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
12581263
{
1259-
char path[MAXPGPATH];
1260-
1261-
snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, filename);
1262-
ereport(DEBUG2,
1263-
(errmsg("removing file \"%s\"", path)));
1264-
unlink(path);
1264+
SlruDeleteSegment(ctl, filename);
12651265

12661266
return false; /* keep going */
12671267
}
@@ -1272,6 +1272,11 @@ SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
12721272
* If the callback returns true, the scan is stopped. The last return value
12731273
* from the callback is returned.
12741274
*
1275+
* The callback receives the following arguments: 1. the SlruCtl struct for the
1276+
* slru being truncated; 2. the filename being considered; 3. the page number
1277+
* for the first page of that file; 4. a pointer to the opaque data given to us
1278+
* by the caller.
1279+
*
12751280
* Note that the ordering in which the directory is scanned is not guaranteed.
12761281
*
12771282
* Note that no locking is applied.

src/include/access/slru.h

+1
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno);
150150
typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int segpage,
151151
void *data);
152152
extern bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data);
153+
extern void SlruDeleteSegment(SlruCtl ctl, char *filename);
153154

154155
/* SlruScanDirectory public callbacks */
155156
extern bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename,

0 commit comments

Comments
 (0)