Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit a4015ec

Browse files
author
Amit Kapila
committed
Fix the logical replication timeout during large transactions.
The problem is that we don't send keepalive messages for a long time while processing large transactions during logical replication when none of the data from such transactions is sent. This can happen when the table modified in the transaction is not published or because all the changes got filtered. We do try to send a keepalive if necessary at the end of the transaction (via WalSndWriteData()), but by that time the subscriber side can time out and exit. To fix this, we try to send a keepalive message, if required, after processing a certain threshold of changes. Reported-by: Fabrice Chapuis Author: Wang wei and Amit Kapila Reviewed By: Masahiko Sawada, Euler Taveira, Hou Zhijie, Hayato Kuroda Backpatch-through: 10 Discussion: https://postgr.es/m/CAA5-nLARN7-3SLU_QUxfy510pmrYK6JJb=bk3hcgemAM_pAv+w@mail.gmail.com
1 parent 5951ad1 commit a4015ec

File tree

4 files changed

+79
-6
lines changed

4 files changed

+79
-6
lines changed

src/backend/replication/logical/logical.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,7 @@ startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt, bool i
589589

590590
/* set output state */
591591
ctx->accept_writes = false;
592+
ctx->end_xact = false;
592593

593594
/* do the actual work: call callback */
594595
ctx->callbacks.startup_cb(ctx, opt, is_init);
@@ -614,6 +615,7 @@ shutdown_cb_wrapper(LogicalDecodingContext *ctx)
614615

615616
/* set output state */
616617
ctx->accept_writes = false;
618+
ctx->end_xact = false;
617619

618620
/* do the actual work: call callback */
619621
ctx->callbacks.shutdown_cb(ctx);
@@ -647,6 +649,7 @@ begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn)
647649
ctx->accept_writes = true;
648650
ctx->write_xid = txn->xid;
649651
ctx->write_location = txn->first_lsn;
652+
ctx->end_xact = false;
650653

651654
/* do the actual work: call callback */
652655
ctx->callbacks.begin_cb(ctx, txn);
@@ -676,6 +679,7 @@ commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
676679
ctx->accept_writes = true;
677680
ctx->write_xid = txn->xid;
678681
ctx->write_location = txn->end_lsn; /* points to the end of the record */
682+
ctx->end_xact = true;
679683

680684
/* do the actual work: call callback */
681685
ctx->callbacks.commit_cb(ctx, txn, commit_lsn);
@@ -713,6 +717,8 @@ change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
713717
*/
714718
ctx->write_location = change->lsn;
715719

720+
ctx->end_xact = false;
721+
716722
ctx->callbacks.change_cb(ctx, txn, relation, change);
717723

718724
/* Pop the error context stack */
@@ -737,6 +743,7 @@ filter_by_origin_cb_wrapper(LogicalDecodingContext *ctx, RepOriginId origin_id)
737743

738744
/* set output state */
739745
ctx->accept_writes = false;
746+
ctx->end_xact = false;
740747

741748
/* do the actual work: call callback */
742749
ret = ctx->callbacks.filter_by_origin_cb(ctx, origin_id);
@@ -772,6 +779,7 @@ message_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
772779
ctx->accept_writes = true;
773780
ctx->write_xid = txn != NULL ? txn->xid : InvalidTransactionId;
774781
ctx->write_location = message_lsn;
782+
ctx->end_xact = false;
775783

776784
/* do the actual work: call callback */
777785
ctx->callbacks.message_cb(ctx, txn, message_lsn, transactional, prefix,

src/backend/replication/pgoutput/pgoutput.c

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ static bool publications_valid;
4848
static List *LoadPublications(List *pubnames);
4949
static void publication_invalidation_cb(Datum arg, int cacheid,
5050
uint32 hashvalue);
51+
static void update_replication_progress(LogicalDecodingContext *ctx);
5152

5253
/* Entry in the map used to remember which relation schemas we sent. */
5354
typedef struct RelationSyncEntry
@@ -246,7 +247,7 @@ static void
246247
pgoutput_commit_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
247248
XLogRecPtr commit_lsn)
248249
{
249-
OutputPluginUpdateProgress(ctx);
250+
update_replication_progress(ctx);
250251

251252
OutputPluginPrepareWrite(ctx, true);
252253
logicalrep_write_commit(ctx->out, txn, commit_lsn);
@@ -264,6 +265,8 @@ pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
264265
MemoryContext old;
265266
RelationSyncEntry *relentry;
266267

268+
update_replication_progress(ctx);
269+
267270
if (!is_publishable_relation(relation))
268271
return;
269272

@@ -628,3 +631,36 @@ rel_sync_cache_publication_cb(Datum arg, int cacheid, uint32 hashvalue)
628631
while ((entry = (RelationSyncEntry *) hash_seq_search(&status)) != NULL)
629632
entry->replicate_valid = false;
630633
}
634+
635+
/*
636+
* Try to update progress and send a keepalive message if too many changes were
637+
* processed.
638+
*
639+
* For a large transaction, if we don't send any change to the downstream for a
640+
* long time (exceeds the wal_receiver_timeout of standby) then it can timeout.
641+
* This can happen when all or most of the changes are not published.
642+
*/
643+
static void
644+
update_replication_progress(LogicalDecodingContext *ctx)
645+
{
646+
static int changes_count = 0;
647+
648+
/*
649+
* We don't want to try sending a keepalive message after processing each
650+
* change as that can have overhead. Tests revealed that there is no
651+
* noticeable overhead in doing it after continuously processing 100 or so
652+
* changes.
653+
*/
654+
#define CHANGES_THRESHOLD 100
655+
656+
/*
657+
* If we are at the end of transaction LSN, update progress tracking.
658+
* Otherwise, after continuously processing CHANGES_THRESHOLD changes, we
659+
* try to send a keepalive message if required.
660+
*/
661+
if (ctx->end_xact || ++changes_count >= CHANGES_THRESHOLD)
662+
{
663+
OutputPluginUpdateProgress(ctx);
664+
changes_count = 0;
665+
}
666+
}

src/backend/replication/walsender.c

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ static void ProcessStandbyMessage(void);
243243
static void ProcessStandbyReplyMessage(void);
244244
static void ProcessStandbyHSFeedbackMessage(void);
245245
static void ProcessRepliesIfAny(void);
246+
static void ProcessPendingWrites(void);
246247
static void WalSndKeepalive(bool requestReply);
247248
static void WalSndKeepaliveIfNecessary(void);
248249
static void WalSndCheckTimeOut(void);
@@ -1190,6 +1191,16 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
11901191
}
11911192

11921193
/* If we have pending write here, go to slow path */
1194+
ProcessPendingWrites();
1195+
}
1196+
1197+
/*
1198+
* Wait until there is no pending write. Also process replies from the other
1199+
* side and check timeouts during that.
1200+
*/
1201+
static void
1202+
ProcessPendingWrites(void)
1203+
{
11931204
for (;;)
11941205
{
11951206
int wakeEvents;
@@ -1256,18 +1267,35 @@ WalSndUpdateProgress(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId
12561267
{
12571268
static TimestampTz sendTime = 0;
12581269
TimestampTz now = GetCurrentTimestamp();
1270+
bool end_xact = ctx->end_xact;
12591271

12601272
/*
12611273
* Track lag no more than once per WALSND_LOGICAL_LAG_TRACK_INTERVAL_MS to
12621274
* avoid flooding the lag tracker when we commit frequently.
1275+
*
1276+
* We don't have a mechanism to get the ack for any LSN other than end
1277+
* xact LSN from the downstream. So, we track lag only for end of
1278+
* transaction LSN.
12631279
*/
12641280
#define WALSND_LOGICAL_LAG_TRACK_INTERVAL_MS 1000
1265-
if (!TimestampDifferenceExceeds(sendTime, now,
1266-
WALSND_LOGICAL_LAG_TRACK_INTERVAL_MS))
1267-
return;
1281+
if (end_xact && TimestampDifferenceExceeds(sendTime, now,
1282+
WALSND_LOGICAL_LAG_TRACK_INTERVAL_MS))
1283+
{
1284+
LagTrackerWrite(lsn, now);
1285+
sendTime = now;
1286+
}
12681287

1269-
LagTrackerWrite(lsn, now);
1270-
sendTime = now;
1288+
/*
1289+
* Try to send a keepalive if required. We don't need to try sending keep
1290+
* alive messages at the transaction end as that will be done at a later
1291+
* point in time. This is required only for large transactions where we
1292+
* don't send any changes to the downstream and the receiver can timeout
1293+
* due to that.
1294+
*/
1295+
if (!end_xact &&
1296+
now >= TimestampTzPlusMilliseconds(last_reply_timestamp,
1297+
wal_sender_timeout / 2))
1298+
ProcessPendingWrites();
12711299
}
12721300

12731301
/*

src/include/replication/logical.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ typedef struct LogicalDecodingContext
8080
*/
8181
bool accept_writes;
8282
bool prepared_write;
83+
bool end_xact;
8384
XLogRecPtr write_location;
8485
TransactionId write_xid;
8586
} LogicalDecodingContext;

0 commit comments

Comments
 (0)