pg_dump: Reduce memory usage of dumps with statistics.

nathan-bossart · Commitfest Bot · commit 6a824d99a5ae · 2025-04-03T02:39:29.000Z
Right now, pg_dump stores all generated commands for statistics in memory. These commands can be quite large and therefore can significantly increase pg_dump's memory footprint. To fix, wait until we are about to write out the commands before generating them, and be sure to free the commands after writing. This is implemented via a new defnDumper callback that works much like the dataDumper one but is specially designed for TOC entries. Custom dumps that include data might write the TOC twice (to update data offset information), which would ordinarily cause pg_dump to run the attribute statistics queries twice. However, as a hack, we save the length of the written-out entry in the first pass, and we skip over it in the second. While there is no known technical problem with executing the queries multiple times and rewriting the results, it's expensive and feels risky, so it seems prudent to avoid it. As an exception, we _do_ execute the queries twice for the tar format. This format does a second pass through the TOC to generate the restore.sql file, which isn't used by pg_restore, so different results won't corrupt the output (it'll just be different). We could alternatively save the definition in memory the first time it is generated, but that defeats the purpose of this change. In any case, past discussion indicates that the tar format might be a candidate for deprecation, so it doesn't seem worth trying too much harder. Author: Corey Huinker <corey.huinker@gmail.com> Co-authored-by: Nathan Bossart <nathandbossart@gmail.com> Reviewed-by: Jeff Davis <pgsql@j-davis.com> Discussion: https://postgr.es/m/CADkLM%3Dc%2Br05srPy9w%2B-%2BnbmLEo15dKXYQ03Q_xyK%2BriJerigLQ%40mail.gmail.com
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
@@ -285,6 +285,7 @@ typedef int DumpId;
  * Function pointer prototypes for assorted callback methods.
  */
 
+typedef char *(*DefnDumperPtr) (Archive *AH, const void *userArg);
 typedef int (*DataDumperPtr) (Archive *AH, const void *userArg);
 
 typedef void (*SetupWorkerPtrType) (Archive *AH);
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
@@ -1266,6 +1266,9 @@ ArchiveEntry(Archive *AHX, CatalogId catalogId, DumpId dumpId,
 	newToc->dataDumperArg = opts->dumpArg;
 	newToc->hadDumper = opts->dumpFn ? true : false;
 
+	newToc->defnDumper = opts->defnFn;
+	newToc->defnDumperArg = opts->defnArg;
+
 	newToc->formatData = NULL;
 	newToc->dataLength = 0;
 
@@ -2621,7 +2624,40 @@ WriteToc(ArchiveHandle *AH)
 		WriteStr(AH, te->tag);
 		WriteStr(AH, te->desc);
 		WriteInt(AH, te->section);
-		WriteStr(AH, te->defn);
+
+		if (te->defnLen)
+		{
+			/*
+			 * defnLen should only be set for custom format's second call to
+			 * WriteToc(), which rewrites the TOC in place to update any data
+			 * offsets.  Rather than call the defnDumper a second time (which
+			 * would involve executing the queries again), just skip writing
+			 * the entry.  While regenerating the definition should in theory
+			 * produce the same result as before, it's expensive and feels
+			 * risky.
+			 *
+			 * The custom format only does a second WriteToc() if fseeko() is
+			 * usable (see _CloseArchive() in pg_backup_custom.c), so we can
+			 * just use it without checking.  For other formats, we fail
+			 * because this assumption must no longer hold true.
+			 */
+			if (AH->format != archCustom)
+				pg_fatal("unexpected TOC entry in WriteToc(): %d %s %s",
+						 te->dumpId, te->desc, te->tag);
+
+			if (fseeko(AH->FH, te->defnLen, SEEK_CUR != 0))
+				pg_fatal("error during file seek: %m");
+		}
+		else if (te->defnDumper)
+		{
+			char	   *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
+
+			te->defnLen = WriteStr(AH, defn);
+			pg_free(defn);
+		}
+		else
+			WriteStr(AH, te->defn);
+
 		WriteStr(AH, te->dropStmt);
 		WriteStr(AH, te->copyStmt);
 		WriteStr(AH, te->namespace);
@@ -3849,7 +3885,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx)
 
 	/*
 	 * Actually print the definition.  Normally we can just print the defn
-	 * string if any, but we have three special cases:
+	 * string if any, but we have four special cases:
 	 *
 	 * 1. A crude hack for suppressing AUTHORIZATION clause that old pg_dump
 	 * versions put into CREATE SCHEMA.  Don't mutate the variant for schema
@@ -3862,6 +3898,10 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx)
 	 * 3. ACL LARGE OBJECTS entries need special processing because they
 	 * contain only one copy of the ACL GRANT/REVOKE commands, which we must
 	 * apply to each large object listed in the associated BLOB METADATA.
+	 *
+	 * 4. Entries with a defnDumper need to call it to generate the
+	 * definition.  This is primarily intended to provide a way to save memory
+	 * for objects that need a lot of it (e.g., statistics data).
 	 */
 	if (ropt->noOwner &&
 		strcmp(te->desc, "SCHEMA") == 0 && strncmp(te->defn, "--", 2) != 0)
@@ -3877,6 +3917,39 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx)
 	{
 		IssueACLPerBlob(AH, te);
 	}
+	else if (te->defnLen && AH->format != archTar)
+	{
+		/*
+		 * If defnLen is set, the defnDumper has already been called for this
+		 * TOC entry. We ordinarily don't expect a defnDumper to be called on
+		 * a TOC entry a second time in _printTocEntry(), but of course
+		 * there's an exception.  The tar format first calls WriteToc(), which
+		 * scans through the entire TOC, and then it later calls
+		 * RestoreArchive() to generate restore.sql, which scans through the
+		 * TOC again. There doesn't seem to be a good way to avoid calling the
+		 * defnDumper again in that case without storing the definition in
+		 * memory, which is what we're trying to avoid in the first place.
+		 * This second defnDumper invocation should generate the exact same
+		 * output, but even if it doesn't, the worst case is that the
+		 * restore.sql file (which isn't used by pg_restore) is incorrect.
+		 * Past discussion on the mailing list indicates that tar format isn't
+		 * known to be heavily used and might be a candidate for deprecation,
+		 * so it doesn't seem worth trying much harder here.
+		 *
+		 * In all other cases, encountering a TOC entry a second time in
+		 * _printTocEntry() is unexpected, so we fail because one of our
+		 * assumptions must no longer hold true.
+		 */
+		pg_fatal("unexpected TOC entry in _printTocEntry(): %d %s %s",
+				 te->dumpId, te->desc, te->tag);
+	}
+	else if (te->defnDumper)
+	{
+		char	   *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
+
+		te->defnLen = ahprintf(AH, "%s\n\n", defn);
+		pg_free(defn);
+	}
 	else if (te->defn && strlen(te->defn) > 0)
 	{
 		ahprintf(AH, "%s\n\n", te->defn);
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
@@ -368,6 +368,10 @@ struct _tocEntry
 	const void *dataDumperArg;	/* Arg for above routine */
 	void	   *formatData;		/* TOC Entry data specific to file format */
 
+	DefnDumperPtr defnDumper;	/* routine to dump definition statement */
+	const void *defnDumperArg;	/* arg for above routine */
+	size_t		defnLen;		/* length of dumped definition */
+
 	/* working state while dumping/restoring */
 	pgoff_t		dataLength;		/* item's data size; 0 if none or unknown */
 	int			reqs;			/* do we need schema and/or data of object
@@ -407,6 +411,8 @@ typedef struct _archiveOpts
 	int			nDeps;
 	DataDumperPtr dumpFn;
 	const void *dumpArg;
+	DefnDumperPtr defnFn;
+	const void *defnArg;
 } ArchiveOpts;
 #define ARCHIVE_OPTS(...) &(ArchiveOpts){__VA_ARGS__}
 /* Called to add a TOC entry */
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
@@ -10554,17 +10554,21 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
 }
 
 /*
- * dumpRelationStats --
+ * dumpRelationStats_dumper --
  *
- * Dump command to import stats into the relation on the new database.
+ * Generate command to import stats into the relation on the new database.
+ * This routine is called by the Archiver when it wants the statistics to be
+ * dumped.
  */
-static void
-dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
+static char *
+dumpRelationStats_dumper(Archive *fout, const void *userArg)
 {
+	const RelStatsInfo *rsinfo = (RelStatsInfo *) userArg;
 	const DumpableObject *dobj = &rsinfo->dobj;
 	PGresult   *res;
 	PQExpBuffer query;
-	PQExpBuffer out;
+	PQExpBufferData out_data;
+	PQExpBuffer out = &out_data;
 	int			i_attname;
 	int			i_inherited;
 	int			i_null_frac;
@@ -10581,10 +10585,6 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
 	int			i_range_empty_frac;
 	int			i_range_bounds_histogram;
 
-	/* nothing to do if we are not dumping statistics */
-	if (!fout->dopt->dumpStatistics)
-		return;
-
 	query = createPQExpBuffer();
 	if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS])
 	{
@@ -10620,7 +10620,7 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
 		resetPQExpBuffer(query);
 	}
 
-	out = createPQExpBuffer();
+	initPQExpBuffer(out);
 
 	/* restore relation stats */
 	appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_relation_stats(\n");
@@ -10764,17 +10764,35 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
 
 	PQclear(res);
 
+	destroyPQExpBuffer(query);
+	return out->data;
+}
+
+/*
+ * dumpRelationStats --
+ *
+ * Make an ArchiveEntry for the relation statistics.  The Archiver will take
+ * care of gathering the statistics and generating the restore commands when
+ * they are needed.
+ */
+static void
+dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
+{
+	const DumpableObject *dobj = &rsinfo->dobj;
+
+	/* nothing to do if we are not dumping statistics */
+	if (!fout->dopt->dumpStatistics)
+		return;
+
 	ArchiveEntry(fout, nilCatalogId, createDumpId(),
 				 ARCHIVE_OPTS(.tag = dobj->name,
 							  .namespace = dobj->namespace->dobj.name,
 							  .description = "STATISTICS DATA",
 							  .section = rsinfo->section,
-							  .createStmt = out->data,
+							  .defnFn = dumpRelationStats_dumper,
+							  .defnArg = rsinfo,
 							  .deps = dobj->dependencies,
 							  .nDeps = dobj->nDeps));
-
-	destroyPQExpBuffer(out);
-	destroyPQExpBuffer(query);
 }
 
 /*