Put abbreviation logic into puttuple_common()

akorotkov · akorotkov · commit 033dd02db224 · 2022-07-27T08:27:46.000+03:00
Abbreviation code is very similar along tuplesort_put*() functions. This commit unifies that code and puts it into puttuple_common(). tuplesort_put*() functions differs in the abbreviation condition, so it has been added as an argument to the puttuple_common() function. Discussion: https://postgr.es/m/CAPpHfdvjix0Ahx-H3Jp1M2R%2B_74P-zKnGGygx4OWr%3DbUQ8BNdw%40mail.gmail.com Author: Alexander Korotkov Reviewed-by: Pavel Borisov, Maxim Orlov, Matthias van de Meent Reviewed-by: Andres Freund, John Naylor
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
@@ -616,7 +616,8 @@ static Tuplesortstate *tuplesort_begin_common(int workMem,
 											  SortCoordinate coordinate,
 											  int sortopt);
 static void tuplesort_begin_batch(Tuplesortstate *state);
-static void puttuple_common(Tuplesortstate *state, SortTuple *tuple);
+static void puttuple_common(Tuplesortstate *state, SortTuple *tuple,
+							bool useAbbrev);
 static bool consider_abort_common(Tuplesortstate *state);
 static void inittapes(Tuplesortstate *state, bool mergeruns);
 static void inittapestate(Tuplesortstate *state, int maxTapes);
@@ -1841,7 +1842,6 @@ tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
 {
 	MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext);
 	SortTuple	stup;
-	Datum		original;
 	MinimalTuple tuple;
 	HeapTupleData htup;
 
@@ -1852,49 +1852,15 @@ tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
 	/* set up first-column key value */
 	htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET;
 	htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET);
-	original = heap_getattr(&htup,
-							state->sortKeys[0].ssup_attno,
-							state->tupDesc,
-							&stup.isnull1);
+	stup.datum1 = heap_getattr(&htup,
+							   state->sortKeys[0].ssup_attno,
+							   state->tupDesc,
+							   &stup.isnull1);
 
 	MemoryContextSwitchTo(state->sortcontext);
 
-	if (!state->sortKeys->abbrev_converter || stup.isnull1)
-	{
-		/*
-		 * Store ordinary Datum representation, or NULL value.  If there is a
-		 * converter it won't expect NULL values, and cost model is not
-		 * required to account for NULL, so in that case we avoid calling
-		 * converter and just set datum1 to zeroed representation (to be
-		 * consistent, and to support cheap inequality tests for NULL
-		 * abbreviated keys).
-		 */
-		stup.datum1 = original;
-	}
-	else if (!consider_abort_common(state))
-	{
-		/* Store abbreviated key representation */
-		stup.datum1 = state->sortKeys->abbrev_converter(original,
-														state->sortKeys);
-	}
-	else
-	{
-		/* Abort abbreviation */
-		stup.datum1 = original;
-
-		/*
-		 * Set state to be consistent with never trying abbreviation.
-		 *
-		 * Alter datum1 representation in already-copied tuples, so as to
-		 * ensure a consistent representation (current tuple was just
-		 * handled).  It does not matter if some dumped tuples are already
-		 * sorted on tape, since serialized tuples lack abbreviated keys
-		 * (TSS_BUILDRUNS state prevents control reaching here in any case).
-		 */
-		REMOVEABBREV(state, state->memtuples, state->memtupcount);
-	}
-
-	puttuple_common(state, &stup);
+	puttuple_common(state, &stup,
+					state->sortKeys->abbrev_converter && !stup.isnull1);
 
 	MemoryContextSwitchTo(oldcontext);
 }
@@ -1908,7 +1874,6 @@ void
 tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
 {
 	SortTuple	stup;
-	Datum		original;
 	MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext);
 
 	/* copy the tuple into sort storage */
@@ -1924,49 +1889,14 @@ tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
 	 */
 	if (state->haveDatum1)
 	{
-		original = heap_getattr(tup,
-								state->indexInfo->ii_IndexAttrNumbers[0],
-								state->tupDesc,
-								&stup.isnull1);
-
-		if (!state->sortKeys->abbrev_converter || stup.isnull1)
-		{
-			/*
-			 * Store ordinary Datum representation, or NULL value.  If there
-			 * is a converter it won't expect NULL values, and cost model is
-			 * not required to account for NULL, so in that case we avoid
-			 * calling converter and just set datum1 to zeroed representation
-			 * (to be consistent, and to support cheap inequality tests for
-			 * NULL abbreviated keys).
-			 */
-			stup.datum1 = original;
-		}
-		else if (!consider_abort_common(state))
-		{
-			/* Store abbreviated key representation */
-			stup.datum1 = state->sortKeys->abbrev_converter(original,
-															state->sortKeys);
-		}
-		else
-		{
-			/* Abort abbreviation */
-			stup.datum1 = original;
-
-			/*
-			 * Set state to be consistent with never trying abbreviation.
-			 *
-			 * Alter datum1 representation in already-copied tuples, so as to
-			 * ensure a consistent representation (current tuple was just
-			 * handled).  It does not matter if some dumped tuples are already
-			 * sorted on tape, since serialized tuples lack abbreviated keys
-			 * (TSS_BUILDRUNS state prevents control reaching here in any
-			 * case).
-			 */
-			REMOVEABBREV(state, state->memtuples, state->memtupcount);
-		}
+		stup.datum1 = heap_getattr(tup,
+								   state->indexInfo->ii_IndexAttrNumbers[0],
+								   state->tupDesc,
+								   &stup.isnull1);
 	}
 
-	puttuple_common(state, &stup);
+	puttuple_common(state, &stup,
+					state->haveDatum1 && state->sortKeys->abbrev_converter && !stup.isnull1);
 
 	MemoryContextSwitchTo(oldcontext);
 }
@@ -1982,7 +1912,6 @@ tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
 {
 	MemoryContext oldcontext;
 	SortTuple	stup;
-	Datum		original;
 	IndexTuple	tuple;
 
 	stup.tuple = index_form_tuple_context(RelationGetDescr(rel), values,
@@ -1991,49 +1920,15 @@ tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
 	tuple->t_tid = *self;
 	USEMEM(state, GetMemoryChunkSpace(stup.tuple));
 	/* set up first-column key value */
-	original = index_getattr(tuple,
-							 1,
-							 RelationGetDescr(state->indexRel),
-							 &stup.isnull1);
+	stup.datum1 = index_getattr(tuple,
+								1,
+								RelationGetDescr(state->indexRel),
+								&stup.isnull1);
 
 	oldcontext = MemoryContextSwitchTo(state->sortcontext);
 
-	if (!state->sortKeys || !state->sortKeys->abbrev_converter || stup.isnull1)
-	{
-		/*
-		 * Store ordinary Datum representation, or NULL value.  If there is a
-		 * converter it won't expect NULL values, and cost model is not
-		 * required to account for NULL, so in that case we avoid calling
-		 * converter and just set datum1 to zeroed representation (to be
-		 * consistent, and to support cheap inequality tests for NULL
-		 * abbreviated keys).
-		 */
-		stup.datum1 = original;
-	}
-	else if (!consider_abort_common(state))
-	{
-		/* Store abbreviated key representation */
-		stup.datum1 = state->sortKeys->abbrev_converter(original,
-														state->sortKeys);
-	}
-	else
-	{
-		/* Abort abbreviation */
-		stup.datum1 = original;
-
-		/*
-		 * Set state to be consistent with never trying abbreviation.
-		 *
-		 * Alter datum1 representation in already-copied tuples, so as to
-		 * ensure a consistent representation (current tuple was just
-		 * handled).  It does not matter if some dumped tuples are already
-		 * sorted on tape, since serialized tuples lack abbreviated keys
-		 * (TSS_BUILDRUNS state prevents control reaching here in any case).
-		 */
-		REMOVEABBREV(state, state->memtuples, state->memtupcount);
-	}
-
-	puttuple_common(state, &stup);
+	puttuple_common(state, &stup,
+					state->sortKeys && state->sortKeys->abbrev_converter && !stup.isnull1);
 
 	MemoryContextSwitchTo(oldcontext);
 }
@@ -2074,43 +1969,15 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
 	}
 	else
 	{
-		Datum		original = datumCopy(val, false, state->datumTypeLen);
-
 		stup.isnull1 = false;
-		stup.tuple = DatumGetPointer(original);
+		stup.datum1 = datumCopy(val, false, state->datumTypeLen);
+		stup.tuple = DatumGetPointer(stup.datum1);
 		USEMEM(state, GetMemoryChunkSpace(stup.tuple));
 		MemoryContextSwitchTo(state->sortcontext);
-
-		if (!state->sortKeys->abbrev_converter)
-		{
-			stup.datum1 = original;
-		}
-		else if (!consider_abort_common(state))
-		{
-			/* Store abbreviated key representation */
-			stup.datum1 = state->sortKeys->abbrev_converter(original,
-															state->sortKeys);
-		}
-		else
-		{
-			/* Abort abbreviation */
-			stup.datum1 = original;
-
-			/*
-			 * Set state to be consistent with never trying abbreviation.
-			 *
-			 * Alter datum1 representation in already-copied tuples, so as to
-			 * ensure a consistent representation (current tuple was just
-			 * handled).  It does not matter if some dumped tuples are already
-			 * sorted on tape, since serialized tuples lack abbreviated keys
-			 * (TSS_BUILDRUNS state prevents control reaching here in any
-			 * case).
-			 */
-			REMOVEABBREV(state, state->memtuples, state->memtupcount);
-		}
 	}
 
-	puttuple_common(state, &stup);
+	puttuple_common(state, &stup,
+					state->tuples && !isNull && state->sortKeys->abbrev_converter);
 
 	MemoryContextSwitchTo(oldcontext);
 }
@@ -2119,10 +1986,41 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
  * Shared code for tuple and datum cases.
  */
 static void
-puttuple_common(Tuplesortstate *state, SortTuple *tuple)
+puttuple_common(Tuplesortstate *state, SortTuple *tuple, bool useAbbrev)
 {
 	Assert(!LEADER(state));
 
+	if (!useAbbrev)
+	{
+		/*
+		 * Leave ordinary Datum representation, or NULL value.  If there is a
+		 * converter it won't expect NULL values, and cost model is not
+		 * required to account for NULL, so in that case we avoid calling
+		 * converter and just set datum1 to zeroed representation (to be
+		 * consistent, and to support cheap inequality tests for NULL
+		 * abbreviated keys).
+		 */
+	}
+	else if (!consider_abort_common(state))
+	{
+		/* Store abbreviated key representation */
+		tuple->datum1 = state->sortKeys->abbrev_converter(tuple->datum1,
+														  state->sortKeys);
+	}
+	else
+	{
+		/*
+		 * Set state to be consistent with never trying abbreviation.
+		 *
+		 * Alter datum1 representation in already-copied tuples, so as to
+		 * ensure a consistent representation (current tuple was just
+		 * handled).  It does not matter if some dumped tuples are already
+		 * sorted on tape, since serialized tuples lack abbreviated keys
+		 * (TSS_BUILDRUNS state prevents control reaching here in any case).
+		 */
+		REMOVEABBREV(state, state->memtuples, state->memtupcount);
+	}
+
 	switch (state->status)
 	{
 		case TSS_INITIAL: