Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit b39e915

Browse files
committed
Improve hash join to discard input tuples immediately if they can't match because they contain a null join key (and the join operator is known to be strict).
Improves performance significantly when the inner relation contains a lot of nulls, as per bug #2930.
1 parent 28c480e commit b39e915

File tree

4 files changed

+70
-25
lines changed

4 files changed

+70
-25
lines changed

src/backend/executor/nodeHash.c

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.108 2007/01/05 22:19:28 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.109 2007/01/28 23:21:26 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -92,11 +92,14 @@ MultiExecHash(HashState *node)
9292
slot = ExecProcNode(outerNode);
9393
if (TupIsNull(slot))
9494
break;
95-
hashtable->totalTuples += 1;
9695
/* We have to compute the hash value */
9796
econtext->ecxt_innertuple = slot;
98-
hashvalue = ExecHashGetHashValue(hashtable, econtext, hashkeys);
99-
ExecHashTableInsert(hashtable, slot, hashvalue);
97+
if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false,
98+
&hashvalue))
99+
{
100+
ExecHashTableInsert(hashtable, slot, hashvalue);
101+
hashtable->totalTuples += 1;
102+
}
100103
}
101104

102105
/* must provide our own instrumentation support */
@@ -261,19 +264,23 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
261264

262265
/*
263266
* Get info about the hash functions to be used for each hash key.
267+
* Also remember whether the join operators are strict.
264268
*/
265269
nkeys = list_length(hashOperators);
266270
hashtable->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
271+
hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
267272
i = 0;
268273
foreach(ho, hashOperators)
269274
{
275+
Oid hashop = lfirst_oid(ho);
270276
Oid hashfn;
271277

272-
hashfn = get_op_hash_function(lfirst_oid(ho));
278+
hashfn = get_op_hash_function(hashop);
273279
if (!OidIsValid(hashfn))
274280
elog(ERROR, "could not find hash function for hash operator %u",
275-
lfirst_oid(ho));
281+
hashop);
276282
fmgr_info(hashfn, &hashtable->hashfunctions[i]);
283+
hashtable->hashStrict[i] = op_strict(hashop);
277284
i++;
278285
}
279286

@@ -657,11 +664,18 @@ ExecHashTableInsert(HashJoinTable hashtable,
657664
* The tuple to be tested must be in either econtext->ecxt_outertuple or
658665
* econtext->ecxt_innertuple. Vars in the hashkeys expressions reference
659666
* either OUTER or INNER.
667+
*
668+
* A TRUE result means the tuple's hash value has been successfully computed
669+
* and stored at *hashvalue. A FALSE result means the tuple cannot match
670+
* because it contains a null attribute, and hence it should be discarded
671+
* immediately. (If keep_nulls is true then FALSE is never returned.)
660672
*/
661-
uint32
673+
bool
662674
ExecHashGetHashValue(HashJoinTable hashtable,
663675
ExprContext *econtext,
664-
List *hashkeys)
676+
List *hashkeys,
677+
bool keep_nulls,
678+
uint32 *hashvalue)
665679
{
666680
uint32 hashkey = 0;
667681
ListCell *hk;
@@ -691,10 +705,27 @@ ExecHashGetHashValue(HashJoinTable hashtable,
691705
keyval = ExecEvalExpr(keyexpr, econtext, &isNull, NULL);
692706

693707
/*
694-
* Compute the hash function
708+
* If the attribute is NULL, and the join operator is strict, then
709+
* this tuple cannot pass the join qual so we can reject it
710+
* immediately (unless we're scanning the outside of an outer join,
711+
* in which case we must not reject it). Otherwise we act like the
712+
* hashcode of NULL is zero (this will support operators that act like
713+
* IS NOT DISTINCT, though not any more-random behavior). We treat
714+
* the hash support function as strict even if the operator is not.
715+
*
716+
* Note: currently, all hashjoinable operators must be strict since
717+
* the hash index AM assumes that. However, it takes so little
718+
* extra code here to allow non-strict that we may as well do it.
695719
*/
696-
if (!isNull) /* treat nulls as having hash key 0 */
720+
if (isNull)
721+
{
722+
if (hashtable->hashStrict[i] && !keep_nulls)
723+
return false; /* cannot match */
724+
/* else, leave hashkey unmodified, equivalent to hashcode 0 */
725+
}
726+
else
697727
{
728+
/* Compute the hash function */
698729
uint32 hkey;
699730

700731
hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
@@ -707,7 +738,8 @@ ExecHashGetHashValue(HashJoinTable hashtable,
707738

708739
MemoryContextSwitchTo(oldContext);
709740

710-
return hashkey;
741+
*hashvalue = hashkey;
742+
return true;
711743
}
712744

713745
/*

src/backend/executor/nodeHashjoin.c

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.86 2007/01/05 22:19:28 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.87 2007/01/28 23:21:26 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -547,9 +547,8 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
547547
int curbatch = hashtable->curbatch;
548548
TupleTableSlot *slot;
549549

550-
if (curbatch == 0)
551-
{ /* if it is the first pass */
552-
550+
if (curbatch == 0) /* if it is the first pass */
551+
{
553552
/*
554553
* Check to see if first outer tuple was already fetched by
555554
* ExecHashJoin() and not used yet.
@@ -559,21 +558,31 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
559558
hjstate->hj_FirstOuterTupleSlot = NULL;
560559
else
561560
slot = ExecProcNode(outerNode);
562-
if (!TupIsNull(slot))
561+
562+
while (!TupIsNull(slot))
563563
{
564564
/*
565565
* We have to compute the tuple's hash value.
566566
*/
567567
ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
568568

569569
econtext->ecxt_outertuple = slot;
570-
*hashvalue = ExecHashGetHashValue(hashtable, econtext,
571-
hjstate->hj_OuterHashKeys);
570+
if (ExecHashGetHashValue(hashtable, econtext,
571+
hjstate->hj_OuterHashKeys,
572+
(hjstate->js.jointype == JOIN_LEFT),
573+
hashvalue))
574+
{
575+
/* remember outer relation is not empty for possible rescan */
576+
hjstate->hj_OuterNotEmpty = true;
572577

573-
/* remember outer relation is not empty for possible rescan */
574-
hjstate->hj_OuterNotEmpty = true;
578+
return slot;
579+
}
575580

576-
return slot;
581+
/*
582+
* That tuple couldn't match because of a NULL, so discard it
583+
* and continue with the next one.
584+
*/
585+
slot = ExecProcNode(outerNode);
577586
}
578587

579588
/*

src/include/executor/hashjoin.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.42 2007/01/05 22:19:54 momjian Exp $
10+
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.43 2007/01/28 23:21:26 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -108,6 +108,8 @@ typedef struct HashJoinTableData
108108
*/
109109
FmgrInfo *hashfunctions; /* lookup data for hash functions */
110110

111+
bool *hashStrict; /* is each hash join operator strict? */
112+
111113
Size spaceUsed; /* memory space currently used by tuples */
112114
Size spaceAllowed; /* upper limit for space used */
113115

src/include/executor/nodeHash.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.42 2007/01/05 22:19:54 momjian Exp $
10+
* $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.43 2007/01/28 23:21:26 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -28,9 +28,11 @@ extern void ExecHashTableDestroy(HashJoinTable hashtable);
2828
extern void ExecHashTableInsert(HashJoinTable hashtable,
2929
TupleTableSlot *slot,
3030
uint32 hashvalue);
31-
extern uint32 ExecHashGetHashValue(HashJoinTable hashtable,
31+
extern bool ExecHashGetHashValue(HashJoinTable hashtable,
3232
ExprContext *econtext,
33-
List *hashkeys);
33+
List *hashkeys,
34+
bool keep_nulls,
35+
uint32 *hashvalue);
3436
extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable,
3537
uint32 hashvalue,
3638
int *bucketno,

0 commit comments

Comments
 (0)