Thanks to visit codestin.com
Credit goes to doxygen.postgresql.org

PostgreSQL Source Code git master
partbounds.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * partbounds.c
4 * Support routines for manipulating partition bounds
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 * IDENTIFICATION
10 * src/backend/partitioning/partbounds.c
11 *
12 *-------------------------------------------------------------------------
13 */
14
15#include "postgres.h"
16
17#include "access/relation.h"
18#include "access/table.h"
19#include "access/tableam.h"
20#include "catalog/partition.h"
21#include "catalog/pg_inherits.h"
22#include "catalog/pg_type.h"
23#include "commands/tablecmds.h"
24#include "common/hashfn.h"
25#include "executor/executor.h"
26#include "miscadmin.h"
27#include "nodes/makefuncs.h"
28#include "nodes/nodeFuncs.h"
29#include "nodes/pathnodes.h"
30#include "parser/parse_coerce.h"
33#include "utils/array.h"
34#include "utils/builtins.h"
35#include "utils/datum.h"
36#include "utils/fmgroids.h"
37#include "utils/lsyscache.h"
38#include "utils/partcache.h"
39#include "utils/ruleutils.h"
40#include "utils/snapmgr.h"
41#include "utils/syscache.h"
42
43/*
44 * When qsort'ing partition bounds after reading from the catalog, each bound
45 * is represented with one of the following structs.
46 */
47
48/* One bound of a hash partition */
49typedef struct PartitionHashBound
50{
53 int index;
55
56/* One value coming from some (index'th) list partition */
57typedef struct PartitionListValue
58{
59 int index;
62
63/* One bound of a range partition */
64typedef struct PartitionRangeBound
65{
66 int index;
67 Datum *datums; /* range bound datums */
68 PartitionRangeDatumKind *kind; /* the kind of each datum */
69 bool lower; /* this is the lower (vs upper) bound */
71
72/*
73 * Mapping from partitions of a joining relation to partitions of a join
74 * relation being computed (a.k.a merged partitions)
75 */
76typedef struct PartitionMap
77{
78 int nparts; /* number of partitions */
79 int *merged_indexes; /* indexes of merged partitions */
80 bool *merged; /* flags to indicate whether partitions are
81 * merged with non-dummy partitions */
82 bool did_remapping; /* did we re-map partitions? */
83 int *old_indexes; /* old indexes of merged partitions if
84 * did_remapping */
86
87/* Macro for comparing two range bounds */
88#define compare_range_bounds(partnatts, partsupfunc, partcollations, \
89 bound1, bound2) \
90 (partition_rbound_cmp(partnatts, partsupfunc, partcollations, \
91 (bound1)->datums, (bound1)->kind, (bound1)->lower, \
92 bound2))
93
94static int32 qsort_partition_hbound_cmp(const void *a, const void *b);
95static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
96 void *arg);
97static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
98 void *arg);
100 int nparts, PartitionKey key, int **mapping);
102 int nparts, PartitionKey key, int **mapping);
104 int nparts, PartitionKey key, int **mapping);
106 Oid *partcollation,
107 RelOptInfo *outer_rel,
108 RelOptInfo *inner_rel,
109 JoinType jointype,
110 List **outer_parts,
111 List **inner_parts);
112static PartitionBoundInfo merge_range_bounds(int partnatts,
113 FmgrInfo *partsupfuncs,
114 Oid *partcollations,
115 RelOptInfo *outer_rel,
116 RelOptInfo *inner_rel,
117 JoinType jointype,
118 List **outer_parts,
119 List **inner_parts);
120static void init_partition_map(RelOptInfo *rel, PartitionMap *map);
121static void free_partition_map(PartitionMap *map);
122static bool is_dummy_partition(RelOptInfo *rel, int part_index);
123static int merge_matching_partitions(PartitionMap *outer_map,
124 PartitionMap *inner_map,
125 int outer_index,
126 int inner_index,
127 int *next_index);
128static int process_outer_partition(PartitionMap *outer_map,
129 PartitionMap *inner_map,
130 bool outer_has_default,
131 bool inner_has_default,
132 int outer_index,
133 int inner_default,
134 JoinType jointype,
135 int *next_index,
136 int *default_index);
137static int process_inner_partition(PartitionMap *outer_map,
138 PartitionMap *inner_map,
139 bool outer_has_default,
140 bool inner_has_default,
141 int inner_index,
142 int outer_default,
143 JoinType jointype,
144 int *next_index,
145 int *default_index);
146static void merge_null_partitions(PartitionMap *outer_map,
147 PartitionMap *inner_map,
148 bool outer_has_null,
149 bool inner_has_null,
150 int outer_null,
151 int inner_null,
152 JoinType jointype,
153 int *next_index,
154 int *null_index);
155static void merge_default_partitions(PartitionMap *outer_map,
156 PartitionMap *inner_map,
157 bool outer_has_default,
158 bool inner_has_default,
159 int outer_default,
160 int inner_default,
161 JoinType jointype,
162 int *next_index,
163 int *default_index);
165 int *next_index);
166static void fix_merged_indexes(PartitionMap *outer_map,
167 PartitionMap *inner_map,
168 int nmerged, List *merged_indexes);
169static void generate_matching_part_pairs(RelOptInfo *outer_rel,
170 RelOptInfo *inner_rel,
171 PartitionMap *outer_map,
172 PartitionMap *inner_map,
173 int nmerged,
174 List **outer_parts,
175 List **inner_parts);
177 List *merged_datums,
178 List *merged_kinds,
179 List *merged_indexes,
180 int null_index,
181 int default_index);
182static int get_range_partition(RelOptInfo *rel,
184 int *lb_pos,
188 int *lb_pos,
191static bool compare_range_partitions(int partnatts, FmgrInfo *partsupfuncs,
192 Oid *partcollations,
193 PartitionRangeBound *outer_lb,
194 PartitionRangeBound *outer_ub,
195 PartitionRangeBound *inner_lb,
196 PartitionRangeBound *inner_ub,
197 int *lb_cmpval, int *ub_cmpval);
198static void get_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
199 Oid *partcollations, JoinType jointype,
200 PartitionRangeBound *outer_lb,
201 PartitionRangeBound *outer_ub,
202 PartitionRangeBound *inner_lb,
203 PartitionRangeBound *inner_ub,
204 int lb_cmpval, int ub_cmpval,
205 PartitionRangeBound *merged_lb,
206 PartitionRangeBound *merged_ub);
207static void add_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
208 Oid *partcollations,
209 PartitionRangeBound *merged_lb,
210 PartitionRangeBound *merged_ub,
211 int merged_index,
212 List **merged_datums,
213 List **merged_kinds,
214 List **merged_indexes);
216 List *datums, bool lower);
217static int32 partition_hbound_cmp(int modulus1, int remainder1, int modulus2,
218 int remainder2);
219static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
220 Oid *partcollation, Datum *datums1,
221 PartitionRangeDatumKind *kind1, bool lower1,
223static int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
224 Oid *partcollation,
225 PartitionBoundInfo boundinfo,
226 PartitionRangeBound *probe, int32 *cmpval);
227static Expr *make_partition_op_expr(PartitionKey key, int keynum,
228 uint16 strategy, Expr *arg1, Expr *arg2);
230 StrategyNumber strategy, bool *need_relabel);
234 bool for_default);
235static void get_range_key_properties(PartitionKey key, int keynum,
236 PartitionRangeDatum *ldatum,
237 PartitionRangeDatum *udatum,
238 ListCell **partexprs_item,
239 Expr **keyCol,
240 Const **lower_val, Const **upper_val);
242
243/*
244 * get_qual_from_partbound
245 * Given a parser node for partition bound, return the list of executable
246 * expressions as partition constraint
247 */
248List *
250{
252 List *my_qual = NIL;
253
254 Assert(key != NULL);
255
256 switch (key->strategy)
257 {
260 my_qual = get_qual_for_hash(parent, spec);
261 break;
262
265 my_qual = get_qual_for_list(parent, spec);
266 break;
267
270 my_qual = get_qual_for_range(parent, spec, false);
271 break;
272 }
273
274 return my_qual;
275}
276
277/*
278 * partition_bounds_create
279 * Build a PartitionBoundInfo struct from a list of PartitionBoundSpec
280 * nodes
281 *
282 * This function creates a PartitionBoundInfo and fills the values of its
283 * various members based on the input list. Importantly, 'datums' array will
284 * contain Datum representation of individual bounds (possibly after
285 * de-duplication as in case of range bounds), sorted in a canonical order
286 * defined by qsort_partition_* functions of respective partitioning methods.
287 * 'indexes' array will contain as many elements as there are bounds (specific
288 * exceptions to this rule are listed in the function body), which represent
289 * the 0-based canonical positions of partitions.
290 *
291 * Upon return from this function, *mapping is set to an array of
292 * list_length(boundspecs) elements, each of which maps the original index of
293 * a partition to its canonical index.
294 *
295 * Note: The objects returned by this function are wholly allocated in the
296 * current memory context.
297 */
300 PartitionKey key, int **mapping)
301{
302 int i;
303
304 Assert(nparts > 0);
305
306 /*
307 * For each partitioning method, we first convert the partition bounds
308 * from their parser node representation to the internal representation,
309 * along with any additional preprocessing (such as de-duplicating range
310 * bounds). Resulting bound datums are then added to the 'datums' array
311 * in PartitionBoundInfo. For each datum added, an integer indicating the
312 * canonical partition index is added to the 'indexes' array.
313 *
314 * For each bound, we remember its partition's position (0-based) in the
315 * original list to later map it to the canonical index.
316 */
317
318 /*
319 * Initialize mapping array with invalid values, this is filled within
320 * each sub-routine below depending on the bound type.
321 */
322 *mapping = (int *) palloc(sizeof(int) * nparts);
323 for (i = 0; i < nparts; i++)
324 (*mapping)[i] = -1;
325
326 switch (key->strategy)
327 {
329 return create_hash_bounds(boundspecs, nparts, key, mapping);
330
332 return create_list_bounds(boundspecs, nparts, key, mapping);
333
335 return create_range_bounds(boundspecs, nparts, key, mapping);
336 }
337
338 Assert(false);
339 return NULL; /* keep compiler quiet */
340}
341
342/*
343 * create_hash_bounds
344 * Create a PartitionBoundInfo for a hash partitioned table
345 */
347create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts,
348 PartitionKey key, int **mapping)
349{
350 PartitionBoundInfo boundinfo;
351 PartitionHashBound *hbounds;
352 int i;
353 int greatest_modulus;
354 Datum *boundDatums;
355
356 boundinfo = (PartitionBoundInfoData *)
358 boundinfo->strategy = key->strategy;
359 /* No special hash partitions. */
360 boundinfo->null_index = -1;
361 boundinfo->default_index = -1;
362
363 hbounds = (PartitionHashBound *)
364 palloc(nparts * sizeof(PartitionHashBound));
365
366 /* Convert from node to the internal representation */
367 for (i = 0; i < nparts; i++)
368 {
369 PartitionBoundSpec *spec = boundspecs[i];
370
372 elog(ERROR, "invalid strategy in partition bound spec");
373
374 hbounds[i].modulus = spec->modulus;
375 hbounds[i].remainder = spec->remainder;
376 hbounds[i].index = i;
377 }
378
379 /* Sort all the bounds in ascending order */
380 qsort(hbounds, nparts, sizeof(PartitionHashBound),
382
383 /* After sorting, moduli are now stored in ascending order. */
384 greatest_modulus = hbounds[nparts - 1].modulus;
385
386 boundinfo->ndatums = nparts;
387 boundinfo->datums = (Datum **) palloc0(nparts * sizeof(Datum *));
388 boundinfo->kind = NULL;
389 boundinfo->interleaved_parts = NULL;
390 boundinfo->nindexes = greatest_modulus;
391 boundinfo->indexes = (int *) palloc(greatest_modulus * sizeof(int));
392 for (i = 0; i < greatest_modulus; i++)
393 boundinfo->indexes[i] = -1;
394
395 /*
396 * In the loop below, to save from allocating a series of small datum
397 * arrays, here we just allocate a single array and below we'll just
398 * assign a portion of this array per partition.
399 */
400 boundDatums = (Datum *) palloc(nparts * 2 * sizeof(Datum));
401
402 /*
403 * For hash partitioning, there are as many datums (modulus and remainder
404 * pairs) as there are partitions. Indexes are simply values ranging from
405 * 0 to (nparts - 1).
406 */
407 for (i = 0; i < nparts; i++)
408 {
409 int modulus = hbounds[i].modulus;
410 int remainder = hbounds[i].remainder;
411
412 boundinfo->datums[i] = &boundDatums[i * 2];
413 boundinfo->datums[i][0] = Int32GetDatum(modulus);
414 boundinfo->datums[i][1] = Int32GetDatum(remainder);
415
416 while (remainder < greatest_modulus)
417 {
418 /* overlap? */
419 Assert(boundinfo->indexes[remainder] == -1);
420 boundinfo->indexes[remainder] = i;
421 remainder += modulus;
422 }
423
424 (*mapping)[hbounds[i].index] = i;
425 }
426 pfree(hbounds);
427
428 return boundinfo;
429}
430
431/*
432 * get_non_null_list_datum_count
433 * Counts the number of non-null Datums in each partition.
434 */
435static int
437{
438 int i;
439 int count = 0;
440
441 for (i = 0; i < nparts; i++)
442 {
443 ListCell *lc;
444
445 foreach(lc, boundspecs[i]->listdatums)
446 {
447 Const *val = lfirst_node(Const, lc);
448
449 if (!val->constisnull)
450 count++;
451 }
452 }
453
454 return count;
455}
456
457/*
458 * create_list_bounds
459 * Create a PartitionBoundInfo for a list partitioned table
460 */
462create_list_bounds(PartitionBoundSpec **boundspecs, int nparts,
463 PartitionKey key, int **mapping)
464{
465 PartitionBoundInfo boundinfo;
466 PartitionListValue *all_values;
467 int i;
468 int j;
469 int ndatums;
470 int next_index = 0;
471 int default_index = -1;
472 int null_index = -1;
473 Datum *boundDatums;
474
475 boundinfo = (PartitionBoundInfoData *)
477 boundinfo->strategy = key->strategy;
478 /* Will be set correctly below. */
479 boundinfo->null_index = -1;
480 boundinfo->default_index = -1;
481
482 ndatums = get_non_null_list_datum_count(boundspecs, nparts);
483 all_values = (PartitionListValue *)
484 palloc(ndatums * sizeof(PartitionListValue));
485
486 /* Create a unified list of non-null values across all partitions. */
487 for (j = 0, i = 0; i < nparts; i++)
488 {
489 PartitionBoundSpec *spec = boundspecs[i];
490 ListCell *c;
491
493 elog(ERROR, "invalid strategy in partition bound spec");
494
495 /*
496 * Note the index of the partition bound spec for the default
497 * partition. There's no datum to add to the list on non-null datums
498 * for this partition.
499 */
500 if (spec->is_default)
501 {
502 default_index = i;
503 continue;
504 }
505
506 foreach(c, spec->listdatums)
507 {
509
510 if (!val->constisnull)
511 {
512 all_values[j].index = i;
513 all_values[j].value = val->constvalue;
514 j++;
515 }
516 else
517 {
518 /*
519 * Never put a null into the values array; save the index of
520 * the partition that stores nulls, instead.
521 */
522 if (null_index != -1)
523 elog(ERROR, "found null more than once");
524 null_index = i;
525 }
526 }
527 }
528
529 /* ensure we found a Datum for every slot in the all_values array */
530 Assert(j == ndatums);
531
532 qsort_arg(all_values, ndatums, sizeof(PartitionListValue),
534
535 boundinfo->ndatums = ndatums;
536 boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
537 boundinfo->kind = NULL;
538 boundinfo->interleaved_parts = NULL;
539 boundinfo->nindexes = ndatums;
540 boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
541
542 /*
543 * In the loop below, to save from allocating a series of small datum
544 * arrays, here we just allocate a single array and below we'll just
545 * assign a portion of this array per datum.
546 */
547 boundDatums = (Datum *) palloc(ndatums * sizeof(Datum));
548
549 /*
550 * Copy values. Canonical indexes are values ranging from 0 to (nparts -
551 * 1) assigned to each partition such that all datums of a given partition
552 * receive the same value. The value for a given partition is the index of
553 * that partition's smallest datum in the all_values[] array.
554 */
555 for (i = 0; i < ndatums; i++)
556 {
557 int orig_index = all_values[i].index;
558
559 boundinfo->datums[i] = &boundDatums[i];
560 boundinfo->datums[i][0] = datumCopy(all_values[i].value,
561 key->parttypbyval[0],
562 key->parttyplen[0]);
563
564 /* If the old index has no mapping, assign one */
565 if ((*mapping)[orig_index] == -1)
566 (*mapping)[orig_index] = next_index++;
567
568 boundinfo->indexes[i] = (*mapping)[orig_index];
569 }
570
571 pfree(all_values);
572
573 /*
574 * Set the canonical value for null_index, if any.
575 *
576 * It is possible that the null-accepting partition has not been assigned
577 * an index yet, which could happen if such partition accepts only null
578 * and hence not handled in the above loop which only looked at non-null
579 * values.
580 */
581 if (null_index != -1)
582 {
583 Assert(null_index >= 0);
584 if ((*mapping)[null_index] == -1)
585 (*mapping)[null_index] = next_index++;
586 boundinfo->null_index = (*mapping)[null_index];
587 }
588
589 /* Set the canonical value for default_index, if any. */
590 if (default_index != -1)
591 {
592 /*
593 * The default partition accepts any value not specified in the lists
594 * of other partitions, hence it should not get mapped index while
595 * assigning those for non-null datums.
596 */
597 Assert(default_index >= 0);
598 Assert((*mapping)[default_index] == -1);
599 (*mapping)[default_index] = next_index++;
600 boundinfo->default_index = (*mapping)[default_index];
601 }
602
603 /*
604 * Calculate interleaved partitions. Here we look for partitions which
605 * might be interleaved with other partitions and set a bit in
606 * interleaved_parts for any partitions which may be interleaved with
607 * another partition.
608 */
609
610 /*
611 * There must be multiple partitions to have any interleaved partitions,
612 * otherwise there's nothing to interleave with.
613 */
614 if (nparts > 1)
615 {
616 /*
617 * Short-circuit check to see if only 1 Datum is allowed per
618 * partition. When this is true there's no need to do the more
619 * expensive checks to look for interleaved values.
620 */
621 if (boundinfo->ndatums +
623 partition_bound_has_default(boundinfo) != nparts)
624 {
625 int last_index = -1;
626
627 /*
628 * Since the indexes array is sorted in Datum order, if any
629 * partitions are interleaved then it will show up by the
630 * partition indexes not being in ascending order. Here we check
631 * for that and record all partitions that are out of order.
632 */
633 for (i = 0; i < boundinfo->nindexes; i++)
634 {
635 int index = boundinfo->indexes[i];
636
637 if (index < last_index)
638 boundinfo->interleaved_parts = bms_add_member(boundinfo->interleaved_parts,
639 index);
640
641 /*
642 * Otherwise, if the null_index exists in the indexes array,
643 * then the NULL partition must also allow some other Datum,
644 * therefore it's "interleaved".
645 */
646 else if (partition_bound_accepts_nulls(boundinfo) &&
647 index == boundinfo->null_index)
648 boundinfo->interleaved_parts = bms_add_member(boundinfo->interleaved_parts,
649 index);
650
651 last_index = index;
652 }
653 }
654
655 /*
656 * The DEFAULT partition is the "catch-all" partition that can contain
657 * anything that does not belong to any other partition. If there are
658 * any other partitions then the DEFAULT partition must be marked as
659 * interleaved.
660 */
661 if (partition_bound_has_default(boundinfo))
662 boundinfo->interleaved_parts = bms_add_member(boundinfo->interleaved_parts,
663 boundinfo->default_index);
664 }
665
666
667 /* All partitions must now have been assigned canonical indexes. */
668 Assert(next_index == nparts);
669 return boundinfo;
670}
671
672/*
673 * create_range_bounds
674 * Create a PartitionBoundInfo for a range partitioned table
675 */
678 PartitionKey key, int **mapping)
679{
680 PartitionBoundInfo boundinfo;
681 PartitionRangeBound **rbounds = NULL;
682 PartitionRangeBound **all_bounds,
683 *prev;
684 int i,
685 k,
686 partnatts;
687 int ndatums = 0;
688 int default_index = -1;
689 int next_index = 0;
690 Datum *boundDatums;
691 PartitionRangeDatumKind *boundKinds;
692
693 boundinfo = (PartitionBoundInfoData *)
695 boundinfo->strategy = key->strategy;
696 /* There is no special null-accepting range partition. */
697 boundinfo->null_index = -1;
698 /* Will be set correctly below. */
699 boundinfo->default_index = -1;
700
701 all_bounds = (PartitionRangeBound **)
702 palloc0(2 * nparts * sizeof(PartitionRangeBound *));
703
704 /* Create a unified list of range bounds across all the partitions. */
705 ndatums = 0;
706 for (i = 0; i < nparts; i++)
707 {
708 PartitionBoundSpec *spec = boundspecs[i];
710 *upper;
711
713 elog(ERROR, "invalid strategy in partition bound spec");
714
715 /*
716 * Note the index of the partition bound spec for the default
717 * partition. There's no datum to add to the all_bounds array for
718 * this partition.
719 */
720 if (spec->is_default)
721 {
722 default_index = i;
723 continue;
724 }
725
728 all_bounds[ndatums++] = lower;
729 all_bounds[ndatums++] = upper;
730 }
731
732 Assert(ndatums == nparts * 2 ||
733 (default_index != -1 && ndatums == (nparts - 1) * 2));
734
735 /* Sort all the bounds in ascending order */
736 qsort_arg(all_bounds, ndatums,
737 sizeof(PartitionRangeBound *),
739 key);
740
741 /* Save distinct bounds from all_bounds into rbounds. */
742 rbounds = (PartitionRangeBound **)
743 palloc(ndatums * sizeof(PartitionRangeBound *));
744 k = 0;
745 prev = NULL;
746 for (i = 0; i < ndatums; i++)
747 {
748 PartitionRangeBound *cur = all_bounds[i];
749 bool is_distinct = false;
750 int j;
751
752 /* Is the current bound distinct from the previous one? */
753 for (j = 0; j < key->partnatts; j++)
754 {
755 Datum cmpval;
756
757 if (prev == NULL || cur->kind[j] != prev->kind[j])
758 {
759 is_distinct = true;
760 break;
761 }
762
763 /*
764 * If the bounds are both MINVALUE or MAXVALUE, stop now and treat
765 * them as equal, since any values after this point must be
766 * ignored.
767 */
768 if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE)
769 break;
770
771 cmpval = FunctionCall2Coll(&key->partsupfunc[j],
772 key->partcollation[j],
773 cur->datums[j],
774 prev->datums[j]);
775 if (DatumGetInt32(cmpval) != 0)
776 {
777 is_distinct = true;
778 break;
779 }
780 }
781
782 /*
783 * Only if the bound is distinct save it into a temporary array, i.e,
784 * rbounds which is later copied into boundinfo datums array.
785 */
786 if (is_distinct)
787 rbounds[k++] = all_bounds[i];
788
789 prev = cur;
790 }
791
792 pfree(all_bounds);
793
794 /* Update ndatums to hold the count of distinct datums. */
795 ndatums = k;
796
797 /*
798 * Add datums to boundinfo. Canonical indexes are values ranging from 0
799 * to nparts - 1, assigned in that order to each partition's upper bound.
800 * For 'datums' elements that are lower bounds, there is -1 in the
801 * 'indexes' array to signify that no partition exists for the values less
802 * than such a bound and greater than or equal to the previous upper
803 * bound.
804 */
805 boundinfo->ndatums = ndatums;
806 boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
807 boundinfo->kind = (PartitionRangeDatumKind **)
808 palloc(ndatums *
809 sizeof(PartitionRangeDatumKind *));
810 boundinfo->interleaved_parts = NULL;
811
812 /*
813 * For range partitioning, an additional value of -1 is stored as the last
814 * element of the indexes[] array.
815 */
816 boundinfo->nindexes = ndatums + 1;
817 boundinfo->indexes = (int *) palloc((ndatums + 1) * sizeof(int));
818
819 /*
820 * In the loop below, to save from allocating a series of small arrays,
821 * here we just allocate a single array for Datums and another for
822 * PartitionRangeDatumKinds, below we'll just assign a portion of these
823 * arrays in each loop.
824 */
825 partnatts = key->partnatts;
826 boundDatums = (Datum *) palloc(ndatums * partnatts * sizeof(Datum));
827 boundKinds = (PartitionRangeDatumKind *) palloc(ndatums * partnatts *
829
830 for (i = 0; i < ndatums; i++)
831 {
832 int j;
833
834 boundinfo->datums[i] = &boundDatums[i * partnatts];
835 boundinfo->kind[i] = &boundKinds[i * partnatts];
836 for (j = 0; j < partnatts; j++)
837 {
838 if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE)
839 boundinfo->datums[i][j] =
840 datumCopy(rbounds[i]->datums[j],
841 key->parttypbyval[j],
842 key->parttyplen[j]);
843 boundinfo->kind[i][j] = rbounds[i]->kind[j];
844 }
845
846 /*
847 * There is no mapping for invalid indexes.
848 *
849 * Any lower bounds in the rbounds array have invalid indexes
850 * assigned, because the values between the previous bound (if there
851 * is one) and this (lower) bound are not part of the range of any
852 * existing partition.
853 */
854 if (rbounds[i]->lower)
855 boundinfo->indexes[i] = -1;
856 else
857 {
858 int orig_index = rbounds[i]->index;
859
860 /* If the old index has no mapping, assign one */
861 if ((*mapping)[orig_index] == -1)
862 (*mapping)[orig_index] = next_index++;
863
864 boundinfo->indexes[i] = (*mapping)[orig_index];
865 }
866 }
867
868 pfree(rbounds);
869
870 /* Set the canonical value for default_index, if any. */
871 if (default_index != -1)
872 {
873 Assert(default_index >= 0 && (*mapping)[default_index] == -1);
874 (*mapping)[default_index] = next_index++;
875 boundinfo->default_index = (*mapping)[default_index];
876 }
877
878 /* The extra -1 element. */
879 Assert(i == ndatums);
880 boundinfo->indexes[i] = -1;
881
882 /* All partitions must now have been assigned canonical indexes. */
883 Assert(next_index == nparts);
884 return boundinfo;
885}
886
887/*
888 * Are two partition bound collections logically equal?
889 *
890 * Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
891 * This is also useful when b1 and b2 are bound collections of two separate
892 * relations, respectively, because PartitionBoundInfo is a canonical
893 * representation of partition bounds.
894 */
895bool
896partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
898{
899 int i;
900
901 if (b1->strategy != b2->strategy)
902 return false;
903
904 if (b1->ndatums != b2->ndatums)
905 return false;
906
907 if (b1->nindexes != b2->nindexes)
908 return false;
909
910 if (b1->null_index != b2->null_index)
911 return false;
912
913 if (b1->default_index != b2->default_index)
914 return false;
915
916 /* For all partition strategies, the indexes[] arrays have to match */
917 for (i = 0; i < b1->nindexes; i++)
918 {
919 if (b1->indexes[i] != b2->indexes[i])
920 return false;
921 }
922
923 /* Finally, compare the datums[] arrays */
925 {
926 /*
927 * We arrange the partitions in the ascending order of their moduli
928 * and remainders. Also every modulus is factor of next larger
929 * modulus. Therefore we can safely store index of a given partition
930 * in indexes array at remainder of that partition. Also entries at
931 * (remainder + N * modulus) positions in indexes array are all same
932 * for (modulus, remainder) specification for any partition. Thus the
933 * datums arrays from the given bounds are the same, if and only if
934 * their indexes arrays are the same. So, it suffices to compare the
935 * indexes arrays.
936 *
937 * Nonetheless make sure that the bounds are indeed the same when the
938 * indexes match. Hash partition bound stores modulus and remainder
939 * at b1->datums[i][0] and b1->datums[i][1] position respectively.
940 */
941#ifdef USE_ASSERT_CHECKING
942 for (i = 0; i < b1->ndatums; i++)
943 Assert((b1->datums[i][0] == b2->datums[i][0] &&
944 b1->datums[i][1] == b2->datums[i][1]));
945#endif
946 }
947 else
948 {
949 for (i = 0; i < b1->ndatums; i++)
950 {
951 int j;
952
953 for (j = 0; j < partnatts; j++)
954 {
955 /* For range partitions, the bounds might not be finite. */
956 if (b1->kind != NULL)
957 {
958 /* The different kinds of bound all differ from each other */
959 if (b1->kind[i][j] != b2->kind[i][j])
960 return false;
961
962 /*
963 * Non-finite bounds are equal without further
964 * examination.
965 */
966 if (b1->kind[i][j] != PARTITION_RANGE_DATUM_VALUE)
967 continue;
968 }
969
970 /*
971 * Compare the actual values. Note that it would be both
972 * incorrect and unsafe to invoke the comparison operator
973 * derived from the partitioning specification here. It would
974 * be incorrect because we want the relcache entry to be
975 * updated for ANY change to the partition bounds, not just
976 * those that the partitioning operator thinks are
977 * significant. It would be unsafe because we might reach
978 * this code in the context of an aborted transaction, and an
979 * arbitrary partitioning operator might not be safe in that
980 * context. datumIsEqual() should be simple enough to be
981 * safe.
982 */
983 if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j],
984 parttypbyval[j], parttyplen[j]))
985 return false;
986 }
987 }
988 }
989 return true;
990}
991
992/*
993 * Return a copy of given PartitionBoundInfo structure. The data types of bounds
994 * are described by given partition key specification.
995 *
996 * Note: it's important that this function and its callees not do any catalog
997 * access, nor anything else that would result in allocating memory other than
998 * the returned data structure. Since this is called in a long-lived context,
999 * that would result in unwanted memory leaks.
1000 */
1004{
1006 int i;
1007 int ndatums;
1008 int nindexes;
1009 int partnatts;
1010
1012
1013 dest->strategy = src->strategy;
1014 ndatums = dest->ndatums = src->ndatums;
1015 nindexes = dest->nindexes = src->nindexes;
1016 partnatts = key->partnatts;
1017
1018 /* List partitioned tables have only a single partition key. */
1019 Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1);
1020
1021 dest->datums = (Datum **) palloc(sizeof(Datum *) * ndatums);
1022
1023 if (src->kind != NULL && ndatums > 0)
1024 {
1025 PartitionRangeDatumKind *boundKinds;
1026
1027 /* only RANGE partition should have a non-NULL kind */
1028 Assert(key->strategy == PARTITION_STRATEGY_RANGE);
1029
1030 dest->kind = (PartitionRangeDatumKind **) palloc(ndatums *
1031 sizeof(PartitionRangeDatumKind *));
1032
1033 /*
1034 * In the loop below, to save from allocating a series of small arrays
1035 * for storing the PartitionRangeDatumKind, we allocate a single chunk
1036 * here and use a smaller portion of it for each datum.
1037 */
1038 boundKinds = (PartitionRangeDatumKind *) palloc(ndatums * partnatts *
1039 sizeof(PartitionRangeDatumKind));
1040
1041 for (i = 0; i < ndatums; i++)
1042 {
1043 dest->kind[i] = &boundKinds[i * partnatts];
1044 memcpy(dest->kind[i], src->kind[i],
1045 sizeof(PartitionRangeDatumKind) * partnatts);
1046 }
1047 }
1048 else
1049 dest->kind = NULL;
1050
1051 /* copy interleaved partitions for LIST partitioned tables */
1052 dest->interleaved_parts = bms_copy(src->interleaved_parts);
1053
1054 /*
1055 * For hash partitioning, datums array will have two elements - modulus
1056 * and remainder.
1057 */
1058 if (ndatums > 0)
1059 {
1060 bool hash_part = (key->strategy == PARTITION_STRATEGY_HASH);
1061 int natts = hash_part ? 2 : partnatts;
1062 Datum *boundDatums = palloc(ndatums * natts * sizeof(Datum));
1063
1064 for (i = 0; i < ndatums; i++)
1065 {
1066 int j;
1067
1068 dest->datums[i] = &boundDatums[i * natts];
1069
1070 for (j = 0; j < natts; j++)
1071 {
1072 if (dest->kind == NULL ||
1074 {
1075 bool byval;
1076 int typlen;
1077
1078 if (hash_part)
1079 {
1080 typlen = sizeof(int32); /* Always int4 */
1081 byval = true; /* int4 is pass-by-value */
1082 }
1083 else
1084 {
1085 byval = key->parttypbyval[j];
1086 typlen = key->parttyplen[j];
1087 }
1088 dest->datums[i][j] = datumCopy(src->datums[i][j],
1089 byval, typlen);
1090 }
1091 }
1092 }
1093 }
1094
1095 dest->indexes = (int *) palloc(sizeof(int) * nindexes);
1096 memcpy(dest->indexes, src->indexes, sizeof(int) * nindexes);
1097
1098 dest->null_index = src->null_index;
1099 dest->default_index = src->default_index;
1100
1101 return dest;
1102}
1103
1104/*
1105 * partition_bounds_merge
1106 * Check to see whether every partition of 'outer_rel' matches/overlaps
1107 * one partition of 'inner_rel' at most, and vice versa; and if so, build
1108 * and return the partition bounds for a join relation between the rels,
1109 * generating two lists of the matching/overlapping partitions, which are
1110 * returned to *outer_parts and *inner_parts respectively.
1111 *
1112 * The lists contain the same number of partitions, and the partitions at the
1113 * same positions in the lists indicate join pairs used for partitioned join.
1114 * If a partition on one side matches/overlaps multiple partitions on the other
1115 * side, this function returns NULL, setting *outer_parts and *inner_parts to
1116 * NIL.
1117 */
1120 FmgrInfo *partsupfunc, Oid *partcollation,
1121 RelOptInfo *outer_rel, RelOptInfo *inner_rel,
1122 JoinType jointype,
1123 List **outer_parts, List **inner_parts)
1124{
1125 /*
1126 * Currently, this function is called only from try_partitionwise_join(),
1127 * so the join type should be INNER, LEFT, FULL, SEMI, or ANTI.
1128 */
1129 Assert(jointype == JOIN_INNER || jointype == JOIN_LEFT ||
1130 jointype == JOIN_FULL || jointype == JOIN_SEMI ||
1131 jointype == JOIN_ANTI);
1132
1133 /* The partitioning strategies should be the same. */
1134 Assert(outer_rel->boundinfo->strategy == inner_rel->boundinfo->strategy);
1135
1136 *outer_parts = *inner_parts = NIL;
1137 switch (outer_rel->boundinfo->strategy)
1138 {
1140
1141 /*
1142 * For hash partitioned tables, we currently support partitioned
1143 * join only when they have exactly the same partition bounds.
1144 *
1145 * XXX: it might be possible to relax the restriction to support
1146 * cases where hash partitioned tables have missing partitions
1147 * and/or different moduli, but it's not clear if it would be
1148 * useful to support the former case since it's unusual to have
1149 * missing partitions. On the other hand, it would be useful to
1150 * support the latter case, but in that case, there is a high
1151 * probability that a partition on one side will match multiple
1152 * partitions on the other side, which is the scenario the current
1153 * implementation of partitioned join can't handle.
1154 */
1155 return NULL;
1156
1158 return merge_list_bounds(partsupfunc,
1159 partcollation,
1160 outer_rel,
1161 inner_rel,
1162 jointype,
1163 outer_parts,
1164 inner_parts);
1165
1167 return merge_range_bounds(partnatts,
1168 partsupfunc,
1169 partcollation,
1170 outer_rel,
1171 inner_rel,
1172 jointype,
1173 outer_parts,
1174 inner_parts);
1175 }
1176
1177 return NULL;
1178}
1179
1180/*
1181 * merge_list_bounds
1182 * Create the partition bounds for a join relation between list
1183 * partitioned tables, if possible
1184 *
1185 * In this function we try to find sets of matching partitions from both sides
1186 * by comparing list values stored in their partition bounds. Since the list
1187 * values appear in the ascending order, an algorithm similar to merge join is
1188 * used for that. If a partition on one side doesn't have a matching
1189 * partition on the other side, the algorithm tries to match it with the
1190 * default partition on the other side if any; if not, the algorithm tries to
1191 * match it with a dummy partition on the other side if it's on the
1192 * non-nullable side of an outer join. Also, if both sides have the default
1193 * partitions, the algorithm tries to match them with each other. We give up
1194 * if the algorithm finds a partition matching multiple partitions on the
1195 * other side, which is the scenario the current implementation of partitioned
1196 * join can't handle.
1197 */
1198static PartitionBoundInfo
1199merge_list_bounds(FmgrInfo *partsupfunc, Oid *partcollation,
1200 RelOptInfo *outer_rel, RelOptInfo *inner_rel,
1201 JoinType jointype,
1202 List **outer_parts, List **inner_parts)
1203{
1204 PartitionBoundInfo merged_bounds = NULL;
1205 PartitionBoundInfo outer_bi = outer_rel->boundinfo;
1206 PartitionBoundInfo inner_bi = inner_rel->boundinfo;
1207 bool outer_has_default = partition_bound_has_default(outer_bi);
1208 bool inner_has_default = partition_bound_has_default(inner_bi);
1209 int outer_default = outer_bi->default_index;
1210 int inner_default = inner_bi->default_index;
1211 bool outer_has_null = partition_bound_accepts_nulls(outer_bi);
1212 bool inner_has_null = partition_bound_accepts_nulls(inner_bi);
1213 PartitionMap outer_map;
1214 PartitionMap inner_map;
1215 int outer_pos;
1216 int inner_pos;
1217 int next_index = 0;
1218 int null_index = -1;
1219 int default_index = -1;
1220 List *merged_datums = NIL;
1221 List *merged_indexes = NIL;
1222
1223 Assert(*outer_parts == NIL);
1224 Assert(*inner_parts == NIL);
1225 Assert(outer_bi->strategy == inner_bi->strategy &&
1226 outer_bi->strategy == PARTITION_STRATEGY_LIST);
1227 /* List partitioning doesn't require kinds. */
1228 Assert(!outer_bi->kind && !inner_bi->kind);
1229
1230 init_partition_map(outer_rel, &outer_map);
1231 init_partition_map(inner_rel, &inner_map);
1232
1233 /*
1234 * If the default partitions (if any) have been proven empty, deem them
1235 * non-existent.
1236 */
1237 if (outer_has_default && is_dummy_partition(outer_rel, outer_default))
1238 outer_has_default = false;
1239 if (inner_has_default && is_dummy_partition(inner_rel, inner_default))
1240 inner_has_default = false;
1241
1242 /*
1243 * Merge partitions from both sides. In each iteration we compare a pair
1244 * of list values, one from each side, and decide whether the
1245 * corresponding partitions match or not. If the two values match
1246 * exactly, move to the next pair of list values, otherwise move to the
1247 * next list value on the side with a smaller list value.
1248 */
1249 outer_pos = inner_pos = 0;
1250 while (outer_pos < outer_bi->ndatums || inner_pos < inner_bi->ndatums)
1251 {
1252 int outer_index = -1;
1253 int inner_index = -1;
1254 Datum *outer_datums;
1255 Datum *inner_datums;
1256 int cmpval;
1257 Datum *merged_datum = NULL;
1258 int merged_index = -1;
1259
1260 if (outer_pos < outer_bi->ndatums)
1261 {
1262 /*
1263 * If the partition on the outer side has been proven empty,
1264 * ignore it and move to the next datum on the outer side.
1265 */
1266 outer_index = outer_bi->indexes[outer_pos];
1267 if (is_dummy_partition(outer_rel, outer_index))
1268 {
1269 outer_pos++;
1270 continue;
1271 }
1272 }
1273 if (inner_pos < inner_bi->ndatums)
1274 {
1275 /*
1276 * If the partition on the inner side has been proven empty,
1277 * ignore it and move to the next datum on the inner side.
1278 */
1279 inner_index = inner_bi->indexes[inner_pos];
1280 if (is_dummy_partition(inner_rel, inner_index))
1281 {
1282 inner_pos++;
1283 continue;
1284 }
1285 }
1286
1287 /* Get the list values. */
1288 outer_datums = outer_pos < outer_bi->ndatums ?
1289 outer_bi->datums[outer_pos] : NULL;
1290 inner_datums = inner_pos < inner_bi->ndatums ?
1291 inner_bi->datums[inner_pos] : NULL;
1292
1293 /*
1294 * We run this loop till both sides finish. This allows us to avoid
1295 * duplicating code to handle the remaining values on the side which
1296 * finishes later. For that we set the comparison parameter cmpval in
1297 * such a way that it appears as if the side which finishes earlier
1298 * has an extra value higher than any other value on the unfinished
1299 * side. That way we advance the values on the unfinished side till
1300 * all of its values are exhausted.
1301 */
1302 if (outer_pos >= outer_bi->ndatums)
1303 cmpval = 1;
1304 else if (inner_pos >= inner_bi->ndatums)
1305 cmpval = -1;
1306 else
1307 {
1308 Assert(outer_datums != NULL && inner_datums != NULL);
1309 cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0],
1310 partcollation[0],
1311 outer_datums[0],
1312 inner_datums[0]));
1313 }
1314
1315 if (cmpval == 0)
1316 {
1317 /* Two list values match exactly. */
1318 Assert(outer_pos < outer_bi->ndatums);
1319 Assert(inner_pos < inner_bi->ndatums);
1320 Assert(outer_index >= 0);
1321 Assert(inner_index >= 0);
1322
1323 /*
1324 * Try merging both partitions. If successful, add the list value
1325 * and index of the merged partition below.
1326 */
1327 merged_index = merge_matching_partitions(&outer_map, &inner_map,
1328 outer_index, inner_index,
1329 &next_index);
1330 if (merged_index == -1)
1331 goto cleanup;
1332
1333 merged_datum = outer_datums;
1334
1335 /* Move to the next pair of list values. */
1336 outer_pos++;
1337 inner_pos++;
1338 }
1339 else if (cmpval < 0)
1340 {
1341 /* A list value missing from the inner side. */
1342 Assert(outer_pos < outer_bi->ndatums);
1343
1344 /*
1345 * If the inner side has the default partition, or this is an
1346 * outer join, try to assign a merged partition to the outer
1347 * partition (see process_outer_partition()). Otherwise, the
1348 * outer partition will not contribute to the result.
1349 */
1350 if (inner_has_default || IS_OUTER_JOIN(jointype))
1351 {
1352 /* Get the outer partition. */
1353 outer_index = outer_bi->indexes[outer_pos];
1354 Assert(outer_index >= 0);
1355 merged_index = process_outer_partition(&outer_map,
1356 &inner_map,
1357 outer_has_default,
1358 inner_has_default,
1359 outer_index,
1360 inner_default,
1361 jointype,
1362 &next_index,
1363 &default_index);
1364 if (merged_index == -1)
1365 goto cleanup;
1366 merged_datum = outer_datums;
1367 }
1368
1369 /* Move to the next list value on the outer side. */
1370 outer_pos++;
1371 }
1372 else
1373 {
1374 /* A list value missing from the outer side. */
1375 Assert(cmpval > 0);
1376 Assert(inner_pos < inner_bi->ndatums);
1377
1378 /*
1379 * If the outer side has the default partition, or this is a FULL
1380 * join, try to assign a merged partition to the inner partition
1381 * (see process_inner_partition()). Otherwise, the inner
1382 * partition will not contribute to the result.
1383 */
1384 if (outer_has_default || jointype == JOIN_FULL)
1385 {
1386 /* Get the inner partition. */
1387 inner_index = inner_bi->indexes[inner_pos];
1388 Assert(inner_index >= 0);
1389 merged_index = process_inner_partition(&outer_map,
1390 &inner_map,
1391 outer_has_default,
1392 inner_has_default,
1393 inner_index,
1394 outer_default,
1395 jointype,
1396 &next_index,
1397 &default_index);
1398 if (merged_index == -1)
1399 goto cleanup;
1400 merged_datum = inner_datums;
1401 }
1402
1403 /* Move to the next list value on the inner side. */
1404 inner_pos++;
1405 }
1406
1407 /*
1408 * If we assigned a merged partition, add the list value and index of
1409 * the merged partition if appropriate.
1410 */
1411 if (merged_index >= 0 && merged_index != default_index)
1412 {
1413 merged_datums = lappend(merged_datums, merged_datum);
1414 merged_indexes = lappend_int(merged_indexes, merged_index);
1415 }
1416 }
1417
1418 /*
1419 * If the NULL partitions (if any) have been proven empty, deem them
1420 * non-existent.
1421 */
1422 if (outer_has_null &&
1423 is_dummy_partition(outer_rel, outer_bi->null_index))
1424 outer_has_null = false;
1425 if (inner_has_null &&
1426 is_dummy_partition(inner_rel, inner_bi->null_index))
1427 inner_has_null = false;
1428
1429 /* Merge the NULL partitions if any. */
1430 if (outer_has_null || inner_has_null)
1431 merge_null_partitions(&outer_map, &inner_map,
1432 outer_has_null, inner_has_null,
1433 outer_bi->null_index, inner_bi->null_index,
1434 jointype, &next_index, &null_index);
1435 else
1436 Assert(null_index == -1);
1437
1438 /* Merge the default partitions if any. */
1439 if (outer_has_default || inner_has_default)
1440 merge_default_partitions(&outer_map, &inner_map,
1441 outer_has_default, inner_has_default,
1442 outer_default, inner_default,
1443 jointype, &next_index, &default_index);
1444 else
1445 Assert(default_index == -1);
1446
1447 /* If we have merged partitions, create the partition bounds. */
1448 if (next_index > 0)
1449 {
1450 /* Fix the merged_indexes list if necessary. */
1451 if (outer_map.did_remapping || inner_map.did_remapping)
1452 {
1453 Assert(jointype == JOIN_FULL);
1454 fix_merged_indexes(&outer_map, &inner_map,
1455 next_index, merged_indexes);
1456 }
1457
1458 /* Use maps to match partitions from inputs. */
1459 generate_matching_part_pairs(outer_rel, inner_rel,
1460 &outer_map, &inner_map,
1461 next_index,
1462 outer_parts, inner_parts);
1463 Assert(*outer_parts != NIL);
1464 Assert(*inner_parts != NIL);
1465 Assert(list_length(*outer_parts) == list_length(*inner_parts));
1466 Assert(list_length(*outer_parts) <= next_index);
1467
1468 /* Make a PartitionBoundInfo struct to return. */
1469 merged_bounds = build_merged_partition_bounds(outer_bi->strategy,
1470 merged_datums,
1471 NIL,
1472 merged_indexes,
1473 null_index,
1474 default_index);
1475 Assert(merged_bounds);
1476 }
1477
1478cleanup:
1479 /* Free local memory before returning. */
1480 list_free(merged_datums);
1481 list_free(merged_indexes);
1482 free_partition_map(&outer_map);
1483 free_partition_map(&inner_map);
1484
1485 return merged_bounds;
1486}
1487
1488/*
1489 * merge_range_bounds
1490 * Create the partition bounds for a join relation between range
1491 * partitioned tables, if possible
1492 *
1493 * In this function we try to find sets of overlapping partitions from both
1494 * sides by comparing ranges stored in their partition bounds. Since the
1495 * ranges appear in the ascending order, an algorithm similar to merge join is
1496 * used for that. If a partition on one side doesn't have an overlapping
1497 * partition on the other side, the algorithm tries to match it with the
1498 * default partition on the other side if any; if not, the algorithm tries to
1499 * match it with a dummy partition on the other side if it's on the
1500 * non-nullable side of an outer join. Also, if both sides have the default
1501 * partitions, the algorithm tries to match them with each other. We give up
1502 * if the algorithm finds a partition overlapping multiple partitions on the
1503 * other side, which is the scenario the current implementation of partitioned
1504 * join can't handle.
1505 */
1506static PartitionBoundInfo
1507merge_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
1508 Oid *partcollations,
1509 RelOptInfo *outer_rel, RelOptInfo *inner_rel,
1510 JoinType jointype,
1511 List **outer_parts, List **inner_parts)
1512{
1513 PartitionBoundInfo merged_bounds = NULL;
1514 PartitionBoundInfo outer_bi = outer_rel->boundinfo;
1515 PartitionBoundInfo inner_bi = inner_rel->boundinfo;
1516 bool outer_has_default = partition_bound_has_default(outer_bi);
1517 bool inner_has_default = partition_bound_has_default(inner_bi);
1518 int outer_default = outer_bi->default_index;
1519 int inner_default = inner_bi->default_index;
1520 PartitionMap outer_map;
1521 PartitionMap inner_map;
1522 int outer_index;
1523 int inner_index;
1524 int outer_lb_pos;
1525 int inner_lb_pos;
1526 PartitionRangeBound outer_lb;
1527 PartitionRangeBound outer_ub;
1528 PartitionRangeBound inner_lb;
1529 PartitionRangeBound inner_ub;
1530 int next_index = 0;
1531 int default_index = -1;
1532 List *merged_datums = NIL;
1533 List *merged_kinds = NIL;
1534 List *merged_indexes = NIL;
1535
1536 Assert(*outer_parts == NIL);
1537 Assert(*inner_parts == NIL);
1538 Assert(outer_bi->strategy == inner_bi->strategy &&
1539 outer_bi->strategy == PARTITION_STRATEGY_RANGE);
1540
1541 init_partition_map(outer_rel, &outer_map);
1542 init_partition_map(inner_rel, &inner_map);
1543
1544 /*
1545 * If the default partitions (if any) have been proven empty, deem them
1546 * non-existent.
1547 */
1548 if (outer_has_default && is_dummy_partition(outer_rel, outer_default))
1549 outer_has_default = false;
1550 if (inner_has_default && is_dummy_partition(inner_rel, inner_default))
1551 inner_has_default = false;
1552
1553 /*
1554 * Merge partitions from both sides. In each iteration we compare a pair
1555 * of ranges, one from each side, and decide whether the corresponding
1556 * partitions match or not. If the two ranges overlap, move to the next
1557 * pair of ranges, otherwise move to the next range on the side with a
1558 * lower range. outer_lb_pos/inner_lb_pos keep track of the positions of
1559 * lower bounds in the datums arrays in the outer/inner
1560 * PartitionBoundInfos respectively.
1561 */
1562 outer_lb_pos = inner_lb_pos = 0;
1563 outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
1564 &outer_lb, &outer_ub);
1565 inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
1566 &inner_lb, &inner_ub);
1567 while (outer_index >= 0 || inner_index >= 0)
1568 {
1569 bool overlap;
1570 int ub_cmpval;
1571 int lb_cmpval;
1572 PartitionRangeBound merged_lb = {-1, NULL, NULL, true};
1573 PartitionRangeBound merged_ub = {-1, NULL, NULL, false};
1574 int merged_index = -1;
1575
1576 /*
1577 * We run this loop till both sides finish. This allows us to avoid
1578 * duplicating code to handle the remaining ranges on the side which
1579 * finishes later. For that we set the comparison parameter cmpval in
1580 * such a way that it appears as if the side which finishes earlier
1581 * has an extra range higher than any other range on the unfinished
1582 * side. That way we advance the ranges on the unfinished side till
1583 * all of its ranges are exhausted.
1584 */
1585 if (outer_index == -1)
1586 {
1587 overlap = false;
1588 lb_cmpval = 1;
1589 ub_cmpval = 1;
1590 }
1591 else if (inner_index == -1)
1592 {
1593 overlap = false;
1594 lb_cmpval = -1;
1595 ub_cmpval = -1;
1596 }
1597 else
1598 overlap = compare_range_partitions(partnatts, partsupfuncs,
1599 partcollations,
1600 &outer_lb, &outer_ub,
1601 &inner_lb, &inner_ub,
1602 &lb_cmpval, &ub_cmpval);
1603
1604 if (overlap)
1605 {
1606 /* Two ranges overlap; form a join pair. */
1607
1608 PartitionRangeBound save_outer_ub;
1609 PartitionRangeBound save_inner_ub;
1610
1611 /* Both partitions should not have been merged yet. */
1612 Assert(outer_index >= 0);
1613 Assert(outer_map.merged_indexes[outer_index] == -1 &&
1614 outer_map.merged[outer_index] == false);
1615 Assert(inner_index >= 0);
1616 Assert(inner_map.merged_indexes[inner_index] == -1 &&
1617 inner_map.merged[inner_index] == false);
1618
1619 /*
1620 * Get the index of the merged partition. Both partitions aren't
1621 * merged yet, so the partitions should be merged successfully.
1622 */
1623 merged_index = merge_matching_partitions(&outer_map, &inner_map,
1624 outer_index, inner_index,
1625 &next_index);
1626 Assert(merged_index >= 0);
1627
1628 /* Get the range bounds of the merged partition. */
1629 get_merged_range_bounds(partnatts, partsupfuncs,
1630 partcollations, jointype,
1631 &outer_lb, &outer_ub,
1632 &inner_lb, &inner_ub,
1633 lb_cmpval, ub_cmpval,
1634 &merged_lb, &merged_ub);
1635
1636 /* Save the upper bounds of both partitions for use below. */
1637 save_outer_ub = outer_ub;
1638 save_inner_ub = inner_ub;
1639
1640 /* Move to the next pair of ranges. */
1641 outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
1642 &outer_lb, &outer_ub);
1643 inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
1644 &inner_lb, &inner_ub);
1645
1646 /*
1647 * If the range of a partition on one side overlaps the range of
1648 * the next partition on the other side, that will cause the
1649 * partition on one side to match at least two partitions on the
1650 * other side, which is the case that we currently don't support
1651 * partitioned join for; give up.
1652 */
1653 if (ub_cmpval > 0 && inner_index >= 0 &&
1654 compare_range_bounds(partnatts, partsupfuncs, partcollations,
1655 &save_outer_ub, &inner_lb) > 0)
1656 goto cleanup;
1657 if (ub_cmpval < 0 && outer_index >= 0 &&
1658 compare_range_bounds(partnatts, partsupfuncs, partcollations,
1659 &outer_lb, &save_inner_ub) < 0)
1660 goto cleanup;
1661
1662 /*
1663 * A row from a non-overlapping portion (if any) of a partition on
1664 * one side might find its join partner in the default partition
1665 * (if any) on the other side, causing the same situation as
1666 * above; give up in that case.
1667 */
1668 if ((outer_has_default && (lb_cmpval > 0 || ub_cmpval < 0)) ||
1669 (inner_has_default && (lb_cmpval < 0 || ub_cmpval > 0)))
1670 goto cleanup;
1671 }
1672 else if (ub_cmpval < 0)
1673 {
1674 /* A non-overlapping outer range. */
1675
1676 /* The outer partition should not have been merged yet. */
1677 Assert(outer_index >= 0);
1678 Assert(outer_map.merged_indexes[outer_index] == -1 &&
1679 outer_map.merged[outer_index] == false);
1680
1681 /*
1682 * If the inner side has the default partition, or this is an
1683 * outer join, try to assign a merged partition to the outer
1684 * partition (see process_outer_partition()). Otherwise, the
1685 * outer partition will not contribute to the result.
1686 */
1687 if (inner_has_default || IS_OUTER_JOIN(jointype))
1688 {
1689 merged_index = process_outer_partition(&outer_map,
1690 &inner_map,
1691 outer_has_default,
1692 inner_has_default,
1693 outer_index,
1694 inner_default,
1695 jointype,
1696 &next_index,
1697 &default_index);
1698 if (merged_index == -1)
1699 goto cleanup;
1700 merged_lb = outer_lb;
1701 merged_ub = outer_ub;
1702 }
1703
1704 /* Move to the next range on the outer side. */
1705 outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
1706 &outer_lb, &outer_ub);
1707 }
1708 else
1709 {
1710 /* A non-overlapping inner range. */
1711 Assert(ub_cmpval > 0);
1712
1713 /* The inner partition should not have been merged yet. */
1714 Assert(inner_index >= 0);
1715 Assert(inner_map.merged_indexes[inner_index] == -1 &&
1716 inner_map.merged[inner_index] == false);
1717
1718 /*
1719 * If the outer side has the default partition, or this is a FULL
1720 * join, try to assign a merged partition to the inner partition
1721 * (see process_inner_partition()). Otherwise, the inner
1722 * partition will not contribute to the result.
1723 */
1724 if (outer_has_default || jointype == JOIN_FULL)
1725 {
1726 merged_index = process_inner_partition(&outer_map,
1727 &inner_map,
1728 outer_has_default,
1729 inner_has_default,
1730 inner_index,
1731 outer_default,
1732 jointype,
1733 &next_index,
1734 &default_index);
1735 if (merged_index == -1)
1736 goto cleanup;
1737 merged_lb = inner_lb;
1738 merged_ub = inner_ub;
1739 }
1740
1741 /* Move to the next range on the inner side. */
1742 inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
1743 &inner_lb, &inner_ub);
1744 }
1745
1746 /*
1747 * If we assigned a merged partition, add the range bounds and index
1748 * of the merged partition if appropriate.
1749 */
1750 if (merged_index >= 0 && merged_index != default_index)
1751 add_merged_range_bounds(partnatts, partsupfuncs, partcollations,
1752 &merged_lb, &merged_ub, merged_index,
1753 &merged_datums, &merged_kinds,
1754 &merged_indexes);
1755 }
1756
1757 /* Merge the default partitions if any. */
1758 if (outer_has_default || inner_has_default)
1759 merge_default_partitions(&outer_map, &inner_map,
1760 outer_has_default, inner_has_default,
1761 outer_default, inner_default,
1762 jointype, &next_index, &default_index);
1763 else
1764 Assert(default_index == -1);
1765
1766 /* If we have merged partitions, create the partition bounds. */
1767 if (next_index > 0)
1768 {
1769 /*
1770 * Unlike the case of list partitioning, we wouldn't have re-merged
1771 * partitions, so did_remapping should be left alone.
1772 */
1773 Assert(!outer_map.did_remapping);
1774 Assert(!inner_map.did_remapping);
1775
1776 /* Use maps to match partitions from inputs. */
1777 generate_matching_part_pairs(outer_rel, inner_rel,
1778 &outer_map, &inner_map,
1779 next_index,
1780 outer_parts, inner_parts);
1781 Assert(*outer_parts != NIL);
1782 Assert(*inner_parts != NIL);
1783 Assert(list_length(*outer_parts) == list_length(*inner_parts));
1784 Assert(list_length(*outer_parts) == next_index);
1785
1786 /* Make a PartitionBoundInfo struct to return. */
1787 merged_bounds = build_merged_partition_bounds(outer_bi->strategy,
1788 merged_datums,
1789 merged_kinds,
1790 merged_indexes,
1791 -1,
1792 default_index);
1793 Assert(merged_bounds);
1794 }
1795
1796cleanup:
1797 /* Free local memory before returning. */
1798 list_free(merged_datums);
1799 list_free(merged_kinds);
1800 list_free(merged_indexes);
1801 free_partition_map(&outer_map);
1802 free_partition_map(&inner_map);
1803
1804 return merged_bounds;
1805}
1806
1807/*
1808 * init_partition_map
1809 * Initialize a PartitionMap struct for given relation
1810 */
1811static void
1813{
1814 int nparts = rel->nparts;
1815 int i;
1816
1817 map->nparts = nparts;
1818 map->merged_indexes = (int *) palloc(sizeof(int) * nparts);
1819 map->merged = (bool *) palloc(sizeof(bool) * nparts);
1820 map->did_remapping = false;
1821 map->old_indexes = (int *) palloc(sizeof(int) * nparts);
1822 for (i = 0; i < nparts; i++)
1823 {
1824 map->merged_indexes[i] = map->old_indexes[i] = -1;
1825 map->merged[i] = false;
1826 }
1827}
1828
1829/*
1830 * free_partition_map
1831 */
1832static void
1834{
1835 pfree(map->merged_indexes);
1836 pfree(map->merged);
1837 pfree(map->old_indexes);
1838}
1839
1840/*
1841 * is_dummy_partition --- has partition been proven empty?
1842 */
1843static bool
1844is_dummy_partition(RelOptInfo *rel, int part_index)
1845{
1846 RelOptInfo *part_rel;
1847
1848 Assert(part_index >= 0);
1849 part_rel = rel->part_rels[part_index];
1850 if (part_rel == NULL || IS_DUMMY_REL(part_rel))
1851 return true;
1852 return false;
1853}
1854
1855/*
1856 * merge_matching_partitions
1857 * Try to merge given outer/inner partitions, and return the index of a
1858 * merged partition produced from them if successful, -1 otherwise
1859 *
1860 * If the merged partition is newly created, *next_index is incremented.
1861 */
1862static int
1864 int outer_index, int inner_index, int *next_index)
1865{
1866 int outer_merged_index;
1867 int inner_merged_index;
1868 bool outer_merged;
1869 bool inner_merged;
1870
1871 Assert(outer_index >= 0 && outer_index < outer_map->nparts);
1872 outer_merged_index = outer_map->merged_indexes[outer_index];
1873 outer_merged = outer_map->merged[outer_index];
1874 Assert(inner_index >= 0 && inner_index < inner_map->nparts);
1875 inner_merged_index = inner_map->merged_indexes[inner_index];
1876 inner_merged = inner_map->merged[inner_index];
1877
1878 /*
1879 * Handle cases where we have already assigned a merged partition to each
1880 * of the given partitions.
1881 */
1882 if (outer_merged_index >= 0 && inner_merged_index >= 0)
1883 {
1884 /*
1885 * If the merged partitions are the same, no need to do anything;
1886 * return the index of the merged partitions. Otherwise, if each of
1887 * the given partitions has been merged with a dummy partition on the
1888 * other side, re-map them to either of the two merged partitions.
1889 * Otherwise, they can't be merged, so return -1.
1890 */
1891 if (outer_merged_index == inner_merged_index)
1892 {
1893 Assert(outer_merged);
1894 Assert(inner_merged);
1895 return outer_merged_index;
1896 }
1897 if (!outer_merged && !inner_merged)
1898 {
1899 /*
1900 * This can only happen for a list-partitioning case. We re-map
1901 * them to the merged partition with the smaller of the two merged
1902 * indexes to preserve the property that the canonical order of
1903 * list partitions is determined by the indexes assigned to the
1904 * smallest list value of each partition.
1905 */
1906 if (outer_merged_index < inner_merged_index)
1907 {
1908 outer_map->merged[outer_index] = true;
1909 inner_map->merged_indexes[inner_index] = outer_merged_index;
1910 inner_map->merged[inner_index] = true;
1911 inner_map->did_remapping = true;
1912 inner_map->old_indexes[inner_index] = inner_merged_index;
1913 return outer_merged_index;
1914 }
1915 else
1916 {
1917 inner_map->merged[inner_index] = true;
1918 outer_map->merged_indexes[outer_index] = inner_merged_index;
1919 outer_map->merged[outer_index] = true;
1920 outer_map->did_remapping = true;
1921 outer_map->old_indexes[outer_index] = outer_merged_index;
1922 return inner_merged_index;
1923 }
1924 }
1925 return -1;
1926 }
1927
1928 /* At least one of the given partitions should not have yet been merged. */
1929 Assert(outer_merged_index == -1 || inner_merged_index == -1);
1930
1931 /*
1932 * If neither of them has been merged, merge them. Otherwise, if one has
1933 * been merged with a dummy partition on the other side (and the other
1934 * hasn't yet been merged with anything), re-merge them. Otherwise, they
1935 * can't be merged, so return -1.
1936 */
1937 if (outer_merged_index == -1 && inner_merged_index == -1)
1938 {
1939 int merged_index = *next_index;
1940
1941 Assert(!outer_merged);
1942 Assert(!inner_merged);
1943 outer_map->merged_indexes[outer_index] = merged_index;
1944 outer_map->merged[outer_index] = true;
1945 inner_map->merged_indexes[inner_index] = merged_index;
1946 inner_map->merged[inner_index] = true;
1947 *next_index = *next_index + 1;
1948 return merged_index;
1949 }
1950 if (outer_merged_index >= 0 && !outer_map->merged[outer_index])
1951 {
1952 Assert(inner_merged_index == -1);
1953 Assert(!inner_merged);
1954 inner_map->merged_indexes[inner_index] = outer_merged_index;
1955 inner_map->merged[inner_index] = true;
1956 outer_map->merged[outer_index] = true;
1957 return outer_merged_index;
1958 }
1959 if (inner_merged_index >= 0 && !inner_map->merged[inner_index])
1960 {
1961 Assert(outer_merged_index == -1);
1962 Assert(!outer_merged);
1963 outer_map->merged_indexes[outer_index] = inner_merged_index;
1964 outer_map->merged[outer_index] = true;
1965 inner_map->merged[inner_index] = true;
1966 return inner_merged_index;
1967 }
1968 return -1;
1969}
1970
1971/*
1972 * process_outer_partition
1973 * Try to assign given outer partition a merged partition, and return the
1974 * index of the merged partition if successful, -1 otherwise
1975 *
1976 * If the partition is newly created, *next_index is incremented. Also, if it
1977 * is the default partition of the join relation, *default_index is set to the
1978 * index if not already done.
1979 */
1980static int
1982 PartitionMap *inner_map,
1983 bool outer_has_default,
1984 bool inner_has_default,
1985 int outer_index,
1986 int inner_default,
1987 JoinType jointype,
1988 int *next_index,
1989 int *default_index)
1990{
1991 int merged_index = -1;
1992
1993 Assert(outer_index >= 0);
1994
1995 /*
1996 * If the inner side has the default partition, a row from the outer
1997 * partition might find its join partner in the default partition; try
1998 * merging the outer partition with the default partition. Otherwise,
1999 * this should be an outer join, in which case the outer partition has to
2000 * be scanned all the way anyway; merge the outer partition with a dummy
2001 * partition on the other side.
2002 */
2003 if (inner_has_default)
2004 {
2005 Assert(inner_default >= 0);
2006
2007 /*
2008 * If the outer side has the default partition as well, the default
2009 * partition on the inner side will have two matching partitions on
2010 * the other side: the outer partition and the default partition on
2011 * the outer side. Partitionwise join doesn't handle this scenario
2012 * yet.
2013 */
2014 if (outer_has_default)
2015 return -1;
2016
2017 merged_index = merge_matching_partitions(outer_map, inner_map,
2018 outer_index, inner_default,
2019 next_index);
2020 if (merged_index == -1)
2021 return -1;
2022
2023 /*
2024 * If this is a FULL join, the default partition on the inner side has
2025 * to be scanned all the way anyway, so the resulting partition will
2026 * contain all key values from the default partition, which any other
2027 * partition of the join relation will not contain. Thus the
2028 * resulting partition will act as the default partition of the join
2029 * relation; record the index in *default_index if not already done.
2030 */
2031 if (jointype == JOIN_FULL)
2032 {
2033 if (*default_index == -1)
2034 *default_index = merged_index;
2035 else
2036 Assert(*default_index == merged_index);
2037 }
2038 }
2039 else
2040 {
2041 Assert(IS_OUTER_JOIN(jointype));
2042 Assert(jointype != JOIN_RIGHT);
2043
2044 /* If we have already assigned a partition, no need to do anything. */
2045 merged_index = outer_map->merged_indexes[outer_index];
2046 if (merged_index == -1)
2047 merged_index = merge_partition_with_dummy(outer_map, outer_index,
2048 next_index);
2049 }
2050 return merged_index;
2051}
2052
2053/*
2054 * process_inner_partition
2055 * Try to assign given inner partition a merged partition, and return the
2056 * index of the merged partition if successful, -1 otherwise
2057 *
2058 * If the partition is newly created, *next_index is incremented. Also, if it
2059 * is the default partition of the join relation, *default_index is set to the
2060 * index if not already done.
2061 */
2062static int
2064 PartitionMap *inner_map,
2065 bool outer_has_default,
2066 bool inner_has_default,
2067 int inner_index,
2068 int outer_default,
2069 JoinType jointype,
2070 int *next_index,
2071 int *default_index)
2072{
2073 int merged_index = -1;
2074
2075 Assert(inner_index >= 0);
2076
2077 /*
2078 * If the outer side has the default partition, a row from the inner
2079 * partition might find its join partner in the default partition; try
2080 * merging the inner partition with the default partition. Otherwise,
2081 * this should be a FULL join, in which case the inner partition has to be
2082 * scanned all the way anyway; merge the inner partition with a dummy
2083 * partition on the other side.
2084 */
2085 if (outer_has_default)
2086 {
2087 Assert(outer_default >= 0);
2088
2089 /*
2090 * If the inner side has the default partition as well, the default
2091 * partition on the outer side will have two matching partitions on
2092 * the other side: the inner partition and the default partition on
2093 * the inner side. Partitionwise join doesn't handle this scenario
2094 * yet.
2095 */
2096 if (inner_has_default)
2097 return -1;
2098
2099 merged_index = merge_matching_partitions(outer_map, inner_map,
2100 outer_default, inner_index,
2101 next_index);
2102 if (merged_index == -1)
2103 return -1;
2104
2105 /*
2106 * If this is an outer join, the default partition on the outer side
2107 * has to be scanned all the way anyway, so the resulting partition
2108 * will contain all key values from the default partition, which any
2109 * other partition of the join relation will not contain. Thus the
2110 * resulting partition will act as the default partition of the join
2111 * relation; record the index in *default_index if not already done.
2112 */
2113 if (IS_OUTER_JOIN(jointype))
2114 {
2115 Assert(jointype != JOIN_RIGHT);
2116 if (*default_index == -1)
2117 *default_index = merged_index;
2118 else
2119 Assert(*default_index == merged_index);
2120 }
2121 }
2122 else
2123 {
2124 Assert(jointype == JOIN_FULL);
2125
2126 /* If we have already assigned a partition, no need to do anything. */
2127 merged_index = inner_map->merged_indexes[inner_index];
2128 if (merged_index == -1)
2129 merged_index = merge_partition_with_dummy(inner_map, inner_index,
2130 next_index);
2131 }
2132 return merged_index;
2133}
2134
2135/*
2136 * merge_null_partitions
2137 * Merge the NULL partitions from a join's outer and inner sides.
2138 *
2139 * If the merged partition produced from them is the NULL partition of the join
2140 * relation, *null_index is set to the index of the merged partition.
2141 *
2142 * Note: We assume here that the join clause for a partitioned join is strict
2143 * because have_partkey_equi_join() requires that the corresponding operator
2144 * be mergejoinable, and we currently assume that mergejoinable operators are
2145 * strict (see MJEvalOuterValues()/MJEvalInnerValues()).
2146 */
2147static void
2149 PartitionMap *inner_map,
2150 bool outer_has_null,
2151 bool inner_has_null,
2152 int outer_null,
2153 int inner_null,
2154 JoinType jointype,
2155 int *next_index,
2156 int *null_index)
2157{
2158 bool consider_outer_null = false;
2159 bool consider_inner_null = false;
2160
2161 Assert(outer_has_null || inner_has_null);
2162 Assert(*null_index == -1);
2163
2164 /*
2165 * Check whether the NULL partitions have already been merged and if so,
2166 * set the consider_outer_null/consider_inner_null flags.
2167 */
2168 if (outer_has_null)
2169 {
2170 Assert(outer_null >= 0 && outer_null < outer_map->nparts);
2171 if (outer_map->merged_indexes[outer_null] == -1)
2172 consider_outer_null = true;
2173 }
2174 if (inner_has_null)
2175 {
2176 Assert(inner_null >= 0 && inner_null < inner_map->nparts);
2177 if (inner_map->merged_indexes[inner_null] == -1)
2178 consider_inner_null = true;
2179 }
2180
2181 /* If both flags are set false, we don't need to do anything. */
2182 if (!consider_outer_null && !consider_inner_null)
2183 return;
2184
2185 if (consider_outer_null && !consider_inner_null)
2186 {
2187 Assert(outer_has_null);
2188
2189 /*
2190 * If this is an outer join, the NULL partition on the outer side has
2191 * to be scanned all the way anyway; merge the NULL partition with a
2192 * dummy partition on the other side. In that case
2193 * consider_outer_null means that the NULL partition only contains
2194 * NULL values as the key values, so the merged partition will do so;
2195 * treat it as the NULL partition of the join relation.
2196 */
2197 if (IS_OUTER_JOIN(jointype))
2198 {
2199 Assert(jointype != JOIN_RIGHT);
2200 *null_index = merge_partition_with_dummy(outer_map, outer_null,
2201 next_index);
2202 }
2203 }
2204 else if (!consider_outer_null && consider_inner_null)
2205 {
2206 Assert(inner_has_null);
2207
2208 /*
2209 * If this is a FULL join, the NULL partition on the inner side has to
2210 * be scanned all the way anyway; merge the NULL partition with a
2211 * dummy partition on the other side. In that case
2212 * consider_inner_null means that the NULL partition only contains
2213 * NULL values as the key values, so the merged partition will do so;
2214 * treat it as the NULL partition of the join relation.
2215 */
2216 if (jointype == JOIN_FULL)
2217 *null_index = merge_partition_with_dummy(inner_map, inner_null,
2218 next_index);
2219 }
2220 else
2221 {
2222 Assert(consider_outer_null && consider_inner_null);
2223 Assert(outer_has_null);
2224 Assert(inner_has_null);
2225
2226 /*
2227 * If this is an outer join, the NULL partition on the outer side (and
2228 * that on the inner side if this is a FULL join) have to be scanned
2229 * all the way anyway, so merge them. Note that each of the NULL
2230 * partitions isn't merged yet, so they should be merged successfully.
2231 * Like the above, each of the NULL partitions only contains NULL
2232 * values as the key values, so the merged partition will do so; treat
2233 * it as the NULL partition of the join relation.
2234 *
2235 * Note: if this an INNER/SEMI join, the join clause will never be
2236 * satisfied by two NULL values (see comments above), so both the NULL
2237 * partitions can be eliminated.
2238 */
2239 if (IS_OUTER_JOIN(jointype))
2240 {
2241 Assert(jointype != JOIN_RIGHT);
2242 *null_index = merge_matching_partitions(outer_map, inner_map,
2243 outer_null, inner_null,
2244 next_index);
2245 Assert(*null_index >= 0);
2246 }
2247 }
2248}
2249
2250/*
2251 * merge_default_partitions
2252 * Merge the default partitions from a join's outer and inner sides.
2253 *
2254 * If the merged partition produced from them is the default partition of the
2255 * join relation, *default_index is set to the index of the merged partition.
2256 */
2257static void
2259 PartitionMap *inner_map,
2260 bool outer_has_default,
2261 bool inner_has_default,
2262 int outer_default,
2263 int inner_default,
2264 JoinType jointype,
2265 int *next_index,
2266 int *default_index)
2267{
2268 int outer_merged_index = -1;
2269 int inner_merged_index = -1;
2270
2271 Assert(outer_has_default || inner_has_default);
2272
2273 /* Get the merged partition indexes for the default partitions. */
2274 if (outer_has_default)
2275 {
2276 Assert(outer_default >= 0 && outer_default < outer_map->nparts);
2277 outer_merged_index = outer_map->merged_indexes[outer_default];
2278 }
2279 if (inner_has_default)
2280 {
2281 Assert(inner_default >= 0 && inner_default < inner_map->nparts);
2282 inner_merged_index = inner_map->merged_indexes[inner_default];
2283 }
2284
2285 if (outer_has_default && !inner_has_default)
2286 {
2287 /*
2288 * If this is an outer join, the default partition on the outer side
2289 * has to be scanned all the way anyway; if we have not yet assigned a
2290 * partition, merge the default partition with a dummy partition on
2291 * the other side. The merged partition will act as the default
2292 * partition of the join relation (see comments in
2293 * process_inner_partition()).
2294 */
2295 if (IS_OUTER_JOIN(jointype))
2296 {
2297 Assert(jointype != JOIN_RIGHT);
2298 if (outer_merged_index == -1)
2299 {
2300 Assert(*default_index == -1);
2301 *default_index = merge_partition_with_dummy(outer_map,
2302 outer_default,
2303 next_index);
2304 }
2305 else
2306 Assert(*default_index == outer_merged_index);
2307 }
2308 else
2309 Assert(*default_index == -1);
2310 }
2311 else if (!outer_has_default && inner_has_default)
2312 {
2313 /*
2314 * If this is a FULL join, the default partition on the inner side has
2315 * to be scanned all the way anyway; if we have not yet assigned a
2316 * partition, merge the default partition with a dummy partition on
2317 * the other side. The merged partition will act as the default
2318 * partition of the join relation (see comments in
2319 * process_outer_partition()).
2320 */
2321 if (jointype == JOIN_FULL)
2322 {
2323 if (inner_merged_index == -1)
2324 {
2325 Assert(*default_index == -1);
2326 *default_index = merge_partition_with_dummy(inner_map,
2327 inner_default,
2328 next_index);
2329 }
2330 else
2331 Assert(*default_index == inner_merged_index);
2332 }
2333 else
2334 Assert(*default_index == -1);
2335 }
2336 else
2337 {
2338 Assert(outer_has_default && inner_has_default);
2339
2340 /*
2341 * The default partitions have to be joined with each other, so merge
2342 * them. Note that each of the default partitions isn't merged yet
2343 * (see, process_outer_partition()/process_inner_partition()), so they
2344 * should be merged successfully. The merged partition will act as
2345 * the default partition of the join relation.
2346 */
2347 Assert(outer_merged_index == -1);
2348 Assert(inner_merged_index == -1);
2349 Assert(*default_index == -1);
2350 *default_index = merge_matching_partitions(outer_map,
2351 inner_map,
2352 outer_default,
2353 inner_default,
2354 next_index);
2355 Assert(*default_index >= 0);
2356 }
2357}
2358
2359/*
2360 * merge_partition_with_dummy
2361 * Assign given partition a new partition of a join relation
2362 *
2363 * Note: The caller assumes that the given partition doesn't have a non-dummy
2364 * matching partition on the other side, but if the given partition finds the
2365 * matching partition later, we will adjust the assignment.
2366 */
2367static int
2369{
2370 int merged_index = *next_index;
2371
2372 Assert(index >= 0 && index < map->nparts);
2373 Assert(map->merged_indexes[index] == -1);
2374 Assert(!map->merged[index]);
2375 map->merged_indexes[index] = merged_index;
2376 /* Leave the merged flag alone! */
2377 *next_index = *next_index + 1;
2378 return merged_index;
2379}
2380
2381/*
2382 * fix_merged_indexes
2383 * Adjust merged indexes of re-merged partitions
2384 */
2385static void
2387 int nmerged, List *merged_indexes)
2388{
2389 int *new_indexes;
2390 int merged_index;
2391 int i;
2392 ListCell *lc;
2393
2394 Assert(nmerged > 0);
2395
2396 new_indexes = (int *) palloc(sizeof(int) * nmerged);
2397 for (i = 0; i < nmerged; i++)
2398 new_indexes[i] = -1;
2399
2400 /* Build the mapping of old merged indexes to new merged indexes. */
2401 if (outer_map->did_remapping)
2402 {
2403 for (i = 0; i < outer_map->nparts; i++)
2404 {
2405 merged_index = outer_map->old_indexes[i];
2406 if (merged_index >= 0)
2407 new_indexes[merged_index] = outer_map->merged_indexes[i];
2408 }
2409 }
2410 if (inner_map->did_remapping)
2411 {
2412 for (i = 0; i < inner_map->nparts; i++)
2413 {
2414 merged_index = inner_map->old_indexes[i];
2415 if (merged_index >= 0)
2416 new_indexes[merged_index] = inner_map->merged_indexes[i];
2417 }
2418 }
2419
2420 /* Fix the merged_indexes list using the mapping. */
2421 foreach(lc, merged_indexes)
2422 {
2423 merged_index = lfirst_int(lc);
2424 Assert(merged_index >= 0);
2425 if (new_indexes[merged_index] >= 0)
2426 lfirst_int(lc) = new_indexes[merged_index];
2427 }
2428
2429 pfree(new_indexes);
2430}
2431
2432/*
2433 * generate_matching_part_pairs
2434 * Generate a pair of lists of partitions that produce merged partitions
2435 *
2436 * The lists of partitions are built in the order of merged partition indexes,
2437 * and returned in *outer_parts and *inner_parts.
2438 */
2439static void
2441 PartitionMap *outer_map, PartitionMap *inner_map,
2442 int nmerged,
2443 List **outer_parts, List **inner_parts)
2444{
2445 int outer_nparts = outer_map->nparts;
2446 int inner_nparts = inner_map->nparts;
2447 int *outer_indexes;
2448 int *inner_indexes;
2449 int max_nparts;
2450 int i;
2451
2452 Assert(nmerged > 0);
2453 Assert(*outer_parts == NIL);
2454 Assert(*inner_parts == NIL);
2455
2456 outer_indexes = (int *) palloc(sizeof(int) * nmerged);
2457 inner_indexes = (int *) palloc(sizeof(int) * nmerged);
2458 for (i = 0; i < nmerged; i++)
2459 outer_indexes[i] = inner_indexes[i] = -1;
2460
2461 /* Set pairs of matching partitions. */
2462 Assert(outer_nparts == outer_rel->nparts);
2463 Assert(inner_nparts == inner_rel->nparts);
2464 max_nparts = Max(outer_nparts, inner_nparts);
2465 for (i = 0; i < max_nparts; i++)
2466 {
2467 if (i < outer_nparts)
2468 {
2469 int merged_index = outer_map->merged_indexes[i];
2470
2471 if (merged_index >= 0)
2472 {
2473 Assert(merged_index < nmerged);
2474 outer_indexes[merged_index] = i;
2475 }
2476 }
2477 if (i < inner_nparts)
2478 {
2479 int merged_index = inner_map->merged_indexes[i];
2480
2481 if (merged_index >= 0)
2482 {
2483 Assert(merged_index < nmerged);
2484 inner_indexes[merged_index] = i;
2485 }
2486 }
2487 }
2488
2489 /* Build the list pairs. */
2490 for (i = 0; i < nmerged; i++)
2491 {
2492 int outer_index = outer_indexes[i];
2493 int inner_index = inner_indexes[i];
2494
2495 /*
2496 * If both partitions are dummy, it means the merged partition that
2497 * had been assigned to the outer/inner partition was removed when
2498 * re-merging the outer/inner partition in
2499 * merge_matching_partitions(); ignore the merged partition.
2500 */
2501 if (outer_index == -1 && inner_index == -1)
2502 continue;
2503
2504 *outer_parts = lappend(*outer_parts, outer_index >= 0 ?
2505 outer_rel->part_rels[outer_index] : NULL);
2506 *inner_parts = lappend(*inner_parts, inner_index >= 0 ?
2507 inner_rel->part_rels[inner_index] : NULL);
2508 }
2509
2510 pfree(outer_indexes);
2511 pfree(inner_indexes);
2512}
2513
2514/*
2515 * build_merged_partition_bounds
2516 * Create a PartitionBoundInfo struct from merged partition bounds
2517 */
2518static PartitionBoundInfo
2519build_merged_partition_bounds(char strategy, List *merged_datums,
2520 List *merged_kinds, List *merged_indexes,
2521 int null_index, int default_index)
2522{
2523 PartitionBoundInfo merged_bounds;
2524 int ndatums = list_length(merged_datums);
2525 int pos;
2526 ListCell *lc;
2527
2528 merged_bounds = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData));
2529 merged_bounds->strategy = strategy;
2530 merged_bounds->ndatums = ndatums;
2531
2532 merged_bounds->datums = (Datum **) palloc(sizeof(Datum *) * ndatums);
2533 pos = 0;
2534 foreach(lc, merged_datums)
2535 merged_bounds->datums[pos++] = (Datum *) lfirst(lc);
2536
2537 if (strategy == PARTITION_STRATEGY_RANGE)
2538 {
2539 Assert(list_length(merged_kinds) == ndatums);
2540 merged_bounds->kind = (PartitionRangeDatumKind **)
2541 palloc(sizeof(PartitionRangeDatumKind *) * ndatums);
2542 pos = 0;
2543 foreach(lc, merged_kinds)
2544 merged_bounds->kind[pos++] = (PartitionRangeDatumKind *) lfirst(lc);
2545
2546 /* There are ndatums+1 indexes in the case of range partitioning. */
2547 merged_indexes = lappend_int(merged_indexes, -1);
2548 ndatums++;
2549 }
2550 else
2551 {
2552 Assert(strategy == PARTITION_STRATEGY_LIST);
2553 Assert(merged_kinds == NIL);
2554 merged_bounds->kind = NULL;
2555 }
2556
2557 /* interleaved_parts is always NULL for join relations. */
2558 merged_bounds->interleaved_parts = NULL;
2559
2560 Assert(list_length(merged_indexes) == ndatums);
2561 merged_bounds->nindexes = ndatums;
2562 merged_bounds->indexes = (int *) palloc(sizeof(int) * ndatums);
2563 pos = 0;
2564 foreach(lc, merged_indexes)
2565 merged_bounds->indexes[pos++] = lfirst_int(lc);
2566
2567 merged_bounds->null_index = null_index;
2568 merged_bounds->default_index = default_index;
2569
2570 return merged_bounds;
2571}
2572
2573/*
2574 * get_range_partition
2575 * Get the next non-dummy partition of a range-partitioned relation,
2576 * returning the index of that partition
2577 *
2578 * *lb and *ub are set to the lower and upper bounds of that partition
2579 * respectively, and *lb_pos is advanced to the next lower bound, if any.
2580 */
2581static int
2584 int *lb_pos,
2587{
2588 int part_index;
2589
2591
2592 do
2593 {
2594 part_index = get_range_partition_internal(bi, lb_pos, lb, ub);
2595 if (part_index == -1)
2596 return -1;
2597 } while (is_dummy_partition(rel, part_index));
2598
2599 return part_index;
2600}
2601
2602static int
2604 int *lb_pos,
2607{
2608 /* Return the index as -1 if we've exhausted all lower bounds. */
2609 if (*lb_pos >= bi->ndatums)
2610 return -1;
2611
2612 /* A lower bound should have at least one more bound after it. */
2613 Assert(*lb_pos + 1 < bi->ndatums);
2614
2615 /* Set the lower bound. */
2616 lb->index = bi->indexes[*lb_pos];
2617 lb->datums = bi->datums[*lb_pos];
2618 lb->kind = bi->kind[*lb_pos];
2619 lb->lower = true;
2620 /* Set the upper bound. */
2621 ub->index = bi->indexes[*lb_pos + 1];
2622 ub->datums = bi->datums[*lb_pos + 1];
2623 ub->kind = bi->kind[*lb_pos + 1];
2624 ub->lower = false;
2625
2626 /* The index assigned to an upper bound should be valid. */
2627 Assert(ub->index >= 0);
2628
2629 /*
2630 * Advance the position to the next lower bound. If there are no bounds
2631 * left beyond the upper bound, we have reached the last lower bound.
2632 */
2633 if (*lb_pos + 2 >= bi->ndatums)
2634 *lb_pos = bi->ndatums;
2635 else
2636 {
2637 /*
2638 * If the index assigned to the bound next to the upper bound isn't
2639 * valid, that is the next lower bound; else, the upper bound is also
2640 * the lower bound of the next range partition.
2641 */
2642 if (bi->indexes[*lb_pos + 2] < 0)
2643 *lb_pos = *lb_pos + 2;
2644 else
2645 *lb_pos = *lb_pos + 1;
2646 }
2647
2648 return ub->index;
2649}
2650
2651/*
2652 * compare_range_partitions
2653 * Compare the bounds of two range partitions, and return true if the
2654 * two partitions overlap, false otherwise
2655 *
2656 * *lb_cmpval is set to -1, 0, or 1 if the outer partition's lower bound is
2657 * lower than, equal to, or higher than the inner partition's lower bound
2658 * respectively. Likewise, *ub_cmpval is set to -1, 0, or 1 if the outer
2659 * partition's upper bound is lower than, equal to, or higher than the inner
2660 * partition's upper bound respectively.
2661 */
2662static bool
2663compare_range_partitions(int partnatts, FmgrInfo *partsupfuncs,
2664 Oid *partcollations,
2665 PartitionRangeBound *outer_lb,
2666 PartitionRangeBound *outer_ub,
2667 PartitionRangeBound *inner_lb,
2668 PartitionRangeBound *inner_ub,
2669 int *lb_cmpval, int *ub_cmpval)
2670{
2671 /*
2672 * Check if the outer partition's upper bound is lower than the inner
2673 * partition's lower bound; if so the partitions aren't overlapping.
2674 */
2675 if (compare_range_bounds(partnatts, partsupfuncs, partcollations,
2676 outer_ub, inner_lb) < 0)
2677 {
2678 *lb_cmpval = -1;
2679 *ub_cmpval = -1;
2680 return false;
2681 }
2682
2683 /*
2684 * Check if the outer partition's lower bound is higher than the inner
2685 * partition's upper bound; if so the partitions aren't overlapping.
2686 */
2687 if (compare_range_bounds(partnatts, partsupfuncs, partcollations,
2688 outer_lb, inner_ub) > 0)
2689 {
2690 *lb_cmpval = 1;
2691 *ub_cmpval = 1;
2692 return false;
2693 }
2694
2695 /* All other cases indicate overlapping partitions. */
2696 *lb_cmpval = compare_range_bounds(partnatts, partsupfuncs, partcollations,
2697 outer_lb, inner_lb);
2698 *ub_cmpval = compare_range_bounds(partnatts, partsupfuncs, partcollations,
2699 outer_ub, inner_ub);
2700 return true;
2701}
2702
2703/*
2704 * get_merged_range_bounds
2705 * Given the bounds of range partitions to be joined, determine the bounds
2706 * of a merged partition produced from the range partitions
2707 *
2708 * *merged_lb and *merged_ub are set to the lower and upper bounds of the
2709 * merged partition.
2710 */
2711static void
2712get_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
2713 Oid *partcollations, JoinType jointype,
2714 PartitionRangeBound *outer_lb,
2715 PartitionRangeBound *outer_ub,
2716 PartitionRangeBound *inner_lb,
2717 PartitionRangeBound *inner_ub,
2718 int lb_cmpval, int ub_cmpval,
2719 PartitionRangeBound *merged_lb,
2720 PartitionRangeBound *merged_ub)
2721{
2722 Assert(compare_range_bounds(partnatts, partsupfuncs, partcollations,
2723 outer_lb, inner_lb) == lb_cmpval);
2724 Assert(compare_range_bounds(partnatts, partsupfuncs, partcollations,
2725 outer_ub, inner_ub) == ub_cmpval);
2726
2727 switch (jointype)
2728 {
2729 case JOIN_INNER:
2730 case JOIN_SEMI:
2731
2732 /*
2733 * An INNER/SEMI join will have the rows that fit both sides, so
2734 * the lower bound of the merged partition will be the higher of
2735 * the two lower bounds, and the upper bound of the merged
2736 * partition will be the lower of the two upper bounds.
2737 */
2738 *merged_lb = (lb_cmpval > 0) ? *outer_lb : *inner_lb;
2739 *merged_ub = (ub_cmpval < 0) ? *outer_ub : *inner_ub;
2740 break;
2741
2742 case JOIN_LEFT:
2743 case JOIN_ANTI:
2744
2745 /*
2746 * A LEFT/ANTI join will have all the rows from the outer side, so
2747 * the bounds of the merged partition will be the same as the
2748 * outer bounds.
2749 */
2750 *merged_lb = *outer_lb;
2751 *merged_ub = *outer_ub;
2752 break;
2753
2754 case JOIN_FULL:
2755
2756 /*
2757 * A FULL join will have all the rows from both sides, so the
2758 * lower bound of the merged partition will be the lower of the
2759 * two lower bounds, and the upper bound of the merged partition
2760 * will be the higher of the two upper bounds.
2761 */
2762 *merged_lb = (lb_cmpval < 0) ? *outer_lb : *inner_lb;
2763 *merged_ub = (ub_cmpval > 0) ? *outer_ub : *inner_ub;
2764 break;
2765
2766 default:
2767 elog(ERROR, "unrecognized join type: %d", (int) jointype);
2768 }
2769}
2770
2771/*
2772 * add_merged_range_bounds
2773 * Add the bounds of a merged partition to the lists of range bounds
2774 */
2775static void
2776add_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
2777 Oid *partcollations,
2778 PartitionRangeBound *merged_lb,
2779 PartitionRangeBound *merged_ub,
2780 int merged_index,
2781 List **merged_datums,
2782 List **merged_kinds,
2783 List **merged_indexes)
2784{
2785 int cmpval;
2786
2787 if (!*merged_datums)
2788 {
2789 /* First merged partition */
2790 Assert(!*merged_kinds);
2791 Assert(!*merged_indexes);
2792 cmpval = 1;
2793 }
2794 else
2795 {
2796 PartitionRangeBound prev_ub;
2797
2798 Assert(*merged_datums);
2799 Assert(*merged_kinds);
2800 Assert(*merged_indexes);
2801
2802 /* Get the last upper bound. */
2803 prev_ub.index = llast_int(*merged_indexes);
2804 prev_ub.datums = (Datum *) llast(*merged_datums);
2805 prev_ub.kind = (PartitionRangeDatumKind *) llast(*merged_kinds);
2806 prev_ub.lower = false;
2807
2808 /*
2809 * We pass lower1 = false to partition_rbound_cmp() to prevent it from
2810 * considering the last upper bound to be smaller than the lower bound
2811 * of the merged partition when the values of the two range bounds
2812 * compare equal.
2813 */
2814 cmpval = partition_rbound_cmp(partnatts, partsupfuncs, partcollations,
2815 merged_lb->datums, merged_lb->kind,
2816 false, &prev_ub);
2817 Assert(cmpval >= 0);
2818 }
2819
2820 /*
2821 * If the lower bound is higher than the last upper bound, add the lower
2822 * bound with the index as -1 indicating that that is a lower bound; else,
2823 * the last upper bound will be reused as the lower bound of the merged
2824 * partition, so skip this.
2825 */
2826 if (cmpval > 0)
2827 {
2828 *merged_datums = lappend(*merged_datums, merged_lb->datums);
2829 *merged_kinds = lappend(*merged_kinds, merged_lb->kind);
2830 *merged_indexes = lappend_int(*merged_indexes, -1);
2831 }
2832
2833 /* Add the upper bound and index of the merged partition. */
2834 *merged_datums = lappend(*merged_datums, merged_ub->datums);
2835 *merged_kinds = lappend(*merged_kinds, merged_ub->kind);
2836 *merged_indexes = lappend_int(*merged_indexes, merged_index);
2837}
2838
2839/*
2840 * partitions_are_ordered
2841 * Determine whether the partitions described by 'boundinfo' are ordered,
2842 * that is partitions appearing earlier in the PartitionDesc sequence
2843 * contain partition keys strictly less than those appearing later.
2844 * Also, if NULL values are possible, they must come in the last
2845 * partition defined in the PartitionDesc. 'live_parts' marks which
2846 * partitions we should include when checking the ordering. Partitions
2847 * that do not appear in 'live_parts' are ignored.
2848 *
2849 * If out of order, or there is insufficient info to know the order,
2850 * then we return false.
2851 */
2852bool
2854{
2855 Assert(boundinfo != NULL);
2856
2857 switch (boundinfo->strategy)
2858 {
2860
2861 /*
2862 * RANGE-type partitioning guarantees that the partitions can be
2863 * scanned in the order that they're defined in the PartitionDesc
2864 * to provide sequential, non-overlapping ranges of tuples.
2865 * However, if a DEFAULT partition exists and it's contained
2866 * within live_parts, then the partitions are not ordered.
2867 */
2868 if (!partition_bound_has_default(boundinfo) ||
2869 !bms_is_member(boundinfo->default_index, live_parts))
2870 return true;
2871 break;
2872
2874
2875 /*
2876 * LIST partitioned are ordered providing none of live_parts
2877 * overlap with the partitioned table's interleaved partitions.
2878 */
2879 if (!bms_overlap(live_parts, boundinfo->interleaved_parts))
2880 return true;
2881
2882 break;
2884 break;
2885 }
2886
2887 return false;
2888}
2889
2890/*
2891 * check_new_partition_bound
2892 *
2893 * Checks if the new partition's bound overlaps any of the existing partitions
2894 * of parent. Also performs additional checks as necessary per strategy.
2895 */
2896void
2898 PartitionBoundSpec *spec, ParseState *pstate)
2899{
2901 PartitionDesc partdesc = RelationGetPartitionDesc(parent, false);
2902 PartitionBoundInfo boundinfo = partdesc->boundinfo;
2903 int with = -1;
2904 bool overlap = false;
2905 int overlap_location = -1;
2906
2907 if (spec->is_default)
2908 {
2909 /*
2910 * The default partition bound never conflicts with any other
2911 * partition's; if that's what we're attaching, the only possible
2912 * problem is that one already exists, so check for that and we're
2913 * done.
2914 */
2915 if (boundinfo == NULL || !partition_bound_has_default(boundinfo))
2916 return;
2917
2918 /* Default partition already exists, error out. */
2919 ereport(ERROR,
2920 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
2921 errmsg("partition \"%s\" conflicts with existing default partition \"%s\"",
2922 relname, get_rel_name(partdesc->oids[boundinfo->default_index])),
2923 parser_errposition(pstate, spec->location)));
2924 }
2925
2926 switch (key->strategy)
2927 {
2929 {
2931 Assert(spec->remainder >= 0 && spec->remainder < spec->modulus);
2932
2933 if (partdesc->nparts > 0)
2934 {
2935 int greatest_modulus;
2936 int remainder;
2937 int offset;
2938
2939 /*
2940 * Check rule that every modulus must be a factor of the
2941 * next larger modulus. (For example, if you have a bunch
2942 * of partitions that all have modulus 5, you can add a
2943 * new partition with modulus 10 or a new partition with
2944 * modulus 15, but you cannot add both a partition with
2945 * modulus 10 and a partition with modulus 15, because 10
2946 * is not a factor of 15.) We need only check the next
2947 * smaller and next larger existing moduli, relying on
2948 * previous enforcement of this rule to be sure that the
2949 * rest are in line.
2950 */
2951
2952 /*
2953 * Get the greatest (modulus, remainder) pair contained in
2954 * boundinfo->datums that is less than or equal to the
2955 * (spec->modulus, spec->remainder) pair.
2956 */
2957 offset = partition_hash_bsearch(boundinfo,
2958 spec->modulus,
2959 spec->remainder);
2960 if (offset < 0)
2961 {
2962 int next_modulus;
2963
2964 /*
2965 * All existing moduli are greater or equal, so the
2966 * new one must be a factor of the smallest one, which
2967 * is first in the boundinfo.
2968 */
2969 next_modulus = DatumGetInt32(boundinfo->datums[0][0]);
2970 if (next_modulus % spec->modulus != 0)
2971 ereport(ERROR,
2972 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
2973 errmsg("every hash partition modulus must be a factor of the next larger modulus"),
2974 errdetail("The new modulus %d is not a factor of %d, the modulus of existing partition \"%s\".",
2975 spec->modulus, next_modulus,
2976 get_rel_name(partdesc->oids[0]))));
2977 }
2978 else
2979 {
2980 int prev_modulus;
2981
2982 /*
2983 * We found the largest (modulus, remainder) pair less
2984 * than or equal to the new one. That modulus must be
2985 * a divisor of, or equal to, the new modulus.
2986 */
2987 prev_modulus = DatumGetInt32(boundinfo->datums[offset][0]);
2988
2989 if (spec->modulus % prev_modulus != 0)
2990 ereport(ERROR,
2991 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
2992 errmsg("every hash partition modulus must be a factor of the next larger modulus"),
2993 errdetail("The new modulus %d is not divisible by %d, the modulus of existing partition \"%s\".",
2994 spec->modulus,
2995 prev_modulus,
2996 get_rel_name(partdesc->oids[offset]))));
2997
2998 if (offset + 1 < boundinfo->ndatums)
2999 {
3000 int next_modulus;
3001
3002 /*
3003 * Look at the next higher (modulus, remainder)
3004 * pair. That could have the same modulus and a
3005 * larger remainder than the new pair, in which
3006 * case we're good. If it has a larger modulus,
3007 * the new modulus must divide that one.
3008 */
3009 next_modulus = DatumGetInt32(boundinfo->datums[offset + 1][0]);
3010
3011 if (next_modulus % spec->modulus != 0)
3012 ereport(ERROR,
3013 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
3014 errmsg("every hash partition modulus must be a factor of the next larger modulus"),
3015 errdetail("The new modulus %d is not a factor of %d, the modulus of existing partition \"%s\".",
3016 spec->modulus, next_modulus,
3017 get_rel_name(partdesc->oids[offset + 1]))));
3018 }
3019 }
3020
3021 greatest_modulus = boundinfo->nindexes;
3022 remainder = spec->remainder;
3023
3024 /*
3025 * Normally, the lowest remainder that could conflict with
3026 * the new partition is equal to the remainder specified
3027 * for the new partition, but when the new partition has a
3028 * modulus higher than any used so far, we need to adjust.
3029 */
3030 if (remainder >= greatest_modulus)
3031 remainder = remainder % greatest_modulus;
3032
3033 /* Check every potentially-conflicting remainder. */
3034 do
3035 {
3036 if (boundinfo->indexes[remainder] != -1)
3037 {
3038 overlap = true;
3039 overlap_location = spec->location;
3040 with = boundinfo->indexes[remainder];
3041 break;
3042 }
3043 remainder += spec->modulus;
3044 } while (remainder < greatest_modulus);
3045 }
3046
3047 break;
3048 }
3049
3051 {
3053
3054 if (partdesc->nparts > 0)
3055 {
3056 ListCell *cell;
3057
3058 Assert(boundinfo &&
3059 boundinfo->strategy == PARTITION_STRATEGY_LIST &&
3060 (boundinfo->ndatums > 0 ||
3061 partition_bound_accepts_nulls(boundinfo) ||
3062 partition_bound_has_default(boundinfo)));
3063
3064 foreach(cell, spec->listdatums)
3065 {
3066 Const *val = lfirst_node(Const, cell);
3067
3068 overlap_location = val->location;
3069 if (!val->constisnull)
3070 {
3071 int offset;
3072 bool equal;
3073
3074 offset = partition_list_bsearch(&key->partsupfunc[0],
3075 key->partcollation,
3076 boundinfo,
3077 val->constvalue,
3078 &equal);
3079 if (offset >= 0 && equal)
3080 {
3081 overlap = true;
3082 with = boundinfo->indexes[offset];
3083 break;
3084 }
3085 }
3086 else if (partition_bound_accepts_nulls(boundinfo))
3087 {
3088 overlap = true;
3089 with = boundinfo->null_index;
3090 break;
3091 }
3092 }
3093 }
3094
3095 break;
3096 }
3097
3099 {
3101 *upper;
3102 int cmpval;
3103
3105 lower = make_one_partition_rbound(key, -1, spec->lowerdatums, true);
3106 upper = make_one_partition_rbound(key, -1, spec->upperdatums, false);
3107
3108 /*
3109 * First check if the resulting range would be empty with
3110 * specified lower and upper bounds. partition_rbound_cmp
3111 * cannot return zero here, since the lower-bound flags are
3112 * different.
3113 */
3114 cmpval = partition_rbound_cmp(key->partnatts,
3115 key->partsupfunc,
3116 key->partcollation,
3117 lower->datums, lower->kind,
3118 true, upper);
3119 Assert(cmpval != 0);
3120 if (cmpval > 0)
3121 {
3122 /* Point to problematic key in the lower datums list. */
3124 cmpval - 1);
3125
3126 ereport(ERROR,
3127 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
3128 errmsg("empty range bound specified for partition \"%s\"",
3129 relname),
3130 errdetail("Specified lower bound %s is greater than or equal to upper bound %s.",
3133 parser_errposition(pstate, datum->location)));
3134 }
3135
3136 if (partdesc->nparts > 0)
3137 {
3138 int offset;
3139
3140 Assert(boundinfo &&
3141 boundinfo->strategy == PARTITION_STRATEGY_RANGE &&
3142 (boundinfo->ndatums > 0 ||
3143 partition_bound_has_default(boundinfo)));
3144
3145 /*
3146 * Test whether the new lower bound (which is treated
3147 * inclusively as part of the new partition) lies inside
3148 * an existing partition, or in a gap.
3149 *
3150 * If it's inside an existing partition, the bound at
3151 * offset + 1 will be the upper bound of that partition,
3152 * and its index will be >= 0.
3153 *
3154 * If it's in a gap, the bound at offset + 1 will be the
3155 * lower bound of the next partition, and its index will
3156 * be -1. This is also true if there is no next partition,
3157 * since the index array is initialised with an extra -1
3158 * at the end.
3159 */
3160 offset = partition_range_bsearch(key->partnatts,
3161 key->partsupfunc,
3162 key->partcollation,
3163 boundinfo, lower,
3164 &cmpval);
3165
3166 if (boundinfo->indexes[offset + 1] < 0)
3167 {
3168 /*
3169 * Check that the new partition will fit in the gap.
3170 * For it to fit, the new upper bound must be less
3171 * than or equal to the lower bound of the next
3172 * partition, if there is one.
3173 */
3174 if (offset + 1 < boundinfo->ndatums)
3175 {
3176 Datum *datums;
3178 bool is_lower;
3179
3180 datums = boundinfo->datums[offset + 1];
3181 kind = boundinfo->kind[offset + 1];
3182 is_lower = (boundinfo->indexes[offset + 1] == -1);
3183
3184 cmpval = partition_rbound_cmp(key->partnatts,
3185 key->partsupfunc,
3186 key->partcollation,
3187 datums, kind,
3188 is_lower, upper);
3189 if (cmpval < 0)
3190 {
3191 /*
3192 * Point to problematic key in the upper
3193 * datums list.
3194 */
3195 PartitionRangeDatum *datum =
3196 list_nth(spec->upperdatums, abs(cmpval) - 1);
3197
3198 /*
3199 * The new partition overlaps with the
3200 * existing partition between offset + 1 and
3201 * offset + 2.
3202 */
3203 overlap = true;
3204 overlap_location = datum->location;
3205 with = boundinfo->indexes[offset + 2];
3206 }
3207 }
3208 }
3209 else
3210 {
3211 /*
3212 * The new partition overlaps with the existing
3213 * partition between offset and offset + 1.
3214 */
3215 PartitionRangeDatum *datum;
3216
3217 /*
3218 * Point to problematic key in the lower datums list;
3219 * if we have equality, point to the first one.
3220 */
3221 datum = cmpval == 0 ? linitial(spec->lowerdatums) :
3222 list_nth(spec->lowerdatums, abs(cmpval) - 1);
3223 overlap = true;
3224 overlap_location = datum->location;
3225 with = boundinfo->indexes[offset + 1];
3226 }
3227 }
3228
3229 break;
3230 }
3231 }
3232
3233 if (overlap)
3234 {
3235 Assert(with >= 0);
3236 ereport(ERROR,
3237 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
3238 errmsg("partition \"%s\" would overlap partition \"%s\"",
3239 relname, get_rel_name(partdesc->oids[with])),
3240 parser_errposition(pstate, overlap_location)));
3241 }
3242}
3243
3244/*
3245 * check_default_partition_contents
3246 *
3247 * This function checks if there exists a row in the default partition that
3248 * would properly belong to the new partition being added. If it finds one,
3249 * it throws an error.
3250 */
3251void
3253 PartitionBoundSpec *new_spec)
3254{
3255 List *new_part_constraints;
3256 List *def_part_constraints;
3257 List *all_parts;
3258 ListCell *lc;
3259
3260 new_part_constraints = (new_spec->strategy == PARTITION_STRATEGY_LIST)
3261 ? get_qual_for_list(parent, new_spec)
3262 : get_qual_for_range(parent, new_spec, false);
3263 def_part_constraints =
3264 get_proposed_default_constraint(new_part_constraints);
3265
3266 /*
3267 * Map the Vars in the constraint expression from parent's attnos to
3268 * default_rel's.
3269 */
3270 def_part_constraints =
3271 map_partition_varattnos(def_part_constraints, 1, default_rel,
3272 parent);
3273
3274 /*
3275 * If the existing constraints on the default partition imply that it will
3276 * not contain any row that would belong to the new partition, we can
3277 * avoid scanning the default partition.
3278 */
3279 if (PartConstraintImpliedByRelConstraint(default_rel, def_part_constraints))
3280 {
3282 (errmsg_internal("updated partition constraint for default partition \"%s\" is implied by existing constraints",
3283 RelationGetRelationName(default_rel))));
3284 return;
3285 }
3286
3287 /*
3288 * Scan the default partition and its subpartitions, and check for rows
3289 * that do not satisfy the revised partition constraints.
3290 */
3291 if (default_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
3292 all_parts = find_all_inheritors(RelationGetRelid(default_rel),
3293 AccessExclusiveLock, NULL);
3294 else
3295 all_parts = list_make1_oid(RelationGetRelid(default_rel));
3296
3297 foreach(lc, all_parts)
3298 {
3299 Oid part_relid = lfirst_oid(lc);
3300 Relation part_rel;
3301 Expr *partition_constraint;
3302 EState *estate;
3303 ExprState *partqualstate = NULL;
3304 Snapshot snapshot;
3305 ExprContext *econtext;
3306 TableScanDesc scan;
3307 MemoryContext oldCxt;
3308 TupleTableSlot *tupslot;
3309
3310 /* Lock already taken above. */
3311 if (part_relid != RelationGetRelid(default_rel))
3312 {
3313 part_rel = table_open(part_relid, NoLock);
3314
3315 /*
3316 * Map the Vars in the constraint expression from default_rel's
3317 * the sub-partition's.
3318 */
3319 partition_constraint = make_ands_explicit(def_part_constraints);
3320 partition_constraint = (Expr *)
3321 map_partition_varattnos((List *) partition_constraint, 1,
3322 part_rel, default_rel);
3323
3324 /*
3325 * If the partition constraints on default partition child imply
3326 * that it will not contain any row that would belong to the new
3327 * partition, we can avoid scanning the child table.
3328 */
3330 def_part_constraints))
3331 {
3333 (errmsg_internal("updated partition constraint for default partition \"%s\" is implied by existing constraints",
3334 RelationGetRelationName(part_rel))));
3335
3336 table_close(part_rel, NoLock);
3337 continue;
3338 }
3339 }
3340 else
3341 {
3342 part_rel = default_rel;
3343 partition_constraint = make_ands_explicit(def_part_constraints);
3344 }
3345
3346 /*
3347 * Only RELKIND_RELATION relations (i.e. leaf partitions) need to be
3348 * scanned.
3349 */
3350 if (part_rel->rd_rel->relkind != RELKIND_RELATION)
3351 {
3352 if (part_rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
3354 (errcode(ERRCODE_CHECK_VIOLATION),
3355 errmsg("skipped scanning foreign table \"%s\" which is a partition of default partition \"%s\"",
3356 RelationGetRelationName(part_rel),
3357 RelationGetRelationName(default_rel))));
3358
3359 if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
3360 table_close(part_rel, NoLock);
3361
3362 continue;
3363 }
3364
3365 estate = CreateExecutorState();
3366
3367 /* Build expression execution states for partition check quals */
3368 partqualstate = ExecPrepareExpr(partition_constraint, estate);
3369
3370 econtext = GetPerTupleExprContext(estate);
3371 snapshot = RegisterSnapshot(GetLatestSnapshot());
3372 tupslot = table_slot_create(part_rel, &estate->es_tupleTable);
3373 scan = table_beginscan(part_rel, snapshot, 0, NULL);
3374
3375 /*
3376 * Switch to per-tuple memory context and reset it for each tuple
3377 * produced, so we don't leak memory.
3378 */
3380
3381 while (table_scan_getnextslot(scan, ForwardScanDirection, tupslot))
3382 {
3383 econtext->ecxt_scantuple = tupslot;
3384
3385 if (!ExecCheck(partqualstate, econtext))
3386 ereport(ERROR,
3387 (errcode(ERRCODE_CHECK_VIOLATION),
3388 errmsg("updated partition constraint for default partition \"%s\" would be violated by some row",
3389 RelationGetRelationName(default_rel)),
3390 errtable(default_rel)));
3391
3392 ResetExprContext(econtext);
3394 }
3395
3396 MemoryContextSwitchTo(oldCxt);
3397 table_endscan(scan);
3398 UnregisterSnapshot(snapshot);
3400 FreeExecutorState(estate);
3401
3402 if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
3403 table_close(part_rel, NoLock); /* keep the lock until commit */
3404 }
3405}
3406
3407/*
3408 * get_hash_partition_greatest_modulus
3409 *
3410 * Returns the greatest modulus of the hash partition bound.
3411 * This is no longer used in the core code, but we keep it around
3412 * in case external modules are using it.
3413 */
3414int
3416{
3417 Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH);
3418 return bound->nindexes;
3419}
3420
3421/*
3422 * make_one_partition_rbound
3423 *
3424 * Return a PartitionRangeBound given a list of PartitionRangeDatum elements
3425 * and a flag telling whether the bound is lower or not. Made into a function
3426 * because there are multiple sites that want to use this facility.
3427 */
3428static PartitionRangeBound *
3430{
3431 PartitionRangeBound *bound;
3432 ListCell *lc;
3433 int i;
3434
3435 Assert(datums != NIL);
3436
3437 bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
3438 bound->index = index;
3439 bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
3440 bound->kind = (PartitionRangeDatumKind *) palloc0(key->partnatts *
3441 sizeof(PartitionRangeDatumKind));
3442 bound->lower = lower;
3443
3444 i = 0;
3445 foreach(lc, datums)
3446 {
3448
3449 /* What's contained in this range datum? */
3450 bound->kind[i] = datum->kind;
3451
3452 if (datum->kind == PARTITION_RANGE_DATUM_VALUE)
3453 {
3454 Const *val = castNode(Const, datum->value);
3455
3456 if (val->constisnull)
3457 elog(ERROR, "invalid range bound datum");
3458 bound->datums[i] = val->constvalue;
3459 }
3460
3461 i++;
3462 }
3463
3464 return bound;
3465}
3466
3467/*
3468 * partition_rbound_cmp
3469 *
3470 * For two range bounds this decides whether the 1st one (specified by
3471 * datums1, kind1, and lower1) is <, =, or > the bound specified in *b2.
3472 *
3473 * 0 is returned if they are equal, otherwise a non-zero integer whose sign
3474 * indicates the ordering, and whose absolute value gives the 1-based
3475 * partition key number of the first mismatching column.
3476 *
3477 * partnatts, partsupfunc and partcollation give the number of attributes in the
3478 * bounds to be compared, comparison function to be used and the collations of
3479 * attributes, respectively.
3480 *
3481 * Note that if the values of the two range bounds compare equal, then we take
3482 * into account whether they are upper or lower bounds, and an upper bound is
3483 * considered to be smaller than a lower bound. This is important to the way
3484 * that RelationBuildPartitionDesc() builds the PartitionBoundInfoData
3485 * structure, which only stores the upper bound of a common boundary between
3486 * two contiguous partitions.
3487 */
3488static int32
3489partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
3490 Oid *partcollation,
3491 Datum *datums1, PartitionRangeDatumKind *kind1,
3492 bool lower1, PartitionRangeBound *b2)
3493{
3494 int32 colnum = 0;
3495 int32 cmpval = 0; /* placate compiler */
3496 int i;
3497 Datum *datums2 = b2->datums;
3498 PartitionRangeDatumKind *kind2 = b2->kind;
3499 bool lower2 = b2->lower;
3500
3501 for (i = 0; i < partnatts; i++)
3502 {
3503 /* Track column number in case we need it for result */
3504 colnum++;
3505
3506 /*
3507 * First, handle cases where the column is unbounded, which should not
3508 * invoke the comparison procedure, and should not consider any later
3509 * columns. Note that the PartitionRangeDatumKind enum elements
3510 * compare the same way as the values they represent.
3511 */
3512 if (kind1[i] < kind2[i])
3513 return -colnum;
3514 else if (kind1[i] > kind2[i])
3515 return colnum;
3516 else if (kind1[i] != PARTITION_RANGE_DATUM_VALUE)
3517 {
3518 /*
3519 * The column bounds are both MINVALUE or both MAXVALUE. No later
3520 * columns should be considered, but we still need to compare
3521 * whether they are upper or lower bounds.
3522 */
3523 break;
3524 }
3525
3526 cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
3527 partcollation[i],
3528 datums1[i],
3529 datums2[i]));
3530 if (cmpval != 0)
3531 break;
3532 }
3533
3534 /*
3535 * If the comparison is anything other than equal, we're done. If they
3536 * compare equal though, we still have to consider whether the boundaries
3537 * are inclusive or exclusive. Exclusive one is considered smaller of the
3538 * two.
3539 */
3540 if (cmpval == 0 && lower1 != lower2)
3541 cmpval = lower1 ? 1 : -1;
3542
3543 return cmpval == 0 ? 0 : (cmpval < 0 ? -colnum : colnum);
3544}
3545
3546/*
3547 * partition_rbound_datum_cmp
3548 *
3549 * Return whether range bound (specified in rb_datums and rb_kind)
3550 * is <, =, or > partition key of tuple (tuple_datums)
3551 *
3552 * n_tuple_datums, partsupfunc and partcollation give number of attributes in
3553 * the bounds to be compared, comparison function to be used and the collations
3554 * of attributes resp.
3555 */
3556int32
3557partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
3558 Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
3559 Datum *tuple_datums, int n_tuple_datums)
3560{
3561 int i;
3562 int32 cmpval = -1;
3563
3564 for (i = 0; i < n_tuple_datums; i++)
3565 {
3566 if (rb_kind[i] == PARTITION_RANGE_DATUM_MINVALUE)
3567 return -1;
3568 else if (rb_kind[i] == PARTITION_RANGE_DATUM_MAXVALUE)
3569 return 1;
3570
3571 cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
3572 partcollation[i],
3573 rb_datums[i],
3574 tuple_datums[i]));
3575 if (cmpval != 0)
3576 break;
3577 }
3578
3579 return cmpval;
3580}
3581
3582/*
3583 * partition_hbound_cmp
3584 *
3585 * Compares modulus first, then remainder if modulus is equal.
3586 */
3587static int32
3588partition_hbound_cmp(int modulus1, int remainder1, int modulus2, int remainder2)
3589{
3590 if (modulus1 < modulus2)
3591 return -1;
3592 if (modulus1 > modulus2)
3593 return 1;
3594 if (modulus1 == modulus2 && remainder1 != remainder2)
3595 return (remainder1 > remainder2) ? 1 : -1;
3596 return 0;
3597}
3598
3599/*
3600 * partition_list_bsearch
3601 * Returns the index of the greatest bound datum that is less than equal
3602 * to the given value or -1 if all of the bound datums are greater
3603 *
3604 * *is_equal is set to true if the bound datum at the returned index is equal
3605 * to the input value.
3606 */
3607int
3608partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
3609 PartitionBoundInfo boundinfo,
3610 Datum value, bool *is_equal)
3611{
3612 int lo,
3613 hi,
3614 mid;
3615
3616 lo = -1;
3617 hi = boundinfo->ndatums - 1;
3618 while (lo < hi)
3619 {
3620 int32 cmpval;
3621
3622 mid = (lo + hi + 1) / 2;
3623 cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0],
3624 partcollation[0],
3625 boundinfo->datums[mid][0],
3626 value));
3627 if (cmpval <= 0)
3628 {
3629 lo = mid;
3630 *is_equal = (cmpval == 0);
3631 if (*is_equal)
3632 break;
3633 }
3634 else
3635 hi = mid - 1;
3636 }
3637
3638 return lo;
3639}
3640
3641/*
3642 * partition_range_bsearch
3643 * Returns the index of the greatest range bound that is less than or
3644 * equal to the given range bound or -1 if all of the range bounds are
3645 * greater
3646 *
3647 * Upon return from this function, *cmpval is set to 0 if the bound at the
3648 * returned index matches the input range bound exactly, otherwise a
3649 * non-zero integer whose sign indicates the ordering, and whose absolute
3650 * value gives the 1-based partition key number of the first mismatching
3651 * column.
3652 */
3653static int
3654partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
3655 Oid *partcollation,
3656 PartitionBoundInfo boundinfo,
3657 PartitionRangeBound *probe, int32 *cmpval)
3658{
3659 int lo,
3660 hi,
3661 mid;
3662
3663 lo = -1;
3664 hi = boundinfo->ndatums - 1;
3665 while (lo < hi)
3666 {
3667 mid = (lo + hi + 1) / 2;
3668 *cmpval = partition_rbound_cmp(partnatts, partsupfunc,
3669 partcollation,
3670 boundinfo->datums[mid],
3671 boundinfo->kind[mid],
3672 (boundinfo->indexes[mid] == -1),
3673 probe);
3674 if (*cmpval <= 0)
3675 {
3676 lo = mid;
3677 if (*cmpval == 0)
3678 break;
3679 }
3680 else
3681 hi = mid - 1;
3682 }
3683
3684 return lo;
3685}
3686
3687/*
3688 * partition_range_datum_bsearch
3689 * Returns the index of the greatest range bound that is less than or
3690 * equal to the given tuple or -1 if all of the range bounds are greater
3691 *
3692 * *is_equal is set to true if the range bound at the returned index is equal
3693 * to the input tuple.
3694 */
3695int
3696partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
3697 PartitionBoundInfo boundinfo,
3698 int nvalues, Datum *values, bool *is_equal)
3699{
3700 int lo,
3701 hi,
3702 mid;
3703
3704 lo = -1;
3705 hi = boundinfo->ndatums - 1;
3706 while (lo < hi)
3707 {
3708 int32 cmpval;
3709
3710 mid = (lo + hi + 1) / 2;
3711 cmpval = partition_rbound_datum_cmp(partsupfunc,
3712 partcollation,
3713 boundinfo->datums[mid],
3714 boundinfo->kind[mid],
3715 values,
3716 nvalues);
3717 if (cmpval <= 0)
3718 {
3719 lo = mid;
3720 *is_equal = (cmpval == 0);
3721
3722 if (*is_equal)
3723 break;
3724 }
3725 else
3726 hi = mid - 1;
3727 }
3728
3729 return lo;
3730}
3731
3732/*
3733 * partition_hash_bsearch
3734 * Returns the index of the greatest (modulus, remainder) pair that is
3735 * less than or equal to the given (modulus, remainder) pair or -1 if
3736 * all of them are greater
3737 */
3738int
3740 int modulus, int remainder)
3741{
3742 int lo,
3743 hi,
3744 mid;
3745
3746 lo = -1;
3747 hi = boundinfo->ndatums - 1;
3748 while (lo < hi)
3749 {
3750 int32 cmpval,
3751 bound_modulus,
3752 bound_remainder;
3753
3754 mid = (lo + hi + 1) / 2;
3755 bound_modulus = DatumGetInt32(boundinfo->datums[mid][0]);
3756 bound_remainder = DatumGetInt32(boundinfo->datums[mid][1]);
3757 cmpval = partition_hbound_cmp(bound_modulus, bound_remainder,
3758 modulus, remainder);
3759 if (cmpval <= 0)
3760 {
3761 lo = mid;
3762
3763 if (cmpval == 0)
3764 break;
3765 }
3766 else
3767 hi = mid - 1;
3768 }
3769
3770 return lo;
3771}
3772
3773/*
3774 * qsort_partition_hbound_cmp
3775 *
3776 * Hash bounds are sorted by modulus, then by remainder.
3777 */
3778static int32
3779qsort_partition_hbound_cmp(const void *a, const void *b)
3780{
3781 const PartitionHashBound *h1 = (const PartitionHashBound *) a;
3782 const PartitionHashBound *h2 = (const PartitionHashBound *) b;
3783
3784 return partition_hbound_cmp(h1->modulus, h1->remainder,
3785 h2->modulus, h2->remainder);
3786}
3787
3788/*
3789 * qsort_partition_list_value_cmp
3790 *
3791 * Compare two list partition bound datums.
3792 */
3793static int32
3794qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
3795{
3796 Datum val1 = ((const PartitionListValue *) a)->value,
3797 val2 = ((const PartitionListValue *) b)->value;
3799
3800 return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
3801 key->partcollation[0],
3802 val1, val2));
3803}
3804
3805/*
3806 * qsort_partition_rbound_cmp
3807 *
3808 * Used when sorting range bounds across all range partitions.
3809 */
3810static int32
3811qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
3812{
3813 PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a);
3814 PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b);
3816
3817 return compare_range_bounds(key->partnatts, key->partsupfunc,
3818 key->partcollation,
3819 b1, b2);
3820}
3821
3822/*
3823 * get_partition_operator
3824 *
3825 * Return oid of the operator of the given strategy for the given partition
3826 * key column. It is assumed that the partitioning key is of the same type as
3827 * the chosen partitioning opclass, or at least binary-compatible. In the
3828 * latter case, *need_relabel is set to true if the opclass is not of a
3829 * polymorphic type (indicating a RelabelType node needed on top), otherwise
3830 * false.
3831 */
3832static Oid
3834 bool *need_relabel)
3835{
3836 Oid operoid;
3837
3838 /*
3839 * Get the operator in the partitioning opfamily using the opclass'
3840 * declared input type as both left- and righttype.
3841 */
3842 operoid = get_opfamily_member(key->partopfamily[col],
3843 key->partopcintype[col],
3844 key->partopcintype[col],
3845 strategy);
3846 if (!OidIsValid(operoid))
3847 elog(ERROR, "missing operator %d(%u,%u) in partition opfamily %u",
3848 strategy, key->partopcintype[col], key->partopcintype[col],
3849 key->partopfamily[col]);
3850
3851 /*
3852 * If the partition key column is not of the same type as the operator
3853 * class and not polymorphic, tell caller to wrap the non-Const expression
3854 * in a RelabelType. This matches what parse_coerce.c does.
3855 */
3856 *need_relabel = (key->parttypid[col] != key->partopcintype[col] &&
3857 key->partopcintype[col] != RECORDOID &&
3858 !IsPolymorphicType(key->partopcintype[col]));
3859
3860 return operoid;
3861}
3862
3863/*
3864 * make_partition_op_expr
3865 * Returns an Expr for the given partition key column with arg1 and
3866 * arg2 as its leftop and rightop, respectively
3867 */
3868static Expr *
3870 uint16 strategy, Expr *arg1, Expr *arg2)
3871{
3872 Oid operoid;
3873 bool need_relabel = false;
3874 Expr *result = NULL;
3875
3876 /* Get the correct btree operator for this partitioning column */
3877 operoid = get_partition_operator(key, keynum, strategy, &need_relabel);
3878
3879 /*
3880 * Chosen operator may be such that the non-Const operand needs to be
3881 * coerced, so apply the same; see the comment in
3882 * get_partition_operator().
3883 */
3884 if (!IsA(arg1, Const) &&
3885 (need_relabel ||
3886 key->partcollation[keynum] != key->parttypcoll[keynum]))
3887 arg1 = (Expr *) makeRelabelType(arg1,
3888 key->partopcintype[keynum],
3889 -1,
3890 key->partcollation[keynum],
3892
3893 /* Generate the actual expression */
3894 switch (key->strategy)
3895 {
3897 {
3898 List *elems = (List *) arg2;
3899 int nelems = list_length(elems);
3900
3901 Assert(nelems >= 1);
3902 Assert(keynum == 0);
3903
3904 if (nelems > 1 &&
3905 !type_is_array(key->parttypid[keynum]))
3906 {
3907 ArrayExpr *arrexpr;
3908 ScalarArrayOpExpr *saopexpr;
3909
3910 /* Construct an ArrayExpr for the right-hand inputs */
3911 arrexpr = makeNode(ArrayExpr);
3912 arrexpr->array_typeid =
3913 get_array_type(key->parttypid[keynum]);
3914 arrexpr->array_collid = key->parttypcoll[keynum];
3915 arrexpr->element_typeid = key->parttypid[keynum];
3916 arrexpr->elements = elems;
3917 arrexpr->multidims = false;
3918 arrexpr->location = -1;
3919
3920 /* Build leftop = ANY (rightop) */
3921 saopexpr = makeNode(ScalarArrayOpExpr);
3922 saopexpr->opno = operoid;
3923 saopexpr->opfuncid = get_opcode(operoid);
3924 saopexpr->hashfuncid = InvalidOid;
3925 saopexpr->negfuncid = InvalidOid;
3926 saopexpr->useOr = true;
3927 saopexpr->inputcollid = key->partcollation[keynum];
3928 saopexpr->args = list_make2(arg1, arrexpr);
3929 saopexpr->location = -1;
3930
3931 result = (Expr *) saopexpr;
3932 }
3933 else
3934 {
3935 List *elemops = NIL;
3936 ListCell *lc;
3937
3938 foreach(lc, elems)
3939 {
3940 Expr *elem = lfirst(lc),
3941 *elemop;
3942
3943 elemop = make_opclause(operoid,
3944 BOOLOID,
3945 false,
3946 arg1, elem,
3947 InvalidOid,
3948 key->partcollation[keynum]);
3949 elemops = lappend(elemops, elemop);
3950 }
3951
3952 result = nelems > 1 ? makeBoolExpr(OR_EXPR, elemops, -1) : linitial(elemops);
3953 }
3954 break;
3955 }
3956
3958 result = make_opclause(operoid,
3959 BOOLOID,
3960 false,
3961 arg1, arg2,
3962 InvalidOid,
3963 key->partcollation[keynum]);
3964 break;
3965
3967 Assert(false);
3968 break;
3969 }
3970
3971 return result;
3972}
3973
3974/*
3975 * get_qual_for_hash
3976 *
3977 * Returns a CHECK constraint expression to use as a hash partition's
3978 * constraint, given the parent relation and partition bound structure.
3979 *
3980 * The partition constraint for a hash partition is always a call to the
3981 * built-in function satisfies_hash_partition().
3982 */
3983static List *
3985{
3987 FuncExpr *fexpr;
3988 Node *relidConst;
3989 Node *modulusConst;
3990 Node *remainderConst;
3991 List *args;
3992 ListCell *partexprs_item;
3993 int i;
3994
3995 /* Fixed arguments. */
3996 relidConst = (Node *) makeConst(OIDOID,
3997 -1,
3998 InvalidOid,
3999 sizeof(Oid),
4001 false,
4002 true);
4003
4004 modulusConst = (Node *) makeConst(INT4OID,
4005 -1,
4006 InvalidOid,
4007 sizeof(int32),
4008 Int32GetDatum(spec->modulus),
4009 false,
4010 true);
4011
4012 remainderConst = (Node *) makeConst(INT4OID,
4013 -1,
4014 InvalidOid,
4015 sizeof(int32),
4016 Int32GetDatum(spec->remainder),
4017 false,
4018 true);
4019
4020 args = list_make3(relidConst, modulusConst, remainderConst);
4021 partexprs_item = list_head(key->partexprs);
4022
4023 /* Add an argument for each key column. */
4024 for (i = 0; i < key->partnatts; i++)
4025 {
4026 Node *keyCol;
4027
4028 /* Left operand */
4029 if (key->partattrs[i] != 0)
4030 {
4031 keyCol = (Node *) makeVar(1,
4032 key->partattrs[i],
4033 key->parttypid[i],
4034 key->parttypmod[i],
4035 key->parttypcoll[i],
4036 0);
4037 }
4038 else
4039 {
4040 keyCol = (Node *) copyObject(lfirst(partexprs_item));
4041 partexprs_item = lnext(key->partexprs, partexprs_item);
4042 }
4043
4044 args = lappend(args, keyCol);
4045 }
4046
4047 fexpr = makeFuncExpr(F_SATISFIES_HASH_PARTITION,
4048 BOOLOID,
4049 args,
4050 InvalidOid,
4051 InvalidOid,
4053
4054 return list_make1(fexpr);
4055}
4056
4057/*
4058 * get_qual_for_list
4059 *
4060 * Returns an implicit-AND list of expressions to use as a list partition's
4061 * constraint, given the parent relation and partition bound structure.
4062 *
4063 * The function returns NIL for a default partition when it's the only
4064 * partition since in that case there is no constraint.
4065 */
4066static List *
4068{
4070 List *result;
4071 Expr *keyCol;
4072 Expr *opexpr;
4073 NullTest *nulltest;
4074 ListCell *cell;
4075 List *elems = NIL;
4076 bool list_has_null = false;
4077
4078 /*
4079 * Only single-column list partitioning is supported, so we are worried
4080 * only about the partition key with index 0.
4081 */
4082 Assert(key->partnatts == 1);
4083
4084 /* Construct Var or expression representing the partition column */
4085 if (key->partattrs[0] != 0)
4086 keyCol = (Expr *) makeVar(1,
4087 key->partattrs[0],
4088 key->parttypid[0],
4089 key->parttypmod[0],
4090 key->parttypcoll[0],
4091 0);
4092 else
4093 keyCol = (Expr *) copyObject(linitial(key->partexprs));
4094
4095 /*
4096 * For default list partition, collect datums for all the partitions. The
4097 * default partition constraint should check that the partition key is
4098 * equal to none of those.
4099 */
4100 if (spec->is_default)
4101 {
4102 int i;
4103 int ndatums = 0;
4104 PartitionDesc pdesc = RelationGetPartitionDesc(parent, false);
4105 PartitionBoundInfo boundinfo = pdesc->boundinfo;
4106
4107 if (boundinfo)
4108 {
4109 ndatums = boundinfo->ndatums;
4110
4111 if (partition_bound_accepts_nulls(boundinfo))
4112 list_has_null = true;
4113 }
4114
4115 /*
4116 * If default is the only partition, there need not be any partition
4117 * constraint on it.
4118 */
4119 if (ndatums == 0 && !list_has_null)
4120 return NIL;
4121
4122 for (i = 0; i < ndatums; i++)
4123 {
4124 Const *val;
4125
4126 /*
4127 * Construct Const from known-not-null datum. We must be careful
4128 * to copy the value, because our result has to be able to outlive
4129 * the relcache entry we're copying from.
4130 */
4131 val = makeConst(key->parttypid[0],
4132 key->parttypmod[0],
4133 key->parttypcoll[0],
4134 key->parttyplen[0],
4135 datumCopy(*boundinfo->datums[i],
4136 key->parttypbyval[0],
4137 key->parttyplen[0]),
4138 false, /* isnull */
4139 key->parttypbyval[0]);
4140
4141 elems = lappend(elems, val);
4142 }
4143 }
4144 else
4145 {
4146 /*
4147 * Create list of Consts for the allowed values, excluding any nulls.
4148 */
4149 foreach(cell, spec->listdatums)
4150 {
4151 Const *val = lfirst_node(Const, cell);
4152
4153 if (val->constisnull)
4154 list_has_null = true;
4155 else
4156 elems = lappend(elems, copyObject(val));
4157 }
4158 }
4159
4160 if (elems)
4161 {
4162 /*
4163 * Generate the operator expression from the non-null partition
4164 * values.
4165 */
4167 keyCol, (Expr *) elems);
4168 }
4169 else
4170 {
4171 /*
4172 * If there are no partition values, we don't need an operator
4173 * expression.
4174 */
4175 opexpr = NULL;
4176 }
4177
4178 if (!list_has_null)
4179 {
4180 /*
4181 * Gin up a "col IS NOT NULL" test that will be ANDed with the main
4182 * expression. This might seem redundant, but the partition routing
4183 * machinery needs it.
4184 */
4185 nulltest = makeNode(NullTest);
4186 nulltest->arg = keyCol;
4187 nulltest->nulltesttype = IS_NOT_NULL;
4188 nulltest->argisrow = false;
4189 nulltest->location = -1;
4190
4191 result = opexpr ? list_make2(nulltest, opexpr) : list_make1(nulltest);
4192 }
4193 else
4194 {
4195 /*
4196 * Gin up a "col IS NULL" test that will be OR'd with the main
4197 * expression.
4198 */
4199 nulltest = makeNode(NullTest);
4200 nulltest->arg = keyCol;
4201 nulltest->nulltesttype = IS_NULL;
4202 nulltest->argisrow = false;
4203 nulltest->location = -1;
4204
4205 if (opexpr)
4206 {
4207 Expr *or;
4208
4209 or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1);
4210 result = list_make1(or);
4211 }
4212 else
4213 result = list_make1(nulltest);
4214 }
4215
4216 /*
4217 * Note that, in general, applying NOT to a constraint expression doesn't
4218 * necessarily invert the set of rows it accepts, because NOT (NULL) is
4219 * NULL. However, the partition constraints we construct here never
4220 * evaluate to NULL, so applying NOT works as intended.
4221 */
4222 if (spec->is_default)
4223 {
4224 result = list_make1(make_ands_explicit(result));
4225 result = list_make1(makeBoolExpr(NOT_EXPR, result, -1));
4226 }
4227
4228 return result;
4229}
4230
4231/*
4232 * get_qual_for_range
4233 *
4234 * Returns an implicit-AND list of expressions to use as a range partition's
4235 * constraint, given the parent relation and partition bound structure.
4236 *
4237 * For a multi-column range partition key, say (a, b, c), with (al, bl, cl)
4238 * as the lower bound tuple and (au, bu, cu) as the upper bound tuple, we
4239 * generate an expression tree of the following form:
4240 *
4241 * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
4242 * AND
4243 * (a > al OR (a = al AND b > bl) OR (a = al AND b = bl AND c >= cl))
4244 * AND
4245 * (a < au OR (a = au AND b < bu) OR (a = au AND b = bu AND c < cu))
4246 *
4247 * It is often the case that a prefix of lower and upper bound tuples contains
4248 * the same values, for example, (al = au), in which case, we will emit an
4249 * expression tree of the following form:
4250 *
4251 * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
4252 * AND
4253 * (a = al)
4254 * AND
4255 * (b > bl OR (b = bl AND c >= cl))
4256 * AND
4257 * (b < bu OR (b = bu AND c < cu))
4258 *
4259 * If a bound datum is either MINVALUE or MAXVALUE, these expressions are
4260 * simplified using the fact that any value is greater than MINVALUE and less
4261 * than MAXVALUE. So, for example, if cu = MAXVALUE, c < cu is automatically
4262 * true, and we need not emit any expression for it, and the last line becomes
4263 *
4264 * (b < bu) OR (b = bu), which is simplified to (b <= bu)
4265 *
4266 * In most common cases with only one partition column, say a, the following
4267 * expression tree will be generated: a IS NOT NULL AND a >= al AND a < au
4268 *
4269 * For default partition, it returns the negation of the constraints of all
4270 * the other partitions.
4271 *
4272 * External callers should pass for_default as false; we set it to true only
4273 * when recursing.
4274 */
4275static List *
4277 bool for_default)
4278{
4279 List *result = NIL;
4280 ListCell *cell1,
4281 *cell2,
4282 *partexprs_item,
4283 *partexprs_item_saved;
4284 int i,
4285 j;
4286 PartitionRangeDatum *ldatum,
4287 *udatum;
4289 Expr *keyCol;
4290 Const *lower_val,
4291 *upper_val;
4292 List *lower_or_arms,
4293 *upper_or_arms;
4294 int num_or_arms,
4295 current_or_arm;
4296 ListCell *lower_or_start_datum,
4297 *upper_or_start_datum;
4298 bool need_next_lower_arm,
4299 need_next_upper_arm;
4300
4301 if (spec->is_default)
4302 {
4303 List *or_expr_args = NIL;
4304 PartitionDesc pdesc = RelationGetPartitionDesc(parent, false);
4305 Oid *inhoids = pdesc->oids;
4306 int nparts = pdesc->nparts,
4307 k;
4308
4309 for (k = 0; k < nparts; k++)
4310 {
4311 Oid inhrelid = inhoids[k];
4312 HeapTuple tuple;
4313 Datum datum;
4314 PartitionBoundSpec *bspec;
4315
4316 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(inhrelid));
4317 if (!HeapTupleIsValid(tuple))
4318 elog(ERROR, "cache lookup failed for relation %u", inhrelid);
4319
4320 datum = SysCacheGetAttrNotNull(RELOID, tuple,
4321 Anum_pg_class_relpartbound);
4322 bspec = (PartitionBoundSpec *)
4324 if (!IsA(bspec, PartitionBoundSpec))
4325 elog(ERROR, "expected PartitionBoundSpec");
4326
4327 if (!bspec->is_default)
4328 {
4329 List *part_qual;
4330
4331 part_qual = get_qual_for_range(parent, bspec, true);
4332
4333 /*
4334 * AND the constraints of the partition and add to
4335 * or_expr_args
4336 */
4337 or_expr_args = lappend(or_expr_args, list_length(part_qual) > 1
4338 ? makeBoolExpr(AND_EXPR, part_qual, -1)
4339 : linitial(part_qual));
4340 }
4341 ReleaseSysCache(tuple);
4342 }
4343
4344 if (or_expr_args != NIL)
4345 {
4346 Expr *other_parts_constr;
4347
4348 /*
4349 * Combine the constraints obtained for non-default partitions
4350 * using OR. As requested, each of the OR's args doesn't include
4351 * the NOT NULL test for partition keys (which is to avoid its
4352 * useless repetition). Add the same now.
4353 */
4354 other_parts_constr =
4357 list_length(or_expr_args) > 1
4358 ? makeBoolExpr(OR_EXPR, or_expr_args,
4359 -1)
4360 : linitial(or_expr_args)),
4361 -1);
4362
4363 /*
4364 * Finally, the default partition contains everything *NOT*
4365 * contained in the non-default partitions.
4366 */
4368 list_make1(other_parts_constr), -1));
4369 }
4370
4371 return result;
4372 }
4373
4374 /*
4375 * If it is the recursive call for default, we skip the get_range_nulltest
4376 * to avoid accumulating the NullTest on the same keys for each partition.
4377 */
4378 if (!for_default)
4379 result = get_range_nulltest(key);
4380
4381 /*
4382 * Iterate over the key columns and check if the corresponding lower and
4383 * upper datums are equal using the btree equality operator for the
4384 * column's type. If equal, we emit single keyCol = common_value
4385 * expression. Starting from the first column for which the corresponding
4386 * lower and upper bound datums are not equal, we generate OR expressions
4387 * as shown in the function's header comment.
4388 */
4389 i = 0;
4390 partexprs_item = list_head(key->partexprs);
4391 partexprs_item_saved = partexprs_item; /* placate compiler */
4392 forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
4393 {
4394 EState *estate;
4395 MemoryContext oldcxt;
4396 Expr *test_expr;
4397 ExprState *test_exprstate;
4398 Datum test_result;
4399 bool isNull;
4400
4401 ldatum = lfirst_node(PartitionRangeDatum, cell1);
4402 udatum = lfirst_node(PartitionRangeDatum, cell2);
4403
4404 /*
4405 * Since get_range_key_properties() modifies partexprs_item, and we
4406 * might need to start over from the previous expression in the later
4407 * part of this function, save away the current value.
4408 */
4409 partexprs_item_saved = partexprs_item;
4410
4411 get_range_key_properties(key, i, ldatum, udatum,
4412 &partexprs_item,
4413 &keyCol,
4414 &lower_val, &upper_val);
4415
4416 /*
4417 * If either value is NULL, the corresponding partition bound is
4418 * either MINVALUE or MAXVALUE, and we treat them as unequal, because
4419 * even if they're the same, there is no common value to equate the
4420 * key column with.
4421 */
4422 if (!lower_val || !upper_val)
4423 break;
4424
4425 /* Create the test expression */
4426 estate = CreateExecutorState();
4427 oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
4429 (Expr *) lower_val,
4430 (Expr *) upper_val);
4431 fix_opfuncids((Node *) test_expr);
4432 test_exprstate = ExecInitExpr(test_expr, NULL);
4433 test_result = ExecEvalExprSwitchContext(test_exprstate,
4434 GetPerTupleExprContext(estate),
4435 &isNull);
4436 MemoryContextSwitchTo(oldcxt);
4437 FreeExecutorState(estate);
4438
4439 /* If not equal, go generate the OR expressions */
4440 if (!DatumGetBool(test_result))
4441 break;
4442
4443 /*
4444 * The bounds for the last key column can't be equal, because such a
4445 * range partition would never be allowed to be defined (it would have
4446 * an empty range otherwise).
4447 */
4448 if (i == key->partnatts - 1)
4449 elog(ERROR, "invalid range bound specification");
4450
4451 /* Equal, so generate keyCol = lower_val expression */
4452 result = lappend(result,
4454 keyCol, (Expr *) lower_val));
4455
4456 i++;
4457 }
4458
4459 /* First pair of lower_val and upper_val that are not equal. */
4460 lower_or_start_datum = cell1;
4461 upper_or_start_datum = cell2;
4462
4463 /* OR will have as many arms as there are key columns left. */
4464 num_or_arms = key->partnatts - i;
4465 current_or_arm = 0;
4466 lower_or_arms = upper_or_arms = NIL;
4467 need_next_lower_arm = need_next_upper_arm = true;
4468 while (current_or_arm < num_or_arms)
4469 {
4470 List *lower_or_arm_args = NIL,
4471 *upper_or_arm_args = NIL;
4472
4473 /* Restart scan of columns from the i'th one */
4474 j = i;
4475 partexprs_item = partexprs_item_saved;
4476
4477 for_both_cell(cell1, spec->lowerdatums, lower_or_start_datum,
4478 cell2, spec->upperdatums, upper_or_start_datum)
4479 {
4480 PartitionRangeDatum *ldatum_next = NULL,
4481 *udatum_next = NULL;
4482
4483 ldatum = lfirst_node(PartitionRangeDatum, cell1);
4484 if (lnext(spec->lowerdatums, cell1))
4485 ldatum_next = castNode(PartitionRangeDatum,
4486 lfirst(lnext(spec->lowerdatums, cell1)));
4487 udatum = lfirst_node(PartitionRangeDatum, cell2);
4488 if (lnext(spec->upperdatums, cell2))
4489 udatum_next = castNode(PartitionRangeDatum,
4490 lfirst(lnext(spec->upperdatums, cell2)));
4491 get_range_key_properties(key, j, ldatum, udatum,
4492 &partexprs_item,
4493 &keyCol,
4494 &lower_val, &upper_val);
4495
4496 if (need_next_lower_arm && lower_val)
4497 {
4498 uint16 strategy;
4499
4500 /*
4501 * For the non-last columns of this arm, use the EQ operator.
4502 * For the last column of this arm, use GT, unless this is the
4503 * last column of the whole bound check, or the next bound
4504 * datum is MINVALUE, in which case use GE.
4505 */
4506 if (j - i < current_or_arm)
4507 strategy = BTEqualStrategyNumber;
4508 else if (j == key->partnatts - 1 ||
4509 (ldatum_next &&
4510 ldatum_next->kind == PARTITION_RANGE_DATUM_MINVALUE))
4512 else
4513 strategy = BTGreaterStrategyNumber;
4514
4515 lower_or_arm_args = lappend(lower_or_arm_args,
4517 strategy,
4518 keyCol,
4519 (Expr *) lower_val));
4520 }
4521
4522 if (need_next_upper_arm && upper_val)
4523 {
4524 uint16 strategy;
4525
4526 /*
4527 * For the non-last columns of this arm, use the EQ operator.
4528 * For the last column of this arm, use LT, unless the next
4529 * bound datum is MAXVALUE, in which case use LE.
4530 */
4531 if (j - i < current_or_arm)
4532 strategy = BTEqualStrategyNumber;
4533 else if (udatum_next &&
4534 udatum_next->kind == PARTITION_RANGE_DATUM_MAXVALUE)
4535 strategy = BTLessEqualStrategyNumber;
4536 else
4537 strategy = BTLessStrategyNumber;
4538
4539 upper_or_arm_args = lappend(upper_or_arm_args,
4541 strategy,
4542 keyCol,
4543 (Expr *) upper_val));
4544 }
4545
4546 /*
4547 * Did we generate enough of OR's arguments? First arm considers
4548 * the first of the remaining columns, second arm considers first
4549 * two of the remaining columns, and so on.
4550 */
4551 ++j;
4552 if (j - i > current_or_arm)
4553 {
4554 /*
4555 * We must not emit any more arms if the new column that will
4556 * be considered is unbounded, or this one was.
4557 */
4558 if (!lower_val || !ldatum_next ||
4559 ldatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
4560 need_next_lower_arm = false;
4561 if (!upper_val || !udatum_next ||
4562 udatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
4563 need_next_upper_arm = false;
4564 break;
4565 }
4566 }
4567
4568 if (lower_or_arm_args != NIL)
4569 lower_or_arms = lappend(lower_or_arms,
4570 list_length(lower_or_arm_args) > 1
4571 ? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1)
4572 : linitial(lower_or_arm_args));
4573
4574 if (upper_or_arm_args != NIL)
4575 upper_or_arms = lappend(upper_or_arms,
4576 list_length(upper_or_arm_args) > 1
4577 ? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1)
4578 : linitial(upper_or_arm_args));
4579
4580 /* If no work to do in the next iteration, break away. */
4581 if (!need_next_lower_arm && !need_next_upper_arm)
4582 break;
4583
4584 ++current_or_arm;
4585 }
4586
4587 /*
4588 * Generate the OR expressions for each of lower and upper bounds (if
4589 * required), and append to the list of implicitly ANDed list of
4590 * expressions.
4591 */
4592 if (lower_or_arms != NIL)
4593 result = lappend(result,
4594 list_length(lower_or_arms) > 1
4595 ? makeBoolExpr(OR_EXPR, lower_or_arms, -1)
4596 : linitial(lower_or_arms));
4597 if (upper_or_arms != NIL)
4598 result = lappend(result,
4599 list_length(upper_or_arms) > 1
4600 ? makeBoolExpr(OR_EXPR, upper_or_arms, -1)
4601 : linitial(upper_or_arms));
4602
4603 /*
4604 * As noted above, for non-default, we return list with constant TRUE. If
4605 * the result is NIL during the recursive call for default, it implies
4606 * this is the only other partition which can hold every value of the key
4607 * except NULL. Hence we return the NullTest result skipped earlier.
4608 */
4609 if (result == NIL)
4610 result = for_default
4612 : list_make1(makeBoolConst(true, false));
4613
4614 return result;
4615}
4616
4617/*
4618 * get_range_key_properties
4619 * Returns range partition key information for a given column
4620 *
4621 * This is a subroutine for get_qual_for_range, and its API is pretty
4622 * specialized to that caller.
4623 *
4624 * Constructs an Expr for the key column (returned in *keyCol) and Consts
4625 * for the lower and upper range limits (returned in *lower_val and
4626 * *upper_val). For MINVALUE/MAXVALUE limits, NULL is returned instead of
4627 * a Const. All of these structures are freshly palloc'd.
4628 *
4629 * *partexprs_item points to the cell containing the next expression in
4630 * the key->partexprs list, or NULL. It may be advanced upon return.
4631 */
4632static void
4634 PartitionRangeDatum *ldatum,
4635 PartitionRangeDatum *udatum,
4636 ListCell **partexprs_item,
4637 Expr **keyCol,
4638 Const **lower_val, Const **upper_val)
4639{
4640 /* Get partition key expression for this column */
4641 if (key->partattrs[keynum] != 0)
4642 {
4643 *keyCol = (Expr *) makeVar(1,
4644 key->partattrs[keynum],
4645 key->parttypid[keynum],
4646 key->parttypmod[keynum],
4647 key->parttypcoll[keynum],
4648 0);
4649 }
4650 else
4651 {
4652 if (*partexprs_item == NULL)
4653 elog(ERROR, "wrong number of partition key expressions");
4654 *keyCol = copyObject(lfirst(*partexprs_item));
4655 *partexprs_item = lnext(key->partexprs, *partexprs_item);
4656 }
4657
4658 /* Get appropriate Const nodes for the bounds */
4659 if (ldatum->kind == PARTITION_RANGE_DATUM_VALUE)
4660 *lower_val = castNode(Const, copyObject(ldatum->value));
4661 else
4662 *lower_val = NULL;
4663
4664 if (udatum->kind == PARTITION_RANGE_DATUM_VALUE)
4665 *upper_val = castNode(Const, copyObject(udatum->value));
4666 else
4667 *upper_val = NULL;
4668}
4669
4670/*
4671 * get_range_nulltest
4672 *
4673 * A non-default range partition table does not currently allow partition
4674 * keys to be null, so emit an IS NOT NULL expression for each key column.
4675 */
4676static List *
4678{
4679 List *result = NIL;
4680 NullTest *nulltest;
4681 ListCell *partexprs_item;
4682 int i;
4683
4684 partexprs_item = list_head(key->partexprs);
4685 for (i = 0; i < key->partnatts; i++)
4686 {
4687 Expr *keyCol;
4688
4689 if (key->partattrs[i] != 0)
4690 {
4691 keyCol = (Expr *) makeVar(1,
4692 key->partattrs[i],
4693 key->parttypid[i],
4694 key->parttypmod[i],
4695 key->parttypcoll[i],
4696 0);
4697 }
4698 else
4699 {
4700 if (partexprs_item == NULL)
4701 elog(ERROR, "wrong number of partition key expressions");
4702 keyCol = copyObject(lfirst(partexprs_item));
4703 partexprs_item = lnext(key->partexprs, partexprs_item);
4704 }
4705
4706 nulltest = makeNode(NullTest);
4707 nulltest->arg = keyCol;
4708 nulltest->nulltesttype = IS_NOT_NULL;
4709 nulltest->argisrow = false;
4710 nulltest->location = -1;
4711 result = lappend(result, nulltest);
4712 }
4713
4714 return result;
4715}
4716
4717/*
4718 * compute_partition_hash_value
4719 *
4720 * Compute the hash value for given partition key values.
4721 */
4722uint64
4723compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, const Oid *partcollation,
4724 const Datum *values, const bool *isnull)
4725{
4726 int i;
4727 uint64 rowHash = 0;
4729
4730 for (i = 0; i < partnatts; i++)
4731 {
4732 /* Nulls are just ignored */
4733 if (!isnull[i])
4734 {
4735 Datum hash;
4736
4737 Assert(OidIsValid(partsupfunc[i].fn_oid));
4738
4739 /*
4740 * Compute hash for each datum value by calling respective
4741 * datatype-specific hash functions of each partition key
4742 * attribute.
4743 */
4744 hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i],
4745 values[i], seed);
4746
4747 /* Form a single 64-bit hash value */
4748 rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
4749 }
4750 }
4751
4752 return rowHash;
4753}
4754
4755/*
4756 * satisfies_hash_partition
4757 *
4758 * This is an SQL-callable function for use in hash partition constraints.
4759 * The first three arguments are the parent table OID, modulus, and remainder.
4760 * The remaining arguments are the value of the partitioning columns (or
4761 * expressions); these are hashed and the results are combined into a single
4762 * hash value by calling hash_combine64.
4763 *
4764 * Returns true if remainder produced when this computed single hash value is
4765 * divided by the given modulus is equal to given remainder, otherwise false.
4766 * NB: it's important that this never return null, as the constraint machinery
4767 * would consider that to be a "pass".
4768 *
4769 * See get_qual_for_hash() for usage.
4770 */
4771Datum
4773{
4774 typedef struct ColumnsHashData
4775 {
4776 Oid relid;
4777 int nkeys;
4778 Oid variadic_type;
4779 int16 variadic_typlen;
4780 bool variadic_typbyval;
4781 char variadic_typalign;
4782 Oid partcollid[PARTITION_MAX_KEYS];
4783 FmgrInfo partsupfunc[FLEXIBLE_ARRAY_MEMBER];
4784 } ColumnsHashData;
4785 Oid parentId;
4786 int modulus;
4787 int remainder;
4789 ColumnsHashData *my_extra;
4790 uint64 rowHash = 0;
4791
4792 /* Return false if the parent OID, modulus, or remainder is NULL. */
4793 if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2))
4794 PG_RETURN_BOOL(false);
4795 parentId = PG_GETARG_OID(0);
4796 modulus = PG_GETARG_INT32(1);
4798
4799 /* Sanity check modulus and remainder. */
4800 if (modulus <= 0)
4801 ereport(ERROR,
4802 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4803 errmsg("modulus for hash partition must be an integer value greater than zero")));
4804 if (remainder < 0)
4805 ereport(ERROR,
4806 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4807 errmsg("remainder for hash partition must be an integer value greater than or equal to zero")));
4808 if (remainder >= modulus)
4809 ereport(ERROR,
4810 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4811 errmsg("remainder for hash partition must be less than modulus")));
4812
4813 /*
4814 * Cache hash function information.
4815 */
4816 my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
4817 if (my_extra == NULL || my_extra->relid != parentId)
4818 {
4819 Relation parent;
4821 int j;
4822
4823 /* Open parent relation and fetch partition key info */
4824 parent = relation_open(parentId, AccessShareLock);
4825 key = RelationGetPartitionKey(parent);
4826
4827 /* Reject parent table that is not hash-partitioned. */
4828 if (key == NULL || key->strategy != PARTITION_STRATEGY_HASH)
4829 ereport(ERROR,
4830 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4831 errmsg("\"%s\" is not a hash partitioned table",
4832 get_rel_name(parentId))));
4833
4834 if (!get_fn_expr_variadic(fcinfo->flinfo))
4835 {
4836 int nargs = PG_NARGS() - 3;
4837
4838 /* complain if wrong number of column values */
4839 if (key->partnatts != nargs)
4840 ereport(ERROR,
4841 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4842 errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
4843 key->partnatts, nargs)));
4844
4845 /* allocate space for our cache */
4846 fcinfo->flinfo->fn_extra =
4847 MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
4848 offsetof(ColumnsHashData, partsupfunc) +
4849 sizeof(FmgrInfo) * nargs);
4850 my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
4851 my_extra->relid = parentId;
4852 my_extra->nkeys = key->partnatts;
4853 memcpy(my_extra->partcollid, key->partcollation,
4854 key->partnatts * sizeof(Oid));
4855
4856 /* check argument types and save fmgr_infos */
4857 for (j = 0; j < key->partnatts; ++j)
4858 {
4859 Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, j + 3);
4860
4861 if (argtype != key->parttypid[j] && !IsBinaryCoercible(argtype, key->parttypid[j]))
4862 ereport(ERROR,
4863 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4864 errmsg("column %d of the partition key has type %s, but supplied value is of type %s",
4865 j + 1, format_type_be(key->parttypid[j]), format_type_be(argtype))));
4866
4867 fmgr_info_copy(&my_extra->partsupfunc[j],
4868 &key->partsupfunc[j],
4869 fcinfo->flinfo->fn_mcxt);
4870 }
4871 }
4872 else
4873 {
4874 ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
4875
4876 /* allocate space for our cache -- just one FmgrInfo in this case */
4877 fcinfo->flinfo->fn_extra =
4878 MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
4879 offsetof(ColumnsHashData, partsupfunc) +
4880 sizeof(FmgrInfo));
4881 my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
4882 my_extra->relid = parentId;
4883 my_extra->nkeys = key->partnatts;
4884 my_extra->variadic_type = ARR_ELEMTYPE(variadic_array);
4885 get_typlenbyvalalign(my_extra->variadic_type,
4886 &my_extra->variadic_typlen,
4887 &my_extra->variadic_typbyval,
4888 &my_extra->variadic_typalign);
4889 my_extra->partcollid[0] = key->partcollation[0];
4890
4891 /* check argument types */
4892 for (j = 0; j < key->partnatts; ++j)
4893 if (key->parttypid[j] != my_extra->variadic_type)
4894 ereport(ERROR,
4895 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4896 errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"",
4897 j + 1,
4898 format_type_be(key->parttypid[j]),
4899 format_type_be(my_extra->variadic_type))));
4900
4901 fmgr_info_copy(&my_extra->partsupfunc[0],
4902 &key->partsupfunc[0],
4903 fcinfo->flinfo->fn_mcxt);
4904 }
4905
4906 /* Hold lock until commit */
4907 relation_close(parent, NoLock);
4908 }
4909
4910 if (!OidIsValid(my_extra->variadic_type))
4911 {
4912 int nkeys = my_extra->nkeys;
4913 int i;
4914
4915 /*
4916 * For a non-variadic call, neither the number of arguments nor their
4917 * types can change across calls, so avoid the expense of rechecking
4918 * here.
4919 */
4920
4921 for (i = 0; i < nkeys; i++)
4922 {
4923 Datum hash;
4924
4925 /* keys start from fourth argument of function. */
4926 int argno = i + 3;
4927
4928 if (PG_ARGISNULL(argno))
4929 continue;
4930
4931 hash = FunctionCall2Coll(&my_extra->partsupfunc[i],
4932 my_extra->partcollid[i],
4933 PG_GETARG_DATUM(argno),
4934 seed);
4935
4936 /* Form a single 64-bit hash value */
4937 rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
4938 }
4939 }
4940 else
4941 {
4942 ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
4943 int i;
4944 int nelems;
4945 Datum *datum;
4946 bool *isnull;
4947
4948 deconstruct_array(variadic_array,
4949 my_extra->variadic_type,
4950 my_extra->variadic_typlen,
4951 my_extra->variadic_typbyval,
4952 my_extra->variadic_typalign,
4953 &datum, &isnull, &nelems);
4954
4955 /* complain if wrong number of column values */
4956 if (nelems != my_extra->nkeys)
4957 ereport(ERROR,
4958 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4959 errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
4960 my_extra->nkeys, nelems)));
4961
4962 for (i = 0; i < nelems; i++)
4963 {
4964 Datum hash;
4965
4966 if (isnull[i])
4967 continue;
4968
4969 hash = FunctionCall2Coll(&my_extra->partsupfunc[0],
4970 my_extra->partcollid[0],
4971 datum[i],
4972 seed);
4973
4974 /* Form a single 64-bit hash value */
4975 rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
4976 }
4977 }
4978
4979 PG_RETURN_BOOL(rowHash % modulus == remainder);
4980}
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:263
#define ARR_ELEMTYPE(a)
Definition: array.h:292
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3631
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:510
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:815
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:582
Bitmapset * bms_copy(const Bitmapset *a)
Definition: bitmapset.c:122
static Datum values[MAXATTR]
Definition: bootstrap.c:153
static void cleanup(void)
Definition: bootstrap.c:715
#define TextDatumGetCString(d)
Definition: builtins.h:98
#define Max(x, y)
Definition: c.h:998
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:471
int16_t int16
Definition: c.h:534
int32_t int32
Definition: c.h:535
uint64_t uint64
Definition: c.h:540
uint16_t uint16
Definition: c.h:538
#define OidIsValid(objectId)
Definition: c.h:775
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
bool datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen)
Definition: datum.c:223
struct cursor * cur
Definition: ecpg.c:29
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1161
int errdetail(const char *fmt,...)
Definition: elog.c:1207
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define WARNING
Definition: elog.h:36
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:150
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
ExprState * ExecInitExpr(Expr *node, PlanState *parent)
Definition: execExpr.c:143
ExprState * ExecPrepareExpr(Expr *node, EState *estate)
Definition: execExpr.c:765
bool ExecCheck(ExprState *state, ExprContext *econtext)
Definition: execExpr.c:872
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
Definition: execTuples.c:1443
void FreeExecutorState(EState *estate)
Definition: execUtils.c:192
EState * CreateExecutorState(void)
Definition: execUtils.c:88
#define GetPerTupleExprContext(estate)
Definition: executor.h:653
#define ResetExprContext(econtext)
Definition: executor.h:647
#define GetPerTupleMemoryContext(estate)
Definition: executor.h:658
static Datum ExecEvalExprSwitchContext(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:433
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1149
bool get_fn_expr_variadic(FmgrInfo *flinfo)
Definition: fmgr.c:2008
Oid get_fn_expr_argtype(FmgrInfo *flinfo, int argnum)
Definition: fmgr.c:1874
void fmgr_info_copy(FmgrInfo *dstinfo, FmgrInfo *srcinfo, MemoryContext destcxt)
Definition: fmgr.c:580
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_ARGISNULL(n)
Definition: fmgr.h:209
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_NARGS()
Definition: fmgr.h:203
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
char * format_type_be(Oid type_oid)
Definition: format_type.c:343
static uint64 hash_combine64(uint64 a, uint64 b)
Definition: hashfn.h:80
Assert(PointerIsAligned(start, uint64))
size_t remainder
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
long val
Definition: informix.c:689
static struct @166 value
int b
Definition: isn.c:74
int a
Definition: isn.c:73
int j
Definition: isn.c:78
int i
Definition: isn.c:77
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:81
List * lappend(List *list, void *datum)
Definition: list.c:339
List * lappend_int(List *list, int datum)
Definition: list.c:357
void list_free(List *list)
Definition: list.c:1546
#define NoLock
Definition: lockdefs.h:34
#define AccessExclusiveLock
Definition: lockdefs.h:43
#define AccessShareLock
Definition: lockdefs.h:36
char * get_rel_name(Oid relid)
Definition: lsyscache.c:2095
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2438
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1452
Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy)
Definition: lsyscache.c:168
Oid get_array_type(Oid typid)
Definition: lsyscache.c:2954
#define type_is_array(typid)
Definition: lsyscache.h:214
Expr * make_ands_explicit(List *andclauses)
Definition: makefuncs.c:799
Expr * makeBoolExpr(BoolExprType boolop, List *args, int location)
Definition: makefuncs.c:420
Var * makeVar(int varno, AttrNumber varattno, Oid vartype, int32 vartypmod, Oid varcollid, Index varlevelsup)
Definition: makefuncs.c:66
Node * makeBoolConst(bool value, bool isnull)
Definition: makefuncs.c:408
RelabelType * makeRelabelType(Expr *arg, Oid rtype, int32 rtypmod, Oid rcollid, CoercionForm rformat)
Definition: makefuncs.c:453
FuncExpr * makeFuncExpr(Oid funcid, Oid rettype, List *args, Oid funccollid, Oid inputcollid, CoercionForm fformat)
Definition: makefuncs.c:594
Expr * make_opclause(Oid opno, Oid opresulttype, bool opretset, Expr *leftop, Expr *rightop, Oid opcollid, Oid inputcollid)
Definition: makefuncs.c:701
Const * makeConst(Oid consttype, int32 consttypmod, Oid constcollid, int constlen, Datum constvalue, bool constisnull, bool constbyval)
Definition: makefuncs.c:350
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:1263
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc0(Size size)
Definition: mcxt.c:1395
void * palloc(Size size)
Definition: mcxt.c:1365
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
void fix_opfuncids(Node *node)
Definition: nodeFuncs.c:1841
#define IsA(nodeptr, _type_)
Definition: nodes.h:164
#define copyObject(obj)
Definition: nodes.h:232
#define IS_OUTER_JOIN(jointype)
Definition: nodes.h:348
#define makeNode(_type_)
Definition: nodes.h:161
#define castNode(_type_, nodeptr)
Definition: nodes.h:182
JoinType
Definition: nodes.h:298
@ JOIN_SEMI
Definition: nodes.h:317
@ JOIN_FULL
Definition: nodes.h:305
@ JOIN_INNER
Definition: nodes.h:303
@ JOIN_RIGHT
Definition: nodes.h:306
@ JOIN_LEFT
Definition: nodes.h:304
@ JOIN_ANTI
Definition: nodes.h:318
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:80
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
bool IsBinaryCoercible(Oid srctype, Oid targettype)
int parser_errposition(ParseState *pstate, int location)
Definition: parse_node.c:106
@ PARTITION_STRATEGY_HASH
Definition: parsenodes.h:900
@ PARTITION_STRATEGY_LIST
Definition: parsenodes.h:898
@ PARTITION_STRATEGY_RANGE
Definition: parsenodes.h:899
PartitionRangeDatumKind
Definition: parsenodes.h:949
@ PARTITION_RANGE_DATUM_MAXVALUE
Definition: parsenodes.h:952
@ PARTITION_RANGE_DATUM_VALUE
Definition: parsenodes.h:951
@ PARTITION_RANGE_DATUM_MINVALUE
Definition: parsenodes.h:950
static int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation, PartitionBoundInfo boundinfo, PartitionRangeBound *probe, int32 *cmpval)
Definition: partbounds.c:3654
static int process_inner_partition(PartitionMap *outer_map, PartitionMap *inner_map, bool outer_has_default, bool inner_has_default, int inner_index, int outer_default, JoinType jointype, int *next_index, int *default_index)
Definition: partbounds.c:2063
static void merge_default_partitions(PartitionMap *outer_map, PartitionMap *inner_map, bool outer_has_default, bool inner_has_default, int outer_default, int inner_default, JoinType jointype, int *next_index, int *default_index)
Definition: partbounds.c:2258
static void init_partition_map(RelOptInfo *rel, PartitionMap *map)
Definition: partbounds.c:1812
static List * get_qual_for_list(Relation parent, PartitionBoundSpec *spec)
Definition: partbounds.c:4067
int32 partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation, Datum *rb_datums, PartitionRangeDatumKind *rb_kind, Datum *tuple_datums, int n_tuple_datums)
Definition: partbounds.c:3557
static int32 qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
Definition: partbounds.c:3811
static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation, Datum *datums1, PartitionRangeDatumKind *kind1, bool lower1, PartitionRangeBound *b2)
Definition: partbounds.c:3489
struct PartitionListValue PartitionListValue
static PartitionBoundInfo create_list_bounds(PartitionBoundSpec **boundspecs, int nparts, PartitionKey key, int **mapping)
Definition: partbounds.c:462
static bool is_dummy_partition(RelOptInfo *rel, int part_index)
Definition: partbounds.c:1844
static PartitionBoundInfo merge_range_bounds(int partnatts, FmgrInfo *partsupfuncs, Oid *partcollations, RelOptInfo *outer_rel, RelOptInfo *inner_rel, JoinType jointype, List **outer_parts, List **inner_parts)
Definition: partbounds.c:1507
struct PartitionMap PartitionMap
bool partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval, PartitionBoundInfo b1, PartitionBoundInfo b2)
Definition: partbounds.c:896
PartitionBoundInfo partition_bounds_merge(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation, RelOptInfo *outer_rel, RelOptInfo *inner_rel, JoinType jointype, List **outer_parts, List **inner_parts)
Definition: partbounds.c:1119
static int merge_matching_partitions(PartitionMap *outer_map, PartitionMap *inner_map, int outer_index, int inner_index, int *next_index)
Definition: partbounds.c:1863
static List * get_qual_for_hash(Relation parent, PartitionBoundSpec *spec)
Definition: partbounds.c:3984
static void get_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs, Oid *partcollations, JoinType jointype, PartitionRangeBound *outer_lb, PartitionRangeBound *outer_ub, PartitionRangeBound *inner_lb, PartitionRangeBound *inner_ub, int lb_cmpval, int ub_cmpval, PartitionRangeBound *merged_lb, PartitionRangeBound *merged_ub)
Definition: partbounds.c:2712
#define compare_range_bounds(partnatts, partsupfunc, partcollations, bound1, bound2)
Definition: partbounds.c:88
static int32 qsort_partition_hbound_cmp(const void *a, const void *b)
Definition: partbounds.c:3779
static void free_partition_map(PartitionMap *map)
Definition: partbounds.c:1833
static int get_non_null_list_datum_count(PartitionBoundSpec **boundspecs, int nparts)
Definition: partbounds.c:436
void check_new_partition_bound(char *relname, Relation parent, PartitionBoundSpec *spec, ParseState *pstate)
Definition: partbounds.c:2897
static void fix_merged_indexes(PartitionMap *outer_map, PartitionMap *inner_map, int nmerged, List *merged_indexes)
Definition: partbounds.c:2386
static int merge_partition_with_dummy(PartitionMap *map, int index, int *next_index)
Definition: partbounds.c:2368
static Expr * make_partition_op_expr(PartitionKey key, int keynum, uint16 strategy, Expr *arg1, Expr *arg2)
Definition: partbounds.c:3869
uint64 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, const Oid *partcollation, const Datum *values, const bool *isnull)
Definition: partbounds.c:4723
PartitionBoundInfo partition_bounds_create(PartitionBoundSpec **boundspecs, int nparts, PartitionKey key, int **mapping)
Definition: partbounds.c:299
struct PartitionRangeBound PartitionRangeBound
static PartitionBoundInfo create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts, PartitionKey key, int **mapping)
Definition: partbounds.c:347
bool partitions_are_ordered(PartitionBoundInfo boundinfo, Bitmapset *live_parts)
Definition: partbounds.c:2853
static List * get_qual_for_range(Relation parent, PartitionBoundSpec *spec, bool for_default)
Definition: partbounds.c:4276
static PartitionBoundInfo merge_list_bounds(FmgrInfo *partsupfunc, Oid *partcollation, RelOptInfo *outer_rel, RelOptInfo *inner_rel, JoinType jointype, List **outer_parts, List **inner_parts)
Definition: partbounds.c:1199
static void generate_matching_part_pairs(RelOptInfo *outer_rel, RelOptInfo *inner_rel, PartitionMap *outer_map, PartitionMap *inner_map, int nmerged, List **outer_parts, List **inner_parts)
Definition: partbounds.c:2440
static void add_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs, Oid *partcollations, PartitionRangeBound *merged_lb, PartitionRangeBound *merged_ub, int merged_index, List **merged_datums, List **merged_kinds, List **merged_indexes)
Definition: partbounds.c:2776
int partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, PartitionBoundInfo boundinfo, int nvalues, Datum *values, bool *is_equal)
Definition: partbounds.c:3696
Datum satisfies_hash_partition(PG_FUNCTION_ARGS)
Definition: partbounds.c:4772
static PartitionRangeBound * make_one_partition_rbound(PartitionKey key, int index, List *datums, bool lower)
Definition: partbounds.c:3429
int partition_hash_bsearch(PartitionBoundInfo boundinfo, int modulus, int remainder)
Definition: partbounds.c:3739
static void get_range_key_properties(PartitionKey key, int keynum, PartitionRangeDatum *ldatum, PartitionRangeDatum *udatum, ListCell **partexprs_item, Expr **keyCol, Const **lower_val, Const **upper_val)
Definition: partbounds.c:4633
List * get_qual_from_partbound(Relation parent, PartitionBoundSpec *spec)
Definition: partbounds.c:249
static int32 partition_hbound_cmp(int modulus1, int remainder1, int modulus2, int remainder2)
Definition: partbounds.c:3588
static PartitionBoundInfo create_range_bounds(PartitionBoundSpec **boundspecs, int nparts, PartitionKey key, int **mapping)
Definition: partbounds.c:677
static int32 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
Definition: partbounds.c:3794
static PartitionBoundInfo build_merged_partition_bounds(char strategy, List *merged_datums, List *merged_kinds, List *merged_indexes, int null_index, int default_index)
Definition: partbounds.c:2519
int get_hash_partition_greatest_modulus(PartitionBoundInfo bound)
Definition: partbounds.c:3415
static Oid get_partition_operator(PartitionKey key, int col, StrategyNumber strategy, bool *need_relabel)
Definition: partbounds.c:3833
struct PartitionHashBound PartitionHashBound
static bool compare_range_partitions(int partnatts, FmgrInfo *partsupfuncs, Oid *partcollations, PartitionRangeBound *outer_lb, PartitionRangeBound *outer_ub, PartitionRangeBound *inner_lb, PartitionRangeBound *inner_ub, int *lb_cmpval, int *ub_cmpval)
Definition: partbounds.c:2663
static int get_range_partition(RelOptInfo *rel, PartitionBoundInfo bi, int *lb_pos, PartitionRangeBound *lb, PartitionRangeBound *ub)
Definition: partbounds.c:2582
static void merge_null_partitions(PartitionMap *outer_map, PartitionMap *inner_map, bool outer_has_null, bool inner_has_null, int outer_null, int inner_null, JoinType jointype, int *next_index, int *null_index)
Definition: partbounds.c:2148
void check_default_partition_contents(Relation parent, Relation default_rel, PartitionBoundSpec *new_spec)
Definition: partbounds.c:3252
static int get_range_partition_internal(PartitionBoundInfo bi, int *lb_pos, PartitionRangeBound *lb, PartitionRangeBound *ub)
Definition: partbounds.c:2603
static List * get_range_nulltest(PartitionKey key)
Definition: partbounds.c:4677
static int process_outer_partition(PartitionMap *outer_map, PartitionMap *inner_map, bool outer_has_default, bool inner_has_default, int outer_index, int inner_default, JoinType jointype, int *next_index, int *default_index)
Definition: partbounds.c:1981
int partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, PartitionBoundInfo boundinfo, Datum value, bool *is_equal)
Definition: partbounds.c:3608
PartitionBoundInfo partition_bounds_copy(PartitionBoundInfo src, PartitionKey key)
Definition: partbounds.c:1002
#define partition_bound_has_default(bi)
Definition: partbounds.h:99
#define partition_bound_accepts_nulls(bi)
Definition: partbounds.h:98
PartitionKey RelationGetPartitionKey(Relation rel)
Definition: partcache.c:51
struct PartitionKeyData * PartitionKey
Definition: partdefs.h:18
struct PartitionBoundInfoData * PartitionBoundInfo
Definition: partdefs.h:16
PartitionDesc RelationGetPartitionDesc(Relation rel, bool omit_detached)
Definition: partdesc.c:71
List * map_partition_varattnos(List *expr, int fromrel_varno, Relation to_rel, Relation from_rel)
Definition: partition.c:222
List * get_proposed_default_constraint(List *new_part_constraints)
Definition: partition.c:370
#define HASH_PARTITION_SEED
Definition: partition.h:20
#define IS_DUMMY_REL(r)
Definition: pathnodes.h:2105
void * arg
NameData relname
Definition: pg_class.h:38
#define PARTITION_MAX_KEYS
List * find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **numparents)
Definition: pg_inherits.c:255
#define lfirst(lc)
Definition: pg_list.h:172
#define llast_int(l)
Definition: pg_list.h:199
#define llast(l)
Definition: pg_list.h:198
#define lfirst_node(type, lc)
Definition: pg_list.h:176
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define forboth(cell1, list1, cell2, list2)
Definition: pg_list.h:518
#define lfirst_int(lc)
Definition: pg_list.h:173
#define list_make1_oid(x1)
Definition: pg_list.h:242
#define list_make1(x1)
Definition: pg_list.h:212
static void * list_nth(const List *list, int n)
Definition: pg_list.h:299
#define linitial(l)
Definition: pg_list.h:178
#define list_make3(x1, x2, x3)
Definition: pg_list.h:216
static ListCell * list_head(const List *l)
Definition: pg_list.h:128
#define for_both_cell(cell1, list1, initcell1, cell2, list2, initcell2)
Definition: pg_list.h:540
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:343
#define lfirst_oid(lc)
Definition: pg_list.h:174
#define list_make2(x1, x2)
Definition: pg_list.h:214
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
#define qsort(a, b, c, d)
Definition: port.h:479
static uint64 DatumGetUInt64(Datum X)
Definition: postgres.h:413
static bool DatumGetBool(Datum X)
Definition: postgres.h:100
static Datum UInt64GetDatum(uint64 X)
Definition: postgres.h:423
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:262
uint64_t Datum
Definition: postgres.h:70
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:222
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:212
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
char * c
@ AND_EXPR
Definition: primnodes.h:950
@ OR_EXPR
Definition: primnodes.h:950
@ NOT_EXPR
Definition: primnodes.h:950
@ COERCE_EXPLICIT_CAST
Definition: primnodes.h:754
@ COERCE_EXPLICIT_CALL
Definition: primnodes.h:753
@ IS_NULL
Definition: primnodes.h:1963
@ IS_NOT_NULL
Definition: primnodes.h:1963
void * stringToNode(const char *str)
Definition: read.c:90
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:715
#define RelationGetRelid(relation)
Definition: rel.h:514
#define RelationGetRelationName(relation)
Definition: rel.h:548
int errtable(Relation rel)
Definition: relcache.c:6049
char * get_range_partbound_string(List *bound_datums)
Definition: ruleutils.c:13677
@ ForwardScanDirection
Definition: sdir.h:28
Snapshot GetLatestSnapshot(void)
Definition: snapmgr.c:353
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:864
Snapshot RegisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:822
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:205
Relation relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:47
uint16 StrategyNumber
Definition: stratnum.h:22
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define BTLessStrategyNumber
Definition: stratnum.h:29
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define BTLessEqualStrategyNumber
Definition: stratnum.h:30
#define BTGreaterEqualStrategyNumber
Definition: stratnum.h:32
ParseLoc location
Definition: primnodes.h:1407
MemoryContext es_query_cxt
Definition: execnodes.h:710
List * es_tupleTable
Definition: execnodes.h:712
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:273
Definition: fmgr.h:57
Definition: pg_list.h:54
Definition: nodes.h:135
NullTestType nulltesttype
Definition: primnodes.h:1970
ParseLoc location
Definition: primnodes.h:1973
Expr * arg
Definition: primnodes.h:1969
PartitionRangeDatumKind ** kind
Definition: partbounds.h:84
PartitionStrategy strategy
Definition: partbounds.h:81
Bitmapset * interleaved_parts
Definition: partbounds.h:87
PartitionBoundInfo boundinfo
Definition: partdesc.h:38
bool did_remapping
Definition: partbounds.c:82
bool * merged
Definition: partbounds.c:80
int * old_indexes
Definition: partbounds.c:83
int * merged_indexes
Definition: partbounds.c:79
PartitionRangeDatumKind * kind
Definition: partbounds.c:68
PartitionRangeDatumKind kind
Definition: parsenodes.h:959
Form_pg_class rd_rel
Definition: rel.h:111
ParseLoc location
Definition: primnodes.h:938
Definition: type.h:96
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:264
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:220
Datum SysCacheGetAttrNotNull(int cacheId, HeapTuple tup, AttrNumber attributeNumber)
Definition: syscache.c:625
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:40
TupleTableSlot * table_slot_create(Relation relation, List **reglist)
Definition: tableam.c:92
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:985
static bool table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
Definition: tableam.h:1020
static TableScanDesc table_beginscan(Relation rel, Snapshot snapshot, int nkeys, ScanKeyData *key)
Definition: tableam.h:876
bool PartConstraintImpliedByRelConstraint(Relation scanrel, List *partConstraint)
Definition: tablecmds.c:20015