dynahash.c (PostgreSQL Source Code, git master)
1/*-------------------------------------------------------------------------
2 *
3 * dynahash.c
4 * dynamic chained hash tables
5 *
6 * dynahash.c supports both local-to-a-backend hash tables and hash tables in
7 * shared memory. For shared hash tables, it is the caller's responsibility
8 * to provide appropriate access interlocking. The simplest convention is
9 * that a single LWLock protects the whole hash table. Searches (HASH_FIND or
10 * hash_seq_search) need only shared lock, but any update requires exclusive
11 * lock. For heavily-used shared tables, the single-lock approach creates a
12 * concurrency bottleneck, so we also support "partitioned" locking wherein
13 * there are multiple LWLocks guarding distinct subsets of the table. To use
14 * a hash table in partitioned mode, the HASH_PARTITION flag must be given
15 * to hash_create. This prevents any attempt to split buckets on-the-fly.
16 * Therefore, each hash bucket chain operates independently, and no fields
17 * of the hash header change after init except nentries and freeList.
18 * (A partitioned table uses multiple copies of those fields, guarded by
19 * spinlocks, for additional concurrency.)
20 * This lets any subset of the hash buckets be treated as a separately
21 * lockable partition. We expect callers to use the low-order bits of a
22 * lookup key's hash value as a partition number --- this will work because
23 * of the way calc_bucket() maps hash values to bucket numbers.
24 *
25 * The memory allocator function should match malloc's semantics of returning
26 * NULL on failure. (This is essential for hash tables in shared memory.
27 * For hash tables in local memory, we used to use palloc() which will throw
28 * error on failure; but we no longer do, so it's untested whether this
29 * module will still cope with that behavior.)
30 *
31 * dynahash.c provides support for these types of lookup keys:
32 *
33 * 1. Null-terminated C strings (truncated if necessary to fit in keysize),
34 * compared as though by strcmp(). This is selected by specifying the
35 * HASH_STRINGS flag to hash_create.
36 *
37 * 2. Arbitrary binary data of size keysize, compared as though by memcmp().
38 * (Caller must ensure there are no undefined padding bits in the keys!)
39 * This is selected by specifying the HASH_BLOBS flag to hash_create.
40 *
41 * 3. More complex key behavior can be selected by specifying user-supplied
42 * hashing, comparison, and/or key-copying functions. At least a hashing
43 * function must be supplied; comparison defaults to memcmp() and key copying
44 * to memcpy() when a user-defined hashing function is selected.
45 *
46 * Compared to simplehash, dynahash has the following benefits:
47 *
48 * - It supports partitioning, which is useful for shared memory access using
49 * locks.
50 * - Shared memory hashes are allocated in a fixed size area at startup and
51 * are discoverable by name from other processes.
52 * - Because entries don't need to be moved in the case of hash conflicts,
53 * dynahash has better performance for large entries.
54 * - Guarantees stable pointers to entries.
55 *
56 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
57 * Portions Copyright (c) 1994, Regents of the University of California
58 *
59 *
60 * IDENTIFICATION
61 * src/backend/utils/hash/dynahash.c
62 *
63 *-------------------------------------------------------------------------
64 */
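/*
 * As an illustration of partitioned use, a caller might derive the
 * partition lock from a key's hash value along these lines (a sketch
 * only: PartitionLocks, NUM_MY_PARTITIONS, my_shared_table, and the entry
 * variables are hypothetical, and NUM_MY_PARTITIONS must be the power of 2
 * given as num_partitions to hash_create):
 *
 *     uint32  hashcode = get_hash_value(my_shared_table, &key);
 *     int     partno = hashcode % NUM_MY_PARTITIONS;
 *     bool    found;
 *     void   *entry;
 *
 *     LWLockAcquire(&PartitionLocks[partno], LW_EXCLUSIVE);
 *     entry = hash_search_with_hash_value(my_shared_table, &key, hashcode,
 *                                         HASH_ENTER, &found);
 *     ...
 *     LWLockRelease(&PartitionLocks[partno]);
 */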
65
66/*
67 * Original comments:
68 *
69 * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
70 * Coded into C, with minor code improvements, and with hsearch(3) interface,
71 * by [email protected], Jul 26, 1988: 13:16;
72 * also, hcreate/hdestroy routines added to simulate hsearch(3).
73 *
74 * These routines simulate hsearch(3) and family, with the important
75 * difference that the hash table is dynamic - can grow indefinitely
76 * beyond its original size (as supplied to hcreate()).
77 *
78 * Performance appears to be comparable to that of hsearch(3).
79 * The 'source-code' options referred to in hsearch(3)'s 'man' page
80 * are not implemented; otherwise functionality is identical.
81 *
82 * Compilation controls:
83 * HASH_STATISTICS causes some usage statistics to be maintained, which can be
84 * logged by calling hash_stats().
85 *
86 * Problems & fixes to [email protected]. WARNING: relies on pre-processor
87 * concatenation property, in probably unnecessary code 'optimization'.
88 *
89 * Modified [email protected] February 1990
90 * added multiple table interface
91 * Modified by [email protected] April 1990
92 * changed ctl structure for shared memory
93 */
94
95#include "postgres.h"
96
97#include <limits.h>
98
99#include "access/xact.h"
100#include "common/hashfn.h"
101#include "lib/ilist.h"
102#include "port/pg_bitutils.h"
103#include "storage/shmem.h"
104#include "storage/spin.h"
105#include "utils/memutils.h"
106
107
108/*
109 * Constants
110 *
111 * A hash table has a top-level "directory", each of whose entries points
112 * to a "segment" of ssize bucket headers. The maximum number of hash
113 * buckets is thus dsize * ssize (but dsize may be expansible). Of course,
114 * the number of records in the table can be larger, but we don't want a
115 * whole lot of records per bucket or performance goes down.
116 *
117 * In a hash table allocated in shared memory, the directory cannot be
118 * expanded because it must stay at a fixed address. The directory size
119 * should be selected using hash_select_dirsize (and you'd better have
120 * a good idea of the maximum number of entries!). For non-shared hash
121 * tables, the initial directory size can be left at the default.
122 */
123#define DEF_SEGSIZE 256
124#define DEF_SEGSIZE_SHIFT 8 /* must be log2(DEF_SEGSIZE) */
125#define DEF_DIRSIZE 256
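/*
 * With these defaults, a table can grow to DEF_DIRSIZE * DEF_SEGSIZE =
 * 256 * 256 = 65536 buckets before the directory itself must be enlarged
 * (dir_realloc doubles dsize, which is possible only for non-shared
 * tables).
 */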
126
127/* Number of freelists to be used for a partitioned hash table. */
128#define NUM_FREELISTS 32
129
130/* A hash bucket is a linked list of HASHELEMENTs */
131typedef HASHELEMENT *HASHBUCKET;
132
133/* A hash segment is an array of bucket headers */
134typedef HASHBUCKET *HASHSEGMENT;
135
136/*
137 * Per-freelist data.
138 *
139 * In a partitioned hash table, each freelist is associated with a specific
140 * set of hashcodes, as determined by the FREELIST_IDX() macro below.
141 * nentries tracks the number of live hashtable entries having those hashcodes
142 * (NOT the number of entries in the freelist, as you might expect).
143 *
144 * The coverage of a freelist might be more or less than one partition, so it
145 * needs its own lock rather than relying on caller locking. Relying on that
146 * wouldn't work even if the coverage was the same, because of the occasional
147 * need to "borrow" entries from another freelist; see get_hash_entry().
148 *
149 * Using an array of FreeListData instead of separate arrays of mutexes,
150 * nentries and freeLists helps to reduce sharing of cache lines between
151 * different mutexes.
152 */
153typedef struct
154{
155 slock_t mutex; /* spinlock for this freelist */
156 int64 nentries; /* number of entries in associated buckets */
157 HASHELEMENT *freeList; /* chain of free elements */
158} FreeListData;
159
160/*
161 * Header structure for a hash table --- contains all changeable info
162 *
163 * In a shared-memory hash table, the HASHHDR is in shared memory, while
164 * each backend has a local HTAB struct. For a non-shared table, there isn't
165 * any functional difference between HASHHDR and HTAB, but we separate them
166 * anyway to share code between shared and non-shared tables.
167 */
168struct HASHHDR
169{
170 /*
171 * The freelist can become a point of contention in high-concurrency hash
172 * tables, so we use an array of freelists, each with its own mutex and
173 * nentries count, instead of just a single one. Although the freelists
174 * normally operate independently, we will scavenge entries from freelists
175 * other than a hashcode's default freelist when necessary.
176 *
177 * If the hash table is not partitioned, only freeList[0] is used and its
178 * spinlock is not used at all; callers' locking is assumed sufficient.
179 */
180 FreeListData freeList[NUM_FREELISTS];
181
182 /* These fields can change, but not in a partitioned table */
183 /* Also, dsize can't change in a shared table, even if unpartitioned */
184 int64 dsize; /* directory size */
185 int64 nsegs; /* number of allocated segments (<= dsize) */
186 uint32 max_bucket; /* ID of maximum bucket in use */
187 uint32 high_mask; /* mask to modulo into entire table */
188 uint32 low_mask; /* mask to modulo into lower half of table */
189
190 /* These fields are fixed at hashtable creation */
191 Size keysize; /* hash key length in bytes */
192 Size entrysize; /* total user element size in bytes */
193 int64 num_partitions; /* # partitions (must be power of 2), or 0 */
194 int64 max_dsize; /* 'dsize' limit if directory is fixed size */
195 int64 ssize; /* segment size --- must be power of 2 */
196 int sshift; /* segment shift = log2(ssize) */
197 int nelem_alloc; /* number of entries to allocate at once */
198 bool isfixed; /* if true, don't enlarge */
199
200#ifdef HASH_STATISTICS
201
202 /*
203 * Count statistics here. NB: stats code doesn't bother with mutex, so
204 * counts could be corrupted a bit in a partitioned table.
205 */
206 uint64 accesses;
207 uint64 collisions;
208 uint64 expansions;
209#endif
210};
211
212#define IS_PARTITIONED(hctl) ((hctl)->num_partitions != 0)
213
214#define FREELIST_IDX(hctl, hashcode) \
215 (IS_PARTITIONED(hctl) ? (hashcode) % NUM_FREELISTS : 0)
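/*
 * For example, in a partitioned table an entry whose hash value is
 * 0xDEADBEEF is associated with freelist 0xDEADBEEF % 32 = 15, i.e. the
 * low-order five bits select the freelist; an unpartitioned table keeps
 * everything in freeList[0].
 */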
216
217/*
218 * Top control structure for a hashtable --- in a shared table, each backend
219 * has its own copy (OK since no fields change at runtime)
220 */
221struct HTAB
222{
223 HASHHDR *hctl; /* => shared control information */
224 HASHSEGMENT *dir; /* directory of segment starts */
225 HashValueFunc hash; /* hash function */
226 HashCompareFunc match; /* key comparison function */
227 HashCopyFunc keycopy; /* key copying function */
228 HashAllocFunc alloc; /* memory allocator */
229 MemoryContext hcxt; /* memory context if default allocator used */
230 char *tabname; /* table name (for error messages) */
231 bool isshared; /* true if table is in shared memory */
232
233 /* freezing a shared table isn't allowed, so we can keep state here */
234 bool frozen; /* true = no more inserts allowed */
235
236 /* We keep local copies of these fixed values to reduce contention */
237 Size keysize; /* hash key length in bytes */
238 int64 ssize; /* segment size --- must be power of 2 */
239 int sshift; /* segment shift = log2(ssize) */
240
241 /*
242 * In a USE_VALGRIND build, non-shared hashtables keep an slist chain of
243 * all the element blocks they have allocated. This pacifies Valgrind,
244 * which would otherwise often claim that the element blocks are "possibly
245 * lost" for lack of any non-interior pointers to their starts.
246 */
247#ifdef USE_VALGRIND
248 slist_head element_blocks;
249#endif
250};
251
252/*
253 * Key (also entry) part of a HASHELEMENT
254 */
255#define ELEMENTKEY(helem) (((char *)(helem)) + MAXALIGN(sizeof(HASHELEMENT)))
256
257/*
258 * Obtain element pointer given pointer to key
259 */
260#define ELEMENT_FROM_KEY(key) \
261 ((HASHELEMENT *) (((char *) (key)) - MAXALIGN(sizeof(HASHELEMENT))))
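/*
 * So each element is laid out as a MAXALIGN'd HASHELEMENT header (chain
 * link and saved hash value) immediately followed by the caller-visible
 * entry, whose first keysize bytes are the key:
 *
 *     +--------------------+------------------------------+
 *     | HASHELEMENT header | key ... rest of user entry   |
 *     +--------------------+------------------------------+
 *                          ^ ELEMENTKEY(helem) points here
 */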
262
263/*
264 * Fast MOD arithmetic, assuming that y is a power of 2 !
265 */
266#define MOD(x,y) ((x) & ((y)-1))
267
268/*
269 * Private function prototypes
270 */
271static void *DynaHashAlloc(Size size);
272static HASHSEGMENT seg_alloc(HTAB *hashp);
273static bool element_alloc(HTAB *hashp, int nelem, int freelist_idx);
274static bool dir_realloc(HTAB *hashp);
275static bool expand_table(HTAB *hashp);
276static HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx);
277static void hdefault(HTAB *hashp);
278static int choose_nelem_alloc(Size entrysize);
279static bool init_htab(HTAB *hashp, int64 nelem);
280pg_noreturn static void hash_corrupted(HTAB *hashp);
281static uint32 hash_initial_lookup(HTAB *hashp, uint32 hashvalue,
282 HASHBUCKET **bucketptr);
283static int my_log2(int64 num);
284static int64 next_pow2_int64(int64 num);
285static int next_pow2_int(int64 num);
286static void register_seq_scan(HTAB *hashp);
287static void deregister_seq_scan(HTAB *hashp);
288static bool has_seq_scans(HTAB *hashp);
289
290
291/*
292 * memory allocation support
293 */
294static MemoryContext CurrentDynaHashCxt = NULL;
295
296static void *
297DynaHashAlloc(Size size)
298{
299 Assert(MemoryContextIsValid(CurrentDynaHashCxt));
300 return MemoryContextAllocExtended(CurrentDynaHashCxt, size,
301 MCXT_ALLOC_NO_OOM);
302}
303
304
305/*
306 * HashCompareFunc for string keys
307 *
308 * Because we copy keys with strlcpy(), they will be truncated at keysize-1
309 * bytes, so we can only compare that many ... hence strncmp is almost but
310 * not quite the right thing.
311 */
312static int
313string_compare(const char *key1, const char *key2, Size keysize)
314{
315 return strncmp(key1, key2, keysize - 1);
316}
317
318
319/************************** CREATE ROUTINES **********************/
320
321/*
322 * hash_create -- create a new dynamic hash table
323 *
324 * tabname: a name for the table (for debugging purposes)
325 * nelem: maximum number of elements expected
326 * *info: additional table parameters, as indicated by flags
327 * flags: bitmask indicating which parameters to take from *info
328 *
329 * The flags value *must* include HASH_ELEM. (Formerly, this was nominally
330 * optional, but the default keysize and entrysize values were useless.)
331 * The flags value must also include exactly one of HASH_STRINGS, HASH_BLOBS,
332 * or HASH_FUNCTION, to define the key hashing semantics (C strings,
333 * binary blobs, or custom, respectively). Callers specifying a custom
334 * hash function will likely also want to use HASH_COMPARE, and perhaps
335 * also HASH_KEYCOPY, to control key comparison and copying.
336 * Another often-used flag is HASH_CONTEXT, to allocate the hash table
337 * under info->hcxt rather than under TopMemoryContext; the default
338 * behavior is only suitable for session-lifespan hash tables.
339 * Other flags bits are special-purpose and seldom used, except for those
340 * associated with shared-memory hash tables, for which see ShmemInitHash().
341 *
342 * Fields in *info are read only when the associated flags bit is set.
343 * It is not necessary to initialize other fields of *info.
344 * Neither tabname nor *info need persist after the hash_create() call.
345 *
346 * Note: It is deprecated for callers of hash_create() to explicitly specify
347 * string_hash, tag_hash, uint32_hash, or oid_hash. Just set HASH_STRINGS or
348 * HASH_BLOBS. Use HASH_FUNCTION only when you want something other than
349 * one of these.
350 *
351 * Note: for a shared-memory hashtable, nelem needs to be a pretty good
352 * estimate, since we can't expand the table on the fly. But an unshared
353 * hashtable can be expanded on-the-fly, so it's better for nelem to be
354 * on the small side and let the table grow if it's exceeded. An overly
355 * large nelem will penalize hash_seq_search speed without buying much.
356 */
357HTAB *
358hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
359{
360 HTAB *hashp;
361 HASHHDR *hctl;
362
363 /*
364 * Hash tables now allocate space for key and data, but you have to say
365 * how much space to allocate.
366 */
367 Assert(flags & HASH_ELEM);
368 Assert(info->keysize > 0);
369 Assert(info->entrysize >= info->keysize);
370
371 /*
372 * For shared hash tables, we have a local hash header (HTAB struct) that
373 * we allocate in TopMemoryContext; all else is in shared memory.
374 *
375 * For non-shared hash tables, everything including the hash header is in
376 * a memory context created specially for the hash table --- this makes
377 * hash_destroy very simple. The memory context is made a child of either
378 * a context specified by the caller, or TopMemoryContext if nothing is
379 * specified.
380 */
381 if (flags & HASH_SHARED_MEM)
382 {
383 /* Set up to allocate the hash header */
384 CurrentDynaHashCxt = TopMemoryContext;
385 }
386 else
387 {
388 /* Create the hash table's private memory context */
389 if (flags & HASH_CONTEXT)
390 CurrentDynaHashCxt = info->hcxt;
391 else
392 CurrentDynaHashCxt = TopMemoryContext;
393 CurrentDynaHashCxt = AllocSetContextCreate(CurrentDynaHashCxt,
394 "dynahash",
395 ALLOCSET_DEFAULT_SIZES);
396 }
397
398 /* Initialize the hash header, plus a copy of the table name */
399 hashp = (HTAB *) MemoryContextAlloc(CurrentDynaHashCxt,
400 sizeof(HTAB) + strlen(tabname) + 1);
401 MemSet(hashp, 0, sizeof(HTAB));
402
403 hashp->tabname = (char *) (hashp + 1);
404 strcpy(hashp->tabname, tabname);
405
406 /* If we have a private context, label it with hashtable's name */
407 if (!(flags & HASH_SHARED_MEM))
408 MemoryContextSetIdentifier(CurrentDynaHashCxt, hashp->tabname);
409
410 /*
411 * Select the appropriate hash function (see comments at head of file).
412 */
413 if (flags & HASH_FUNCTION)
414 {
415 Assert(!(flags & (HASH_BLOBS | HASH_STRINGS)));
416 hashp->hash = info->hash;
417 }
418 else if (flags & HASH_BLOBS)
419 {
420 Assert(!(flags & HASH_STRINGS));
421 /* We can optimize hashing for common key sizes */
422 if (info->keysize == sizeof(uint32))
423 hashp->hash = uint32_hash;
424 else
425 hashp->hash = tag_hash;
426 }
427 else
428 {
429 /*
430 * string_hash used to be considered the default hash method, and in a
431 * non-assert build it effectively still is. But we now consider it
432 * an assertion error to not say HASH_STRINGS explicitly. To help
433 * catch mistaken usage of HASH_STRINGS, we also insist on a
434 * reasonably long string length: if the keysize is only 4 or 8 bytes,
435 * it's almost certainly an integer or pointer not a string.
436 */
437 Assert(flags & HASH_STRINGS);
438 Assert(info->keysize > 8);
439
440 hashp->hash = string_hash;
441 }
442
443 /*
444 * If you don't specify a match function, it defaults to string_compare if
445 * you used string_hash, and to memcmp otherwise.
446 *
447 * Note: explicitly specifying string_hash is deprecated, because this
448 * might not work for callers in loadable modules on some platforms due to
449 * referencing a trampoline instead of the string_hash function proper.
450 * Specify HASH_STRINGS instead.
451 */
452 if (flags & HASH_COMPARE)
453 hashp->match = info->match;
454 else if (hashp->hash == string_hash)
455 hashp->match = (HashCompareFunc) string_compare;
456 else
457 hashp->match = memcmp;
458
459 /*
460 * Similarly, the key-copying function defaults to strlcpy or memcpy.
461 */
462 if (flags & HASH_KEYCOPY)
463 hashp->keycopy = info->keycopy;
464 else if (hashp->hash == string_hash)
465 {
466 /*
467 * The signature of keycopy is meant for memcpy(), which returns
468 * void*, but strlcpy() returns size_t. Since we never use the return
469 * value of keycopy, and size_t is pretty much always the same size as
470 * void *, this should be safe. The extra cast in the middle is to
471 * avoid warnings from -Wcast-function-type.
472 */
474 }
475 else
476 hashp->keycopy = memcpy;
477
478 /* And select the entry allocation function, too. */
479 if (flags & HASH_ALLOC)
480 hashp->alloc = info->alloc;
481 else
482 hashp->alloc = DynaHashAlloc;
483
484 if (flags & HASH_SHARED_MEM)
485 {
486 /*
487 * ctl structure and directory are preallocated for shared memory
488 * tables. Note that HASH_DIRSIZE and HASH_ALLOC had better be set as
489 * well.
490 */
491 hashp->hctl = info->hctl;
492 hashp->dir = (HASHSEGMENT *) (((char *) info->hctl) + sizeof(HASHHDR));
493 hashp->hcxt = NULL;
494 hashp->isshared = true;
495
496 /* hash table already exists, we're just attaching to it */
497 if (flags & HASH_ATTACH)
498 {
499 /* make local copies of some heavily-used values */
500 hctl = hashp->hctl;
501 hashp->keysize = hctl->keysize;
502 hashp->ssize = hctl->ssize;
503 hashp->sshift = hctl->sshift;
504
505 return hashp;
506 }
507 }
508 else
509 {
510 /* setup hash table defaults */
511 hashp->hctl = NULL;
512 hashp->dir = NULL;
513 hashp->hcxt = CurrentDynaHashCxt;
514 hashp->isshared = false;
515 }
516
517 if (!hashp->hctl)
518 {
519 hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR));
520 if (!hashp->hctl)
521 ereport(ERROR,
522 (errcode(ERRCODE_OUT_OF_MEMORY),
523 errmsg("out of memory")));
524 }
525
526 hashp->frozen = false;
527
528 hdefault(hashp);
529
530 hctl = hashp->hctl;
531
532 if (flags & HASH_PARTITION)
533 {
534 /* Doesn't make sense to partition a local hash table */
535 Assert(flags & HASH_SHARED_MEM);
536
537 /*
538 * The number of partitions had better be a power of 2. Also, it must
539 * be less than INT_MAX (see init_htab()), so call the int version of
540 * next_pow2.
541 */
542 Assert(info->num_partitions == next_pow2_int(info->num_partitions));
543
544 hctl->num_partitions = info->num_partitions;
545 }
546
547 if (flags & HASH_SEGMENT)
548 {
549 hctl->ssize = info->ssize;
550 hctl->sshift = my_log2(info->ssize);
551 /* ssize had better be a power of 2 */
552 Assert(hctl->ssize == (1L << hctl->sshift));
553 }
554
555 /*
556 * SHM hash tables have fixed directory size passed by the caller.
557 */
558 if (flags & HASH_DIRSIZE)
559 {
560 hctl->max_dsize = info->max_dsize;
561 hctl->dsize = info->dsize;
562 }
563
564 /* remember the entry sizes, too */
565 hctl->keysize = info->keysize;
566 hctl->entrysize = info->entrysize;
567
568 /* make local copies of heavily-used constant fields */
569 hashp->keysize = hctl->keysize;
570 hashp->ssize = hctl->ssize;
571 hashp->sshift = hctl->sshift;
572
573 /* Build the hash directory structure */
574 if (!init_htab(hashp, nelem))
575 elog(ERROR, "failed to initialize hash table \"%s\"", hashp->tabname);
576
577 /*
578 * For a shared hash table, preallocate the requested number of elements.
579 * This reduces problems with run-time out-of-shared-memory conditions.
580 *
581 * For a non-shared hash table, preallocate the requested number of
582 * elements if it's less than our chosen nelem_alloc. This avoids wasting
583 * space if the caller correctly estimates a small table size.
584 */
585 if ((flags & HASH_SHARED_MEM) ||
586 nelem < hctl->nelem_alloc)
587 {
588 int i,
589 freelist_partitions,
590 nelem_alloc,
591 nelem_alloc_first;
592
593 /*
594 * If hash table is partitioned, give each freelist an equal share of
595 * the initial allocation. Otherwise only freeList[0] is used.
596 */
597 if (IS_PARTITIONED(hashp->hctl))
598 freelist_partitions = NUM_FREELISTS;
599 else
600 freelist_partitions = 1;
601
602 nelem_alloc = nelem / freelist_partitions;
603 if (nelem_alloc <= 0)
604 nelem_alloc = 1;
605
606 /*
607 * Make sure we'll allocate all the requested elements; freeList[0]
608 * gets the excess if the request isn't divisible by NUM_FREELISTS.
609 */
610 if (nelem_alloc * freelist_partitions < nelem)
611 nelem_alloc_first =
612 nelem - nelem_alloc * (freelist_partitions - 1);
613 else
614 nelem_alloc_first = nelem_alloc;
615
616 for (i = 0; i < freelist_partitions; i++)
617 {
618 int temp = (i == 0) ? nelem_alloc_first : nelem_alloc;
619
620 if (!element_alloc(hashp, temp, i))
621 ereport(ERROR,
622 (errcode(ERRCODE_OUT_OF_MEMORY),
623 errmsg("out of memory")));
624 }
625 }
626
627 /* Set isfixed if requested, but not till after we build initial entries */
628 if (flags & HASH_FIXED_SIZE)
629 hctl->isfixed = true;
630
631 return hashp;
632}
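/*
 * For example, a typical backend-local table keyed by an Oid could be
 * created like this (a sketch; MyEntry, a struct whose first field is the
 * Oid key, and the table name are hypothetical):
 *
 *     HASHCTL  ctl;
 *     HTAB    *htab;
 *
 *     ctl.keysize = sizeof(Oid);
 *     ctl.entrysize = sizeof(MyEntry);
 *     ctl.hcxt = CurrentMemoryContext;
 *     htab = hash_create("my entry cache", 128, &ctl,
 *                        HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
 */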
633
634/*
635 * Set default HASHHDR parameters.
636 */
637static void
638hdefault(HTAB *hashp)
639{
640 HASHHDR *hctl = hashp->hctl;
641
642 MemSet(hctl, 0, sizeof(HASHHDR));
643
644 hctl->dsize = DEF_DIRSIZE;
645 hctl->nsegs = 0;
646
647 hctl->num_partitions = 0; /* not partitioned */
648
649 /* table has no fixed maximum size */
650 hctl->max_dsize = NO_MAX_DSIZE;
651
652 hctl->ssize = DEF_SEGSIZE;
653 hctl->sshift = DEF_SEGSIZE_SHIFT;
654
655 hctl->isfixed = false; /* can be enlarged */
656
657#ifdef HASH_STATISTICS
658 hctl->accesses = hctl->collisions = hctl->expansions = 0;
659#endif
660}
661
662/*
663 * Given the user-specified entry size, choose nelem_alloc, ie, how many
664 * elements to add to the hash table when we need more.
665 */
666static int
667choose_nelem_alloc(Size entrysize)
668{
669 int nelem_alloc;
670 Size elementSize;
671 Size allocSize;
672
673 /* Each element has a HASHELEMENT header plus user data. */
674 /* NB: this had better match element_alloc() */
675 elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
676
677 /*
678 * The idea here is to choose nelem_alloc at least 32, but round up so
679 * that the allocation request will be a power of 2 or just less. This
680 * makes little difference for hash tables in shared memory, but for hash
681 * tables managed by palloc, the allocation request will be rounded up to
682 * a power of 2 anyway. If we fail to take this into account, we'll waste
683 * as much as half the allocated space.
684 */
685 allocSize = 32 * 4; /* assume elementSize at least 8 */
686 do
687 {
688 allocSize <<= 1;
689 nelem_alloc = allocSize / elementSize;
690 } while (nelem_alloc < 32);
691
692 return nelem_alloc;
693}
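/*
 * Worked example (assuming a 64-bit build where MAXALIGN(sizeof(HASHELEMENT))
 * is 16): for entrysize = 100, elementSize = 16 + MAXALIGN(100) = 120 bytes.
 * The loop doubles allocSize until 4096 / 120 = 34 >= 32, so nelem_alloc is
 * 34 and each element_alloc request is 34 * 120 = 4080 bytes, just under
 * the 4096-byte power of 2.
 */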
694
695/*
696 * Compute derived fields of hctl and build the initial directory/segment
697 * arrays
698 */
699static bool
700init_htab(HTAB *hashp, int64 nelem)
701{
702 HASHHDR *hctl = hashp->hctl;
703 HASHSEGMENT *segp;
704 int nbuckets;
705 int nsegs;
706 int i;
707
708 /*
709 * initialize mutexes if it's a partitioned table
710 */
711 if (IS_PARTITIONED(hctl))
712 for (i = 0; i < NUM_FREELISTS; i++)
713 SpinLockInit(&(hctl->freeList[i].mutex));
714
715 /*
716 * Allocate space for the next greater power of two number of buckets,
717 * assuming a desired maximum load factor of 1.
718 */
719 nbuckets = next_pow2_int(nelem);
720
721 /*
722 * In a partitioned table, nbuckets must be at least equal to
723 * num_partitions; were it less, keys with apparently different partition
724 * numbers would map to the same bucket, breaking partition independence.
725 * (Normally nbuckets will be much bigger; this is just a safety check.)
726 */
727 while (nbuckets < hctl->num_partitions)
728 nbuckets <<= 1;
729
730 hctl->max_bucket = hctl->low_mask = nbuckets - 1;
731 hctl->high_mask = (nbuckets << 1) - 1;
732
733 /*
734 * Figure number of directory segments needed, round up to a power of 2
735 */
736 nsegs = (nbuckets - 1) / hctl->ssize + 1;
737 nsegs = next_pow2_int(nsegs);
738
739 /*
740 * Make sure directory is big enough. If pre-allocated directory is too
741 * small, choke (caller screwed up).
742 */
743 if (nsegs > hctl->dsize)
744 {
745 if (!(hashp->dir))
746 hctl->dsize = nsegs;
747 else
748 return false;
749 }
750
751 /* Allocate a directory */
752 if (!(hashp->dir))
753 {
754 CurrentDynaHashCxt = hashp->hcxt;
755 hashp->dir = (HASHSEGMENT *)
756 hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT));
757 if (!hashp->dir)
758 return false;
759 }
760
761 /* Allocate initial segments */
762 for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)
763 {
764 *segp = seg_alloc(hashp);
765 if (*segp == NULL)
766 return false;
767 }
768
769 /* Choose number of entries to allocate at a time */
770 hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize);
771
772 return true;
773}
774
775/*
776 * Estimate the space needed for a hashtable containing the given number
777 * of entries of given size.
778 * NOTE: this is used to estimate the footprint of hashtables in shared
779 * memory; therefore it does not count HTAB which is in local memory.
780 * NB: assumes that all hash structure parameters have default values!
781 */
782Size
783hash_estimate_size(int64 num_entries, Size entrysize)
784{
785 Size size;
786 int64 nBuckets,
787 nSegments,
788 nDirEntries,
789 nElementAllocs,
790 elementSize,
791 elementAllocCnt;
792
793 /* estimate number of buckets wanted */
794 nBuckets = next_pow2_int64(num_entries);
795 /* # of segments needed for nBuckets */
796 nSegments = next_pow2_int64((nBuckets - 1) / DEF_SEGSIZE + 1);
797 /* directory entries */
798 nDirEntries = DEF_DIRSIZE;
799 while (nDirEntries < nSegments)
800 nDirEntries <<= 1; /* dir_alloc doubles dsize at each call */
801
802 /* fixed control info */
803 size = MAXALIGN(sizeof(HASHHDR)); /* but not HTAB, per above */
804 /* directory */
805 size = add_size(size, mul_size(nDirEntries, sizeof(HASHSEGMENT)));
806 /* segments */
807 size = add_size(size, mul_size(nSegments,
808 MAXALIGN(DEF_SEGSIZE * sizeof(HASHBUCKET))));
809 /* elements --- allocated in groups of choose_nelem_alloc() entries */
810 elementAllocCnt = choose_nelem_alloc(entrysize);
811 nElementAllocs = (num_entries - 1) / elementAllocCnt + 1;
812 elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
813 size = add_size(size,
814 mul_size(nElementAllocs,
815 mul_size(elementAllocCnt, elementSize)));
816
817 return size;
818}
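/*
 * Typical use is when summing up shared-memory space requirements, e.g.
 * (a sketch; max_entries and MyShmemEntry are hypothetical):
 *
 *     size = add_size(size,
 *                     hash_estimate_size(max_entries, sizeof(MyShmemEntry)));
 */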
819
820/*
821 * Select an appropriate directory size for a hashtable with the given
822 * maximum number of entries.
823 * This is only needed for hashtables in shared memory, whose directories
824 * cannot be expanded dynamically.
825 * NB: assumes that all hash structure parameters have default values!
826 *
827 * XXX this had better agree with the behavior of init_htab()...
828 */
829int64
830hash_select_dirsize(int64 num_entries)
831{
832 int64 nBuckets,
833 nSegments,
834 nDirEntries;
835
836 /* estimate number of buckets wanted */
837 nBuckets = next_pow2_int64(num_entries);
838 /* # of segments needed for nBuckets */
839 nSegments = next_pow2_int64((nBuckets - 1) / DEF_SEGSIZE + 1);
840 /* directory entries */
841 nDirEntries = DEF_DIRSIZE;
842 while (nDirEntries < nSegments)
843 nDirEntries <<= 1; /* dir_alloc doubles dsize at each call */
844
845 return nDirEntries;
846}
847
848/*
849 * Compute the required initial memory allocation for a shared-memory
850 * hashtable with the given parameters. We need space for the HASHHDR
851 * and for the (non expansible) directory.
852 */
853Size
854hash_get_shared_size(HASHCTL *info, int flags)
855{
856 Assert(flags & HASH_DIRSIZE);
857 Assert(info->dsize == info->max_dsize);
858 return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT);
859}
860
861
862/********************** DESTROY ROUTINES ************************/
863
864void
865hash_destroy(HTAB *hashp)
866{
867 if (hashp != NULL)
868 {
869 /* allocation method must be one we know how to free, too */
870 Assert(hashp->alloc == DynaHashAlloc);
871 /* so this hashtable must have its own context */
872 Assert(hashp->hcxt != NULL);
873
874 hash_stats(__func__, hashp);
875
876 /*
877 * Free everything by destroying the hash table's memory context.
878 */
879 MemoryContextDelete(hashp->hcxt);
880 }
881}
882
883void
884hash_stats(const char *caller, HTAB *hashp)
885{
886#ifdef HASH_STATISTICS
887 HASHHDR *hctl = hashp->hctl;
888
889 elog(DEBUG4,
890 "hash_stats: Caller: %s Table Name: \"%s\" Accesses: " UINT64_FORMAT " Collisions: " UINT64_FORMAT " Expansions: " UINT64_FORMAT " Entries: " INT64_FORMAT " Key Size: %zu Max Bucket: %u Segment Count: " INT64_FORMAT,
891 caller != NULL ? caller : "(unknown)", hashp->tabname, hctl->accesses,
892 hctl->collisions, hctl->expansions, hash_get_num_entries(hashp),
893 hctl->keysize, hctl->max_bucket, hctl->nsegs);
894#endif
895}
896
897/*******************************SEARCH ROUTINES *****************************/
898
899
900/*
901 * get_hash_value -- exported routine to calculate a key's hash value
902 *
903 * We export this because for partitioned tables, callers need to compute
904 * the partition number (from the low-order bits of the hash value) before
905 * searching.
906 */
907uint32
908get_hash_value(HTAB *hashp, const void *keyPtr)
909{
910 return hashp->hash(keyPtr, hashp->keysize);
911}
912
913/* Convert a hash value to a bucket number */
914static inline uint32
915calc_bucket(HASHHDR *hctl, uint32 hash_val)
916{
917 uint32 bucket;
918
919 bucket = hash_val & hctl->high_mask;
920 if (bucket > hctl->max_bucket)
921 bucket = bucket & hctl->low_mask;
922
923 return bucket;
924}
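/*
 * Example: if the table began with 8 buckets and has been expanded three
 * times, then max_bucket = 10, low_mask = 7, high_mask = 15.  A hash value
 * of 13 yields 13 & 15 = 13, which exceeds max_bucket, so it is reduced to
 * 13 & 7 = 5.  Once a later split creates bucket 13, the same hash value
 * maps directly to bucket 13, and those entries move out of bucket 5.
 */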
925
926/*
927 * hash_search -- look up key in table and perform action
928 * hash_search_with_hash_value -- same, with key's hash value already computed
929 *
930 * action is one of:
931 * HASH_FIND: look up key in table
932 * HASH_ENTER: look up key in table, creating entry if not present
933 * HASH_ENTER_NULL: same, but return NULL if out of memory
934 * HASH_REMOVE: look up key in table, remove entry if present
935 *
936 * Return value is a pointer to the element found/entered/removed if any,
937 * or NULL if no match was found. (NB: in the case of the REMOVE action,
938 * the result is a dangling pointer that shouldn't be dereferenced!)
939 *
940 * HASH_ENTER will normally ereport a generic "out of memory" error if
941 * it is unable to create a new entry. The HASH_ENTER_NULL operation is
942 * the same except it will return NULL if out of memory.
943 *
944 * If foundPtr isn't NULL, then *foundPtr is set true if we found an
945 * existing entry in the table, false otherwise. This is needed in the
946 * HASH_ENTER case, but is redundant with the return value otherwise.
947 *
948 * For hash_search_with_hash_value, the hashvalue parameter must have been
949 * calculated with get_hash_value().
950 */
951void *
952hash_search(HTAB *hashp,
953 const void *keyPtr,
954 HASHACTION action,
955 bool *foundPtr)
956{
957 return hash_search_with_hash_value(hashp,
958 keyPtr,
959 hashp->hash(keyPtr, hashp->keysize),
960 action,
961 foundPtr);
962}
963
964void *
965hash_search_with_hash_value(HTAB *hashp,
966 const void *keyPtr,
967 uint32 hashvalue,
968 HASHACTION action,
969 bool *foundPtr)
970{
971 HASHHDR *hctl = hashp->hctl;
972 int freelist_idx = FREELIST_IDX(hctl, hashvalue);
973 Size keysize;
974 HASHBUCKET currBucket;
975 HASHBUCKET *prevBucketPtr;
976 HashCompareFunc match;
977
978#ifdef HASH_STATISTICS
979 hctl->accesses++;
980#endif
981
982 /*
983 * If inserting, check if it is time to split a bucket.
984 *
985 * NOTE: failure to expand table is not a fatal error, it just means we
986 * have to run at higher fill factor than we wanted. However, if we're
987 * using the palloc allocator then it will throw error anyway on
988 * out-of-memory, so we must do this before modifying the table.
989 */
990 if (action == HASH_ENTER || action == HASH_ENTER_NULL)
991 {
992 /*
993 * Can't split if running in partitioned mode, nor if frozen, nor if
994 * table is the subject of any active hash_seq_search scans.
995 */
996 if (hctl->freeList[0].nentries > (int64) hctl->max_bucket &&
997 !IS_PARTITIONED(hctl) && !hashp->frozen &&
998 !has_seq_scans(hashp))
999 (void) expand_table(hashp);
1000 }
1001
1002 /*
1003 * Do the initial lookup
1004 */
1005 (void) hash_initial_lookup(hashp, hashvalue, &prevBucketPtr);
1006 currBucket = *prevBucketPtr;
1007
1008 /*
1009 * Follow collision chain looking for matching key
1010 */
1011 match = hashp->match; /* save one fetch in inner loop */
1012 keysize = hashp->keysize; /* ditto */
1013
1014 while (currBucket != NULL)
1015 {
1016 if (currBucket->hashvalue == hashvalue &&
1017 match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
1018 break;
1019 prevBucketPtr = &(currBucket->link);
1020 currBucket = *prevBucketPtr;
1021#ifdef HASH_STATISTICS
1022 hctl->collisions++;
1023#endif
1024 }
1025
1026 if (foundPtr)
1027 *foundPtr = (bool) (currBucket != NULL);
1028
1029 /*
1030 * OK, now what?
1031 */
1032 switch (action)
1033 {
1034 case HASH_FIND:
1035 if (currBucket != NULL)
1036 return ELEMENTKEY(currBucket);
1037 return NULL;
1038
1039 case HASH_REMOVE:
1040 if (currBucket != NULL)
1041 {
1042 /* if partitioned, must lock to touch nentries and freeList */
1043 if (IS_PARTITIONED(hctl))
1044 SpinLockAcquire(&(hctl->freeList[freelist_idx].mutex));
1045
1046 /* delete the record from the appropriate nentries counter. */
1047 Assert(hctl->freeList[freelist_idx].nentries > 0);
1048 hctl->freeList[freelist_idx].nentries--;
1049
1050 /* remove record from hash bucket's chain. */
1051 *prevBucketPtr = currBucket->link;
1052
1053 /* add the record to the appropriate freelist. */
1054 currBucket->link = hctl->freeList[freelist_idx].freeList;
1055 hctl->freeList[freelist_idx].freeList = currBucket;
1056
1057 if (IS_PARTITIONED(hctl))
1058 SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1059
1060 /*
1061 * better hope the caller is synchronizing access to this
1062 * element, because someone else is going to reuse it the next
1063 * time something is added to the table
1064 */
1065 return ELEMENTKEY(currBucket);
1066 }
1067 return NULL;
1068
1069 case HASH_ENTER:
1070 case HASH_ENTER_NULL:
1071 /* Return existing element if found, else create one */
1072 if (currBucket != NULL)
1073 return ELEMENTKEY(currBucket);
1074
1075 /* disallow inserts if frozen */
1076 if (hashp->frozen)
1077 elog(ERROR, "cannot insert into frozen hashtable \"%s\"",
1078 hashp->tabname);
1079
1080 currBucket = get_hash_entry(hashp, freelist_idx);
1081 if (currBucket == NULL)
1082 {
1083 /* out of memory */
1084 if (action == HASH_ENTER_NULL)
1085 return NULL;
1086 /* report a generic message */
1087 if (hashp->isshared)
1088 ereport(ERROR,
1089 (errcode(ERRCODE_OUT_OF_MEMORY),
1090 errmsg("out of shared memory")));
1091 else
1092 ereport(ERROR,
1093 (errcode(ERRCODE_OUT_OF_MEMORY),
1094 errmsg("out of memory")));
1095 }
1096
1097 /* link into hashbucket chain */
1098 *prevBucketPtr = currBucket;
1099 currBucket->link = NULL;
1100
1101 /* copy key into record */
1102 currBucket->hashvalue = hashvalue;
1103 hashp->keycopy(ELEMENTKEY(currBucket), keyPtr, keysize);
1104
1105 /*
1106 * Caller is expected to fill the data field on return. DO NOT
1107 * insert any code that could possibly throw error here, as doing
1108 * so would leave the table entry incomplete and hence corrupt the
1109 * caller's data structure.
1110 */
1111
1112 return ELEMENTKEY(currBucket);
1113 }
1114
1115 elog(ERROR, "unrecognized hash action code: %d", (int) action);
1116
1117 return NULL; /* keep compiler quiet */
1118}
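/*
 * The canonical insert-or-find pattern looks like this (a sketch; MyEntry
 * is a hypothetical entry struct whose first field is the key):
 *
 *     bool     found;
 *     MyEntry *entry;
 *
 *     entry = (MyEntry *) hash_search(htab, &key, HASH_ENTER, &found);
 *     if (!found)
 *         entry->count = 0;    (new entry: init everything beyond the key)
 *     entry->count++;
 */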
1119
1120/*
1121 * hash_update_hash_key -- change the hash key of an existing table entry
1122 *
1123 * This is equivalent to removing the entry, making a new entry, and copying
1124 * over its data, except that the entry never goes to the table's freelist.
1125 * Therefore this cannot suffer an out-of-memory failure, even if there are
1126 * other processes operating in other partitions of the hashtable.
1127 *
1128 * Returns true if successful, false if the requested new hash key is already
1129 * present. Throws error if the specified entry pointer isn't actually a
1130 * table member.
1131 *
1132 * NB: currently, there is no special case for old and new hash keys being
1133 * identical, which means we'll report false for that situation. This is
1134 * preferable for existing uses.
1135 *
1136 * NB: for a partitioned hashtable, caller must hold lock on both relevant
1137 * partitions, if the new hash key would belong to a different partition.
1138 */
1139bool
1140hash_update_hash_key(HTAB *hashp,
1141 void *existingEntry,
1142 const void *newKeyPtr)
1143{
1144 HASHELEMENT *existingElement = ELEMENT_FROM_KEY(existingEntry);
1145 uint32 newhashvalue;
1146 Size keysize;
1147 uint32 bucket;
1148 uint32 newbucket;
1149 HASHBUCKET currBucket;
1150 HASHBUCKET *prevBucketPtr;
1151 HASHBUCKET *oldPrevPtr;
1152 HashCompareFunc match;
1153
1154#ifdef HASH_STATISTICS
1155 HASHHDR *hctl = hashp->hctl;
1156
1157 hctl->accesses++;
1158#endif
1159
1160 /* disallow updates if frozen */
1161 if (hashp->frozen)
1162 elog(ERROR, "cannot update in frozen hashtable \"%s\"",
1163 hashp->tabname);
1164
1165 /*
1166 * Lookup the existing element using its saved hash value. We need to do
1167 * this to be able to unlink it from its hash chain, but as a side benefit
1168 * we can verify the validity of the passed existingEntry pointer.
1169 */
1170 bucket = hash_initial_lookup(hashp, existingElement->hashvalue,
1171 &prevBucketPtr);
1172 currBucket = *prevBucketPtr;
1173
1174 while (currBucket != NULL)
1175 {
1176 if (currBucket == existingElement)
1177 break;
1178 prevBucketPtr = &(currBucket->link);
1179 currBucket = *prevBucketPtr;
1180 }
1181
1182 if (currBucket == NULL)
1183 elog(ERROR, "hash_update_hash_key argument is not in hashtable \"%s\"",
1184 hashp->tabname);
1185
1186 oldPrevPtr = prevBucketPtr;
1187
1188 /*
1189 * Now perform the equivalent of a HASH_ENTER operation to locate the hash
1190 * chain we want to put the entry into.
1191 */
1192 newhashvalue = hashp->hash(newKeyPtr, hashp->keysize);
1193 newbucket = hash_initial_lookup(hashp, newhashvalue, &prevBucketPtr);
1194 currBucket = *prevBucketPtr;
1195
1196 /*
1197 * Follow collision chain looking for matching key
1198 */
1199 match = hashp->match; /* save one fetch in inner loop */
1200 keysize = hashp->keysize; /* ditto */
1201
1202 while (currBucket != NULL)
1203 {
1204 if (currBucket->hashvalue == newhashvalue &&
1205 match(ELEMENTKEY(currBucket), newKeyPtr, keysize) == 0)
1206 break;
1207 prevBucketPtr = &(currBucket->link);
1208 currBucket = *prevBucketPtr;
1209#ifdef HASH_STATISTICS
1210 hctl->collisions++;
1211#endif
1212 }
1213
1214 if (currBucket != NULL)
1215 return false; /* collision with an existing entry */
1216
1217 currBucket = existingElement;
1218
1219 /*
1220 * If old and new hash values belong to the same bucket, we need not
1221 * change any chain links, and indeed should not since this simplistic
1222 * update will corrupt the list if currBucket is the last element. (We
1223 * cannot fall out earlier, however, since we need to scan the bucket to
1224 * check for duplicate keys.)
1225 */
1226 if (bucket != newbucket)
1227 {
1228 /* OK to remove record from old hash bucket's chain. */
1229 *oldPrevPtr = currBucket->link;
1230
1231 /* link into new hashbucket chain */
1232 *prevBucketPtr = currBucket;
1233 currBucket->link = NULL;
1234 }
1235
1236 /* copy new key into record */
1237 currBucket->hashvalue = newhashvalue;
1238 hashp->keycopy(ELEMENTKEY(currBucket), newKeyPtr, keysize);
1239
1240 /* rest of record is untouched */
1241
1242 return true;
1243}
1244
1245/*
1246 * Allocate a new hashtable entry if possible; return NULL if out of memory.
1247 * (Or, if the underlying space allocator throws error for out-of-memory,
1248 * we won't return at all.)
1249 */
1250static HASHBUCKET
1251get_hash_entry(HTAB *hashp, int freelist_idx)
1252{
1253 HASHHDR *hctl = hashp->hctl;
1254 HASHBUCKET newElement;
1255
1256 for (;;)
1257 {
1258 /* if partitioned, must lock to touch nentries and freeList */
1259 if (IS_PARTITIONED(hctl))
1260 SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
1261
1262 /* try to get an entry from the freelist */
1263 newElement = hctl->freeList[freelist_idx].freeList;
1264
1265 if (newElement != NULL)
1266 break;
1267
1268 if (IS_PARTITIONED(hctl))
1269 SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1270
1271 /*
1272 * No free elements in this freelist. In a partitioned table, there
1273 * might be entries in other freelists, but to reduce contention we
1274 * prefer to first try to get another chunk of buckets from the main
1275 * shmem allocator. If that fails, though, we *MUST* root through all
1276 * the other freelists before giving up. There are multiple callers
1277 * that assume that they can allocate every element in the initially
1278 * requested table size, or that deleting an element guarantees they
1279 * can insert a new element, even if shared memory is entirely full.
1280 * Failing because the needed element is in a different freelist is
1281 * not acceptable.
1282 */
1283 if (!element_alloc(hashp, hctl->nelem_alloc, freelist_idx))
1284 {
1285 int borrow_from_idx;
1286
1287 if (!IS_PARTITIONED(hctl))
1288 return NULL; /* out of memory */
1289
1290 /* try to borrow element from another freelist */
1291 borrow_from_idx = freelist_idx;
1292 for (;;)
1293 {
1294 borrow_from_idx = (borrow_from_idx + 1) % NUM_FREELISTS;
1295 if (borrow_from_idx == freelist_idx)
1296 break; /* examined all freelists, fail */
1297
1298 SpinLockAcquire(&(hctl->freeList[borrow_from_idx].mutex));
1299 newElement = hctl->freeList[borrow_from_idx].freeList;
1300
1301 if (newElement != NULL)
1302 {
1303 hctl->freeList[borrow_from_idx].freeList = newElement->link;
1304 SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));
1305
1306 /* careful: count the new element in its proper freelist */
1307 SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
1308 hctl->freeList[freelist_idx].nentries++;
1309 SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1310
1311 return newElement;
1312 }
1313
1314 SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));
1315 }
1316
1317 /* no elements available to borrow either, so out of memory */
1318 return NULL;
1319 }
1320 }
1321
1322 /* remove entry from freelist, bump nentries */
1323 hctl->freeList[freelist_idx].freeList = newElement->link;
1324 hctl->freeList[freelist_idx].nentries++;
1325
1326 if (IS_PARTITIONED(hctl))
1327 SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1328
1329 return newElement;
1330}
1331
1332/*
1333 * hash_get_num_entries -- get the number of entries in a hashtable
1334 */
1335int64
1336hash_get_num_entries(HTAB *hashp)
1337{
1338 int i;
1339 int64 sum = hashp->hctl->freeList[0].nentries;
1340
1341 /*
1342 * We currently don't bother with acquiring the mutexes; it's only
1343 * sensible to call this function if you've got lock on all partitions of
1344 * the table.
1345 */
1346 if (IS_PARTITIONED(hashp->hctl))
1347 {
1348 for (i = 1; i < NUM_FREELISTS; i++)
1349 sum += hashp->hctl->freeList[i].nentries;
1350 }
1351
1352 return sum;
1353}
1354
1355/*
1356 * hash_seq_init/_search/_term
1357 * Sequentially search through hash table and return
1358 * all the elements one by one, return NULL when no more.
1359 *
1360 * hash_seq_term should be called if and only if the scan is abandoned before
1361 * completion; if hash_seq_search returns NULL then it has already done the
1362 * end-of-scan cleanup.
1363 *
1364 * NOTE: caller may delete the returned element before continuing the scan.
1365 * However, deleting any other element while the scan is in progress is
1366 * UNDEFINED (it might be the one that curIndex is pointing at!). Also,
1367 * if elements are added to the table while the scan is in progress, it is
1368 * unspecified whether they will be visited by the scan or not.
1369 *
1370 * NOTE: it is possible to use hash_seq_init/hash_seq_search without any
1371 * worry about hash_seq_term cleanup, if the hashtable is first locked against
1372 * further insertions by calling hash_freeze.
1373 *
1374 * NOTE: to use this with a partitioned hashtable, caller had better hold
1375 * at least shared lock on all partitions of the table throughout the scan!
1376 * We can cope with insertions or deletions by our own backend, but *not*
1377 * with concurrent insertions or deletions by another.
1378 */
1379void
1380hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
1381{
1382 status->hashp = hashp;
1383 status->curBucket = 0;
1384 status->curEntry = NULL;
1385 status->hasHashvalue = false;
1386 if (!hashp->frozen)
1387 register_seq_scan(hashp);
1388}
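/*
 * A complete scan therefore looks like (a sketch; MyEntry is hypothetical):
 *
 *     HASH_SEQ_STATUS status;
 *     MyEntry *entry;
 *
 *     hash_seq_init(&status, htab);
 *     while ((entry = (MyEntry *) hash_seq_search(&status)) != NULL)
 *         process(entry);
 *
 * No hash_seq_term() call is needed when the loop runs to completion;
 * call hash_seq_term(&status) only if the scan is abandoned early.
 */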
1389
1390/*
1391 * Same as above but scan by the given hash value.
1392 * See also hash_seq_search().
1393 *
1394 * NOTE: the default hash function doesn't match syscache hash function.
1395 * Thus, if you're going to use this function in syscache callback, make sure
1396 * you're using custom hash function. See relatt_cache_syshash()
1397 * for example.
1398 */
1399void
1400hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status, HTAB *hashp,
1401 uint32 hashvalue)
1402{
1403 HASHBUCKET *bucketPtr;
1404
1405 hash_seq_init(status, hashp);
1406
1407 status->hasHashvalue = true;
1408 status->hashvalue = hashvalue;
1409
1410 status->curBucket = hash_initial_lookup(hashp, hashvalue, &bucketPtr);
1411 status->curEntry = *bucketPtr;
1412}
1413
1414void *
1415hash_seq_search(HASH_SEQ_STATUS *status)
1416{
1417 HTAB *hashp;
1418 HASHHDR *hctl;
1419 uint32 max_bucket;
1420 int64 ssize;
1421 int64 segment_num;
1422 int64 segment_ndx;
1423 HASHSEGMENT segp;
1424 uint32 curBucket;
1425 HASHELEMENT *curElem;
1426
1427 if (status->hasHashvalue)
1428 {
1429 /*
1430 * Scan entries only in the current bucket because only this bucket
1431 * can contain entries with the given hash value.
1432 */
1433 while ((curElem = status->curEntry) != NULL)
1434 {
1435 status->curEntry = curElem->link;
1436 if (status->hashvalue != curElem->hashvalue)
1437 continue;
1438 return (void *) ELEMENTKEY(curElem);
1439 }
1440
1441 hash_seq_term(status);
1442 return NULL;
1443 }
1444
1445 if ((curElem = status->curEntry) != NULL)
1446 {
1447 /* Continuing scan of curBucket... */
1448 status->curEntry = curElem->link;
1449 if (status->curEntry == NULL) /* end of this bucket */
1450 ++status->curBucket;
1451 return ELEMENTKEY(curElem);
1452 }
1453
1454 /*
1455 * Search for next nonempty bucket starting at curBucket.
1456 */
1457 curBucket = status->curBucket;
1458 hashp = status->hashp;
1459 hctl = hashp->hctl;
1460 ssize = hashp->ssize;
1461 max_bucket = hctl->max_bucket;
1462
1463 if (curBucket > max_bucket)
1464 {
1465 hash_seq_term(status);
1466 return NULL; /* search is done */
1467 }
1468
1469 /*
1470 * first find the right segment in the table directory.
1471 */
1472 segment_num = curBucket >> hashp->sshift;
1473 segment_ndx = MOD(curBucket, ssize);
1474
1475 segp = hashp->dir[segment_num];
1476
1477 /*
1478 * Pick up the first item in this bucket's chain. If chain is not empty
1479 * we can begin searching it. Otherwise we have to advance to find the
1480 * next nonempty bucket. We try to optimize that case since searching a
1481 * near-empty hashtable has to iterate this loop a lot.
1482 */
1483 while ((curElem = segp[segment_ndx]) == NULL)
1484 {
1485 /* empty bucket, advance to next */
1486 if (++curBucket > max_bucket)
1487 {
1488 status->curBucket = curBucket;
1489 hash_seq_term(status);
1490 return NULL; /* search is done */
1491 }
1492 if (++segment_ndx >= ssize)
1493 {
1494 segment_num++;
1495 segment_ndx = 0;
1496 segp = hashp->dir[segment_num];
1497 }
1498 }
1499
1500 /* Begin scan of curBucket... */
1501 status->curEntry = curElem->link;
1502 if (status->curEntry == NULL) /* end of this bucket */
1503 ++curBucket;
1504 status->curBucket = curBucket;
1505 return ELEMENTKEY(curElem);
1506}
1507
1508void
1509hash_seq_term(HASH_SEQ_STATUS *status)
1510{
1511 if (!status->hashp->frozen)
1512 deregister_seq_scan(status->hashp);
1513}
1514
1515/*
1516 * hash_freeze
1517 * Freeze a hashtable against future insertions (deletions are
1518 * still allowed)
1519 *
1520 * The reason for doing this is that by preventing any more bucket splits,
1521 * we no longer need to worry about registering hash_seq_search scans,
1522 * and thus caller need not be careful about ensuring hash_seq_term gets
1523 * called at the right times.
1524 *
1525 * Multiple calls to hash_freeze() are allowed, but you can't freeze a table
1526 * with active scans (since hash_seq_term would then do the wrong thing).
1527 */
1528void
1529hash_freeze(HTAB *hashp)
1530{
1531 if (hashp->isshared)
1532 elog(ERROR, "cannot freeze shared hashtable \"%s\"", hashp->tabname);
1533 if (!hashp->frozen && has_seq_scans(hashp))
1534 elog(ERROR, "cannot freeze hashtable \"%s\" because it has active scans",
1535 hashp->tabname);
1536 hashp->frozen = true;
1537}
1538
1539
1540/********************************* UTILITIES ************************/
1541
1542/*
1543 * Expand the table by adding one more hash bucket.
1544 */
1545static bool
1546expand_table(HTAB *hashp)
1547{
1548 HASHHDR *hctl = hashp->hctl;
1549 HASHSEGMENT old_seg,
1550 new_seg;
1551 int64 old_bucket,
1552 new_bucket;
1553 int64 new_segnum,
1554 new_segndx;
1555 int64 old_segnum,
1556 old_segndx;
1557 HASHBUCKET *oldlink,
1558 *newlink;
1559 HASHBUCKET currElement,
1560 nextElement;
1561
1562 Assert(!IS_PARTITIONED(hctl));
1563
1564#ifdef HASH_STATISTICS
1565 hctl->expansions++;
1566#endif
1567
1568 new_bucket = hctl->max_bucket + 1;
1569 new_segnum = new_bucket >> hashp->sshift;
1570 new_segndx = MOD(new_bucket, hashp->ssize);
1571
1572 if (new_segnum >= hctl->nsegs)
1573 {
1574 /* Allocate new segment if necessary -- could fail if dir full */
1575 if (new_segnum >= hctl->dsize)
1576 if (!dir_realloc(hashp))
1577 return false;
1578 if (!(hashp->dir[new_segnum] = seg_alloc(hashp)))
1579 return false;
1580 hctl->nsegs++;
1581 }
1582
1583 /* OK, we created a new bucket */
1584 hctl->max_bucket++;
1585
1586 /*
1587 * *Before* changing masks, find old bucket corresponding to same hash
1588 * values; values in that bucket may need to be relocated to new bucket.
1589 * Note that new_bucket is certainly larger than low_mask at this point,
1590 * so we can skip the first step of the regular hash mask calc.
1591 */
1592 old_bucket = (new_bucket & hctl->low_mask);
1593
1594 /*
1595 * If we crossed a power of 2, readjust masks.
1596 */
1597 if ((uint32) new_bucket > hctl->high_mask)
1598 {
1599 hctl->low_mask = hctl->high_mask;
1600 hctl->high_mask = (uint32) new_bucket | hctl->low_mask;
1601 }
1602
1603 /*
1604 * Relocate records to the new bucket. NOTE: because of the way the hash
1605 * masking is done in calc_bucket, only one old bucket can need to be
1606 * split at this point. With a different way of reducing the hash value,
1607 * that might not be true!
1608 */
1609 old_segnum = old_bucket >> hashp->sshift;
1610 old_segndx = MOD(old_bucket, hashp->ssize);
1611
1612 old_seg = hashp->dir[old_segnum];
1613 new_seg = hashp->dir[new_segnum];
1614
1615 oldlink = &old_seg[old_segndx];
1616 newlink = &new_seg[new_segndx];
1617
1618 for (currElement = *oldlink;
1619 currElement != NULL;
1620 currElement = nextElement)
1621 {
1622 nextElement = currElement->link;
1623 if ((int64) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
1624 {
1625 *oldlink = currElement;
1626 oldlink = &currElement->link;
1627 }
1628 else
1629 {
1630 *newlink = currElement;
1631 newlink = &currElement->link;
1632 }
1633 }
1634 /* don't forget to terminate the rebuilt hash chains... */
1635 *oldlink = NULL;
1636 *newlink = NULL;
1637
1638 return true;
1639}
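/*
 * Split example: with low_mask = 7, creating new_bucket = 11 makes
 * old_bucket = 11 & 7 = 3, so only chain 3 is rescanned; each of its
 * elements stays in bucket 3 or moves to bucket 11 according to
 * calc_bucket() of its stored hashvalue.
 */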
1640
1641
1642static bool
1643dir_realloc(HTAB *hashp)
1644{
1645 HASHSEGMENT *p;
1646 HASHSEGMENT *old_p;
1647 int64 new_dsize;
1648 int64 old_dirsize;
1649 int64 new_dirsize;
1650
1651 if (hashp->hctl->max_dsize != NO_MAX_DSIZE)
1652 return false;
1653
1654 /* Reallocate directory */
1655 new_dsize = hashp->hctl->dsize << 1;
1656 old_dirsize = hashp->hctl->dsize * sizeof(HASHSEGMENT);
1657 new_dirsize = new_dsize * sizeof(HASHSEGMENT);
1658
1659 old_p = hashp->dir;
1660 CurrentDynaHashCxt = hashp->hcxt;
1661 p = (HASHSEGMENT *) hashp->alloc((Size) new_dirsize);
1662
1663 if (p != NULL)
1664 {
1665 memcpy(p, old_p, old_dirsize);
1666 MemSet(((char *) p) + old_dirsize, 0, new_dirsize - old_dirsize);
1667 hashp->dir = p;
1668 hashp->hctl->dsize = new_dsize;
1669
1670 /* XXX assume the allocator is palloc, so we know how to free */
1671 Assert(hashp->alloc == DynaHashAlloc);
1672 pfree(old_p);
1673
1674 return true;
1675 }
1676
1677 return false;
1678}
1679
1680
1681static HASHSEGMENT
1682seg_alloc(HTAB *hashp)
1683{
1684 HASHSEGMENT segp;
1685
1686 CurrentDynaHashCxt = hashp->hcxt;
1687 segp = (HASHSEGMENT) hashp->alloc(sizeof(HASHBUCKET) * hashp->ssize);
1688
1689 if (!segp)
1690 return NULL;
1691
1692 MemSet(segp, 0, sizeof(HASHBUCKET) * hashp->ssize);
1693
1694 return segp;
1695}
1696
1697/*
1698 * allocate some new elements and link them into the indicated free list
1699 */
1700static bool
1701element_alloc(HTAB *hashp, int nelem, int freelist_idx)
1702{
1703 HASHHDR *hctl = hashp->hctl;
1704 Size elementSize;
1705 Size requestSize;
1706 char *allocedBlock;
1707 HASHELEMENT *firstElement;
1708 HASHELEMENT *tmpElement;
1709 HASHELEMENT *prevElement;
1710 int i;
1711
1712 if (hctl->isfixed)
1713 return false;
1714
1715 /* Each element has a HASHELEMENT header plus user data. */
1716 elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize);
1717
1718 requestSize = nelem * elementSize;
1719
1720 /* Add space for slist_node list link if we need one. */
1721#ifdef USE_VALGRIND
1722 if (!hashp->isshared)
1723 requestSize += MAXALIGN(sizeof(slist_node));
1724#endif
1725
1726 /* Allocate the memory. */
1727 CurrentDynaHashCxt = hashp->hcxt;
1728 allocedBlock = hashp->alloc(requestSize);
1729
1730 if (!allocedBlock)
1731 return false;
1732
1733 /*
1734 * If USE_VALGRIND, each allocated block of elements of a non-shared
1735 * hashtable is chained into a list, so that Valgrind won't think it's
1736 * been leaked.
1737 */
1738#ifdef USE_VALGRIND
1739 if (hashp->isshared)
1740 firstElement = (HASHELEMENT *) allocedBlock;
1741 else
1742 {
1743 slist_push_head(&hashp->element_blocks, (slist_node *) allocedBlock);
1744 firstElement = (HASHELEMENT *) (allocedBlock + MAXALIGN(sizeof(slist_node)));
1745 }
1746#else
1747 firstElement = (HASHELEMENT *) allocedBlock;
1748#endif
1749
1750 /* prepare to link all the new entries into the freelist */
1751 prevElement = NULL;
1752 tmpElement = firstElement;
1753 for (i = 0; i < nelem; i++)
1754 {
1755 tmpElement->link = prevElement;
1756 prevElement = tmpElement;
1757 tmpElement = (HASHELEMENT *) (((char *) tmpElement) + elementSize);
1758 }
1759
1760 /* if partitioned, must lock to touch freeList */
1761 if (IS_PARTITIONED(hctl))
1762 SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
1763
1764 /* freelist could be nonempty if two backends did this concurrently */
1765 firstElement->link = hctl->freeList[freelist_idx].freeList;
1766 hctl->freeList[freelist_idx].freeList = prevElement;
1767
1768 if (IS_PARTITIONED(hctl))
1769 SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1770
1771 return true;
1772}
1773
1774/*
1775 * Do initial lookup of a bucket for the given hash value, retrieving its
1776 * bucket number and its hash bucket.
1777 */
1778static inline uint32
1779hash_initial_lookup(HTAB *hashp, uint32 hashvalue, HASHBUCKET **bucketptr)
1780{
1781 HASHHDR *hctl = hashp->hctl;
1782 HASHSEGMENT segp;
1783 int64 segment_num;
1784 int64 segment_ndx;
1785 uint32 bucket;
1786
1787 bucket = calc_bucket(hctl, hashvalue);
1788
1789 segment_num = bucket >> hashp->sshift;
1790 segment_ndx = MOD(bucket, hashp->ssize);
1791
1792 segp = hashp->dir[segment_num];
1793
1794 if (segp == NULL)
1795 hash_corrupted(hashp);
1796
1797 *bucketptr = &segp[segment_ndx];
1798 return bucket;
1799}
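
/*
 * Worked example (hypothetical numbers, not from the source): with the
 * default segment geometry ssize = 256 and sshift = 8, a hash value that
 * calc_bucket() maps to bucket 1000 lives in directory slot 1000 >> 8 = 3
 * at segment index MOD(1000, 256) = 232, so *bucketptr ends up pointing at
 * hashp->dir[3][232].
 */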
1800
1801/* complain when we have detected a corrupted hashtable */
1802static pg_noreturn void
1803hash_corrupted(HTAB *hashp)
1804{
1805 /*
1806 * If the corruption is in a shared hashtable, we'd better force a
1807 * systemwide restart. Otherwise, just shut down this one backend.
1808 */
1809 if (hashp->isshared)
1810 elog(PANIC, "hash table \"%s\" corrupted", hashp->tabname);
1811 else
1812 elog(FATAL, "hash table \"%s\" corrupted", hashp->tabname);
1813}
1814
1815/* calculate ceil(log base 2) of num */
1816static int
1817my_log2(int64 num)
1818{
1819 /*
1820 * guard against too-large input, which would be invalid for
1821 * pg_ceil_log2_*()
1822 */
1823 if (num > PG_INT64_MAX / 2)
1824 num = PG_INT64_MAX / 2;
1825
1826 return pg_ceil_log2_64(num);
1827}
1828
1829/* calculate first power of 2 >= num, bounded to what will fit in an int64 */
1830static int64
1831next_pow2_int64(int64 num)
1832{
1833 /* my_log2's internal range check is sufficient */
1834 return INT64CONST(1) << my_log2(num);
1835}
1836
1837/* calculate first power of 2 >= num, bounded to what will fit in an int */
1838static int
1839next_pow2_int(int64 num)
1840{
1841 if (num > INT_MAX / 2)
1842 num = INT_MAX / 2;
1843 return 1 << my_log2(num);
1844}
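
/*
 * Worked example (illustrative): my_log2(1000) = 10 because
 * 2^9 = 512 < 1000 <= 1024 = 2^10, so next_pow2_int(1000) returns
 * 1 << 10 = 1024.  An exact power of two maps to itself:
 * next_pow2_int(1024) is also 1024.
 */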
1845
1846
1847/************************* SEQ SCAN TRACKING ************************/
1848
1849/*
1850 * We track active hash_seq_search scans here. The need for this mechanism
1851 * comes from the fact that a scan will get confused if a bucket split occurs
1852 * while it's in progress: it might visit entries twice, or even miss some
1853 * entirely (if it's partway through the same bucket that splits). Hence
1854 * we want to inhibit bucket splits if there are any active scans on the
1855 * table being inserted into. This is a fairly rare case in current usage,
1856 * so just postponing the split until the next insertion seems sufficient.
1857 *
1858 * Given present usages of the function, only a few scans are likely to be
1859 * open concurrently; so a finite-size stack of open scans seems sufficient,
1860 * and we don't worry that linear search is too slow. Note that we do
1861 * allow multiple scans of the same hashtable to be open concurrently.
1862 *
1863 * This mechanism can support concurrent scan and insertion in a shared
1864 * hashtable if it's the same backend doing both. It would fail otherwise,
1865 * but locking reasons seem to preclude any such scenario anyway, so we don't
1866 * worry.
1867 *
1868 * This arrangement is reasonably robust if a transient hashtable is deleted
1869 * without notifying us. The absolute worst case is we might inhibit splits
1870 * in another table created later at exactly the same address. We will give
1871 * a warning at transaction end for reference leaks, so any bugs leading to
1872 * lack of notification should be easy to catch.
1873 */
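
/*
 * Illustrative caller pattern (hypothetical code, not part of dynahash.c):
 * the tracking below protects scans written like this.  A scan run to
 * completion (hash_seq_search returning NULL) is deregistered
 * automatically; one abandoned early must call hash_seq_term, or it will
 * keep inhibiting bucket splits and draw the "leaked scan" warning at
 * transaction end.
 *
 *   HASH_SEQ_STATUS status;
 *   MyEntry    *entry;       -- MyEntry, my_htab, should_stop() are
 *                               hypothetical names
 *
 *   hash_seq_init(&status, my_htab);
 *   while ((entry = (MyEntry *) hash_seq_search(&status)) != NULL)
 *   {
 *       if (should_stop(entry))
 *       {
 *           hash_seq_term(&status);   -- required on early exit
 *           break;
 *       }
 *   }
 */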
1874
1875#define MAX_SEQ_SCANS 100
1876
1877static HTAB *seq_scan_tables[MAX_SEQ_SCANS]; /* tables being scanned */
1878static int seq_scan_level[MAX_SEQ_SCANS]; /* subtransaction nest level */
1879static int num_seq_scans = 0;
1880
1881
1882/* Register a table as having an active hash_seq_search scan */
1883static void
1884register_seq_scan(HTAB *hashp)
1885{
1886 if (num_seq_scans >= MAX_SEQ_SCANS)
1887 elog(ERROR, "too many active hash_seq_search scans, cannot start one on \"%s\"",
1888 hashp->tabname);
1889 seq_scan_tables[num_seq_scans] = hashp;
1890 seq_scan_level[num_seq_scans] = GetCurrentTransactionNestLevel();
1891 num_seq_scans++;
1892}
1893
1894/* Deregister an active scan */
1895static void
1896deregister_seq_scan(HTAB *hashp)
1897{
1898 int i;
1899
1900 /* Search backward since it's most likely at the stack top */
1901 for (i = num_seq_scans - 1; i >= 0; i--)
1902 {
1903 if (seq_scan_tables[i] == hashp)
1904 {
1905 seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
1906 seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
1907 num_seq_scans--;
1908 return;
1909 }
1910 }
1911 elog(ERROR, "no hash_seq_search scan for hash table \"%s\"",
1912 hashp->tabname);
1913}
1914
1915/* Check if a table has any active scan */
1916static bool
1917has_seq_scans(HTAB *hashp)
1918{
1919 int i;
1920
1921 for (i = 0; i < num_seq_scans; i++)
1922 {
1923 if (seq_scan_tables[i] == hashp)
1924 return true;
1925 }
1926 return false;
1927}
1928
1929/* Clean up any open scans at end of transaction */
1930void
1931AtEOXact_HashTables(bool isCommit)
1932{
1933 /*
1934 * During abort cleanup, open scans are expected; just silently clean 'em
1935 * out. An open scan at commit means someone forgot a hash_seq_term()
1936 * call, so complain.
1937 *
1938 * Note: it's tempting to try to print the tabname here, but refrain for
1939 * fear of touching deallocated memory. This isn't a user-facing message
1940 * anyway, so it needn't be pretty.
1941 */
1942 if (isCommit)
1943 {
1944 int i;
1945
1946 for (i = 0; i < num_seq_scans; i++)
1947 {
1948 elog(WARNING, "leaked hash_seq_search scan for hash table %p",
1949 seq_scan_tables[i]);
1950 }
1951 }
1952 num_seq_scans = 0;
1953}
1954
1955/* Clean up any open scans at end of subtransaction */
1956void
1957AtEOSubXact_HashTables(bool isCommit, int nestDepth)
1958{
1959 int i;
1960
1961 /*
1962 * Search backward to make cleanup easy. Note we must check all entries,
1963 * not only those at the end of the array, because deletion technique
1964 * doesn't keep them in order.
1965 */
1966 for (i = num_seq_scans - 1; i >= 0; i--)
1967 {
1968 if (seq_scan_level[i] >= nestDepth)
1969 {
1970 if (isCommit)
1971 elog(WARNING, "leaked hash_seq_search scan for hash table %p",
1972 seq_scan_tables[i]);
1973 seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
1974 seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
1975 num_seq_scans--;
1976 }
1977 }
1978}