Thanks for visiting codestin.com
Credit goes to doxygen.postgresql.org

PostgreSQL Source Code git master
pg_buffercache_pages.c File Reference
#include "postgres.h"
#include "access/htup_details.h"
#include "access/relation.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "port/pg_numa.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"
Include dependency graph for pg_buffercache_pages.c:

Go to the source code of this file.

Data Structures

struct  BufferCachePagesRec
 
struct  BufferCachePagesContext
 
struct  BufferCacheNumaRec
 
struct  BufferCacheNumaContext
 

Macros

#define NUM_BUFFERCACHE_PAGES_MIN_ELEM   8
 
#define NUM_BUFFERCACHE_PAGES_ELEM   9
 
#define NUM_BUFFERCACHE_SUMMARY_ELEM   5
 
#define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM   4
 
#define NUM_BUFFERCACHE_EVICT_ELEM   2
 
#define NUM_BUFFERCACHE_EVICT_RELATION_ELEM   3
 
#define NUM_BUFFERCACHE_EVICT_ALL_ELEM   3
 
#define NUM_BUFFERCACHE_NUMA_ELEM   3
 

Functions

 PG_MODULE_MAGIC_EXT (.name="pg_buffercache",.version=PG_VERSION)
 
 PG_FUNCTION_INFO_V1 (pg_buffercache_pages)
 
 PG_FUNCTION_INFO_V1 (pg_buffercache_numa_pages)
 
 PG_FUNCTION_INFO_V1 (pg_buffercache_summary)
 
 PG_FUNCTION_INFO_V1 (pg_buffercache_usage_counts)
 
 PG_FUNCTION_INFO_V1 (pg_buffercache_evict)
 
 PG_FUNCTION_INFO_V1 (pg_buffercache_evict_relation)
 
 PG_FUNCTION_INFO_V1 (pg_buffercache_evict_all)
 
Datum pg_buffercache_pages (PG_FUNCTION_ARGS)
 
Datum pg_buffercache_numa_pages (PG_FUNCTION_ARGS)
 
Datum pg_buffercache_summary (PG_FUNCTION_ARGS)
 
Datum pg_buffercache_usage_counts (PG_FUNCTION_ARGS)
 
static void pg_buffercache_superuser_check (char *func_name)
 
Datum pg_buffercache_evict (PG_FUNCTION_ARGS)
 
Datum pg_buffercache_evict_relation (PG_FUNCTION_ARGS)
 
Datum pg_buffercache_evict_all (PG_FUNCTION_ARGS)
 

Variables

static bool firstNumaTouch = true
 

Macro Definition Documentation

◆ NUM_BUFFERCACHE_EVICT_ALL_ELEM

#define NUM_BUFFERCACHE_EVICT_ALL_ELEM   3

Definition at line 27 of file pg_buffercache_pages.c.

◆ NUM_BUFFERCACHE_EVICT_ELEM

#define NUM_BUFFERCACHE_EVICT_ELEM   2

Definition at line 25 of file pg_buffercache_pages.c.

◆ NUM_BUFFERCACHE_EVICT_RELATION_ELEM

#define NUM_BUFFERCACHE_EVICT_RELATION_ELEM   3

Definition at line 26 of file pg_buffercache_pages.c.

◆ NUM_BUFFERCACHE_NUMA_ELEM

#define NUM_BUFFERCACHE_NUMA_ELEM   3

Definition at line 29 of file pg_buffercache_pages.c.

◆ NUM_BUFFERCACHE_PAGES_ELEM

#define NUM_BUFFERCACHE_PAGES_ELEM   9

Definition at line 22 of file pg_buffercache_pages.c.

◆ NUM_BUFFERCACHE_PAGES_MIN_ELEM

#define NUM_BUFFERCACHE_PAGES_MIN_ELEM   8

Definition at line 21 of file pg_buffercache_pages.c.

◆ NUM_BUFFERCACHE_SUMMARY_ELEM

#define NUM_BUFFERCACHE_SUMMARY_ELEM   5

Definition at line 23 of file pg_buffercache_pages.c.

◆ NUM_BUFFERCACHE_USAGE_COUNTS_ELEM

#define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM   4

Definition at line 24 of file pg_buffercache_pages.c.

Function Documentation

◆ pg_buffercache_evict()

Datum pg_buffercache_evict ( PG_FUNCTION_ARGS  )

Definition at line 669 of file pg_buffercache_pages.c.

670{
671 Datum result;
672 TupleDesc tupledesc;
673 HeapTuple tuple;
674 Datum values[NUM_BUFFERCACHE_EVICT_ELEM];
675 bool nulls[NUM_BUFFERCACHE_EVICT_ELEM] = {0};
676
677 Buffer buf = PG_GETARG_INT32(0);
678 bool buffer_flushed;
679
680 if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
681 elog(ERROR, "return type must be a row type");
682
683 pg_buffercache_superuser_check("pg_buffercache_evict");
684
685 if (buf < 1 || buf > NBuffers)
686 elog(ERROR, "bad buffer ID: %d", buf);
687
688 values[0] = BoolGetDatum(EvictUnpinnedBuffer(buf, &buffer_flushed));
689 values[1] = BoolGetDatum(buffer_flushed);
690
691 tuple = heap_form_tuple(tupledesc, values, nulls);
692 result = HeapTupleGetDatum(tuple);
693
694 PG_RETURN_DATUM(result);
695}
static Datum values[MAXATTR]
Definition: bootstrap.c:153
int Buffer
Definition: buf.h:23
bool EvictUnpinnedBuffer(Buffer buf, bool *buffer_flushed)
Definition: bufmgr.c:6616
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:276
@ TYPEFUNC_COMPOSITE
Definition: funcapi.h:149
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition: funcapi.h:230
int NBuffers
Definition: globals.c:142
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1117
static void pg_buffercache_superuser_check(char *func_name)
#define NUM_BUFFERCACHE_EVICT_ELEM
static char * buf
Definition: pg_test_fsync.c:72
static Datum BoolGetDatum(bool X)
Definition: postgres.h:112
uint64_t Datum
Definition: postgres.h:70

References BoolGetDatum(), buf, elog, ERROR, EvictUnpinnedBuffer(), get_call_result_type(), heap_form_tuple(), HeapTupleGetDatum(), NBuffers, NUM_BUFFERCACHE_EVICT_ELEM, pg_buffercache_superuser_check(), PG_GETARG_INT32, PG_RETURN_DATUM, TYPEFUNC_COMPOSITE, and values.

◆ pg_buffercache_evict_all()

Datum pg_buffercache_evict_all ( PG_FUNCTION_ARGS  )

Definition at line 751 of file pg_buffercache_pages.c.

752{
753 Datum result;
754 TupleDesc tupledesc;
755 HeapTuple tuple;
756 Datum values[NUM_BUFFERCACHE_EVICT_ALL_ELEM];
757 bool nulls[NUM_BUFFERCACHE_EVICT_ALL_ELEM] = {0};
758
759 int32 buffers_evicted = 0;
760 int32 buffers_flushed = 0;
761 int32 buffers_skipped = 0;
762
763 if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
764 elog(ERROR, "return type must be a row type");
765
766 pg_buffercache_superuser_check("pg_buffercache_evict_all");
767
768 EvictAllUnpinnedBuffers(&buffers_evicted, &buffers_flushed,
769 &buffers_skipped);
770
771 values[0] = Int32GetDatum(buffers_evicted);
772 values[1] = Int32GetDatum(buffers_flushed);
773 values[2] = Int32GetDatum(buffers_skipped);
774
775 tuple = heap_form_tuple(tupledesc, values, nulls);
776 result = HeapTupleGetDatum(tuple);
777
778 PG_RETURN_DATUM(result);
779}
void EvictAllUnpinnedBuffers(int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
Definition: bufmgr.c:6645
int32_t int32
Definition: c.h:534
#define NUM_BUFFERCACHE_EVICT_ALL_ELEM
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:222

References elog, ERROR, EvictAllUnpinnedBuffers(), get_call_result_type(), heap_form_tuple(), HeapTupleGetDatum(), Int32GetDatum(), NUM_BUFFERCACHE_EVICT_ALL_ELEM, pg_buffercache_superuser_check(), PG_RETURN_DATUM, TYPEFUNC_COMPOSITE, and values.

◆ pg_buffercache_evict_relation()

Datum pg_buffercache_evict_relation ( PG_FUNCTION_ARGS  )

Definition at line 701 of file pg_buffercache_pages.c.

702{
703 Datum result;
704 TupleDesc tupledesc;
705 HeapTuple tuple;
706 Datum values[NUM_BUFFERCACHE_EVICT_RELATION_ELEM];
707 bool nulls[NUM_BUFFERCACHE_EVICT_RELATION_ELEM] = {0};
708
709 Oid relOid;
710 Relation rel;
711
712 int32 buffers_evicted = 0;
713 int32 buffers_flushed = 0;
714 int32 buffers_skipped = 0;
715
716 if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
717 elog(ERROR, "return type must be a row type");
718
719 pg_buffercache_superuser_check("pg_buffercache_evict_relation");
720
721 relOid = PG_GETARG_OID(0);
722
723 rel = relation_open(relOid, AccessShareLock);
724
725 if (RelationUsesLocalBuffers(rel))
726 ereport(ERROR,
727 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
728 errmsg("relation uses local buffers, %s() is intended to be used for shared buffers only",
729 "pg_buffercache_evict_relation")));
730
731 EvictRelUnpinnedBuffers(rel, &buffers_evicted, &buffers_flushed,
732 &buffers_skipped);
733
734 relation_close(rel, AccessShareLock);
735
736 values[0] = Int32GetDatum(buffers_evicted);
737 values[1] = Int32GetDatum(buffers_flushed);
738 values[2] = Int32GetDatum(buffers_skipped);
739
740 tuple = heap_form_tuple(tupledesc, values, nulls);
741 result = HeapTupleGetDatum(tuple);
742
743 PG_RETURN_DATUM(result);
744}
void EvictRelUnpinnedBuffers(Relation rel, int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
Definition: bufmgr.c:6693
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define ereport(elevel,...)
Definition: elog.h:150
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define AccessShareLock
Definition: lockdefs.h:36
#define NUM_BUFFERCACHE_EVICT_RELATION_ELEM
unsigned int Oid
Definition: postgres_ext.h:32
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:646
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:205
Relation relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:47

References AccessShareLock, elog, ereport, errcode(), errmsg(), ERROR, EvictRelUnpinnedBuffers(), get_call_result_type(), heap_form_tuple(), HeapTupleGetDatum(), Int32GetDatum(), NUM_BUFFERCACHE_EVICT_RELATION_ELEM, pg_buffercache_superuser_check(), PG_GETARG_OID, PG_RETURN_DATUM, relation_close(), relation_open(), RelationUsesLocalBuffers, TYPEFUNC_COMPOSITE, and values.

◆ pg_buffercache_numa_pages()

Datum pg_buffercache_numa_pages ( PG_FUNCTION_ARGS  )

Definition at line 305 of file pg_buffercache_pages.c.

306{
307 FuncCallContext *funcctx;
308 MemoryContext oldcontext;
309 BufferCacheNumaContext *fctx; /* User function context. */
310 TupleDesc tupledesc;
311 TupleDesc expected_tupledesc;
312 HeapTuple tuple;
313 Datum result;
314
315 if (SRF_IS_FIRSTCALL())
316 {
317 int i,
318 idx;
319 Size os_page_size;
320 void **os_page_ptrs;
321 int *os_page_status;
322 uint64 os_page_count;
323 int pages_per_buffer;
324 int max_entries;
325 char *startptr,
326 *endptr;
327
328 if (pg_numa_init() == -1)
329 elog(ERROR, "libnuma initialization failed or NUMA is not supported on this platform");
330
331 /*
332 * The database block size and OS memory page size are unlikely to be
333 * the same. The block size is 1-32KB, the memory page size depends on
334 * platform. On x86 it's usually 4KB, on ARM it's 4KB or 64KB, but
335 * there are also features like THP etc. Moreover, we don't quite know
336 * how the pages and buffers "align" in memory - the buffers may be
337 * shifted in some way, using more memory pages than necessary.
338 *
339 * So we need to be careful about mapping buffers to memory pages. We
340 * calculate the maximum number of pages a buffer might use, so that
341 * we allocate enough space for the entries. And then we count the
342 * actual number of entries as we scan the buffers.
343 *
344 * This information is needed before calling move_pages() for NUMA
345 * node id inquiry.
346 */
347 os_page_size = pg_get_shmem_pagesize();
348
349 /*
350 * The pages and block size is expected to be 2^k, so one divides the
351 * other (we don't know in which direction). This does not say
352 * anything about relative alignment of pages/buffers.
353 */
354 Assert((os_page_size % BLCKSZ == 0) || (BLCKSZ % os_page_size == 0));
355
356 /*
357 * How many addresses we are going to query? Simply get the page for
358 * the first buffer, and first page after the last buffer, and count
359 * the pages from that.
360 */
361 startptr = (char *) TYPEALIGN_DOWN(os_page_size,
362 BufferGetBlock(1));
363 endptr = (char *) TYPEALIGN(os_page_size,
364 (char *) BufferGetBlock(NBuffers) + BLCKSZ);
365 os_page_count = (endptr - startptr) / os_page_size;
366
367 /* Used to determine the NUMA node for all OS pages at once */
368 os_page_ptrs = palloc0(sizeof(void *) * os_page_count);
369 os_page_status = palloc(sizeof(uint64) * os_page_count);
370
371 /* Fill pointers for all the memory pages. */
372 idx = 0;
373 for (char *ptr = startptr; ptr < endptr; ptr += os_page_size)
374 {
375 os_page_ptrs[idx++] = ptr;
376
377 /* Only need to touch memory once per backend process lifetime */
378 if (firstNumaTouch)
379 pg_numa_touch_mem_if_required(ptr);
380 }
381
382 Assert(idx == os_page_count);
383
384 elog(DEBUG1, "NUMA: NBuffers=%d os_page_count=" UINT64_FORMAT " "
385 "os_page_size=%zu", NBuffers, os_page_count, os_page_size);
386
387 /*
388 * If we ever get 0xff back from kernel inquiry, then we probably have
389 * bug in our buffers to OS page mapping code here.
390 */
391 memset(os_page_status, 0xff, sizeof(int) * os_page_count);
392
393 /* Query NUMA status for all the pointers */
394 if (pg_numa_query_pages(0, os_page_count, os_page_ptrs, os_page_status) == -1)
395 elog(ERROR, "failed NUMA pages inquiry: %m");
396
397 /* Initialize the multi-call context, load entries about buffers */
398
399 funcctx = SRF_FIRSTCALL_INIT();
400
401 /* Switch context when allocating stuff to be used in later calls */
402 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
403
404 /* Create a user function context for cross-call persistence */
405 fctx = (BufferCacheNumaContext *) palloc(sizeof(BufferCacheNumaContext));
406
407 if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
408 elog(ERROR, "return type must be a row type");
409
410 if (expected_tupledesc->natts != NUM_BUFFERCACHE_NUMA_ELEM)
411 elog(ERROR, "incorrect number of output arguments");
412
413 /* Construct a tuple descriptor for the result rows. */
414 tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
415 TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
416 INT4OID, -1, 0);
417 TupleDescInitEntry(tupledesc, (AttrNumber) 2, "os_page_num",
418 INT8OID, -1, 0);
419 TupleDescInitEntry(tupledesc, (AttrNumber) 3, "numa_node",
420 INT4OID, -1, 0);
421
422 fctx->tupdesc = BlessTupleDesc(tupledesc);
423
424 /*
425 * Each buffer needs at least one entry, but it might be offset in
426 * some way, and use one extra entry. So we allocate space for the
427 * maximum number of entries we might need, and then count the exact
428 * number as we're walking buffers. That way we can do it in one pass,
429 * without reallocating memory.
430 */
431 pages_per_buffer = Max(1, BLCKSZ / os_page_size) + 1;
432 max_entries = NBuffers * pages_per_buffer;
433
434 /* Allocate entries for BufferCachePagesRec records. */
435 fctx->record = (BufferCacheNumaRec *)
436 MemoryContextAllocHuge(CurrentMemoryContext,
437 sizeof(BufferCacheNumaRec) * max_entries);
438
439 /* Return to original context when allocating transient memory */
440 MemoryContextSwitchTo(oldcontext);
441
442 if (firstNumaTouch)
443 elog(DEBUG1, "NUMA: page-faulting the buffercache for proper NUMA readouts");
444
445 /*
446 * Scan through all the buffers, saving the relevant fields in the
447 * fctx->record structure.
448 *
449 * We don't hold the partition locks, so we don't get a consistent
450 * snapshot across all buffers, but we do grab the buffer header
451 * locks, so the information of each buffer is self-consistent.
452 *
453 * This loop touches and stores addresses into os_page_ptrs[] as input
454 * to one big move_pages(2) inquiry system call. Basically we ask for
455 * all memory pages for NBuffers.
456 */
457 startptr = (char *) TYPEALIGN_DOWN(os_page_size, (char *) BufferGetBlock(1));
458 idx = 0;
459 for (i = 0; i < NBuffers; i++)
460 {
461 char *buffptr = (char *) BufferGetBlock(i + 1);
462 BufferDesc *bufHdr;
463 uint32 buf_state;
464 uint32 bufferid;
465 int32 page_num;
466 char *startptr_buff,
467 *endptr_buff;
468
469 CHECK_FOR_INTERRUPTS();
470
471 bufHdr = GetBufferDescriptor(i);
472
473 /* Lock each buffer header before inspecting. */
474 buf_state = LockBufHdr(bufHdr);
475 bufferid = BufferDescriptorGetBuffer(bufHdr);
476 UnlockBufHdr(bufHdr, buf_state);
477
478 /* start of the first page of this buffer */
479 startptr_buff = (char *) TYPEALIGN_DOWN(os_page_size, buffptr);
480
481 /* end of the buffer (no need to align to memory page) */
482 endptr_buff = buffptr + BLCKSZ;
483
484 Assert(startptr_buff < endptr_buff);
485
486 /* calculate ID of the first page for this buffer */
487 page_num = (startptr_buff - startptr) / os_page_size;
488
489 /* Add an entry for each OS page overlapping with this buffer. */
490 for (char *ptr = startptr_buff; ptr < endptr_buff; ptr += os_page_size)
491 {
492 fctx->record[idx].bufferid = bufferid;
493 fctx->record[idx].page_num = page_num;
494 fctx->record[idx].numa_node = os_page_status[page_num];
495
496 /* advance to the next entry/page */
497 ++idx;
498 ++page_num;
499 }
500 }
501
502 Assert((idx >= os_page_count) && (idx <= max_entries));
503
504 /* Set max calls and remember the user function context. */
505 funcctx->max_calls = idx;
506 funcctx->user_fctx = fctx;
507
508 /* Remember this backend touched the pages */
509 firstNumaTouch = false;
510 }
511
512 funcctx = SRF_PERCALL_SETUP();
513
514 /* Get the saved state */
515 fctx = funcctx->user_fctx;
516
517 if (funcctx->call_cntr < funcctx->max_calls)
518 {
519 uint32 i = funcctx->call_cntr;
520 Datum values[NUM_BUFFERCACHE_NUMA_ELEM];
521 bool nulls[NUM_BUFFERCACHE_NUMA_ELEM];
522
523 values[0] = Int32GetDatum(fctx->record[i].bufferid);
524 nulls[0] = false;
525
526 values[1] = Int64GetDatum(fctx->record[i].page_num);
527 nulls[1] = false;
528
529 values[2] = Int32GetDatum(fctx->record[i].numa_node);
530 nulls[2] = false;
531
532 /* Build and return the tuple. */
533 tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
534 result = HeapTupleGetDatum(tuple);
535
536 SRF_RETURN_NEXT(funcctx, result);
537 }
538 else
539 SRF_RETURN_DONE(funcctx);
540}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:262
int16 AttrNumber
Definition: attnum.h:21
static void UnlockBufHdr(BufferDesc *desc, uint32 buf_state)
static BufferDesc * GetBufferDescriptor(uint32 id)
static Buffer BufferDescriptorGetBuffer(const BufferDesc *bdesc)
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:6224
static Block BufferGetBlock(Buffer buffer)
Definition: bufmgr.h:384
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:803
#define Max(x, y)
Definition: c.h:997
#define UINT64_FORMAT
Definition: c.h:557
uint64_t uint64
Definition: c.h:539
uint32_t uint32
Definition: c.h:538
size_t Size
Definition: c.h:610
#define TYPEALIGN_DOWN(ALIGNVAL, LEN)
Definition: c.h:815
#define DEBUG1
Definition: elog.h:30
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
Definition: execTuples.c:2260
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:304
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:308
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:310
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:306
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:328
Assert(PointerIsAligned(start, uint64))
int i
Definition: isn.c:77
void * palloc0(Size size)
Definition: mcxt.c:1395
void * palloc(Size size)
Definition: mcxt.c:1365
MemoryContext CurrentMemoryContext
Definition: mcxt.c:160
void * MemoryContextAllocHuge(MemoryContext context, Size size)
Definition: mcxt.c:1703
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
static bool firstNumaTouch
#define NUM_BUFFERCACHE_NUMA_ELEM
#define pg_numa_touch_mem_if_required(ptr)
Definition: pg_numa.h:37
PGDLLIMPORT int pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
Definition: pg_numa.c:120
PGDLLIMPORT int pg_numa_init(void)
Definition: pg_numa.c:113
static Datum Int64GetDatum(int64 X)
Definition: postgres.h:403
Size pg_get_shmem_pagesize(void)
Definition: shmem.c:740
BufferCacheNumaRec * record
void * user_fctx
Definition: funcapi.h:82
uint64 max_calls
Definition: funcapi.h:74
uint64 call_cntr
Definition: funcapi.h:65
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:101
TupleDesc CreateTemplateTupleDesc(int natts)
Definition: tupdesc.c:182
void TupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber, const char *attributeName, Oid oidtypeid, int32 typmod, int attdim)
Definition: tupdesc.c:842

References Assert(), BlessTupleDesc(), BufferDescriptorGetBuffer(), BufferGetBlock(), BufferCacheNumaRec::bufferid, FuncCallContext::call_cntr, CHECK_FOR_INTERRUPTS, CreateTemplateTupleDesc(), CurrentMemoryContext, DEBUG1, elog, ERROR, firstNumaTouch, get_call_result_type(), GetBufferDescriptor(), heap_form_tuple(), HeapTupleGetDatum(), i, idx(), Int32GetDatum(), Int64GetDatum(), LockBufHdr(), Max, FuncCallContext::max_calls, MemoryContextAllocHuge(), MemoryContextSwitchTo(), FuncCallContext::multi_call_memory_ctx, TupleDescData::natts, NBuffers, NUM_BUFFERCACHE_NUMA_ELEM, BufferCacheNumaRec::numa_node, BufferCacheNumaRec::page_num, palloc(), palloc0(), pg_get_shmem_pagesize(), pg_numa_init(), pg_numa_query_pages(), pg_numa_touch_mem_if_required, BufferCacheNumaContext::record, SRF_FIRSTCALL_INIT, SRF_IS_FIRSTCALL, SRF_PERCALL_SETUP, SRF_RETURN_DONE, SRF_RETURN_NEXT, BufferCacheNumaContext::tupdesc, TupleDescInitEntry(), TYPEALIGN, TYPEALIGN_DOWN, TYPEFUNC_COMPOSITE, UINT64_FORMAT, UnlockBufHdr(), FuncCallContext::user_fctx, and values.

◆ pg_buffercache_pages()

Datum pg_buffercache_pages ( PG_FUNCTION_ARGS  )

Definition at line 110 of file pg_buffercache_pages.c.

111{
112 FuncCallContext *funcctx;
113 Datum result;
114 MemoryContext oldcontext;
115 BufferCachePagesContext *fctx; /* User function context. */
116 TupleDesc tupledesc;
117 TupleDesc expected_tupledesc;
118 HeapTuple tuple;
119
120 if (SRF_IS_FIRSTCALL())
121 {
122 int i;
123
124 funcctx = SRF_FIRSTCALL_INIT();
125
126 /* Switch context when allocating stuff to be used in later calls */
127 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
128
129 /* Create a user function context for cross-call persistence */
130 fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));
131
132 /*
133 * To smoothly support upgrades from version 1.0 of this extension
134 * transparently handle the (non-)existence of the pinning_backends
135 * column. We unfortunately have to get the result type for that... -
136 * we can't use the result type determined by the function definition
137 * without potentially crashing when somebody uses the old (or even
138 * wrong) function definition though.
139 */
140 if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
141 elog(ERROR, "return type must be a row type");
142
143 if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM ||
144 expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM)
145 elog(ERROR, "incorrect number of output arguments");
146
147 /* Construct a tuple descriptor for the result rows. */
148 tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
149 TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
150 INT4OID, -1, 0);
151 TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
152 OIDOID, -1, 0);
153 TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
154 OIDOID, -1, 0);
155 TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
156 OIDOID, -1, 0);
157 TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
158 INT2OID, -1, 0);
159 TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
160 INT8OID, -1, 0);
161 TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
162 BOOLOID, -1, 0);
163 TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
164 INT2OID, -1, 0);
165
166 if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
167 TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
168 INT4OID, -1, 0);
169
170 fctx->tupdesc = BlessTupleDesc(tupledesc);
171
172 /* Allocate NBuffers worth of BufferCachePagesRec records. */
173 fctx->record = (BufferCachePagesRec *)
174 MemoryContextAllocHuge(CurrentMemoryContext,
175 sizeof(BufferCachePagesRec) * NBuffers);
176
177 /* Set max calls and remember the user function context. */
178 funcctx->max_calls = NBuffers;
179 funcctx->user_fctx = fctx;
180
181 /* Return to original context when allocating transient memory */
182 MemoryContextSwitchTo(oldcontext);
183
184 /*
185 * Scan through all the buffers, saving the relevant fields in the
186 * fctx->record structure.
187 *
188 * We don't hold the partition locks, so we don't get a consistent
189 * snapshot across all buffers, but we do grab the buffer header
190 * locks, so the information of each buffer is self-consistent.
191 */
192 for (i = 0; i < NBuffers; i++)
193 {
194 BufferDesc *bufHdr;
195 uint32 buf_state;
196
197 CHECK_FOR_INTERRUPTS();
198
199 bufHdr = GetBufferDescriptor(i);
200 /* Lock each buffer header before inspecting. */
201 buf_state = LockBufHdr(bufHdr);
202
203 fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
204 fctx->record[i].relfilenumber = BufTagGetRelNumber(&bufHdr->tag);
205 fctx->record[i].reltablespace = bufHdr->tag.spcOid;
206 fctx->record[i].reldatabase = bufHdr->tag.dbOid;
207 fctx->record[i].forknum = BufTagGetForkNum(&bufHdr->tag);
208 fctx->record[i].blocknum = bufHdr->tag.blockNum;
209 fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
210 fctx->record[i].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);
211
212 if (buf_state & BM_DIRTY)
213 fctx->record[i].isdirty = true;
214 else
215 fctx->record[i].isdirty = false;
216
217 /* Note if the buffer is valid, and has storage created */
218 if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
219 fctx->record[i].isvalid = true;
220 else
221 fctx->record[i].isvalid = false;
222
223 UnlockBufHdr(bufHdr, buf_state);
224 }
225 }
226
227 funcctx = SRF_PERCALL_SETUP();
228
229 /* Get the saved state */
230 fctx = funcctx->user_fctx;
231
232 if (funcctx->call_cntr < funcctx->max_calls)
233 {
234 uint32 i = funcctx->call_cntr;
235 Datum values[NUM_BUFFERCACHE_PAGES_ELEM];
236 bool nulls[NUM_BUFFERCACHE_PAGES_ELEM];
237
238 values[0] = Int32GetDatum(fctx->record[i].bufferid);
239 nulls[0] = false;
240
241 /*
242 * Set all fields except the bufferid to null if the buffer is unused
243 * or not valid.
244 */
245 if (fctx->record[i].blocknum == InvalidBlockNumber ||
246 fctx->record[i].isvalid == false)
247 {
248 nulls[1] = true;
249 nulls[2] = true;
250 nulls[3] = true;
251 nulls[4] = true;
252 nulls[5] = true;
253 nulls[6] = true;
254 nulls[7] = true;
255 /* unused for v1.0 callers, but the array is always long enough */
256 nulls[8] = true;
257 }
258 else
259 {
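260 values[1] = ObjectIdGetDatum(fctx->record[i].relfilenumber);
261 nulls[1] = false;
262 values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
263 nulls[2] = false;
264 values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
265 nulls[3] = false;
266 values[4] = ObjectIdGetDatum(fctx->record[i].forknum);
267 nulls[4] = false;
268 values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
269 nulls[5] = false;
270 values[6] = BoolGetDatum(fctx->record[i].isdirty);
271 nulls[6] = false;
272 values[7] = Int16GetDatum(fctx->record[i].usagecount);
273 nulls[7] = false;
274 /* unused for v1.0 callers, but the array is always long enough */
275 values[8] = Int32GetDatum(fctx->record[i].pinning_backends);
276 nulls[8] = false;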
277 }
278
279 /* Build and return the tuple. */
280 tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
281 result = HeapTupleGetDatum(tuple);
282
283 SRF_RETURN_NEXT(funcctx, result);
284 }
285 else
286 SRF_RETURN_DONE(funcctx);
287}
#define InvalidBlockNumber
Definition: block.h:33
#define BM_TAG_VALID
Definition: buf_internals.h:71
static ForkNumber BufTagGetForkNum(const BufferTag *tag)
static RelFileNumber BufTagGetRelNumber(const BufferTag *tag)
#define BM_DIRTY
Definition: buf_internals.h:69
#define BUF_STATE_GET_USAGECOUNT(state)
Definition: buf_internals.h:60
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:59
#define BM_VALID
Definition: buf_internals.h:70
int64_t int64
Definition: c.h:535
#define NUM_BUFFERCACHE_PAGES_MIN_ELEM
#define NUM_BUFFERCACHE_PAGES_ELEM
static Datum Int16GetDatum(int16 X)
Definition: postgres.h:182
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:262
BufferCachePagesRec * record
BufferTag tag
BlockNumber blockNum
Oid spcOid

References BlessTupleDesc(), BufferCachePagesRec::blocknum, buftag::blockNum, BM_DIRTY, BM_TAG_VALID, BM_VALID, BoolGetDatum(), BUF_STATE_GET_REFCOUNT, BUF_STATE_GET_USAGECOUNT, BufferDescriptorGetBuffer(), BufferCachePagesRec::bufferid, BufTagGetForkNum(), BufTagGetRelNumber(), FuncCallContext::call_cntr, CHECK_FOR_INTERRUPTS, CreateTemplateTupleDesc(), CurrentMemoryContext, buftag::dbOid, elog, ERROR, BufferCachePagesRec::forknum, get_call_result_type(), GetBufferDescriptor(), heap_form_tuple(), HeapTupleGetDatum(), i, Int16GetDatum(), Int32GetDatum(), Int64GetDatum(), InvalidBlockNumber, BufferCachePagesRec::isdirty, BufferCachePagesRec::isvalid, LockBufHdr(), FuncCallContext::max_calls, MemoryContextAllocHuge(), MemoryContextSwitchTo(), FuncCallContext::multi_call_memory_ctx, TupleDescData::natts, NBuffers, NUM_BUFFERCACHE_PAGES_ELEM, NUM_BUFFERCACHE_PAGES_MIN_ELEM, ObjectIdGetDatum(), palloc(), BufferCachePagesRec::pinning_backends, BufferCachePagesContext::record, BufferCachePagesRec::reldatabase, BufferCachePagesRec::relfilenumber, BufferCachePagesRec::reltablespace, buftag::spcOid, SRF_FIRSTCALL_INIT, SRF_IS_FIRSTCALL, SRF_PERCALL_SETUP, SRF_RETURN_DONE, SRF_RETURN_NEXT, BufferDesc::tag, BufferCachePagesContext::tupdesc, TupleDescInitEntry(), TYPEFUNC_COMPOSITE, UnlockBufHdr(), BufferCachePagesRec::usagecount, FuncCallContext::user_fctx, and values.

◆ pg_buffercache_summary()

Datum pg_buffercache_summary ( PG_FUNCTION_ARGS  )

Definition at line 543 of file pg_buffercache_pages.c.

544{
545 Datum result;
546 TupleDesc tupledesc;
547 HeapTuple tuple;
548 Datum values[NUM_BUFFERCACHE_SUMMARY_ELEM];
549 bool nulls[NUM_BUFFERCACHE_SUMMARY_ELEM];
550
551 int32 buffers_used = 0;
552 int32 buffers_unused = 0;
553 int32 buffers_dirty = 0;
554 int32 buffers_pinned = 0;
555 int64 usagecount_total = 0;
556
557 if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
558 elog(ERROR, "return type must be a row type");
559
560 for (int i = 0; i < NBuffers; i++)
561 {
562 BufferDesc *bufHdr;
563 uint32 buf_state;
564
565 CHECK_FOR_INTERRUPTS();
566
567 /*
568 * This function summarizes the state of all headers. Locking the
569 * buffer headers wouldn't provide an improved result as the state of
570 * the buffer can still change after we release the lock and it'd
571 * noticeably increase the cost of the function.
572 */
573 bufHdr = GetBufferDescriptor(i);
574 buf_state = pg_atomic_read_u32(&bufHdr->state);
575
576 if (buf_state & BM_VALID)
577 {
578 buffers_used++;
579 usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state);
580
581 if (buf_state & BM_DIRTY)
582 buffers_dirty++;
583 }
584 else
585 buffers_unused++;
586
587 if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
588 buffers_pinned++;
589 }
590
591 memset(nulls, 0, sizeof(nulls));
592 values[0] = Int32GetDatum(buffers_used);
593 values[1] = Int32GetDatum(buffers_unused);
594 values[2] = Int32GetDatum(buffers_dirty);
595 values[3] = Int32GetDatum(buffers_pinned);
596
597 if (buffers_used != 0)
598 values[4] = Float8GetDatum((double) usagecount_total / buffers_used);
599 else
600 nulls[4] = true;
601
602 /* Build and return the tuple. */
603 tuple = heap_form_tuple(tupledesc, values, nulls);
604 result = HeapTupleGetDatum(tuple);
605
606 PG_RETURN_DATUM(result);
607}
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:237
#define NUM_BUFFERCACHE_SUMMARY_ELEM
static Datum Float8GetDatum(float8 X)
Definition: postgres.h:492
pg_atomic_uint32 state

References BM_DIRTY, BM_VALID, BUF_STATE_GET_REFCOUNT, BUF_STATE_GET_USAGECOUNT, CHECK_FOR_INTERRUPTS, elog, ERROR, Float8GetDatum(), get_call_result_type(), GetBufferDescriptor(), heap_form_tuple(), HeapTupleGetDatum(), i, Int32GetDatum(), NBuffers, NUM_BUFFERCACHE_SUMMARY_ELEM, pg_atomic_read_u32(), PG_RETURN_DATUM, BufferDesc::state, TYPEFUNC_COMPOSITE, and values.

◆ pg_buffercache_superuser_check()

static void pg_buffercache_superuser_check ( char *  func_name)
static

Definition at line 656 of file pg_buffercache_pages.c.

657{
658 if (!superuser())
659 ereport(ERROR,
660 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
661 errmsg("must be superuser to use %s()",
662 func_name)));
663}
bool superuser(void)
Definition: superuser.c:46

References ereport, errcode(), errmsg(), ERROR, and superuser().

Referenced by pg_buffercache_evict(), pg_buffercache_evict_all(), and pg_buffercache_evict_relation().

◆ pg_buffercache_usage_counts()

Datum pg_buffercache_usage_counts ( PG_FUNCTION_ARGS  )

Definition at line 610 of file pg_buffercache_pages.c.

611{
612 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
613 int usage_counts[BM_MAX_USAGE_COUNT + 1] = {0};
614 int dirty[BM_MAX_USAGE_COUNT + 1] = {0};
615 int pinned[BM_MAX_USAGE_COUNT + 1] = {0};
616 Datum values[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM];
617 bool nulls[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM] = {0};
618
619 InitMaterializedSRF(fcinfo, 0);
620
621 for (int i = 0; i < NBuffers; i++)
622 {
623 BufferDesc *bufHdr = GetBufferDescriptor(i);
624 uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
625 int usage_count;
626
627 CHECK_FOR_INTERRUPTS();
628
629 usage_count = BUF_STATE_GET_USAGECOUNT(buf_state);
630 usage_counts[usage_count]++;
631
632 if (buf_state & BM_DIRTY)
633 dirty[usage_count]++;
634
635 if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
636 pinned[usage_count]++;
637 }
638
639 for (int i = 0; i < BM_MAX_USAGE_COUNT + 1; i++)
640 {
641 values[0] = Int32GetDatum(i);
642 values[1] = Int32GetDatum(usage_counts[i]);
643 values[2] = Int32GetDatum(dirty[i]);
644 values[3] = Int32GetDatum(pinned[i]);
645
646 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
647 }
648
649 return (Datum) 0;
650}
#define BM_MAX_USAGE_COUNT
Definition: buf_internals.h:86
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition: funcapi.c:76
#define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition: tuplestore.c:784

References BM_MAX_USAGE_COUNT.

◆ PG_FUNCTION_INFO_V1() [1/7]

PG_FUNCTION_INFO_V1 ( pg_buffercache_evict  )

◆ PG_FUNCTION_INFO_V1() [2/7]

PG_FUNCTION_INFO_V1 ( pg_buffercache_evict_all  )

◆ PG_FUNCTION_INFO_V1() [3/7]

PG_FUNCTION_INFO_V1 ( pg_buffercache_evict_relation  )

◆ PG_FUNCTION_INFO_V1() [4/7]

PG_FUNCTION_INFO_V1 ( pg_buffercache_numa_pages  )

◆ PG_FUNCTION_INFO_V1() [5/7]

PG_FUNCTION_INFO_V1 ( pg_buffercache_pages  )

◆ PG_FUNCTION_INFO_V1() [6/7]

PG_FUNCTION_INFO_V1 ( pg_buffercache_summary  )

◆ PG_FUNCTION_INFO_V1() [7/7]

PG_FUNCTION_INFO_V1 ( pg_buffercache_usage_counts  )

◆ PG_MODULE_MAGIC_EXT()

PG_MODULE_MAGIC_EXT ( name = "pg_buffercache",
version = PG_VERSION 
)

Variable Documentation

◆ firstNumaTouch

bool firstNumaTouch = true
static

Definition at line 106 of file pg_buffercache_pages.c.

Referenced by pg_buffercache_numa_pages().