Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit b6ef167

Browse files
committed
Introduce optimized routine for linear searches of arrays
Use SSE2 intrinsics to speed up the search, where available. Otherwise, use a simple 'for' loop. The motivation to add this now is to speed up XidInMVCCSnapshot(), which is the reason only unsigned 32-bit integer arrays are optimized. Other types are left for future work, as is the extension of this technique to non-x86 platforms. Nathan Bossart Reviewed by: Andres Freund, Bharath Rupireddy, Masahiko Sawada Discussion: https://postgr.es/m/20220713170950.GA3116318%40nathanxps13
1 parent 356dd2c commit b6ef167

File tree

9 files changed

+215
-0
lines changed

9 files changed

+215
-0
lines changed

src/include/port/pg_lfind.h

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* pg_lfind.h
4+
* Optimized linear search routines.
5+
*
6+
* Copyright (c) 2022, PostgreSQL Global Development Group
7+
*
8+
* IDENTIFICATION
9+
* src/include/port/pg_lfind.h
10+
*
11+
*-------------------------------------------------------------------------
12+
*/
13+
#ifndef PG_LFIND_H
14+
#define PG_LFIND_H
15+
16+
#include "port/simd.h"
17+
18+
/*
19+
* pg_lfind32
20+
*
21+
* Return true if there is an element in 'base' that equals 'key', otherwise
22+
* return false.
23+
*/
24+
static inline bool
25+
pg_lfind32(uint32 key, uint32 *base, uint32 nelem)
26+
{
27+
uint32 i = 0;
28+
29+
/* Use SIMD intrinsics where available. */
30+
#ifdef USE_SSE2
31+
32+
/*
33+
* A 16-byte register only has four 4-byte lanes. For better
34+
* instruction-level parallelism, each loop iteration operates on a block
35+
* of four registers. Testing has showed this is ~40% faster than using a
36+
* block of two registers.
37+
*/
38+
const __m128i keys = _mm_set1_epi32(key); /* load 4 copies of key */
39+
uint32 iterations = nelem & ~0xF; /* round down to multiple of 16 */
40+
41+
#if defined(USE_ASSERT_CHECKING)
42+
bool assert_result = false;
43+
44+
/* pre-compute the result for assert checking */
45+
for (i = 0; i < nelem; i++)
46+
{
47+
if (key == base[i])
48+
{
49+
assert_result = true;
50+
break;
51+
}
52+
}
53+
#endif
54+
55+
for (i = 0; i < iterations; i += 16)
56+
{
57+
/* load the next block into 4 registers holding 4 values each */
58+
const __m128i vals1 = _mm_loadu_si128((__m128i *) & base[i]);
59+
const __m128i vals2 = _mm_loadu_si128((__m128i *) & base[i + 4]);
60+
const __m128i vals3 = _mm_loadu_si128((__m128i *) & base[i + 8]);
61+
const __m128i vals4 = _mm_loadu_si128((__m128i *) & base[i + 12]);
62+
63+
/* compare each value to the key */
64+
const __m128i result1 = _mm_cmpeq_epi32(keys, vals1);
65+
const __m128i result2 = _mm_cmpeq_epi32(keys, vals2);
66+
const __m128i result3 = _mm_cmpeq_epi32(keys, vals3);
67+
const __m128i result4 = _mm_cmpeq_epi32(keys, vals4);
68+
69+
/* combine the results into a single variable */
70+
const __m128i tmp1 = _mm_or_si128(result1, result2);
71+
const __m128i tmp2 = _mm_or_si128(result3, result4);
72+
const __m128i result = _mm_or_si128(tmp1, tmp2);
73+
74+
/* see if there was a match */
75+
if (_mm_movemask_epi8(result) != 0)
76+
{
77+
#if defined(USE_ASSERT_CHECKING)
78+
Assert(assert_result == true);
79+
#endif
80+
return true;
81+
}
82+
}
83+
#endif /* USE_SSE2 */
84+
85+
/* Process the remaining elements one at a time. */
86+
for (; i < nelem; i++)
87+
{
88+
if (key == base[i])
89+
{
90+
#if defined(USE_SSE2) && defined(USE_ASSERT_CHECKING)
91+
Assert(assert_result == true);
92+
#endif
93+
return true;
94+
}
95+
}
96+
97+
#if defined(USE_SSE2) && defined(USE_ASSERT_CHECKING)
98+
Assert(assert_result == false);
99+
#endif
100+
return false;
101+
}
102+
103+
#endif /* PG_LFIND_H */

src/test/modules/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ SUBDIRS = \
1919
test_extensions \
2020
test_ginpostinglist \
2121
test_integerset \
22+
test_lfind \
2223
test_misc \
2324
test_oat_hooks \
2425
test_parser \
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Generated subdirectories
2+
/log/
3+
/results/
4+
/tmp_check/

src/test/modules/test_lfind/Makefile

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# src/test/modules/test_lfind/Makefile
2+
3+
MODULE_big = test_lfind
4+
OBJS = \
5+
$(WIN32RES) \
6+
test_lfind.o
7+
PGFILEDESC = "test_lfind - test code for optimized linear search functions"
8+
9+
EXTENSION = test_lfind
10+
DATA = test_lfind--1.0.sql
11+
12+
REGRESS = test_lfind
13+
14+
ifdef USE_PGXS
15+
PG_CONFIG = pg_config
16+
PGXS := $(shell $(PG_CONFIG) --pgxs)
17+
include $(PGXS)
18+
else
19+
subdir = src/test/modules/test_lfind
20+
top_builddir = ../../../..
21+
include $(top_builddir)/src/Makefile.global
22+
include $(top_srcdir)/contrib/contrib-global.mk
23+
endif
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
CREATE EXTENSION test_lfind;
2+
--
3+
-- These tests don't produce any interesting output. We're checking that
4+
-- the operations complete without crashing or hanging and that none of their
5+
-- internal sanity tests fail.
6+
--
7+
SELECT test_lfind();
8+
test_lfind
9+
------------
10+
11+
(1 row)
12+
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CREATE EXTENSION test_lfind;
2+
3+
--
4+
-- These tests don't produce any interesting output. We're checking that
5+
-- the operations complete without crashing or hanging and that none of their
6+
-- internal sanity tests fail.
7+
--
8+
SELECT test_lfind();
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
/* src/test/modules/test_lfind/test_lfind--1.0.sql */
2+
3+
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
4+
\echo Use "CREATE EXTENSION test_lfind" to load this file. \quit
5+
6+
CREATE FUNCTION test_lfind()
7+
RETURNS pg_catalog.void
8+
AS 'MODULE_PATHNAME' LANGUAGE C;
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*--------------------------------------------------------------------------
2+
*
3+
* test_lfind.c
4+
* Test correctness of optimized linear search functions.
5+
*
6+
* Copyright (c) 2022, PostgreSQL Global Development Group
7+
*
8+
* IDENTIFICATION
9+
* src/test/modules/test_lfind/test_lfind.c
10+
*
11+
* -------------------------------------------------------------------------
12+
*/
13+
14+
#include "postgres.h"
15+
16+
#include "fmgr.h"
17+
#include "port/pg_lfind.h"
18+
19+
PG_MODULE_MAGIC;
20+
21+
PG_FUNCTION_INFO_V1(test_lfind);
22+
23+
Datum
24+
test_lfind(PG_FUNCTION_ARGS)
25+
{
26+
#define TEST_ARRAY_SIZE 135
27+
uint32 test_array[TEST_ARRAY_SIZE] = {0};
28+
29+
test_array[8] = 1;
30+
test_array[64] = 2;
31+
test_array[TEST_ARRAY_SIZE - 1] = 3;
32+
33+
if (pg_lfind32(1, test_array, 4))
34+
elog(ERROR, "pg_lfind32() found nonexistent element");
35+
if (!pg_lfind32(1, test_array, TEST_ARRAY_SIZE))
36+
elog(ERROR, "pg_lfind32() did not find existing element");
37+
38+
if (pg_lfind32(2, test_array, 32))
39+
elog(ERROR, "pg_lfind32() found nonexistent element");
40+
if (!pg_lfind32(2, test_array, TEST_ARRAY_SIZE))
41+
elog(ERROR, "pg_lfind32() did not find existing element");
42+
43+
if (pg_lfind32(3, test_array, 96))
44+
elog(ERROR, "pg_lfind32() found nonexistent element");
45+
if (!pg_lfind32(3, test_array, TEST_ARRAY_SIZE))
46+
elog(ERROR, "pg_lfind32() did not find existing element");
47+
48+
if (pg_lfind32(4, test_array, TEST_ARRAY_SIZE))
49+
elog(ERROR, "pg_lfind32() found nonexistent element");
50+
51+
PG_RETURN_VOID();
52+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
comment = 'Test code for optimized linear search functions'
2+
default_version = '1.0'
3+
module_pathname = '$libdir/test_lfind'
4+
relocatable = true

0 commit comments

Comments
 (0)