Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 710d90d

Browse files
committed
Add prefix operator for TEXT type.
The prefix operator along with SP-GiST indexes can be used as an alternative for LIKE 'word%' commands and it doesn't have a limitation of string/prefix length as B-Tree has. Bump catalog version Author: Ildus Kurbangaliev with some editorization by me Review by: Arthur Zakirov, Alexander Korotkov, and me Discussion: https://www.postgresql.org/message-id/flat/[email protected]
1 parent 4ab2999 commit 710d90d

File tree

14 files changed

+189
-9
lines changed

14 files changed

+189
-9
lines changed

doc/src/sgml/func.sgml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2274,6 +2274,21 @@
22742274
<entry><literal>ph</literal></entry>
22752275
</row>
22762276

2277+
<row>
2278+
<entry>
2279+
<indexterm>
2280+
<primary>starts_with</primary>
2281+
</indexterm>
2282+
<literal><function>starts_with(<parameter>string</parameter>, <parameter>prefix</parameter>)</function></literal>
2283+
</entry>
2284+
<entry><type>bool</type></entry>
2285+
<entry>
2286+
Returns true if <parameter>string</parameter> starts with <parameter>prefix</parameter>.
2287+
</entry>
2288+
<entry><literal>starts_with('alphabet', 'alph')</literal></entry>
2289+
<entry><literal>t</literal></entry>
2290+
</row>
2291+
22772292
<row>
22782293
<entry>
22792294
<indexterm>
@@ -4033,6 +4048,12 @@ cast(-44 as bit(12)) <lineannotation>111111010100</lineannotation>
40334048
ILIKE</function>, respectively. All of these operators are
40344049
<productname>PostgreSQL</productname>-specific.
40354050
</para>
4051+
4052+
<para>
4053+
There is also the prefix operator <literal>^@</literal> and corresponding
4054+
<function>starts_with</function> function which covers cases when only
4055+
searching by beginning of the string is needed.
4056+
</para>
40364057
</sect2>
40374058

40384059

doc/src/sgml/spgist.sgml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@
161161
<literal>~&lt;~</literal>
162162
<literal>~&gt;=~</literal>
163163
<literal>~&gt;~</literal>
164+
<literal>^@</literal>
164165
</entry>
165166
</row>
166167
<row>

src/backend/access/spgist/spgtextproc.c

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,20 @@
6767
*/
6868
#define SPGIST_MAX_PREFIX_LENGTH Max((int) (BLCKSZ - 258 * 16 - 100), 32)
6969

70+
/*
71+
* Strategy for collation aware operator on text is equal to btree strategy
72+
* plus value of 10.
73+
*
74+
* Current collation aware strategies and their corresponding btree strategies:
75+
* 11 BTLessStrategyNumber
76+
* 12 BTLessEqualStrategyNumber
77+
* 14 BTGreaterEqualStrategyNumber
78+
* 15 BTGreaterStrategyNumber
79+
*/
80+
#define SPG_STRATEGY_ADDITION (10)
81+
#define SPG_IS_COLLATION_AWARE_STRATEGY(s) ((s) > SPG_STRATEGY_ADDITION \
82+
&& (s) != RTPrefixStrategyNumber)
83+
7084
/* Struct for sorting values in picksplit */
7185
typedef struct spgNodePtr
7286
{
@@ -496,10 +510,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
496510
* well end with a partial multibyte character, so that applying
497511
* any encoding-sensitive test to it would be risky anyhow.)
498512
*/
499-
if (strategy > 10)
513+
if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
500514
{
501515
if (collate_is_c)
502-
strategy -= 10;
516+
strategy -= SPG_STRATEGY_ADDITION;
503517
else
504518
continue;
505519
}
@@ -526,6 +540,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
526540
if (r < 0)
527541
res = false;
528542
break;
543+
case RTPrefixStrategyNumber:
544+
if (r != 0)
545+
res = false;
546+
break;
529547
default:
530548
elog(ERROR, "unrecognized strategy number: %d",
531549
in->scankeys[j].sk_strategy);
@@ -605,10 +623,27 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
605623
int queryLen = VARSIZE_ANY_EXHDR(query);
606624
int r;
607625

608-
if (strategy > 10)
626+
if (strategy == RTPrefixStrategyNumber)
627+
{
628+
/*
629+
* if level >= length of query then reconstrValue is began with
630+
* query (prefix) string and we don't need to check it again.
631+
*/
632+
633+
res = (level >= queryLen) ||
634+
DatumGetBool(DirectFunctionCall2(text_starts_with,
635+
out->leafValue, PointerGetDatum(query)));
636+
637+
if (!res) /* no need to consider remaining conditions */
638+
break;
639+
640+
continue;
641+
}
642+
643+
if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
609644
{
610645
/* Collation-aware comparison */
611-
strategy -= 10;
646+
strategy -= SPG_STRATEGY_ADDITION;
612647

613648
/* If asserts enabled, verify encoding of reconstructed string */
614649
Assert(pg_verifymbstr(fullValue, fullLen, false));

src/backend/utils/adt/selfuncs.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1488,6 +1488,16 @@ likesel(PG_FUNCTION_ARGS)
14881488
}
14891489

14901490
/*
1491+
* prefixsel - selectivity of prefix operator
1492+
*/
1493+
Datum
1494+
prefixsel(PG_FUNCTION_ARGS)
1495+
{
1496+
PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
1497+
}
1498+
1499+
/*
1500+
*
14911501
* iclikesel - Selectivity of ILIKE pattern match.
14921502
*/
14931503
Datum
@@ -2906,6 +2916,15 @@ likejoinsel(PG_FUNCTION_ARGS)
29062916
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
29072917
}
29082918

2919+
/*
2920+
* prefixjoinsel - Join selectivity of prefix operator
2921+
*/
2922+
Datum
2923+
prefixjoinsel(PG_FUNCTION_ARGS)
2924+
{
2925+
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
2926+
}
2927+
29092928
/*
29102929
* iclikejoinsel - Join selectivity of ILIKE pattern match.
29112930
*/
@@ -5947,6 +5966,20 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
59475966
result = regex_fixed_prefix(patt, true, collation,
59485967
prefix, rest_selec);
59495968
break;
5969+
case Pattern_Type_Prefix:
5970+
/* Prefix type work is trivial. */
5971+
result = Pattern_Prefix_Partial;
5972+
*rest_selec = 1.0; /* all */
5973+
*prefix = makeConst(patt->consttype,
5974+
patt->consttypmod,
5975+
patt->constcollid,
5976+
patt->constlen,
5977+
datumCopy(patt->constvalue,
5978+
patt->constbyval,
5979+
patt->constlen),
5980+
patt->constisnull,
5981+
patt->constbyval);
5982+
break;
59505983
default:
59515984
elog(ERROR, "unrecognized ptype: %d", (int) ptype);
59525985
result = Pattern_Prefix_None; /* keep compiler quiet */

src/backend/utils/adt/varlena.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1761,6 +1761,34 @@ text_ge(PG_FUNCTION_ARGS)
17611761
PG_RETURN_BOOL(result);
17621762
}
17631763

1764+
Datum
1765+
text_starts_with(PG_FUNCTION_ARGS)
1766+
{
1767+
Datum arg1 = PG_GETARG_DATUM(0);
1768+
Datum arg2 = PG_GETARG_DATUM(1);
1769+
bool result;
1770+
Size len1,
1771+
len2;
1772+
1773+
len1 = toast_raw_datum_size(arg1);
1774+
len2 = toast_raw_datum_size(arg2);
1775+
if (len2 > len1)
1776+
result = false;
1777+
else
1778+
{
1779+
text *targ1 = DatumGetTextPP(arg1);
1780+
text *targ2 = DatumGetTextPP(arg2);
1781+
1782+
result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1783+
VARSIZE_ANY_EXHDR(targ2)) == 0);
1784+
1785+
PG_FREE_IF_COPY(targ1, 0);
1786+
PG_FREE_IF_COPY(targ2, 1);
1787+
}
1788+
1789+
PG_RETURN_BOOL(result);
1790+
}
1791+
17641792
Datum
17651793
bttextcmp(PG_FUNCTION_ARGS)
17661794
{

src/include/access/stratnum.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,9 @@ typedef uint16 StrategyNumber;
6868
#define RTSubEqualStrategyNumber 25 /* for inet <<= */
6969
#define RTSuperStrategyNumber 26 /* for inet << */
7070
#define RTSuperEqualStrategyNumber 27 /* for inet >>= */
71+
#define RTPrefixStrategyNumber 28 /* for text ^@ */
7172

72-
#define RTMaxStrategyNumber 27
73+
#define RTMaxStrategyNumber 28
7374

7475

7576
#endif /* STRATNUM_H */

src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/* yyyymmddN */
56-
#define CATALOG_VERSION_NO 201803311
56+
#define CATALOG_VERSION_NO 201804031
5757

5858
#endif

src/include/catalog/pg_amop.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -799,6 +799,7 @@ DATA(insert ( 4017 25 25 11 s 664 4000 0 ));
799799
DATA(insert ( 4017 25 25 12 s 665 4000 0 ));
800800
DATA(insert ( 4017 25 25 14 s 667 4000 0 ));
801801
DATA(insert ( 4017 25 25 15 s 666 4000 0 ));
802+
DATA(insert ( 4017 25 25 28 s 3877 4000 0 ));
802803

803804
/*
804805
* btree jsonb_ops

src/include/catalog/pg_operator.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ DESCR("less than");
134134
DATA(insert OID = 98 ( "=" PGNSP PGUID b t t 25 25 16 98 531 texteq eqsel eqjoinsel ));
135135
DESCR("equal");
136136
#define TextEqualOperator 98
137+
DATA(insert OID = 3877 ( "^@" PGNSP PGUID b f f 25 25 16 0 0 starts_with prefixsel prefixjoinsel ));
138+
DESCR("starts with");
137139

138140
DATA(insert OID = 349 ( "||" PGNSP PGUID b f f 2277 2283 2277 0 0 array_append - - ));
139141
DESCR("append element onto end of array");

src/include/catalog/pg_proc.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ DATA(insert OID = 64 ( int2lt PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16
209209
DATA(insert OID = 65 ( int4eq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4eq _null_ _null_ _null_ ));
210210
DATA(insert OID = 66 ( int4lt PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4lt _null_ _null_ _null_ ));
211211
DATA(insert OID = 67 ( texteq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ texteq _null_ _null_ _null_ ));
212+
DATA(insert OID = 3696 ( starts_with PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ text_starts_with _null_ _null_ _null_ ));
212213
DATA(insert OID = 68 ( xideq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xideq _null_ _null_ _null_ ));
213214
DATA(insert OID = 3308 ( xidneq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xidneq _null_ _null_ _null_ ));
214215
DATA(insert OID = 69 ( cideq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "29 29" _null_ _null_ _null_ _null_ _null_ cideq _null_ _null_ _null_ ));
@@ -2584,6 +2585,10 @@ DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0
25842585
DESCR("join selectivity of NOT LIKE");
25852586
DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ icregexnejoinsel _null_ _null_ _null_ ));
25862587
DESCR("join selectivity of case-insensitive regex non-match");
2588+
DATA(insert OID = 3437 ( prefixsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ _null_ prefixsel _null_ _null_ _null_ ));
2589+
DESCR("restriction selectivity of exact prefix");
2590+
DATA(insert OID = 3438 ( prefixjoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ prefixjoinsel _null_ _null_ _null_ ));
2591+
DESCR("join selectivity of exact prefix");
25872592

25882593
/* Aggregate-related functions */
25892594
DATA(insert OID = 1830 ( float8_avg PGNSP PGUID 12 1 0 0 0 f f f t f i s 1 0 701 "1022" _null_ _null_ _null_ _null_ _null_ float8_avg _null_ _null_ _null_ ));

0 commit comments

Comments
 (0)