Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 52fd2d6

Browse files
committed
Fix up core tsquery GIN support for new extractQuery API.
No need for the empty-prefix-match kluge to force a full scan anymore.
1 parent 3048450 commit 52fd2d6

File tree

3 files changed

+96
-53
lines changed

3 files changed

+96
-53
lines changed

src/backend/utils/adt/tsginidx.c

Lines changed: 36 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
*/
1414
#include "postgres.h"
1515

16+
#include "access/gin.h"
1617
#include "access/skey.h"
1718
#include "tsearch/ts_type.h"
1819
#include "tsearch/ts_utils.h"
@@ -26,8 +27,7 @@ gin_cmp_tslexeme(PG_FUNCTION_ARGS)
2627
text *b = PG_GETARG_TEXT_PP(1);
2728
int cmp;
2829

29-
cmp = tsCompareString(
30-
VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
30+
cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
3131
VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
3232
false);
3333

@@ -48,8 +48,7 @@ gin_cmp_prefix(PG_FUNCTION_ARGS)
4848
#endif
4949
int cmp;
5050

51-
cmp = tsCompareString(
52-
VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
51+
cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
5352
VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
5453
true);
5554

@@ -96,71 +95,72 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
9695
{
9796
TSQuery query = PG_GETARG_TSQUERY(0);
9897
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
99-
10098
/* StrategyNumber strategy = PG_GETARG_UINT16(2); */
10199
bool **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3);
102100
Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
101+
/* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
102+
int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
103103
Datum *entries = NULL;
104-
bool *partialmatch;
105104

106105
*nentries = 0;
107106

108107
if (query->size > 0)
109108
{
109+
QueryItem *item = GETQUERY(query);
110110
int4 i,
111-
j = 0,
112-
len;
113-
QueryItem *item;
114-
bool use_fullscan = false;
111+
j;
112+
bool *partialmatch;
115113
int *map_item_operand;
116114

117-
item = clean_NOT(GETQUERY(query), &len);
118-
if (!item)
119-
{
120-
use_fullscan = true;
121-
*nentries = 1;
122-
}
123-
124-
item = GETQUERY(query);
115+
/*
116+
* If the query doesn't have any required positive matches (for
117+
* instance, it's something like '! foo'), we have to do a full
118+
* index scan.
119+
*/
120+
if (tsquery_requires_match(item))
121+
*searchMode = GIN_SEARCH_MODE_DEFAULT;
122+
else
123+
*searchMode = GIN_SEARCH_MODE_ALL;
125124

125+
/* count number of VAL items */
126+
j = 0;
126127
for (i = 0; i < query->size; i++)
128+
{
127129
if (item[i].type == QI_VAL)
128-
(*nentries)++;
130+
j++;
131+
}
132+
*nentries = j;
129133

130-
entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
131-
partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * (*nentries));
134+
entries = (Datum *) palloc(sizeof(Datum) * j);
135+
partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j);
132136

133137
/*
134138
* Make map to convert item's number to corresponding operand's (the
135139
* same, entry's) number. Entry's number is used in check array in
136140
* consistent method. We use the same map for each entry.
137141
*/
138-
*extra_data = (Pointer *) palloc0(sizeof(Pointer) * (*nentries));
139-
map_item_operand = palloc0(sizeof(int) * (query->size + 1));
142+
*extra_data = (Pointer *) palloc(sizeof(Pointer) * j);
143+
map_item_operand = (int *) palloc0(sizeof(int) * query->size);
140144

145+
/* Now rescan the VAL items and fill in the arrays */
146+
j = 0;
141147
for (i = 0; i < query->size; i++)
148+
{
142149
if (item[i].type == QI_VAL)
143150
{
144-
text *txt;
145151
QueryOperand *val = &item[i].qoperand;
152+
text *txt;
146153

147154
txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
148155
val->length);
156+
entries[j] = PointerGetDatum(txt);
157+
partialmatch[j] = val->prefix;
149158
(*extra_data)[j] = (Pointer) map_item_operand;
150159
map_item_operand[i] = j;
151-
partialmatch[j] = val->prefix;
152-
entries[j++] = PointerGetDatum(txt);
160+
j++;
153161
}
154-
155-
if (use_fullscan)
156-
{
157-
(*extra_data)[j] = (Pointer) map_item_operand;
158-
map_item_operand[i] = j;
159-
entries[j++] = PointerGetDatum(cstring_to_text_with_len("", 0));
160162
}
161163
}
162-
else
163-
*nentries = -1; /* nothing can be found */
164164

165165
PG_FREE_IF_COPY(query, 0);
166166

@@ -222,12 +222,10 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS)
222222
gcv.map_item_operand = (int *) (extra_data[0]);
223223
gcv.need_recheck = recheck;
224224

225-
res = TS_execute(
226-
GETQUERY(query),
225+
res = TS_execute(GETQUERY(query),
227226
&gcv,
228227
true,
229-
checkcondition_gin
230-
);
228+
checkcondition_gin);
231229
}
232230

233231
PG_RETURN_BOOL(res);

src/backend/utils/adt/tsvector_op.c

Lines changed: 59 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,8 @@ tsvector_concat(PG_FUNCTION_ARGS)
525525

526526
/*
527527
* Compare two strings by tsvector rules.
528-
* if isPrefix = true then it returns not-zero value if b has prefix a
528+
*
529+
* if prefix = true then it returns zero value iff b has prefix a
529530
*/
530531
int4
531532
tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
@@ -535,8 +536,7 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
535536
if (lena == 0)
536537
{
537538
if (prefix)
538-
cmp = 0; /* emtry string is equal to any if a prefix
539-
* match */
539+
cmp = 0; /* empty string is prefix of anything */
540540
else
541541
cmp = (lenb > 0) ? -1 : 0;
542542
}
@@ -551,14 +551,9 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
551551
if (prefix)
552552
{
553553
if (cmp == 0 && lena > lenb)
554-
{
555-
/*
556-
* b argument is not beginning with argument a
557-
*/
558-
cmp = 1;
559-
}
554+
cmp = 1; /* a is longer, so not a prefix of b */
560555
}
561-
else if ((cmp == 0) && (lena != lenb))
556+
else if (cmp == 0 && lena != lenb)
562557
{
563558
cmp = (lena < lenb) ? -1 : 1;
564559
}
@@ -650,13 +645,13 @@ checkcondition_str(void *checkval, QueryOperand *val)
650645
}
651646

652647
/*
653-
* check for boolean condition.
648+
* Evaluate tsquery boolean expression.
654649
*
655-
* if calcnot is false, NOT expressions are always evaluated to be true. This is used in ranking.
650+
* chkcond is a callback function used to evaluate each VAL node in the query.
656651
* checkval can be used to pass information to the callback. TS_execute doesn't
657652
* do anything with it.
658-
* chkcond is a callback function used to evaluate each VAL node in the query.
659-
*
653+
* if calcnot is false, NOT expressions are always evaluated to be true. This
654+
* is used in ranking.
660655
*/
661656
bool
662657
TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
@@ -675,6 +670,7 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
675670
return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
676671
else
677672
return true;
673+
678674
case OP_AND:
679675
if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
680676
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
@@ -695,6 +691,55 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
695691
return false;
696692
}
697693

694+
/*
695+
* Detect whether a tsquery boolean expression requires any positive matches
696+
* to values shown in the tsquery.
697+
*
698+
* This is needed to know whether a GIN index search requires full index scan.
699+
* For example, 'x & !y' requires a match of x, so it's sufficient to scan
700+
* entries for x; but 'x | !y' could match rows containing neither x nor y.
701+
*/
702+
bool
703+
tsquery_requires_match(QueryItem *curitem)
704+
{
705+
/* since this function recurses, it could be driven to stack overflow */
706+
check_stack_depth();
707+
708+
if (curitem->type == QI_VAL)
709+
return true;
710+
711+
switch (curitem->qoperator.oper)
712+
{
713+
case OP_NOT:
714+
/*
715+
* Assume there are no required matches underneath a NOT. For
716+
* some cases with nested NOTs, we could prove there's a required
717+
* match, but it seems unlikely to be worth the trouble.
718+
*/
719+
return false;
720+
721+
case OP_AND:
722+
/* If either side requires a match, we're good */
723+
if (tsquery_requires_match(curitem + curitem->qoperator.left))
724+
return true;
725+
else
726+
return tsquery_requires_match(curitem + 1);
727+
728+
case OP_OR:
729+
/* Both sides must require a match */
730+
if (tsquery_requires_match(curitem + curitem->qoperator.left))
731+
return tsquery_requires_match(curitem + 1);
732+
else
733+
return false;
734+
735+
default:
736+
elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
737+
}
738+
739+
/* not reachable, but keep compiler quiet */
740+
return false;
741+
}
742+
698743
/*
699744
* boolean operations
700745
*/

src/include/tsearch/ts_utils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,9 @@ extern text *generateHeadline(HeadlineParsedText *prs);
104104
/*
105105
* Common check function for tsvector @@ tsquery
106106
*/
107-
108107
extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
109108
bool (*chkcond) (void *checkval, QueryOperand *val));
109+
extern bool tsquery_requires_match(QueryItem *curitem);
110110

111111
/*
112112
* to_ts* - text transformation to tsvector, tsquery

0 commit comments

Comments
 (0)