Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit dde9457

Browse files
committed
Fix and improve compound word support. These changes cannot be applied to a
previous version without recreating tsvector fields... Thanks to Alexander Presber <[email protected]> for discovering the problem.
1 parent 21e2544 commit dde9457

File tree

1 file changed

+75
-56
lines changed

1 file changed

+75
-56
lines changed

contrib/tsearch2/ispell/spell.c

Lines changed: 75 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -737,9 +737,9 @@ NISortAffixes(IspellDict * Conf)
737737
{
738738
if (firstsuffix < 0)
739739
firstsuffix = i;
740-
if (Affix->flagflags & FF_COMPOUNDONLYAFX)
740+
if ((Affix->flagflags & FF_COMPOUNDONLYAFX) && Affix->replen>0 )
741741
{
742-
if (!ptr->affix ||
742+
if (ptr == Conf->CompoundAffix ||
743743
strbncmp((const unsigned char *) (ptr - 1)->affix,
744744
(const unsigned char *) Affix->repl,
745745
(ptr - 1)->len))
@@ -1024,17 +1024,31 @@ typedef struct SplitVar
10241024
} SplitVar;
10251025

10261026
static int
1027-
CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len)
1027+
CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len, bool CheckInPlace)
10281028
{
1029-
while ((*ptr)->affix)
1030-
{
1031-
if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
1029+
if ( CheckInPlace ) {
1030+
while ((*ptr)->affix)
1031+
{
1032+
if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
1033+
{
1034+
len = (*ptr)->len;
1035+
(*ptr)++;
1036+
return len;
1037+
}
1038+
(*ptr)++;
1039+
}
1040+
} else {
1041+
char *affbegin;
1042+
while ((*ptr)->affix)
10321043
{
1033-
len = (*ptr)->len;
1044+
if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
1045+
{
1046+
len = (*ptr)->len + (affbegin-word);
1047+
(*ptr)++;
1048+
return len;
1049+
}
10341050
(*ptr)++;
1035-
return len;
10361051
}
1037-
(*ptr)++;
10381052
}
10391053
return 0;
10401054
}
@@ -1078,26 +1092,11 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
10781092
memset(notprobed, 1, wordlen);
10791093
var = CopyVar(orig, 1);
10801094

1081-
while (node && level < wordlen)
1095+
while (level < wordlen)
10821096
{
1083-
StopLow = node->data;
1084-
StopHigh = node->data + node->length;
1085-
while (StopLow < StopHigh)
1086-
{
1087-
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1088-
if (StopMiddle->val == ((uint8 *) (word))[level])
1089-
break;
1090-
else if (StopMiddle->val < ((uint8 *) (word))[level])
1091-
StopLow = StopMiddle + 1;
1092-
else
1093-
StopHigh = StopMiddle;
1094-
}
1095-
if (StopLow >= StopHigh)
1096-
break;
1097-
1098-
/* find word with epenthetic */
1097+
/* find word with epenthetic or/and compound suffix */
10991098
caff = Conf->CompoundAffix;
1100-
while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level)) > 0)
1099+
while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) > 0)
11011100
{
11021101
/*
11031102
* there is one of compound suffixes, so check word for existings
@@ -1143,41 +1142,61 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
11431142
}
11441143
}
11451144

1146-
/* find infinitive */
1147-
if (StopMiddle->isword && StopMiddle->compoundallow && notprobed[level])
1145+
if ( !node )
1146+
break;
1147+
1148+
StopLow = node->data;
1149+
StopHigh = node->data + node->length;
1150+
while (StopLow < StopHigh)
11481151
{
1149-
/* ok, we found full compoundallowed word */
1150-
if (level > minpos)
1152+
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1153+
if (StopMiddle->val == ((uint8 *) (word))[level])
1154+
break;
1155+
else if (StopMiddle->val < ((uint8 *) (word))[level])
1156+
StopLow = StopMiddle + 1;
1157+
else
1158+
StopHigh = StopMiddle;
1159+
}
1160+
1161+
if (StopLow < StopHigh) {
1162+
1163+
/* find infinitive */
1164+
if (StopMiddle->isword && StopMiddle->compoundallow && notprobed[level])
11511165
{
1152-
/* and its length more than minimal */
1153-
if (wordlen == level + 1)
1154-
{
1155-
/* well, it was last word */
1156-
var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos);
1157-
var->nstem++;
1158-
pfree(notprobed);
1159-
return var;
1160-
}
1161-
else
1166+
/* ok, we found full compoundallowed word */
1167+
if (level > minpos)
11621168
{
1163-
/* then we will search more big word at the same point */
1164-
SplitVar *ptr = var;
1165-
1166-
while (ptr->next)
1167-
ptr = ptr->next;
1168-
ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
1169-
/* we can find next word */
1170-
level++;
1171-
var->stem[var->nstem] = strnduplicate(word + startpos, level - startpos);
1172-
var->nstem++;
1173-
node = Conf->Dictionary;
1174-
startpos = level;
1175-
continue;
1169+
/* and its length more than minimal */
1170+
if (wordlen == level + 1)
1171+
{
1172+
/* well, it was last word */
1173+
var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos);
1174+
var->nstem++;
1175+
pfree(notprobed);
1176+
return var;
1177+
}
1178+
else
1179+
{
1180+
/* then we will search more big word at the same point */
1181+
SplitVar *ptr = var;
1182+
1183+
while (ptr->next)
1184+
ptr = ptr->next;
1185+
ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
1186+
/* we can find next word */
1187+
level++;
1188+
var->stem[var->nstem] = strnduplicate(word + startpos, level - startpos);
1189+
var->nstem++;
1190+
node = Conf->Dictionary;
1191+
startpos = level;
1192+
continue;
1193+
}
11761194
}
11771195
}
1178-
}
1196+
node = StopMiddle->node;
1197+
} else
1198+
node = NULL;
11791199
level++;
1180-
node = StopMiddle->node;
11811200
}
11821201

11831202
var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos);

0 commit comments

Comments
 (0)