From a25f42017aa2859c65ceeae4daf4d9ba68b3442e Mon Sep 17 00:00:00 2001 From: Artur Zakirov Date: Thu, 11 Feb 2016 18:57:47 +0300 Subject: [PATCH 01/29] affix file is created in current backend to avoid regex_t structure copying --- Makefile | 22 +- README.md | 11 +- expected/shared_ispell.out | 193 ++ shared_ispell.control | 2 +- ...ll--1.0.0.sql => shared_ispell--1.1.0.sql} | 0 sql/shared_ispell.sql | 49 + src/shared_ispell.c | 1581 +++++++---------- src/shared_ispell.h | 71 + src/spell.c | 647 ------- src/spell.h | 71 - 10 files changed, 1010 insertions(+), 1637 deletions(-) create mode 100644 expected/shared_ispell.out rename sql/{shared_ispell--1.0.0.sql => shared_ispell--1.1.0.sql} (100%) create mode 100644 sql/shared_ispell.sql create mode 100644 src/shared_ispell.h delete mode 100644 src/spell.c delete mode 100644 src/spell.h diff --git a/Makefile b/Makefile index 2f9574d..a238f02 100644 --- a/Makefile +++ b/Makefile @@ -1,18 +1,20 @@ +# contrib/shared_ispell/Makefile + MODULE_big = shared_ispell -OBJS = src/shared_ispell.o src/spell.o +OBJS = src/shared_ispell.o EXTENSION = shared_ispell -DATA = sql/shared_ispell--1.0.0.sql -MODULES = shared_ispell +DATA = sql/shared_ispell--1.1.0.sql -CFLAGS=`pg_config --includedir-server` +REGRESS = shared_ispell +ifdef USE_PGXS PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) - -all: shared_ispell.so - -shared_ispell.so: $(OBJS) - -%.o : src/%.c +else +subdir = contrib/shared_ispell +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif \ No newline at end of file diff --git a/README.md b/README.md index e24f81e..55a7195 100644 --- a/README.md +++ b/README.md @@ -13,15 +13,6 @@ If you need just snowball-type dictionaries, this extension is not really interesting for you. But if you really need an ispell dictionary, this may save you a lot of resources. 
-Warning -------- -The extension does not yet handle affixes that require full regular -expressions (regex_t, implemented in regex.h). This is indicated by -an error when initializing the dictionary. - -Simple affixes and affixes that can be handled by fast regex subset -(as implemented in regis.h) are handled just fine. - Install ------- @@ -144,4 +135,4 @@ use this prepared data). db=# SELECT shared_ispell_reset(); -That's all for now ... \ No newline at end of file +That's all for now ... diff --git a/expected/shared_ispell.out b/expected/shared_ispell.out new file mode 100644 index 0000000..bd80ff0 --- /dev/null +++ b/expected/shared_ispell.out @@ -0,0 +1,193 @@ +CREATE EXTENSION shared_ispell; +-- Test ISpell dictionary with ispell affix file +CREATE TEXT SEARCH DICTIONARY shared_ispell ( + Template=shared_ispell, + DictFile=ispell_sample, + AffFile=ispell_sample +); +SELECT ts_lexize('shared_ispell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT ts_lexize('shared_ispell', 'bookings'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('shared_ispell', 'booking'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('shared_ispell', 'foot'); + ts_lexize +----------- + {foot} +(1 row) + +SELECT ts_lexize('shared_ispell', 'foots'); + ts_lexize +----------- + {foot} +(1 row) + +SELECT ts_lexize('shared_ispell', 'rebookings'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('shared_ispell', 'rebooking'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('shared_ispell', 'rebook'); + ts_lexize +----------- + +(1 row) + +SELECT ts_lexize('shared_ispell', 'unbookings'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('shared_ispell', 'unbooking'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('shared_ispell', 'unbook'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('shared_ispell', 'footklubber'); + 
ts_lexize +---------------- + {foot,klubber} +(1 row) + +SELECT ts_lexize('shared_ispell', 'footballklubber'); + ts_lexize +------------------------------------------------------ + {footballklubber,foot,ball,klubber,football,klubber} +(1 row) + +SELECT ts_lexize('shared_ispell', 'ballyklubber'); + ts_lexize +---------------- + {ball,klubber} +(1 row) + +SELECT ts_lexize('shared_ispell', 'footballyklubber'); + ts_lexize +--------------------- + {foot,ball,klubber} +(1 row) + +-- Test ISpell dictionary with hunspell affix file +CREATE TEXT SEARCH DICTIONARY shared_hunspell ( + Template=shared_ispell, + DictFile=ispell_sample, + AffFile=hunspell_sample +); +SELECT ts_lexize('shared_hunspell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'bookings'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'booking'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'foot'); + ts_lexize +----------- + {foot} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'foots'); + ts_lexize +----------- + {foot} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'rebookings'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'rebooking'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'rebook'); + ts_lexize +----------- + +(1 row) + +SELECT ts_lexize('shared_hunspell', 'unbookings'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'unbooking'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'unbook'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'footklubber'); + ts_lexize +---------------- + {foot,klubber} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'footballklubber'); + ts_lexize +------------------------------------------------------ + 
{footballklubber,foot,ball,klubber,football,klubber} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'ballyklubber'); + ts_lexize +---------------- + {ball,klubber} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'footballyklubber'); + ts_lexize +--------------------- + {foot,ball,klubber} +(1 row) + diff --git a/shared_ispell.control b/shared_ispell.control index 5380e28..8fab766 100644 --- a/shared_ispell.control +++ b/shared_ispell.control @@ -1,6 +1,6 @@ # shared ispell dictionary comment = 'Provides shared ispell dictionaries.' -default_version = '1.0.0' +default_version = '1.1.0' relocatable = true module_pathname = '$libdir/shared_ispell' diff --git a/sql/shared_ispell--1.0.0.sql b/sql/shared_ispell--1.1.0.sql similarity index 100% rename from sql/shared_ispell--1.0.0.sql rename to sql/shared_ispell--1.1.0.sql diff --git a/sql/shared_ispell.sql b/sql/shared_ispell.sql new file mode 100644 index 0000000..888df98 --- /dev/null +++ b/sql/shared_ispell.sql @@ -0,0 +1,49 @@ +CREATE EXTENSION shared_ispell; + +-- Test ISpell dictionary with ispell affix file +CREATE TEXT SEARCH DICTIONARY shared_ispell ( + Template=shared_ispell, + DictFile=ispell_sample, + AffFile=ispell_sample +); + +SELECT ts_lexize('shared_ispell', 'skies'); +SELECT ts_lexize('shared_ispell', 'bookings'); +SELECT ts_lexize('shared_ispell', 'booking'); +SELECT ts_lexize('shared_ispell', 'foot'); +SELECT ts_lexize('shared_ispell', 'foots'); +SELECT ts_lexize('shared_ispell', 'rebookings'); +SELECT ts_lexize('shared_ispell', 'rebooking'); +SELECT ts_lexize('shared_ispell', 'rebook'); +SELECT ts_lexize('shared_ispell', 'unbookings'); +SELECT ts_lexize('shared_ispell', 'unbooking'); +SELECT ts_lexize('shared_ispell', 'unbook'); + +SELECT ts_lexize('shared_ispell', 'footklubber'); +SELECT ts_lexize('shared_ispell', 'footballklubber'); +SELECT ts_lexize('shared_ispell', 'ballyklubber'); +SELECT ts_lexize('shared_ispell', 'footballyklubber'); + +-- Test ISpell dictionary with hunspell affix file 
+CREATE TEXT SEARCH DICTIONARY shared_hunspell ( + Template=shared_ispell, + DictFile=ispell_sample, + AffFile=hunspell_sample +); + +SELECT ts_lexize('shared_hunspell', 'skies'); +SELECT ts_lexize('shared_hunspell', 'bookings'); +SELECT ts_lexize('shared_hunspell', 'booking'); +SELECT ts_lexize('shared_hunspell', 'foot'); +SELECT ts_lexize('shared_hunspell', 'foots'); +SELECT ts_lexize('shared_hunspell', 'rebookings'); +SELECT ts_lexize('shared_hunspell', 'rebooking'); +SELECT ts_lexize('shared_hunspell', 'rebook'); +SELECT ts_lexize('shared_hunspell', 'unbookings'); +SELECT ts_lexize('shared_hunspell', 'unbooking'); +SELECT ts_lexize('shared_hunspell', 'unbook'); + +SELECT ts_lexize('shared_hunspell', 'footklubber'); +SELECT ts_lexize('shared_hunspell', 'footballklubber'); +SELECT ts_lexize('shared_hunspell', 'ballyklubber'); +SELECT ts_lexize('shared_hunspell', 'footballyklubber'); diff --git a/src/shared_ispell.c b/src/shared_ispell.c index 2276ab1..db9cfec 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -5,25 +5,25 @@ * dictionaries are copied in memory multiple times. The connections * also need to initialize the dictionary on their own, which may take * up to a few seconds. - * + * * This means the connections are either long-lived (and each keeps * a private copy of the dictionary, wasting memory), or short-lived * (resulting in high latencies when the dictionary is initialized). - * + * * This extension is storing a single copy of the dictionary in a shared * memory so that all connections may use it, saving memory and CPU time. - * - * + * + * * The flow within the shared ispell may be slightly confusing, so this * is a brief summary of the main flows within the code. 
- * + * * ===== shared segment init (postmaster startup) ===== - * + * * _PG_init * -> ispell_shmem_startup (registered as a hook) - * + * * ===== dictionary init (backend) ===== - * + * * dispell_init * -> init_shared_dict * -> get_shared_dict @@ -35,17 +35,13 @@ * -> NIFinishBuild * -> sizeIspellDict * -> copyIspellDict - * -> copyAffixNode (prefixes) - * -> copyAffixNode (suffixes) * -> copySPNode - * -> copy affix data - * -> copy compound affixes * -> get_shared_stop_list * -> readstoplist * -> copyStopList - * + * * ===== dictionary reinit after reset (backend) ===== - * + * * dispell_lexize * -> timestamp of lookup < last reset * -> init_shared_dict @@ -53,101 +49,52 @@ * -> SharedNINormalizeWord */ -#include -#include -#include -#include - -#include -#include - #include "postgres.h" #include "miscadmin.h" #include "storage/ipc.h" -#include "storage/fd.h" -#include "commands/explain.h" -#include "executor/executor.h" -#include "executor/instrument.h" -#include "utils/guc.h" #include "commands/defrem.h" #include "tsearch/ts_locale.h" -#include "storage/lwlock.h" -#include "utils/timestamp.h" #include "access/htup_details.h" - #include "funcapi.h" -#include "libpq/md5.h" - -#include "spell.h" +#include "shared_ispell.h" #include "tsearch/dicts/spell.h" +#include "regex/regguts.h" -#ifdef PG_MODULE_MAGIC PG_MODULE_MAGIC; -#endif -#if (PG_VERSION_NUM < 90100) -#define NIStartBuild(dict) -#define NIFinishBuild(dict) -#endif +void _PG_init(void); +void _PG_fini(void); -/* private functions */ -static void ispell_shmem_startup(void); - -/* This segment is initialized in the first process that accesses it (see - * ispell_shmem_startup function). 
- */ -#define SEGMENT_NAME "shared_ispell" - -static int max_ispell_mem_size = (30*1024*1024); /* 50MB by default */ +/* Memory for dictionaries in kbytes */ +static int max_ispell_mem_size_kb; /* Saved hook values in case of unload */ static shmem_startup_hook_type prev_shmem_startup_hook = NULL; -void _PG_init(void); -void _PG_fini(void); - -/* used to allocate memory in the shared segment */ -typedef struct SegmentInfo { - - LWLockId lock; - char *firstfree; /* first free address (always maxaligned) */ - size_t available; /* free space remaining at firstfree */ - Timestamp lastReset; /* last reset of the dictionary */ - - /* the shared segment (info and data) */ - SharedIspellDict * dict; - SharedStopList * stop; - -} SegmentInfo; - -#define MAXLEN 255 - -/* used to keep track of dictionary in each backend */ -typedef struct DictInfo { - - Timestamp lookup; - - char dictFile[MAXLEN]; - char affixFile[MAXLEN]; - char stopFile[MAXLEN]; - - SharedIspellDict * dict; - SharedStopList * stop; +/* These are used to allocate data within shared segment */ +static SegmentInfo *segment_info = NULL; -} DictInfo; +static void ispell_shmem_startup(void); -/* These are used to allocate data within shared segment */ -static SegmentInfo * segment_info = NULL; +static char *shalloc(int bytes); +static char *shstrcpy(char *str); -static char * shalloc(int bytes); +static SharedIspellDict *copyIspellDict(IspellDict *dict, char *dictFile, char *affixFile, int bytes, int words); +static SharedStopList *copyStopList(StopList *list, char *stopFile, int bytes); -static SharedIspellDict * copyIspellDict(IspellDict * dict, char * dictFile, char * affixFile, int bytes, int words); -static SharedStopList * copyStopList(StopList * list, char * stopFile, int bytes); +static int sizeIspellDict(IspellDict *dict, char *dictFile, char *affixFile); +static int sizeStopList(StopList *list, char *stopFile); -static int sizeIspellDict(IspellDict * dict, char * dictFile, char * affixFile); -static int 
sizeStopList(StopList * list, char * stopFile); +/* + * Get memory for dictionaries in bytes + */ +static Size +max_ispell_mem_size() +{ + return (Size)max_ispell_mem_size_kb * 1024L; +} /* * Module load callback @@ -155,45 +102,46 @@ static int sizeStopList(StopList * list, char * stopFile); void _PG_init(void) { - - /* */ - if (! process_shared_preload_libraries_in_progress) { - elog(ERROR, "shared_ispell has to be loaded using shared_preload_libraries"); - return; - } - - /* Define custom GUC variables. */ - - /* How much memory should we preallocate for the dictionaries (limits how many - * dictionaries you can load into the shared segment). */ - DefineCustomIntVariable("shared_ispell.max_size", - "amount of memory to pre-allocate for ispell dictionaries", - NULL, - &max_ispell_mem_size, - (32*1024*1024), - (1024*1024), INT_MAX, - PGC_POSTMASTER, - GUC_UNIT_BLOCKS, -#if (PG_VERSION_NUM >= 90100) - NULL, -#endif - NULL, - NULL); - - EmitWarningsOnPlaceholders("shared_ispell"); - - /* - * Request additional shared resources. (These are no-ops if we're not in - * the postmaster process.) We'll allocate or attach to the shared - * resources in ispell_shmem_startup(). - */ - RequestAddinShmemSpace(max_ispell_mem_size); - RequestAddinLWLocks(1); - - /* Install hooks. */ - prev_shmem_startup_hook = shmem_startup_hook; - shmem_startup_hook = ispell_shmem_startup; - + if (!process_shared_preload_libraries_in_progress) { + elog(ERROR, "shared_ispell has to be loaded using shared_preload_libraries"); + return; + } + + /* Define custom GUC variables. */ + + /* How much memory should we preallocate for the dictionaries (limits how many + * dictionaries you can load into the shared segment). 
*/ + DefineCustomIntVariable("shared_ispell.max_size", + "amount of memory to pre-allocate for ispell dictionaries", + NULL, + &max_ispell_mem_size_kb, + 50 * 1024, /* default 50MB */ + 1024, /* min 1MB */ + INT_MAX, + PGC_POSTMASTER, + GUC_UNIT_KB, + NULL, + NULL, + NULL); + + EmitWarningsOnPlaceholders("shared_ispell"); + + /* + * Request additional shared resources. (These are no-ops if we're not in + * the postmaster process.) We'll allocate or attach to the shared + * resources in ispell_shmem_startup(). + */ + RequestAddinShmemSpace(max_ispell_mem_size()); + + #if PG_VERSION_NUM >= 90600 + RequestNamedLWLockTranche("shared_ispell", 1); + #else + RequestAddinLWLocks(1); + #endif + + /* Install hooks. */ + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = ispell_shmem_startup; } @@ -203,210 +151,232 @@ _PG_init(void) void _PG_fini(void) { - /* Uninstall hooks. */ - shmem_startup_hook = prev_shmem_startup_hook; + /* Uninstall hooks. */ + shmem_startup_hook = prev_shmem_startup_hook; } - -/* +/* * Probably the most important part of the startup - initializes the * memory in shared memory segment (creates and initializes the * SegmentInfo data structure). - * - * This is called from a shmem_startup_hook (see _PG_init). */ -static -void ispell_shmem_startup() { - - bool found = FALSE; - char * segment; - - if (prev_shmem_startup_hook) - prev_shmem_startup_hook(); - - elog(DEBUG1, "initializing shared ispell segment (size: %d B)", - max_ispell_mem_size); - - /* - * Create or attach to the shared memory state, including hash table - */ - LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); - - segment = ShmemInitStruct(SEGMENT_NAME, - max_ispell_mem_size, - &found); - - /* Was the shared memory segment already initialized? */ - if (! found) { + * + * This is called from a shmem_startup_hook (see _PG_init). 
+ */ +static void +ispell_shmem_startup() +{ + bool found = FALSE; + char *segment; - memset(segment, 0, max_ispell_mem_size); + if (prev_shmem_startup_hook) + prev_shmem_startup_hook(); - segment_info = (SegmentInfo*)segment; + /* + * Create or attach to the shared memory state, including hash table + */ + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); - segment_info->lock = LWLockAssign(); - segment_info->firstfree = segment + MAXALIGN(sizeof(SegmentInfo)); - segment_info->available = max_ispell_mem_size - (int)(segment_info->firstfree - segment); + segment = ShmemInitStruct(SEGMENT_NAME, + max_ispell_mem_size(), + &found); - segment_info->lastReset = GetCurrentTimestamp(); + /* Was the shared memory segment already initialized? */ + if (!found) + { + memset(segment, 0, max_ispell_mem_size()); - elog(DEBUG1, "shared memory segment (shared ispell) successfully created"); + segment_info = (SegmentInfo *) segment; - } + #if PG_VERSION_NUM >= 90600 + segment_info->lock = &(GetNamedLWLockTranche("shared_ispell"))->lock; + #else + segment_info->lock = LWLockAssign(); + #endif + segment_info->firstfree = segment + MAXALIGN(sizeof(SegmentInfo)); + segment_info->available = max_ispell_mem_size() + - (int)(segment_info->firstfree - segment); - LWLockRelease(AddinShmemInitLock); + segment_info->lastReset = GetCurrentTimestamp(); + } + LWLockRelease(AddinShmemInitLock); } /* * This is called from backends that are looking up for a shared dictionary * definition using a filename with dictionary / affixes. - * + * * This is called through dispell_init() which is responsible for proper locking * of the shared memory (using SegmentInfo->lock). 
*/ -static -SharedIspellDict * get_shared_dict(char * words, char * affixes) { - - SharedIspellDict * dict = segment_info->dict; +static SharedIspellDict * +get_shared_dict(char *words, char *affixes) +{ + SharedIspellDict *dict = segment_info->shdict; - while (dict != NULL) { - if ((strcmp(dict->dictFile, words) == 0) && - (strcmp(dict->affixFile, affixes) == 0)) { - return dict; - } - dict = dict->next; - } + while (dict != NULL) + { + if ((strcmp(dict->dictFile, words) == 0) && + (strcmp(dict->affixFile, affixes) == 0)) + return dict; + dict = dict->next; + } - return NULL; + return NULL; } /* * This is called from backends that are looking up for a list of stop words * using a filename of the list. - * + * * This is called through dispell_init() which is responsible for proper locking * of the shared memory (using SegmentInfo->lock). */ -static -SharedStopList * get_shared_stop_list(char * stop) { - - SharedStopList * list = segment_info->stop; +static SharedStopList * +get_shared_stop_list(char *stop) +{ + SharedStopList *list = segment_info->shstop; - while (list != NULL) { - if (strcmp(list->stopFile, stop) == 0) { - return list; - } - list = list->next; - } + while (list != NULL) + { + if (strcmp(list->stopFile, stop) == 0) + return list; + list = list->next; + } - return NULL; + return NULL; } /* - * Initializes the dictionary for use in backends - checks whether such dictionary + * Initializes the dictionary for use in backends - checks whether such dictionary * and list of stopwords is already used, and if not then parses it and loads it into * the shared segment. - * + * + * Function lookup if the dictionary (word list) is already loaded in the + * shared segment. If not then loads the dictionary (word list). + * Affix list is loaded to a current backend process. + * * This is called through dispell_init() which is responsible for proper locking * of the shared memory (using SegmentInfo->lock). 
*/ -static -void init_shared_dict(DictInfo * info, char * dictFile, char * affFile, char * stopFile) { - - int size; - - SharedIspellDict * shdict = NULL; - SharedStopList * shstop = NULL; - - IspellDict * dict; - StopList stoplist; - - /* DICTIONARY + AFFIXES */ - - /* TODO This should probably check that the filenames are not NULL, and maybe that - * it exists. Or maybe that's handled by the NIImport* functions. */ - - /* lookup if the dictionary (words and affixes) is already loaded in the shared segment */ - shdict = get_shared_dict(dictFile, affFile); - - /* load the dictionary / affixes if not yet defined */ - if (shdict == NULL) { - - dict = (IspellDict *)palloc0(sizeof(IspellDict)); - - NIStartBuild(dict); - - NIImportDictionary(dict, - get_tsearch_config_filename(dictFile, "dict")); - - NIImportAffixes(dict, - get_tsearch_config_filename(affFile, "affix")); - - NISortDictionary(dict); - NISortAffixes(dict); - - NIFinishBuild(dict); - - /* check available space in shared segment */ - size = sizeIspellDict(dict, dictFile, affFile); - if (size > segment_info->available) - elog(ERROR, "shared dictionary %s.dict / %s.affix needs %d B, only %ld B available", - dictFile, affFile, size, segment_info->available); - - /* fine, there's enough space - copy the dictionary */ - shdict = copyIspellDict(dict, dictFile, affFile, size, dict->nspell); - - elog(INFO, "shared dictionary %s.dict / %s.affix loaded, used %d B, %ld B remaining", - dictFile, affFile, size, segment_info->available); - - /* add the new dictionary to the linked list (of SharedIspellDict structures) */ - shdict->next = segment_info->dict; - segment_info->dict = shdict; - - } - - /* STOP WORDS */ - - /* lookup if the stop words are already loaded in the shared segment, but only if there - * actually is a list */ - if (stopFile != NULL) { - - shstop = get_shared_stop_list(stopFile); - - /* load the stopwords if not yet defined */ - if (shstop == NULL) { - - readstoplist(stopFile, &stoplist, lowerstr); - 
- size = sizeStopList(&stoplist, stopFile); - if (size > segment_info->available) { - elog(ERROR, "shared stoplist %s.stop needs %d B, only %ld B available", - stopFile, size, segment_info->available); - } - - /* fine, there's enough space - copy the stoplist */ - shstop = copyStopList(&stoplist, stopFile, size); - - elog(INFO, "shared stoplist %s.stop loaded, used %d B, %ld B remaining", - affFile, size, segment_info->available); - - /* add the new stopword list to the linked list (of SharedStopList structures) */ - shstop->next = segment_info->stop; - segment_info->stop = shstop; - - } - } - - /* Now, fill the DictInfo structure for the backend (references to dictionary, - * stopwords and the filenames). */ - - info->dict = shdict; - info->stop = shstop; - info->lookup = GetCurrentTimestamp(); - - memcpy(info->dictFile, dictFile, strlen(dictFile) + 1); - memcpy(info->affixFile, dictFile, strlen(affFile)+ 1); - memcpy(info->stopFile, dictFile, strlen(stopFile) + 1); - +static void +init_shared_dict(DictInfo *info, char *dictFile, char *affFile, char *stopFile) +{ + int size; + + SharedIspellDict *shdict = NULL; + SharedStopList *shstop = NULL; + + IspellDict *dict; + StopList stoplist; + + /* DICTIONARY + AFFIXES */ + + /* TODO This should probably check that the filenames are not NULL, and maybe that + * it exists. Or maybe that's handled by the NIImport* functions. 
*/ + + /* lookup if the dictionary (words and affixes) is already loaded in the shared segment */ + shdict = get_shared_dict(dictFile, affFile); + + /* load affix list */ + NIStartBuild(&(info->dict)); + NIImportAffixes(&(info->dict), get_tsearch_config_filename(affFile, "affix")); + + /* load the dictionary (word list) if not yet defined */ + if (shdict == NULL) + { + dict = (IspellDict *) palloc0(sizeof(IspellDict)); + + NIStartBuild(dict); + NIImportDictionary(dict, get_tsearch_config_filename(dictFile, "dict")); + + dict->usecompound = info->dict.usecompound; + memcpy(dict->flagval, &(info->dict.flagval), 65000); + + /* + * If affix->useFlagAliases == true then AffixData is generated + * in NIImportAffixes(). Therefore we need to copy it. + */ + if (info->dict.useFlagAliases) + { + int i; + dict->useFlagAliases = true; + dict->lenAffixData = info->dict.lenAffixData; + dict->nAffixData = info->dict.nAffixData; + dict->AffixData = (char **) palloc0(dict->nAffixData * sizeof(char *)); + for (i = 0; i < dict->nAffixData; i++) + { + dict->AffixData[i] = palloc0(strlen(info->dict.AffixData[i]) + 1); + strcpy(dict->AffixData[i], info->dict.AffixData[i]); + } + } + + NISortDictionary(dict); + NIFinishBuild(dict); + + /* check available space in shared segment */ + size = sizeIspellDict(dict, dictFile, affFile); + if (size > segment_info->available) + elog(ERROR, "shared dictionary %s.dict / %s.affix needs %d B, only %ld B available", + dictFile, affFile, size, segment_info->available); + + /* fine, there's enough space - copy the dictionary */ + shdict = copyIspellDict(dict, dictFile, affFile, size, dict->nspell); + + /* add the new dictionary to the linked list (of SharedIspellDict structures) */ + shdict->next = segment_info->shdict; + segment_info->shdict = shdict; + } + /* continue load affix list to a current backend process */ + + /* NISortAffixes is used AffixData. 
Therefore we need to copy pointer */ + info->dict.lenAffixData = shdict->dict.lenAffixData; + info->dict.nAffixData = shdict->dict.nAffixData; + info->dict.AffixData = shdict->dict.AffixData; + info->dict.Dictionary = shdict->dict.Dictionary; + NISortAffixes(&(info->dict)); + NIFinishBuild(&(info->dict)); + + /* STOP WORDS */ + + /* lookup if the stop words are already loaded in the shared segment, but only if there + * actually is a list */ + if (stopFile && *stopFile) + { + shstop = get_shared_stop_list(stopFile); + + /* load the stopwords if not yet defined */ + if (shstop == NULL) + { + readstoplist(stopFile, &stoplist, lowerstr); + + size = sizeStopList(&stoplist, stopFile); + if (size > segment_info->available) + elog(ERROR, "shared stoplist %s.stop needs %d B, only %ld B available", + stopFile, size, segment_info->available); + + /* fine, there's enough space - copy the stoplist */ + shstop = copyStopList(&stoplist, stopFile, size); + + /* add the new stopword list to the linked list (of SharedStopList structures) */ + shstop->next = segment_info->shstop; + segment_info->shstop = shstop; + } + } + + /* Now, fill the DictInfo structure for the backend (references to dictionary, + * stopwords and the filenames). 
*/ + + info->shdict = shdict; + info->shstop = shstop; + info->lookup = GetCurrentTimestamp(); + + memcpy(info->dictFile, dictFile, strlen(dictFile) + 1); + memcpy(info->affixFile, dictFile, strlen(affFile)+ 1); + if (stopFile != NULL) + memcpy(info->stopFile, dictFile, strlen(stopFile) + 1); + else + memset(info->stopFile, 0, sizeof(info->stopFile)); } Datum dispell_init(PG_FUNCTION_ARGS); @@ -434,19 +404,19 @@ PG_FUNCTION_INFO_V1(dispell_list_stoplists); Datum dispell_reset(PG_FUNCTION_ARGS) { - LWLockAcquire(segment_info->lock, LW_EXCLUSIVE); + LWLockAcquire(segment_info->lock, LW_EXCLUSIVE); - segment_info->dict = NULL; - segment_info->stop = NULL; - segment_info->lastReset = GetCurrentTimestamp(); - segment_info->firstfree = ((char*)segment_info) + MAXALIGN(sizeof(SegmentInfo)); - segment_info->available = max_ispell_mem_size - (int)(segment_info->firstfree - (char*)segment_info); + segment_info->shdict = NULL; + segment_info->shstop = NULL; + segment_info->lastReset = GetCurrentTimestamp(); + segment_info->firstfree = ((char*) segment_info) + MAXALIGN(sizeof(SegmentInfo)); + segment_info->available = max_ispell_mem_size() - (int)(segment_info->firstfree - (char*) segment_info); - memset(segment_info->firstfree, 0, segment_info->available); + memset(segment_info->firstfree, 0, segment_info->available); - LWLockRelease(segment_info->lock); + LWLockRelease(segment_info->lock); - PG_RETURN_VOID(); + PG_RETURN_VOID(); } /* @@ -455,14 +425,14 @@ dispell_reset(PG_FUNCTION_ARGS) Datum dispell_mem_available(PG_FUNCTION_ARGS) { - int result = 0; - LWLockAcquire(segment_info->lock, LW_SHARED); + int result = 0; + LWLockAcquire(segment_info->lock, LW_SHARED); - result = segment_info->available; + result = segment_info->available; - LWLockRelease(segment_info->lock); + LWLockRelease(segment_info->lock); - PG_RETURN_INT32(result); + PG_RETURN_INT32(result); } /* @@ -471,208 +441,209 @@ dispell_mem_available(PG_FUNCTION_ARGS) Datum dispell_mem_used(PG_FUNCTION_ARGS) { - int 
result = 0; - LWLockAcquire(segment_info->lock, LW_SHARED); + int result = 0; + LWLockAcquire(segment_info->lock, LW_SHARED); - result = max_ispell_mem_size - segment_info->available; + result = max_ispell_mem_size() - segment_info->available; - LWLockRelease(segment_info->lock); + LWLockRelease(segment_info->lock); - PG_RETURN_INT32(result); + PG_RETURN_INT32(result); } /* * This initializes a (shared) dictionary for a backend. The function receives * a list of options specified in the CREATE TEXT SEARCH DICTIONARY with ispell * template (http://www.postgresql.org/docs/9.3/static/sql-createtsdictionary.html). - * + * * There are three allowed options: DictFile, AffFile, StopWords. The values * should match to filenames in `pg_config --sharedir` directory, ending with * .dict, .affix and .stop. - * + * * The StopWords parameter is optional, the two other are required. - * + * * If any of the filenames are incorrect, the call to init_shared_dict will fail. */ Datum dispell_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); - char *dictFile = NULL, *affFile = NULL, *stopFile = NULL; - bool affloaded = false, - dictloaded = false, - stoploaded = false; - ListCell *l; - - /* this is the result passed to dispell_lexize */ - DictInfo * info = (DictInfo *)palloc0(sizeof(DictInfo)); - - foreach(l, dictoptions) - { - DefElem *defel = (DefElem *) lfirst(l); - - if (pg_strcasecmp(defel->defname, "DictFile") == 0) - { - if (dictloaded) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("multiple DictFile parameters"))); - dictFile = defGetString(defel); - dictloaded = true; - } - else if (pg_strcasecmp(defel->defname, "AffFile") == 0) - { - if (affloaded) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("multiple AffFile parameters"))); - affFile = defGetString(defel); - affloaded = true; - } - else if (pg_strcasecmp(defel->defname, "StopWords") == 0) - { - if (stoploaded) - ereport(ERROR, - 
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("multiple StopWords parameters"))); - stopFile = defGetString(defel); - stoploaded = true; - } - else - { - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("unrecognized Ispell parameter: \"%s\"", - defel->defname))); - } - } - - if (!affloaded) - { - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("missing AffFile parameter"))); - } - else if (! dictloaded) - { - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("missing DictFile parameter"))); - } - - /* search if the dictionary is already initialized */ - LWLockAcquire(segment_info->lock, LW_EXCLUSIVE); - - init_shared_dict(info, dictFile, affFile, stopFile); - - LWLockRelease(segment_info->lock); - - PG_RETURN_POINTER(info); + List *dictoptions = (List *) PG_GETARG_POINTER(0); + char *dictFile = NULL, + *affFile = NULL, + *stopFile = NULL; + bool affloaded = false, + dictloaded = false, + stoploaded = false; + ListCell *l; + + /* this is the result passed to dispell_lexize */ + DictInfo *info = (DictInfo *) palloc0(sizeof(DictInfo)); + + foreach(l, dictoptions) + { + DefElem *defel = (DefElem *) lfirst(l); + + if (pg_strcasecmp(defel->defname, "DictFile") == 0) + { + if (dictloaded) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple DictFile parameters"))); + dictFile = defGetString(defel); + dictloaded = true; + } + else if (pg_strcasecmp(defel->defname, "AffFile") == 0) + { + if (affloaded) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple AffFile parameters"))); + affFile = defGetString(defel); + affloaded = true; + } + else if (pg_strcasecmp(defel->defname, "StopWords") == 0) + { + if (stoploaded) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple StopWords parameters"))); + stopFile = defGetString(defel); + stoploaded = true; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + 
errmsg("unrecognized Ispell parameter: \"%s\"", + defel->defname))); + } + } + + if (!affloaded) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing AffFile parameter"))); + } + else if (!dictloaded) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing DictFile parameter"))); + } + + /* search if the dictionary is already initialized */ + LWLockAcquire(segment_info->lock, LW_EXCLUSIVE); + + init_shared_dict(info, dictFile, affFile, stopFile); + + LWLockRelease(segment_info->lock); + + PG_RETURN_POINTER(info); } Datum dispell_lexize(PG_FUNCTION_ARGS) { - DictInfo * info = (DictInfo *) PG_GETARG_POINTER(0); - char *in = (char *) PG_GETARG_POINTER(1); - int32 len = PG_GETARG_INT32(2); - char *txt; - TSLexeme *res; - TSLexeme *ptr, - *cptr; - - if (len <= 0) - PG_RETURN_POINTER(NULL); - - txt = lowerstr_with_len(in, len); - - /* need to lock the segment in shared mode */ - LWLockAcquire(segment_info->lock, LW_SHARED); - - /* do we need to reinit the dictionary? 
was the dict reset since the lookup */ - if (timestamp_cmp_internal(info->lookup, segment_info->lastReset) < 0) { - - /* relock in exclusive mode */ - LWLockRelease(segment_info->lock); - LWLockAcquire(segment_info->lock, LW_EXCLUSIVE); - - elog(INFO, "reinitializing shared dict (segment reset)"); - - init_shared_dict(info, info->dictFile, info->affixFile, info->stopFile); - } - - res = SharedNINormalizeWord(info->dict, txt); - - /* nothing found :-( */ - if (res == NULL) { - LWLockRelease(segment_info->lock); - PG_RETURN_POINTER(NULL); - } - - ptr = cptr = res; - while (ptr->lexeme) - { - if (searchstoplist(&info->stop->list, ptr->lexeme)) - { - pfree(ptr->lexeme); - ptr->lexeme = NULL; - ptr++; - } - else - { - memcpy(cptr, ptr, sizeof(TSLexeme)); - cptr++; - ptr++; - } - } - cptr->lexeme = NULL; - - LWLockRelease(segment_info->lock); - - PG_RETURN_POINTER(res); + DictInfo *info = (DictInfo *) PG_GETARG_POINTER(0); + char *in = (char *) PG_GETARG_POINTER(1); + int32 len = PG_GETARG_INT32(2); + char *txt; + TSLexeme *res; + TSLexeme *ptr, + *cptr; + + if (len <= 0) + PG_RETURN_POINTER(NULL); + + txt = lowerstr_with_len(in, len); + + /* need to lock the segment in shared mode */ + LWLockAcquire(segment_info->lock, LW_SHARED); + + /* do we need to reinit the dictionary? 
was the dict reset since the lookup */ + if (timestamp_cmp_internal(info->lookup, segment_info->lastReset) < 0) + { + /* relock in exclusive mode */ + LWLockRelease(segment_info->lock); + LWLockAcquire(segment_info->lock, LW_EXCLUSIVE); + + init_shared_dict(info, info->dictFile, info->affixFile, info->stopFile); + } + + res = NINormalizeWord(&(info->dict), txt); + + /* nothing found :-( */ + if (res == NULL) + { + LWLockRelease(segment_info->lock); + PG_RETURN_POINTER(NULL); + } + + ptr = cptr = res; + while (ptr->lexeme) + { + if (info->shstop && searchstoplist(&(info->shstop->stop), ptr->lexeme)) + { + pfree(ptr->lexeme); + ptr->lexeme = NULL; + ptr++; + } + else + { + memcpy(cptr, ptr, sizeof(TSLexeme)); + cptr++; + ptr++; + } + } + cptr->lexeme = NULL; + + LWLockRelease(segment_info->lock); + + PG_RETURN_POINTER(res); } /* * This 'allocates' memory in the shared segment - i.e. the memory is * already allocated and this just gives nbytes to the caller. This is * used exclusively by the 'copy' methods defined below. - * + * * The memory is kept aligned thanks to MAXALIGN. Also, this assumes * the segment was locked properly by the caller. */ -static -char * shalloc(int bytes) { - - char * result; - bytes = MAXALIGN(bytes); - - /* This shouldn't really happen, as the init_shared_dict checks the size - * prior to copy. So let's just throw error here, as something went - * obviously wrong. */ - if (bytes > segment_info->available) - elog(ERROR, "the shared segment (shared ispell) is too small"); +static char * +shalloc(int bytes) +{ + char *result; + bytes = MAXALIGN(bytes); - result = segment_info->firstfree; - segment_info->firstfree += bytes; - segment_info->available -= bytes; + /* This shouldn't really happen, as the init_shared_dict checks the size + * prior to copy. So let's just throw error here, as something went + * obviously wrong. 
*/ + if (bytes > segment_info->available) + elog(ERROR, "the shared segment (shared ispell) is too small"); - memset(result, 0, bytes); + result = segment_info->firstfree; + segment_info->firstfree += bytes; + segment_info->available -= bytes; - return result; + memset(result, 0, bytes); + return result; } /* * Copies a string into the shared segment - allocates memory and does memcpy. - * + * * TODO This assumes the string is properly terminated (should be guaranteed * by the code that reads and parses the dictionary / affixes). */ -static -char * shstrcpy(char * str) { - char * tmp = shalloc(strlen(str)+1); - memcpy(tmp, str, strlen(str)+1); - return tmp; +static char * +shstrcpy(char *str) +{ + char *tmp = shalloc(strlen(str) + 1); + memcpy(tmp, str, strlen(str) + 1); + return tmp; } /* @@ -684,498 +655,312 @@ char * shstrcpy(char * str) { /* SPNode - dictionary words */ -static -SPNode * copySPNode(SPNode * node) { - int i; - - SPNode * copy = NULL; - - if (node == NULL) { - return NULL; - } - - copy = (SPNode*)shalloc(offsetof(SPNode,data) + sizeof(SPNodeData) * node->length); - memcpy(copy, node, offsetof(SPNode,data) + sizeof(SPNodeData) * node->length); - - for (i = 0; i < node->length; i++) { - copy->data[i].node = copySPNode(node->data[i].node); - } - - return copy; -} - -static -int sizeSPNode(SPNode * node) { - - int i; - int size = 0; - - if (node == NULL) { - return 0; - } - - size = MAXALIGN(offsetof(SPNode,data) + sizeof(SPNodeData) * node->length); - - for (i = 0; i < node->length; i++) { - size += sizeSPNode(node->data[i].node); - } - - return size; -} - -/* RegisNode - simple regular expressions */ - -static -RegisNode * copyRegisNode(RegisNode * node) { - - RegisNode * copy = (RegisNode *)shalloc(offsetof(RegisNode, data) + node->len); - - memcpy(copy, node, offsetof(RegisNode, data) + node->len); - - if (node->next != NULL) { - copy->next = copyRegisNode(node->next); - } - - return copy; -} - -static -int sizeRegisNode(RegisNode * node) { - - 
int size = MAXALIGN(offsetof(RegisNode, data) + node->len); - - if (node->next != NULL) { - size += sizeRegisNode(node->next); - } - - return size; -} - -/* AFFIX - affix rules (simple, regis or full regular expressions). */ - -static -AFFIX * copyAffix(AFFIX * affix) { - - AFFIX * copy = (AFFIX*)shalloc(sizeof(AFFIX)); - - memcpy(copy, affix, sizeof(AFFIX)); - - copy->find = shstrcpy(affix->find); - copy->repl = shstrcpy(affix->repl); - - if (affix->isregis) { - copy->reg.regis.node = copyRegisNode(affix->reg.regis.node); - } else if (! affix->issimple) { - - /*FIXME Need to copy the regex_t properly. But a plain copy would not be - * safe tu use by multiple processes at the same time, so each backend - * needs to create it's own copy. */ - elog(ERROR, "This extension can't handle regex_t affixes yet."); - - } - - return copy; - -} - -static -int sizeAffix(AFFIX * affix) { - - int size = MAXALIGN(sizeof(AFFIX)); - - size += MAXALIGN(strlen(affix->find)+1); - size += MAXALIGN(strlen(affix->repl)+1); - - if (affix->isregis) { - size += sizeRegisNode(affix->reg.regis.node); - } else if (! affix->issimple) { - - /*FIXME Need to copy the regex_t properly. But would a plain copy be - * safe tu use by multiple processes at the same time? 
*/ - elog(ERROR, "This extension can't handle regex_t affixes yet."); - - } - - return size; - -} - -/* AffixNode */ - -static -AffixNode * copyAffixNode(AffixNode * node) { - - int i, j; - AffixNode * copy = NULL; - - if (node == NULL) { - return NULL; - } - - copy = (AffixNode *)shalloc(offsetof(AffixNode,data) + sizeof(AffixNodeData) * node->length); - memcpy(copy, node, offsetof(AffixNode,data) + sizeof(AffixNodeData) * node->length); - - for (i = 0; i < node->length; i++) { +static SPNode * +copySPNode(SPNode *node) +{ + int i; + SPNode *copy = NULL; - copy->data[i].node = copyAffixNode(node->data[i].node); + if (node == NULL) + return NULL; - copy->data[i].val = node->data[i].val; - copy->data[i].naff = node->data[i].naff; - copy->data[i].aff = (AFFIX**)shalloc(sizeof(AFFIX*) * node->data[i].naff); + copy = (SPNode *) shalloc(offsetof(SPNode, data) + sizeof(SPNodeData) * node->length); + memcpy(copy, node, offsetof(SPNode, data) + sizeof(SPNodeData) * node->length); - for (j = 0; j < node->data[i].naff; j++) { - copy->data[i].aff[j] = copyAffix(node->data[i].aff[j]); - } - } + for (i = 0; i < node->length; i++) + copy->data[i].node = copySPNode(node->data[i].node); - return copy; + return copy; } -static -int sizeAffixNode(AffixNode * node) { - - int i, j; - int size = 0; - - if (node == NULL) { - return 0; - } - - size = MAXALIGN(offsetof(AffixNode,data) + sizeof(AffixNodeData) * node->length); +static int +sizeSPNode(SPNode *node) +{ + int i; + int size = 0; - for (i = 0; i < node->length; i++) { + if (node == NULL) + return 0; - size += sizeAffixNode(node->data[i].node); - size += MAXALIGN(sizeof(AFFIX*) * node->data[i].naff); + size = MAXALIGN(offsetof(SPNode, data) + sizeof(SPNodeData) * node->length); - for (j = 0; j < node->data[i].naff; j++) { - size += sizeAffix(node->data[i].aff[j]); - } - } + for (i = 0; i < node->length; i++) + size += sizeSPNode(node->data[i].node); - return size; + return size; } /* StopList */ -static -SharedStopList * 
copyStopList(StopList * list, char * stopFile, int size) { - - int i; - SharedStopList * copy = (SharedStopList *)shalloc(sizeof(SharedStopList)); - - copy->list.len = list->len; - copy->list.stop = (char**)shalloc(sizeof(char*) * list->len); - copy->stopFile = shstrcpy(stopFile); - copy->nbytes = size; - - for (i = 0; i < list->len; i++) { - copy->list.stop[i] = shstrcpy(list->stop[i]); - } - - return copy; -} - -static -int sizeStopList(StopList * list, char * stopFile) { - - int i; - int size = MAXALIGN(sizeof(SharedStopList)); +static SharedStopList * +copyStopList(StopList *list, char *stopFile, int size) +{ + int i; + SharedStopList *copy = (SharedStopList *) shalloc(sizeof(SharedStopList)); - size += MAXALIGN(sizeof(char*) * list->len); - size += MAXALIGN(strlen(stopFile) + 1); + copy->stop.len = list->len; + copy->stop.stop = (char **) shalloc(sizeof(char *) * list->len); + copy->stopFile = shstrcpy(stopFile); + copy->nbytes = size; - for (i = 0; i < list->len; i++) { - size += MAXALIGN(strlen(list->stop[i]) + 1); - } + for (i = 0; i < list->len; i++) + copy->stop.stop[i] = shstrcpy(list->stop[i]); - return size; + return copy; } -/* CMPDAffix (compound affixes?) */ - -static -int countCMPDAffixes(CMPDAffix * affixes) { - - /* there's at least one affix */ - int count = 1; - CMPDAffix * ptr = affixes; +static int +sizeStopList(StopList *list, char *stopFile) +{ + int i; + int size = MAXALIGN(sizeof(SharedStopList)); - /* the last one is marked with (affix == NULL) */ - while (ptr->affix) - { - ptr++; - count++; - } + size += MAXALIGN(sizeof(char *) * list->len); + size += MAXALIGN(strlen(stopFile) + 1); - return count; + for (i = 0; i < list->len; i++) + size += MAXALIGN(strlen(list->stop[i]) + 1); + return size; } /* * Performs deep copy of the dictionary into the shared memory segment. - * + * * It gets the populated Ispell Dictionary (dict) and copies all the data * using the 'copy' methods listed above. 
It also keeps the filenames so * that it's possible to lookup the dictionaries later. + * + * Function copies only word list. Affix list is loaded to a current process. */ -static -SharedIspellDict * copyIspellDict(IspellDict * dict, char * dictFile, char * affixFile, int size, int words) { - - int i, cnt; - - SharedIspellDict * copy = (SharedIspellDict*)shalloc(sizeof(SharedIspellDict)); - - copy->dictFile = shalloc(strlen(dictFile)+1); - copy->affixFile = shalloc(strlen(affixFile)+1); - - strcpy(copy->dictFile, dictFile); - strcpy(copy->affixFile, affixFile); - - copy->naffixes = dict->naffixes; - - copy->Affix = (AFFIX*)shalloc(sizeof(AFFIX) * dict->naffixes); - - copy->Suffix = copyAffixNode(dict->Suffix); - copy->Prefix = copyAffixNode(dict->Prefix); +static SharedIspellDict * +copyIspellDict(IspellDict *dict, char *dictFile, char *affixFile, int size, int words) +{ + int i; - copy->Dictionary = copySPNode(dict->Dictionary); + SharedIspellDict *copy = (SharedIspellDict *) shalloc(sizeof(SharedIspellDict)); - /* copy affix data */ - copy->nAffixData = dict->nAffixData; - copy->AffixData = (char**)shalloc(sizeof(char*) * dict->nAffixData); - for (i = 0; i < copy->nAffixData; i++) { - copy->AffixData[i] = shstrcpy(dict->AffixData[i]); - } + copy->dictFile = shalloc(strlen(dictFile) + 1); + copy->affixFile = shalloc(strlen(affixFile) + 1); - /* copy compound affixes (there's at least one) */ - cnt = countCMPDAffixes(dict->CompoundAffix); - copy->CompoundAffix = (CMPDAffix*)shalloc(sizeof(CMPDAffix) * cnt); - memcpy(copy->CompoundAffix, dict->CompoundAffix, sizeof(CMPDAffix) * cnt); + strcpy(copy->dictFile, dictFile); + strcpy(copy->affixFile, affixFile); - memcpy(copy->flagval, dict->flagval, 255); - copy->usecompound = dict->usecompound; + copy->dict.Dictionary = copySPNode(dict->Dictionary); - copy->nbytes = size; - copy->nwords = words; + /* copy affix data */ + copy->dict.nAffixData = dict->nAffixData; + copy->dict.AffixData = (char **) shalloc(sizeof(char *) 
* dict->nAffixData); + for (i = 0; i < copy->dict.nAffixData; i++) + copy->dict.AffixData[i] = shstrcpy(dict->AffixData[i]); - return copy; + copy->nbytes = size; + copy->nwords = words; + return copy; } /* - * Computes how much space is needed for a dictionary in the shared segment. + * Computes how much space is needed for a dictionary (word list) in the shared segment. + * + * Function does not compute space for a affix list since affix list is loaded + * to a current process. */ -static -int sizeIspellDict(IspellDict * dict, char * dictFile, char * affixFile) { - - int i; - int size = MAXALIGN(sizeof(SharedIspellDict)); - - size += MAXALIGN(strlen(dictFile)+1); - size += MAXALIGN(strlen(affixFile)+1); - - size += MAXALIGN(sizeof(AFFIX) * dict->naffixes); - - size += MAXALIGN(sizeAffixNode(dict->Suffix)); - size += MAXALIGN(sizeAffixNode(dict->Prefix)); - - size += sizeSPNode(dict->Dictionary); +static int +sizeIspellDict(IspellDict *dict, char *dictFile, char *affixFile) +{ + int i; + int size = MAXALIGN(sizeof(SharedIspellDict)); - /* copy affix data */ - size += MAXALIGN(sizeof(char*) * dict->nAffixData); - for (i = 0; i < dict->nAffixData; i++) { - size += MAXALIGN(sizeof(char) * strlen(dict->AffixData[i]) + 1); - } + size += MAXALIGN(strlen(dictFile) + 1); + size += MAXALIGN(strlen(affixFile) + 1); - /* copy compound affixes (there's at least one) */ - size += MAXALIGN(sizeof(CMPDAffix) * countCMPDAffixes(dict->CompoundAffix)); + size += sizeSPNode(dict->Dictionary); - return size; + /* copy affix data */ + size += MAXALIGN(sizeof(char *) * dict->nAffixData); + for (i = 0; i < dict->nAffixData; i++) + size += MAXALIGN(sizeof(char) * strlen(dict->AffixData[i]) + 1); + return size; } /* SRF function returning a list of shared dictionaries currently loaded in memory. 
*/ Datum dispell_list_dicts(PG_FUNCTION_ARGS) { - FuncCallContext *funcctx; - TupleDesc tupdesc; - AttInMetadata *attinmeta; - SharedIspellDict * dict; - - /* init on the first call */ - if (SRF_IS_FIRSTCALL()) { - - MemoryContext oldcontext; - - funcctx = SRF_FIRSTCALL_INIT(); - oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); - - /* get a shared lock and then the first dictionary */ - LWLockAcquire(segment_info->lock, LW_SHARED); - funcctx->user_fctx = segment_info->dict; - - /* Build a tuple descriptor for our result type */ - if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("function returning record called in context " - "that cannot accept type record"))); - - /* - * generate attribute metadata needed later to produce tuples from raw - * C strings - */ - attinmeta = TupleDescGetAttInMetadata(tupdesc); - funcctx->attinmeta = attinmeta; - funcctx->tuple_desc = tupdesc; - - /* switch back to the old context */ - MemoryContextSwitchTo(oldcontext); - - } - - /* init the context */ - funcctx = SRF_PERCALL_SETUP(); - - /* check if we have more data */ - if (funcctx->user_fctx != NULL) - { - HeapTuple tuple; - Datum result; - Datum values[5]; - bool nulls[5]; - - text *dictname, *affname; - - dict = (SharedIspellDict*)funcctx->user_fctx; - funcctx->user_fctx = dict->next; - - memset(nulls, 0, sizeof(nulls)); - - dictname = (text *) palloc(strlen(dict->dictFile) + VARHDRSZ); - affname = (text *) palloc(strlen(dict->affixFile) + VARHDRSZ); - - SET_VARSIZE(dictname, strlen(dict->dictFile) + VARHDRSZ); - SET_VARSIZE(affname, strlen(dict->affixFile) + VARHDRSZ); - - strcpy(VARDATA(dictname), dict->dictFile); - strcpy(VARDATA(affname), dict->affixFile); - - values[0] = PointerGetDatum(dictname); - values[1] = PointerGetDatum(affname); - values[2] = UInt32GetDatum(dict->nwords); - values[3] = UInt32GetDatum(dict->naffixes); - values[4] = 
UInt32GetDatum(dict->nbytes); - - /* Build and return the tuple. */ - tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); - - /* make the tuple into a datum */ - result = HeapTupleGetDatum(tuple); - - /* Here we want to return another item: */ - SRF_RETURN_NEXT(funcctx, result); - - } - else - { - /* release the lock */ - LWLockRelease(segment_info->lock); - - /* Here we are done returning items and just need to clean up: */ - SRF_RETURN_DONE(funcctx); - } - + FuncCallContext *funcctx; + TupleDesc tupdesc; + AttInMetadata *attinmeta; + SharedIspellDict *dict; + + /* init on the first call */ + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* get a shared lock and then the first dictionary */ + LWLockAcquire(segment_info->lock, LW_SHARED); + funcctx->user_fctx = segment_info->shdict; + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("function returning record called in context " + "that cannot accept type record"))); + + /* + * generate attribute metadata needed later to produce tuples from raw + * C strings + */ + attinmeta = TupleDescGetAttInMetadata(tupdesc); + funcctx->attinmeta = attinmeta; + funcctx->tuple_desc = tupdesc; + + /* switch back to the old context */ + MemoryContextSwitchTo(oldcontext); + } + + /* init the context */ + funcctx = SRF_PERCALL_SETUP(); + + /* check if we have more data */ + if (funcctx->user_fctx != NULL) + { + HeapTuple tuple; + Datum result; + Datum values[5]; + bool nulls[5]; + + text *dictname, + *affname; + + dict = (SharedIspellDict *) funcctx->user_fctx; + funcctx->user_fctx = dict->next; + + memset(nulls, 0, sizeof(nulls)); + + dictname = (text *) palloc(strlen(dict->dictFile) + VARHDRSZ); + affname = (text *) palloc(strlen(dict->affixFile) + VARHDRSZ); + 
+ SET_VARSIZE(dictname, strlen(dict->dictFile) + VARHDRSZ); + SET_VARSIZE(affname, strlen(dict->affixFile) + VARHDRSZ); + + strcpy(VARDATA(dictname), dict->dictFile); + strcpy(VARDATA(affname), dict->affixFile); + + values[0] = PointerGetDatum(dictname); + values[1] = PointerGetDatum(affname); + values[2] = UInt32GetDatum(dict->nwords); + values[3] = UInt32GetDatum(dict->dict.naffixes); + values[4] = UInt32GetDatum(dict->nbytes); + + /* Build and return the tuple. */ + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + + /* make the tuple into a datum */ + result = HeapTupleGetDatum(tuple); + + /* Here we want to return another item: */ + SRF_RETURN_NEXT(funcctx, result); + } + else + { + /* release the lock */ + LWLockRelease(segment_info->lock); + + /* Here we are done returning items and just need to clean up: */ + SRF_RETURN_DONE(funcctx); + } } /* SRF function returning a list of shared stopword lists currently loaded in memory. */ Datum dispell_list_stoplists(PG_FUNCTION_ARGS) { - FuncCallContext *funcctx; - TupleDesc tupdesc; - AttInMetadata *attinmeta; - SharedStopList *stoplist; - - /* init on the first call */ - if (SRF_IS_FIRSTCALL()) { - - MemoryContext oldcontext; - - funcctx = SRF_FIRSTCALL_INIT(); - oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); - - /* get a shared lock and then the first stop list */ - LWLockAcquire(segment_info->lock, LW_SHARED); - funcctx->user_fctx = segment_info->stop; - - /* Build a tuple descriptor for our result type */ - if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("function returning record called in context " - "that cannot accept type record"))); - - /* - * generate attribute metadata needed later to produce tuples from raw - * C strings - */ - attinmeta = TupleDescGetAttInMetadata(tupdesc); - funcctx->attinmeta = attinmeta; - funcctx->tuple_desc = tupdesc; - - /* switch back to the old context 
*/ - MemoryContextSwitchTo(oldcontext); - - } - - /* init the context */ - funcctx = SRF_PERCALL_SETUP(); - - /* check if we have more data */ - if (funcctx->user_fctx != NULL) - { - HeapTuple tuple; - Datum result; - Datum values[3]; - bool nulls[3]; - - text *stopname; - - stoplist = (SharedStopList*)funcctx->user_fctx; - funcctx->user_fctx = stoplist->next; - - memset(nulls, 0, sizeof(nulls)); - - stopname = (text *) palloc(strlen(stoplist->stopFile) + VARHDRSZ); - - SET_VARSIZE(stopname, strlen(stoplist->stopFile) + VARHDRSZ); - - strcpy(VARDATA(stopname), stoplist->stopFile); - - values[0] = PointerGetDatum(stopname); - values[1] = UInt32GetDatum(stoplist->list.len); - values[2] = UInt32GetDatum(stoplist->nbytes); - - /* Build and return the tuple. */ - tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); - - /* make the tuple into a datum */ - result = HeapTupleGetDatum(tuple); - - /* Here we want to return another item: */ - SRF_RETURN_NEXT(funcctx, result); - - } - else - { - /* release the lock */ - LWLockRelease(segment_info->lock); - - /* Here we are done returning items and just need to clean up: */ - SRF_RETURN_DONE(funcctx); - } - + FuncCallContext *funcctx; + TupleDesc tupdesc; + AttInMetadata *attinmeta; + SharedStopList *stoplist; + + /* init on the first call */ + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* get a shared lock and then the first stop list */ + LWLockAcquire(segment_info->lock, LW_SHARED); + funcctx->user_fctx = segment_info->shstop; + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("function returning record called in context " + "that cannot accept type record"))); + + /* + * generate attribute metadata needed later to produce tuples from raw + * C strings 
+ */ + attinmeta = TupleDescGetAttInMetadata(tupdesc); + funcctx->attinmeta = attinmeta; + funcctx->tuple_desc = tupdesc; + + /* switch back to the old context */ + MemoryContextSwitchTo(oldcontext); + } + + /* init the context */ + funcctx = SRF_PERCALL_SETUP(); + + /* check if we have more data */ + if (funcctx->user_fctx != NULL) + { + HeapTuple tuple; + Datum result; + Datum values[3]; + bool nulls[3]; + + text *stopname; + + stoplist = (SharedStopList *) funcctx->user_fctx; + funcctx->user_fctx = stoplist->next; + + memset(nulls, 0, sizeof(nulls)); + + stopname = (text *) palloc(strlen(stoplist->stopFile) + VARHDRSZ); + + SET_VARSIZE(stopname, strlen(stoplist->stopFile) + VARHDRSZ); + + strcpy(VARDATA(stopname), stoplist->stopFile); + + values[0] = PointerGetDatum(stopname); + values[1] = UInt32GetDatum(stoplist->stop.len); + values[2] = UInt32GetDatum(stoplist->nbytes); + + /* Build and return the tuple. */ + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + + /* make the tuple into a datum */ + result = HeapTupleGetDatum(tuple); + + /* Here we want to return another item: */ + SRF_RETURN_NEXT(funcctx, result); + } + else + { + /* release the lock */ + LWLockRelease(segment_info->lock); + + /* Here we are done returning items and just need to clean up: */ + SRF_RETURN_DONE(funcctx); + } } diff --git a/src/shared_ispell.h b/src/shared_ispell.h new file mode 100644 index 0000000..92de330 --- /dev/null +++ b/src/shared_ispell.h @@ -0,0 +1,71 @@ +#ifndef __SHARED_ISPELL_H__ +#define __SHARED_ISPELL_H__ + +#include "storage/lwlock.h" +#include "utils/timestamp.h" +#include "tsearch/dicts/spell.h" +#include "tsearch/ts_public.h" + +/* This segment is initialized in the first process that accesses it (see + * ispell_shmem_startup function). 
+ */ +#define SEGMENT_NAME "shared_ispell" + +#define MAXLEN 255 + +typedef struct SharedIspellDict +{ + /* this is used for selecting the dictionary */ + char *dictFile; + char *affixFile; + int nbytes; + int nwords; + + /* next dictionary in the chain (essentially a linked list) */ + struct SharedIspellDict *next; + + IspellDict dict; +} SharedIspellDict; + +typedef struct SharedStopList +{ + char *stopFile; + int nbytes; + + struct SharedStopList *next; + + StopList stop; +} SharedStopList; + +/* used to allocate memory in the shared segment */ +typedef struct SegmentInfo +{ + LWLockId lock; + char *firstfree; /* first free address (always maxaligned) */ + size_t available; /* free space remaining at firstfree */ + Timestamp lastReset; /* last reset of the dictionary */ + + /* the shared segment (info and data) */ + SharedIspellDict *shdict; + SharedStopList *shstop; +} SegmentInfo; + +/* used to keep track of dictionary in each backend */ +typedef struct DictInfo +{ + Timestamp lookup; + + char dictFile[MAXLEN]; + char affixFile[MAXLEN]; + char stopFile[MAXLEN]; + + /* We split word list and affix list. + * In shdict we store a word list, word list will be stored in shared segment. + * In dict we store an affix list in each process. + */ + SharedIspellDict *shdict; + IspellDict dict; + SharedStopList *shstop; +} DictInfo; + +#endif \ No newline at end of file diff --git a/src/spell.c b/src/spell.c deleted file mode 100644 index d233247..0000000 --- a/src/spell.c +++ /dev/null @@ -1,647 +0,0 @@ -/*------------------------------------------------------------------------- - * - * spell.c - * - * Normalizing word with ISpell (in shared segment). Mostly a slightly - * copy of the spell.c code, modified so that it works with SharedIspellDict - * instead of plain IspellDict. 
- * - * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group - * Copyright (c) 2011, Tomas Vondra - * - * IDENTIFICATION - * src/spell.c (a slightly modified copy of src/backend/tsearch/spell.c) - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" - -#include "spell.h" - -#define MAX_NORM 1024 -#define MAXNORMLEN 256 - -#define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] ) - -static int -FindWord(SharedIspellDict *Conf, const char *word, int affixflag, int flag) -{ - SPNode *node = Conf->Dictionary; - SPNodeData *StopLow, - *StopHigh, - *StopMiddle; - const uint8 *ptr = (const uint8 *) word; - - flag &= FF_DICTFLAGMASK; - - while (node && *ptr) - { - StopLow = node->data; - StopHigh = node->data + node->length; - while (StopLow < StopHigh) - { - StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); - if (StopMiddle->val == *ptr) - { - if (*(ptr + 1) == '\0' && StopMiddle->isword) - { - if (flag == 0) - { - if (StopMiddle->compoundflag & FF_COMPOUNDONLY) - return 0; - } - else if ((flag & StopMiddle->compoundflag) == 0) - return 0; - - if ((affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL)) - return 1; - } - node = StopMiddle->node; - ptr++; - break; - } - else if (StopMiddle->val < *ptr) - StopLow = StopMiddle + 1; - else - StopHigh = StopMiddle; - } - if (StopLow >= StopHigh) - break; - } - return 0; -} - -static AffixNodeData * -FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) -{ - AffixNodeData *StopLow, - *StopHigh, - *StopMiddle; - uint8 symbol; - - if (node->isvoid) - { /* search void affixes */ - if (node->data->naff) - return node->data; - node = node->data->node; - } - - while (node && *level < wrdlen) - { - StopLow = node->data; - StopHigh = node->data + node->length; - while (StopLow < StopHigh) - { - StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); - symbol = GETWCHAR(word, 
wrdlen, *level, type); - - if (StopMiddle->val == symbol) - { - (*level)++; - if (StopMiddle->naff) - return StopMiddle; - node = StopMiddle->node; - break; - } - else if (StopMiddle->val < symbol) - StopLow = StopMiddle + 1; - else - StopHigh = StopMiddle; - } - if (StopLow >= StopHigh) - break; - } - return NULL; -} - -static char * -CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen) -{ - /* - * Check compound allow flags - */ - - if (flagflags == 0) - { - if (Affix->flagflags & FF_COMPOUNDONLY) - return NULL; - } - else if (flagflags & FF_COMPOUNDBEGIN) - { - if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG) - return NULL; - if ((Affix->flagflags & FF_COMPOUNDBEGIN) == 0) - if (Affix->type == FF_SUFFIX) - return NULL; - } - else if (flagflags & FF_COMPOUNDMIDDLE) - { - if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == 0 || - (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)) - return NULL; - } - else if (flagflags & FF_COMPOUNDLAST) - { - if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG) - return NULL; - if ((Affix->flagflags & FF_COMPOUNDLAST) == 0) - if (Affix->type == FF_PREFIX) - return NULL; - } - - /* - * make replace pattern of affix - */ - if (Affix->type == FF_SUFFIX) - { - strcpy(newword, word); - strcpy(newword + len - Affix->replen, Affix->find); - if (baselen) /* store length of non-changed part of word */ - *baselen = len - Affix->replen; - } - else - { - /* - * if prefix is a all non-chaged part's length then all word contains - * only prefix and suffix, so out - */ - if (baselen && *baselen + strlen(Affix->find) <= Affix->replen) - return NULL; - strcpy(newword, Affix->find); - strcat(newword, word + Affix->replen); - } - - /* - * check resulting word - */ - if (Affix->issimple) - return newword; - else if (Affix->isregis) - { - if (RS_execute(&(Affix->reg.regis), newword)) - return newword; - } - else - { - int err; - pg_wchar *data; - size_t data_len; - int newword_len; - - /* Convert data string to wide characters 
*/ - newword_len = strlen(newword); - data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar)); - data_len = pg_mb2wchar_with_len(newword, data, newword_len); - - if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0))) - { - pfree(data); - return newword; - } - pfree(data); - } - - return NULL; -} - -static int -addToResult(char **forms, char **cur, char *word) -{ - if (cur - forms >= MAX_NORM - 1) - return 0; - if (forms == cur || strcmp(word, *(cur - 1)) != 0) - { - *cur = pstrdup(word); - *(cur + 1) = NULL; - return 1; - } - - return 0; -} - -static char ** -NormalizeSubWord(SharedIspellDict *Conf, char *word, int flag) -{ - AffixNodeData *suffix = NULL, - *prefix = NULL; - int slevel = 0, - plevel = 0; - int wrdlen = strlen(word), - swrdlen; - char **forms; - char **cur; - char newword[2 * MAXNORMLEN] = ""; - char pnewword[2 * MAXNORMLEN] = ""; - AffixNode *snode = Conf->Suffix, - *pnode; - int i, - j; - - if (wrdlen > MAXNORMLEN) - return NULL; - cur = forms = (char **) palloc(MAX_NORM * sizeof(char *)); - *cur = NULL; - - - /* Check that the word itself is normal form */ - if (FindWord(Conf, word, 0, flag)) - { - *cur = pstrdup(word); - cur++; - *cur = NULL; - } - - /* Find all other NORMAL forms of the 'word' (check only prefix) */ - pnode = Conf->Prefix; - plevel = 0; - while (pnode) - { - prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX); - if (!prefix) - break; - for (j = 0; j < prefix->naff; j++) - { - if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL)) - { - /* prefix success */ - if (FindWord(Conf, newword, prefix->aff[j]->flag, flag)) - cur += addToResult(forms, cur, newword); - } - } - pnode = prefix->node; - } - - /* - * Find all other NORMAL forms of the 'word' (check suffix and then - * prefix) - */ - while (snode) - { - int baselen = 0; - - /* find possible suffix */ - suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX); - if (!suffix) - break; - /* foreach suffix check affix 
*/ - for (i = 0; i < suffix->naff; i++) - { - if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen)) - { - /* suffix success */ - if (FindWord(Conf, newword, suffix->aff[i]->flag, flag)) - cur += addToResult(forms, cur, newword); - - /* now we will look changed word with prefixes */ - pnode = Conf->Prefix; - plevel = 0; - swrdlen = strlen(newword); - while (pnode) - { - prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX); - if (!prefix) - break; - for (j = 0; j < prefix->naff; j++) - { - if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen)) - { - /* prefix success */ - int ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ? - 0 : prefix->aff[j]->flag; - - if (FindWord(Conf, pnewword, ff, flag)) - cur += addToResult(forms, cur, pnewword); - } - } - pnode = prefix->node; - } - } - } - - snode = suffix->node; - } - - if (cur == forms) - { - pfree(forms); - return (NULL); - } - return (forms); -} - -typedef struct SplitVar -{ - int nstem; - int lenstem; - char **stem; - struct SplitVar *next; -} SplitVar; - -static int -CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace) -{ - bool issuffix; - - if (CheckInPlace) - { - while ((*ptr)->affix) - { - if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0) - { - len = (*ptr)->len; - issuffix = (*ptr)->issuffix; - (*ptr)++; - return (issuffix) ? len : 0; - } - (*ptr)++; - } - } - else - { - char *affbegin; - - while ((*ptr)->affix) - { - if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL) - { - len = (*ptr)->len + (affbegin - word); - issuffix = (*ptr)->issuffix; - (*ptr)++; - return (issuffix) ? 
len : 0; - } - (*ptr)++; - } - } - return -1; -} - -static SplitVar * -CopyVar(SplitVar *s, int makedup) -{ - SplitVar *v = (SplitVar *) palloc(sizeof(SplitVar)); - - v->next = NULL; - if (s) - { - int i; - - v->lenstem = s->lenstem; - v->stem = (char **) palloc(sizeof(char *) * v->lenstem); - v->nstem = s->nstem; - for (i = 0; i < s->nstem; i++) - v->stem[i] = (makedup) ? pstrdup(s->stem[i]) : s->stem[i]; - } - else - { - v->lenstem = 16; - v->stem = (char **) palloc(sizeof(char *) * v->lenstem); - v->nstem = 0; - } - return v; -} - -static void -AddStem(SplitVar *v, char *word) -{ - if (v->nstem >= v->lenstem) - { - v->lenstem *= 2; - v->stem = (char **) repalloc(v->stem, sizeof(char *) * v->lenstem); - } - - v->stem[v->nstem] = word; - v->nstem++; -} - -static SplitVar * -SplitToVariants(SharedIspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int wordlen, int startpos, int minpos) -{ - SplitVar *var = NULL; - SPNodeData *StopLow, - *StopHigh, - *StopMiddle = NULL; - SPNode *node = (snode) ? snode : Conf->Dictionary; - int level = (snode) ? minpos : startpos; /* recursive - * minpos==level */ - int lenaff; - CMPDAffix *caff; - char *notprobed; - int compoundflag = 0; - - notprobed = (char *) palloc(wordlen); - memset(notprobed, 1, wordlen); - var = CopyVar(orig, 1); - - while (level < wordlen) - { - /* find word with epenthetic or/and compound affix */ - caff = Conf->CompoundAffix; - while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? 
true : false)) >= 0) - { - /* - * there is one of compound affixes, so check word for existings - */ - char buf[MAXNORMLEN]; - char **subres; - - lenaff = level - startpos + lenaff; - - if (!notprobed[startpos + lenaff - 1]) - continue; - - if (level + lenaff - 1 <= minpos) - continue; - - if (lenaff >= MAXNORMLEN) - continue; /* skip too big value */ - if (lenaff > 0) - memcpy(buf, word + startpos, lenaff); - buf[lenaff] = '\0'; - - if (level == 0) - compoundflag = FF_COMPOUNDBEGIN; - else if (level == wordlen - 1) - compoundflag = FF_COMPOUNDLAST; - else - compoundflag = FF_COMPOUNDMIDDLE; - subres = NormalizeSubWord(Conf, buf, compoundflag); - if (subres) - { - /* Yes, it was a word from dictionary */ - SplitVar *new = CopyVar(var, 0); - SplitVar *ptr = var; - char **sptr = subres; - - notprobed[startpos + lenaff - 1] = 0; - - while (*sptr) - { - AddStem(new, *sptr); - sptr++; - } - pfree(subres); - - while (ptr->next) - ptr = ptr->next; - ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff); - - pfree(new->stem); - pfree(new); - } - } - - if (!node) - break; - - StopLow = node->data; - StopHigh = node->data + node->length; - while (StopLow < StopHigh) - { - StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); - if (StopMiddle->val == ((uint8 *) (word))[level]) - break; - else if (StopMiddle->val < ((uint8 *) (word))[level]) - StopLow = StopMiddle + 1; - else - StopHigh = StopMiddle; - } - - if (StopLow < StopHigh) - { - if (level == FF_COMPOUNDBEGIN) - compoundflag = FF_COMPOUNDBEGIN; - else if (level == wordlen - 1) - compoundflag = FF_COMPOUNDLAST; - else - compoundflag = FF_COMPOUNDMIDDLE; - - /* find infinitive */ - if (StopMiddle->isword && - (StopMiddle->compoundflag & compoundflag) && - notprobed[level]) - { - /* ok, we found full compoundallowed word */ - if (level > minpos) - { - /* and its length more than minimal */ - if (wordlen == level + 1) - { - /* well, it was last word */ - AddStem(var, pnstrdup(word 
+ startpos, wordlen - startpos)); - pfree(notprobed); - return var; - } - else - { - /* then we will search more big word at the same point */ - SplitVar *ptr = var; - - while (ptr->next) - ptr = ptr->next; - ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level); - /* we can find next word */ - level++; - AddStem(var, pnstrdup(word + startpos, level - startpos)); - node = Conf->Dictionary; - startpos = level; - continue; - } - } - } - node = StopMiddle->node; - } - else - node = NULL; - level++; - } - - AddStem(var, pnstrdup(word + startpos, wordlen - startpos)); - pfree(notprobed); - return var; -} - -static void -addNorm(TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant) -{ - if (*lres == NULL) - *lcur = *lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme)); - - if (*lcur - *lres < MAX_NORM - 1) - { - (*lcur)->lexeme = word; - (*lcur)->flags = flags; - (*lcur)->nvariant = NVariant; - (*lcur)++; - (*lcur)->lexeme = NULL; - } -} - -TSLexeme * -SharedNINormalizeWord(SharedIspellDict *Conf, char *word) -{ - char **res; - TSLexeme *lcur = NULL, - *lres = NULL; - uint16 NVariant = 1; - - res = NormalizeSubWord(Conf, word, 0); - - if (res) - { - char **ptr = res; - - while (*ptr && (lcur - lres) < MAX_NORM) - { - addNorm(&lres, &lcur, *ptr, 0, NVariant++); - ptr++; - } - pfree(res); - } - - if (Conf->usecompound) - { - int wordlen = strlen(word); - SplitVar *ptr, - *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1); - int i; - - while (var) - { - if (var->nstem > 1) - { - char **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDLAST); - - if (subres) - { - char **subptr = subres; - - while (*subptr) - { - for (i = 0; i < var->nstem - 1; i++) - { - addNorm(&lres, &lcur, (subptr == subres) ? 
var->stem[i] : pstrdup(var->stem[i]), 0, NVariant); - } - - addNorm(&lres, &lcur, *subptr, 0, NVariant); - subptr++; - NVariant++; - } - - pfree(subres); - var->stem[0] = NULL; - pfree(var->stem[var->nstem - 1]); - } - } - - for (i = 0; i < var->nstem && var->stem[i]; i++) - pfree(var->stem[i]); - ptr = var->next; - pfree(var->stem); - pfree(var); - var = ptr; - } - } - - return lres; -} diff --git a/src/spell.h b/src/spell.h deleted file mode 100644 index b559fc2..0000000 --- a/src/spell.h +++ /dev/null @@ -1,71 +0,0 @@ -/*------------------------------------------------------------------------- - * - * spell.h - * - * Declarations for ISpell dictionary - * - * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group - * - * src/include/tsearch/dicts/spell.h - * - *------------------------------------------------------------------------- - */ - -#ifndef __SHARED_SPELL_H__ -#define __SHARED_SPELL_H__ - -#include "regex/regex.h" -#include "tsearch/dicts/regis.h" -#include "tsearch/ts_public.h" -#include "storage/lwlock.h" -#include "tsearch/dicts/spell.h" - -typedef struct SharedIspellDict -{ - - /* this is used for selecting the dictionary */ - char * dictFile; - char * affixFile; - - int nbytes; - int nwords; - - /* next dictionary in the chain (essentially a linked list) */ - struct SharedIspellDict * next; - - /* the copied fields */ - int naffixes; - AFFIX *Affix; - - AffixNode *Suffix; - AffixNode *Prefix; - - SPNode *Dictionary; - char **AffixData; /* list of flags (characters) used in the dictionary */ - - /* FIXME lenAffixData and nAffixData seems to be the same thing */ - int lenAffixData; /* length of the affix array */ - int nAffixData; /* number of affix data items */ - - CMPDAffix * CompoundAffix; - - unsigned char flagval[256]; - bool usecompound; - -} SharedIspellDict; - -typedef struct SharedStopList -{ - - char * stopFile; - - int nbytes; - - StopList list; - struct SharedStopList * next; - -} SharedStopList; - -TSLexeme 
*SharedNINormalizeWord(SharedIspellDict *Conf, char *word); - -#endif \ No newline at end of file From f89418a13c9e3574cf1ef5297fc6b5567cd31412 Mon Sep 17 00:00:00 2001 From: Artur Zakirov Date: Fri, 12 Feb 2016 13:34:16 +0300 Subject: [PATCH 02/29] README.md was corrected --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index 55a7195..a0a436a 100644 --- a/README.md +++ b/README.md @@ -40,9 +40,6 @@ the config file (or update the current values) # libraries to load shared_preload_libraries = 'shared_ispell' - # known GUC prefixes - custom_variable_classes = 'shared_ispell' - # config of the shared memory shared_ispell.max_size = 32MB From d1b549528114a87d79784f9a9cf447ee85fa5d92 Mon Sep 17 00:00:00 2001 From: Artur Zakirov Date: Fri, 12 Feb 2016 19:17:13 +0300 Subject: [PATCH 03/29] Tests was corrected. Fixed bugs with the functions shared_ispell_reset(), shared_ispell_dicts(), shared_ispell_stoplists(). --- Makefile | 9 ++- README.md | 3 + expected/shared_ispell.out | 70 ++++++++++++++++++- postgresql.conf | 2 + ...ell--1.1.0.sql => shared_ispell--1.1.0.sql | 0 sql/shared_ispell.sql | 20 +++++- src/shared_ispell.c | 55 ++++++++++----- 7 files changed, 138 insertions(+), 21 deletions(-) create mode 100644 postgresql.conf rename sql/shared_ispell--1.1.0.sql => shared_ispell--1.1.0.sql (100%) diff --git a/Makefile b/Makefile index a238f02..fe0061a 100644 --- a/Makefile +++ b/Makefile @@ -4,10 +4,12 @@ MODULE_big = shared_ispell OBJS = src/shared_ispell.o EXTENSION = shared_ispell -DATA = sql/shared_ispell--1.1.0.sql +DATA = shared_ispell--1.1.0.sql REGRESS = shared_ispell +EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/postgresql.conf + ifdef USE_PGXS PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) @@ -17,4 +19,7 @@ subdir = contrib/shared_ispell top_builddir = ../.. 
include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk -endif \ No newline at end of file +endif + +installcheck: + @echo "installcheck is disabled" diff --git a/README.md b/README.md index a0a436a..55a7195 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,9 @@ the config file (or update the current values) # libraries to load shared_preload_libraries = 'shared_ispell' + # known GUC prefixes + custom_variable_classes = 'shared_ispell' + # config of the shared memory shared_ispell.max_size = 32MB diff --git a/expected/shared_ispell.out b/expected/shared_ispell.out index bd80ff0..e008cee 100644 --- a/expected/shared_ispell.out +++ b/expected/shared_ispell.out @@ -1,9 +1,22 @@ CREATE EXTENSION shared_ispell; +SELECT shared_ispell_mem_available(); + shared_ispell_mem_available +----------------------------- + 1048528 +(1 row) + +SELECT shared_ispell_mem_used(); + shared_ispell_mem_used +------------------------ + 48 +(1 row) + -- Test ISpell dictionary with ispell affix file CREATE TEXT SEARCH DICTIONARY shared_ispell ( Template=shared_ispell, DictFile=ispell_sample, - AffFile=ispell_sample + AffFile=ispell_sample, + Stopwords=english ); SELECT ts_lexize('shared_ispell', 'skies'); ts_lexize @@ -95,6 +108,18 @@ SELECT ts_lexize('shared_ispell', 'footballyklubber'); {foot,ball,klubber} (1 row) +SELECT shared_ispell_mem_available(); + shared_ispell_mem_available +----------------------------- + 980312 +(1 row) + +SELECT shared_ispell_mem_used(); + shared_ispell_mem_used +------------------------ + 68264 +(1 row) + -- Test ISpell dictionary with hunspell affix file CREATE TEXT SEARCH DICTIONARY shared_hunspell ( Template=shared_ispell, @@ -191,3 +216,46 @@ SELECT ts_lexize('shared_hunspell', 'footballyklubber'); {foot,ball,klubber} (1 row) +SELECT shared_ispell_mem_available(); + shared_ispell_mem_available +----------------------------- + 914208 +(1 row) + +SELECT shared_ispell_mem_used(); + shared_ispell_mem_used 
+------------------------ + 134368 +(1 row) + +SELECT * FROM shared_ispell_dicts(); + dict_name | affix_name | words | affixes | bytes +---------------+-----------------+-------+---------+------- + ispell_sample | hunspell_sample | 8 | 0 | 66104 + ispell_sample | ispell_sample | 8 | 0 | 66104 +(2 rows) + +SELECT * FROM shared_ispell_stoplists(); + stop_name | words | bytes +-----------+-------+------- + english | 127 | 2112 +(1 row) + +SELECT shared_ispell_reset(); + shared_ispell_reset +--------------------- + +(1 row) + +SELECT shared_ispell_mem_available(); + shared_ispell_mem_available +----------------------------- + 1048528 +(1 row) + +SELECT shared_ispell_mem_used(); + shared_ispell_mem_used +------------------------ + 48 +(1 row) + diff --git a/postgresql.conf b/postgresql.conf new file mode 100644 index 0000000..e3dcb7b --- /dev/null +++ b/postgresql.conf @@ -0,0 +1,2 @@ +shared_preload_libraries = 'shared_ispell' +shared_ispell.max_size = 1MB diff --git a/sql/shared_ispell--1.1.0.sql b/shared_ispell--1.1.0.sql similarity index 100% rename from sql/shared_ispell--1.1.0.sql rename to shared_ispell--1.1.0.sql diff --git a/sql/shared_ispell.sql b/sql/shared_ispell.sql index 888df98..ee56c2a 100644 --- a/sql/shared_ispell.sql +++ b/sql/shared_ispell.sql @@ -1,10 +1,14 @@ CREATE EXTENSION shared_ispell; +SELECT shared_ispell_mem_available(); +SELECT shared_ispell_mem_used(); + -- Test ISpell dictionary with ispell affix file CREATE TEXT SEARCH DICTIONARY shared_ispell ( Template=shared_ispell, DictFile=ispell_sample, - AffFile=ispell_sample + AffFile=ispell_sample, + Stopwords=english ); SELECT ts_lexize('shared_ispell', 'skies'); @@ -24,6 +28,9 @@ SELECT ts_lexize('shared_ispell', 'footballklubber'); SELECT ts_lexize('shared_ispell', 'ballyklubber'); SELECT ts_lexize('shared_ispell', 'footballyklubber'); +SELECT shared_ispell_mem_available(); +SELECT shared_ispell_mem_used(); + -- Test ISpell dictionary with hunspell affix file CREATE TEXT SEARCH DICTIONARY 
shared_hunspell ( Template=shared_ispell, @@ -47,3 +54,14 @@ SELECT ts_lexize('shared_hunspell', 'footklubber'); SELECT ts_lexize('shared_hunspell', 'footballklubber'); SELECT ts_lexize('shared_hunspell', 'ballyklubber'); SELECT ts_lexize('shared_hunspell', 'footballyklubber'); + +SELECT shared_ispell_mem_available(); +SELECT shared_ispell_mem_used(); + +SELECT * FROM shared_ispell_dicts(); +SELECT * FROM shared_ispell_stoplists(); + +SELECT shared_ispell_reset(); + +SELECT shared_ispell_mem_available(); +SELECT shared_ispell_mem_used(); \ No newline at end of file diff --git a/src/shared_ispell.c b/src/shared_ispell.c index db9cfec..eb7d397 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -57,10 +57,10 @@ #include "tsearch/ts_locale.h" #include "access/htup_details.h" #include "funcapi.h" +#include "utils/builtins.h" #include "shared_ispell.h" #include "tsearch/dicts/spell.h" -#include "regex/regguts.h" PG_MODULE_MAGIC; @@ -247,6 +247,28 @@ get_shared_stop_list(char *stop) return NULL; } +/* + * Cleares IspellDict fields which are used for store affix list. 
+ */ +static void +clean_dict_affix(IspellDict *dict) +{ + dict->maffixes = 0; + dict->naffixes = 0; + dict->Affix = NULL; + + dict->Suffix = NULL; + dict->Prefix = NULL; + + dict->AffixData = NULL; + dict->lenAffixData = 0; + dict->nAffixData = 0; + + dict->CompoundAffix = NULL; + + dict->avail = 0; +} + /* * Initializes the dictionary for use in backends - checks whether such dictionary * and list of stopwords is already used, and if not then parses it and loads it into @@ -278,6 +300,9 @@ init_shared_dict(DictInfo *info, char *dictFile, char *affFile, char *stopFile) /* lookup if the dictionary (words and affixes) is already loaded in the shared segment */ shdict = get_shared_dict(dictFile, affFile); + /* clear dict affix sources */ + clean_dict_affix(&(info->dict)); + /* load affix list */ NIStartBuild(&(info->dict)); NIImportAffixes(&(info->dict), get_tsearch_config_filename(affFile, "affix")); @@ -397,7 +422,7 @@ PG_FUNCTION_INFO_V1(dispell_list_stoplists); /* * Resets the shared dictionary memory, i.e. removes all the dictionaries. This - * is the only way to remove dictionaries from the memory - either when when + * is the only way to remove dictionaries from the memory - either when * a dictionary is no longer needed or needs to be reloaded (e.g. to update * list of words / affixes). 
*/ @@ -791,7 +816,6 @@ dispell_list_dicts(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; TupleDesc tupdesc; - AttInMetadata *attinmeta; SharedIspellDict *dict; /* init on the first call */ @@ -817,8 +841,7 @@ dispell_list_dicts(PG_FUNCTION_ARGS) * generate attribute metadata needed later to produce tuples from raw * C strings */ - attinmeta = TupleDescGetAttInMetadata(tupdesc); - funcctx->attinmeta = attinmeta; + funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); funcctx->tuple_desc = tupdesc; /* switch back to the old context */ @@ -844,14 +867,14 @@ dispell_list_dicts(PG_FUNCTION_ARGS) memset(nulls, 0, sizeof(nulls)); - dictname = (text *) palloc(strlen(dict->dictFile) + VARHDRSZ); - affname = (text *) palloc(strlen(dict->affixFile) + VARHDRSZ); + dictname = cstring_to_text(dict->dictFile); + affname = cstring_to_text(dict->affixFile); - SET_VARSIZE(dictname, strlen(dict->dictFile) + VARHDRSZ); - SET_VARSIZE(affname, strlen(dict->affixFile) + VARHDRSZ); + // SET_VARSIZE(dictname, strlen(dict->dictFile) + VARHDRSZ); + // SET_VARSIZE(affname, strlen(dict->affixFile) + VARHDRSZ); - strcpy(VARDATA(dictname), dict->dictFile); - strcpy(VARDATA(affname), dict->affixFile); + // strcpy(dictname, dict->dictFile); + // strcpy(affname, dict->affixFile); values[0] = PointerGetDatum(dictname); values[1] = PointerGetDatum(affname); @@ -884,7 +907,6 @@ dispell_list_stoplists(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; TupleDesc tupdesc; - AttInMetadata *attinmeta; SharedStopList *stoplist; /* init on the first call */ @@ -910,8 +932,7 @@ dispell_list_stoplists(PG_FUNCTION_ARGS) * generate attribute metadata needed later to produce tuples from raw * C strings */ - attinmeta = TupleDescGetAttInMetadata(tupdesc); - funcctx->attinmeta = attinmeta; + funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); funcctx->tuple_desc = tupdesc; /* switch back to the old context */ @@ -936,11 +957,11 @@ dispell_list_stoplists(PG_FUNCTION_ARGS) memset(nulls, 0, sizeof(nulls)); - 
stopname = (text *) palloc(strlen(stoplist->stopFile) + VARHDRSZ); + stopname = cstring_to_text(stoplist->stopFile); - SET_VARSIZE(stopname, strlen(stoplist->stopFile) + VARHDRSZ); + // SET_VARSIZE(stopname, strlen(stoplist->stopFile) + VARHDRSZ); - strcpy(VARDATA(stopname), stoplist->stopFile); + // strcpy(VARDATA(stopname), stoplist->stopFile); values[0] = PointerGetDatum(stopname); values[1] = UInt32GetDatum(stoplist->stop.len); From abc6944f46d93561ba74a6d72ab936b9890467f6 Mon Sep 17 00:00:00 2001 From: Artur Zakirov Date: Fri, 12 Feb 2016 19:21:45 +0300 Subject: [PATCH 04/29] shared_ispell.c fixes --- README.md | 3 --- src/shared_ispell.c | 10 ---------- 2 files changed, 13 deletions(-) diff --git a/README.md b/README.md index 55a7195..a0a436a 100644 --- a/README.md +++ b/README.md @@ -40,9 +40,6 @@ the config file (or update the current values) # libraries to load shared_preload_libraries = 'shared_ispell' - # known GUC prefixes - custom_variable_classes = 'shared_ispell' - # config of the shared memory shared_ispell.max_size = 32MB diff --git a/src/shared_ispell.c b/src/shared_ispell.c index eb7d397..e627443 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -870,12 +870,6 @@ dispell_list_dicts(PG_FUNCTION_ARGS) dictname = cstring_to_text(dict->dictFile); affname = cstring_to_text(dict->affixFile); - // SET_VARSIZE(dictname, strlen(dict->dictFile) + VARHDRSZ); - // SET_VARSIZE(affname, strlen(dict->affixFile) + VARHDRSZ); - - // strcpy(dictname, dict->dictFile); - // strcpy(affname, dict->affixFile); - values[0] = PointerGetDatum(dictname); values[1] = PointerGetDatum(affname); values[2] = UInt32GetDatum(dict->nwords); @@ -959,10 +953,6 @@ dispell_list_stoplists(PG_FUNCTION_ARGS) stopname = cstring_to_text(stoplist->stopFile); - // SET_VARSIZE(stopname, strlen(stoplist->stopFile) + VARHDRSZ); - - // strcpy(VARDATA(stopname), stoplist->stopFile); - values[0] = PointerGetDatum(stopname); values[1] = UInt32GetDatum(stoplist->stop.len); values[2] = 
UInt32GetDatum(stoplist->nbytes); From 6f80f07bfed78bccb50fa1cda5725b73c3b39c04 Mon Sep 17 00:00:00 2001 From: Artur Zakirov Date: Wed, 24 Feb 2016 16:26:53 +0300 Subject: [PATCH 05/29] correct installcheck in Makefile --- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index fe0061a..cf91b65 100644 --- a/Makefile +++ b/Makefile @@ -21,5 +21,4 @@ include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif -installcheck: - @echo "installcheck is disabled" +installcheck:; From 99f6e49bbdac2dc4dfc3a2b713bf481431089a2e Mon Sep 17 00:00:00 2001 From: Artur Zakirov Date: Tue, 1 Mar 2016 14:06:25 +0300 Subject: [PATCH 06/29] Added Author part for README --- README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.md b/README.md index a0a436a..b6d359e 100644 --- a/README.md +++ b/README.md @@ -133,3 +133,20 @@ use this prepared data). db=# SELECT shared_ispell_reset(); That's all for now ... + +Changes from original version +----------------------------- +The original version of this module located in the Tomas Vondra's +[GitHub](https://github.com/tvondra/shared_ispell). That version does not handle +affixes that require full regular expressions (regex_t, implemented in regex.h). + +This version of the module can handle that affixes with full regular +exressions. To handle it the module loads and stores affix files in each +sessions. The affix list is tiny and takes a little time and memory to parse. +Actually this is Tomas +[idea](http://www.postgresql.org/message-id/56A5F3D5.9030702@2ndquadrant.com), +but there is not related code in the GitHub. 
+ +Author +------ +Tomas Vondra [GitHub](https://github.com/tvondra) \ No newline at end of file From d6e29e7295a3e7d3aab583ccfb53e9c84d849d32 Mon Sep 17 00:00:00 2001 From: Artur Zakirov Date: Fri, 18 Mar 2016 17:10:34 +0300 Subject: [PATCH 07/29] shared_ispell uses new version of IspellDict struct --- expected/shared_ispell.out | 82 ++++++++------------------------------ sql/shared_ispell.sql | 19 ++------- src/shared_ispell.c | 19 ++++++--- 3 files changed, 35 insertions(+), 85 deletions(-) diff --git a/expected/shared_ispell.out b/expected/shared_ispell.out index e008cee..68e59c9 100644 --- a/expected/shared_ispell.out +++ b/expected/shared_ispell.out @@ -1,16 +1,4 @@ CREATE EXTENSION shared_ispell; -SELECT shared_ispell_mem_available(); - shared_ispell_mem_available ------------------------------ - 1048528 -(1 row) - -SELECT shared_ispell_mem_used(); - shared_ispell_mem_used ------------------------- - 48 -(1 row) - -- Test ISpell dictionary with ispell affix file CREATE TEXT SEARCH DICTIONARY shared_ispell ( Template=shared_ispell, @@ -60,12 +48,6 @@ SELECT ts_lexize('shared_ispell', 'rebooking'); {booking,book} (1 row) -SELECT ts_lexize('shared_ispell', 'rebook'); - ts_lexize ------------ - -(1 row) - SELECT ts_lexize('shared_ispell', 'unbookings'); ts_lexize ----------- @@ -108,18 +90,6 @@ SELECT ts_lexize('shared_ispell', 'footballyklubber'); {foot,ball,klubber} (1 row) -SELECT shared_ispell_mem_available(); - shared_ispell_mem_available ------------------------------ - 980312 -(1 row) - -SELECT shared_ispell_mem_used(); - shared_ispell_mem_used ------------------------- - 68264 -(1 row) - -- Test ISpell dictionary with hunspell affix file CREATE TEXT SEARCH DICTIONARY shared_hunspell ( Template=shared_ispell, @@ -168,12 +138,6 @@ SELECT ts_lexize('shared_hunspell', 'rebooking'); {booking,book} (1 row) -SELECT ts_lexize('shared_hunspell', 'rebook'); - ts_lexize ------------ - -(1 row) - SELECT ts_lexize('shared_hunspell', 'unbookings'); ts_lexize 
----------- @@ -216,29 +180,17 @@ SELECT ts_lexize('shared_hunspell', 'footballyklubber'); {foot,ball,klubber} (1 row) -SELECT shared_ispell_mem_available(); - shared_ispell_mem_available ------------------------------ - 914208 -(1 row) - -SELECT shared_ispell_mem_used(); - shared_ispell_mem_used ------------------------- - 134368 -(1 row) - -SELECT * FROM shared_ispell_dicts(); - dict_name | affix_name | words | affixes | bytes ----------------+-----------------+-------+---------+------- - ispell_sample | hunspell_sample | 8 | 0 | 66104 - ispell_sample | ispell_sample | 8 | 0 | 66104 +SELECT dict_name, affix_name, words, affixes FROM shared_ispell_dicts(); + dict_name | affix_name | words | affixes +---------------+-----------------+-------+--------- + ispell_sample | hunspell_sample | 8 | 7 + ispell_sample | ispell_sample | 8 | 7 (2 rows) -SELECT * FROM shared_ispell_stoplists(); - stop_name | words | bytes ------------+-------+------- - english | 127 | 2112 +SELECT stop_name, words FROM shared_ispell_stoplists(); + stop_name | words +-----------+------- + english | 127 (1 row) SELECT shared_ispell_reset(); @@ -247,15 +199,15 @@ SELECT shared_ispell_reset(); (1 row) -SELECT shared_ispell_mem_available(); - shared_ispell_mem_available ------------------------------ - 1048528 +SELECT ts_lexize('shared_ispell', 'skies'); + ts_lexize +----------- + {sky} (1 row) -SELECT shared_ispell_mem_used(); - shared_ispell_mem_used ------------------------- - 48 +SELECT ts_lexize('shared_hunspell', 'skies'); + ts_lexize +----------- + {sky} (1 row) diff --git a/sql/shared_ispell.sql b/sql/shared_ispell.sql index ee56c2a..e791399 100644 --- a/sql/shared_ispell.sql +++ b/sql/shared_ispell.sql @@ -1,8 +1,5 @@ CREATE EXTENSION shared_ispell; -SELECT shared_ispell_mem_available(); -SELECT shared_ispell_mem_used(); - -- Test ISpell dictionary with ispell affix file CREATE TEXT SEARCH DICTIONARY shared_ispell ( Template=shared_ispell, @@ -18,7 +15,6 @@ SELECT ts_lexize('shared_ispell', 
'foot'); SELECT ts_lexize('shared_ispell', 'foots'); SELECT ts_lexize('shared_ispell', 'rebookings'); SELECT ts_lexize('shared_ispell', 'rebooking'); -SELECT ts_lexize('shared_ispell', 'rebook'); SELECT ts_lexize('shared_ispell', 'unbookings'); SELECT ts_lexize('shared_ispell', 'unbooking'); SELECT ts_lexize('shared_ispell', 'unbook'); @@ -28,9 +24,6 @@ SELECT ts_lexize('shared_ispell', 'footballklubber'); SELECT ts_lexize('shared_ispell', 'ballyklubber'); SELECT ts_lexize('shared_ispell', 'footballyklubber'); -SELECT shared_ispell_mem_available(); -SELECT shared_ispell_mem_used(); - -- Test ISpell dictionary with hunspell affix file CREATE TEXT SEARCH DICTIONARY shared_hunspell ( Template=shared_ispell, @@ -45,7 +38,6 @@ SELECT ts_lexize('shared_hunspell', 'foot'); SELECT ts_lexize('shared_hunspell', 'foots'); SELECT ts_lexize('shared_hunspell', 'rebookings'); SELECT ts_lexize('shared_hunspell', 'rebooking'); -SELECT ts_lexize('shared_hunspell', 'rebook'); SELECT ts_lexize('shared_hunspell', 'unbookings'); SELECT ts_lexize('shared_hunspell', 'unbooking'); SELECT ts_lexize('shared_hunspell', 'unbook'); @@ -55,13 +47,10 @@ SELECT ts_lexize('shared_hunspell', 'footballklubber'); SELECT ts_lexize('shared_hunspell', 'ballyklubber'); SELECT ts_lexize('shared_hunspell', 'footballyklubber'); -SELECT shared_ispell_mem_available(); -SELECT shared_ispell_mem_used(); - -SELECT * FROM shared_ispell_dicts(); -SELECT * FROM shared_ispell_stoplists(); +SELECT dict_name, affix_name, words, affixes FROM shared_ispell_dicts(); +SELECT stop_name, words FROM shared_ispell_stoplists(); SELECT shared_ispell_reset(); -SELECT shared_ispell_mem_available(); -SELECT shared_ispell_mem_used(); \ No newline at end of file +SELECT ts_lexize('shared_ispell', 'skies'); +SELECT ts_lexize('shared_hunspell', 'skies'); \ No newline at end of file diff --git a/src/shared_ispell.c b/src/shared_ispell.c index e627443..13bf802 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -179,14 +179,13 
@@ ispell_shmem_startup() segment = ShmemInitStruct(SEGMENT_NAME, max_ispell_mem_size(), &found); + segment_info = (SegmentInfo *) segment; /* Was the shared memory segment already initialized? */ if (!found) { memset(segment, 0, max_ispell_mem_size()); - segment_info = (SegmentInfo *) segment; - #if PG_VERSION_NUM >= 90600 segment_info->lock = &(GetNamedLWLockTranche("shared_ispell"))->lock; #else @@ -265,6 +264,9 @@ clean_dict_affix(IspellDict *dict) dict->nAffixData = 0; dict->CompoundAffix = NULL; + dict->CompoundAffixFlags = NULL; + dict->nCompoundAffixFlag = 0; + dict->mCompoundAffixFlag = 0; dict->avail = 0; } @@ -316,7 +318,13 @@ init_shared_dict(DictInfo *info, char *dictFile, char *affFile, char *stopFile) NIImportDictionary(dict, get_tsearch_config_filename(dictFile, "dict")); dict->usecompound = info->dict.usecompound; - memcpy(dict->flagval, &(info->dict.flagval), 65000); + + dict->nCompoundAffixFlag = dict->mCompoundAffixFlag = + info->dict.nCompoundAffixFlag; + dict->CompoundAffixFlags = (CompoundAffixFlag *) palloc0( + dict->nCompoundAffixFlag * sizeof(CompoundAffixFlag)); + memcpy(dict->CompoundAffixFlags, info->dict.CompoundAffixFlags, + dict->nCompoundAffixFlag * sizeof(CompoundAffixFlag)); /* * If affix->useFlagAliases == true then AffixData is generated @@ -342,11 +350,12 @@ init_shared_dict(DictInfo *info, char *dictFile, char *affFile, char *stopFile) /* check available space in shared segment */ size = sizeIspellDict(dict, dictFile, affFile); if (size > segment_info->available) - elog(ERROR, "shared dictionary %s.dict / %s.affix needs %d B, only %ld B available", + elog(ERROR, "shared dictionary %s.dict / %s.affix needs %d B, only %zd B available", dictFile, affFile, size, segment_info->available); /* fine, there's enough space - copy the dictionary */ shdict = copyIspellDict(dict, dictFile, affFile, size, dict->nspell); + shdict->dict.naffixes = info->dict.naffixes; /* add the new dictionary to the linked list (of SharedIspellDict 
structures) */ shdict->next = segment_info->shdict; @@ -377,7 +386,7 @@ init_shared_dict(DictInfo *info, char *dictFile, char *affFile, char *stopFile) size = sizeStopList(&stoplist, stopFile); if (size > segment_info->available) - elog(ERROR, "shared stoplist %s.stop needs %d B, only %ld B available", + elog(ERROR, "shared stoplist %s.stop needs %d B, only %zd B available", stopFile, size, segment_info->available); /* fine, there's enough space - copy the stoplist */ From 7381528d1d7aa33f7241cfbddb1df9a56c329b0f Mon Sep 17 00:00:00 2001 From: Artur Zakirov Date: Thu, 13 Apr 2017 17:13:48 +0300 Subject: [PATCH 08/29] Include storage/shmem.h --- src/shared_ispell.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/shared_ispell.c b/src/shared_ispell.c index 13bf802..126dd72 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -52,6 +52,7 @@ #include "postgres.h" #include "miscadmin.h" #include "storage/ipc.h" +#include "storage/shmem.h" #include "commands/defrem.h" #include "tsearch/ts_locale.h" From e4c50b789b31b33989bcd53142fa6d53a491d010 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Wed, 20 Dec 2017 18:49:52 +0300 Subject: [PATCH 09/29] Make shared_ispell compile for PostgreSQL master --- Makefile | 4 +++- src/shared_ispell.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index cf91b65..363eea6 100644 --- a/Makefile +++ b/Makefile @@ -21,4 +21,6 @@ include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif -installcheck:; +# Disabled because these tests require "shared_preload_libraries=shared_ispell", +# which typical installcheck users do not have (e.g. buildfarm clients). 
+installcheck: REGRESS= diff --git a/src/shared_ispell.c b/src/shared_ispell.c index 126dd72..61ac39c 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -166,7 +166,7 @@ _PG_fini(void) static void ispell_shmem_startup() { - bool found = FALSE; + bool found = false; char *segment; if (prev_shmem_startup_hook) From 62df307cf780db3b68f33eab2d72394644998679 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Thu, 25 Jan 2018 14:48:03 +0300 Subject: [PATCH 10/29] Issue #1. Fix mention about USE_PGXS --- .gitignore | 3 +++ README.md | 17 +++++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8a9a6c9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*.o +*.so +results diff --git a/README.md b/README.md index b6d359e..d78c5aa 100644 --- a/README.md +++ b/README.md @@ -16,18 +16,23 @@ dictionary, this may save you a lot of resources. Install ------- -Installing the extension is quite simple, especially if you're on 9.1. -In that case all you need to do is this: - $ make install +Before build and install `shared_ispell` you should ensure following: + +* PostgreSQL version is 9.6 or 10. + +Installing the extension is quite simple. In that case all you need to do is this: + + $ git clone git@github.com:postgrespro/shared_ispell.git + $ cd shared_ispell + $ make USE_PGXS=1 + $ make USE_PGXS=1 install and then (after connecting to the database) db=# CREATE EXTENSION shared_ispell; -If you're on pre-9.1 version, you'll have to do the second part manually -by running the SQL script (shared_ispell--x.y.sql) in the database. If -needed, replace MODULE_PATHNAME by $libdir. +> **Important:** Don't forget to set the `PG_CONFIG` variable in case you want to test `shared_ispell` on a custom build of PostgreSQL. Read more [here](https://wiki.postgresql.org/wiki/Building_and_Installing_PostgreSQL_Extension_Modules). 
Config From bdcadfeea8459caabf616248889b16596473b5eb Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 15 May 2018 16:49:39 +0300 Subject: [PATCH 11/29] Teodor Sigaev Fix segmentation fault. init_shared_dict() copied dictFile into info->affixFile and info->stopFile. --- expected/shared_ispell.out | 6 +++ sql/shared_ispell.sql | 3 +- src/shared_ispell.c | 100 ++++++++++++++++++++++++------------- src/shared_ispell.h | 6 ++- 4 files changed, 77 insertions(+), 38 deletions(-) diff --git a/expected/shared_ispell.out b/expected/shared_ispell.out index 68e59c9..9998cb9 100644 --- a/expected/shared_ispell.out +++ b/expected/shared_ispell.out @@ -211,3 +211,9 @@ SELECT ts_lexize('shared_hunspell', 'skies'); {sky} (1 row) +SELECT ts_lexize('shared_hunspell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + diff --git a/sql/shared_ispell.sql b/sql/shared_ispell.sql index e791399..0a4af97 100644 --- a/sql/shared_ispell.sql +++ b/sql/shared_ispell.sql @@ -53,4 +53,5 @@ SELECT stop_name, words FROM shared_ispell_stoplists(); SELECT shared_ispell_reset(); SELECT ts_lexize('shared_ispell', 'skies'); -SELECT ts_lexize('shared_hunspell', 'skies'); \ No newline at end of file +SELECT ts_lexize('shared_hunspell', 'skies'); +SELECT ts_lexize('shared_hunspell', 'skies'); diff --git a/src/shared_ispell.c b/src/shared_ispell.c index 61ac39c..5c59d7b 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -20,33 +20,33 @@ * ===== shared segment init (postmaster startup) ===== * * _PG_init - * -> ispell_shmem_startup (registered as a hook) + * -> ispell_shmem_startup (registered as a hook) * * ===== dictionary init (backend) ===== * * dispell_init - * -> init_shared_dict - * -> get_shared_dict - * -> NIStartBuild - * -> NIImportDictionary - * -> NIImportAffixes - * -> NISortDictionary - * -> NISortAffixes - * -> NIFinishBuild - * -> sizeIspellDict - * -> copyIspellDict - * -> copySPNode - * -> get_shared_stop_list - * -> readstoplist - * -> copyStopList + * -> 
init_shared_dict + * -> get_shared_dict + * -> NIStartBuild + * -> NIImportDictionary + * -> NIImportAffixes + * -> NISortDictionary + * -> NISortAffixes + * -> NIFinishBuild + * -> sizeIspellDict + * -> copyIspellDict + * -> copySPNode + * -> get_shared_stop_list + * -> readstoplist + * -> copyStopList * * ===== dictionary reinit after reset (backend) ===== * * dispell_lexize - * -> timestamp of lookup < last reset - * -> init_shared_dict - * (see dispell_init above) - * -> SharedNINormalizeWord + * -> timestamp of lookup < last reset + * -> init_shared_dict + * (see dispell_init above) + * -> SharedNINormalizeWord */ #include "postgres.h" @@ -166,7 +166,7 @@ _PG_fini(void) static void ispell_shmem_startup() { - bool found = false; + bool found = FALSE; char *segment; if (prev_shmem_startup_hook) @@ -185,6 +185,12 @@ ispell_shmem_startup() /* Was the shared memory segment already initialized? */ if (!found) { + if (segment == NULL) { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Cannot acquire %d kB of shared memory", + max_ispell_mem_size_kb))); + } memset(segment, 0, max_ispell_mem_size()); #if PG_VERSION_NUM >= 90600 @@ -288,13 +294,9 @@ static void init_shared_dict(DictInfo *info, char *dictFile, char *affFile, char *stopFile) { int size; - SharedIspellDict *shdict = NULL; SharedStopList *shstop = NULL; - IspellDict *dict; - StopList stoplist; - /* DICTIONARY + AFFIXES */ /* TODO This should probably check that the filenames are not NULL, and maybe that @@ -313,6 +315,8 @@ init_shared_dict(DictInfo *info, char *dictFile, char *affFile, char *stopFile) /* load the dictionary (word list) if not yet defined */ if (shdict == NULL) { + IspellDict *dict; + dict = (IspellDict *) palloc0(sizeof(IspellDict)); NIStartBuild(dict); @@ -383,6 +387,8 @@ init_shared_dict(DictInfo *info, char *dictFile, char *affFile, char *stopFile) /* load the stopwords if not yet defined */ if (shstop == NULL) { + StopList stoplist; + readstoplist(stopFile, 
&stoplist, lowerstr); size = sizeStopList(&stoplist, stopFile); @@ -407,11 +413,14 @@ init_shared_dict(DictInfo *info, char *dictFile, char *affFile, char *stopFile) info->lookup = GetCurrentTimestamp(); memcpy(info->dictFile, dictFile, strlen(dictFile) + 1); - memcpy(info->affixFile, dictFile, strlen(affFile)+ 1); + memcpy(info->affixFile, affFile, strlen(affFile) + 1); if (stopFile != NULL) - memcpy(info->stopFile, dictFile, strlen(stopFile) + 1); + memcpy(info->stopFile, stopFile, strlen(stopFile) + 1); else memset(info->stopFile, 0, sizeof(info->stopFile)); + + /* save current context as long-lived */ + info->saveCntx = CurrentMemoryContext; } Datum dispell_init(PG_FUNCTION_ARGS); @@ -498,6 +507,9 @@ dispell_mem_used(PG_FUNCTION_ARGS) * The StopWords parameter is optional, the two other are required. * * If any of the filenames are incorrect, the call to init_shared_dict will fail. + * + * Do not call it directly - it saves current memory context as long-lived + * context. */ Datum dispell_init(PG_FUNCTION_ARGS) @@ -586,7 +598,7 @@ dispell_lexize(PG_FUNCTION_ARGS) char *txt; TSLexeme *res; TSLexeme *ptr, - *cptr; + *cptr; if (len <= 0) PG_RETURN_POINTER(NULL); @@ -599,11 +611,27 @@ dispell_lexize(PG_FUNCTION_ARGS) /* do we need to reinit the dictionary? 
was the dict reset since the lookup */ if (timestamp_cmp_internal(info->lookup, segment_info->lastReset) < 0) { + DictInfo saveInfo = *info; + MemoryContext ctx; + /* relock in exclusive mode */ LWLockRelease(segment_info->lock); LWLockAcquire(segment_info->lock, LW_EXCLUSIVE); - init_shared_dict(info, info->dictFile, info->affixFile, info->stopFile); + /* + * info is allocated in info->saveCntx, so that's why we use a copy of + * info here + */ + + MemoryContextResetAndDeleteChildren(saveInfo.saveCntx); + ctx = MemoryContextSwitchTo(saveInfo.saveCntx); + + info = palloc0(sizeof(*info)); + + init_shared_dict(info, saveInfo.dictFile, + saveInfo.affixFile, saveInfo.stopFile); + + MemoryContextSwitchTo(ctx); } res = NINormalizeWord(&(info->dict), txt); @@ -697,13 +725,13 @@ copySPNode(SPNode *node) SPNode *copy = NULL; if (node == NULL) - return NULL; + return NULL; copy = (SPNode *) shalloc(offsetof(SPNode, data) + sizeof(SPNodeData) * node->length); memcpy(copy, node, offsetof(SPNode, data) + sizeof(SPNodeData) * node->length); for (i = 0; i < node->length; i++) - copy->data[i].node = copySPNode(node->data[i].node); + copy->data[i].node = copySPNode(node->data[i].node); return copy; } @@ -715,7 +743,7 @@ sizeSPNode(SPNode *node) int size = 0; if (node == NULL) - return 0; + return 0; size = MAXALIGN(offsetof(SPNode, data) + sizeof(SPNodeData) * node->length); @@ -815,7 +843,7 @@ sizeIspellDict(IspellDict *dict, char *dictFile, char *affixFile) /* copy affix data */ size += MAXALIGN(sizeof(char *) * dict->nAffixData); for (i = 0; i < dict->nAffixData; i++) - size += MAXALIGN(sizeof(char) * strlen(dict->AffixData[i]) + 1); + size += MAXALIGN(sizeof(char) * strlen(dict->AffixData[i]) + 1); return size; } @@ -842,10 +870,10 @@ dispell_list_dicts(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("function 
returning record called in context " - "that cannot accept type record"))); + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("function returning record called in context " + "that cannot accept type record"))); /* * generate attribute metadata needed later to produce tuples from raw diff --git a/src/shared_ispell.h b/src/shared_ispell.h index 92de330..2039ffe 100644 --- a/src/shared_ispell.h +++ b/src/shared_ispell.h @@ -2,6 +2,7 @@ #define __SHARED_ISPELL_H__ #include "storage/lwlock.h" +#include "utils/memutils.h" #include "utils/timestamp.h" #include "tsearch/dicts/spell.h" #include "tsearch/ts_public.h" @@ -66,6 +67,9 @@ typedef struct DictInfo SharedIspellDict *shdict; IspellDict dict; SharedStopList *shstop; + + /* MemoryContext of dict local content */ + MemoryContext saveCntx; } DictInfo; -#endif \ No newline at end of file +#endif From 38b259fd278cb6fb8836ea52e731941a80220f6a Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Wed, 16 May 2018 12:37:57 +0300 Subject: [PATCH 12/29] Style fixes, revert unnecessary ereport() and FALSE --- src/shared_ispell.c | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/src/shared_ispell.c b/src/shared_ispell.c index 5c59d7b..ca1fed7 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -166,7 +166,7 @@ _PG_fini(void) static void ispell_shmem_startup() { - bool found = FALSE; + bool found = false; char *segment; if (prev_shmem_startup_hook) @@ -177,27 +177,19 @@ ispell_shmem_startup() */ LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); - segment = ShmemInitStruct(SEGMENT_NAME, - max_ispell_mem_size(), - &found); + segment = ShmemInitStruct(SEGMENT_NAME, max_ispell_mem_size(), &found); segment_info = (SegmentInfo *) segment; /* Was the shared memory segment already initialized? 
*/ if (!found) { - if (segment == NULL) { - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("Cannot acquire %d kB of shared memory", - max_ispell_mem_size_kb))); - } memset(segment, 0, max_ispell_mem_size()); - #if PG_VERSION_NUM >= 90600 +#if PG_VERSION_NUM >= 90600 segment_info->lock = &(GetNamedLWLockTranche("shared_ispell"))->lock; - #else +#else segment_info->lock = LWLockAssign(); - #endif +#endif segment_info->firstfree = segment + MAXALIGN(sizeof(SegmentInfo)); segment_info->available = max_ispell_mem_size() - (int)(segment_info->firstfree - segment); @@ -293,9 +285,9 @@ clean_dict_affix(IspellDict *dict) static void init_shared_dict(DictInfo *info, char *dictFile, char *affFile, char *stopFile) { - int size; + int size; SharedIspellDict *shdict = NULL; - SharedStopList *shstop = NULL; + SharedStopList *shstop = NULL; /* DICTIONARY + AFFIXES */ @@ -315,7 +307,7 @@ init_shared_dict(DictInfo *info, char *dictFile, char *affFile, char *stopFile) /* load the dictionary (word list) if not yet defined */ if (shdict == NULL) { - IspellDict *dict; + IspellDict *dict; dict = (IspellDict *) palloc0(sizeof(IspellDict)); @@ -337,11 +329,13 @@ init_shared_dict(DictInfo *info, char *dictFile, char *affFile, char *stopFile) */ if (info->dict.useFlagAliases) { - int i; + int i; + dict->useFlagAliases = true; dict->lenAffixData = info->dict.lenAffixData; dict->nAffixData = info->dict.nAffixData; dict->AffixData = (char **) palloc0(dict->nAffixData * sizeof(char *)); + for (i = 0; i < dict->nAffixData; i++) { dict->AffixData[i] = palloc0(strlen(info->dict.AffixData[i]) + 1); @@ -721,8 +715,8 @@ shstrcpy(char *str) static SPNode * copySPNode(SPNode *node) { - int i; - SPNode *copy = NULL; + int i; + SPNode *copy = NULL; if (node == NULL) return NULL; @@ -739,8 +733,8 @@ copySPNode(SPNode *node) static int sizeSPNode(SPNode *node) { - int i; - int size = 0; + int i; + int size = 0; if (node == NULL) return 0; @@ -852,9 +846,9 @@ sizeIspellDict(IspellDict 
*dict, char *dictFile, char *affixFile) Datum dispell_list_dicts(PG_FUNCTION_ARGS) { - FuncCallContext *funcctx; - TupleDesc tupdesc; - SharedIspellDict *dict; + FuncCallContext *funcctx; + TupleDesc tupdesc; + SharedIspellDict *dict; /* init on the first call */ if (SRF_IS_FIRSTCALL()) From fa70830b4fc7f7942015d3d2dddccae719dc6160 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Wed, 16 May 2018 12:47:34 +0300 Subject: [PATCH 13/29] Do not palloc0() DictInfo, MemSet() it --- src/shared_ispell.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shared_ispell.c b/src/shared_ispell.c index ca1fed7..61d21e3 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -620,7 +620,7 @@ dispell_lexize(PG_FUNCTION_ARGS) MemoryContextResetAndDeleteChildren(saveInfo.saveCntx); ctx = MemoryContextSwitchTo(saveInfo.saveCntx); - info = palloc0(sizeof(*info)); + MemSet(info, 0, sizeof(*info)); init_shared_dict(info, saveInfo.dictFile, saveInfo.affixFile, saveInfo.stopFile); From 8b55719f90f11d3a95f7700795bd3b5c6dc165e3 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Wed, 16 May 2018 15:33:04 +0300 Subject: [PATCH 14/29] fa70830 isn't correct. Fix the bug by creating child context of entry->dictCtx. --- Makefile | 2 +- src/shared_ispell.c | 30 +++++++++++++++++++----------- src/shared_ispell.h | 2 +- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 363eea6..dbb4136 100644 --- a/Makefile +++ b/Makefile @@ -23,4 +23,4 @@ endif # Disabled because these tests require "shared_preload_libraries=shared_ispell", # which typical installcheck users do not have (e.g. buildfarm clients). -installcheck: REGRESS= +#installcheck: REGRESS= diff --git a/src/shared_ispell.c b/src/shared_ispell.c index 61d21e3..7803759 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -283,11 +283,15 @@ clean_dict_affix(IspellDict *dict) * of the shared memory (using SegmentInfo->lock). 
*/ static void -init_shared_dict(DictInfo *info, char *dictFile, char *affFile, char *stopFile) +init_shared_dict(DictInfo *info, MemoryContext infoCntx, + char *dictFile, char *affFile, char *stopFile) { int size; SharedIspellDict *shdict = NULL; SharedStopList *shstop = NULL; + MemoryContext oldctx; + + oldctx = MemoryContextSwitchTo(infoCntx); /* DICTIONARY + AFFIXES */ @@ -413,8 +417,9 @@ init_shared_dict(DictInfo *info, char *dictFile, char *affFile, char *stopFile) else memset(info->stopFile, 0, sizeof(info->stopFile)); + MemoryContextSwitchTo(oldctx); /* save current context as long-lived */ - info->saveCntx = CurrentMemoryContext; + info->infoCntx = infoCntx; } Datum dispell_init(PG_FUNCTION_ARGS); @@ -576,7 +581,15 @@ dispell_init(PG_FUNCTION_ARGS) /* search if the dictionary is already initialized */ LWLockAcquire(segment_info->lock, LW_EXCLUSIVE); - init_shared_dict(info, dictFile, affFile, stopFile); + /* + * Current context is a long lived context. Create child context to store + * DictInfo internal data. + */ + info->infoCntx = AllocSetContextCreate(CurrentMemoryContext, + "shared_ispell context", + ALLOCSET_DEFAULT_SIZES); + + init_shared_dict(info, info->infoCntx, dictFile, affFile, stopFile); LWLockRelease(segment_info->lock); @@ -605,8 +618,7 @@ dispell_lexize(PG_FUNCTION_ARGS) /* do we need to reinit the dictionary? 
was the dict reset since the lookup */ if (timestamp_cmp_internal(info->lookup, segment_info->lastReset) < 0) { - DictInfo saveInfo = *info; - MemoryContext ctx; + DictInfo saveInfo = *info; /* relock in exclusive mode */ LWLockRelease(segment_info->lock); @@ -617,15 +629,11 @@ dispell_lexize(PG_FUNCTION_ARGS) * info here */ - MemoryContextResetAndDeleteChildren(saveInfo.saveCntx); - ctx = MemoryContextSwitchTo(saveInfo.saveCntx); - + MemoryContextResetAndDeleteChildren(saveInfo.infoCntx); MemSet(info, 0, sizeof(*info)); - init_shared_dict(info, saveInfo.dictFile, + init_shared_dict(info, saveInfo.infoCntx, saveInfo.dictFile, saveInfo.affixFile, saveInfo.stopFile); - - MemoryContextSwitchTo(ctx); } res = NINormalizeWord(&(info->dict), txt); diff --git a/src/shared_ispell.h b/src/shared_ispell.h index 2039ffe..ca4d014 100644 --- a/src/shared_ispell.h +++ b/src/shared_ispell.h @@ -69,7 +69,7 @@ typedef struct DictInfo SharedStopList *shstop; /* MemoryContext of dict local content */ - MemoryContext saveCntx; + MemoryContext infoCntx; } DictInfo; #endif From 6431b333b5ff72080e92502e0e5d8959b4317d1a Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Wed, 16 May 2018 15:36:14 +0300 Subject: [PATCH 15/29] Revert Makefile accidental changes --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dbb4136..363eea6 100644 --- a/Makefile +++ b/Makefile @@ -23,4 +23,4 @@ endif # Disabled because these tests require "shared_preload_libraries=shared_ispell", # which typical installcheck users do not have (e.g. buildfarm clients). 
-#installcheck: REGRESS= +installcheck: REGRESS= From 1afd1a5dec75d9e710e5d8b2390f7470909ae45e Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 18 May 2018 11:19:41 +0300 Subject: [PATCH 16/29] Code style fixes --- src/shared_ispell.c | 49 +++++++++++++++++++++------------------------ src/shared_ispell.h | 6 +++--- 2 files changed, 26 insertions(+), 29 deletions(-) diff --git a/src/shared_ispell.c b/src/shared_ispell.c index 7803759..ec459e8 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -94,7 +94,7 @@ static int sizeStopList(StopList *list, char *stopFile); static Size max_ispell_mem_size() { - return (Size)max_ispell_mem_size_kb * 1024L; + return (Size) max_ispell_mem_size_kb * 1024L; } /* @@ -134,11 +134,11 @@ _PG_init(void) */ RequestAddinShmemSpace(max_ispell_mem_size()); - #if PG_VERSION_NUM >= 90600 +#if PG_VERSION_NUM >= 90600 RequestNamedLWLockTranche("shared_ispell", 1); - #else +#else RequestAddinLWLocks(1); - #endif +#endif /* Install hooks. */ prev_shmem_startup_hook = shmem_startup_hook; @@ -166,8 +166,8 @@ _PG_fini(void) static void ispell_shmem_startup() { - bool found = false; - char *segment; + bool found = false; + char *segment; if (prev_shmem_startup_hook) prev_shmem_startup_hook(); @@ -191,8 +191,8 @@ ispell_shmem_startup() segment_info->lock = LWLockAssign(); #endif segment_info->firstfree = segment + MAXALIGN(sizeof(SegmentInfo)); - segment_info->available = max_ispell_mem_size() - - (int)(segment_info->firstfree - segment); + segment_info->available = max_ispell_mem_size() - + (int) (segment_info->firstfree - segment); segment_info->lastReset = GetCurrentTimestamp(); } @@ -311,7 +311,7 @@ init_shared_dict(DictInfo *info, MemoryContext infoCntx, /* load the dictionary (word list) if not yet defined */ if (shdict == NULL) { - IspellDict *dict; + IspellDict *dict; dict = (IspellDict *) palloc0(sizeof(IspellDict)); @@ -422,14 +422,6 @@ init_shared_dict(DictInfo *info, MemoryContext infoCntx, info->infoCntx = infoCntx; } 
-Datum dispell_init(PG_FUNCTION_ARGS); -Datum dispell_lexize(PG_FUNCTION_ARGS); -Datum dispell_reset(PG_FUNCTION_ARGS); -Datum dispell_mem_available(PG_FUNCTION_ARGS); -Datum dispell_mem_used(PG_FUNCTION_ARGS); -Datum dispell_list_dicts(PG_FUNCTION_ARGS); -Datum dispell_list_stoplists(PG_FUNCTION_ARGS); - PG_FUNCTION_INFO_V1(dispell_init); PG_FUNCTION_INFO_V1(dispell_lexize); PG_FUNCTION_INFO_V1(dispell_reset); @@ -453,7 +445,8 @@ dispell_reset(PG_FUNCTION_ARGS) segment_info->shstop = NULL; segment_info->lastReset = GetCurrentTimestamp(); segment_info->firstfree = ((char*) segment_info) + MAXALIGN(sizeof(SegmentInfo)); - segment_info->available = max_ispell_mem_size() - (int)(segment_info->firstfree - (char*) segment_info); + segment_info->available = max_ispell_mem_size() - + (int) (segment_info->firstfree - (char*) segment_info); memset(segment_info->firstfree, 0, segment_info->available); @@ -479,12 +472,14 @@ dispell_mem_available(PG_FUNCTION_ARGS) } /* - * Returns amount of 'occupied space' in the shared segment (used by current dictionaries). + * Returns amount of 'occupied space' in the shared segment (used by current + * dictionaries). 
*/ Datum dispell_mem_used(PG_FUNCTION_ARGS) { - int result = 0; + int result = 0; + LWLockAcquire(segment_info->lock, LW_SHARED); result = max_ispell_mem_size() - segment_info->available; @@ -679,7 +674,8 @@ dispell_lexize(PG_FUNCTION_ARGS) static char * shalloc(int bytes) { - char *result; + char *result; + bytes = MAXALIGN(bytes); /* This shouldn't really happen, as the init_shared_dict checks the size @@ -706,8 +702,10 @@ shalloc(int bytes) static char * shstrcpy(char *str) { - char *tmp = shalloc(strlen(str) + 1); + char *tmp = shalloc(strlen(str) + 1); + memcpy(tmp, str, strlen(str) + 1); + return tmp; } @@ -801,8 +799,7 @@ sizeStopList(StopList *list, char *stopFile) static SharedIspellDict * copyIspellDict(IspellDict *dict, char *dictFile, char *affixFile, int size, int words) { - int i; - + int i; SharedIspellDict *copy = (SharedIspellDict *) shalloc(sizeof(SharedIspellDict)); copy->dictFile = shalloc(strlen(dictFile) + 1); @@ -834,8 +831,8 @@ copyIspellDict(IspellDict *dict, char *dictFile, char *affixFile, int size, int static int sizeIspellDict(IspellDict *dict, char *dictFile, char *affixFile) { - int i; - int size = MAXALIGN(sizeof(SharedIspellDict)); + int i; + int size = MAXALIGN(sizeof(SharedIspellDict)); size += MAXALIGN(strlen(dictFile) + 1); size += MAXALIGN(strlen(affixFile) + 1); diff --git a/src/shared_ispell.h b/src/shared_ispell.h index ca4d014..2cfb422 100644 --- a/src/shared_ispell.h +++ b/src/shared_ispell.h @@ -42,9 +42,9 @@ typedef struct SharedStopList typedef struct SegmentInfo { LWLockId lock; - char *firstfree; /* first free address (always maxaligned) */ - size_t available; /* free space remaining at firstfree */ - Timestamp lastReset; /* last reset of the dictionary */ + char *firstfree; /* first free address (always maxaligned) */ + size_t available; /* free space remaining at firstfree */ + Timestamp lastReset; /* last reset of the dictionary */ /* the shared segment (info and data) */ SharedIspellDict *shdict; From 
0ad59c2dfc1a52fe61b137f666fd378b19811c3a Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Wed, 23 May 2018 11:41:56 +0300 Subject: [PATCH 17/29] Enable installcheck --- Makefile | 3 --- 1 file changed, 3 deletions(-) diff --git a/Makefile b/Makefile index 363eea6..6ff9e12 100644 --- a/Makefile +++ b/Makefile @@ -21,6 +21,3 @@ include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif -# Disabled because these tests require "shared_preload_libraries=shared_ispell", -# which typical installcheck users do not have (e.g. buildfarm clients). -installcheck: REGRESS= From d7a1e85f540b3d9e408290dbe39d8d3ceaa8e165 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 29 May 2018 15:11:27 +0300 Subject: [PATCH 18/29] Copy flagMode into shared dictionary structure. Issue https://github.com/postgrespro/hunspell_dicts/issues/3#issuecomment-392709311 --- src/shared_ispell.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/shared_ispell.c b/src/shared_ispell.c index ec459e8..9192f56 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -318,6 +318,7 @@ init_shared_dict(DictInfo *info, MemoryContext infoCntx, NIStartBuild(dict); NIImportDictionary(dict, get_tsearch_config_filename(dictFile, "dict")); + dict->flagMode = info->dict.flagMode; dict->usecompound = info->dict.usecompound; dict->nCompoundAffixFlag = dict->mCompoundAffixFlag = @@ -816,6 +817,8 @@ copyIspellDict(IspellDict *dict, char *dictFile, char *affixFile, int size, int for (i = 0; i < copy->dict.nAffixData; i++) copy->dict.AffixData[i] = shstrcpy(dict->AffixData[i]); + copy->dict.flagMode = dict->flagMode; + copy->nbytes = size; copy->nwords = words; From e6d908dc28464b441b15037b46fccf47bfc10378 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 7 Aug 2018 18:30:42 +0300 Subject: [PATCH 19/29] Add and dirs into .gitignore --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 8a9a6c9..b87b714 
100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *.o *.so -results +/log/ +/results/ +/tmp_check/ From 49da9567c4b31587b03e173febad7fd02503dcf6 Mon Sep 17 00:00:00 2001 From: Marina Polyakova Date: Fri, 5 Oct 2018 12:18:48 +0300 Subject: [PATCH 20/29] PGPRO-2033: a try to fix the use of dispell_reset in shared_ispell on Windows The shared_ispell dictionary is loaded into memory if the timestamp of its last lookup is less than the timestamp of the reset of the shared dictionary memory. These timestamps used the function gettimeofday() which is not for Win32 high precision timing purposes. Therefore sometimes the timestamp of the last dictionary lookup and the timestamp of the reset of the shared dictionary memory are equal and the function ts_lexize returns NULL (from buildfarm). To avoid this use the structure instr_time and its macros/functions with portable high-precision interval timing for the same purpose. --- src/shared_ispell.c | 9 +++++---- src/shared_ispell.h | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/shared_ispell.c b/src/shared_ispell.c index 9192f56..cbf78ae 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -194,7 +194,7 @@ ispell_shmem_startup() segment_info->available = max_ispell_mem_size() - (int) (segment_info->firstfree - segment); - segment_info->lastReset = GetCurrentTimestamp(); + INSTR_TIME_SET_CURRENT(segment_info->lastReset); } LWLockRelease(AddinShmemInitLock); @@ -409,7 +409,7 @@ init_shared_dict(DictInfo *info, MemoryContext infoCntx, info->shdict = shdict; info->shstop = shstop; - info->lookup = GetCurrentTimestamp(); + INSTR_TIME_SET_CURRENT(info->lookup); memcpy(info->dictFile, dictFile, strlen(dictFile) + 1); memcpy(info->affixFile, affFile, strlen(affFile) + 1); @@ -444,7 +444,7 @@ dispell_reset(PG_FUNCTION_ARGS) segment_info->shdict = NULL; segment_info->shstop = NULL; - segment_info->lastReset = GetCurrentTimestamp(); + INSTR_TIME_SET_CURRENT(segment_info->lastReset); 
segment_info->firstfree = ((char*) segment_info) + MAXALIGN(sizeof(SegmentInfo)); segment_info->available = max_ispell_mem_size() - (int) (segment_info->firstfree - (char*) segment_info); @@ -612,7 +612,8 @@ dispell_lexize(PG_FUNCTION_ARGS) LWLockAcquire(segment_info->lock, LW_SHARED); /* do we need to reinit the dictionary? was the dict reset since the lookup */ - if (timestamp_cmp_internal(info->lookup, segment_info->lastReset) < 0) + if (INSTR_TIME_GET_MICROSEC(info->lookup) < + INSTR_TIME_GET_MICROSEC(segment_info->lastReset)) { DictInfo saveInfo = *info; diff --git a/src/shared_ispell.h b/src/shared_ispell.h index 2cfb422..cbba198 100644 --- a/src/shared_ispell.h +++ b/src/shared_ispell.h @@ -44,7 +44,7 @@ typedef struct SegmentInfo LWLockId lock; char *firstfree; /* first free address (always maxaligned) */ size_t available; /* free space remaining at firstfree */ - Timestamp lastReset; /* last reset of the dictionary */ + instr_time lastReset; /* last reset of the dictionary */ /* the shared segment (info and data) */ SharedIspellDict *shdict; @@ -54,7 +54,7 @@ typedef struct SegmentInfo /* used to keep track of dictionary in each backend */ typedef struct DictInfo { - Timestamp lookup; + instr_time lookup; char dictFile[MAXLEN]; char affixFile[MAXLEN]; From bfa1978652c9e9e2d971e6e1119571e0db129b2f Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 2 Nov 2018 15:18:37 +0300 Subject: [PATCH 21/29] Update copyrights and license --- LICENSE | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/LICENSE b/LICENSE index 417dcbb..54e49a5 100644 --- a/LICENSE +++ b/LICENSE @@ -1,25 +1,19 @@ -Copyright 2012, Tomas Vondra (tv@fuzzy.cz). All rights reserved. +Copyright (c) 2016-2018, Postgres Professional +Portions Copyright 2012, Tomas Vondra (tv@fuzzy.cz). All rights reserved. 
-Redistribution and use in source and binary forms, with or without modification, are -permitted provided that the following conditions are met: +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose, without fee, and without a written agreement +is hereby granted, provided that the above copyright notice and this +paragraph and the following two paragraphs appear in all copies. - 1. Redistributions of source code must retain the above copyright notice, this list of - conditions and the following disclaimer. +IN NO EVENT SHALL POSTGRES PROFESSIONAL BE LIABLE TO ANY PARTY FOR +DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING +LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS +DOCUMENTATION, EVEN IF POSTGRES PROFESSIONAL HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. - 2. Redistributions in binary form must reproduce the above copyright notice, this list - of conditions and the following disclaimer in the documentation and/or other materials - provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY TOMAS VONDRA ''AS IS'' AND ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL TOMAS VONDRA OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -The views and conclusions contained in the software and documentation are those of the -authors and should not be interpreted as representing official policies, either expressed -or implied, of Tomas Vondra. \ No newline at end of file +POSTGRES PROFESSIONAL SPECIFICALLY DISCLAIMS ANY WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS +ON AN "AS IS" BASIS, AND POSTGRES PROFESSIONAL HAS NO OBLIGATIONS TO +PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. From f0e83041ebfc5435babb1c2d60c7d6646a7dcfc8 Mon Sep 17 00:00:00 2001 From: Daria Lepikhova Date: Mon, 9 Nov 2020 15:03:50 +0500 Subject: [PATCH 22/29] Fixed security CVE_2020_14350. Added tests --- Makefile | 2 +- expected/security.out | 40 +++++++++++++++++++++++++++++++++++++ shared_ispell--1.1.0.sql | 14 ++++++------- sql/security.sql | 43 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 8 deletions(-) create mode 100644 expected/security.out create mode 100644 sql/security.sql diff --git a/Makefile b/Makefile index 6ff9e12..15d3187 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ OBJS = src/shared_ispell.o EXTENSION = shared_ispell DATA = shared_ispell--1.1.0.sql -REGRESS = shared_ispell +REGRESS = security shared_ispell EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/postgresql.conf diff --git a/expected/security.out b/expected/security.out new file mode 100644 index 0000000..d6522bd --- /dev/null +++ b/expected/security.out @@ -0,0 +1,40 @@ +create type si_dicts_result as (dict_name VARCHAR, affix_name VARCHAR, words INT, affixes INT, bytes INT); +create function shared_ispell_dicts( OUT dict_name VARCHAR, OUT affix_name VARCHAR, OUT words INT, OUT affixes INT, OUT bytes INT) +returns SETOF record as $$ +declare + qString varchar(4000); + rec si_dicts_result; +begin + qString := 'select * from shared_ispell_dicts()'; + for rec in execute 
qString loop + return NEXT; + end loop; + return; +end +$$ language plpgsql; +create extension shared_ispell; +ERROR: function "shared_ispell_dicts" already exists with same argument types +drop extension if exists shared_ispell; +NOTICE: extension "shared_ispell" does not exist, skipping +drop type si_dicts_result; +drop function shared_ispell_dicts; +create type si_stoplists_result as (stop_name VARCHAR, words INT, bytes INT); +create function shared_ispell_stoplists(OUT stop_name VARCHAR, OUT words INT, OUT bytes INT) +returns SETOF record as $$ +declare + rec si_stoplists_result; + qString varchar(4000); +begin + qString := 'select * from shared_ispell_stoplists()'; + for rec in execute qString loop + return NEXT; + end loop; + return; +end +$$ language plpgsql; +create extension shared_ispell; +ERROR: function "shared_ispell_stoplists" already exists with same argument types +drop extension if exists shared_ispell; +NOTICE: extension "shared_ispell" does not exist, skipping +drop type si_stoplists_result; +drop function shared_ispell_stoplists; diff --git a/shared_ispell--1.1.0.sql b/shared_ispell--1.1.0.sql index 07c3ac3..7f638ab 100644 --- a/shared_ispell--1.1.0.sql +++ b/shared_ispell--1.1.0.sql @@ -1,34 +1,34 @@ -CREATE OR REPLACE FUNCTION shared_ispell_init(internal) +CREATE FUNCTION shared_ispell_init(internal) RETURNS internal AS 'MODULE_PATHNAME', 'dispell_init' LANGUAGE C IMMUTABLE; -CREATE OR REPLACE FUNCTION shared_ispell_lexize(internal,internal,internal,internal) +CREATE FUNCTION shared_ispell_lexize(internal,internal,internal,internal) RETURNS internal AS 'MODULE_PATHNAME', 'dispell_lexize' LANGUAGE C IMMUTABLE; -CREATE OR REPLACE FUNCTION shared_ispell_reset() +CREATE FUNCTION shared_ispell_reset() RETURNS void AS 'MODULE_PATHNAME', 'dispell_reset' LANGUAGE C IMMUTABLE; -CREATE OR REPLACE FUNCTION shared_ispell_mem_used() +CREATE FUNCTION shared_ispell_mem_used() RETURNS integer AS 'MODULE_PATHNAME', 'dispell_mem_used' LANGUAGE C IMMUTABLE; 
-CREATE OR REPLACE FUNCTION shared_ispell_mem_available() +CREATE FUNCTION shared_ispell_mem_available() RETURNS integer AS 'MODULE_PATHNAME', 'dispell_mem_available' LANGUAGE C IMMUTABLE; -CREATE OR REPLACE FUNCTION shared_ispell_dicts( OUT dict_name VARCHAR, OUT affix_name VARCHAR, OUT words INT, OUT affixes INT, OUT bytes INT) +CREATE FUNCTION shared_ispell_dicts( OUT dict_name VARCHAR, OUT affix_name VARCHAR, OUT words INT, OUT affixes INT, OUT bytes INT) RETURNS SETOF record AS 'MODULE_PATHNAME', 'dispell_list_dicts' LANGUAGE C IMMUTABLE; -CREATE OR REPLACE FUNCTION shared_ispell_stoplists( OUT stop_name VARCHAR, OUT words INT, OUT bytes INT) +CREATE FUNCTION shared_ispell_stoplists( OUT stop_name VARCHAR, OUT words INT, OUT bytes INT) RETURNS SETOF record AS 'MODULE_PATHNAME', 'dispell_list_stoplists' LANGUAGE C IMMUTABLE; diff --git a/sql/security.sql b/sql/security.sql new file mode 100644 index 0000000..c29b1a5 --- /dev/null +++ b/sql/security.sql @@ -0,0 +1,43 @@ +create type si_dicts_result as (dict_name VARCHAR, affix_name VARCHAR, words INT, affixes INT, bytes INT); + +create function shared_ispell_dicts( OUT dict_name VARCHAR, OUT affix_name VARCHAR, OUT words INT, OUT affixes INT, OUT bytes INT) +returns SETOF record as $$ +declare + qString varchar(4000); + rec si_dicts_result; +begin + qString := 'select * from shared_ispell_dicts()'; + for rec in execute qString loop + return NEXT; + end loop; + return; +end +$$ language plpgsql; + +create extension shared_ispell; + +drop extension if exists shared_ispell; +drop type si_dicts_result; +drop function shared_ispell_dicts; + +create type si_stoplists_result as (stop_name VARCHAR, words INT, bytes INT); + +create function shared_ispell_stoplists(OUT stop_name VARCHAR, OUT words INT, OUT bytes INT) +returns SETOF record as $$ +declare + rec si_stoplists_result; + qString varchar(4000); +begin + qString := 'select * from shared_ispell_stoplists()'; + for rec in execute qString loop + return NEXT; + end 
loop; + return; +end +$$ language plpgsql; + +create extension shared_ispell; + +drop extension if exists shared_ispell; +drop type si_stoplists_result; +drop function shared_ispell_stoplists; From 8f061f79b6619bce620bfcf8d0df92b17595e3fd Mon Sep 17 00:00:00 2001 From: Daria Lepikhova Date: Wed, 11 Nov 2020 08:54:48 +0500 Subject: [PATCH 23/29] Fixed test for 9.6 --- expected/security.out | 4 ++-- sql/security.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/expected/security.out b/expected/security.out index d6522bd..6f73aa1 100644 --- a/expected/security.out +++ b/expected/security.out @@ -17,7 +17,7 @@ ERROR: function "shared_ispell_dicts" already exists with same argument types drop extension if exists shared_ispell; NOTICE: extension "shared_ispell" does not exist, skipping drop type si_dicts_result; -drop function shared_ispell_dicts; +drop function shared_ispell_dicts(); create type si_stoplists_result as (stop_name VARCHAR, words INT, bytes INT); create function shared_ispell_stoplists(OUT stop_name VARCHAR, OUT words INT, OUT bytes INT) returns SETOF record as $$ @@ -37,4 +37,4 @@ ERROR: function "shared_ispell_stoplists" already exists with same argument typ drop extension if exists shared_ispell; NOTICE: extension "shared_ispell" does not exist, skipping drop type si_stoplists_result; -drop function shared_ispell_stoplists; +drop function shared_ispell_stoplists(); diff --git a/sql/security.sql b/sql/security.sql index c29b1a5..33a09e1 100644 --- a/sql/security.sql +++ b/sql/security.sql @@ -18,7 +18,7 @@ create extension shared_ispell; drop extension if exists shared_ispell; drop type si_dicts_result; -drop function shared_ispell_dicts; +drop function shared_ispell_dicts(); create type si_stoplists_result as (stop_name VARCHAR, words INT, bytes INT); @@ -40,4 +40,4 @@ create extension shared_ispell; drop extension if exists shared_ispell; drop type si_stoplists_result; -drop function shared_ispell_stoplists; +drop function 
shared_ispell_stoplists(); From be086277d4fde7fbb5830cecdcfb127732931f5f Mon Sep 17 00:00:00 2001 From: Ivan Panchenko Date: Fri, 23 Apr 2021 07:22:29 +0300 Subject: [PATCH 24/29] Updated PostgreSQL version info --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d78c5aa..9f9b6d8 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Install Before build and install `shared_ispell` you should ensure following: -* PostgreSQL version is 9.6 or 10. +* PostgreSQL version is 9.6 or later. Installing the extension is quite simple. In that case all you need to do is this: From 64b5868f0e831cb07e46fe02a81db7edd16b9b57 Mon Sep 17 00:00:00 2001 From: Anton Voloshin Date: Wed, 20 Jul 2022 12:28:46 +0300 Subject: [PATCH 25/29] adapt shared_ispell for upcoming PostgreSQL 15 1. Only call RequestAddinShmemSpace and RequestNamedLWLockTranche from within our implementation of shmem_request_hook (as required after commit 4f2400cb3 in PostgreSQL 15). 2. While we are here, remove _PG_fini, as it is now officially dead after commit ab02d702e in PostgreSQL 15. 
--- src/shared_ispell.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/shared_ispell.c b/src/shared_ispell.c index cbf78ae..04587fd 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -66,12 +66,11 @@ PG_MODULE_MAGIC; void _PG_init(void); -void _PG_fini(void); /* Memory for dictionaries in kbytes */ static int max_ispell_mem_size_kb; -/* Saved hook values in case of unload */ +/* Saved hook value for proper chaining */ static shmem_startup_hook_type prev_shmem_startup_hook = NULL; /* These are used to allocate data within shared segment */ @@ -97,6 +96,11 @@ max_ispell_mem_size() return (Size) max_ispell_mem_size_kb * 1024L; } +#if (PG_VERSION_NUM >= 150000) +static shmem_request_hook_type prev_shmem_request_hook = NULL; +static void shared_ispell_shmem_request(void); +#endif + /* * Module load callback */ @@ -127,17 +131,17 @@ _PG_init(void) EmitWarningsOnPlaceholders("shared_ispell"); - /* - * Request additional shared resources. (These are no-ops if we're not in - * the postmaster process.) We'll allocate or attach to the shared - * resources in ispell_shmem_startup(). - */ +#if PG_VERSION_NUM >= 150000 + prev_shmem_request_hook = shmem_request_hook; + shmem_request_hook = shared_ispell_shmem_request; +#else RequestAddinShmemSpace(max_ispell_mem_size()); #if PG_VERSION_NUM >= 90600 RequestNamedLWLockTranche("shared_ispell", 1); #else RequestAddinLWLocks(1); +#endif #endif /* Install hooks. */ @@ -145,15 +149,15 @@ _PG_init(void) shmem_startup_hook = ispell_shmem_startup; } - -/* - * Module unload callback - */ -void -_PG_fini(void) +static void +shared_ispell_shmem_request(void) { - /* Uninstall hooks. 
*/ - shmem_startup_hook = prev_shmem_startup_hook; + if (prev_shmem_request_hook) + prev_shmem_request_hook(); + + RequestAddinShmemSpace(max_ispell_mem_size()); + + RequestNamedLWLockTranche("shared_ispell", 1); } /* From 8b3f4d463ea00618ca6e73a354b9293f711a0331 Mon Sep 17 00:00:00 2001 From: Yura Sokolov Date: Mon, 8 Aug 2022 12:37:31 +0300 Subject: [PATCH 26/29] fix compilation on pre-15 --- src/shared_ispell.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/shared_ispell.c b/src/shared_ispell.c index 04587fd..95a8b9d 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -149,6 +149,7 @@ _PG_init(void) shmem_startup_hook = ispell_shmem_startup; } +#if PG_VERSION_NUM >= 150000 static void shared_ispell_shmem_request(void) { @@ -159,6 +160,7 @@ shared_ispell_shmem_request(void) RequestNamedLWLockTranche("shared_ispell", 1); } +#endif /* * Probably the most important part of the startup - initializes the From 39ecfe635327ecad41504ea31495f9965e9671b3 Mon Sep 17 00:00:00 2001 From: Marina Polyakova Date: Fri, 18 Nov 2022 08:27:38 +0300 Subject: [PATCH 27/29] Fix build due to new changes in PostgreSQL 16 --- src/shared_ispell.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/shared_ispell.c b/src/shared_ispell.c index 95a8b9d..4fac4fb 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -59,6 +59,7 @@ #include "access/htup_details.h" #include "funcapi.h" #include "utils/builtins.h" +#include "utils/guc.h" #include "shared_ispell.h" #include "tsearch/dicts/spell.h" From b580a8256f87b8fdf3406eeac465a7d1f82235c2 Mon Sep 17 00:00:00 2001 From: Marina Polyakova Date: Wed, 24 Jan 2024 10:15:55 +0300 Subject: [PATCH 28/29] Retire MemoryContextResetAndDeleteChildren() macro. Caused by the following commits in PostgreSQL: - eaa5808e8ec4e82ce1a87103a6b6f687666e4e4c (PostgreSQL 9.5) Redefine MemoryContextReset() as deleting, not resetting, child contexts. 
- 6a72c42fd5af7ada49584694f543eb06dddb4a87 (PostgreSQL 17) Retire MemoryContextResetAndDeleteChildren() macro. --- src/shared_ispell.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shared_ispell.c b/src/shared_ispell.c index 4fac4fb..37243e2 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -633,7 +633,7 @@ dispell_lexize(PG_FUNCTION_ARGS) * info here */ - MemoryContextResetAndDeleteChildren(saveInfo.infoCntx); + MemoryContextReset(saveInfo.infoCntx); MemSet(info, 0, sizeof(*info)); init_shared_dict(info, saveInfo.infoCntx, saveInfo.dictFile, From 4a9dce8c7d6806ee7c01dfbf5fdd8fe7a05a3083 Mon Sep 17 00:00:00 2001 From: Zharkov Roman Date: Tue, 21 Jan 2025 16:54:13 +0300 Subject: [PATCH 29/29] Add meson.build file to support building from the contrib source tree. --- meson.build | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 meson.build diff --git a/meson.build b/meson.build new file mode 100644 index 0000000..0f07821 --- /dev/null +++ b/meson.build @@ -0,0 +1,39 @@ +# Copyright (c) 2025, Postgres Professional + +# Does not support the PGXS infrastructure at this time. Please, compile as part +# of the contrib source tree. + +shared_ispell_sources = files( + 'src' / 'shared_ispell.c' +) + +if host_system == 'windows' + shared_ispell_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'shared_ispell', + '--FILEDESC', 'shared_ispell - provides a shared ispell dictionary, i.e. 
a dictionary that\'s stored in shared segment.',]) +endif + +shared_ispell = shared_module('shared_ispell', + shared_ispell_sources, + kwargs: contrib_mod_args, +) +contrib_targets += shared_ispell + +install_data( + 'shared_ispell.control', + 'shared_ispell--1.1.0.sql', + kwargs: contrib_data_args, +) + +tests += { + 'name': 'shared_ispell', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'security', + 'shared_ispell', + ], + 'regress_args': ['--temp-config', files('postgresql.conf')], + }, +}