diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b87b714 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.o +*.so +/log/ +/results/ +/tmp_check/ diff --git a/LICENSE b/LICENSE index 417dcbb..54e49a5 100644 --- a/LICENSE +++ b/LICENSE @@ -1,25 +1,19 @@ -Copyright 2012, Tomas Vondra (tv@fuzzy.cz). All rights reserved. +Copyright (c) 2016-2018, Postgres Professional +Portions Copyright 2012, Tomas Vondra (tv@fuzzy.cz). All rights reserved. -Redistribution and use in source and binary forms, with or without modification, are -permitted provided that the following conditions are met: +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose, without fee, and without a written agreement +is hereby granted, provided that the above copyright notice and this +paragraph and the following two paragraphs appear in all copies. - 1. Redistributions of source code must retain the above copyright notice, this list of - conditions and the following disclaimer. +IN NO EVENT SHALL POSTGRES PROFESSIONAL BE LIABLE TO ANY PARTY FOR +DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING +LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS +DOCUMENTATION, EVEN IF POSTGRES PROFESSIONAL HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. - 2. Redistributions in binary form must reproduce the above copyright notice, this list - of conditions and the following disclaimer in the documentation and/or other materials - provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY TOMAS VONDRA ''AS IS'' AND ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL TOMAS VONDRA OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -The views and conclusions contained in the software and documentation are those of the -authors and should not be interpreted as representing official policies, either expressed -or implied, of Tomas Vondra. \ No newline at end of file +POSTGRES PROFESSIONAL SPECIFICALLY DISCLAIMS ANY WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS +ON AN "AS IS" BASIS, AND POSTGRES PROFESSIONAL HAS NO OBLIGATIONS TO +PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. diff --git a/Makefile b/Makefile index 2f9574d..15d3187 100644 --- a/Makefile +++ b/Makefile @@ -1,18 +1,23 @@ +# contrib/shared_ispell/Makefile + MODULE_big = shared_ispell -OBJS = src/shared_ispell.o src/spell.o +OBJS = src/shared_ispell.o EXTENSION = shared_ispell -DATA = sql/shared_ispell--1.0.0.sql -MODULES = shared_ispell +DATA = shared_ispell--1.1.0.sql + +REGRESS = security shared_ispell -CFLAGS=`pg_config --includedir-server` +EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/postgresql.conf +ifdef USE_PGXS PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) +else +subdir = contrib/shared_ispell +top_builddir = ../.. 
+include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif -all: shared_ispell.so - -shared_ispell.so: $(OBJS) - -%.o : src/%.c diff --git a/README.md b/README.md index e24f81e..9f9b6d8 100644 --- a/README.md +++ b/README.md @@ -13,30 +13,26 @@ If you need just snowball-type dictionaries, this extension is not really interesting for you. But if you really need an ispell dictionary, this may save you a lot of resources. -Warning + +Install ------- -The extension does not yet handle affixes that require full regular -expressions (regex_t, implemented in regex.h). This is indicated by -an error when initializing the dictionary. -Simple affixes and affixes that can be handled by fast regex subset -(as implemented in regis.h) are handled just fine. +Before build and install `shared_ispell` you should ensure following: +* PostgreSQL version is 9.6 or later. -Install -------- -Installing the extension is quite simple, especially if you're on 9.1. -In that case all you need to do is this: +Installing the extension is quite simple. In that case all you need to do is this: - $ make install + $ git clone git@github.com:postgrespro/shared_ispell.git + $ cd shared_ispell + $ make USE_PGXS=1 + $ make USE_PGXS=1 install and then (after connecting to the database) db=# CREATE EXTENSION shared_ispell; -If you're on pre-9.1 version, you'll have to do the second part manually -by running the SQL script (shared_ispell--x.y.sql) in the database. If -needed, replace MODULE_PATHNAME by $libdir. +> **Important:** Don't forget to set the `PG_CONFIG` variable in case you want to test `shared_ispell` on a custom build of PostgreSQL. Read more [here](https://wiki.postgresql.org/wiki/Building_and_Installing_PostgreSQL_Extension_Modules). 
Config @@ -49,9 +45,6 @@ the config file (or update the current values) # libraries to load shared_preload_libraries = 'shared_ispell' - # known GUC prefixes - custom_variable_classes = 'shared_ispell' - # config of the shared memory shared_ispell.max_size = 32MB @@ -144,4 +137,21 @@ use this prepared data). db=# SELECT shared_ispell_reset(); -That's all for now ... \ No newline at end of file +That's all for now ... + +Changes from original version +----------------------------- +The original version of this module is located in Tomas Vondra's +[GitHub](https://github.com/tvondra/shared_ispell). That version does not handle +affixes that require full regular expressions (regex_t, implemented in regex.h). + +This version of the module can handle those affixes with full regular +expressions. To handle them the module loads and stores affix files in each +session. The affix list is tiny and takes a little time and memory to parse. +Actually this is Tomas's +[idea](http://www.postgresql.org/message-id/56A5F3D5.9030702@2ndquadrant.com), +but there is no related code in the GitHub. 
+ +Author +------ +Tomas Vondra [GitHub](https://github.com/tvondra) \ No newline at end of file diff --git a/expected/security.out b/expected/security.out new file mode 100644 index 0000000..6f73aa1 --- /dev/null +++ b/expected/security.out @@ -0,0 +1,40 @@ +create type si_dicts_result as (dict_name VARCHAR, affix_name VARCHAR, words INT, affixes INT, bytes INT); +create function shared_ispell_dicts( OUT dict_name VARCHAR, OUT affix_name VARCHAR, OUT words INT, OUT affixes INT, OUT bytes INT) +returns SETOF record as $$ +declare + qString varchar(4000); + rec si_dicts_result; +begin + qString := 'select * from shared_ispell_dicts()'; + for rec in execute qString loop + return NEXT; + end loop; + return; +end +$$ language plpgsql; +create extension shared_ispell; +ERROR: function "shared_ispell_dicts" already exists with same argument types +drop extension if exists shared_ispell; +NOTICE: extension "shared_ispell" does not exist, skipping +drop type si_dicts_result; +drop function shared_ispell_dicts(); +create type si_stoplists_result as (stop_name VARCHAR, words INT, bytes INT); +create function shared_ispell_stoplists(OUT stop_name VARCHAR, OUT words INT, OUT bytes INT) +returns SETOF record as $$ +declare + rec si_stoplists_result; + qString varchar(4000); +begin + qString := 'select * from shared_ispell_stoplists()'; + for rec in execute qString loop + return NEXT; + end loop; + return; +end +$$ language plpgsql; +create extension shared_ispell; +ERROR: function "shared_ispell_stoplists" already exists with same argument types +drop extension if exists shared_ispell; +NOTICE: extension "shared_ispell" does not exist, skipping +drop type si_stoplists_result; +drop function shared_ispell_stoplists(); diff --git a/expected/shared_ispell.out b/expected/shared_ispell.out new file mode 100644 index 0000000..9998cb9 --- /dev/null +++ b/expected/shared_ispell.out @@ -0,0 +1,219 @@ +CREATE EXTENSION shared_ispell; +-- Test ISpell dictionary with ispell affix file 
+CREATE TEXT SEARCH DICTIONARY shared_ispell ( + Template=shared_ispell, + DictFile=ispell_sample, + AffFile=ispell_sample, + Stopwords=english +); +SELECT ts_lexize('shared_ispell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT ts_lexize('shared_ispell', 'bookings'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('shared_ispell', 'booking'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('shared_ispell', 'foot'); + ts_lexize +----------- + {foot} +(1 row) + +SELECT ts_lexize('shared_ispell', 'foots'); + ts_lexize +----------- + {foot} +(1 row) + +SELECT ts_lexize('shared_ispell', 'rebookings'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('shared_ispell', 'rebooking'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('shared_ispell', 'unbookings'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('shared_ispell', 'unbooking'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('shared_ispell', 'unbook'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('shared_ispell', 'footklubber'); + ts_lexize +---------------- + {foot,klubber} +(1 row) + +SELECT ts_lexize('shared_ispell', 'footballklubber'); + ts_lexize +------------------------------------------------------ + {footballklubber,foot,ball,klubber,football,klubber} +(1 row) + +SELECT ts_lexize('shared_ispell', 'ballyklubber'); + ts_lexize +---------------- + {ball,klubber} +(1 row) + +SELECT ts_lexize('shared_ispell', 'footballyklubber'); + ts_lexize +--------------------- + {foot,ball,klubber} +(1 row) + +-- Test ISpell dictionary with hunspell affix file +CREATE TEXT SEARCH DICTIONARY shared_hunspell ( + Template=shared_ispell, + DictFile=ispell_sample, + AffFile=hunspell_sample +); +SELECT ts_lexize('shared_hunspell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'bookings'); + ts_lexize 
+---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'booking'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'foot'); + ts_lexize +----------- + {foot} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'foots'); + ts_lexize +----------- + {foot} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'rebookings'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'rebooking'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'unbookings'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'unbooking'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'unbook'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'footklubber'); + ts_lexize +---------------- + {foot,klubber} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'footballklubber'); + ts_lexize +------------------------------------------------------ + {footballklubber,foot,ball,klubber,football,klubber} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'ballyklubber'); + ts_lexize +---------------- + {ball,klubber} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'footballyklubber'); + ts_lexize +--------------------- + {foot,ball,klubber} +(1 row) + +SELECT dict_name, affix_name, words, affixes FROM shared_ispell_dicts(); + dict_name | affix_name | words | affixes +---------------+-----------------+-------+--------- + ispell_sample | hunspell_sample | 8 | 7 + ispell_sample | ispell_sample | 8 | 7 +(2 rows) + +SELECT stop_name, words FROM shared_ispell_stoplists(); + stop_name | words +-----------+------- + english | 127 +(1 row) + +SELECT shared_ispell_reset(); + shared_ispell_reset +--------------------- + +(1 row) + +SELECT ts_lexize('shared_ispell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'skies'); + 
ts_lexize +----------- + {sky} +(1 row) + +SELECT ts_lexize('shared_hunspell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + diff --git a/meson.build b/meson.build new file mode 100644 index 0000000..0f07821 --- /dev/null +++ b/meson.build @@ -0,0 +1,39 @@ +# Copyright (c) 2025, Postgres Professional + +# Does not support the PGXS infrastructure at this time. Please, compile as part +# of the contrib source tree. + +shared_ispell_sources = files( + 'src' / 'shared_ispell.c' +) + +if host_system == 'windows' + shared_ispell_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'shared_ispell', + '--FILEDESC', 'shared_ispell - provides a shared ispell dictionary, i.e. a dictionary that\'s stored in shared segment.',]) +endif + +shared_ispell = shared_module('shared_ispell', + shared_ispell_sources, + kwargs: contrib_mod_args, +) +contrib_targets += shared_ispell + +install_data( + 'shared_ispell.control', + 'shared_ispell--1.1.0.sql', + kwargs: contrib_data_args, +) + +tests += { + 'name': 'shared_ispell', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'security', + 'shared_ispell', + ], + 'regress_args': ['--temp-config', files('postgresql.conf')], + }, +} diff --git a/postgresql.conf b/postgresql.conf new file mode 100644 index 0000000..e3dcb7b --- /dev/null +++ b/postgresql.conf @@ -0,0 +1,2 @@ +shared_preload_libraries = 'shared_ispell' +shared_ispell.max_size = 1MB diff --git a/sql/shared_ispell--1.0.0.sql b/shared_ispell--1.1.0.sql similarity index 67% rename from sql/shared_ispell--1.0.0.sql rename to shared_ispell--1.1.0.sql index 07c3ac3..7f638ab 100644 --- a/sql/shared_ispell--1.0.0.sql +++ b/shared_ispell--1.1.0.sql @@ -1,34 +1,34 @@ -CREATE OR REPLACE FUNCTION shared_ispell_init(internal) +CREATE FUNCTION shared_ispell_init(internal) RETURNS internal AS 'MODULE_PATHNAME', 'dispell_init' LANGUAGE C IMMUTABLE; -CREATE OR REPLACE FUNCTION 
shared_ispell_lexize(internal,internal,internal,internal) +CREATE FUNCTION shared_ispell_lexize(internal,internal,internal,internal) RETURNS internal AS 'MODULE_PATHNAME', 'dispell_lexize' LANGUAGE C IMMUTABLE; -CREATE OR REPLACE FUNCTION shared_ispell_reset() +CREATE FUNCTION shared_ispell_reset() RETURNS void AS 'MODULE_PATHNAME', 'dispell_reset' LANGUAGE C IMMUTABLE; -CREATE OR REPLACE FUNCTION shared_ispell_mem_used() +CREATE FUNCTION shared_ispell_mem_used() RETURNS integer AS 'MODULE_PATHNAME', 'dispell_mem_used' LANGUAGE C IMMUTABLE; -CREATE OR REPLACE FUNCTION shared_ispell_mem_available() +CREATE FUNCTION shared_ispell_mem_available() RETURNS integer AS 'MODULE_PATHNAME', 'dispell_mem_available' LANGUAGE C IMMUTABLE; -CREATE OR REPLACE FUNCTION shared_ispell_dicts( OUT dict_name VARCHAR, OUT affix_name VARCHAR, OUT words INT, OUT affixes INT, OUT bytes INT) +CREATE FUNCTION shared_ispell_dicts( OUT dict_name VARCHAR, OUT affix_name VARCHAR, OUT words INT, OUT affixes INT, OUT bytes INT) RETURNS SETOF record AS 'MODULE_PATHNAME', 'dispell_list_dicts' LANGUAGE C IMMUTABLE; -CREATE OR REPLACE FUNCTION shared_ispell_stoplists( OUT stop_name VARCHAR, OUT words INT, OUT bytes INT) +CREATE FUNCTION shared_ispell_stoplists( OUT stop_name VARCHAR, OUT words INT, OUT bytes INT) RETURNS SETOF record AS 'MODULE_PATHNAME', 'dispell_list_stoplists' LANGUAGE C IMMUTABLE; diff --git a/shared_ispell.control b/shared_ispell.control index 5380e28..8fab766 100644 --- a/shared_ispell.control +++ b/shared_ispell.control @@ -1,6 +1,6 @@ # shared ispell dictionary comment = 'Provides shared ispell dictionaries.' 
-default_version = '1.0.0' +default_version = '1.1.0' relocatable = true module_pathname = '$libdir/shared_ispell' diff --git a/sql/security.sql b/sql/security.sql new file mode 100644 index 0000000..33a09e1 --- /dev/null +++ b/sql/security.sql @@ -0,0 +1,43 @@ +create type si_dicts_result as (dict_name VARCHAR, affix_name VARCHAR, words INT, affixes INT, bytes INT); + +create function shared_ispell_dicts( OUT dict_name VARCHAR, OUT affix_name VARCHAR, OUT words INT, OUT affixes INT, OUT bytes INT) +returns SETOF record as $$ +declare + qString varchar(4000); + rec si_dicts_result; +begin + qString := 'select * from shared_ispell_dicts()'; + for rec in execute qString loop + return NEXT; + end loop; + return; +end +$$ language plpgsql; + +create extension shared_ispell; + +drop extension if exists shared_ispell; +drop type si_dicts_result; +drop function shared_ispell_dicts(); + +create type si_stoplists_result as (stop_name VARCHAR, words INT, bytes INT); + +create function shared_ispell_stoplists(OUT stop_name VARCHAR, OUT words INT, OUT bytes INT) +returns SETOF record as $$ +declare + rec si_stoplists_result; + qString varchar(4000); +begin + qString := 'select * from shared_ispell_stoplists()'; + for rec in execute qString loop + return NEXT; + end loop; + return; +end +$$ language plpgsql; + +create extension shared_ispell; + +drop extension if exists shared_ispell; +drop type si_stoplists_result; +drop function shared_ispell_stoplists(); diff --git a/sql/shared_ispell.sql b/sql/shared_ispell.sql new file mode 100644 index 0000000..0a4af97 --- /dev/null +++ b/sql/shared_ispell.sql @@ -0,0 +1,57 @@ +CREATE EXTENSION shared_ispell; + +-- Test ISpell dictionary with ispell affix file +CREATE TEXT SEARCH DICTIONARY shared_ispell ( + Template=shared_ispell, + DictFile=ispell_sample, + AffFile=ispell_sample, + Stopwords=english +); + +SELECT ts_lexize('shared_ispell', 'skies'); +SELECT ts_lexize('shared_ispell', 'bookings'); +SELECT ts_lexize('shared_ispell', 
'booking'); +SELECT ts_lexize('shared_ispell', 'foot'); +SELECT ts_lexize('shared_ispell', 'foots'); +SELECT ts_lexize('shared_ispell', 'rebookings'); +SELECT ts_lexize('shared_ispell', 'rebooking'); +SELECT ts_lexize('shared_ispell', 'unbookings'); +SELECT ts_lexize('shared_ispell', 'unbooking'); +SELECT ts_lexize('shared_ispell', 'unbook'); + +SELECT ts_lexize('shared_ispell', 'footklubber'); +SELECT ts_lexize('shared_ispell', 'footballklubber'); +SELECT ts_lexize('shared_ispell', 'ballyklubber'); +SELECT ts_lexize('shared_ispell', 'footballyklubber'); + +-- Test ISpell dictionary with hunspell affix file +CREATE TEXT SEARCH DICTIONARY shared_hunspell ( + Template=shared_ispell, + DictFile=ispell_sample, + AffFile=hunspell_sample +); + +SELECT ts_lexize('shared_hunspell', 'skies'); +SELECT ts_lexize('shared_hunspell', 'bookings'); +SELECT ts_lexize('shared_hunspell', 'booking'); +SELECT ts_lexize('shared_hunspell', 'foot'); +SELECT ts_lexize('shared_hunspell', 'foots'); +SELECT ts_lexize('shared_hunspell', 'rebookings'); +SELECT ts_lexize('shared_hunspell', 'rebooking'); +SELECT ts_lexize('shared_hunspell', 'unbookings'); +SELECT ts_lexize('shared_hunspell', 'unbooking'); +SELECT ts_lexize('shared_hunspell', 'unbook'); + +SELECT ts_lexize('shared_hunspell', 'footklubber'); +SELECT ts_lexize('shared_hunspell', 'footballklubber'); +SELECT ts_lexize('shared_hunspell', 'ballyklubber'); +SELECT ts_lexize('shared_hunspell', 'footballyklubber'); + +SELECT dict_name, affix_name, words, affixes FROM shared_ispell_dicts(); +SELECT stop_name, words FROM shared_ispell_stoplists(); + +SELECT shared_ispell_reset(); + +SELECT ts_lexize('shared_ispell', 'skies'); +SELECT ts_lexize('shared_hunspell', 'skies'); +SELECT ts_lexize('shared_hunspell', 'skies'); diff --git a/src/shared_ispell.c b/src/shared_ispell.c index 2276ab1..37243e2 100644 --- a/src/shared_ispell.c +++ b/src/shared_ispell.c @@ -5,149 +5,102 @@ * dictionaries are copied in memory multiple times. 
The connections * also need to initialize the dictionary on their own, which may take * up to a few seconds. - * + * * This means the connections are either long-lived (and each keeps * a private copy of the dictionary, wasting memory), or short-lived * (resulting in high latencies when the dictionary is initialized). - * + * * This extension is storing a single copy of the dictionary in a shared * memory so that all connections may use it, saving memory and CPU time. - * - * + * + * * The flow within the shared ispell may be slightly confusing, so this * is a brief summary of the main flows within the code. - * + * * ===== shared segment init (postmaster startup) ===== - * + * * _PG_init - * -> ispell_shmem_startup (registered as a hook) - * + * -> ispell_shmem_startup (registered as a hook) + * * ===== dictionary init (backend) ===== - * + * * dispell_init - * -> init_shared_dict - * -> get_shared_dict - * -> NIStartBuild - * -> NIImportDictionary - * -> NIImportAffixes - * -> NISortDictionary - * -> NISortAffixes - * -> NIFinishBuild - * -> sizeIspellDict - * -> copyIspellDict - * -> copyAffixNode (prefixes) - * -> copyAffixNode (suffixes) - * -> copySPNode - * -> copy affix data - * -> copy compound affixes - * -> get_shared_stop_list - * -> readstoplist - * -> copyStopList - * + * -> init_shared_dict + * -> get_shared_dict + * -> NIStartBuild + * -> NIImportDictionary + * -> NIImportAffixes + * -> NISortDictionary + * -> NISortAffixes + * -> NIFinishBuild + * -> sizeIspellDict + * -> copyIspellDict + * -> copySPNode + * -> get_shared_stop_list + * -> readstoplist + * -> copyStopList + * * ===== dictionary reinit after reset (backend) ===== - * + * * dispell_lexize - * -> timestamp of lookup < last reset - * -> init_shared_dict - * (see dispell_init above) - * -> SharedNINormalizeWord + * -> timestamp of lookup < last reset + * -> init_shared_dict + * (see dispell_init above) + * -> SharedNINormalizeWord */ -#include -#include -#include -#include - -#include 
-#include - #include "postgres.h" #include "miscadmin.h" #include "storage/ipc.h" -#include "storage/fd.h" +#include "storage/shmem.h" -#include "commands/explain.h" -#include "executor/executor.h" -#include "executor/instrument.h" -#include "utils/guc.h" #include "commands/defrem.h" #include "tsearch/ts_locale.h" -#include "storage/lwlock.h" -#include "utils/timestamp.h" #include "access/htup_details.h" - #include "funcapi.h" +#include "utils/builtins.h" +#include "utils/guc.h" -#include "libpq/md5.h" - -#include "spell.h" +#include "shared_ispell.h" #include "tsearch/dicts/spell.h" -#ifdef PG_MODULE_MAGIC PG_MODULE_MAGIC; -#endif -#if (PG_VERSION_NUM < 90100) -#define NIStartBuild(dict) -#define NIFinishBuild(dict) -#endif +void _PG_init(void); -/* private functions */ -static void ispell_shmem_startup(void); - -/* This segment is initialized in the first process that accesses it (see - * ispell_shmem_startup function). - */ -#define SEGMENT_NAME "shared_ispell" - -static int max_ispell_mem_size = (30*1024*1024); /* 50MB by default */ +/* Memory for dictionaries in kbytes */ +static int max_ispell_mem_size_kb; -/* Saved hook values in case of unload */ +/* Saved hook value for proper chaining */ static shmem_startup_hook_type prev_shmem_startup_hook = NULL; -void _PG_init(void); -void _PG_fini(void); - -/* used to allocate memory in the shared segment */ -typedef struct SegmentInfo { - - LWLockId lock; - char *firstfree; /* first free address (always maxaligned) */ - size_t available; /* free space remaining at firstfree */ - Timestamp lastReset; /* last reset of the dictionary */ - - /* the shared segment (info and data) */ - SharedIspellDict * dict; - SharedStopList * stop; - -} SegmentInfo; - -#define MAXLEN 255 - -/* used to keep track of dictionary in each backend */ -typedef struct DictInfo { - - Timestamp lookup; - - char dictFile[MAXLEN]; - char affixFile[MAXLEN]; - char stopFile[MAXLEN]; +/* These are used to allocate data within shared segment */ 
+static SegmentInfo *segment_info = NULL; - SharedIspellDict * dict; - SharedStopList * stop; +static void ispell_shmem_startup(void); -} DictInfo; +static char *shalloc(int bytes); +static char *shstrcpy(char *str); -/* These are used to allocate data within shared segment */ -static SegmentInfo * segment_info = NULL; +static SharedIspellDict *copyIspellDict(IspellDict *dict, char *dictFile, char *affixFile, int bytes, int words); +static SharedStopList *copyStopList(StopList *list, char *stopFile, int bytes); -static char * shalloc(int bytes); +static int sizeIspellDict(IspellDict *dict, char *dictFile, char *affixFile); +static int sizeStopList(StopList *list, char *stopFile); -static SharedIspellDict * copyIspellDict(IspellDict * dict, char * dictFile, char * affixFile, int bytes, int words); -static SharedStopList * copyStopList(StopList * list, char * stopFile, int bytes); +/* + * Get memory for dictionaries in bytes + */ +static Size +max_ispell_mem_size() +{ + return (Size) max_ispell_mem_size_kb * 1024L; +} -static int sizeIspellDict(IspellDict * dict, char * dictFile, char * affixFile); -static int sizeStopList(StopList * list, char * stopFile); +#if (PG_VERSION_NUM >= 150000) +static shmem_request_hook_type prev_shmem_request_hook = NULL; +static void shared_ispell_shmem_request(void); +#endif /* * Module load callback @@ -155,267 +108,327 @@ static int sizeStopList(StopList * list, char * stopFile); void _PG_init(void) { - - /* */ - if (! process_shared_preload_libraries_in_progress) { - elog(ERROR, "shared_ispell has to be loaded using shared_preload_libraries"); - return; - } - - /* Define custom GUC variables. */ - - /* How much memory should we preallocate for the dictionaries (limits how many - * dictionaries you can load into the shared segment). 
*/ - DefineCustomIntVariable("shared_ispell.max_size", - "amount of memory to pre-allocate for ispell dictionaries", - NULL, - &max_ispell_mem_size, - (32*1024*1024), - (1024*1024), INT_MAX, - PGC_POSTMASTER, - GUC_UNIT_BLOCKS, -#if (PG_VERSION_NUM >= 90100) - NULL, + if (!process_shared_preload_libraries_in_progress) { + elog(ERROR, "shared_ispell has to be loaded using shared_preload_libraries"); + return; + } + + /* Define custom GUC variables. */ + + /* How much memory should we preallocate for the dictionaries (limits how many + * dictionaries you can load into the shared segment). */ + DefineCustomIntVariable("shared_ispell.max_size", + "amount of memory to pre-allocate for ispell dictionaries", + NULL, + &max_ispell_mem_size_kb, + 50 * 1024, /* default 50MB */ + 1024, /* min 1MB */ + INT_MAX, + PGC_POSTMASTER, + GUC_UNIT_KB, + NULL, + NULL, + NULL); + + EmitWarningsOnPlaceholders("shared_ispell"); + +#if PG_VERSION_NUM >= 150000 + prev_shmem_request_hook = shmem_request_hook; + shmem_request_hook = shared_ispell_shmem_request; +#else + RequestAddinShmemSpace(max_ispell_mem_size()); + +#if PG_VERSION_NUM >= 90600 + RequestNamedLWLockTranche("shared_ispell", 1); +#else + RequestAddinLWLocks(1); +#endif #endif - NULL, - NULL); - EmitWarningsOnPlaceholders("shared_ispell"); + /* Install hooks. */ + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = ispell_shmem_startup; +} - /* - * Request additional shared resources. (These are no-ops if we're not in - * the postmaster process.) We'll allocate or attach to the shared - * resources in ispell_shmem_startup(). - */ - RequestAddinShmemSpace(max_ispell_mem_size); - RequestAddinLWLocks(1); +#if PG_VERSION_NUM >= 150000 +static void +shared_ispell_shmem_request(void) +{ + if (prev_shmem_request_hook) + prev_shmem_request_hook(); - /* Install hooks. 
*/ - prev_shmem_startup_hook = shmem_startup_hook; - shmem_startup_hook = ispell_shmem_startup; + RequestAddinShmemSpace(max_ispell_mem_size()); + RequestNamedLWLockTranche("shared_ispell", 1); } - +#endif /* - * Module unload callback - */ -void -_PG_fini(void) -{ - /* Uninstall hooks. */ - shmem_startup_hook = prev_shmem_startup_hook; -} - - -/* * Probably the most important part of the startup - initializes the * memory in shared memory segment (creates and initializes the * SegmentInfo data structure). - * - * This is called from a shmem_startup_hook (see _PG_init). */ -static -void ispell_shmem_startup() { - - bool found = FALSE; - char * segment; - - if (prev_shmem_startup_hook) - prev_shmem_startup_hook(); - - elog(DEBUG1, "initializing shared ispell segment (size: %d B)", - max_ispell_mem_size); - - /* - * Create or attach to the shared memory state, including hash table - */ - LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); - - segment = ShmemInitStruct(SEGMENT_NAME, - max_ispell_mem_size, - &found); - - /* Was the shared memory segment already initialized? */ - if (! found) { - - memset(segment, 0, max_ispell_mem_size); + * + * This is called from a shmem_startup_hook (see _PG_init). 
+ */ +static void +ispell_shmem_startup() +{ + bool found = false; + char *segment; - segment_info = (SegmentInfo*)segment; + if (prev_shmem_startup_hook) + prev_shmem_startup_hook(); - segment_info->lock = LWLockAssign(); - segment_info->firstfree = segment + MAXALIGN(sizeof(SegmentInfo)); - segment_info->available = max_ispell_mem_size - (int)(segment_info->firstfree - segment); + /* + * Create or attach to the shared memory state, including hash table + */ + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); - segment_info->lastReset = GetCurrentTimestamp(); + segment = ShmemInitStruct(SEGMENT_NAME, max_ispell_mem_size(), &found); + segment_info = (SegmentInfo *) segment; - elog(DEBUG1, "shared memory segment (shared ispell) successfully created"); + /* Was the shared memory segment already initialized? */ + if (!found) + { + memset(segment, 0, max_ispell_mem_size()); - } +#if PG_VERSION_NUM >= 90600 + segment_info->lock = &(GetNamedLWLockTranche("shared_ispell"))->lock; +#else + segment_info->lock = LWLockAssign(); +#endif + segment_info->firstfree = segment + MAXALIGN(sizeof(SegmentInfo)); + segment_info->available = max_ispell_mem_size() - + (int) (segment_info->firstfree - segment); - LWLockRelease(AddinShmemInitLock); + INSTR_TIME_SET_CURRENT(segment_info->lastReset); + } + LWLockRelease(AddinShmemInitLock); } /* * This is called from backends that are looking up for a shared dictionary * definition using a filename with dictionary / affixes. - * + * * This is called through dispell_init() which is responsible for proper locking * of the shared memory (using SegmentInfo->lock). 
*/ -static -SharedIspellDict * get_shared_dict(char * words, char * affixes) { - - SharedIspellDict * dict = segment_info->dict; +static SharedIspellDict * +get_shared_dict(char *words, char *affixes) +{ + SharedIspellDict *dict = segment_info->shdict; - while (dict != NULL) { - if ((strcmp(dict->dictFile, words) == 0) && - (strcmp(dict->affixFile, affixes) == 0)) { - return dict; - } - dict = dict->next; - } + while (dict != NULL) + { + if ((strcmp(dict->dictFile, words) == 0) && + (strcmp(dict->affixFile, affixes) == 0)) + return dict; + dict = dict->next; + } - return NULL; + return NULL; } /* * This is called from backends that are looking up for a list of stop words * using a filename of the list. - * + * * This is called through dispell_init() which is responsible for proper locking * of the shared memory (using SegmentInfo->lock). */ -static -SharedStopList * get_shared_stop_list(char * stop) { - - SharedStopList * list = segment_info->stop; +static SharedStopList * +get_shared_stop_list(char *stop) +{ + SharedStopList *list = segment_info->shstop; - while (list != NULL) { - if (strcmp(list->stopFile, stop) == 0) { - return list; - } - list = list->next; - } + while (list != NULL) + { + if (strcmp(list->stopFile, stop) == 0) + return list; + list = list->next; + } - return NULL; + return NULL; } /* - * Initializes the dictionary for use in backends - checks whether such dictionary - * and list of stopwords is already used, and if not then parses it and loads it into - * the shared segment. - * - * This is called through dispell_init() which is responsible for proper locking - * of the shared memory (using SegmentInfo->lock). + * Cleares IspellDict fields which are used for store affix list. 
*/ -static -void init_shared_dict(DictInfo * info, char * dictFile, char * affFile, char * stopFile) { - - int size; - - SharedIspellDict * shdict = NULL; - SharedStopList * shstop = NULL; - - IspellDict * dict; - StopList stoplist; - - /* DICTIONARY + AFFIXES */ - - /* TODO This should probably check that the filenames are not NULL, and maybe that - * it exists. Or maybe that's handled by the NIImport* functions. */ - - /* lookup if the dictionary (words and affixes) is already loaded in the shared segment */ - shdict = get_shared_dict(dictFile, affFile); - - /* load the dictionary / affixes if not yet defined */ - if (shdict == NULL) { - - dict = (IspellDict *)palloc0(sizeof(IspellDict)); - - NIStartBuild(dict); - - NIImportDictionary(dict, - get_tsearch_config_filename(dictFile, "dict")); - - NIImportAffixes(dict, - get_tsearch_config_filename(affFile, "affix")); - - NISortDictionary(dict); - NISortAffixes(dict); - - NIFinishBuild(dict); - - /* check available space in shared segment */ - size = sizeIspellDict(dict, dictFile, affFile); - if (size > segment_info->available) - elog(ERROR, "shared dictionary %s.dict / %s.affix needs %d B, only %ld B available", - dictFile, affFile, size, segment_info->available); - - /* fine, there's enough space - copy the dictionary */ - shdict = copyIspellDict(dict, dictFile, affFile, size, dict->nspell); - - elog(INFO, "shared dictionary %s.dict / %s.affix loaded, used %d B, %ld B remaining", - dictFile, affFile, size, segment_info->available); - - /* add the new dictionary to the linked list (of SharedIspellDict structures) */ - shdict->next = segment_info->dict; - segment_info->dict = shdict; - - } - - /* STOP WORDS */ - - /* lookup if the stop words are already loaded in the shared segment, but only if there - * actually is a list */ - if (stopFile != NULL) { - - shstop = get_shared_stop_list(stopFile); - - /* load the stopwords if not yet defined */ - if (shstop == NULL) { - - readstoplist(stopFile, &stoplist, lowerstr); - 
- size = sizeStopList(&stoplist, stopFile); - if (size > segment_info->available) { - elog(ERROR, "shared stoplist %s.stop needs %d B, only %ld B available", - stopFile, size, segment_info->available); - } - - /* fine, there's enough space - copy the stoplist */ - shstop = copyStopList(&stoplist, stopFile, size); - - elog(INFO, "shared stoplist %s.stop loaded, used %d B, %ld B remaining", - affFile, size, segment_info->available); - - /* add the new stopword list to the linked list (of SharedStopList structures) */ - shstop->next = segment_info->stop; - segment_info->stop = shstop; - - } - } +static void +clean_dict_affix(IspellDict *dict) +{ + dict->maffixes = 0; + dict->naffixes = 0; + dict->Affix = NULL; - /* Now, fill the DictInfo structure for the backend (references to dictionary, - * stopwords and the filenames). */ + dict->Suffix = NULL; + dict->Prefix = NULL; - info->dict = shdict; - info->stop = shstop; - info->lookup = GetCurrentTimestamp(); + dict->AffixData = NULL; + dict->lenAffixData = 0; + dict->nAffixData = 0; - memcpy(info->dictFile, dictFile, strlen(dictFile) + 1); - memcpy(info->affixFile, dictFile, strlen(affFile)+ 1); - memcpy(info->stopFile, dictFile, strlen(stopFile) + 1); + dict->CompoundAffix = NULL; + dict->CompoundAffixFlags = NULL; + dict->nCompoundAffixFlag = 0; + dict->mCompoundAffixFlag = 0; + dict->avail = 0; } -Datum dispell_init(PG_FUNCTION_ARGS); -Datum dispell_lexize(PG_FUNCTION_ARGS); -Datum dispell_reset(PG_FUNCTION_ARGS); -Datum dispell_mem_available(PG_FUNCTION_ARGS); -Datum dispell_mem_used(PG_FUNCTION_ARGS); -Datum dispell_list_dicts(PG_FUNCTION_ARGS); -Datum dispell_list_stoplists(PG_FUNCTION_ARGS); +/* + * Initializes the dictionary for use in backends - checks whether such dictionary + * and list of stopwords is already used, and if not then parses it and loads it into + * the shared segment. + * + * Function lookup if the dictionary (word list) is already loaded in the + * shared segment. 
If not then loads the dictionary (word list). + * Affix list is loaded to a current backend process. + * + * This is called through dispell_init() which is responsible for proper locking + * of the shared memory (using SegmentInfo->lock). + */ +static void +init_shared_dict(DictInfo *info, MemoryContext infoCntx, + char *dictFile, char *affFile, char *stopFile) +{ + int size; + SharedIspellDict *shdict = NULL; + SharedStopList *shstop = NULL; + MemoryContext oldctx; + + oldctx = MemoryContextSwitchTo(infoCntx); + + /* DICTIONARY + AFFIXES */ + + /* TODO This should probably check that the filenames are not NULL, and maybe that + * it exists. Or maybe that's handled by the NIImport* functions. */ + + /* lookup if the dictionary (words and affixes) is already loaded in the shared segment */ + shdict = get_shared_dict(dictFile, affFile); + + /* clear dict affix sources */ + clean_dict_affix(&(info->dict)); + + /* load affix list */ + NIStartBuild(&(info->dict)); + NIImportAffixes(&(info->dict), get_tsearch_config_filename(affFile, "affix")); + + /* load the dictionary (word list) if not yet defined */ + if (shdict == NULL) + { + IspellDict *dict; + + dict = (IspellDict *) palloc0(sizeof(IspellDict)); + + NIStartBuild(dict); + NIImportDictionary(dict, get_tsearch_config_filename(dictFile, "dict")); + + dict->flagMode = info->dict.flagMode; + dict->usecompound = info->dict.usecompound; + + dict->nCompoundAffixFlag = dict->mCompoundAffixFlag = + info->dict.nCompoundAffixFlag; + dict->CompoundAffixFlags = (CompoundAffixFlag *) palloc0( + dict->nCompoundAffixFlag * sizeof(CompoundAffixFlag)); + memcpy(dict->CompoundAffixFlags, info->dict.CompoundAffixFlags, + dict->nCompoundAffixFlag * sizeof(CompoundAffixFlag)); + + /* + * If affix->useFlagAliases == true then AffixData is generated + * in NIImportAffixes(). Therefore we need to copy it. 
+ */ + if (info->dict.useFlagAliases) + { + int i; + + dict->useFlagAliases = true; + dict->lenAffixData = info->dict.lenAffixData; + dict->nAffixData = info->dict.nAffixData; + dict->AffixData = (char **) palloc0(dict->nAffixData * sizeof(char *)); + + for (i = 0; i < dict->nAffixData; i++) + { + dict->AffixData[i] = palloc0(strlen(info->dict.AffixData[i]) + 1); + strcpy(dict->AffixData[i], info->dict.AffixData[i]); + } + } + + NISortDictionary(dict); + NIFinishBuild(dict); + + /* check available space in shared segment */ + size = sizeIspellDict(dict, dictFile, affFile); + if (size > segment_info->available) + elog(ERROR, "shared dictionary %s.dict / %s.affix needs %d B, only %zd B available", + dictFile, affFile, size, segment_info->available); + + /* fine, there's enough space - copy the dictionary */ + shdict = copyIspellDict(dict, dictFile, affFile, size, dict->nspell); + shdict->dict.naffixes = info->dict.naffixes; + + /* add the new dictionary to the linked list (of SharedIspellDict structures) */ + shdict->next = segment_info->shdict; + segment_info->shdict = shdict; + } + /* continue load affix list to a current backend process */ + + /* NISortAffixes is used AffixData. 
Therefore we need to copy pointer */ + info->dict.lenAffixData = shdict->dict.lenAffixData; + info->dict.nAffixData = shdict->dict.nAffixData; + info->dict.AffixData = shdict->dict.AffixData; + info->dict.Dictionary = shdict->dict.Dictionary; + NISortAffixes(&(info->dict)); + NIFinishBuild(&(info->dict)); + + /* STOP WORDS */ + + /* lookup if the stop words are already loaded in the shared segment, but only if there + * actually is a list */ + if (stopFile && *stopFile) + { + shstop = get_shared_stop_list(stopFile); + + /* load the stopwords if not yet defined */ + if (shstop == NULL) + { + StopList stoplist; + + readstoplist(stopFile, &stoplist, lowerstr); + + size = sizeStopList(&stoplist, stopFile); + if (size > segment_info->available) + elog(ERROR, "shared stoplist %s.stop needs %d B, only %zd B available", + stopFile, size, segment_info->available); + + /* fine, there's enough space - copy the stoplist */ + shstop = copyStopList(&stoplist, stopFile, size); + + /* add the new stopword list to the linked list (of SharedStopList structures) */ + shstop->next = segment_info->shstop; + segment_info->shstop = shstop; + } + } + + /* Now, fill the DictInfo structure for the backend (references to dictionary, + * stopwords and the filenames). */ + + info->shdict = shdict; + info->shstop = shstop; + INSTR_TIME_SET_CURRENT(info->lookup); + + memcpy(info->dictFile, dictFile, strlen(dictFile) + 1); + memcpy(info->affixFile, affFile, strlen(affFile) + 1); + if (stopFile != NULL) + memcpy(info->stopFile, stopFile, strlen(stopFile) + 1); + else + memset(info->stopFile, 0, sizeof(info->stopFile)); + + MemoryContextSwitchTo(oldctx); + /* save current context as long-lived */ + info->infoCntx = infoCntx; +} PG_FUNCTION_INFO_V1(dispell_init); PG_FUNCTION_INFO_V1(dispell_lexize); @@ -427,26 +440,27 @@ PG_FUNCTION_INFO_V1(dispell_list_stoplists); /* * Resets the shared dictionary memory, i.e. removes all the dictionaries. 
This - * is the only way to remove dictionaries from the memory - either when when + * is the only way to remove dictionaries from the memory - either when * a dictionary is no longer needed or needs to be reloaded (e.g. to update * list of words / affixes). */ Datum dispell_reset(PG_FUNCTION_ARGS) { - LWLockAcquire(segment_info->lock, LW_EXCLUSIVE); + LWLockAcquire(segment_info->lock, LW_EXCLUSIVE); - segment_info->dict = NULL; - segment_info->stop = NULL; - segment_info->lastReset = GetCurrentTimestamp(); - segment_info->firstfree = ((char*)segment_info) + MAXALIGN(sizeof(SegmentInfo)); - segment_info->available = max_ispell_mem_size - (int)(segment_info->firstfree - (char*)segment_info); + segment_info->shdict = NULL; + segment_info->shstop = NULL; + INSTR_TIME_SET_CURRENT(segment_info->lastReset); + segment_info->firstfree = ((char*) segment_info) + MAXALIGN(sizeof(SegmentInfo)); + segment_info->available = max_ispell_mem_size() - + (int) (segment_info->firstfree - (char*) segment_info); - memset(segment_info->firstfree, 0, segment_info->available); + memset(segment_info->firstfree, 0, segment_info->available); - LWLockRelease(segment_info->lock); + LWLockRelease(segment_info->lock); - PG_RETURN_VOID(); + PG_RETURN_VOID(); } /* @@ -455,224 +469,253 @@ dispell_reset(PG_FUNCTION_ARGS) Datum dispell_mem_available(PG_FUNCTION_ARGS) { - int result = 0; - LWLockAcquire(segment_info->lock, LW_SHARED); + int result = 0; + LWLockAcquire(segment_info->lock, LW_SHARED); - result = segment_info->available; + result = segment_info->available; - LWLockRelease(segment_info->lock); + LWLockRelease(segment_info->lock); - PG_RETURN_INT32(result); + PG_RETURN_INT32(result); } /* - * Returns amount of 'occupied space' in the shared segment (used by current dictionaries). + * Returns amount of 'occupied space' in the shared segment (used by current + * dictionaries). 
*/ Datum dispell_mem_used(PG_FUNCTION_ARGS) { - int result = 0; - LWLockAcquire(segment_info->lock, LW_SHARED); + int result = 0; - result = max_ispell_mem_size - segment_info->available; + LWLockAcquire(segment_info->lock, LW_SHARED); - LWLockRelease(segment_info->lock); + result = max_ispell_mem_size() - segment_info->available; - PG_RETURN_INT32(result); + LWLockRelease(segment_info->lock); + + PG_RETURN_INT32(result); } /* * This initializes a (shared) dictionary for a backend. The function receives * a list of options specified in the CREATE TEXT SEARCH DICTIONARY with ispell * template (http://www.postgresql.org/docs/9.3/static/sql-createtsdictionary.html). - * + * * There are three allowed options: DictFile, AffFile, StopWords. The values * should match to filenames in `pg_config --sharedir` directory, ending with * .dict, .affix and .stop. - * + * * The StopWords parameter is optional, the two other are required. - * + * * If any of the filenames are incorrect, the call to init_shared_dict will fail. + * + * Do not call it directly - it saves current memory context as long-lived + * context. 
*/ Datum dispell_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); - char *dictFile = NULL, *affFile = NULL, *stopFile = NULL; - bool affloaded = false, - dictloaded = false, - stoploaded = false; - ListCell *l; - - /* this is the result passed to dispell_lexize */ - DictInfo * info = (DictInfo *)palloc0(sizeof(DictInfo)); - - foreach(l, dictoptions) - { - DefElem *defel = (DefElem *) lfirst(l); - - if (pg_strcasecmp(defel->defname, "DictFile") == 0) - { - if (dictloaded) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("multiple DictFile parameters"))); - dictFile = defGetString(defel); - dictloaded = true; - } - else if (pg_strcasecmp(defel->defname, "AffFile") == 0) - { - if (affloaded) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("multiple AffFile parameters"))); - affFile = defGetString(defel); - affloaded = true; - } - else if (pg_strcasecmp(defel->defname, "StopWords") == 0) - { - if (stoploaded) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("multiple StopWords parameters"))); - stopFile = defGetString(defel); - stoploaded = true; - } - else - { - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("unrecognized Ispell parameter: \"%s\"", - defel->defname))); - } - } - - if (!affloaded) - { - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("missing AffFile parameter"))); - } - else if (! 
dictloaded) - { - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("missing DictFile parameter"))); - } - - /* search if the dictionary is already initialized */ - LWLockAcquire(segment_info->lock, LW_EXCLUSIVE); - - init_shared_dict(info, dictFile, affFile, stopFile); - - LWLockRelease(segment_info->lock); - - PG_RETURN_POINTER(info); + List *dictoptions = (List *) PG_GETARG_POINTER(0); + char *dictFile = NULL, + *affFile = NULL, + *stopFile = NULL; + bool affloaded = false, + dictloaded = false, + stoploaded = false; + ListCell *l; + + /* this is the result passed to dispell_lexize */ + DictInfo *info = (DictInfo *) palloc0(sizeof(DictInfo)); + + foreach(l, dictoptions) + { + DefElem *defel = (DefElem *) lfirst(l); + + if (pg_strcasecmp(defel->defname, "DictFile") == 0) + { + if (dictloaded) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple DictFile parameters"))); + dictFile = defGetString(defel); + dictloaded = true; + } + else if (pg_strcasecmp(defel->defname, "AffFile") == 0) + { + if (affloaded) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple AffFile parameters"))); + affFile = defGetString(defel); + affloaded = true; + } + else if (pg_strcasecmp(defel->defname, "StopWords") == 0) + { + if (stoploaded) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple StopWords parameters"))); + stopFile = defGetString(defel); + stoploaded = true; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized Ispell parameter: \"%s\"", + defel->defname))); + } + } + + if (!affloaded) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing AffFile parameter"))); + } + else if (!dictloaded) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing DictFile parameter"))); + } + + /* search if the dictionary is already initialized */ + LWLockAcquire(segment_info->lock, 
LW_EXCLUSIVE); + + /* + * Current context is a long lived context. Create child context to store + * DictInfo internal data. + */ + info->infoCntx = AllocSetContextCreate(CurrentMemoryContext, + "shared_ispell context", + ALLOCSET_DEFAULT_SIZES); + + init_shared_dict(info, info->infoCntx, dictFile, affFile, stopFile); + + LWLockRelease(segment_info->lock); + + PG_RETURN_POINTER(info); } Datum dispell_lexize(PG_FUNCTION_ARGS) { - DictInfo * info = (DictInfo *) PG_GETARG_POINTER(0); - char *in = (char *) PG_GETARG_POINTER(1); - int32 len = PG_GETARG_INT32(2); - char *txt; - TSLexeme *res; - TSLexeme *ptr, - *cptr; - - if (len <= 0) - PG_RETURN_POINTER(NULL); - - txt = lowerstr_with_len(in, len); - - /* need to lock the segment in shared mode */ - LWLockAcquire(segment_info->lock, LW_SHARED); - - /* do we need to reinit the dictionary? was the dict reset since the lookup */ - if (timestamp_cmp_internal(info->lookup, segment_info->lastReset) < 0) { - - /* relock in exclusive mode */ - LWLockRelease(segment_info->lock); - LWLockAcquire(segment_info->lock, LW_EXCLUSIVE); - - elog(INFO, "reinitializing shared dict (segment reset)"); - - init_shared_dict(info, info->dictFile, info->affixFile, info->stopFile); - } - - res = SharedNINormalizeWord(info->dict, txt); - - /* nothing found :-( */ - if (res == NULL) { - LWLockRelease(segment_info->lock); - PG_RETURN_POINTER(NULL); - } - - ptr = cptr = res; - while (ptr->lexeme) - { - if (searchstoplist(&info->stop->list, ptr->lexeme)) - { - pfree(ptr->lexeme); - ptr->lexeme = NULL; - ptr++; - } - else - { - memcpy(cptr, ptr, sizeof(TSLexeme)); - cptr++; - ptr++; - } - } - cptr->lexeme = NULL; - - LWLockRelease(segment_info->lock); - - PG_RETURN_POINTER(res); + DictInfo *info = (DictInfo *) PG_GETARG_POINTER(0); + char *in = (char *) PG_GETARG_POINTER(1); + int32 len = PG_GETARG_INT32(2); + char *txt; + TSLexeme *res; + TSLexeme *ptr, + *cptr; + + if (len <= 0) + PG_RETURN_POINTER(NULL); + + txt = lowerstr_with_len(in, len); + + /* 
need to lock the segment in shared mode */ + LWLockAcquire(segment_info->lock, LW_SHARED); + + /* do we need to reinit the dictionary? was the dict reset since the lookup */ + if (INSTR_TIME_GET_MICROSEC(info->lookup) < + INSTR_TIME_GET_MICROSEC(segment_info->lastReset)) + { + DictInfo saveInfo = *info; + + /* relock in exclusive mode */ + LWLockRelease(segment_info->lock); + LWLockAcquire(segment_info->lock, LW_EXCLUSIVE); + + /* + * info is allocated in info->saveCntx, so that's why we use a copy of + * info here + */ + + MemoryContextReset(saveInfo.infoCntx); + MemSet(info, 0, sizeof(*info)); + + init_shared_dict(info, saveInfo.infoCntx, saveInfo.dictFile, + saveInfo.affixFile, saveInfo.stopFile); + } + + res = NINormalizeWord(&(info->dict), txt); + + /* nothing found :-( */ + if (res == NULL) + { + LWLockRelease(segment_info->lock); + PG_RETURN_POINTER(NULL); + } + + ptr = cptr = res; + while (ptr->lexeme) + { + if (info->shstop && searchstoplist(&(info->shstop->stop), ptr->lexeme)) + { + pfree(ptr->lexeme); + ptr->lexeme = NULL; + ptr++; + } + else + { + memcpy(cptr, ptr, sizeof(TSLexeme)); + cptr++; + ptr++; + } + } + cptr->lexeme = NULL; + + LWLockRelease(segment_info->lock); + + PG_RETURN_POINTER(res); } /* * This 'allocates' memory in the shared segment - i.e. the memory is * already allocated and this just gives nbytes to the caller. This is * used exclusively by the 'copy' methods defined below. - * + * * The memory is kept aligned thanks to MAXALIGN. Also, this assumes * the segment was locked properly by the caller. */ -static -char * shalloc(int bytes) { - - char * result; - bytes = MAXALIGN(bytes); +static char * +shalloc(int bytes) +{ + char *result; - /* This shouldn't really happen, as the init_shared_dict checks the size - * prior to copy. So let's just throw error here, as something went - * obviously wrong. 
*/ - if (bytes > segment_info->available) - elog(ERROR, "the shared segment (shared ispell) is too small"); + bytes = MAXALIGN(bytes); - result = segment_info->firstfree; - segment_info->firstfree += bytes; - segment_info->available -= bytes; + /* This shouldn't really happen, as the init_shared_dict checks the size + * prior to copy. So let's just throw error here, as something went + * obviously wrong. */ + if (bytes > segment_info->available) + elog(ERROR, "the shared segment (shared ispell) is too small"); - memset(result, 0, bytes); + result = segment_info->firstfree; + segment_info->firstfree += bytes; + segment_info->available -= bytes; - return result; + memset(result, 0, bytes); + return result; } /* * Copies a string into the shared segment - allocates memory and does memcpy. - * + * * TODO This assumes the string is properly terminated (should be guaranteed * by the code that reads and parses the dictionary / affixes). */ -static -char * shstrcpy(char * str) { - char * tmp = shalloc(strlen(str)+1); - memcpy(tmp, str, strlen(str)+1); - return tmp; +static char * +shstrcpy(char *str) +{ + char *tmp = shalloc(strlen(str) + 1); + + memcpy(tmp, str, strlen(str) + 1); + + return tmp; } /* @@ -684,498 +727,299 @@ char * shstrcpy(char * str) { /* SPNode - dictionary words */ -static -SPNode * copySPNode(SPNode * node) { - int i; - - SPNode * copy = NULL; - - if (node == NULL) { - return NULL; - } - - copy = (SPNode*)shalloc(offsetof(SPNode,data) + sizeof(SPNodeData) * node->length); - memcpy(copy, node, offsetof(SPNode,data) + sizeof(SPNodeData) * node->length); - - for (i = 0; i < node->length; i++) { - copy->data[i].node = copySPNode(node->data[i].node); - } - - return copy; -} - -static -int sizeSPNode(SPNode * node) { - - int i; - int size = 0; - - if (node == NULL) { - return 0; - } - - size = MAXALIGN(offsetof(SPNode,data) + sizeof(SPNodeData) * node->length); - - for (i = 0; i < node->length; i++) { - size += sizeSPNode(node->data[i].node); - } - - return 
size; -} - -/* RegisNode - simple regular expressions */ - -static -RegisNode * copyRegisNode(RegisNode * node) { - - RegisNode * copy = (RegisNode *)shalloc(offsetof(RegisNode, data) + node->len); - - memcpy(copy, node, offsetof(RegisNode, data) + node->len); - - if (node->next != NULL) { - copy->next = copyRegisNode(node->next); - } - - return copy; -} - -static -int sizeRegisNode(RegisNode * node) { - - int size = MAXALIGN(offsetof(RegisNode, data) + node->len); - - if (node->next != NULL) { - size += sizeRegisNode(node->next); - } - - return size; -} - -/* AFFIX - affix rules (simple, regis or full regular expressions). */ - -static -AFFIX * copyAffix(AFFIX * affix) { - - AFFIX * copy = (AFFIX*)shalloc(sizeof(AFFIX)); - - memcpy(copy, affix, sizeof(AFFIX)); - - copy->find = shstrcpy(affix->find); - copy->repl = shstrcpy(affix->repl); - - if (affix->isregis) { - copy->reg.regis.node = copyRegisNode(affix->reg.regis.node); - } else if (! affix->issimple) { - - /*FIXME Need to copy the regex_t properly. But a plain copy would not be - * safe tu use by multiple processes at the same time, so each backend - * needs to create it's own copy. */ - elog(ERROR, "This extension can't handle regex_t affixes yet."); - - } - - return copy; - -} - -static -int sizeAffix(AFFIX * affix) { - - int size = MAXALIGN(sizeof(AFFIX)); - - size += MAXALIGN(strlen(affix->find)+1); - size += MAXALIGN(strlen(affix->repl)+1); - - if (affix->isregis) { - size += sizeRegisNode(affix->reg.regis.node); - } else if (! affix->issimple) { - - /*FIXME Need to copy the regex_t properly. But would a plain copy be - * safe tu use by multiple processes at the same time? 
*/ - elog(ERROR, "This extension can't handle regex_t affixes yet."); - - } - - return size; - -} - -/* AffixNode */ - -static -AffixNode * copyAffixNode(AffixNode * node) { - - int i, j; - AffixNode * copy = NULL; - - if (node == NULL) { - return NULL; - } - - copy = (AffixNode *)shalloc(offsetof(AffixNode,data) + sizeof(AffixNodeData) * node->length); - memcpy(copy, node, offsetof(AffixNode,data) + sizeof(AffixNodeData) * node->length); - - for (i = 0; i < node->length; i++) { +static SPNode * +copySPNode(SPNode *node) +{ + int i; + SPNode *copy = NULL; - copy->data[i].node = copyAffixNode(node->data[i].node); + if (node == NULL) + return NULL; - copy->data[i].val = node->data[i].val; - copy->data[i].naff = node->data[i].naff; - copy->data[i].aff = (AFFIX**)shalloc(sizeof(AFFIX*) * node->data[i].naff); + copy = (SPNode *) shalloc(offsetof(SPNode, data) + sizeof(SPNodeData) * node->length); + memcpy(copy, node, offsetof(SPNode, data) + sizeof(SPNodeData) * node->length); - for (j = 0; j < node->data[i].naff; j++) { - copy->data[i].aff[j] = copyAffix(node->data[i].aff[j]); - } - } + for (i = 0; i < node->length; i++) + copy->data[i].node = copySPNode(node->data[i].node); - return copy; + return copy; } -static -int sizeAffixNode(AffixNode * node) { - - int i, j; - int size = 0; - - if (node == NULL) { - return 0; - } - - size = MAXALIGN(offsetof(AffixNode,data) + sizeof(AffixNodeData) * node->length); +static int +sizeSPNode(SPNode *node) +{ + int i; + int size = 0; - for (i = 0; i < node->length; i++) { + if (node == NULL) + return 0; - size += sizeAffixNode(node->data[i].node); - size += MAXALIGN(sizeof(AFFIX*) * node->data[i].naff); + size = MAXALIGN(offsetof(SPNode, data) + sizeof(SPNodeData) * node->length); - for (j = 0; j < node->data[i].naff; j++) { - size += sizeAffix(node->data[i].aff[j]); - } - } + for (i = 0; i < node->length; i++) + size += sizeSPNode(node->data[i].node); - return size; + return size; } /* StopList */ -static -SharedStopList * 
copyStopList(StopList * list, char * stopFile, int size) { - - int i; - SharedStopList * copy = (SharedStopList *)shalloc(sizeof(SharedStopList)); - - copy->list.len = list->len; - copy->list.stop = (char**)shalloc(sizeof(char*) * list->len); - copy->stopFile = shstrcpy(stopFile); - copy->nbytes = size; - - for (i = 0; i < list->len; i++) { - copy->list.stop[i] = shstrcpy(list->stop[i]); - } - - return copy; -} - -static -int sizeStopList(StopList * list, char * stopFile) { - - int i; - int size = MAXALIGN(sizeof(SharedStopList)); +static SharedStopList * +copyStopList(StopList *list, char *stopFile, int size) +{ + int i; + SharedStopList *copy = (SharedStopList *) shalloc(sizeof(SharedStopList)); - size += MAXALIGN(sizeof(char*) * list->len); - size += MAXALIGN(strlen(stopFile) + 1); + copy->stop.len = list->len; + copy->stop.stop = (char **) shalloc(sizeof(char *) * list->len); + copy->stopFile = shstrcpy(stopFile); + copy->nbytes = size; - for (i = 0; i < list->len; i++) { - size += MAXALIGN(strlen(list->stop[i]) + 1); - } + for (i = 0; i < list->len; i++) + copy->stop.stop[i] = shstrcpy(list->stop[i]); - return size; + return copy; } -/* CMPDAffix (compound affixes?) */ - -static -int countCMPDAffixes(CMPDAffix * affixes) { - - /* there's at least one affix */ - int count = 1; - CMPDAffix * ptr = affixes; +static int +sizeStopList(StopList *list, char *stopFile) +{ + int i; + int size = MAXALIGN(sizeof(SharedStopList)); - /* the last one is marked with (affix == NULL) */ - while (ptr->affix) - { - ptr++; - count++; - } + size += MAXALIGN(sizeof(char *) * list->len); + size += MAXALIGN(strlen(stopFile) + 1); - return count; + for (i = 0; i < list->len; i++) + size += MAXALIGN(strlen(list->stop[i]) + 1); + return size; } /* * Performs deep copy of the dictionary into the shared memory segment. - * + * * It gets the populated Ispell Dictionary (dict) and copies all the data * using the 'copy' methods listed above. 
It also keeps the filenames so * that it's possible to lookup the dictionaries later. + * + * Function copies only word list. Affix list is loaded to a current process. */ -static -SharedIspellDict * copyIspellDict(IspellDict * dict, char * dictFile, char * affixFile, int size, int words) { - - int i, cnt; - - SharedIspellDict * copy = (SharedIspellDict*)shalloc(sizeof(SharedIspellDict)); - - copy->dictFile = shalloc(strlen(dictFile)+1); - copy->affixFile = shalloc(strlen(affixFile)+1); - - strcpy(copy->dictFile, dictFile); - strcpy(copy->affixFile, affixFile); - - copy->naffixes = dict->naffixes; - - copy->Affix = (AFFIX*)shalloc(sizeof(AFFIX) * dict->naffixes); - - copy->Suffix = copyAffixNode(dict->Suffix); - copy->Prefix = copyAffixNode(dict->Prefix); +static SharedIspellDict * +copyIspellDict(IspellDict *dict, char *dictFile, char *affixFile, int size, int words) +{ + int i; + SharedIspellDict *copy = (SharedIspellDict *) shalloc(sizeof(SharedIspellDict)); - copy->Dictionary = copySPNode(dict->Dictionary); + copy->dictFile = shalloc(strlen(dictFile) + 1); + copy->affixFile = shalloc(strlen(affixFile) + 1); - /* copy affix data */ - copy->nAffixData = dict->nAffixData; - copy->AffixData = (char**)shalloc(sizeof(char*) * dict->nAffixData); - for (i = 0; i < copy->nAffixData; i++) { - copy->AffixData[i] = shstrcpy(dict->AffixData[i]); - } + strcpy(copy->dictFile, dictFile); + strcpy(copy->affixFile, affixFile); - /* copy compound affixes (there's at least one) */ - cnt = countCMPDAffixes(dict->CompoundAffix); - copy->CompoundAffix = (CMPDAffix*)shalloc(sizeof(CMPDAffix) * cnt); - memcpy(copy->CompoundAffix, dict->CompoundAffix, sizeof(CMPDAffix) * cnt); + copy->dict.Dictionary = copySPNode(dict->Dictionary); - memcpy(copy->flagval, dict->flagval, 255); - copy->usecompound = dict->usecompound; + /* copy affix data */ + copy->dict.nAffixData = dict->nAffixData; + copy->dict.AffixData = (char **) shalloc(sizeof(char *) * dict->nAffixData); + for (i = 0; i < 
copy->dict.nAffixData; i++) + copy->dict.AffixData[i] = shstrcpy(dict->AffixData[i]); - copy->nbytes = size; - copy->nwords = words; + copy->dict.flagMode = dict->flagMode; - return copy; + copy->nbytes = size; + copy->nwords = words; + return copy; } /* - * Computes how much space is needed for a dictionary in the shared segment. + * Computes how much space is needed for a dictionary (word list) in the shared segment. + * + * Function does not compute space for a affix list since affix list is loaded + * to a current process. */ -static -int sizeIspellDict(IspellDict * dict, char * dictFile, char * affixFile) { - - int i; - int size = MAXALIGN(sizeof(SharedIspellDict)); - - size += MAXALIGN(strlen(dictFile)+1); - size += MAXALIGN(strlen(affixFile)+1); - - size += MAXALIGN(sizeof(AFFIX) * dict->naffixes); - - size += MAXALIGN(sizeAffixNode(dict->Suffix)); - size += MAXALIGN(sizeAffixNode(dict->Prefix)); - - size += sizeSPNode(dict->Dictionary); +static int +sizeIspellDict(IspellDict *dict, char *dictFile, char *affixFile) +{ + int i; + int size = MAXALIGN(sizeof(SharedIspellDict)); - /* copy affix data */ - size += MAXALIGN(sizeof(char*) * dict->nAffixData); - for (i = 0; i < dict->nAffixData; i++) { - size += MAXALIGN(sizeof(char) * strlen(dict->AffixData[i]) + 1); - } + size += MAXALIGN(strlen(dictFile) + 1); + size += MAXALIGN(strlen(affixFile) + 1); - /* copy compound affixes (there's at least one) */ - size += MAXALIGN(sizeof(CMPDAffix) * countCMPDAffixes(dict->CompoundAffix)); + size += sizeSPNode(dict->Dictionary); - return size; + /* copy affix data */ + size += MAXALIGN(sizeof(char *) * dict->nAffixData); + for (i = 0; i < dict->nAffixData; i++) + size += MAXALIGN(sizeof(char) * strlen(dict->AffixData[i]) + 1); + return size; } /* SRF function returning a list of shared dictionaries currently loaded in memory. 
*/ Datum dispell_list_dicts(PG_FUNCTION_ARGS) { - FuncCallContext *funcctx; - TupleDesc tupdesc; - AttInMetadata *attinmeta; - SharedIspellDict * dict; - - /* init on the first call */ - if (SRF_IS_FIRSTCALL()) { - - MemoryContext oldcontext; - - funcctx = SRF_FIRSTCALL_INIT(); - oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); - - /* get a shared lock and then the first dictionary */ - LWLockAcquire(segment_info->lock, LW_SHARED); - funcctx->user_fctx = segment_info->dict; - - /* Build a tuple descriptor for our result type */ - if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("function returning record called in context " - "that cannot accept type record"))); - - /* - * generate attribute metadata needed later to produce tuples from raw - * C strings - */ - attinmeta = TupleDescGetAttInMetadata(tupdesc); - funcctx->attinmeta = attinmeta; - funcctx->tuple_desc = tupdesc; - - /* switch back to the old context */ - MemoryContextSwitchTo(oldcontext); - - } - - /* init the context */ - funcctx = SRF_PERCALL_SETUP(); - - /* check if we have more data */ - if (funcctx->user_fctx != NULL) - { - HeapTuple tuple; - Datum result; - Datum values[5]; - bool nulls[5]; - - text *dictname, *affname; - - dict = (SharedIspellDict*)funcctx->user_fctx; - funcctx->user_fctx = dict->next; - - memset(nulls, 0, sizeof(nulls)); - - dictname = (text *) palloc(strlen(dict->dictFile) + VARHDRSZ); - affname = (text *) palloc(strlen(dict->affixFile) + VARHDRSZ); - - SET_VARSIZE(dictname, strlen(dict->dictFile) + VARHDRSZ); - SET_VARSIZE(affname, strlen(dict->affixFile) + VARHDRSZ); - - strcpy(VARDATA(dictname), dict->dictFile); - strcpy(VARDATA(affname), dict->affixFile); - - values[0] = PointerGetDatum(dictname); - values[1] = PointerGetDatum(affname); - values[2] = UInt32GetDatum(dict->nwords); - values[3] = UInt32GetDatum(dict->naffixes); - values[4] = 
UInt32GetDatum(dict->nbytes); - - /* Build and return the tuple. */ - tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); - - /* make the tuple into a datum */ - result = HeapTupleGetDatum(tuple); - - /* Here we want to return another item: */ - SRF_RETURN_NEXT(funcctx, result); - - } - else - { - /* release the lock */ - LWLockRelease(segment_info->lock); - - /* Here we are done returning items and just need to clean up: */ - SRF_RETURN_DONE(funcctx); - } - + FuncCallContext *funcctx; + TupleDesc tupdesc; + SharedIspellDict *dict; + + /* init on the first call */ + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* get a shared lock and then the first dictionary */ + LWLockAcquire(segment_info->lock, LW_SHARED); + funcctx->user_fctx = segment_info->shdict; + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("function returning record called in context " + "that cannot accept type record"))); + + /* + * generate attribute metadata needed later to produce tuples from raw + * C strings + */ + funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); + funcctx->tuple_desc = tupdesc; + + /* switch back to the old context */ + MemoryContextSwitchTo(oldcontext); + } + + /* init the context */ + funcctx = SRF_PERCALL_SETUP(); + + /* check if we have more data */ + if (funcctx->user_fctx != NULL) + { + HeapTuple tuple; + Datum result; + Datum values[5]; + bool nulls[5]; + + text *dictname, + *affname; + + dict = (SharedIspellDict *) funcctx->user_fctx; + funcctx->user_fctx = dict->next; + + memset(nulls, 0, sizeof(nulls)); + + dictname = cstring_to_text(dict->dictFile); + affname = cstring_to_text(dict->affixFile); + + values[0] = PointerGetDatum(dictname); + values[1] = PointerGetDatum(affname); + 
values[2] = UInt32GetDatum(dict->nwords); + values[3] = UInt32GetDatum(dict->dict.naffixes); + values[4] = UInt32GetDatum(dict->nbytes); + + /* Build and return the tuple. */ + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + + /* make the tuple into a datum */ + result = HeapTupleGetDatum(tuple); + + /* Here we want to return another item: */ + SRF_RETURN_NEXT(funcctx, result); + } + else + { + /* release the lock */ + LWLockRelease(segment_info->lock); + + /* Here we are done returning items and just need to clean up: */ + SRF_RETURN_DONE(funcctx); + } } /* SRF function returning a list of shared stopword lists currently loaded in memory. */ Datum dispell_list_stoplists(PG_FUNCTION_ARGS) { - FuncCallContext *funcctx; - TupleDesc tupdesc; - AttInMetadata *attinmeta; - SharedStopList *stoplist; - - /* init on the first call */ - if (SRF_IS_FIRSTCALL()) { - - MemoryContext oldcontext; - - funcctx = SRF_FIRSTCALL_INIT(); - oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); - - /* get a shared lock and then the first stop list */ - LWLockAcquire(segment_info->lock, LW_SHARED); - funcctx->user_fctx = segment_info->stop; - - /* Build a tuple descriptor for our result type */ - if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("function returning record called in context " - "that cannot accept type record"))); - - /* - * generate attribute metadata needed later to produce tuples from raw - * C strings - */ - attinmeta = TupleDescGetAttInMetadata(tupdesc); - funcctx->attinmeta = attinmeta; - funcctx->tuple_desc = tupdesc; - - /* switch back to the old context */ - MemoryContextSwitchTo(oldcontext); - - } - - /* init the context */ - funcctx = SRF_PERCALL_SETUP(); - - /* check if we have more data */ - if (funcctx->user_fctx != NULL) - { - HeapTuple tuple; - Datum result; - Datum values[3]; - bool nulls[3]; - - text *stopname; - - stoplist = 
(SharedStopList*)funcctx->user_fctx; - funcctx->user_fctx = stoplist->next; - - memset(nulls, 0, sizeof(nulls)); - - stopname = (text *) palloc(strlen(stoplist->stopFile) + VARHDRSZ); - - SET_VARSIZE(stopname, strlen(stoplist->stopFile) + VARHDRSZ); - - strcpy(VARDATA(stopname), stoplist->stopFile); - - values[0] = PointerGetDatum(stopname); - values[1] = UInt32GetDatum(stoplist->list.len); - values[2] = UInt32GetDatum(stoplist->nbytes); - - /* Build and return the tuple. */ - tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); - - /* make the tuple into a datum */ - result = HeapTupleGetDatum(tuple); - - /* Here we want to return another item: */ - SRF_RETURN_NEXT(funcctx, result); - - } - else - { - /* release the lock */ - LWLockRelease(segment_info->lock); - - /* Here we are done returning items and just need to clean up: */ - SRF_RETURN_DONE(funcctx); - } - + FuncCallContext *funcctx; + TupleDesc tupdesc; + SharedStopList *stoplist; + + /* init on the first call */ + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* get a shared lock and then the first stop list */ + LWLockAcquire(segment_info->lock, LW_SHARED); + funcctx->user_fctx = segment_info->shstop; + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("function returning record called in context " + "that cannot accept type record"))); + + /* + * generate attribute metadata needed later to produce tuples from raw + * C strings + */ + funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); + funcctx->tuple_desc = tupdesc; + + /* switch back to the old context */ + MemoryContextSwitchTo(oldcontext); + } + + /* init the context */ + funcctx = SRF_PERCALL_SETUP(); + + /* check if we have more data */ + if (funcctx->user_fctx != NULL) + { 
+ HeapTuple tuple; + Datum result; + Datum values[3]; + bool nulls[3]; + + text *stopname; + + stoplist = (SharedStopList *) funcctx->user_fctx; + funcctx->user_fctx = stoplist->next; + + memset(nulls, 0, sizeof(nulls)); + + stopname = cstring_to_text(stoplist->stopFile); + + values[0] = PointerGetDatum(stopname); + values[1] = UInt32GetDatum(stoplist->stop.len); + values[2] = UInt32GetDatum(stoplist->nbytes); + + /* Build and return the tuple. */ + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + + /* make the tuple into a datum */ + result = HeapTupleGetDatum(tuple); + + /* Here we want to return another item: */ + SRF_RETURN_NEXT(funcctx, result); + } + else + { + /* release the lock */ + LWLockRelease(segment_info->lock); + + /* Here we are done returning items and just need to clean up: */ + SRF_RETURN_DONE(funcctx); + } } diff --git a/src/shared_ispell.h b/src/shared_ispell.h new file mode 100644 index 0000000..cbba198 --- /dev/null +++ b/src/shared_ispell.h @@ -0,0 +1,75 @@ +#ifndef __SHARED_ISPELL_H__ +#define __SHARED_ISPELL_H__ + +#include "storage/lwlock.h" +#include "utils/memutils.h" +#include "utils/timestamp.h" +#include "tsearch/dicts/spell.h" +#include "tsearch/ts_public.h" + +/* This segment is initialized in the first process that accesses it (see + * ispell_shmem_startup function). 
+ */ +#define SEGMENT_NAME "shared_ispell" + +#define MAXLEN 255 + +typedef struct SharedIspellDict +{ + /* this is used for selecting the dictionary */ + char *dictFile; + char *affixFile; + int nbytes; + int nwords; + + /* next dictionary in the chain (essentially a linked list) */ + struct SharedIspellDict *next; + + IspellDict dict; +} SharedIspellDict; + +typedef struct SharedStopList +{ + char *stopFile; + int nbytes; + + struct SharedStopList *next; + + StopList stop; +} SharedStopList; + +/* used to allocate memory in the shared segment */ +typedef struct SegmentInfo +{ + LWLockId lock; + char *firstfree; /* first free address (always maxaligned) */ + size_t available; /* free space remaining at firstfree */ + instr_time lastReset; /* last reset of the dictionary */ + + /* the shared segment (info and data) */ + SharedIspellDict *shdict; + SharedStopList *shstop; +} SegmentInfo; + +/* used to keep track of dictionary in each backend */ +typedef struct DictInfo +{ + instr_time lookup; + + char dictFile[MAXLEN]; + char affixFile[MAXLEN]; + char stopFile[MAXLEN]; + + /* We split the word list and the affix list. + * In shdict we store the word list, which is kept in the shared segment. + * In dict we store the affix list, which is loaded locally in each process. + */ + SharedIspellDict *shdict; + IspellDict dict; + SharedStopList *shstop; + + /* MemoryContext of dict local content */ + MemoryContext infoCntx; +} DictInfo; + +#endif diff --git a/src/spell.c b/src/spell.c deleted file mode 100644 index d233247..0000000 --- a/src/spell.c +++ /dev/null @@ -1,647 +0,0 @@ -/*------------------------------------------------------------------------- - * - * spell.c - * - * Normalizing word with ISpell (in shared segment). Mostly a slightly - * copy of the spell.c code, modified so that it works with SharedIspellDict - * instead of plain IspellDict.
- * - * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group - * Copyright (c) 2011, Tomas Vondra - * - * IDENTIFICATION - * src/spell.c (a slightly modified copy of src/backend/tsearch/spell.c) - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" - -#include "spell.h" - -#define MAX_NORM 1024 -#define MAXNORMLEN 256 - -#define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] ) - -static int -FindWord(SharedIspellDict *Conf, const char *word, int affixflag, int flag) -{ - SPNode *node = Conf->Dictionary; - SPNodeData *StopLow, - *StopHigh, - *StopMiddle; - const uint8 *ptr = (const uint8 *) word; - - flag &= FF_DICTFLAGMASK; - - while (node && *ptr) - { - StopLow = node->data; - StopHigh = node->data + node->length; - while (StopLow < StopHigh) - { - StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); - if (StopMiddle->val == *ptr) - { - if (*(ptr + 1) == '\0' && StopMiddle->isword) - { - if (flag == 0) - { - if (StopMiddle->compoundflag & FF_COMPOUNDONLY) - return 0; - } - else if ((flag & StopMiddle->compoundflag) == 0) - return 0; - - if ((affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL)) - return 1; - } - node = StopMiddle->node; - ptr++; - break; - } - else if (StopMiddle->val < *ptr) - StopLow = StopMiddle + 1; - else - StopHigh = StopMiddle; - } - if (StopLow >= StopHigh) - break; - } - return 0; -} - -static AffixNodeData * -FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) -{ - AffixNodeData *StopLow, - *StopHigh, - *StopMiddle; - uint8 symbol; - - if (node->isvoid) - { /* search void affixes */ - if (node->data->naff) - return node->data; - node = node->data->node; - } - - while (node && *level < wrdlen) - { - StopLow = node->data; - StopHigh = node->data + node->length; - while (StopLow < StopHigh) - { - StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); - symbol = GETWCHAR(word, 
wrdlen, *level, type); - - if (StopMiddle->val == symbol) - { - (*level)++; - if (StopMiddle->naff) - return StopMiddle; - node = StopMiddle->node; - break; - } - else if (StopMiddle->val < symbol) - StopLow = StopMiddle + 1; - else - StopHigh = StopMiddle; - } - if (StopLow >= StopHigh) - break; - } - return NULL; -} - -static char * -CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen) -{ - /* - * Check compound allow flags - */ - - if (flagflags == 0) - { - if (Affix->flagflags & FF_COMPOUNDONLY) - return NULL; - } - else if (flagflags & FF_COMPOUNDBEGIN) - { - if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG) - return NULL; - if ((Affix->flagflags & FF_COMPOUNDBEGIN) == 0) - if (Affix->type == FF_SUFFIX) - return NULL; - } - else if (flagflags & FF_COMPOUNDMIDDLE) - { - if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == 0 || - (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)) - return NULL; - } - else if (flagflags & FF_COMPOUNDLAST) - { - if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG) - return NULL; - if ((Affix->flagflags & FF_COMPOUNDLAST) == 0) - if (Affix->type == FF_PREFIX) - return NULL; - } - - /* - * make replace pattern of affix - */ - if (Affix->type == FF_SUFFIX) - { - strcpy(newword, word); - strcpy(newword + len - Affix->replen, Affix->find); - if (baselen) /* store length of non-changed part of word */ - *baselen = len - Affix->replen; - } - else - { - /* - * if prefix is a all non-chaged part's length then all word contains - * only prefix and suffix, so out - */ - if (baselen && *baselen + strlen(Affix->find) <= Affix->replen) - return NULL; - strcpy(newword, Affix->find); - strcat(newword, word + Affix->replen); - } - - /* - * check resulting word - */ - if (Affix->issimple) - return newword; - else if (Affix->isregis) - { - if (RS_execute(&(Affix->reg.regis), newword)) - return newword; - } - else - { - int err; - pg_wchar *data; - size_t data_len; - int newword_len; - - /* Convert data string to wide characters 
*/ - newword_len = strlen(newword); - data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar)); - data_len = pg_mb2wchar_with_len(newword, data, newword_len); - - if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0))) - { - pfree(data); - return newword; - } - pfree(data); - } - - return NULL; -} - -static int -addToResult(char **forms, char **cur, char *word) -{ - if (cur - forms >= MAX_NORM - 1) - return 0; - if (forms == cur || strcmp(word, *(cur - 1)) != 0) - { - *cur = pstrdup(word); - *(cur + 1) = NULL; - return 1; - } - - return 0; -} - -static char ** -NormalizeSubWord(SharedIspellDict *Conf, char *word, int flag) -{ - AffixNodeData *suffix = NULL, - *prefix = NULL; - int slevel = 0, - plevel = 0; - int wrdlen = strlen(word), - swrdlen; - char **forms; - char **cur; - char newword[2 * MAXNORMLEN] = ""; - char pnewword[2 * MAXNORMLEN] = ""; - AffixNode *snode = Conf->Suffix, - *pnode; - int i, - j; - - if (wrdlen > MAXNORMLEN) - return NULL; - cur = forms = (char **) palloc(MAX_NORM * sizeof(char *)); - *cur = NULL; - - - /* Check that the word itself is normal form */ - if (FindWord(Conf, word, 0, flag)) - { - *cur = pstrdup(word); - cur++; - *cur = NULL; - } - - /* Find all other NORMAL forms of the 'word' (check only prefix) */ - pnode = Conf->Prefix; - plevel = 0; - while (pnode) - { - prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX); - if (!prefix) - break; - for (j = 0; j < prefix->naff; j++) - { - if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL)) - { - /* prefix success */ - if (FindWord(Conf, newword, prefix->aff[j]->flag, flag)) - cur += addToResult(forms, cur, newword); - } - } - pnode = prefix->node; - } - - /* - * Find all other NORMAL forms of the 'word' (check suffix and then - * prefix) - */ - while (snode) - { - int baselen = 0; - - /* find possible suffix */ - suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX); - if (!suffix) - break; - /* foreach suffix check affix 
*/ - for (i = 0; i < suffix->naff; i++) - { - if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen)) - { - /* suffix success */ - if (FindWord(Conf, newword, suffix->aff[i]->flag, flag)) - cur += addToResult(forms, cur, newword); - - /* now we will look changed word with prefixes */ - pnode = Conf->Prefix; - plevel = 0; - swrdlen = strlen(newword); - while (pnode) - { - prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX); - if (!prefix) - break; - for (j = 0; j < prefix->naff; j++) - { - if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen)) - { - /* prefix success */ - int ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ? - 0 : prefix->aff[j]->flag; - - if (FindWord(Conf, pnewword, ff, flag)) - cur += addToResult(forms, cur, pnewword); - } - } - pnode = prefix->node; - } - } - } - - snode = suffix->node; - } - - if (cur == forms) - { - pfree(forms); - return (NULL); - } - return (forms); -} - -typedef struct SplitVar -{ - int nstem; - int lenstem; - char **stem; - struct SplitVar *next; -} SplitVar; - -static int -CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace) -{ - bool issuffix; - - if (CheckInPlace) - { - while ((*ptr)->affix) - { - if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0) - { - len = (*ptr)->len; - issuffix = (*ptr)->issuffix; - (*ptr)++; - return (issuffix) ? len : 0; - } - (*ptr)++; - } - } - else - { - char *affbegin; - - while ((*ptr)->affix) - { - if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL) - { - len = (*ptr)->len + (affbegin - word); - issuffix = (*ptr)->issuffix; - (*ptr)++; - return (issuffix) ? 
len : 0; - } - (*ptr)++; - } - } - return -1; -} - -static SplitVar * -CopyVar(SplitVar *s, int makedup) -{ - SplitVar *v = (SplitVar *) palloc(sizeof(SplitVar)); - - v->next = NULL; - if (s) - { - int i; - - v->lenstem = s->lenstem; - v->stem = (char **) palloc(sizeof(char *) * v->lenstem); - v->nstem = s->nstem; - for (i = 0; i < s->nstem; i++) - v->stem[i] = (makedup) ? pstrdup(s->stem[i]) : s->stem[i]; - } - else - { - v->lenstem = 16; - v->stem = (char **) palloc(sizeof(char *) * v->lenstem); - v->nstem = 0; - } - return v; -} - -static void -AddStem(SplitVar *v, char *word) -{ - if (v->nstem >= v->lenstem) - { - v->lenstem *= 2; - v->stem = (char **) repalloc(v->stem, sizeof(char *) * v->lenstem); - } - - v->stem[v->nstem] = word; - v->nstem++; -} - -static SplitVar * -SplitToVariants(SharedIspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int wordlen, int startpos, int minpos) -{ - SplitVar *var = NULL; - SPNodeData *StopLow, - *StopHigh, - *StopMiddle = NULL; - SPNode *node = (snode) ? snode : Conf->Dictionary; - int level = (snode) ? minpos : startpos; /* recursive - * minpos==level */ - int lenaff; - CMPDAffix *caff; - char *notprobed; - int compoundflag = 0; - - notprobed = (char *) palloc(wordlen); - memset(notprobed, 1, wordlen); - var = CopyVar(orig, 1); - - while (level < wordlen) - { - /* find word with epenthetic or/and compound affix */ - caff = Conf->CompoundAffix; - while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? 
true : false)) >= 0) - { - /* - * there is one of compound affixes, so check word for existings - */ - char buf[MAXNORMLEN]; - char **subres; - - lenaff = level - startpos + lenaff; - - if (!notprobed[startpos + lenaff - 1]) - continue; - - if (level + lenaff - 1 <= minpos) - continue; - - if (lenaff >= MAXNORMLEN) - continue; /* skip too big value */ - if (lenaff > 0) - memcpy(buf, word + startpos, lenaff); - buf[lenaff] = '\0'; - - if (level == 0) - compoundflag = FF_COMPOUNDBEGIN; - else if (level == wordlen - 1) - compoundflag = FF_COMPOUNDLAST; - else - compoundflag = FF_COMPOUNDMIDDLE; - subres = NormalizeSubWord(Conf, buf, compoundflag); - if (subres) - { - /* Yes, it was a word from dictionary */ - SplitVar *new = CopyVar(var, 0); - SplitVar *ptr = var; - char **sptr = subres; - - notprobed[startpos + lenaff - 1] = 0; - - while (*sptr) - { - AddStem(new, *sptr); - sptr++; - } - pfree(subres); - - while (ptr->next) - ptr = ptr->next; - ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff); - - pfree(new->stem); - pfree(new); - } - } - - if (!node) - break; - - StopLow = node->data; - StopHigh = node->data + node->length; - while (StopLow < StopHigh) - { - StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); - if (StopMiddle->val == ((uint8 *) (word))[level]) - break; - else if (StopMiddle->val < ((uint8 *) (word))[level]) - StopLow = StopMiddle + 1; - else - StopHigh = StopMiddle; - } - - if (StopLow < StopHigh) - { - if (level == FF_COMPOUNDBEGIN) - compoundflag = FF_COMPOUNDBEGIN; - else if (level == wordlen - 1) - compoundflag = FF_COMPOUNDLAST; - else - compoundflag = FF_COMPOUNDMIDDLE; - - /* find infinitive */ - if (StopMiddle->isword && - (StopMiddle->compoundflag & compoundflag) && - notprobed[level]) - { - /* ok, we found full compoundallowed word */ - if (level > minpos) - { - /* and its length more than minimal */ - if (wordlen == level + 1) - { - /* well, it was last word */ - AddStem(var, pnstrdup(word 
+ startpos, wordlen - startpos)); - pfree(notprobed); - return var; - } - else - { - /* then we will search more big word at the same point */ - SplitVar *ptr = var; - - while (ptr->next) - ptr = ptr->next; - ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level); - /* we can find next word */ - level++; - AddStem(var, pnstrdup(word + startpos, level - startpos)); - node = Conf->Dictionary; - startpos = level; - continue; - } - } - } - node = StopMiddle->node; - } - else - node = NULL; - level++; - } - - AddStem(var, pnstrdup(word + startpos, wordlen - startpos)); - pfree(notprobed); - return var; -} - -static void -addNorm(TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant) -{ - if (*lres == NULL) - *lcur = *lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme)); - - if (*lcur - *lres < MAX_NORM - 1) - { - (*lcur)->lexeme = word; - (*lcur)->flags = flags; - (*lcur)->nvariant = NVariant; - (*lcur)++; - (*lcur)->lexeme = NULL; - } -} - -TSLexeme * -SharedNINormalizeWord(SharedIspellDict *Conf, char *word) -{ - char **res; - TSLexeme *lcur = NULL, - *lres = NULL; - uint16 NVariant = 1; - - res = NormalizeSubWord(Conf, word, 0); - - if (res) - { - char **ptr = res; - - while (*ptr && (lcur - lres) < MAX_NORM) - { - addNorm(&lres, &lcur, *ptr, 0, NVariant++); - ptr++; - } - pfree(res); - } - - if (Conf->usecompound) - { - int wordlen = strlen(word); - SplitVar *ptr, - *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1); - int i; - - while (var) - { - if (var->nstem > 1) - { - char **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDLAST); - - if (subres) - { - char **subptr = subres; - - while (*subptr) - { - for (i = 0; i < var->nstem - 1; i++) - { - addNorm(&lres, &lcur, (subptr == subres) ? 
var->stem[i] : pstrdup(var->stem[i]), 0, NVariant); - } - - addNorm(&lres, &lcur, *subptr, 0, NVariant); - subptr++; - NVariant++; - } - - pfree(subres); - var->stem[0] = NULL; - pfree(var->stem[var->nstem - 1]); - } - } - - for (i = 0; i < var->nstem && var->stem[i]; i++) - pfree(var->stem[i]); - ptr = var->next; - pfree(var->stem); - pfree(var); - var = ptr; - } - } - - return lres; -} diff --git a/src/spell.h b/src/spell.h deleted file mode 100644 index b559fc2..0000000 --- a/src/spell.h +++ /dev/null @@ -1,71 +0,0 @@ -/*------------------------------------------------------------------------- - * - * spell.h - * - * Declarations for ISpell dictionary - * - * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group - * - * src/include/tsearch/dicts/spell.h - * - *------------------------------------------------------------------------- - */ - -#ifndef __SHARED_SPELL_H__ -#define __SHARED_SPELL_H__ - -#include "regex/regex.h" -#include "tsearch/dicts/regis.h" -#include "tsearch/ts_public.h" -#include "storage/lwlock.h" -#include "tsearch/dicts/spell.h" - -typedef struct SharedIspellDict -{ - - /* this is used for selecting the dictionary */ - char * dictFile; - char * affixFile; - - int nbytes; - int nwords; - - /* next dictionary in the chain (essentially a linked list) */ - struct SharedIspellDict * next; - - /* the copied fields */ - int naffixes; - AFFIX *Affix; - - AffixNode *Suffix; - AffixNode *Prefix; - - SPNode *Dictionary; - char **AffixData; /* list of flags (characters) used in the dictionary */ - - /* FIXME lenAffixData and nAffixData seems to be the same thing */ - int lenAffixData; /* length of the affix array */ - int nAffixData; /* number of affix data items */ - - CMPDAffix * CompoundAffix; - - unsigned char flagval[256]; - bool usecompound; - -} SharedIspellDict; - -typedef struct SharedStopList -{ - - char * stopFile; - - int nbytes; - - StopList list; - struct SharedStopList * next; - -} SharedStopList; - -TSLexeme 
*SharedNINormalizeWord(SharedIspellDict *Conf, char *word); - -#endif \ No newline at end of file