Thanks for visiting codestin.com
Credit goes to doxygen.postgresql.org

PostgreSQL Source Code git master
pg_locale_builtin.c
Go to the documentation of this file.
1/*-----------------------------------------------------------------------
2 *
3 * PostgreSQL locale utilities for builtin provider
4 *
5 * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
6 *
7 * src/backend/utils/adt/pg_locale_builtin.c
8 *
9 *-----------------------------------------------------------------------
10 */
11
12#include "postgres.h"
13
14#include "catalog/pg_database.h"
16#include "common/unicode_case.h"
18#include "mb/pg_wchar.h"
19#include "miscadmin.h"
20#include "utils/builtins.h"
21#include "utils/pg_locale.h"
22#include "utils/syscache.h"
23
25 MemoryContext context);
26extern char *get_collation_actual_version_builtin(const char *collcollate);
27
/*
 * Iteration state for initcap_wbnext(), the word boundary callback that
 * strtitle_builtin() passes to unicode_strtitle().
 */
struct WordBoundaryState
{
	const char *str;			/* text being scanned */
	size_t		len;			/* upper bound on the scan, in bytes */
	size_t		offset;			/* byte offset of the next character to
								 * examine */
	bool		posix;			/* forwarded to pg_u_isalnum() as its "posix"
								 * argument */
	bool		init;			/* false until the first boundary has been
								 * reported */
	bool		prev_alnum;		/* pg_u_isalnum() result recorded at the most
								 * recently reported boundary */
};
37
38/*
39 * Simple word boundary iterator that draws boundaries each time the result of
40 * pg_u_isalnum() changes.
41 */
42static size_t
44{
45 struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
46
47 while (wbstate->offset < wbstate->len &&
48 wbstate->str[wbstate->offset] != '\0')
49 {
50 pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
51 wbstate->offset);
52 bool curr_alnum = pg_u_isalnum(u, wbstate->posix);
53
54 if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
55 {
56 size_t prev_offset = wbstate->offset;
57
58 wbstate->init = true;
59 wbstate->offset += unicode_utf8len(u);
60 wbstate->prev_alnum = curr_alnum;
61 return prev_offset;
62 }
63
64 wbstate->offset += unicode_utf8len(u);
65 }
66
67 return wbstate->len;
68}
69
70static size_t
71strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
73{
74 return unicode_strlower(dest, destsize, src, srclen,
75 locale->info.builtin.casemap_full);
76}
77
78static size_t
79strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
81{
82 struct WordBoundaryState wbstate = {
83 .str = src,
84 .len = srclen,
85 .offset = 0,
86 .posix = !locale->info.builtin.casemap_full,
87 .init = false,
88 .prev_alnum = false,
89 };
90
91 return unicode_strtitle(dest, destsize, src, srclen,
92 locale->info.builtin.casemap_full,
93 initcap_wbnext, &wbstate);
94}
95
96static size_t
97strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
99{
100 return unicode_strupper(dest, destsize, src, srclen,
101 locale->info.builtin.casemap_full);
102}
103
104static size_t
105strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
107{
108 return unicode_strfold(dest, destsize, src, srclen,
109 locale->info.builtin.casemap_full);
110}
111
112static bool
114{
115 return pg_u_isdigit(wc, !locale->info.builtin.casemap_full);
116}
117
118static bool
120{
121 return pg_u_isalpha(wc);
122}
123
124static bool
126{
127 return pg_u_isalnum(wc, !locale->info.builtin.casemap_full);
128}
129
130static bool
132{
133 return pg_u_isupper(wc);
134}
135
136static bool
138{
139 return pg_u_islower(wc);
140}
141
142static bool
144{
145 return pg_u_isgraph(wc);
146}
147
148static bool
150{
151 return pg_u_isprint(wc);
152}
153
154static bool
156{
157 return pg_u_ispunct(wc, !locale->info.builtin.casemap_full);
158}
159
160static bool
162{
163 return pg_u_isspace(wc);
164}
165
166static bool
168{
169 return IS_HIGHBIT_SET(ch) ||
170 (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
171}
172
173static pg_wchar
175{
176 return unicode_uppercase_simple(wc);
177}
178
179static pg_wchar
181{
182 return unicode_lowercase_simple(wc);
183}
184
187 .strtitle = strtitle_builtin,
188 .strupper = strupper_builtin,
189 .strfold = strfold_builtin,
190 .wc_isdigit = wc_isdigit_builtin,
191 .wc_isalpha = wc_isalpha_builtin,
192 .wc_isalnum = wc_isalnum_builtin,
193 .wc_isupper = wc_isupper_builtin,
194 .wc_islower = wc_islower_builtin,
195 .wc_isgraph = wc_isgraph_builtin,
196 .wc_isprint = wc_isprint_builtin,
197 .wc_ispunct = wc_ispunct_builtin,
198 .wc_isspace = wc_isspace_builtin,
199 .char_is_cased = char_is_cased_builtin,
200 .wc_tolower = wc_tolower_builtin,
201 .wc_toupper = wc_toupper_builtin,
202};
203
206{
207 const char *locstr;
208 pg_locale_t result;
209
210 if (collid == DEFAULT_COLLATION_OID)
211 {
212 HeapTuple tp;
213 Datum datum;
214
216 if (!HeapTupleIsValid(tp))
217 elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
218 datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
219 Anum_pg_database_datlocale);
220 locstr = TextDatumGetCString(datum);
221 ReleaseSysCache(tp);
222 }
223 else
224 {
225 HeapTuple tp;
226 Datum datum;
227
229 if (!HeapTupleIsValid(tp))
230 elog(ERROR, "cache lookup failed for collation %u", collid);
231 datum = SysCacheGetAttrNotNull(COLLOID, tp,
232 Anum_pg_collation_colllocale);
233 locstr = TextDatumGetCString(datum);
234 ReleaseSysCache(tp);
235 }
236
238
239 result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
240
241 result->info.builtin.locale = MemoryContextStrdup(context, locstr);
242 result->info.builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
243 result->deterministic = true;
244 result->collate_is_c = true;
245 result->ctype_is_c = (strcmp(locstr, "C") == 0);
246 if (!result->ctype_is_c)
247 result->ctype = &ctype_methods_builtin;
248
249 return result;
250}
251
252char *
254{
255 /*
256 * The only two supported locales (C and C.UTF-8) are both based on memcmp
257 * and are not expected to change, but track the version anyway.
258 *
259 * Note that the character semantics may change for some locales, but the
260 * collation version only tracks changes to sort order.
261 */
262 if (strcmp(collcollate, "C") == 0)
263 return "1";
264 else if (strcmp(collcollate, "C.UTF-8") == 0)
265 return "1";
266 else if (strcmp(collcollate, "PG_UNICODE_FAST") == 0)
267 return "1";
268 else
270 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
271 errmsg("invalid locale name \"%s\" for builtin provider",
272 collcollate)));
273
274 return NULL; /* keep compiler quiet */
275}
#define TextDatumGetCString(d)
Definition: builtins.h:98
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1155
Oid collid
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:150
Oid MyDatabaseId
Definition: globals.c:94
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
static char * locale
Definition: initdb.c:140
static pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: mbprint.c:53
unsigned int pg_wchar
Definition: mbprint.c:31
int GetDatabaseEncoding(void)
Definition: mbutils.c:1262
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition: mcxt.c:1746
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:1263
const char * builtin_validate_locale(int encoding, const char *locale)
Definition: pg_locale.c:1462
static pg_wchar wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
static bool wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
static pg_wchar wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
static bool wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context)
static bool wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
static size_t strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
static size_t initcap_wbnext(void *state)
static size_t strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
static bool wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
static bool char_is_cased_builtin(char ch, pg_locale_t locale)
static bool wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
static bool wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
static size_t strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
char * get_collation_actual_version_builtin(const char *collcollate)
static bool wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
static bool wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
static bool wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
static size_t strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
static const struct ctype_methods ctype_methods_builtin
static int unicode_utf8len(pg_wchar c)
Definition: pg_wchar.h:607
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:262
uint64_t Datum
Definition: postgres.h:70
unsigned int Oid
Definition: postgres_ext.h:32
size_t(* strlower)(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.h:90
struct pg_locale_struct::@162::@163 builtin
const struct ctype_methods * ctype
Definition: pg_locale.h:157
union pg_locale_struct::@162 info
const char * locale
Definition: pg_locale.h:163
Definition: regguts.h:323
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:264
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:220
Datum SysCacheGetAttrNotNull(int cacheId, HeapTuple tup, AttrNumber attributeNumber)
Definition: syscache.c:625
pg_wchar unicode_uppercase_simple(pg_wchar code)
Definition: unicode_case.c:66
size_t unicode_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
Definition: unicode_case.c:165
size_t unicode_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
Definition: unicode_case.c:101
size_t unicode_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full, WordBoundaryNext wbnext, void *wbstate)
Definition: unicode_case.c:138
pg_wchar unicode_lowercase_simple(pg_wchar code)
Definition: unicode_case.c:50
size_t unicode_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
Definition: unicode_case.c:189
bool pg_u_isspace(pg_wchar code)
bool pg_u_ispunct(pg_wchar code, bool posix)
bool pg_u_isprint(pg_wchar code)
bool pg_u_islower(pg_wchar code)
bool pg_u_isalpha(pg_wchar code)
bool pg_u_isalnum(pg_wchar code, bool posix)
bool pg_u_isupper(pg_wchar code)
bool pg_u_isdigit(pg_wchar code, bool posix)
bool pg_u_isgraph(pg_wchar code)