Thanks for visiting codestin.com
Credit goes to doxygen.postgresql.org

PostgreSQL Source Code git master
pg_locale.c
Go to the documentation of this file.
1/*-----------------------------------------------------------------------
2 *
3 * PostgreSQL locale utilities
4 *
5 * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
6 *
7 * src/backend/utils/adt/pg_locale.c
8 *
9 *-----------------------------------------------------------------------
10 */
11
12/*----------
13 * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
14 * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
15 * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
16 * toupper(), etc. are always in the same fixed locale.
17 *
18 * LC_MESSAGES is settable at run time and will take effect
19 * immediately.
20 *
21 * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are
22 * permanently set to "C", and then we use temporary locale_t
23 * objects when we need to look up locale data based on the GUCs
24 * of the same name. Information is cached when the GUCs change.
25 * The cached information is only used by the formatting functions
26 * (to_char, etc.) and the money type. For the user, this should all be
27 * transparent.
28 *----------
29 */
30
31
32#include "postgres.h"
33
34#include <time.h>
35
36#include "access/htup_details.h"
38#include "catalog/pg_database.h"
39#include "common/hashfn.h"
40#include "common/string.h"
41#include "mb/pg_wchar.h"
42#include "miscadmin.h"
43#include "utils/builtins.h"
44#include "utils/guc_hooks.h"
45#include "utils/lsyscache.h"
46#include "utils/memutils.h"
47#include "utils/pg_locale.h"
48#include "utils/relcache.h"
49#include "utils/syscache.h"
50
51#ifdef WIN32
52#include <shlwapi.h>
53#endif
54
55/* Error triggered for locale-sensitive subroutines */
56#define PGLOCALE_SUPPORT_ERROR(provider) \
57 elog(ERROR, "unsupported collprovider for %s: %c", __func__, provider)
58
59/*
60 * This should be large enough that most strings will fit, but small enough
61 * that we feel comfortable putting it on the stack
62 */
63#define TEXTBUFLEN 1024
64
65#define MAX_L10N_DATA 80
66
67/* pg_locale_builtin.c */
69extern char *get_collation_actual_version_builtin(const char *collcollate);
70
71/* pg_locale_icu.c */
72#ifdef USE_ICU
73extern UCollator *pg_ucol_open(const char *loc_str);
74extern char *get_collation_actual_version_icu(const char *collcollate);
75#endif
77
78/* pg_locale_libc.c */
80extern char *get_collation_actual_version_libc(const char *collcollate);
81
82/* GUC settings */
87
89
90/*
91 * lc_time localization cache.
92 *
93 * We use only the first 7 or 12 entries of these arrays. The last array
94 * element is left as NULL for the convenience of outside code that wants
95 * to sequentially scan these arrays.
96 */
101
102/* is the database's LC_CTYPE the C locale? */
104
106
107/* indicates whether locale information cache is valid */
108static bool CurrentLocaleConvValid = false;
109static bool CurrentLCTimeValid = false;
110
111/* Cache for collation-related knowledge */
112
113typedef struct
114{
115 Oid collid; /* hash key: pg_collation OID */
116 pg_locale_t locale; /* locale_t struct, or 0 if not valid */
117
118 /* needed for simplehash */
120 char status;
122
123#define SH_PREFIX collation_cache
124#define SH_ELEMENT_TYPE collation_cache_entry
125#define SH_KEY_TYPE Oid
126#define SH_KEY collid
127#define SH_HASH_KEY(tb, key) murmurhash32((uint32) key)
128#define SH_EQUAL(tb, a, b) (a == b)
129#define SH_GET_HASH(tb, a) a->hash
130#define SH_SCOPE static inline
131#define SH_STORE_HASH
132#define SH_DECLARE
133#define SH_DEFINE
134#include "lib/simplehash.h"
135
137static collation_cache_hash *CollationCache = NULL;
138
139/*
140 * The collation cache is often accessed repeatedly for the same collation, so
141 * remember the last one used.
142 */
145
146#if defined(WIN32) && defined(LC_MESSAGES)
147static char *IsoLocaleName(const char *);
148#endif
149
150/*
151 * pg_perm_setlocale
152 *
153 * This wraps the libc function setlocale(), with two additions. First, when
154 * changing LC_CTYPE, update gettext's encoding for the current message
155 * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but
156 * not on Windows. Second, if the operation is successful, the corresponding
157 * LC_XXX environment variable is set to match. By setting the environment
158 * variable, we ensure that any subsequent use of setlocale(..., "") will
159 * preserve the settings made through this routine. Of course, LC_ALL must
160 * also be unset to fully ensure that, but that has to be done elsewhere after
161 * all the individual LC_XXX variables have been set correctly. (Thank you
162 * Perl for making this kluge necessary.)
163 */
164char *
165pg_perm_setlocale(int category, const char *locale)
166{
167 char *result;
168 const char *envvar;
169
170#ifndef WIN32
171 result = setlocale(category, locale);
172#else
173
174 /*
175 * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
176 * the given value is good and set it in the environment variables. We
177 * must ignore attempts to set to "", which means "keep using the old
178 * environment value".
179 */
180#ifdef LC_MESSAGES
181 if (category == LC_MESSAGES)
182 {
183 result = (char *) locale;
184 if (locale == NULL || locale[0] == '\0')
185 return result;
186 }
187 else
188#endif
189 result = setlocale(category, locale);
190#endif /* WIN32 */
191
192 if (result == NULL)
193 return result; /* fall out immediately on failure */
194
195 /*
196 * Use the right encoding in translated messages. Under ENABLE_NLS, let
197 * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message
198 * format strings are ASCII, but database-encoding strings may enter the
199 * message via %s. This makes the overall message encoding equal to the
200 * database encoding.
201 */
202 if (category == LC_CTYPE)
203 {
204 static char save_lc_ctype[LOCALE_NAME_BUFLEN];
205
206 /* copy setlocale() return value before callee invokes it again */
207 strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
208 result = save_lc_ctype;
209
210#ifdef ENABLE_NLS
211 SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
212#else
214#endif
215 }
216
217 switch (category)
218 {
219 case LC_COLLATE:
220 envvar = "LC_COLLATE";
221 break;
222 case LC_CTYPE:
223 envvar = "LC_CTYPE";
224 break;
225#ifdef LC_MESSAGES
226 case LC_MESSAGES:
227 envvar = "LC_MESSAGES";
228#ifdef WIN32
229 result = IsoLocaleName(locale);
230 if (result == NULL)
231 result = (char *) locale;
232 elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result);
233#endif /* WIN32 */
234 break;
235#endif /* LC_MESSAGES */
236 case LC_MONETARY:
237 envvar = "LC_MONETARY";
238 break;
239 case LC_NUMERIC:
240 envvar = "LC_NUMERIC";
241 break;
242 case LC_TIME:
243 envvar = "LC_TIME";
244 break;
245 default:
246 elog(FATAL, "unrecognized LC category: %d", category);
247 return NULL; /* keep compiler quiet */
248 }
249
250 if (setenv(envvar, result, 1) != 0)
251 return NULL;
252
253 return result;
254}
255
256
257/*
258 * Is the locale name valid for the locale category?
259 *
260 * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
261 * canonical name is stored there. This is especially useful for figuring out
262 * what locale name "" means (ie, the server environment value). (Actually,
263 * it seems that on most implementations that's the only thing it's good for;
264 * we could wish that setlocale gave back a canonically spelled version of
265 * the locale name, but typically it doesn't.)
266 */
267bool
268check_locale(int category, const char *locale, char **canonname)
269{
270 char *save;
271 char *res;
272
273 /* Don't let Windows' non-ASCII locale names in. */
274 if (!pg_is_ascii(locale))
275 {
277 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
278 errmsg("locale name \"%s\" contains non-ASCII characters",
279 locale)));
280 return false;
281 }
282
283 if (canonname)
284 *canonname = NULL; /* in case of failure */
285
286 save = setlocale(category, NULL);
287 if (!save)
288 return false; /* won't happen, we hope */
289
290 /* save may be pointing at a modifiable scratch variable, see above. */
291 save = pstrdup(save);
292
293 /* set the locale with setlocale, to see if it accepts it. */
294 res = setlocale(category, locale);
295
296 /* save canonical name if requested. */
297 if (res && canonname)
298 *canonname = pstrdup(res);
299
300 /* restore old value. */
301 if (!setlocale(category, save))
302 elog(WARNING, "failed to restore old locale \"%s\"", save);
303 pfree(save);
304
305 /* Don't let Windows' non-ASCII locale names out. */
306 if (canonname && *canonname && !pg_is_ascii(*canonname))
307 {
309 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
310 errmsg("locale name \"%s\" contains non-ASCII characters",
311 *canonname)));
312 pfree(*canonname);
313 *canonname = NULL;
314 return false;
315 }
316
317 return (res != NULL);
318}
319
320
321/*
322 * GUC check/assign hooks
323 *
324 * For most locale categories, the assign hook doesn't actually set the locale
325 * permanently, just reset flags so that the next use will cache the
326 * appropriate values. (See explanation at the top of this file.)
327 *
328 * Note: we accept value = "" as selecting the postmaster's environment
329 * value, whatever it was (so long as the environment setting is legal).
330 * This will have been locked down by an earlier call to pg_perm_setlocale.
331 */
332bool
334{
335 return check_locale(LC_MONETARY, *newval, NULL);
336}
337
338void
339assign_locale_monetary(const char *newval, void *extra)
340{
342}
343
344bool
346{
347 return check_locale(LC_NUMERIC, *newval, NULL);
348}
349
350void
351assign_locale_numeric(const char *newval, void *extra)
352{
354}
355
356bool
358{
359 return check_locale(LC_TIME, *newval, NULL);
360}
361
362void
363assign_locale_time(const char *newval, void *extra)
364{
365 CurrentLCTimeValid = false;
366}
367
368/*
369 * We allow LC_MESSAGES to actually be set globally.
370 *
371 * Note: we normally disallow value = "" because it wouldn't have consistent
372 * semantics (it'd effectively just use the previous value). However, this
373 * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
374 * not even if the attempted setting fails due to invalid environment value.
375 * The idea there is just to accept the environment setting *if possible*
376 * during startup, until we can read the proper value from postgresql.conf.
377 */
378bool
380{
381 if (**newval == '\0')
382 {
383 if (source == PGC_S_DEFAULT)
384 return true;
385 else
386 return false;
387 }
388
389 /*
390 * LC_MESSAGES category does not exist everywhere, but accept it anyway
391 *
392 * On Windows, we can't even check the value, so accept blindly
393 */
394#if defined(LC_MESSAGES) && !defined(WIN32)
395 return check_locale(LC_MESSAGES, *newval, NULL);
396#else
397 return true;
398#endif
399}
400
/*
 * GUC assign hook for lc_messages.
 *
 * Unlike the other locale GUCs, LC_MESSAGES is really installed, through
 * pg_perm_setlocale().  extra is unused.
 */
void
assign_locale_messages(const char *newval, void *extra)
{
	/*
	 * The LC_MESSAGES category does not exist everywhere; where it is
	 * missing the setting is accepted and nothing is done.  The result of
	 * pg_perm_setlocale() is deliberately discarded: a value that cannot be
	 * applied is not treated as an error here.
	 */
#ifdef LC_MESSAGES
	(void) pg_perm_setlocale(LC_MESSAGES, newval);
#endif
}
412
413
/*
 * Frees the malloc'd string members of a struct lconv.  (But not the struct
 * itself.)  It's important that this not throw elog(ERROR).
 *
 * Each freed member is reset to NULL, so the struct never holds dangling
 * pointers and calling this twice on the same struct is harmless.
 *
 * The field list must match struct_lconv_is_valid().
 */
static void
free_struct_lconv(struct lconv *s)
{
	char	  **members[] = {
		&s->decimal_point,
		&s->thousands_sep,
		&s->grouping,
		&s->int_curr_symbol,
		&s->currency_symbol,
		&s->mon_decimal_point,
		&s->mon_thousands_sep,
		&s->mon_grouping,
		&s->positive_sign,
		&s->negative_sign
	};
	size_t		i;

	for (i = 0; i < sizeof(members) / sizeof(members[0]); i++)
	{
		free(*members[i]);		/* free(NULL) is a no-op */
		*members[i] = NULL;
	}
}
432
/*
 * Check that all fields of a struct lconv (or at least, the ones we care
 * about) are non-NULL.  The field list must match free_struct_lconv().
 *
 * Returns true only if every one of the ten string members is set.
 */
static bool
struct_lconv_is_valid(struct lconv *s)
{
	return s->decimal_point != NULL &&
		s->thousands_sep != NULL &&
		s->grouping != NULL &&
		s->int_curr_symbol != NULL &&
		s->currency_symbol != NULL &&
		s->mon_decimal_point != NULL &&
		s->mon_thousands_sep != NULL &&
		s->mon_grouping != NULL &&
		s->positive_sign != NULL &&
		s->negative_sign != NULL;
}
462
463
464/*
465 * Convert the strdup'd string at *str from the specified encoding to the
466 * database encoding.
467 */
468static void
470{
471 char *pstr;
472 char *mstr;
473
474 /* convert the string to the database encoding */
475 pstr = pg_any_to_server(*str, strlen(*str), encoding);
476 if (pstr == *str)
477 return; /* no conversion happened */
478
479 /* need it malloc'd not palloc'd */
480 mstr = strdup(pstr);
481 if (mstr == NULL)
483 (errcode(ERRCODE_OUT_OF_MEMORY),
484 errmsg("out of memory")));
485
486 /* replace old string */
487 free(*str);
488 *str = mstr;
489
490 pfree(pstr);
491}
492
493
494/*
495 * Return the POSIX lconv struct (contains number/money formatting
496 * information) with locale information for all categories.
497 */
498struct lconv *
500{
501 static struct lconv CurrentLocaleConv;
502 static bool CurrentLocaleConvAllocated = false;
503 struct lconv *extlconv;
504 struct lconv tmp;
505 struct lconv worklconv = {0};
506
507 /* Did we do it already? */
509 return &CurrentLocaleConv;
510
511 /* Free any already-allocated storage */
512 if (CurrentLocaleConvAllocated)
513 {
514 free_struct_lconv(&CurrentLocaleConv);
515 CurrentLocaleConvAllocated = false;
516 }
517
518 /*
519 * Use thread-safe method of obtaining a copy of lconv from the operating
520 * system.
521 */
524 &tmp) != 0)
525 elog(ERROR,
526 "could not get lconv for LC_MONETARY = \"%s\", LC_NUMERIC = \"%s\": %m",
528
529 /* Must copy data now so we can re-encode it. */
530 extlconv = &tmp;
531 worklconv.decimal_point = strdup(extlconv->decimal_point);
532 worklconv.thousands_sep = strdup(extlconv->thousands_sep);
533 worklconv.grouping = strdup(extlconv->grouping);
534 worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
535 worklconv.currency_symbol = strdup(extlconv->currency_symbol);
536 worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
537 worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
538 worklconv.mon_grouping = strdup(extlconv->mon_grouping);
539 worklconv.positive_sign = strdup(extlconv->positive_sign);
540 worklconv.negative_sign = strdup(extlconv->negative_sign);
541 /* Copy scalar fields as well */
542 worklconv.int_frac_digits = extlconv->int_frac_digits;
543 worklconv.frac_digits = extlconv->frac_digits;
544 worklconv.p_cs_precedes = extlconv->p_cs_precedes;
545 worklconv.p_sep_by_space = extlconv->p_sep_by_space;
546 worklconv.n_cs_precedes = extlconv->n_cs_precedes;
547 worklconv.n_sep_by_space = extlconv->n_sep_by_space;
548 worklconv.p_sign_posn = extlconv->p_sign_posn;
549 worklconv.n_sign_posn = extlconv->n_sign_posn;
550
551 /* Free the contents of the object populated by pg_localeconv_r(). */
552 pg_localeconv_free(&tmp);
553
554 /* If any of the preceding strdup calls failed, complain now. */
555 if (!struct_lconv_is_valid(&worklconv))
557 (errcode(ERRCODE_OUT_OF_MEMORY),
558 errmsg("out of memory")));
559
560 PG_TRY();
561 {
562 int encoding;
563
564 /*
565 * Now we must perform encoding conversion from whatever's associated
566 * with the locales into the database encoding. If we can't identify
567 * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
568 * use PG_SQL_ASCII, which will result in just validating that the
569 * strings are OK in the database encoding.
570 */
572 if (encoding < 0)
574
575 db_encoding_convert(encoding, &worklconv.decimal_point);
576 db_encoding_convert(encoding, &worklconv.thousands_sep);
577 /* grouping is not text and does not require conversion */
578
580 if (encoding < 0)
582
583 db_encoding_convert(encoding, &worklconv.int_curr_symbol);
584 db_encoding_convert(encoding, &worklconv.currency_symbol);
585 db_encoding_convert(encoding, &worklconv.mon_decimal_point);
586 db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
587 /* mon_grouping is not text and does not require conversion */
588 db_encoding_convert(encoding, &worklconv.positive_sign);
589 db_encoding_convert(encoding, &worklconv.negative_sign);
590 }
591 PG_CATCH();
592 {
593 free_struct_lconv(&worklconv);
594 PG_RE_THROW();
595 }
596 PG_END_TRY();
597
598 /*
599 * Everything is good, so save the results.
600 */
601 CurrentLocaleConv = worklconv;
602 CurrentLocaleConvAllocated = true;
604 return &CurrentLocaleConv;
605}
606
607#ifdef WIN32
608/*
609 * On Windows, strftime() returns its output in encoding CP_ACP (the default
610 * operating system codepage for the computer), which is likely different
611 * from SERVER_ENCODING. This is especially important in Japanese versions
612 * of Windows which will use SJIS encoding, which we don't support as a
613 * server encoding.
614 *
615 * So, instead of using strftime(), use wcsftime() to return the value in
616 * wide characters (internally UTF16) and then convert to UTF8, which we
617 * know how to handle directly.
618 *
619 * Note that this only affects the calls to strftime() in this file, which are
620 * used to get the locale-aware strings. Other parts of the backend use
621 * pg_strftime(), which isn't locale-aware and does not need to be replaced.
622 */
623static size_t
624strftime_l_win32(char *dst, size_t dstlen,
625 const char *format, const struct tm *tm, locale_t locale)
626{
627 size_t len;
628 wchar_t wformat[8]; /* formats used below need 3 chars */
629 wchar_t wbuf[MAX_L10N_DATA];
630
631 /*
632 * Get a wchar_t version of the format string. We only actually use
633 * plain-ASCII formats in this file, so we can say that they're UTF8.
634 */
635 len = MultiByteToWideChar(CP_UTF8, 0, format, -1,
636 wformat, lengthof(wformat));
637 if (len == 0)
638 elog(ERROR, "could not convert format string from UTF-8: error code %lu",
639 GetLastError());
640
641 len = _wcsftime_l(wbuf, MAX_L10N_DATA, wformat, tm, locale);
642 if (len == 0)
643 {
644 /*
645 * wcsftime failed, possibly because the result would not fit in
646 * MAX_L10N_DATA. Return 0 with the contents of dst unspecified.
647 */
648 return 0;
649 }
650
651 len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen - 1,
652 NULL, NULL);
653 if (len == 0)
654 elog(ERROR, "could not convert string to UTF-8: error code %lu",
655 GetLastError());
656
657 dst[len] = '\0';
658
659 return len;
660}
661
662/* redefine strftime_l() */
663#define strftime_l(a,b,c,d,e) strftime_l_win32(a,b,c,d,e)
664#endif /* WIN32 */
665
666/*
667 * Subroutine for cache_locale_time().
668 * Convert the given string from encoding "encoding" to the database
669 * encoding, and store the result at *dst, replacing any previous value.
670 */
671static void
672cache_single_string(char **dst, const char *src, int encoding)
673{
674 char *ptr;
675 char *olddst;
676
677 /* Convert the string to the database encoding, or validate it's OK */
678 ptr = pg_any_to_server(src, strlen(src), encoding);
679
680 /* Store the string in long-lived storage, replacing any previous value */
681 olddst = *dst;
683 if (olddst)
684 pfree(olddst);
685
686 /* Might as well clean up any palloc'd conversion result, too */
687 if (ptr != src)
688 pfree(ptr);
689}
690
691/*
692 * Update the lc_time localization cache variables if needed.
693 */
694void
696{
697 char buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
698 char *bufptr;
699 time_t timenow;
700 struct tm *timeinfo;
701 struct tm timeinfobuf;
702 bool strftimefail = false;
703 int encoding;
704 int i;
706
707 /* did we do this already? */
709 return;
710
711 elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
712
713 errno = ENOENT;
714#ifdef WIN32
715 locale = _create_locale(LC_ALL, locale_time);
716 if (locale == (locale_t) 0)
717 _dosmaperr(GetLastError());
718#else
719 locale = newlocale(LC_ALL_MASK, locale_time, (locale_t) 0);
720#endif
721 if (!locale)
723
724 /* We use times close to current time as data for strftime(). */
725 timenow = time(NULL);
726 timeinfo = gmtime_r(&timenow, &timeinfobuf);
727
728 /* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
729 bufptr = buf;
730
731 /*
732 * MAX_L10N_DATA is sufficient buffer space for every known locale, and
733 * POSIX defines no strftime() errors. (Buffer space exhaustion is not an
734 * error.) An implementation might report errors (e.g. ENOMEM) by
735 * returning 0 (or, less plausibly, a negative value) and setting errno.
736 * Report errno just in case the implementation did that, but clear it in
737 * advance of the calls so we don't emit a stale, unrelated errno.
738 */
739 errno = 0;
740
741 /* localized days */
742 for (i = 0; i < 7; i++)
743 {
744 timeinfo->tm_wday = i;
745 if (strftime_l(bufptr, MAX_L10N_DATA, "%a", timeinfo, locale) <= 0)
746 strftimefail = true;
747 bufptr += MAX_L10N_DATA;
748 if (strftime_l(bufptr, MAX_L10N_DATA, "%A", timeinfo, locale) <= 0)
749 strftimefail = true;
750 bufptr += MAX_L10N_DATA;
751 }
752
753 /* localized months */
754 for (i = 0; i < 12; i++)
755 {
756 timeinfo->tm_mon = i;
757 timeinfo->tm_mday = 1; /* make sure we don't have invalid date */
758 if (strftime_l(bufptr, MAX_L10N_DATA, "%b", timeinfo, locale) <= 0)
759 strftimefail = true;
760 bufptr += MAX_L10N_DATA;
761 if (strftime_l(bufptr, MAX_L10N_DATA, "%B", timeinfo, locale) <= 0)
762 strftimefail = true;
763 bufptr += MAX_L10N_DATA;
764 }
765
766#ifdef WIN32
767 _free_locale(locale);
768#else
769 freelocale(locale);
770#endif
771
772 /*
773 * At this point we've done our best to clean up, and can throw errors, or
774 * call functions that might throw errors, with a clean conscience.
775 */
776 if (strftimefail)
777 elog(ERROR, "strftime_l() failed");
778
779#ifndef WIN32
780
781 /*
782 * As in PGLC_localeconv(), we must convert strftime()'s output from the
783 * encoding implied by LC_TIME to the database encoding. If we can't
784 * identify the LC_TIME encoding, just perform encoding validation.
785 */
787 if (encoding < 0)
789
790#else
791
792 /*
793 * On Windows, strftime_l_win32() always returns UTF8 data, so convert from
794 * that if necessary.
795 */
797
798#endif /* WIN32 */
799
800 bufptr = buf;
801
802 /* localized days */
803 for (i = 0; i < 7; i++)
804 {
806 bufptr += MAX_L10N_DATA;
808 bufptr += MAX_L10N_DATA;
809 }
810 localized_abbrev_days[7] = NULL;
811 localized_full_days[7] = NULL;
812
813 /* localized months */
814 for (i = 0; i < 12; i++)
815 {
817 bufptr += MAX_L10N_DATA;
819 bufptr += MAX_L10N_DATA;
820 }
821 localized_abbrev_months[12] = NULL;
822 localized_full_months[12] = NULL;
823
824 CurrentLCTimeValid = true;
825}
826
827
828#if defined(WIN32) && defined(LC_MESSAGES)
829/*
830 * Convert a Windows setlocale() argument to a Unix-style one.
831 *
832 * Regardless of platform, we install message catalogs under a Unix-style
833 * LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings
834 * following that style will elicit localized interface strings.
835 *
836 * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
837 * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
838 * case-insensitive. setlocale() returns the fully-qualified form; for
839 * example, setlocale("thaI") returns "Thai_Thailand.874". Internally,
840 * setlocale() and _create_locale() select a "locale identifier"[1] and store
841 * it in an undocumented _locale_t field. From that LCID, we can retrieve the
842 * ISO 639 language and the ISO 3166 country. Character encoding does not
843 * matter, because the server and client encodings govern that.
844 *
845 * Windows Vista introduced the "locale name" concept[2], closely following
846 * RFC 4646. Locale identifiers are now deprecated. Starting with Visual
847 * Studio 2012, setlocale() accepts locale names in addition to the strings it
848 * accepted historically. It does not standardize them; setlocale("Th-tH")
849 * returns "Th-tH". setlocale(category, "") still returns a traditional
850 * string. Furthermore, msvcr110.dll changed the undocumented _locale_t
851 * content to carry locale names instead of locale identifiers.
852 *
853 * Visual Studio 2015 should still be able to do the same as Visual Studio
854 * 2012, but the declaration of locale_name is missing from _locale_t, so
855 * code relying on it fails to compile. We therefore fall back to
856 * enumerating all system locales with EnumSystemLocalesEx() to find the
857 * required locale name. If the input argument is already in Unix style, we
858 * can get the ISO locale name directly by calling GetLocaleInfoEx() with
859 * LCType LOCALE_SNAME.
860 *
861 * This function returns a pointer to a static buffer bearing the converted
862 * name or NULL if conversion fails.
863 *
864 * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers
865 * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names
866 */
867
868/*
869 * Callback function for EnumSystemLocalesEx() in get_iso_localename().
870 *
871 * This function enumerates all system locales, searching for one that matches
872 * an input with the format: <Language>[_<Country>], e.g.
873 * English[_United States]
874 *
875 * The input is a three wchar_t array as an LPARAM. The first element is the
876 * locale_name we want to match, the second element is an allocated buffer
877 * where the Unix-style locale is copied if a match is found, and the third
878 * element is the search status, 1 if a match was found, 0 otherwise.
879 */
880static BOOL CALLBACK
881search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
882{
883 wchar_t test_locale[LOCALE_NAME_MAX_LENGTH];
884 wchar_t **argv;
885
886 (void) (dwFlags);
887
888 argv = (wchar_t **) lparam;
889 *argv[2] = (wchar_t) 0;
890
891 memset(test_locale, 0, sizeof(test_locale));
892
893 /* Get the name of the <Language> in English */
894 if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME,
895 test_locale, LOCALE_NAME_MAX_LENGTH))
896 {
897 /*
898 * If the enumerated locale does not have a hyphen ("en") OR the
899 * locale_name input does not have an underscore ("English"), we only
900 * need to compare the <Language> tags.
901 */
902 if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL)
903 {
904 if (_wcsicmp(argv[0], test_locale) == 0)
905 {
906 wcscpy(argv[1], pStr);
907 *argv[2] = (wchar_t) 1;
908 return FALSE;
909 }
910 }
911
912 /*
913 * We have to compare a full <Language>_<Country> tag, so we append
914 * the underscore and name of the country/region in English, e.g.
915 * "English_United States".
916 */
917 else
918 {
919 size_t len;
920
921 wcscat(test_locale, L"_");
922 len = wcslen(test_locale);
923 if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME,
924 test_locale + len,
925 LOCALE_NAME_MAX_LENGTH - len))
926 {
927 if (_wcsicmp(argv[0], test_locale) == 0)
928 {
929 wcscpy(argv[1], pStr);
930 *argv[2] = (wchar_t) 1;
931 return FALSE;
932 }
933 }
934 }
935 }
936
937 return TRUE;
938}
939
940/*
941 * This function converts a Windows locale name to an ISO formatted version
942 * for Visual Studio 2015 or greater.
943 *
944 * Returns NULL, if no valid conversion was found.
945 */
946static char *
947get_iso_localename(const char *winlocname)
948{
949 wchar_t wc_locale_name[LOCALE_NAME_MAX_LENGTH];
950 wchar_t buffer[LOCALE_NAME_MAX_LENGTH];
951 static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
952 char *period;
953 int len;
954 int ret_val;
955
956 /*
957 * Valid locales have the following syntax:
958 * <Language>[_<Country>[.<CodePage>]]
959 *
960 * GetLocaleInfoEx can only take locale name without code-page and for the
961 * purpose of this API the code-page doesn't matter.
962 */
963 period = strchr(winlocname, '.');
964 if (period != NULL)
965 len = period - winlocname;
966 else
967 len = pg_mbstrlen(winlocname);
968
969 memset(wc_locale_name, 0, sizeof(wc_locale_name));
970 memset(buffer, 0, sizeof(buffer));
971 MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name,
972 LOCALE_NAME_MAX_LENGTH);
973
974 /*
975 * If the lc_messages is already a Unix-style string, we have a direct
976 * match with LOCALE_SNAME, e.g. en-US, en_US.
977 */
978 ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer,
979 LOCALE_NAME_MAX_LENGTH);
980 if (!ret_val)
981 {
982 /*
983 * Search for a locale in the system that matches language and country
984 * name.
985 */
986 wchar_t *argv[3];
987
988 argv[0] = wc_locale_name;
989 argv[1] = buffer;
990 argv[2] = (wchar_t *) &ret_val;
991 EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv,
992 NULL);
993 }
994
995 if (ret_val)
996 {
997 size_t rc;
998 char *hyphen;
999
1000 /* Locale names use only ASCII, any conversion locale suffices. */
1001 rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
1002 if (rc == -1 || rc == sizeof(iso_lc_messages))
1003 return NULL;
1004
1005 /*
1006 * Since the message catalogs sit on a case-insensitive filesystem, we
1007 * need not standardize letter case here. So long as we do not ship
1008 * message catalogs for which it would matter, we also need not
1009 * translate the script/variant portion, e.g. uz-Cyrl-UZ to
1010 * uz_UZ@cyrillic. Simply replace the hyphen with an underscore.
1011 */
1012 hyphen = strchr(iso_lc_messages, '-');
1013 if (hyphen)
1014 *hyphen = '_';
1015 return iso_lc_messages;
1016 }
1017
1018 return NULL;
1019}
1020
1021static char *
1022IsoLocaleName(const char *winlocname)
1023{
1024 static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1025
1026 if (pg_strcasecmp("c", winlocname) == 0 ||
1027 pg_strcasecmp("posix", winlocname) == 0)
1028 {
1029 strcpy(iso_lc_messages, "C");
1030 return iso_lc_messages;
1031 }
1032 else
1033 return get_iso_localename(winlocname);
1034}
1035
1036#endif /* WIN32 && LC_MESSAGES */
1037
1038/*
1039 * Create a new pg_locale_t struct for the given collation oid.
1040 */
1041static pg_locale_t
1043{
1044 HeapTuple tp;
1045 Form_pg_collation collform;
1046 pg_locale_t result;
1047 Datum datum;
1048 bool isnull;
1049
1050 tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1051 if (!HeapTupleIsValid(tp))
1052 elog(ERROR, "cache lookup failed for collation %u", collid);
1053 collform = (Form_pg_collation) GETSTRUCT(tp);
1054
1055 if (collform->collprovider == COLLPROVIDER_BUILTIN)
1056 result = create_pg_locale_builtin(collid, context);
1057 else if (collform->collprovider == COLLPROVIDER_ICU)
1058 result = create_pg_locale_icu(collid, context);
1059 else if (collform->collprovider == COLLPROVIDER_LIBC)
1060 result = create_pg_locale_libc(collid, context);
1061 else
1062 /* shouldn't happen */
1063 PGLOCALE_SUPPORT_ERROR(collform->collprovider);
1064
1065 result->is_default = false;
1066
1067 Assert((result->collate_is_c && result->collate == NULL) ||
1068 (!result->collate_is_c && result->collate != NULL));
1069
1070 Assert((result->ctype_is_c && result->ctype == NULL) ||
1071 (!result->ctype_is_c && result->ctype != NULL));
1072
1073 datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
1074 &isnull);
1075 if (!isnull)
1076 {
1077 char *actual_versionstr;
1078 char *collversionstr;
1079
1080 collversionstr = TextDatumGetCString(datum);
1081
1082 if (collform->collprovider == COLLPROVIDER_LIBC)
1083 datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
1084 else
1085 datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
1086
1087 actual_versionstr = get_collation_actual_version(collform->collprovider,
1088 TextDatumGetCString(datum));
1089 if (!actual_versionstr)
1090 {
1091 /*
1092 * This could happen when specifying a version in CREATE COLLATION
1093 * but the provider does not support versioning, or manually
1094 * creating a mess in the catalogs.
1095 */
1096 ereport(ERROR,
1097 (errmsg("collation \"%s\" has no actual version, but a version was recorded",
1098 NameStr(collform->collname))));
1099 }
1100
1101 if (strcmp(actual_versionstr, collversionstr) != 0)
1103 (errmsg("collation \"%s\" has version mismatch",
1104 NameStr(collform->collname)),
1105 errdetail("The collation in the database was created using version %s, "
1106 "but the operating system provides version %s.",
1107 collversionstr, actual_versionstr),
1108 errhint("Rebuild all objects affected by this collation and run "
1109 "ALTER COLLATION %s REFRESH VERSION, "
1110 "or build PostgreSQL with the right library version.",
1111 quote_qualified_identifier(get_namespace_name(collform->collnamespace),
1112 NameStr(collform->collname)))));
1113 }
1114
1115 ReleaseSysCache(tp);
1116
1117 return result;
1118}
1119
1120/*
1121 * Initialize default_locale with database locale settings.
1122 */
1123void
1125{
1126 HeapTuple tup;
1127 Form_pg_database dbform;
1128 pg_locale_t result;
1129
1130 Assert(default_locale == NULL);
1131
1132 /* Fetch our pg_database row normally, via syscache */
1133 tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
1134 if (!HeapTupleIsValid(tup))
1135 elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
1136 dbform = (Form_pg_database) GETSTRUCT(tup);
1137
1138 if (dbform->datlocprovider == COLLPROVIDER_BUILTIN)
1139 result = create_pg_locale_builtin(DEFAULT_COLLATION_OID,
1141 else if (dbform->datlocprovider == COLLPROVIDER_ICU)
1142 result = create_pg_locale_icu(DEFAULT_COLLATION_OID,
1144 else if (dbform->datlocprovider == COLLPROVIDER_LIBC)
1145 result = create_pg_locale_libc(DEFAULT_COLLATION_OID,
1147 else
1148 /* shouldn't happen */
1149 PGLOCALE_SUPPORT_ERROR(dbform->datlocprovider);
1150
1151 result->is_default = true;
1152 ReleaseSysCache(tup);
1153
1154 default_locale = result;
1155}
1156
1157/*
1158 * Create a pg_locale_t from a collation OID. Results are cached for the
1159 * lifetime of the backend. Thus, do not free the result with freelocale().
1160 *
1161 * For simplicity, we always generate COLLATE + CTYPE even though we
1162 * might only need one of them. Since this is called only once per session,
1163 * it shouldn't cost much.
1164 */
1167{
1168 collation_cache_entry *cache_entry;
1169 bool found;
1170
1171 if (collid == DEFAULT_COLLATION_OID)
1172 return default_locale;
1173
1174 if (!OidIsValid(collid))
1175 elog(ERROR, "cache lookup failed for collation %u", collid);
1176
1178
1181
1182 if (CollationCache == NULL)
1183 {
1185 "collation cache",
1187 CollationCache = collation_cache_create(CollationCacheContext,
1188 16, NULL);
1189 }
1190
1191 cache_entry = collation_cache_insert(CollationCache, collid, &found);
1192 if (!found)
1193 {
1194 /*
1195 * Make sure cache entry is marked invalid, in case we fail before
1196 * setting things.
1197 */
1198 cache_entry->locale = 0;
1199 }
1200
1201 if (cache_entry->locale == 0)
1202 {
1204 }
1205
1207 last_collation_cache_locale = cache_entry->locale;
1208
1209 return cache_entry->locale;
1210}
1211
1212/*
1213 * Get provider-specific collation version string for the given collation from
1214 * the operating system/library.
1215 */
1216char *
1217get_collation_actual_version(char collprovider, const char *collcollate)
1218{
1219 char *collversion = NULL;
1220
1221 if (collprovider == COLLPROVIDER_BUILTIN)
1222 collversion = get_collation_actual_version_builtin(collcollate);
1223#ifdef USE_ICU
1224 else if (collprovider == COLLPROVIDER_ICU)
1225 collversion = get_collation_actual_version_icu(collcollate);
1226#endif
1227 else if (collprovider == COLLPROVIDER_LIBC)
1228 collversion = get_collation_actual_version_libc(collcollate);
1229
1230 return collversion;
1231}
1232
1233size_t
1234pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
1236{
1237 return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
1238}
1239
1240size_t
1241pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
1243{
1244 return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
1245}
1246
1247size_t
1248pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
1250{
1251 return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
1252}
1253
1254size_t
1255pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
1257{
1258 if (locale->ctype->strfold)
1259 return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
1260 else
1261 return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
1262}
1263
1264/*
1265 * pg_strcoll
1266 *
1267 * Like pg_strncoll for NUL-terminated input strings.
1268 */
1269int
1270pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
1271{
1272 return locale->collate->strncoll(arg1, -1, arg2, -1, locale);
1273}
1274
1275/*
1276 * pg_strncoll
1277 *
1278 * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll_l() or wcscoll_l() as
1279 * appropriate for the given locale, platform, and database encoding. If the
1280 * locale is not specified, use the database collation.
1281 *
1282 * The input strings must be encoded in the database encoding. If an input
1283 * string is NUL-terminated, its length may be specified as -1.
1284 *
1285 * The caller is responsible for breaking ties if the collation is
1286 * deterministic; this maintains consistency with pg_strnxfrm(), which cannot
1287 * easily account for deterministic collations.
1288 */
1289int
1290pg_strncoll(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
1292{
1293 return locale->collate->strncoll(arg1, len1, arg2, len2, locale);
1294}
1295
1296/*
1297 * Return true if the collation provider supports pg_strxfrm() and
1298 * pg_strnxfrm(); otherwise false.
1299 *
1300 *
1301 * No similar problem is known for the ICU provider.
1302 */
1303bool
1305{
1306 /*
1307 * locale->collate->strnxfrm is still a required method, even if it may
1308 * have the wrong behavior, because the planner uses it for estimates in
1309 * some cases.
1310 */
1311 return locale->collate->strxfrm_is_safe;
1312}
1313
1314/*
1315 * pg_strxfrm
1316 *
1317 * Like pg_strnxfrm for a NUL-terminated input string.
1318 */
1319size_t
1320pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
1321{
1322 return locale->collate->strnxfrm(dest, destsize, src, -1, locale);
1323}
1324
1325/*
1326 * pg_strnxfrm
1327 *
1328 * Transforms 'src' to a nul-terminated string stored in 'dest' such that
1329 * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on
1330 * untransformed strings.
1331 *
1332 * The input string must be encoded in the database encoding. If the input
1333 * string is NUL-terminated, its length may be specified as -1. If 'destsize'
1334 * is zero, 'dest' may be NULL.
1335 *
1336 * Not all providers support pg_strnxfrm() safely. The caller should check
1337 * pg_strxfrm_enabled() first, otherwise this function may return wrong
1338 * results or an error.
1339 *
1340 * Returns the number of bytes needed (or more) to store the transformed
1341 * string, excluding the terminating nul byte. If the value returned is
1342 * 'destsize' or greater, the resulting contents of 'dest' are undefined.
1343 */
1344size_t
1345pg_strnxfrm(char *dest, size_t destsize, const char *src, ssize_t srclen,
1347{
1348 return locale->collate->strnxfrm(dest, destsize, src, srclen, locale);
1349}
1350
1351/*
1352 * Return true if the collation provider supports pg_strxfrm_prefix() and
1353 * pg_strnxfrm_prefix(); otherwise false.
1354 */
1355bool
1357{
1358 return (locale->collate->strnxfrm_prefix != NULL);
1359}
1360
1361/*
1362 * pg_strxfrm_prefix
1363 *
1364 * Like pg_strnxfrm_prefix for a NUL-terminated input string.
1365 */
1366size_t
1367pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
1369{
1370 return locale->collate->strnxfrm_prefix(dest, destsize, src, -1, locale);
1371}
1372
1373/*
1374 * pg_strnxfrm_prefix
1375 *
1376 * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary
1377 * memcmp() on the byte sequence is equivalent to pg_strncoll() on
1378 * untransformed strings. The result is not nul-terminated.
1379 *
1380 * The input string must be encoded in the database encoding. If the input
1381 * string is NUL-terminated, its length may be specified as -1.
1382 *
1383 * Not all providers support pg_strnxfrm_prefix() safely. The caller should
1384 * check pg_strxfrm_prefix_enabled() first, otherwise this function may return
1385 * wrong results or an error.
1386 *
1387 * If destsize is not large enough to hold the resulting byte sequence, stores
1388 * only the first destsize bytes in 'dest'. Returns the number of bytes
1389 * actually copied to 'dest'.
1390 */
1391size_t
1392pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
1393 ssize_t srclen, pg_locale_t locale)
1394{
1395 return locale->collate->strnxfrm_prefix(dest, destsize, src, srclen, locale);
1396}
1397
1398/*
1399 * char_is_cased()
1400 *
1401 * Fuzzy test of whether the given char is case-varying or not. The argument
1402 * is a single byte, so in a multibyte encoding, just assume any non-ASCII
1403 * char is case-varying.
1404 */
1405bool
1407{
1408 return locale->ctype->char_is_cased(ch, locale);
1409}
1410
1411/*
1412 * char_tolower_enabled()
1413 *
1414 * Does the provider support char_tolower()?
1415 */
1416bool
1418{
1419 return (locale->ctype->char_tolower != NULL);
1420}
1421
1422/*
1423 * char_tolower()
1424 *
1425 * Convert char (single-byte encoding) to lowercase.
1426 */
1427char
1429{
1430 return locale->ctype->char_tolower(ch, locale);
1431}
1432
1433/*
1434 * Return required encoding ID for the given locale, or -1 if any encoding is
1435 * valid for the locale.
1436 */
1437int
1439{
1440 if (strcmp(locale, "C") == 0)
1441 return -1;
1442 else if (strcmp(locale, "C.UTF-8") == 0)
1443 return PG_UTF8;
1444 else if (strcmp(locale, "PG_UNICODE_FAST") == 0)
1445 return PG_UTF8;
1446
1447
1448 ereport(ERROR,
1449 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1450 errmsg("invalid locale name \"%s\" for builtin provider",
1451 locale)));
1452
1453 return 0; /* keep compiler quiet */
1454}
1455
1456
1457/*
1458 * Validate the locale and encoding combination, and return the canonical form
1459 * of the locale name.
1460 */
1461const char *
1463{
1464 const char *canonical_name = NULL;
1465 int required_encoding;
1466
1467 if (strcmp(locale, "C") == 0)
1468 canonical_name = "C";
1469 else if (strcmp(locale, "C.UTF-8") == 0 || strcmp(locale, "C.UTF8") == 0)
1470 canonical_name = "C.UTF-8";
1471 else if (strcmp(locale, "PG_UNICODE_FAST") == 0)
1472 canonical_name = "PG_UNICODE_FAST";
1473
1474 if (!canonical_name)
1475 ereport(ERROR,
1476 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1477 errmsg("invalid locale name \"%s\" for builtin provider",
1478 locale)));
1479
1480 required_encoding = builtin_locale_encoding(canonical_name);
1481 if (required_encoding >= 0 && encoding != required_encoding)
1482 ereport(ERROR,
1483 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1484 errmsg("encoding \"%s\" does not match locale \"%s\"",
1486
1487 return canonical_name;
1488}
1489
1490
1491
1492/*
1493 * Return the BCP47 language tag representation of the requested locale.
1494 *
1495 * This function should be called before passing the string to ucol_open(),
1496 * because conversion to a language tag also performs "level 2
1497 * canonicalization". In addition to producing a consistent format, level 2
1498 * canonicalization is able to more accurately interpret different input
1499 * locale string formats, such as POSIX and .NET IDs.
1500 */
1501char *
1502icu_language_tag(const char *loc_str, int elevel)
1503{
1504#ifdef USE_ICU
1505 UErrorCode status;
1506 char *langtag;
1507 size_t buflen = 32; /* arbitrary starting buffer size */
1508 const bool strict = true;
1509
1510 /*
1511 * A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
1512 * RFC5646 section 4.4). Additionally, in older ICU versions,
1513 * uloc_toLanguageTag() doesn't always return the ultimate length on the
1514 * first call, necessitating a loop.
1515 */
1516 langtag = palloc(buflen);
1517 while (true)
1518 {
1519 status = U_ZERO_ERROR;
1520 uloc_toLanguageTag(loc_str, langtag, buflen, strict, &status);
1521
1522 /* try again if the buffer is not large enough */
1523 if ((status == U_BUFFER_OVERFLOW_ERROR ||
1524 status == U_STRING_NOT_TERMINATED_WARNING) &&
1525 buflen < MaxAllocSize)
1526 {
1527 buflen = Min(buflen * 2, MaxAllocSize);
1528 langtag = repalloc(langtag, buflen);
1529 continue;
1530 }
1531
1532 break;
1533 }
1534
1535 if (U_FAILURE(status))
1536 {
1537 pfree(langtag);
1538
1539 if (elevel > 0)
1540 ereport(elevel,
1541 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1542 errmsg("could not convert locale name \"%s\" to language tag: %s",
1543 loc_str, u_errorName(status))));
1544 return NULL;
1545 }
1546
1547 return langtag;
1548#else /* not USE_ICU */
1549 ereport(ERROR,
1550 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1551 errmsg("ICU is not supported in this build")));
1552 return NULL; /* keep compiler quiet */
1553#endif /* not USE_ICU */
1554}
1555
1556/*
1557 * Perform best-effort check that the locale is a valid one.
1558 */
1559void
1560icu_validate_locale(const char *loc_str)
1561{
1562#ifdef USE_ICU
1563 UCollator *collator;
1564 UErrorCode status;
1565 char lang[ULOC_LANG_CAPACITY];
1566 bool found = false;
1567 int elevel = icu_validation_level;
1568
1569 /* no validation */
1570 if (elevel < 0)
1571 return;
1572
1573 /* downgrade to WARNING during pg_upgrade */
1574 if (IsBinaryUpgrade && elevel > WARNING)
1575 elevel = WARNING;
1576
1577 /* validate that we can extract the language */
1578 status = U_ZERO_ERROR;
1579 uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
1580 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
1581 {
1582 ereport(elevel,
1583 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1584 errmsg("could not get language from ICU locale \"%s\": %s",
1585 loc_str, u_errorName(status)),
1586 errhint("To disable ICU locale validation, set the parameter \"%s\" to \"%s\".",
1587 "icu_validation_level", "disabled")));
1588 return;
1589 }
1590
1591 /* check for special language name */
1592 if (strcmp(lang, "") == 0 ||
1593 strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
1594 found = true;
1595
1596 /* search for matching language within ICU */
1597 for (int32_t i = 0; !found && i < uloc_countAvailable(); i++)
1598 {
1599 const char *otherloc = uloc_getAvailable(i);
1600 char otherlang[ULOC_LANG_CAPACITY];
1601
1602 status = U_ZERO_ERROR;
1603 uloc_getLanguage(otherloc, otherlang, ULOC_LANG_CAPACITY, &status);
1604 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
1605 continue;
1606
1607 if (strcmp(lang, otherlang) == 0)
1608 found = true;
1609 }
1610
1611 if (!found)
1612 ereport(elevel,
1613 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1614 errmsg("ICU locale \"%s\" has unknown language \"%s\"",
1615 loc_str, lang),
1616 errhint("To disable ICU locale validation, set the parameter \"%s\" to \"%s\".",
1617 "icu_validation_level", "disabled")));
1618
1619 /* check that it can be opened */
1620 collator = pg_ucol_open(loc_str);
1621 ucol_close(collator);
1622#else /* not USE_ICU */
1623 /* could get here if a collation was created by a build with ICU */
1624 ereport(ERROR,
1625 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1626 errmsg("ICU is not supported in this build")));
1627#endif /* not USE_ICU */
1628}
#define TextDatumGetCString(d)
Definition: builtins.h:98
#define NameStr(name)
Definition: c.h:752
#define Min(x, y)
Definition: c.h:1004
uint32_t uint32
Definition: c.h:539
#define lengthof(array)
Definition: c.h:788
#define OidIsValid(objectId)
Definition: c.h:775
Oid collid
int errdetail(const char *fmt,...)
Definition: elog.c:1207
int errhint(const char *fmt,...)
Definition: elog.c:1321
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define PG_RE_THROW()
Definition: elog.h:405
#define DEBUG3
Definition: elog.h:28
#define FATAL
Definition: elog.h:41
#define PG_TRY(...)
Definition: elog.h:372
#define WARNING
Definition: elog.h:36
#define PG_END_TRY(...)
Definition: elog.h:397
#define ERROR
Definition: elog.h:39
#define PG_CATCH(...)
Definition: elog.h:382
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:150
#define MaxAllocSize
Definition: fe_memutils.h:22
bool IsBinaryUpgrade
Definition: globals.c:121
Oid MyDatabaseId
Definition: globals.c:94
#define newval
GucSource
Definition: guc.h:112
@ PGC_S_DEFAULT
Definition: guc.h:113
Assert(PointerIsAligned(start, uint64))
const char * str
#define free(a)
Definition: header.h:65
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
static void * GETSTRUCT(const HeapTupleData *tuple)
Definition: htup_details.h:728
#define period
Definition: indent_codes.h:66
static char * locale
Definition: initdb.c:140
int i
Definition: isn.c:77
static struct pg_tm tm
Definition: localtime.c:104
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3533
int GetDatabaseEncoding(void)
Definition: mbutils.c:1262
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:677
int pg_mbstrlen(const char *mbstr)
Definition: mbutils.c:1038
void SetMessageEncoding(int encoding)
Definition: mbutils.c:1172
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition: mcxt.c:1746
char * pstrdup(const char *in)
Definition: mcxt.c:1759
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1610
void pfree(void *pointer)
Definition: mcxt.c:1594
MemoryContext TopMemoryContext
Definition: mcxt.c:166
void * palloc(Size size)
Definition: mcxt.c:1365
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
static char format
FormData_pg_collation * Form_pg_collation
Definition: pg_collation.h:58
const void size_t len
FormData_pg_database * Form_pg_database
Definition: pg_database.h:96
int32 encoding
Definition: pg_database.h:41
char char_tolower(unsigned char ch, pg_locale_t locale)
Definition: pg_locale.c:1428
int icu_validation_level
Definition: pg_locale.c:88
static pg_locale_t last_collation_cache_locale
Definition: pg_locale.c:144
void cache_locale_time(void)
Definition: pg_locale.c:695
size_t pg_strnxfrm(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1345
bool pg_strxfrm_enabled(pg_locale_t locale)
Definition: pg_locale.c:1304
char * localized_full_months[12+1]
Definition: pg_locale.c:100
void icu_validate_locale(const char *loc_str)
Definition: pg_locale.c:1560
pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context)
static bool CurrentLCTimeValid
Definition: pg_locale.c:109
void assign_locale_time(const char *newval, void *extra)
Definition: pg_locale.c:363
char * get_collation_actual_version(char collprovider, const char *collcollate)
Definition: pg_locale.c:1217
pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context)
bool check_locale_time(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:357
char * locale_messages
Definition: pg_locale.c:83
char * locale_numeric
Definition: pg_locale.c:85
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1166
size_t pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1255
int builtin_locale_encoding(const char *locale)
Definition: pg_locale.c:1438
size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1392
bool database_ctype_is_c
Definition: pg_locale.c:103
char * pg_perm_setlocale(int category, const char *locale)
Definition: pg_locale.c:165
#define PGLOCALE_SUPPORT_ERROR(provider)
Definition: pg_locale.c:56
static pg_locale_t create_pg_locale(Oid collid, MemoryContext context)
Definition: pg_locale.c:1042
char * locale_time
Definition: pg_locale.c:86
static void cache_single_string(char **dst, const char *src, int encoding)
Definition: pg_locale.c:672
char * get_collation_actual_version_libc(const char *collcollate)
size_t pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1234
bool check_locale_numeric(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:345
bool char_tolower_enabled(pg_locale_t locale)
Definition: pg_locale.c:1417
static void db_encoding_convert(int encoding, char **str)
Definition: pg_locale.c:469
void assign_locale_numeric(const char *newval, void *extra)
Definition: pg_locale.c:351
bool check_locale_messages(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:379
#define MAX_L10N_DATA
Definition: pg_locale.c:65
char * get_collation_actual_version_builtin(const char *collcollate)
static void free_struct_lconv(struct lconv *s)
Definition: pg_locale.c:419
static MemoryContext CollationCacheContext
Definition: pg_locale.c:136
void assign_locale_messages(const char *newval, void *extra)
Definition: pg_locale.c:402
static bool CurrentLocaleConvValid
Definition: pg_locale.c:108
struct lconv * PGLC_localeconv(void)
Definition: pg_locale.c:499
pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context)
size_t pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1241
int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
Definition: pg_locale.c:1270
bool pg_strxfrm_prefix_enabled(pg_locale_t locale)
Definition: pg_locale.c:1356
char * icu_language_tag(const char *loc_str, int elevel)
Definition: pg_locale.c:1502
bool char_is_cased(char ch, pg_locale_t locale)
Definition: pg_locale.c:1406
char * localized_abbrev_months[12+1]
Definition: pg_locale.c:99
static pg_locale_t default_locale
Definition: pg_locale.c:105
static collation_cache_hash * CollationCache
Definition: pg_locale.c:137
int pg_strncoll(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale)
Definition: pg_locale.c:1290
static bool struct_lconv_is_valid(struct lconv *s)
Definition: pg_locale.c:438
void init_database_collation(void)
Definition: pg_locale.c:1124
char * localized_full_days[7+1]
Definition: pg_locale.c:98
size_t pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:1320
const char * builtin_validate_locale(int encoding, const char *locale)
Definition: pg_locale.c:1462
size_t pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1248
void assign_locale_monetary(const char *newval, void *extra)
Definition: pg_locale.c:339
bool check_locale(int category, const char *locale, char **canonname)
Definition: pg_locale.c:268
char * localized_abbrev_days[7+1]
Definition: pg_locale.c:97
size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:1367
char * locale_monetary
Definition: pg_locale.c:84
bool check_locale_monetary(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:333
static Oid last_collation_cache_oid
Definition: pg_locale.c:143
#define LOCALE_NAME_BUFLEN
Definition: pg_locale.h:27
size_t wchar2char(char *to, const wchar_t *from, size_t tolen, locale_t loc)
void report_newlocale_failure(const char *localename)
static rewind_source * source
Definition: pg_rewind.c:89
static char * buf
Definition: pg_test_fsync.c:72
@ PG_SQL_ASCII
Definition: pg_wchar.h:226
@ PG_UTF8
Definition: pg_wchar.h:232
#define pg_encoding_to_char
Definition: pg_wchar.h:630
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
int pg_localeconv_r(const char *lc_monetary, const char *lc_numeric, struct lconv *output)
int pg_get_encoding_from_locale(const char *ctype, bool write_message)
Definition: chklocale.c:301
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
void pg_localeconv_free(struct lconv *lconv)
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:262
uint64_t Datum
Definition: postgres.h:70
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
static void AssertCouldGetRelation(void)
Definition: relcache.h:44
char * quote_qualified_identifier(const char *qualifier, const char *ident)
Definition: ruleutils.c:13112
bool pg_is_ascii(const char *str)
Definition: string.c:132
Definition: pg_locale.c:114
char status
Definition: pg_locale.c:120
Oid collid
Definition: pg_locale.c:115
pg_locale_t locale
Definition: pg_locale.c:116
uint32 hash
Definition: pg_locale.c:119
const struct ctype_methods * ctype
Definition: pg_locale.h:157
const struct collate_methods * collate
Definition: pg_locale.h:156
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:264
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:220
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:595
Datum SysCacheGetAttrNotNull(int cacheId, HeapTuple tup, AttrNumber attributeNumber)
Definition: syscache.c:625
#define locale_t
Definition: win32_port.h:432
void _dosmaperr(unsigned long)
Definition: win32error.c:177
#define setenv(x, y, z)
Definition: win32_port.h:545
#define setlocale(a, b)
Definition: win32_port.h:475