Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c2d4526

Browse files
committed
Tighten the check in initdb and CREATE DATABASE that the chosen encoding
matches the encoding of the locale. LC_COLLATE is now checked in addition to LC_CTYPE.
1 parent 61d9674 commit c2d4526

File tree

3 files changed

+81
-52
lines changed

3 files changed

+81
-52
lines changed

doc/src/sgml/charset.sgml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.88 2008/09/23 09:20:34 heikki Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.89 2008/09/23 10:58:03 heikki Exp $ -->
22

33
<chapter id="charset">
44
<title>Localization</>
@@ -320,10 +320,10 @@ initdb --locale=sv_SE
320320

321321
<para>
322322
An important restriction, however, is that each database's character set
323-
must be compatible with the database's <envar>LC_CTYPE</> setting.
324-
When <envar>LC_CTYPE</> is <literal>C</> or <literal>POSIX</>, any
325-
character set is allowed, but for other settings of <envar>LC_CTYPE</>
326-
there is only one character set that will work correctly.
323+
must be compatible with the database's <envar>LC_CTYPE</> and
324+
<envar>LC_COLLATE</> locale settings. For the <literal>C</> or
325+
<literal>POSIX</> locale, any character set is allowed, but for other
326+
locales there is only one character set that will work correctly.
327327
</para>
328328

329329
<sect2 id="multibyte-charset-supported">

src/backend/commands/dbcommands.c

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*
1414
*
1515
* IDENTIFICATION
16-
* $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.211 2008/09/23 09:20:35 heikki Exp $
16+
* $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.212 2008/09/23 10:58:03 heikki Exp $
1717
*
1818
*-------------------------------------------------------------------------
1919
*/
@@ -118,6 +118,7 @@ createdb(const CreatedbStmt *stmt)
118118
int encoding = -1;
119119
int dbconnlimit = -1;
120120
int ctype_encoding;
121+
int collate_encoding;
121122
int notherbackends;
122123
int npreparedxacts;
123124
createdb_failure_params fparms;
@@ -334,6 +335,7 @@ createdb(const CreatedbStmt *stmt)
334335
* Note: if you change this policy, fix initdb to match.
335336
*/
336337
ctype_encoding = pg_get_encoding_from_locale(dbctype);
338+
collate_encoding = pg_get_encoding_from_locale(dbcollate);
337339

338340
if (!(ctype_encoding == encoding ||
339341
ctype_encoding == PG_SQL_ASCII ||
@@ -345,9 +347,22 @@ createdb(const CreatedbStmt *stmt)
345347
(errmsg("encoding %s does not match locale %s",
346348
pg_encoding_to_char(encoding),
347349
dbctype),
348-
errdetail("The chosen LC_CTYPE setting requires encoding %s.",
350+
errdetail("The chosen CTYPE setting requires encoding %s.",
349351
pg_encoding_to_char(ctype_encoding))));
350352

353+
if (!(collate_encoding == encoding ||
354+
collate_encoding == PG_SQL_ASCII ||
355+
#ifdef WIN32
356+
encoding == PG_UTF8 ||
357+
#endif
358+
(encoding == PG_SQL_ASCII && superuser())))
359+
ereport(ERROR,
360+
(errmsg("encoding %s does not match locale %s",
361+
pg_encoding_to_char(encoding),
362+
dbcollate),
363+
errdetail("The chosen COLLATE setting requires encoding %s.",
364+
pg_encoding_to_char(collate_encoding))));
365+
351366
/*
352367
* Check that the new locale is compatible with the source database.
353368
*

src/bin/initdb/initdb.c

Lines changed: 59 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
* Portions Copyright (c) 1994, Regents of the University of California
4343
* Portions taken from FreeBSD.
4444
*
45-
* $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.160 2008/09/23 09:20:37 heikki Exp $
45+
* $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.161 2008/09/23 10:58:03 heikki Exp $
4646
*
4747
*-------------------------------------------------------------------------
4848
*/
@@ -188,7 +188,8 @@ static void trapsig(int signum);
188188
static void check_ok(void);
189189
static char *escape_quotes(const char *src);
190190
static int locale_date_order(const char *locale);
191-
static bool chklocale(const char *locale);
191+
static bool check_locale_name(const char *locale);
192+
static bool check_locale_encoding(const char *locale, int encoding);
192193
static void setlocales(void);
193194
static void usage(const char *progname);
194195

@@ -2187,7 +2188,7 @@ locale_date_order(const char *locale)
21872188
* this should match the backend check_locale() function
21882189
*/
21892190
static bool
2190-
chklocale(const char *locale)
2191+
check_locale_name(const char *locale)
21912192
{
21922193
bool ret;
21932194
int category = LC_CTYPE;
@@ -2211,6 +2212,50 @@ chklocale(const char *locale)
22112212
return ret;
22122213
}
22132214

2215+
/*
2216+
* check if the chosen encoding matches the encoding required by the locale
2217+
*
2218+
* this should match the similar check in the backend createdb() function
2219+
*/
2220+
static bool
2221+
check_locale_encoding(const char *locale, int user_enc)
2222+
{
2223+
int locale_enc;
2224+
2225+
locale_enc = pg_get_encoding_from_locale(locale);
2226+
2227+
/* We allow selection of SQL_ASCII --- see notes in createdb() */
2228+
if (!(locale_enc == user_enc ||
2229+
locale_enc == PG_SQL_ASCII ||
2230+
user_enc == PG_SQL_ASCII
2231+
#ifdef WIN32
2232+
2233+
/*
2234+
* On win32, if the encoding chosen is UTF8, all locales are OK
2235+
* (assuming the actual locale name passed the checks above). This is
2236+
* because UTF8 is a pseudo-codepage, that we convert to UTF16 before
2237+
* doing any operations on, and UTF16 supports all locales.
2238+
*/
2239+
|| user_enc == PG_UTF8
2240+
#endif
2241+
))
2242+
{
2243+
fprintf(stderr, _("%s: encoding mismatch\n"), progname);
2244+
fprintf(stderr,
2245+
_("The encoding you selected (%s) and the encoding that the\n"
2246+
"selected locale uses (%s) do not match. This would lead to\n"
2247+
"misbehavior in various character string processing functions.\n"
2248+
"Rerun %s and either do not specify an encoding explicitly,\n"
2249+
"or choose a matching combination.\n"),
2250+
pg_encoding_to_char(user_enc),
2251+
pg_encoding_to_char(locale_enc),
2252+
progname);
2253+
return false;
2254+
}
2255+
return true;
2256+
}
2257+
2258+
22142259
/*
22152260
* set up the locale variables
22162261
*
@@ -2241,17 +2286,17 @@ setlocales(void)
22412286
* override absent/invalid config settings from initdb's locale settings
22422287
*/
22432288

2244-
if (strlen(lc_ctype) == 0 || !chklocale(lc_ctype))
2289+
if (strlen(lc_ctype) == 0 || !check_locale_name(lc_ctype))
22452290
lc_ctype = xstrdup(setlocale(LC_CTYPE, NULL));
2246-
if (strlen(lc_collate) == 0 || !chklocale(lc_collate))
2291+
if (strlen(lc_collate) == 0 || !check_locale_name(lc_collate))
22472292
lc_collate = xstrdup(setlocale(LC_COLLATE, NULL));
2248-
if (strlen(lc_numeric) == 0 || !chklocale(lc_numeric))
2293+
if (strlen(lc_numeric) == 0 || !check_locale_name(lc_numeric))
22492294
lc_numeric = xstrdup(setlocale(LC_NUMERIC, NULL));
2250-
if (strlen(lc_time) == 0 || !chklocale(lc_time))
2295+
if (strlen(lc_time) == 0 || !check_locale_name(lc_time))
22512296
lc_time = xstrdup(setlocale(LC_TIME, NULL));
2252-
if (strlen(lc_monetary) == 0 || !chklocale(lc_monetary))
2297+
if (strlen(lc_monetary) == 0 || !check_locale_name(lc_monetary))
22532298
lc_monetary = xstrdup(setlocale(LC_MONETARY, NULL));
2254-
if (strlen(lc_messages) == 0 || !chklocale(lc_messages))
2299+
if (strlen(lc_messages) == 0 || !check_locale_name(lc_messages))
22552300
#if defined(LC_MESSAGES) && !defined(WIN32)
22562301
{
22572302
/* when available get the current locale setting */
@@ -2452,6 +2497,7 @@ main(int argc, char *argv[])
24522497
* environment */
24532498
char bin_dir[MAXPGPATH];
24542499
char *pg_data_native;
2500+
int user_enc;
24552501

24562502
#ifdef WIN32
24572503
char *restrict_env;
@@ -2868,44 +2914,12 @@ main(int argc, char *argv[])
28682914
}
28692915
}
28702916
else
2871-
{
2872-
int user_enc;
2873-
int ctype_enc;
2874-
28752917
encodingid = get_encoding_id(encoding);
2876-
user_enc = atoi(encodingid);
2877-
2878-
ctype_enc = pg_get_encoding_from_locale(lc_ctype);
28792918

2880-
/* We allow selection of SQL_ASCII --- see notes in createdb() */
2881-
if (!(ctype_enc == user_enc ||
2882-
ctype_enc == PG_SQL_ASCII ||
2883-
user_enc == PG_SQL_ASCII
2884-
#ifdef WIN32
2885-
2886-
/*
2887-
* On win32, if the encoding chosen is UTF8, all locales are OK
2888-
* (assuming the actual locale name passed the checks above). This is
2889-
* because UTF8 is a pseudo-codepage, that we convert to UTF16 before
2890-
* doing any operations on, and UTF16 supports all locales.
2891-
*/
2892-
|| user_enc == PG_UTF8
2893-
#endif
2894-
))
2895-
{
2896-
fprintf(stderr, _("%s: encoding mismatch\n"), progname);
2897-
fprintf(stderr,
2898-
_("The encoding you selected (%s) and the encoding that the\n"
2899-
"selected locale uses (%s) do not match. This would lead to\n"
2900-
"misbehavior in various character string processing functions.\n"
2901-
"Rerun %s and either do not specify an encoding explicitly,\n"
2902-
"or choose a matching combination.\n"),
2903-
pg_encoding_to_char(user_enc),
2904-
pg_encoding_to_char(ctype_enc),
2905-
progname);
2906-
exit(1);
2907-
}
2908-
}
2919+
user_enc = atoi(encodingid);
2920+
if (!check_locale_encoding(lc_ctype, user_enc) ||
2921+
!check_locale_encoding(lc_collate, user_enc))
2922+
exit(1); /* check_locale_encoding printed the error */
29092923

29102924
if (strlen(default_text_search_config) == 0)
29112925
{

0 commit comments

Comments
 (0)