Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit b615920

Browse files
committed
Fix memory leakage in ICU encoding conversion, and other code review.
Callers of icu_to_uchar() neglected to pfree the result string when done with it. This results in catastrophic memory leaks in varstr_cmp(), because of our prevailing assumption that btree comparison functions don't leak memory. For safety, make all the call sites clean up leaks, though I suspect that we could get away without it in formatting.c. I audited callers of icu_from_uchar() as well, but found no places that seemed to have a comparable issue.

Add function API specifications for icu_to_uchar() and icu_from_uchar(); the lack of any thought-through specification is perhaps not unrelated to the existence of this bug in the first place.

Fix icu_to_uchar() to guarantee a nul-terminated result; although no existing caller appears to care, the fact that it would have been nul-terminated except in extreme corner cases seems ideally designed to bite someone on the rear someday.

Fix ucnv_fromUChars() destCapacity argument --- in the worst case, that could perhaps have led to a non-nul-terminated result, too.

Fix icu_from_uchar() to have a more reasonable definition of the function result --- no callers are actually paying attention, so this isn't a live bug, but it's certainly sloppily designed.

Const-ify icu_from_uchar()'s input string for consistency.

That is not the end of what needs to be done to these functions, but it's as much as I have the patience for right now.

Discussion: https://postgr.es/m/[email protected]
1 parent 8be8510 commit b615920

File tree

5 files changed

+60
-11
lines changed

5 files changed

+60
-11
lines changed

src/backend/commands/collationcmds.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,10 @@ normalize_libc_locale_name(char *new, const char *old)
381381

382382

383383
#ifdef USE_ICU
384+
/*
385+
* Get the ICU language tag for a locale name.
386+
* The result is a palloc'd string.
387+
*/
384388
static char *
385389
get_icu_language_tag(const char *localename)
386390
{
@@ -397,7 +401,10 @@ get_icu_language_tag(const char *localename)
397401
return pstrdup(buf);
398402
}
399403

400-
404+
/*
405+
* Get a comment (specifically, the display name) for an ICU locale.
406+
* The result is a palloc'd string.
407+
*/
401408
static char *
402409
get_icu_locale_comment(const char *localename)
403410
{
@@ -407,10 +414,12 @@ get_icu_locale_comment(const char *localename)
407414
char *result;
408415

409416
status = U_ZERO_ERROR;
410-
len_uchar = uloc_getDisplayName(localename, "en", &displayname[0], sizeof(displayname), &status);
417+
len_uchar = uloc_getDisplayName(localename, "en",
418+
&displayname[0], sizeof(displayname),
419+
&status);
411420
if (U_FAILURE(status))
412421
ereport(ERROR,
413-
(errmsg("could get display name for locale \"%s\": %s",
422+
(errmsg("could not get display name for locale \"%s\": %s",
414423
localename, u_errorName(status))));
415424

416425
icu_from_uchar(&result, displayname, len_uchar);

src/backend/utils/adt/formatting.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1561,6 +1561,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
15611561
len_conv = icu_convert_case(u_strToLower, mylocale,
15621562
&buff_conv, buff_uchar, len_uchar);
15631563
icu_from_uchar(&result, buff_conv, len_conv);
1564+
pfree(buff_uchar);
15641565
}
15651566
else
15661567
#endif
@@ -1684,6 +1685,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
16841685
len_conv = icu_convert_case(u_strToUpper, mylocale,
16851686
&buff_conv, buff_uchar, len_uchar);
16861687
icu_from_uchar(&result, buff_conv, len_conv);
1688+
pfree(buff_uchar);
16871689
}
16881690
else
16891691
#endif
@@ -1808,6 +1810,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
18081810
len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
18091811
&buff_conv, buff_uchar, len_uchar);
18101812
icu_from_uchar(&result, buff_conv, len_conv);
1813+
pfree(buff_uchar);
18111814
}
18121815
else
18131816
#endif

src/backend/utils/adt/pg_locale.c

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1486,6 +1486,18 @@ init_icu_converter(void)
14861486
icu_converter = conv;
14871487
}
14881488

1489+
/*
1490+
* Convert a string in the database encoding into a string of UChars.
1491+
*
1492+
* The source string at buff is of length nbytes
1493+
* (it needn't be nul-terminated)
1494+
*
1495+
* *buff_uchar receives a pointer to the palloc'd result string, and
1496+
* the function's result is the number of UChars generated.
1497+
*
1498+
* The result string is nul-terminated, though most callers rely on the
1499+
* result length instead.
1500+
*/
14891501
int32_t
14901502
icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
14911503
{
@@ -1494,18 +1506,30 @@ icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
14941506

14951507
init_icu_converter();
14961508

1497-
len_uchar = 2 * nbytes; /* max length per docs */
1509+
len_uchar = 2 * nbytes + 1; /* max length per docs */
14981510
*buff_uchar = palloc(len_uchar * sizeof(**buff_uchar));
14991511
status = U_ZERO_ERROR;
1500-
len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar, buff, nbytes, &status);
1512+
len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar,
1513+
buff, nbytes, &status);
15011514
if (U_FAILURE(status))
15021515
ereport(ERROR,
15031516
(errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
15041517
return len_uchar;
15051518
}
15061519

1520+
/*
1521+
* Convert a string of UChars into the database encoding.
1522+
*
1523+
* The source string at buff_uchar is of length len_uchar
1524+
* (it needn't be nul-terminated)
1525+
*
1526+
* *result receives a pointer to the palloc'd result string, and the
1527+
* function's result is the number of bytes generated (not counting nul).
1528+
*
1529+
* The result string is nul-terminated.
1530+
*/
15071531
int32_t
1508-
icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar)
1532+
icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
15091533
{
15101534
UErrorCode status;
15111535
int32_t len_result;
@@ -1515,13 +1539,14 @@ icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar)
15151539
len_result = UCNV_GET_MAX_BYTES_FOR_STRING(len_uchar, ucnv_getMaxCharSize(icu_converter));
15161540
*result = palloc(len_result + 1);
15171541
status = U_ZERO_ERROR;
1518-
ucnv_fromUChars(icu_converter, *result, len_result, buff_uchar, len_uchar, &status);
1542+
len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
1543+
buff_uchar, len_uchar, &status);
15191544
if (U_FAILURE(status))
15201545
ereport(ERROR,
15211546
(errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
15221547
return len_result;
15231548
}
1524-
#endif
1549+
#endif /* USE_ICU */
15251550

15261551
/*
15271552
* These functions convert from/to libc's wchar_t, *not* pg_wchar_t.

src/backend/utils/adt/varlena.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1569,6 +1569,9 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
15691569
result = ucol_strcoll(mylocale->info.icu.ucol,
15701570
uchar1, ulen1,
15711571
uchar2, ulen2);
1572+
1573+
pfree(uchar1);
1574+
pfree(uchar2);
15721575
}
15731576
#else /* not USE_ICU */
15741577
/* shouldn't happen */
@@ -2155,6 +2158,9 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
21552158
result = ucol_strcoll(sss->locale->info.icu.ucol,
21562159
uchar1, ulen1,
21572160
uchar2, ulen2);
2161+
2162+
pfree(uchar1);
2163+
pfree(uchar2);
21582164
}
21592165
#else /* not USE_ICU */
21602166
/* shouldn't happen */
@@ -2279,7 +2285,7 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
22792285
Size bsize;
22802286
#ifdef USE_ICU
22812287
int32_t ulen = -1;
2282-
UChar *uchar;
2288+
UChar *uchar = NULL;
22832289
#endif
22842290

22852291
/*
@@ -2354,7 +2360,8 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
23542360
&status);
23552361
if (U_FAILURE(status))
23562362
ereport(ERROR,
2357-
(errmsg("sort key generation failed: %s", u_errorName(status))));
2363+
(errmsg("sort key generation failed: %s",
2364+
u_errorName(status))));
23582365
}
23592366
else
23602367
bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
@@ -2394,6 +2401,11 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
23942401
* okay. See remarks on bytea case above.)
23952402
*/
23962403
memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
2404+
2405+
#ifdef USE_ICU
2406+
if (uchar)
2407+
pfree(uchar);
2408+
#endif
23972409
}
23982410

23992411
/*

src/include/utils/pg_locale.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ extern char *get_collation_actual_version(char collprovider, const char *collcol
9393

9494
#ifdef USE_ICU
9595
extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
96-
extern int32_t icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar);
96+
extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
9797
#endif
9898

9999
/* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */

0 commit comments

Comments
 (0)