Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 92fab75

Browse files
committed
Use wchar_t functions in _locale module.
1 parent 8e925b9 commit 92fab75

5 files changed

Lines changed: 96 additions & 45 deletions

File tree

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ What's New in Python 3.0a4?
99

1010
*Release date: XX-XXX-2008*
1111

12+
Extension Modules
13+
-----------------
14+
15+
- Use wchar_t functions in _locale module.
1216

1317

1418
What's New in Python 3.0a3?

Modules/_localemodule.c

Lines changed: 85 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/***********************************************************
2-
Copyright (C) 1997, 2002, 2003, 2007 Martin von Loewis
2+
Copyright (C) 1997, 2002, 2003, 2007, 2008 Martin von Loewis
33
44
Permission to use, copy, modify, and distribute this software and its
55
documentation for any purpose and without fee is hereby granted,
@@ -45,6 +45,35 @@ PyDoc_STRVAR(locale__doc__, "Support for POSIX locales.");
4545

4646
static PyObject *Error;
4747

48+
/* Convert a char* to a Unicode object according to the current locale */
49+
static PyObject*
50+
str2uni(const char* s)
51+
{
52+
size_t needed = mbstowcs(NULL, s, 0);
53+
size_t res1;
54+
wchar_t smallbuf[30];
55+
wchar_t *dest;
56+
PyObject *res2;
57+
if (needed == (size_t)-1) {
58+
PyErr_SetString(PyExc_ValueError, "Cannot convert byte to string");
59+
return NULL;
60+
}
61+
if (needed < sizeof(smallbuf))
62+
dest = smallbuf;
63+
else {
64+
dest = PyMem_Malloc(needed+1);
65+
if (!dest)
66+
return PyErr_NoMemory();
67+
}
68+
/* This shouldn't fail now */
69+
res1 = mbstowcs(dest, s, needed+1);
70+
assert(res == needed);
71+
res2 = PyUnicode_FromWideChar(dest, res1);
72+
if (dest != smallbuf)
73+
PyMem_Free(dest);
74+
return res2;
75+
}
76+
4877
/* support functions for formatting floating point numbers */
4978

5079
PyDoc_STRVAR(setlocale__doc__,
@@ -107,7 +136,7 @@ PyLocale_setlocale(PyObject* self, PyObject* args)
107136
PyErr_SetString(Error, "unsupported locale setting");
108137
return NULL;
109138
}
110-
result_object = PyUnicode_FromString(result);
139+
result_object = str2uni(result);
111140
if (!result_object)
112141
return NULL;
113142
} else {
@@ -117,7 +146,7 @@ PyLocale_setlocale(PyObject* self, PyObject* args)
117146
PyErr_SetString(Error, "locale query failed");
118147
return NULL;
119148
}
120-
result_object = PyUnicode_FromString(result);
149+
result_object = str2uni(result);
121150
}
122151
return result_object;
123152
}
@@ -143,7 +172,7 @@ PyLocale_localeconv(PyObject* self)
143172
involved herein */
144173

145174
#define RESULT_STRING(s)\
146-
x = PyUnicode_DecodeUnicodeEscape(l->s, strlen(l->s), "strict");\
175+
x = str2uni(l->s); \
147176
if (!x) goto failed;\
148177
PyDict_SetItemString(result, #s, x);\
149178
Py_XDECREF(x)
@@ -191,29 +220,19 @@ PyLocale_localeconv(PyObject* self)
191220
return NULL;
192221
}
193222

223+
#if defined(HAVE_WCSCOLL)
194224
PyDoc_STRVAR(strcoll__doc__,
195225
"string,string -> int. Compares two strings according to the locale.");
196226

197227
static PyObject*
198228
PyLocale_strcoll(PyObject* self, PyObject* args)
199229
{
200-
#if !defined(HAVE_WCSCOLL)
201-
char *s1,*s2;
202-
203-
if (!PyArg_ParseTuple(args, "ss:strcoll", &s1, &s2))
204-
return NULL;
205-
return PyLong_FromLong(strcoll(s1, s2));
206-
#else
207230
PyObject *os1, *os2, *result = NULL;
208231
wchar_t *ws1 = NULL, *ws2 = NULL;
209-
int len1, len2;
232+
Py_ssize_t len1, len2;
210233

211-
if (!PyArg_UnpackTuple(args, "strcoll", 2, 2, &os1, &os2))
234+
if (!PyArg_ParseTuple(args, "UU:strcoll", &os1, &os2))
212235
return NULL;
213-
/* Both arguments must be unicode, or it's an error. */
214-
if (!PyUnicode_Check(os1) || !PyUnicode_Check(os2)) {
215-
PyErr_SetString(PyExc_ValueError, "strcoll arguments must be strings");
216-
}
217236
/* Convert the unicode strings to wchar[]. */
218237
len1 = PyUnicode_GET_SIZE(os1) + 1;
219238
ws1 = PyMem_MALLOC(len1 * sizeof(wchar_t));
@@ -240,40 +259,62 @@ PyLocale_strcoll(PyObject* self, PyObject* args)
240259
if (ws1) PyMem_FREE(ws1);
241260
if (ws2) PyMem_FREE(ws2);
242261
return result;
243-
#endif
244262
}
263+
#endif
245264

246-
265+
#ifdef HAVE_WCSXFRM
247266
PyDoc_STRVAR(strxfrm__doc__,
248267
"string -> string. Returns a string that behaves for cmp locale-aware.");
249268

250269
/* strxfrm(string) -> string

   Parses one Unicode argument, transforms it with wcsxfrm() and returns
   the transformed wide string as a new Unicode object.

   Returns NULL with an exception set if argument parsing fails or if a
   temporary buffer cannot be allocated. */
static PyObject*
PyLocale_strxfrm(PyObject* self, PyObject* args)
{
    Py_UNICODE *s0;
    Py_ssize_t n0;
    wchar_t *s, *buf = NULL, *newbuf;
    size_t n1, n2;
    PyObject *result = NULL;
#ifndef HAVE_USABLE_WCHAR_T
    Py_ssize_t i;
#endif

    if (!PyArg_ParseTuple(args, "u#:strxfrm", &s0, &n0))
        return NULL;

#ifdef HAVE_USABLE_WCHAR_T
    /* Py_UNICODE is wchar_t: use the argument buffer directly.  It is
       borrowed from the argument and must not be freed here. */
    s = s0;
#else
    /* Py_UNICODE differs from wchar_t: make a private wchar_t copy,
       including the terminator at index n0.  Sizes are in wchar_t
       elements, not bytes. */
    if ((size_t)n0 >= ((size_t)-1) / sizeof(wchar_t))
        return PyErr_NoMemory();
    s = PyMem_Malloc(((size_t)n0 + 1) * sizeof(wchar_t));
    if (!s)
        return PyErr_NoMemory();
    for (i = 0; i <= n0; i++)
        s[i] = s0[i];
#endif

    /* assume no change in size, first */
    n1 = wcslen(s) + 1;
    buf = PyMem_Malloc(n1 * sizeof(wchar_t));
    if (!buf) {
        PyErr_NoMemory();
        goto exit;
    }
    n2 = wcsxfrm(buf, s, n1);
    if (n2 >= n1) {
        /* more space needed: n2 excludes the terminator, so the buffer
           must hold n2+1 wide characters. */
        if (n2 >= ((size_t)-1) / sizeof(wchar_t)) {
            PyErr_NoMemory();
            goto exit;
        }
        /* Keep the old pointer until realloc succeeds so it can still be
           freed at exit on failure. */
        newbuf = PyMem_Realloc(buf, (n2 + 1) * sizeof(wchar_t));
        if (!newbuf) {
            PyErr_NoMemory();
            goto exit;
        }
        buf = newbuf;
        n2 = wcsxfrm(buf, s, n2 + 1);
    }
    result = PyUnicode_FromWideChar(buf, (Py_ssize_t)n2);
 exit:
    if (buf)
        PyMem_Free(buf);
#ifndef HAVE_USABLE_WCHAR_T
    /* Only the private copy made above is owned by this function. */
    PyMem_Free(s);
#endif
    return result;
}
317+
#endif
277318

278319
#if defined(MS_WINDOWS)
279320
static PyObject*
@@ -472,9 +513,7 @@ PyLocale_nl_langinfo(PyObject* self, PyObject* args)
472513
instead of an empty string for nl_langinfo(ERA). */
473514
const char *result = nl_langinfo(item);
474515
result = result != NULL ? result : "";
475-
/* XXX may have to convert this to wcs first. */
476-
return PyUnicode_DecodeUnicodeEscape(result, strlen(result),
477-
"strict");
516+
return str2uni(result);
478517
}
479518
PyErr_SetString(PyExc_ValueError, "unsupported langinfo constant");
480519
return NULL;
@@ -493,7 +532,7 @@ PyIntl_gettext(PyObject* self, PyObject *args)
493532
char *in;
494533
if (!PyArg_ParseTuple(args, "z", &in))
495534
return 0;
496-
return PyUnicode_FromString(gettext(in));
535+
return str2uni(gettext(in));
497536
}
498537

499538
PyDoc_STRVAR(dgettext__doc__,
@@ -506,7 +545,7 @@ PyIntl_dgettext(PyObject* self, PyObject *args)
506545
char *domain, *in;
507546
if (!PyArg_ParseTuple(args, "zz", &domain, &in))
508547
return 0;
509-
return PyUnicode_FromString(dgettext(domain, in));
548+
return str2uni(dgettext(domain, in));
510549
}
511550

512551
PyDoc_STRVAR(dcgettext__doc__,
@@ -520,7 +559,7 @@ PyIntl_dcgettext(PyObject *self, PyObject *args)
520559
int category;
521560
if (!PyArg_ParseTuple(args, "zzi", &domain, &msgid, &category))
522561
return 0;
523-
return PyUnicode_FromString(dcgettext(domain,msgid,category));
562+
return str2uni(dcgettext(domain,msgid,category));
524563
}
525564

526565
PyDoc_STRVAR(textdomain__doc__,
@@ -538,7 +577,7 @@ PyIntl_textdomain(PyObject* self, PyObject* args)
538577
PyErr_SetFromErrno(PyExc_OSError);
539578
return NULL;
540579
}
541-
return PyUnicode_FromString(domain);
580+
return str2uni(domain);
542581
}
543582

544583
PyDoc_STRVAR(bindtextdomain__doc__,
@@ -556,7 +595,7 @@ PyIntl_bindtextdomain(PyObject* self,PyObject*args)
556595
PyErr_SetFromErrno(PyExc_OSError);
557596
return NULL;
558597
}
559-
return PyUnicode_FromString(dirname);
598+
return str2uni(dirname);
560599
}
561600

562601
#ifdef HAVE_BIND_TEXTDOMAIN_CODESET
@@ -572,7 +611,7 @@ PyIntl_bind_textdomain_codeset(PyObject* self,PyObject*args)
572611
return NULL;
573612
codeset = bind_textdomain_codeset(domain, codeset);
574613
if (codeset)
575-
return PyUnicode_FromString(codeset);
614+
return str2uni(codeset);
576615
Py_RETURN_NONE;
577616
}
578617
#endif
@@ -584,10 +623,14 @@ static struct PyMethodDef PyLocale_Methods[] = {
584623
METH_VARARGS, setlocale__doc__},
585624
{"localeconv", (PyCFunction) PyLocale_localeconv,
586625
METH_NOARGS, localeconv__doc__},
626+
#ifdef HAVE_WCSCOLL
587627
{"strcoll", (PyCFunction) PyLocale_strcoll,
588628
METH_VARARGS, strcoll__doc__},
629+
#endif
630+
#ifdef HAVE_WCSXFRM
589631
{"strxfrm", (PyCFunction) PyLocale_strxfrm,
590632
METH_VARARGS, strxfrm__doc__},
633+
#endif
591634
#if defined(MS_WINDOWS) || defined(__APPLE__)
592635
{"_getdefaultlocale", (PyCFunction) PyLocale_getdefaultlocale, METH_NOARGS},
593636
#endif

configure

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#! /bin/sh
2-
# From configure.in Revision: 60787 .
2+
# From configure.in Revision: 61238 .
33
# Guess values for system-dependent variables and create Makefiles.
44
# Generated by GNU Autoconf 2.61 for python 3.0.
55
#
@@ -15713,6 +15713,7 @@ echo "${ECHO_T}MACHDEP_OBJS" >&6; }
1571315713

1571415714

1571515715

15716+
1571615717

1571715718

1571815719
for ac_func in alarm bind_textdomain_codeset chown clock confstr \
@@ -15726,7 +15727,7 @@ for ac_func in alarm bind_textdomain_codeset chown clock confstr \
1572615727
setlocale setregid setreuid setsid setpgid setpgrp setuid setvbuf snprintf \
1572715728
sigaction siginterrupt sigrelse strftime strlcpy \
1572815729
sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \
15729-
truncate uname unsetenv utimes waitpid wait3 wait4 wcscoll _getpty
15730+
truncate uname unsetenv utimes waitpid wait3 wait4 wcscoll wcsxfrm _getpty
1573015731
do
1573115732
as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
1573215733
{ echo "$as_me:$LINENO: checking for $ac_func" >&5

configure.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2284,7 +2284,7 @@ AC_CHECK_FUNCS(alarm bind_textdomain_codeset chown clock confstr \
22842284
setlocale setregid setreuid setsid setpgid setpgrp setuid setvbuf snprintf \
22852285
sigaction siginterrupt sigrelse strftime strlcpy \
22862286
sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \
2287-
truncate uname unsetenv utimes waitpid wait3 wait4 wcscoll _getpty)
2287+
truncate uname unsetenv utimes waitpid wait3 wait4 wcscoll wcsxfrm _getpty)
22882288

22892289
# For some functions, having a definition is not sufficient, since
22902290
# we want to take their address.

pyconfig.h.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,9 @@
776776
/* Define to 1 if you have the `wcscoll' function. */
777777
#undef HAVE_WCSCOLL
778778

779+
/* Define to 1 if you have the `wcsxfrm' function. */
780+
#undef HAVE_WCSXFRM
781+
779782
/* Define if tzset() actually switches the local timezone in a meaningful way.
780783
*/
781784
#undef HAVE_WORKING_TZSET

0 commit comments

Comments
 (0)