Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit d203431

Browse files
committed
Add 'U'/'U#' format characters to Py_BuildValue() (and thus
to PyObject_CallFunction()) that take a char * (and a size in the case of 'U#') and create a unicode object out of it. Add functions PyUnicode_FromFormat() and PyUnicode_FromFormatV() that work similarly to PyString_FromFormat(), but create a unicode object (a %U format character has also been added; it takes a PyObject * that must point to a unicode object). Change the encoding and reason attributes of UnicodeEncodeError, UnicodeDecodeError and UnicodeTranslateError to be unicode objects.
1 parent 5550731 commit d203431

6 files changed

Lines changed: 376 additions & 113 deletions

File tree

Doc/api/utilities.tex

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,15 @@ \section{Parsing arguments and building values
848848
to a Python Unicode object. If the Unicode buffer pointer
849849
is \NULL, the length is ignored and \code{None} is returned.
850850

851+
\item[\samp{U} (string) {[char *]}]
852+
Convert a null-terminated C string to a Python unicode object.
853+
If the C string pointer is \NULL, \code{None} is returned.
854+
855+
\item[\samp{U\#} (string) {[char *, int]}]
856+
Convert a C string and its length to a Python unicode object.
857+
If the C string pointer is \NULL, the length is ignored and \code{None}
858+
is returned.
859+
851860
\item[\samp{i} (integer) {[int]}]
852861
Convert a plain C \ctype{int} to a Python integer object.
853862

Include/unicodeobject.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,9 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
173173
# define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
174174
# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
175175
# define PyUnicode_FromString PyUnicodeUCS2_FromString
176-
# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
176+
# define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
177+
# define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV
178+
# define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat
177179
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
178180
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
179181
# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
@@ -252,6 +254,9 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
252254
# define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
253255
# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
254256
# define PyUnicode_FromString PyUnicodeUCS4_FromString
257+
# define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
258+
# define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV
259+
# define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat
255260
# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
256261
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
257262
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
@@ -429,6 +434,12 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
429434
Py_ssize_t size /* size of buffer */
430435
);
431436

437+
/* Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */
438+
PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
439+
const char *u, /* char buffer */
440+
Py_ssize_t size /* size of buffer */
441+
);
442+
432443
/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
433444
Latin-1 encoded bytes */
434445
PyAPI_FUNC(PyObject*) PyUnicode_FromString(
@@ -510,6 +521,9 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
510521
register PyObject *obj /* Object */
511522
);
512523

524+
PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list);
525+
PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...);
526+
513527
/* --- wchar_t support for platforms which support it --------------------- */
514528

515529
#ifdef HAVE_WCHAR_H

Lib/test/test_codeccallbacks.py

Lines changed: 39 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -21,43 +21,43 @@ def handle(self, exc):
2121
# A UnicodeEncodeError object with a bad start attribute
2222
class BadStartUnicodeEncodeError(UnicodeEncodeError):
2323
def __init__(self):
24-
UnicodeEncodeError.__init__(self, str8("ascii"), "", 0, 1, str8("bad"))
24+
UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad")
2525
self.start = []
2626

2727
# A UnicodeEncodeError object with a bad object attribute
2828
class BadObjectUnicodeEncodeError(UnicodeEncodeError):
2929
def __init__(self):
30-
UnicodeEncodeError.__init__(self, str8("ascii"), "", 0, 1, str8("bad"))
30+
UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad")
3131
self.object = []
3232

3333
# A UnicodeDecodeError object without an end attribute
3434
class NoEndUnicodeDecodeError(UnicodeDecodeError):
3535
def __init__(self):
36-
UnicodeDecodeError.__init__(self, str8("ascii"), b"", 0, 1, str8("bad"))
36+
UnicodeDecodeError.__init__(self, "ascii", b"", 0, 1, "bad")
3737
del self.end
3838

3939
# A UnicodeDecodeError object with a bad object attribute
4040
class BadObjectUnicodeDecodeError(UnicodeDecodeError):
4141
def __init__(self):
42-
UnicodeDecodeError.__init__(self, str8("ascii"), b"", 0, 1, str8("bad"))
42+
UnicodeDecodeError.__init__(self, "ascii", b"", 0, 1, "bad")
4343
self.object = []
4444

4545
# A UnicodeTranslateError object without a start attribute
4646
class NoStartUnicodeTranslateError(UnicodeTranslateError):
4747
def __init__(self):
48-
UnicodeTranslateError.__init__(self, "", 0, 1, str8("bad"))
48+
UnicodeTranslateError.__init__(self, "", 0, 1, "bad")
4949
del self.start
5050

5151
# A UnicodeTranslateError object without an end attribute
5252
class NoEndUnicodeTranslateError(UnicodeTranslateError):
5353
def __init__(self):
54-
UnicodeTranslateError.__init__(self, "", 0, 1, str8("bad"))
54+
UnicodeTranslateError.__init__(self, "", 0, 1, "bad")
5555
del self.end
5656

5757
# A UnicodeTranslateError object without an object attribute
5858
class NoObjectUnicodeTranslateError(UnicodeTranslateError):
5959
def __init__(self):
60-
UnicodeTranslateError.__init__(self, "", 0, 1, str8("bad"))
60+
UnicodeTranslateError.__init__(self, "", 0, 1, "bad")
6161
del self.object
6262

6363
class CodecCallbackTest(unittest.TestCase):
@@ -328,73 +328,73 @@ def check_exceptionobjectargs(self, exctype, args, msg):
328328
def test_unicodeencodeerror(self):
329329
self.check_exceptionobjectargs(
330330
UnicodeEncodeError,
331-
[str8("ascii"), "g\xfcrk", 1, 2, str8("ouch")],
331+
["ascii", "g\xfcrk", 1, 2, "ouch"],
332332
"'ascii' codec can't encode character u'\\xfc' in position 1: ouch"
333333
)
334334
self.check_exceptionobjectargs(
335335
UnicodeEncodeError,
336-
[str8("ascii"), "g\xfcrk", 1, 4, str8("ouch")],
336+
["ascii", "g\xfcrk", 1, 4, "ouch"],
337337
"'ascii' codec can't encode characters in position 1-3: ouch"
338338
)
339339
self.check_exceptionobjectargs(
340340
UnicodeEncodeError,
341-
[str8("ascii"), "\xfcx", 0, 1, str8("ouch")],
341+
["ascii", "\xfcx", 0, 1, "ouch"],
342342
"'ascii' codec can't encode character u'\\xfc' in position 0: ouch"
343343
)
344344
self.check_exceptionobjectargs(
345345
UnicodeEncodeError,
346-
[str8("ascii"), "\u0100x", 0, 1, str8("ouch")],
346+
["ascii", "\u0100x", 0, 1, "ouch"],
347347
"'ascii' codec can't encode character u'\\u0100' in position 0: ouch"
348348
)
349349
self.check_exceptionobjectargs(
350350
UnicodeEncodeError,
351-
[str8("ascii"), "\uffffx", 0, 1, str8("ouch")],
351+
["ascii", "\uffffx", 0, 1, "ouch"],
352352
"'ascii' codec can't encode character u'\\uffff' in position 0: ouch"
353353
)
354354
if sys.maxunicode > 0xffff:
355355
self.check_exceptionobjectargs(
356356
UnicodeEncodeError,
357-
[str8("ascii"), "\U00010000x", 0, 1, str8("ouch")],
357+
["ascii", "\U00010000x", 0, 1, "ouch"],
358358
"'ascii' codec can't encode character u'\\U00010000' in position 0: ouch"
359359
)
360360

361361
def test_unicodedecodeerror(self):
362362
self.check_exceptionobjectargs(
363363
UnicodeDecodeError,
364-
[str8("ascii"), b"g\xfcrk", 1, 2, str8("ouch")],
364+
["ascii", b"g\xfcrk", 1, 2, "ouch"],
365365
"'ascii' codec can't decode byte 0xfc in position 1: ouch"
366366
)
367367
self.check_exceptionobjectargs(
368368
UnicodeDecodeError,
369-
[str8("ascii"), b"g\xfcrk", 1, 3, str8("ouch")],
369+
["ascii", b"g\xfcrk", 1, 3, "ouch"],
370370
"'ascii' codec can't decode bytes in position 1-2: ouch"
371371
)
372372

373373
def test_unicodetranslateerror(self):
374374
self.check_exceptionobjectargs(
375375
UnicodeTranslateError,
376-
["g\xfcrk", 1, 2, str8("ouch")],
376+
["g\xfcrk", 1, 2, "ouch"],
377377
"can't translate character u'\\xfc' in position 1: ouch"
378378
)
379379
self.check_exceptionobjectargs(
380380
UnicodeTranslateError,
381-
["g\u0100rk", 1, 2, str8("ouch")],
381+
["g\u0100rk", 1, 2, "ouch"],
382382
"can't translate character u'\\u0100' in position 1: ouch"
383383
)
384384
self.check_exceptionobjectargs(
385385
UnicodeTranslateError,
386-
["g\uffffrk", 1, 2, str8("ouch")],
386+
["g\uffffrk", 1, 2, "ouch"],
387387
"can't translate character u'\\uffff' in position 1: ouch"
388388
)
389389
if sys.maxunicode > 0xffff:
390390
self.check_exceptionobjectargs(
391391
UnicodeTranslateError,
392-
["g\U00010000rk", 1, 2, str8("ouch")],
392+
["g\U00010000rk", 1, 2, "ouch"],
393393
"can't translate character u'\\U00010000' in position 1: ouch"
394394
)
395395
self.check_exceptionobjectargs(
396396
UnicodeTranslateError,
397-
["g\xfcrk", 1, 3, str8("ouch")],
397+
["g\xfcrk", 1, 3, "ouch"],
398398
"can't translate characters in position 1-2: ouch"
399399
)
400400

@@ -416,7 +416,7 @@ def test_badandgoodstrictexceptions(self):
416416
self.assertRaises(
417417
UnicodeEncodeError,
418418
codecs.strict_errors,
419-
UnicodeEncodeError(str8("ascii"), "\u3042", 0, 1, str8("ouch"))
419+
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")
420420
)
421421

422422
def test_badandgoodignoreexceptions(self):
@@ -435,17 +435,17 @@ def test_badandgoodignoreexceptions(self):
435435
# If the correct exception is passed in, "ignore" returns an empty replacement
436436
self.assertEquals(
437437
codecs.ignore_errors(
438-
UnicodeEncodeError(str8("ascii"), "\u3042", 0, 1, str8("ouch"))),
438+
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
439439
("", 1)
440440
)
441441
self.assertEquals(
442442
codecs.ignore_errors(
443-
UnicodeDecodeError(str8("ascii"), b"\xff", 0, 1, str8("ouch"))),
443+
UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch")),
444444
("", 1)
445445
)
446446
self.assertEquals(
447447
codecs.ignore_errors(
448-
UnicodeTranslateError("\u3042", 0, 1, str8("ouch"))),
448+
UnicodeTranslateError("\u3042", 0, 1, "ouch")),
449449
("", 1)
450450
)
451451

@@ -475,17 +475,17 @@ def test_badandgoodreplaceexceptions(self):
475475
# With the correct exception, "replace" returns an "?" or "\ufffd" replacement
476476
self.assertEquals(
477477
codecs.replace_errors(
478-
UnicodeEncodeError(str8("ascii"), "\u3042", 0, 1, str8("ouch"))),
478+
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
479479
("?", 1)
480480
)
481481
self.assertEquals(
482482
codecs.replace_errors(
483-
UnicodeDecodeError(str8("ascii"), b"\xff", 0, 1, str8("ouch"))),
483+
UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch")),
484484
("\ufffd", 1)
485485
)
486486
self.assertEquals(
487487
codecs.replace_errors(
488-
UnicodeTranslateError("\u3042", 0, 1, str8("ouch"))),
488+
UnicodeTranslateError("\u3042", 0, 1, "ouch")),
489489
("\ufffd", 1)
490490
)
491491

@@ -506,19 +506,19 @@ def test_badandgoodxmlcharrefreplaceexceptions(self):
506506
self.assertRaises(
507507
TypeError,
508508
codecs.xmlcharrefreplace_errors,
509-
UnicodeDecodeError(str8("ascii"), b"\xff", 0, 1, str8("ouch"))
509+
UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch")
510510
)
511511
self.assertRaises(
512512
TypeError,
513513
codecs.xmlcharrefreplace_errors,
514-
UnicodeTranslateError("\u3042", 0, 1, str8("ouch"))
514+
UnicodeTranslateError("\u3042", 0, 1, "ouch")
515515
)
516516
# Use the correct exception
517517
cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
518518
s = "".join(chr(c) for c in cs)
519519
self.assertEquals(
520520
codecs.xmlcharrefreplace_errors(
521-
UnicodeEncodeError(str8("ascii"), s, 0, len(s), str8("ouch"))
521+
UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
522522
),
523523
("".join("&#%d;" % ord(c) for c in s), len(s))
524524
)
@@ -540,48 +540,48 @@ def test_badandgoodbackslashreplaceexceptions(self):
540540
self.assertRaises(
541541
TypeError,
542542
codecs.backslashreplace_errors,
543-
UnicodeDecodeError(str8("ascii"), b"\xff", 0, 1, str8("ouch"))
543+
UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch")
544544
)
545545
self.assertRaises(
546546
TypeError,
547547
codecs.backslashreplace_errors,
548-
UnicodeTranslateError("\u3042", 0, 1, str8("ouch"))
548+
UnicodeTranslateError("\u3042", 0, 1, "ouch")
549549
)
550550
# Use the correct exception
551551
self.assertEquals(
552552
codecs.backslashreplace_errors(
553-
UnicodeEncodeError(str8("ascii"), "\u3042", 0, 1, str8("ouch"))),
553+
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
554554
("\\u3042", 1)
555555
)
556556
self.assertEquals(
557557
codecs.backslashreplace_errors(
558-
UnicodeEncodeError(str8("ascii"), "\x00", 0, 1, str8("ouch"))),
558+
UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")),
559559
("\\x00", 1)
560560
)
561561
self.assertEquals(
562562
codecs.backslashreplace_errors(
563-
UnicodeEncodeError(str8("ascii"), "\xff", 0, 1, str8("ouch"))),
563+
UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")),
564564
("\\xff", 1)
565565
)
566566
self.assertEquals(
567567
codecs.backslashreplace_errors(
568-
UnicodeEncodeError(str8("ascii"), "\u0100", 0, 1, str8("ouch"))),
568+
UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")),
569569
("\\u0100", 1)
570570
)
571571
self.assertEquals(
572572
codecs.backslashreplace_errors(
573-
UnicodeEncodeError(str8("ascii"), "\uffff", 0, 1, str8("ouch"))),
573+
UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")),
574574
("\\uffff", 1)
575575
)
576576
if sys.maxunicode>0xffff:
577577
self.assertEquals(
578578
codecs.backslashreplace_errors(
579-
UnicodeEncodeError(str8("ascii"), "\U00010000", 0, 1, str8("ouch"))),
579+
UnicodeEncodeError("ascii", "\U00010000", 0, 1, "ouch")),
580580
("\\U00010000", 1)
581581
)
582582
self.assertEquals(
583583
codecs.backslashreplace_errors(
584-
UnicodeEncodeError(str8("ascii"), "\U0010ffff", 0, 1, str8("ouch"))),
584+
UnicodeEncodeError("ascii", "\U0010ffff", 0, 1, "ouch")),
585585
("\\U0010ffff", 1)
586586
)
587587

0 commit comments

Comments
 (0)