From da93f2f18d37c415e5f9d5bafdfa6e6cab36ded2 Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Tue, 3 Oct 2023 14:14:32 +0800 Subject: [PATCH 01/15] Raise `TypeError` for duplicate/unknown fields in PyStructSequence constructor --- Objects/structseq.c | 56 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/Objects/structseq.c b/Objects/structseq.c index 0ca622edc2ba37..829cfaab84fd4a 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -216,10 +216,62 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict) res->ob_item[i] = Py_NewRef(v); } Py_DECREF(arg); - for (; i < max_len; ++i) { + if (dict != NULL) { + for (i = 0; i < len; ++i) { + const char *name = type->tp_members[i < min_len ? i : i - n_unnamed_fields].name; + PyObject *ob = NULL; + if (PyDict_GetItemStringRef(dict, name, &ob) < 0) { + Py_DECREF(res); + return NULL; + } + if (ob != NULL) { + Py_DECREF(ob); + PyErr_Format(PyExc_TypeError, + "%.500s() got multiple values for field '%s'", + type->tp_name, name); + Py_DECREF(res); + return NULL; + } + } + PyObject *unknown_keys = PySet_New(dict); + if (unknown_keys == NULL) { + Py_DECREF(res); + return NULL; + } + for (i = len; i < max_len; ++i) { + PyObject *ob = NULL; + const char *name = type->tp_members[i - n_unnamed_fields].name; + PyObject *key = PyUnicode_FromString(name); + if (!key || PyDict_GetItemRef(dict, key, &ob) < 0) { + Py_XDECREF(key); + Py_DECREF(res); + Py_DECREF(unknown_keys); + return NULL; + } + if (ob != NULL) { + Py_DECREF(ob); + if (PySet_Discard(unknown_keys, key) < 0) { + Py_DECREF(key); + Py_DECREF(res); + Py_DECREF(unknown_keys); + return NULL; + } + } + Py_DECREF(key); + } + if (PySet_GET_SIZE(unknown_keys) > 0) { + PyErr_Format(PyExc_TypeError, + "%.500s() got unexpected field name(s): %R", + type->tp_name, unknown_keys); + Py_DECREF(res); + Py_DECREF(unknown_keys); + return NULL; + } + } + for (i = len; i < max_len; ++i) { PyObject *ob = NULL; if (dict != NULL) { - const char *name = type->tp_members[i-n_unnamed_fields].name; + const char *name = type->tp_members[i - n_unnamed_fields].name; if (PyDict_GetItemStringRef(dict, name, &ob) < 0) { Py_DECREF(res); return NULL; From 9632370867ada1a87a670e599735017bbd9765b9 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 3 Oct 2023 06:19:11 +0000 Subject: [PATCH 02/15] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/C API/2023-10-03-06-19-10.gh-issue-110235.uec5AG.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/C API/2023-10-03-06-19-10.gh-issue-110235.uec5AG.rst diff --git a/Misc/NEWS.d/next/C API/2023-10-03-06-19-10.gh-issue-110235.uec5AG.rst b/Misc/NEWS.d/next/C API/2023-10-03-06-19-10.gh-issue-110235.uec5AG.rst new file mode 100644 index 00000000000000..ff26f25fe71d61 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-10-03-06-19-10.gh-issue-110235.uec5AG.rst @@ -0,0 +1,2 @@ +Raise :exc:`TypeError` for duplicate/unknown fields in ``PyStructSequence`` constructor. +Patched by Xuehai Pan. From 464cbaf579cacdf2289ab319a37f8a04254e98ad Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Tue, 3 Oct 2023 14:33:37 +0800 Subject: [PATCH 03/15] Add test for duplicate/unknown fields in PyStructSequence constructor --- Lib/test/test_structseq.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/Lib/test/test_structseq.py b/Lib/test/test_structseq.py index a9fe193028ebe4..7280c505b5cdb5 100644 --- a/Lib/test/test_structseq.py +++ b/Lib/test/test_structseq.py @@ -1,4 +1,5 @@ import os +import re import time import unittest @@ -93,6 +94,28 @@ def test_constructor(self): s = "123456789" self.assertEqual("".join(t(s)), s) + def test_constructor_with_duplicate_fields(self): + t = time.struct_time + self.assertEqual(t("123456789"), tuple("123456789")) + self.assertEqual(t("123456789", dict={"tm_zone": "some zone"}), tuple("123456789")) + with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_year'"): + t("123456789", dict={"tm_year": 0}) + with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_year'"): + t("123456789", dict={"tm_year": 0, "tm_mon": 1}) + with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_mon'"): + t("123456789", dict={"tm_zone": 'some zone', "tm_mon": 1}) + + def test_constructor_with_unknown_fields(self): + t = time.struct_time + with self.assertRaisesRegex(TypeError, + re.escape("got unexpected field name(s): {'error'}")): + t("123456789", dict={"error": 0}) + with self.assertRaisesRegex(TypeError, + re.escape("got unexpected field name(s): {'error'}")): + t("123456789", dict={"tm_zone": 'some zone', "error": 0}) + with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_year'"): + t("123456789", dict={"tm_year": 0, "tm_zone": 'some zone', "error": 0}) + def test_eviltuple(self): class Exc(Exception): pass From 11a8435f7ca60f30f8ba7c2f78174373eaa46149 Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Tue, 3 Oct 2023 14:38:20 +0800 Subject: [PATCH 04/15] Add test for duplicate/unknown fields in PyStructSequence constructor --- Lib/test/test_structseq.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_structseq.py b/Lib/test/test_structseq.py index 7280c505b5cdb5..5ec2192fc4e3e6 100644 --- a/Lib/test/test_structseq.py +++ b/Lib/test/test_structseq.py @@ -103,7 +103,11 @@ def test_constructor_with_duplicate_fields(self): with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_year'"): t("123456789", dict={"tm_year": 0, "tm_mon": 1}) with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_mon'"): - t("123456789", dict={"tm_zone": 'some zone', "tm_mon": 1}) + t("123456789", dict={"tm_zone": "some zone", "tm_mon": 1}) + with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_mon'"): + t("123456789", dict={"tm_zone": "some zone", "tm_mon": 1, "error": 0}) + with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_mon'"): + t("123456789", dict={"error": 0, "tm_zone": "some zone", "tm_mon": 1}) def test_constructor_with_unknown_fields(self): t = time.struct_time @@ -112,9 +116,7 @@ def test_constructor_with_unknown_fields(self): t("123456789", dict={"error": 0}) with self.assertRaisesRegex(TypeError, re.escape("got unexpected field name(s): {'error'}")): - t("123456789", dict={"tm_zone": 'some zone', "error": 0}) - with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_year'"): - t("123456789", dict={"tm_year": 0, "tm_zone": 'some zone', "error": 0}) + t("123456789", dict={"tm_zone": "some zone", "error": 0}) def test_eviltuple(self): class Exc(Exception): From 9fc15015b2714931cd75a0a72f94ee457cf97e43 Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Tue, 3 Oct 2023 15:18:23 +0800 Subject: [PATCH 05/15] Handle case for PyStructSequence with unnamed fields --- Lib/test/test_structseq.py | 16 +++++++++++++++- Objects/structseq.c | 5 +++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_structseq.py b/Lib/test/test_structseq.py index 5ec2192fc4e3e6..6aafd17a43d219 100644 --- a/Lib/test/test_structseq.py +++ b/Lib/test/test_structseq.py @@ -133,7 +133,21 @@ def __len__(self): def test_reduce(self): t = time.gmtime() - x = t.__reduce__() + cls, (tup, dct) = t.__reduce__() + self.assertIs(cls, time.struct_time) + self.assertEqual(tup, tuple(t)) + cls(tup, dct) + + def test_reduce_with_unnamed_fields(self): + r = os.stat_result(range(os.stat_result.n_sequence_fields), {'st_atime': 1.0}) + self.assertEqual(r.st_atime, 1.0) + cls, (tup, dct) = r.__reduce__() + self.assertIs(cls, os.stat_result) + self.assertEqual(tup, tuple(r)) + self.assertIn('st_atime', dct) + reconstructed = cls(tup, dct) + self.assertEqual(reconstructed, r) + self.assertAlmostEqual(reconstructed.st_atime, r.st_atime) def test_extended_getslice(self): # Test extended slicing by comparing with list slicing. diff --git a/Objects/structseq.c b/Objects/structseq.c index 829cfaab84fd4a..33a7e53e94d02a 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -218,6 +218,11 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict) Py_DECREF(arg); if (dict != NULL) { for (i = 0; i < len; ++i) { + // unnamed fields can be present in both sequence and dict + if (i >= min_len - n_unnamed_fields && i < min_len + n_unnamed_fields) { + continue; + } + const char *name = type->tp_members[i < min_len ? i : i - n_unnamed_fields].name; PyObject *ob = NULL; if (PyDict_GetItemStringRef(dict, name, &ob) < 0) { From 981e40371a20f153abe34137b02211e90414eb42 Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Tue, 3 Oct 2023 15:27:25 +0800 Subject: [PATCH 06/15] Handle case for PyStructSequence with unnamed fields --- Lib/test/test_structseq.py | 15 +++++++++++++++ Objects/structseq.c | 4 ++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_structseq.py b/Lib/test/test_structseq.py index 6aafd17a43d219..c662ea37b04e09 100644 --- a/Lib/test/test_structseq.py +++ b/Lib/test/test_structseq.py @@ -109,6 +109,21 @@ def test_constructor_with_duplicate_fields(self): with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_mon'"): t("123456789", dict={"error": 0, "tm_zone": "some zone", "tm_mon": 1}) + def test_constructor_with_duplicate_unnamed_fields(self): + assert os.stat_result.n_unnamed_fields > 0 + n_visible_fields = os.stat_result.n_sequence_fields + + r = os.stat_result(range(n_visible_fields), {'st_atime': -1.0}) + self.assertEqual(r.st_atime, -1.0) + self.assertEqual(r, tuple(range(n_visible_fields))) + + r = os.stat_result((*range(n_visible_fields), -1.0)) + self.assertEqual(r.st_atime, -1.0) + self.assertEqual(r, tuple(range(n_visible_fields))) + + with self.assertRaisesRegex(TypeError, "got multiple values for field 'st_atime'"): + os.stat_result((*range(n_visible_fields), -1.0), {'st_atime': -1.0}) + def test_constructor_with_unknown_fields(self): t = time.struct_time with self.assertRaisesRegex(TypeError, diff --git a/Objects/structseq.c b/Objects/structseq.c index 33a7e53e94d02a..2adf8010e51eef 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -219,7 +219,7 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict) if (dict != NULL) { for (i = 0; i < len; ++i) { // unnamed fields can be present in both sequence and dict - if (i >= min_len - n_unnamed_fields && i < min_len + n_unnamed_fields) { + if (i >= min_len - n_unnamed_fields && i < min_len) { continue; } @@ -243,7 +243,7 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict) Py_DECREF(res); return NULL; } - for (i = len; i < max_len; ++i) { + for (i = min_len; i < max_len; ++i) { PyObject *ob = NULL; const char *name = type->tp_members[i - n_unnamed_fields].name; PyObject *key = PyUnicode_FromString(name); From 67954db973d0a5d5f18de9cbee66ca5b18c2214f Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Tue, 3 Oct 2023 15:51:18 +0800 Subject: [PATCH 07/15] Add more test cases --- Lib/test/test_structseq.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Lib/test/test_structseq.py b/Lib/test/test_structseq.py index c662ea37b04e09..3242b8dadc7276 100644 --- a/Lib/test/test_structseq.py +++ b/Lib/test/test_structseq.py @@ -97,7 +97,11 @@ def test_constructor(self): def test_constructor_with_duplicate_fields(self): t = time.struct_time self.assertEqual(t("123456789"), tuple("123456789")) + self.assertEqual(t("1234567890"), tuple("123456789")) + self.assertEqual(t("1234567890").tm_zone, "0") self.assertEqual(t("123456789", dict={"tm_zone": "some zone"}), tuple("123456789")) + self.assertEqual(t("123456789", dict={"tm_zone": "some zone"}).tm_zone, "some zone") + with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_year'"): t("123456789", dict={"tm_year": 0}) with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_year'"): @@ -108,6 +112,10 @@ def test_constructor_with_duplicate_fields(self): t("123456789", dict={"tm_zone": "some zone", "tm_mon": 1, "error": 0}) with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_mon'"): t("123456789", dict={"error": 0, "tm_zone": "some zone", "tm_mon": 1}) + with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_zone'"): + t("1234567890", dict={"tm_zone": "some zone"}) + with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_zone'"): + t("1234567890", dict={"error": 0, "tm_zone": "some zone"}) def test_constructor_with_duplicate_unnamed_fields(self): assert os.stat_result.n_unnamed_fields > 0 From 9c3f7d2bddc2cab978d4ed4513e68b029548e1b7 Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Tue, 3 Oct 2023 16:02:06 +0800 Subject: [PATCH 08/15] Add more test cases --- Lib/test/test_structseq.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_structseq.py b/Lib/test/test_structseq.py index 3242b8dadc7276..1968fbd6ac27a9 100644 --- a/Lib/test/test_structseq.py +++ b/Lib/test/test_structseq.py @@ -159,7 +159,9 @@ def test_reduce(self): cls, (tup, dct) = t.__reduce__() self.assertIs(cls, time.struct_time) self.assertEqual(tup, tuple(t)) - cls(tup, dct) + reconstructed = cls(tup, dct) + self.assertEqual(reconstructed, t) + self.assertEqual(reconstructed.tm_zone, t.tm_zone) def test_reduce_with_unnamed_fields(self): r = os.stat_result(range(os.stat_result.n_sequence_fields), {'st_atime': 1.0}) From c927e1ee85923d69487bd1358404c3bb56cbc338 Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Tue, 3 Oct 2023 18:18:39 +0800 Subject: [PATCH 09/15] Move tests to better places --- Lib/test/test_structseq.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_structseq.py b/Lib/test/test_structseq.py index 1968fbd6ac27a9..bafcbfe0020747 100644 --- a/Lib/test/test_structseq.py +++ b/Lib/test/test_structseq.py @@ -90,17 +90,23 @@ def test_constructor(self): self.assertRaises(TypeError, t, "123") self.assertRaises(TypeError, t, "123", dict={}) self.assertRaises(TypeError, t, "123456789", dict=None) + self.assertRaises(TypeError, t, seq="123456789", dict={}) + + self.assertEqual(t("123456789"), tuple("123456789")) + self.assertEqual(t("123456789", {}), tuple("123456789")) + self.assertEqual(t("123456789", dict={}), tuple("123456789")) + self.assertEqual(t(sequence="123456789", dict={}), tuple("123456789")) + + self.assertEqual(t("1234567890"), tuple("123456789")) + self.assertEqual(t("1234567890").tm_zone, "0") + self.assertEqual(t("123456789", {"tm_zone": "some zone"}), tuple("123456789")) + self.assertEqual(t("123456789", {"tm_zone": "some zone"}).tm_zone, "some zone") s = "123456789" self.assertEqual("".join(t(s)), s) def test_constructor_with_duplicate_fields(self): t = time.struct_time - self.assertEqual(t("123456789"), tuple("123456789")) - self.assertEqual(t("1234567890"), tuple("123456789")) - self.assertEqual(t("1234567890").tm_zone, "0") - self.assertEqual(t("123456789", dict={"tm_zone": "some zone"}), tuple("123456789")) - self.assertEqual(t("123456789", dict={"tm_zone": "some zone"}).tm_zone, "some zone") with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_year'"): t("123456789", dict={"tm_year": 0}) From c173451172b29ba81c188c35b6c3a68422916631 Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Tue, 3 Oct 2023 18:58:17 +0800 Subject: [PATCH 10/15] Revert test changes for structseq.__reduce__ --- Lib/test/test_structseq.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/Lib/test/test_structseq.py b/Lib/test/test_structseq.py index bafcbfe0020747..24262cb91eb6f8 100644 --- a/Lib/test/test_structseq.py +++ b/Lib/test/test_structseq.py @@ -162,23 +162,7 @@ def __len__(self): def test_reduce(self): t = time.gmtime() - cls, (tup, dct) = t.__reduce__() - self.assertIs(cls, time.struct_time) - self.assertEqual(tup, tuple(t)) - reconstructed = cls(tup, dct) - self.assertEqual(reconstructed, t) - self.assertEqual(reconstructed.tm_zone, t.tm_zone) - - def test_reduce_with_unnamed_fields(self): - r = os.stat_result(range(os.stat_result.n_sequence_fields), {'st_atime': 1.0}) - self.assertEqual(r.st_atime, 1.0) - cls, (tup, dct) = r.__reduce__() - self.assertIs(cls, os.stat_result) - self.assertEqual(tup, tuple(r)) - self.assertIn('st_atime', dct) - reconstructed = cls(tup, dct) - self.assertEqual(reconstructed, r) - self.assertAlmostEqual(reconstructed.st_atime, r.st_atime) + x = t.__reduce__() def test_extended_getslice(self): # Test extended slicing by comparing with list slicing. From 704ccaa89089168ba33677d0a3ef64ea714b8749 Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Tue, 3 Oct 2023 19:34:55 +0800 Subject: [PATCH 11/15] Refactor and simplify error handling --- Lib/test/test_structseq.py | 32 +++++++++------- Objects/structseq.c | 75 ++++++++++++-------------------------- 2 files changed, 41 insertions(+), 66 deletions(-) diff --git a/Lib/test/test_structseq.py b/Lib/test/test_structseq.py index 24262cb91eb6f8..0999a10b99a12e 100644 --- a/Lib/test/test_structseq.py +++ b/Lib/test/test_structseq.py @@ -108,20 +108,14 @@ def test_constructor(self): def test_constructor_with_duplicate_fields(self): t = time.struct_time - with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_year'"): - t("123456789", dict={"tm_year": 0}) - with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_year'"): - t("123456789", dict={"tm_year": 0, "tm_mon": 1}) - with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_mon'"): - t("123456789", dict={"tm_zone": "some zone", "tm_mon": 1}) - with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_mon'"): - t("123456789", dict={"tm_zone": "some zone", "tm_mon": 1, "error": 0}) - with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_mon'"): - t("123456789", dict={"error": 0, "tm_zone": "some zone", "tm_mon": 1}) with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_zone'"): t("1234567890", dict={"tm_zone": "some zone"}) + with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_zone'"): + t("1234567890", dict={"tm_zone": "some zone", "tm_mon": 1}) with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_zone'"): t("1234567890", dict={"error": 0, "tm_zone": "some zone"}) + with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_zone'"): + t("1234567890", dict={"error": 0, "tm_zone": "some zone", "tm_mon": 1}) def test_constructor_with_duplicate_unnamed_fields(self): assert os.stat_result.n_unnamed_fields > 0 @@ -140,11 +134,21 @@ def test_constructor_with_duplicate_unnamed_fields(self): def test_constructor_with_unknown_fields(self): t = time.struct_time - with self.assertRaisesRegex(TypeError, - re.escape("got unexpected field name(s): {'error'}")): + + error_message = re.escape("got unexpected field name(s)") + with self.assertRaisesRegex(TypeError, error_message): + t("123456789", dict={"tm_year": 0}) + with self.assertRaisesRegex(TypeError, error_message): + t("123456789", dict={"tm_year": 0, "tm_mon": 1}) + with self.assertRaisesRegex(TypeError, error_message): + t("123456789", dict={"tm_zone": "some zone", "tm_mon": 1}) + with self.assertRaisesRegex(TypeError, error_message): + t("123456789", dict={"tm_zone": "some zone", "error": 0}) + with self.assertRaisesRegex(TypeError, error_message): + t("123456789", dict={"error": 0, "tm_zone": "some zone", "tm_mon": 1}) + with self.assertRaisesRegex(TypeError, error_message): t("123456789", dict={"error": 0}) - with self.assertRaisesRegex(TypeError, - re.escape("got unexpected field name(s): {'error'}")): + with self.assertRaisesRegex(TypeError, error_message): t("123456789", dict={"tm_zone": "some zone", "error": 0}) def test_eviltuple(self): diff --git a/Objects/structseq.c b/Objects/structseq.c index 2adf8010e51eef..08f24c0d3a0734 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -217,75 +217,46 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict) } Py_DECREF(arg); if (dict != NULL) { - for (i = 0; i < len; ++i) { - // unnamed fields can be present in both sequence and dict - if (i >= min_len - n_unnamed_fields && i < min_len) { - continue; - } - - const char *name = type->tp_members[i < min_len ? i : i - n_unnamed_fields].name; - PyObject *ob = NULL; - if (PyDict_GetItemStringRef(dict, name, &ob) < 0) { - Py_DECREF(res); - return NULL; - } - if (ob != NULL) { - Py_DECREF(ob); - PyErr_Format(PyExc_TypeError, - "%.500s() got multiple values for field '%s'", - type->tp_name, name); - Py_DECREF(res); - return NULL; - } - } - PyObject *unknown_keys = PySet_New(dict); - if (unknown_keys == NULL) { - Py_DECREF(res); - return NULL; - } + Py_ssize_t n_found_keys = 0; for (i = min_len; i < max_len; ++i) { PyObject *ob = NULL; const char *name = type->tp_members[i - n_unnamed_fields].name; - PyObject *key = PyUnicode_FromString(name); - if (!key || PyDict_GetItemRef(dict, key, &ob) < 0) { - Py_XDECREF(key); + if (PyDict_GetItemStringRef(dict, name, &ob) < 0) { Py_DECREF(res); - Py_DECREF(unknown_keys); return NULL; } - if (ob != NULL) { - Py_DECREF(ob); - if (PySet_Discard(unknown_keys, key) < 0) { - Py_DECREF(key); + if (i < len) { + if (ob != NULL && res->ob_item[i] != NULL) { + PyErr_Format(PyExc_TypeError, + "%.500s() got multiple values for field '%s'", + type->tp_name, + name); + Py_DECREF(ob); Py_DECREF(res); - Py_DECREF(unknown_keys); return NULL; } + continue; } - Py_DECREF(key); + if (ob == NULL) { + ob = Py_NewRef(Py_None); + } + else { + ++n_found_keys; + } + res->ob_item[i] = ob; } - if (PySet_GET_SIZE(unknown_keys) > 0) { + if (PyDict_GET_SIZE(dict) > n_found_keys) { PyErr_Format(PyExc_TypeError, - "%.500s() got unexpected field name(s): %R", - type->tp_name, unknown_keys); + "%.500s() got unexpected field name(s).", + type->tp_name); Py_DECREF(res); - Py_DECREF(unknown_keys); return NULL; } } - for (i = len; i < max_len; ++i) { - PyObject *ob = NULL; - if (dict != NULL) { - const char *name = type->tp_members[i - n_unnamed_fields].name; - if (PyDict_GetItemStringRef(dict, name, &ob) < 0) { - Py_DECREF(res); - return NULL; - } - } - if (ob == NULL) { - ob = Py_NewRef(Py_None); + else { + for (i = len; i < max_len; ++i) { + res->ob_item[i] = Py_NewRef(Py_None); } - res->ob_item[i] = ob; } _PyObject_GC_TRACK(res); From 8f216b85d3a82ad3a06e1e8ede783ab132c14093 Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Tue, 3 Oct 2023 19:39:47 +0800 Subject: [PATCH 12/15] Refactor and simplify error handling --- Objects/structseq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Objects/structseq.c b/Objects/structseq.c index 08f24c0d3a0734..8ea4cde5c69874 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -226,7 +226,9 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict) return NULL; } if (i < len) { - if (ob != NULL && res->ob_item[i] != NULL) { + // For i < len, the ob_item[i] is already set from sequence. + // If there is a value in the dict, raise an error. + if (ob != NULL) { PyErr_Format(PyExc_TypeError, "%.500s() got multiple values for field '%s'", type->tp_name, From 04a19ccd20484ff8f01f1db8e747bde3e3d1ab65 Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Tue, 3 Oct 2023 20:06:36 +0800 Subject: [PATCH 13/15] Also check the input dict size --- Objects/structseq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Objects/structseq.c b/Objects/structseq.c index 8ea4cde5c69874..f5d2b729b1577a 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -216,7 +216,7 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict) res->ob_item[i] = Py_NewRef(v); } Py_DECREF(arg); - if (dict != NULL) { + if (dict != NULL && PyDict_GET_SIZE(dict) > 0) { Py_ssize_t n_found_keys = 0; for (i = min_len; i < max_len; ++i) { PyObject *ob = NULL; @@ -254,8 +254,7 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict) Py_DECREF(res); return NULL; } - } - else { + } else { for (i = len; i < max_len; ++i) { res->ob_item[i] = Py_NewRef(Py_None); } From bd2782c3ef97db2194b0a2bd40b533e9a6eb3807 Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Wed, 4 Oct 2023 21:27:39 +0800 Subject: [PATCH 14/15] Split for-loop for better code readability --- Objects/structseq.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/Objects/structseq.c b/Objects/structseq.c index f5d2b729b1577a..166433f49dfaa1 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -217,27 +217,32 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict) } Py_DECREF(arg); if (dict != NULL && PyDict_GET_SIZE(dict) > 0) { - Py_ssize_t n_found_keys = 0; - for (i = min_len; i < max_len; ++i) { + for (i = min_len; i < len; ++i) { PyObject *ob = NULL; const char *name = type->tp_members[i - n_unnamed_fields].name; if (PyDict_GetItemStringRef(dict, name, &ob) < 0) { Py_DECREF(res); return NULL; } - if (i < len) { - // For i < len, the ob_item[i] is already set from sequence. - // If there is a value in the dict, raise an error. - if (ob != NULL) { - PyErr_Format(PyExc_TypeError, - "%.500s() got multiple values for field '%s'", - type->tp_name, - name); - Py_DECREF(ob); - Py_DECREF(res); - return NULL; - } - continue; + // For min_len <= i < len, the ob_item[i] is already set from sequence. + // If there is a value in the dict, raise an error. + if (ob != NULL) { + PyErr_Format(PyExc_TypeError, + "%.500s() got multiple values for field '%s'", + type->tp_name, name); + Py_DECREF(ob); + Py_DECREF(res); + return NULL; + } + } + + Py_ssize_t n_found_keys = 0; + for (i = len; i < max_len; ++i) { + PyObject *ob = NULL; + const char *name = type->tp_members[i - n_unnamed_fields].name; + if (PyDict_GetItemStringRef(dict, name, &ob) < 0) { + Py_DECREF(res); + return NULL; } if (ob == NULL) { ob = Py_NewRef(Py_None); @@ -249,7 +254,7 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict) } if (PyDict_GET_SIZE(dict) > n_found_keys) { PyErr_Format(PyExc_TypeError, - "%.500s() got unexpected field name(s).", + "%.500s() got unexpected field name(s)", type->tp_name); Py_DECREF(res); return NULL; From 04a554d2682846e10b0733f1c8b55be0a7283728 Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Wed, 4 Oct 2023 21:32:41 +0800 Subject: [PATCH 15/15] Remove unnecessary error checking --- Lib/test/test_structseq.py | 14 ++++++++------ Objects/structseq.c | 21 +-------------------- 2 files changed, 9 insertions(+), 26 deletions(-) diff --git a/Lib/test/test_structseq.py b/Lib/test/test_structseq.py index 18119ae7f2a240..2ef1316e08fb8b 100644 --- a/Lib/test/test_structseq.py +++ b/Lib/test/test_structseq.py @@ -110,13 +110,14 @@ def test_constructor(self): def test_constructor_with_duplicate_fields(self): t = time.struct_time - with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_zone'"): + error_message = re.escape("got duplicate or unexpected field name(s)") + with self.assertRaisesRegex(TypeError, error_message): t("1234567890", dict={"tm_zone": "some zone"}) - with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_zone'"): + with self.assertRaisesRegex(TypeError, error_message): t("1234567890", dict={"tm_zone": "some zone", "tm_mon": 1}) - with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_zone'"): + with self.assertRaisesRegex(TypeError, error_message): t("1234567890", dict={"error": 0, "tm_zone": "some zone"}) - with self.assertRaisesRegex(TypeError, "got multiple values for field 'tm_zone'"): + with self.assertRaisesRegex(TypeError, error_message): t("1234567890", dict={"error": 0, "tm_zone": "some zone", "tm_mon": 1}) def test_constructor_with_duplicate_unnamed_fields(self): @@ -131,13 +132,14 @@ def test_constructor_with_duplicate_unnamed_fields(self): self.assertEqual(r.st_atime, -1.0) self.assertEqual(r, tuple(range(n_visible_fields))) - with self.assertRaisesRegex(TypeError, "got multiple values for field 'st_atime'"): + with self.assertRaisesRegex(TypeError, + re.escape("got duplicate or unexpected field name(s)")): os.stat_result((*range(n_visible_fields), -1.0), {'st_atime': -1.0}) def test_constructor_with_unknown_fields(self): t = time.struct_time - error_message = re.escape("got unexpected field name(s)") + error_message = re.escape("got duplicate or unexpected field name(s)") with self.assertRaisesRegex(TypeError, error_message): t("123456789", dict={"tm_year": 0}) with self.assertRaisesRegex(TypeError, error_message): diff --git a/Objects/structseq.c b/Objects/structseq.c index 166433f49dfaa1..2c98288039c58c 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -217,25 +217,6 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict) } Py_DECREF(arg); if (dict != NULL && PyDict_GET_SIZE(dict) > 0) { - for (i = min_len; i < len; ++i) { - PyObject *ob = NULL; - const char *name = type->tp_members[i - n_unnamed_fields].name; - if (PyDict_GetItemStringRef(dict, name, &ob) < 0) { - Py_DECREF(res); - return NULL; - } - // For min_len <= i < len, the ob_item[i] is already set from sequence. - // If there is a value in the dict, raise an error. - if (ob != NULL) { - PyErr_Format(PyExc_TypeError, - "%.500s() got multiple values for field '%s'", - type->tp_name, name); - Py_DECREF(ob); - Py_DECREF(res); - return NULL; - } - } - Py_ssize_t n_found_keys = 0; for (i = len; i < max_len; ++i) { PyObject *ob = NULL; @@ -254,7 +235,7 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict) } if (PyDict_GET_SIZE(dict) > n_found_keys) { PyErr_Format(PyExc_TypeError, - "%.500s() got unexpected field name(s)", + "%.500s() got duplicate or unexpected field name(s)", type->tp_name); Py_DECREF(res); return NULL;