From 0d49ccb66c544820c8b75e11c5a66eb8973b9e6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 19 Dec 2024 21:43:08 +0100 Subject: [PATCH 01/27] improve performance of UUIDs creation --- Lib/uuid.py | 84 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 9c6ad9643cf6d5..96d48ce04730a1 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -85,6 +85,14 @@ class SafeUUID: unknown = None +_RFC_4122_CLEARFLAGS_MASK = 0xffff_ffff_ffff_0fff_3fff_ffff_ffff_ffff +_RFC_4122_VERSION_1_FLAGS = 0x0000_0000_0000_1000_8000_0000_0000_0000 +_RFC_4122_VERSION_3_FLAGS = 0x0000_0000_0000_3000_8000_0000_0000_0000 +_RFC_4122_VERSION_4_FLAGS = 0x0000_0000_0000_4000_8000_0000_0000_0000 +_RFC_4122_VERSION_5_FLAGS = 0x0000_0000_0000_5000_8000_0000_0000_0000 +_RFC_4122_VERSION_8_FLAGS = 0x0000_0000_0000_8000_8000_0000_0000_0000 + + class UUID: """Instances of the UUID class represent UUIDs as specified in RFC 4122. UUID objects are immutable, hashable, and usable as dictionary keys. 
@@ -174,45 +182,49 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, if [hex, bytes, bytes_le, fields, int].count(None) != 4: raise TypeError('one of the hex, bytes, bytes_le, fields, ' 'or int arguments must be given') - if hex is not None: + if int is not None: + pass + elif hex is not None: hex = hex.replace('urn:', '').replace('uuid:', '') hex = hex.strip('{}').replace('-', '') if len(hex) != 32: raise ValueError('badly formed hexadecimal UUID string') int = int_(hex, 16) - if bytes_le is not None: + elif bytes_le is not None: if len(bytes_le) != 16: raise ValueError('bytes_le is not a 16-char string') + assert isinstance(bytes_le, bytes_), repr(bytes_le) bytes = (bytes_le[4-1::-1] + bytes_le[6-1:4-1:-1] + bytes_le[8-1:6-1:-1] + bytes_le[8:]) - if bytes is not None: + int = int_.from_bytes(bytes) + elif bytes is not None: if len(bytes) != 16: raise ValueError('bytes is not a 16-char string') assert isinstance(bytes, bytes_), repr(bytes) int = int_.from_bytes(bytes) # big endian - if fields is not None: + elif fields is not None: if len(fields) != 6: raise ValueError('fields is not a 6-tuple') (time_low, time_mid, time_hi_version, clock_seq_hi_variant, clock_seq_low, node) = fields - if not 0 <= time_low < 1<<32: + if time_low < 0 or time_low > 0xffff_ffff: raise ValueError('field 1 out of range (need a 32-bit value)') - if not 0 <= time_mid < 1<<16: + if time_mid < 0 or time_mid > 0xffff: raise ValueError('field 2 out of range (need a 16-bit value)') - if not 0 <= time_hi_version < 1<<16: + if time_hi_version < 0 or time_hi_version > 0xffff: raise ValueError('field 3 out of range (need a 16-bit value)') - if not 0 <= clock_seq_hi_variant < 1<<8: + if clock_seq_hi_variant < 0 or clock_seq_hi_variant > 0xff: raise ValueError('field 4 out of range (need an 8-bit value)') - if not 0 <= clock_seq_low < 1<<8: + if clock_seq_low < 0 or clock_seq_low > 0xff: raise ValueError('field 5 out of range (need an 8-bit value)') - if not 0 <= node < 1<<48: 
+ if node < 0 or node > 0xffff_ffff_ffff: raise ValueError('field 6 out of range (need a 48-bit value)') clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low int = ((time_low << 96) | (time_mid << 80) | (time_hi_version << 64) | (clock_seq << 48) | node) - if int is not None: - if not 0 <= int < 1<<128: - raise ValueError('int is out of range (need a 128-bit value)') + # "x < a or int > b" is slightly faster than "not (a <= x <= b)" + if int < 0 or int > 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff: + raise ValueError('int is out of range (need a 128-bit value)') if version is not None: if not 1 <= version <= 8: raise ValueError('illegal version number') @@ -686,38 +698,52 @@ def uuid1(node=None, clock_seq=None): if clock_seq is None: import random clock_seq = random.getrandbits(14) # instead of stable storage + else: + clock_seq = clock_seq & 0x3fff time_low = timestamp & 0xffffffff time_mid = (timestamp >> 32) & 0xffff time_hi_version = (timestamp >> 48) & 0x0fff - clock_seq_low = clock_seq & 0xff - clock_seq_hi_variant = (clock_seq >> 8) & 0x3f if node is None: node = getnode() - return UUID(fields=(time_low, time_mid, time_hi_version, - clock_seq_hi_variant, clock_seq_low, node), version=1) + int_uuid_1 = ((time_low << 96) | (time_mid << 80) | + (time_hi_version << 64) | (clock_seq << 48) | node) + # by construction, the variant and version bits are already cleared + int_uuid_1 |= _RFC_4122_VERSION_1_FLAGS + return UUID(int=int_uuid_1, version=None) def uuid3(namespace, name): """Generate a UUID from the MD5 hash of a namespace UUID and a name.""" if isinstance(name, str): name = bytes(name, "utf-8") - from hashlib import md5 - digest = md5( - namespace.bytes + name, - usedforsecurity=False - ).digest() - return UUID(bytes=digest[:16], version=3) + # HACL*-based MD5 is slightly faster than its OpenSSL version, + # and 'import X; X.Y' is slightly faster than 'from X import Y'. 
+ import _md5 + h = _md5.md5(namespace.bytes + name, usedforsecurity=False) + assert len(h.digest()) == 16 + int_uuid_3 = int_.from_bytes(h.digest()) + int_uuid_3 &= _RFC_4122_CLEARFLAGS_MASK + int_uuid_3 |= _RFC_4122_VERSION_3_FLAGS + return UUID(int=int_uuid_3, version=None) def uuid4(): """Generate a random UUID.""" - return UUID(bytes=os.urandom(16), version=4) + int_uuid_4 = int_.from_bytes(os.urandom(16)) + int_uuid_4 &= _RFC_4122_CLEARFLAGS_MASK + int_uuid_4 |= _RFC_4122_VERSION_4_FLAGS + return UUID(int=int_uuid_4, version=None) def uuid5(namespace, name): """Generate a UUID from the SHA-1 hash of a namespace UUID and a name.""" if isinstance(name, str): name = bytes(name, "utf-8") - from hashlib import sha1 - hash = sha1(namespace.bytes + name).digest() - return UUID(bytes=hash[:16], version=5) + # OpenSSL-based SHA-1 is slightly faster than its HACL* version, + # and 'import X; X.Y' is slightly faster than 'from X import Y'. + import hashlib + h = hashlib.sha1(namespace.bytes + name, usedforsecurity=False) + int_uuid_5 = int_.from_bytes(h.digest()[:16]) + int_uuid_5 &= _RFC_4122_CLEARFLAGS_MASK + int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS + return UUID(int=int_uuid_5, version=None) def uuid8(a=None, b=None, c=None): """Generate a UUID from three custom blocks. 
@@ -740,7 +766,9 @@ def uuid8(a=None, b=None, c=None): int_uuid_8 = (a & 0xffff_ffff_ffff) << 80 int_uuid_8 |= (b & 0xfff) << 64 int_uuid_8 |= c & 0x3fff_ffff_ffff_ffff - return UUID(int=int_uuid_8, version=8) + # by construction, the variant and version bits are already cleared + int_uuid_8 |= _RFC_4122_VERSION_8_FLAGS + return UUID(int=int_uuid_8, version=None) def main(): """Run the uuid command line interface.""" From 603335fc13429a7dcf3358131557cdca3e8645b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 19 Dec 2024 22:59:47 +0100 Subject: [PATCH 02/27] add What's New entry --- Doc/whatsnew/3.14.rst | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index d13cd2d5173a04..013b7eb085a88e 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -659,6 +659,28 @@ io file's bytes in full. (Contributed by Cody Maloney and Victor Stinner in :gh:`120754` and :gh:`90102`.) + +uuid +---- + +* Improve generations of UUID objects via their dedicated functions: + + * For a given 48-bit hardware address *node* and a given 14-bit + clock sequence *clock_seq*, :func:`uuid1(node=node) ` + and :func:`uuid1(clock_seq=clock_seq) ` are 35% faster. + Performances for :func:`~uuid.uuid1` remain unchanged when neither + the hardware address nor the clock sequence is specified. + * :func:`~uuid.uuid3` is 27% faster for 16-byte names and 8% faster + for 1024-byte names. Performances for longer names remain unchanged. + * :func:`~uuid.uuid5` is 24% faster for 16-byte names and 11% faster + for 1024-byte names. Performances for longer names remain unchanged. + * :func:`~uuid.uuid4` and :func:`~uuid.uuid8` are 20% faster. + + Overall, dedicated generation of UUID objects is 20% faster. + + (Contributed by Bénédikt Tran in :gh:`XXX`.) 
+ + Deprecated ========== From 154ff8b7b5f2bf51ecef886422100e1a5e466b60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Dec 2024 11:13:01 +0100 Subject: [PATCH 03/27] blurb --- .../next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst diff --git a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst new file mode 100644 index 00000000000000..b7c98469407fbb --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst @@ -0,0 +1,2 @@ +Improve generations of UUID objects via their dedicated functions by 20%. +Patch by Bénédikt Tran. From b965887569ad422d304e53df14043a70c8e4030d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Dec 2024 11:34:43 +0100 Subject: [PATCH 04/27] fix issue number --- Doc/whatsnew/3.14.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 013b7eb085a88e..443636fe1bf408 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -678,7 +678,7 @@ uuid Overall, dedicated generation of UUID objects is 20% faster. - (Contributed by Bénédikt Tran in :gh:`XXX`.) + (Contributed by Bénédikt Tran in :gh:`128150`.) 
Deprecated From a8a1894a7f4fb4f034036c4ebcee043ffa4d057c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Dec 2024 11:34:59 +0100 Subject: [PATCH 05/27] fix typos --- Doc/whatsnew/3.14.rst | 3 ++- .../Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 443636fe1bf408..359cd1e26dd744 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -663,7 +663,8 @@ io uuid ---- -* Improve generations of UUID objects via their dedicated functions: +* Improve generation of :class:`~uuid.UUID` objects via their dedicated + functions: * For a given 48-bit hardware address *node* and a given 14-bit clock sequence *clock_seq*, :func:`uuid1(node=node) ` diff --git a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst index b7c98469407fbb..1a17fa27535004 100644 --- a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst +++ b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst @@ -1,2 +1,2 @@ -Improve generations of UUID objects via their dedicated functions by 20%. -Patch by Bénédikt Tran. +Improve generation of :class:`~uuid.UUID` objects via their dedicated +functions by 20%. Patch by Bénédikt Tran. 
From c8aa75256d80b1de87fb7c874b723e3f73ab3e38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Dec 2024 11:38:08 +0100 Subject: [PATCH 06/27] ensure 14-bit clock sequence --- Lib/uuid.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 96d48ce04730a1..b47d9fc64c0bc4 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -698,13 +698,12 @@ def uuid1(node=None, clock_seq=None): if clock_seq is None: import random clock_seq = random.getrandbits(14) # instead of stable storage - else: - clock_seq = clock_seq & 0x3fff time_low = timestamp & 0xffffffff time_mid = (timestamp >> 32) & 0xffff time_hi_version = (timestamp >> 48) & 0x0fff if node is None: node = getnode() + clock_seq = clock_seq & 0x3fff int_uuid_1 = ((time_low << 96) | (time_mid << 80) | (time_hi_version << 64) | (clock_seq << 48) | node) # by construction, the variant and version bits are already cleared From a2278b88337abfa2ea42bb000a081386fee3adb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Dec 2024 15:18:44 +0100 Subject: [PATCH 07/27] add dedicated private fast constructor --- Lib/uuid.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index b47d9fc64c0bc4..56071b58993f09 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -237,6 +237,15 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, object.__setattr__(self, 'int', int) object.__setattr__(self, 'is_safe', is_safe) + @classmethod + def _from_int(cls, int, *, is_safe=SafeUUID.unknown): + self = cls.__new__(cls) + if int < 0 or int > 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff: + raise ValueError('int is out of range (need a 128-bit value)') + object.__setattr__(self, 'int', int) + object.__setattr__(self, 'is_safe', is_safe) + return self + def __getstate__(self): d = {'int': 
self.int} if self.is_safe != SafeUUID.unknown: @@ -722,14 +731,14 @@ def uuid3(namespace, name): int_uuid_3 = int_.from_bytes(h.digest()) int_uuid_3 &= _RFC_4122_CLEARFLAGS_MASK int_uuid_3 |= _RFC_4122_VERSION_3_FLAGS - return UUID(int=int_uuid_3, version=None) + return UUID._from_int(int_uuid_3) def uuid4(): """Generate a random UUID.""" int_uuid_4 = int_.from_bytes(os.urandom(16)) int_uuid_4 &= _RFC_4122_CLEARFLAGS_MASK int_uuid_4 |= _RFC_4122_VERSION_4_FLAGS - return UUID(int=int_uuid_4, version=None) + return UUID._from_int(int_uuid_4) def uuid5(namespace, name): """Generate a UUID from the SHA-1 hash of a namespace UUID and a name.""" @@ -742,7 +751,7 @@ def uuid5(namespace, name): int_uuid_5 = int_.from_bytes(h.digest()[:16]) int_uuid_5 &= _RFC_4122_CLEARFLAGS_MASK int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS - return UUID(int=int_uuid_5, version=None) + return UUID._from_int(int_uuid_5) def uuid8(a=None, b=None, c=None): """Generate a UUID from three custom blocks. @@ -767,7 +776,7 @@ def uuid8(a=None, b=None, c=None): int_uuid_8 |= c & 0x3fff_ffff_ffff_ffff # by construction, the variant and version bits are already cleared int_uuid_8 |= _RFC_4122_VERSION_8_FLAGS - return UUID(int=int_uuid_8, version=None) + return UUID._from_int(int_uuid_8) def main(): """Run the uuid command line interface.""" From 0710549d83f22560726e5f09bef0c3013fd838c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Dec 2024 15:18:48 +0100 Subject: [PATCH 08/27] revert UUIDv1 construction --- Doc/whatsnew/3.14.rst | 14 +++++--------- Lib/uuid.py | 10 ++++------ .../2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst | 4 ++-- 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 7783064465c745..8d55e9e3b2af2c 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -675,18 +675,14 @@ uuid * Improve generation of :class:`~uuid.UUID` objects via their 
dedicated functions: - * For a given 48-bit hardware address *node* and a given 14-bit - clock sequence *clock_seq*, :func:`uuid1(node=node) ` - and :func:`uuid1(clock_seq=clock_seq) ` are 35% faster. - Performances for :func:`~uuid.uuid1` remain unchanged when neither - the hardware address nor the clock sequence is specified. - * :func:`~uuid.uuid3` is 27% faster for 16-byte names and 8% faster + * :func:`~uuid.uuid3` is 40% faster for 16-byte names and 10% faster for 1024-byte names. Performances for longer names remain unchanged. - * :func:`~uuid.uuid5` is 24% faster for 16-byte names and 11% faster + * :func:`~uuid.uuid5` is 38% faster for 16-byte names and 21% faster for 1024-byte names. Performances for longer names remain unchanged. - * :func:`~uuid.uuid4` and :func:`~uuid.uuid8` are 20% faster. + * :func:`~uuid.uuid4` is 31% faster and :func:`~uuid.uuid8` is 37% faster. - Overall, dedicated generation of UUID objects is 20% faster. + Overall, dedicated generation of UUID objects version 3, 4, 5, and 8 is + roughly 30% faster. (Contributed by Bénédikt Tran in :gh:`128150`.) 
diff --git a/Lib/uuid.py b/Lib/uuid.py index 56071b58993f09..d68007ba295ca2 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -710,14 +710,12 @@ def uuid1(node=None, clock_seq=None): time_low = timestamp & 0xffffffff time_mid = (timestamp >> 32) & 0xffff time_hi_version = (timestamp >> 48) & 0x0fff + clock_seq_low = clock_seq & 0xff + clock_seq_hi_variant = (clock_seq >> 8) & 0x3f if node is None: node = getnode() - clock_seq = clock_seq & 0x3fff - int_uuid_1 = ((time_low << 96) | (time_mid << 80) | - (time_hi_version << 64) | (clock_seq << 48) | node) - # by construction, the variant and version bits are already cleared - int_uuid_1 |= _RFC_4122_VERSION_1_FLAGS - return UUID(int=int_uuid_1, version=None) + return UUID(fields=(time_low, time_mid, time_hi_version, + clock_seq_hi_variant, clock_seq_low, node), version=1) def uuid3(namespace, name): """Generate a UUID from the MD5 hash of a namespace UUID and a name.""" diff --git a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst index 1a17fa27535004..04c744fb2ba54f 100644 --- a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst +++ b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst @@ -1,2 +1,2 @@ -Improve generation of :class:`~uuid.UUID` objects via their dedicated -functions by 20%. Patch by Bénédikt Tran. +Improve generation of :class:`~uuid.UUID` objects version 3, 4, 5, and 8 +via their dedicated functions by 30%. Patch by Bénédikt Tran. 
From 5b6922f554f55659944995403a739797c806ef89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 22 Dec 2024 11:08:19 +0100 Subject: [PATCH 09/27] change eager check into an assertion check for internal constructor --- Lib/uuid.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index d68007ba295ca2..41283aebce91ca 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -239,9 +239,9 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, @classmethod def _from_int(cls, int, *, is_safe=SafeUUID.unknown): + """Internal use only.""" + assert int >= 0 and int <= 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff self = cls.__new__(cls) - if int < 0 or int > 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff: - raise ValueError('int is out of range (need a 128-bit value)') object.__setattr__(self, 'int', int) object.__setattr__(self, 'is_safe', is_safe) return self From e631593bfb46560a42afa27b2bd39c021467a68d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 22 Dec 2024 11:19:39 +0100 Subject: [PATCH 10/27] update performance results --- Doc/whatsnew/3.14.rst | 8 ++++---- .../2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 8d55e9e3b2af2c..4cfa829530cf6b 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -675,14 +675,14 @@ uuid * Improve generation of :class:`~uuid.UUID` objects via their dedicated functions: - * :func:`~uuid.uuid3` is 40% faster for 16-byte names and 10% faster + * :func:`~uuid.uuid3` is 47% faster for 16-byte names and 13% faster for 1024-byte names. Performances for longer names remain unchanged. 
- * :func:`~uuid.uuid5` is 38% faster for 16-byte names and 21% faster + * :func:`~uuid.uuid5` is 35% faster for 16-byte names and 24% faster for 1024-byte names. Performances for longer names remain unchanged. - * :func:`~uuid.uuid4` is 31% faster and :func:`~uuid.uuid8` is 37% faster. + * :func:`~uuid.uuid4` is 33% faster and :func:`~uuid.uuid8` is 38% faster. Overall, dedicated generation of UUID objects version 3, 4, 5, and 8 is - roughly 30% faster. + roughly 20% faster. (Contributed by Bénédikt Tran in :gh:`128150`.) diff --git a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst index 04c744fb2ba54f..5a1d65f044171e 100644 --- a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst +++ b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst @@ -1,2 +1,2 @@ Improve generation of :class:`~uuid.UUID` objects version 3, 4, 5, and 8 -via their dedicated functions by 30%. Patch by Bénédikt Tran. +via their dedicated functions by 20%. Patch by Bénédikt Tran. From 1c1090163b05b23d46260bc7fe8db00893a0aa16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:29:43 +0100 Subject: [PATCH 11/27] describe constants --- Lib/uuid.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Lib/uuid.py b/Lib/uuid.py index 41283aebce91ca..b35df37fe574ab 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -85,7 +85,14 @@ class SafeUUID: unknown = None +_UINT_128_MAX = 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff +# 128-bit mask to clear the variant and version bits of a UUID integral value +# +# This is equivalent to the 2-complement of '(0xc000 << 48) | (0xf000 << 64)'. _RFC_4122_CLEARFLAGS_MASK = 0xffff_ffff_ffff_0fff_3fff_ffff_ffff_ffff +# RFC 4122 variant bits and version bits to activate on a UUID integral value. 
+# +# The values are equivalent to '(version << 76) | (0x8000 << 48)'. _RFC_4122_VERSION_1_FLAGS = 0x0000_0000_0000_1000_8000_0000_0000_0000 _RFC_4122_VERSION_3_FLAGS = 0x0000_0000_0000_3000_8000_0000_0000_0000 _RFC_4122_VERSION_4_FLAGS = 0x0000_0000_0000_4000_8000_0000_0000_0000 From 0bc7321c7225bc888b060140f2b87305de872581 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:30:11 +0100 Subject: [PATCH 12/27] revert UUIDv1 optimizations to reduce the diff --- Lib/uuid.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index b35df37fe574ab..03987825617af8 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -214,17 +214,17 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, raise ValueError('fields is not a 6-tuple') (time_low, time_mid, time_hi_version, clock_seq_hi_variant, clock_seq_low, node) = fields - if time_low < 0 or time_low > 0xffff_ffff: + if not 0 <= time_low <= 0xffff_ffff: raise ValueError('field 1 out of range (need a 32-bit value)') - if time_mid < 0 or time_mid > 0xffff: + if not 0 <= time_mid <= 0xffff: raise ValueError('field 2 out of range (need a 16-bit value)') - if time_hi_version < 0 or time_hi_version > 0xffff: + if not 0 <= time_hi_version <= 0xffff: raise ValueError('field 3 out of range (need a 16-bit value)') - if clock_seq_hi_variant < 0 or clock_seq_hi_variant > 0xff: + if not 0 <= clock_seq_hi_variant <= 0xff: raise ValueError('field 4 out of range (need an 8-bit value)') - if clock_seq_low < 0 or clock_seq_low > 0xff: + if not 0 <= clock_seq_low <= 0xff: raise ValueError('field 5 out of range (need an 8-bit value)') - if node < 0 or node > 0xffff_ffff_ffff: + if not 0 <= node <= 0xffff_ffff_ffff: raise ValueError('field 6 out of range (need a 48-bit value)') clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low int = ((time_low << 96) | (time_mid << 80) | From 
26b1eb1c6ff0f3ded790e5f80a6020c0a7ab4c17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:31:03 +0100 Subject: [PATCH 13/27] simplify `_from_int` private constructor as per Pieter's review --- Lib/uuid.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 03987825617af8..8ad9d1b715e4a0 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -245,12 +245,13 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, object.__setattr__(self, 'is_safe', is_safe) @classmethod - def _from_int(cls, int, *, is_safe=SafeUUID.unknown): + def _from_int(cls, value): """Internal use only.""" - assert int >= 0 and int <= 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff + assert isinstance(value, int), repr(value) + assert 0 <= value <= _UINT_128_MAX, repr(value) self = cls.__new__(cls) - object.__setattr__(self, 'int', int) - object.__setattr__(self, 'is_safe', is_safe) + object.__setattr__(self, 'int', value) + object.__setattr__(self, 'is_safe', SafeUUID.unknown) return self def __getstate__(self): From df50a7a8c819281033a0ec4bff34aa2cab20986c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:31:39 +0100 Subject: [PATCH 14/27] revert micro-optimization of `not a <= x <= b` --- Lib/uuid.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 8ad9d1b715e4a0..e63849c04c0812 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -229,8 +229,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low int = ((time_low << 96) | (time_mid << 80) | (time_hi_version << 64) | (clock_seq << 48) | node) - # "x < a or int > b" is slightly faster than "not (a <= x <= b)" - if int < 0 or int > 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff: + if not 0 <= int <= 
_UINT_128_MAX: raise ValueError('int is out of range (need a 128-bit value)') if version is not None: if not 1 <= version <= 8: From c1ffa7dd5f402a266f1c6f9fd6277da4653328b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:31:54 +0100 Subject: [PATCH 15/27] use built-in `int` when it is not shadowed --- Lib/uuid.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index e63849c04c0812..201201eadb38de 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -733,14 +733,14 @@ def uuid3(namespace, name): import _md5 h = _md5.md5(namespace.bytes + name, usedforsecurity=False) assert len(h.digest()) == 16 - int_uuid_3 = int_.from_bytes(h.digest()) + int_uuid_3 = int.from_bytes(h.digest()) int_uuid_3 &= _RFC_4122_CLEARFLAGS_MASK int_uuid_3 |= _RFC_4122_VERSION_3_FLAGS return UUID._from_int(int_uuid_3) def uuid4(): """Generate a random UUID.""" - int_uuid_4 = int_.from_bytes(os.urandom(16)) + int_uuid_4 = int.from_bytes(os.urandom(16)) int_uuid_4 &= _RFC_4122_CLEARFLAGS_MASK int_uuid_4 |= _RFC_4122_VERSION_4_FLAGS return UUID._from_int(int_uuid_4) @@ -753,7 +753,7 @@ def uuid5(namespace, name): # and 'import X; X.Y' is slightly faster than 'from X import Y'. import hashlib h = hashlib.sha1(namespace.bytes + name, usedforsecurity=False) - int_uuid_5 = int_.from_bytes(h.digest()[:16]) + int_uuid_5 = int.from_bytes(h.digest()[:16]) int_uuid_5 &= _RFC_4122_CLEARFLAGS_MASK int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS return UUID._from_int(int_uuid_5) From cff86e9e8d5318a2f933fafdbf191131c5ebf627 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:36:32 +0100 Subject: [PATCH 16/27] remove rationale comment for HACL* MD5 In this commit, we move the rationale for using HACL*-based MD5 instead of its OpenSSL implementation from the code to this note. 
HACL*-based MD5 is 2x faster than its OpenSSL implementation for creating the hash object via `h = md5(..., usedforsecurity=False)` but `h.digest()` is slightly (yet noticeably) slower. Overall, HACL*-based MD5 still remains faster than its OpenSSL-based implementation, whence the choice of `_md5.md5` over `hashlib.md5`. --- Lib/uuid.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 201201eadb38de..9da2ea83ad41b1 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -728,8 +728,6 @@ def uuid3(namespace, name): """Generate a UUID from the MD5 hash of a namespace UUID and a name.""" if isinstance(name, str): name = bytes(name, "utf-8") - # HACL*-based MD5 is slightly faster than its OpenSSL version, - # and 'import X; X.Y' is slightly faster than 'from X import Y'. import _md5 h = _md5.md5(namespace.bytes + name, usedforsecurity=False) assert len(h.digest()) == 16 From 7095aa4fa92a8e449a0de7a7503fac4bd7be0d60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:41:37 +0100 Subject: [PATCH 17/27] remove rationale comment for OpenSSL SHA-1 In this commit, we move the rationale for using OpenSSL-based SHA-1 instead of its HACL* implementation from the code to this note. HACL*-based SHA-1 is 2x faster than its OpenSSL implementation for creating the hash object via `h = sha1(..., usedforsecurity=False)` but `h.digest()` is almost 3x slower. Unlike HACL* MD5, HACL*-based SHA-1 is slower than its OpenSSL-based implementation, whence the choice of `hashlib.sha1` over `_sha1.sha1`. 
--- Lib/uuid.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 9da2ea83ad41b1..c8ad77c5c1b80f 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -747,8 +747,6 @@ def uuid5(namespace, name): """Generate a UUID from the SHA-1 hash of a namespace UUID and a name.""" if isinstance(name, str): name = bytes(name, "utf-8") - # OpenSSL-based SHA-1 is slightly faster than its HACL* version, - # and 'import X; X.Y' is slightly faster than 'from X import Y'. import hashlib h = hashlib.sha1(namespace.bytes + name, usedforsecurity=False) int_uuid_5 = int.from_bytes(h.digest()[:16]) From 4af15352d2732af7d4a5464eaa831e623b62ec06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:52:43 +0100 Subject: [PATCH 18/27] clear variant and version bits using dedicated mask --- Lib/uuid.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index c8ad77c5c1b80f..21ebad31eee491 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -203,7 +203,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, assert isinstance(bytes_le, bytes_), repr(bytes_le) bytes = (bytes_le[4-1::-1] + bytes_le[6-1:4-1:-1] + bytes_le[8-1:6-1:-1] + bytes_le[8:]) - int = int_.from_bytes(bytes) + int = int_.from_bytes(bytes) # big endian elif bytes is not None: if len(bytes) != 16: raise ValueError('bytes is not a 16-char string') @@ -234,11 +234,11 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, if version is not None: if not 1 <= version <= 8: raise ValueError('illegal version number') + # clear the variant and the version number bits + int &= _RFC_4122_CLEARFLAGS_MASK # Set the variant to RFC 4122/9562. - int &= ~(0xc000 << 48) - int |= 0x8000 << 48 + int |= 0x8000_0000_0000_0000 # (0x8000 << 48) # Set the version number. 
- int &= ~(0xf000 << 64) int |= version << 76 object.__setattr__(self, 'int', int) object.__setattr__(self, 'is_safe', is_safe) From 0d4c0088b613debd7fbcead030f66795ac4b9b8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 25 Dec 2024 13:35:43 +0100 Subject: [PATCH 19/27] fix typos --- Doc/whatsnew/3.14.rst | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 4cfa829530cf6b..5e8e6630992127 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -676,14 +676,11 @@ uuid functions: * :func:`~uuid.uuid3` is 47% faster for 16-byte names and 13% faster - for 1024-byte names. Performances for longer names remain unchanged. + for 1024-byte names. Performance for longer names remains unchanged. * :func:`~uuid.uuid5` is 35% faster for 16-byte names and 24% faster - for 1024-byte names. Performances for longer names remain unchanged. + for 1024-byte names. Performance for longer names remains unchanged. * :func:`~uuid.uuid4` is 33% faster and :func:`~uuid.uuid8` is 38% faster. - Overall, dedicated generation of UUID objects version 3, 4, 5, and 8 is - roughly 20% faster. - (Contributed by Bénédikt Tran in :gh:`128150`.) 
From 9854f69e4980d05ec0e4115d7679c973e9a6822b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 25 Dec 2024 13:46:53 +0100 Subject: [PATCH 20/27] update benchmarks --- Doc/whatsnew/3.14.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 5e8e6630992127..cd1193e7b0e0fd 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -675,11 +675,11 @@ uuid * Improve generation of :class:`~uuid.UUID` objects via their dedicated functions: - * :func:`~uuid.uuid3` is 47% faster for 16-byte names and 13% faster + * :func:`~uuid.uuid3` is 40% faster for 16-byte names and 10% faster for 1024-byte names. Performance for longer names remains unchanged. - * :func:`~uuid.uuid5` is 35% faster for 16-byte names and 24% faster + * :func:`~uuid.uuid5` is 30% faster for 16-byte names and 20% faster for 1024-byte names. Performance for longer names remains unchanged. - * :func:`~uuid.uuid4` is 33% faster and :func:`~uuid.uuid8` is 38% faster. + * :func:`~uuid.uuid4` and :func:`~uuid.uuid8` are 30% faster. (Contributed by Bénédikt Tran in :gh:`128150`.) From 897902b799a576bb1f1aed9f73b496e0130e60f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 26 Dec 2024 09:40:30 +0100 Subject: [PATCH 21/27] remove unnecessary assertions --- Lib/uuid.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 21ebad31eee491..5c934d444a4a77 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -245,8 +245,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, @classmethod def _from_int(cls, value): - """Internal use only.""" - assert isinstance(value, int), repr(value) + """Create a UUID from an integer *value*. 
Internal use only.""" assert 0 <= value <= _UINT_128_MAX, repr(value) self = cls.__new__(cls) object.__setattr__(self, 'int', value) @@ -730,7 +729,6 @@ def uuid3(namespace, name): name = bytes(name, "utf-8") import _md5 h = _md5.md5(namespace.bytes + name, usedforsecurity=False) - assert len(h.digest()) == 16 int_uuid_3 = int.from_bytes(h.digest()) int_uuid_3 &= _RFC_4122_CLEARFLAGS_MASK int_uuid_3 |= _RFC_4122_VERSION_3_FLAGS From a8a19e1343e392cef23ab9300bead9866008aa78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 26 Dec 2024 09:40:45 +0100 Subject: [PATCH 22/27] use `object.__new__` instead of `cls.__new__` --- Lib/uuid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 5c934d444a4a77..59b7b082bfb3bb 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -247,7 +247,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, def _from_int(cls, value): """Create a UUID from an integer *value*. 
Internal use only.""" assert 0 <= value <= _UINT_128_MAX, repr(value) - self = cls.__new__(cls) + self = object.__new__(cls) object.__setattr__(self, 'int', value) object.__setattr__(self, 'is_safe', SafeUUID.unknown) return self From e2b8b08b5ff8d5fc29c9cd40f0e5268f69fe8c14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Dec 2024 11:37:49 +0100 Subject: [PATCH 23/27] remove dedicated constant folding --- Lib/uuid.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 59b7b082bfb3bb..1ef15a575ce00f 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -85,19 +85,15 @@ class SafeUUID: unknown = None -_UINT_128_MAX = 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff +_UINT_128_MAX = (1 << 128) - 1 # 128-bit mask to clear the variant and version bits of a UUID integral value -# -# This is equivalent to the 2-complement of '(0xc000 << 48) | (0xf000 << 64)'. -_RFC_4122_CLEARFLAGS_MASK = 0xffff_ffff_ffff_0fff_3fff_ffff_ffff_ffff +_RFC_4122_CLEARFLAGS_MASK = ~((0xf000 << 64) | (0xc000 << 48)) # RFC 4122 variant bits and version bits to activate on a UUID integral value. -# -# The values are equivalent to '(version << 76) | (0x8000 << 48)'. 
-_RFC_4122_VERSION_1_FLAGS = 0x0000_0000_0000_1000_8000_0000_0000_0000 -_RFC_4122_VERSION_3_FLAGS = 0x0000_0000_0000_3000_8000_0000_0000_0000 -_RFC_4122_VERSION_4_FLAGS = 0x0000_0000_0000_4000_8000_0000_0000_0000 -_RFC_4122_VERSION_5_FLAGS = 0x0000_0000_0000_5000_8000_0000_0000_0000 -_RFC_4122_VERSION_8_FLAGS = 0x0000_0000_0000_8000_8000_0000_0000_0000 +_RFC_4122_VERSION_1_FLAGS = ((1 << 76) | (0x8000 << 48)) +_RFC_4122_VERSION_3_FLAGS = ((3 << 76) | (0x8000 << 48)) +_RFC_4122_VERSION_4_FLAGS = ((4 << 76) | (0x8000 << 48)) +_RFC_4122_VERSION_5_FLAGS = ((5 << 76) | (0x8000 << 48)) +_RFC_4122_VERSION_8_FLAGS = ((8 << 76) | (0x8000 << 48)) class UUID: @@ -214,17 +210,17 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, raise ValueError('fields is not a 6-tuple') (time_low, time_mid, time_hi_version, clock_seq_hi_variant, clock_seq_low, node) = fields - if not 0 <= time_low <= 0xffff_ffff: + if not 0 <= time_low < (1 << 32): raise ValueError('field 1 out of range (need a 32-bit value)') - if not 0 <= time_mid <= 0xffff: + if not 0 <= time_mid < (1 << 16): raise ValueError('field 2 out of range (need a 16-bit value)') - if not 0 <= time_hi_version <= 0xffff: + if not 0 <= time_hi_version < (1 << 16): raise ValueError('field 3 out of range (need a 16-bit value)') - if not 0 <= clock_seq_hi_variant <= 0xff: + if not 0 <= clock_seq_hi_variant < (1 << 8): raise ValueError('field 4 out of range (need an 8-bit value)') - if not 0 <= clock_seq_low <= 0xff: + if not 0 <= clock_seq_low < (1 << 8): raise ValueError('field 5 out of range (need an 8-bit value)') - if not 0 <= node <= 0xffff_ffff_ffff: + if not 0 <= node < (1 << 48): raise ValueError('field 6 out of range (need a 48-bit value)') clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low int = ((time_low << 96) | (time_mid << 80) | From 1d4216aebdfed46508f1d6641653a71b7d10b25e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 
27 Dec 2024 11:37:55 +0100 Subject: [PATCH 24/27] update benchmarks --- Doc/whatsnew/3.14.rst | 7 ++++--- .../Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index f65871a9fc127d..9411586f47d8b1 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -683,11 +683,12 @@ uuid * Improve generation of :class:`~uuid.UUID` objects via their dedicated functions: - * :func:`~uuid.uuid3` is 40% faster for 16-byte names and 10% faster + * :func:`~uuid.uuid3` is 70% faster for 16-byte names and 20% faster for 1024-byte names. Performance for longer names remains unchanged. - * :func:`~uuid.uuid5` is 30% faster for 16-byte names and 20% faster + * :func:`~uuid.uuid5` is 40% faster for 16-byte names and 30% faster for 1024-byte names. Performance for longer names remains unchanged. - * :func:`~uuid.uuid4` and :func:`~uuid.uuid8` are 30% faster. + * :func:`~uuid.uuid4` and :func:`~uuid.uuid8` are 30% and 45% faster + respectively. (Contributed by Bénédikt Tran in :gh:`128150`.) diff --git a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst index 5a1d65f044171e..9bcfc12f5a0a5d 100644 --- a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst +++ b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst @@ -1,2 +1,2 @@ Improve generation of :class:`~uuid.UUID` objects version 3, 4, 5, and 8 -via their dedicated functions by 20%. Patch by Bénédikt Tran. +via their dedicated functions by 25%. Patch by Bénédikt Tran. From 5c87adfad4cf44832df4d0594b6188630906290d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 12 Jan 2025 12:17:37 +0100 Subject: [PATCH 25/27] Always use `hashlib.md5` for consistency as per Petr's comment. 
--- Lib/uuid.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 1ef15a575ce00f..cd1f3530ab63e1 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -723,8 +723,8 @@ def uuid3(namespace, name): """Generate a UUID from the MD5 hash of a namespace UUID and a name.""" if isinstance(name, str): name = bytes(name, "utf-8") - import _md5 - h = _md5.md5(namespace.bytes + name, usedforsecurity=False) + import hashlib + h = hashlib.md5(namespace.bytes + name, usedforsecurity=False) int_uuid_3 = int.from_bytes(h.digest()) int_uuid_3 &= _RFC_4122_CLEARFLAGS_MASK int_uuid_3 |= _RFC_4122_VERSION_3_FLAGS From ea23629bb368783e69caafe2c019576c80826358 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 12 Jan 2025 12:17:43 +0100 Subject: [PATCH 26/27] update benchmarks --- Doc/whatsnew/3.14.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 9411586f47d8b1..66c92cd450cda3 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -683,11 +683,10 @@ uuid * Improve generation of :class:`~uuid.UUID` objects via their dedicated functions: - * :func:`~uuid.uuid3` is 70% faster for 16-byte names and 20% faster - for 1024-byte names. Performance for longer names remains unchanged. - * :func:`~uuid.uuid5` is 40% faster for 16-byte names and 30% faster - for 1024-byte names. Performance for longer names remains unchanged. - * :func:`~uuid.uuid4` and :func:`~uuid.uuid8` are 30% and 45% faster + * :func:`~uuid.uuid3` and :func:`~uuid.uuid5` are both roughly 40% faster + for 16-byte names and 20% faster for 1024-byte names. Performance for + longer names remains unchanged. + * :func:`~uuid.uuid4` and :func:`~uuid.uuid8` are 30% and 40% faster respectively. (Contributed by Bénédikt Tran in :gh:`128150`.) 
From bdf7c6efd123c45b2e6d9f20f52d0e02a2c6bd15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 13 Jan 2025 12:16:33 +0100 Subject: [PATCH 27/27] update NEWS --- .../next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst index 9bcfc12f5a0a5d..04c744fb2ba54f 100644 --- a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst +++ b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst @@ -1,2 +1,2 @@ Improve generation of :class:`~uuid.UUID` objects version 3, 4, 5, and 8 -via their dedicated functions by 25%. Patch by Bénédikt Tran. +via their dedicated functions by 30%. Patch by Bénédikt Tran.