From 42d55b4b0df528761adf43f1cc7b761187901152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 28 Jun 2024 10:15:47 +0200 Subject: [PATCH 01/48] add UUIDv7 implementation --- Lib/uuid.py | 79 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 69 insertions(+), 10 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index c286eac38e1ef4..c1ff9d746b739c 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -1,8 +1,9 @@ r"""UUID objects (universally unique identifiers) according to RFC 4122. This module provides immutable UUID objects (class UUID) and the functions -uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5 -UUIDs as specified in RFC 4122. +uuid1(), uuid3(), uuid4(), uuid5(), and uuid7() for generating version 1, 3, +4, 5, and 7 UUIDs as specified in RFC 4122 (superseeded by RFC 9562 but still +referred to as RFC 4122 for compatibility purposes). If all you want is a unique ID, you should probably call uuid1() or uuid4(). Note that uuid1() may compromise privacy since it creates a UUID containing @@ -129,7 +130,7 @@ class UUID: variant the UUID variant (one of the constants RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE) - version the UUID version number (1 through 5, meaningful only + version the UUID version number (1, 3, 4, 5 and 7, meaningful only when the variant is RFC_4122) is_safe An enum indicating whether the UUID has been generated in @@ -214,7 +215,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, if not 0 <= int < 1<<128: raise ValueError('int is out of range (need a 128-bit value)') if version is not None: - if not 1 <= version <= 5: + if not 1 <= version <= 7: raise ValueError('illegal version number') # Set the variant to RFC 4122. 
int &= ~(0xc000 << 48) @@ -656,7 +657,7 @@ def getnode(): assert False, '_random_getnode() returned invalid value: {}'.format(_node) -_last_timestamp = None +_last_timestamp_v1 = None def uuid1(node=None, clock_seq=None): """Generate a UUID from a host ID, sequence number, and the current time. @@ -674,15 +675,15 @@ def uuid1(node=None, clock_seq=None): is_safe = SafeUUID.unknown return UUID(bytes=uuid_time, is_safe=is_safe) - global _last_timestamp + global _last_timestamp_v1 import time nanoseconds = time.time_ns() # 0x01b21dd213814000 is the number of 100-ns intervals between the # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00. timestamp = nanoseconds // 100 + 0x01b21dd213814000 - if _last_timestamp is not None and timestamp <= _last_timestamp: - timestamp = _last_timestamp + 1 - _last_timestamp = timestamp + if _last_timestamp_v1 is not None and timestamp <= _last_timestamp_v1: + timestamp = _last_timestamp_v1 + 1 + _last_timestamp_v1 = timestamp if clock_seq is None: import random clock_seq = random.getrandbits(14) # instead of stable storage @@ -719,6 +720,63 @@ def uuid5(namespace, name): hash = sha1(namespace.bytes + name).digest() return UUID(bytes=hash[:16], version=5) +_last_timestamp_v7 = None +_last_counter_v7 = 0 # 42-bit counter + +def uuid7(): + """Generate a UUID from a Unix timestamp in milliseconds and random bits. + + UUIDv7 objects feature monotonicity within a millisecond. + """ + # --- 48 --- -- 4 -- --- 12 --- -- 2 -- --- 30 --- - 32 - + # unix_ts_ms | version | counter_hi | variant | counter_lo | random + # + # 'counter = counter_hi | counter_lo' is a 42-bit counter constructed + # with Method 1 of RFC 9562, §6.2, and its MSB is set to 0. + # + # 'random' is a 32-bit random value regenerated for every new UUID. + # + # If multiple UUIDs are generated within the same millisecond, the LSB + # of 'counter' is incremented by 1. 
When overflowing, the timestamp is + # advanced and the counter is reset to a random 42-bit integer with MSB + # set to 0. + + def get_counter_and_tail(): + rand = int.from_bytes(os.urandom(10)) + # 42-bit counter with MSB set to 0 + counter = (rand >> 32) & 0x1ffffffffff + # 32-bit random data + tail = rand & 0xffffffff + return counter, tail + + global _last_timestamp_v7 + global _last_counter_v7 + + import time + nanoseconds = time.time_ns() + timestamp_ms, _ = divmod(nanoseconds, 1_000_000) + + if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7: + counter, tail = get_counter_and_tail() + else: + if timestamp_ms < _last_timestamp_v7: + timestamp_ms = _last_timestamp_v7 + 1 + # advance the counter + counter = _last_counter_v7 + 1 + if counter > 0x3ffffffffff: + timestamp_ms += 1 # advance the timestamp + counter, tail = get_counter_and_tail() + else: + tail = int.from_bytes(os.urandom(4)) + + _last_timestamp_v7 = timestamp_ms + _last_counter_v7 = counter + + int_uuid_7 = (timestamp_ms & 0xffffffffffff) << 80 + int_uuid_7 |= ((counter >> 30) & 0xfff) << 64 + int_uuid_7 |= (counter & 0x3fffffff) << 32 + int_uuid_7 |= tail & 0xffffffff + return UUID(int=int_uuid_7, version=7) def main(): """Run the uuid command line interface.""" @@ -726,7 +784,8 @@ def main(): "uuid1": uuid1, "uuid3": uuid3, "uuid4": uuid4, - "uuid5": uuid5 + "uuid5": uuid5, + "uuid7": uuid7, } uuid_namespace_funcs = ("uuid3", "uuid5") namespaces = { From 6826fa1b9b61f8bfc1299410a5a594084b63d0d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 28 Jun 2024 11:25:51 +0200 Subject: [PATCH 02/48] add tests --- Lib/test/test_uuid.py | 177 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 174 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index e177464c00f7a6..d5a601d8b60126 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -1,3 +1,4 @@ +import 
random import unittest from test import support from test.support import import_helper @@ -267,7 +268,7 @@ def test_exceptions(self): # Version number out of range. badvalue(lambda: self.uuid.UUID('00'*16, version=0)) - badvalue(lambda: self.uuid.UUID('00'*16, version=6)) + badvalue(lambda: self.uuid.UUID('00'*16, version=42)) # Integer value out of range. badvalue(lambda: self.uuid.UUID(int=-1)) @@ -588,7 +589,7 @@ def test_uuid1_bogus_return_value(self): def test_uuid1_time(self): with mock.patch.object(self.uuid, '_generate_time_safe', None), \ - mock.patch.object(self.uuid, '_last_timestamp', None), \ + mock.patch.object(self.uuid, '_last_timestamp_v1', None), \ mock.patch.object(self.uuid, 'getnode', return_value=93328246233727), \ mock.patch('time.time_ns', return_value=1545052026752910643), \ mock.patch('random.getrandbits', return_value=5317): # guaranteed to be random @@ -596,7 +597,7 @@ def test_uuid1_time(self): self.assertEqual(u, self.uuid.UUID('a7a55b92-01fc-11e9-94c5-54e1acf6da7f')) with mock.patch.object(self.uuid, '_generate_time_safe', None), \ - mock.patch.object(self.uuid, '_last_timestamp', None), \ + mock.patch.object(self.uuid, '_last_timestamp_v1', None), \ mock.patch('time.time_ns', return_value=1545052026752910643): u = self.uuid.uuid1(node=93328246233727, clock_seq=5317) self.assertEqual(u, self.uuid.UUID('a7a55b92-01fc-11e9-94c5-54e1acf6da7f')) @@ -681,6 +682,176 @@ def test_uuid5(self): equal(u, self.uuid.UUID(v)) equal(str(u), v) + def test_uuid7(self): + equal = self.assertEqual + u = self.uuid.uuid7() + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + + # 1 Jan 2023 12:34:56.123_456_789 + timestamp_ns = 1672533296_123_456_789 # ns precision + timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + + for _ in range(100): + counter_hi = random.getrandbits(11) + counter_lo = random.getrandbits(30) + counter = (counter_hi << 30) | counter_lo + + tail = random.getrandbits(32) + # effective number of bits is 32 + 30 + 11 = 73 + 
random_bits = counter << 32 | tail + + # set all remaining MSB of fake random bits to 1 to ensure that + # the implementation correctly remove them + random_bits = (((1 << 7) - 1) << 73) | random_bits + random_data = random_bits.to_bytes(10) + + with ( + mock.patch.object(self.uuid, '_last_timestamp_v7', None), + mock.patch.object(self.uuid, '_last_counter_v7', 0), + mock.patch('time.time_ns', return_value=timestamp_ns), + mock.patch('os.urandom', return_value=random_data) as urand + ): + u = self.uuid.uuid7() + urand.assert_called_once_with(10) + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + + equal(self.uuid._last_timestamp_v7, timestamp_ms) + equal(self.uuid._last_counter_v7, counter) + + unix_ts_ms = timestamp_ms & 0xffffffffffff + equal((u.int >> 80) & 0xffffffffffff, unix_ts_ms) + + equal((u.int >> 75) & 1, 0) # check that the MSB is 0 + equal((u.int >> 64) & 0xfff, counter_hi) + equal((u.int >> 32) & 0x3fffffff, counter_lo) + equal(u.int & 0xffffffff, tail) + + def test_uuid7_monotonicity(self): + equal = self.assertEqual + + us = [self.uuid.uuid7() for _ in range(10_000)] + equal(us, sorted(us)) + + with mock.patch.multiple(self.uuid, _last_timestamp_v7=0, _last_counter_v7=0): + # 1 Jan 2023 12:34:56.123_456_789 + timestamp_ns = 1672533296_123_456_789 # ns precision + timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + + counter_hi = random.getrandbits(11) + counter_lo = random.getrandbits(29) # make sure that +1 does not overflow + counter = (counter_hi << 30) | counter_lo + + tail = random.getrandbits(32) + random_bits = counter << 32 | tail + random_data = random_bits.to_bytes(10) + + with ( + mock.patch('time.time_ns', return_value=timestamp_ns), + mock.patch('os.urandom', return_value=random_data) as urand + ): + u1 = self.uuid.uuid7() + urand.assert_called_once_with(10) + equal(self.uuid._last_timestamp_v7, timestamp_ms) + equal(self.uuid._last_counter_v7, counter) + equal((u1.int >> 64) & 0xfff, counter_hi) + equal((u1.int >> 32) & 
0x3fffffff, counter_lo) + equal(u1.int & 0xffffffff, tail) + + # 1 Jan 2023 12:34:56.123_457_032 (same millisecond but not same prec) + next_timestamp_ns = 1672533296_123_457_032 + next_timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + equal(timestamp_ms, next_timestamp_ms) + + next_tail_bytes = os.urandom(4) + next_fail = int.from_bytes(next_tail_bytes) + + with ( + mock.patch('time.time_ns', return_value=next_timestamp_ns), + mock.patch('os.urandom', return_value=next_tail_bytes) as urand + ): + u2 = self.uuid.uuid7() + urand.assert_called_once_with(4) + # same milli-second + equal(self.uuid._last_timestamp_v7, timestamp_ms) + # counter advanced by 1 + equal(self.uuid._last_counter_v7, counter + 1) + equal((u2.int >> 64) & 0xfff, counter_hi) + equal((u2.int >> 32) & 0x3fffffff, counter_lo + 1) + equal(u2.int & 0xffffffff, next_fail) + + self.assertLess(u1, u2) + + def test_uuid7_timestamp_backwards(self): + equal = self.assertEqual + # 1 Jan 2023 12:34:56.123_456_789 + timestamp_ns = 1672533296_123_456_789 # ns precision + timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + fake_last_timestamp_v7 = timestamp_ms + 1 + + counter_hi = random.getrandbits(11) + counter_lo = random.getrandbits(29) # make sure that +1 does not overflow + counter = (counter_hi << 30) | counter_lo + + tail_bytes = os.urandom(4) + tail = int.from_bytes(tail_bytes) + + with ( + mock.patch.object(self.uuid, '_last_timestamp_v7', fake_last_timestamp_v7), + mock.patch.object(self.uuid, '_last_counter_v7', counter), + mock.patch('time.time_ns', return_value=timestamp_ns), + mock.patch('os.urandom', return_value=tail_bytes) as os_urandom_fake + ): + u = self.uuid.uuid7() + os_urandom_fake.assert_called_once_with(4) + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + equal(self.uuid._last_timestamp_v7, fake_last_timestamp_v7 + 1) + unix_ts_ms = (fake_last_timestamp_v7 + 1) & 0xffffffffffff + equal((u.int >> 80) & 0xffffffffffff, unix_ts_ms) + # counter advanced by 1 + 
equal(self.uuid._last_counter_v7, counter + 1) + equal((u.int >> 64) & 0xfff, counter_hi) + # counter advanced by 1 (constructed so that counter_hi is unchanged) + equal((u.int >> 32) & 0x3fffffff, counter_lo + 1) + equal(u.int & 0xffffffff, tail) + + def test_uuid7_overflow_counter(self): + equal = self.assertEqual + # 1 Jan 2023 12:34:56.123_456_789 + timestamp_ns = 1672533296_123_456_789 # ns precision + timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + + new_counter_hi = random.getrandbits(11) + new_counter_lo = random.getrandbits(30) + new_counter = (new_counter_hi << 30) | new_counter_lo + + tail = random.getrandbits(32) + random_bits = new_counter << 32 | tail + random_data = random_bits.to_bytes(10) + + with ( + mock.patch.object(self.uuid, '_last_timestamp_v7', timestamp_ms), + # same timestamp, but force an overflow on the counter + mock.patch.object(self.uuid, '_last_counter_v7', 0x3ffffffffff), + mock.patch('time.time_ns', return_value=timestamp_ns), + mock.patch('os.urandom', return_value=random_data) as urand + ): + u = self.uuid.uuid7() + urand.assert_called_with(10) + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + # timestamp advanced due to overflow + equal(self.uuid._last_timestamp_v7, timestamp_ms + 1) + unix_ts_ms = (timestamp_ms + 1) & 0xffffffffffff + equal((u.int >> 80) & 0xffffffffffff, unix_ts_ms) + # counter overflow, so we picked a new one + equal(self.uuid._last_counter_v7, new_counter) + equal((u.int >> 64) & 0xfff, new_counter_hi) + equal((u.int >> 32) & 0x3fffffff, new_counter_lo) + equal(u.int & 0xffffffff, tail) + @support.requires_fork() def testIssue8621(self): # On at least some versions of OSX self.uuid.uuid4 generates From edc2caba5949a1018f606e2ea3921a79aadb895b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 28 Jun 2024 11:27:29 +0200 Subject: [PATCH 03/48] blurb --- .../next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst | 2 ++ 
1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst diff --git a/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst b/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst new file mode 100644 index 00000000000000..f85e05622623c2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst @@ -0,0 +1,2 @@ +Add :func:`uuid.uuid7` for generating UUIDv7 objects as specified in +:rfc:`9562`. Patch by Bénédikt Tran. From c6d26b63610aaa1d77fa7b141fe43e08f57532e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 28 Jun 2024 11:28:36 +0200 Subject: [PATCH 04/48] update CHANGELOG --- Doc/whatsnew/3.14.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 9662044915b8ca..1e56fd076ba8cf 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -121,6 +121,14 @@ symtable (Contributed by Bénédikt Tran in :gh:`120029`.) +uuid +---- + +* Add support for UUID version 7 via :func:`uuid.uuid7` as specified + in :rfc:`9562`. + + (Contributed by Bénédikt Tran in :gh:`89083`.) + Optimizations ============= From 2ddb4b8fcd9b0d622b2cd1b65b1580c547600ce0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 28 Jun 2024 11:32:38 +0200 Subject: [PATCH 05/48] update RFC number --- Doc/library/uuid.rst | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 0f2d7820cb25c8..2cd3c842d5c5f8 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -1,8 +1,8 @@ -:mod:`!uuid` --- UUID objects according to :rfc:`4122` +:mod:`!uuid` --- UUID objects according to :rfc:`9562` ====================================================== .. 
module:: uuid - :synopsis: UUID objects (universally unique identifiers) according to RFC 4122 + :synopsis: UUID objects (universally unique identifiers) according to RFC 9562 .. moduleauthor:: Ka-Ping Yee .. sectionauthor:: George Yoshida @@ -12,7 +12,7 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5` for -generating version 1, 3, 4, and 5 UUIDs as specified in :rfc:`4122`. +generating version 1, 3, 4, and 5 UUIDs as specified in :rfc:`9562`. If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -65,7 +65,7 @@ which relays any information about the UUID's safety, using this enumeration: Exactly one of *hex*, *bytes*, *bytes_le*, *fields*, or *int* must be given. The *version* argument is optional; if given, the resulting UUID will have its - variant and version number set according to :rfc:`4122`, overriding bits in the + variant and version number set according to :rfc:`9562`, overriding bits in the given *hex*, *bytes*, *bytes_le*, *fields*, or *int*. Comparison of UUID objects are made by way of comparing their @@ -137,7 +137,7 @@ which relays any information about the UUID's safety, using this enumeration: .. attribute:: UUID.urn - The UUID as a URN as specified in :rfc:`4122`. + The UUID as a URN as specified in :rfc:`9562`. .. attribute:: UUID.variant @@ -168,7 +168,7 @@ The :mod:`uuid` module defines the following functions: runs, it may launch a separate program, which could be quite slow. If all attempts to obtain the hardware address fail, we choose a random 48-bit number with the multicast bit (least significant bit of the first octet) - set to 1 as recommended in :rfc:`4122`. "Hardware address" means the MAC + set to 1 as recommended in :rfc:`9562`. "Hardware address" means the MAC address of a network interface. 
On a machine with multiple network interfaces, universally administered MAC addresses (i.e. where the second least significant bit of the first octet is *unset*) will be preferred over @@ -252,7 +252,12 @@ of the :attr:`~UUID.variant` attribute: .. data:: RFC_4122 - Specifies the UUID layout given in :rfc:`4122`. + Specifies the UUID layout given in :rfc:`9562`. + + .. note:: + + For compatibility reasons, the content of the :data:`!RFC_4122` constant + is not updated to reflect the new RFC number. .. data:: RESERVED_MICROSOFT @@ -267,7 +272,7 @@ of the :attr:`~UUID.variant` attribute: .. seealso:: - :rfc:`4122` - A Universally Unique IDentifier (UUID) URN Namespace + :rfc:`9562` - A Universally Unique IDentifier (UUID) URN Namespace This specification defines a Uniform Resource Name namespace for UUIDs, the internal format of UUIDs, and methods of generating UUIDs. From bcd1417e8c8a1d23091930d6e5ca3190873d7191 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 28 Jun 2024 11:36:56 +0200 Subject: [PATCH 06/48] add TODO in the docs --- Doc/library/uuid.rst | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 2cd3c842d5c5f8..9d71657b7670ee 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -12,7 +12,7 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5` for -generating version 1, 3, 4, and 5 UUIDs as specified in :rfc:`9562`. +generating version 1, 3, 4, 5, and 7 UUIDs as specified in :rfc:`9562`. If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -149,7 +149,7 @@ which relays any information about the UUID's safety, using this enumeration: .. 
attribute:: UUID.version - The UUID version number (1 through 5, meaningful only when the variant is + The UUID version number (1 through 7, meaningful only when the variant is :const:`RFC_4122`). .. attribute:: UUID.is_safe @@ -216,6 +216,14 @@ The :mod:`uuid` module defines the following functions: .. index:: single: uuid5 + +.. function:: uuid7() + + TODO + +.. index:: single: uuid7 + + The :mod:`uuid` module defines the following namespace identifiers for use with :func:`uuid3` or :func:`uuid5`. From c3d474519e63ad34777e3f3f26b9793bc0fececf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:18:57 +0200 Subject: [PATCH 07/48] add UUIDv8 implementation --- Lib/uuid.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 4d4f06cfc9ebbe..2ff64fc39fb83e 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -1,8 +1,9 @@ r"""UUID objects (universally unique identifiers) according to RFC 4122. This module provides immutable UUID objects (class UUID) and the functions -uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5 -UUIDs as specified in RFC 4122. +uuid1(), uuid3(), uuid4(), uuid5(), and uuid8() for generating version 1, 3, +4, 5, and 8 UUIDs as specified in RFC 4122 (superseeded by RFC 9562 but still +referred to as RFC 4122 for compatibility purposes). If all you want is a unique ID, you should probably call uuid1() or uuid4(). 
Note that uuid1() may compromise privacy since it creates a UUID containing @@ -129,7 +130,7 @@ class UUID: variant the UUID variant (one of the constants RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE) - version the UUID version number (1 through 5, meaningful only + version the UUID version number (1 through 8, meaningful only when the variant is RFC_4122) is_safe An enum indicating whether the UUID has been generated in @@ -214,7 +215,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, if not 0 <= int < 1<<128: raise ValueError('int is out of range (need a 128-bit value)') if version is not None: - if not 1 <= version <= 5: + if not 1 <= version <= 8: raise ValueError('illegal version number') # Set the variant to RFC 4122. int &= ~(0xc000 << 48) @@ -719,6 +720,27 @@ def uuid5(namespace, name): hash = sha1(namespace.bytes + name).digest() return UUID(bytes=hash[:16], version=5) +def uuid8(a=None, b=None, c=None): + """Generate a UUID from three custom blocks. + 'a' is the first 48-bit chunk of the UUID (octets 0-5); + 'b' is the mid 12-bit chunk (octets 6-7); + 'c' is the last 62-bit chunk (octets 8-15). + When a value is not specified, a random value is generated. 
+ """ + if a is None: + import random + a = random.getrandbits(48) + if b is None: + import random + b = random.getrandbits(12) + if c is None: + import random + c = random.getrandbits(62) + + int_uuid_8 = (a & 0xffffffffffff) << 80 + int_uuid_8 |= (b & 0xfff) << 64 + int_uuid_8 |= c & 0x3fffffffffffffff + return UUID(int=int_uuid_8, version=8) def main(): """Run the uuid command line interface.""" @@ -726,7 +748,8 @@ def main(): "uuid1": uuid1, "uuid3": uuid3, "uuid4": uuid4, - "uuid5": uuid5 + "uuid5": uuid5, + "uuid8": uuid8, } uuid_namespace_funcs = ("uuid3", "uuid5") namespaces = { From 392d289f549d6412dd9e9cef009edc37fcd4f334 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:19:01 +0200 Subject: [PATCH 08/48] add tests --- Lib/test/test_uuid.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index e177464c00f7a6..f89f14b82a0fe9 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -8,8 +8,10 @@ import io import os import pickle +import random import sys import weakref +from itertools import product from unittest import mock py_uuid = import_helper.import_fresh_module('uuid', blocked=['_uuid']) @@ -267,7 +269,7 @@ def test_exceptions(self): # Version number out of range. badvalue(lambda: self.uuid.UUID('00'*16, version=0)) - badvalue(lambda: self.uuid.UUID('00'*16, version=6)) + badvalue(lambda: self.uuid.UUID('00'*16, version=42)) # Integer value out of range. 
badvalue(lambda: self.uuid.UUID(int=-1)) @@ -681,6 +683,29 @@ def test_uuid5(self): equal(u, self.uuid.UUID(v)) equal(str(u), v) + def test_uuid8(self): + equal = self.assertEqual + u = self.uuid.uuid8() + + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 8) + + for (_, hi, mid, lo) in product( + range(10), # repeat 10 times + [None, 0, random.getrandbits(48)], + [None, 0, random.getrandbits(12)], + [None, 0, random.getrandbits(62)], + ): + u = self.uuid.uuid8(hi, mid, lo) + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 8) + if hi is not None: + equal((u.int >> 80) & 0xffffffffffff, hi) + if mid is not None: + equal((u.int >> 64) & 0xfff, mid) + if lo is not None: + equal(u.int & 0x3fffffffffffffff, lo) + @support.requires_fork() def testIssue8621(self): # On at least some versions of OSX self.uuid.uuid4 generates From 26889ea442cf421ac383cb970ff88c8b3a566e32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:19:10 +0200 Subject: [PATCH 09/48] blurb --- .../next/Library/2024-08-22-12-12-35.gh-issue-89083.b6zFh0.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-08-22-12-12-35.gh-issue-89083.b6zFh0.rst diff --git a/Misc/NEWS.d/next/Library/2024-08-22-12-12-35.gh-issue-89083.b6zFh0.rst b/Misc/NEWS.d/next/Library/2024-08-22-12-12-35.gh-issue-89083.b6zFh0.rst new file mode 100644 index 00000000000000..d37d585d51b490 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-08-22-12-12-35.gh-issue-89083.b6zFh0.rst @@ -0,0 +1,2 @@ +Add :func:`uuid.uuid8` for generating UUIDv8 objects as specified in +:rfc:`9562`. 
Patch by Bénédikt Tran. From 44b66e6c82a4d1aefdcf1a6cbb3ffe02d53596d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:19:17 +0200 Subject: [PATCH 10/48] add What's New entry --- Doc/whatsnew/3.14.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index a34dc639ad2a94..7730dc528c59d3 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -199,6 +199,14 @@ symtable (Contributed by Bénédikt Tran in :gh:`120029`.) +uuid +---- + +* Add support for UUID version 8 via :func:`uuid.uuid8` as specified + in :rfc:`9562`. + + (Contributed by Bénédikt Tran in :gh:`89083`.) + .. Add improved modules above alphabetically, not here at the end. Optimizations From 7be6dc4b402b3d0e68b3ba3eb44247e9aca2d216 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:19:19 +0200 Subject: [PATCH 11/48] add docs --- Doc/library/uuid.rst | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 0f2d7820cb25c8..f4b1a1e734ebc5 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -12,7 +12,7 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5` for -generating version 1, 3, 4, and 5 UUIDs as specified in :rfc:`4122`. +generating version 1, 3, 4, 5, and 8 UUIDs as specified in :rfc:`4122`. If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -149,9 +149,13 @@ which relays any information about the UUID's safety, using this enumeration: .. 
attribute:: UUID.version - The UUID version number (1 through 5, meaningful only when the variant is + The UUID version number (1 through 8, meaningful only when the variant is :const:`RFC_4122`). + .. versionchanged:: 3.14 + Added UUID version 8. + + .. attribute:: UUID.is_safe An enumeration of :class:`SafeUUID` which indicates whether the platform @@ -216,6 +220,16 @@ The :mod:`uuid` module defines the following functions: .. index:: single: uuid5 + +.. function:: uuid8(a=None, b=None, c=None) + + TODO + + .. versionadded:: 3.14 + +.. index:: single: uuid8 + + The :mod:`uuid` module defines the following namespace identifiers for use with :func:`uuid3` or :func:`uuid5`. From 8ba3d8b7d781e6a6e74f2b7563044bab7e46d90b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 25 Sep 2024 12:58:38 +0200 Subject: [PATCH 12/48] Improve hexadecimal masks reading --- Lib/uuid.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 2ff64fc39fb83e..fac7e32deb6275 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -722,10 +722,12 @@ def uuid5(namespace, name): def uuid8(a=None, b=None, c=None): """Generate a UUID from three custom blocks. - 'a' is the first 48-bit chunk of the UUID (octets 0-5); - 'b' is the mid 12-bit chunk (octets 6-7); - 'c' is the last 62-bit chunk (octets 8-15). - When a value is not specified, a random value is generated. + + * 'a' is the first 48-bit chunk of the UUID (octets 0-5); + * 'b' is the mid 12-bit chunk (octets 6-7); + * 'c' is the last 62-bit chunk (octets 8-15). + + When a value is not specified, a pseudo-random value is generated. 
""" if a is None: import random @@ -736,10 +738,9 @@ def uuid8(a=None, b=None, c=None): if c is None: import random c = random.getrandbits(62) - - int_uuid_8 = (a & 0xffffffffffff) << 80 + int_uuid_8 = (a & 0xffff_ffff_ffff) << 80 int_uuid_8 |= (b & 0xfff) << 64 - int_uuid_8 |= c & 0x3fffffffffffffff + int_uuid_8 |= c & 0x3fff_ffff_ffff_ffff return UUID(int=int_uuid_8, version=8) def main(): From a14ae9bf5e51ef0bd3bc1bfd068fab5921181e1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 25 Sep 2024 13:19:49 +0200 Subject: [PATCH 13/48] add uniqueness test --- Lib/test/test_uuid.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index f89f14b82a0fe9..39f65e2847e0ec 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -706,6 +706,14 @@ def test_uuid8(self): if lo is not None: equal(u.int & 0x3fffffffffffffff, lo) + def test_uuid8_uniqueness(self): + """Test that UUIDv8-generated values are unique (up to a negligible + probability of failure).""" + u1 = self.uuid.uuid8() + u2 = self.uuid.uuid8() + self.assertNotEqual(u1.int, u2.int) + self.assertEqual(u1.version, u2.version) + @support.requires_fork() def testIssue8621(self): # On at least some versions of OSX self.uuid.uuid4 generates From 7a169c96dc1c3a16d66d2856a144bacc1c1ebf0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 25 Sep 2024 13:27:47 +0200 Subject: [PATCH 14/48] Update mentions to RFC 4122 to RFC 4122/9562 when possible. 
--- Doc/library/uuid.rst | 18 ++++++++++-------- Lib/uuid.py | 11 +++++------ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index f4b1a1e734ebc5..111a313bf3bdec 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -1,8 +1,8 @@ -:mod:`!uuid` --- UUID objects according to :rfc:`4122` +:mod:`!uuid` --- UUID objects according to :rfc:`9562` ====================================================== .. module:: uuid - :synopsis: UUID objects (universally unique identifiers) according to RFC 4122 + :synopsis: UUID objects (universally unique identifiers) according to RFC 9562 .. moduleauthor:: Ka-Ping Yee .. sectionauthor:: George Yoshida @@ -12,7 +12,7 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5` for -generating version 1, 3, 4, 5, and 8 UUIDs as specified in :rfc:`4122`. +generating version 1, 3, 4, 5, and 8 UUIDs as specified in :rfc:`9562`. If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -65,7 +65,7 @@ which relays any information about the UUID's safety, using this enumeration: Exactly one of *hex*, *bytes*, *bytes_le*, *fields*, or *int* must be given. The *version* argument is optional; if given, the resulting UUID will have its - variant and version number set according to :rfc:`4122`, overriding bits in the + variant and version number set according to :rfc:`9562`, overriding bits in the given *hex*, *bytes*, *bytes_le*, *fields*, or *int*. Comparison of UUID objects are made by way of comparing their @@ -137,7 +137,7 @@ which relays any information about the UUID's safety, using this enumeration: .. attribute:: UUID.urn - The UUID as a URN as specified in :rfc:`4122`. + The UUID as a URN as specified in :rfc:`9562`. .. 
attribute:: UUID.variant @@ -172,7 +172,7 @@ The :mod:`uuid` module defines the following functions: runs, it may launch a separate program, which could be quite slow. If all attempts to obtain the hardware address fail, we choose a random 48-bit number with the multicast bit (least significant bit of the first octet) - set to 1 as recommended in :rfc:`4122`. "Hardware address" means the MAC + set to 1 as recommended in :rfc:`9562`. "Hardware address" means the MAC address of a network interface. On a machine with multiple network interfaces, universally administered MAC addresses (i.e. where the second least significant bit of the first octet is *unset*) will be preferred over @@ -266,7 +266,9 @@ of the :attr:`~UUID.variant` attribute: .. data:: RFC_4122 - Specifies the UUID layout given in :rfc:`4122`. + Specifies the UUID layout given in :rfc:`4122`. This constant is kept + for backward compatibility even though :rfc:`4122` has been superseeded + by :rfc:`9562`. .. data:: RESERVED_MICROSOFT @@ -281,7 +283,7 @@ of the :attr:`~UUID.variant` attribute: .. seealso:: - :rfc:`4122` - A Universally Unique IDentifier (UUID) URN Namespace + :rfc:`9562` - A Universally Unique IDentifier (UUID) URN Namespace This specification defines a Uniform Resource Name namespace for UUIDs, the internal format of UUIDs, and methods of generating UUIDs. diff --git a/Lib/uuid.py b/Lib/uuid.py index fac7e32deb6275..9c6ad9643cf6d5 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -1,9 +1,8 @@ -r"""UUID objects (universally unique identifiers) according to RFC 4122. +r"""UUID objects (universally unique identifiers) according to RFC 4122/9562. This module provides immutable UUID objects (class UUID) and the functions uuid1(), uuid3(), uuid4(), uuid5(), and uuid8() for generating version 1, 3, -4, 5, and 8 UUIDs as specified in RFC 4122 (superseeded by RFC 9562 but still -referred to as RFC 4122 for compatibility purposes). +4, 5, and 8 UUIDs as specified in RFC 4122/9562. 
If all you want is a unique ID, you should probably call uuid1() or uuid4(). Note that uuid1() may compromise privacy since it creates a UUID containing @@ -125,7 +124,7 @@ class UUID: int the UUID as a 128-bit integer - urn the UUID as a URN as specified in RFC 4122 + urn the UUID as a URN as specified in RFC 4122/9562 variant the UUID variant (one of the constants RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE) @@ -217,7 +216,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, if version is not None: if not 1 <= version <= 8: raise ValueError('illegal version number') - # Set the variant to RFC 4122. + # Set the variant to RFC 4122/9562. int &= ~(0xc000 << 48) int |= 0x8000 << 48 # Set the version number. @@ -356,7 +355,7 @@ def variant(self): @property def version(self): - # The version bits are only meaningful for RFC 4122 UUIDs. + # The version bits are only meaningful for RFC 4122/9562 UUIDs. if self.variant == RFC_4122: return int((self.int >> 76) & 0xf) From b082c9085dd65051add3536ffc3abcd82ec9717f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 25 Sep 2024 13:41:15 +0200 Subject: [PATCH 15/48] Update docs --- Doc/library/uuid.rst | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 111a313bf3bdec..11b15294535f50 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -12,7 +12,8 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5` for -generating version 1, 3, 4, 5, and 8 UUIDs as specified in :rfc:`9562`. +generating version 1, 3, 4, 5, and 8 UUIDs as specified in :rfc:`9562` (which +superseeds :rfc:`4122`). If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. 
Note that :func:`uuid1` may compromise privacy since it creates @@ -172,7 +173,7 @@ The :mod:`uuid` module defines the following functions: runs, it may launch a separate program, which could be quite slow. If all attempts to obtain the hardware address fail, we choose a random 48-bit number with the multicast bit (least significant bit of the first octet) - set to 1 as recommended in :rfc:`9562`. "Hardware address" means the MAC + set to 1 as recommended in :rfc:`4122`. "Hardware address" means the MAC address of a network interface. On a machine with multiple network interfaces, universally administered MAC addresses (i.e. where the second least significant bit of the first octet is *unset*) will be preferred over @@ -223,7 +224,14 @@ The :mod:`uuid` module defines the following functions: .. function:: uuid8(a=None, b=None, c=None) - TODO + Generate a pseudo-random UUID according to + :rfc:`RFC 9562, §5.8 <9562#section-5.8>`. + + When specified, the parameters *a*, *b* and *c* are expected to be + positive integers of 48, 12 and 62 bits respectively. If they exceed + their expected bit count, only their least significant bits are kept; + non-specified arguments are substituted for a pseudo-random integer of + appropriate size. .. versionadded:: 3.14 @@ -299,7 +307,7 @@ The :mod:`uuid` module can be executed as a script from the command line. .. 
code-block:: sh - python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5}] [-n NAMESPACE] [-N NAME] + python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid8}] [-n NAMESPACE] [-N NAME] The following options are accepted: From 5e97cc32343023f6c415ee042d00d3eeaef4913d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 11 Nov 2024 16:44:00 +0100 Subject: [PATCH 16/48] Apply suggestions from code review Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/library/uuid.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 11b15294535f50..658e8491f56bf1 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -13,7 +13,7 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5` for generating version 1, 3, 4, 5, and 8 UUIDs as specified in :rfc:`9562` (which -superseeds :rfc:`4122`). +supersedes :rfc:`4122`). If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -275,7 +275,7 @@ of the :attr:`~UUID.variant` attribute: .. data:: RFC_4122 Specifies the UUID layout given in :rfc:`4122`. This constant is kept - for backward compatibility even though :rfc:`4122` has been superseeded + for backward compatibility even though :rfc:`4122` has been superseded by :rfc:`9562`. 
From 051f34e734bbbfa41563dce8129eb31d9ada329e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 11 Nov 2024 16:44:51 +0100 Subject: [PATCH 17/48] Update Lib/test/test_uuid.py --- Lib/test/test_uuid.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 39f65e2847e0ec..7bd26a8ca34b62 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -707,8 +707,8 @@ def test_uuid8(self): equal(u.int & 0x3fffffffffffffff, lo) def test_uuid8_uniqueness(self): - """Test that UUIDv8-generated values are unique (up to a negligible - probability of failure).""" + # Test that UUIDv8-generated values are unique + # (up to a negligible probability of failure). u1 = self.uuid.uuid8() u2 = self.uuid.uuid8() self.assertNotEqual(u1.int, u2.int) From bdf9a77e7eebf4d33d7bd9d9480c9784907fcff6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 11 Nov 2024 16:46:32 +0100 Subject: [PATCH 18/48] Apply suggestions from code review --- Doc/library/uuid.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 658e8491f56bf1..6166c22caedf81 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -153,7 +153,7 @@ which relays any information about the UUID's safety, using this enumeration: The UUID version number (1 through 8, meaningful only when the variant is :const:`RFC_4122`). - .. versionchanged:: 3.14 + .. versionchanged:: next Added UUID version 8. @@ -233,7 +233,7 @@ The :mod:`uuid` module defines the following functions: non-specified arguments are substituted for a pseudo-random integer of appropriate size. - .. versionadded:: 3.14 + .. versionadded:: next .. 
index:: single: uuid8 From 2e390727c00da45cdc102814d805c88c17877dfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 14 Nov 2024 09:56:33 +0100 Subject: [PATCH 19/48] update CLI --- Doc/library/uuid.rst | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 97896f0ed8ea52..33dbca5231ac9e 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -12,12 +12,8 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5` for -<<<<<<< HEAD -generating version 1, 3, 4, 5, and 7 UUIDs as specified in :rfc:`9562`. -======= -generating version 1, 3, 4, 5, and 8 UUIDs as specified in :rfc:`9562` (which -supersedes :rfc:`4122`). ->>>>>>> origin/uuid-v8-89083 +generating version 1, 3, 4, 5, 7, and 8 UUIDs as specified in :rfc:`9562` +(which supersedes :rfc:`4122`). If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -154,13 +150,12 @@ which relays any information about the UUID's safety, using this enumeration: .. attribute:: UUID.version -<<<<<<< HEAD - The UUID version number (1 through 7, meaningful only when the variant is -======= The UUID version number (1 through 8, meaningful only when the variant is ->>>>>>> origin/uuid-v8-89083 :const:`RFC_4122`). + .. versionchanged:: next + Added UUID version 7. + .. versionchanged:: next Added UUID version 8. @@ -322,7 +317,7 @@ The :mod:`uuid` module can be executed as a script from the command line. .. 
code-block:: sh - python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid8}] [-n NAMESPACE] [-N NAME] + python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid7,uuid8}] [-n NAMESPACE] [-N NAME] The following options are accepted: @@ -338,6 +333,9 @@ The following options are accepted: Specify the function name to use to generate the uuid. By default :func:`uuid4` is used. + .. versionchanged:: next + Expose UUID version 7 via ``uuid7``. + .. option:: -n --namespace From 694e07fdc59e84dec4768721974cf52bb820b389 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 14 Nov 2024 10:02:44 +0100 Subject: [PATCH 20/48] post-merge --- Doc/library/uuid.rst | 13 ++++++++----- Doc/whatsnew/3.14.rst | 1 - Lib/uuid.py | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 33dbca5231ac9e..2ace429bab8533 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -11,9 +11,9 @@ -------------- This module provides immutable :class:`UUID` objects (the :class:`UUID` class) -and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5` for -generating version 1, 3, 4, 5, 7, and 8 UUIDs as specified in :rfc:`9562` -(which supersedes :rfc:`4122`). +and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5`, +:func:`uuid7`, and :func:`uuid8` for generating version 1, 3, 4, 5, 7, and 8 +UUIDS as specified in :rfc:`9562` (which supersedes :rfc:`4122`). If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -176,7 +176,7 @@ The :mod:`uuid` module defines the following functions: runs, it may launch a separate program, which could be quite slow. 
If all attempts to obtain the hardware address fail, we choose a random 48-bit number with the multicast bit (least significant bit of the first octet) - set to 1 as recommended in :rfc:`9562`. "Hardware address" means the MAC + set to 1 as recommended in :rfc:`4122`. "Hardware address" means the MAC address of a network interface. On a machine with multiple network interfaces, universally administered MAC addresses (i.e. where the second least significant bit of the first octet is *unset*) will be preferred over @@ -334,7 +334,10 @@ The following options are accepted: is used. .. versionchanged:: next - Expose UUID version 7 via ``uuid7``. + Allow generating UUID version 7. + + .. versionchanged:: next + Allow generating UUID version 8. .. option:: -n --namespace diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 7253e65267a6ff..a5251c1acd0bf9 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -522,7 +522,6 @@ uuid * Add support for UUID version 7 via :func:`uuid.uuid7` as specified in :rfc:`9562`. - (Contributed by Bénédikt Tran in :gh:`89083`.) * Add support for UUID version 8 via :func:`uuid.uuid8` as specified diff --git a/Lib/uuid.py b/Lib/uuid.py index e0bc394b00ce6a..b36a29dd75c2e3 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -1,8 +1,8 @@ r"""UUID objects (universally unique identifiers) according to RFC 4122/9562. This module provides immutable UUID objects (class UUID) and the functions -uuid1(), uuid3(), uuid4(), uuid5(), and uuid7() for generating version 1, 3, -4, 5, 7, and 8 UUIDs as specified in RFC 4122/9562. +uuid1(), uuid3(), uuid4(), uuid5(), uuid7(), and uuid8() for generating +version 1, 3, 4, 5, 7, and 8 UUIDs as specified in RFC 4122/9562. If all you want is a unique ID, you should probably call uuid1() or uuid4(). 
Note that uuid1() may compromise privacy since it creates a UUID containing From 7ff4368afb66179a5195c5b7eb7130148540cc8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 14 Nov 2024 10:13:41 +0100 Subject: [PATCH 21/48] improve readability --- Lib/uuid.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index b36a29dd75c2e3..e8a8c51cb06e6c 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -743,9 +743,9 @@ def uuid7(): def get_counter_and_tail(): rand = int.from_bytes(os.urandom(10)) # 42-bit counter with MSB set to 0 - counter = (rand >> 32) & 0x1ffffffffff + counter = (rand >> 32) & 0x1ff_ffff_ffff # 32-bit random data - tail = rand & 0xffffffff + tail = rand & 0xffff_ffff return counter, tail global _last_timestamp_v7 @@ -762,7 +762,7 @@ def get_counter_and_tail(): timestamp_ms = _last_timestamp_v7 + 1 # advance the counter counter = _last_counter_v7 + 1 - if counter > 0x3ffffffffff: + if counter > 0x3fff_ffff: timestamp_ms += 1 # advance the timestamp counter, tail = get_counter_and_tail() else: @@ -771,10 +771,10 @@ def get_counter_and_tail(): _last_timestamp_v7 = timestamp_ms _last_counter_v7 = counter - int_uuid_7 = (timestamp_ms & 0xffffffffffff) << 80 + int_uuid_7 = (timestamp_ms & 0xffff_ffff_ffff) << 80 int_uuid_7 |= ((counter >> 30) & 0xfff) << 64 - int_uuid_7 |= (counter & 0x3fffffff) << 32 - int_uuid_7 |= tail & 0xffffffff + int_uuid_7 |= (counter & 0x3fff_ffff) << 32 + int_uuid_7 |= tail & 0xffff_ffff return UUID(int=int_uuid_7, version=7) def uuid8(a=None, b=None, c=None): From 7c3cab646acf1fd7842a9ee81f7b4feb9de77a25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 14 Nov 2024 10:33:45 +0100 Subject: [PATCH 22/48] post-merge --- Lib/uuid.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 
e8a8c51cb06e6c..f0d9b39c43b7ba 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -760,10 +760,10 @@ def get_counter_and_tail(): else: if timestamp_ms < _last_timestamp_v7: timestamp_ms = _last_timestamp_v7 + 1 - # advance the counter + # advance the 42-bit counter counter = _last_counter_v7 + 1 - if counter > 0x3fff_ffff: - timestamp_ms += 1 # advance the timestamp + if counter > 0x3ff_ffff_ffff: + timestamp_ms += 1 # advance the 48-bit timestamp counter, tail = get_counter_and_tail() else: tail = int.from_bytes(os.urandom(4)) From e75874122f5ea2f86e071df7eb4348f84cad4a13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 14 Nov 2024 10:33:53 +0100 Subject: [PATCH 23/48] uniqueness test --- Lib/test/test_uuid.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index ad157fa720e358..8f4fb080fbed68 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -730,6 +730,18 @@ def test_uuid7(self): equal((u.int >> 32) & 0x3fffffff, counter_lo) equal(u.int & 0xffffffff, tail) + def test_uuid7_uniqueness(self): + # Test that UUIDv7-generated values are unique. + # + # While UUIDv8 has an entropy of 122 bits, those 122 bits may not + # necessarily be sampled from a PRNG. On the other hand, UUIDv7 + # uses os.urandom() as a PRNG which features better randomness. + # + # Until reaching UNIX_EPOCH + 10'000 years, the probability for + # generating two identical UUIDs is negligilbe. 
+ uuids = {self.uuid.uuid7() for _ in range(1000)} + self.assertEqual(len(uuids), 1000) + def test_uuid7_monotonicity(self): equal = self.assertEqual From c18d0c4629d7661795d2e0ac1b5a74e519dfa6b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 14 Nov 2024 10:42:10 +0100 Subject: [PATCH 24/48] improve test comments --- Lib/test/test_uuid.py | 83 +++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 34 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 8f4fb080fbed68..0997e16f0ba41f 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -709,8 +709,11 @@ def test_uuid7(self): random_data = random_bits.to_bytes(10) with ( - mock.patch.object(self.uuid, '_last_timestamp_v7', None), - mock.patch.object(self.uuid, '_last_counter_v7', 0), + mock.patch.multiple( + self.uuid, + _last_timestamp_v7=None, + _last_counter_v7=0, + ), mock.patch('time.time_ns', return_value=timestamp_ns), mock.patch('os.urandom', return_value=random_data) as urand ): @@ -722,13 +725,13 @@ def test_uuid7(self): equal(self.uuid._last_timestamp_v7, timestamp_ms) equal(self.uuid._last_counter_v7, counter) - unix_ts_ms = timestamp_ms & 0xffffffffffff - equal((u.int >> 80) & 0xffffffffffff, unix_ts_ms) + unix_ts_ms = timestamp_ms & 0xffff_ffff_ffff + equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms) - equal((u.int >> 75) & 1, 0) # check that the MSB is 0 + equal((u.int >> 75) & 1, 0) # check that the MSB is 0 equal((u.int >> 64) & 0xfff, counter_hi) - equal((u.int >> 32) & 0x3fffffff, counter_lo) - equal(u.int & 0xffffffff, tail) + equal((u.int >> 32) & 0x3fff_ffff, counter_lo) + equal(u.int & 0xffff_ffff, tail) def test_uuid7_uniqueness(self): # Test that UUIDv7-generated values are unique. 
@@ -748,13 +751,18 @@ def test_uuid7_monotonicity(self): us = [self.uuid.uuid7() for _ in range(10_000)] equal(us, sorted(us)) - with mock.patch.multiple(self.uuid, _last_timestamp_v7=0, _last_counter_v7=0): + with mock.patch.multiple( + self.uuid, + _last_timestamp_v7=0, + _last_counter_v7=0 + ): # 1 Jan 2023 12:34:56.123_456_789 timestamp_ns = 1672533296_123_456_789 # ns precision timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + # counter_{hi,lo} are chosen so that "counter + 1" does not overflow counter_hi = random.getrandbits(11) - counter_lo = random.getrandbits(29) # make sure that +1 does not overflow + counter_lo = random.getrandbits(29) counter = (counter_hi << 30) | counter_lo tail = random.getrandbits(32) @@ -770,10 +778,10 @@ def test_uuid7_monotonicity(self): equal(self.uuid._last_timestamp_v7, timestamp_ms) equal(self.uuid._last_counter_v7, counter) equal((u1.int >> 64) & 0xfff, counter_hi) - equal((u1.int >> 32) & 0x3fffffff, counter_lo) - equal(u1.int & 0xffffffff, tail) + equal((u1.int >> 32) & 0x3fff_ffff, counter_lo) + equal(u1.int & 0xffff_ffff, tail) - # 1 Jan 2023 12:34:56.123_457_032 (same millisecond but not same prec) + # 1 Jan 2023 12:34:56.123_457_032 (same millisecond but not same ns) next_timestamp_ns = 1672533296_123_457_032 next_timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) equal(timestamp_ms, next_timestamp_ms) @@ -789,11 +797,11 @@ def test_uuid7_monotonicity(self): urand.assert_called_once_with(4) # same milli-second equal(self.uuid._last_timestamp_v7, timestamp_ms) - # counter advanced by 1 + # 42-bit counter advanced by 1 equal(self.uuid._last_counter_v7, counter + 1) equal((u2.int >> 64) & 0xfff, counter_hi) - equal((u2.int >> 32) & 0x3fffffff, counter_lo + 1) - equal(u2.int & 0xffffffff, next_fail) + equal((u2.int >> 32) & 0x3fff_ffff, counter_lo + 1) + equal(u2.int & 0xffff_ffff, next_fail) self.assertLess(u1, u2) @@ -804,32 +812,36 @@ def test_uuid7_timestamp_backwards(self): timestamp_ms, _ = divmod(timestamp_ns, 
1_000_000) fake_last_timestamp_v7 = timestamp_ms + 1 + # counter_{hi,lo} are chosen so that "counter + 1" does not overflow counter_hi = random.getrandbits(11) - counter_lo = random.getrandbits(29) # make sure that +1 does not overflow + counter_lo = random.getrandbits(29) counter = (counter_hi << 30) | counter_lo tail_bytes = os.urandom(4) tail = int.from_bytes(tail_bytes) with ( - mock.patch.object(self.uuid, '_last_timestamp_v7', fake_last_timestamp_v7), - mock.patch.object(self.uuid, '_last_counter_v7', counter), + mock.patch.multiple( + self.uuid, + _last_timestamp_v7=fake_last_timestamp_v7, + _last_counter_v7=counter, + ), mock.patch('time.time_ns', return_value=timestamp_ns), - mock.patch('os.urandom', return_value=tail_bytes) as os_urandom_fake + mock.patch('os.urandom', return_value=tail_bytes) as urand ): u = self.uuid.uuid7() - os_urandom_fake.assert_called_once_with(4) + urand.assert_called_once_with(4) equal(u.variant, self.uuid.RFC_4122) equal(u.version, 7) equal(self.uuid._last_timestamp_v7, fake_last_timestamp_v7 + 1) - unix_ts_ms = (fake_last_timestamp_v7 + 1) & 0xffffffffffff - equal((u.int >> 80) & 0xffffffffffff, unix_ts_ms) - # counter advanced by 1 + unix_ts_ms = (fake_last_timestamp_v7 + 1) & 0xffff_ffff_ffff + equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms) + # 42-bit counter advanced by 1 equal(self.uuid._last_counter_v7, counter + 1) equal((u.int >> 64) & 0xfff, counter_hi) - # counter advanced by 1 (constructed so that counter_hi is unchanged) - equal((u.int >> 32) & 0x3fffffff, counter_lo + 1) - equal(u.int & 0xffffffff, tail) + # 42-bit counter advanced by 1 (counter_hi is untouched) + equal((u.int >> 32) & 0x3fff_ffff, counter_lo + 1) + equal(u.int & 0xffff_ffff, tail) def test_uuid7_overflow_counter(self): equal = self.assertEqual @@ -846,9 +858,12 @@ def test_uuid7_overflow_counter(self): random_data = random_bits.to_bytes(10) with ( - mock.patch.object(self.uuid, '_last_timestamp_v7', timestamp_ms), - # same timestamp, but force 
an overflow on the counter - mock.patch.object(self.uuid, '_last_counter_v7', 0x3ffffffffff), + mock.patch.multiple( + self.uuid, + _last_timestamp_v7=timestamp_ms, + # same timestamp, but force an overflow on the counter + _last_counter_v7=0x3ff_ffff_ffff, + ), mock.patch('time.time_ns', return_value=timestamp_ns), mock.patch('os.urandom', return_value=random_data) as urand ): @@ -858,13 +873,13 @@ def test_uuid7_overflow_counter(self): equal(u.version, 7) # timestamp advanced due to overflow equal(self.uuid._last_timestamp_v7, timestamp_ms + 1) - unix_ts_ms = (timestamp_ms + 1) & 0xffffffffffff - equal((u.int >> 80) & 0xffffffffffff, unix_ts_ms) - # counter overflow, so we picked a new one + unix_ts_ms = (timestamp_ms + 1) & 0xffff_ffff_ffff + equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms) + # counter overflowed, so we picked a new one equal(self.uuid._last_counter_v7, new_counter) equal((u.int >> 64) & 0xfff, new_counter_hi) - equal((u.int >> 32) & 0x3fffffff, new_counter_lo) - equal(u.int & 0xffffffff, tail) + equal((u.int >> 32) & 0x3fff_ffff, new_counter_lo) + equal(u.int & 0xffff_ffff, tail) def test_uuid8(self): equal = self.assertEqual From 6fcb6a10f64ba0b1c20a6f7cba716147e806af3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 15 Nov 2024 09:49:06 +0100 Subject: [PATCH 25/48] fix lint --- Lib/test/test_uuid.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 0997e16f0ba41f..520d187ce991c9 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -1,7 +1,3 @@ -import random -import unittest -from test import support -from test.support import import_helper import builtins import contextlib import copy @@ -12,9 +8,13 @@ import random import sys import weakref +import unittest from itertools import product from unittest import mock +from test import support +from test.support import import_helper 
+ py_uuid = import_helper.import_fresh_module('uuid', blocked=['_uuid']) c_uuid = import_helper.import_fresh_module('uuid', fresh=['_uuid']) From be3f0243069d38cc62354902934e4d3dd33e058a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 15 Nov 2024 09:51:43 +0100 Subject: [PATCH 26/48] post-merge --- Lib/test/test_uuid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 520d187ce991c9..f3dae65c0007be 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -7,8 +7,8 @@ import pickle import random import sys -import weakref import unittest +import weakref from itertools import product from unittest import mock From 06befcaf8fe4f7cbffb6bb59c75939bf8cadf8ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 15 Nov 2024 14:30:59 +0100 Subject: [PATCH 27/48] use versionchanged instead of versionadded --- Doc/library/uuid.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index dfc664efbfe27b..bba346b1e45f58 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -333,7 +333,7 @@ The following options are accepted: Specify the function name to use to generate the uuid. By default :func:`uuid4` is used. - .. versionadded:: next + .. versionchanged:: next Allow generating UUID versions 7 and 8. .. 
option:: -n From aee2898bab2928f01df17e35246094bd10951c83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 19 Dec 2024 18:00:26 +0100 Subject: [PATCH 28/48] improve UUIDv7 tests readability --- Lib/test/test_uuid.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index f3dae65c0007be..6361818f9cf669 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -704,7 +704,7 @@ def test_uuid7(self): random_bits = counter << 32 | tail # set all remaining MSB of fake random bits to 1 to ensure that - # the implementation correctly remove them + # the implementation correctly removes them random_bits = (((1 << 7) - 1) << 73) | random_bits random_data = random_bits.to_bytes(10) @@ -754,7 +754,7 @@ def test_uuid7_monotonicity(self): with mock.patch.multiple( self.uuid, _last_timestamp_v7=0, - _last_counter_v7=0 + _last_counter_v7=0, ): # 1 Jan 2023 12:34:56.123_456_789 timestamp_ns = 1672533296_123_456_789 # ns precision @@ -764,6 +764,7 @@ def test_uuid7_monotonicity(self): counter_hi = random.getrandbits(11) counter_lo = random.getrandbits(29) counter = (counter_hi << 30) | counter_lo + self.assertLess(counter + 1, 0x3ff_ffff_ffff) tail = random.getrandbits(32) random_bits = counter << 32 | tail @@ -816,6 +817,7 @@ def test_uuid7_timestamp_backwards(self): counter_hi = random.getrandbits(11) counter_lo = random.getrandbits(29) counter = (counter_hi << 30) | counter_lo + self.assertLess(counter + 1, 0x3ff_ffff_ffff) tail_bytes = os.urandom(4) tail = int.from_bytes(tail_bytes) @@ -854,7 +856,7 @@ def test_uuid7_overflow_counter(self): new_counter = (new_counter_hi << 30) | new_counter_lo tail = random.getrandbits(32) - random_bits = new_counter << 32 | tail + random_bits = (new_counter << 32) | tail random_data = random_bits.to_bytes(10) with ( From 1a5ac19825deca8f81ac43aca20ceb3bce1af207 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 19 Dec 2024 18:14:48 +0100 Subject: [PATCH 29/48] improve UUIDv7 uniqueness tests --- Lib/test/test_uuid.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 6361818f9cf669..ca84895be95f75 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -741,9 +741,13 @@ def test_uuid7_uniqueness(self): # uses os.urandom() as a PRNG which features better randomness. # # Until reaching UNIX_EPOCH + 10'000 years, the probability for - # generating two identical UUIDs is negligilbe. - uuids = {self.uuid.uuid7() for _ in range(1000)} - self.assertEqual(len(uuids), 1000) + # generating two identical UUIDs is negligible. + N = 1000 + uuids = {self.uuid.uuid7() for _ in range(N)} + self.assertEqual(len(uuids), N) + + versions = {u.version for u in uuids} + self.assertSetEqual(versions, {7}) def test_uuid7_monotonicity(self): equal = self.assertEqual From ef85b200602ab00c23ef158813fa57076f561cfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 20 Jan 2025 13:03:00 +0100 Subject: [PATCH 30/48] use `UUID._from_int` for UUIDv7 and remove `divmod` usage --- Lib/uuid.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 1e2c39f2174ea9..948b1248e0b690 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -93,6 +93,7 @@ class SafeUUID: _RFC_4122_VERSION_3_FLAGS = ((3 << 76) | (0x8000 << 48)) _RFC_4122_VERSION_4_FLAGS = ((4 << 76) | (0x8000 << 48)) _RFC_4122_VERSION_5_FLAGS = ((5 << 76) | (0x8000 << 48)) +_RFC_4122_VERSION_7_FLAGS = ((7 << 76) | (0x8000 << 48)) _RFC_4122_VERSION_8_FLAGS = ((8 << 76) | (0x8000 << 48)) @@ -782,7 +783,7 @@ def get_counter_and_tail(): import time nanoseconds = time.time_ns() - timestamp_ms, _ = divmod(nanoseconds, 1_000_000) + timestamp_ms = 
nanoseconds // 1_000_000 if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7: counter, tail = get_counter_and_tail() @@ -800,11 +801,18 @@ def get_counter_and_tail(): _last_timestamp_v7 = timestamp_ms _last_counter_v7 = counter - int_uuid_7 = (timestamp_ms & 0xffff_ffff_ffff) << 80 - int_uuid_7 |= ((counter >> 30) & 0xfff) << 64 - int_uuid_7 |= (counter & 0x3fff_ffff) << 32 + unix_ts_ms = timestamp_ms & 0xffff_ffff_ffff + counter_msbs = counter >> 30 + counter_hi = counter_msbs & 0x0fff # keep 12 bits and clear variant bits + counter_lo = counter & 0x3fff_ffff # keep 30 bits and clear version bits + + int_uuid_7 = unix_ts_ms << 80 + int_uuid_7 |= counter_hi << 64 + int_uuid_7 |= counter_lo << 32 int_uuid_7 |= tail & 0xffff_ffff - return UUID(int=int_uuid_7, version=7) + # by construction, the variant and version bits are already cleared + int_uuid_7 |= _RFC_4122_VERSION_7_FLAGS + return UUID._from_int(int_uuid_7) def uuid8(a=None, b=None, c=None): """Generate a UUID from three custom blocks. 
From 571d2fec9fb1ea2ec3cdbffe73f49e5d5924bef8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 23 Feb 2025 12:17:11 +0100 Subject: [PATCH 31/48] backport Victor's review on UUIDv6 --- Lib/test/test_uuid.py | 4 ++-- Lib/uuid.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 3ebbe24d292294..8a0a11e8b2a73f 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -632,7 +632,7 @@ def test_uuid1_bogus_return_value(self): def test_uuid1_time(self): with mock.patch.object(self.uuid, '_generate_time_safe', None), \ - mock.patch.object(self.uuid, '_last_timestamp_v1', None), \ + mock.patch.object(self.uuid, '_last_timestamp', None), \ mock.patch.object(self.uuid, 'getnode', return_value=93328246233727), \ mock.patch('time.time_ns', return_value=1545052026752910643), \ mock.patch('random.getrandbits', return_value=5317): # guaranteed to be random @@ -640,7 +640,7 @@ def test_uuid1_time(self): self.assertEqual(u, self.uuid.UUID('a7a55b92-01fc-11e9-94c5-54e1acf6da7f')) with mock.patch.object(self.uuid, '_generate_time_safe', None), \ - mock.patch.object(self.uuid, '_last_timestamp_v1', None), \ + mock.patch.object(self.uuid, '_last_timestamp', None), \ mock.patch('time.time_ns', return_value=1545052026752910643): u = self.uuid.uuid1(node=93328246233727, clock_seq=5317) self.assertEqual(u, self.uuid.UUID('a7a55b92-01fc-11e9-94c5-54e1acf6da7f')) diff --git a/Lib/uuid.py b/Lib/uuid.py index 5c9ebefb688734..f36b9e1ddf3da2 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -688,7 +688,7 @@ def getnode(): assert False, '_random_getnode() returned invalid value: {}'.format(_node) -_last_timestamp_v1 = None +_last_timestamp = None def uuid1(node=None, clock_seq=None): """Generate a UUID from a host ID, sequence number, and the current time. 
@@ -706,15 +706,15 @@ def uuid1(node=None, clock_seq=None): is_safe = SafeUUID.unknown return UUID(bytes=uuid_time, is_safe=is_safe) - global _last_timestamp_v1 + global _last_timestamp import time nanoseconds = time.time_ns() # 0x01b21dd213814000 is the number of 100-ns intervals between the # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00. timestamp = nanoseconds // 100 + 0x01b21dd213814000 - if _last_timestamp_v1 is not None and timestamp <= _last_timestamp_v1: - timestamp = _last_timestamp_v1 + 1 - _last_timestamp_v1 = timestamp + if _last_timestamp is not None and timestamp <= _last_timestamp: + timestamp = _last_timestamp + 1 + _last_timestamp = timestamp if clock_seq is None: import random clock_seq = random.getrandbits(14) # instead of stable storage From f9ac6583a68b45e75b32511ae3cab6f364ee8efe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 25 Feb 2025 11:37:43 +0100 Subject: [PATCH 32/48] address Victor's review --- Lib/uuid.py | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index f36b9e1ddf3da2..c73e06ee7712e3 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -760,6 +760,14 @@ def uuid5(namespace, name): _last_timestamp_v7 = None _last_counter_v7 = 0 # 42-bit counter +def _uuid7_get_counter_and_tail(): + rand = int.from_bytes(os.urandom(10)) + # 42-bit counter with MSB set to 0 + counter = (rand >> 32) & 0x1ff_ffff_ffff + # 32-bit random data + tail = rand & 0xffff_ffff + return counter, tail + def uuid7(): """Generate a UUID from a Unix timestamp in milliseconds and random bits. @@ -778,14 +786,6 @@ def uuid7(): # advanced and the counter is reset to a random 42-bit integer with MSB # set to 0. 
- def get_counter_and_tail(): - rand = int.from_bytes(os.urandom(10)) - # 42-bit counter with MSB set to 0 - counter = (rand >> 32) & 0x1ff_ffff_ffff - # 32-bit random data - tail = rand & 0xffff_ffff - return counter, tail - global _last_timestamp_v7 global _last_counter_v7 @@ -794,33 +794,40 @@ def get_counter_and_tail(): timestamp_ms = nanoseconds // 1_000_000 if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7: - counter, tail = get_counter_and_tail() + counter, tail = _uuid7_get_counter_and_tail() else: if timestamp_ms < _last_timestamp_v7: timestamp_ms = _last_timestamp_v7 + 1 # advance the 42-bit counter counter = _last_counter_v7 + 1 if counter > 0x3ff_ffff_ffff: - timestamp_ms += 1 # advance the 48-bit timestamp - counter, tail = get_counter_and_tail() + # advance the 48-bit timestamp + timestamp_ms += 1 + counter, tail = _uuid7_get_counter_and_tail() else: + # 32-bit random data tail = int.from_bytes(os.urandom(4)) - _last_timestamp_v7 = timestamp_ms - _last_counter_v7 = counter - unix_ts_ms = timestamp_ms & 0xffff_ffff_ffff counter_msbs = counter >> 30 - counter_hi = counter_msbs & 0x0fff # keep 12 bits and clear variant bits - counter_lo = counter & 0x3fff_ffff # keep 30 bits and clear version bits + # keep 12 counter's MSBs and clear variant bits + counter_hi = counter_msbs & 0x0fff + # keep 30 counter's LSBs and clear version bits + counter_lo = counter & 0x3fff_ffff + # ensure that the fail is always a 32-bit integer + tail &= 0xffff_ffff int_uuid_7 = unix_ts_ms << 80 int_uuid_7 |= counter_hi << 64 int_uuid_7 |= counter_lo << 32 - int_uuid_7 |= tail & 0xffff_ffff + int_uuid_7 |= tail # by construction, the variant and version bits are already cleared int_uuid_7 |= _RFC_4122_VERSION_7_FLAGS - return UUID._from_int(int_uuid_7) + res = UUID._from_int(int_uuid_7) + # defer global update until all computations are done + _last_timestamp_v7 = timestamp_ms + _last_counter_v7 = counter + return res def uuid8(a=None, b=None, c=None): 
"""Generate a UUID from three custom blocks. From a756b9d836db3b41e182866ca84824bdddd70f7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 25 Feb 2025 11:38:35 +0100 Subject: [PATCH 33/48] remove mention of UNIX_EPOCH + 10k years as the proof is long --- Lib/test/test_uuid.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 8a0a11e8b2a73f..22a66457357d5c 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -780,9 +780,6 @@ def test_uuid7_uniqueness(self): # While UUIDv8 has an entropy of 122 bits, those 122 bits may not # necessarily be sampled from a PRNG. On the other hand, UUIDv7 # uses os.urandom() as a PRNG which features better randomness. - # - # Until reaching UNIX_EPOCH + 10'000 years, the probability for - # generating two identical UUIDs is negligible. N = 1000 uuids = {self.uuid.uuid7() for _ in range(N)} self.assertEqual(len(uuids), N) From 44067961384441e114aef7b4535faa21a84bbf0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 25 Feb 2025 11:39:21 +0100 Subject: [PATCH 34/48] import `time` globally as UUIDv7 is likely to be used now --- Lib/uuid.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index c73e06ee7712e3..152c3a7a41099c 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -54,6 +54,7 @@ import os import sys +import time from enum import Enum, _simple_enum @@ -707,7 +708,6 @@ def uuid1(node=None, clock_seq=None): return UUID(bytes=uuid_time, is_safe=is_safe) global _last_timestamp - import time nanoseconds = time.time_ns() # 0x01b21dd213814000 is the number of 100-ns intervals between the # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00. 
@@ -789,7 +789,6 @@ def uuid7(): global _last_timestamp_v7 global _last_counter_v7 - import time nanoseconds = time.time_ns() timestamp_ms = nanoseconds // 1_000_000 From d4eeded20e238e585c105473651bfaea47ab9e70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 25 Feb 2025 11:43:46 +0100 Subject: [PATCH 35/48] run half-black --- Lib/uuid.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 152c3a7a41099c..ec54eef8ad187b 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -55,7 +55,6 @@ import os import sys import time - from enum import Enum, _simple_enum @@ -439,6 +438,7 @@ def _get_command_stdout(command, *args): # # See https://en.wikipedia.org/wiki/MAC_address#Universal_vs._local_(U/L_bit) + def _is_universal(mac): return not (mac & (1 << 41)) @@ -554,6 +554,7 @@ def _ifconfig_getnode(): return mac return None + def _ip_getnode(): """Get the hardware address on Unix by running ip.""" # This works on Linux with iproute2. @@ -562,6 +563,7 @@ def _ip_getnode(): return mac return None + def _arp_getnode(): """Get the hardware address on Unix by running arp.""" import os, socket @@ -590,11 +592,13 @@ def _arp_getnode(): return mac return None + def _lanscan_getnode(): """Get the hardware address on Unix by running lanscan.""" # This might work on HP-UX. return _find_mac_near_keyword('lanscan', '-ai', [b'lan0'], lambda i: 0) + def _netstat_getnode(): """Get the hardware address on Unix by running netstat.""" # This works on AIX and might work on Tru64 UNIX. 
@@ -618,12 +622,14 @@ def _unix_getnode(): uuid_time, _ = _generate_time_safe() return UUID(bytes=uuid_time).node + def _windll_getnode(): """Get the hardware address on Windows using the _uuid extension module.""" if _UuidCreate: uuid_bytes = _UuidCreate() return UUID(bytes_le=uuid_bytes).node + def _random_getnode(): """Get a random node ID.""" # RFC 4122, $4.1.6 says "For systems with no IEEE address, a randomly or @@ -667,6 +673,7 @@ def _random_getnode(): _node = None + def getnode(): """Get the hardware address as a 48-bit positive integer. @@ -691,6 +698,7 @@ def getnode(): _last_timestamp = None + def uuid1(node=None, clock_seq=None): """Generate a UUID from a host ID, sequence number, and the current time. If 'node' is not given, getnode() is used to obtain the hardware @@ -739,6 +747,7 @@ def uuid3(namespace, name): int_uuid_3 |= _RFC_4122_VERSION_3_FLAGS return UUID._from_int(int_uuid_3) + def uuid4(): """Generate a random UUID.""" int_uuid_4 = int.from_bytes(os.urandom(16)) @@ -746,6 +755,7 @@ def uuid4(): int_uuid_4 |= _RFC_4122_VERSION_4_FLAGS return UUID._from_int(int_uuid_4) + def uuid5(namespace, name): """Generate a UUID from the SHA-1 hash of a namespace UUID and a name.""" if isinstance(name, str): @@ -757,8 +767,6 @@ def uuid5(namespace, name): int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS return UUID._from_int(int_uuid_5) -_last_timestamp_v7 = None -_last_counter_v7 = 0 # 42-bit counter def _uuid7_get_counter_and_tail(): rand = int.from_bytes(os.urandom(10)) @@ -768,6 +776,11 @@ def _uuid7_get_counter_and_tail(): tail = rand & 0xffff_ffff return counter, tail + +_last_timestamp_v7 = None +_last_counter_v7 = 0 # 42-bit counter + + def uuid7(): """Generate a UUID from a Unix timestamp in milliseconds and random bits. @@ -828,6 +841,7 @@ def uuid7(): _last_counter_v7 = counter return res + def uuid8(a=None, b=None, c=None): """Generate a UUID from three custom blocks. 
@@ -853,6 +867,7 @@ def uuid8(a=None, b=None, c=None): int_uuid_8 |= _RFC_4122_VERSION_8_FLAGS return UUID._from_int(int_uuid_8) + def main(): """Run the uuid command line interface.""" uuid_funcs = { From 0e54a728083bd39ae4f3e065dbfebcdc945eac9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 25 Feb 2025 12:05:43 +0100 Subject: [PATCH 36/48] update docs --- Doc/library/uuid.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 11836b823b945e..6354e6781117bd 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -224,9 +224,14 @@ The :mod:`uuid` module defines the following functions: .. function:: uuid7() - .. versionadded:: next + Generate a time-based UUID according to + :rfc:`RFC 9562, §5.7 <9562#section-5.7>`. + + For portability across platforms lacking sub-millisecond precision, UUIDs + produced by this function embed a 48-bit timestamp and use a 42-bit counter + to guarantee monotonicity within a millisecond. -.. index:: single: uuid7 + .. versionadded:: next .. function:: uuid8(a=None, b=None, c=None) From 40ab2fa0c7c63323110a3940b7af0858381f349b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 25 Feb 2025 13:22:01 +0100 Subject: [PATCH 37/48] Revert "run half-black" This reverts commit d4eeded20e238e585c105473651bfaea47ab9e70. 
--- Lib/uuid.py | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index ec54eef8ad187b..152c3a7a41099c 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -55,6 +55,7 @@ import os import sys import time + from enum import Enum, _simple_enum @@ -438,7 +439,6 @@ def _get_command_stdout(command, *args): # # See https://en.wikipedia.org/wiki/MAC_address#Universal_vs._local_(U/L_bit) - def _is_universal(mac): return not (mac & (1 << 41)) @@ -554,7 +554,6 @@ def _ifconfig_getnode(): return mac return None - def _ip_getnode(): """Get the hardware address on Unix by running ip.""" # This works on Linux with iproute2. @@ -563,7 +562,6 @@ def _ip_getnode(): return mac return None - def _arp_getnode(): """Get the hardware address on Unix by running arp.""" import os, socket @@ -592,13 +590,11 @@ def _arp_getnode(): return mac return None - def _lanscan_getnode(): """Get the hardware address on Unix by running lanscan.""" # This might work on HP-UX. return _find_mac_near_keyword('lanscan', '-ai', [b'lan0'], lambda i: 0) - def _netstat_getnode(): """Get the hardware address on Unix by running netstat.""" # This works on AIX and might work on Tru64 UNIX. @@ -622,14 +618,12 @@ def _unix_getnode(): uuid_time, _ = _generate_time_safe() return UUID(bytes=uuid_time).node - def _windll_getnode(): """Get the hardware address on Windows using the _uuid extension module.""" if _UuidCreate: uuid_bytes = _UuidCreate() return UUID(bytes_le=uuid_bytes).node - def _random_getnode(): """Get a random node ID.""" # RFC 4122, $4.1.6 says "For systems with no IEEE address, a randomly or @@ -673,7 +667,6 @@ def _random_getnode(): _node = None - def getnode(): """Get the hardware address as a 48-bit positive integer. @@ -698,7 +691,6 @@ def getnode(): _last_timestamp = None - def uuid1(node=None, clock_seq=None): """Generate a UUID from a host ID, sequence number, and the current time. 
If 'node' is not given, getnode() is used to obtain the hardware @@ -747,7 +739,6 @@ def uuid3(namespace, name): int_uuid_3 |= _RFC_4122_VERSION_3_FLAGS return UUID._from_int(int_uuid_3) - def uuid4(): """Generate a random UUID.""" int_uuid_4 = int.from_bytes(os.urandom(16)) @@ -755,7 +746,6 @@ def uuid4(): int_uuid_4 |= _RFC_4122_VERSION_4_FLAGS return UUID._from_int(int_uuid_4) - def uuid5(namespace, name): """Generate a UUID from the SHA-1 hash of a namespace UUID and a name.""" if isinstance(name, str): @@ -767,6 +757,8 @@ def uuid5(namespace, name): int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS return UUID._from_int(int_uuid_5) +_last_timestamp_v7 = None +_last_counter_v7 = 0 # 42-bit counter def _uuid7_get_counter_and_tail(): rand = int.from_bytes(os.urandom(10)) @@ -776,11 +768,6 @@ def _uuid7_get_counter_and_tail(): tail = rand & 0xffff_ffff return counter, tail - -_last_timestamp_v7 = None -_last_counter_v7 = 0 # 42-bit counter - - def uuid7(): """Generate a UUID from a Unix timestamp in milliseconds and random bits. @@ -841,7 +828,6 @@ def uuid7(): _last_counter_v7 = counter return res - def uuid8(a=None, b=None, c=None): """Generate a UUID from three custom blocks. 
@@ -867,7 +853,6 @@ def uuid8(a=None, b=None, c=None): int_uuid_8 |= _RFC_4122_VERSION_8_FLAGS return UUID._from_int(int_uuid_8) - def main(): """Run the uuid command line interface.""" uuid_funcs = { From 3ce8943bf8a453802c08d262a94a92963aa95a21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 25 Feb 2025 13:54:20 +0100 Subject: [PATCH 38/48] add blank line for readability --- Lib/uuid.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/uuid.py b/Lib/uuid.py index 152c3a7a41099c..9d02495b747d9c 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -822,6 +822,7 @@ def uuid7(): int_uuid_7 |= tail # by construction, the variant and version bits are already cleared int_uuid_7 |= _RFC_4122_VERSION_7_FLAGS + res = UUID._from_int(int_uuid_7) # defer global update until all computations are done _last_timestamp_v7 = timestamp_ms From 59e6d7e18ab132444f0aed1ade179ccf9b79b24c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 25 Feb 2025 13:57:19 +0100 Subject: [PATCH 39/48] update comment --- Lib/uuid.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 9d02495b747d9c..9edef381ead26c 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -813,7 +813,9 @@ def uuid7(): counter_hi = counter_msbs & 0x0fff # keep 30 counter's LSBs and clear version bits counter_lo = counter & 0x3fff_ffff - # ensure that the fail is always a 32-bit integer + # ensure that the tail is always a 32-bit integer (by construction, + # it is already the case, but future interfaces may allow the user + # to specify the random tail) tail &= 0xffff_ffff int_uuid_7 = unix_ts_ms << 80 From 437d8cff6056995a548b998d74696cf79a2c5f1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 25 Feb 2025 13:58:12 +0100 Subject: [PATCH 40/48] Update Lib/uuid.py 
Co-authored-by: Victor Stinner --- Lib/uuid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 9edef381ead26c..4823a5b2543ba6 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -824,8 +824,8 @@ def uuid7(): int_uuid_7 |= tail # by construction, the variant and version bits are already cleared int_uuid_7 |= _RFC_4122_VERSION_7_FLAGS - res = UUID._from_int(int_uuid_7) + # defer global update until all computations are done _last_timestamp_v7 = timestamp_ms _last_counter_v7 = counter From 73ab65614ef1ea8fb000bf2cb4b34171129a45fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 3 Mar 2025 17:50:04 +0100 Subject: [PATCH 41/48] improve online docs --- Doc/library/uuid.rst | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 6a24660d2d1246..052c59fa239dd2 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -10,10 +10,11 @@ -------------- -This module provides immutable :class:`UUID` objects (the :class:`UUID` class) -and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5`, -:func:`uuid6`, :func:`uuid7`, and :func:`uuid8` for generating UUIDs version 1, -3, 4, 5, 6, 7, and 8 as specified in :rfc:`9562` (which supersedes :rfc:`4122`). +This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and +the :ref:`functions <uuid-constructor-functions>` for generating UUIDs corresponding +to a specific UUID version as specified in :rfc:`9562` (which supersedes :rfc:`4122`), +e.g., :func:`uuid1` for UUID version 1, :func:`uuid3` for UUID version 3, and so on. +Note that UUID version 2 is deliberately omitted as it is outside the scope of the RFC. If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`.
Note that :func:`uuid1` may compromise privacy since it creates @@ -185,6 +186,8 @@ The :mod:`uuid` module defines the following functions: globally unique, while the latter are not. +.. _uuid-constructor-functions: + .. function:: uuid1(node=None, clock_seq=None) Generate a UUID from a host ID, sequence number, and the current time. If *node* From 54d07ae71549978bb12345fcb48f2c1bbbd6e6e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 3 Mar 2025 18:24:48 +0100 Subject: [PATCH 42/48] `constructor` -> `factory` in labels --- Doc/library/uuid.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 052c59fa239dd2..712cb1cd353ec7 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -11,7 +11,7 @@ -------------- This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and -the :ref:`functions <uuid-constructor-functions>` for generating UUIDs corresponding +the :ref:`functions <uuid-factory-functions>` for generating UUIDs corresponding to a specific UUID version as specified in :rfc:`9562` (which supersedes :rfc:`4122`), e.g., :func:`uuid1` for UUID version 1, :func:`uuid3` for UUID version 3, and so on. Note that UUID version 2 is deliberately omitted as it is outside the scope of the RFC. @@ -186,7 +186,7 @@ The :mod:`uuid` module defines the following functions: globally unique, while the latter are not. -.. _uuid-constructor-functions: +.. _uuid-factory-functions: ..
function:: uuid1(node=None, clock_seq=None) From 6d763896515729f170ed114fef97e0038ad9e6c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 3 Mar 2025 18:30:04 +0100 Subject: [PATCH 43/48] reword prolog --- Lib/uuid.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index f7974b0c83f91a..49cb8570adc485 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -1,8 +1,12 @@ r"""UUID objects (universally unique identifiers) according to RFC 4122/9562. -This module provides immutable UUID objects (class UUID) and the functions -uuid{N}() for generating UUIDs version N as specified in RFC 4122/9562 for -N = 1, 3, 4, 5, 6, 7, and 8. +This module provides immutable UUID objects (class UUID) and functions for +generating UUIDs corresponding to a specific UUID version as specified in +RFC 4122/9562, e.g., uuid1() for UUID version 1, uuid3() for UUID version 3, +and so on. + +Note that UUID version 2 is deliberately omitted as it exceeds the scope +of the RFC. If all you want is a unique ID, you should probably call uuid1() or uuid4(). 
Note that uuid1() may compromise privacy since it creates a UUID containing From bd4ab5531b6762b8d20cb1047d33918ea8d844c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 3 Mar 2025 18:30:39 +0100 Subject: [PATCH 44/48] 'is outside the scope' -> 'exceeds the scope' --- Doc/library/uuid.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 712cb1cd353ec7..d8293519079f81 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -14,7 +14,7 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) a the :ref:`functions <uuid-factory-functions>` for generating UUIDs corresponding to a specific UUID version as specified in :rfc:`9562` (which supersedes :rfc:`4122`), e.g., :func:`uuid1` for UUID version 1, :func:`uuid3` for UUID version 3, and so on. -Note that UUID version 2 is deliberately omitted as it is outside the scope of the RFC. +Note that UUID version 2 is deliberately omitted as it exceeds the scope of the RFC. If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`.
Note that :func:`uuid1` may compromise privacy since it creates From e9ddb742aabdf565258efba683bc0c64d2e709a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 3 Mar 2025 19:02:46 +0100 Subject: [PATCH 45/48] Apply suggestions from code review --- Doc/library/uuid.rst | 2 +- Lib/uuid.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index d8293519079f81..712cb1cd353ec7 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -14,7 +14,7 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) a the :ref:`functions <uuid-factory-functions>` for generating UUIDs corresponding to a specific UUID version as specified in :rfc:`9562` (which supersedes :rfc:`4122`), e.g., :func:`uuid1` for UUID version 1, :func:`uuid3` for UUID version 3, and so on. -Note that UUID version 2 is deliberately omitted as it exceeds the scope of the RFC. +Note that UUID version 2 is deliberately omitted as it is outside the scope of the RFC. If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates diff --git a/Lib/uuid.py b/Lib/uuid.py index 49cb8570adc485..20e15edbabe811 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -5,7 +5,7 @@ RFC 4122/9562, e.g., uuid1() for UUID version 1, uuid3() for UUID version 3, and so on. -Note that UUID version 2 is deliberately omitted as it exceeds the scope +Note that UUID version 2 is deliberately omitted as it is outside the scope of the RFC. If all you want is a unique ID, you should probably call uuid1() or uuid4(). From 8755de0d313862fbad3d4f621a9a74b8e5e62f4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 3 Mar 2025 19:07:16 +0100 Subject: [PATCH 46/48] apply PEP-8 only for UUID6, UUID7 and UUID8 Opportunity to make new code more PEP-8.
--- Lib/uuid.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Lib/uuid.py b/Lib/uuid.py index 20e15edbabe811..7a12b48cb008f1 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -775,6 +775,7 @@ def uuid5(namespace, name): int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS return UUID._from_int(int_uuid_5) + _last_timestamp_v6 = None def uuid6(node=None, clock_seq=None): @@ -813,6 +814,7 @@ def uuid6(node=None, clock_seq=None): int_uuid_6 |= _RFC_4122_VERSION_6_FLAGS return UUID._from_int(int_uuid_6) + _last_timestamp_v7 = None _last_counter_v7 = 0 # 42-bit counter @@ -824,6 +826,7 @@ def _uuid7_get_counter_and_tail(): tail = rand & 0xffff_ffff return counter, tail + def uuid7(): """Generate a UUID from a Unix timestamp in milliseconds and random bits. @@ -887,6 +890,7 @@ def uuid7(): _last_counter_v7 = counter return res + def uuid8(a=None, b=None, c=None): """Generate a UUID from three custom blocks. @@ -912,6 +916,7 @@ def uuid8(a=None, b=None, c=None): int_uuid_8 |= _RFC_4122_VERSION_8_FLAGS return UUID._from_int(int_uuid_8) + def main(): """Run the uuid command line interface.""" uuid_funcs = { From 12d7ad41b174f863ae816c9c5ef4c3f5b9b9e89f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric?= Date: Mon, 3 Mar 2025 14:14:28 -0500 Subject: [PATCH 47/48] small fix minimize git diff + remove now unwanted article --- Doc/library/uuid.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 712cb1cd353ec7..1c14489fe1a1cf 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -10,8 +10,8 @@ -------------- -This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and -the :ref:`functions <uuid-factory-functions>` for generating UUIDs corresponding +This module provides immutable :class:`UUID` objects (the :class:`UUID` class) +and :ref:`functions <uuid-factory-functions>` for generating UUIDs corresponding to a specific UUID version as specified in :rfc:`9562` (which supersedes :rfc:`4122`), e.g., :func:`uuid1` for UUID version
1, :func:`uuid3` for UUID version 3, and so on. Note that UUID version 2 is deliberately omitted as it is outside the scope of the RFC. From 560d87c87b7c9901a7e440b0c84f85155420c562 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 3 Mar 2025 20:51:32 +0100 Subject: [PATCH 48/48] avoid complex language :) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/library/uuid.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 1c14489fe1a1cf..0fb29460e2e68a 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -13,7 +13,7 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and :ref:`functions <uuid-factory-functions>` for generating UUIDs corresponding to a specific UUID version as specified in :rfc:`9562` (which supersedes :rfc:`4122`), -e.g., :func:`uuid1` for UUID version 1, :func:`uuid3` for UUID version 3, and so on. +for example, :func:`uuid1` for UUID version 1, :func:`uuid3` for UUID version 3, and so on. Note that UUID version 2 is deliberately omitted as it is outside the scope of the RFC. If all you want is a unique ID, you should probably call :func:`uuid1` or