diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 0f2d7820cb25c8..52389b207e96cc 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -149,9 +149,12 @@ which relays any information about the UUID's safety, using this enumeration: .. attribute:: UUID.version - The UUID version number (1 through 5, meaningful only when the variant is + The UUID version number (1 through 7, meaningful only when the variant is :const:`RFC_4122`). + .. versionchanged:: 3.14 + Added UUID version 7. + .. attribute:: UUID.is_safe An enumeration of :class:`SafeUUID` which indicates whether the platform @@ -216,6 +219,16 @@ The :mod:`uuid` module defines the following functions: .. index:: single: uuid5 + +.. function:: uuid7() + + Generate a UUID from a Unix timestamp in milliseconds and random bits; UUIDs generated this way are monotonic within a millisecond. + + .. versionadded:: 3.14 + +.. index:: single: uuid7 + + The :mod:`uuid` module defines the following namespace identifiers for use with :func:`uuid3` or :func:`uuid5`. diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index a102af13a08362..0ddd681ecc6166 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -118,6 +118,12 @@ symtable (Contributed by Bénédikt Tran in :gh:`120029`.) +uuid +---- + +* Add :func:`uuid.uuid7` for UUID version 7 as specified by :rfc:`9562`. + + (Contributed by Bénédikt Tran in :gh:`89083`.) Optimizations ============= diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index e177464c00f7a6..f3031b53cf0efb 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -1,3 +1,4 @@ +import random import unittest from test import support from test.support import import_helper @@ -10,6 +11,7 @@ import pickle import sys import weakref +from itertools import product from unittest import mock py_uuid = import_helper.import_fresh_module('uuid', blocked=['_uuid']) @@ -267,7 +269,7 @@ def test_exceptions(self): # Version number out of range. 
badvalue(lambda: self.uuid.UUID('00'*16, version=0)) - badvalue(lambda: self.uuid.UUID('00'*16, version=6)) + badvalue(lambda: self.uuid.UUID('00'*16, version=42)) # Integer value out of range. badvalue(lambda: self.uuid.UUID(int=-1)) @@ -588,7 +590,7 @@ def test_uuid1_bogus_return_value(self): def test_uuid1_time(self): with mock.patch.object(self.uuid, '_generate_time_safe', None), \ - mock.patch.object(self.uuid, '_last_timestamp', None), \ + mock.patch.object(self.uuid, '_last_timestamp_v1', None), \ mock.patch.object(self.uuid, 'getnode', return_value=93328246233727), \ mock.patch('time.time_ns', return_value=1545052026752910643), \ mock.patch('random.getrandbits', return_value=5317): # guaranteed to be random @@ -596,7 +598,7 @@ def test_uuid1_time(self): self.assertEqual(u, self.uuid.UUID('a7a55b92-01fc-11e9-94c5-54e1acf6da7f')) with mock.patch.object(self.uuid, '_generate_time_safe', None), \ - mock.patch.object(self.uuid, '_last_timestamp', None), \ + mock.patch.object(self.uuid, '_last_timestamp_v1', None), \ mock.patch('time.time_ns', return_value=1545052026752910643): u = self.uuid.uuid1(node=93328246233727, clock_seq=5317) self.assertEqual(u, self.uuid.UUID('a7a55b92-01fc-11e9-94c5-54e1acf6da7f')) @@ -681,6 +683,158 @@ def test_uuid5(self): equal(u, self.uuid.UUID(v)) equal(str(u), v) + def test_uuid7(self): + equal = self.assertEqual + u = self.uuid.uuid7() + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + + # 1 Jan 2023 12:34:56.123_456_789 + fake_nanoseconds = 1672533296_123_456_789 # ns precision + expect_timestamp, _ = divmod(fake_nanoseconds, 1_000_000) + rand_b_64_bytes = os.urandom(8) + with mock.patch.object(self.uuid, '_last_timestamp_v7', None), \ + mock.patch.object(self.uuid, '_last_counter_v7_a', 0), \ + mock.patch.object(self.uuid, '_last_counter_v7_b', 0), \ + mock.patch('time.time_ns', return_value=fake_nanoseconds), \ + mock.patch('os.urandom', return_value=rand_b_64_bytes): + u = self.uuid.uuid7() + equal(u.variant, 
self.uuid.RFC_4122) + equal(u.version, 7) + equal(self.uuid._last_timestamp_v7, expect_timestamp) + unix_ts_ms = expect_timestamp & 0xffffffffffff + equal((u.int >> 80) & 0xffffffffffff, unix_ts_ms) + rand_a = 1871 # == int(0.4567890 * 4096) + equal((u.int >> 64) & 0x0fff, rand_a) + rand_b = int.from_bytes(rand_b_64_bytes) & 0x3fffffffffffffff + equal(u.int & 0x3fffffffffffffff, rand_b) + + def test_uuid7_monotonicity(self): + equal = self.assertEqual + + us = [self.uuid.uuid7() for _ in range(10_000)] + equal(us, sorted(us)) + + with mock.patch.multiple(self.uuid, _last_counter_v7_a=0, _last_counter_v7_b=0): + # 1 Jan 2023 12:34:56.123_456_789 + fake_nanoseconds = 1672533296_123_456_789 # ns precision + expect_timestamp, _ = divmod(fake_nanoseconds, 1_000_000) + with mock.patch.object(self.uuid, '_last_timestamp_v7', expect_timestamp): + with mock.patch('time.time_ns', return_value=fake_nanoseconds), \ + mock.patch('os.urandom', return_value=b'\x01') as os_urandom_fake: + u1 = self.uuid.uuid7() + os_urandom_fake.assert_called_once_with(4) + # 1871 = int(0.456_789 * 4096) + equal(self.uuid._last_counter_v7_a, 1871) + equal((u1.int >> 64) & 0x0fff, 1871) + equal(self.uuid._last_counter_v7_b, 1) + equal(u1.int & 0x3fffffffffffffff, 1) + + # 1 Jan 2023 12:34:56.123_457_032 (same millisecond but not same prec) + next_fake_nanoseconds = 1672533296_123_457_032 + with mock.patch('time.time_ns', return_value=next_fake_nanoseconds), \ + mock.patch('os.urandom', return_value=b'\x01') as os_urandom_fake: + u2 = self.uuid.uuid7() + os_urandom_fake.assert_called_once_with(4) + # 1872 = int(0.457_032 * 4096) + equal(self.uuid._last_counter_v7_a, 1872) + equal((u2.int >> 64) & 0x0fff, 1872) + equal(self.uuid._last_counter_v7_b, 2) + equal(u2.int & 0x3fffffffffffffff, 2) + + self.assertLess(u1, u2) + # 48-bit time component is the same + self.assertEqual(u1.int >> 80, u2.int >> 80) + + def test_uuid7_timestamp_backwards(self): + equal = self.assertEqual + # 1 Jan 2023 
12:34:56.123_456_789 + fake_nanoseconds = 1672533296_123_456_789 # ns precision + expect_timestamp, _ = divmod(fake_nanoseconds, 1_000_000) + fake_last_timestamp_v7 = expect_timestamp + 1 + fake_prev_rand_b = 123456 + with mock.patch.object(self.uuid, '_last_timestamp_v7', fake_last_timestamp_v7), \ + mock.patch.object(self.uuid, '_last_counter_v7_a', 0), \ + mock.patch.object(self.uuid, '_last_counter_v7_b', fake_prev_rand_b), \ + mock.patch('time.time_ns', return_value=fake_nanoseconds), \ + mock.patch('os.urandom', return_value=b'\x00\x00\x00\x01') as os_urandom_fake: + u = self.uuid.uuid7() + os_urandom_fake.assert_called_once() + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + equal(self.uuid._last_timestamp_v7, fake_last_timestamp_v7 + 1) + unix_ts_ms = (fake_last_timestamp_v7 + 1) & 0xffffffffffff + equal((u.int >> 80) & 0xffffffffffff, unix_ts_ms) + rand_a = 1871 # == int(0.456789 * 4096) + equal(self.uuid._last_counter_v7_a, rand_a) + equal((u.int >> 64) & 0x0fff, rand_a) + rand_b = fake_prev_rand_b + 1 # 1 = os.urandom(4) + equal(self.uuid._last_counter_v7_b, rand_b) + equal(u.int & 0x3fffffffffffffff, rand_b) + + def test_uuid7_overflow_rand_b(self): + equal = self.assertEqual + # 1 Jan 2023 12:34:56.123_456_789 + fake_nanoseconds = 1672533296_123_456_789 # ns precision + expect_timestamp, _ = divmod(fake_nanoseconds, 1_000_000) + # same timestamp, but force an overflow on rand_b (not on rand_a) + new_rand_b_64_bytes = os.urandom(8) + with mock.patch.object(self.uuid, '_last_timestamp_v7', expect_timestamp), \ + mock.patch.object(self.uuid, '_last_counter_v7_a', 0), \ + mock.patch.object(self.uuid, '_last_counter_v7_b', 1 << 62), \ + mock.patch('time.time_ns', return_value=fake_nanoseconds), \ + mock.patch('os.urandom', return_value=new_rand_b_64_bytes): + u = self.uuid.uuid7() + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + equal(self.uuid._last_timestamp_v7, expect_timestamp) # same + unix_ts_ms = expect_timestamp & 
0xffffffffffff + equal((u.int >> 80) & 0xffffffffffff, unix_ts_ms) + rand_a = 1871 + 1 # advance 'int(0.456789 * 4096)' by 1 + equal(self.uuid._last_counter_v7_a, rand_a) + equal((u.int >> 64) & 0x0fff, rand_a) + rand_b = int.from_bytes(new_rand_b_64_bytes) & 0x3fffffffffffffff + equal(self.uuid._last_counter_v7_b, rand_b) + equal(u.int & 0x3fffffffffffffff, rand_b) + + def test_uuid7_overflow_rand_a_and_rand_b(self): + equal = self.assertEqual + nanoseconds = [ + 1672533296_123_999_999, # to hit the overflow on rand_a + 1704069296_123_456_789, # to hit 'timestamp_ms > _last_timestamp_v7' + ] + + # 1 Jan 2023 12:34:56.123_999_999 + expect_timestamp_call_1, _ = divmod(nanoseconds[0], 1_000_000) + expect_timestamp_call_2, _ = divmod(nanoseconds[1], 1_000_000) + + random_bytes = [ + b'\xff' * 4, # for advancing rand_b and hitting the overflow + os.urandom(8), # for the next call to uuid7(), only called for generating rand_b + ] + random_bytes_iter = iter(random_bytes) + os_urandom_fake = lambda n: next(random_bytes_iter, None) + + with mock.patch.object(self.uuid, '_last_timestamp_v7', expect_timestamp_call_1), \ + mock.patch.object(self.uuid, '_last_counter_v7_a', 0), \ + mock.patch.object(self.uuid, '_last_counter_v7_b', 1 << 62), \ + mock.patch('time.time_ns', iter(nanoseconds).__next__), \ + mock.patch('os.urandom', os_urandom_fake): + u = self.uuid.uuid7() + # check that random_bytes_iter is exhausted + self.assertIsNone(os.urandom(1)) + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + equal(self.uuid._last_timestamp_v7, expect_timestamp_call_2) + unix_ts_ms = expect_timestamp_call_2 & 0xffffffffffff + equal((u.int >> 80) & 0xffffffffffff, unix_ts_ms) + rand_a_second_call = 1871 + equal(self.uuid._last_counter_v7_a, rand_a_second_call) + equal((u.int >> 64) & 0x0fff, rand_a_second_call) + rand_b_second_call = int.from_bytes(random_bytes[1]) & 0x3fffffffffffffff + equal(self.uuid._last_counter_v7_b, rand_b_second_call) + equal(u.int & 
0x3fffffffffffffff, rand_b_second_call) + @support.requires_fork() def testIssue8621(self): # On at least some versions of OSX self.uuid.uuid4 generates diff --git a/Lib/uuid.py b/Lib/uuid.py index c286eac38e1ef4..1f91043b428ac5 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -1,8 +1,9 @@ r"""UUID objects (universally unique identifiers) according to RFC 4122. This module provides immutable UUID objects (class UUID) and the functions -uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5 -UUIDs as specified in RFC 4122. +uuid1(), uuid3(), uuid4(), uuid5(), and uuid7() for generating version 1, 3, +4, 5, and 7 UUIDs as specified in RFC 4122 (superseded by RFC 9562 but still +referred to as RFC 4122 for compatibility purposes). If all you want is a unique ID, you should probably call uuid1() or uuid4(). Note that uuid1() may compromise privacy since it creates a UUID containing @@ -129,7 +130,7 @@ class UUID: variant the UUID variant (one of the constants RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE) - version the UUID version number (1 through 5, meaningful only + version the UUID version number (1, 3, 4, 5 and 7, meaningful only when the variant is RFC_4122) is_safe An enum indicating whether the UUID has been generated in @@ -214,7 +215,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, if not 0 <= int < 1<<128: raise ValueError('int is out of range (need a 128-bit value)') if version is not None: - if not 1 <= version <= 5: + if not 1 <= version <= 7: raise ValueError('illegal version number') # Set the variant to RFC 4122. int &= ~(0xc000 << 48) @@ -656,7 +657,7 @@ def getnode(): assert False, '_random_getnode() returned invalid value: {}'.format(_node) -_last_timestamp = None +_last_timestamp_v1 = None def uuid1(node=None, clock_seq=None): """Generate a UUID from a host ID, sequence number, and the current time. 
@@ -674,15 +675,15 @@ def uuid1(node=None, clock_seq=None): is_safe = SafeUUID.unknown return UUID(bytes=uuid_time, is_safe=is_safe) - global _last_timestamp + global _last_timestamp_v1 import time nanoseconds = time.time_ns() # 0x01b21dd213814000 is the number of 100-ns intervals between the # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00. timestamp = nanoseconds // 100 + 0x01b21dd213814000 - if _last_timestamp is not None and timestamp <= _last_timestamp: - timestamp = _last_timestamp + 1 - _last_timestamp = timestamp + if _last_timestamp_v1 is not None and timestamp <= _last_timestamp_v1: + timestamp = _last_timestamp_v1 + 1 + _last_timestamp_v1 = timestamp if clock_seq is None: import random clock_seq = random.getrandbits(14) # instead of stable storage @@ -719,6 +720,66 @@ def uuid5(namespace, name): hash = sha1(namespace.bytes + name).digest() return UUID(bytes=hash[:16], version=5) +_last_timestamp_v7 = None +_last_counter_v7_a = 0 # 12-bit sub-millisecond precision +_last_counter_v7_b = 0 # 62-bit seeded counter + +def uuid7(): + """Generate a UUID from a Unix timestamp in milliseconds and random bits. + + UUIDv7 objects feature monotonicity within a millisecond. + """ + # --- 48 --- -- 4 -- - 12 - -- 2 -- - 62 - + # unix_ts_ms | version | rand_a | variant | rand_b + # + # 'rand_a' is used for an additional 12-bit sub-millisecond + # precision constructed with Method 3 of RFC 9562, §6.2. + # + # 'rand_b' is a seeded counter generated according to + # the Method 2 of RFC 9562, §6.2. The initial counter + # is a random 62-bit integer and the counter is incremented + # by a random 32-bit integer within the same timestamp tick. + # + # If 'rand_b' overflows, it is regenerated and 'rand_a' is + # advanced by 1. If 'rand_a' also overflows, re-run uuid7(). 
+ + def get_rand_b(): # random 62-bit integer + return int.from_bytes(os.urandom(8)) & 0x3fffffffffffffff + + global _last_timestamp_v7 + global _last_counter_v7_a + global _last_counter_v7_b + + import time + nanoseconds = time.time_ns() + timestamp_ms, sub_millisecs = divmod(nanoseconds, 1_000_000) + # get the 12-bit sub-milliseconds precision part + assert 0 <= sub_millisecs < 1_000_000 + rand_a = int((sub_millisecs / 1_000_000) * (1 << 12)) + assert 0 <= rand_a <= 0xfff + + if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7: + rand_b = get_rand_b() + else: + if timestamp_ms < _last_timestamp_v7: + timestamp_ms = _last_timestamp_v7 + 1 + # advance 'rand_b' by a 32-bit random increment + rand_b = _last_counter_v7_b + int.from_bytes(os.urandom(4)) + if rand_b > 0x3fffffffffffffff: + if rand_a == 4095: # fast path to avoid a call to os.urandom() + return uuid7() + rand_a += 1 + rand_b = get_rand_b() + + _last_timestamp_v7 = timestamp_ms + _last_counter_v7_a = rand_a + _last_counter_v7_b = rand_b + + int_uuid_7 = (timestamp_ms & 0xffffffffffff) << 80 + int_uuid_7 |= rand_a << 64 + int_uuid_7 |= rand_b + return UUID(int=int_uuid_7, version=7) + def main(): """Run the uuid command line interface.""" @@ -726,7 +787,8 @@ def main(): "uuid1": uuid1, "uuid3": uuid3, "uuid4": uuid4, - "uuid5": uuid5 + "uuid5": uuid5, + "uuid7": uuid7, } uuid_namespace_funcs = ("uuid3", "uuid5") namespaces = { diff --git a/Misc/NEWS.d/next/Library/2024-06-17-17-31-27.gh-issue-89083.nW00Yq.rst b/Misc/NEWS.d/next/Library/2024-06-17-17-31-27.gh-issue-89083.nW00Yq.rst new file mode 100644 index 00000000000000..d2507e4b24d96f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-17-17-31-27.gh-issue-89083.nW00Yq.rst @@ -0,0 +1,2 @@ +Add :func:`~uuid.uuid7` to the :mod:`uuid` module as specified by :rfc:`9562`. +Patch by Bénédikt Tran.