diff --git a/docs/why.md b/docs/why.md index cbdfdc9fcb0..002c342e465 100644 --- a/docs/why.md +++ b/docs/why.md @@ -314,7 +314,11 @@ Pydantic provides four ways to create schemas and perform validation and seriali { 'properties': { 'when': {'format': 'date-time', 'title': 'When', 'type': 'string'}, - 'where': {'format': 'binary', 'title': 'Where', 'type': 'string'}, + 'where': { + 'contentMediaType': 'application/octet-stream', + 'title': 'Where', + 'type': 'string', + }, 'why': {'title': 'Why', 'type': 'string'}, }, 'required': ['when', 'where'], diff --git a/pydantic/json_schema.py b/pydantic/json_schema.py index 6662661012f..a0835bad108 100644 --- a/pydantic/json_schema.py +++ b/pydantic/json_schema.py @@ -785,7 +785,10 @@ def bytes_schema(self, schema: core_schema.BytesSchema) -> JsonSchemaValue: Returns: The generated JSON schema. """ - json_schema = {'type': 'string', 'format': 'base64url' if self._config.ser_json_bytes == 'base64' else 'binary'} + json_schema = {'type': 'string', 'contentMediaType': 'application/octet-stream'} + bytes_mode = self._config.ser_json_bytes if self.mode == 'serialization' else self._config.val_json_bytes + if bytes_mode == 'base64': + json_schema['contentEncoding'] = 'base64' self.update_with_validations(json_schema, schema, self.ValidationsMapping.bytes) return json_schema diff --git a/pydantic/types.py b/pydantic/types.py index 32742ac4645..80b6745caba 100644 --- a/pydantic/types.py +++ b/pydantic/types.py @@ -2379,11 +2379,20 @@ def encode(cls, value: bytes) -> bytes: ... @classmethod - def get_json_format(cls) -> str: - """Get the JSON format for the encoded data. + def get_json_format(cls) -> str | None: + """Get the JSON Schema `format` value for the encoded data. Returns: - The JSON format for the encoded data. + The format string, or `None` if no format should be set. + """ + ... + + @classmethod + def get_content_encoding(cls) -> str | None: + """Get the JSON Schema `contentEncoding` value for the encoded data. + + Returns: + The content encoding string, or `None` if no content encoding should be set. """ ... @@ -2427,6 +2436,15 @@ def get_json_format(cls) -> Literal['base64']: """ return 'base64' + @classmethod + def get_content_encoding(cls) -> Literal['base64']: + """Get the JSON Schema `contentEncoding` value for the encoded data. + + Returns: + The content encoding string. + """ + return 'base64' + class Base64UrlEncoder(EncoderProtocol): """URL-safe Base64 encoder.""" @@ -2467,6 +2485,15 @@ def get_json_format(cls) -> Literal['base64url']: """ return 'base64url' + @classmethod + def get_content_encoding(cls) -> Literal['base64url']: + """Get the JSON Schema `contentEncoding` value for the encoded data. + + Returns: + The content encoding string. + """ + return 'base64url' + @_dataclasses.dataclass(**_internal_dataclass.slots_true) class EncodedBytes: @@ -2529,7 +2556,13 @@ def __get_pydantic_json_schema__( self, core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler ) -> JsonSchemaValue: field_schema = handler(core_schema) - field_schema.update(type='string', format=self.encoder.get_json_format()) + field_schema.update(type='string') + json_format = self.encoder.get_json_format() + if json_format is not None: + field_schema['format'] = json_format + content_encoding = self.encoder.get_content_encoding() + if content_encoding is not None: + field_schema['contentEncoding'] = content_encoding return field_schema def __get_pydantic_core_schema__(self, source: type[Any], handler: GetCoreSchemaHandler) -> core_schema.CoreSchema: @@ -2628,7 +2661,13 @@ def __get_pydantic_json_schema__( self, core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler ) -> JsonSchemaValue: field_schema = handler(core_schema) - field_schema.update(type='string', format=self.encoder.get_json_format()) + field_schema.update(type='string') + json_format = self.encoder.get_json_format() + if json_format is not None: + field_schema['format'] = json_format + content_encoding = self.encoder.get_content_encoding() + if content_encoding is not None: + field_schema['contentEncoding'] = content_encoding return field_schema def __get_pydantic_core_schema__(self, source: type[Any], handler: GetCoreSchemaHandler) -> core_schema.CoreSchema: diff --git a/tests/test_json_schema.py b/tests/test_json_schema.py index ff6bc3c3fdf..b4f1a26eee1 100644 --- a/tests/test_json_schema.py +++ b/tests/test_json_schema.py @@ -524,7 +524,7 @@ class Model(BaseModel): assert model_json_schema_validation == { 'properties': { - 'a': {'default': 'foobar', 'format': 'binary', 'title': 'A', 'type': 'string'}, + 'a': {'contentMediaType': 'application/octet-stream', 'default': 'foobar', 'title': 'A', 'type': 'string'}, 'b': { 'anyOf': [ {'type': 'number'}, @@ -542,7 +542,7 @@ class Model(BaseModel): } assert model_json_schema_serialization == { 'properties': { - 'a': {'default': 'foobar', 'format': 'binary', 'title': 'A', 'type': 'string'}, + 'a': {'contentMediaType': 'application/octet-stream', 'default': 'foobar', 'title': 'A', 'type': 'string'}, 'b': { 'default': '12.34', 'title': 'B', @@ -873,13 +873,26 @@ class Model(BaseModel): (Optional[str], {'properties': {'a': {'anyOf': [{'type': 'string'}, {'type': 'null'}], 'title': 'A'}}}), ( Optional[bytes], - {'properties': {'a': {'title': 'A', 'anyOf': [{'type': 'string', 'format': 'binary'}, {'type': 'null'}]}}}, + { + 'properties': { + 'a': { + 'title': 'A', + 'anyOf': [{'contentMediaType': 'application/octet-stream', 'type': 'string'}, {'type': 'null'}], + } + } + }, ), ( Union[str, bytes], { 'properties': { - 'a': {'title': 'A', 'anyOf': [{'type': 'string'}, {'type': 'string', 'format': 'binary'}]} + 'a': { + 'title': 'A', + 'anyOf': [ + {'type': 'string'}, + {'contentMediaType': 'application/octet-stream', 'type': 'string'}, + ], + } }, }, ), @@ -889,7 +902,11 @@ class Model(BaseModel): 'properties': { 'a': { 'title': 'A', - 'anyOf': [{'type': 'string'}, {'type': 'string', 'format': 'binary'}, {'type': 'null'}], + 'anyOf': [ + {'type': 'string'}, + {'contentMediaType': 'application/octet-stream', 'type': 'string'}, + {'type': 'null'}, + ], } } }, @@ -994,6 +1011,8 @@ class Model(BaseModel): 'properties': {'a': {'title': 'A', 'type': inner_type, 'writeOnly': True, 'format': 'password'}}, 'required': ['a'], } + if field_type is SecretBytes: + base_schema['properties']['a']['contentMediaType'] = 'application/octet-stream' assert Model.model_json_schema() == base_schema @@ -1876,8 +1895,29 @@ class Model(BaseModel): @pytest.mark.parametrize( 'ser_json_bytes,properties', [ - ('base64', {'data': {'default': 'Zm9vYmFy', 'format': 'base64url', 'title': 'Data', 'type': 'string'}}), - ('utf8', {'data': {'default': 'foobar', 'format': 'binary', 'title': 'Data', 'type': 'string'}}), + ( + 'base64', + { + 'data': { + 'default': 'Zm9vYmFy', + 'contentEncoding': 'base64', + 'contentMediaType': 'application/octet-stream', + 'title': 'Data', + 'type': 'string', + } + }, + ), + ( + 'utf8', + { + 'data': { + 'contentMediaType': 'application/octet-stream', + 'default': 'foobar', + 'title': 'Data', + 'type': 'string', + } + }, + ), ], ) def test_model_default_bytes(ser_json_bytes: Literal['base64', 'utf8'], properties: dict[str, Any]): @@ -1917,8 +1957,29 @@ class Dataclass: @pytest.mark.parametrize( 'ser_json_bytes,properties', [ - ('base64', {'data': {'default': 'Zm9vYmFy', 'format': 'base64url', 'title': 'Data', 'type': 'string'}}), - ('utf8', {'data': {'default': 'foobar', 'format': 'binary', 'title': 'Data', 'type': 'string'}}), + ( + 'base64', + { + 'data': { + 'default': 'Zm9vYmFy', + 'contentEncoding': 'base64', + 'contentMediaType': 'application/octet-stream', + 'title': 'Data', + 'type': 'string', + } + }, + ), + ( + 'utf8', + { + 'data': { + 'contentMediaType': 'application/octet-stream', + 'default': 'foobar', + 'title': 'Data', + 'type': 'string', + } + }, + ), ], ) def test_dataclass_default_bytes(ser_json_bytes: Literal['base64', 'utf8'], properties: dict[str, Any]): @@ -1958,8 +2019,29 @@ class MyTypedDict(TypedDict): @pytest.mark.parametrize( 'ser_json_bytes,properties', [ - ('base64', {'data': {'default': 'Zm9vYmFy', 'format': 'base64url', 'title': 'Data', 'type': 'string'}}), - ('utf8', {'data': {'default': 'foobar', 'format': 'binary', 'title': 'Data', 'type': 'string'}}), + ( + 'base64', + { + 'data': { + 'default': 'Zm9vYmFy', + 'contentEncoding': 'base64', + 'contentMediaType': 'application/octet-stream', + 'title': 'Data', + 'type': 'string', + } + }, + ), + ( + 'utf8', + { + 'data': { + 'contentMediaType': 'application/octet-stream', + 'default': 'foobar', + 'title': 'Data', + 'type': 'string', + } + }, + ), ], ) def test_typeddict_default_bytes(ser_json_bytes: Literal['base64', 'utf8'], properties: dict[str, Any]): @@ -2018,7 +2100,7 @@ class A(BaseModel): ({'max_length': 5}, str, {'type': 'string', 'maxLength': 5}), ({}, constr(max_length=6), {'type': 'string', 'maxLength': 6}), ({'min_length': 2}, str, {'type': 'string', 'minLength': 2}), - ({'max_length': 5}, bytes, {'type': 'string', 'maxLength': 5, 'format': 'binary'}), + ({'max_length': 5}, bytes, {'contentMediaType': 'application/octet-stream', 'type': 'string', 'maxLength': 5}), ({'pattern': '^foo$'}, str, {'type': 'string', 'pattern': '^foo$'}), ({'gt': 2}, int, {'type': 'integer', 'exclusiveMinimum': 2}), ({'lt': 5}, int, {'type': 'integer', 'exclusiveMaximum': 5}), @@ -2122,7 +2204,7 @@ class Foo(BaseModel): ({'max_length': 5}, str, {'type': 'string', 'maxLength': 5}), ({}, constr(max_length=6), {'type': 'string', 'maxLength': 6}), ({'min_length': 2}, str, {'type': 'string', 'minLength': 2}), - ({'max_length': 5}, bytes, {'type': 'string', 'maxLength': 5, 'format': 'binary'}), + ({'max_length': 5}, bytes, {'contentMediaType': 'application/octet-stream', 'type': 'string', 'maxLength': 5}), ({'pattern': '^foo$'}, str, {'type': 'string', 'pattern': '^foo$'}), ({'gt': 2}, int, {'type': 'integer', 'exclusiveMinimum': 2}), ({'lt': 5}, int, {'type': 'integer', 'exclusiveMaximum': 5}), @@ -2295,7 +2377,13 @@ class Foo(BaseModel): # (ConstrainedBytes, {'title': 'A', 'type': 'string', 'format': 'binary'}), ( conbytes(min_length=3, max_length=5), - {'title': 'A', 'type': 'string', 'format': 'binary', 'minLength': 3, 'maxLength': 5}, + { + 'title': 'A', + 'contentMediaType': 'application/octet-stream', + 'type': 'string', + 'minLength': 3, + 'maxLength': 5, + }, ), ], ) @@ -4642,12 +4730,14 @@ def test_secrets_schema(secret_cls, field_kw, schema_kw): class Foobar(BaseModel): password: secret_cls = Field(**field_kw) + expected_props = {'title': 'Password', 'type': 'string', 'writeOnly': True, 'format': 'password', **schema_kw} + if secret_cls is SecretBytes: + expected_props['contentMediaType'] = 'application/octet-stream' + assert Foobar.model_json_schema() == { 'title': 'Foobar', 'type': 'object', - 'properties': { - 'password': {'title': 'Password', 'type': 'string', 'writeOnly': True, 'format': 'password', **schema_kw} - }, + 'properties': {'password': expected_props}, 'required': ['password'], } diff --git a/tests/test_types.py b/tests/test_types.py index 9cb3fe29b78..4b45d5f31b0 100644 --- a/tests/test_types.py +++ b/tests/test_types.py @@ -3547,7 +3547,12 @@ class Model(BaseModel): assert Model.model_json_schema() == { 'properties': { 'str_type': {'format': 'path', 'title': 'Str Type', 'type': 'string'}, - 'byte_type': {'format': 'path', 'title': 'Byte Type', 'type': 'string'}, + 'byte_type': { + 'contentMediaType': 'application/octet-stream', + 'format': 'path', + 'title': 'Byte Type', + 'type': 'string', + }, 'any_type': {'format': 'path', 'title': 'Any Type', 'type': 'string'}, }, 'required': ['str_type', 'byte_type', 'any_type'], @@ -5599,15 +5604,24 @@ class Model(BaseModel): 'base64_value_or_none': None, } + if field_type in (Base64Bytes,): + base64_schema = { + 'contentEncoding': 'base64', + 'contentMediaType': 'application/octet-stream', + 'format': 'base64', + 'type': 'string', + } + else: + base64_schema = {'contentEncoding': 'base64', 'format': 'base64', 'type': 'string'} + assert Model.model_json_schema() == { 'properties': { 'base64_value': { - 'format': 'base64', + **base64_schema, 'title': 'Base64 Value', - 'type': 'string', }, 'base64_value_or_none': { - 'anyOf': [{'type': 'string', 'format': 'base64'}, {'type': 'null'}], + 'anyOf': [base64_schema, {'type': 'null'}], 'default': None, 'title': 'Base64 Value Or None', }, @@ -5692,15 +5706,24 @@ class Model(BaseModel): 'base64url_value_or_none': None, } + if field_type in (Base64UrlBytes,): + base64url_schema = { + 'contentEncoding': 'base64url', + 'contentMediaType': 'application/octet-stream', + 'format': 'base64url', + 'type': 'string', + } + else: + base64url_schema = {'contentEncoding': 'base64url', 'format': 'base64url', 'type': 'string'} + assert Model.model_json_schema() == { 'properties': { 'base64url_value': { - 'format': 'base64url', + **base64url_schema, 'title': 'Base64Url Value', - 'type': 'string', }, 'base64url_value_or_none': { - 'anyOf': [{'type': 'string', 'format': 'base64url'}, {'type': 'null'}], + 'anyOf': [base64url_schema, {'type': 'null'}], 'default': None, 'title': 'Base64Url Value Or None', },