diff --git a/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIChatClient.cs b/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIChatClient.cs index 43a2e21c9e0..d9f43069490 100644 --- a/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIChatClient.cs +++ b/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIChatClient.cs @@ -190,11 +190,11 @@ private static List ToOpenAIChatContent(IList break; case UriContent uriContent when uriContent.HasTopLevelMediaType("image"): - parts.Add(ChatMessageContentPart.CreateImagePart(uriContent.Uri)); + parts.Add(ChatMessageContentPart.CreateImagePart(uriContent.Uri, GetImageDetail(content))); break; case DataContent dataContent when dataContent.HasTopLevelMediaType("image"): - parts.Add(ChatMessageContentPart.CreateImagePart(BinaryData.FromBytes(dataContent.Data), dataContent.MediaType)); + parts.Add(ChatMessageContentPart.CreateImagePart(BinaryData.FromBytes(dataContent.Data), dataContent.MediaType, GetImageDetail(content))); break; case DataContent dataContent when dataContent.HasTopLevelMediaType("audio"): @@ -220,6 +220,21 @@ private static List ToOpenAIChatContent(IList return parts; } + private static ChatImageDetailLevel? GetImageDetail(AIContent content) + { + if (content.AdditionalProperties?.TryGetValue("detail", out object? 
value) is true) + { + return value switch + { + string detailString => new ChatImageDetailLevel(detailString), + ChatImageDetailLevel detail => detail, + _ => null + }; + } + + return null; + } + private static async IAsyncEnumerable FromOpenAIStreamingChatCompletionAsync( IAsyncEnumerable updates, [EnumeratorCancellation] CancellationToken cancellationToken = default) diff --git a/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/ChatClientIntegrationTests.cs b/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/ChatClientIntegrationTests.cs index 5f00c9b9c44..e20f52aa568 100644 --- a/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/ChatClientIntegrationTests.cs +++ b/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/ChatClientIntegrationTests.cs @@ -6,7 +6,6 @@ using System.ComponentModel; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; -using System.IO; using System.Linq; using System.Reflection; using System.Text; @@ -176,7 +175,7 @@ public virtual async Task MultiModal_DescribeImage() new(ChatRole.User, [ new TextContent("What does this logo say?"), - new DataContent(GetImageDataUri(), "image/png"), + new DataContent(ImageDataUri.GetImageDataUri(), "image/png"), ]) ], new() { ModelId = GetModel_MultiModal_DescribeImage() }); @@ -955,15 +954,6 @@ private enum JobType Unknown, } - private static Uri GetImageDataUri() - { - using Stream? 
s = typeof(ChatClientIntegrationTests).Assembly.GetManifestResourceStream("Microsoft.Extensions.AI.dotnet.png"); - Assert.NotNull(s); - MemoryStream ms = new(); - s.CopyTo(ms); - return new Uri($"data:image/png;base64,{Convert.ToBase64String(ms.ToArray())}"); - } - [MemberNotNull(nameof(_chatClient))] protected void SkipIfNotEnabled() { diff --git a/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/Microsoft.Extensions.AI.Integration.Tests.csproj b/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/Microsoft.Extensions.AI.Integration.Tests.csproj index ec925a15309..6653838d9ee 100644 --- a/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/Microsoft.Extensions.AI.Integration.Tests.csproj +++ b/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/Microsoft.Extensions.AI.Integration.Tests.csproj @@ -25,10 +25,11 @@ Never - + + diff --git a/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/IntegrationTestHelpers.cs b/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/IntegrationTestHelpers.cs index 2a20b121ab0..9d8f806ca8a 100644 --- a/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/IntegrationTestHelpers.cs +++ b/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/IntegrationTestHelpers.cs @@ -4,6 +4,7 @@ using System; using System.ClientModel; using Azure.AI.OpenAI; +using Azure.Identity; using Microsoft.Extensions.Configuration; using OpenAI; @@ -18,20 +19,26 @@ internal static class IntegrationTestHelpers var configuration = TestRunnerConfiguration.Instance; string? apiKey = configuration["OpenAI:Key"]; + string? mode = configuration["OpenAI:Mode"]; - if (apiKey is not null) + if (string.Equals(mode, "AzureOpenAI", StringComparison.OrdinalIgnoreCase)) { - if (string.Equals(configuration["OpenAI:Mode"], "AzureOpenAI", StringComparison.OrdinalIgnoreCase)) + var endpoint = configuration["OpenAI:Endpoint"] + ?? 
throw new InvalidOperationException("To use AzureOpenAI, set a value for OpenAI:Endpoint"); + + if (apiKey is not null) { - var endpoint = configuration["OpenAI:Endpoint"] - ?? throw new InvalidOperationException("To use AzureOpenAI, set a value for OpenAI:Endpoint"); return new AzureOpenAIClient(new Uri(endpoint), new ApiKeyCredential(apiKey)); } else { - return new OpenAIClient(apiKey); + return new AzureOpenAIClient(new Uri(endpoint), new DefaultAzureCredential()); } } + else if (apiKey is not null) + { + return new OpenAIClient(apiKey); + } return null; } diff --git a/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/Microsoft.Extensions.AI.OpenAI.Tests.csproj b/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/Microsoft.Extensions.AI.OpenAI.Tests.csproj index 5626f4f207e..bd56d0ddfd6 100644 --- a/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/Microsoft.Extensions.AI.OpenAI.Tests.csproj +++ b/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/Microsoft.Extensions.AI.OpenAI.Tests.csproj @@ -2,7 +2,7 @@ Microsoft.Extensions.AI Unit tests for Microsoft.Extensions.AI.OpenAI - $(NoWarn);OPENAI002;MEAI001 + $(NoWarn);OPENAI002;MEAI001;S104 @@ -10,6 +10,14 @@ true + + + + + + + + @@ -24,5 +32,6 @@ + diff --git a/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/OpenAIChatClientTests.cs b/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/OpenAIChatClientTests.cs index ae7c83e2237..78dc920f8cb 100644 --- a/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/OpenAIChatClientTests.cs +++ b/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/OpenAIChatClientTests.cs @@ -1033,6 +1033,124 @@ public async Task AssistantMessageWithBothToolsAndContent_NonStreaming() Assert.Equal("fp_f85bea6784", response.AdditionalProperties[nameof(ChatCompletion.SystemFingerprint)]); } + [Fact] + public Task DataContentMessage_Image_AdditionalProperty_ChatImageDetailLevel_NonStreaming() + => DataContentMessage_Image_AdditionalPropertyDetail_NonStreaming(ChatImageDetailLevel.High); + + [Fact] + 
public Task DataContentMessage_Image_AdditionalProperty_StringDetail_NonStreaming() + => DataContentMessage_Image_AdditionalPropertyDetail_NonStreaming("high"); + + private static async Task DataContentMessage_Image_AdditionalPropertyDetail_NonStreaming(object detailValue) + { + string input = $$""" + { + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What does this logo say?" + }, + { + "type": "image_url", + "image_url": { + "detail": "high", + "url": "{{ImageDataUri.GetImageDataUri()}}" + } + } + ] + } + ], + "model": "gpt-4o-mini" + } + """; + + const string Output = """ + { + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "The logo says \".NET\", which is a software development framework created by Microsoft. It is used for building and running applications on Windows, macOS, and Linux environments. The logo typically also represents the broader .NET ecosystem, which includes various programming languages, libraries, and tools.", + "refusal": null, + "role": "assistant" + } + } + ], + "created": 1743531271, + "id": "chatcmpl-BHaQ3nkeSDGhLzLya3mGbB1EXSqve", + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion", + "system_fingerprint": "fp_b705f0c291", + "usage": { + "completion_tokens": 56, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens": 8513, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + }, + "total_tokens": 8569 + } + } + """; + + using VerbatimHttpHandler handler = new(input, Output); + using HttpClient httpClient = new(handler); + using IChatClient client = CreateChatClient(httpClient, "gpt-4o-mini"); + + var response = await client.GetResponseAsync( + [ + new(ChatRole.User, + [ + new TextContent("What does this logo say?"), + new DataContent(ImageDataUri.GetImageDataUri(), 
"image/png") + { + AdditionalProperties = new() + { + { "detail", detailValue } + } + } + ]) + ]); + Assert.NotNull(response); + + Assert.Equal("chatcmpl-BHaQ3nkeSDGhLzLya3mGbB1EXSqve", response.ResponseId); + Assert.Equal("The logo says \".NET\", which is a software development framework created by Microsoft. It is used for building and running applications on Windows, macOS, and Linux environments. The logo typically also represents the broader .NET ecosystem, which includes various programming languages, libraries, and tools.", response.Text); + Assert.Single(response.Messages.Single().Contents); + Assert.Equal(ChatRole.Assistant, response.Messages.Single().Role); + Assert.Equal("chatcmpl-BHaQ3nkeSDGhLzLya3mGbB1EXSqve", response.Messages.Single().MessageId); + Assert.Equal("gpt-4o-mini-2024-07-18", response.ModelId); + Assert.Equal(DateTimeOffset.FromUnixTimeSeconds(1_743_531_271), response.CreatedAt); + Assert.Equal(ChatFinishReason.Stop, response.FinishReason); + + Assert.NotNull(response.Usage); + Assert.Equal(8513, response.Usage.InputTokenCount); + Assert.Equal(56, response.Usage.OutputTokenCount); + Assert.Equal(8569, response.Usage.TotalTokenCount); + Assert.Equal(new Dictionary + { + { "InputTokenDetails.AudioTokenCount", 0 }, + { "InputTokenDetails.CachedTokenCount", 0 }, + { "OutputTokenDetails.ReasoningTokenCount", 0 }, + { "OutputTokenDetails.AudioTokenCount", 0 }, + { "OutputTokenDetails.AcceptedPredictionTokenCount", 0 }, + { "OutputTokenDetails.RejectedPredictionTokenCount", 0 }, + }, response.Usage.AdditionalCounts); + + Assert.NotNull(response.AdditionalProperties); + Assert.Equal("fp_b705f0c291", response.AdditionalProperties[nameof(ChatCompletion.SystemFingerprint)]); + } + private static IChatClient CreateChatClient(HttpClient httpClient, string modelId) => new OpenAIClient(new ApiKeyCredential("apikey"), new OpenAIClientOptions { Transport = new HttpClientPipelineTransport(httpClient) }) .GetChatClient(modelId) diff --git 
a/test/Shared/ImageDataUri/ImageDataUri.cs b/test/Shared/ImageDataUri/ImageDataUri.cs new file mode 100644 index 00000000000..106e936b1ed --- /dev/null +++ b/test/Shared/ImageDataUri/ImageDataUri.cs @@ -0,0 +1,20 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.IO; +using Xunit; + +namespace Microsoft.Extensions.AI; + +internal static class ImageDataUri +{ + internal static Uri GetImageDataUri() + { + using Stream? s = typeof(ImageDataUri).Assembly.GetManifestResourceStream("Microsoft.Extensions.AI.Resources.dotnet.png"); + Assert.NotNull(s); + MemoryStream ms = new(); + s.CopyTo(ms); + return new Uri($"data:image/png;base64,{Convert.ToBase64String(ms.ToArray())}"); + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/Resources/dotnet.png b/test/Shared/ImageDataUri/dotnet.png similarity index 100% rename from test/Libraries/Microsoft.Extensions.AI.Integration.Tests/Resources/dotnet.png rename to test/Shared/ImageDataUri/dotnet.png