diff --git a/docs/admin/integrations/prometheus.md b/docs/admin/integrations/prometheus.md index f3820bdd298dd..5085832775b87 100644 --- a/docs/admin/integrations/prometheus.md +++ b/docs/admin/integrations/prometheus.md @@ -104,90 +104,97 @@ deployment. They will always be available from the agent. -| Name | Type | Description | Labels | -|---------------------------------------------------------------|-----------|----------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------| -| `agent_scripts_executed_total` | counter | Total number of scripts executed by the Coder agent. Includes cron scheduled scripts. | `agent_name` `success` `template_name` `username` `workspace_name` | -| `coderd_agents_apps` | gauge | Agent applications with statuses. | `agent_name` `app_name` `health` `username` `workspace_name` | -| `coderd_agents_connection_latencies_seconds` | gauge | Agent connection latencies in seconds. | `agent_name` `derp_region` `preferred` `username` `workspace_name` | -| `coderd_agents_connections` | gauge | Agent connections with statuses. | `agent_name` `lifecycle_state` `status` `tailnet_node` `username` `workspace_name` | -| `coderd_agents_up` | gauge | The number of active agents per workspace. | `template_name` `username` `workspace_name` | -| `coderd_agentstats_connection_count` | gauge | The number of established connections by agent | `agent_name` `username` `workspace_name` | -| `coderd_agentstats_connection_median_latency_seconds` | gauge | The median agent connection latency | `agent_name` `username` `workspace_name` | -| `coderd_agentstats_currently_reachable_peers` | gauge | The number of peers (e.g. clients) that are currently reachable over the encrypted network. | `agent_name` `connection_type` `template_name` `username` `workspace_name` | -| `coderd_agentstats_rx_bytes` | gauge | Agent Rx bytes | `agent_name` `username` `workspace_name` | -| `coderd_agentstats_session_count_jetbrains` | gauge | The number of session established by JetBrains | `agent_name` `username` `workspace_name` | -| `coderd_agentstats_session_count_reconnecting_pty` | gauge | The number of session established by reconnecting PTY | `agent_name` `username` `workspace_name` | -| `coderd_agentstats_session_count_ssh` | gauge | The number of session established by SSH | `agent_name` `username` `workspace_name` | -| `coderd_agentstats_session_count_vscode` | gauge | The number of session established by VSCode | `agent_name` `username` `workspace_name` | -| `coderd_agentstats_startup_script_seconds` | gauge | The number of seconds the startup script took to execute. | `agent_name` `success` `template_name` `username` `workspace_name` | -| `coderd_agentstats_tx_bytes` | gauge | Agent Tx bytes | `agent_name` `username` `workspace_name` | -| `coderd_api_active_users_duration_hour` | gauge | The number of users that have been active within the last hour. | | -| `coderd_api_concurrent_requests` | gauge | The number of concurrent API requests. | | -| `coderd_api_concurrent_websockets` | gauge | The total number of concurrent API websockets. | | -| `coderd_api_request_latencies_seconds` | histogram | Latency distribution of requests in seconds. | `method` `path` | -| `coderd_api_requests_processed_total` | counter | The total number of processed API requests | `code` `method` `path` | -| `coderd_api_websocket_durations_seconds` | histogram | Websocket duration distribution of requests in seconds. | `path` | -| `coderd_api_workspace_latest_build` | gauge | The latest workspace builds with a status. | `status` | -| `coderd_api_workspace_latest_build_total` | gauge | DEPRECATED: use coderd_api_workspace_latest_build instead | `status` | -| `coderd_insights_applications_usage_seconds` | gauge | The application usage per template. | `application_name` `slug` `template_name` | -| `coderd_insights_parameters` | gauge | The parameter usage per template. | `parameter_name` `parameter_type` `parameter_value` `template_name` | -| `coderd_insights_templates_active_users` | gauge | The number of active users of the template. | `template_name` | -| `coderd_license_active_users` | gauge | The number of active users. | | -| `coderd_license_limit_users` | gauge | The user seats limit based on the active Coder license. | | -| `coderd_license_user_limit_enabled` | gauge | Returns 1 if the current license enforces the user limit. | | -| `coderd_metrics_collector_agents_execution_seconds` | histogram | Histogram for duration of agents metrics collection in seconds. | | -| `coderd_oauth2_external_requests_rate_limit` | gauge | The total number of allowed requests per interval. | `name` `resource` | -| `coderd_oauth2_external_requests_rate_limit_next_reset_unix` | gauge | Unix timestamp of the next interval | `name` `resource` | -| `coderd_oauth2_external_requests_rate_limit_remaining` | gauge | The remaining number of allowed requests in this interval. | `name` `resource` | -| `coderd_oauth2_external_requests_rate_limit_reset_in_seconds` | gauge | Seconds until the next interval | `name` `resource` | -| `coderd_oauth2_external_requests_rate_limit_total` | gauge | DEPRECATED: use coderd_oauth2_external_requests_rate_limit instead | `name` `resource` | -| `coderd_oauth2_external_requests_rate_limit_used` | gauge | The number of requests made in this interval. | `name` `resource` | -| `coderd_oauth2_external_requests_total` | counter | The total number of api calls made to external oauth2 providers. 'status_code' will be 0 if the request failed with no response. | `name` `source` `status_code` | -| `coderd_prebuilt_workspace_claim_duration_seconds` | histogram | Time to claim a prebuilt workspace by organization, template, and preset. | `organization_name` `preset_name` `template_name` | -| `coderd_provisionerd_job_timings_seconds` | histogram | The provisioner job time duration in seconds. | `provisioner` `status` | -| `coderd_provisionerd_jobs_current` | gauge | The number of currently running provisioner jobs. | `provisioner` | -| `coderd_provisionerd_num_daemons` | gauge | The number of provisioner daemons. | | -| `coderd_provisionerd_workspace_build_timings_seconds` | histogram | The time taken for a workspace to build. | `status` `template_name` `template_version` `workspace_transition` | -| `coderd_workspace_builds_total` | counter | The number of workspaces started, updated, or deleted. | `action` `owner_email` `status` `template_name` `template_version` `workspace_name` | -| `coderd_workspace_creation_duration_seconds` | histogram | Time to create a workspace by organization, template, preset, and type (regular or prebuild). | `organization_name` `preset_name` `template_name` `type` | -| `coderd_workspace_creation_total` | counter | Total regular (non-prebuilt) workspace creations by organization, template, and preset. | `organization_name` `preset_name` `template_name` | -| `coderd_workspace_latest_build_status` | gauge | The current workspace statuses by template, transition, and owner. | `status` `template_name` `template_version` `workspace_owner` `workspace_transition` | -| `go_gc_duration_seconds` | summary | A summary of the pause duration of garbage collection cycles. | | -| `go_goroutines` | gauge | Number of goroutines that currently exist. | | -| `go_info` | gauge | Information about the Go environment. | `version` | -| `go_memstats_alloc_bytes` | gauge | Number of bytes allocated and still in use. | | -| `go_memstats_alloc_bytes_total` | counter | Total number of bytes allocated, even if freed. | | -| `go_memstats_buck_hash_sys_bytes` | gauge | Number of bytes used by the profiling bucket hash table. | | -| `go_memstats_frees_total` | counter | Total number of frees. | | -| `go_memstats_gc_sys_bytes` | gauge | Number of bytes used for garbage collection system metadata. | | -| `go_memstats_heap_alloc_bytes` | gauge | Number of heap bytes allocated and still in use. | | -| `go_memstats_heap_idle_bytes` | gauge | Number of heap bytes waiting to be used. | | -| `go_memstats_heap_inuse_bytes` | gauge | Number of heap bytes that are in use. | | -| `go_memstats_heap_objects` | gauge | Number of allocated objects. | | -| `go_memstats_heap_released_bytes` | gauge | Number of heap bytes released to OS. | | -| `go_memstats_heap_sys_bytes` | gauge | Number of heap bytes obtained from system. | | -| `go_memstats_last_gc_time_seconds` | gauge | Number of seconds since 1970 of last garbage collection. | | -| `go_memstats_lookups_total` | counter | Total number of pointer lookups. | | -| `go_memstats_mallocs_total` | counter | Total number of mallocs. | | -| `go_memstats_mcache_inuse_bytes` | gauge | Number of bytes in use by mcache structures. | | -| `go_memstats_mcache_sys_bytes` | gauge | Number of bytes used for mcache structures obtained from system. | | -| `go_memstats_mspan_inuse_bytes` | gauge | Number of bytes in use by mspan structures. | | -| `go_memstats_mspan_sys_bytes` | gauge | Number of bytes used for mspan structures obtained from system. | | -| `go_memstats_next_gc_bytes` | gauge | Number of heap bytes when next garbage collection will take place. | | -| `go_memstats_other_sys_bytes` | gauge | Number of bytes used for other system allocations. | | -| `go_memstats_stack_inuse_bytes` | gauge | Number of bytes in use by the stack allocator. | | -| `go_memstats_stack_sys_bytes` | gauge | Number of bytes obtained from system for stack allocator. | | -| `go_memstats_sys_bytes` | gauge | Number of bytes obtained from system. | | -| `go_threads` | gauge | Number of OS threads created. | | -| `process_cpu_seconds_total` | counter | Total user and system CPU time spent in seconds. | | -| `process_max_fds` | gauge | Maximum number of open file descriptors. | | -| `process_open_fds` | gauge | Number of open file descriptors. | | -| `process_resident_memory_bytes` | gauge | Resident memory size in bytes. | | -| `process_start_time_seconds` | gauge | Start time of the process since unix epoch in seconds. | | -| `process_virtual_memory_bytes` | gauge | Virtual memory size in bytes. | | -| `process_virtual_memory_max_bytes` | gauge | Maximum amount of virtual memory available in bytes. | | -| `promhttp_metric_handler_requests_in_flight` | gauge | Current number of scrapes being served. | | -| `promhttp_metric_handler_requests_total` | counter | Total number of scrapes by HTTP status code. | `code` | +| Name | Type | Description | Labels | +|---------------------------------------------------------------|-----------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------| +| `agent_scripts_executed_total` | counter | Total number of scripts executed by the Coder agent. Includes cron scheduled scripts. | `agent_name` `success` `template_name` `username` `workspace_name` | +| `coder_aibridged_injected_tool_invocations_total` | counter | The number of times an injected MCP tool was invoked by aibridge. | `model` `name` `provider` `server` | +| `coder_aibridged_interceptions_duration_seconds` | histogram | The total duration of intercepted requests, in seconds. The majority of this time will be the upstream processing of the request. aibridge has no control over upstream processing time, so it's just an illustrative metric. | `model` `provider` | +| `coder_aibridged_interceptions_inflight` | gauge | The number of intercepted requests which are being processed. | `model` `provider` `route` | +| `coder_aibridged_interceptions_total` | counter | The count of intercepted requests. | `initiator_id` `method` `model` `provider` `route` `status` | +| `coder_aibridged_non_injected_tool_selections_total` | counter | The number of times an AI model selected a tool to be invoked by the client. | `model` `name` `provider` | +| `coder_aibridged_prompts_total` | counter | The number of prompts issued by users (initiators). | `initiator_id` `model` `provider` | +| `coder_aibridged_tokens_total` | counter | The number of tokens used by intercepted requests. | `initiator_id` `model` `provider` `type` | +| `coderd_agents_apps` | gauge | Agent applications with statuses. | `agent_name` `app_name` `health` `username` `workspace_name` | +| `coderd_agents_connection_latencies_seconds` | gauge | Agent connection latencies in seconds. | `agent_name` `derp_region` `preferred` `username` `workspace_name` | +| `coderd_agents_connections` | gauge | Agent connections with statuses. | `agent_name` `lifecycle_state` `status` `tailnet_node` `username` `workspace_name` | +| `coderd_agents_up` | gauge | The number of active agents per workspace. | `template_name` `username` `workspace_name` | +| `coderd_agentstats_connection_count` | gauge | The number of established connections by agent | `agent_name` `username` `workspace_name` | +| `coderd_agentstats_connection_median_latency_seconds` | gauge | The median agent connection latency | `agent_name` `username` `workspace_name` | +| `coderd_agentstats_currently_reachable_peers` | gauge | The number of peers (e.g. clients) that are currently reachable over the encrypted network. | `agent_name` `connection_type` `template_name` `username` `workspace_name` | +| `coderd_agentstats_rx_bytes` | gauge | Agent Rx bytes | `agent_name` `username` `workspace_name` | +| `coderd_agentstats_session_count_jetbrains` | gauge | The number of session established by JetBrains | `agent_name` `username` `workspace_name` | +| `coderd_agentstats_session_count_reconnecting_pty` | gauge | The number of session established by reconnecting PTY | `agent_name` `username` `workspace_name` | +| `coderd_agentstats_session_count_ssh` | gauge | The number of session established by SSH | `agent_name` `username` `workspace_name` | +| `coderd_agentstats_session_count_vscode` | gauge | The number of session established by VSCode | `agent_name` `username` `workspace_name` | +| `coderd_agentstats_startup_script_seconds` | gauge | The number of seconds the startup script took to execute. | `agent_name` `success` `template_name` `username` `workspace_name` | +| `coderd_agentstats_tx_bytes` | gauge | Agent Tx bytes | `agent_name` `username` `workspace_name` | +| `coderd_api_active_users_duration_hour` | gauge | The number of users that have been active within the last hour. | | +| `coderd_api_concurrent_requests` | gauge | The number of concurrent API requests. | | +| `coderd_api_concurrent_websockets` | gauge | The total number of concurrent API websockets. | | +| `coderd_api_request_latencies_seconds` | histogram | Latency distribution of requests in seconds. | `method` `path` | +| `coderd_api_requests_processed_total` | counter | The total number of processed API requests | `code` `method` `path` | +| `coderd_api_websocket_durations_seconds` | histogram | Websocket duration distribution of requests in seconds. | `path` | +| `coderd_api_workspace_latest_build` | gauge | The latest workspace builds with a status. | `status` | +| `coderd_api_workspace_latest_build_total` | gauge | DEPRECATED: use coderd_api_workspace_latest_build instead | `status` | +| `coderd_insights_applications_usage_seconds` | gauge | The application usage per template. | `application_name` `slug` `template_name` | +| `coderd_insights_parameters` | gauge | The parameter usage per template. | `parameter_name` `parameter_type` `parameter_value` `template_name` | +| `coderd_insights_templates_active_users` | gauge | The number of active users of the template. | `template_name` | +| `coderd_license_active_users` | gauge | The number of active users. | | +| `coderd_license_limit_users` | gauge | The user seats limit based on the active Coder license. | | +| `coderd_license_user_limit_enabled` | gauge | Returns 1 if the current license enforces the user limit. | | +| `coderd_metrics_collector_agents_execution_seconds` | histogram | Histogram for duration of agents metrics collection in seconds. | | +| `coderd_oauth2_external_requests_rate_limit` | gauge | The total number of allowed requests per interval. | `name` `resource` | +| `coderd_oauth2_external_requests_rate_limit_next_reset_unix` | gauge | Unix timestamp of the next interval | `name` `resource` | +| `coderd_oauth2_external_requests_rate_limit_remaining` | gauge | The remaining number of allowed requests in this interval. | `name` `resource` | +| `coderd_oauth2_external_requests_rate_limit_reset_in_seconds` | gauge | Seconds until the next interval | `name` `resource` | +| `coderd_oauth2_external_requests_rate_limit_total` | gauge | DEPRECATED: use coderd_oauth2_external_requests_rate_limit instead | `name` `resource` | +| `coderd_oauth2_external_requests_rate_limit_used` | gauge | The number of requests made in this interval. | `name` `resource` | +| `coderd_oauth2_external_requests_total` | counter | The total number of api calls made to external oauth2 providers. 'status_code' will be 0 if the request failed with no response. | `name` `source` `status_code` | +| `coderd_prebuilt_workspace_claim_duration_seconds` | histogram | Time to claim a prebuilt workspace by organization, template, and preset. | `organization_name` `preset_name` `template_name` | +| `coderd_provisionerd_job_timings_seconds` | histogram | The provisioner job time duration in seconds. | `provisioner` `status` | +| `coderd_provisionerd_jobs_current` | gauge | The number of currently running provisioner jobs. | `provisioner` | +| `coderd_provisionerd_num_daemons` | gauge | The number of provisioner daemons. | | +| `coderd_provisionerd_workspace_build_timings_seconds` | histogram | The time taken for a workspace to build. | `status` `template_name` `template_version` `workspace_transition` | +| `coderd_workspace_builds_total` | counter | The number of workspaces started, updated, or deleted. | `action` `owner_email` `status` `template_name` `template_version` `workspace_name` | +| `coderd_workspace_creation_duration_seconds` | histogram | Time to create a workspace by organization, template, preset, and type (regular or prebuild). | `organization_name` `preset_name` `template_name` `type` | +| `coderd_workspace_creation_total` | counter | Total regular (non-prebuilt) workspace creations by organization, template, and preset. | `organization_name` `preset_name` `template_name` | +| `coderd_workspace_latest_build_status` | gauge | The current workspace statuses by template, transition, and owner. | `status` `template_name` `template_version` `workspace_owner` `workspace_transition` | +| `go_gc_duration_seconds` | summary | A summary of the pause duration of garbage collection cycles. | | +| `go_goroutines` | gauge | Number of goroutines that currently exist. | | +| `go_info` | gauge | Information about the Go environment. | `version` | +| `go_memstats_alloc_bytes` | gauge | Number of bytes allocated and still in use. | | +| `go_memstats_alloc_bytes_total` | counter | Total number of bytes allocated, even if freed. | | +| `go_memstats_buck_hash_sys_bytes` | gauge | Number of bytes used by the profiling bucket hash table. | | +| `go_memstats_frees_total` | counter | Total number of frees. | | +| `go_memstats_gc_sys_bytes` | gauge | Number of bytes used for garbage collection system metadata. | | +| `go_memstats_heap_alloc_bytes` | gauge | Number of heap bytes allocated and still in use. | | +| `go_memstats_heap_idle_bytes` | gauge | Number of heap bytes waiting to be used. | | +| `go_memstats_heap_inuse_bytes` | gauge | Number of heap bytes that are in use. | | +| `go_memstats_heap_objects` | gauge | Number of allocated objects. | | +| `go_memstats_heap_released_bytes` | gauge | Number of heap bytes released to OS. | | +| `go_memstats_heap_sys_bytes` | gauge | Number of heap bytes obtained from system. | | +| `go_memstats_last_gc_time_seconds` | gauge | Number of seconds since 1970 of last garbage collection. | | +| `go_memstats_lookups_total` | counter | Total number of pointer lookups. | | +| `go_memstats_mallocs_total` | counter | Total number of mallocs. | | +| `go_memstats_mcache_inuse_bytes` | gauge | Number of bytes in use by mcache structures. | | +| `go_memstats_mcache_sys_bytes` | gauge | Number of bytes used for mcache structures obtained from system. | | +| `go_memstats_mspan_inuse_bytes` | gauge | Number of bytes in use by mspan structures. | | +| `go_memstats_mspan_sys_bytes` | gauge | Number of bytes used for mspan structures obtained from system. | | +| `go_memstats_next_gc_bytes` | gauge | Number of heap bytes when next garbage collection will take place. | | +| `go_memstats_other_sys_bytes` | gauge | Number of bytes used for other system allocations. | | +| `go_memstats_stack_inuse_bytes` | gauge | Number of bytes in use by the stack allocator. | | +| `go_memstats_stack_sys_bytes` | gauge | Number of bytes obtained from system for stack allocator. | | +| `go_memstats_sys_bytes` | gauge | Number of bytes obtained from system. | | +| `go_threads` | gauge | Number of OS threads created. | | +| `process_cpu_seconds_total` | counter | Total user and system CPU time spent in seconds. | | +| `process_max_fds` | gauge | Maximum number of open file descriptors. | | +| `process_open_fds` | gauge | Number of open file descriptors. | | +| `process_resident_memory_bytes` | gauge | Resident memory size in bytes. | | +| `process_start_time_seconds` | gauge | Start time of the process since unix epoch in seconds. | | +| `process_virtual_memory_bytes` | gauge | Virtual memory size in bytes. | | +| `process_virtual_memory_max_bytes` | gauge | Maximum amount of virtual memory available in bytes. | | +| `promhttp_metric_handler_requests_in_flight` | gauge | Current number of scrapes being served. | | +| `promhttp_metric_handler_requests_total` | counter | Total number of scrapes by HTTP status code. | `code` | diff --git a/enterprise/aibridged/aibridged.go b/enterprise/aibridged/aibridged.go index fcec1629b8701..edbd353ccf1d3 100644 --- a/enterprise/aibridged/aibridged.go +++ b/enterprise/aibridged/aibridged.go @@ -55,13 +55,14 @@ func New(ctx context.Context, pool Pooler, rpcDialer Dialer, logger slog.Logger) ctx, cancel := context.WithCancel(ctx) daemon := &Server{ - logger: logger, - clientDialer: rpcDialer, + logger: logger, + clientDialer: rpcDialer, + clientCh: make(chan DRPCClient), + lifecycleCtx: ctx, + cancelFn: cancel, + initConnectionCh: make(chan struct{}), + requestBridgePool: pool, - clientCh: make(chan DRPCClient), - lifecycleCtx: ctx, - cancelFn: cancel, - initConnectionCh: make(chan struct{}), } daemon.wg.Add(1) diff --git a/enterprise/aibridged/aibridged_integration_test.go b/enterprise/aibridged/aibridged_integration_test.go index 106bea2d7ea7d..655992cbc0411 100644 --- a/enterprise/aibridged/aibridged_integration_test.go +++ b/enterprise/aibridged/aibridged_integration_test.go @@ -9,6 +9,8 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" + promtest "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/require" "github.com/coder/aibridge" @@ -166,7 +168,7 @@ func TestIntegration(t *testing.T) { logger := testutil.Logger(t) providers := []aibridge.Provider{aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{BaseURL: mockOpenAI.URL})} - pool, err := aibridged.NewCachedBridgePool(aibridged.DefaultPoolOptions, providers, logger) + pool, err := aibridged.NewCachedBridgePool(aibridged.DefaultPoolOptions, providers, nil, logger) require.NoError(t, err) // Given: aibridged is started. @@ -253,3 +255,109 @@ func TestIntegration(t *testing.T) { // Then: the MCP server was initialized. require.Contains(t, mcpTokenReceived, authLink.OAuthAccessToken, "mock MCP server not requested") } + +// TestIntegrationWithMetrics validates that Prometheus metrics are correctly incremented +// when requests are processed through aibridged. +func TestIntegrationWithMetrics(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t, testutil.WaitLong) + + // Create prometheus registry and metrics. + registry := prometheus.NewRegistry() + metrics := aibridge.NewMetrics(registry) + + // Set up mock OpenAI server. + mockOpenAI := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{ + "id": "chatcmpl-test", + "object": "chat.completion", + "created": 1753343279, + "model": "gpt-4.1", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "test response" + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 5, + "total_tokens": 15 + } +}`)) + })) + t.Cleanup(mockOpenAI.Close) + + // Database and coderd setup. + db, ps := dbtestutil.NewDB(t) + client, _, api, firstUser := coderdenttest.NewWithAPI(t, &coderdenttest.Options{ + Options: &coderdtest.Options{ + Database: db, + Pubsub: ps, + }, + }) + + userClient, _ := coderdtest.CreateAnotherUser(t, client, firstUser.OrganizationID) + + // Create an API token for the user. + apiKey, err := userClient.CreateToken(ctx, "me", codersdk.CreateTokenRequest{ + TokenName: fmt.Sprintf("test-key-%d", time.Now().UnixNano()), + Lifetime: time.Hour, + Scope: codersdk.APIKeyScopeCoderAll, + }) + require.NoError(t, err) + + // Create aibridge client. + aiBridgeClient, err := api.CreateInMemoryAIBridgeServer(ctx) + require.NoError(t, err) + + logger := testutil.Logger(t) + providers := []aibridge.Provider{aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{BaseURL: mockOpenAI.URL})} + + // Create pool with metrics. + pool, err := aibridged.NewCachedBridgePool(aibridged.DefaultPoolOptions, providers, metrics, logger) + require.NoError(t, err) + + // Given: aibridged is started. + srv, err := aibridged.New(ctx, pool, func(ctx context.Context) (aibridged.DRPCClient, error) { + return aiBridgeClient, nil + }, logger) + require.NoError(t, err, "create new aibridged") + t.Cleanup(func() { + _ = srv.Shutdown(ctx) + }) + + // When: a request is made to aibridged. + req, err := http.NewRequestWithContext(ctx, http.MethodPost, "/openai/v1/chat/completions", bytes.NewBufferString(`{ + "messages": [ + { + "role": "user", + "content": "test message" + } + ], + "model": "gpt-4.1" +}`)) + require.NoError(t, err, "make request to test server") + req.Header.Add("Authorization", "Bearer "+apiKey.Key) + req.Header.Add("Accept", "application/json") + + // When: aibridged handles the request. + rec := httptest.NewRecorder() + srv.ServeHTTP(rec, req) + require.Equal(t, http.StatusOK, rec.Code) + + // Then: the interceptions metric should increase to 1. + // This is not exhaustively checking the available metrics; just an indicative one to prove + // the plumbing is working. + require.Eventually(t, func() bool { + count := promtest.ToFloat64(metrics.InterceptionCount) + return count == 1 + }, testutil.WaitShort, testutil.IntervalFast, "interceptions_total metric should be 1") +} diff --git a/enterprise/aibridged/aibridged_test.go b/enterprise/aibridged/aibridged_test.go index 5d38b7f54d18c..a66f3157a0d42 100644 --- a/enterprise/aibridged/aibridged_test.go +++ b/enterprise/aibridged/aibridged_test.go @@ -41,8 +41,7 @@ func newTestServer(t *testing.T) (*aibridged.Server, *mock.MockDRPCClient, *mock pool, func(ctx context.Context) (aibridged.DRPCClient, error) { return client, nil - }, - logger) + }, logger) require.NoError(t, err, "create new aibridged") t.Cleanup(func() { srv.Shutdown(context.Background()) @@ -291,7 +290,7 @@ func TestRouting(t *testing.T) { aibridge.NewOpenAIProvider(aibridge.OpenAIConfig{BaseURL: openaiSrv.URL}), aibridge.NewAnthropicProvider(aibridge.AnthropicConfig{BaseURL: antSrv.URL}, nil), } - pool, err := aibridged.NewCachedBridgePool(aibridged.DefaultPoolOptions, providers, logger) + pool, err := aibridged.NewCachedBridgePool(aibridged.DefaultPoolOptions, providers, nil, logger) require.NoError(t, err) conn := &mockDRPCConn{} client.EXPECT().DRPCConn().AnyTimes().Return(conn) diff --git a/enterprise/aibridged/pool.go b/enterprise/aibridged/pool.go index 3dd27843c4d79..d043f533cdaf0 100644 --- a/enterprise/aibridged/pool.go +++ b/enterprise/aibridged/pool.go @@ -51,11 +51,13 @@ type CachedBridgePool struct { singleflight *singleflight.Group[string, *aibridge.RequestBridge] + metrics *aibridge.Metrics + shutDownOnce sync.Once shuttingDownCh chan struct{} } -func NewCachedBridgePool(options PoolOptions, providers []aibridge.Provider, logger slog.Logger) (*CachedBridgePool, error) { +func NewCachedBridgePool(options PoolOptions, providers []aibridge.Provider, metrics *aibridge.Metrics, logger slog.Logger) (*CachedBridgePool, error) { cache, err := ristretto.NewCache(&ristretto.Config[string, *aibridge.RequestBridge]{ NumCounters: options.MaxItems * 10, // Docs suggest setting this 10x number of keys. MaxCost: options.MaxItems * cacheCost, // Up to n instances. @@ -88,6 +90,8 @@ func NewCachedBridgePool(options PoolOptions, providers []aibridge.Provider, log singleflight: &singleflight.Group[string, *aibridge.RequestBridge]{}, + metrics: metrics, + shuttingDownCh: make(chan struct{}), }, nil } @@ -154,7 +158,7 @@ func (p *CachedBridgePool) Acquire(ctx context.Context, req Request, clientFn Cl } } - bridge, err := aibridge.NewRequestBridge(ctx, p.providers, p.logger, recorder, mcpServers) + bridge, err := aibridge.NewRequestBridge(ctx, p.providers, recorder, mcpServers, p.metrics, p.logger) if err != nil { return nil, xerrors.Errorf("create new request bridge: %w", err) } @@ -167,7 +171,7 @@ func (p *CachedBridgePool) Acquire(ctx context.Context, req Request, clientFn Cl return instance, err } -func (p *CachedBridgePool) Metrics() PoolMetrics { +func (p *CachedBridgePool) CacheMetrics() PoolMetrics { if p.cache == nil { return nil } diff --git a/enterprise/aibridged/pool_test.go b/enterprise/aibridged/pool_test.go index d15e8a7fa1820..d1b07d293d5c3 100644 --- a/enterprise/aibridged/pool_test.go +++ b/enterprise/aibridged/pool_test.go @@ -30,7 +30,7 @@ func TestPool(t *testing.T) { mcpProxy := mcpmock.NewMockServerProxier(ctrl) opts := aibridged.PoolOptions{MaxItems: 1, TTL: time.Second} - pool, err := aibridged.NewCachedBridgePool(opts, nil, logger) + pool, err := aibridged.NewCachedBridgePool(opts, nil, nil, logger) require.NoError(t, err) t.Cleanup(func() { pool.Shutdown(context.Background()) }) @@ -63,11 +63,11 @@ func TestPool(t *testing.T) { require.NoError(t, err, "acquire pool instance") require.Same(t, inst, instB) - metrics := pool.Metrics() - require.EqualValues(t, 1, metrics.KeysAdded()) - require.EqualValues(t, 0, metrics.KeysEvicted()) - require.EqualValues(t, 1, metrics.Hits()) - require.EqualValues(t, 1, metrics.Misses()) + cacheMetrics := pool.CacheMetrics() + require.EqualValues(t, 1, cacheMetrics.KeysAdded()) + require.EqualValues(t, 0, cacheMetrics.KeysEvicted()) + require.EqualValues(t, 1, cacheMetrics.Hits()) + require.EqualValues(t, 1, cacheMetrics.Misses()) // This will get called again because a new instance will be created. mcpProxy.EXPECT().Init(gomock.Any()).Times(1).Return(nil) @@ -81,11 +81,11 @@ func TestPool(t *testing.T) { require.NoError(t, err, "acquire pool instance") require.NotSame(t, inst, inst2) - metrics = pool.Metrics() - require.EqualValues(t, 2, metrics.KeysAdded()) - require.EqualValues(t, 1, metrics.KeysEvicted()) - require.EqualValues(t, 1, metrics.Hits()) - require.EqualValues(t, 2, metrics.Misses()) + cacheMetrics = pool.CacheMetrics() + require.EqualValues(t, 2, cacheMetrics.KeysAdded()) + require.EqualValues(t, 1, cacheMetrics.KeysEvicted()) + require.EqualValues(t, 1, cacheMetrics.Hits()) + require.EqualValues(t, 2, cacheMetrics.Misses()) // This will get called again because a new instance will be created. mcpProxy.EXPECT().Init(gomock.Any()).Times(1).Return(nil) @@ -99,11 +99,11 @@ func TestPool(t *testing.T) { require.NoError(t, err, "acquire pool instance 2B") require.NotSame(t, inst2, inst2B) - metrics = pool.Metrics() - require.EqualValues(t, 3, metrics.KeysAdded()) - require.EqualValues(t, 2, metrics.KeysEvicted()) - require.EqualValues(t, 1, metrics.Hits()) - require.EqualValues(t, 3, metrics.Misses()) + cacheMetrics = pool.CacheMetrics() + require.EqualValues(t, 3, cacheMetrics.KeysAdded()) + require.EqualValues(t, 2, cacheMetrics.KeysEvicted()) + require.EqualValues(t, 1, cacheMetrics.Hits()) + require.EqualValues(t, 3, cacheMetrics.Misses()) // TODO: add test for expiry. // This requires Go 1.25's [synctest](https://pkg.go.dev/testing/synctest) since the diff --git a/enterprise/aibridged/translator.go b/enterprise/aibridged/translator.go index f9aca239d5ab0..73ae5e2ac706c 100644 --- a/enterprise/aibridged/translator.go +++ b/enterprise/aibridged/translator.go @@ -57,6 +57,16 @@ func (t *recorderTranslation) RecordPromptUsage(ctx context.Context, req *aibrid } func (t *recorderTranslation) RecordTokenUsage(ctx context.Context, req *aibridge.TokenUsageRecord) error { + merged := req.Metadata + if merged == nil { + merged = aibridge.Metadata{} + } + + // Merge the token usage values into metadata; later we might want to store some of these in their own fields. + for k, v := range req.ExtraTokenTypes { + merged[k] = v + } + _, err := t.client.RecordTokenUsage(ctx, &proto.RecordTokenUsageRequest{ InterceptionId: req.InterceptionID, MsgId: req.MsgID, diff --git a/enterprise/cli/aibridged.go b/enterprise/cli/aibridged.go index b2dc3d7725b93..e955c4fcbe73b 100644 --- a/enterprise/cli/aibridged.go +++ b/enterprise/cli/aibridged.go @@ -7,6 +7,8 @@ import ( "golang.org/x/xerrors" + "github.com/prometheus/client_golang/prometheus" + "github.com/coder/aibridge" "github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/enterprise/aibridged" @@ -31,8 +33,11 @@ func newAIBridgeDaemon(coderAPI *coderd.API) (*aibridged.Server, error) { }, getBedrockConfig(coderAPI.DeploymentValues.AI.BridgeConfig.Bedrock)), } + reg := prometheus.WrapRegistererWithPrefix("coder_aibridged_", coderAPI.PrometheusRegistry) + metrics := aibridge.NewMetrics(reg) + // Create pool for reusable stateful [aibridge.RequestBridge] instances (one per user). - pool, err := aibridged.NewCachedBridgePool(aibridged.DefaultPoolOptions, providers, logger.Named("pool")) // TODO: configurable. + pool, err := aibridged.NewCachedBridgePool(aibridged.DefaultPoolOptions, providers, metrics, logger.Named("pool")) // TODO: configurable size. if err != nil { return nil, xerrors.Errorf("create request pool: %w", err) } diff --git a/go.mod b/go.mod index a11e69ee6aa0f..6803bdc458bce 100644 --- a/go.mod +++ b/go.mod @@ -165,9 +165,9 @@ require ( github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e github.com/pkg/sftp v1.13.7 github.com/prometheus-community/pro-bing v0.7.0 - github.com/prometheus/client_golang v1.23.0 + github.com/prometheus/client_golang v1.23.2 github.com/prometheus/client_model v0.6.2 - github.com/prometheus/common v0.65.0 + github.com/prometheus/common v0.66.1 github.com/quasilyte/go-ruleguard/dsl v0.3.22 github.com/robfig/cron/v3 v3.0.1 github.com/shirou/gopsutil/v4 v4.25.5 @@ -476,7 +476,7 @@ require ( github.com/anthropics/anthropic-sdk-go v1.18.0 github.com/brianvoe/gofakeit/v7 v7.9.0 github.com/coder/agentapi-sdk-go v0.0.0-20250505131810-560d1d88d225 - github.com/coder/aibridge v0.1.7 + github.com/coder/aibridge v0.2.0 github.com/coder/aisdk-go v0.0.9 github.com/coder/boundary v1.0.1-0.20250925154134-55a44f2a7945 github.com/coder/preview v1.0.4 diff --git a/go.sum b/go.sum index e3cd268544f24..9d313c9374b19 100644 --- a/go.sum +++ b/go.sum @@ -919,8 +919,8 @@ github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 h1:aQ3y1lwWyqYPiWZThqv github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= github.com/coder/agentapi-sdk-go v0.0.0-20250505131810-560d1d88d225 h1:tRIViZ5JRmzdOEo5wUWngaGEFBG8OaE1o2GIHN5ujJ8= github.com/coder/agentapi-sdk-go v0.0.0-20250505131810-560d1d88d225/go.mod h1:rNLVpYgEVeu1Zk29K64z6Od8RBP9DwqCu9OfCzh8MR4= -github.com/coder/aibridge v0.1.7 h1:GTAM8nHawXMeb/pxAIwvzr76dyVGu9hw9qV6Gvpc7nw= -github.com/coder/aibridge v0.1.7/go.mod h1:7GhrLbzf6uM3sCA7OPaDzvq9QNrCjNuzMy+WgipYwfQ= +github.com/coder/aibridge v0.2.0 h1:kAWhHD6fsmDLH1WxIwXPu9Ineijj+lVniko45C003Vo= +github.com/coder/aibridge v0.2.0/go.mod h1:2T0RSnIX1WTqFajzXsaNsoNe6mmNsNeCTxiHBWEsFnE= github.com/coder/aisdk-go v0.0.9 h1:Vzo/k2qwVGLTR10ESDeP2Ecek1SdPfZlEjtTfMveiVo= github.com/coder/aisdk-go v0.0.9/go.mod h1:KF6/Vkono0FJJOtWtveh5j7yfNrSctVTpwgweYWSp5M= github.com/coder/boundary v1.0.1-0.20250925154134-55a44f2a7945 h1:hDUf02kTX8EGR3+5B+v5KdYvORs4YNfDPci0zCs+pC0= @@ -1718,15 +1718,15 @@ github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/prometheus-community/pro-bing v0.7.0 h1:KFYFbxC2f2Fp6c+TyxbCOEarf7rbnzr9Gw8eIb0RfZA= github.com/prometheus-community/pro-bing v0.7.0/go.mod h1:Moob9dvlY50Bfq6i88xIwfyw7xLFHH69LUgx9n5zqCE= -github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc= -github.com/prometheus/client_golang v1.23.0/go.mod h1:i/o0R9ByOnHX0McrTMTyhYvKE4haaf2mW08I+jGAjEE= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.3.0/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= -github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE= -github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= github.com/puzpuzpuz/xsync/v3 v3.5.1 h1:GJYJZwO6IdxN/IKbneznS6yPkVC+c3zyY/j19c++5Fg= diff --git a/scripts/metricsdocgen/main.go b/scripts/metricsdocgen/main.go index ea7e8f79663c1..efdf55b29c809 100644 --- a/scripts/metricsdocgen/main.go +++ b/scripts/metricsdocgen/main.go @@ -64,7 +64,7 @@ func readMetrics() ([]*dto.MetricFamily, error) { var metrics []*dto.MetricFamily - decoder := expfmt.NewDecoder(f, expfmt.NewFormat(expfmt.TypeProtoText)) + decoder := expfmt.NewDecoder(f, expfmt.NewFormat(expfmt.TypeTextPlain)) for { var m dto.MetricFamily err = decoder.Decode(&m) diff --git a/scripts/metricsdocgen/metrics b/scripts/metricsdocgen/metrics index ba9a991fc2a06..e1942fbda7edd 100644 --- a/scripts/metricsdocgen/metrics +++ b/scripts/metricsdocgen/metrics @@ -878,3 +878,40 @@ promhttp_metric_handler_requests_in_flight 1 promhttp_metric_handler_requests_total{code="200"} 2 promhttp_metric_handler_requests_total{code="500"} 0 promhttp_metric_handler_requests_total{code="503"} 0 +# HELP coder_aibridged_injected_tool_invocations_total The number of times an injected MCP tool was invoked by aibridge. +# TYPE coder_aibridged_injected_tool_invocations_total counter +coder_aibridged_injected_tool_invocations_total{model="gpt-5-nano",name="coder_list_templates",provider="openai",server="https://xxx.pit-1.try.coder.app/api/experimental/mcp/http"} 1 +# HELP coder_aibridged_interceptions_duration_seconds The total duration of intercepted requests, in seconds. The majority of this time will be the upstream processing of the request. aibridge has no control over upstream processing time, so it's just an illustrative metric. +# TYPE coder_aibridged_interceptions_duration_seconds histogram +coder_aibridged_interceptions_duration_seconds_bucket{model="gpt-5-nano",provider="openai",le="0.5"} 0 +coder_aibridged_interceptions_duration_seconds_bucket{model="gpt-5-nano",provider="openai",le="2"} 0 +coder_aibridged_interceptions_duration_seconds_bucket{model="gpt-5-nano",provider="openai",le="5"} 3 +coder_aibridged_interceptions_duration_seconds_bucket{model="gpt-5-nano",provider="openai",le="15"} 6 +coder_aibridged_interceptions_duration_seconds_bucket{model="gpt-5-nano",provider="openai",le="30"} 6 +coder_aibridged_interceptions_duration_seconds_bucket{model="gpt-5-nano",provider="openai",le="60"} 6 +coder_aibridged_interceptions_duration_seconds_bucket{model="gpt-5-nano",provider="openai",le="120"} 6 +coder_aibridged_interceptions_duration_seconds_bucket{model="gpt-5-nano",provider="openai",le="+Inf"} 6 +coder_aibridged_interceptions_duration_seconds_sum{model="gpt-5-nano",provider="openai"} 34.120188692 +coder_aibridged_interceptions_duration_seconds_count{model="gpt-5-nano",provider="openai"} 6 +# HELP coder_aibridged_interceptions_inflight The number of intercepted requests which are being processed. +# TYPE coder_aibridged_interceptions_inflight gauge +coder_aibridged_interceptions_inflight{model="gpt-5-nano",provider="openai",route="/v1/chat/completions"} 0 +# HELP coder_aibridged_interceptions_total The count of intercepted requests. +# TYPE coder_aibridged_interceptions_total counter +coder_aibridged_interceptions_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",method="POST",model="gpt-5-nano",provider="openai",route="/v1/chat/completions",status="completed"} 6 +# HELP coder_aibridged_non_injected_tool_selections_total The number of times an AI model selected a tool to be invoked by the client. +# TYPE coder_aibridged_non_injected_tool_selections_total counter +coder_aibridged_non_injected_tool_selections_total{model="gpt-5-nano",name="read_file",provider="openai"} 2 +# HELP coder_aibridged_prompts_total The number of prompts issued by users (initiators). +# TYPE coder_aibridged_prompts_total counter +coder_aibridged_prompts_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai"} 4 +# HELP coder_aibridged_tokens_total The number of tokens used by intercepted requests. +# TYPE coder_aibridged_tokens_total counter +coder_aibridged_tokens_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai",type="completion_accepted_prediction"} 0 +coder_aibridged_tokens_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai",type="completion_audio"} 0 +coder_aibridged_tokens_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai",type="completion_reasoning"} 1664 +coder_aibridged_tokens_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai",type="completion_rejected_prediction"} 0 +coder_aibridged_tokens_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai",type="input"} 13823 +coder_aibridged_tokens_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai",type="output"} 2014 +coder_aibridged_tokens_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai",type="prompt_audio"} 0 +coder_aibridged_tokens_total{initiator_id="95f6752b-08cc-4cf1-97f7-c2165e3519c5",model="gpt-5-nano",provider="openai",type="prompt_cached"} 31872