Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions gateway/src/routes/internal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ pub fn build_internal_non_otel_enabled_routes() -> Router<AppStateData> {
"/internal/evaluations/run-stats",
get(endpoints::internal::evaluations::get_evaluation_run_stats_handler),
)
.route(
"/internal/evaluations/runs",
get(endpoints::internal::evaluations::list_evaluation_runs_handler),
)
.route(
"/internal/models/usage",
get(endpoints::internal::models::get_model_usage_handler),
Expand Down
13 changes: 13 additions & 0 deletions internal/tensorzero-node/lib/bindings/EvaluationRunInfo.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
// NOTE(review): generated binding — change the Rust `EvaluationRunInfoRow` struct
// and regenerate rather than editing this file directly.

/**
 * Information about a single evaluation run.
 */
export type EvaluationRunInfo = {
  // UUID of the evaluation run, serialized as a string.
  evaluation_run_id: string;
  evaluation_name: string;
  dataset_name: string;
  function_name: string;
  variant_name: string;
  // ISO-8601 timestamp (UTC) of the most recent inference in the run.
  last_inference_timestamp: string;
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
// NOTE(review): generated binding — regenerate from the Rust source instead of editing.
import type { EvaluationRunInfo } from "./EvaluationRunInfo";

/**
 * Response containing a list of evaluation runs.
 */
export type ListEvaluationRunsResponse = { runs: Array<EvaluationRunInfo> };
2 changes: 2 additions & 0 deletions internal/tensorzero-node/lib/bindings/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ export * from "./EvaluationRunCompleteEvent";
export * from "./EvaluationRunErrorEvent";
export * from "./EvaluationRunEvent";
export * from "./EvaluationRunFatalErrorEvent";
export * from "./EvaluationRunInfo";
export * from "./EvaluationRunStartEvent";
export * from "./EvaluationRunStatsResponse";
export * from "./EvaluationRunSuccessEvent";
Expand Down Expand Up @@ -154,6 +155,7 @@ export * from "./LaunchOptimizationParams";
export * from "./LaunchOptimizationWorkflowParams";
export * from "./ListDatapointsRequest";
export * from "./ListDatasetsResponse";
export * from "./ListEvaluationRunsResponse";
export * from "./ListInferenceMetadataResponse";
export * from "./ListInferencesRequest";
export * from "./MetricConfig";
Expand Down
222 changes: 221 additions & 1 deletion tensorzero-core/src/db/clickhouse/evaluation_queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ use std::collections::HashMap;
use async_trait::async_trait;

use super::ClickHouseConnectionInfo;
use super::select_queries::parse_count;
use super::select_queries::{parse_count, parse_json_rows};
use crate::db::evaluation_queries::EvaluationQueries;
use crate::db::evaluation_queries::EvaluationRunInfoRow;
use crate::error::Error;

#[async_trait]
Expand All @@ -21,4 +22,223 @@ impl EvaluationQueries for ClickHouseConnectionInfo {
let response = self.run_query_synchronous(query, &HashMap::new()).await?;
parse_count(&response.response)
}

async fn list_evaluation_runs(
&self,
limit: u32,
offset: u32,
) -> Result<Vec<EvaluationRunInfoRow>, Error> {
let query = r"
SELECT
evaluation_run_id,
any(evaluation_name) AS evaluation_name,
any(inference_function_name) AS function_name,
any(variant_name) AS variant_name,
any(dataset_name) AS dataset_name,
formatDateTime(UUIDv7ToDateTime(uint_to_uuid(max(max_inference_id))), '%Y-%m-%dT%H:%i:%SZ') AS last_inference_timestamp
FROM (
SELECT
maxIf(value, key = 'tensorzero::evaluation_run_id') AS evaluation_run_id,
maxIf(value, key = 'tensorzero::evaluation_name') AS evaluation_name,
maxIf(value, key = 'tensorzero::dataset_name') AS dataset_name,
any(function_name) AS inference_function_name,
any(variant_name) AS variant_name,
max(toUInt128(inference_id)) AS max_inference_id
FROM TagInference FINAL
WHERE key IN ('tensorzero::evaluation_run_id', 'tensorzero::evaluation_name', 'tensorzero::dataset_name')
GROUP BY inference_id
)
WHERE NOT startsWith(inference_function_name, 'tensorzero::')
GROUP BY evaluation_run_id
ORDER BY toUInt128(toUUID(evaluation_run_id)) DESC
LIMIT {limit:UInt32}
OFFSET {offset:UInt32}
FORMAT JSONEachRow
"
.to_string();

let limit_str = limit.to_string();
let offset_str = offset.to_string();
let mut params = HashMap::new();
params.insert("limit", limit_str.as_str());
params.insert("offset", offset_str.as_str());

let response = self.run_query_synchronous(query, &params).await?;

parse_json_rows(response.response.as_str())
}
}

#[cfg(test)]
mod tests {
    //! Unit tests for the ClickHouse implementation of `EvaluationQueries`,
    //! using a mocked ClickHouse client so no database is required.

    use std::sync::Arc;

    use crate::db::{
        clickhouse::{
            ClickHouseConnectionInfo, ClickHouseResponse, ClickHouseResponseMetadata,
            clickhouse_client::MockClickHouseClient,
            query_builder::test_util::assert_query_contains,
        },
        evaluation_queries::EvaluationQueries,
    };

    /// Verifies the count query shape (distinct run-id tag values) and that it
    /// is issued with no bound parameters, then checks the parsed count.
    #[tokio::test]
    async fn test_count_total_evaluation_runs() {
        let mut mock_clickhouse_client = MockClickHouseClient::new();

        mock_clickhouse_client
            .expect_run_query_synchronous()
            .withf(|query, params| {
                assert_query_contains(
                    query,
                    "SELECT toUInt32(uniqExact(value)) as count
                    FROM TagInference
                    WHERE key = 'tensorzero::evaluation_run_id'
                    FORMAT JSONEachRow",
                );
                assert_eq!(params.len(), 0, "Should have no parameters");
                true
            })
            .returning(|_, _| {
                Ok(ClickHouseResponse {
                    response: r#"{"count":42}"#.to_string(),
                    metadata: ClickHouseResponseMetadata {
                        read_rows: 1,
                        written_rows: 0,
                    },
                })
            });

        let conn = ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));

        let result = conn.count_total_evaluation_runs().await.unwrap();

        assert_eq!(result, 42, "Should return count of 42");
    }

    /// Checks the list query structure, the default pagination parameters
    /// (limit=100, offset=0), and deserialization of a single JSONEachRow row.
    #[tokio::test]
    async fn test_list_evaluation_runs_with_defaults() {
        let mut mock_clickhouse_client = MockClickHouseClient::new();

        mock_clickhouse_client
            .expect_run_query_synchronous()
            .withf(|query, params| {
                // Verify the query contains the expected structure
                assert_query_contains(query, "SELECT");
                assert_query_contains(query, "evaluation_run_id");
                assert_query_contains(query, "FROM TagInference FINAL");
                assert_query_contains(query, "LIMIT {limit:UInt32}");
                assert_query_contains(query, "OFFSET {offset:UInt32}");

                // Verify parameters
                assert_eq!(params.get("limit"), Some(&"100"));
                assert_eq!(params.get("offset"), Some(&"0"));
                true
            })
            .returning(|_, _| {
                Ok(ClickHouseResponse {
                    response: r#"{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95d","evaluation_name":"test_eval","function_name":"test_func","variant_name":"test_variant","dataset_name":"test_dataset","last_inference_timestamp":"2025-05-20T16:52:58Z"}"#.to_string(),
                    metadata: ClickHouseResponseMetadata {
                        read_rows: 1,
                        written_rows: 0,
                    },
                })
            });

        let conn = ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));

        let result = conn.list_evaluation_runs(100, 0).await.unwrap();

        assert_eq!(result.len(), 1, "Should return one evaluation run");
        assert_eq!(result[0].evaluation_name, "test_eval");
        assert_eq!(result[0].function_name, "test_func");
        assert_eq!(result[0].variant_name, "test_variant");
        assert_eq!(result[0].dataset_name, "test_dataset");
    }

    /// Ensures caller-supplied limit/offset values are forwarded verbatim as
    /// query parameters, and that an empty response yields an empty Vec.
    #[tokio::test]
    async fn test_list_evaluation_runs_with_custom_pagination() {
        let mut mock_clickhouse_client = MockClickHouseClient::new();

        mock_clickhouse_client
            .expect_run_query_synchronous()
            .withf(|_query, params| {
                // Verify custom pagination parameters
                assert_eq!(params.get("limit"), Some(&"50"));
                assert_eq!(params.get("offset"), Some(&"100"));
                true
            })
            .returning(|_, _| {
                Ok(ClickHouseResponse {
                    response: String::new(),
                    metadata: ClickHouseResponseMetadata {
                        read_rows: 0,
                        written_rows: 0,
                    },
                })
            });

        let conn = ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));

        let result = conn.list_evaluation_runs(50, 100).await.unwrap();

        assert_eq!(result.len(), 0, "Should return empty results");
    }

    /// Verifies multi-line JSONEachRow output (one JSON object per line) is
    /// parsed into multiple rows, preserving order.
    #[tokio::test]
    async fn test_list_evaluation_runs_multiple_results() {
        let mut mock_clickhouse_client = MockClickHouseClient::new();

        mock_clickhouse_client
            .expect_run_query_synchronous()
            .returning(|_, _| {
                Ok(ClickHouseResponse {
                    response: r#"{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95d","evaluation_name":"eval1","function_name":"func1","variant_name":"variant1","dataset_name":"dataset1","last_inference_timestamp":"2025-05-20T16:52:58Z"}
{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95e","evaluation_name":"eval2","function_name":"func2","variant_name":"variant2","dataset_name":"dataset2","last_inference_timestamp":"2025-05-20T17:52:58Z"}
{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95f","evaluation_name":"eval3","function_name":"func3","variant_name":"variant3","dataset_name":"dataset3","last_inference_timestamp":"2025-05-20T18:52:58Z"}"#.to_string(),
                    metadata: ClickHouseResponseMetadata {
                        read_rows: 3,
                        written_rows: 0,
                    },
                })
            });

        let conn = ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));

        let result = conn.list_evaluation_runs(100, 0).await.unwrap();

        assert_eq!(result.len(), 3, "Should return three evaluation runs");
        assert_eq!(result[0].evaluation_name, "eval1");
        assert_eq!(result[1].evaluation_name, "eval2");
        assert_eq!(result[2].evaluation_name, "eval3");
    }

    /// Confirms the generated SQL contains the filter that hides runs whose
    /// function name is an internal `tensorzero::` function.
    #[tokio::test]
    async fn test_list_evaluation_runs_filters_out_tensorzero_functions() {
        let mut mock_clickhouse_client = MockClickHouseClient::new();

        mock_clickhouse_client
            .expect_run_query_synchronous()
            .withf(|query, _params| {
                // Verify the query filters out tensorzero:: functions
                assert_query_contains(
                    query,
                    "NOT startsWith(inference_function_name, 'tensorzero::')",
                );
                true
            })
            .returning(|_, _| {
                Ok(ClickHouseResponse {
                    response: String::new(),
                    metadata: ClickHouseResponseMetadata {
                        read_rows: 0,
                        written_rows: 0,
                    },
                })
            });

        let conn = ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));

        let _result = conn.list_evaluation_runs(100, 0).await.unwrap();
    }
}
25 changes: 25 additions & 0 deletions tensorzero-core/src/db/evaluation_queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,36 @@

use async_trait::async_trait;

use chrono::{DateTime, Utc};
#[cfg(test)]
use mockall::automock;
use serde::Deserialize;
use uuid::Uuid;

use crate::error::Error;

/// Database struct for deserializing evaluation run info from ClickHouse.
///
/// Rows come back as JSONEachRow from the `list_evaluation_runs` query, so
/// every field name here must match the corresponding SQL column alias.
#[derive(Debug, Deserialize)]
pub struct EvaluationRunInfoRow {
    // Unique id of the evaluation run (the 'tensorzero::evaluation_run_id' tag).
    pub evaluation_run_id: Uuid,
    // Evaluation name (the 'tensorzero::evaluation_name' tag).
    pub evaluation_name: String,
    // Name of the function that was evaluated.
    pub function_name: String,
    // Name of the variant that was evaluated.
    pub variant_name: String,
    // Dataset name (the 'tensorzero::dataset_name' tag).
    pub dataset_name: String,
    // Timestamp of the most recent inference in the run (UTC, parsed from the
    // ISO-8601 string emitted by the query's formatDateTime).
    pub last_inference_timestamp: DateTime<Utc>,
}

/// Trait for evaluation-related queries.
#[async_trait]
#[cfg_attr(test, automock)]
pub trait EvaluationQueries {
    /// Counts the total number of unique evaluation runs across all functions.
    ///
    /// # Errors
    ///
    /// Returns an [`Error`] if the underlying database query fails.
    async fn count_total_evaluation_runs(&self) -> Result<u64, Error>;

    /// Lists evaluation runs with pagination.
    ///
    /// `limit` is the maximum number of runs to return; `offset` is the number
    /// of runs to skip from the start of the result set.
    ///
    /// # Errors
    ///
    /// Returns an [`Error`] if the query fails or a row cannot be deserialized.
    async fn list_evaluation_runs(
        &self,
        limit: u32,
        offset: u32,
    ) -> Result<Vec<EvaluationRunInfoRow>, Error>;
}
Loading
Loading