Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions crates/adapters/src/controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -580,10 +580,6 @@ impl Controller {
})
}

/// Returns the `LastCheckpoint` value reported by the inner controller.
///
/// This is a thin delegation to `self.inner.last_checkpoint()`; no extra
/// work happens here.
// NOTE(review): presumably this reflects the controller's in-memory view of
// the most recent checkpoint rather than what is on storage -- confirm.
pub(crate) fn last_checkpoint(&self) -> LastCheckpoint {
self.inner.last_checkpoint()
}

/// Returns the `LastCheckpoint` value that the inner controller reports via
/// `last_checkpoint_sync()`.
///
/// This is a thin delegation; no extra work happens here.
// NOTE(review): presumably this is the last checkpoint that was synced to
// remote storage -- confirm against the inner implementation.
pub(crate) fn last_checkpoint_sync(&self) -> LastCheckpoint {
self.inner.last_checkpoint_sync()
}
Expand Down
27 changes: 15 additions & 12 deletions crates/adapters/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ use rustls::pki_types::{CertificateDer, PrivateKeyDer};
use serde::{Deserialize, Serialize};
use serde_json::json;
use std::cell::RefCell;
use std::collections::HashMap;
use std::collections::hash_map::Entry;
use std::collections::{HashMap, VecDeque};
use std::convert::Infallible;
use std::ffi::OsStr;
use std::hash::{BuildHasherDefault, DefaultHasher};
Expand Down Expand Up @@ -1195,7 +1195,7 @@ where
.service(lir)
.service(checkpoint)
.service(checkpoint_status)
.service(checkpoint_list)
.service(checkpoints)
.service(checkpoint_sync)
.service(sync_checkpoint_status)
.service(suspend)
Expand Down Expand Up @@ -1828,11 +1828,20 @@ fn samply_profile_response(last_profile: &SamplyProfile) -> HttpResponse {
}
}

/// Reads the checkpoints known to the configured storage backend.
///
/// Returns an empty queue when the server has no storage backend.
///
/// # Errors
///
/// A failure from `Checkpointer::read_checkpoints` is wrapped with
/// `ControllerError::dbsp_error` and converted into a `PipelineError`.
fn get_checkpoints(state: &ServerState) -> Result<VecDeque<CheckpointMetadata>, PipelineError> {
    // Without storage there is nothing to read: report "no checkpoints".
    let Some(backend) = &state.storage else {
        return Ok(VecDeque::new());
    };

    let stored =
        Checkpointer::read_checkpoints(&**backend).map_err(ControllerError::dbsp_error)?;
    Ok(stored)
}

#[post("/checkpoint/sync")]
async fn checkpoint_sync(state: WebData<ServerState>) -> Result<HttpResponse, PipelineError> {
let controller = state.controller()?;

let Some(last_checkpoint) = controller.last_checkpoint().id else {
let Some(last_checkpoint) = get_checkpoints(&state)?.back().map(|c| c.uuid) else {
return Ok(HttpResponse::BadRequest().json(ErrorResponse {
message: "no checkpoints found; make a POST request to `/checkpoint` to make a new checkpoint".to_string(),
error_code: "400".into(),
Expand Down Expand Up @@ -1874,15 +1883,9 @@ async fn checkpoint_status(state: WebData<ServerState>) -> impl Responder {
HttpResponse::Ok().json(state.checkpoint_state.lock().unwrap().status.clone())
}

#[get("/checkpoint_list")]
async fn checkpoint_list(state: WebData<ServerState>) -> Result<HttpResponse, PipelineError> {
let checkpoints = match &state.storage {
Some(backend) => {
Checkpointer::read_checkpoints(&**backend).map_err(ControllerError::dbsp_error)?
}
None => Default::default(),
};
Ok(HttpResponse::Ok().json(checkpoints))
/// `GET /checkpoints`: responds with the JSON serialization of the
/// checkpoints returned by `get_checkpoints`.
///
/// Any error from `get_checkpoints` is propagated as the handler's error.
#[get("/checkpoints")]
async fn checkpoints(state: WebData<ServerState>) -> Result<HttpResponse, PipelineError> {
    let all_checkpoints = get_checkpoints(&state)?;
    let response = HttpResponse::Ok().json(all_checkpoints);
    Ok(response)
}

#[get("/checkpoint/sync_status")]
Expand Down
2 changes: 1 addition & 1 deletion crates/feldera-types/src/checkpoint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ pub struct CheckpointSyncFailure {

/// Holds meta-data about a checkpoint that was taken for persistent storage
/// and recovery of a circuit's state.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
pub struct CheckpointMetadata {
/// A unique identifier for the given checkpoint.
///
Expand Down
54 changes: 54 additions & 0 deletions crates/pipeline-manager/src/api/endpoints/pipeline_interaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1157,6 +1157,60 @@ pub(crate) async fn get_checkpoint_sync_status(
.await
}

/// Get the checkpoints for a pipeline
///
/// Retrieve the current checkpoints made by a pipeline.
#[utoipa::path(
    context_path = "/v0",
    security(("JSON web token (JWT) or API key" = [])),
    params(
        ("pipeline_name" = String, Path, description = "Unique pipeline name"),
    ),
    responses(
        // The pipeline's `/checkpoints` endpoint answers with a JSON array of
        // checkpoint metadata, so the documented body is a list rather than a
        // single object.
        (status = OK
            , description = "Checkpoints retrieved successfully"
            , content_type = "application/json"
            , body = Vec<CheckpointMetadata>),
        (status = NOT_FOUND
            , description = "Pipeline with that name does not exist"
            , body = ErrorResponse
            , example = json!(examples::error_unknown_pipeline_name())),
        (status = SERVICE_UNAVAILABLE
            , body = ErrorResponse
            , examples(
                ("Pipeline is not deployed" = (value = json!(examples::error_pipeline_interaction_not_deployed()))),
                ("Pipeline is currently unavailable" = (value = json!(examples::error_pipeline_interaction_currently_unavailable()))),
                ("Disconnected during response" = (value = json!(examples::error_pipeline_interaction_disconnected()))),
                ("Response timeout" = (value = json!(examples::error_pipeline_interaction_timeout())))
            )
        ),
        (status = INTERNAL_SERVER_ERROR, body = ErrorResponse),
    ),
    tag = "Pipeline Lifecycle"
)]
#[get("/pipelines/{pipeline_name}/checkpoints")]
pub(crate) async fn get_checkpoints(
    state: WebData<ServerState>,
    client: WebData<awc::Client>,
    tenant_id: ReqData<TenantId>,
    path: web::Path<String>,
    request: HttpRequest,
) -> Result<HttpResponse, ManagerError> {
    let pipeline_name = path.into_inner();

    // The manager does not interpret the response: the request is forwarded
    // as a GET to the pipeline's own `checkpoints` endpoint together with the
    // caller's query string and no request body.
    state
        .runner
        .forward_http_request_to_pipeline_by_name(
            client.as_ref(),
            *tenant_id,
            &pipeline_name,
            Method::GET,
            "checkpoints",
            request.query_string(),
            None,
        )
        .await
}

/// Start a Samply profile
///
/// Profile the pipeline using the Samply profiler for the next `duration_secs` seconds.
Expand Down
3 changes: 3 additions & 0 deletions crates/pipeline-manager/src/api/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ It contains the following fields:
endpoints::pipeline_interaction::get_checkpoint_status,
endpoints::pipeline_interaction::sync_checkpoint,
endpoints::pipeline_interaction::get_checkpoint_sync_status,
endpoints::pipeline_interaction::get_checkpoints,
endpoints::pipeline_interaction::post_pipeline_pause,
endpoints::pipeline_interaction::post_pipeline_resume,
endpoints::pipeline_interaction::post_pipeline_activate,
Expand Down Expand Up @@ -423,6 +424,7 @@ It contains the following fields:
feldera_types::checkpoint::CheckpointStatus,
feldera_types::checkpoint::CheckpointResponse,
feldera_types::checkpoint::CheckpointFailure,
feldera_types::checkpoint::CheckpointMetadata,
feldera_types::transaction::StartTransactionResponse,
feldera_types::time_series::TimeSeries,
feldera_types::time_series::SampleStatistics,
Expand Down Expand Up @@ -546,6 +548,7 @@ fn api_scope() -> Scope {
.service(endpoints::pipeline_interaction::sync_checkpoint)
.service(endpoints::pipeline_interaction::get_checkpoint_status)
.service(endpoints::pipeline_interaction::get_checkpoint_sync_status)
.service(endpoints::pipeline_interaction::get_checkpoints)
.service(endpoints::pipeline_interaction::post_pipeline_pause)
.service(endpoints::pipeline_interaction::post_pipeline_resume)
.service(endpoints::pipeline_interaction::post_pipeline_activate)
Expand Down
168 changes: 168 additions & 0 deletions openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -2509,6 +2509,127 @@
]
}
},
"/v0/pipelines/{pipeline_name}/checkpoints": {
"get": {
"tags": [
"Pipeline Lifecycle"
],
"summary": "Get the checkpoints for a pipeline",
"description": "Retrieve the current checkpoints made by a pipeline.",
"operationId": "get_checkpoints",
"parameters": [
{
"name": "pipeline_name",
"in": "path",
"description": "Unique pipeline name",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"description": "Checkpoints retrieved successfully",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CheckpointMetadata"
}
}
}
},
"404": {
"description": "Pipeline with that name does not exist",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"message": "Unknown pipeline name 'non-existent-pipeline'",
"error_code": "UnknownPipelineName",
"details": {
"pipeline_name": "non-existent-pipeline"
}
}
}
}
},
"500": {
"description": "",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
}
}
}
},
"503": {
"description": "",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"examples": {
"Disconnected during response": {
"value": {
"message": "Error sending HTTP request to pipeline: the pipeline disconnected while it was processing this HTTP request. This could be because the pipeline either (a) encountered a fatal error or panic, (b) was stopped, or (c) experienced network issues -- retrying might help in the last case. Alternatively, check the pipeline logs. Failed request: /pause pipeline-id=N/A pipeline-name=\"my_pipeline\"",
"error_code": "PipelineInteractionUnreachable",
"details": {
"pipeline_name": "my_pipeline",
"request": "/pause",
"error": "the pipeline disconnected while it was processing this HTTP request. This could be because the pipeline either (a) encountered a fatal error or panic, (b) was stopped, or (c) experienced network issues -- retrying might help in the last case. Alternatively, check the pipeline logs."
}
}
},
"Pipeline is currently unavailable": {
"value": {
"message": "Error sending HTTP request to pipeline: deployment status is currently 'unavailable' -- wait for it to become 'running' or 'paused' again Failed request: /pause pipeline-id=N/A pipeline-name=\"my_pipeline\"",
"error_code": "PipelineInteractionUnreachable",
"details": {
"pipeline_name": "my_pipeline",
"request": "/pause",
"error": "deployment status is currently 'unavailable' -- wait for it to become 'running' or 'paused' again"
}
}
},
"Pipeline is not deployed": {
"value": {
"message": "Unable to interact with pipeline because the deployment status (stopped) indicates it is not (yet) fully provisioned pipeline-id=N/A pipeline-name=\"my_pipeline\"",
"error_code": "PipelineInteractionNotDeployed",
"details": {
"pipeline_name": "my_pipeline",
"status": "Stopped",
"desired_status": "Provisioned"
}
}
},
"Response timeout": {
"value": {
"message": "Error sending HTTP request to pipeline: timeout (10s) was reached: this means the pipeline took too long to respond -- this can simply be because the request was too difficult to process in time, or other reasons (e.g., deadlock): the pipeline logs might contain additional information (original send request error: Timeout while waiting for response) Failed request: /pause pipeline-id=N/A pipeline-name=\"my_pipeline\"",
"error_code": "PipelineInteractionUnreachable",
"details": {
"pipeline_name": "my_pipeline",
"request": "/pause",
"error": "timeout (10s) was reached: this means the pipeline took too long to respond -- this can simply be because the request was too difficult to process in time, or other reasons (e.g., deadlock): the pipeline logs might contain additional information (original send request error: Timeout while waiting for response)"
}
}
}
}
}
}
}
},
"security": [
{
"JSON web token (JWT) or API key": []
}
]
}
},
"/v0/pipelines/{pipeline_name}/circuit_json_profile": {
"get": {
"tags": [
Expand Down Expand Up @@ -6509,6 +6630,53 @@
}
}
},
"CheckpointMetadata": {
"type": "object",
"description": "Holds meta-data about a checkpoint that was taken for persistent storage\nand recovery of a circuit's state.",
"required": [
"uuid",
"fingerprint"
],
"properties": {
"fingerprint": {
"type": "integer",
"format": "int64",
"description": "Fingerprint of the circuit at the time of the checkpoint.",
"minimum": 0
},
"identifier": {
"type": "string",
"description": "An optional name for the checkpoint.",
"nullable": true
},
"processed_records": {
"type": "integer",
"format": "int64",
"description": "Total number of records processed.",
"nullable": true,
"minimum": 0
},
"size": {
"type": "integer",
"format": "int64",
"description": "Total size of the checkpoint files in bytes.",
"nullable": true,
"minimum": 0
},
"steps": {
"type": "integer",
"format": "int64",
"description": "Total number of steps made.",
"nullable": true,
"minimum": 0
},
"uuid": {
"type": "string",
"format": "uuid",
"description": "A unique identifier for the given checkpoint.\n\nThis is used to identify the checkpoint in the file-system hierarchy."
}
}
},
"CheckpointResponse": {
"type": "object",
"description": "Response to a checkpoint request.",
Expand Down
11 changes: 11 additions & 0 deletions python/feldera/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from feldera.rest.sql_view import SQLView
from feldera.runtime_config import RuntimeConfig
from feldera.stats import PipelineStatistics
from feldera.types import CheckpointMetadata


class Pipeline:
Expand Down Expand Up @@ -1493,3 +1494,13 @@ def wait_for_token(self, token: str):
"""

self.client.wait_for_token(self.name, token)

def checkpoints(self) -> List[CheckpointMetadata]:
    """
    Fetch the checkpoints recorded for this pipeline.

    :return: One ``CheckpointMetadata`` per entry returned by the client's
        ``get_checkpoints`` call, in the same order.
    """

    raw_entries = self.client.get_checkpoints(self.name)

    parsed = []
    for entry in raw_entries:
        parsed.append(CheckpointMetadata.from_dict(entry))
    return parsed
3 changes: 3 additions & 0 deletions python/feldera/rest/feldera_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1328,3 +1328,6 @@ def get_cluster_event(self, event_id: str, selector: str = "status") -> dict:

def rebalance_pipeline(self, pipeline_name: str):
    """
    Send a POST request to the ``rebalance`` endpoint of the named pipeline.

    :param pipeline_name: Name of the pipeline, interpolated into the URL path.
    """
    endpoint = f"/pipelines/{pipeline_name}/rebalance"
    self.http.post(path=endpoint)

def get_checkpoints(self, pipeline_name: str):
    """
    Send a GET request to the ``checkpoints`` endpoint of the named pipeline.

    :param pipeline_name: Name of the pipeline, interpolated into the URL path.
    :return: Whatever the HTTP helper returns for the request, unchanged.
    """
    endpoint = f"/pipelines/{pipeline_name}/checkpoints"
    response = self.http.get(path=endpoint)
    return response
Loading