From 1c77415e5786b1adf466942276c29c65f541b99c Mon Sep 17 00:00:00 2001
From: michaelj
Date: Wed, 15 Oct 2025 12:11:34 +0100
Subject: [PATCH 01/32] feat: support geospatial benchmark

- Add backend arg to execute benchmark
- Add execute_geospatial_benchmark function

This uses the geospatial_valencia.jsonl dataset
---
 .../execute_benchmark.py                     | 73 ++++++++++++++++---
 1 file changed, 62 insertions(+), 11 deletions(-)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
index af8e2c34..a65463f9 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
@@ -17,6 +17,7 @@ def execute_benchmark(
     base_url: str,
     model: str,
     data_set: str,
+    backend: str = "openai",
     interpreter: str = "python",
     num_prompts: int = 500,
     request_rate: int | None = None,
@@ -68,21 +69,21 @@
     request += (
         # changing from script invocation to cli invocation
         # f"{interpreter} {code} --backend openai --base-url {base_url} --dataset-name {data_set} "
-        f"vllm bench serve --backend openai --base-url {base_url} --dataset-name {data_set} "
+        f"vllm bench serve --backend {backend} --base-url {base_url} --dataset-name {data_set} "
         f"--model {model} --seed 12345 --num-prompts {num_prompts!s} --save-result --metric-percentiles "
         f'"25,75,99" --percentile-metrics "ttft,tpot,itl,e2el" --result-dir . --result-filename {f_name} '
         f"--burstiness {burstiness} "
     )
     if data_set_path is not None:
-        request += f"--dataset-path {data_set_path} "
+        request += f" --dataset-path {data_set_path} "
     if request_rate is not None:
-        request += f"--request-rate {request_rate!s} "
+        request += f" --request-rate {request_rate!s} "
     if max_concurrency is not None:
-        request += f"--max-concurrency {max_concurrency!s}"
+        request += f" --max-concurrency {max_concurrency!s} "
     if custom_args is not None:
         for key, value in custom_args.items():
-            request += f"{key} {value!s} "
+            request += f" {key} {value!s} "
 
     timeout = retries_timeout
     logger.debug(f"Command line: {request}")
@@ -149,14 +150,64 @@ def execute_random_benchmark(
     )
 
 
+def execute_geospatial_benchmark(
+    base_url: str,
+    model: str,
+    num_prompts: int = 500,
+    request_rate: int | None = None,
+    max_concurrency: int | None = None,
+    hf_token: str | None = None,
+    benchmark_retries: int = 3,
+    retries_timeout: int = 5,
+    burstiness: float = 1,
+    interpreter: str = "python",
+) -> dict[str, Any]:
+    """
+    Execute benchmark with the bundled geospatial dataset
+    :param base_url: url for vllm endpoint
+    :param model: model
+    :param num_prompts: number of prompts to send (total number of requests)
+    :param request_rate: number of requests per second (None means no rate limiting)
+    :param max_concurrency: maximum number of concurrent requests
+    :param hf_token: huggingface token
+    :param benchmark_retries: number of benchmark execution retries
+    :param retries_timeout: initial timeout between retries
+    :return: results dictionary
+    """
+    from importlib import resources
+
+    data_set_path = resources.path(
+        "ado_actuators.vllm_performance",
+        "geospatial_valencia.jsonl",
+    )
+    return execute_benchmark(
+        base_url=base_url,
+        backend="io-processor-plugin",
+        model=model,
+        data_set="custom",
+        interpreter=interpreter,
+        num_prompts=num_prompts,
+        request_rate=request_rate,
+
max_concurrency=max_concurrency, + hf_token=hf_token, + benchmark_retries=benchmark_retries, + retries_timeout=retries_timeout, + burstiness=burstiness, + custom_args={ + "--dataset-path": data_set_path, + "--endpoint": "/pooling", + "--skip-tokenizer-init": True, + }, + ) + + if __name__ == "__main__": - results = execute_benchmark( + results = execute_geospatial_benchmark( interpreter="python3.10", - base_url="http://localhost:28015", - data_set="random", - model="openai/gpt-oss-20b", - request_rate=None, - max_concurrency=None, + base_url="http://localhost:8000", + model="ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11", + request_rate=2, + max_concurrency=10, hf_token=os.getenv("HF_TOKEN"), num_prompts=100, ) From 52ccff47f116f9486f0c65214b12e3c116dd4706 Mon Sep 17 00:00:00 2001 From: michaelj Date: Wed, 15 Oct 2025 12:13:55 +0100 Subject: [PATCH 02/32] feat: add geospatial experiments endpoint and full --- .../vllm_performance/experiment_executor.py | 85 ++++-- .../vllm_performance/experiments.yaml | 266 ++++++++++++++++++ 2 files changed, 323 insertions(+), 28 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 3da664a8..2e4f213c 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -22,6 +22,7 @@ VLLMDtype, ) from ado_actuators.vllm_performance.vllm_performance_test.execute_benchmark import ( + execute_geospatial_benchmark, execute_random_benchmark, ) from ray.actor import ActorHandle @@ -279,20 +280,34 @@ def run_resource_and_workload_experiment( start = time.time() result = None try: - result = execute_random_benchmark( - base_url=base_url, - model=values.get("model"), - interpreter=actuator_parameters.interpreter, - num_prompts=int(values.get("num_prompts")), - request_rate=request_rate, - max_concurrency=max_concurrency, - hf_token=actuator_parameters.hf_token, - benchmark_retries=actuator_parameters.benchmark_retries, - retries_timeout=actuator_parameters.retries_timeout, - number_input_tokens=int(values.get("number_input_tokens")), - max_output_tokens=int(values.get("max_output_tokens")), - burstiness=float(values.get("burstiness")), - ) + if experiment.identifier == "performance-testing-geospatial-full": + result = execute_geospatial_benchmark( + base_url=base_url, + model=values.get("model"), + interpreter=actuator_parameters.interpreter, + num_prompts=int(values.get("num_prompts")), + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=actuator_parameters.hf_token, + benchmark_retries=actuator_parameters.benchmark_retries, + retries_timeout=actuator_parameters.retries_timeout, + burstiness=float(values.get("burstiness")), + ) + else: + result = execute_random_benchmark( + base_url=base_url, + model=values.get("model"), + interpreter=actuator_parameters.interpreter, + num_prompts=int(values.get("num_prompts")), + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=actuator_parameters.hf_token, + benchmark_retries=actuator_parameters.benchmark_retries, + retries_timeout=actuator_parameters.retries_timeout, + number_input_tokens=int(values.get("number_input_tokens")), + max_output_tokens=int(values.get("max_output_tokens")), + burstiness=float(values.get("burstiness")), + ) logger.debug(f"benchmark executed in {time.time() - 
start} sec") except Exception as e: logger.error(f"Failed to execute VLLM performance test {e}") @@ -379,20 +394,34 @@ def run_workload_experiment( error = None measured_values = [] try: - result = execute_random_benchmark( - base_url=values.get("endpoint"), - model=values.get("model"), - interpreter=actuator_parameters.interpreter, - num_prompts=int(values.get("num_prompts")), - request_rate=request_rate, - max_concurrency=max_concurrency, - hf_token=actuator_parameters.hf_token, - benchmark_retries=actuator_parameters.benchmark_retries, - retries_timeout=actuator_parameters.retries_timeout, - number_input_tokens=int(values.get("number_input_tokens")), - max_output_tokens=int(values.get("max_output_tokens")), - burstiness=float(values.get("burstiness")), - ) + if experiment.identifier == "performance-testing-geospatial-endpoint": + result = execute_geospatial_benchmark( + base_url=values.get("endpoint"), + model=values.get("model"), + interpreter=actuator_parameters.interpreter, + num_prompts=int(values.get("num_prompts")), + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=actuator_parameters.hf_token, + benchmark_retries=actuator_parameters.benchmark_retries, + retries_timeout=actuator_parameters.retries_timeout, + burstiness=float(values.get("burstiness")), + ) + else: + result = execute_random_benchmark( + base_url=values.get("endpoint"), + model=values.get("model"), + interpreter=actuator_parameters.interpreter, + num_prompts=int(values.get("num_prompts")), + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=actuator_parameters.hf_token, + benchmark_retries=actuator_parameters.benchmark_retries, + retries_timeout=actuator_parameters.retries_timeout, + number_input_tokens=int(values.get("number_input_tokens")), + max_output_tokens=int(values.get("max_output_tokens")), + burstiness=float(values.get("burstiness")), + ) except Exception as e: logger.error(f"Failed to execute VLLM performance test {e}") error = f"Failed to execute VLLM performance test {e}" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml index 1d03b13a..40aa9777 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml @@ -319,3 +319,269 @@ performance_testing-endpoint: - identifier: "p99_e2el_ms" metadata: description: 'Test inference performance of a model served by vLLM endpoint across inference workload configurations' +performance_testing-geospatial-endpoint: + identifier: performance-testing-geospatial-endpoint + actuatorIdentifier: "vllm_performance" + requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values + - identifier: 'model' + metadata: + description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"] + - identifier: 'endpoint' + metadata: + description: 'The endpoint(s) to test' + propertyDomain: + variableType: "UNKNOWN_VARIABLE_TYPE" + - identifier: 'request_rate' + metadata: + description: "The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1,1000] + interval: 1 # -1 means send all requests at time 0 + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,10001] + interval: 1 + - identifier: 'burstiness' + metadata: + description: "The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + defaultParameterization: + - value: 100 + property: + identifier: 'num_prompts' + - value: -1 + property: + identifier: 'max_concurrency' + - value: 1.0 + property: + identifier: 'burstiness' + # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "total_input_tokens" + - identifier: "total_output_tokens" + - identifier: "request_throughput" + - identifier: "output_throughput" + - identifier: "total_token_throughput" + - identifier: "mean_ttft_ms" + - identifier: "median_ttft_ms" + - identifier: "std_ttft_ms" + - identifier: "p25_ttft_ms" + - identifier: "p50_ttft_ms" + - identifier: "p75_ttft_ms" + - identifier: "p99_ttft_ms" + - identifier: "mean_tpot_ms" + - identifier: "median_tpot_ms" + - identifier: "std_tpot_ms" + - identifier: "p25_tpot_ms" + - identifier: "p50_tpot_ms" + - identifier: "p75_tpot_ms" + - identifier: "p99_tpot_ms" + - identifier: "mean_itl_ms" + - identifier: "median_itl_ms" + - identifier: "std_itl_ms" + - identifier: "p25_itl_ms" + - identifier: "p50_itl_ms" + - identifier: "p75_itl_ms" + - identifier: "p99_itl_ms" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: "std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'Test inference performance of a geospatial model served by vLLM endpoint across inference workload configurations' +performance_testing-geospatial-full: + identifier: performance-testing-geospatial-full + actuatorIdentifier: "vllm_performance" + requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values + - identifier: 'model' + metadata: + description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] + - identifier: 'request_rate' + metadata: + description: "(benchmark) The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1,1000] + interval: 1 # -1 means send all requests at time 0 + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "(benchmark) The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,10001] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "(benchmark) The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + - identifier: 'burstiness' + metadata: + description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: image + metadata: + description: "(deployment) Docker image to use to create vllm deployments" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] + - identifier: n_cpus + metadata: + description: "(deployment) the number of CPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 1,17 ] + interval: 1 + - identifier: memory + metadata: + description: "(deployment) the amount of memory to allocate to vLLM pod" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "64Gi", "128Gi", "256Gi" ] + - identifier: dtype + metadata: + description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] + - identifier: 'gpu_memory_utilization' + metadata: + description: "(deployment) The fraction of GPU memory to be used for the model executor," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ .5, .75, .9 ] + - identifier: 'cpu_offload' + metadata: + description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 
0 means all weights are on GPU," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ 0, 8, 16, 24, 32 ] + - identifier: 'max_num_seq' + metadata: + description: "(deployment) Maximum number of sequences per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [32,2049] + interval: 32 + - identifier: 'n_gpus' + metadata: + description: "(deployment) Number of GPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,9] + interval: 1 + - identifier: 'gpu_type' + metadata: + description: "(deployment) The GPU type to use" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] + defaultParameterization: + - property: + identifier: 'image' + value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" + - property: + identifier: n_cpus + value: 8 + - property: + identifier: + memory + value: "128Gi" + - property: + identifier: dtype + value: "auto" + - property: + identifier: 'num_prompts' + value: 500 + - property: + identifier: 'max_concurrency' + value: -1 + - property: + identifier: 'burstiness' + value: 1.0 + - property: + identifier: 'gpu_memory_utilization' + value: .9 + - property: + identifier: 'cpu_offload' + value: 0 + - property: + identifier: 'max_num_seq' + value: 256 + - property: + identifier: 'n_gpus' + value: 1 + - property: + identifier: 'gpu_type' + value: 'NVIDIA-A100-80GB-PCIe' + # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "total_input_tokens" + - identifier: "total_output_tokens" + - identifier: "request_throughput" + - identifier: "output_throughput" + - identifier: "total_token_throughput" + - identifier: "mean_ttft_ms" + - identifier: "median_ttft_ms" + - identifier: "std_ttft_ms" + - identifier: "p25_ttft_ms" + - identifier: "p50_ttft_ms" + - identifier: "p75_ttft_ms" + - identifier: "p99_ttft_ms" + - identifier: "mean_tpot_ms" + - identifier: "median_tpot_ms" + - identifier: "std_tpot_ms" + - identifier: "p25_tpot_ms" + - identifier: "p50_tpot_ms" + - identifier: "p75_tpot_ms" + - identifier: "p99_tpot_ms" + - identifier: "mean_itl_ms" + - identifier: "median_itl_ms" + - identifier: "std_itl_ms" + - identifier: "p25_itl_ms" + - identifier: "p50_itl_ms" + - identifier: "p75_itl_ms" + - identifier: "p99_itl_ms" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: "std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'VLLM performance testing across compute resource and workload configuration' \ No newline at end of file From 98591b581dcdfb69b555ecfde48fec9fa4bf36d1 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Wed, 15 Oct 2025 20:49:41 +0100 Subject: [PATCH 03/32] various fixes to the vllm_performance actuator Signed-off-by: Christian Pinto --- .../ado_actuators/vllm_performance/actuator.py | 2 +- .../vllm_performance/experiment_executor.py | 4 +++- .../vllm_performance/experiments.yaml | 14 ++++++++++++-- .../k8/yaml_support/build_components.py | 6 +++++- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py index a9e5dc30..a4fea988 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py +++ 
b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py @@ -176,7 +176,7 @@ async def submit( if experiment.deprecated is True: raise DeprecatedExperimentError(f"Experiment {experiment} is deprecated") - if experiment.identifier == "performance-testing-full": + if experiment.identifier in ["performance-testing-full", "performance-testing-geospatial-full"]: if not self.env_manager: raise MissingConfigurationForExperimentError( f"Actuator configuration did not contain sufficient information for a kubernetes environment manager to be created. " diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 2e4f213c..f3594150 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -7,6 +7,7 @@ import subprocess import sys import time +import traceback import ray from ado_actuators.vllm_performance.actuator_parameters import ( @@ -152,6 +153,7 @@ def _create_environment( logger.error( f"Attempt {attempt}. Failed to create test environment {e}" ) + logger.error(traceback.format_exception(e)) error = f"Failed to create test environment {e}" time.sleep(tmout) tmout *= 2 @@ -310,7 +312,7 @@ def run_resource_and_workload_experiment( ) logger.debug(f"benchmark executed in {time.time() - start} sec") except Exception as e: - logger.error(f"Failed to execute VLLM performance test {e}") + logger.error(traceback.format_exception(e)) error = f"Failed to execute VLLM performance test {e}" finally: if pf is not None: diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml index 40aa9777..0c1859ca 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml @@ -420,7 +420,7 @@ performance_testing-geospatial-full: metadata: description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] - identifier: 'request_rate' metadata: @@ -455,7 +455,7 @@ performance_testing-geospatial-full: metadata: description: "(deployment) Docker image to use to create vllm deployments" propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] - identifier: n_cpus metadata: @@ -495,6 +495,13 @@ performance_testing-geospatial-full: variableType: 'DISCRETE_VARIABLE_TYPE' domainRange: [32,2049] interval: 32 + - identifier: 'max_batch_tokens' + metadata: + description: "(deployment) maximum number of batched tokens per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 8192, 32769] + interval: 1024 - identifier: 'n_gpus' metadata: description: "(deployment) Number of GPUs to use" @@ -540,6 +547,9 @@ performance_testing-geospatial-full: - property: identifier: 'max_num_seq' value: 256 + - property: + identifier: 'max_batch_tokens' + value: 16384 - property: identifier: 'n_gpus' value: 1 diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py index 1e2355a1..511a2ea6 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py @@ -41,7 +41,11 @@ def get_k8_name(model: str) -> str: :return: k8 unique name for a given LLM model """ m_parts = model.split("/") - return f"vllm-{m_parts[-1].lower()}-{uuid.uuid4().hex}".replace(".", "-") + + # Making sure the resulting name is not longer than 63 characters as it is + # the maximum allowed for a name in kubernetes. 
+ name_prefix = m_parts[-1][:min(len(m_parts[-1]), 21)].rstrip("-") + return f"vllm-{name_prefix.lower()}-{uuid.uuid4()}".replace(".", "-") @staticmethod def _adjust_file_name(f: str) -> str: From bd62781809e02f3b540c6c778f82e167334e0cd3 Mon Sep 17 00:00:00 2001 From: michaelj Date: Wed, 15 Oct 2025 22:48:32 +0100 Subject: [PATCH 04/32] fix: add max_batch_tokens --- .../ado_actuators/vllm_performance/experiments.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml index 40aa9777..cd67986a 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml @@ -488,6 +488,13 @@ performance_testing-geospatial-full: propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' values: [ 0, 8, 16, 24, 32 ] + - identifier: 'max_batch_tokens' + metadata: + description: "(deployment) maximum size of the sum of the 1st image dimensions per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 1024, 32769 ] + interval: 1024 - identifier: 'max_num_seq' metadata: description: "(deployment) Maximum number of sequences per iteration" @@ -546,6 +553,9 @@ performance_testing-geospatial-full: - property: identifier: 'gpu_type' value: 'NVIDIA-A100-80GB-PCIe' + - property: + identifier: 'max_batch_tokens' + value: 16384 # measurements targetProperties: - identifier: "duration" From c1dec4aab0e51534e35cf2da7db4285f38d0b5fe Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Tue, 28 Oct 2025 15:49:18 +0000 Subject: [PATCH 05/32] Updated vllm performance actuator to support geospatial Signed-off-by: Christian Pinto --- .../vllm_performance/experiment_executor.py | 3 + .../vllm_performance/experiments.yaml | 55 +++++++++++++++++- .../vllm_performance/k8/create_environment.py | 11 ++-- .../vllm_performance/k8/manage_components.py | 6 ++ .../k8/yaml_support/build_components.py | 56 +++++++++++++++---- .../execute_benchmark.py | 42 +++++++------- 6 files changed, 135 insertions(+), 38 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index f3594150..9be6921a 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -144,6 +144,9 @@ def _create_environment( reuse_deployment=False, pvc_name=actuator.pvc_template, namespace=actuator.namespace, + skip_tokenizer_init=values.get("skip_tokenizer_init"), + enforce_eager=values.get("enforce_eager"), + io_processor_plugin=values.get("io_processor_plugin") ) # Update manager env_manager.done_creating.remote(definition=definition) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml index 072a936d..69d3460d 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml @@ -120,6 +120,24 @@ performance_testing-full: propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" values: [ 'NVIDIA-A100-80GB-PCIe', 
'NVIDIA-A100-SXM4-80GB' ] + - identifier: 'skip_tokenizer_init' + metadata: + description: "(deployment) skip tokenizer intialization" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'enforce_eager' + metadata: + description: "(deployment) enforce pytorch eager mode" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'io_processor_plugin' + metadata: + description: 'IO Pocessor plugin to load for the model' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ None, "terratorch_segmentation" ] defaultParameterization: - property: identifier: 'image' @@ -167,6 +185,15 @@ performance_testing-full: - property: identifier: 'gpu_type' value: 'NVIDIA-A100-80GB-PCIe' + - property: + identifier: 'skip_tokenizer_init' + value: False + - property: + identifier: 'enforce_eager' + value: False + - property: + identifier: 'io_processor_plugin' + value: None # measurements targetProperties: - identifier: "duration" @@ -522,6 +549,24 @@ performance_testing-geospatial-full: propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] + - identifier: 'skip_tokenizer_init' + metadata: + description: "(deployment) skip tokenizer intialization" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'enforce_eager' + metadata: + description: "(deployment) enforce pytorch eager mode" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'io_processor_plugin' + metadata: + description: 'IO Pocessor plugin to load for the model' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ None, "terratorch_segmentation" ] defaultParameterization: - property: identifier: 'image' @@ -564,8 +609,14 @@ performance_testing-geospatial-full: identifier: 'gpu_type' value: 'NVIDIA-A100-80GB-PCIe' - property: - identifier: 'max_batch_tokens' - value: 16384 + identifier: 'skip_tokenizer_init' + value: True + - property: + identifier: 'enforce_eager' + value: True + - property: + identifier: 'io_processor_plugin' + value: "terratorch_segmentation" # measurements targetProperties: - identifier: "duration" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py index 87ee719d..3f0a0809 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py @@ -40,6 +40,9 @@ def create_test_environment( reuse_pvc: bool = True, pvc_name: str = "vllm-support", namespace: str = "vllm-testing", + enforce_eager: bool = False, + skip_tokenizer_init: bool = False, + io_processor_plugin: str | None = None ) -> None: """ Create test deployment @@ -113,15 +116,13 @@ def create_test_environment( n_gpus=n_gpus, n_cpus=n_cpus, memory=memory, - max_batch_tokens=max_batch_tokens, - gpu_memory_utilization=gpu_memory_utilization, - dtype=dtype, - cpu_offload=cpu_offload, - max_num_seq=max_num_seq, template=deployment_template, claim_name=pvc_name, hf_token=hf_token, reuse=reuse_deployment, + enforce_eager=enforce_eager, + skip_tokenizer_init=skip_tokenizer_init, + io_processor_plugin=io_processor_plugin ) logger.debug("deployment created") c_manager.wait_deployment_ready(k8_name=k8_name) diff --git 
a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py index dfef4725..cd77a444 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py @@ -231,6 +231,9 @@ def create_deployment( claim_name: str | None = None, hf_token: str | None = None, reuse: bool = False, + enforce_eager: bool = False, + skip_tokenizer_init: bool = False, + io_processor_plugin: str | None = None ) -> None: """ create deployment for model @@ -293,6 +296,9 @@ def create_deployment( template=template, claim_name=claim_name, hf_token=hf_token, + enforce_eager=enforce_eager, + skip_tokenizer_init=skip_tokenizer_init, + io_processor_plugin=io_processor_plugin, ), ) except ApiException as e: diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py index 511a2ea6..b4069ace 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py @@ -77,6 +77,9 @@ def deployment_yaml( template: str = "deployment.yaml", claim_name: str | None = None, hf_token: str | None = None, + enforce_eager: bool = False, + skip_tokenizer_init: bool = False, + io_processor_plugin: str | None = None, ) -> dict[str, Any]: """ Generate deployment yaml @@ -138,6 +141,30 @@ def deployment_yaml( [{"name": PVC_NAME, "persistentVolumeClaim": {"claimName": claim_name}}] ) + vllm_serve_args = [ + model, + "--max-num-batched-tokens", + f"{max_batch_tokens}", + "--gpu-memory-utilization", + f"{gpu_memory_utilization}", + "--cpu-offload-gb", + f"{cpu_offload}", + "--max-num-seq", + f"{max_num_seq}", + "--tensor-parallel-size", + f"{n_gpus}", + "--dtype", + dtype.value, + ] + + if enforce_eager: + vllm_serve_args.append("--skip-tokenizer-init") + if skip_tokenizer_init: + vllm_serve_args.append("--enforce-eager") + if io_processor_plugin: + vllm_serve_args.append("--io-processor-plugin") + vllm_serve_args.append(io_processor_plugin) + # container container = spec["containers"][0] # image @@ -151,19 +178,25 @@ def deployment_yaml( limits["cpu"] = str(n_cpus) limits["memory"] = memory limits["nvidia.com/gpu"] = str(n_gpus) + + #command + container["command"] = ["vllm", "serve"] + container["args"] = vllm_serve_args # env variables to to set parameters for docker execution - container["env"] = [ - {"name": "MODEL", "value": model}, - {"name": "GPU_MEMORY_UTILIZATION", "value": str(gpu_memory_utilization)}, - {"name": "DTYPE", "value": dtype.value}, - {"name": "CPU_OFFLOAD_GB", "value": str(cpu_offload)}, - {"name": "MAX_NUM_BATCHED_TOKENS", "value": str(max_batch_tokens)}, - {"name": "MAX_NUM_SEQ", "value": str(max_num_seq)}, - {"name": "TENSOR_PARALLEL_SIZE", "value": str(n_gpus)}, - ] + # container["env"] = [ + # {"name": "MODEL", "value": model}, + # {"name": "GPU_MEMORY_UTILIZATION", "value": str(gpu_memory_utilization)}, + # {"name": "DTYPE", "value": dtype.value}, + # {"name": "CPU_OFFLOAD_GB", "value": str(cpu_offload)}, + # {"name": "MAX_NUM_BATCHED_TOKENS", "value": str(max_batch_tokens)}, + # {"name": "MAX_NUM_SEQ", "value": str(max_num_seq)}, + # {"name": 
"TENSOR_PARALLEL_SIZE", "value": str(n_gpus)}, + # ] if hf_token is not None: - container["env"].extend([{"name": "HF_TOKEN", "value": hf_token}]) + container["env"]=[{"name": "HF_TOKEN", "value": hf_token}] if claim_name is not None: + if "env" not in container: + container["env"] = [] container["env"].extend( [ { @@ -181,6 +214,9 @@ def deployment_yaml( ) # return + + import json + print(json.dumps(deployment_yaml, indent=2)) return deployment_yaml @staticmethod diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index a65463f9..952cb26a 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -176,29 +176,29 @@ def execute_geospatial_benchmark( """ from importlib import resources - data_set_path = resources.path( + with resources.path( "ado_actuators.vllm_performance", "geospatial_valencia.jsonl", - ) - return execute_benchmark( - base_url=base_url, - backend="io-processor-plugin", - model=model, - data_set="custom", - interpreter=interpreter, - num_prompts=num_prompts, - request_rate=request_rate, - max_concurrency=max_concurrency, - hf_token=hf_token, - benchmark_retries=benchmark_retries, - retries_timeout=retries_timeout, - burstiness=burstiness, - custom_args={ - "--dataset-path": data_set_path, - "--endpoint": "/pooling", - "--skip-tokenizer-init": True, - }, - ) + ) as data_set_path: + return execute_benchmark( + base_url=base_url, + backend="io-processor-plugin", + model=model, + data_set="custom", + interpreter=interpreter, + num_prompts=num_prompts, + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=hf_token, + benchmark_retries=benchmark_retries, + retries_timeout=retries_timeout, + burstiness=burstiness, + custom_args={ + "--dataset-path": data_set_path, + "--endpoint": "/pooling", + "--skip-tokenizer-init": True, + }, + ) if __name__ == "__main__": From 2e030282585b6baacc43c06ee0ac98803f9dbc45 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 30 Oct 2025 09:32:29 +0000 Subject: [PATCH 06/32] Termorarily avoiding cpu14 Signed-off-by: Christian Pinto --- .../vllm_performance/k8/yaml_support/deployment.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml index 2b90302a..25851982 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml @@ -27,6 +27,15 @@ spec: - name: http containerPort: 8000 protocol: TCP + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - operator: NotIn + key: kubernetes.io/hostname + values: + - adcpu014 # funny node startupProbe: exec: command: From 80c68c4c0c0113ffbe764f72bff7a60d527c2d9d Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 30 Oct 2025 09:42:58 +0000 Subject: [PATCH 07/32] Termorarily avoiding cpu14 Signed-off-by: Christian Pinto --- .../k8/yaml_support/deployment.yaml | 18 +++++++++--------- 1 file changed, 9 
insertions(+), 9 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml index 25851982..bc9c10a6 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml @@ -19,6 +19,15 @@ spec: app.kubernetes.io/name: vllm app.kubernetes.io/instance: vllm-testing spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - operator: NotIn + key: kubernetes.io/hostname + values: + - adcpu014 # funny node containers: - name: vllm image: "vllm/vllm-openai:v0.6.3" @@ -27,15 +36,6 @@ spec: - name: http containerPort: 8000 protocol: TCP - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - operator: NotIn - key: kubernetes.io/hostname - values: - - adcpu014 # funny node startupProbe: exec: command: From 592e17940ffae7971936b268678944bdcf8c9107 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 30 Oct 2025 11:55:19 +0000 Subject: [PATCH 08/32] Added india dataset Signed-off-by: Christian Pinto --- .../ado_actuators/vllm_performance/geospatial_india.jsonl | 1 + .../vllm_performance/vllm_performance_test/execute_benchmark.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl new file mode 100644 index 00000000..693bbc09 --- /dev/null +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl @@ -0,0 +1 @@ +{"prompt":{"data": {"data": "https://huggingface.co/christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM/resolve/main/India_900498_S2Hand.tif","data_format": "url","out_data_format": "b64_json","indices": [1, 2, 3, 8, 11, 12]},"priority": 0,"softmax": false}} diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index 952cb26a..cf3ed96a 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -178,7 +178,7 @@ def execute_geospatial_benchmark( with resources.path( "ado_actuators.vllm_performance", - "geospatial_valencia.jsonl", + "geospatial_india.jsonl", ) as data_set_path: return execute_benchmark( base_url=base_url, From 94c7490b34edf6d59918087a431f0683ee2b3b87 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 30 Oct 2025 14:45:22 +0000 Subject: [PATCH 09/32] Fixed BaseSamplerConfig Signed-off-by: Christian Pinto --- orchestrator/modules/operators/randomwalk.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/orchestrator/modules/operators/randomwalk.py b/orchestrator/modules/operators/randomwalk.py index 1018c17a..aa050494 100644 --- a/orchestrator/modules/operators/randomwalk.py +++ b/orchestrator/modules/operators/randomwalk.py 
@@ -207,8 +207,11 @@ def sampler(self) -> BaseSampler | GroupSampler: sampler = SequentialSampleSelector() case CombinedWalkModeEnum.RANDOMGROUPED: sampler = RandomGroupSampleSelector(group=self.grouping) + sampler = RandomGroupSampleSelector(group=self.grouping) case CombinedWalkModeEnum.SEQUENTIALGROUPED: - sampler = SequentialGroupSampleSelector(group=self.grouping) + sampler = SequentialGroupSampleSelector( + group=self.grouping + ) case _: # this can never happen, as we are validating this above pass @@ -218,10 +221,12 @@ def sampler(self) -> BaseSampler | GroupSampler: case CombinedWalkModeEnum.RANDOMGROUPED: sampler = ExplicitEntitySpaceGroupedGridSampleGenerator( mode=WalkModeEnum.RANDOM, group=self.grouping + mode=WalkModeEnum.RANDOM, group=self.grouping ) case CombinedWalkModeEnum.SEQUENTIALGROUPED: sampler = ExplicitEntitySpaceGroupedGridSampleGenerator( mode=WalkModeEnum.SEQUENTIAL, group=self.grouping + mode=WalkModeEnum.SEQUENTIAL, group=self.grouping ) case CombinedWalkModeEnum.RANDOM: sampler = ExplicitEntitySpaceGridSampleGenerator( From 3fd83b83cb6988031918a22d835bcd3301255e9d Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 30 Oct 2025 15:48:05 +0000 Subject: [PATCH 10/32] Some changes to the vllmperformance experiments Signed-off-by: Christian Pinto --- .../ado_actuators/vllm_performance/experiments.yaml | 9 --------- .../k8/yaml_support/build_components.py | 10 ++++------ 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml index 69d3460d..53ee0c33 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml @@ -132,12 +132,6 @@ performance_testing-full: propertyDomain: variableType: BINARY_VARIABLE_TYPE values: [True, False] - - identifier: 'io_processor_plugin' - metadata: - description: 'IO Pocessor plugin to load for the model' - propertyDomain: - variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ None, "terratorch_segmentation" ] defaultParameterization: - property: identifier: 'image' @@ -191,9 +185,6 @@ performance_testing-full: - property: identifier: 'enforce_eager' value: False - - property: - identifier: 'io_processor_plugin' - value: None # measurements targetProperties: - identifier: "duration" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py index b4069ace..a3277a08 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py @@ -6,6 +6,7 @@ import sys import uuid from enum import Enum +import json from typing import Any import yaml @@ -158,9 +159,9 @@ def deployment_yaml( ] if enforce_eager: - vllm_serve_args.append("--skip-tokenizer-init") - if skip_tokenizer_init: vllm_serve_args.append("--enforce-eager") + if skip_tokenizer_init: + vllm_serve_args.append("--skip-tokenizer-init") if io_processor_plugin: vllm_serve_args.append("--io-processor-plugin") vllm_serve_args.append(io_processor_plugin) @@ -213,10 +214,7 @@ def deployment_yaml( ] ) - # return - - import json - print(json.dumps(deployment_yaml, indent=2)) + 
logger.debug(json.dumps(deployment_yaml, indent=2)) return deployment_yaml @staticmethod From 90ae6bbbbcc96df29ee4b89c45d48cde4aa4efde Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 31 Oct 2025 09:23:01 +0000 Subject: [PATCH 11/32] Some changes to changes to the experiment and reverted the deployment template Signed-off-by: Christian Pinto --- .../ado_actuators/vllm_performance/actuator.py | 2 +- .../vllm_performance/k8/yaml_support/build_components.py | 2 +- .../vllm_performance/k8/yaml_support/deployment.yaml | 9 --------- 3 files changed, 2 insertions(+), 11 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py index a4fea988..2e522bf6 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py @@ -197,7 +197,7 @@ async def submit( ) # Execute experiment - # Note: Here the experiment instance is just past for convenience since we retrieved it above + # Note: Here the experiment instance is just passed for convenience since we retrieved it above run_resource_and_workload_experiment.remote( request=request, experiment=experiment, diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py index a3277a08..0abcc8c9 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py @@ -162,7 +162,7 @@ def deployment_yaml( vllm_serve_args.append("--enforce-eager") if skip_tokenizer_init: vllm_serve_args.append("--skip-tokenizer-init") - if io_processor_plugin: + if io_processor_plugin is not None: vllm_serve_args.append("--io-processor-plugin") vllm_serve_args.append(io_processor_plugin) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml index bc9c10a6..2b90302a 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml @@ -19,15 +19,6 @@ spec: app.kubernetes.io/name: vllm app.kubernetes.io/instance: vllm-testing spec: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - operator: NotIn - key: kubernetes.io/hostname - values: - - adcpu014 # funny node containers: - name: vllm image: "vllm/vllm-openai:v0.6.3" From a7509754d612e0e0063ddd3c67c66a2a01e212cc Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 31 Oct 2025 09:28:36 +0000 Subject: [PATCH 12/32] Removed some clutter from deployment template Signed-off-by: Christian Pinto --- .../vllm_performance/k8/yaml_support/deployment.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml index 2b90302a..2659550d 100644 --- 
a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml @@ -68,6 +68,4 @@ spec: emptyDir: medium: Memory nodeSelector: - nvidia.com/gpu.product: NVIDIA-A100-80GB-PCIe - #nvidia.com/gpu.product: Tesla-V100-PCIE-16GB - #kubernetes.io/hostname: cpu15 \ No newline at end of file + nvidia.com/gpu.product: NVIDIA-A100-80GB-PCIe \ No newline at end of file From c432cff57b38f6dc2e77b7a7210ffe928fe7b2e2 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 31 Oct 2025 11:05:03 +0000 Subject: [PATCH 13/32] Few more fixes Signed-off-by: Christian Pinto --- orchestrator/modules/operators/randomwalk.py | 3 - .../vllm_performance/actuator.py | 26 +- .../performance_testing.yaml} | 312 +----------------- .../performance_testing_geospatial.yaml | 308 +++++++++++++++++ 4 files changed, 328 insertions(+), 321 deletions(-) rename plugins/actuators/vllm_performance/ado_actuators/vllm_performance/{experiments.yaml => experiments/performance_testing.yaml} (51%) create mode 100644 plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml diff --git a/orchestrator/modules/operators/randomwalk.py b/orchestrator/modules/operators/randomwalk.py index aa050494..eff9a6f2 100644 --- a/orchestrator/modules/operators/randomwalk.py +++ b/orchestrator/modules/operators/randomwalk.py @@ -207,7 +207,6 @@ def sampler(self) -> BaseSampler | GroupSampler: sampler = SequentialSampleSelector() case CombinedWalkModeEnum.RANDOMGROUPED: sampler = RandomGroupSampleSelector(group=self.grouping) - sampler = RandomGroupSampleSelector(group=self.grouping) case CombinedWalkModeEnum.SEQUENTIALGROUPED: sampler = SequentialGroupSampleSelector( group=self.grouping @@ -221,12 +220,10 @@ def sampler(self) -> BaseSampler | GroupSampler: case CombinedWalkModeEnum.RANDOMGROUPED: sampler = ExplicitEntitySpaceGroupedGridSampleGenerator( mode=WalkModeEnum.RANDOM, group=self.grouping - mode=WalkModeEnum.RANDOM, group=self.grouping ) case CombinedWalkModeEnum.SEQUENTIALGROUPED: sampler = ExplicitEntitySpaceGroupedGridSampleGenerator( mode=WalkModeEnum.SEQUENTIAL, group=self.grouping - mode=WalkModeEnum.SEQUENTIAL, group=self.grouping ) case CombinedWalkModeEnum.RANDOM: sampler = ExplicitEntitySpaceGridSampleGenerator( diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py index 2e522bf6..88d4e67c 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py @@ -54,14 +54,26 @@ def catalog( ) -> ExperimentCatalog: """Returns the Experiments your actuator provides""" - # The catalog be formed in code here or read from a file containing the Experiments models - # This shows reading from a file - + # Loading experiment definitions for yaml files contained in the `experiments` directory. 
+ # NOTE: Only files can be placed in the experiments directory, + # but each file can contain multiple experiment definitions path = os.path.abspath(__file__) - path = os.path.split(path)[0] - with open(os.path.join(path, "experiments.yaml")) as f: - data = yaml.safe_load(f) - experiments = [Experiment(**data[e]) for e in data] + exp_dir = os.path.join(os.path.split(path)[0], "experiments") + experiments = [] + for exp_file in os.listdir(exp_dir): + logger.debug(f"Loading experiments from {exp_file}") + exp_file_path = os.path.join(exp_dir, exp_file) + if os.path.isdir(exp_file_path): + logger.error(f"{exp_file_path} is a directory. Only files are supported in the experiments directory") + raise Exception(f"{exp_file_path} is a directory. Only files are supported in the experiments directory") + with open(exp_file_path) as f: + try: + data = yaml.safe_load(f) + except yaml.YAMLError as e: + logger.error(f"File {exp_file} is a malformed YAML - {e}") + raise Exception (f"File {exp_file} is a malformed YAML - {e}") + + experiments.extend([Experiment(**data[e]) for e in data]) return ExperimentCatalog( catalogIdentifier=cls.identifier, diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml similarity index 51% rename from plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml rename to plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml index 53ee0c33..216d6ae4 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml @@ -336,314 +336,4 @@ performance_testing-endpoint: - identifier: "p75_e2el_ms" - identifier: "p99_e2el_ms" metadata: - description: 'Test inference performance of a model served by vLLM endpoint across inference workload configurations' -performance_testing-geospatial-endpoint: - identifier: performance-testing-geospatial-endpoint - actuatorIdentifier: "vllm_performance" - requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values - - identifier: 'model' - metadata: - description: 'model to use for testing. Assumed to be served by all endpoints tested. Required to obtain correct tokenizer for benchmarking metrics calculation' - propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" - values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"] - - identifier: 'endpoint' - metadata: - description: 'The endpoint(s) to test' - propertyDomain: - variableType: "UNKNOWN_VARIABLE_TYPE" - - identifier: 'request_rate' - metadata: - description: "The number of requests to send per second" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [-1,1000] - interval: 1 # -1 means send all requests at time 0 - optionalProperties: - - identifier: 'num_prompts' - metadata: - description: "The number of prompts to send (total number of requests)" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [1,10001] - interval: 1 - - identifier: 'burstiness' - metadata: - description: "The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." 
- propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 0, 10 ] - interval: 1 - - identifier: 'max_concurrency' - metadata: - description: "The maximum number of concurrent requests to send" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ -1, 500 ] # -1 means no concurrency control - interval: 1 - defaultParameterization: - - value: 100 - property: - identifier: 'num_prompts' - - value: -1 - property: - identifier: 'max_concurrency' - - value: 1.0 - property: - identifier: 'burstiness' - # measurements - targetProperties: - - identifier: "duration" - - identifier: "completed" - - identifier: "total_input_tokens" - - identifier: "total_output_tokens" - - identifier: "request_throughput" - - identifier: "output_throughput" - - identifier: "total_token_throughput" - - identifier: "mean_ttft_ms" - - identifier: "median_ttft_ms" - - identifier: "std_ttft_ms" - - identifier: "p25_ttft_ms" - - identifier: "p50_ttft_ms" - - identifier: "p75_ttft_ms" - - identifier: "p99_ttft_ms" - - identifier: "mean_tpot_ms" - - identifier: "median_tpot_ms" - - identifier: "std_tpot_ms" - - identifier: "p25_tpot_ms" - - identifier: "p50_tpot_ms" - - identifier: "p75_tpot_ms" - - identifier: "p99_tpot_ms" - - identifier: "mean_itl_ms" - - identifier: "median_itl_ms" - - identifier: "std_itl_ms" - - identifier: "p25_itl_ms" - - identifier: "p50_itl_ms" - - identifier: "p75_itl_ms" - - identifier: "p99_itl_ms" - - identifier: "mean_e2el_ms" - - identifier: "median_e2el_ms" - - identifier: "std_e2el_ms" - - identifier: "p25_e2el_ms" - - identifier: "p50_e2el_ms" - - identifier: "p75_e2el_ms" - - identifier: "p99_e2el_ms" - metadata: - description: 'Test inference performance of a geospatial model served by vLLM endpoint across inference workload configurations' -performance_testing-geospatial-full: - identifier: performance-testing-geospatial-full - actuatorIdentifier: "vllm_performance" - requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values - - identifier: 'model' - metadata: - description: 'model to use for testing. Assumed to be served by all endpoints tested. Required to obtain correct tokenizer for benchmarking metrics calculation' - propertyDomain: - variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] - - identifier: 'request_rate' - metadata: - description: "(benchmark) The number of requests to send per second" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [-1,1000] - interval: 1 # -1 means send all requests at time 0 - optionalProperties: - - identifier: 'num_prompts' - metadata: - description: "(benchmark) The number of prompts to send (total number of requests)" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [1,10001] - interval: 1 - - identifier: 'max_concurrency' - metadata: - description: "(benchmark) The maximum number of concurrent requests to send" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ -1, 500 ] # -1 means no concurrency control - interval: 1 - - identifier: 'burstiness' - metadata: - description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." 
- propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 0, 10 ] - interval: 1 - - identifier: image - metadata: - description: "(deployment) Docker image to use to create vllm deployments" - propertyDomain: - variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] - - identifier: n_cpus - metadata: - description: "(deployment) the number of CPUs to use" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 1,17 ] - interval: 1 - - identifier: memory - metadata: - description: "(deployment) the amount of memory to allocate to vLLM pod" - propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ "64Gi", "128Gi", "256Gi" ] - - identifier: dtype - metadata: - description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." - propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] - - identifier: 'gpu_memory_utilization' - metadata: - description: "(deployment) The fraction of GPU memory to be used for the model executor," - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - values: [ .5, .75, .9 ] - - identifier: 'cpu_offload' - metadata: - description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 0 means all weights are on GPU," - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - values: [ 0, 8, 16, 24, 32 ] - - identifier: 'max_batch_tokens' - metadata: - description: "(deployment) maximum size of the sum of the 1st image dimensions per iteration" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 1024, 32769 ] - interval: 1024 - - identifier: 'max_num_seq' - metadata: - description: "(deployment) Maximum number of sequences per iteration" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [32,2049] - interval: 32 - - identifier: 'max_batch_tokens' - metadata: - description: "(deployment) maximum number of batched tokens per iteration" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 8192, 32769] - interval: 1024 - - identifier: 'n_gpus' - metadata: - description: "(deployment) Number of GPUs to use" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [1,9] - interval: 1 - - identifier: 'gpu_type' - metadata: - description: "(deployment) The GPU type to use" - propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] - - identifier: 'skip_tokenizer_init' - metadata: - description: "(deployment) skip tokenizer intialization" - propertyDomain: - variableType: BINARY_VARIABLE_TYPE - values: [True, False] - - identifier: 'enforce_eager' - metadata: - description: "(deployment) enforce pytorch eager mode" - propertyDomain: - variableType: BINARY_VARIABLE_TYPE - values: [True, False] - - identifier: 'io_processor_plugin' - metadata: - description: 'IO Pocessor plugin to load for the model' - propertyDomain: - variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ None, "terratorch_segmentation" ] - defaultParameterization: - - property: - identifier: 'image' - value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" - - property: - identifier: n_cpus - value: 8 - - property: - identifier: - memory - value: "128Gi" - - property: - identifier: dtype - value: "auto" - - property: - identifier: 'num_prompts' 
- value: 500 - - property: - identifier: 'max_concurrency' - value: -1 - - property: - identifier: 'burstiness' - value: 1.0 - - property: - identifier: 'gpu_memory_utilization' - value: .9 - - property: - identifier: 'cpu_offload' - value: 0 - - property: - identifier: 'max_num_seq' - value: 256 - - property: - identifier: 'max_batch_tokens' - value: 16384 - - property: - identifier: 'n_gpus' - value: 1 - - property: - identifier: 'gpu_type' - value: 'NVIDIA-A100-80GB-PCIe' - - property: - identifier: 'skip_tokenizer_init' - value: True - - property: - identifier: 'enforce_eager' - value: True - - property: - identifier: 'io_processor_plugin' - value: "terratorch_segmentation" - # measurements - targetProperties: - - identifier: "duration" - - identifier: "completed" - - identifier: "total_input_tokens" - - identifier: "total_output_tokens" - - identifier: "request_throughput" - - identifier: "output_throughput" - - identifier: "total_token_throughput" - - identifier: "mean_ttft_ms" - - identifier: "median_ttft_ms" - - identifier: "std_ttft_ms" - - identifier: "p25_ttft_ms" - - identifier: "p50_ttft_ms" - - identifier: "p75_ttft_ms" - - identifier: "p99_ttft_ms" - - identifier: "mean_tpot_ms" - - identifier: "median_tpot_ms" - - identifier: "std_tpot_ms" - - identifier: "p25_tpot_ms" - - identifier: "p50_tpot_ms" - - identifier: "p75_tpot_ms" - - identifier: "p99_tpot_ms" - - identifier: "mean_itl_ms" - - identifier: "median_itl_ms" - - identifier: "std_itl_ms" - - identifier: "p25_itl_ms" - - identifier: "p50_itl_ms" - - identifier: "p75_itl_ms" - - identifier: "p99_itl_ms" - - identifier: "mean_e2el_ms" - - identifier: "median_e2el_ms" - - identifier: "std_e2el_ms" - - identifier: "p25_e2el_ms" - - identifier: "p50_e2el_ms" - - identifier: "p75_e2el_ms" - - identifier: "p99_e2el_ms" - metadata: - description: 'VLLM performance testing across compute resource and workload configuration' \ No newline at end of file + description: 'Test inference performance of a model served by vLLM endpoint across inference workload configurations' \ No newline at end of file diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml new file mode 100644 index 00000000..ad62052d --- /dev/null +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -0,0 +1,308 @@ +# Copyright (c) IBM Corporation +# SPDX-License-Identifier: MIT + +# The input to an experiment is an Entity. For the Entity to be a valid input +# it's properties which match what is defined here +performance_testing-geospatial-endpoint: + identifier: performance-testing-geospatial-endpoint + actuatorIdentifier: "vllm_performance" + requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values + - identifier: 'model' + metadata: + description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"] + - identifier: 'endpoint' + metadata: + description: 'The endpoint(s) to test' + propertyDomain: + variableType: "UNKNOWN_VARIABLE_TYPE" + - identifier: 'request_rate' + metadata: + description: "The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1,1000] + interval: 1 # -1 means send all requests at time 0 + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,10001] + interval: 1 + - identifier: 'burstiness' + metadata: + description: "The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + defaultParameterization: + - value: 100 + property: + identifier: 'num_prompts' + - value: -1 + property: + identifier: 'max_concurrency' + - value: 1.0 + property: + identifier: 'burstiness' + # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "total_input_tokens" + - identifier: "total_output_tokens" + - identifier: "request_throughput" + - identifier: "output_throughput" + - identifier: "total_token_throughput" + - identifier: "mean_ttft_ms" + - identifier: "median_ttft_ms" + - identifier: "std_ttft_ms" + - identifier: "p25_ttft_ms" + - identifier: "p50_ttft_ms" + - identifier: "p75_ttft_ms" + - identifier: "p99_ttft_ms" + - identifier: "mean_tpot_ms" + - identifier: "median_tpot_ms" + - identifier: "std_tpot_ms" + - identifier: "p25_tpot_ms" + - identifier: "p50_tpot_ms" + - identifier: "p75_tpot_ms" + - identifier: "p99_tpot_ms" + - identifier: "mean_itl_ms" + - identifier: "median_itl_ms" + - identifier: "std_itl_ms" + - identifier: "p25_itl_ms" + - identifier: "p50_itl_ms" + - identifier: "p75_itl_ms" + - identifier: "p99_itl_ms" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: "std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'Test inference performance of a geospatial model served by vLLM endpoint across inference workload configurations' +performance_testing-geospatial-full: + identifier: performance-testing-geospatial-full + actuatorIdentifier: "vllm_performance" + requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values + - identifier: 'model' + metadata: + description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] + - identifier: 'request_rate' + metadata: + description: "(benchmark) The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1,1000] + interval: 1 # -1 means send all requests at time 0 + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "(benchmark) The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,10001] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "(benchmark) The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + - identifier: 'burstiness' + metadata: + description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: image + metadata: + description: "(deployment) Docker image to use to create vllm deployments" + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] + - identifier: n_cpus + metadata: + description: "(deployment) the number of CPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 1,17 ] + interval: 1 + - identifier: memory + metadata: + description: "(deployment) the amount of memory to allocate to vLLM pod" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "64Gi", "128Gi", "256Gi" ] + - identifier: dtype + metadata: + description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] + - identifier: 'gpu_memory_utilization' + metadata: + description: "(deployment) The fraction of GPU memory to be used for the model executor," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ .5, .75, .9 ] + - identifier: 'cpu_offload' + metadata: + description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 
0 means all weights are on GPU," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ 0, 8, 16, 24, 32 ] + - identifier: 'max_num_seq' + metadata: + description: "(deployment) Maximum number of sequences per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [32,2049] + interval: 32 + - identifier: 'max_batch_tokens' + metadata: + description: "(deployment) maximum number of batched tokens per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 8192, 32769] + interval: 1024 + - identifier: 'n_gpus' + metadata: + description: "(deployment) Number of GPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,9] + interval: 1 + - identifier: 'gpu_type' + metadata: + description: "(deployment) The GPU type to use" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] + - identifier: 'skip_tokenizer_init' + metadata: + description: "(deployment) skip tokenizer intialization" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'enforce_eager' + metadata: + description: "(deployment) enforce pytorch eager mode" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'io_processor_plugin' + metadata: + description: 'IO Pocessor plugin to load for the model' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ None, "terratorch_segmentation" ] + defaultParameterization: + - property: + identifier: 'image' + value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" + - property: + identifier: n_cpus + value: 8 + - property: + identifier: + memory + value: "128Gi" + - property: + identifier: dtype + value: "auto" + - property: + identifier: 'num_prompts' + value: 500 + - property: + identifier: 'max_concurrency' + value: -1 + - property: + identifier: 'burstiness' + value: 1.0 + - property: + identifier: 'gpu_memory_utilization' + value: .9 + - property: + identifier: 'cpu_offload' + value: 0 + - property: + identifier: 'max_num_seq' + value: 256 + - property: + identifier: 'max_batch_tokens' + value: 16384 + - property: + identifier: 'n_gpus' + value: 1 + - property: + identifier: 'gpu_type' + value: 'NVIDIA-A100-80GB-PCIe' + - property: + identifier: 'skip_tokenizer_init' + value: True + - property: + identifier: 'enforce_eager' + value: True + - property: + identifier: 'io_processor_plugin' + value: "terratorch_segmentation" + # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "total_input_tokens" + - identifier: "total_output_tokens" + - identifier: "request_throughput" + - identifier: "output_throughput" + - identifier: "total_token_throughput" + - identifier: "mean_ttft_ms" + - identifier: "median_ttft_ms" + - identifier: "std_ttft_ms" + - identifier: "p25_ttft_ms" + - identifier: "p50_ttft_ms" + - identifier: "p75_ttft_ms" + - identifier: "p99_ttft_ms" + - identifier: "mean_tpot_ms" + - identifier: "median_tpot_ms" + - identifier: "std_tpot_ms" + - identifier: "p25_tpot_ms" + - identifier: "p50_tpot_ms" + - identifier: "p75_tpot_ms" + - identifier: "p99_tpot_ms" + - identifier: "mean_itl_ms" + - identifier: "median_itl_ms" + - identifier: "std_itl_ms" + - identifier: "p25_itl_ms" + - identifier: "p50_itl_ms" + - identifier: "p75_itl_ms" + - identifier: "p99_itl_ms" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: 
"std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'VLLM performance testing across compute resource and workload configuration' \ No newline at end of file From b851d033e45436bc79dd2b56bc4c4233c2363d55 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Mon, 3 Nov 2025 09:24:43 +0000 Subject: [PATCH 14/32] Fixed bug in validate_entitiy Signed-off-by: Christian Pinto --- orchestrator/schema/experiment.py | 6 +++--- orchestrator/schema/property_value.py | 13 +++++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/orchestrator/schema/experiment.py b/orchestrator/schema/experiment.py index 9e17ee6e..deb87a30 100644 --- a/orchestrator/schema/experiment.py +++ b/orchestrator/schema/experiment.py @@ -636,14 +636,14 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool: } if validate_point_against_properties( point, - constitutive_properties=self.requiredConstitutiveProperties, + constitutive_properties=self.requiredConstitutiveProperties + list(self.optionalProperties), ): return True # It's not an exact match - check if partial match if not validate_point_against_properties( point, - constitutive_properties=self.requiredConstitutiveProperties, + constitutive_properties=self.requiredConstitutiveProperties + list(self.optionalProperties), allow_partial_matches=True, ): # no partial match - missing required properties or has incorrect values for them @@ -654,7 +654,7 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool: return False # It has the required properties with valid values but there are additional properties - # See if these properties are optional propertiesof the experiment + # See if these properties are optional properties of the experiment potential_optional_properties: set[str] = point.keys() - { cp.identifier for cp in self.requiredProperties } diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py index a429412a..d7b56107 100644 --- a/orchestrator/schema/property_value.py +++ b/orchestrator/schema/property_value.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: MIT import enum +import logging import typing import pydantic @@ -13,6 +14,7 @@ PropertyDescriptor, ) +logger = logging.getLogger("property_value") class ValueTypeEnum(str, enum.Enum): NUMERIC_VALUE_TYPE = "NUMERIC_VALUE_TYPE" # the value is a bool,int, float etc. 
@@ -189,6 +191,12 @@
         cp.identifier for cp in constitutive_properties
     }
 
+    logger.debug(
+        f"Validating point's constitutive properties "
+        f"(allow_partial_matches = {allow_partial_matches}) {constitutive_property_identifiers_for_point}, "
+        f"against the space constitutive properties {constitutive_property_identifiers_for_entity_space}"
+    )
+
     matching_constitutive_property_identifiers = (
         constitutive_property_identifiers_for_point.intersection(
             constitutive_property_identifiers_for_entity_space
@@ -221,6 +229,11 @@
         if not constitutive_property.propertyDomain.valueInDomain(
             point[constitutive_property.identifier]
         ):
+            logger.warning(
+                f"Property {constitutive_property.identifier}({point[constitutive_property.identifier]}) "
+                "is not in the target consitutive property "
+                f"domain ({constitutive_property.propertyDomain.domainRange})"
+            )
             return False
 
     return True

From 7055c38850d67e24104c39260d671c4659a99285 Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 3 Nov 2025 11:32:16 +0000
Subject: [PATCH 15/32] One more fix to a log message

Signed-off-by: Christian Pinto
---
 orchestrator/schema/property_value.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py
index d7b56107..0ed81e4f 100644
--- a/orchestrator/schema/property_value.py
+++ b/orchestrator/schema/property_value.py
@@ -232,7 +232,7 @@ def validate_point_against_properties(
             logger.warning(
                 f"Property {constitutive_property.identifier}({point[constitutive_property.identifier]}) "
                 "is not in the target consitutive property "
-                f"domain ({constitutive_property.propertyDomain.domainRange})"
+                f"domain ({constitutive_property.propertyDomain.domain_values()})"
             )
             return False
 

From 5bdf90263b21e8061ac86fb6b0a298c477e47ee1 Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 3 Nov 2025 11:34:43 +0000
Subject: [PATCH 16/32] One more fix to a log message

Signed-off-by: Christian Pinto
---
 orchestrator/schema/property_value.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py
index 0ed81e4f..46f595ef 100644
--- a/orchestrator/schema/property_value.py
+++ b/orchestrator/schema/property_value.py
@@ -232,7 +232,7 @@ def validate_point_against_properties(
             logger.warning(
                 f"Property {constitutive_property.identifier}({point[constitutive_property.identifier]}) "
                 "is not in the target consitutive property "
-                f"domain ({constitutive_property.propertyDomain.domain_values()})"
+                f"domain ({constitutive_property.propertyDomain.domain_values})"
            )
             return False
 

From dbab4c7cb55fb664e4fe74b0eb6c41782dbbc1de Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 3 Nov 2025 11:35:41 +0000
Subject: [PATCH 17/32] One more fix to a log message

Signed-off-by: Christian Pinto
---
 orchestrator/schema/property_value.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py
index 0ed81e4f..46f595ef 100644
--- a/orchestrator/schema/property_value.py
+++ b/orchestrator/schema/property_value.py
@@ -232,7 +232,7 @@ def validate_point_against_properties(
             logger.warning(
                 f"Property {constitutive_property.identifier}({point[constitutive_property.identifier]}) "
                 "is not in the target consitutive property "
-                f"domain ({constitutive_property.propertyDomain.domain_values()})"
+                f"domain 
({constitutive_property.propertyDomain.domain_values})" ) return False From fd100b6a1faf9bfce4e985c43c8e6b1e31eaee7d Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Mon, 3 Nov 2025 12:59:43 +0000 Subject: [PATCH 18/32] Fixes to vllm_performance actuator Signed-off-by: Christian Pinto --- .../datasets/india_url_in_b64_out.jsonl | 1 + .../valencia_url_in_b64_out.jsonl} | 0 .../vllm_performance/experiment_executor.py | 4 ++++ .../experiments/performance_testing.yaml | 18 ++++++++++++++++++ .../performance_testing_geospatial.yaml | 18 ++++++++++++++++++ .../vllm_performance_test/execute_benchmark.py | 14 +++++++++++--- 6 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/india_url_in_b64_out.jsonl rename plugins/actuators/vllm_performance/ado_actuators/vllm_performance/{geospatial_valencia.jsonl => datasets/valencia_url_in_b64_out.jsonl} (100%) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/india_url_in_b64_out.jsonl b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/india_url_in_b64_out.jsonl new file mode 100644 index 00000000..693bbc09 --- /dev/null +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/india_url_in_b64_out.jsonl @@ -0,0 +1 @@ +{"prompt":{"data": {"data": "https://huggingface.co/christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM/resolve/main/India_900498_S2Hand.tif","data_format": "url","out_data_format": "b64_json","indices": [1, 2, 3, 8, 11, 12]},"priority": 0,"softmax": false}} diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_valencia.jsonl b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/valencia_url_in_b64_out.jsonl similarity index 100% rename from plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_valencia.jsonl rename to plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/valencia_url_in_b64_out.jsonl diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 9be6921a..459e8473 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -297,6 +297,7 @@ def run_resource_and_workload_experiment( benchmark_retries=actuator_parameters.benchmark_retries, retries_timeout=actuator_parameters.retries_timeout, burstiness=float(values.get("burstiness")), + dataset = values.get("dataset"), ) else: result = execute_random_benchmark( @@ -312,6 +313,7 @@ def run_resource_and_workload_experiment( number_input_tokens=int(values.get("number_input_tokens")), max_output_tokens=int(values.get("max_output_tokens")), burstiness=float(values.get("burstiness")), + dataset = values.get("dataset"), ) logger.debug(f"benchmark executed in {time.time() - start} sec") except Exception as e: @@ -411,6 +413,7 @@ def run_workload_experiment( benchmark_retries=actuator_parameters.benchmark_retries, retries_timeout=actuator_parameters.retries_timeout, burstiness=float(values.get("burstiness")), + dataset = values.get("dataset"), ) else: result = execute_random_benchmark( @@ -426,6 +429,7 @@ def run_workload_experiment( number_input_tokens=int(values.get("number_input_tokens")), 
max_output_tokens=int(values.get("max_output_tokens")), burstiness=float(values.get("burstiness")), + dataset = values.get("dataset"), ) except Exception as e: logger.error(f"Failed to execute VLLM performance test {e}") diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml index 216d6ae4..c9537fd7 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml @@ -56,6 +56,12 @@ performance_testing-full: variableType: 'DISCRETE_VARIABLE_TYPE' domainRange: [ 1, 10000 ] interval: 1 + - identifier: 'dataset' + metadata: + description: "(benchmark) The dataset to be used fof the experiment" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ 'random' ] - identifier: image metadata: description: "(deployment) Docker image to use to create vllm deployments" @@ -161,6 +167,9 @@ performance_testing-full: - property: identifier: 'max_output_tokens' value: 128 + - property: + identifier: 'dataset' + value: 'random' - property: identifier: 'gpu_memory_utilization' value: .9 @@ -282,6 +291,12 @@ performance_testing-endpoint: variableType: 'DISCRETE_VARIABLE_TYPE' domainRange: [ -1, 500 ] # -1 means no concurrency control interval: 1 + - identifier: 'dataset' + metadata: + description: "(benchmark) The dataset to be used fof the experiment" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ 'random' ] defaultParameterization: - value: 1000 property: @@ -298,6 +313,9 @@ performance_testing-endpoint: - value: 128 property: identifier: 'max_output_tokens' + - property: + identifier: 'dataset' + value: 'random' # measurements targetProperties: - identifier: "duration" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index ad62052d..5d976439 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -47,6 +47,12 @@ performance_testing-geospatial-endpoint: variableType: 'DISCRETE_VARIABLE_TYPE' domainRange: [ -1, 500 ] # -1 means no concurrency control interval: 1 + - identifier: 'dataset' + metadata: + description: "The dataset to be used for the experiment" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ 'india_url_in_b64_out', 'valencia_url_in_b64_out' ] defaultParameterization: - value: 100 property: @@ -57,6 +63,9 @@ performance_testing-geospatial-endpoint: - value: 1.0 property: identifier: 'burstiness' + - property: + identifier: 'dataset' + value: 'india_url_in_b64_out' # measurements targetProperties: - identifier: "duration" @@ -135,6 +144,12 @@ performance_testing-geospatial-full: variableType: 'DISCRETE_VARIABLE_TYPE' domainRange: [ 0, 10 ] interval: 1 + - identifier: 'dataset' + metadata: + description: "(benchmark) The dataset to be used for the experiment" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ 'india_url_in_b64_out', 'valencia_url_in_b64_out' ] - identifier: image metadata: description: 
"(deployment) Docker image to use to create vllm deployments" @@ -267,6 +282,9 @@ performance_testing-geospatial-full: - property: identifier: 'io_processor_plugin' value: "terratorch_segmentation" + - property: + identifier: 'dataset' + value: 'india_url_in_b64_out' # measurements targetProperties: - identifier: "duration" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index cf3ed96a..1d81ee36 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -12,6 +12,10 @@ get_results, ) +default_geospatial_datasets_filenames = { + "india_url_in_b64_out": "india_url_in_b64_out.jsonl", + "valencia_url_in_b64_out": "valencia_url_in_b64_out.jsonl", +} def execute_benchmark( base_url: str, @@ -107,6 +111,7 @@ def execute_benchmark( def execute_random_benchmark( base_url: str, model: str, + dataset: str, num_prompts: int = 500, request_rate: int | None = None, max_concurrency: int | None = None, @@ -134,7 +139,7 @@ def execute_random_benchmark( return execute_benchmark( base_url=base_url, model=model, - data_set="random", + data_set=dataset, interpreter=interpreter, num_prompts=num_prompts, request_rate=request_rate, @@ -153,6 +158,7 @@ def execute_random_benchmark( def execute_geospatial_benchmark( base_url: str, model: str, + dataset: str, num_prompts: int = 500, request_rate: int | None = None, max_concurrency: int | None = None, @@ -176,9 +182,11 @@ def execute_geospatial_benchmark( """ from importlib import resources + dataset_filename = default_geospatial_datasets_filenames[dataset] + with resources.path( - "ado_actuators.vllm_performance", - "geospatial_india.jsonl", + "ado_actuators.vllm_performance.datasets", + dataset_filename, ) as data_set_path: return execute_benchmark( base_url=base_url, From f7ceb520be5fde683d6e8b5c8f5a568fd82c160c Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Mon, 3 Nov 2025 09:24:43 +0000 Subject: [PATCH 19/32] fix(experiment): Fixed bug in validate_entitiy Signed-off-by: Christian Pinto --- orchestrator/schema/experiment.py | 8 +++++--- orchestrator/schema/property_value.py | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/orchestrator/schema/experiment.py b/orchestrator/schema/experiment.py index 9e17ee6e..61da6ec4 100644 --- a/orchestrator/schema/experiment.py +++ b/orchestrator/schema/experiment.py @@ -636,14 +636,16 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool: } if validate_point_against_properties( point, - constitutive_properties=self.requiredConstitutiveProperties, + constitutive_properties=self.requiredConstitutiveProperties + + list(self.optionalProperties), ): return True # It's not an exact match - check if partial match if not validate_point_against_properties( point, - constitutive_properties=self.requiredConstitutiveProperties, + constitutive_properties=self.requiredConstitutiveProperties + + list(self.optionalProperties), allow_partial_matches=True, ): # no partial match - missing required properties or has incorrect values for them @@ -654,7 +656,7 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool: return False # It has the required properties with valid values but there are additional 
properties - # See if these properties are optional propertiesof the experiment + # See if these properties are optional properties of the experiment potential_optional_properties: set[str] = point.keys() - { cp.identifier for cp in self.requiredProperties } diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py index a429412a..08da1bb0 100644 --- a/orchestrator/schema/property_value.py +++ b/orchestrator/schema/property_value.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: MIT import enum +import logging import typing import pydantic @@ -13,6 +14,8 @@ PropertyDescriptor, ) +logger = logging.getLogger("property_value") + class ValueTypeEnum(str, enum.Enum): NUMERIC_VALUE_TYPE = "NUMERIC_VALUE_TYPE" # the value is a bool,int, float etc. @@ -189,6 +192,12 @@ def validate_point_against_properties( cp.identifier for cp in constitutive_properties } + logger.debug( + f"Validating point's constitutive properties " + f"(allow_partial_matches = {allow_partial_matches}) {constitutive_property_identifiers_for_point}, " + f"against the space constitutive properties {constitutive_property_identifiers_for_entity_space}" + ) + matching_constitutive_property_identifiers = ( constitutive_property_identifiers_for_point.intersection( constitutive_property_identifiers_for_entity_space @@ -221,6 +230,11 @@ def validate_point_against_properties( if not constitutive_property.propertyDomain.valueInDomain( point[constitutive_property.identifier] ): + logger.warning( + f"Property {constitutive_property.identifier}({point[constitutive_property.identifier]}) " + "is not in the target consitutive property " + f"domain ({constitutive_property.propertyDomain.domain_values})" + ) return False return True From df4f9bc1369c5744f01a25de30c43c82b3f94184 Mon Sep 17 00:00:00 2001 From: michaelj Date: Mon, 3 Nov 2025 15:21:16 +0000 Subject: [PATCH 20/32] fix: Not using reference which may be parameterized --- orchestrator/utilities/run_experiment.py | 44 +++++++++++++++--------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/orchestrator/utilities/run_experiment.py b/orchestrator/utilities/run_experiment.py index 0ed6959d..85064c4a 100644 --- a/orchestrator/utilities/run_experiment.py +++ b/orchestrator/utilities/run_experiment.py @@ -24,7 +24,7 @@ def local_execution_closure( registry: ActuatorRegistry, actuator_configuration_identifiers: list[str] | None = None, -) -> Callable[[ExperimentReference, Entity], MeasurementRequest]: +) -> Callable[[ExperimentReference, Entity], MeasurementRequest] | None: """Create a callable that submits a local measurement request. The function keeps a dictionary of Actuator actors so that each actuator @@ -66,31 +66,41 @@ def local_execution_closure( def execute_local( reference: ExperimentReference, entity: Entity - ) -> MeasurementRequest: + ) -> MeasurementRequest | None: # instantiate the actuator for this experiment identifier. 
- experiment = registry.experimentForReference(reference) - if experiment.actuatorIdentifier not in actuators: + if reference.actuatorIdentifier not in actuators: actuator_class = registry.actuatorForIdentifier( - experiment.actuatorIdentifier + reference.actuatorIdentifier ) - if experiment.actuatorIdentifier in actuator_configurations: + if reference.actuatorIdentifier in actuator_configurations: config = actuator_configurations[ - experiment.actuatorIdentifier + reference.actuatorIdentifier ].parameters else: config = actuator_class.default_parameters() - actuators[experiment.actuatorIdentifier] = actuator_class.remote( + actuators[reference.actuatorIdentifier] = actuator_class.remote( queue=queue, params=config ) - actuator = actuators[experiment.actuatorIdentifier] - # Submit the measurement request asynchronously. - actuator.submit.remote( - entities=[entity], - experimentReference=experiment.reference, - requesterid="run_experiment", - requestIndex=0, - ) + actuator = actuators[reference.actuatorIdentifier] + # Submit the measurement request asynchronously, handle errors gracefully. + try: + actuator.submit.remote( + entities=[entity], + experimentReference=reference, + requesterid="run_experiment", + requestIndex=0, + ) + except Exception as e: + print( + f"[ERROR] Failed to submit measurement request for {reference} to actuator '{reference.actuatorIdentifier}': {e}" + ) + import traceback + + traceback.print_exc() + # Either skip, or return None, or propagate. Let's return None. + return None + return queue.get() return execute_local @@ -239,7 +249,7 @@ def run( print("Skipping validation") if valid: - print(f"Executing: {reference.experimentIdentifier}") + print(f"Executing: {reference}") request = execute(reference, entity) print("Result:") print(f"{request.series_representation(output_format='target')}\n") From d744287b1ec3ab53b3d1c93c7aae161c4d219e28 Mon Sep 17 00:00:00 2001 From: michaelj Date: Mon, 3 Nov 2025 16:34:43 +0000 Subject: [PATCH 21/32] fix: validate_entity validate_entity was incorrectly identifying Entities with optional properties as invalid This was because validate_point_against_properties(allow_partial_matches=True) does not work as code expected. Code expected that given {point props} and {required props}, if {required props}.issubset(point_props) it would return True. However, it was checking the opposite i.e.{point_probs}.issubset{required_props} --- orchestrator/schema/experiment.py | 84 +++++++++++++++---------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/orchestrator/schema/experiment.py b/orchestrator/schema/experiment.py index 61da6ec4..46b7eb62 100644 --- a/orchestrator/schema/experiment.py +++ b/orchestrator/schema/experiment.py @@ -624,71 +624,71 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool: """Returns True if Experiment can be applied to entity, false otherwise This method only checks constitutive properties. - - All properties of the Entity that match the experiments required or optional properties must have - values in the domain of that property - - All required properties of the experiment must have a matching constitutive property - - If strict_optional is True all properties of the Entity that are not required properties of the Experiment - must match optional properties of the experiment. 
+ - The entity has valid values for all required properties of the experiment + - The entity has valid values for any optional properties of the experiment it contains + - If strict_optional is True all properties of the Entity are properties (required+optional) of the experiment """ point = { v.property.identifier: v.value for v in entity.constitutive_property_values } - if validate_point_against_properties( - point, - constitutive_properties=self.requiredConstitutiveProperties - + list(self.optionalProperties), - ): - return True - - # It's not an exact match - check if partial match - if not validate_point_against_properties( - point, - constitutive_properties=self.requiredConstitutiveProperties - + list(self.optionalProperties), - allow_partial_matches=True, - ): - # no partial match - missing required properties or has incorrect values for them - logging.getLogger("experiment").warning( - f"The entity is missing or has invalid values for required properties of " - f" {self.identifier}" - ) - return False - # It has the required properties with valid values but there are additional properties - # See if these properties are optional properties of the experiment - potential_optional_properties: set[str] = point.keys() - { - cp.identifier for cp in self.requiredProperties + # + # Get required and optional property sets of the experiment + # + required_property_identifiers = { + cp.identifier for cp in self.requiredConstitutiveProperties } - optional_properties = potential_optional_properties & { + optional_property_identifiers = { cp.identifier for cp in self.optionalProperties } - # If strict_optional is on all the additional properties must be optional properties - if ( - len(optional_properties) != len(potential_optional_properties) - and strict_optional - ): + + # + # Get the equivalent sets from the entity + # + required_properties_present = point.keys() & required_property_identifiers + optional_properties_present = point.keys() & optional_property_identifiers + additional_properties_present = ( + point.keys() - required_properties_present - optional_properties_present + ) + + # First check against strict optional as it is a quick fail condition + if additional_properties_present and strict_optional: logging.getLogger("experiment").warning( f"Strict property checking is on and the following entity " f"properties are not required or optional properties of {self.identifier}:" - f"{potential_optional_properties-optional_properties} " + f"{additional_properties_present} " + ) + + # Check if all the required properties are present with values in domain + if not validate_point_against_properties( + point={k: v for k, v in point.items() if k in required_properties_present}, + constitutive_properties=self.requiredConstitutiveProperties, + ): + logging.getLogger("experiment").warning( + f"The entity is missing values for required properties of {self.identifier}: {required_property_identifiers - required_properties_present}" ) return False - is_valid = validate_point_against_properties( - point={key: point[key] for key in optional_properties}, + # All required properties are there + # Now check optional properties, if given + # We can set partial_match=True because: + # - If we wanted full match of optional properties (strict_optional), but it wasn't present, + # we would have already exited + if optional_properties_present and not validate_point_against_properties( + point={k: v for k, v in point.items() if k in optional_properties_present}, 
constitutive_properties=list(self.optionalProperties), allow_partial_matches=True, - ) - if not is_valid: + ): logging.getLogger("experiment").warning( f"The entity has properties that match optional properties" f"of {self.identifier} - " - f"{potential_optional_properties - optional_properties} - " + f"{optional_properties_present} - " f"but its values for those properties are not in the domain of the optional properties" ) + return False - return is_valid + return True class ParameterizedExperiment(Experiment): From 1869a49af194cbc3307e841a530d47d6f3ea9733 Mon Sep 17 00:00:00 2001 From: michaelj Date: Mon, 3 Nov 2025 16:53:45 +0000 Subject: [PATCH 22/32] fix: missing return --- orchestrator/schema/experiment.py | 1 + 1 file changed, 1 insertion(+) diff --git a/orchestrator/schema/experiment.py b/orchestrator/schema/experiment.py index 46b7eb62..07bbfadf 100644 --- a/orchestrator/schema/experiment.py +++ b/orchestrator/schema/experiment.py @@ -659,6 +659,7 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool: f"properties are not required or optional properties of {self.identifier}:" f"{additional_properties_present} " ) + return False # Check if all the required properties are present with values in domain if not validate_point_against_properties( From 883e8771d801558f05565d73e83f7c89e9c7cfee Mon Sep 17 00:00:00 2001 From: michaelj Date: Mon, 3 Nov 2025 16:54:05 +0000 Subject: [PATCH 23/32] test: for validate_entity --- tests/schema/test_experiment.py | 194 +++++++++++++++++++++++++++++++- 1 file changed, 193 insertions(+), 1 deletion(-) diff --git a/tests/schema/test_experiment.py b/tests/schema/test_experiment.py index 14f3230d..72401bf1 100644 --- a/tests/schema/test_experiment.py +++ b/tests/schema/test_experiment.py @@ -8,8 +8,12 @@ from orchestrator.modules.actuators.registry import ( ActuatorRegistry, ) +from orchestrator.schema.domain import PropertyDomain, VariableTypeEnum from orchestrator.schema.entity import Entity -from orchestrator.schema.experiment import Experiment, ParameterizedExperiment +from orchestrator.schema.experiment import ( + Experiment, + ParameterizedExperiment, +) from orchestrator.schema.property import ( AbstractProperty, ConstitutiveProperty, @@ -815,3 +819,191 @@ def test_experiment_provides_requirements( mock_parameterizable_experiment ) ) + + +@pytest.fixture(scope="module") +def nevergrad_opt_3d_test_func_experiment(): + # Define required constitutive properties (x0, x1, x2, all continuous) + required_props = [ + ConstitutiveProperty( + identifier="x0", + propertyDomain=PropertyDomain( + variableType=VariableTypeEnum.CONTINUOUS_VARIABLE_TYPE + ), + ), + ConstitutiveProperty( + identifier="x1", + propertyDomain=PropertyDomain( + variableType=VariableTypeEnum.CONTINUOUS_VARIABLE_TYPE + ), + ), + ConstitutiveProperty( + identifier="x2", + propertyDomain=PropertyDomain( + variableType=VariableTypeEnum.CONTINUOUS_VARIABLE_TYPE + ), + ), + ] + # Optional property: name (categorical) + optional_props = ( + ConstitutiveProperty( + identifier="name", + propertyDomain=PropertyDomain( + variableType=VariableTypeEnum.CATEGORICAL_VARIABLE_TYPE, + values=["rosenbrock", "griewank", "sphere"], + ), + ), + ) + default_param = ( + ConstitutivePropertyValue( + value="rosenbrock", + property=ConstitutivePropertyDescriptor(identifier="name"), + ), + ) + return Experiment( + actuatorIdentifier="custom_experiments", + identifier="nevergrad_opt_3d_test_func", + targetProperties=[], + requiredProperties=tuple(required_props), + 
optionalProperties=optional_props, + defaultParameterization=default_param, + ) + + +def entity_with_props(props): + return Entity(constitutive_property_values=tuple(props)) + + +def test_validate_entity_required_only(nevergrad_opt_3d_test_func_experiment): + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value=2.5, property=ConstitutivePropertyDescriptor(identifier="x2") + ), + ] + entity = entity_with_props(props) + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is True + + +def test_validate_entity_with_optional_valid(nevergrad_opt_3d_test_func_experiment): + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value=2.5, property=ConstitutivePropertyDescriptor(identifier="x2") + ), + ConstitutivePropertyValue( + value="sphere", property=ConstitutivePropertyDescriptor(identifier="name") + ), + ] + entity = entity_with_props(props) + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is True + + +def test_validate_entity_with_optional_invalid(nevergrad_opt_3d_test_func_experiment): + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value=2.5, property=ConstitutivePropertyDescriptor(identifier="x2") + ), + ConstitutivePropertyValue( + value="foobar", property=ConstitutivePropertyDescriptor(identifier="name") + ), + ] + entity = entity_with_props(props) + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is False + + +def test_validate_entity_missing_required(nevergrad_opt_3d_test_func_experiment): + # missing x2 + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ] + entity = entity_with_props(props) + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is False + + +def test_validate_entity_missing_required_with_optional_valid( + nevergrad_opt_3d_test_func_experiment, +): + # missing x2 but valid name + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value="griewank", property=ConstitutivePropertyDescriptor(identifier="name") + ), + ] + entity = entity_with_props(props) + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is False + + +def test_validate_entity_additional_property_strict_optional_false( + nevergrad_opt_3d_test_func_experiment, +): + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value=2.5, property=ConstitutivePropertyDescriptor(identifier="x2") + ), + ConstitutivePropertyValue( + value=10, 
property=ConstitutivePropertyDescriptor(identifier="test") + ), + ] + entity = entity_with_props(props) + # Default: strict_optional=False, extra property is fine + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is True + + +def test_validate_entity_additional_property_strict_optional_true( + nevergrad_opt_3d_test_func_experiment, +): + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value=2.5, property=ConstitutivePropertyDescriptor(identifier="x2") + ), + ConstitutivePropertyValue( + value=10, property=ConstitutivePropertyDescriptor(identifier="test") + ), + ] + entity = entity_with_props(props) + assert ( + nevergrad_opt_3d_test_func_experiment.validate_entity( + entity, strict_optional=True + ) + is False + ) From 064750f8403e140ba38a253b9debdd69e1dea9d5 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Tue, 4 Nov 2025 09:30:49 +0000 Subject: [PATCH 24/32] chore: fixed formatting with black Signed-off-by: Christian Pinto --- orchestrator/modules/operators/randomwalk.py | 4 +--- .../ado_actuators/vllm_performance/actuator.py | 15 +++++++++++---- .../vllm_performance/experiment_executor.py | 10 +++++----- .../vllm_performance/k8/create_environment.py | 4 ++-- .../vllm_performance/k8/manage_components.py | 2 +- .../k8/yaml_support/build_components.py | 8 ++++---- .../vllm_performance_test/execute_benchmark.py | 1 + 7 files changed, 25 insertions(+), 19 deletions(-) diff --git a/orchestrator/modules/operators/randomwalk.py b/orchestrator/modules/operators/randomwalk.py index eff9a6f2..1018c17a 100644 --- a/orchestrator/modules/operators/randomwalk.py +++ b/orchestrator/modules/operators/randomwalk.py @@ -208,9 +208,7 @@ def sampler(self) -> BaseSampler | GroupSampler: case CombinedWalkModeEnum.RANDOMGROUPED: sampler = RandomGroupSampleSelector(group=self.grouping) case CombinedWalkModeEnum.SEQUENTIALGROUPED: - sampler = SequentialGroupSampleSelector( - group=self.grouping - ) + sampler = SequentialGroupSampleSelector(group=self.grouping) case _: # this can never happen, as we are validating this above pass diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py index 88d4e67c..dd45091f 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py @@ -64,14 +64,18 @@ def catalog( logger.debug(f"Loading experiments from {exp_file}") exp_file_path = os.path.join(exp_dir, exp_file) if os.path.isdir(exp_file_path): - logger.error(f"{exp_file_path} is a directory. Only files are supported in the experiments directory") - raise Exception(f"{exp_file_path} is a directory. Only files are supported in the experiments directory") + logger.error( + f"{exp_file_path} is a directory. Only files are supported in the experiments directory" + ) + raise Exception( + f"{exp_file_path} is a directory. 
Only files are supported in the experiments directory" + ) with open(exp_file_path) as f: try: data = yaml.safe_load(f) except yaml.YAMLError as e: logger.error(f"File {exp_file} is a malformed YAML - {e}") - raise Exception (f"File {exp_file} is a malformed YAML - {e}") + raise Exception(f"File {exp_file} is a malformed YAML - {e}") experiments.extend([Experiment(**data[e]) for e in data]) @@ -188,7 +192,10 @@ async def submit( if experiment.deprecated is True: raise DeprecatedExperimentError(f"Experiment {experiment} is deprecated") - if experiment.identifier in ["performance-testing-full", "performance-testing-geospatial-full"]: + if experiment.identifier in [ + "performance-testing-full", + "performance-testing-geospatial-full", + ]: if not self.env_manager: raise MissingConfigurationForExperimentError( f"Actuator configuration did not contain sufficient information for a kubernetes environment manager to be created. " diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 459e8473..693bdb58 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -146,7 +146,7 @@ def _create_environment( namespace=actuator.namespace, skip_tokenizer_init=values.get("skip_tokenizer_init"), enforce_eager=values.get("enforce_eager"), - io_processor_plugin=values.get("io_processor_plugin") + io_processor_plugin=values.get("io_processor_plugin"), ) # Update manager env_manager.done_creating.remote(definition=definition) @@ -297,7 +297,7 @@ def run_resource_and_workload_experiment( benchmark_retries=actuator_parameters.benchmark_retries, retries_timeout=actuator_parameters.retries_timeout, burstiness=float(values.get("burstiness")), - dataset = values.get("dataset"), + dataset=values.get("dataset"), ) else: result = execute_random_benchmark( @@ -313,7 +313,7 @@ def run_resource_and_workload_experiment( number_input_tokens=int(values.get("number_input_tokens")), max_output_tokens=int(values.get("max_output_tokens")), burstiness=float(values.get("burstiness")), - dataset = values.get("dataset"), + dataset=values.get("dataset"), ) logger.debug(f"benchmark executed in {time.time() - start} sec") except Exception as e: @@ -413,7 +413,7 @@ def run_workload_experiment( benchmark_retries=actuator_parameters.benchmark_retries, retries_timeout=actuator_parameters.retries_timeout, burstiness=float(values.get("burstiness")), - dataset = values.get("dataset"), + dataset=values.get("dataset"), ) else: result = execute_random_benchmark( @@ -429,7 +429,7 @@ def run_workload_experiment( number_input_tokens=int(values.get("number_input_tokens")), max_output_tokens=int(values.get("max_output_tokens")), burstiness=float(values.get("burstiness")), - dataset = values.get("dataset"), + dataset=values.get("dataset"), ) except Exception as e: logger.error(f"Failed to execute VLLM performance test {e}") diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py index 3f0a0809..c3a2a2b1 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py @@ -42,7 +42,7 @@ def 
create_test_environment( namespace: str = "vllm-testing", enforce_eager: bool = False, skip_tokenizer_init: bool = False, - io_processor_plugin: str | None = None + io_processor_plugin: str | None = None, ) -> None: """ Create test deployment @@ -122,7 +122,7 @@ def create_test_environment( reuse=reuse_deployment, enforce_eager=enforce_eager, skip_tokenizer_init=skip_tokenizer_init, - io_processor_plugin=io_processor_plugin + io_processor_plugin=io_processor_plugin, ) logger.debug("deployment created") c_manager.wait_deployment_ready(k8_name=k8_name) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py index cd77a444..9fddc978 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py @@ -233,7 +233,7 @@ def create_deployment( reuse: bool = False, enforce_eager: bool = False, skip_tokenizer_init: bool = False, - io_processor_plugin: str | None = None + io_processor_plugin: str | None = None, ) -> None: """ create deployment for model diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py index 0abcc8c9..41d8cdb4 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py @@ -1,12 +1,12 @@ # Copyright (c) IBM Corporation # SPDX-License-Identifier: MIT +import json import logging import os import sys import uuid from enum import Enum -import json from typing import Any import yaml @@ -45,7 +45,7 @@ def get_k8_name(model: str) -> str: # Making sure the resulting name is not longer than 63 characters as it is # the maximum allowed for a name in kubernetes. 
- name_prefix = m_parts[-1][:min(len(m_parts[-1]), 21)].rstrip("-") + name_prefix = m_parts[-1][: min(len(m_parts[-1]), 21)].rstrip("-") return f"vllm-{name_prefix.lower()}-{uuid.uuid4()}".replace(".", "-") @staticmethod @@ -180,7 +180,7 @@ def deployment_yaml( limits["memory"] = memory limits["nvidia.com/gpu"] = str(n_gpus) - #command + # command container["command"] = ["vllm", "serve"] container["args"] = vllm_serve_args # env variables to to set parameters for docker execution @@ -194,7 +194,7 @@ def deployment_yaml( # {"name": "TENSOR_PARALLEL_SIZE", "value": str(n_gpus)}, # ] if hf_token is not None: - container["env"]=[{"name": "HF_TOKEN", "value": hf_token}] + container["env"] = [{"name": "HF_TOKEN", "value": hf_token}] if claim_name is not None: if "env" not in container: container["env"] = [] diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index 1d81ee36..e3d39cc8 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -17,6 +17,7 @@ "valencia_url_in_b64_out": "valencia_url_in_b64_out.jsonl", } + def execute_benchmark( base_url: str, model: str, From 35bb2e2f396bc056e7e108eac756f28b5609e4ac Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Tue, 4 Nov 2025 09:35:00 +0000 Subject: [PATCH 25/32] chore: Removed dataset file as it was relocated to a different folder Signed-off-by: Christian Pinto --- .../ado_actuators/vllm_performance/geospatial_india.jsonl | 1 - 1 file changed, 1 deletion(-) delete mode 100644 plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl deleted file mode 100644 index 693bbc09..00000000 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"prompt":{"data": {"data": "https://huggingface.co/christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM/resolve/main/India_900498_S2Hand.tif","data_format": "url","out_data_format": "b64_json","indices": [1, 2, 3, 8, 11, 12]},"priority": 0,"softmax": false}} From c68ab3540c06788f5249cdb5bd21c87be05ec7f3 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Tue, 4 Nov 2025 16:03:38 +0000 Subject: [PATCH 26/32] feat: Added custom dataset geospatial experiment Signed-off-by: Christian Pinto --- .../vllm_performance/actuator.py | 1 + .../vllm_performance/experiment_executor.py | 5 +- .../performance_testing_geospatial.yaml | 220 +++++++++++++++++- .../execute_benchmark.py | 71 +++--- 4 files changed, 268 insertions(+), 29 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py index dd45091f..38e9f47a 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py @@ -195,6 +195,7 @@ async def submit( if experiment.identifier in [ "performance-testing-full", "performance-testing-geospatial-full", + 
"performance-testing-geospatial-full-custom-dataset", ]: if not self.env_manager: raise MissingConfigurationForExperimentError( diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 693bdb58..d588a884 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -285,7 +285,10 @@ def run_resource_and_workload_experiment( start = time.time() result = None try: - if experiment.identifier == "performance-testing-geospatial-full": + if experiment.identifier in [ + "performance-testing-geospatial-full", + "performance-testing-geospatial-full-custom-dataset", + ]: result = execute_geospatial_benchmark( base_url=base_url, model=values.get("model"), diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 5d976439..9edd8668 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -216,7 +216,7 @@ performance_testing-geospatial-full: values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] - identifier: 'skip_tokenizer_init' metadata: - description: "(deployment) skip tokenizer intialization" + description: "(deployment) skip tokenizer initialization" propertyDomain: variableType: BINARY_VARIABLE_TYPE values: [True, False] @@ -286,6 +286,224 @@ performance_testing-geospatial-full: identifier: 'dataset' value: 'india_url_in_b64_out' # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "total_input_tokens" + - identifier: "total_output_tokens" + - identifier: "request_throughput" + - identifier: "output_throughput" + - identifier: "total_token_throughput" + - identifier: "mean_ttft_ms" + - identifier: "median_ttft_ms" + - identifier: "std_ttft_ms" + - identifier: "p25_ttft_ms" + - identifier: "p50_ttft_ms" + - identifier: "p75_ttft_ms" + - identifier: "p99_ttft_ms" + - identifier: "mean_tpot_ms" + - identifier: "median_tpot_ms" + - identifier: "std_tpot_ms" + - identifier: "p25_tpot_ms" + - identifier: "p50_tpot_ms" + - identifier: "p75_tpot_ms" + - identifier: "p99_tpot_ms" + - identifier: "mean_itl_ms" + - identifier: "median_itl_ms" + - identifier: "std_itl_ms" + - identifier: "p25_itl_ms" + - identifier: "p50_itl_ms" + - identifier: "p75_itl_ms" + - identifier: "p99_itl_ms" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: "std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'VLLM performance testing across compute resource and workload configuration' +performance_testing-geospatial-full-custom-dataset: + identifier: performance-testing-geospatial-full-custom-dataset + actuatorIdentifier: "vllm_performance" + requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values + - identifier: 'model' + metadata: + description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] + - identifier: 'request_rate' + metadata: + description: "(benchmark) The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1,1000] + interval: 1 # -1 means send all requests at time 0 + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "(benchmark) The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,10001] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "(benchmark) The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + - identifier: 'burstiness' + metadata: + description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: image + metadata: + description: "(deployment) Docker image to use to create vllm deployments" + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] + - identifier: n_cpus + metadata: + description: "(deployment) the number of CPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 1,17 ] + interval: 1 + - identifier: memory + metadata: + description: "(deployment) the amount of memory to allocate to vLLM pod" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "64Gi", "128Gi", "256Gi" ] + - identifier: dtype + metadata: + description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] + - identifier: 'gpu_memory_utilization' + metadata: + description: "(deployment) The fraction of GPU memory to be used for the model executor," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ .5, .75, .9 ] + - identifier: 'cpu_offload' + metadata: + description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 
0 means all weights are on GPU," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ 0, 8, 16, 24, 32 ] + - identifier: 'max_num_seq' + metadata: + description: "(deployment) Maximum number of sequences per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [32,2049] + interval: 32 + - identifier: 'max_batch_tokens' + metadata: + description: "(deployment) maximum number of batched tokens per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 8192, 32769] + interval: 1024 + - identifier: 'n_gpus' + metadata: + description: "(deployment) Number of GPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,9] + interval: 1 + - identifier: 'gpu_type' + metadata: + description: "(deployment) The GPU type to use" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] + - identifier: 'skip_tokenizer_init' + metadata: + description: "(deployment) skip tokenizer initialization" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'enforce_eager' + metadata: + description: "(deployment) enforce pytorch eager mode" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'io_processor_plugin' + metadata: + description: 'IO Pocessor plugin to load for the model' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ None, "terratorch_segmentation" ] + - identifier: 'dataset' + metadata: + description: "(benchmark) The dataset to be used for the experiment" + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + defaultParameterization: + - property: + identifier: 'image' + value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" + - property: + identifier: n_cpus + value: 8 + - property: + identifier: + memory + value: "128Gi" + - property: + identifier: dtype + value: "auto" + - property: + identifier: 'num_prompts' + value: 500 + - property: + identifier: 'max_concurrency' + value: -1 + - property: + identifier: 'burstiness' + value: 1.0 + - property: + identifier: 'gpu_memory_utilization' + value: .9 + - property: + identifier: 'cpu_offload' + value: 0 + - property: + identifier: 'max_num_seq' + value: 256 + - property: + identifier: 'max_batch_tokens' + value: 16384 + - property: + identifier: 'n_gpus' + value: 1 + - property: + identifier: 'gpu_type' + value: 'NVIDIA-A100-80GB-PCIe' + - property: + identifier: 'skip_tokenizer_init' + value: True + - property: + identifier: 'enforce_eager' + value: True + - property: + identifier: 'io_processor_plugin' + value: "terratorch_segmentation" + - property: + identifier: 'dataset' + value: None + # measurements targetProperties: - identifier: "duration" - identifier: "completed" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index e3d39cc8..8b651cfc 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -12,6 +12,8 @@ get_results, ) +logger = logging.getLogger("vllm-bench") + default_geospatial_datasets_filenames = { "india_url_in_b64_out": "india_url_in_b64_out.jsonl", 
"valencia_url_in_b64_out": "valencia_url_in_b64_out.jsonl", @@ -51,7 +53,6 @@ def execute_benchmark( keys are vllm benchmark arguments. values are the values to pass to the arguments :return: results dictionary """ - logger = logging.getLogger("vllm-bench") logger.debug( f"executing benchmark, invoking service at {base_url} with the parameters: " @@ -181,34 +182,50 @@ def execute_geospatial_benchmark( :param output_token_length: length of output tokens :return: results dictionary """ - from importlib import resources - - dataset_filename = default_geospatial_datasets_filenames[dataset] - - with resources.path( - "ado_actuators.vllm_performance.datasets", - dataset_filename, - ) as data_set_path: - return execute_benchmark( - base_url=base_url, - backend="io-processor-plugin", - model=model, - data_set="custom", - interpreter=interpreter, - num_prompts=num_prompts, - request_rate=request_rate, - max_concurrency=max_concurrency, - hf_token=hf_token, - benchmark_retries=benchmark_retries, - retries_timeout=retries_timeout, - burstiness=burstiness, - custom_args={ - "--dataset-path": data_set_path, - "--endpoint": "/pooling", - "--skip-tokenizer-init": True, - }, + + if dataset in default_geospatial_datasets_filenames: + from pathlib import Path + + dataset_filename = default_geospatial_datasets_filenames[dataset] + parent_path = Path(__file__).parents[1].absolute() + data_set_path = os.path.join(parent_path, "datasets", dataset_filename) + else: + # This can only happen with the performance-testing-geospatial-full-custom-dataset + # experiment, otherwise the dataset name is always one of the allowed ones. + # Here the assumption is that the dataset file is placed in the process working directory. + ray_working_dir = os.getcwd() + data_set_path = os.path.join(ray_working_dir, dataset) + + if not os.path.exists(data_set_path) or not os.path.isfile(data_set_path): + logger.warning( + f"The dataset filename provided does not exist or does not point to a valid file: {data_set_path}" + ) + raise Exception( + f"The dataset filename provided does not exist or does not point to a valid file: {data_set_path}" ) + logger.debug(f"Dataset path {data_set_path}") + + return execute_benchmark( + base_url=base_url, + backend="io-processor-plugin", + model=model, + data_set="custom", + interpreter=interpreter, + num_prompts=num_prompts, + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=hf_token, + benchmark_retries=benchmark_retries, + retries_timeout=retries_timeout, + burstiness=burstiness, + custom_args={ + "--dataset-path": data_set_path, + "--endpoint": "/pooling", + "--skip-tokenizer-init": True, + }, + ) + if __name__ == "__main__": results = execute_geospatial_benchmark( From 41815b6073d83570962f11468be04c7ca9ed782a Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 6 Nov 2025 13:28:51 +0000 Subject: [PATCH 27/32] fix: Reworked vllm_catalog actuator experiments catalog loading Signed-off-by: Christian Pinto --- .../vllm_performance/actuator.py | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py index 38e9f47a..ec38f1fe 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py @@ -3,8 +3,8 @@ import json import logging -import os import uuid +from pathlib import Path 
import ray import yaml @@ -57,27 +57,24 @@ def catalog( # Loading experiment definitions for yaml files contained in the `experiments` directory. # NOTE: Only files can be placed in the experiments directory, # but each file can contain multiple experiment definitions - path = os.path.abspath(__file__) - exp_dir = os.path.join(os.path.split(path)[0], "experiments") + curr_path = Path(__file__) + exp_dir = curr_path.parent / Path("experiments") + logger.debug(f"Experiments dir {exp_dir.absolute()}") experiments = [] - for exp_file in os.listdir(exp_dir): - logger.debug(f"Loading experiments from {exp_file}") - exp_file_path = os.path.join(exp_dir, exp_file) - if os.path.isdir(exp_file_path): - logger.error( - f"{exp_file_path} is a directory. Only files are supported in the experiments directory" - ) - raise Exception( - f"{exp_file_path} is a directory. Only files are supported in the experiments directory" - ) - with open(exp_file_path) as f: - try: - data = yaml.safe_load(f) - except yaml.YAMLError as e: - logger.error(f"File {exp_file} is a malformed YAML - {e}") - raise Exception(f"File {exp_file} is a malformed YAML - {e}") + for exp_file in exp_dir.iterdir(): + if exp_file.is_dir(): + continue - experiments.extend([Experiment(**data[e]) for e in data]) + logger.debug(f"Loading experiments from {exp_file.name}") + try: + file_data = exp_file.read_text() + data = yaml.safe_load(file_data) + except yaml.YAMLError: + error_message = f"File {exp_file.name} is a malformed YAML" + logger.error(error_message) + raise ValueError(error_message) + + experiments.extend([Experiment.model_validate(data[e]) for e in data]) return ExperimentCatalog( catalogIdentifier=cls.identifier, From 53bf77ae3423e3b9ab8290a5f5d9c5350e8830c3 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 6 Nov 2025 13:49:03 +0000 Subject: [PATCH 28/32] chore: Improved experiment definition language and properties typing Signed-off-by: Christian Pinto --- .../experiments/performance_testing.yaml | 7 ++++--- .../performance_testing_geospatial.yaml | 19 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml index c9537fd7..032bfec1 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml @@ -58,7 +58,7 @@ performance_testing-full: interval: 1 - identifier: 'dataset' metadata: - description: "(benchmark) The dataset to be used fof the experiment" + description: "(benchmark) The dataset to be used for the experiment" propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" values: [ 'random' ] @@ -128,7 +128,7 @@ performance_testing-full: values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] - identifier: 'skip_tokenizer_init' metadata: - description: "(deployment) skip tokenizer intialization" + description: "(deployment) skip tokenizer initialization" propertyDomain: variableType: BINARY_VARIABLE_TYPE values: [True, False] @@ -248,6 +248,7 @@ performance_testing-endpoint: description: 'The endpoint(s) to test' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: ["http://localhost:8000"] - identifier: 'request_rate' metadata: description: "The number of requests to send per second" @@ -293,7 
+294,7 @@ performance_testing-endpoint: interval: 1 - identifier: 'dataset' metadata: - description: "(benchmark) The dataset to be used fof the experiment" + description: "(benchmark) The dataset to be used for the experiment" propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" values: [ 'random' ] diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 9edd8668..29d3a81d 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -17,7 +17,8 @@ performance_testing-geospatial-endpoint: metadata: description: 'The endpoint(s) to test' propertyDomain: - variableType: "UNKNOWN_VARIABLE_TYPE" + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: ["http://localhost:8000"] - identifier: 'request_rate' metadata: description: "The number of requests to send per second" @@ -341,6 +342,12 @@ performance_testing-geospatial-full-custom-dataset: variableType: 'DISCRETE_VARIABLE_TYPE' domainRange: [-1,1000] interval: 1 # -1 means send all requests at time 0 + - identifier: 'dataset' + metadata: + description: "(benchmark) The dataset to be used for the experiment" + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [" custom_dataset.jsonl "] optionalProperties: - identifier: 'num_prompts' metadata: @@ -444,12 +451,7 @@ performance_testing-geospatial-full-custom-dataset: description: 'IO Pocessor plugin to load for the model' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ None, "terratorch_segmentation" ] - - identifier: 'dataset' - metadata: - description: "(benchmark) The dataset to be used for the experiment" - propertyDomain: - variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "terratorch_segmentation" ] defaultParameterization: - property: identifier: 'image' @@ -500,9 +502,6 @@ performance_testing-geospatial-full-custom-dataset: - property: identifier: 'io_processor_plugin' value: "terratorch_segmentation" - - property: - identifier: 'dataset' - value: None # measurements targetProperties: - identifier: "duration" From 18f217c331e55781d07453a4548115e89bf7abc2 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 6 Nov 2025 13:56:19 +0000 Subject: [PATCH 29/32] fix: Improved logic for fetching a dataset in the geospatial benchmark Signed-off-by: Christian Pinto --- .../k8/yaml_support/build_components.py | 11 +------ .../execute_benchmark.py | 33 ++++++++----------- 2 files changed, 14 insertions(+), 30 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py index ffca8985..36ab4fe0 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py @@ -183,16 +183,7 @@ def deployment_yaml( # command container["command"] = ["vllm", "serve"] container["args"] = vllm_serve_args - # env variables to to set parameters for docker execution - # container["env"] = [ - # {"name": "MODEL", "value": model}, - # {"name": 
"GPU_MEMORY_UTILIZATION", "value": str(gpu_memory_utilization)}, - # {"name": "DTYPE", "value": dtype.value}, - # {"name": "CPU_OFFLOAD_GB", "value": str(cpu_offload)}, - # {"name": "MAX_NUM_BATCHED_TOKENS", "value": str(max_batch_tokens)}, - # {"name": "MAX_NUM_SEQ", "value": str(max_num_seq)}, - # {"name": "TENSOR_PARALLEL_SIZE", "value": str(n_gpus)}, - # ] + if hf_token is not None: container["env"] = [{"name": "HF_TOKEN", "value": hf_token}] if claim_name is not None: diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index a23285fe..0120694c 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -63,13 +63,7 @@ def execute_benchmark( logger.debug( f"request_rate {request_rate}, max_concurrency {max_concurrency}, benchmark retries {benchmark_retries}" ) - # The code below is commented as we are switching from a script invocation to command line - # invocation. If we want to bring back script execution for any reason, this code must be - # uncommented - # parameters - # code = os.path.abspath( - # os.path.join(os.path.dirname(__file__), "benchmark_serving.py") - # ) + request = f"export HF_TOKEN={hf_token} && " if hf_token is not None else "" f_name = f"{uuid.uuid4().hex}.json" request += ( @@ -182,27 +176,26 @@ def execute_geospatial_benchmark( :param output_token_length: length of output tokens :return: results dictionary """ + from pathlib import Path if dataset in default_geospatial_datasets_filenames: - from pathlib import Path - dataset_filename = default_geospatial_datasets_filenames[dataset] - parent_path = Path(__file__).parents[1].absolute() - data_set_path = os.path.join(parent_path, "datasets", dataset_filename) + parent_path = Path(__file__).parents[1] + data_set_path = parent_path / "datasets" / dataset_filename else: # This can only happen with the performance-testing-geospatial-full-custom-dataset # experiment, otherwise the dataset name is always one of the allowed ones. # Here the assumption is that the dataset file is placed in the process working directory. 
- ray_working_dir = os.getcwd() - data_set_path = os.path.join(ray_working_dir, dataset) + ray_working_dir = Path.cwd() + data_set_path = ray_working_dir / dataset - if not os.path.exists(data_set_path) or not os.path.isfile(data_set_path): - logger.warning( - f"The dataset filename provided does not exist or does not point to a valid file: {data_set_path}" - ) - raise Exception( - f"The dataset filename provided does not exist or does not point to a valid file: {data_set_path}" + if not data_set_path.is_file(): + error_string = ( + "The dataset filename provided does not exist or " + f"does not point to a valid file: {data_set_path}" ) + logger.warning(error_string) + raise ValueError(error_string) logger.debug(f"Dataset path {data_set_path}") @@ -220,7 +213,7 @@ def execute_geospatial_benchmark( retries_timeout=retries_timeout, burstiness=burstiness, custom_args={ - "--dataset-path": data_set_path, + "--dataset-path": f"{data_set_path.resolve()}", "--endpoint": "/pooling", "--skip-tokenizer-init": True, }, From e93779454af7494a7de4d152bb7ff106fc6d1bb1 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 6 Nov 2025 16:24:28 +0000 Subject: [PATCH 30/32] chore: various fixes around after review Signed-off-by: Christian Pinto --- .../experiments/performance_testing.yaml | 2 +- .../performance_testing_geospatial.yaml | 8 ++-- .../execute_benchmark.py | 42 +++++++++---------- 3 files changed, 25 insertions(+), 27 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml index 032bfec1..a60a17d4 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml @@ -134,7 +134,7 @@ performance_testing-full: values: [True, False] - identifier: 'enforce_eager' metadata: - description: "(deployment) enforce pytorch eager mode" + description: "(deployment) enforce PyTorch eager mode" propertyDomain: variableType: BINARY_VARIABLE_TYPE values: [True, False] diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 29d3a81d..43f8e3e2 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -229,7 +229,7 @@ performance_testing-geospatial-full: values: [True, False] - identifier: 'io_processor_plugin' metadata: - description: 'IO Pocessor plugin to load for the model' + description: 'IO Processor plugin to load for the model' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: [ None, "terratorch_segmentation" ] @@ -347,7 +347,7 @@ performance_testing-geospatial-full-custom-dataset: description: "(benchmark) The dataset to be used for the experiment" propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [" custom_dataset.jsonl "] + values: ["custom_dataset.jsonl"] optionalProperties: - identifier: 'num_prompts' metadata: @@ -442,13 +442,13 @@ performance_testing-geospatial-full-custom-dataset: values: [True, False] - identifier: 'enforce_eager' metadata: - 
description: "(deployment) enforce pytorch eager mode" + description: "(deployment) enforce PyTorch eager mode" propertyDomain: variableType: BINARY_VARIABLE_TYPE values: [True, False] - identifier: 'io_processor_plugin' metadata: - description: 'IO Pocessor plugin to load for the model' + description: 'IO Processor plugin to load for the model' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: [ "terratorch_segmentation" ] diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index 0120694c..028abdd3 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -23,7 +23,7 @@ def execute_benchmark( base_url: str, model: str, - data_set: str, + dataset: str, backend: str = "openai", interpreter: str = "python", num_prompts: int = 500, @@ -32,7 +32,7 @@ def execute_benchmark( hf_token: str | None = None, benchmark_retries: int = 3, retries_timeout: int = 5, - data_set_path: str | None = None, + dataset_path: str | None = None, custom_args: dict[str, Any] | None = None, burstiness: float = 1, ) -> dict[str, Any]: @@ -40,7 +40,7 @@ def execute_benchmark( Execute benchmark :param base_url: url for vllm endpoint :param model: model - :param data_set: data set name ["sharegpt", "sonnet", "random", "hf"] + :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] :param interpreter - name of Python interpreter :param num_prompts: number of prompts :param request_rate: request rate @@ -48,7 +48,7 @@ def execute_benchmark( :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry - :param data_set_path: path to the dataset + :param dataset_path: path to the dataset :param custom_args: custom arguments to pass to the benchmark. keys are vllm benchmark arguments. values are the values to pass to the arguments :return: results dictionary @@ -58,7 +58,7 @@ def execute_benchmark( f"executing benchmark, invoking service at {base_url} with the parameters: " ) logger.debug( - f"model {model}, data set {data_set}, python {interpreter}, num prompts {num_prompts}" + f"model {model}, data set {dataset}, python {interpreter}, num prompts {num_prompts}" ) logger.debug( f"request_rate {request_rate}, max_concurrency {max_concurrency}, benchmark retries {benchmark_retries}" @@ -67,16 +67,14 @@ def execute_benchmark( request = f"export HF_TOKEN={hf_token} && " if hf_token is not None else "" f_name = f"{uuid.uuid4().hex}.json" request += ( - # changing from script invocation to cli invocation - # f"{interpreter} {code} --backend openai --base-url {base_url} --dataset-name {data_set} " - f"vllm bench serve --backend {backend} --base-url {base_url} --dataset-name {data_set} " + f"vllm bench serve --backend {backend} --base-url {base_url} --dataset-name {dataset} " f"--model {model} --seed 12345 --num-prompts {num_prompts!s} --save-result --metric-percentiles " f'"25,75,99" --percentile-metrics "ttft,tpot,itl,e2el" --result-dir . 
--result-filename {f_name} ' f"--burstiness {burstiness} " ) - if data_set_path is not None: - request += f" --dataset-path {data_set_path} " + if dataset_path is not None: + request += f" --dataset-path {dataset_path} " if request_rate is not None: request += f" --request-rate {request_rate!s} " if max_concurrency is not None: @@ -123,7 +121,7 @@ def execute_random_benchmark( Execute benchmark with random dataset :param base_url: url for vllm endpoint :param model: model - :param data_set: data set name ["sharegpt", "sonnet", "random", "hf"] + :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry @@ -135,7 +133,7 @@ def execute_random_benchmark( return execute_benchmark( base_url=base_url, model=model, - data_set=dataset, + dataset=dataset, interpreter=interpreter, num_prompts=num_prompts, request_rate=request_rate, @@ -168,12 +166,12 @@ def execute_geospatial_benchmark( Execute benchmark with random dataset :param base_url: url for vllm endpoint :param model: model - :param data_set: data set name ["sharegpt", "sonnet", "random", "hf"] + :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry - :param input_token_length: length of input tokens - :param output_token_length: length of output tokens + :param burstiness: burstiness factor of the request generation, 0 < burstiness < 1 + :param interpreter: python interpreter to use :return: results dictionary """ from pathlib import Path @@ -181,29 +179,29 @@ def execute_geospatial_benchmark( if dataset in default_geospatial_datasets_filenames: dataset_filename = default_geospatial_datasets_filenames[dataset] parent_path = Path(__file__).parents[1] - data_set_path = parent_path / "datasets" / dataset_filename + dataset_path = parent_path / "datasets" / dataset_filename else: # This can only happen with the performance-testing-geospatial-full-custom-dataset # experiment, otherwise the dataset name is always one of the allowed ones. # Here the assumption is that the dataset file is placed in the process working directory. 
ray_working_dir = Path.cwd() - data_set_path = ray_working_dir / dataset + dataset_path = ray_working_dir / dataset - if not data_set_path.is_file(): + if not dataset_path.is_file(): error_string = ( "The dataset filename provided does not exist or " - f"does not point to a valid file: {data_set_path}" + f"does not point to a valid file: {dataset_path}" ) logger.warning(error_string) raise ValueError(error_string) - logger.debug(f"Dataset path {data_set_path}") + logger.debug(f"Dataset path {dataset_path}") return execute_benchmark( base_url=base_url, backend="io-processor-plugin", model=model, - data_set="custom", + dataset="custom", interpreter=interpreter, num_prompts=num_prompts, request_rate=request_rate, @@ -213,7 +211,7 @@ def execute_geospatial_benchmark( retries_timeout=retries_timeout, burstiness=burstiness, custom_args={ - "--dataset-path": f"{data_set_path.resolve()}", + "--dataset-path": f"{dataset_path.resolve()}", "--endpoint": "/pooling", "--skip-tokenizer-init": True, }, From bad013389fbd453c271ca752778abc6fa4742d62 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 7 Nov 2025 11:37:47 +0000 Subject: [PATCH 31/32] chore(performance_testing_geospatial): removing target properties that are irrelevant to geospatial tests Signed-off-by: Christian Pinto --- .../performance_testing_geospatial.yaml | 69 ------------------- 1 file changed, 69 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 43f8e3e2..65ee2733 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -74,29 +74,6 @@ performance_testing-geospatial-endpoint: - identifier: "total_input_tokens" - identifier: "total_output_tokens" - identifier: "request_throughput" - - identifier: "output_throughput" - - identifier: "total_token_throughput" - - identifier: "mean_ttft_ms" - - identifier: "median_ttft_ms" - - identifier: "std_ttft_ms" - - identifier: "p25_ttft_ms" - - identifier: "p50_ttft_ms" - - identifier: "p75_ttft_ms" - - identifier: "p99_ttft_ms" - - identifier: "mean_tpot_ms" - - identifier: "median_tpot_ms" - - identifier: "std_tpot_ms" - - identifier: "p25_tpot_ms" - - identifier: "p50_tpot_ms" - - identifier: "p75_tpot_ms" - - identifier: "p99_tpot_ms" - - identifier: "mean_itl_ms" - - identifier: "median_itl_ms" - - identifier: "std_itl_ms" - - identifier: "p25_itl_ms" - - identifier: "p50_itl_ms" - - identifier: "p75_itl_ms" - - identifier: "p99_itl_ms" - identifier: "mean_e2el_ms" - identifier: "median_e2el_ms" - identifier: "std_e2el_ms" @@ -293,29 +270,6 @@ performance_testing-geospatial-full: - identifier: "total_input_tokens" - identifier: "total_output_tokens" - identifier: "request_throughput" - - identifier: "output_throughput" - - identifier: "total_token_throughput" - - identifier: "mean_ttft_ms" - - identifier: "median_ttft_ms" - - identifier: "std_ttft_ms" - - identifier: "p25_ttft_ms" - - identifier: "p50_ttft_ms" - - identifier: "p75_ttft_ms" - - identifier: "p99_ttft_ms" - - identifier: "mean_tpot_ms" - - identifier: "median_tpot_ms" - - identifier: "std_tpot_ms" - - identifier: "p25_tpot_ms" - - identifier: "p50_tpot_ms" - - identifier: "p75_tpot_ms" - - identifier: "p99_tpot_ms" - - 
identifier: "mean_itl_ms" - - identifier: "median_itl_ms" - - identifier: "std_itl_ms" - - identifier: "p25_itl_ms" - - identifier: "p50_itl_ms" - - identifier: "p75_itl_ms" - - identifier: "p99_itl_ms" - identifier: "mean_e2el_ms" - identifier: "median_e2el_ms" - identifier: "std_e2el_ms" @@ -509,29 +463,6 @@ performance_testing-geospatial-full-custom-dataset: - identifier: "total_input_tokens" - identifier: "total_output_tokens" - identifier: "request_throughput" - - identifier: "output_throughput" - - identifier: "total_token_throughput" - - identifier: "mean_ttft_ms" - - identifier: "median_ttft_ms" - - identifier: "std_ttft_ms" - - identifier: "p25_ttft_ms" - - identifier: "p50_ttft_ms" - - identifier: "p75_ttft_ms" - - identifier: "p99_ttft_ms" - - identifier: "mean_tpot_ms" - - identifier: "median_tpot_ms" - - identifier: "std_tpot_ms" - - identifier: "p25_tpot_ms" - - identifier: "p50_tpot_ms" - - identifier: "p75_tpot_ms" - - identifier: "p99_tpot_ms" - - identifier: "mean_itl_ms" - - identifier: "median_itl_ms" - - identifier: "std_itl_ms" - - identifier: "p25_itl_ms" - - identifier: "p50_itl_ms" - - identifier: "p75_itl_ms" - - identifier: "p99_itl_ms" - identifier: "mean_e2el_ms" - identifier: "median_e2el_ms" - identifier: "std_e2el_ms" From 164e13e35e7750777616f7210e08e34b991644b6 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 7 Nov 2025 11:39:20 +0000 Subject: [PATCH 32/32] chore(execute_benchmark): docstrings cleanup Signed-off-by: Christian Pinto --- .../execute_benchmark.py | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index 028abdd3..839aa528 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -41,16 +41,19 @@ def execute_benchmark( :param base_url: url for vllm endpoint :param model: model :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] - :param interpreter - name of Python interpreter + :param backend: name of the vLLM benchmark backend to be used ["vllm", "openai", "openai-chat", "openai-audio", "openai-embeddings"] + :param interpreter: name of Python interpreter :param num_prompts: number of prompts :param request_rate: request rate - :param max_concurrency: max concurrency + :param max_concurrency: maximum number of concurrent requests :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry :param dataset_path: path to the dataset :param custom_args: custom arguments to pass to the benchmark. + :param burstiness: burstiness factor of the request generation, 0 < burstiness < 1 keys are vllm benchmark arguments. 
values are the values to pass to the arguments + :return: results dictionary """ @@ -68,7 +71,7 @@ def execute_benchmark( f_name = f"{uuid.uuid4().hex}.json" request += ( f"vllm bench serve --backend {backend} --base-url {base_url} --dataset-name {dataset} " - f"--model {model} --seed 12345 --num-prompts {num_prompts!s} --save-result --metric-percentiles " + f"--model {model} --seed 12345 --num-prompts 10 --save-result --metric-percentiles " f'"25,75,99" --percentile-metrics "ttft,tpot,itl,e2el" --result-dir . --result-filename {f_name} ' f"--burstiness {burstiness} " ) @@ -122,11 +125,17 @@ def execute_random_benchmark( :param base_url: url for vllm endpoint :param model: model :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] + :param num_prompts: number of prompts + :param request_rate: request rate + :param max_concurrency: maximum number of concurrent requests :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry - :param input_token_length: length of input tokens - :param output_token_length: length of output tokens + :param burstiness: burstiness factor of the request generation, 0 < burstiness < 1 + :param number_input_tokens: maximum number of input tokens for each request, + :param max_output_tokens: maximum number of output tokens for each request, + :param interpreter: name of Python interpreter + :return: results dictionary """ # Call execute_benchmark with the appropriate arguments @@ -167,11 +176,15 @@ def execute_geospatial_benchmark( :param base_url: url for vllm endpoint :param model: model :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] + :param num_prompts: number of prompts + :param request_rate: request rate + :param max_concurrency: maximum number of concurrent requests :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry :param burstiness: burstiness factor of the request generation, 0 < burstiness < 1 :param interpreter: python interpreter to use + :return: results dictionary """ from pathlib import Path