Draft pull request: changes from all commits (44 commits)
1c77415
feat: support geospatial benchmark
michael-johnston Oct 15, 2025
52ccff4
feat: add geospatial experiments
michael-johnston Oct 15, 2025
98591b5
various fixes to the vllm_performance actuator
christian-pinto Oct 15, 2025
bd62781
fix: add max_batch_tokens
michael-johnston Oct 15, 2025
016bf1c
Merge remote-tracking branch 'origin/main' into maj_vllm_geospatial
michael-johnston Oct 16, 2025
6b7c67b
Merge remote-tracking branch 'origin/maj_vllm_geospatial' into maj_vl…
michael-johnston Oct 18, 2025
c1dec4a
Updated vllm performance actuator to support geospatial
christian-pinto Oct 28, 2025
2e03028
Temporarily avoiding cpu14
christian-pinto Oct 30, 2025
80c68c4
Temporarily avoiding cpu14
christian-pinto Oct 30, 2025
592e179
Added india dataset
christian-pinto Oct 30, 2025
94c7490
Fixed BaseSamplerConfig
christian-pinto Oct 30, 2025
3fd83b8
Some changes to the vllmperformance experiments
christian-pinto Oct 30, 2025
90ae6bb
Some changes to the experiment and reverted the deployment…
christian-pinto Oct 31, 2025
a750975
Removed some clutter from deployment template
christian-pinto Oct 31, 2025
c432cff
Few more fixes
christian-pinto Oct 31, 2025
b851d03
Fixed bug in validate_entity
christian-pinto Nov 3, 2025
6013270
Merge branch 'cp-fix-run-experiment' into maj_vllm_geospatial
christian-pinto Nov 3, 2025
7055c38
One more fix to a log message
christian-pinto Nov 3, 2025
c212fe6
Merge branch 'cp-fix-run-experiment' into maj_vllm_geospatial
christian-pinto Nov 3, 2025
5bdf902
One more fix to a log message
christian-pinto Nov 3, 2025
dbab4c7
One more fix to a log message
christian-pinto Nov 3, 2025
4c1ea32
Merge branch 'cp-fix-run-experiment' into maj_vllm_geospatial
christian-pinto Nov 3, 2025
fd100b6
Fixes to vllm_performance actuator
christian-pinto Nov 3, 2025
f7ceb52
fix(experiment): Fixed bug in validate_entity
christian-pinto Nov 3, 2025
dcfe10e
Merge branch 'cp-fix-run-experiment' into maj_vllm_geospatial
christian-pinto Nov 3, 2025
df4f9bc
fix: Not using reference which may be parameterized
michael-johnston Nov 3, 2025
601bf7b
Merge remote-tracking branch 'origin/cp-fix-run-experiment' into cp-f…
michael-johnston Nov 3, 2025
d744287
fix: validate_entity
michael-johnston Nov 3, 2025
1869a49
fix: missing return
michael-johnston Nov 3, 2025
883e877
test: for validate_entity
michael-johnston Nov 3, 2025
4dc6bda
Merge branch 'cp-fix-run-experiment' into maj_vllm_geospatial
christian-pinto Nov 4, 2025
064750f
chore: fixed formatting with black
christian-pinto Nov 4, 2025
35bb2e2
chore: Removed dataset file as it was relocated to a different folder
christian-pinto Nov 4, 2025
c68ab35
feat: Added custom dataset geospatial experiment
christian-pinto Nov 4, 2025
25aaf9f
Merge branch 'main' into maj_vllm_geospatial
christian-pinto Nov 4, 2025
8f83bf3
Merge branch 'main' into maj_vllm_geospatial
christian-pinto Nov 6, 2025
0e97d20
Merge remote-tracking branch 'origin' into maj_vllm_geospatial
christian-pinto Nov 6, 2025
41815b6
fix: Reworked vllm_catalog actuator experiments catalog loading
christian-pinto Nov 6, 2025
53bf77a
chore: Improved experiment definition language and properties typing
christian-pinto Nov 6, 2025
18f217c
fix: Improved logic for fetching a dataset in the geospatial benchmark
christian-pinto Nov 6, 2025
e937794
chore: various fixes after review
christian-pinto Nov 6, 2025
5764da2
Merge branch 'main' into maj_vllm_geospatial
christian-pinto Nov 7, 2025
bad0133
chore(performance_testing_geospatial): removing target properties tha…
christian-pinto Nov 7, 2025
164e13e
chore(execute_benchmark): docstrings cleanup
christian-pinto Nov 7, 2025
=== Changed file 1 of 4: vllm_performance actuator module (experiment catalog and submit) ===
@@ -3,8 +3,8 @@

import json
import logging
import os
import uuid
from pathlib import Path

import ray
import yaml
@@ -54,14 +54,27 @@ def catalog(
) -> ExperimentCatalog:
"""Returns the Experiments your actuator provides"""

# The catalog can be formed in code here or read from a file containing the Experiments models
# This shows reading from a file
# Loading experiment definitions from the YAML files contained in the `experiments` directory.
# NOTE: Only files can be placed in the experiments directory,
# but each file can contain multiple experiment definitions
curr_path = Path(__file__)
exp_dir = curr_path.parent / Path("experiments")
logger.debug(f"Experiments dir {exp_dir.absolute()}")
experiments = []
for exp_file in exp_dir.iterdir():
if exp_file.is_dir():
continue

logger.debug(f"Loading experiments from {exp_file.name}")
try:
file_data = exp_file.read_text()
data = yaml.safe_load(file_data)
except yaml.YAMLError:
error_message = f"File {exp_file.name} is a malformed YAML"
logger.error(error_message)
raise ValueError(error_message)

path = os.path.abspath(__file__)
path = os.path.split(path)[0]
with open(os.path.join(path, "experiments.yaml")) as f:
data = yaml.safe_load(f)
experiments = [Experiment(**data[e]) for e in data]
experiments.extend([Experiment.model_validate(data[e]) for e in data])

return ExperimentCatalog(
catalogIdentifier=cls.identifier,
@@ -176,7 +189,11 @@ async def submit(
if experiment.deprecated is True:
raise DeprecatedExperimentError(f"Experiment {experiment} is deprecated")

if experiment.identifier == "performance-testing-full":
if experiment.identifier in [
"performance-testing-full",
"performance-testing-geospatial-full",
"performance-testing-geospatial-full-custom-dataset",
]:
if not self.env_manager:
raise MissingConfigurationForExperimentError(
f"Actuator configuration did not contain sufficient information for a kubernetes environment manager to be created. "
@@ -197,7 +214,7 @@
)

# Execute experiment
# Note: Here the experiment instance is just past for convenience since we retrieved it above
# Note: Here the experiment instance is just passed for convenience since we retrieved it above
run_resource_and_workload_experiment.remote(
request=request,
experiment=experiment,
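For readers skimming the hunk above, here is a minimal, self-contained sketch of the new per-file catalog loading. The `Experiment` class below is only an illustrative stand-in for the actuator's real pydantic model, and the directory layout follows the comment in the diff; treat it as a sketch, not the actual implementation.

```python
# Hedged sketch of the catalog-loading pattern introduced in the diff above.
from pathlib import Path

import yaml
from pydantic import BaseModel


class Experiment(BaseModel):  # illustrative stand-in, not the real ado model
    identifier: str = ""
    deprecated: bool = False


def load_experiments(experiments_dir: Path) -> list[Experiment]:
    """Collect every experiment definition from the YAML files in a directory.

    Each file may define multiple experiments keyed by name; sub-directories
    are skipped, matching the loop in the diff.
    """
    experiments: list[Experiment] = []
    for exp_file in experiments_dir.iterdir():
        if exp_file.is_dir():
            continue
        try:
            data = yaml.safe_load(exp_file.read_text())
        except yaml.YAMLError as exc:
            raise ValueError(f"File {exp_file.name} is malformed YAML") from exc
        # One file can hold several definitions, each keyed by its name.
        experiments.extend(Experiment.model_validate(data[name]) for name in data)
    return experiments
```

Keeping one YAML file per benchmark family means the catalog can grow (for example, adding the geospatial experiments) without touching the loading code, which is presumably why the hard-coded `experiments.yaml` read was replaced.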
=== Changed file 2 of 4: new custom-dataset file for the geospatial benchmark (JSON Lines, one prompt) ===
@@ -0,0 +1 @@
{"prompt":{"data": {"data": "https://huggingface.co/christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM/resolve/main/India_900498_S2Hand.tif","data_format": "url","out_data_format": "b64_json","indices": [1, 2, 3, 8, 11, 12]},"priority": 0,"softmax": false}}
=== Changed file 3 of 4: benchmark execution tasks (environment creation and benchmark dispatch) ===
@@ -7,6 +7,7 @@
import subprocess
import sys
import time
import traceback

import ray
from ado_actuators.vllm_performance.actuator_parameters import (
@@ -22,6 +23,7 @@
VLLMDtype,
)
from ado_actuators.vllm_performance.vllm_performance_test.execute_benchmark import (
execute_geospatial_benchmark,
execute_random_benchmark,
)
from ray.actor import ActorHandle
@@ -142,6 +144,9 @@ def _create_environment(
reuse_deployment=False,
pvc_name=actuator.pvc_template,
namespace=actuator.namespace,
skip_tokenizer_init=values.get("skip_tokenizer_init"),
enforce_eager=values.get("enforce_eager"),
io_processor_plugin=values.get("io_processor_plugin"),
)
# Update manager
env_manager.done_creating.remote(definition=definition)
@@ -151,6 +156,7 @@
logger.error(
f"Attempt {attempt}. Failed to create test environment {e}"
)
logger.error(traceback.format_exception(e))
error = f"Failed to create test environment {e}"
time.sleep(tmout)
tmout *= 2
@@ -279,23 +285,42 @@ def run_resource_and_workload_experiment(
start = time.time()
result = None
try:
result = execute_random_benchmark(
base_url=base_url,
model=values.get("model"),
interpreter=actuator_parameters.interpreter,
num_prompts=int(values.get("num_prompts")),
request_rate=request_rate,
max_concurrency=max_concurrency,
hf_token=actuator_parameters.hf_token,
benchmark_retries=actuator_parameters.benchmark_retries,
retries_timeout=actuator_parameters.retries_timeout,
number_input_tokens=int(values.get("number_input_tokens")),
max_output_tokens=int(values.get("max_output_tokens")),
burstiness=float(values.get("burstiness")),
)
if experiment.identifier in [
"performance-testing-geospatial-full",
"performance-testing-geospatial-full-custom-dataset",
]:
result = execute_geospatial_benchmark(
base_url=base_url,
model=values.get("model"),
interpreter=actuator_parameters.interpreter,
num_prompts=int(values.get("num_prompts")),
request_rate=request_rate,
max_concurrency=max_concurrency,
hf_token=actuator_parameters.hf_token,
benchmark_retries=actuator_parameters.benchmark_retries,
retries_timeout=actuator_parameters.retries_timeout,
burstiness=float(values.get("burstiness")),
dataset=values.get("dataset"),
)
else:
result = execute_random_benchmark(
base_url=base_url,
model=values.get("model"),
interpreter=actuator_parameters.interpreter,
num_prompts=int(values.get("num_prompts")),
request_rate=request_rate,
max_concurrency=max_concurrency,
hf_token=actuator_parameters.hf_token,
benchmark_retries=actuator_parameters.benchmark_retries,
retries_timeout=actuator_parameters.retries_timeout,
number_input_tokens=int(values.get("number_input_tokens")),
max_output_tokens=int(values.get("max_output_tokens")),
burstiness=float(values.get("burstiness")),
dataset=values.get("dataset"),
)
logger.debug(f"benchmark executed in {time.time() - start} sec")
except Exception as e:
logger.error(f"Failed to execute VLLM performance test {e}")
logger.error(traceback.format_exception(e))
error = f"Failed to execute VLLM performance test {e}"
finally:
if pf is not None:
@@ -379,20 +404,36 @@ def run_workload_experiment(
error = None
measured_values = []
try:
result = execute_random_benchmark(
base_url=values.get("endpoint"),
model=values.get("model"),
interpreter=actuator_parameters.interpreter,
num_prompts=int(values.get("num_prompts")),
request_rate=request_rate,
max_concurrency=max_concurrency,
hf_token=actuator_parameters.hf_token,
benchmark_retries=actuator_parameters.benchmark_retries,
retries_timeout=actuator_parameters.retries_timeout,
number_input_tokens=int(values.get("number_input_tokens")),
max_output_tokens=int(values.get("max_output_tokens")),
burstiness=float(values.get("burstiness")),
)
if experiment.identifier == "performance-testing-geospatial-endpoint":
result = execute_geospatial_benchmark(
base_url=values.get("endpoint"),
model=values.get("model"),
interpreter=actuator_parameters.interpreter,
num_prompts=int(values.get("num_prompts")),
request_rate=request_rate,
max_concurrency=max_concurrency,
hf_token=actuator_parameters.hf_token,
benchmark_retries=actuator_parameters.benchmark_retries,
retries_timeout=actuator_parameters.retries_timeout,
burstiness=float(values.get("burstiness")),
dataset=values.get("dataset"),
)
else:
result = execute_random_benchmark(
base_url=values.get("endpoint"),
model=values.get("model"),
interpreter=actuator_parameters.interpreter,
num_prompts=int(values.get("num_prompts")),
request_rate=request_rate,
max_concurrency=max_concurrency,
hf_token=actuator_parameters.hf_token,
benchmark_retries=actuator_parameters.benchmark_retries,
retries_timeout=actuator_parameters.retries_timeout,
number_input_tokens=int(values.get("number_input_tokens")),
max_output_tokens=int(values.get("max_output_tokens")),
burstiness=float(values.get("burstiness")),
dataset=values.get("dataset"),
)
except Exception as e:
logger.error(f"Failed to execute VLLM performance test {e}")
error = f"Failed to execute VLLM performance test {e}"
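The two hunks above add the same branch twice, once per experiment runner: geospatial experiment identifiers are routed to `execute_geospatial_benchmark`, everything else falls back to `execute_random_benchmark`, and only the random path keeps the token-length parameters. A hedged condensation of that dispatch, with the arguments shared by both runners collapsed into `**common` (the real code passes them explicitly):

```python
# Hedged sketch of the benchmark dispatch added in the diff above.
# The import mirrors the one shown in the diff; `values` is the experiment's
# parameterization dict and **common stands for base_url/model/request_rate/etc.
from ado_actuators.vllm_performance.vllm_performance_test.execute_benchmark import (
    execute_geospatial_benchmark,
    execute_random_benchmark,
)

GEOSPATIAL_EXPERIMENTS = {
    "performance-testing-geospatial-full",
    "performance-testing-geospatial-full-custom-dataset",
    "performance-testing-geospatial-endpoint",
}


def run_selected_benchmark(experiment_identifier: str, values: dict, **common):
    """Dispatch to the benchmark runner matching the experiment identifier."""
    if experiment_identifier in GEOSPATIAL_EXPERIMENTS:
        return execute_geospatial_benchmark(dataset=values.get("dataset"), **common)
    # The random benchmark additionally controls prompt and output lengths.
    return execute_random_benchmark(
        number_input_tokens=int(values.get("number_input_tokens")),
        max_output_tokens=int(values.get("max_output_tokens")),
        dataset=values.get("dataset"),
        **common,
    )
```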
=== Changed file 4 of 4: experiment definitions YAML (performance_testing-full and performance_testing-endpoint) ===
@@ -56,6 +56,12 @@ performance_testing-full:
variableType: 'DISCRETE_VARIABLE_TYPE'
domainRange: [ 1, 10000 ]
interval: 1
- identifier: 'dataset'
metadata:
description: "(benchmark) The dataset to be used for the experiment"
propertyDomain:
variableType: "CATEGORICAL_VARIABLE_TYPE"
values: [ 'random' ]
- identifier: image
metadata:
description: "(deployment) Docker image to use to create vllm deployments"
@@ -120,6 +126,18 @@ performance_testing-full:
propertyDomain:
variableType: "CATEGORICAL_VARIABLE_TYPE"
values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ]
- identifier: 'skip_tokenizer_init'
metadata:
description: "(deployment) skip tokenizer initialization"
propertyDomain:
variableType: BINARY_VARIABLE_TYPE
values: [True, False]
- identifier: 'enforce_eager'
metadata:
description: "(deployment) enforce PyTorch eager mode"
propertyDomain:
variableType: BINARY_VARIABLE_TYPE
values: [True, False]
defaultParameterization:
- property:
identifier: 'image'
@@ -149,6 +167,9 @@ performance_testing-full:
- property:
identifier: 'max_output_tokens'
value: 128
- property:
identifier: 'dataset'
value: 'random'
- property:
identifier: 'gpu_memory_utilization'
value: .9
@@ -167,6 +188,12 @@ performance_testing-full:
- property:
identifier: 'gpu_type'
value: 'NVIDIA-A100-80GB-PCIe'
- property:
identifier: 'skip_tokenizer_init'
value: False
- property:
identifier: 'enforce_eager'
value: False
# measurements
targetProperties:
- identifier: "duration"
@@ -221,6 +248,7 @@ performance_testing-endpoint:
description: 'The endpoint(s) to test'
propertyDomain:
variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
values: ["http://localhost:8000"]
- identifier: 'request_rate'
metadata:
description: "The number of requests to send per second"
@@ -264,6 +292,12 @@ performance_testing-endpoint:
variableType: 'DISCRETE_VARIABLE_TYPE'
domainRange: [ -1, 500 ] # -1 means no concurrency control
interval: 1
- identifier: 'dataset'
metadata:
description: "(benchmark) The dataset to be used for the experiment"
propertyDomain:
variableType: "CATEGORICAL_VARIABLE_TYPE"
values: [ 'random' ]
defaultParameterization:
- value: 1000
property:
@@ -280,6 +314,9 @@ performance_testing-endpoint:
- value: 128
property:
identifier: 'max_output_tokens'
- property:
identifier: 'dataset'
value: 'random'
# measurements
targetProperties:
- identifier: "duration"
@@ -318,4 +355,4 @@ performance_testing-endpoint:
- identifier: "p75_e2el_ms"
- identifier: "p99_e2el_ms"
metadata:
description: 'Test inference performance of a model served by vLLM endpoint across inference workload configurations'
description: 'Test inference performance of a model served by vLLM endpoint across inference workload configurations'
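Since the catalog entries above are plain YAML, the new defaults and target properties can be inspected without running the actuator. A hedged snippet (the file path is an assumption; the `defaultParameterization` and `targetProperties` keys mirror the YAML in this diff):

```python
# Hedged sketch: print defaults and measured targets for one catalog entry.
from pathlib import Path

import yaml

catalog = yaml.safe_load(Path("experiments/experiments.yaml").read_text())  # hypothetical path
entry = catalog["performance_testing-endpoint"]

# Each defaultParameterization item pairs a property identifier with its value.
for item in entry["defaultParameterization"]:
    print(item["property"]["identifier"], "=", item["value"])

print("targets:", [t["identifier"] for t in entry["targetProperties"]])
```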