Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 72 additions & 55 deletions src/amltk/pipeline/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -901,7 +901,9 @@ def register_optimization_loop( # noqa: C901, PLR0915, PLR0912
),
metric: Metric | Sequence[Metric],
*,
optimizer: type[Optimizer] | Optimizer.CreateSignature | None = None,
optimizer: (
type[Optimizer] | Optimizer.CreateSignature | Optimizer | None
) = None,
Comment on lines +904 to +906
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now accept already instantiated optimizer (Optimizer)

seed: Seed | None = None,
max_trials: int | None = None,
n_workers: int = 1,
Expand Down Expand Up @@ -941,12 +943,15 @@ def register_optimization_loop( # noqa: C901, PLR0915, PLR0912

Alternatively, this can be a class inheriting from
[`Optimizer`][amltk.optimization.optimizer.Optimizer] or else
a signature match [`Optimizer.CreateSignature`][amltk.optimization.Optimizer.CreateSignature]
a signature match [`Optimizer.CreateSignature`][amltk.optimization.Optimizer.CreateSignature].

??? tip "`Optimizer.CreateSignature`"

::: amltk.optimization.Optimizer.CreateSignature

Lastly, you can also pass in your own already instantiated optimizer if you prefer, however
you should make sure to set it up correctly with the given metrics and search space.
It is recommended to just pass in the class if you are unsure how to do this properly.
seed:
A [`seed`][amltk.types.Seed] for the optimizer to use.
n_workers:
Expand Down Expand Up @@ -1175,63 +1180,66 @@ def register_optimization_loop( # noqa: C901, PLR0915, PLR0912
case _:
raise ValueError(f"Invalid {target=}. Must be a function or Task.")

# NOTE: I'm not particularly fond of this hack but I assume most people
# when prototyping don't care for the actual underlying optimizer and
# so we should just *pick one*.
create_optimizer: Optimizer.CreateSignature
match optimizer:
case None:
first_opt_class = next(
Optimizer._get_known_importable_optimizer_classes(),
None,
)
if first_opt_class is None:
raise ValueError(
"No optimizer was given and no known importable optimizers were"
" found. Please consider giving one explicitly or installing"
" one of the following packages:\n"
"\n - optuna"
"\n - smac"
"\n - neural-pipeline-search",
if isinstance(optimizer, Optimizer):
_optimizer = optimizer
else:
Comment on lines +1183 to +1185
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main changes here is that previously we would always construct an optimizer, I just shifted the previous code into the else block while using the if part to check if we have an already instantiated one.

# NOTE: I'm not particularly fond of this hack but I assume most people
# when prototyping don't care for the actual underlying optimizer and
# so we should just *pick one*.
create_optimizer: Optimizer.CreateSignature
match optimizer:
case None:
first_opt_class = next(
Optimizer._get_known_importable_optimizer_classes(),
None,
)
if first_opt_class is None:
raise ValueError(
"No optimizer was given and no known importable optimizers "
" were found. Please consider giving one explicitly or"
" installing one of the following packages:\n"
"\n - optuna"
"\n - smac"
"\n - neural-pipeline-search",
)

create_optimizer = first_opt_class.create
opt_name = classname(first_opt_class)
case type():
if not issubclass(optimizer, Optimizer):
raise ValueError(
f"Invalid optimizer {optimizer}. Must be a subclass of"
" Optimizer or a function that returns an Optimizer",
)
create_optimizer = optimizer.create
opt_name = classname(optimizer)
case _:
assert not isinstance(optimizer, type)
create_optimizer = optimizer
opt_name = funcname(optimizer)

create_optimizer = first_opt_class.create
opt_name = classname(first_opt_class)
case type():
if not issubclass(optimizer, Optimizer):
match working_dir:
case None:
now = datetime.utcnow().isoformat()

working_dir = PathBucket(f"{opt_name}-{self.name}-{now}")
case str() | Path():
working_dir = PathBucket(working_dir)
case PathBucket():
pass
case _:
raise ValueError(
f"Invalid optimizer {optimizer}. Must be a subclass of"
" Optimizer or a function that returns an Optimizer",
f"Invalid working_dir {working_dir}."
" Must be a str, Path or PathBucket",
)
create_optimizer = optimizer.create
opt_name = classname(optimizer)
case _:
assert not isinstance(optimizer, type)
create_optimizer = optimizer
opt_name = funcname(optimizer)

match working_dir:
case None:
now = datetime.utcnow().isoformat()

working_dir = PathBucket(f"{opt_name}-{self.name}-{now}")
case str() | Path():
working_dir = PathBucket(working_dir)
case PathBucket():
pass
case _:
raise ValueError(
f"Invalid working_dir {working_dir}."
" Must be a str, Path or PathBucket",
)

_optimizer = create_optimizer(
space=self,
metrics=metric,
bucket=working_dir,
seed=seed,
)
assert _optimizer is not None
_optimizer = create_optimizer(
space=self,
metrics=metric,
bucket=working_dir,
seed=seed,
)
assert _optimizer is not None

if on_begin is not None:
hook = partial(on_begin, task, scheduler, history)
Expand All @@ -1245,6 +1253,10 @@ def launch_initial_trials() -> None:

from amltk.optimization.trial import Trial

@task.on_result
def tell_optimizer(_: Any, report: Trial.Report) -> None:
_optimizer.tell(report)

Comment on lines 1255 to +1259
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whoops, this should have been here...

@task.on_result
def add_report_to_history(_: Any, report: Trial.Report) -> None:
history.add(report)
Expand Down Expand Up @@ -1285,7 +1297,9 @@ def optimize(
),
metric: Metric | Sequence[Metric],
*,
optimizer: type[Optimizer] | Optimizer.CreateSignature | None = None,
optimizer: (
type[Optimizer] | Optimizer.CreateSignature | Optimizer | None
) = None,
seed: Seed | None = None,
max_trials: int | None = None,
n_workers: int = 1,
Expand Down Expand Up @@ -1336,6 +1350,9 @@ def optimize(

::: amltk.optimization.Optimizer.CreateSignature

Lastly, you can also pass in your own already instantiated optimizer if you prefer, however
you should make sure to set it up correctly with the given metrics and search space.
It is recommended to just pass in the class if you are unsure how to do this properly.
seed:
A [`seed`][amltk.types.Seed] for the optimizer to use.
n_workers:
Expand Down
35 changes: 35 additions & 0 deletions tests/pipeline/test_optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from collections.abc import Sequence
from pathlib import Path
from typing import Any
from typing_extensions import override

import pytest
import threadpoolctl
Expand Down Expand Up @@ -138,3 +140,36 @@ def test_no_sklearn_head_does_not_trigger_threadpoolctl(tmp_path: Path) -> None:

report = history[0]
assert report.summary["num_threads"] == num_threads


def test_optimizer_is_reported_to(tmp_path: Path) -> None:
    """An already-instantiated optimizer handed to ``optimize`` must be
    ``tell``-ed each report, and the report it receives must be the exact
    object recorded in the returned history.
    """

    class RecordingOptimizer(SMACOptimizer):
        """SMAC-backed optimizer that remembers the last report it was told."""

        def __init__(self, *args: Any, **kwargs: Any) -> None:
            # Set before delegating so the attribute always exists.
            self.told_report: Trial.Report | None = None
            super().__init__(*args, **kwargs)

        @override
        def tell(self, report: Trial.Report) -> None:
            self.told_report = report
            return super().tell(report)

    node = Component(object, space={"a": (0.0, 1.0)})
    instantiated = RecordingOptimizer.create(
        space=node,
        metrics=METRIC,
        bucket=PathBucket(tmp_path),
    )

    history = node.optimize(
        target_funtion,
        metric=METRIC,
        optimizer=instantiated,
        max_trials=1,
        working_dir=tmp_path,
    )

    # Identity check, not equality: the very same report object must have
    # flowed from the task result into both the optimizer and the history.
    assert instantiated.told_report is history[0]
Comment on lines +160 to +175
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pass in the instantiated optimizer and ensure that the report it was told about is exactly the same object that is recorded in the history.