Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/ConfigSpace/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def __init__(
if not hp.legal_value(value):
raise IllegalValueError(hp, value)

# Truncate the float to be of constant lengt
# Truncate the float to be of constant length
if isinstance(hp, FloatHyperparameter):
value = float(np.round(value, ROUND_PLACES)) # type: ignore

Expand Down
2 changes: 1 addition & 1 deletion src/ConfigSpace/configuration_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,7 @@ def estimate_size(self) -> float | int:
otherwise it is the product of the size of all hyperparameters. The function
correctly guesses the number of unique configurations if there are no condition
and forbidden statements in the configuration spaces. Otherwise, this is an
upper bound. Use [`generate_grid()`][ConfigSpace.util.generate_grid] to generate
upper bound. Use [`grid_generator()`][ConfigSpace.util.grid_generator] to generate
all valid configurations if required.

Returns:
Expand Down
294 changes: 134 additions & 160 deletions src/ConfigSpace/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,19 @@
from __future__ import annotations

import copy
import itertools
import math
from collections import deque
from collections.abc import Iterator, Sequence
from typing import TYPE_CHECKING, Any, cast
from typing import TYPE_CHECKING, Any, cast, Generator

import numpy as np

from ConfigSpace import Configuration
from ConfigSpace.exceptions import (
ActiveHyperparameterNotSetError,
ForbiddenValueError,
IllegalValueError,
IllegalVectorizedValueError,
InactiveHyperparameterSetError,
NoPossibleNeighborsError,
Expand Down Expand Up @@ -571,14 +574,19 @@ def check_configuration( # noqa: D103
space: ConfigurationSpace,
vector: np.ndarray,
allow_inactive_with_values: bool = False,
#yield_all_unset_active_hyperparameters: bool = False,
) -> None:
activated = np.isfinite(vector)
#unset_active_hps: list[Hyperparameter] = []

# Make sure the roots are all good
for root in space._dag.roots.values():
hp_idx = root.idx
if not activated[hp_idx]:
#if not yield_all_unset_active_hyperparameters:
raise ActiveHyperparameterNotSetError(root.hp)
#else:
# unset_active_hps.append(hp)

for cnode in space._dag.minimum_conditions:
# Everything for the condition is satisfied, make sure active
Expand All @@ -590,7 +598,10 @@ def check_configuration( # noqa: D103
idx: int = children_idxs[~active_mask][0]
hp_name = space.at[idx]
hp = space[hp_name]
#if not yield_all_unset_active_hyperparameters:
raise ActiveHyperparameterNotSetError(hp)
#else:
# unset_active_hps.append(hp)

for hp_idx, hp_node in cnode.unique_children.items():
# OPTIM: We bypass the larger safety checking of the hp and access
Expand All @@ -613,6 +624,10 @@ def check_configuration( # noqa: D103
f"Given vector violates forbidden clause: {forbidden}",
)

# All checks passed, except for possible plural ActiveHyperparameterNotSetError
#if unset_active_hps:
# raise ActiveHyperparametersNotSetError(unset_active_hps)


def change_hp_value( # noqa: D103
configuration_space: ConfigurationSpace,
Expand Down Expand Up @@ -644,187 +659,146 @@ def change_hp_value( # noqa: D103
return arr


def generate_grid(
def grid_generator(
configuration_space: ConfigurationSpace,
num_steps_dict: dict[str, int] | None = None,
) -> list[Configuration]:
) -> Generator[Configuration, None, None]:
"""Generates a grid of Configurations for a given ConfigurationSpace.
Can be used, for example, for grid search.

Args:
configuration_spac:
configuration_space:
The Configuration space over which to create a grid of HyperParameter
Configuration values. It knows the types for all parameter values.

num_steps_dic:
num_steps_dict:
A dict containing the number of points to divide the grid side formed by
Hyperparameters which are either of type UniformFloatHyperparameter or
type UniformIntegerHyperparameter. The keys in the dict should be the names
of the corresponding Hyperparameters and the values should be the number of
points to divide the grid side formed by the corresponding Hyperparameter in to.

Returns:
List containing Configurations. It is a cartesian product of tuples
of HyperParameter values.
Each tuple lists the possible values taken by the corresponding HyperParameter.
Within the cartesian product, in each element, the ordering of HyperParameters
is the same for the OrderedDict within the ConfigurationSpace.
A generator yielding Configurations for the given ConfigurationSpace.
The grid is the cartesian product of per-HyperParameter value tuples, where each tuple lists the possible values taken by the corresponding HyperParameter.
Within each element of the cartesian product, the ordering of HyperParameters is the same as in the OrderedDict within the ConfigurationSpace.
"""

def _get_value_set(num_steps_dict: dict[str, int] | None, hp_name: str) -> tuple:
param = configuration_space[hp_name]
if isinstance(param, (CategoricalHyperparameter)):
return cast(tuple, param.choices)

if isinstance(param, (OrdinalHyperparameter)):
return cast(tuple, param.sequence)

if isinstance(param, Constant):
return (param.value,)

if isinstance(param, UniformFloatHyperparameter):
if param.log:
lower, upper = np.log([param.lower, param.upper])
else:
lower, upper = param.lower, param.upper

if num_steps_dict is not None and param.name in num_steps_dict:
num_steps = num_steps_dict[param.name]
grid_points = np.linspace(lower, upper, num_steps)
else:
raise ValueError(
"num_steps_dict is None or doesn't contain the number of points"
f" to divide {param.name} into. And its quantization factor "
"is None. Please provide/set one of these values.",
)

if param.log:
grid_points = np.exp(grid_points)

# Avoiding rounding off issues
grid_points[0] = max(grid_points[0], param.lower)
grid_points[-1] = min(grid_points[-1], param.upper)

return tuple(grid_points)

if isinstance(param, UniformIntegerHyperparameter):
if param.log:
lower, upper = np.log([param.lower, param.upper])
else:
lower, upper = param.lower, param.upper

if num_steps_dict is not None and param.name in num_steps_dict:
num_steps = num_steps_dict[param.name]
grid_points = np.linspace(lower, upper, num_steps)
else:
raise ValueError(
"num_steps_dict is None or doesn't contain the number of points "
f"to divide {param.name} into. And its quantization factor "
"is None. Please provide/set one of these values.",
# Idea; we can perhaps create a generator for each HP, to avoid taking the entire grid into memory
# Then we can draw for each HP a value from each generator and test the yielded configuration (masking out the HP values that actually should be inactive)
# For each combination that **could** result in a duplicate (due to active vs inactive HPs), we need to store a light weight hash of the configuration
# That we can check each time s.t. we can quickly skip over combinations that are known to be duplicates
# 1. Build a generator for each HP based on their min/max and step size
# 2. This generator allows us to build a 'cartesian product' generator s.t. all combinations are made (including inactive HPs....)
# 3. It would be best if we could make the HPs generate values for active HPs only when applicable but this is complicated due to not knowing the dependency order
# 4. ??
# 5. Profit

def _hyperparameter_range(hp: Hyperparameter, num_steps: int) -> range | tuple | Generator:
"""Constructs the range of the hyperparameter or tuple for categorical / ordinal hyperparameters and constants."""

def frange(lower: float, upper: float, numsteps: int, log: bool = False, as_int: bool = False, conditional: bool = False) -> Generator[float, None, None]:
    """Lazily yield ``numsteps`` evenly spaced values from ``lower`` to ``upper``, both inclusive.

    Every point is computed from its index rather than by repeatedly adding a
    step, so floating point error does not accumulate and the final point is
    never dropped. The first and last values are exactly ``lower`` and ``upper``.

    Args:
        lower: Lower bound of the range (must be > 0 when ``log`` is True).
        upper: Upper bound of the range, not smaller than ``lower``.
        numsteps: Number of points to produce, at least 1. With a single
            step only ``lower`` is produced.
        log: If True, the points are evenly spaced in log space and mapped
            back with ``exp``; interior points are clamped to
            ``[lower, upper]`` to absorb rounding errors.
        as_int: If True, every value is passed through ``round`` and yielded
            as an integer. Neighbouring points may then collapse onto the
            same integer when the range is narrow.
        conditional: Accepted for interface compatibility; currently unused
            (no explicit 'inactive' value is emitted).

    Yields:
        The grid values in ascending order.

    Raises:
        ValueError: If ``numsteps`` is smaller than 1 or ``lower > upper``.
    """
    if numsteps < 1:
        raise ValueError(f"numsteps must be at least 1, got {numsteps}.")
    if lower > upper:
        raise ValueError(f"lower ({lower}) must not exceed upper ({upper}).")

    # Interpolate in log space for log-scaled ranges, linearly otherwise.
    start, stop = (math.log(lower), math.log(upper)) if log else (lower, upper)
    span = stop - start
    last = numsteps - 1

    for i in range(numsteps):
        if i == 0:
            # Emit the bounds verbatim instead of a log/exp round trip.
            value = lower
        elif i == last:
            value = upper
        else:
            # Multiply before dividing: keeps e.g. 3/10 exact instead of 3 * 0.1.
            x = start + span * i / last
            value = min(max(math.exp(x), lower), upper) if log else x
        yield round(value) if as_int else float(value)

conditional_hp = hp.name in configuration_space.conditional_hyperparameters
if isinstance(hp, (CategoricalHyperparameter)):
#return cast(tuple, list(hp.choices) + [NotSet] if conditional_hp else hp.choices)
return cast(tuple, hp.choices)
elif isinstance(hp, (OrdinalHyperparameter)):
#return cast(tuple, list(hp.sequence) + [NotSet] if conditional_hp else hp.sequence)
return cast(tuple, hp.sequence)
elif isinstance(hp, Constant):
#return (hp.value, NotSet) if conditional_hp else (hp.value,)
return (hp.value,)
elif num_steps is None: # The latter two hyperparameter require a number of steps, do a quick check if to see if we can proceed
raise ValueError(f"No number of steps provided for {hp.name} i.e. the number of points to divide {hp.name} into.")
elif isinstance(hp, UniformIntegerHyperparameter):
return frange(hp.lower, hp.upper, num_steps, log=hp.log, as_int=True, conditional=conditional_hp)
elif isinstance(hp, UniformFloatHyperparameter):
return frange(hp.lower, hp.upper, num_steps, log=hp.log, conditional=conditional_hp)
raise TypeError(f"Unknown hyperparameter type {type(hp)}")

def _cartesian_product_generator(hps: list[Hyperparameter]) -> Generator[tuple, None, None]:
    """Return a lazy cartesian product over the grid values of ``hps``.

    Each hyperparameter contributes the values produced by
    ``_hyperparameter_range``; its number of steps is looked up by name in
    ``num_steps_dict`` (``None`` when the dict is missing or has no entry).
    """
    steps_by_name = num_steps_dict or {}
    value_ranges = [
        _hyperparameter_range(hp, steps_by_name.get(hp.name)) for hp in hps
    ]
    if value_ranges:
        return itertools.product(*value_ranges)
    # itertools.product() without arguments yields one empty tuple; the product
    # over a single empty iterable yields nothing, which is what we want when
    # there are no hyperparameters at all.
    return itertools.product([])

# We record the hash of the configurations that we have seen so far?
duplicates_memory: set[int] = set()
hyperparameter_names = list(configuration_space.keys())
hyperparameters = configuration_space.values()

regular_hyperparameters = [hp for hp in configuration_space.values() if hp.name not in configuration_space.conditional_hyperparameters]
conditional_hyperparameters = [hp for hp in configuration_space.values() if hp.name in configuration_space.conditional_hyperparameters]

# hyperparameters = [hp for hp in configuration_space.values() if hp.name not in configuration_space.conditional_hyperparameters]
# hyperparameter_names = [hp.name for hp in hyperparameters]
from ConfigSpace.hyperparameters import FloatHyperparameter
from ConfigSpace.types import Array, Mask, f64
from ConfigSpace.hyperparameters.hp_components import ROUND_PLACES

def generate_with_conditionals(regular_configuration: dict[str, Any], active_conditionals: list[Hyperparameter]) -> Generator[Configuration, None, None]:
    """Recursively extend a partial configuration with values for active conditional hyperparameters.

    Args:
        regular_configuration: Mapping of hyperparameter name to value that
            has been assembled so far. It is copied, never modified.
        active_conditionals: Hyperparameters that were reported as active but
            unset for ``regular_configuration``.

    Yields:
        Every valid Configuration obtainable by assigning grid values to
        ``active_conditionals`` (and, recursively, to any further
        hyperparameter that turns out to be active but unset).

    Raises:
        InactiveHyperparameterSetError, IllegalValueError: Propagated
            unchanged from ``Configuration``; neither is expected, because
            only reported-active hyperparameters are set and only grid
            values are used.
    """
    for conditional_values in _cartesian_product_generator(active_conditionals):
        # Combine the existing configuration with the new conditional values.
        candidate = dict(regular_configuration)
        candidate.update(
            (hp.name, value) for hp, value in zip(active_conditionals, conditional_values)
        )
        try:
            grid_point = Configuration(
                configuration_space,
                values=candidate,
            )
        except ActiveHyperparameterNotSetError as ex:
            # Only the first unset active hyperparameter is reported; recurse
            # so that each further one is discovered in turn.
            yield from generate_with_conditionals(candidate, [ex.hyperparameter])
        except ForbiddenValueError:
            # The grid enumerates every combination, including those that
            # violate a forbidden clause; those are skipped.
            continue
        else:
            # Yield outside the try so exceptions raised at the yield point
            # by the consumer are never mistaken for construction errors.
            yield grid_point

if param.log:
grid_points = np.exp(grid_points)
grid_points = np.round(grid_points).astype(int)

# Avoiding rounding off issues
grid_points[0] = max(grid_points[0], param.lower)
grid_points[-1] = min(grid_points[-1], param.upper)

return tuple(grid_points)

raise TypeError(f"Unknown hyperparameter type {type(param)}")

def _get_cartesian_product(
value_sets: list[tuple],
hp_names: list[str],
) -> list[dict[str, Any]]:
import itertools

if len(value_sets) == 0:
# Edge case
return []

grid = []
for element in itertools.product(*value_sets):
config_dict = dict(zip(hp_names, element))
grid.append(config_dict)

return grid

# Each tuple within is the grid values to be taken on by a Hyperparameter
value_sets = []
hp_names = []

# Get HP names and allowed grid values they can take for the HPs at the top
# level of ConfigSpace tree
for hp_name in configuration_space.unconditional_hyperparameters:
value_sets.append(_get_value_set(num_steps_dict, hp_name))
hp_names.append(hp_name)

# Create a Cartesian product of above allowed values for the HPs. Hold them in an
# "unchecked" deque because some of the conditionally dependent HPs may become
# active for some of the elements of the Cartesian product and in these cases
# creating a Configuration would throw an Error (see below).
# Creates a deque of Configuration dicts
unchecked_grid_pts = deque(_get_cartesian_product(value_sets, hp_names))
checked_grid_pts = []

while len(unchecked_grid_pts) > 0:
for configuration in _cartesian_product_generator(regular_hyperparameters):
configuration_dict = {key: value for key, value in zip(hyperparameter_names, configuration)}
try:
# NOTE: Build vector instead and call check_configuration here directly?
grid_point = Configuration(
configuration_space,
values=unchecked_grid_pts[0],
values=configuration_dict,
)
checked_grid_pts.append(grid_point)

# When creating a configuration that violates a forbidden clause, simply skip it
except ForbiddenValueError:
unchecked_grid_pts.popleft()
yield grid_point
except ActiveHyperparameterNotSetError as ex:
# NOTE: We are not getting all possible known ActiveHyperparameterNotSetErrors at once here; it is raised for the first 'mistake' only.
for configuration_with_conditionals in generate_with_conditionals(configuration_dict, [ex.hyperparameter]):
yield configuration_with_conditionals
except ForbiddenValueError as ex: # The grid generator generates all possible combinations, including those violating the Forbidden rules
continue

except ActiveHyperparameterNotSetError:
value_sets = []
hp_names = []
new_active_hp_names = []

# "for" loop over currently active HP names
for hp_name in unchecked_grid_pts[0]:
value_sets.append((unchecked_grid_pts[0][hp_name],))
hp_names.append(hp_name)
# Checks if the conditionally dependent children of already active
# HPs are now active
# TODO: Shorten this
for new_hp_name in configuration_space._dag.nodes[hp_name].children:
if (
new_hp_name not in new_active_hp_names
and new_hp_name not in unchecked_grid_pts[0]
):
all_cond_ = True
for cond in configuration_space.parent_conditions_of[
new_hp_name
]:
if not cond.satisfied_by_value(unchecked_grid_pts[0]):
all_cond_ = False
if all_cond_:
new_active_hp_names.append(new_hp_name)

for hp_name in new_active_hp_names:
value_sets.append(_get_value_set(num_steps_dict, hp_name))
hp_names.append(hp_name)

# this check might not be needed, as there is always going to be a new
# active HP when in this except block?
if len(new_active_hp_names) <= 0:
raise RuntimeError(
"Unexpected error: There should have been a newly activated"
" hyperparameter for the current configuration values:"
f" {unchecked_grid_pts[0]!s}. Please contact the developers with"
" the code you ran and the stack trace.",
) from None

new_conditonal_grid = _get_cartesian_product(value_sets, hp_names)
unchecked_grid_pts += new_conditonal_grid
unchecked_grid_pts.popleft()

return checked_grid_pts
except InactiveHyperparameterSetError as ex: # This should not occur due to how conditionals are handled
raise ex
except IllegalValueError as ex: # Should not occur: The grid should only generate legal values for each HP.
raise ex
Loading