import logging
import typing as t
from functools import partial
import numpy as np
import pandas as pd
from eBoruta.base import _E, _X
from eBoruta.containers import Features, Dataset, TrialData
# Value every callback returns: the estimator, features, dataset and trial
# data it received (possibly replaced), followed by a dict of keyword arguments.
CallbackReturn = t.Tuple[_E, Features, Dataset, TrialData, t.Dict[str, t.Any]]
# Scoring function signature; ``Scorer`` invokes it as
# ``score(estimator, x_test, y_test)``.
Score = t.Callable[[_E, _X, _X], np.ndarray]
# Module-level logger shared by the callbacks defined in this file.
LOGGER = logging.getLogger(__name__)
class CallbackFN(t.Protocol):
    """Structural type of a callback that is invoked like a function."""

    def __call__(
        self,
        estimator: _E,
        features: Features,
        dataset: Dataset,
        trial_data: TrialData,
        **kwargs,
    ) -> CallbackReturn:
        """Accept the trial objects plus extra keyword arguments.

        :param estimator: Estimator object handed to the callback.
        :param features: Features container handed to the callback.
        :param dataset: Dataset container handed to the callback.
        :param trial_data: Trial data container handed to the callback.
        :param kwargs: Additional keyword arguments.
        :return: A tuple shaped like ``CallbackReturn``.
        """
        ...
class CallbackClass(t.Protocol):
    """Structural type of a callback implemented as a callable object."""

    def __init__(self, *args, **kwargs):
        """Accept arbitrary positional and keyword construction arguments."""
        ...

    def __call__(
        self,
        estimator: _E,
        features: Features,
        dataset: Dataset,
        trial_data: TrialData,
        **kwargs,
    ) -> CallbackReturn:
        """Accept the trial objects plus extra keyword arguments.

        :param estimator: Estimator object handed to the callback.
        :param features: Features container handed to the callback.
        :param dataset: Dataset container handed to the callback.
        :param trial_data: Trial data container handed to the callback.
        :param kwargs: Additional keyword arguments.
        :return: A tuple shaped like ``CallbackReturn``.
        """
        ...
# Umbrella type for callbacks: anything matching either callback protocol.
Callback = t.Union[CallbackFN, CallbackClass]
def reduce_by_fraction(num_features: int, frac: float):
    """Scale ``num_features`` by ``frac`` and truncate the result to an int.

    :param num_features: Number to scale.
    :param frac: Multiplier applied to ``num_features``.
    :return: ``int(num_features * frac)``; ``int`` drops the fractional part.
    """
    scaled = num_features * frac
    return int(scaled)
def change_params_and_reinit(estimator: _E, update: t.Mapping[str, t.Any]):
    """Create a new estimator of the same class with some parameters replaced.

    :param estimator: Object exposing ``get_params()`` whose class accepts
        those parameters as keyword arguments.
    :param update: Parameter names mapped to the values that should override
        the ones reported by ``get_params()``.
    :return: A new instance of ``estimator.__class__`` built from the merged
        parameters.
    """
    merged = estimator.get_params()
    # Entries from ``update`` win over the estimator's current values.
    merged.update(**update)
    return estimator.__class__(**merged)
class IterationAdjuster:
    """Callback that re-creates the estimator with one parameter recomputed
    from the current number of tentative features.

    Every call appends a ``(base, value)`` pair to ``history``, where ``base``
    is twice the number of tentative features and ``value`` is what was
    assigned to the parameter.
    """

    def __init__(
        self,
        param_name: str,
        min_value: int,
        reducer: t.Callable[[int], int] = partial(reduce_by_fraction, frac=0.5),
    ):
        """
        :param param_name: Name of the estimator parameter to overwrite.
        :param min_value: Lower bound for the value assigned to the parameter.
        :param reducer: Maps the doubled tentative-feature count to a
            candidate parameter value. The default halves it.
        """
        self.param_name = param_name
        self.min_value = min_value
        self.reducer = reducer
        self.history: t.List[t.Tuple[int, int]] = []

    def __call__(
        self,
        estimator: _E,
        features: Features,
        dataset: Dataset,
        trial_data: TrialData,
        **kwargs,
    ) -> CallbackReturn:
        """Replace ``estimator`` with a re-initialized copy carrying the new
        parameter value; all other arguments are returned untouched.

        :return: ``(estimator, features, dataset, trial_data, kwargs)``.
        """
        # NOTE(review): the factor of two presumably accounts for shadow
        # features accompanying each tentative one -- confirm with the caller.
        base = len(features.tentative) * 2
        value = max(self.min_value, self.reducer(base))
        overrides = {self.param_name: value}
        estimator = change_params_and_reinit(estimator, overrides)
        # Recorded only after the estimator was re-created successfully.
        self.history.append((base, value))
        return estimator, features, dataset, trial_data, kwargs
class Scorer:
    """Callback that evaluates the estimator with user-supplied scoring
    functions and accumulates the results in ``score_hist``.

    ``score_hist`` is a DataFrame with one row per call. It has one column
    per scorer name plus a ``"Features"`` column holding the test-set column
    names joined by ``";"``.
    """

    def __init__(self, scorers: t.Mapping[str, Score], verbose: int = 2):
        """
        :param scorers: Score names mapped to callables invoked as
            ``score(estimator, x_test, y_test)``.
        :param verbose: ``2`` or higher logs each record at INFO level, ``1``
            logs it at DEBUG level, and anything else disables logging.
        """
        self.scorers = scorers
        self.score_hist = pd.DataFrame()
        self.verbose = verbose
        # ``>=`` rather than ``==`` so that asking for more verbosity than the
        # highest supported level does not silently switch logging off.
        if verbose >= 2:
            self._log_fn = LOGGER.info
        elif verbose == 1:
            self._log_fn = LOGGER.debug
        else:
            self._log_fn = lambda x: x

    def __call__(
        self,
        estimator: _E,
        features: Features,
        dataset: Dataset,
        trial_data: TrialData,
        **kwargs,
    ) -> CallbackReturn:
        """Score ``estimator`` on ``trial_data.x_test`` / ``trial_data.y_test``,
        append the record to ``score_hist`` and log it.

        :return: The inputs unchanged, as
            ``(estimator, features, dataset, trial_data, kwargs)``.
        """
        record = {
            score_name: score(estimator, trial_data.x_test, trial_data.y_test)
            for score_name, score in self.scorers.items()
        }
        # ``str.join`` rejects non-string items, so column labels are
        # converted explicitly (labels may be e.g. integers).
        record["Features"] = ";".join(map(str, trial_data.x_test.columns))
        row = pd.DataFrame.from_records([record])
        # ``ignore_index`` renumbers the rows 0..n-1; otherwise every appended
        # row would keep the index label 0 of its one-row frame.
        self.score_hist = pd.concat([self.score_hist, row], ignore_index=True)
        self._log_fn(record)
        return estimator, features, dataset, trial_data, kwargs
class CatFeaturesSupplier:
    """Callback that tells the estimator which test-set columns are
    categorical.

    It acts only on estimators that have a ``cat_features`` attribute. Those
    are re-initialized with ``cat_features`` set to the positional indices of
    the categorical columns of ``trial_data.x_test``.
    """

    def __call__(
        self,
        estimator: _E,
        features: Features,
        dataset: Dataset,
        trial_data: TrialData,
        **kwargs,
    ) -> CallbackReturn:
        """Re-create ``estimator`` with an up-to-date ``cat_features`` list.

        :return: ``(estimator, features, dataset, trial_data, kwargs)``, where
            ``estimator`` is a new instance if it has a ``cat_features``
            attribute and the original object otherwise.
        """
        if hasattr(estimator, "cat_features"):
            # Walk ``dtypes`` positionally instead of selecting columns by
            # label: this stays unambiguous with duplicated column names.
            # It also avoids the deprecated
            # ``pd.api.types.is_categorical_dtype`` helper.
            cat_features = [
                position
                for position, dtype in enumerate(trial_data.x_test.dtypes)
                if isinstance(dtype, pd.CategoricalDtype)
            ]
            estimator = change_params_and_reinit(
                estimator, {"cat_features": cat_features}
            )
        return estimator, features, dataset, trial_data, kwargs
class EvalSetSupplier:
    """Callback that places the trial's test data into the keyword arguments
    under a configurable name.
    """

    def __init__(self, param_name: str = "eval_set"):
        """
        :param param_name: Keyword-argument name under which the evaluation
            set is stored.
        """
        self.param_name = param_name

    def __call__(
        self,
        estimator: _E,
        features: Features,
        dataset: Dataset,
        trial_data: TrialData,
        **kwargs,
    ) -> CallbackReturn:
        """Set ``kwargs[param_name]`` to ``[(x_test, y_test)]``.

        An already present entry is replaced, and the replacement is reported
        at DEBUG level.

        :return: ``(estimator, features, dataset, trial_data, kwargs)`` with
            the updated ``kwargs``.
        """
        key = self.param_name
        already_present = key in kwargs
        if already_present:
            LOGGER.debug(f"Overwriting existing {self.param_name}")
        eval_pairs = [(trial_data.x_test, trial_data.y_test)]
        kwargs[key] = eval_pairs
        return estimator, features, dataset, trial_data, kwargs
if __name__ == "__main__":
    # Running this file as a script is rejected with a RuntimeError.
    raise RuntimeError()