Commit

fix logging
MorrisNein committed Nov 7, 2023
1 parent 95e9d7b commit bb3f03a
Showing 2 changed files with 28 additions and 13 deletions.

File 1 of 2:
@@ -1,6 +1,8 @@
 from __future__ import annotations

+import logging
 import warnings
+from copy import deepcopy
 from functools import partial
 from typing import Any, Dict, Sequence, Union

@@ -13,6 +15,8 @@
 from meta_automl.data_preparation.datasets_loaders import DatasetsLoader, OpenMLDatasetsLoader
 from meta_automl.data_preparation.meta_features_extractors import MetaFeaturesExtractor

+logger = logging.getLogger(__file__)
+

 class PymfeExtractor(MetaFeaturesExtractor):
     default_params = {'groups': 'default'}
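
The new module-level logger is keyed by __file__, so its name is the source file path rather than a dotted module name. A minimal sketch of consumer code (assumed, not part of this commit) that surfaces the new debug records by configuring the root logger:

import logging

# Assumed consumer snippet: the root-logger configuration below makes
# the records emitted through logging.getLogger(__file__) visible.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(name)s %(levelname)s: %(message)s',
)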
@@ -51,6 +55,9 @@ def extract(self, datasets_or_ids: Sequence[Union[DatasetBase, DatasetIDType]],
             else:
                 dataset_id = dataset
                 dataset_class = self.datasets_loader.dataset_class
+            logger.debug(
+                f'{self.__class__.__name__}: extracting metafeatures of dataset {dataset_class.__name__}|{dataset_id}.'
+            )
             meta_features_cached = self._get_meta_features_cache(dataset_id, dataset_class, meta_feature_names)

             if use_cached and meta_features_cached:
@@ -59,15 +66,17 @@ def extract(self, datasets_or_ids: Sequence[Union[DatasetBase, DatasetIDType]],
                 if not isinstance(dataset, DatasetBase):
                     dataset = self._datasets_loader.load_single(dataset)
                 dataset_data = dataset.get_data()
+                x = dataset_data.x
+                y = dataset_data.y
                 cat_cols_indicator = dataset_data.categorical_indicator
+                if fill_input_nans:
+                    x = self.fill_nans(x, cat_cols_indicator)
+                x = x.to_numpy()
+                y = y.to_numpy()
                 if cat_cols_indicator is not None:
                     cat_cols = [i for i, val in enumerate(cat_cols_indicator) if val]
                 else:
                     cat_cols = 'auto'
-                x = dataset_data.x.to_numpy()
-                y = dataset_data.y.to_numpy()
-                if fill_input_nans:
-                    x = self.fill_nans(x)
                 fit_extractor = self._extractor.fit
                 fit_extractor = partial(fit_extractor, x, y, cat_cols=cat_cols, **fit_kwargs)
                 try:
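
For context, a minimal sketch of the pymfe call that self._extractor.fit wraps, on toy data (assumes the wrapped extractor is a pymfe.mfe.MFE instance, as the 'groups' default above suggests). cat_cols is either a list of categorical column indices or the string 'auto', matching the branch in the diff:

import numpy as np
from pymfe.mfe import MFE

# Toy data; column 1 plays the role of a categorical feature.
x = np.array([[1.0, 0.0], [2.0, 1.0], [3.0, 0.0]])
y = np.array([0, 1, 0])
mfe = MFE(groups='default')
mfe.fit(x, y, cat_cols=[1])   # cat_cols as computed above
names, values = mfe.extract()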
@@ -104,8 +113,14 @@ def extract(self, datasets_or_ids: Sequence[Union[DatasetBase, DatasetIDType]],
         return columns_or_rows

     @staticmethod
-    def fill_nans(x):
-        if not isinstance(x, pd.DataFrame):
-            x = pd.DataFrame(x)
-        x = x.fillna(x.median())
-        return x.to_numpy()
+    def fill_nans(x: pd.DataFrame, cat_cols_indicator: Sequence[bool]):
+        x_new = deepcopy(x)
+        for idx, col in enumerate(x.columns):
+            is_categorical = cat_cols_indicator[idx]
+            if is_categorical:
+                most_frequent = x_new[col].value_counts(sort=True, ascending=False).index[0]
+                x_new[col].fillna(most_frequent, inplace=True)
+            else:
+                median = x_new[col].median()
+                x_new[col].fillna(median, inplace=True)
+        return x_new
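
A toy illustration of the reworked fill_nans (assumed usage): categorical columns are imputed with their most frequent value, numeric columns with their median, and a copy is returned so the caller's frame stays untouched.

import numpy as np
import pandas as pd

x = pd.DataFrame({'num': [1.0, np.nan, 3.0], 'cat': ['a', None, 'a']})
filled = PymfeExtractor.fill_nans(x, cat_cols_indicator=[False, True])
# filled['num'] -> 1.0, 2.0, 3.0   (median imputation)
# filled['cat'] -> 'a', 'a', 'a'   (most frequent value)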

File 2 of 2:
@@ -1,3 +1,4 @@
+import logging
 from functools import partial
 from pathlib import Path
 from typing import List, Optional, Union
@@ -11,7 +12,6 @@
 from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum
 from fedot.core.repository.tasks import Task, TaskTypesEnum
 from fedot.core.validation.split import tabular_cv_generator
-from golem.core.log import default_log
 from tqdm import tqdm
 from typing_extensions import Literal

@@ -21,6 +21,8 @@
 from meta_automl.data_preparation.file_system import PathType
 from meta_automl.data_preparation.models_loaders import ModelsLoader

+logger = logging.getLogger(__file__)
+

 def evaluate_classification_fedot_pipeline(pipeline, input_data):
     cv_folds = partial(tabular_cv_generator, input_data, folds=5)
@@ -56,8 +58,6 @@ def __init__(self, datasets_to_load: Union[List[Union[DatasetBase, str]], Literal
                  launch_dir: Optional[PathType] = None,
                  datasets_loader: Optional[DatasetsLoader] = None):

-        self.log = default_log(self)
-
         self.datasets_loader = datasets_loader or OpenMLDatasetsLoader(allow_names=True)

         self.launch_dir: Path = Path(launch_dir) if isinstance(launch_dir, str) else launch_dir
@@ -106,7 +106,7 @@ def _import_pipelines(self, candidate_pipeline_paths: List[List[PathType]]):
                                      desc='Importing pipelines', unit='dataset'):
             candidates_for_dataset = [Pipeline.from_serialized(str(p)) for p in paths]
             if not candidates_for_dataset:
-                self.log.warning(f'No pipelines found for the dataset "{dataset}".')
+                logger.warning(f'No pipelines found for the dataset "{dataset}".')
             candidate_pipelines.append(candidates_for_dataset)
         self.candidate_pipelines = candidate_pipelines