Skip to content

API: modeling.sklearn_wrapper

skyulf.modeling.sklearn_wrapper

Wrapper for Scikit-Learn models.

SklearnApplier

Bases: BaseModelApplier

Base applier for Scikit-Learn models.

Source code in skyulf-core\skyulf\modeling\sklearn_wrapper.py
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
class SklearnApplier(BaseModelApplier):
    """Base applier for Scikit-Learn models."""

    def predict(self, df: Union[pd.DataFrame, SkyulfDataFrame], model_artifact: Any) -> pd.Series:
        """Run the fitted estimator on *df* and return predictions as a Series."""
        # The bridge handles Pandas / Polars / wrapper inputs uniformly.
        features, _ = SklearnBridge.to_sklearn(df)
        raw_preds = model_artifact.predict(features)

        # Preserve the original row index when the input exposes one;
        # otherwise a default RangeIndex is acceptable for predictions.
        row_index = df.index if hasattr(df, "index") else None
        return pd.Series(raw_preds, index=row_index)

    def predict_proba(
        self, df: Union[pd.DataFrame, SkyulfDataFrame], model_artifact: Any
    ) -> Optional[pd.DataFrame]:
        """Return class probabilities as a DataFrame, or None if unsupported."""
        if not hasattr(model_artifact, "predict_proba"):
            return None

        features, _ = SklearnBridge.to_sklearn(df)
        raw_probs = model_artifact.predict_proba(features)

        row_index = df.index if hasattr(df, "index") else None
        # Label columns with the estimator's fitted classes when available;
        # otherwise Pandas falls back to positional column labels.
        class_labels = getattr(model_artifact, "classes_", None)
        return pd.DataFrame(raw_probs, index=row_index, columns=class_labels)

SklearnCalculator

Bases: BaseModelCalculator

Base calculator for Scikit-Learn models.

Source code in skyulf-core\skyulf\modeling\sklearn_wrapper.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
class SklearnCalculator(BaseModelCalculator):
    """Base calculator for Scikit-Learn models."""

    def __init__(
        self,
        model_class: Type[BaseEstimator],
        default_params: Dict[str, Any],
        problem_type: str,
    ):
        self.model_class = model_class
        self._default_params = default_params
        self._problem_type = problem_type

    @property
    def default_params(self) -> Dict[str, Any]:
        return self._default_params

    @property
    def problem_type(self) -> str:
        return self._problem_type

    def fit(
        self,
        X: Union[pd.DataFrame, SkyulfDataFrame],
        y: Union[pd.Series, Any],
        config: Dict[str, Any],
        progress_callback=None,
        log_callback=None,
        validation_data=None,
    ) -> Any:
        """Fit the Scikit-Learn model."""
        # Start from the calculator defaults and layer config overrides on top.
        params = dict(self.default_params)
        if config:
            # Two supported config layouts:
            #   nested (preferred): {'params': {'C': 1.0, ...}}
            #   flat   (legacy):    {'C': 1.0, 'type': '...', ...}
            # When a 'params' key is present it is the sole source of truth.
            overrides = config.get("params", {})

            if not overrides and "params" not in config:
                # No 'params' key at all -> treat scalar top-level entries as
                # hyper-parameters, skipping pipeline bookkeeping fields.
                reserved = {"type", "target_column", "node_id", "step_type", "inputs"}
                overrides = {
                    key: value
                    for key, value in config.items()
                    if key not in reserved and not isinstance(value, dict)
                }

            if overrides:
                params.update(overrides)

        msg = f"Initializing {self.model_class.__name__} with params: {params}"
        logger.info(msg)
        if log_callback:
            log_callback(msg)

        # Instantiate, then fit on numpy arrays produced by the bridge
        # (handles Polars / Pandas / wrapper inputs).
        estimator = self.model_class(**params)
        X_np, y_np = SklearnBridge.to_sklearn((X, y))
        estimator.fit(X_np, y_np)

        return estimator

fit(X, y, config, progress_callback=None, log_callback=None, validation_data=None)

Fit the Scikit-Learn model.

Source code in skyulf-core\skyulf\modeling\sklearn_wrapper.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def fit(
    self,
    X: Union[pd.DataFrame, SkyulfDataFrame],
    y: Union[pd.Series, Any],
    config: Dict[str, Any],
    progress_callback=None,
    log_callback=None,
    validation_data=None,
) -> Any:
    """Fit the Scikit-Learn model."""
    # Start from the calculator defaults and layer config overrides on top.
    params = dict(self.default_params)
    if config:
        # Two supported config layouts:
        #   nested (preferred): {'params': {'C': 1.0, ...}}
        #   flat   (legacy):    {'C': 1.0, 'type': '...', ...}
        # When a 'params' key is present it is the sole source of truth.
        overrides = config.get("params", {})

        if not overrides and "params" not in config:
            # No 'params' key at all -> treat scalar top-level entries as
            # hyper-parameters, skipping pipeline bookkeeping fields.
            reserved = {"type", "target_column", "node_id", "step_type", "inputs"}
            overrides = {
                key: value
                for key, value in config.items()
                if key not in reserved and not isinstance(value, dict)
            }

        if overrides:
            params.update(overrides)

    msg = f"Initializing {self.model_class.__name__} with params: {params}"
    logger.info(msg)
    if log_callback:
        log_callback(msg)

    # Instantiate, then fit on numpy arrays produced by the bridge
    # (handles Polars / Pandas / wrapper inputs).
    estimator = self.model_class(**params)
    X_np, y_np = SklearnBridge.to_sklearn((X, y))
    estimator.fit(X_np, y_np)

    return estimator