Skip to content

API: modeling.classification

skyulf.modeling.classification

Classification models.

AdaBoostClassifierApplier

Bases: SklearnApplier

AdaBoost Classifier Applier.

Source code in skyulf-core/skyulf/modeling/classification.py
324
325
class AdaBoostClassifierApplier(SklearnApplier):
    """Applier for the AdaBoost classifier node.

    Defines no methods of its own; all behavior is inherited from
    ``SklearnApplier``. It is the applier registered alongside
    ``AdaBoostClassifierCalculator`` under the ``adaboost_classifier`` id.
    """

AdaBoostClassifierCalculator

Bases: SklearnCalculator

AdaBoost Classifier Calculator.

Source code in skyulf-core/skyulf/modeling/classification.py
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
@NodeRegistry.register("adaboost_classifier", AdaBoostClassifierApplier)
@node_meta(
    id="adaboost_classifier",
    name="AdaBoost Classifier",
    category="Modeling",
    description="An AdaBoost classifier.",
    params={"n_estimators": 50, "learning_rate": 1.0},
)
class AdaBoostClassifierCalculator(SklearnCalculator):
    """Calculator that configures ``SklearnCalculator`` for ``AdaBoostClassifier``.

    Registered under the ``adaboost_classifier`` node id and paired with
    ``AdaBoostClassifierApplier``.
    """

    def __init__(self):
        # Baseline hyperparameters: the two values advertised in node_meta
        # plus a seed pinned to 42.
        defaults = {
            "n_estimators": 50,
            "learning_rate": 1.0,
            "random_state": 42,
        }
        super().__init__(
            model_class=AdaBoostClassifier,
            default_params=defaults,
            problem_type="classification",
        )

CalibratedClassifierApplier

Bases: SklearnApplier

Calibrated Classifier Applier (well-calibrated predict_proba).

Source code in skyulf-core/skyulf/modeling/classification.py
86
87
class CalibratedClassifierApplier(SklearnApplier):
    """Applier for the calibrated classifier node (well-calibrated predict_proba).

    Defines no methods of its own; all behavior is inherited from
    ``SklearnApplier``. It is the applier registered alongside
    ``CalibratedClassifierCalculator`` under the ``calibrated_classifier`` id.
    """

CalibratedClassifierCalculator

Bases: SklearnCalculator

Calibrated Classifier Calculator with a selectable base estimator.

The frontend sends base_estimator as a string key (e.g. "random_forest"); it is resolved here into a fresh estimator instance before CalibratedClassifierCV is constructed. Defaults to logistic regression for backward compatibility.

Source code in skyulf-core/skyulf/modeling/classification.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
@NodeRegistry.register("calibrated_classifier", CalibratedClassifierApplier)
@node_meta(
    id="calibrated_classifier",
    name="Calibrated Classifier",
    category="Modeling",
    description=(
        "Wraps a base classifier with CalibratedClassifierCV so predicted "
        "probabilities are well-calibrated (Platt/sigmoid or isotonic)."
    ),
    params={"base_estimator": "logistic_regression", "method": "sigmoid", "cv": 5},
    tags=["requires_scaling"],
)
class CalibratedClassifierCalculator(SklearnCalculator):
    """Calibrated Classifier Calculator with a selectable base estimator.

    The frontend sends ``base_estimator`` as a string key (e.g.
    ``"random_forest"``); it is resolved here into a fresh estimator instance
    before ``CalibratedClassifierCV`` is constructed. Defaults to logistic
    regression for backward compatibility.

    Only ``fit`` performs this translation; any other code path that builds
    the model straight from the raw config would still see ``base_estimator``
    (NOTE(review): confirm no such path exists in ``SklearnCalculator``).
    """

    # Map of selectable base estimators → factory. Each must support
    # ``predict_proba`` (or ``decision_function``) so calibration is meaningful.
    # Factories (rather than instances) guarantee every fit gets a fresh,
    # unfitted estimator.
    BASE_ESTIMATORS: Dict[str, Callable[[], BaseEstimator]] = {
        "logistic_regression": lambda: LogisticRegression(max_iter=1000),
        "random_forest": lambda: RandomForestClassifier(n_estimators=100, random_state=42),
        "gradient_boosting": lambda: GradientBoostingClassifier(random_state=42),
        "decision_tree": lambda: DecisionTreeClassifier(random_state=42),
        "gaussian_nb": lambda: GaussianNB(),
        "svc": lambda: SVC(probability=True, random_state=42),
    }

    def __init__(self):
        super().__init__(
            model_class=CalibratedClassifierCV,
            default_params={
                "estimator": LogisticRegression(max_iter=1000),
                "method": "sigmoid",
                "cv": 5,
            },
            problem_type="classification",
        )

    def fit(
        self,
        X: Any,
        y: Any,
        config: Dict[str, Any],
        progress_callback: Optional[Callable[..., Any]] = None,
        log_callback: Optional[Callable[..., Any]] = None,
        validation_data: Any = None,
    ) -> Any:
        """Resolve ``base_estimator`` in *config*, then delegate to the parent ``fit``.

        All arguments are forwarded unchanged to ``SklearnCalculator.fit``
        except *config*, which is replaced by the resolved copy.
        """
        config = self._resolve_base_estimator(config)
        return super().fit(X, y, config, progress_callback, log_callback, validation_data)

    @classmethod
    def _resolve_base_estimator(cls, config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
        """Translate a ``base_estimator`` entry into an ``estimator`` instance.

        Supports both the flat config shape and the nested ``{"params": {...}}``
        shape used by the model-training payload. The input mapping is never
        mutated; a shallow copy (and a copy of the nested ``params`` dict) is
        returned instead.

        Resolution rules for the ``base_estimator`` value:

        * string key found in ``BASE_ESTIMATORS`` -> fresh instance from its factory;
        * unknown string key -> logistic regression, with a warning;
        * estimator-like object (an instance exposing a callable ``fit``) ->
          used as-is, so Python callers may pass a pre-built estimator;
        * any other non-``None`` value -> ignored with a warning, leaving
          whatever ``estimator`` the config or defaults provide.

        Args:
            config: Flat or nested training config; may be ``None`` or empty.

        Returns:
            A new config dict with ``base_estimator`` removed and, when it
            could be resolved, ``estimator`` set. ``{}`` for empty input.
        """
        if not config:
            return {}
        resolved = dict(config)
        nested = isinstance(resolved.get("params"), dict)
        # Work on a copy of the dict that actually holds the hyperparameters.
        bucket = dict(resolved["params"]) if nested else resolved
        # Always remove the key: CalibratedClassifierCV is configured via
        # ``estimator`` (see default_params), not ``base_estimator``.
        key = bucket.pop("base_estimator", None)
        if isinstance(key, str):
            factory = cls.BASE_ESTIMATORS.get(key)
            if factory is None:
                logger.warning(
                    "Unknown base_estimator '%s'; falling back to logistic_regression.", key
                )
                factory = cls.BASE_ESTIMATORS["logistic_regression"]
            bucket["estimator"] = factory()
        elif key is not None:
            # Previously any non-string value was dropped without a trace.
            is_estimator_like = not isinstance(key, type) and callable(
                getattr(key, "fit", None)
            )
            if is_estimator_like:
                bucket["estimator"] = key
            else:
                logger.warning(
                    "Ignoring base_estimator of unsupported type '%s'; expected a string key.",
                    type(key).__name__,
                )
        if nested:
            resolved["params"] = bucket
            return resolved
        return bucket

DecisionTreeClassifierApplier

Bases: SklearnApplier

Decision Tree Classifier Applier.

Source code in skyulf-core/skyulf/modeling/classification.py
266
267
class DecisionTreeClassifierApplier(SklearnApplier):
    """Applier for the decision tree classifier node.

    Defines no methods of its own; all behavior is inherited from
    ``SklearnApplier``. It is the applier registered alongside
    ``DecisionTreeClassifierCalculator`` under the
    ``decision_tree_classifier`` id.
    """

DecisionTreeClassifierCalculator

Bases: SklearnCalculator

Decision Tree Classifier Calculator.

Source code in skyulf-core/skyulf/modeling/classification.py
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
@NodeRegistry.register("decision_tree_classifier", DecisionTreeClassifierApplier)
@node_meta(
    id="decision_tree_classifier",
    name="Decision Tree Classifier",
    category="Modeling",
    description="A non-parametric supervised learning method used for classification.",
    params={"max_depth": None, "min_samples_split": 2, "criterion": "gini"},
)
class DecisionTreeClassifierCalculator(SklearnCalculator):
    """Calculator that configures ``SklearnCalculator`` for ``DecisionTreeClassifier``.

    Registered under the ``decision_tree_classifier`` node id and paired with
    ``DecisionTreeClassifierApplier``.
    """

    def __init__(self):
        # The three node_meta parameters plus a seed pinned to 42.
        defaults = {
            "max_depth": None,
            "min_samples_split": 2,
            "criterion": "gini",
            "random_state": 42,
        }
        super().__init__(
            model_class=DecisionTreeClassifier,
            default_params=defaults,
            problem_type="classification",
        )

ExtraTreesClassifierApplier

Bases: SklearnApplier

Extra Trees Classifier Applier.

Source code in skyulf-core/skyulf/modeling/classification.py
385
386
class ExtraTreesClassifierApplier(SklearnApplier):
    """Applier for the extra-trees classifier node.

    Defines no methods of its own; all behavior is inherited from
    ``SklearnApplier``. It is the applier registered alongside
    ``ExtraTreesClassifierCalculator`` under the ``extra_trees_classifier`` id.
    """

ExtraTreesClassifierCalculator

Bases: SklearnCalculator

Extra Trees Classifier Calculator.

Source code in skyulf-core/skyulf/modeling/classification.py
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
@NodeRegistry.register("extra_trees_classifier", ExtraTreesClassifierApplier)
@node_meta(
    id="extra_trees_classifier",
    name="Extra Trees Classifier",
    category="Modeling",
    description="Extremely randomised trees — faster than Random Forest, often comparably accurate.",
    params={"n_estimators": 100, "max_depth": None, "min_samples_split": 2},
)
class ExtraTreesClassifierCalculator(SklearnCalculator):
    """Calculator that configures ``SklearnCalculator`` for ``ExtraTreesClassifier``.

    Registered under the ``extra_trees_classifier`` node id and paired with
    ``ExtraTreesClassifierApplier``.
    """

    def __init__(self):
        # node_meta advertises only the first three entries; the remainder
        # are additional defaults supplied to the model.
        defaults = {
            "n_estimators": 100,
            "max_depth": None,
            "min_samples_split": 2,
            "min_samples_leaf": 1,
            "criterion": "gini",
            "bootstrap": False,
            "n_jobs": -1,
            "random_state": 42,
        }
        super().__init__(
            model_class=ExtraTreesClassifier,
            default_params=defaults,
            problem_type="classification",
        )

GaussianNBApplier

Bases: SklearnApplier

Gaussian Naive Bayes Applier.

Source code in skyulf-core/skyulf/modeling/classification.py
529
530
class GaussianNBApplier(SklearnApplier):
    """Applier for the Gaussian Naive Bayes node.

    Defines no methods of its own; all behavior is inherited from
    ``SklearnApplier``. It is the applier registered alongside
    ``GaussianNBCalculator`` under the ``gaussian_nb`` id.
    """

GaussianNBCalculator

Bases: SklearnCalculator

Gaussian Naive Bayes Calculator.

Source code in skyulf-core/skyulf/modeling/classification.py
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
@NodeRegistry.register("gaussian_nb", GaussianNBApplier)
@node_meta(
    id="gaussian_nb",
    name="Gaussian Naive Bayes",
    category="Modeling",
    description="Gaussian Naive Bayes (GaussianNB).",
    params={"var_smoothing": 1e-9},
)
class GaussianNBCalculator(SklearnCalculator):
    """Calculator that configures ``SklearnCalculator`` for ``GaussianNB``.

    Registered under the ``gaussian_nb`` node id and paired with
    ``GaussianNBApplier``.
    """

    def __init__(self):
        # Single default, identical to the value advertised in node_meta.
        defaults = {"var_smoothing": 1e-9}
        super().__init__(
            model_class=GaussianNB,
            default_params=defaults,
            problem_type="classification",
        )

GradientBoostingClassifierApplier

Bases: SklearnApplier

Gradient Boosting Classifier Applier.

Source code in skyulf-core/skyulf/modeling/classification.py
295
296
class GradientBoostingClassifierApplier(SklearnApplier):
    """Applier for the gradient boosting classifier node.

    Defines no methods of its own; all behavior is inherited from
    ``SklearnApplier``. It is the applier registered alongside
    ``GradientBoostingClassifierCalculator`` under the
    ``gradient_boosting_classifier`` id.
    """

GradientBoostingClassifierCalculator

Bases: SklearnCalculator

Gradient Boosting Classifier Calculator.

Source code in skyulf-core/skyulf/modeling/classification.py
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
@NodeRegistry.register("gradient_boosting_classifier", GradientBoostingClassifierApplier)
@node_meta(
    id="gradient_boosting_classifier",
    name="Gradient Boosting Classifier",
    category="Modeling",
    description="Gradient Boosting for classification.",
    params={"n_estimators": 100, "learning_rate": 0.1, "max_depth": 3},
)
class GradientBoostingClassifierCalculator(SklearnCalculator):
    """Calculator that configures ``SklearnCalculator`` for ``GradientBoostingClassifier``.

    Registered under the ``gradient_boosting_classifier`` node id and paired
    with ``GradientBoostingClassifierApplier``.
    """

    def __init__(self):
        # The three node_meta parameters plus a seed pinned to 42.
        defaults = {
            "n_estimators": 100,
            "learning_rate": 0.1,
            "max_depth": 3,
            "random_state": 42,
        }
        super().__init__(
            model_class=GradientBoostingClassifier,
            default_params=defaults,
            problem_type="classification",
        )

HistGradientBoostingClassifierApplier

Bases: SklearnApplier

HistGradientBoosting Classifier Applier.

Source code in skyulf-core/skyulf/modeling/classification.py
418
419
class HistGradientBoostingClassifierApplier(SklearnApplier):
    """Applier for the histogram gradient boosting classifier node.

    Defines no methods of its own; all behavior is inherited from
    ``SklearnApplier``. It is the applier registered alongside
    ``HistGradientBoostingClassifierCalculator`` under the
    ``hist_gradient_boosting_classifier`` id.
    """

HistGradientBoostingClassifierCalculator

Bases: SklearnCalculator

HistGradientBoosting Classifier Calculator.

Source code in skyulf-core/skyulf/modeling/classification.py
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
@NodeRegistry.register("hist_gradient_boosting_classifier", HistGradientBoostingClassifierApplier)
@node_meta(
    id="hist_gradient_boosting_classifier",
    name="Hist Gradient Boosting Classifier",
    category="Modeling",
    description="Histogram-based gradient boosting — sklearn's fast LightGBM-style implementation.",
    params={"max_iter": 100, "learning_rate": 0.1, "max_leaf_nodes": 31},
)
class HistGradientBoostingClassifierCalculator(SklearnCalculator):
    """Calculator that configures ``SklearnCalculator`` for ``HistGradientBoostingClassifier``.

    Registered under the ``hist_gradient_boosting_classifier`` node id and
    paired with ``HistGradientBoostingClassifierApplier``.
    """

    def __init__(self):
        # node_meta advertises only the first three entries; the remainder
        # are additional defaults supplied to the model.
        defaults = {
            "max_iter": 100,
            "learning_rate": 0.1,
            "max_leaf_nodes": 31,
            "max_depth": None,
            "min_samples_leaf": 20,
            "l2_regularization": 0.0,
            "max_bins": 255,
            "random_state": 42,
        }
        super().__init__(
            model_class=HistGradientBoostingClassifier,
            default_params=defaults,
            problem_type="classification",
        )

KNeighborsClassifierApplier

Bases: SklearnApplier

K-Neighbors Classifier Applier.

Source code in skyulf-core/skyulf/modeling/classification.py
236
237
class KNeighborsClassifierApplier(SklearnApplier):
    """Applier for the k-nearest-neighbors classifier node.

    Defines no methods of its own; all behavior is inherited from
    ``SklearnApplier``. It is the applier registered alongside
    ``KNeighborsClassifierCalculator`` under the ``k_neighbors_classifier`` id.
    """

KNeighborsClassifierCalculator

Bases: SklearnCalculator

K-Neighbors Classifier Calculator.

Source code in skyulf-core/skyulf/modeling/classification.py
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
@NodeRegistry.register("k_neighbors_classifier", KNeighborsClassifierApplier)
@node_meta(
    id="k_neighbors_classifier",
    name="K-Neighbors Classifier",
    category="Modeling",
    description="Classifier implementing the k-nearest neighbors vote.",
    params={"n_neighbors": 5, "weights": "uniform", "algorithm": "auto"},
    tags=["requires_scaling"],
)
class KNeighborsClassifierCalculator(SklearnCalculator):
    """Calculator that configures ``SklearnCalculator`` for ``KNeighborsClassifier``.

    Registered under the ``k_neighbors_classifier`` node id (tagged
    ``requires_scaling``) and paired with ``KNeighborsClassifierApplier``.
    """

    def __init__(self):
        # The three node_meta parameters plus n_jobs=-1; no random_state is
        # set for this model.
        defaults = {
            "n_neighbors": 5,
            "weights": "uniform",
            "algorithm": "auto",
            "n_jobs": -1,
        }
        super().__init__(
            model_class=KNeighborsClassifier,
            default_params=defaults,
            problem_type="classification",
        )

LGBMClassifierApplier

Bases: SklearnApplier

LightGBM Classifier Applier.

LightGBM 4.x sets feature_names_in_ to auto-generated names (Column_0, Column_1...) even when fit with numpy arrays, and the property's deleter is intentionally a no-op (see upstream source). That triggers sklearn's UserWarning: X does not have valid feature names on every predict call. We suppress it locally here so the warning never leaks out of the applier boundary.

Source code in skyulf-core/skyulf/modeling/classification.py
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
class LGBMClassifierApplier(SklearnApplier):
    """Applier for LightGBM classifiers that silences a noisy sklearn warning.

    LightGBM 4.x populates ``feature_names_in_`` with generated names
    (``Column_0``, ``Column_1``...) even when the model was fit on numpy
    arrays, and the property's deleter is deliberately a no-op (see the
    upstream source). As a result sklearn emits
    ``UserWarning: X does not have valid feature names`` on every predict
    call. Both prediction entry points below filter that message inside a
    ``warnings.catch_warnings()`` scope so it never escapes the applier.
    """

    def predict(self, df, model_artifact):
        """Delegate to ``SklearnApplier.predict`` with the feature-name warning muted."""
        import warnings

        with warnings.catch_warnings():
            # Filter is scoped to this block; global warning state is restored on exit.
            warnings.filterwarnings("ignore", message=".*valid feature names.*")
            predictions = super().predict(df, model_artifact)
        return predictions

    def predict_proba(self, df, model_artifact):
        """Delegate to ``SklearnApplier.predict_proba`` with the feature-name warning muted."""
        import warnings

        with warnings.catch_warnings():
            # Filter is scoped to this block; global warning state is restored on exit.
            warnings.filterwarnings("ignore", message=".*valid feature names.*")
            probabilities = super().predict_proba(df, model_artifact)
        return probabilities

LGBMClassifierCalculator

Bases: SklearnCalculator

LightGBM Classifier Calculator.

Source code in skyulf-core/skyulf/modeling/classification.py
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
@NodeRegistry.register("lgbm_classifier", LGBMClassifierApplier)
@node_meta(
    id="lgbm_classifier",
    name="LightGBM Classifier",
    category="Modeling",
    description="LightGBM: leaf-wise gradient boosting, fast and memory-efficient with categorical support.",
    params={"n_estimators": 100, "num_leaves": 31, "learning_rate": 0.1},
)
class LGBMClassifierCalculator(SklearnCalculator):
    """Calculator that configures ``SklearnCalculator`` for ``LGBMClassifier``.

    Registered under the ``lgbm_classifier`` node id and paired with
    ``LGBMClassifierApplier``. ``fit`` is wrapped so that sklearn's
    "valid feature names" warning is filtered while training runs.
    """

    def __init__(self):
        # node_meta advertises only the first three entries; the remainder
        # are additional defaults supplied to the model.
        defaults = {
            "n_estimators": 100,
            "num_leaves": 31,
            "learning_rate": 0.1,
            "max_depth": -1,
            "min_child_samples": 20,
            "subsample": 1.0,
            "colsample_bytree": 1.0,
            "reg_alpha": 0.0,
            "reg_lambda": 0.0,
            "boosting_type": "gbdt",
            "n_jobs": -1,
            "random_state": 42,
            # Both spellings are set to -1 -- presumably to keep LightGBM
            # quiet whichever name the installed version honors (confirm).
            "verbose": -1,
            "verbosity": -1,
        }
        super().__init__(
            model_class=LGBMClassifier,
            default_params=defaults,
            problem_type="classification",
        )

    def fit(
        self, X, y, config, progress_callback=None, log_callback=None, validation_data=None
    ):
        """Run the parent ``fit`` with the feature-name warning filtered out.

        All arguments are forwarded unchanged and the parent's result is
        returned as-is.
        """
        import warnings

        with warnings.catch_warnings():
            # Filter is scoped to this block; global warning state is restored on exit.
            warnings.filterwarnings("ignore", message=".*valid feature names.*")
            outcome = super().fit(
                X,
                y,
                config,
                progress_callback=progress_callback,
                log_callback=log_callback,
                validation_data=validation_data,
            )
        return outcome

LogisticRegressionApplier

Bases: SklearnApplier

Logistic Regression Applier.

Source code in skyulf-core/skyulf/modeling/classification.py
57
58
class LogisticRegressionApplier(SklearnApplier):
    """Applier for the logistic regression node.

    Defines no methods of its own; all behavior is inherited from
    ``SklearnApplier``. It is the applier registered alongside
    ``LogisticRegressionCalculator`` under the ``logistic_regression`` id.
    """

LogisticRegressionCalculator

Bases: SklearnCalculator

Logistic Regression Calculator.

Source code in skyulf-core/skyulf/modeling/classification.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
@NodeRegistry.register("logistic_regression", LogisticRegressionApplier)
@node_meta(
    id="logistic_regression",
    name="Logistic Regression",
    category="Modeling",
    description="Linear model for classification.",
    params={"max_iter": 1000, "solver": "lbfgs", "random_state": 42},
    tags=["requires_scaling"],
)
class LogisticRegressionCalculator(SklearnCalculator):
    """Calculator that configures ``SklearnCalculator`` for ``LogisticRegression``.

    Registered under the ``logistic_regression`` node id (tagged
    ``requires_scaling``) and paired with ``LogisticRegressionApplier``.
    """

    def __init__(self):
        # Same three values that node_meta advertises.
        defaults = {
            "max_iter": 1000,
            "solver": "lbfgs",
            "random_state": 42,
        }
        super().__init__(
            model_class=LogisticRegression,
            default_params=defaults,
            problem_type="classification",
        )

RandomForestClassifierApplier

Bases: SklearnApplier

Random Forest Classifier Applier.

Source code in skyulf-core/skyulf/modeling/classification.py
174
175
class RandomForestClassifierApplier(SklearnApplier):
    """Applier for the random forest classifier node.

    Defines no methods of its own; all behavior is inherited from
    ``SklearnApplier``. It is the applier registered alongside
    ``RandomForestClassifierCalculator`` under the
    ``random_forest_classifier`` id.
    """

RandomForestClassifierCalculator

Bases: SklearnCalculator

Random Forest Classifier Calculator.

Source code in skyulf-core/skyulf/modeling/classification.py
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
@NodeRegistry.register("random_forest_classifier", RandomForestClassifierApplier)
@node_meta(
    id="random_forest_classifier",
    name="Random Forest Classifier",
    category="Modeling",
    description="Ensemble of decision trees.",
    params={"n_estimators": 50, "max_depth": 10, "min_samples_split": 5},
)
class RandomForestClassifierCalculator(SklearnCalculator):
    """Calculator that configures ``SklearnCalculator`` for ``RandomForestClassifier``.

    Registered under the ``random_forest_classifier`` node id and paired with
    ``RandomForestClassifierApplier``.
    """

    def __init__(self):
        # node_meta advertises only the first three entries; the remainder
        # are additional defaults supplied to the model.
        defaults = {
            "n_estimators": 50,
            "max_depth": 10,
            "min_samples_split": 5,
            "min_samples_leaf": 2,
            "n_jobs": -1,
            "random_state": 42,
        }
        super().__init__(
            model_class=RandomForestClassifier,
            default_params=defaults,
            problem_type="classification",
        )

SGDClassifierApplier

Bases: SklearnApplier

Stochastic Gradient Descent Classifier Applier.

Source code in skyulf-core/skyulf/modeling/classification.py
553
554
class SGDClassifierApplier(SklearnApplier):
    """Applier for the stochastic gradient descent (SGD) classifier node.

    Defines no methods of its own; all behavior is inherited from
    ``SklearnApplier``. It is the applier registered alongside
    ``SGDClassifierCalculator`` under the ``sgd_classifier`` id.
    """

SGDClassifierCalculator

Bases: SklearnCalculator

SGD Classifier Calculator.

Source code in skyulf-core/skyulf/modeling/classification.py
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
@NodeRegistry.register("sgd_classifier", SGDClassifierApplier)
@node_meta(
    id="sgd_classifier",
    name="SGD Classifier (text / linear)",
    category="Modeling",
    description=(
        "Linear classifiers (SVM, logistic regression, etc.) with SGD training. "
        "Highly efficient for high-dimensional sparse/dense text representations "
        "and large datasets."
    ),
    params={
        "loss": "log_loss",
        "penalty": "l2",
        "alpha": 0.0001,
        "l1_ratio": 0.15,
        "max_iter": 1000,
        "random_state": 42,
    },
    tags=["text", "nlp", "classification", "linear", "requires_scaling"],
)
class SGDClassifierCalculator(SklearnCalculator):
    """Calculator that configures ``SklearnCalculator`` for ``SGDClassifier``.

    Registered under the ``sgd_classifier`` node id and paired with
    ``SGDClassifierApplier``.
    """

    def __init__(self):
        # Mirrors the six parameters advertised in node_meta one-for-one.
        defaults = {
            "loss": "log_loss",
            "penalty": "l2",
            "alpha": 0.0001,
            "l1_ratio": 0.15,
            "max_iter": 1000,
            "random_state": 42,
        }
        super().__init__(
            model_class=SGDClassifier,
            default_params=defaults,
            problem_type="classification",
        )

SVCApplier

Bases: SklearnApplier

SVC Applier.

Source code in skyulf-core/skyulf/modeling/classification.py
205
206
class SVCApplier(SklearnApplier):
    """Applier for the support vector classifier node.

    Defines no methods of its own; all behavior is inherited from
    ``SklearnApplier``. It is the applier registered alongside
    ``SVCCalculator`` under the ``svc`` id.
    """

SVCCalculator

Bases: SklearnCalculator

SVC Calculator.

Source code in skyulf-core/skyulf/modeling/classification.py
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
@NodeRegistry.register("svc", SVCApplier)
@node_meta(
    id="svc",
    name="Support Vector Classifier",
    category="Modeling",
    description="C-Support Vector Classification.",
    params={"C": 1.0, "kernel": "rbf", "gamma": "scale"},
    tags=["requires_scaling"],
)
class SVCCalculator(SklearnCalculator):
    """Calculator that configures ``SklearnCalculator`` for ``SVC``.

    Registered under the ``svc`` node id (tagged ``requires_scaling``) and
    paired with ``SVCApplier``.
    """

    def __init__(self):
        # The three node_meta parameters, plus probability=True and a seed
        # pinned to 42.
        defaults = {
            "C": 1.0,
            "kernel": "rbf",
            "gamma": "scale",
            "probability": True,
            "random_state": 42,
        }
        super().__init__(
            model_class=SVC,
            default_params=defaults,
            problem_type="classification",
        )

XGBClassifierApplier

Bases: SklearnApplier

XGBoost Classifier Applier.

Source code in skyulf-core/skyulf/modeling/classification.py
354
355
class XGBClassifierApplier(SklearnApplier):
    """Applier for the XGBoost classifier node.

    Defines no methods of its own; all behavior is inherited from
    ``SklearnApplier``. It is the applier registered alongside
    ``XGBClassifierCalculator`` under the ``xgboost_classifier`` id.
    """

XGBClassifierCalculator

Bases: SklearnCalculator

XGBoost Classifier Calculator.

Source code in skyulf-core/skyulf/modeling/classification.py
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
@NodeRegistry.register("xgboost_classifier", XGBClassifierApplier)
@node_meta(
    id="xgboost_classifier",
    name="XGBoost Classifier",
    category="Modeling",
    description="Extreme Gradient Boosting classifier.",
    params={"n_estimators": 100, "max_depth": 6, "learning_rate": 0.3},
)
class XGBClassifierCalculator(SklearnCalculator):
    """Calculator that configures ``SklearnCalculator`` for ``XGBClassifier``.

    Registered under the ``xgboost_classifier`` node id and paired with
    ``XGBClassifierApplier``.
    """

    def __init__(self):
        # node_meta advertises only the first three entries; the remainder
        # are additional defaults supplied to the model.
        defaults = {
            "n_estimators": 100,
            "max_depth": 6,
            "learning_rate": 0.3,
            "subsample": 0.8,
            "colsample_bytree": 0.8,
            "n_jobs": -1,
            "random_state": 42,
        }
        super().__init__(
            model_class=XGBClassifier,
            default_params=defaults,
            problem_type="classification",
        )