## Serialization
### What is persisted

`SkyulfPipeline.save()` uses Python pickle to serialize the entire pipeline object. That includes:

- the fitted preprocessing artifacts (per-step `params`)
- the trained model (the sklearn estimator object)
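Conceptually, `save()` and `load()` amount to a plain pickle round trip. Below is a minimal sketch of that behavior, not the actual implementation (which may add validation or versioning); `save_object`/`load_object` are hypothetical names used for illustration:

```python
import pickle
from pathlib import Path

# Sketch only: what a pickle-based save/load boils down to.
# SkyulfPipeline.save()/load() may do more (validation, versioning).
def save_object(obj: object, path: Path) -> None:
    with open(path, "wb") as f:
        # Serializes the full object graph, including fitted sklearn estimators
        pickle.dump(obj, f)

def load_object(path: Path) -> object:
    with open(path, "rb") as f:
        # Reconstructs the exact objects that were saved
        return pickle.load(f)
```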
### Practical guidance

- Load a saved pipeline in an environment with the same library versions that were used to save it; pickled objects are not guaranteed to be compatible across versions.
- As with any pickle, never load artifacts from untrusted sources: unpickling can execute arbitrary code.
- Some preprocessing nodes store sklearn objects inside `params` (e.g., the KNN/Iterative imputers, OneHotEncoder). Those are not JSON-serializable and require pickling, as the example below shows.
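To see the constraint concretely, here is a standalone demonstration with plain sklearn (not Skyulf-specific): a fitted estimator cannot be dumped to JSON, but pickles cleanly.

```python
import json
import pickle

from sklearn.preprocessing import OneHotEncoder

enc = OneHotEncoder().fit([["A"], ["B"], ["C"]])

try:
    json.dumps(enc)
except TypeError as exc:
    # json has no encoding for arbitrary Python objects
    print(exc)  # e.g. "Object of type OneHotEncoder is not JSON serializable"

blob = pickle.dumps(enc)      # works: fitted state is preserved
restored = pickle.loads(blob)
print(restored.categories_)   # the fitted categories survive the round trip
```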
### Load and use
```python
from __future__ import annotations

import tempfile
from pathlib import Path

import pandas as pd

from skyulf.pipeline import SkyulfPipeline

# Toy training data with missing values and a categorical column
df = pd.DataFrame(
    {
        "age": [10, 20, None, 40, 50, 60, None, 80],
        "city": ["A", "B", "A", "C", "B", "A", "C", "B"],
        "target": [0, 1, 0, 1, 1, 0, 1, 0],
    }
)

config = {
    "preprocessing": [
        {
            "name": "split",
            "transformer": "TrainTestSplitter",
            "params": {
                "test_size": 0.2,
                "validation_size": 0.0,
                "random_state": 42,
                "shuffle": True,
                "stratify": True,
                "target_column": "target",
            },
        },
        {
            "name": "impute",
            "transformer": "SimpleImputer",
            "params": {"strategy": "mean", "columns": ["age"]},
        },
        {
            "name": "encode",
            "transformer": "OneHotEncoder",
            "params": {"columns": ["city"], "drop_original": True},
        },
    ],
    "modeling": {
        "type": "random_forest_classifier",
        "params": {"n_estimators": 50, "random_state": 42},
    },
}

with tempfile.TemporaryDirectory() as tmp:
    model_path = Path(tmp) / "model.pkl"

    # Fit the full pipeline (preprocessing + model) and pickle it to disk
    pipeline = SkyulfPipeline(config)
    _ = pipeline.fit(df, target_column="target")
    pipeline.save(model_path)

    # Reload and predict on unseen rows; the fitted imputer and encoder are reapplied
    loaded = SkyulfPipeline.load(model_path)
    new_df = pd.DataFrame({"age": [25, None], "city": ["A", "C"]})
    preds = loaded.predict(new_df)
    print(preds)
```
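One way to act on the version guidance above is to record library versions next to the artifact at save time and compare them before loading. A sketch under that assumption; `write_env_manifest` is a hypothetical helper, not part of Skyulf:

```python
import json
import sys
from pathlib import Path

import pandas as pd
import sklearn

def write_env_manifest(model_path: Path) -> None:
    # Hypothetical helper: store interpreter/library versions beside the pickle,
    # e.g. model.pkl -> model.env.json
    manifest = {
        "python": sys.version.split()[0],
        "pandas": pd.__version__,
        "sklearn": sklearn.__version__,
    }
    model_path.with_suffix(".env.json").write_text(json.dumps(manifest, indent=2))

# Usage: call right after pipeline.save(model_path), and diff the manifest
# against the current environment before SkyulfPipeline.load(model_path).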