Skip to content

utils_stable_hash

Utilities supporting stable hashing of preprocessors.

Functions:

Name Description
serializable_preprocess_function

Return a JSON-serializable dictionary describing the scikit-learn preprocess_function.

serializable_preprocess_function(preprocess_function: TransformerMixin) -> dict[str, JsonValue] #

Return a JSON-serializable dictionary describing the scikit-learn preprocess_function.

Returns:

Type Description
dict[str, JsonValue]

JSON-safe representation capturing both parameters and fitted state.

Raises:

Type Description
ValueError

If the preprocessor is not supported.

Source code in src/xpdeep/dataset/preprocessor/utils_stable_hash.py
def serializable_preprocess_function(preprocess_function: TransformerMixin) -> dict[str, JsonValue]:
    """
    Describe a scikit-learn preprocess_function as a JSON-serializable dictionary.

    The result always contains the class name under ``"type"`` and the shallow constructor parameters under
    ``"params"``. For every supported preprocessor except ``Binarizer``, a ``"state"`` entry built from a fixed,
    per-class list of attribute names is added as well.

    Parameters
    ----------
    preprocess_function : TransformerMixin
        The preprocessor instance to describe.

    Returns
    -------
    dict[str, JsonValue]
        JSON-safe representation capturing both parameters and fitted state.

    Raises
    ------
    ValueError
        If the preprocessor is not supported.
    """
    # Binarizer is special-cased: only its parameters are emitted, no "state" entry is produced for it.
    if isinstance(preprocess_function, Binarizer):
        return {"type": "Binarizer", "params": _safe_to_json(preprocess_function.get_params(deep=False))}

    # Ordered (class, attribute names) pairs; the first class the instance belongs to wins.
    state_attrs_by_class = (
        (KBinsDiscretizer, ["bin_edges_"]),
        (LabelEncoder, ["classes_"]),
        (MinMaxScaler, ["scale_", "min_", "data_min_", "data_max_", "data_range_"]),
        (MaxAbsScaler, ["scale_", "max_abs_"]),
        (OneHotEncoder, ["categories_"]),
        (OrdinalEncoder, ["categories_"]),
        (PowerTransformer, ["lambdas_"]),
        (RobustScaler, ["center_", "scale_"]),
        (StandardScaler, ["mean_", "scale_", "var_"]),
    )

    for supported_class, state_attrs in state_attrs_by_class:
        if isinstance(preprocess_function, supported_class):
            break
    else:
        # No supported class matched: reject the preprocessor explicitly.
        msg = (
            f"{preprocess_function} not yet supported by Xpdeep, only preprocessors with an inverse "
            f"transform. If your preprocessor has an inverse transform, please file an issue to the Xpdeep "
            f"support."
        )
        raise ValueError(msg)

    # Parameters are serialized before the state is collected, then both are assembled into the description.
    type_name = preprocess_function.__class__.__name__
    json_params = _safe_to_json(preprocess_function.get_params(deep=False))
    collected_state = _collect_attrs(preprocess_function, state_attrs)
    return {"type": type_name, "params": json_params, "state": collected_state}