preprocessor

Feature preprocessor.

`Preprocessor` #

Preprocessor class to preprocess the raw data.

`preprocessed_size`: if `None`, indicates that this preprocessor has not been fitted. Otherwise, it is the size of the feature after preprocessing, excluding the batch dimension.

Parameters:

Name	Type	Description	Default
`preprocessed_size`	`tuple[int, ...] \| None`		`None`

`preprocessed_size: tuple[int, ...] | None = None` #

`IdentityPreprocessor` #

Identity Preprocessor class.

`as_exposed: ExposedIdentityPreprocessor` #

Generate the corresponding exposed preprocessor.

`from_exposed(exposed_identity_preprocessor: ExposedIdentityPreprocessor) -> Self` #

Create an IdentityPreprocessor from an ExposedIdentityPreprocessor.

Source code in src/xpdeep/dataset/schema/preprocessor.py

@classmethod
def from_exposed(cls, exposed_identity_preprocessor: ExposedIdentityPreprocessor) -> Self:
    """Build an identity preprocessor from its exposed counterpart.

    Only ``preprocessed_size`` is carried over from the exposed object.
    """
    size = exposed_identity_preprocessor.preprocessed_size
    return cls(preprocessed_size=size)

`SklearnPreprocessor` #

Preprocessor class based on sklearn preprocessing classes.

Parameters:

Name	Type	Description	Default
`preprocess_function`	`TransformerMixin \| ExposedPreprocessFunction`		required
`dtype`	`str`		`'float32'`

`preprocess_function: TransformerMixin | ExposedPreprocessFunction` #

`dtype: str = 'float32'` #

`as_exposed: ExposedNumpyPreprocessor` #

Generate the corresponding exposed preprocessor.

`from_exposed(numpy_preprocessor: ExposedNumpyPreprocessor) -> Self` #

Create a SklearnPreprocessor from an ExposedNumpyPreprocessor.

Source code in src/xpdeep/dataset/schema/preprocessor.py

@classmethod
def from_exposed(cls, numpy_preprocessor: ExposedNumpyPreprocessor) -> Self:
    """Build a preprocessor from its exposed counterpart.

    The exposed preprocess function is unparsed back into a usable object.
    If that raises ``ModuleNotFoundError``, a warning is emitted and the
    exposed function is stored unchanged instead.
    """
    try:
        function = numpy_preprocessor.preprocess_function.unparse(None, None)
    except ModuleNotFoundError as err:
        message = (
            "Unable to recreate preprocess_function from ExposedPreprocessFunction. An additional module is "
            f"required to achieve this operation. {err.msg}"
        )
        warnings.warn(message)  # noqa: B028
        # Fall back to the exposed (still unparsed) function.
        function = numpy_preprocessor.preprocess_function

    return cls(
        preprocess_function=function,
        preprocessed_size=numpy_preprocessor.preprocessed_size,
    )

`transform(feature_raw_value: object) -> torch.Tensor` #

Transform a feature raw value into its preprocessed value.

Source code in src/xpdeep/dataset/schema/preprocessor.py

def transform(self, feature_raw_value: object) -> torch.Tensor:
    """Apply the wrapped transformer to a raw feature value.

    Raises:
        TypeError: If ``preprocess_function`` is not a ``TransformerMixin``
            instance.
    """
    transformer = self.preprocess_function
    if isinstance(transformer, TransformerMixin):
        return transformer.transform(feature_raw_value)  # type: ignore[no-any-return]
    msg = f"{transformer} was not parsable"
    raise TypeError(msg)

`inverse_transform(preprocessed_value: torch.Tensor) -> object` #

Inverse transform a feature preprocessed value into its raw value.

Source code in src/xpdeep/dataset/schema/preprocessor.py

def inverse_transform(self, preprocessed_value: torch.Tensor) -> object:
    """Map a preprocessed value back to its raw value with the wrapped transformer.

    Raises:
        TypeError: If ``preprocess_function`` is not a ``TransformerMixin``
            instance.
    """
    transformer = self.preprocess_function
    if isinstance(transformer, TransformerMixin):
        return transformer.inverse_transform(preprocessed_value)
    msg = f"{transformer} was not parsable"
    raise TypeError(msg)

`TorchPreprocessor(input_size: tuple[int, ...], module_transform: torch.nn.Module | None = None, module_inverse_transform: torch.nn.Module | None = None)` #

Preprocessor class based on torch modules.

Initialize the preprocessor with the size of the input and the optional transform and inverse-transform modules.

Source code in src/xpdeep/dataset/schema/preprocessor.py

def __init__(
    self,
    input_size: tuple[int, ...],
    module_transform: torch.nn.Module | None = None,
    module_inverse_transform: torch.nn.Module | None = None,
) -> None:
    """Store the input size and the optional transform modules.

    Args:
        input_size: Size of the input; stored unchanged on the instance.
        module_transform: Module called by ``transform``. When ``None``,
            ``transform`` raises ``NotImplementedError``.
        module_inverse_transform: Module called by ``inverse_transform``.
            When ``None``, ``inverse_transform`` raises
            ``NotImplementedError``.
    """
    super().__init__()
    self.input_size = input_size
    # Direction flag read by ``forward``: truthy -> ``transform``,
    # falsy -> ``inverse_transform``. Starts in the ``transform`` direction.
    self.ward = True
    self.module_transform = module_transform
    self.module_inverse_transform = module_inverse_transform

`input_size = input_size` #

`ward = True` #

`module_transform = module_transform` #

`module_inverse_transform = module_inverse_transform` #

`as_exposed: ExposedTorchPreprocessor` #

Generate the corresponding exposed preprocessor.

`forward(inputs: torch.Tensor) -> torch.Tensor` #

Apply `transform` when `ward` is true, otherwise apply `inverse_transform`.

Source code in src/xpdeep/dataset/schema/preprocessor.py

def forward(self, inputs: torch.Tensor) -> torch.Tensor:
    """Run ``transform`` when ``self.ward`` is truthy, else ``inverse_transform``."""
    direction = self.transform if self.ward else self.inverse_transform
    return direction(inputs)

`transform(inputs: torch.Tensor) -> torch.Tensor` #

Preprocess data, i.e. take a tensor as input and return the preprocessed tensor.

Source code in src/xpdeep/dataset/schema/preprocessor.py

def transform(self, inputs: torch.Tensor) -> torch.Tensor:
    """Preprocess data: take a tensor as input and return the preprocessed tensor.

    Raises:
        NotImplementedError: If no ``module_transform`` was provided.
    """
    module = self.module_transform
    if module is not None:
        return cast(torch.Tensor, module(inputs))
    raise NotImplementedError("Implement this function.")

`inverse_transform(output: torch.Tensor) -> torch.Tensor` #

Inverse of `transform`.

That is, $\forall x$: `inverse_transform(transform(x)) = transform(inverse_transform(x)) = x`.

Source code in src/xpdeep/dataset/schema/preprocessor.py

def inverse_transform(self, output: torch.Tensor) -> torch.Tensor:
    r"""Inverse of ``transform``.

    i.e. \forall x: inverse_transform(transform(x)) = transform(inverse_transform(x)) = x.

    Raises:
        NotImplementedError: If no ``module_inverse_transform`` was provided.
    """
    module = self.module_inverse_transform
    if module is not None:
        return cast(torch.Tensor, module(output))
    raise NotImplementedError("implement this function.")

`from_exposed(exposed_torch_preprocessor: ExposedTorchPreprocessor) -> Self` #

Create a TorchPreprocessor from an ExposedTorchPreprocessor.

Source code in src/xpdeep/dataset/schema/preprocessor.py

@classmethod
def from_exposed(cls, exposed_torch_preprocessor: ExposedTorchPreprocessor) -> Self:
    """Create a TorchPreprocessor from an ExposedTorchPreprocessor.

    The input size is inferred by pushing a random dummy batch of shape
    ``(2, *preprocessed_size)`` through the inverse-transform module and
    dropping the leading (batch) dimension of the result.

    Raises:
        ValueError: If ``exposed_torch_preprocessor.preprocessed_size`` is
            ``None``, since the input size cannot be inferred without it.
    """
    preprocessed_size = exposed_torch_preprocessor.preprocessed_size
    # Fail fast, before building any torch module, and say why.
    if preprocessed_size is None:
        msg = (
            "Cannot create a TorchPreprocessor from an exposed preprocessor whose "
            "preprocessed_size is None; the input size cannot be inferred."
        )
        raise ValueError(msg)

    inverse_transform = exposed_torch_preprocessor.inverse_preprocess_transformer.to_torch_module()

    # NOTE(review): a batch of 2 is presumably used so batch-dependent layers
    # accept the probe - confirm.
    probe = torch.randn(size=(2, *preprocessed_size))
    # Only the output shape is needed, so skip autograd bookkeeping.
    with torch.no_grad():
        input_size = inverse_transform(probe).size()[1:]

    return cls(
        input_size=input_size,
        module_transform=exposed_torch_preprocessor.preprocess_transformer.to_torch_module(),
        module_inverse_transform=inverse_transform,
    )

preprocessor

Preprocessor #

preprocessed_size: tuple[int, ...] | None = None #

IdentityPreprocessor #

as_exposed: ExposedIdentityPreprocessor #

from_exposed(exposed_identity_preprocessor: ExposedIdentityPreprocessor) -> Self #

SklearnPreprocessor #

preprocess_function: TransformerMixin | ExposedPreprocessFunction #

dtype: str = 'float32' #

as_exposed: ExposedNumpyPreprocessor #

from_exposed(numpy_preprocessor: ExposedNumpyPreprocessor) -> Self #

transform(feature_raw_value: object) -> torch.Tensor #

inverse_transform(preprocessed_value: torch.Tensor) -> object #

TorchPreprocessor(input_size: tuple[int, ...], module_transform: torch.nn.Module | None = None, module_inverse_transform: torch.nn.Module | None = None) #

input_size = input_size #

ward = True #

module_transform = module_transform #

module_inverse_transform = module_inverse_transform #

as_exposed: ExposedTorchPreprocessor #

forward(inputs: torch.Tensor) -> torch.Tensor #

transform(inputs: torch.Tensor) -> torch.Tensor #

inverse_transform(output: torch.Tensor) -> torch.Tensor #

from_exposed(exposed_torch_preprocessor: ExposedTorchPreprocessor) -> Self #

`Preprocessor` #

`preprocessed_size: tuple[int, ...] | None = None` #

`IdentityPreprocessor` #

`as_exposed: ExposedIdentityPreprocessor` #

`from_exposed(exposed_identity_preprocessor: ExposedIdentityPreprocessor) -> Self` #

`SklearnPreprocessor` #

`preprocess_function: TransformerMixin | ExposedPreprocessFunction` #

`dtype: str = 'float32'` #

`as_exposed: ExposedNumpyPreprocessor` #

`from_exposed(numpy_preprocessor: ExposedNumpyPreprocessor) -> Self` #

`transform(feature_raw_value: object) -> torch.Tensor` #

`inverse_transform(preprocessed_value: torch.Tensor) -> object` #

`TorchPreprocessor(input_size: tuple[int, ...], module_transform: torch.nn.Module | None = None, module_inverse_transform: torch.nn.Module | None = None)` #

`input_size = input_size` #

`ward = True` #

`module_transform = module_transform` #

`module_inverse_transform = module_inverse_transform` #

`as_exposed: ExposedTorchPreprocessor` #

`forward(inputs: torch.Tensor) -> torch.Tensor` #

`transform(inputs: torch.Tensor) -> torch.Tensor` #

`inverse_transform(output: torch.Tensor) -> torch.Tensor` #

`from_exposed(exposed_torch_preprocessor: ExposedTorchPreprocessor) -> Self` #