schema

Schemas.

Classes:

Name	Description
`Schema`	Schema definition. Used as base class for AnalyzedSchema and FittedSchema classes.
`AnalyzedSchema`	A schema which represents the raw data structure: feature types and positions.
`FittedSchema`	A schema which represents the raw data structure: feature types and positions, together with an `input_size` and a `target_size`.

`Schema(*features: _FEATURE_TYPE)` #

Schema definition. Used as base class for AnalyzedSchema and FittedSchema classes.

Initialize the class with the specified features.

Parameters:

Name	Type	Description	Default
`features` #	`_FEATURE_TYPE`	A variable number of features to be included. These features will be stored in the `columns` attribute as a list.	`()`

Attributes:

Name	Type	Description
`columns`	`list[_FEATURE_TYPE]`	A list containing the features provided during initialization.

Methods:

Name	Description
`__getitem__`	Get a feature.
`__repr__`	Represent the schema.

Attributes:

Name	Type	Description
`columns`	`list[_FEATURE_TYPE]`

Source code in src/xpdeep/dataset/schema/schema.py

def __init__(self, *features: _FEATURE_TYPE) -> None:
    """
    Build a schema from the given features.

    Parameters
    ----------
    features : _FEATURE_TYPE
        Zero or more features, passed positionally. They are copied, in the
        order given, into a new list stored on the `columns` attribute.

    Attributes
    ----------
    columns : list[_FEATURE_TYPE]
        The features supplied at construction time, in their original order.
    """
    self.columns: list[_FEATURE_TYPE] = [*features]

`columns: list[_FEATURE_TYPE] = list(features)` #

`__getitem__(feature_name: str) -> _FEATURE_TYPE` #

Get a feature.

Source code in src/xpdeep/dataset/schema/schema.py

def __getitem__(self, feature_name: str) -> _FEATURE_TYPE:
    """
    Return the single feature whose name equals ``feature_name``.

    Parameters
    ----------
    feature_name : str
        Name compared against the ``name`` attribute of every column.

    Raises
    ------
    NameError
        If no column, or more than one column, carries that name.
    """
    matches = [column for column in self.columns if column.name == feature_name]

    # Exactly one hit is the only successful outcome.
    if len(matches) == 1:
        return matches[0]

    if matches:
        message = f"Multiple features with name : {feature_name} in the analyzed schema."
    else:
        message = f"No feature with name : {feature_name} in the analyzed schema."
    raise NameError(message)

`__repr__() -> str` #

Represent the schema.

Source code in src/xpdeep/dataset/schema/schema.py

def __repr__(self) -> str:
    """
    Render the schema as a text table.

    Each column of the schema becomes one row holding its type, its name and
    whether it is a target. Metadata columns are labelled "Metadata" and have
    an empty target cell.
    """
    from prettytable import PrettyTable  # noqa: PLC0415

    # One table holds both the features and the metadata.
    table = PrettyTable()
    table.title = "Schema Contents"
    table.align = "l"
    table.field_names = ["Type", "Name", "Is Target"]

    for column in self.columns:
        if isinstance(column, Metadata):
            # Metadata rows carry no target flag, so that cell stays blank.
            row = ["Metadata", column.name, ""]
        else:
            row = [column.__class__.__name__, column.name, "✅" if column.is_target else "❌"]
        table.add_row(row)

    return table.get_string()

`AnalyzedSchema(*features: _FEATURE_TYPE)` #

A schema which represents the raw data structure: feature types and positions.

Initialize the class with the specified features.

Parameters:

Name	Type	Description	Default
`features` #	`_FEATURE_TYPE`	A variable number of features to be included. These features will be stored in the `columns` attribute as a list.	`()`

Attributes:

Name	Type	Description
`columns`	`list[_FEATURE_TYPE]`	A list containing the features provided during initialization.

Methods:

Name	Description
`from_exposed`	Convert ExposedAnalyzedSchema to AnalyzedSchema.
`from_bytes`	Convert bytes to analyzed schema.
`remove_feature`	Remove the feature with the given name from the schema, and return the removed feature.
`__setitem__`	Set the feature with the given name from the parquet dataset analyzed schema.
`__delitem__`	Delete feature.

Attributes:

Name	Type	Description
`as_exposed`	`ExposedAnalyzedSchema`	Return exposed analyzed schema.

Source code in src/xpdeep/dataset/schema/schema.py

def __init__(self, *features: _FEATURE_TYPE) -> None:
    """
    Build a schema from the given features.

    Parameters
    ----------
    features : _FEATURE_TYPE
        Zero or more features, passed positionally. They are copied, in the
        order given, into a new list stored on the `columns` attribute.

    Attributes
    ----------
    columns : list[_FEATURE_TYPE]
        The features supplied at construction time, in their original order.
    """
    self.columns: list[_FEATURE_TYPE] = [*features]

`as_exposed: ExposedAnalyzedSchema` #

Return exposed analyzed schema.

`from_exposed(exposed_analyzed_schema: ExposedAnalyzedSchema) -> AnalyzedSchema` #

Convert ExposedAnalyzedSchema to AnalyzedSchema.

Source code in src/xpdeep/dataset/schema/schema.py

@staticmethod
def from_exposed(exposed_analyzed_schema: ExposedAnalyzedSchema) -> "AnalyzedSchema":
    """
    Build an AnalyzedSchema from an ExposedAnalyzedSchema.

    Every entry of ``exposed_analyzed_schema.columns`` is passed through
    ``extract_base_feature_from_exposed``; the results, in the same order,
    become the features of the new schema.
    """
    base_features = [extract_base_feature_from_exposed(column) for column in exposed_analyzed_schema.columns]
    return AnalyzedSchema(*base_features)

`from_bytes(analyzed_schema_as_bytes: bytes) -> AnalyzedSchema` #

Convert bytes to analyzed schema.

Source code in src/xpdeep/dataset/schema/schema.py

@staticmethod
def from_bytes(analyzed_schema_as_bytes: bytes) -> "AnalyzedSchema":
    """
    Build an AnalyzedSchema from its serialized bytes.

    The bytes are decoded into an ExposedAnalyzedSchema with
    ``NumpyMsgpackDecoder``, and the result is converted with ``from_exposed``.
    """
    decoder = NumpyMsgpackDecoder(ExposedAnalyzedSchema)
    exposed_analyzed_schema = decoder.decode(analyzed_schema_as_bytes)
    return AnalyzedSchema.from_exposed(exposed_analyzed_schema)

`remove_feature(feature_name: str) -> Feature | BaseFeature | Metadata` #

Remove the feature with the given name from the schema, and return the removed feature.

Source code in src/xpdeep/dataset/schema/schema.py

def remove_feature(self, feature_name: str) -> Feature | BaseFeature | Metadata:
    """
    Remove the feature called ``feature_name`` from ``columns`` and return it.

    The feature is first looked up with ``self[feature_name]``, so any error
    raised by that lookup propagates to the caller.
    """
    target = self[feature_name]
    position = self.columns.index(target)
    return self.columns.pop(position)

`__setitem__(feature_name: str, new_feature: Feature) -> None` #

Set the feature with the given name from the parquet dataset analyzed schema.

Source code in src/xpdeep/dataset/schema/schema.py

def __setitem__(self, feature_name: str, new_feature: Feature) -> None:
    """
    Replace the feature called ``feature_name`` with ``new_feature``, in place.

    The new feature takes the position of the one it replaces in ``columns``.
    The existing feature is looked up with ``self[feature_name]``, so any
    error raised by that lookup propagates to the caller.

    Raises
    ------
    NameError
        If ``new_feature.name`` differs from the name of the feature being
        replaced.
    """
    current = self[feature_name]

    if current.name == new_feature.name:
        position = self.columns.index(current)
        self.columns[position] = new_feature
        return

    message = (
        f"The new feature name {new_feature.name} is different than the overwritten feature one. The new "
        f"feature name should be {feature_name}. "
    )
    raise NameError(message)

`__delitem__(feature_name: str) -> None` #

Delete feature.

Source code in src/xpdeep/dataset/schema/schema.py

def __delitem__(self, feature_name: str) -> None:
    """
    Delete the feature called ``feature_name`` from the schema.

    Delegates to ``remove_feature`` and discards the feature it returns, so
    any error raised by that call propagates to the caller.
    """
    self.remove_feature(feature_name)

`FittedSchema(*features: Feature | Metadata, input_size: tuple[int, ...] | None = None, target_size: tuple[int, ...] | None = None)` #

A schema which represents the raw data structure: feature types and positions, together with an `input_size` and a `target_size`.

FittedSchema constructor.

Methods:

Name	Description
`from_exposed`	Convert ExposedFittedSchema to FittedSchema.
`from_bytes`	Convert bytes to fitted schema.

Attributes:

Name	Type	Description
`input_size`
`target_size`
`as_exposed`	`ExposedFittedSchema`	Return exposed fitted schema.

Source code in src/xpdeep/dataset/schema/schema.py

def __init__(
    self,
    *features: Feature | Metadata,
    input_size: tuple[int, ...] | None = None,
    target_size: tuple[int, ...] | None = None,
) -> None:
    """
    Build a fitted schema from features and, optionally, explicit sizes.

    Parameters
    ----------
    features : Feature | Metadata
        Columns of the schema, forwarded to the parent constructor.
    input_size : tuple[int, ...] | None
        Stored as-is, but only when ``target_size`` is given as well.
    target_size : tuple[int, ...] | None
        Stored as-is, but only when ``input_size`` is given as well.

    Notes
    -----
    If either size is ``None``, both sizes are taken from the result of
    ``FittedSchema._build_already_fitted(AnalyzedSchema(*features))``, so a
    size passed on its own is ignored.
    """
    super().__init__(*features)

    if input_size is None or target_size is None:
        # At least one size is missing: derive both from the features.
        exposed_fitted_schema = FittedSchema._build_already_fitted(AnalyzedSchema(*features))
        input_size = exposed_fitted_schema.input_shape
        target_size = exposed_fitted_schema.target_shape

    self.input_size = input_size
    self.target_size = target_size

`input_size = input_size` #

`target_size = target_size` #

`as_exposed: ExposedFittedSchema` #

Return exposed fitted schema.

`from_exposed(exposed_fitted_schema: ExposedFittedSchema) -> FittedSchema` #

Convert ExposedFittedSchema to FittedSchema.

Source code in src/xpdeep/dataset/schema/schema.py

@staticmethod
def from_exposed(exposed_fitted_schema: ExposedFittedSchema) -> "FittedSchema":
    """
    Build a FittedSchema from an ExposedFittedSchema.

    Every column is passed through ``extract_feature_from_exposed``; the
    input and target sizes are copied from ``input_shape`` and
    ``target_shape`` of the exposed schema.

    Raises
    ------
    TypeError
        If any column is not an ``ExposedFeature`` instance.
    """
    exposed_columns = exposed_fitted_schema.columns

    # Validate every column before converting any of them.
    for column in exposed_columns:
        if isinstance(column, ExposedFeature):
            continue
        msg = f"Unsupported feature {column}."
        raise TypeError(msg)

    converted = [extract_feature_from_exposed(column) for column in exposed_columns]
    return FittedSchema(
        *converted,  # type: ignore[arg-type]
        input_size=exposed_fitted_schema.input_shape,
        target_size=exposed_fitted_schema.target_shape,
    )

`from_bytes(fitted_schema_as_bytes: bytes) -> FittedSchema` #

Convert bytes to fitted schema.

Source code in src/xpdeep/dataset/schema/schema.py

@staticmethod
def from_bytes(fitted_schema_as_bytes: bytes) -> "FittedSchema":
    """
    Build a FittedSchema from its serialized bytes.

    The bytes are decoded into an ExposedFittedSchema with
    ``NumpyMsgpackDecoder``, and the result is converted with ``from_exposed``.
    """
    decoder = NumpyMsgpackDecoder(ExposedFittedSchema)
    exposed_fitted_schema = decoder.decode(fitted_schema_as_bytes)
    return FittedSchema.from_exposed(exposed_fitted_schema)

schema

`Schema(*features: _FEATURE_TYPE)` #

`features` #

`columns: list[_FEATURE_TYPE] = list(features)` #

`__getitem__(feature_name: str) -> _FEATURE_TYPE` #

`__repr__() -> str` #

`AnalyzedSchema(*features: _FEATURE_TYPE)` #

`features` #

`as_exposed: ExposedAnalyzedSchema` #

`from_exposed(exposed_analyzed_schema: ExposedAnalyzedSchema) -> AnalyzedSchema` #

`from_bytes(analyzed_schema_as_bytes: bytes) -> AnalyzedSchema` #

`remove_feature(feature_name: str) -> Feature | BaseFeature | Metadata` #

`__setitem__(feature_name: str, new_feature: Feature) -> None` #

`__delitem__(feature_name: str) -> None` #

`FittedSchema(*features: Feature | Metadata, input_size: tuple[int, ...] | None = None, target_size: tuple[int, ...] | None = None)` #

`input_size = input_size` #

`target_size = target_size` #

`as_exposed: ExposedFittedSchema` #

`from_exposed(exposed_fitted_schema: ExposedFittedSchema) -> FittedSchema` #

`from_bytes(fitted_schema_as_bytes: bytes) -> FittedSchema` #

schema

Schema(*features: _FEATURE_TYPE) #

features #

columns: list[_FEATURE_TYPE] = list(features) #

__getitem__(feature_name: str) -> _FEATURE_TYPE #

__repr__() -> str #

AnalyzedSchema(*features: _FEATURE_TYPE) #

features #

as_exposed: ExposedAnalyzedSchema #

from_exposed(exposed_analyzed_schema: ExposedAnalyzedSchema) -> AnalyzedSchema #

from_bytes(analyzed_schema_as_bytes: bytes) -> AnalyzedSchema #

remove_feature(feature_name: str) -> Feature | BaseFeature | Metadata #

__setitem__(feature_name: str, new_feature: Feature) -> None #

__delitem__(feature_name: str) -> None #

FittedSchema(*features: Feature | Metadata, input_size: tuple[int, ...] | None = None, target_size: tuple[int, ...] | None = None) #

input_size = input_size #

target_size = target_size #

as_exposed: ExposedFittedSchema #

from_exposed(exposed_fitted_schema: ExposedFittedSchema) -> FittedSchema #

from_bytes(fitted_schema_as_bytes: bytes) -> FittedSchema #

`Schema(*features: _FEATURE_TYPE)` #

`features` #

`columns: list[_FEATURE_TYPE] = list(features)` #

`__getitem__(feature_name: str) -> _FEATURE_TYPE` #

`__repr__() -> str` #

`AnalyzedSchema(*features: _FEATURE_TYPE)` #

`features` #

`as_exposed: ExposedAnalyzedSchema` #

`from_exposed(exposed_analyzed_schema: ExposedAnalyzedSchema) -> AnalyzedSchema` #

`from_bytes(analyzed_schema_as_bytes: bytes) -> AnalyzedSchema` #

`remove_feature(feature_name: str) -> Feature | BaseFeature | Metadata` #

`__setitem__(feature_name: str, new_feature: Feature) -> None` #

`__delitem__(feature_name: str) -> None` #

`FittedSchema(*features: Feature | Metadata, input_size: tuple[int, ...] | None = None, target_size: tuple[int, ...] | None = None)` #

`input_size = input_size` #

`target_size = target_size` #

`as_exposed: ExposedFittedSchema` #

`from_exposed(exposed_fitted_schema: ExposedFittedSchema) -> FittedSchema` #

`from_bytes(fitted_schema_as_bytes: bytes) -> FittedSchema` #