Skip to content

schema

Schemas.

Classes:

Name Description
Schema

Schema definition. Used as base class for AnalyzedSchema and FittedSchema classes.

AnalyzedSchema

A schema which represents the raw data structure: feature types and positions.

FittedSchema

A schema which represents the fitted data structure: feature types and positions, together with the input and target sizes.

Schema(*features: _FEATURE_TYPE) #

Schema definition. Used as base class for AnalyzedSchema and FittedSchema classes.

Initialize the class with the specified features.

Parameters:

Name Type Description Default

features #

_FEATURE_TYPE

A variable number of features to be included. These features will be stored in the columns attribute as a list.

()

Attributes:

Name Type Description
columns list[_FEATURE_TYPE]

A list containing the features provided during initialization.

Methods:

Name Description
__getitem__

Get a feature.

__repr__

Represent the schema.

Attributes:

Name Type Description
columns list[_FEATURE_TYPE]
Source code in src/xpdeep/dataset/schema/schema.py
def __init__(self, *features: _FEATURE_TYPE) -> None:
    """
    Build a schema from the given features.

    Parameters
    ----------
    features : _FEATURE_TYPE
        Zero or more features, passed positionally. They are copied, in the
        order given, into a new list stored as the `columns` attribute.

    Attributes
    ----------
    columns : list[_FEATURE_TYPE]
        The features supplied at construction time, in call order.
    """
    # Unpack into a fresh list so `columns` is mutable and independent of the args tuple.
    self.columns: list[_FEATURE_TYPE] = [*features]

columns: list[_FEATURE_TYPE] = list(features) #

__getitem__(feature_name: str) -> _FEATURE_TYPE #

Get a feature.

Source code in src/xpdeep/dataset/schema/schema.py
def __getitem__(self, feature_name: str) -> _FEATURE_TYPE:
    """
    Look up a column by its name.

    Parameters
    ----------
    feature_name : str
        Name compared against the `name` attribute of every entry in `columns`.

    Returns
    -------
    _FEATURE_TYPE
        The single column whose name equals `feature_name`.

    Raises
    ------
    NameError
        If no column, or more than one column, carries that name.
    """
    matches = [column for column in self.columns if column.name == feature_name]
    if not matches:
        message = f"No feature with name : {feature_name} in the analyzed schema."
        raise NameError(message)
    if len(matches) > 1:
        # Names must be unique for the lookup to be unambiguous.
        message = f"Multiple features with name : {feature_name} in the analyzed schema."
        raise NameError(message)
    return matches[0]

__repr__() -> str #

Represent the schema.

Source code in src/xpdeep/dataset/schema/schema.py
def __repr__(self) -> str:
    """
    Render the schema as a text table.

    Returns
    -------
    str
        A table titled "Schema Contents" with one row per entry of `columns`.
        Metadata rows show "Metadata" as their type and leave the
        "Is Target" cell empty; every other row shows the column's class name
        and a check or cross mark derived from its `is_target` attribute.
    """
    from prettytable import PrettyTable  # noqa: PLC0415

    # One table holds both features and metadata.
    table = PrettyTable()
    table.title = "Schema Contents"
    table.align = "l"
    table.field_names = ["Type", "Name", "Is Target"]

    for column in self.columns:
        if isinstance(column, Metadata):
            # Metadata has no target flag to display.
            row = ["Metadata", column.name, ""]
        else:
            row = [column.__class__.__name__, column.name, "✅" if column.is_target else "❌"]
        table.add_row(row)

    return table.get_string()

AnalyzedSchema(*features: _FEATURE_TYPE) #

A schema which represents the raw data structure: feature types and positions.

Initialize the class with the specified features.

Parameters:

Name Type Description Default

features #

_FEATURE_TYPE

A variable number of features to be included. These features will be stored in the columns attribute as a list.

()

Attributes:

Name Type Description
columns list[_FEATURE_TYPE]

A list containing the features provided during initialization.

Methods:

Name Description
from_exposed

Convert ExposedAnalyzedSchema to AnalyzedSchema.

from_bytes

Convert bytes to analyzed schema.

remove_feature

Remove the feature with the given name from the schema, and return the removed feature.

__setitem__

Replace the feature with the given name in the analyzed schema; the new feature must carry the same name.

__delitem__

Delete feature.

Attributes:

Name Type Description
as_exposed ExposedAnalyzedSchema

Return exposed analyzed schema.

Source code in src/xpdeep/dataset/schema/schema.py
def __init__(self, *features: _FEATURE_TYPE) -> None:
    """
    Create the schema and record its features.

    Parameters
    ----------
    features : _FEATURE_TYPE
        Any number of positional features. A new list holding them, in the
        same order, is assigned to the `columns` attribute.

    Attributes
    ----------
    columns : list[_FEATURE_TYPE]
        List of the features received by the constructor.
    """
    # Copy the varargs tuple into a list so columns can later be replaced or removed.
    self.columns: list[_FEATURE_TYPE] = [*features]

as_exposed: ExposedAnalyzedSchema #

Return exposed analyzed schema.

from_exposed(exposed_analyzed_schema: ExposedAnalyzedSchema) -> AnalyzedSchema #

Convert ExposedAnalyzedSchema to AnalyzedSchema.

Source code in src/xpdeep/dataset/schema/schema.py
@staticmethod
def from_exposed(exposed_analyzed_schema: ExposedAnalyzedSchema) -> "AnalyzedSchema":
    """
    Build an AnalyzedSchema from an ExposedAnalyzedSchema.

    Each entry of `exposed_analyzed_schema.columns` is passed through
    `extract_base_feature_from_exposed`, and the results become the columns of
    the new schema, in the same order.
    """
    base_features = map(extract_base_feature_from_exposed, exposed_analyzed_schema.columns)
    return AnalyzedSchema(*base_features)

from_bytes(analyzed_schema_as_bytes: bytes) -> AnalyzedSchema #

Convert bytes to analyzed schema.

Source code in src/xpdeep/dataset/schema/schema.py
@staticmethod
def from_bytes(analyzed_schema_as_bytes: bytes) -> "AnalyzedSchema":
    """
    Build an AnalyzedSchema from its serialized bytes.

    The bytes are decoded with `NumpyMsgpackDecoder(ExposedAnalyzedSchema)` and
    the decoded object is then converted through `AnalyzedSchema.from_exposed`.
    """
    decoder = NumpyMsgpackDecoder(ExposedAnalyzedSchema)
    exposed_schema = decoder.decode(analyzed_schema_as_bytes)
    return AnalyzedSchema.from_exposed(exposed_schema)

remove_feature(feature_name: str) -> Feature | BaseFeature | Metadata #

Remove the feature with the given name from the schema, and return the removed feature.

Source code in src/xpdeep/dataset/schema/schema.py
def remove_feature(self, feature_name: str) -> Feature | BaseFeature | Metadata:
    """
    Take the column named `feature_name` out of the schema and return it.

    The column is resolved with `self[feature_name]`, so any error raised by
    that lookup propagates unchanged. The remaining columns keep their
    relative order.
    """
    target = self[feature_name]
    position = self.columns.index(target)
    return self.columns.pop(position)

__setitem__(feature_name: str, new_feature: Feature) -> None #

Replace the feature with the given name in the analyzed schema; the new feature must carry the same name.

Source code in src/xpdeep/dataset/schema/schema.py
def __setitem__(self, feature_name: str, new_feature: Feature) -> None:
    """
    Replace the column named `feature_name` with `new_feature`, in place.

    The existing column is resolved with `self[feature_name]`, so any error
    raised by that lookup propagates unchanged. The replacement keeps the
    position of the column it overwrites.

    Raises
    ------
    NameError
        If `new_feature.name` differs from the name of the column being replaced.
    """
    current = self[feature_name]
    if current.name != new_feature.name:
        # Renaming through item assignment is not allowed: the key and the feature name must agree.
        message = (
            f"The new feature name {new_feature.name} is different than the overwritten feature one. "
            f"The new feature name should be {feature_name}. "
        )
        raise NameError(message)

    self.columns[self.columns.index(current)] = new_feature

__delitem__(feature_name: str) -> None #

Delete feature.

Source code in src/xpdeep/dataset/schema/schema.py
def __delitem__(self, feature_name: str) -> None:
    """
    Drop the column named `feature_name`, enabling `del schema[feature_name]`.

    Delegates to `remove_feature` and discards the feature it returns.
    """
    _ = self.remove_feature(feature_name)

FittedSchema(*features: Feature | Metadata, input_size: tuple[int, ...] | None = None, target_size: tuple[int, ...] | None = None) #

A schema which represents the fitted data structure: feature types and positions, together with the input and target sizes.

FittedSchema constructor.

Methods:

Name Description
from_exposed

Convert ExposedFittedSchema to FittedSchema.

from_bytes

Convert bytes to fitted schema.

Attributes:

Name Type Description
input_size
target_size
as_exposed ExposedFittedSchema

Return exposed fitted schema.

Source code in src/xpdeep/dataset/schema/schema.py
def __init__(
    self,
    *features: Feature | Metadata,
    input_size: tuple[int, ...] | None = None,
    target_size: tuple[int, ...] | None = None,
) -> None:
    """
    Build a fitted schema from features and, optionally, known sizes.

    Parameters
    ----------
    features : Feature | Metadata
        Columns of the schema, forwarded to the parent constructor.
    input_size : tuple[int, ...] | None
        Stored as `input_size` when `target_size` is provided as well.
    target_size : tuple[int, ...] | None
        Stored as `target_size` when `input_size` is provided as well.

    Notes
    -----
    If either size is missing, any size that was given is ignored and both are
    read from `FittedSchema._build_already_fitted(AnalyzedSchema(*features))`,
    using its `input_shape` and `target_shape` attributes.
    """
    super().__init__(*features)
    if input_size is None or target_size is None:
        # At least one size is unknown: derive both from the features themselves.
        derived = FittedSchema._build_already_fitted(AnalyzedSchema(*features))
        self.input_size = derived.input_shape
        self.target_size = derived.target_shape
    else:
        self.input_size = input_size
        self.target_size = target_size

input_size = input_size #

target_size = target_size #

as_exposed: ExposedFittedSchema #

Return exposed fitted schema.

from_exposed(exposed_fitted_schema: ExposedFittedSchema) -> FittedSchema #

Convert ExposedFittedSchema to FittedSchema.

Source code in src/xpdeep/dataset/schema/schema.py
@staticmethod
def from_exposed(exposed_fitted_schema: ExposedFittedSchema) -> "FittedSchema":
    """
    Build a FittedSchema from an ExposedFittedSchema.

    Every column is converted with `extract_feature_from_exposed`, and the
    exposed `input_shape` / `target_shape` are passed on as `input_size` /
    `target_size`.

    Raises
    ------
    TypeError
        If any column of the exposed schema is not an `ExposedFeature`; the
        message names the first such column.
    """
    exposed_columns = exposed_fitted_schema.columns
    rejected = [column for column in exposed_columns if not isinstance(column, ExposedFeature)]
    if rejected:
        msg = f"Unsupported feature {rejected[0]}."
        raise TypeError(msg)
    return FittedSchema(
        *(extract_feature_from_exposed(column) for column in exposed_columns),  # type: ignore[arg-type]
        input_size=exposed_fitted_schema.input_shape,
        target_size=exposed_fitted_schema.target_shape,
    )

from_bytes(fitted_schema_as_bytes: bytes) -> FittedSchema #

Convert bytes to fitted schema.

Source code in src/xpdeep/dataset/schema/schema.py
@staticmethod
def from_bytes(fitted_schema_as_bytes: bytes) -> "FittedSchema":
    """
    Build a FittedSchema from its serialized bytes.

    The bytes are decoded with `NumpyMsgpackDecoder(ExposedFittedSchema)` and
    the decoded object is then converted through `FittedSchema.from_exposed`.
    """
    decoder = NumpyMsgpackDecoder(ExposedFittedSchema)
    exposed_schema = decoder.decode(fitted_schema_as_bytes)
    return FittedSchema.from_exposed(exposed_schema)