
schema

Schemas.

Classes:

Schema
    Schema definition. Used as base class for the AnalyzedSchema and FittedSchema classes.
AnalyzedSchema
    A schema which represents the raw data structure: feature types and positions.
FittedSchema
    A schema which represents the fitted data structure: feature types and positions, plus input and target sizes.

Schema(*features: FEATURE_TYPE) #

Schema definition. Used as base class for the AnalyzedSchema and FittedSchema classes.

Initialize the class with the specified features.

Parameters:

features : FEATURE_TYPE, default ()
    A variable number of features to be included. These features will be stored in the columns attribute as a list.

Attributes:

columns : list[FEATURE_TYPE]
    A list containing the features provided during initialization.

Methods:

__getitem__
    Get a feature.
__repr__
    Represent the schema.

Source code in src/xpdeep/dataset/schema.py
def __init__(self, *features: FEATURE_TYPE) -> None:
    """
    Initialize the class with the specified features.

    Parameters
    ----------
    features : FEATURE_TYPE
        A variable number of features to be included. These features will be stored
        in the `columns` attribute as a list.

    Attributes
    ----------
    columns : list[FEATURE_TYPE]
        A list containing the features provided during initialization.
    """
    self.columns: list[FEATURE_TYPE] = list(features)

    # Required for tests only. We need to save schemas as json files to achieve integration tests.
    self._json_api_response: dict[str, object] | None = None
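
For illustration, a minimal construction sketch. Only IndexMetadata(name=...) appears verbatim in this module's source; the BaseFeature constructor and the feature-class import paths below are assumptions.

from xpdeep.dataset.schema import Schema

# Hypothetical imports: the feature classes may live in another module.
from xpdeep.dataset.schema import BaseFeature, IndexMetadata

schema = Schema(
    BaseFeature(name="age"),     # constructor signature assumed
    BaseFeature(name="income"),  # constructor signature assumed
    IndexMetadata(name="index_xp_deep"),
)
print(schema.columns)  # the features, stored as a list in the order given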

columns: list[FEATURE_TYPE] = list(features) #

__getitem__(feature_name: str) -> FEATURE_TYPE #

Get a feature.

Source code in src/xpdeep/dataset/schema.py
def __getitem__(self, feature_name: str) -> FEATURE_TYPE:
    """Get a feature."""
    result = [feature for feature in self.columns if feature.name == feature_name]
    if len(result) > 1:
        message = f"Multiple features with name: {feature_name} in the schema."
        raise NameError(message)
    if len(result) == 0:
        message = f"No feature with name: {feature_name} in the schema."
        raise NameError(message)
    return result[0]
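
A usage sketch, continuing the hypothetical schema built above. Lookup is by feature name, and NameError is raised for a missing or duplicated name.

age = schema["age"]
print(age.name)  # "age"

try:
    schema["missing"]
except NameError as err:
    print(err)  # no feature with that name in the schema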

__repr__() -> str #

Represent the schema.

Source code in src/xpdeep/dataset/schema.py
def __repr__(self) -> str:
    """Represent the schema."""
    from prettytable import PrettyTable  # noqa: PLC0415

    # Create a single table for both features and metadata
    combined_table = PrettyTable()
    combined_table.title = "Schema Contents"
    combined_table.align = "l"
    combined_table.field_names = ["Type", "Name", "Is Target"]

    # Add feature rows to the table
    for feature in self.columns:
        if not isinstance(feature, IndexMetadata):
            combined_table.add_row([
                feature.feature_type.__class__.__name__
                if isinstance(feature, ExplainableFeature)
                else "BaseFeature",
                feature.name,
                "✅" if feature.is_target else "❌",
            ])
        else:
            combined_table.add_row([
                "IndexMetadata",
                feature.name,
                "",  # Empty for metadata
            ])

    return combined_table.get_string()
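
Printing a schema therefore renders a PrettyTable titled "Schema Contents" with the columns Type, Name, and Is Target; IndexMetadata rows leave the target column empty.

print(schema)  # rows like: BaseFeature | age | ❌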

AnalyzedSchema(*features: FEATURE_TYPE) #

A schema which represents the raw data structure: feature types and positions.

Methods:

from_model
    Create the client object from the API response.
remove_feature
    Remove the feature with the given name from the schema, and return the removed feature.
__setitem__
    Replace the feature with the given name in the analyzed schema.
__delitem__
    Delete a feature.

Attributes:

as_fit_schema_pipeline_input_schema : list[FeatureInsert | IndexMetadataInsert | MetadataInsert]
    Convert the schema columns to FeatureInsert, IndexMetadataInsert, or MetadataInsert instances.

Source code in src/xpdeep/dataset/schema.py
def __init__(self, *features: FEATURE_TYPE) -> None:
    """
    Initialize the class with the specified features.

    Parameters
    ----------
    features : FEATURE_TYPE
        A variable number of features to be included. These features will be stored
        in the `columns` attribute as a list.

    Attributes
    ----------
    columns : list[FEATURE_TYPE]
        A list containing the features provided during initialization.
    """
    self.columns: list[FEATURE_TYPE] = list(features)

    # Required for tests only. We need to save schemas as json files to achieve integration tests.
    self._json_api_response: dict[str, object] | None = None

as_fit_schema_pipeline_input_schema: list[FeatureInsert | IndexMetadataInsert | MetadataInsert] #

Convert the schema columns to FeatureInsert, IndexMetadataInsert, or MetadataInsert instances.

from_model(json_response: dict[str, object]) -> AnalyzedSchema #

Create the client object from the API response.

Source code in src/xpdeep/dataset/schema.py
@classmethod
def from_model(cls, json_response: dict[str, object]) -> AnalyzedSchema:
    """Create the client object from api response."""
    analyzed_schema = cls(*[
        feature_from_model(feature)
        for feature in json_response["columns"]  # type: ignore[attr-defined]
    ])

    analyzed_schema._json_api_response = deepcopy(json_response)

    return analyzed_schema

remove_feature(feature_name: str) -> ExplainableFeature | BaseFeature | IndexMetadata #

Remove the feature with the given name from the schema, and return the removed feature.

Source code in src/xpdeep/dataset/schema.py
def remove_feature(self, feature_name: str) -> ExplainableFeature | BaseFeature | IndexMetadata:
    """Remove the feature with the given name from the schema, and return the removed feature."""
    feature = self[feature_name]
    return self.columns.pop(self.columns.index(feature))

__setitem__(feature_name: str, new_feature: ExplainableFeature | BaseFeature) -> None #

Replace the feature with the given name in the analyzed schema.

Source code in src/xpdeep/dataset/schema.py
def __setitem__(self, feature_name: str, new_feature: ExplainableFeature | BaseFeature) -> None:
    """Replace the feature with the given name in the analyzed schema."""
    feature_to_replace = self[feature_name]
    if feature_to_replace.name != new_feature.name:
        message = (
            f"The new feature name {new_feature.name} differs from the name of the feature being "
            f"replaced. The new feature name should be {feature_name}."
        )
        raise NameError(message)

    feature_to_replace_index = self.columns.index(feature_to_replace)
    self.columns[feature_to_replace_index] = new_feature

__delitem__(feature_name: str) -> None #

Delete a feature.

Source code in src/xpdeep/dataset/schema.py
def __delitem__(self, feature_name: str) -> None:
    """Delete feature."""
    self.remove_feature(feature_name)
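
A mutation sketch for AnalyzedSchema, reusing the assumed BaseFeature constructor from the earlier sketch. Replacement must keep the original feature name, otherwise __setitem__ raises NameError.

from xpdeep.dataset.schema import AnalyzedSchema

analyzed = AnalyzedSchema(
    BaseFeature(name="age"),     # constructor signature assumed
    BaseFeature(name="income"),  # constructor signature assumed
)

analyzed["age"] = BaseFeature(name="age")    # replace in place; the name must match
removed = analyzed.remove_feature("income")  # pop and return the feature
del analyzed["age"]                          # same as remove_feature, result discarded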

FittedSchema(*features: ExplainableFeature | IndexMetadata, input_size: tuple[int, ...], target_size: tuple[int, ...]) #

A schema which represents the fitted data structure: feature types and positions, plus input and target sizes.

Initialize the FittedSchema instance.

A FittedSchema may be returned directly after an analysis step, via AnalyzedParquetDataset.fit, but it may also be constructed from scratch.

Parameters:

features : ExplainableFeature | IndexMetadata, default ()
    A feature or metadata object, fitted.
input_size : tuple[int, ...], required
    The size of the input data (with the first dimension being the batch dimension), used by PyTorch to serialize the associated models.
target_size : tuple[int, ...], required
    The size of the target data (with the first dimension being the batch dimension), used by PyTorch to serialize the associated models.

Methods:

from_model
    Create a FittedSchema from the API response.
to_model
    Convert to a DatasetArtifactSchemaInsert instance.
add_augmentation
    When the schema is fitted, augmentation can be added to features.

Attributes:

input_size : tuple[int, ...]
    The size of the input data, including the leading batch dimension.
target_size : tuple[int, ...]
    The size of the target data, including the leading batch dimension.
Source code in src/xpdeep/dataset/schema.py
def __init__(
    self,
    *features: ExplainableFeature | IndexMetadata,
    input_size: tuple[int, ...],
    target_size: tuple[int, ...],
) -> None:
    """
    Initialize the FittedSchema instance.

    A FittedSchema may be returned directly after an analysis step, via AnalyzedParquetDataset.fit, but it may
    also be constructed from scratch.

    Parameters
    ----------
    features: ExplainableFeature | IndexMetadata
        A feature or metadata object, fitted.
    input_size: tuple[int, ...]
        The size of the input data (with the first dimension being the batch dimension), used by PyTorch to
        serialize the associated models.
    target_size: tuple[int, ...]
        The size of the target data (with the first dimension being the batch dimension), used by PyTorch to
        serialize the associated models.
    """
    super().__init__(*features)

    # If instantiated from scratch and not with from_model, we need to add the index_xp_deep column.
    if not any(isinstance(feature, IndexMetadata) for feature in self.columns):
        self.columns.append(IndexMetadata(name="index_xp_deep"))

    self.input_size = input_size
    self.target_size = target_size
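
A minimal construction sketch, using only what the signature above guarantees: features may be omitted, and the sizes include the batch dimension as their first element.

from xpdeep.dataset.schema import FittedSchema  # public import path assumed

fitted = FittedSchema(input_size=(1, 10), target_size=(1, 1))

# No IndexMetadata column was passed, so an "index_xp_deep" column is
# appended automatically (see __init__ above).
print(fitted.columns)
print(fitted.input_size, fitted.target_size)  # (1, 10) (1, 1)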

input_size = input_size #

target_size = target_size #

from_model(json_response: dict[str, object]) -> FittedSchema #

Create a FittedSchema from the API response.

Source code in src/xpdeep/dataset/schema.py
@classmethod
def from_model(cls, json_response: dict[str, object]) -> FittedSchema:
    """Convert to FittedSchema."""
    fitted_schema = cls(
        *[feature_from_model(feature) for feature in json_response["columns"]],  # type: ignore[arg-type,attr-defined]
        input_size=json_response["input_shape"],  # type: ignore[arg-type]
        target_size=json_response["target_shape"],  # type: ignore[arg-type]
    )

    fitted_schema._json_api_response = deepcopy(json_response)

    return fitted_schema

to_model() -> DatasetArtifactSchemaInsert #

Convert to a DatasetArtifactSchemaInsert instance.

Source code in src/xpdeep/dataset/schema.py
def to_model(self) -> DatasetArtifactSchemaInsert:
    """Convert to DatasetArtifactSchemaInsert instance."""
    return DatasetArtifactSchemaInsert(
        columns=[feature.to_model() for feature in self.columns],
        input_shape=list(self.input_size),
        target_shape=list(self.target_size),
    )

add_augmentation(feature_name: str, augmentation: ImageFeatureAugmentation) -> None #

When the schema is fitted, augmentation can be added to features.

Source code in src/xpdeep/dataset/schema.py
def add_augmentation(self, feature_name: str, augmentation: ImageFeatureAugmentation) -> None:
    """When the schema is fitted, augmentation can be added to features."""
    feature = self[feature_name]
    if isinstance(feature, IndexMetadata):
        message = "Augmentation can only be defined on features."
        raise ApiError(message)
    feature.feature_augmentation = augmentation
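
A usage sketch. This page does not show how to build an ImageFeatureAugmentation, so my_augmentation, the schema fitted_schema, and its "image" feature are all assumptions.

# my_augmentation: an ImageFeatureAugmentation instance, built elsewhere (assumed).
# fitted_schema: a FittedSchema containing an "image" feature (assumed).
fitted_schema.add_augmentation("image", my_augmentation)

# Targeting an IndexMetadata column is rejected:
fitted_schema.add_augmentation("index_xp_deep", my_augmentation)  # raises ApiError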