
schema

Schemas.

Classes:

Schema
    Schema definition. Used as base class for the AnalyzedSchema and FittedSchema classes.
AnalyzedSchema
    A schema which represents the raw data structure: feature types and positions.
FittedSchema
    A schema which represents the fitted data structure: feature types and positions, plus input and target sizes.

Schema(*features: FEATURE_TYPE) #

Schema definition. Used as base class for the AnalyzedSchema and FittedSchema classes.

Initialize the class with the specified features.

Parameters:

features : FEATURE_TYPE, default ()
    A variable number of features to be included. These features will be stored in the columns attribute as a list.

Attributes:

columns : list[FEATURE_TYPE]
    A list containing the features provided during initialization.

Methods:

__getitem__
    Get a feature.
__repr__
    Represent the schema.

Source code in src/xpdeep/dataset/schema.py
def __init__(self, *features: FEATURE_TYPE) -> None:
    """
    Initialize the class with the specified features.

    Parameters
    ----------
    features : FEATURE_TYPE
        A variable number of features to be included. These features will be stored
        in the `columns` attribute as a list.

    Attributes
    ----------
    columns : list[FEATURE_TYPE]
        A list containing the features provided during initialization.
    """
    self.columns: list[FEATURE_TYPE] = list(features)

    # Required for tests only. We need to save schemas as json files to achieve integration tests.
    self._json_api_response: dict[str, object] | None = None
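
For illustration, a minimal construction sketch. Only IndexMetadata(name=...) appears verbatim in this module's source; the BaseFeature constructor and the feature-class import paths below are assumptions.

from xpdeep.dataset.schema import Schema

# Hypothetical imports: the feature classes may live in another module.
from xpdeep.dataset.schema import BaseFeature, IndexMetadata

schema = Schema(
    BaseFeature(name="age"),     # constructor signature assumed
    BaseFeature(name="income"),  # constructor signature assumed
    IndexMetadata(name="index_xp_deep"),
)
print(schema.columns)  # the features, stored as a list in the order given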

columns: list[FEATURE_TYPE] = list(features) #

__getitem__(feature_name: str) -> FEATURE_TYPE #

Get a feature.

Source code in src/xpdeep/dataset/schema.py
def __getitem__(self, feature_name: str) -> FEATURE_TYPE:
    """Get a feature."""
    result = [feature for feature in self.columns if feature.name == feature_name]
    if len(result) > 1:
        message = f"Multiple features with name: {feature_name} in the schema."
        raise NameError(message)
    if len(result) == 0:
        message = f"No feature with name: {feature_name} in the schema."
        raise NameError(message)
    return result[0]
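
A usage sketch, continuing the hypothetical schema built above. Lookup is by feature name, and NameError is raised for a missing or duplicated name.

age = schema["age"]
print(age.name)  # "age"

try:
    schema["missing"]
except NameError as err:
    print(err)  # no feature with that name in the schema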

__repr__() -> str #

Represent the schema.

Source code in src/xpdeep/dataset/schema.py
def __repr__(self) -> str:
    """Represent the schema."""
    from prettytable import PrettyTable  # noqa: PLC0415

    # Create a single table for both features and metadata
    combined_table = PrettyTable()
    combined_table.title = "Schema Contents"
    combined_table.align = "l"
    combined_table.field_names = ["Type", "Name", "Is Target"]

    # Add feature rows to the table
    for feature in self.columns:
        if not isinstance(feature, IndexMetadata):
            combined_table.add_row([
                feature.feature_type.__class__.__name__
                if isinstance(feature, ExplainableFeature)
                else "BaseFeature",
                feature.name,
                "✅" if feature.is_target else "❌",
            ])
        else:
            combined_table.add_row([
                "IndexMetadata",
                feature.name,
                "",  # Empty for metadata
            ])

    return combined_table.get_string()
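
Printing a schema therefore renders a PrettyTable titled "Schema Contents" with the columns Type, Name, and Is Target; IndexMetadata rows leave the target column empty.

print(schema)  # rows like: BaseFeature | age | ❌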

AnalyzedSchema(*features: FEATURE_TYPE) #

A schema which represents the raw data structure: feature types and positions.

Methods:

from_model
    Create the client object from the API response.
remove_feature
    Remove the feature with the given name from the schema, and return the removed feature.
__setitem__
    Replace the feature with the given name in the analyzed schema.
__delitem__
    Delete a feature.

Attributes:

as_fit_schema_pipeline_input_schema : list[FeatureInsert | IndexMetadataInsert | MetadataInsert]
    Convert the schema columns to FeatureInsert, IndexMetadataInsert, or MetadataInsert instances.

Source code in src/xpdeep/dataset/schema.py
def __init__(self, *features: FEATURE_TYPE) -> None:
    """
    Initialize the class with the specified features.

    Parameters
    ----------
    features : FEATURE_TYPE
        A variable number of features to be included. These features will be stored
        in the `columns` attribute as a list.

    Attributes
    ----------
    columns : list[FEATURE_TYPE]
        A list containing the features provided during initialization.
    """
    self.columns: list[FEATURE_TYPE] = list(features)

    # Required for tests only. We need to save schemas as json files to achieve integration tests.
    self._json_api_response: dict[str, object] | None = None

as_fit_schema_pipeline_input_schema: list[FeatureInsert | IndexMetadataInsert | MetadataInsert] #

Convert the schema columns to FeatureInsert, IndexMetadataInsert, or MetadataInsert instances.

from_model(json_response: dict[str, object]) -> AnalyzedSchema #

Create the client object from the API response.

Source code in src/xpdeep/dataset/schema.py
@classmethod
def from_model(cls, json_response: dict[str, object]) -> AnalyzedSchema:
    """Create the client object from api response."""
    analyzed_schema = cls(*[
        feature_from_model(feature)
        for feature in json_response["columns"]  # type: ignore[attr-defined]
    ])

    analyzed_schema._json_api_response = deepcopy(json_response)

    return analyzed_schema

remove_feature(feature_name: str) -> ExplainableFeature | BaseFeature | IndexMetadata #

Remove the feature with the given name from the schema, and return the removed feature.

Source code in src/xpdeep/dataset/schema.py
def remove_feature(self, feature_name: str) -> ExplainableFeature | BaseFeature | IndexMetadata:
    """Remove the feature with the given name from the schema, and return the removed feature."""
    feature = self[feature_name]
    return self.columns.pop(self.columns.index(feature))

__setitem__(feature_name: str, new_feature: ExplainableFeature | BaseFeature) -> None #

Replace the feature with the given name in the analyzed schema.

Source code in src/xpdeep/dataset/schema.py
def __setitem__(self, feature_name: str, new_feature: ExplainableFeature | BaseFeature) -> None:
    """Replace the feature with the given name in the analyzed schema."""
    feature_to_replace = self[feature_name]
    if feature_to_replace.name != new_feature.name:
        message = (
            f"The new feature name {new_feature.name} differs from the name of the feature being "
            f"replaced. The new feature name should be {feature_name}."
        )
        raise NameError(message)

    feature_to_replace_index = self.columns.index(feature_to_replace)
    self.columns[feature_to_replace_index] = new_feature

__delitem__(feature_name: str) -> None #

Delete a feature.

Source code in src/xpdeep/dataset/schema.py
def __delitem__(self, feature_name: str) -> None:
    """Delete feature."""
    self.remove_feature(feature_name)
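
A mutation sketch for AnalyzedSchema, reusing the assumed BaseFeature constructor from the earlier sketch. Replacement must keep the original feature name, otherwise __setitem__ raises NameError.

from xpdeep.dataset.schema import AnalyzedSchema

analyzed = AnalyzedSchema(
    BaseFeature(name="age"),     # constructor signature assumed
    BaseFeature(name="income"),  # constructor signature assumed
)

analyzed["age"] = BaseFeature(name="age")    # replace in place; the name must match
removed = analyzed.remove_feature("income")  # pop and return the feature
del analyzed["age"]                          # same as remove_feature, result discarded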

FittedSchema(*features: ExplainableFeature | IndexMetadata, input_size: tuple[int, ...], target_size: tuple[int, ...]) #

A schema which represents the fitted data structure: feature types and positions, plus input and target sizes.

Initialize the FittedSchema instance.

A FittedSchema may be returned directly after an analysis step, via AnalyzedParquetDataset.fit, but it may also be constructed from scratch.

Parameters:

features : ExplainableFeature | IndexMetadata, default ()
    A feature or metadata object, fitted.
input_size : tuple[int, ...], required
    The size of the input data (with the first dimension being the batch dimension), used by PyTorch to serialize the associated models.
target_size : tuple[int, ...], required
    The size of the target data (with the first dimension being the batch dimension), used by PyTorch to serialize the associated models.

Methods:

from_model
    Create a FittedSchema from the API response.
to_model
    Convert to a DatasetArtifactSchemaInsert instance.
add_augmentation
    When the schema is fitted, augmentation can be added to features.

Attributes:

input_size : tuple[int, ...]
    The size of the input data, including the leading batch dimension.
target_size : tuple[int, ...]
    The size of the target data, including the leading batch dimension.
Source code in src/xpdeep/dataset/schema.py
def __init__(
    self,
    *features: ExplainableFeature | IndexMetadata,
    input_size: tuple[int, ...],
    target_size: tuple[int, ...],
) -> None:
    """
    Initialize the FittedSchema instance.

    A FittedSchema may be returned directly after an analysis step, via AnalyzedParquetDataset.fit, but it may
    also be constructed from scratch.

    Parameters
    ----------
    features: ExplainableFeature | IndexMetadata
        A feature or metadata object, fitted.
    input_size: tuple[int, ...]
        The size of the input data (with the first dimension being the batch dimension), used by PyTorch to
        serialize the associated models.
    target_size: tuple[int, ...]
        The size of the target data (with the first dimension being the batch dimension), used by PyTorch to
        serialize the associated models.
    """
    super().__init__(*features)

    # If instantiated from scratch and not with from_model, we need to add the index_xp_deep column.
    if not any(isinstance(feature, IndexMetadata) for feature in self.columns):
        self.columns.append(IndexMetadata(name="index_xp_deep"))

    self.input_size = input_size
    self.target_size = target_size
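
A minimal construction sketch, using only what the signature above guarantees: features may be omitted, and the sizes include the batch dimension as their first element.

from xpdeep.dataset.schema import FittedSchema  # public import path assumed

fitted = FittedSchema(input_size=(1, 10), target_size=(1, 1))

# No IndexMetadata column was passed, so an "index_xp_deep" column is
# appended automatically (see __init__ above).
print(fitted.columns)
print(fitted.input_size, fitted.target_size)  # (1, 10) (1, 1)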

input_size = input_size #

target_size = target_size #

from_model(json_response: dict[str, object]) -> FittedSchema #

Create a FittedSchema from the API response.

Source code in src/xpdeep/dataset/schema.py
@classmethod
def from_model(cls, json_response: dict[str, object]) -> FittedSchema:
    """Convert to FittedSchema."""
    fitted_schema = cls(
        *[feature_from_model(feature) for feature in json_response["columns"]],  # type: ignore[arg-type,attr-defined]
        input_size=json_response["input_shape"],  # type: ignore[arg-type]
        target_size=json_response["target_shape"],  # type: ignore[arg-type]
    )

    fitted_schema._json_api_response = deepcopy(json_response)

    return fitted_schema

to_model() -> DatasetArtifactSchemaInsert #

Convert to a DatasetArtifactSchemaInsert instance.

Source code in src/xpdeep/dataset/schema.py
def to_model(self) -> DatasetArtifactSchemaInsert:
    """Convert to DatasetArtifactSchemaInsert instance."""
    return DatasetArtifactSchemaInsert(
        columns=[feature.to_model() for feature in self.columns],
        input_shape=list(self.input_size),
        target_shape=list(self.target_size),
    )

add_augmentation(feature_name: str, augmentation: ImageFeatureAugmentation) -> None #

When the schema is fitted, augmentation can be added to features.

Source code in src/xpdeep/dataset/schema.py
def add_augmentation(self, feature_name: str, augmentation: ImageFeatureAugmentation) -> None:
    """When the schema is fitted, augmentation can be added to features."""
    feature = self[feature_name]
    if isinstance(feature, IndexMetadata):
        message = "Augmentation can only be defined on features."
        raise ApiError(message)
    feature.feature_augmentation = augmentation
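
A usage sketch. This page does not show how to build an ImageFeatureAugmentation, so my_augmentation, the schema fitted_schema, and its "image" feature are all assumptions.

# my_augmentation: an ImageFeatureAugmentation instance, built elsewhere (assumed).
# fitted_schema: a FittedSchema containing an "image" feature (assumed).
fitted_schema.add_augmentation("image", my_augmentation)

# Targeting an IndexMetadata column is rejected:
fitted_schema.add_augmentation("index_xp_deep", my_augmentation)  # raises ApiError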