Skip to content

criteria

Criteria.

Classes:

Name Description
NumericalCriterion

Choose max and min values to keep.

CategoricalCriterion

Choose which categories to keep.

MultivariateCriterion

Choose samples that got max or min value in the given target_channel, which is interpreted as an array index.

TimeseriesBoxCriterion

Defines a 2D box area that must contain the aggregation results of the chosen time series subpart.

NumericalCriterion(feature: NumericalFeature | Metadata, *, min_: float | None = None, max_: float | None = None) #

Choose max and min values to keep.

Numerical Criterion initialization.

Parameters:

Name Type Description Default

feature #

NumericalFeature | Metadata

The feature on which to apply the criterion.

required

min_ #

float | None

Filter's minimum value.

None

max_ #

float | None

Filter's maximum value.

None
Source code in src/xpdeep/filtering/criteria.py
def __init__(
    self, feature: NumericalFeature | Metadata, *, min_: float | None = None, max_: float | None = None
) -> None:
    """
    Numerical Criterion initialization.

    Parameters
    ----------
    feature : NumericalFeature | Metadata
        The feature on which to apply the criterion. A `Metadata` feature is only accepted when its exposed
        feature type is "xpdeep_index".
    min_ : float | None, default None
        Filter's minimum value.
    max_ : float | None, default None
        Filter's maximum value.

    Raises
    ------
    ValueError
        If both `min_` and `max_` are None.
    TypeError
        If `feature` is a `Metadata` whose exposed feature type is not "xpdeep_index".
    """
    # At least one bound is required, otherwise the criterion would not constrain anything.
    if min_ is None and max_ is None:
        message = "Either `min_` or `max_` must be specified."
        raise ValueError(message)

    # Read the exposed feature type once; it is needed both for validation and for the parent call.
    feature_type = feature.as_exposed.feature_type
    if isinstance(feature, Metadata) and feature_type != "xpdeep_index":
        message = "Only Metadata feature with 'xpdeep_index' as feature type are supported."
        raise TypeError(message)

    super().__init__(feature.name, NumericalCriterionCreateBodyFeaturetype(feature_type), min_, max_)

CategoricalCriterion(feature: CategoricalFeature, *, categories: list[str | int | bool]) #

Choose which categories to keep.

Categorical Criterion initialization.

Parameters:

Name Type Description Default

feature #

CategoricalFeature

The feature on which to apply the criterion.

required

categories #

list[str | int | bool]

List of categories to keep.

required
Source code in src/xpdeep/filtering/criteria.py
def __init__(self, feature: CategoricalFeature, *, categories: list[str | int | bool]) -> None:
    """
    Categorical Criterion initialization.

    Parameters
    ----------
    feature : CategoricalFeature
        The feature on which to apply the criterion.
    categories : list[str | int | bool]
        List of categories to keep. Each one must be present in `feature.categories`.

    Raises
    ------
    ValueError
        If one of the requested categories is not a category of `feature`.
    """
    for category in categories:
        if category not in feature.categories:
            # Closing quote added so the offending value is properly delimited in the message.
            msg = f"Wrong category value : '{category}'"
            raise ValueError(msg)
    # Map to string prior to filter as filter is performed on string only.
    super().__init__(
        feature.name,
        CategoricalCriterionCreateBodyFeaturetype(feature.as_exposed.feature_type),
        categories=[str(category) for category in categories],
    )

MultivariateCriterion(feature: MultivariateNumericalFeature, *, target_channel: int = 0, mode: Literal['min', 'max'] = 'max') #

Choose samples that got max or min value in the given target_channel, which is interpreted as an array index.

Only for 1D arrays.

Multivariate Criterion initialization.

Parameters:

Name Type Description Default

feature #

MultivariateNumericalFeature

The feature on which to apply the criterion.

required

target_channel #

int

An array's index value (starting from 0), so the resulting samples will have this dimension as their greatest or lowest value, depending on the mode parameter.

0

mode #

Literal['min', 'max']

If max (default), the filter returns the samples which have the defined target_channel as their greatest value. If min, it considers the lowest value.

"max"
Source code in src/xpdeep/filtering/criteria.py
def __init__(
    self,
    feature: MultivariateNumericalFeature,
    *,
    target_channel: int = 0,
    mode: Literal["min", "max"] = "max",
) -> None:
    """
    Multivariate Criterion initialization.

    Parameters
    ----------
    feature : MultivariateNumericalFeature
        The feature on which to apply the criterion.
    target_channel : int, default 0
        An array's index value (starting from 0),
        so the resulting samples will have this dimension as their greatest
        or lowest value, depending on the `mode` parameter.
    mode : Literal["min", "max"], default "max"
        If `max` (default), the filter returns the samples which have the defined `target_channel` as their
        greatest value.
        If `min`, it considers the lowest value.

    Raises
    ------
    ValueError
        If `mode` is neither "min" nor "max".
    """
    # The Literal annotation is not enforced at runtime, so reject unknown modes explicitly.
    if mode not in {"min", "max"}:
        message = "Only `min` and `max` values are accepted for `mode` parameter."
        raise ValueError(message)

    super().__init__(
        feature.name,
        MultivariateCriterionCreateBodyFeaturetype(feature.as_exposed.feature_type),
        target_channel,
        MultivariateFilterMode(mode),
    )

TimeseriesBoxCriterion(feature: UnivariateSynchronousTimeSerie | UnivariateAsynchronousTimeSerie | MultivariateSynchronousTimeSerie | MultivariateAsynchronousTimeSerie, *, target_channel: int = 0, min_: float | None = None, max_: float | None = None, start: int | None = None, end: int | None = None, aggregators: list[Literal['min', 'max', 'avg']]) #

Defines a 2D box area that must contain the aggregation results of the chosen time series subpart.

Time series Box Criterion initialization.

Parameters:

Name Type Description Default

feature #

UnivariateSynchronousTimeSerie | UnivariateAsynchronousTimeSerie | MultivariateSynchronousTimeSerie | MultivariateAsynchronousTimeSerie

The feature on which to apply the criterion.

required

target_channel #

int

Time series dimension to filter.

0

min_ #

float | None

The aggregation result of the chosen time series subpart should be greater than this value. Defaults to None, which means no limit.

None

max_ #

float | None

The aggregation result of the chosen time series subpart should be lower than this value. Defaults to None, which means no limit.

None

start #

int | None

Array index where the chosen time series subpart starts. Defaults to None, which means starting from index 0. Negative index values are not supported.

None

end #

int | None

Array index where the chosen time series subpart ends. Defaults to None, which means going up to the last index. Negative index values are not supported.

None

aggregators #

list[Literal['min', 'max', 'avg']]

Used aggregators to compute resulting values that will be projected to verify if they are included in the defined box.

required
Source code in src/xpdeep/filtering/criteria.py
def __init__(  # noqa: PLR0913
    self,
    feature: UnivariateSynchronousTimeSerie
    | UnivariateAsynchronousTimeSerie
    | MultivariateSynchronousTimeSerie
    | MultivariateAsynchronousTimeSerie,
    *,
    target_channel: int = 0,
    min_: float | None = None,
    max_: float | None = None,
    start: int | None = None,
    end: int | None = None,
    aggregators: list[Literal["min", "max", "avg"]],
) -> None:
    """
    Time series Box Criterion initialization.

    Parameters
    ----------
    feature : UnivariateSynchronousTimeSerie | UnivariateAsynchronousTimeSerie | MultivariateSynchronousTimeSerie
    | MultivariateAsynchronousTimeSerie
        The feature on which to apply the criterion.
    target_channel : int, default 0
        Time series dimension to filter.
    min_ : float | None, default None
        The aggregation result of the chosen time series subpart should be greater than this value.
        Defaults to None, which means no limit.
    max_ : float | None, default None
        The aggregation result of the chosen time series subpart should be lower than this value.
        Defaults to None, which means no limit.
    start : int | None, default None
        Array index where the chosen time series subpart starts.
        Defaults to None, which means starting from index 0.
        Negative index values are not supported.
    end : int | None, default None
        Array index where the chosen time series subpart ends.
        Defaults to None, which means going up to the last index.
        Negative index values are not supported.
    aggregators : list[Literal["min", "max", "avg"]]
        Used aggregators to compute resulting values that will be projected to verify if they are included in
        the defined box. Duplicates are dropped; the first occurrence of each aggregator keeps its position.

    Raises
    ------
    ValueError
        If an element of `aggregators` is not one of "min", "max" or "avg".
    """
    # The Literal annotation is not enforced at runtime, so reject unknown aggregators explicitly.
    allowed_aggregators = {"min", "max", "avg"}
    for aggregator in aggregators:
        if aggregator not in allowed_aggregators:
            message = "Only `min`, `max` and `avg` values are accepted for elements of `aggregators` parameter."
            raise ValueError(message)

    # Deduplicate while keeping the caller's order: going through a set would yield an arbitrary order
    # that can differ from one run to the next.
    unique_aggregators = list(dict.fromkeys(Aggregator(aggregator) for aggregator in aggregators))

    super().__init__(
        feature.name,
        TimeseriesBoxCriterionCreateBodyFeaturetype(feature.as_exposed.feature_type),
        unique_aggregators,
        target_channel,
        min_,
        max_,
        start,
        end,
    )