Skip to content

Commit 446dde4

Browse files
Add detections validator (#117)
* Add detections validator
* Rename validators for consistency
* Rename fixtures and test to highlight annotations vs detection datasets. Add test for bbox detections dataset validation
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
* Extend validator to check minimum dimensions per data variable too. Extend tests
* Improve error message
* Ignore code cov in abstract base class
* Factor out validators module
* Fix manifest
* Fix typo in manifest
* Remove docs warnings
* Update to autodoc defaults new syntax and simplify
* Review docstrings
---------
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 48cdd82 commit 446dde4

File tree

19 files changed

+891
-423
lines changed

19 files changed

+891
-423
lines changed

MANIFEST.in

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,5 @@ recursive-exclude examples *
1212
recursive-include docs *
1313

1414
# Include json schemas
15-
recursive-include ethology/io/annotations/json_schemas/schemas *.json
16-
recursive-include ethology/io/annotations/json_schemas/schemas *.md
15+
recursive-include ethology/validators/json_schemas/schemas *.json
16+
recursive-include ethology/validators/json_schemas/schemas *.md

docs/source/_templates/autosummary/class.rst

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,9 @@
33
.. currentmodule:: {{ module }}
44

55
.. autoclass:: {{ objname }}
6-
:members:
7-
:show-inheritance:
8-
:inherited-members:
9-
:exclude-members: Config
10-
6+
{% if objname != 'ValidDataset' %}:members:{% endif %}
7+
{% if objname != 'ValidDataset' %}:inherited-members:{% endif %}
8+
{% if objname == 'ValidBboxAnnotationsDataFrame' %}:exclude-members: Config{% endif %}
119

1210
{% block methods %}
1311
{% set ns = namespace(has_public_methods=false) %}

docs/source/conf.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@
7070
# Automatically generate stub pages for API
7171
autosummary_generate = True
7272
autosummary_generate_overwrite = False
73-
autodoc_default_flags = ["members", "inherited-members"]
73+
autodoc_default_options = {"show-inheritance": True} # applies to all classes
7474

7575
# Prefix section labels with the document name
7676
autosectionlabel_prefix_document = True
@@ -182,6 +182,10 @@
182182
"pandera": ("https://pandera.readthedocs.io/en/stable/", None),
183183
"movement": ("https://movement.neuroinformatics.dev/latest/", None),
184184
"sklearn": ("https://scikit-learn.org/stable/", None),
185+
"jsonschema": (
186+
"https://python-jsonschema.readthedocs.io/en/stable/",
187+
None,
188+
),
185189
}
186190

187191

ethology/io/annotations/load_bboxes.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,16 @@
1010
import xarray as xr
1111
from pandera.typing.pandas import DataFrame
1212

13-
from ethology.io.annotations.validate import (
14-
ValidBboxesDataFrame,
15-
ValidBboxesDataset,
13+
from ethology.validators.annotations import (
14+
ValidBboxAnnotationsDataFrame,
15+
ValidBboxAnnotationsDataset,
1616
ValidCOCO,
1717
ValidVIA,
18-
_check_output,
1918
)
19+
from ethology.validators.utils import _check_output
2020

2121

22-
@_check_output(ValidBboxesDataset)
22+
@_check_output(ValidBboxAnnotationsDataset)
2323
def from_files(
2424
file_paths: Path | str | list[Path | str],
2525
format: Literal["VIA", "COCO"],
@@ -138,7 +138,7 @@ def from_files(
138138

139139

140140
def _get_map_attributes_from_df(
141-
df: DataFrame[ValidBboxesDataFrame],
141+
df: DataFrame[ValidBboxAnnotationsDataFrame],
142142
) -> tuple[dict, dict]:
143143
"""Get the map attributes from the dataframe.
144144
@@ -179,7 +179,7 @@ def _get_map_attributes_from_df(
179179
@pa.check_types
180180
def _df_from_multiple_files(
181181
list_filepaths: list[Path | str], format: Literal["VIA", "COCO"]
182-
) -> DataFrame[ValidBboxesDataFrame]:
182+
) -> DataFrame[ValidBboxAnnotationsDataFrame]:
183183
"""Read annotations from multiple files as a valid intermediate dataframe.
184184
185185
Parameters
@@ -242,7 +242,7 @@ def _df_from_multiple_files(
242242
@pa.check_types
243243
def _df_from_single_file(
244244
file_path: Path | str, format: Literal["VIA", "COCO"]
245-
) -> DataFrame[ValidBboxesDataFrame]:
245+
) -> DataFrame[ValidBboxAnnotationsDataFrame]:
246246
"""Read annotations from a single file as a valid intermediate dataframe.
247247
248248
Parameters
@@ -374,7 +374,7 @@ def _df_rows_from_valid_VIA_file(file_path: Path) -> list[dict]:
374374

375375
else:
376376
supercategory, category, category_id = (
377-
ValidBboxesDataFrame.get_empty_values()[key]
377+
ValidBboxAnnotationsDataFrame.get_empty_values()[key]
378378
for key in ["supercategory", "category", "category_id"]
379379
)
380380

@@ -428,7 +428,7 @@ def _get_image_shape_attr_as_integer(
428428
ValidBboxAnnotationsDataFrame.get_empty_values().
429429
430430
"""
431-
default_value = ValidBboxesDataFrame.get_empty_values()[
431+
default_value = ValidBboxAnnotationsDataFrame.get_empty_values()[
432432
f"image_{attr_name}"
433433
]
434434
try:
@@ -557,7 +557,9 @@ def _df_rows_from_valid_COCO_file(file_path: Path) -> list[dict]:
557557

558558

559559
@pa.check_types
560-
def _df_to_xarray_ds(df: DataFrame[ValidBboxesDataFrame]) -> xr.Dataset:
560+
def _df_to_xarray_ds(
561+
df: DataFrame[ValidBboxAnnotationsDataFrame],
562+
) -> xr.Dataset:
561563
"""Convert a bounding box annotations dataframe to an xarray dataset.
562564
563565
Parameters
@@ -585,7 +587,7 @@ def _df_to_xarray_ds(df: DataFrame[ValidBboxesDataFrame]) -> xr.Dataset:
585587
586588
"""
587589
# Drop columns if all values in that column are empty
588-
default_values = ValidBboxesDataFrame.get_empty_values()
590+
default_values = ValidBboxAnnotationsDataFrame.get_empty_values()
589591
list_empty_cols = [
590592
col for col in default_values if all(df[col] == default_values[col])
591593
]

ethology/io/annotations/save_bboxes.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,16 @@
1111
import xarray as xr
1212
from pandera.typing.pandas import DataFrame
1313

14-
from ethology.io.annotations.validate import (
15-
ValidBboxesDataFrameCOCO,
16-
ValidBboxesDataset,
14+
from ethology.validators.annotations import (
15+
ValidBboxAnnotationsCOCO,
16+
ValidBboxAnnotationsDataset,
1717
ValidCOCO,
18-
_check_input,
19-
_check_output,
2018
)
19+
from ethology.validators.utils import _check_input, _check_output
2120

2221

23-
@_check_input(validator=ValidBboxesDataset)
24-
@_check_output(validator=ValidCOCO) # check output is ethology importable
22+
@_check_input(validator=ValidBboxAnnotationsDataset)
23+
@_check_output(validator=ValidCOCO) # check output is ethology-importable
2524
def to_COCO_file(dataset: xr.Dataset, output_filepath: str | Path):
2625
"""Save an ``ethology`` bounding box annotations dataset to a COCO file.
2726
@@ -56,11 +55,11 @@ def to_COCO_file(dataset: xr.Dataset, output_filepath: str | Path):
5655
return output_filepath
5756

5857

59-
@_check_input(validator=ValidBboxesDataset)
58+
@_check_input(validator=ValidBboxAnnotationsDataset)
6059
@pa.check_types
6160
def _to_COCO_exportable_df(
6261
ds: xr.Dataset,
63-
) -> DataFrame[ValidBboxesDataFrameCOCO]:
62+
) -> DataFrame[ValidBboxAnnotationsCOCO]:
6463
"""Convert dataset of bounding boxes annotations to a COCO-exportable df.
6564
6665
The returned dataframe is validated using ValidBboxAnnotationsCOCO.
@@ -98,7 +97,7 @@ def _to_COCO_exportable_df(
9897
return df[cols_to_select]
9998

10099

101-
@_check_input(validator=ValidBboxesDataset)
100+
@_check_input(validator=ValidBboxAnnotationsDataset)
102101
def _get_raw_df_from_ds(ds: xr.Dataset) -> pd.DataFrame:
103102
"""Get preliminary dataframe from a dataset of bounding boxes annotations.
104103
@@ -164,7 +163,7 @@ def _get_raw_df_from_ds(ds: xr.Dataset) -> pd.DataFrame:
164163
@pa.check_types
165164
def _add_COCO_data_to_df(
166165
df: pd.DataFrame, ds_attrs: dict
167-
) -> DataFrame[ValidBboxesDataFrameCOCO]:
166+
) -> DataFrame[ValidBboxAnnotationsCOCO]:
168167
"""Add COCO-required data to preliminary dataframe.
169168
170169
The input dataframe is obtained from a dataset of bounding boxes
@@ -266,7 +265,9 @@ def _add_COCO_data_to_df(
266265

267266

268267
@pa.check_types
269-
def _create_COCO_dict(df: DataFrame[ValidBboxesDataFrameCOCO]) -> dict:
268+
def _create_COCO_dict(
269+
df: DataFrame[ValidBboxAnnotationsCOCO],
270+
) -> dict:
270271
"""Extract COCO dictionary from a COCO-exportable dataframe.
271272
272273
Parameters
@@ -282,7 +283,7 @@ def _create_COCO_dict(df: DataFrame[ValidBboxesDataFrameCOCO]) -> dict:
282283
"""
283284
COCO_dict: dict[str, Any] = {}
284285
map_columns_to_COCO_fields = (
285-
ValidBboxesDataFrameCOCO.map_df_columns_to_COCO_fields()
286+
ValidBboxAnnotationsCOCO.map_df_columns_to_COCO_fields()
286287
)
287288
for sections in ["images", "categories", "annotations"]:
288289
# Extract and rename required columns for this section
Lines changed: 29 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,20 @@
11
"""Validators for annotation files and datasets."""
22

33
import json
4-
from collections.abc import Callable
5-
from functools import wraps
64
from pathlib import Path
75

86
import pandas as pd
97
import pandera.pandas as pa
10-
import xarray as xr
118
from attrs import define, field
129
from pandera.typing import Index
1310

14-
from ethology.io.annotations.json_schemas.utils import (
11+
from ethology.validators.json_schemas.utils import (
1512
_check_file_is_json,
1613
_check_file_matches_schema,
1714
_check_required_keys_in_dict,
1815
_get_default_schema,
1916
)
17+
from ethology.validators.utils import ValidDataset
2018

2119

2220
@define
@@ -227,25 +225,39 @@ def _file_contains_unique_image_IDs(self, attribute, value):
227225

228226

229227
@define
230-
class ValidBboxesDataset:
228+
class ValidBboxAnnotationsDataset(ValidDataset):
231229
"""Class for valid ``ethology`` bounding box annotations datasets.
232230
233-
It checks that the input dataset has:
231+
This class validates that the input dataset:
232+
233+
- is an xarray Dataset,
234+
- has ``image_id``, ``space``, ``id`` as dimensions,
235+
- has ``position`` and ``shape`` as data variables,
236+
- both data variables span at least the dimensions ``image_id``,
237+
``space`` and ``id``.
234238
235-
- ``image_id``, ``space``, ``id`` as dimensions
236-
- ``position`` and ``shape`` as data variables
237239
238240
Attributes
239241
----------
240242
dataset : xarray.Dataset
241243
The xarray dataset to validate.
244+
required_dims : set[str]
245+
The set of required dimension names: ``image_id``, ``space`` and
246+
``id``.
247+
required_data_vars : dict[str, set]
248+
A dictionary mapping data variable names to their required minimum
249+
dimensions:
250+
251+
- ``position`` maps to ``image_id``, ``space`` and ``id``,
252+
- ``shape`` maps to ``image_id``, ``space`` and ``id``.
242253
243254
Raises
244255
------
245256
TypeError
246257
If the input is not an xarray Dataset.
247258
ValueError
248-
If the dataset is missing required data variables or dimensions.
259+
If the dataset is missing required data variables or dimensions,
260+
or if any required dimensions are missing for any data variable.
249261
250262
Notes
251263
-----
@@ -254,46 +266,21 @@ class ValidBboxesDataset:
254266
255267
"""
256268

257-
dataset: xr.Dataset = field()
258-
259-
# Minimum requirements for annotations datasets holding bboxes
269+
# Minimum requirements for a bbox dataset holding annotations
260270
required_dims: set = field(
261271
default={"image_id", "space", "id"},
262272
init=False,
263273
)
264-
required_data_vars: set = field(
265-
default={"position", "shape"},
274+
required_data_vars: dict = field(
275+
default={
276+
"position": {"image_id", "space", "id"},
277+
"shape": {"image_id", "space", "id"},
278+
},
266279
init=False,
267280
)
268281

269-
@dataset.validator
270-
def _check_dataset_type(self, attribute, value):
271-
"""Ensure the input is an xarray Dataset."""
272-
if not isinstance(value, xr.Dataset):
273-
raise TypeError(
274-
f"Expected an xarray Dataset, but got {type(value)}."
275-
)
276-
277-
@dataset.validator
278-
def _check_required_data_variables(self, attribute, value):
279-
"""Ensure the dataset has all required data variables."""
280-
missing_vars = self.required_data_vars - set(value.data_vars)
281-
if missing_vars:
282-
raise ValueError(
283-
f"Missing required data variables: {sorted(missing_vars)}"
284-
)
285-
286-
@dataset.validator
287-
def _check_required_dimensions(self, attribute, value):
288-
"""Ensure the dataset has all required dimensions."""
289-
missing_dims = self.required_dims - set(value.dims)
290-
if missing_dims:
291-
raise ValueError(
292-
f"Missing required dimensions: {sorted(missing_dims)}"
293-
)
294-
295282

296-
class ValidBboxesDataFrame(pa.DataFrameModel):
283+
class ValidBboxAnnotationsDataFrame(pa.DataFrameModel):
297284
"""Class for valid bounding boxes intermediate dataframes.
298285
299286
We use this dataframe internally as an intermediate step in the process of
@@ -422,7 +409,7 @@ def get_empty_values() -> dict:
422409
}
423410

424411

425-
class ValidBboxesDataFrameCOCO(pa.DataFrameModel):
412+
class ValidBboxAnnotationsCOCO(pa.DataFrameModel):
426413
"""Class for COCO-exportable bounding box annotations dataframes.
427414
428415
The validation checks the required columns exist and their types are
@@ -573,38 +560,3 @@ def check_idx_and_annotation_id(cls, df: pd.DataFrame) -> bool:
573560
574561
"""
575562
return all(df.index == df["annotation_id"])
576-
577-
578-
def _check_output(validator: type):
579-
"""Return a decorator that validates the output of a function."""
580-
581-
def decorator(function: Callable) -> Callable:
582-
@wraps(function) # to preserve function metadata
583-
def wrapper(*args, **kwargs):
584-
result = function(*args, **kwargs)
585-
validator(result)
586-
return result
587-
588-
return wrapper
589-
590-
return decorator
591-
592-
593-
def _check_input(validator: type, input_index: int = 0):
594-
"""Return a decorator that validates a specific input of a function.
595-
596-
By default, the first input is validated. If the input index is
597-
larger than the number of inputs, no validation is performed.
598-
"""
599-
600-
def decorator(function: Callable) -> Callable:
601-
@wraps(function)
602-
def wrapper(*args, **kwargs):
603-
if len(args) > input_index:
604-
validator(args[input_index])
605-
result = function(*args, **kwargs)
606-
return result
607-
608-
return wrapper
609-
610-
return decorator

0 commit comments

Comments
 (0)