Merge pull request #2006 from roboflow/feature/xyxy_to_mask

SkalskiP · web-flow · commit b02ec93b3012 · 2025-11-05T13:23:13.000+01:00
Feature/xyxy to mask
diff --git a/docs/detection/utils/converters.md b/docs/detection/utils/converters.md
@@ -58,3 +58,9 @@ status: new
 </div>
 
 :::supervision.detection.utils.converters.polygon_to_xyxy
+
+<div class="md-typeset">
+  <h2><a href="#supervision.detection.utils.converters.xyxy_to_mask">xyxy_to_mask</a></h2>
+</div>
+
+:::supervision.detection.utils.converters.xyxy_to_mask
diff --git a/pyproject.toml b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "supervision"
 description = "A set of easy-to-use utils that will come in handy in any Computer Vision project"
 license = { text = "MIT" }
-version = "0.27.0rc1"
+version = "0.27.0rc2"
 readme = "README.md"
 requires-python = ">=3.9"
 authors = [
diff --git a/supervision/__init__.py b/supervision/__init__.py
@@ -65,6 +65,7 @@
     polygon_to_xyxy,
     xcycwh_to_xyxy,
     xywh_to_xyxy,
+    xyxy_to_mask,
     xyxy_to_polygons,
     xyxy_to_xcycarh,
     xyxy_to_xywh,
@@ -249,6 +250,7 @@
     "tint_image",
     "xcycwh_to_xyxy",
     "xywh_to_xyxy",
+    "xyxy_to_mask",
     "xyxy_to_polygons",
     "xyxy_to_xcycarh",
     "xyxy_to_xywh",
diff --git a/supervision/detection/utils/converters.py b/supervision/detection/utils/converters.py
@@ -229,6 +229,70 @@ def mask_to_xyxy(masks: np.ndarray) -> np.ndarray:
     return xyxy
 
 
+def xyxy_to_mask(boxes: np.ndarray, resolution_wh: tuple[int, int]) -> np.ndarray:
+    """
+    Converts a 2D `np.ndarray` of bounding boxes into a 3D `np.ndarray` of bool masks.
+
+    Coordinates are floored and clipped to the image; `x_max`/`y_max` are inclusive.
+
+    Parameters:
+        boxes (np.ndarray): A 2D `np.ndarray` of shape `(N, 4)`
+            containing bounding boxes `(x_min, y_min, x_max, y_max)`
+        resolution_wh (tuple[int, int]): A tuple `(width, height)` specifying
+            the resolution of the output masks
+
+    Returns:
+        np.ndarray: A 3D `np.ndarray` of shape `(N, height, width)`
+            containing 2D bool masks for each bounding box
+
+    Examples:
+        ```python
+        import numpy as np
+        import supervision as sv
+
+        boxes = np.array([[0, 0, 2, 2]])
+        sv.xyxy_to_mask(boxes, (5, 5))
+        # array([
+        #     [[ True,  True,  True, False, False],
+        #      [ True,  True,  True, False, False],
+        #      [ True,  True,  True, False, False],
+        #      [False, False, False, False, False],
+        #      [False, False, False, False, False]]
+        # ])
+
+        boxes = np.array([[0, 0, 1, 1], [3, 3, 4, 4]])
+        sv.xyxy_to_mask(boxes, (5, 5))
+        # array([
+        #     [[ True,  True, False, False, False],
+        #      [ True,  True, False, False, False],
+        #      [False, False, False, False, False],
+        #      [False, False, False, False, False],
+        #      [False, False, False, False, False]],
+        #
+        #     [[False, False, False, False, False],
+        #      [False, False, False, False, False],
+        #      [False, False, False, False, False],
+        #      [False, False, False,  True,  True],
+        #      [False, False, False,  True,  True]]
+        # ])
+        ```
+    """
+    width, height = resolution_wh
+    masks = np.zeros((boxes.shape[0], height, width), dtype=bool)
+
+    for i, (x_min, y_min, x_max, y_max) in enumerate(boxes):
+        # Floor, not int(): int(-0.5) == 0 would mark pixel 0 for an off-image box.
+        x_min = max(0, int(np.floor(x_min)))
+        y_min = max(0, int(np.floor(y_min)))
+        x_max = min(width - 1, int(np.floor(x_max)))
+        y_max = min(height - 1, int(np.floor(y_max)))
+
+        if x_max >= x_min and y_max >= y_min:
+            masks[i, y_min : y_max + 1, x_min : x_max + 1] = True
+
+    return masks
+
+
 def mask_to_polygons(mask: np.ndarray) -> list[np.ndarray]:
     """
     Converts a binary mask to a list of polygons.
diff --git a/test/detection/utils/test_converters.py b/test/detection/utils/test_converters.py
@@ -6,6 +6,7 @@
 from supervision.detection.utils.converters import (
     xcycwh_to_xyxy,
     xywh_to_xyxy,
+    xyxy_to_mask,
     xyxy_to_xcycarh,
     xyxy_to_xywh,
 )
@@ -129,3 +130,174 @@ def test_xyxy_to_xcycarh(xyxy: np.ndarray, expected_result: np.ndarray) -> None:
 def test_xcycwh_to_xyxy(xcycwh: np.ndarray, expected_result: np.ndarray) -> None:
     result = xcycwh_to_xyxy(xcycwh)
     np.testing.assert_array_equal(result, expected_result)
+
+
+@pytest.mark.parametrize(
+    "boxes,resolution_wh,expected",
+    [
+        # 0) Empty input: no boxes yields a (0, height, width) mask array
+        (
+            np.array([], dtype=float).reshape(0, 4),
+            (5, 4),
+            np.array([], dtype=bool).reshape(0, 4, 5),
+        ),
+        # 1) Single pixel box (x_min == x_max and y_min == y_max)
+        (
+            np.array([[2, 1, 2, 1]], dtype=float),
+            (5, 4),
+            np.array(
+                [
+                    [
+                        [False, False, False, False, False],
+                        [False, False, True, False, False],
+                        [False, False, False, False, False],
+                        [False, False, False, False, False],
+                    ]
+                ],
+                dtype=bool,
+            ),
+        ),
+        # 2) Horizontal line: x_max is inclusive, so cols 1..3 of row 2 are set
+        (
+            np.array([[1, 2, 3, 2]], dtype=float),
+            (5, 4),
+            np.array(
+                [
+                    [
+                        [False, False, False, False, False],
+                        [False, False, False, False, False],
+                        [False, True, True, True, False],
+                        [False, False, False, False, False],
+                    ]
+                ],
+                dtype=bool,
+            ),
+        ),
+        # 3) Vertical line: y_max is inclusive, so rows 0..2 of col 3 are set
+        (
+            np.array([[3, 0, 3, 2]], dtype=float),
+            (5, 4),
+            np.array(
+                [
+                    [
+                        [False, False, False, True, False],
+                        [False, False, False, True, False],
+                        [False, False, False, True, False],
+                        [False, False, False, False, False],
+                    ]
+                ],
+                dtype=bool,
+            ),
+        ),
+        # 4) Proper rectangle fill: rows 1..2, cols 1..3
+        (
+            np.array([[1, 1, 3, 2]], dtype=float),
+            (5, 4),
+            np.array(
+                [
+                    [
+                        [False, False, False, False, False],
+                        [False, True, True, True, False],
+                        [False, True, True, True, False],
+                        [False, False, False, False, False],
+                    ]
+                ],
+                dtype=bool,
+            ),
+        ),
+        # 5) Negative x_min / y_min are clipped to 0
+        (
+            np.array([[-2, -1, 1, 1]], dtype=float),
+            (5, 4),
+            np.array(
+                [
+                    [
+                        [True, True, False, False, False],
+                        [True, True, False, False, False],
+                        [False, False, False, False, False],
+                        [False, False, False, False, False],
+                    ]
+                ],
+                dtype=bool,
+            ),
+        ),
+        # 6) Overflowing x_max / y_max are clipped to width-1 and height-1
+        (
+            np.array([[3, 2, 10, 10]], dtype=float),
+            (5, 4),
+            np.array(
+                [
+                    [
+                        [False, False, False, False, False],
+                        [False, False, False, False, False],
+                        [False, False, False, True, True],
+                        [False, False, False, True, True],
+                    ]
+                ],
+                dtype=bool,
+            ),
+        ),
+        # 7) Invalid box (x_max < x_min after int conversion): mask stays empty
+        (
+            np.array([[3, 2, 1, 4]], dtype=float),
+            (5, 4),
+            np.array(
+                [
+                    [
+                        [False, False, False, False, False],
+                        [False, False, False, False, False],
+                        [False, False, False, False, False],
+                        [False, False, False, False, False],
+                    ]
+                ],
+                dtype=bool,
+            ),
+        ),
+        # 8) Fractional coordinates are rounded down to whole pixel indices
+        #    (0.2,0.2)-(2.8,1.9) -> (0,0)-(2,1)
+        (
+            np.array([[0.2, 0.2, 2.8, 1.9]], dtype=float),
+            (5, 4),
+            np.array(
+                [
+                    [
+                        [True, True, True, False, False],
+                        [True, True, True, False, False],
+                        [False, False, False, False, False],
+                        [False, False, False, False, False],
+                    ]
+                ],
+                dtype=bool,
+            ),
+        ),
+        # 9) Multiple boxes: each box gets its own mask along axis 0
+        (
+            np.array([[0, 0, 1, 0], [2, 1, 4, 3]], dtype=float),
+            (5, 4),
+            np.array(
+                [
+                    # Box 0: row 0, cols 0..1
+                    [
+                        [True, True, False, False, False],
+                        [False, False, False, False, False],
+                        [False, False, False, False, False],
+                        [False, False, False, False, False],
+                    ],
+                    # Box 1: rows 1..3, cols 2..4
+                    [
+                        [False, False, False, False, False],
+                        [False, False, True, True, True],
+                        [False, False, True, True, True],
+                        [False, False, True, True, True],
+                    ],
+                ],
+                dtype=bool,
+            ),
+        ),
+    ],
+)
+def test_xyxy_to_mask(boxes: np.ndarray, resolution_wh, expected: np.ndarray) -> None:
+    result = xyxy_to_mask(boxes, resolution_wh)
+    assert result.dtype == np.bool_
+    assert result.shape == expected.shape
+    np.testing.assert_array_equal(result, expected)