diff --git a/docs/reference/sql/rs_bandtodim.qmd b/docs/reference/sql/rs_bandtodim.qmd new file mode 100644 index 000000000..4b1ff69b5 --- /dev/null +++ b/docs/reference/sql/rs_bandtodim.qmd @@ -0,0 +1,57 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_BandToDim +description: Collapses all bands into a single band with a new dimension. +kernels: + - returns: raster + args: + - raster + - name: dim_name + type: utf8 + description: Name for the new dimension (e.g., 'time', 'band'). +--- + +## Description + +Collapses all bands in a raster into a single band by introducing a new +dimension. The new dimension is prepended to the existing dimensions, with +size equal to the number of bands. Band data is concatenated in band order. + +All bands must have identical dimension names, shapes, and data types. If +they differ, an error is returned. + +This is the inverse of [RS_DimToBand](rs_dimtoband.qmd). A round-trip +`RS_BandToDim(RS_DimToBand(raster, 'dim'), 'dim')` recovers the original +data layout when `dim` is the leading dimension of a single-band raster. + +This is useful for converting GeoTIFF-style multi-band rasters into a single +N-dimensional chunk for export to formats like Zarr. 
+ +## Examples + +```sql +-- Collapse 3 RGB bands [y, x] into 1 band [band=3, y, x] +SELECT RS_BandToDim(raster, 'band'); + +-- Collapse time-step bands back into a time dimension +SELECT RS_BandToDim(raster, 'time'); + +-- Round-trip: split then recombine +SELECT RS_BandToDim(RS_DimToBand(raster, 'time'), 'time'); +``` diff --git a/docs/reference/sql/rs_dimnames.qmd b/docs/reference/sql/rs_dimnames.qmd new file mode 100644 index 000000000..6eb9af93d --- /dev/null +++ b/docs/reference/sql/rs_dimnames.qmd @@ -0,0 +1,56 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_DimNames +description: Returns the ordered list of dimension names for a raster band. +kernels: + - returns: list + args: [raster] + - returns: list + args: + - raster + - name: band + type: int + description: Band index (1-based). Defaults to 1 if not specified. +--- + +## Description + +Returns the ordered list of dimension names for a raster band. Standard 2D +rasters have dimensions `["y", "x"]`. N-dimensional rasters may include +additional dimensions such as `time`, `pressure`, `wavelength`, etc. + +The dimension names correspond to the entries in +[RS_Shape](rs_shape.qmd) — the i-th name matches the i-th size. 
+ +When the band index is omitted, returns the dimension names of band 1. If +bands have different dimension names, an error is returned prompting the user +to specify a band index. + +## Examples + +```sql +-- 2D raster: returns ["y", "x"] +SELECT RS_DimNames(raster); + +-- 3D raster with time: returns ["time", "y", "x"] +SELECT RS_DimNames(raster); + +-- Specific band +SELECT RS_DimNames(raster, 2); +``` diff --git a/docs/reference/sql/rs_dimsize.qmd b/docs/reference/sql/rs_dimsize.qmd new file mode 100644 index 000000000..f81f833fb --- /dev/null +++ b/docs/reference/sql/rs_dimsize.qmd @@ -0,0 +1,67 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_DimSize +description: Returns the size of a named dimension in a raster band. +kernels: + - returns: int64 + args: + - raster + - name: dim_name + type: utf8 + description: Name of the dimension to query (e.g., 'x', 'y', 'time'). + - returns: int64 + args: + - raster + - name: dim_name + type: utf8 + description: Name of the dimension to query. + - name: band + type: int + description: Band index (1-based). Defaults to 1 if not specified. +--- + +## Description + +Returns the size of a named dimension in a raster band. 
For example, +`RS_DimSize(raster, 'x')` returns the width and `RS_DimSize(raster, 'time')` +returns the number of time steps. Returns null if the dimension does not exist +in the band. + +This is equivalent to looking up a specific entry in +[RS_Shape](rs_shape.qmd) by name rather than by position. + +When the band index is omitted, returns the dimension size from band 1. If +bands disagree on the size (or some bands are missing the dimension), an error +is returned prompting the user to specify a band index. + +## Examples + +```sql +-- Get the width (x dimension size) +SELECT RS_DimSize(raster, 'x'); + +-- Get the number of time steps +SELECT RS_DimSize(raster, 'time'); + +-- Returns null if the dimension doesn't exist +SELECT RS_DimSize(raster, 'wavelength'); + +-- Query a specific band +SELECT RS_DimSize(raster, 'time', 2); +``` diff --git a/docs/reference/sql/rs_dimtoband.qmd b/docs/reference/sql/rs_dimtoband.qmd new file mode 100644 index 000000000..28182c8dd --- /dev/null +++ b/docs/reference/sql/rs_dimtoband.qmd @@ -0,0 +1,61 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_DimToBand +description: Promotes a within-band dimension into separate bands. 
+kernels: + - returns: raster + args: + - raster + - name: dim_name + type: utf8 + description: Name of the dimension to promote (e.g., 'wavelength', 'time'). +--- + +## Description + +Promotes a named non-spatial dimension into separate bands. Each index along +the dimension becomes its own band with that dimension removed. This bridges +the gap between the dimension model (where all indices live in one band) and +the band model (where each index is a separate band accessible by number). + +For example, a raster with 1 band of shape `[wavelength=200, y=256, x=256]` +becomes a raster with 200 bands, each of shape `[y=256, x=256]`. Standard +band math functions like [RS_NormalizedDifference](rs_normalizeddifference.qmd) +can then operate on individual wavelength bands by index. + +Bands that do not contain the named dimension are passed through unchanged. + +The spatial dimensions (`x_dim` and `y_dim`) cannot be promoted. + +The inverse operation is [RS_BandToDim](rs_bandtodim.qmd). + +## Examples + +```sql +-- Promote wavelength into separate bands for band math +SELECT RS_DimToBand(raster, 'wavelength'); + +-- Compute NDVI from specific wavelength bands +SELECT RS_NormalizedDifference( + RS_DimToBand(raster, 'wavelength'), 77, 54 +); + +-- Promote time steps into bands +SELECT RS_DimToBand(raster, 'time'); +``` diff --git a/docs/reference/sql/rs_numdimensions.qmd b/docs/reference/sql/rs_numdimensions.qmd new file mode 100644 index 000000000..f92a96917 --- /dev/null +++ b/docs/reference/sql/rs_numdimensions.qmd @@ -0,0 +1,51 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_NumDimensions +description: Returns the number of dimensions in a raster band. +kernels: + - returns: int32 + args: [raster] + - returns: int32 + args: + - raster + - name: band + type: int + description: Band index (1-based). Defaults to 1 if not specified. +--- + +## Description + +Returns the number of dimensions in a raster band. A standard 2D raster has 2 +dimensions (`y` and `x`). N-dimensional rasters (e.g., from Zarr or NetCDF +sources) may have additional dimensions such as `time`, `pressure`, or +`wavelength`. + +When the band index is omitted, returns the dimensionality of band 1. If +bands have different numbers of dimensions, an error is returned prompting +the user to specify a band index. + +## Examples + +```sql +-- 2D raster: returns 2 +SELECT RS_NumDimensions(raster); + +-- Specific band +SELECT RS_NumDimensions(raster, 2); +``` diff --git a/docs/reference/sql/rs_shape.qmd b/docs/reference/sql/rs_shape.qmd new file mode 100644 index 000000000..733d1df6f --- /dev/null +++ b/docs/reference/sql/rs_shape.qmd @@ -0,0 +1,57 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_Shape +description: Returns the shape (size of each dimension) of a raster band. +kernels: + - returns: list + args: [raster] + - returns: list + args: + - raster + - name: band + type: int + description: Band index (1-based). Defaults to 1 if not specified. +--- + +## Description + +Returns the shape of a raster band as a list of dimension sizes. The entries +correspond to the dimension names returned by +[RS_DimNames](rs_dimnames.qmd). + +For a standard 2D raster this returns `[height, width]`. For an N-dimensional +raster with a time dimension it might return `[12, 256, 256]` meaning 12 time +steps at 256x256 spatial resolution. + +When the band index is omitted, returns the shape of band 1. If bands have +different shapes, an error is returned prompting the user to specify a band +index. + +## Examples + +```sql +-- 2D raster: returns [256, 256] +SELECT RS_Shape(raster); + +-- 3D raster with time: returns [12, 256, 256] +SELECT RS_Shape(raster); + +-- Specific band +SELECT RS_Shape(raster, 2); +``` diff --git a/docs/reference/sql/rs_slice.qmd b/docs/reference/sql/rs_slice.qmd new file mode 100644 index 000000000..a110b1526 --- /dev/null +++ b/docs/reference/sql/rs_slice.qmd @@ -0,0 +1,56 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_Slice +description: Selects a single index along a dimension, removing that dimension from the output. +kernels: + - returns: raster + args: + - raster + - name: dim_name + type: utf8 + description: Name of the dimension to slice (e.g., 'time'). + - name: index + type: int + description: Zero-based index along the dimension. +--- + +## Description + +Extracts a single slice along a named non-spatial dimension, removing that +dimension from every band in the output raster. For example, slicing a raster +with shape `[time=12, y=256, x=256]` on `'time'` at index 5 produces a 2D +raster with shape `[y=256, x=256]` containing the data at time step 5. + +The spatial dimensions (`x_dim` and `y_dim`) cannot be sliced — use +[RS_Clip](rs_clip.qmd) for spatial subsetting. + +Returns an error if the dimension does not exist or the index is out of range. + +See also [RS_SliceRange](rs_slicerange.qmd) to extract a range of indices +while keeping the dimension. 
+ +## Examples + +```sql +-- Extract time step 5 from a [time=12, y, x] raster +SELECT RS_Slice(raster, 'time', 5); + +-- Extract the first pressure level +SELECT RS_Slice(raster, 'pressure', 0); +``` diff --git a/docs/reference/sql/rs_slicerange.qmd b/docs/reference/sql/rs_slicerange.qmd new file mode 100644 index 000000000..01ae5da12 --- /dev/null +++ b/docs/reference/sql/rs_slicerange.qmd @@ -0,0 +1,60 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_SliceRange +description: Narrows a dimension to a half-open range, keeping the dimension with reduced size. +kernels: + - returns: raster + args: + - raster + - name: dim_name + type: utf8 + description: Name of the dimension to narrow (e.g., 'time'). + - name: start + type: int + description: Start index (inclusive, zero-based). + - name: end + type: int + description: End index (exclusive). +--- + +## Description + +Narrows a named non-spatial dimension to the half-open range `[start, end)`, +keeping that dimension in the output with reduced size. For example, +narrowing a raster with shape `[time=12, y=256, x=256]` on `'time'` with +range `[2, 7)` produces a raster with shape `[time=5, y=256, x=256]`. 
+ +The spatial dimensions (`x_dim` and `y_dim`) cannot be narrowed — use +[RS_Clip](rs_clip.qmd) for spatial subsetting. + +Returns an error if the dimension does not exist, `start >= end`, or the +range is out of bounds. + +See also [RS_Slice](rs_slice.qmd) to extract a single index and remove the +dimension entirely. + +## Examples + +```sql +-- Extract time steps 0 through 4 from a [time=12, y, x] raster +SELECT RS_SliceRange(raster, 'time', 0, 5); + +-- Extract a range of pressure levels +SELECT RS_SliceRange(raster, 'pressure', 2, 8); +``` diff --git a/rust/sedona-raster-functions/src/executor.rs b/rust/sedona-raster-functions/src/executor.rs index 756527212..2ba4808ef 100644 --- a/rust/sedona-raster-functions/src/executor.rs +++ b/rust/sedona-raster-functions/src/executor.rs @@ -359,13 +359,16 @@ impl<'a, 'b> RasterExecutor<'a, 'b> { arr0.len() ); } + + // Hoist the RasterStructArray so its lifetime covers the loop. + let scalar_arr1; let r1 = match sv1 { ScalarValue::Struct(arc_struct) => { - let arr1 = RasterStructArray::new(arc_struct.as_ref()); - if arr1.is_null(0) { + scalar_arr1 = RasterStructArray::new(arc_struct.as_ref()); + if scalar_arr1.is_null(0) { None } else { - Some(arr1.get(0)?) + Some(scalar_arr1.get(0)?) } } ScalarValue::Null => None, @@ -396,13 +399,16 @@ impl<'a, 'b> RasterExecutor<'a, 'b> { arr1.len() ); } + + // Hoist the RasterStructArray so its lifetime covers the loop. + let scalar_arr0; let r0 = match sv0 { ScalarValue::Struct(arc_struct) => { - let arr0 = RasterStructArray::new(arc_struct.as_ref()); - if arr0.is_null(0) { + scalar_arr0 = RasterStructArray::new(arc_struct.as_ref()); + if scalar_arr0.is_null(0) { None } else { - Some(arr0.get(0)?) + Some(scalar_arr0.get(0)?) } } ScalarValue::Null => None, @@ -422,13 +428,15 @@ impl<'a, 'b> RasterExecutor<'a, 'b> { Ok(()) } (ColumnarValue::Scalar(sv0), ColumnarValue::Scalar(sv1)) => { + // Hoist both RasterStructArrays so their lifetimes cover the loop. 
+ let scalar_arr0; let r0 = match sv0 { ScalarValue::Struct(arc_struct) => { - let arr0 = RasterStructArray::new(arc_struct.as_ref()); - if arr0.is_null(0) { + scalar_arr0 = RasterStructArray::new(arc_struct.as_ref()); + if scalar_arr0.is_null(0) { None } else { - Some(arr0.get(0)?) + Some(scalar_arr0.get(0)?) } } ScalarValue::Null => None, @@ -436,13 +444,14 @@ impl<'a, 'b> RasterExecutor<'a, 'b> { return sedona_internal_err!("Expected Struct scalar for raster"); } }; + let scalar_arr1; let r1 = match sv1 { ScalarValue::Struct(arc_struct) => { - let arr1 = RasterStructArray::new(arc_struct.as_ref()); - if arr1.is_null(0) { + scalar_arr1 = RasterStructArray::new(arc_struct.as_ref()); + if scalar_arr1.is_null(0) { None } else { - Some(arr1.get(0)?) + Some(scalar_arr1.get(0)?) } } ScalarValue::Null => None, @@ -725,7 +734,7 @@ mod tests { match raster_opt { None => builder.append_null(), Some(raster) => { - let width = raster.metadata().width(); + let width = raster.width().unwrap(); builder.append_value(width); } } @@ -767,7 +776,7 @@ mod tests { match raster_opt { None => builder.append_null(), Some(raster) => { - let width = raster.metadata().width(); + let width = raster.width().unwrap(); builder.append_value(width); } } @@ -804,7 +813,7 @@ mod tests { match raster_opt { None => builder.append_null(), Some(raster) => { - let width = raster.metadata().width(); + let width = raster.width().unwrap(); builder.append_value(width); } } diff --git a/rust/sedona-raster-functions/src/lib.rs b/rust/sedona-raster-functions/src/lib.rs index 592a0b4c3..bb6f9536b 100644 --- a/rust/sedona-raster-functions/src/lib.rs +++ b/rust/sedona-raster-functions/src/lib.rs @@ -21,6 +21,8 @@ pub mod register; pub mod rs_band_accessors; pub mod rs_bandpath; pub mod rs_convexhull; +pub mod rs_dim_band; +pub mod rs_dimensions; pub mod rs_envelope; pub mod rs_example; pub mod rs_georeference; @@ -30,6 +32,7 @@ pub mod rs_pixel_functions; pub mod rs_rastercoordinate; pub mod rs_setsrid; pub mod 
rs_size; +pub mod rs_slice; pub mod rs_spatial_predicates; pub mod rs_srid; pub mod rs_worldcoordinate; diff --git a/rust/sedona-raster-functions/src/register.rs b/rust/sedona-raster-functions/src/register.rs index e86fe8e98..4df5e2ca1 100644 --- a/rust/sedona-raster-functions/src/register.rs +++ b/rust/sedona-raster-functions/src/register.rs @@ -42,6 +42,12 @@ pub fn default_function_set() -> FunctionSet { crate::rs_band_accessors::rs_bandnodatavalue_udf, crate::rs_bandpath::rs_bandpath_udf, crate::rs_convexhull::rs_convexhull_udf, + crate::rs_dim_band::rs_dimtoband_udf, + crate::rs_dim_band::rs_bandtodim_udf, + crate::rs_dimensions::rs_numdimensions_udf, + crate::rs_dimensions::rs_dimnames_udf, + crate::rs_dimensions::rs_dimsize_udf, + crate::rs_dimensions::rs_shape_udf, crate::rs_envelope::rs_envelope_udf, crate::rs_example::rs_example_udf, crate::rs_georeference::rs_georeference_udf, @@ -61,6 +67,8 @@ pub fn default_function_set() -> FunctionSet { crate::rs_rastercoordinate::rs_worldtorastercoordy_udf, crate::rs_size::rs_height_udf, crate::rs_size::rs_width_udf, + crate::rs_slice::rs_slice_udf, + crate::rs_slice::rs_slicerange_udf, crate::rs_setsrid::rs_set_crs_udf, crate::rs_setsrid::rs_set_srid_udf, crate::rs_srid::rs_crs_udf, diff --git a/rust/sedona-raster-functions/src/rs_band_accessors.rs b/rust/sedona-raster-functions/src/rs_band_accessors.rs index ee1a308e1..463dc4749 100644 --- a/rust/sedona-raster-functions/src/rs_band_accessors.rs +++ b/rust/sedona-raster-functions/src/rs_band_accessors.rs @@ -120,13 +120,18 @@ fn get_pixel_type( Ok(()) } Some(raster) => { - let num_bands = raster.bands().len(); + let num_bands = raster.num_bands(); if band_index < 1 || band_index > num_bands as i32 { builder.append_null(); return Ok(()); } - let band = raster.bands().band(band_index as usize)?; - let dt = band.metadata().data_type()?; + let band = raster.band((band_index - 1) as usize).ok_or_else(|| { + datafusion_common::DataFusionError::Internal(format!( + "Band 
index {} out of range", + band_index + )) + })?; + let dt = band.data_type(); builder.append_value(dt.pixel_type_name()); Ok(()) } @@ -224,14 +229,21 @@ fn get_nodata_value( Ok(()) } Some(raster) => { - let num_bands = raster.bands().len(); + let num_bands = raster.num_bands(); if band_index < 1 || band_index > num_bands as i32 { builder.append_null(); return Ok(()); } - let band = raster.bands().band(band_index as usize)?; - let band_meta = band.metadata(); - match band_meta.nodata_value_as_f64()? { + let band = raster.band((band_index - 1) as usize).ok_or_else(|| { + datafusion_common::DataFusionError::Internal(format!( + "Band index {} out of range", + band_index + )) + })?; + match band + .nodata_as_f64() + .map_err(datafusion_common::DataFusionError::from)? + { None => builder.append_null(), Some(val) => builder.append_value(val), } @@ -246,30 +258,27 @@ mod tests { use arrow_array::{Array, Float64Array, Int32Array, Int64Array, StringArray, StructArray}; use datafusion_expr::ScalarUDF; use sedona_raster::builder::RasterBuilder; - use sedona_raster::traits::{BandMetadata, RasterMetadata}; use sedona_schema::datatypes::RASTER; - use sedona_schema::raster::{BandDataType, StorageType}; + use sedona_schema::raster::BandDataType; use sedona_testing::compare::assert_array_equal; use sedona_testing::rasters::generate_test_rasters; use sedona_testing::testers::ScalarUdfTester; - /// Build a single-row raster StructArray with custom metadata and band metadata. + /// Build a single-row raster StructArray with custom parameters. 
fn build_custom_raster( - meta: &RasterMetadata, - band_meta: &BandMetadata, + width: u64, + height: u64, + data_type: BandDataType, + nodata: Option<&[u8]>, data: &[u8], crs: Option<&str>, ) -> StructArray { let mut builder = RasterBuilder::new(1); - builder.start_raster(meta, crs).expect("start raster"); builder - .start_band(BandMetadata { - datatype: band_meta.datatype, - nodata_value: band_meta.nodata_value.clone(), - storage_type: band_meta.storage_type, - outdb_url: band_meta.outdb_url.clone(), - outdb_band_id: band_meta.outdb_band_id, - }) + .start_raster_2d(width, height, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, crs) + .expect("start raster"); + builder + .start_band_2d(data_type, nodata) .expect("start band"); builder.band_data_writer().append_value(data); builder.finish_band().expect("finish band"); @@ -401,25 +410,9 @@ mod tests { #[test] fn udf_bandnodatavalue_no_nodata() { // Create a raster without nodata - let meta = RasterMetadata { - width: 2, - height: 2, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - let band_meta = BandMetadata { - datatype: BandDataType::UInt8, - nodata_value: None, - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - }; let data = vec![1u8, 2, 3, 4]; - let rasters = build_custom_raster(&meta, &band_meta, &data, Some("OGC:CRS84")); + let rasters = + build_custom_raster(2, 2, BandDataType::UInt8, None, &data, Some("OGC:CRS84")); let udf: ScalarUDF = rs_bandnodatavalue_udf().into(); let tester = ScalarUdfTester::new(udf, vec![RASTER]); diff --git a/rust/sedona-raster-functions/src/rs_bandpath.rs b/rust/sedona-raster-functions/src/rs_bandpath.rs index 35cfe9a07..968d3c9d6 100644 --- a/rust/sedona-raster-functions/src/rs_bandpath.rs +++ b/rust/sedona-raster-functions/src/rs_bandpath.rs @@ -24,7 +24,6 @@ use datafusion_common::error::Result; use datafusion_expr::{ColumnarValue, Volatility}; use sedona_expr::scalar_udf::{SedonaScalarKernel, 
SedonaScalarUDF}; use sedona_raster::traits::RasterRef; -use sedona_schema::raster::StorageType; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; /// RS_BandPath() scalar UDF implementation @@ -124,22 +123,23 @@ fn get_band_path( match raster_opt { None => builder.append_null(), Some(raster) => { - let bands = raster.bands(); - let num_bands = bands.len() as i32; + let num_bands = raster.num_bands() as i32; if band_index < 1 || band_index > num_bands { builder.append_null(); - } else { - let band = bands.band(band_index as usize)?; - let band_metadata = band.metadata(); - - if band_metadata.storage_type()? == StorageType::OutDbRef { - match band_metadata.outdb_url() { - Some(url) => builder.append_value(url), - None => builder.append_null(), + } else if let Some(band) = raster.band((band_index - 1) as usize) { + match band.outdb_uri() { + Some(uri) => { + // Return just the path portion, stripping the internal + // scheme prefix and fragment from the outdb_uri. + let path = sedona_raster::outdb_uri::parse_outdb_uri(uri) + .map(|parsed| parsed.path) + .unwrap_or(uri); + builder.append_value(path); } - } else { - builder.append_null() + None => builder.append_null(), } + } else { + builder.append_null(); } } } @@ -225,11 +225,11 @@ mod tests { .downcast_ref::() .expect("Expected StringArray"); - // Raster 0, band 1: OutDbRef -> URL + // Raster 0, band 1: OutDbRef → URI assert_eq!(string_array.value(0), "s3://bucket/raster_0.tif"); - // Raster 1: null raster -> null + // Raster 1: null raster → null assert!(string_array.is_null(1)); - // Raster 2, band 2: OutDbRef -> URL + // Raster 2, band 2: OutDbRef → URI assert_eq!(string_array.value(2), "s3://bucket/raster_2.tif"); } @@ -258,37 +258,28 @@ mod tests { /// Build a raster array with out-db bands for testing RS_BandPath. 
/// Returns a StructArray with 3 rasters: - /// [0] OutDbRef band with URL "s3://bucket/raster_0.tif" + /// [0] OutDbRef band with URI "geotiff://s3://bucket/raster_0.tif#band=1" /// [1] null raster - /// [2] Two bands: InDb band 1, OutDbRef band 2 with URL "s3://bucket/raster_2.tif" + /// [2] Two bands: InDb band 1, OutDbRef band 2 with URI "geotiff://s3://bucket/raster_2.tif#band=3" fn build_outdb_rasters() -> arrow_array::StructArray { use sedona_raster::builder::RasterBuilder; - use sedona_raster::traits::{BandMetadata, RasterMetadata}; - use sedona_schema::raster::{BandDataType, StorageType}; - - let metadata = RasterMetadata { - width: 4, - height: 4, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; + use sedona_schema::raster::BandDataType; let mut builder = RasterBuilder::new(3); // Raster 0: single OutDbRef band - builder.start_raster(&metadata, Some("EPSG:4326")).unwrap(); builder - .start_band(BandMetadata { - nodata_value: None, - storage_type: StorageType::OutDbRef, - datatype: BandDataType::Float32, - outdb_url: Some("s3://bucket/raster_0.tif".to_string()), - outdb_band_id: Some(1), - }) + .start_raster_2d(4, 4, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, Some("EPSG:4326")) + .unwrap(); + builder + .start_band( + None, + &["y", "x"], + &[4, 4], + BandDataType::Float32, + None, + Some("geotiff://s3://bucket/raster_0.tif#band=1"), + ) .unwrap(); builder.band_data_writer().append_value([]); builder.finish_band().unwrap(); @@ -298,26 +289,21 @@ mod tests { builder.append_null().unwrap(); // Raster 2: two bands — InDb (band 1) + OutDbRef (band 2) - builder.start_raster(&metadata, Some("EPSG:4326")).unwrap(); builder - .start_band(BandMetadata { - nodata_value: None, - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }) + .start_raster_2d(4, 4, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, Some("EPSG:4326")) .unwrap(); + builder.start_band_2d(BandDataType::UInt8, 
None).unwrap(); builder.band_data_writer().append_value([0u8; 16]); builder.finish_band().unwrap(); builder - .start_band(BandMetadata { - nodata_value: None, - storage_type: StorageType::OutDbRef, - datatype: BandDataType::Float32, - outdb_url: Some("s3://bucket/raster_2.tif".to_string()), - outdb_band_id: Some(3), - }) + .start_band( + None, + &["y", "x"], + &[4, 4], + BandDataType::Float32, + None, + Some("geotiff://s3://bucket/raster_2.tif#band=3"), + ) .unwrap(); builder.band_data_writer().append_value([]); builder.finish_band().unwrap(); @@ -339,8 +325,7 @@ mod tests { .downcast_ref::() .expect("Expected StringArray"); - // Raster 0: OutDbRef band 1 → returns URL - assert!(!string_array.is_null(0)); + // Raster 0: OutDbRef band → URI assert_eq!(string_array.value(0), "s3://bucket/raster_0.tif"); // Raster 1: null raster → null assert!(string_array.is_null(1)); @@ -365,11 +350,11 @@ mod tests { .downcast_ref::() .expect("Expected StringArray"); - // Raster 0, band 1: OutDbRef → URL + // Raster 0, band 1: OutDbRef → URI assert_eq!(string_array.value(0), "s3://bucket/raster_0.tif"); // Raster 1: null raster → null assert!(string_array.is_null(1)); - // Raster 2, band 2: OutDbRef → URL + // Raster 2, band 2: OutDbRef → URI assert_eq!(string_array.value(2), "s3://bucket/raster_2.tif"); } diff --git a/rust/sedona-raster-functions/src/rs_convexhull.rs b/rust/sedona-raster-functions/src/rs_convexhull.rs index e124e3e88..1fafc83fa 100644 --- a/rust/sedona-raster-functions/src/rs_convexhull.rs +++ b/rust/sedona-raster-functions/src/rs_convexhull.rs @@ -107,8 +107,18 @@ impl SedonaScalarKernel for RsConvexHull { /// of the raster in world coordinates. Due to skew/rotation in the affine /// transformation, each corner must be computed individually. 
fn write_convexhull_wkb(raster: &dyn RasterRef, out: &mut impl std::io::Write) -> Result<()> { - let width = raster.metadata().width() as i64; - let height = raster.metadata().height() as i64; + let Some(width) = raster.width() else { + return Err(DataFusionError::Execution( + "Raster has no bands; cannot determine width".into(), + )); + }; + let Some(height) = raster.height() else { + return Err(DataFusionError::Execution( + "Raster has no bands; cannot determine height".into(), + )); + }; + let width = width as i64; + let height = height as i64; // Compute the four corners in pixel coordinates: // Upper-left (0, 0), Upper-right (width, 0), Lower-right (width, height), Lower-left (0, height) diff --git a/rust/sedona-raster-functions/src/rs_dim_band.rs b/rust/sedona-raster-functions/src/rs_dim_band.rs new file mode 100644 index 000000000..b57e083ed --- /dev/null +++ b/rust/sedona-raster-functions/src/rs_dim_band.rs @@ -0,0 +1,578 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use arrow_schema::DataType; +use datafusion_common::cast::as_string_array; +use datafusion_common::error::Result; +use datafusion_common::exec_err; +use datafusion_expr::{ColumnarValue, Volatility}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_raster::builder::RasterBuilder; +use sedona_raster::traits::RasterRef; +use sedona_schema::datatypes::SedonaType; +use sedona_schema::matchers::ArgMatcher; + +use crate::executor::RasterExecutor; +use crate::rs_slice::extract_slice; + +// =========================================================================== +// RS_DimToBand +// =========================================================================== + +/// RS_DimToBand(raster, dim_name) -> Raster +/// +/// Expands each band that has the named dimension into multiple bands +/// (one per index along that dimension), removing that dimension from each. +/// Bands that do not have the named dimension are passed through unchanged. +/// Spatial dimensions cannot be expanded. 
+pub fn rs_dimtoband_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_dimtoband", + vec![Arc::new(RsDimToBand {})], + Volatility::Immutable, + ) +} + +#[derive(Debug)] +struct RsDimToBand {} + +impl SedonaScalarKernel for RsDimToBand { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster(), ArgMatcher::is_string()], + SedonaType::Raster, + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + + let dim_name_array = args[1].clone().cast_to(&DataType::Utf8, None)?; + let dim_name_array = dim_name_array.into_array(executor.num_iterations())?; + let dim_name_array = as_string_array(&dim_name_array)?; + + let mut new_builder = RasterBuilder::new(executor.num_iterations()); + let mut dim_name_iter = dim_name_array.iter(); + + executor.execute_raster_void(|_i, raster_opt| { + let dim_name = dim_name_iter.next().unwrap(); + + match (raster_opt, dim_name) { + (None, _) | (_, None) => { + new_builder.append_null()?; + Ok(()) + } + (Some(raster), Some(name)) => { + if name == raster.x_dim() || name == raster.y_dim() { + return exec_err!("RS_DimToBand: cannot expand spatial dimension '{name}'"); + } + + let t: [f64; 6] = raster.transform().try_into().unwrap(); + new_builder.start_raster(&t, raster.x_dim(), raster.y_dim(), raster.crs())?; + + for band_idx in 0..raster.num_bands() { + let band = raster.band(band_idx).ok_or_else(|| { + datafusion_common::DataFusionError::Internal(format!( + "RS_DimToBand: band {band_idx} not found" + )) + })?; + + let maybe_dim_idx = band.dim_index(name); + match maybe_dim_idx { + None => { + // Band doesn't have this dimension -- pass through + let dim_names = band.dim_names(); + let dim_name_refs: Vec<&str> = dim_names.to_vec(); + let band_name = raster.band_name(band_idx); + new_builder.start_band( + band_name, + &dim_name_refs, + 
band.shape(), + band.data_type(), + band.nodata(), + None, + )?; + let data = band.contiguous_data()?; + new_builder.band_data_writer().append_value(&data); + new_builder.finish_band()?; + } + Some(dim_idx) => { + let dim_size = band.shape()[dim_idx]; + let new_dim_names: Vec<&str> = band + .dim_names() + .into_iter() + .enumerate() + .filter(|&(i, _)| i != dim_idx) + .map(|(_, n)| n) + .collect(); + let new_shape: Vec = band + .shape() + .iter() + .enumerate() + .filter(|&(i, _)| i != dim_idx) + .map(|(_, &s)| s) + .collect(); + + let orig_band_name = raster.band_name(band_idx); + + for idx in 0..dim_size { + let sliced_data = + extract_slice(band.as_ref(), dim_idx, idx, 1)?; + + let new_band_name = + orig_band_name.map(|n| format!("{n}_{name}_{idx}")); + let new_dim_name_refs: Vec<&str> = new_dim_names.to_vec(); + new_builder.start_band( + new_band_name.as_deref(), + &new_dim_name_refs, + &new_shape, + band.data_type(), + band.nodata(), + None, + )?; + new_builder.band_data_writer().append_value(&sliced_data); + new_builder.finish_band()?; + } + } + } + } + + new_builder.finish_raster()?; + Ok(()) + } + } + })?; + + executor.finish(Arc::new(new_builder.finish()?)) + } +} + +// =========================================================================== +// RS_BandToDim +// =========================================================================== + +/// RS_BandToDim(raster, dim_name) -> Raster +/// +/// Merges all bands into a single band by prepending a new dimension with +/// the given name. All bands must have identical dim_names, shape, and +/// data_type. The data from each band is concatenated in order. 
+pub fn rs_bandtodim_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_bandtodim", + vec![Arc::new(RsBandToDim {})], + Volatility::Immutable, + ) +} + +#[derive(Debug)] +struct RsBandToDim {} + +impl SedonaScalarKernel for RsBandToDim { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster(), ArgMatcher::is_string()], + SedonaType::Raster, + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + + let dim_name_array = args[1].clone().cast_to(&DataType::Utf8, None)?; + let dim_name_array = dim_name_array.into_array(executor.num_iterations())?; + let dim_name_array = as_string_array(&dim_name_array)?; + + let mut new_builder = RasterBuilder::new(executor.num_iterations()); + let mut dim_name_iter = dim_name_array.iter(); + + executor.execute_raster_void(|_i, raster_opt| { + let dim_name = dim_name_iter.next().unwrap(); + + match (raster_opt, dim_name) { + (None, _) | (_, None) => { + new_builder.append_null()?; + Ok(()) + } + (Some(raster), Some(name)) => { + let num_bands = raster.num_bands(); + if num_bands == 0 { + return exec_err!("RS_BandToDim: raster has no bands"); + } + + // Get reference band properties from band 0 + let band0 = raster.band(0).ok_or_else(|| { + datafusion_common::DataFusionError::Internal( + "RS_BandToDim: band 0 not found".to_string(), + ) + })?; + let ref_dim_names = band0.dim_names(); + let ref_shape = band0.shape().to_vec(); + let ref_data_type = band0.data_type(); + + // Validate all bands match + for i in 1..num_bands { + let band = raster.band(i).ok_or_else(|| { + datafusion_common::DataFusionError::Internal(format!( + "RS_BandToDim: band {i} not found" + )) + })?; + if band.dim_names() != ref_dim_names { + return exec_err!( + "RS_BandToDim: band {i} has different dim_names than band 0" + ); + } + if band.shape() != 
ref_shape.as_slice() { + return exec_err!( + "RS_BandToDim: band {i} has different shape than band 0" + ); + } + if band.data_type() != ref_data_type { + return exec_err!( + "RS_BandToDim: band {i} has different data_type than band 0" + ); + } + } + + // Build new dim_names: [new_dim_name] + original_dim_names + let mut new_dim_names: Vec<&str> = Vec::with_capacity(ref_dim_names.len() + 1); + new_dim_names.push(name); + new_dim_names.extend(ref_dim_names.iter()); + + // Build new shape: [num_bands] + original_shape + let mut new_shape: Vec = Vec::with_capacity(ref_shape.len() + 1); + new_shape.push(num_bands as u64); + new_shape.extend_from_slice(&ref_shape); + + // Concatenate all band data + let mut concat_data = Vec::new(); + for i in 0..num_bands { + let band = raster.band(i).unwrap(); + let data = band.contiguous_data()?; + concat_data.extend_from_slice(&data); + } + + let nodata = band0.nodata(); + + let t: [f64; 6] = raster.transform().try_into().unwrap(); + new_builder.start_raster(&t, raster.x_dim(), raster.y_dim(), raster.crs())?; + new_builder.start_band( + None, + &new_dim_names, + &new_shape, + ref_data_type, + nodata, + None, + )?; + new_builder.band_data_writer().append_value(&concat_data); + new_builder.finish_band()?; + new_builder.finish_raster()?; + + Ok(()) + } + } + })?; + + executor.finish(Arc::new(new_builder.finish()?)) + } +} + +// =========================================================================== +// Tests +// =========================================================================== + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::StructArray; + use arrow_schema::DataType; + use datafusion_common::ScalarValue; + use datafusion_expr::ScalarUDF; + use sedona_raster::array::RasterStructArray; + use sedona_raster::builder::RasterBuilder; + use sedona_raster::traits::RasterRef; + use sedona_schema::datatypes::RASTER; + use sedona_schema::raster::BandDataType; + use sedona_testing::rasters::generate_test_rasters; + use 
sedona_testing::testers::ScalarUdfTester; + + /// Build a single-row 3D raster with 1 band, shape [time, y, x], + /// and sequential UInt8 data. + fn build_3d_raster_sequential(time: u64, height: u64, width: u64) -> StructArray { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster(&transform, "x", "y", None).unwrap(); + builder + .start_band( + Some("temp"), + &["time", "y", "x"], + &[time, height, width], + BandDataType::UInt8, + None, + None, + ) + .unwrap(); + let total = (time * height * width) as usize; + let data: Vec = (0..total).map(|i| i as u8).collect(); + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + /// Build a single-row 2D raster with N bands, each [y, x], with + /// sequential data starting at `band_idx * y * x`. + fn build_multi_band_2d(num_bands: usize, height: u64, width: u64) -> StructArray { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster(&transform, "x", "y", None).unwrap(); + let pixels = (height * width) as usize; + for b in 0..num_bands { + builder + .start_band( + None, + &["y", "x"], + &[height, width], + BandDataType::UInt8, + None, + None, + ) + .unwrap(); + let offset = b * pixels; + let data: Vec = (offset..offset + pixels).map(|i| i as u8).collect(); + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + } + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + // ----------------------------------------------------------------------- + // RS_DimToBand + // ----------------------------------------------------------------------- + + #[test] + fn dimtoband_3d_to_bands() { + let udf: ScalarUDF = rs_dimtoband_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + // 1 band with shape [time=3, y=2, 
x=2], sequential data 0..12 + let rasters = build_3d_raster_sequential(3, 2, 2); + let result = tester + .invoke_array_scalar(Arc::new(rasters), "time") + .unwrap(); + + let result_struct = result.as_any().downcast_ref::().unwrap(); + let raster_array = RasterStructArray::new(result_struct); + let raster = raster_array.get(0).unwrap(); + + // Should have 3 bands, each 2D [y=2, x=2] + assert_eq!(raster.num_bands(), 3); + + for b in 0..3 { + let band = raster.band(b).unwrap(); + assert_eq!(band.ndim(), 2); + assert_eq!(band.dim_names(), vec!["y", "x"]); + assert_eq!(band.shape(), &[2, 2]); + + let data = band.contiguous_data().unwrap(); + let offset = b * 4; + let expected: Vec = (offset..offset + 4).map(|i| i as u8).collect(); + assert_eq!(data.as_ref(), &expected[..]); + } + + // Verify band names + assert_eq!(raster.band_name(0), Some("temp_time_0")); + assert_eq!(raster.band_name(1), Some("temp_time_1")); + assert_eq!(raster.band_name(2), Some("temp_time_2")); + } + + #[test] + fn dimtoband_spatial_dim_error() { + let udf: ScalarUDF = rs_dimtoband_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + let rasters = build_3d_raster_sequential(3, 2, 2); + let result = tester.invoke_array_scalar(Arc::new(rasters), "x"); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("cannot expand spatial dimension"), + "Unexpected error: {err_msg}" + ); + } + + #[test] + fn dimtoband_null_raster() { + let udf: ScalarUDF = rs_dimtoband_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + let rasters = generate_test_rasters(1, Some(0)).unwrap(); + let result = tester + .invoke_array_scalar(Arc::new(rasters), "time") + .unwrap(); + + let result_struct = result.as_any().downcast_ref::().unwrap(); + let raster_array = RasterStructArray::new(result_struct); + assert!(raster_array.is_null(0)); + } + + // 
----------------------------------------------------------------------- + // RS_BandToDim + // ----------------------------------------------------------------------- + + #[test] + fn bandtodim_bands_to_3d() { + let udf: ScalarUDF = rs_bandtodim_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + // 3 bands, each [y=2, x=2], sequential data + let rasters = build_multi_band_2d(3, 2, 2); + let result = tester + .invoke_array_scalar(Arc::new(rasters), "newdim") + .unwrap(); + + let result_struct = result.as_any().downcast_ref::().unwrap(); + let raster_array = RasterStructArray::new(result_struct); + let raster = raster_array.get(0).unwrap(); + + // Should be 1 band with shape [newdim=3, y=2, x=2] + assert_eq!(raster.num_bands(), 1); + let band = raster.band(0).unwrap(); + assert_eq!(band.ndim(), 3); + assert_eq!(band.dim_names(), vec!["newdim", "y", "x"]); + assert_eq!(band.shape(), &[3, 2, 2]); + + // Data should be concatenation of all 3 bands + let data = band.contiguous_data().unwrap(); + let expected: Vec = (0..12).map(|i| i as u8).collect(); + assert_eq!(data.as_ref(), &expected[..]); + } + + #[test] + fn bandtodim_mismatched_shapes_error() { + // Build a raster with bands of different shapes + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster(&transform, "x", "y", None).unwrap(); + + builder + .start_band(None, &["y", "x"], &[2, 2], BandDataType::UInt8, None, None) + .unwrap(); + builder.band_data_writer().append_value([0u8; 4]); + builder.finish_band().unwrap(); + + builder + .start_band(None, &["y", "x"], &[3, 3], BandDataType::UInt8, None, None) + .unwrap(); + builder.band_data_writer().append_value([0u8; 9]); + builder.finish_band().unwrap(); + + builder.finish_raster().unwrap(); + let rasters = builder.finish().unwrap(); + + let udf: ScalarUDF = rs_bandtodim_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, 
SedonaType::Arrow(DataType::Utf8)]); + + let result = tester.invoke_array_scalar(Arc::new(rasters), "time"); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("different shape"), + "Unexpected error: {err_msg}" + ); + } + + #[test] + fn bandtodim_null_raster() { + let udf: ScalarUDF = rs_bandtodim_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + let rasters = generate_test_rasters(1, Some(0)).unwrap(); + let result = tester + .invoke_array_scalar(Arc::new(rasters), "time") + .unwrap(); + + let result_struct = result.as_any().downcast_ref::().unwrap(); + let raster_array = RasterStructArray::new(result_struct); + assert!(raster_array.is_null(0)); + } + + // ----------------------------------------------------------------------- + // Round-trip: DimToBand -> BandToDim + // ----------------------------------------------------------------------- + + #[test] + fn round_trip_dimtoband_bandtodim() { + // Start with 1 band [time=3, y=2, x=2] + let rasters = build_3d_raster_sequential(3, 2, 2); + let raster_array_in = RasterStructArray::new(&rasters); + let raster_in = raster_array_in.get(0).unwrap(); + let original_data = raster_in + .band(0) + .unwrap() + .contiguous_data() + .unwrap() + .to_vec(); + + // DimToBand: 1 band [time=3, y=2, x=2] -> 3 bands [y=2, x=2] + let kernel = RsDimToBand {}; + let arg_types = vec![RASTER, SedonaType::Arrow(DataType::Utf8)]; + let args = vec![ + ColumnarValue::Array(Arc::new(rasters)), + ColumnarValue::Scalar(ScalarValue::Utf8(Some("time".to_string()))), + ]; + let mid_result = kernel.invoke_batch(&arg_types, &args).unwrap(); + let mid_array = match mid_result { + ColumnarValue::Array(arr) => arr, + _ => panic!("Expected array"), + }; + + // BandToDim: 3 bands [y=2, x=2] -> 1 band [time=3, y=2, x=2] + let kernel2 = RsBandToDim {}; + let args2 = vec![ + ColumnarValue::Array(mid_array), + 
ColumnarValue::Scalar(ScalarValue::Utf8(Some("time".to_string()))), + ]; + let final_result = kernel2.invoke_batch(&arg_types, &args2).unwrap(); + let final_array = match final_result { + ColumnarValue::Array(arr) => arr, + _ => panic!("Expected array"), + }; + let final_struct = final_array.as_any().downcast_ref::().unwrap(); + let raster_array_out = RasterStructArray::new(final_struct); + let raster_out = raster_array_out.get(0).unwrap(); + + // Verify shape and data match + let band_out = raster_out.band(0).unwrap(); + assert_eq!(band_out.dim_names(), vec!["time", "y", "x"]); + assert_eq!(band_out.shape(), &[3, 2, 2]); + let round_trip_data = band_out.contiguous_data().unwrap(); + assert_eq!(round_trip_data.as_ref(), &original_data[..]); + } +} diff --git a/rust/sedona-raster-functions/src/rs_dimensions.rs b/rust/sedona-raster-functions/src/rs_dimensions.rs new file mode 100644 index 000000000..ee4fc8cad --- /dev/null +++ b/rust/sedona-raster-functions/src/rs_dimensions.rs @@ -0,0 +1,946 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use arrow_array::builder::{Int32Builder, Int64Builder, ListBuilder, StringBuilder}; +use arrow_schema::DataType; +use datafusion_common::cast::{as_int32_array, as_string_array}; +use datafusion_common::error::Result; +use datafusion_common::exec_err; +use datafusion_expr::{ColumnarValue, Volatility}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_raster::traits::RasterRef; +use sedona_schema::datatypes::SedonaType; +use sedona_schema::matchers::ArgMatcher; + +use crate::executor::RasterExecutor; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/// Check that all bands agree on a value. Returns the value from band 0, +/// or an error if any band disagrees. +fn check_band_agreement( + raster: &dyn RasterRef, + func_name: &str, + property_name: &str, + extractor: impl Fn(&dyn sedona_raster::traits::BandRef) -> T, +) -> Result { + let band0 = raster.band(0).ok_or_else(|| { + datafusion_common::DataFusionError::Execution(format!("{func_name}: raster has no bands")) + })?; + let value = extractor(band0.as_ref()); + for i in 1..raster.num_bands() { + if let Some(band) = raster.band(i) { + let other = extractor(band.as_ref()); + if other != value { + return exec_err!( + "{func_name}: bands have different {property_name} — specify a band index" + ); + } + } + } + Ok(value) +} + +fn list_utf8_type() -> DataType { + DataType::List(Arc::new(arrow_schema::Field::new( + "item", + DataType::Utf8, + true, + ))) +} + +fn list_int64_type() -> DataType { + DataType::List(Arc::new(arrow_schema::Field::new( + "item", + DataType::Int64, + true, + ))) +} + +// =========================================================================== +// RS_NumDimensions +// =========================================================================== + +/// RS_NumDimensions(raster [, band]) -> Int32 +/// +/// Returns 
the number of dimensions in the raster (or a specific band). +pub fn rs_numdimensions_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_numdimensions", + vec![ + Arc::new(RsNumDimensions {}), + Arc::new(RsNumDimensionsWithBand {}), + ], + Volatility::Immutable, + ) +} + +#[derive(Debug)] +struct RsNumDimensions {} + +impl SedonaScalarKernel for RsNumDimensions { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster()], + SedonaType::Arrow(DataType::Int32), + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let mut builder = Int32Builder::with_capacity(executor.num_iterations()); + + executor.execute_raster_void(|_i, raster_opt| match raster_opt { + None => { + builder.append_null(); + Ok(()) + } + Some(raster) => { + let ndim = + check_band_agreement(raster, "RS_NumDimensions", "dimensionality", |b| { + b.ndim() + })?; + builder.append_value(ndim as i32); + Ok(()) + } + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +#[derive(Debug)] +struct RsNumDimensionsWithBand {} + +impl SedonaScalarKernel for RsNumDimensionsWithBand { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster(), ArgMatcher::is_integer()], + SedonaType::Arrow(DataType::Int32), + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let band_index_array = args[1].clone().cast_to(&DataType::Int32, None)?; + let band_index_array = band_index_array.into_array(executor.num_iterations())?; + let band_index_array = as_int32_array(&band_index_array)?; + + let mut builder = Int32Builder::with_capacity(executor.num_iterations()); + let mut band_index_iter = band_index_array.iter(); + 
executor.execute_raster_void(|_, raster_opt| { + let band_index = band_index_iter.next().unwrap().unwrap_or(1); + match raster_opt { + None => { + builder.append_null(); + Ok(()) + } + Some(raster) => { + if band_index < 1 || band_index > raster.num_bands() as i32 { + builder.append_null(); + return Ok(()); + } + let band = raster.band((band_index - 1) as usize).ok_or_else(|| { + datafusion_common::DataFusionError::Internal(format!( + "Band index {} out of range", + band_index + )) + })?; + builder.append_value(band.ndim() as i32); + Ok(()) + } + } + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +// =========================================================================== +// RS_DimNames +// =========================================================================== + +/// RS_DimNames(raster [, band]) -> List +/// +/// Returns the dimension names of the raster (or a specific band). +pub fn rs_dimnames_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_dimnames", + vec![Arc::new(RsDimNames {}), Arc::new(RsDimNamesWithBand {})], + Volatility::Immutable, + ) +} + +#[derive(Debug)] +struct RsDimNames {} + +impl SedonaScalarKernel for RsDimNames { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster()], + SedonaType::Arrow(list_utf8_type()), + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let mut list_builder = ListBuilder::new(StringBuilder::new()); + + executor.execute_raster_void(|_i, raster_opt| match raster_opt { + None => { + list_builder.append_null(); + Ok(()) + } + Some(raster) => { + let names = check_band_agreement(raster, "RS_DimNames", "dimension names", |b| { + b.dim_names() + .iter() + .map(|s| s.to_string()) + .collect::>() + })?; + for name in &names { + list_builder.values().append_value(name); + } + 
list_builder.append(true); + Ok(()) + } + })?; + + executor.finish(Arc::new(list_builder.finish())) + } +} + +#[derive(Debug)] +struct RsDimNamesWithBand {} + +impl SedonaScalarKernel for RsDimNamesWithBand { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster(), ArgMatcher::is_integer()], + SedonaType::Arrow(list_utf8_type()), + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let band_index_array = args[1].clone().cast_to(&DataType::Int32, None)?; + let band_index_array = band_index_array.into_array(executor.num_iterations())?; + let band_index_array = as_int32_array(&band_index_array)?; + + let mut list_builder = ListBuilder::new(StringBuilder::new()); + let mut band_index_iter = band_index_array.iter(); + executor.execute_raster_void(|_, raster_opt| { + let band_index = band_index_iter.next().unwrap().unwrap_or(1); + match raster_opt { + None => { + list_builder.append_null(); + Ok(()) + } + Some(raster) => { + if band_index < 1 || band_index > raster.num_bands() as i32 { + list_builder.append_null(); + return Ok(()); + } + let band = raster.band((band_index - 1) as usize).ok_or_else(|| { + datafusion_common::DataFusionError::Internal(format!( + "Band index {} out of range", + band_index + )) + })?; + for name in band.dim_names() { + list_builder.values().append_value(name); + } + list_builder.append(true); + Ok(()) + } + } + })?; + + executor.finish(Arc::new(list_builder.finish())) + } +} + +// =========================================================================== +// RS_DimSize +// =========================================================================== + +/// RS_DimSize(raster, dim_name [, band]) -> Int64 (nullable) +/// +/// Returns the size of the named dimension, or null if the dimension +/// does not exist. 
+pub fn rs_dimsize_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_dimsize", + vec![Arc::new(RsDimSize {}), Arc::new(RsDimSizeWithBand {})], + Volatility::Immutable, + ) +} + +#[derive(Debug)] +struct RsDimSize {} + +impl SedonaScalarKernel for RsDimSize { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster(), ArgMatcher::is_string()], + SedonaType::Arrow(DataType::Int64), + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let dim_name_array = args[1].clone().cast_to(&DataType::Utf8, None)?; + let dim_name_array = dim_name_array.into_array(executor.num_iterations())?; + let dim_name_array = as_string_array(&dim_name_array)?; + + let mut builder = Int64Builder::with_capacity(executor.num_iterations()); + let mut dim_name_iter = dim_name_array.iter(); + executor.execute_raster_void(|_, raster_opt| { + let dim_name = dim_name_iter.next().unwrap(); + match (raster_opt, dim_name) { + (None, _) | (_, None) => { + builder.append_null(); + Ok(()) + } + (Some(raster), Some(name)) => { + let size = + check_band_agreement(raster, "RS_DimSize", "dimension sizes", |b| { + b.dim_size(name) + })?; + match size { + Some(s) => builder.append_value(s as i64), + None => builder.append_null(), + } + Ok(()) + } + } + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +#[derive(Debug)] +struct RsDimSizeWithBand {} + +impl SedonaScalarKernel for RsDimSizeWithBand { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ + ArgMatcher::is_raster(), + ArgMatcher::is_string(), + ArgMatcher::is_integer(), + ], + SedonaType::Arrow(DataType::Int64), + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = 
RasterExecutor::new(arg_types, args); + let dim_name_array = args[1].clone().cast_to(&DataType::Utf8, None)?; + let dim_name_array = dim_name_array.into_array(executor.num_iterations())?; + let dim_name_array = as_string_array(&dim_name_array)?; + let band_index_array = args[2].clone().cast_to(&DataType::Int32, None)?; + let band_index_array = band_index_array.into_array(executor.num_iterations())?; + let band_index_array = as_int32_array(&band_index_array)?; + + let mut builder = Int64Builder::with_capacity(executor.num_iterations()); + let mut dim_name_iter = dim_name_array.iter(); + let mut band_index_iter = band_index_array.iter(); + executor.execute_raster_void(|_, raster_opt| { + let dim_name = dim_name_iter.next().unwrap(); + let band_index = band_index_iter.next().unwrap().unwrap_or(1); + match (raster_opt, dim_name) { + (None, _) | (_, None) => { + builder.append_null(); + Ok(()) + } + (Some(raster), Some(name)) => { + if band_index < 1 || band_index > raster.num_bands() as i32 { + builder.append_null(); + return Ok(()); + } + let band = raster.band((band_index - 1) as usize).ok_or_else(|| { + datafusion_common::DataFusionError::Internal(format!( + "Band index {} out of range", + band_index + )) + })?; + match band.dim_size(name) { + Some(s) => builder.append_value(s as i64), + None => builder.append_null(), + } + Ok(()) + } + } + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +// =========================================================================== +// RS_Shape +// =========================================================================== + +/// RS_Shape(raster [, band]) -> List +/// +/// Returns the shape (size of each dimension) of the raster. 
+pub fn rs_shape_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_shape", + vec![Arc::new(RsShape {}), Arc::new(RsShapeWithBand {})], + Volatility::Immutable, + ) +} + +#[derive(Debug)] +struct RsShape {} + +impl SedonaScalarKernel for RsShape { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster()], + SedonaType::Arrow(list_int64_type()), + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let mut list_builder = ListBuilder::new(Int64Builder::new()); + + executor.execute_raster_void(|_i, raster_opt| match raster_opt { + None => { + list_builder.append_null(); + Ok(()) + } + Some(raster) => { + let shape = + check_band_agreement(raster, "RS_Shape", "shape", |b| b.shape().to_vec())?; + for &s in &shape { + list_builder.values().append_value(s as i64); + } + list_builder.append(true); + Ok(()) + } + })?; + + executor.finish(Arc::new(list_builder.finish())) + } +} + +#[derive(Debug)] +struct RsShapeWithBand {} + +impl SedonaScalarKernel for RsShapeWithBand { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster(), ArgMatcher::is_integer()], + SedonaType::Arrow(list_int64_type()), + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let band_index_array = args[1].clone().cast_to(&DataType::Int32, None)?; + let band_index_array = band_index_array.into_array(executor.num_iterations())?; + let band_index_array = as_int32_array(&band_index_array)?; + + let mut list_builder = ListBuilder::new(Int64Builder::new()); + let mut band_index_iter = band_index_array.iter(); + executor.execute_raster_void(|_, raster_opt| { + let band_index = 
band_index_iter.next().unwrap().unwrap_or(1); + match raster_opt { + None => { + list_builder.append_null(); + Ok(()) + } + Some(raster) => { + if band_index < 1 || band_index > raster.num_bands() as i32 { + list_builder.append_null(); + return Ok(()); + } + let band = raster.band((band_index - 1) as usize).ok_or_else(|| { + datafusion_common::DataFusionError::Internal(format!( + "Band index {} out of range", + band_index + )) + })?; + for &s in band.shape() { + list_builder.values().append_value(s as i64); + } + list_builder.append(true); + Ok(()) + } + } + })?; + + executor.finish(Arc::new(list_builder.finish())) + } +} + +// =========================================================================== +// Tests +// =========================================================================== + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::{Array, Int32Array, ListArray, StringArray, StructArray}; + use datafusion_expr::ScalarUDF; + use sedona_raster::builder::RasterBuilder; + use sedona_schema::datatypes::RASTER; + use sedona_schema::raster::BandDataType; + use sedona_testing::rasters::generate_test_rasters; + use sedona_testing::testers::ScalarUdfTester; + + /// Build a single-row 2D raster StructArray. + fn build_2d_raster(width: u64, height: u64) -> StructArray { + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d(width, height, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, None) + .unwrap(); + builder.start_band_2d(BandDataType::Float32, None).unwrap(); + let data = vec![0u8; (width * height * 4) as usize]; + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + /// Build a single-row 3D raster StructArray with shape [time, height, width]. 
+ fn build_3d_raster(time: u64, height: u64, width: u64) -> StructArray { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster(&transform, "x", "y", None).unwrap(); + builder + .start_band( + None, + &["time", "y", "x"], + &[time, height, width], + BandDataType::Float32, + None, + None, + ) + .unwrap(); + let data = vec![0u8; (time * height * width * 4) as usize]; + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + /// Build a raster with two bands that have different dimensionality. + fn build_mixed_dim_raster() -> StructArray { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster(&transform, "x", "y", None).unwrap(); + + // Band 0: 2D [4, 5] + builder + .start_band( + None, + &["y", "x"], + &[4, 5], + BandDataType::Float32, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8; 4 * 5 * 4]); + builder.finish_band().unwrap(); + + // Band 1: 3D [3, 4, 5] + builder + .start_band( + None, + &["time", "y", "x"], + &[3, 4, 5], + BandDataType::Float32, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8; 3 * 4 * 5 * 4]); + builder.finish_band().unwrap(); + + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + // ----------------------------------------------------------------------- + // RS_NumDimensions + // ----------------------------------------------------------------------- + + #[test] + fn numdimensions_2d() { + let udf: ScalarUDF = rs_numdimensions_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + tester.assert_return_type(DataType::Int32); + + let rasters = build_2d_raster(4, 5); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + let arr = result + .as_any() + .downcast_ref::() + .expect("Expected 
Int32Array"); + assert_eq!(arr.value(0), 2); + } + + #[test] + fn numdimensions_3d() { + let udf: ScalarUDF = rs_numdimensions_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = build_3d_raster(3, 4, 5); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + let arr = result + .as_any() + .downcast_ref::() + .expect("Expected Int32Array"); + assert_eq!(arr.value(0), 3); + } + + #[test] + fn numdimensions_with_band() { + let udf: ScalarUDF = rs_numdimensions_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Int32)]); + + let rasters = build_3d_raster(3, 4, 5); + let result = tester + .invoke_array_scalar(Arc::new(rasters), 1_i32) + .unwrap(); + let arr = result + .as_any() + .downcast_ref::() + .expect("Expected Int32Array"); + assert_eq!(arr.value(0), 3); + } + + #[test] + fn numdimensions_null_raster() { + let udf: ScalarUDF = rs_numdimensions_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = generate_test_rasters(1, Some(0)).unwrap(); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + assert!(result.is_null(0)); + } + + #[test] + fn numdimensions_mixed_bands_error() { + let udf: ScalarUDF = rs_numdimensions_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = build_mixed_dim_raster(); + let result = tester.invoke_array(Arc::new(rasters)); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("bands have different dimensionality"), + "Unexpected error: {err_msg}" + ); + } + + // ----------------------------------------------------------------------- + // RS_DimNames + // ----------------------------------------------------------------------- + + #[test] + fn dimnames_2d() { + let udf: ScalarUDF = rs_dimnames_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + tester.assert_return_type(list_utf8_type()); + + let 
rasters = build_2d_raster(4, 5); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + let list_arr = result + .as_any() + .downcast_ref::() + .expect("Expected ListArray"); + let values = list_arr.value(0); + let str_arr = values + .as_any() + .downcast_ref::() + .expect("Expected StringArray"); + assert_eq!(str_arr.len(), 2); + assert_eq!(str_arr.value(0), "y"); + assert_eq!(str_arr.value(1), "x"); + } + + #[test] + fn dimnames_3d() { + let udf: ScalarUDF = rs_dimnames_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = build_3d_raster(3, 4, 5); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + let list_arr = result + .as_any() + .downcast_ref::() + .expect("Expected ListArray"); + let values = list_arr.value(0); + let str_arr = values + .as_any() + .downcast_ref::() + .expect("Expected StringArray"); + assert_eq!(str_arr.len(), 3); + assert_eq!(str_arr.value(0), "time"); + assert_eq!(str_arr.value(1), "y"); + assert_eq!(str_arr.value(2), "x"); + } + + #[test] + fn dimnames_null_raster() { + let udf: ScalarUDF = rs_dimnames_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = generate_test_rasters(1, Some(0)).unwrap(); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + assert!(result.is_null(0)); + } + + #[test] + fn dimnames_mixed_bands_error() { + let udf: ScalarUDF = rs_dimnames_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = build_mixed_dim_raster(); + let result = tester.invoke_array(Arc::new(rasters)); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("bands have different dimension names"), + "Unexpected error: {err_msg}" + ); + } + + // ----------------------------------------------------------------------- + // RS_DimSize + // ----------------------------------------------------------------------- + + #[test] + fn dimsize_2d_x() { + let udf: 
ScalarUDF = rs_dimsize_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + let rasters = build_2d_raster(5, 4); + let result = tester.invoke_array_scalar(Arc::new(rasters), "x").unwrap(); + let arr = result + .as_any() + .downcast_ref::() + .expect("Expected Int64Array"); + assert_eq!(arr.value(0), 5); + } + + #[test] + fn dimsize_3d_time() { + let udf: ScalarUDF = rs_dimsize_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + let rasters = build_3d_raster(3, 4, 5); + let result = tester + .invoke_array_scalar(Arc::new(rasters), "time") + .unwrap(); + let arr = result + .as_any() + .downcast_ref::() + .expect("Expected Int64Array"); + assert_eq!(arr.value(0), 3); + } + + #[test] + fn dimsize_nonexistent() { + let udf: ScalarUDF = rs_dimsize_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + let rasters = build_2d_raster(4, 5); + let result = tester + .invoke_array_scalar(Arc::new(rasters), "nonexistent") + .unwrap(); + assert!(result.is_null(0)); + } + + #[test] + fn dimsize_with_band() { + let udf: ScalarUDF = rs_dimsize_udf().into(); + let tester = ScalarUdfTester::new( + udf, + vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int32), + ], + ); + + let rasters = build_3d_raster(3, 4, 5); + let result = tester + .invoke_array_scalar_scalar(Arc::new(rasters), "time", 1_i32) + .unwrap(); + let arr = result + .as_any() + .downcast_ref::() + .expect("Expected Int64Array"); + assert_eq!(arr.value(0), 3); + } + + #[test] + fn dimsize_null_raster() { + let udf: ScalarUDF = rs_dimsize_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + let rasters = generate_test_rasters(1, Some(0)).unwrap(); + let result = tester.invoke_array_scalar(Arc::new(rasters), "x").unwrap(); + assert!(result.is_null(0)); + } + + #[test] + 
fn dimsize_mixed_bands_error() { + let udf: ScalarUDF = rs_dimsize_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Utf8)]); + + let rasters = build_mixed_dim_raster(); + let result = tester.invoke_array_scalar(Arc::new(rasters), "time"); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("bands have different dimension sizes"), + "Unexpected error: {err_msg}" + ); + } + + // ----------------------------------------------------------------------- + // RS_Shape + // ----------------------------------------------------------------------- + + #[test] + fn shape_2d() { + let udf: ScalarUDF = rs_shape_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + tester.assert_return_type(list_int64_type()); + + let rasters = build_2d_raster(5, 4); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + let list_arr = result + .as_any() + .downcast_ref::() + .expect("Expected ListArray"); + let values = list_arr.value(0); + let int_arr = values + .as_any() + .downcast_ref::() + .expect("Expected Int64Array"); + assert_eq!(int_arr.len(), 2); + assert_eq!(int_arr.value(0), 4); // height + assert_eq!(int_arr.value(1), 5); // width + } + + #[test] + fn shape_3d() { + let udf: ScalarUDF = rs_shape_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = build_3d_raster(3, 4, 5); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + let list_arr = result + .as_any() + .downcast_ref::() + .expect("Expected ListArray"); + let values = list_arr.value(0); + let int_arr = values + .as_any() + .downcast_ref::() + .expect("Expected Int64Array"); + assert_eq!(int_arr.len(), 3); + assert_eq!(int_arr.value(0), 3); // time + assert_eq!(int_arr.value(1), 4); // height + assert_eq!(int_arr.value(2), 5); // width + } + + #[test] + fn shape_null_raster() { + let udf: ScalarUDF = rs_shape_udf().into(); + let tester = 
ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = generate_test_rasters(1, Some(0)).unwrap(); + let result = tester.invoke_array(Arc::new(rasters)).unwrap(); + assert!(result.is_null(0)); + } + + #[test] + fn shape_mixed_bands_error() { + let udf: ScalarUDF = rs_shape_udf().into(); + let tester = ScalarUdfTester::new(udf, vec![RASTER]); + + let rasters = build_mixed_dim_raster(); + let result = tester.invoke_array(Arc::new(rasters)); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("bands have different shape"), + "Unexpected error: {err_msg}" + ); + } +} diff --git a/rust/sedona-raster-functions/src/rs_envelope.rs b/rust/sedona-raster-functions/src/rs_envelope.rs index 2177a18ae..78f958b7b 100644 --- a/rust/sedona-raster-functions/src/rs_envelope.rs +++ b/rust/sedona-raster-functions/src/rs_envelope.rs @@ -105,8 +105,18 @@ impl SedonaScalarKernel for RsEnvelope { /// derives the min/max X and Y to produce an axis-aligned bounding box. /// For skewed/rotated rasters, this differs from the convex hull. 
fn write_envelope_wkb(raster: &dyn RasterRef, out: &mut impl std::io::Write) -> Result<()> { - let width = raster.metadata().width() as i64; - let height = raster.metadata().height() as i64; + let Some(width) = raster.width() else { + return Err(DataFusionError::Execution( + "Raster has no bands; cannot determine width".into(), + )); + }; + let Some(height) = raster.height() else { + return Err(DataFusionError::Execution( + "Raster has no bands; cannot determine height".into(), + )); + }; + let width = width as i64; + let height = height as i64; // Compute the four corners in world coordinates let (ulx, uly) = to_world_coordinate(raster, 0, 0); diff --git a/rust/sedona-raster-functions/src/rs_example.rs b/rust/sedona-raster-functions/src/rs_example.rs index 48e2fd5ce..f83da24d1 100644 --- a/rust/sedona-raster-functions/src/rs_example.rs +++ b/rust/sedona-raster-functions/src/rs_example.rs @@ -21,13 +21,8 @@ use datafusion_common::error::Result; use datafusion_expr::{ColumnarValue, Volatility}; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; use sedona_raster::builder::RasterBuilder; -use sedona_raster::traits::BandMetadata; -use sedona_raster::traits::RasterMetadata; use sedona_schema::{ - crs::lnglat, - datatypes::SedonaType, - matchers::ArgMatcher, - raster::{BandDataType, StorageType}, + crs::lnglat, datatypes::SedonaType, matchers::ArgMatcher, raster::BandDataType, }; /// RS_Example() scalar UDF implementation @@ -60,30 +55,15 @@ impl SedonaScalarKernel for RsExample { let executor = RasterExecutor::new(arg_types, args); let mut builder = RasterBuilder::new(1); - let raster_metadata = RasterMetadata { - width: 64, - height: 32, - upperleft_x: 43.08, - upperleft_y: 79.07, - scale_x: 2.0, - scale_y: 2.0, - skew_x: 1.0, - skew_y: 1.0, - }; + let width: u64 = 64; + let height: u64 = 32; let crs = lnglat().unwrap().to_crs_string(); - builder.start_raster(&raster_metadata, Some(&crs))?; + builder.start_raster_2d(width, height, 43.08, 79.07, 2.0, 
2.0, 1.0, 1.0, Some(&crs))?; let nodata_value = 127u8; for band_id in 1..=3 { - builder.start_band(BandMetadata { - datatype: BandDataType::UInt8, - nodata_value: Some(vec![nodata_value]), - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - })?; - - let mut band_data = - vec![band_id as u8; (raster_metadata.width * raster_metadata.height) as usize]; + builder.start_band_2d(BandDataType::UInt8, Some(&[nodata_value]))?; + + let mut band_data = vec![band_id as u8; (width * height) as usize]; band_data[0] = nodata_value; // set the top corner to nodata builder.band_data_writer().append_value(&band_data); @@ -121,16 +101,13 @@ mod tests { assert_eq!(raster_array.len(), 1); let raster = raster_array.get(0).unwrap(); - let metadata = raster.metadata(); - assert_eq!(metadata.width(), 64); - assert_eq!(metadata.height(), 32); - - let bands = raster.bands(); - let band = bands.band(1).unwrap(); - let band_metadata = band.metadata(); - assert_eq!(band_metadata.data_type().unwrap(), BandDataType::UInt8); - assert_eq!(band_metadata.nodata_value(), Some(&[127u8][..])); - assert_eq!(band_metadata.storage_type().unwrap(), StorageType::InDb); + assert_eq!(raster.width().unwrap(), 64); + assert_eq!(raster.height().unwrap(), 32); + + let band = raster.band(0).unwrap(); + assert_eq!(band.data_type(), BandDataType::UInt8); + assert_eq!(band.nodata(), Some(&[127u8][..])); + assert!(band.outdb_uri().is_none()); } else { panic!("Expected scalar struct result"); } diff --git a/rust/sedona-raster-functions/src/rs_georeference.rs b/rust/sedona-raster-functions/src/rs_georeference.rs index bf9b7470b..6afb9f120 100644 --- a/rust/sedona-raster-functions/src/rs_georeference.rs +++ b/rust/sedona-raster-functions/src/rs_georeference.rs @@ -158,13 +158,13 @@ fn format_georeference( match raster_opt { None => builder.append_null(), Some(raster) => { - let metadata = raster.metadata(); - let scale_x = metadata.scale_x(); - let scale_y = metadata.scale_y(); - let skew_x = 
metadata.skew_x(); - let skew_y = metadata.skew_y(); - let upper_left_x = metadata.upper_left_x(); - let upper_left_y = metadata.upper_left_y(); + let t = raster.transform(); + let scale_x = t[1]; + let scale_y = t[5]; + let skew_x = t[2]; + let skew_y = t[4]; + let upper_left_x = t[0]; + let upper_left_y = t[3]; let georeference = match format { GeoReferenceFormat::Gdal => { diff --git a/rust/sedona-raster-functions/src/rs_geotransform.rs b/rust/sedona-raster-functions/src/rs_geotransform.rs index 9c5a9ee46..0206b7bcd 100644 --- a/rust/sedona-raster-functions/src/rs_geotransform.rs +++ b/rust/sedona-raster-functions/src/rs_geotransform.rs @@ -162,22 +162,18 @@ impl SedonaScalarKernel for RsGeoTransform { match raster_opt { None => builder.append_null(), Some(raster) => { - let metadata = raster.metadata(); + let t = raster.transform(); match self.param { GeoTransformParam::Rotation => { let rotation = rotation(raster); builder.append_value(rotation); } - GeoTransformParam::ScaleX => builder.append_value(metadata.scale_x()), - GeoTransformParam::ScaleY => builder.append_value(metadata.scale_y()), - GeoTransformParam::SkewX => builder.append_value(metadata.skew_x()), - GeoTransformParam::SkewY => builder.append_value(metadata.skew_y()), - GeoTransformParam::UpperLeftX => { - builder.append_value(metadata.upper_left_x()) - } - GeoTransformParam::UpperLeftY => { - builder.append_value(metadata.upper_left_y()) - } + GeoTransformParam::ScaleX => builder.append_value(t[1]), + GeoTransformParam::ScaleY => builder.append_value(t[5]), + GeoTransformParam::SkewX => builder.append_value(t[2]), + GeoTransformParam::SkewY => builder.append_value(t[4]), + GeoTransformParam::UpperLeftX => builder.append_value(t[0]), + GeoTransformParam::UpperLeftY => builder.append_value(t[3]), } } } diff --git a/rust/sedona-raster-functions/src/rs_numbands.rs b/rust/sedona-raster-functions/src/rs_numbands.rs index f25c4df47..d52002c11 100644 --- a/rust/sedona-raster-functions/src/rs_numbands.rs 
+++ b/rust/sedona-raster-functions/src/rs_numbands.rs @@ -61,7 +61,7 @@ impl SedonaScalarKernel for RsNumBands { match raster_opt { None => builder.append_null(), Some(raster) => { - let num_bands = raster.bands().len() as u32; + let num_bands = raster.num_bands() as u32; builder.append_value(num_bands); } } diff --git a/rust/sedona-raster-functions/src/rs_pixel_functions.rs b/rust/sedona-raster-functions/src/rs_pixel_functions.rs index c6bb048bf..3e880b2f2 100644 --- a/rust/sedona-raster-functions/src/rs_pixel_functions.rs +++ b/rust/sedona-raster-functions/src/rs_pixel_functions.rs @@ -191,7 +191,7 @@ impl SedonaScalarKernel for RsPixelAsCentroid { let grid_x = (col_x - 1) as f64 + 0.5; let grid_y = (row_y - 1) as f64 + 0.5; - let affine = AffineMatrix::from_metadata(raster.metadata()); + let affine = AffineMatrix::from_transform(raster.transform()); let (wx, wy) = affine.transform(grid_x, grid_y); write_wkb_point(&mut builder, (wx, wy)) diff --git a/rust/sedona-raster-functions/src/rs_setsrid.rs b/rust/sedona-raster-functions/src/rs_setsrid.rs index 2ff6134e4..165e8a60e 100644 --- a/rust/sedona-raster-functions/src/rs_setsrid.rs +++ b/rust/sedona-raster-functions/src/rs_setsrid.rs @@ -516,29 +516,21 @@ mod tests { let modified = result_array.get(i).unwrap(); // Metadata preserved - assert_eq!(original.metadata().width(), modified.metadata().width()); - assert_eq!(original.metadata().height(), modified.metadata().height()); - assert_eq!( - original.metadata().upper_left_x(), - modified.metadata().upper_left_x() - ); - assert_eq!( - original.metadata().upper_left_y(), - modified.metadata().upper_left_y() - ); + assert_eq!(original.width().unwrap(), modified.width().unwrap()); + assert_eq!(original.height().unwrap(), modified.height().unwrap()); + assert_eq!(original.transform()[0], modified.transform()[0]); + assert_eq!(original.transform()[3], modified.transform()[3]); // Band data preserved - let orig_bands = original.bands(); - let mod_bands = modified.bands(); 
- assert_eq!(orig_bands.len(), mod_bands.len()); - for band_idx in 0..orig_bands.len() { - let orig_band = orig_bands.band(band_idx + 1).unwrap(); - let mod_band = mod_bands.band(band_idx + 1).unwrap(); - assert_eq!(orig_band.data(), mod_band.data()); + assert_eq!(original.num_bands(), modified.num_bands()); + for band_idx in 0..original.num_bands() { + let orig_band = original.band(band_idx).unwrap(); + let mod_band = modified.band(band_idx).unwrap(); assert_eq!( - orig_band.metadata().data_type().unwrap(), - mod_band.metadata().data_type().unwrap() + orig_band.contiguous_data().unwrap().as_ref(), + mod_band.contiguous_data().unwrap().as_ref() ); + assert_eq!(orig_band.data_type(), mod_band.data_type()); } // CRS changed diff --git a/rust/sedona-raster-functions/src/rs_size.rs b/rust/sedona-raster-functions/src/rs_size.rs index 6616bc56e..35135befe 100644 --- a/rust/sedona-raster-functions/src/rs_size.rs +++ b/rust/sedona-raster-functions/src/rs_size.rs @@ -20,6 +20,7 @@ use crate::executor::RasterExecutor; use arrow_array::builder::UInt64Builder; use arrow_schema::DataType; use datafusion_common::error::Result; +use datafusion_common::DataFusionError; use datafusion_expr::{ColumnarValue, Volatility}; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; use sedona_raster::traits::RasterRef; @@ -85,11 +86,19 @@ impl SedonaScalarKernel for RsSize { None => builder.append_null(), Some(raster) => match self.size_type { SizeType::Width => { - let width = raster.metadata().width(); + let Some(width) = raster.width() else { + return Err(DataFusionError::Execution( + "Raster has no bands; cannot determine width".into(), + )); + }; builder.append_value(width); } SizeType::Height => { - let height = raster.metadata().height(); + let Some(height) = raster.height() else { + return Err(DataFusionError::Execution( + "Raster has no bands; cannot determine height".into(), + )); + }; builder.append_value(height); } }, diff --git 
a/rust/sedona-raster-functions/src/rs_slice.rs b/rust/sedona-raster-functions/src/rs_slice.rs new file mode 100644 index 000000000..c5623df1c --- /dev/null +++ b/rust/sedona-raster-functions/src/rs_slice.rs @@ -0,0 +1,545 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow_schema::DataType; +use datafusion_common::cast::{as_int64_array, as_string_array}; +use datafusion_common::error::Result; +use datafusion_common::exec_err; +use datafusion_expr::{ColumnarValue, Volatility}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_raster::builder::RasterBuilder; +use sedona_raster::traits::{BandRef, RasterRef}; +use sedona_schema::datatypes::SedonaType; +use sedona_schema::matchers::ArgMatcher; + +use crate::executor::RasterExecutor; + +// =========================================================================== +// RS_Slice +// =========================================================================== + +/// RS_Slice(raster, dim_name, index) -> Raster +/// +/// Slices each band along the named dimension at the given index, removing +/// that dimension from the output. Spatial dimensions cannot be sliced. 
+pub fn rs_slice_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_slice", + vec![Arc::new(RsSlice {})], + Volatility::Immutable, + ) +} + +#[derive(Debug)] +struct RsSlice {} + +impl SedonaScalarKernel for RsSlice { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ + ArgMatcher::is_raster(), + ArgMatcher::is_string(), + ArgMatcher::is_integer(), + ], + SedonaType::Raster, + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + + let dim_name_array = args[1].clone().cast_to(&DataType::Utf8, None)?; + let dim_name_array = dim_name_array.into_array(executor.num_iterations())?; + let dim_name_array = as_string_array(&dim_name_array)?; + + let index_array = args[2].clone().cast_to(&DataType::Int64, None)?; + let index_array = index_array.into_array(executor.num_iterations())?; + let index_array = as_int64_array(&index_array)?; + + let mut new_builder = RasterBuilder::new(executor.num_iterations()); + let mut dim_name_iter = dim_name_array.iter(); + let mut index_iter = index_array.iter(); + + executor.execute_raster_void(|_i, raster_opt| { + let dim_name = dim_name_iter.next().unwrap(); + let index = index_iter.next().unwrap(); + + match (raster_opt, dim_name, index) { + (None, _, _) | (_, None, _) | (_, _, None) => { + new_builder.append_null()?; + Ok(()) + } + (Some(raster), Some(name), Some(idx)) => { + let idx = idx as u64; + validate_not_spatial(raster, name, "RS_Slice")?; + + let t: [f64; 6] = raster.transform().try_into().unwrap(); + new_builder.start_raster(&t, raster.x_dim(), raster.y_dim(), raster.crs())?; + + for band_idx in 0..raster.num_bands() { + let band = raster.band(band_idx).ok_or_else(|| { + datafusion_common::DataFusionError::Internal(format!( + "RS_Slice: band {band_idx} not found" + )) + })?; + + let dim_idx = band.dim_index(name).ok_or_else(|| { + 
datafusion_common::DataFusionError::Execution(format!( + "RS_Slice: dimension '{name}' not found in band {band_idx}" + )) + })?; + + let shape = band.shape(); + if idx >= shape[dim_idx] { + return exec_err!( + "RS_Slice: index {idx} out of range for dimension '{name}' with size {}", + shape[dim_idx] + ); + } + + let new_dim_names: Vec<&str> = band + .dim_names() + .into_iter() + .enumerate() + .filter(|&(i, _)| i != dim_idx) + .map(|(_, n)| n) + .collect(); + let new_shape: Vec = shape + .iter() + .enumerate() + .filter(|&(i, _)| i != dim_idx) + .map(|(_, &s)| s) + .collect(); + + let sliced_data = + extract_slice(band.as_ref(), dim_idx, idx, 1)?; + + let band_name = raster.band_name(band_idx); + let new_dim_name_refs: Vec<&str> = + new_dim_names.to_vec(); + new_builder.start_band( + band_name, + &new_dim_name_refs, + &new_shape, + band.data_type(), + band.nodata(), + None, + )?; + new_builder.band_data_writer().append_value(&sliced_data); + new_builder.finish_band()?; + } + + new_builder.finish_raster()?; + Ok(()) + } + } + })?; + + executor.finish(Arc::new(new_builder.finish()?)) + } +} + +// =========================================================================== +// RS_SliceRange +// =========================================================================== + +/// RS_SliceRange(raster, dim_name, start, end) -> Raster +/// +/// Narrows each band along the named dimension to the half-open range +/// `[start, end)`, keeping the dimension in the output with reduced size. 
+pub fn rs_slicerange_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_slicerange", + vec![Arc::new(RsSliceRange {})], + Volatility::Immutable, + ) +} + +#[derive(Debug)] +struct RsSliceRange {} + +impl SedonaScalarKernel for RsSliceRange { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ + ArgMatcher::is_raster(), + ArgMatcher::is_string(), + ArgMatcher::is_integer(), + ArgMatcher::is_integer(), + ], + SedonaType::Raster, + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + + let dim_name_array = args[1].clone().cast_to(&DataType::Utf8, None)?; + let dim_name_array = dim_name_array.into_array(executor.num_iterations())?; + let dim_name_array = as_string_array(&dim_name_array)?; + + let start_array = args[2].clone().cast_to(&DataType::Int64, None)?; + let start_array = start_array.into_array(executor.num_iterations())?; + let start_array = as_int64_array(&start_array)?; + + let end_array = args[3].clone().cast_to(&DataType::Int64, None)?; + let end_array = end_array.into_array(executor.num_iterations())?; + let end_array = as_int64_array(&end_array)?; + + let mut new_builder = RasterBuilder::new(executor.num_iterations()); + let mut dim_name_iter = dim_name_array.iter(); + let mut start_iter = start_array.iter(); + let mut end_iter = end_array.iter(); + + executor.execute_raster_void(|_i, raster_opt| { + let dim_name = dim_name_iter.next().unwrap(); + let start = start_iter.next().unwrap(); + let end = end_iter.next().unwrap(); + + match (raster_opt, dim_name, start, end) { + (None, _, _, _) | (_, None, _, _) | (_, _, None, _) | (_, _, _, None) => { + new_builder.append_null()?; + Ok(()) + } + (Some(raster), Some(name), Some(start_val), Some(end_val)) => { + let start_val = start_val as u64; + let end_val = end_val as u64; + validate_not_spatial(raster, name, 
"RS_SliceRange")?; + + if start_val >= end_val { + return exec_err!( + "RS_SliceRange: start ({start_val}) must be less than end ({end_val})" + ); + } + + let t: [f64; 6] = raster.transform().try_into().unwrap(); + new_builder.start_raster(&t, raster.x_dim(), raster.y_dim(), raster.crs())?; + + for band_idx in 0..raster.num_bands() { + let band = raster.band(band_idx).ok_or_else(|| { + datafusion_common::DataFusionError::Internal(format!( + "RS_SliceRange: band {band_idx} not found" + )) + })?; + + let dim_idx = band.dim_index(name).ok_or_else(|| { + datafusion_common::DataFusionError::Execution(format!( + "RS_SliceRange: dimension '{name}' not found in band {band_idx}" + )) + })?; + + let shape = band.shape(); + if end_val > shape[dim_idx] { + return exec_err!( + "RS_SliceRange: end ({end_val}) out of range for dimension '{name}' with size {}", + shape[dim_idx] + ); + } + + let range_len = end_val - start_val; + let dim_names = band.dim_names(); + let dim_name_refs: Vec<&str> = dim_names.to_vec(); + let mut new_shape: Vec = shape.to_vec(); + new_shape[dim_idx] = range_len; + + let sliced_data = + extract_slice(band.as_ref(), dim_idx, start_val, range_len)?; + + let band_name = raster.band_name(band_idx); + new_builder.start_band( + band_name, + &dim_name_refs, + &new_shape, + band.data_type(), + band.nodata(), + None, + )?; + new_builder.band_data_writer().append_value(&sliced_data); + new_builder.finish_band()?; + } + + new_builder.finish_raster()?; + Ok(()) + } + } + })?; + + executor.finish(Arc::new(new_builder.finish()?)) + } +} + +// =========================================================================== +// Shared helpers +// =========================================================================== + +/// Validate that the dimension name is not a spatial dimension. 
///
/// Returns an execution error prefixed with `func_name` when `dim_name` equals
/// the raster's x or y dimension name, and `Ok(())` otherwise.
fn validate_not_spatial(raster: &dyn RasterRef, dim_name: &str, func_name: &str) -> Result<()> {
    if dim_name == raster.x_dim() || dim_name == raster.y_dim() {
        return exec_err!("{func_name}: cannot slice spatial dimension '{dim_name}'");
    }
    Ok(())
}

/// Extract a slice of data from a band along a given dimension.
///
/// For `count == 1`, this extracts a single index (used by RS_Slice).
/// For `count > 1`, this extracts a contiguous range `[start, start+count)`
/// (used by RS_SliceRange).
///
/// The algorithm works on C-order (row-major) layout:
/// - `outer_count`: product of shape dimensions before `dim_idx`
/// - `inner_size`: product of shape dimensions after `dim_idx` * elem_size
/// - `stride`: `shape[dim_idx] * inner_size` (bytes between outer elements)
///
/// For each outer element, we copy `count * inner_size` bytes starting at
/// `start * inner_size` within that stride.
///
/// # Errors
///
/// Returns an error instead of panicking when `dim_idx` is not a valid
/// dimension of the band, when `[start, start+count)` does not fit inside that
/// dimension, or when the band's contiguous data holds fewer bytes than its
/// shape and element size require. Errors from `contiguous_data` are propagated.
pub(crate) fn extract_slice(
    band: &dyn BandRef,
    dim_idx: usize,
    start: u64,
    count: u64,
) -> Result<Vec<u8>> {
    let shape = band.shape();
    let Some(&dim_size) = shape.get(dim_idx) else {
        return exec_err!(
            "extract_slice: dimension index {dim_idx} out of range for a band with {} dimensions",
            shape.len()
        );
    };
    // `checked_add` keeps a wrapped `start + count` from passing the bounds test.
    if !matches!(start.checked_add(count), Some(end) if end <= dim_size) {
        return exec_err!(
            "extract_slice: range of {count} starting at {start} exceeds dimension size {dim_size}"
        );
    }

    let elem_size = band.data_type().byte_size() as u64;
    let data = band.contiguous_data()?;

    // Nothing to copy: empty selection or a band with no elements. Returning
    // early also guarantees `stride > 0` for `chunks_exact` below.
    if count == 0 || elem_size == 0 || shape.contains(&0) {
        return Ok(Vec::new());
    }

    // Total bytes the shape describes, computed with overflow checks, must be
    // present in the buffer before any slicing happens.
    let expected_len = shape
        .iter()
        .try_fold(elem_size, |bytes, &dim| bytes.checked_mul(dim))
        .and_then(|bytes| usize::try_from(bytes).ok());
    match expected_len {
        Some(len) if len <= data.len() => {}
        _ => {
            return exec_err!(
                "extract_slice: band data holds {} bytes, fewer than shape {shape:?} requires",
                data.len()
            )
        }
    }

    // Every quantity below is a factor of (or bounded by) the validated total
    // and all dimensions are non-zero, so the products cannot overflow and the
    // `as usize` casts cannot truncate.
    let outer_count = shape[..dim_idx].iter().product::<u64>() as usize;
    let inner_size = (shape[dim_idx + 1..].iter().product::<u64>() * elem_size) as usize;
    let stride = dim_size as usize * inner_size;
    let copy_size = count as usize * inner_size;
    let offset_within_stride = start as usize * inner_size;

    let mut output = Vec::with_capacity(outer_count * copy_size);
    // One `stride`-sized block per outer element; take the requested window of each.
    for block in data[..outer_count * stride].chunks_exact(stride) {
        output.extend_from_slice(&block[offset_within_stride..offset_within_stride + copy_size]);
    }

    Ok(output)
}

+// =========================================================================== +// Tests +// =========================================================================== + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::StructArray; + use
arrow_schema::DataType; + use datafusion_common::ScalarValue; + use datafusion_expr::ScalarUDF; + use sedona_raster::array::RasterStructArray; + use sedona_raster::builder::RasterBuilder; + use sedona_raster::traits::RasterRef; + use sedona_schema::datatypes::RASTER; + use sedona_schema::raster::BandDataType; + use sedona_testing::rasters::generate_test_rasters; + use sedona_testing::testers::ScalarUdfTester; + + /// Build a single-row 3D raster with shape [time, height, width] and + /// sequential UInt8 data so we can verify slicing correctness. + fn build_3d_raster_sequential(time: u64, height: u64, width: u64) -> StructArray { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster(&transform, "x", "y", None).unwrap(); + builder + .start_band( + None, + &["time", "y", "x"], + &[time, height, width], + BandDataType::UInt8, + None, + None, + ) + .unwrap(); + let total = (time * height * width) as usize; + let data: Vec = (0..total).map(|i| i as u8).collect(); + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + builder.finish().unwrap() + } + + #[test] + fn slice_3d_on_time() { + let udf: ScalarUDF = rs_slice_udf().into(); + let tester = ScalarUdfTester::new( + udf, + vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int64), + ], + ); + + // shape [time=3, y=4, x=5], sequential data 0..60 + let rasters = build_3d_raster_sequential(3, 4, 5); + let result = tester + .invoke_array_scalar_scalar(Arc::new(rasters), "time", 1_i64) + .unwrap(); + + let result_struct = result.as_any().downcast_ref::().unwrap(); + let raster_array = RasterStructArray::new(result_struct); + let raster = raster_array.get(0).unwrap(); + + // Should now be 2D: [y=4, x=5] + let band = raster.band(0).unwrap(); + assert_eq!(band.ndim(), 2); + assert_eq!(band.dim_names(), vec!["y", "x"]); + assert_eq!(band.shape(), &[4, 5]); + + // Data 
should be time slice 1: bytes 20..40 of original + let data = band.contiguous_data().unwrap(); + let expected: Vec = (20..40).collect(); + assert_eq!(data.as_ref(), &expected[..]); + } + + #[test] + fn slicerange_3d_on_time() { + let kernel = RsSliceRange {}; + let arg_types = vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int64), + SedonaType::Arrow(DataType::Int64), + ]; + + // shape [time=3, y=4, x=5], sequential data 0..60 + let rasters = build_3d_raster_sequential(3, 4, 5); + let args = vec![ + ColumnarValue::Array(Arc::new(rasters)), + ColumnarValue::Scalar(ScalarValue::Utf8(Some("time".to_string()))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(0))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(2))), + ]; + let result = kernel.invoke_batch(&arg_types, &args).unwrap(); + + let result_struct = match result { + ColumnarValue::Array(arr) => arr, + _ => panic!("Expected array result"), + }; + let result_struct = result_struct + .as_any() + .downcast_ref::() + .unwrap(); + let raster_array = RasterStructArray::new(result_struct); + let raster = raster_array.get(0).unwrap(); + + // Should still be 3D: [time=2, y=4, x=5] + let band = raster.band(0).unwrap(); + assert_eq!(band.ndim(), 3); + assert_eq!(band.dim_names(), vec!["time", "y", "x"]); + assert_eq!(band.shape(), &[2, 4, 5]); + + // Data should be first 2 time slices: bytes 0..40 + let data = band.contiguous_data().unwrap(); + let expected: Vec = (0..40).collect(); + assert_eq!(data.as_ref(), &expected[..]); + } + + #[test] + fn slice_spatial_dim_error() { + let udf: ScalarUDF = rs_slice_udf().into(); + let tester = ScalarUdfTester::new( + udf, + vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int64), + ], + ); + + let rasters = build_3d_raster_sequential(3, 4, 5); + + // Try to slice "x" + let result = tester.invoke_array_scalar_scalar(Arc::new(rasters.clone()), "x", 0_i64); + assert!(result.is_err()); + let err_msg = 
result.unwrap_err().to_string(); + assert!( + err_msg.contains("cannot slice spatial dimension"), + "Unexpected error: {err_msg}" + ); + + // Try to slice "y" + let result = tester.invoke_array_scalar_scalar(Arc::new(rasters), "y", 0_i64); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("cannot slice spatial dimension"), + "Unexpected error: {err_msg}" + ); + } + + #[test] + fn slice_index_out_of_range() { + let udf: ScalarUDF = rs_slice_udf().into(); + let tester = ScalarUdfTester::new( + udf, + vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int64), + ], + ); + + let rasters = build_3d_raster_sequential(3, 4, 5); + let result = tester.invoke_array_scalar_scalar(Arc::new(rasters), "time", 3_i64); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("out of range"), + "Unexpected error: {err_msg}" + ); + } + + #[test] + fn slice_null_raster() { + let udf: ScalarUDF = rs_slice_udf().into(); + let tester = ScalarUdfTester::new( + udf, + vec![ + RASTER, + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int64), + ], + ); + + let rasters = generate_test_rasters(1, Some(0)).unwrap(); + let result = tester + .invoke_array_scalar_scalar(Arc::new(rasters), "time", 0_i64) + .unwrap(); + + let result_struct = result.as_any().downcast_ref::().unwrap(); + let raster_array = RasterStructArray::new(result_struct); + assert!(raster_array.is_null(0)); + } +} diff --git a/rust/sedona-raster-functions/src/rs_spatial_predicates.rs b/rust/sedona-raster-functions/src/rs_spatial_predicates.rs index b0eaa0574..d71d6a18b 100644 --- a/rust/sedona-raster-functions/src/rs_spatial_predicates.rs +++ b/rust/sedona-raster-functions/src/rs_spatial_predicates.rs @@ -377,8 +377,18 @@ const CONVEXHULL_WKB_SIZE: usize = 93; /// Create WKB for a convex hull polygon for the raster fn write_convexhull_wkb(raster: &dyn RasterRef, out: &mut 
impl std::io::Write) -> Result<()> { - let width = raster.metadata().width() as i64; - let height = raster.metadata().height() as i64; + let Some(width) = raster.width() else { + return Err(DataFusionError::Execution( + "Raster has no bands; cannot determine width".into(), + )); + }; + let Some(height) = raster.height() else { + return Err(DataFusionError::Execution( + "Raster has no bands; cannot determine height".into(), + )); + }; + let width = width as i64; + let height = height as i64; let (ulx, uly) = to_world_coordinate(raster, 0, 0); let (urx, ury) = to_world_coordinate(raster, width, 0); @@ -401,13 +411,12 @@ mod tests { use datafusion_expr::ScalarUDF; use rstest::rstest; use sedona_raster::builder::RasterBuilder; - use sedona_raster::traits::{BandMetadata, RasterMetadata}; use sedona_schema::crs::deserialize_crs; use sedona_schema::crs::OGC_CRS84_PROJJSON; use sedona_schema::datatypes::Edges; use sedona_schema::datatypes::RASTER; use sedona_schema::datatypes::WKB_GEOMETRY; - use sedona_schema::raster::{BandDataType, StorageType}; + use sedona_schema::raster::BandDataType; use sedona_testing::compare::assert_array_equal; use sedona_testing::create::create_array as create_geom_array; use sedona_testing::rasters::generate_test_rasters; @@ -435,26 +444,10 @@ mod tests { /// If `crs` is `None`, the raster has no CRS. 
fn build_unit_raster(crs: Option<&str>) -> arrow_array::StructArray { let mut builder = RasterBuilder::new(1); - let metadata = RasterMetadata { - width: 1, - height: 1, - upperleft_x: 0.0, - upperleft_y: 1.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - builder.start_raster(&metadata, crs).unwrap(); builder - .start_band(BandMetadata { - datatype: BandDataType::UInt8, - nodata_value: None, - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - }) + .start_raster_2d(1, 1, 0.0, 1.0, 1.0, -1.0, 0.0, 0.0, crs) .unwrap(); + builder.start_band_2d(BandDataType::UInt8, None).unwrap(); builder.band_data_writer().append_value([0u8]); builder.finish_band().unwrap(); builder.finish_raster().unwrap(); diff --git a/rust/sedona-raster-functions/src/rs_srid.rs b/rust/sedona-raster-functions/src/rs_srid.rs index a9b472aeb..614efa0b2 100644 --- a/rust/sedona-raster-functions/src/rs_srid.rs +++ b/rust/sedona-raster-functions/src/rs_srid.rs @@ -126,9 +126,8 @@ mod tests { use datafusion_common::ScalarValue; use datafusion_expr::ScalarUDF; use sedona_raster::builder::RasterBuilder; - use sedona_raster::traits::{BandMetadata, RasterMetadata}; use sedona_schema::datatypes::RASTER; - use sedona_schema::raster::{BandDataType, StorageType}; + use sedona_schema::raster::BandDataType; use sedona_testing::compare::assert_array_equal; use sedona_testing::rasters::generate_test_rasters; use sedona_testing::testers::ScalarUdfTester; @@ -224,26 +223,10 @@ mod tests { } fn append_1x1_raster_with_crs(builder: &mut RasterBuilder, crs: Option<&str>) { - let raster_metadata = RasterMetadata { - width: 1, - height: 1, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - builder.start_raster(&raster_metadata, crs).unwrap(); builder - .start_band(BandMetadata { - datatype: BandDataType::UInt8, - nodata_value: None, - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - }) + 
.start_raster_2d(1, 1, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, crs) .unwrap(); + builder.start_band_2d(BandDataType::UInt8, None).unwrap(); builder.band_data_writer().append_value([0u8]); builder.finish_band().unwrap(); builder.finish_raster().unwrap(); diff --git a/rust/sedona-raster/src/affine_transformation.rs b/rust/sedona-raster/src/affine_transformation.rs index ca6441e73..e84fb84a0 100644 --- a/rust/sedona-raster/src/affine_transformation.rs +++ b/rust/sedona-raster/src/affine_transformation.rs @@ -15,14 +15,16 @@ // specific language governing permissions and limitations // under the License. -use crate::traits::{MetadataRef, RasterRef}; +use crate::traits::RasterRef; use arrow_schema::ArrowError; -/// Pre-computed affine transformation coefficients extracted from raster metadata. +/// Pre-computed affine transformation coefficients. /// -/// Constructing this struct pays the cost of reading metadata once (which may involve -/// vtable dispatch for Arrow-backed rasters). Subsequent `transform` / `inv_transform` -/// calls are pure arithmetic with no virtual calls. +/// Constructing this struct pays the cost of reading the transform once. +/// Subsequent `transform` / `inv_transform` calls are pure arithmetic. +/// +/// The 6-element GDAL GeoTransform convention is: +/// `[origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]` #[derive(Debug, Clone)] pub struct AffineMatrix { pub offset_x: f64, @@ -34,16 +36,23 @@ pub struct AffineMatrix { } impl AffineMatrix { - /// Build an `AffineMatrix` from any `MetadataRef` implementer. + /// Build an `AffineMatrix` from a 6-element GDAL GeoTransform slice. 
+ /// + /// Index mapping: `[0]=origin_x, [1]=scale_x, [2]=skew_x, [3]=origin_y, [4]=skew_y, [5]=scale_y` #[inline] - pub fn from_metadata(m: &dyn MetadataRef) -> Self { + pub fn from_transform(t: &[f64]) -> Self { + debug_assert!( + t.len() >= 6, + "transform slice must have at least 6 elements, got {}", + t.len() + ); Self { - offset_x: m.upper_left_x(), - offset_y: m.upper_left_y(), - scale_x: m.scale_x(), - scale_y: m.scale_y(), - skew_x: m.skew_x(), - skew_y: m.skew_y(), + offset_x: t[0], + scale_x: t[1], + skew_x: t[2], + offset_y: t[3], + skew_y: t[4], + scale_y: t[5], } } @@ -92,29 +101,28 @@ impl AffineMatrix { } } -/// Computes the rotation angle (in radians) of the raster based on its geotransform metadata. +/// Computes the rotation angle (in radians) of the raster based on its geotransform. #[inline] pub fn rotation(raster: &dyn RasterRef) -> f64 { - let metadata = raster.metadata(); - (-metadata.skew_x()).atan2(metadata.scale_x()) + let t = raster.transform(); + (-t[2]).atan2(t[1]) // skew_x=t[2], scale_x=t[1] } -/// Performs an affine transformation on the provided x and y coordinates based on the geotransform -/// data in the raster. +/// Performs an affine transformation on the provided x and y coordinates based on the geotransform. /// /// # Arguments -/// * `raster` - Reference to the raster containing metadata +/// * `raster` - Reference to the raster containing transform /// * `x` - X coordinate in pixel space (column) /// * `y` - Y coordinate in pixel space (row) #[inline] pub fn to_world_coordinate(raster: &dyn RasterRef, x: i64, y: i64) -> (f64, f64) { - AffineMatrix::from_metadata(raster.metadata()).transform(x as f64, y as f64) + AffineMatrix::from_transform(raster.transform()).transform(x as f64, y as f64) } /// Performs the inverse affine transformation to convert world coordinates back to raster pixel coordinates. 
/// /// # Arguments -/// * `raster` - Reference to the raster containing metadata +/// * `raster` - Reference to the raster containing transform /// * `world_x` - X coordinate in world space /// * `world_y` - Y coordinate in world space #[inline] @@ -124,139 +132,123 @@ pub fn to_raster_coordinate( world_y: f64, ) -> Result<(i64, i64), ArrowError> { let (rx, ry) = - AffineMatrix::from_metadata(raster.metadata()).inv_transform(world_x, world_y)?; + AffineMatrix::from_transform(raster.transform()).inv_transform(world_x, world_y)?; Ok((rx as i64, ry as i64)) } #[cfg(test)] mod tests { use super::*; - use crate::traits::{MetadataRef, RasterMetadata}; use approx::assert_relative_eq; use std::f64::consts::FRAC_1_SQRT_2; use std::f64::consts::PI; + /// Minimal RasterRef implementation for testing affine transforms. struct TestRaster { - metadata: RasterMetadata, + transform: [f64; 6], + } + + impl TestRaster { + fn new( + origin_x: f64, + origin_y: f64, + scale_x: f64, + scale_y: f64, + skew_x: f64, + skew_y: f64, + ) -> Self { + Self { + transform: [origin_x, scale_x, skew_x, origin_y, skew_y, scale_y], + } + } } impl RasterRef for TestRaster { - fn metadata(&self) -> &dyn MetadataRef { - &self.metadata + fn num_bands(&self) -> usize { + 0 + } + fn band(&self, _index: usize) -> Option> { + None + } + fn band_name(&self, _index: usize) -> Option<&str> { + None } fn crs(&self) -> Option<&str> { None } - fn bands(&self) -> &dyn crate::traits::BandsRef { - unimplemented!() + fn transform(&self) -> &[f64] { + &self.transform + } + fn x_dim(&self) -> &str { + "x" + } + fn y_dim(&self) -> &str { + "y" } } #[test] fn test_rotation() { - // 0 degree rotation -> gt[1.0, 0.0, 0.0, -1.0] - let raster = rotation_raster(1.0, -1.0, 0.0, 0.0); - let rot = rotation(&raster); - assert_eq!(rot, 0.0); - - // pi/2 -> gt[0.0, -1.0, 1.0, 0.0] - let raster = rotation_raster(0.0, 0.0, -1.0, 1.0); - let rot = rotation(&raster); - assert_relative_eq!(rot, PI / 2.0, epsilon = 1e-6); // 90 degrees 
in radians - - // pi/4 -> gt[0.70710678, -0.70710678, 0.70710678, 0.70710678] - let raster = rotation_raster(FRAC_1_SQRT_2, FRAC_1_SQRT_2, -FRAC_1_SQRT_2, FRAC_1_SQRT_2); - let rot = rotation(&raster); - assert_relative_eq!(rot, PI / 4.0, epsilon = 1e-6); // 45 degrees in radians - - // pi/3 -> gt[0.5, -0.866025, 0.866025, 0.5] - let raster = rotation_raster(0.5, 0.5, -0.866025, 0.866025); - let rot = rotation(&raster); - assert_relative_eq!(rot, PI / 3.0, epsilon = 1e-6); // 60 degrees in radians - - // pi -> gt[-1.0, 0.0, 0.0, -1.0] - let raster = rotation_raster(-1.0, -1.0, 0.0, 0.0); - let rot = rotation(&raster); - assert_relative_eq!(rot, -PI, epsilon = 1e-6); // 180 degrees in radians + // 0 degree rotation + let raster = TestRaster::new(0.0, 0.0, 1.0, -1.0, 0.0, 0.0); + assert_eq!(rotation(&raster), 0.0); + + // pi/2 + let raster = TestRaster::new(0.0, 0.0, 0.0, 0.0, -1.0, 1.0); + assert_relative_eq!(rotation(&raster), PI / 2.0, epsilon = 1e-6); + + // pi/4 + let raster = TestRaster::new( + 0.0, + 0.0, + FRAC_1_SQRT_2, + FRAC_1_SQRT_2, + -FRAC_1_SQRT_2, + FRAC_1_SQRT_2, + ); + assert_relative_eq!(rotation(&raster), PI / 4.0, epsilon = 1e-6); + + // pi/3 + let raster = TestRaster::new(0.0, 0.0, 0.5, 0.5, -0.866025, 0.866025); + assert_relative_eq!(rotation(&raster), PI / 3.0, epsilon = 1e-6); + + // pi + let raster = TestRaster::new(0.0, 0.0, -1.0, -1.0, 0.0, 0.0); + assert_relative_eq!(rotation(&raster), -PI, epsilon = 1e-6); } #[test] fn test_to_world_coordinate() { - // Test case with rotation/skew - let raster = TestRaster { - metadata: RasterMetadata { - width: 10, - height: 20, - upperleft_x: 100.0, - upperleft_y: 200.0, - scale_x: 1.0, - scale_y: -2.0, - skew_x: 0.25, - skew_y: 0.5, - }, - }; - - let (wx, wy) = to_world_coordinate(&raster, 0, 0); - assert_eq!((wx, wy), (100.0, 200.0)); - - let (wx, wy) = to_world_coordinate(&raster, 5, 10); - assert_eq!((wx, wy), (107.5, 182.5)); + let raster = TestRaster::new(100.0, 200.0, 1.0, -2.0, 0.25, 0.5); - 
let (wx, wy) = to_world_coordinate(&raster, 9, 19); - assert_eq!((wx, wy), (113.75, 166.5)); - - let (wx, wy) = to_world_coordinate(&raster, 1, 0); - assert_eq!((wx, wy), (101.0, 200.5)); - - let (wx, wy) = to_world_coordinate(&raster, 0, 1); - assert_eq!((wx, wy), (100.25, 198.0)); + assert_eq!(to_world_coordinate(&raster, 0, 0), (100.0, 200.0)); + assert_eq!(to_world_coordinate(&raster, 5, 10), (107.5, 182.5)); + assert_eq!(to_world_coordinate(&raster, 9, 19), (113.75, 166.5)); + assert_eq!(to_world_coordinate(&raster, 1, 0), (101.0, 200.5)); + assert_eq!(to_world_coordinate(&raster, 0, 1), (100.25, 198.0)); } #[test] fn test_to_raster_coordinate() { - // Test case with rotation/skew - let raster = TestRaster { - metadata: RasterMetadata { - width: 10, - height: 20, - upperleft_x: 100.0, - upperleft_y: 200.0, - scale_x: 1.0, - scale_y: -2.0, - skew_x: 0.25, - skew_y: 0.5, - }, - }; - - // Reverse of the to_world_coordinate tests - let (wx, wy) = to_raster_coordinate(&raster, 100.0, 200.0).unwrap(); - assert_eq!((wx, wy), (0, 0)); - - let (wx, wy) = to_raster_coordinate(&raster, 107.5, 182.5).unwrap(); - assert_eq!((wx, wy), (5, 10)); - - let (wx, wy) = to_raster_coordinate(&raster, 113.75, 166.5).unwrap(); - assert_eq!((wx, wy), (9, 19)); - - let (wx, wy) = to_raster_coordinate(&raster, 101.0, 200.5).unwrap(); - assert_eq!((wx, wy), (1, 0)); - - let (wx, wy) = to_raster_coordinate(&raster, 100.25, 198.0).unwrap(); - assert_eq!((wx, wy), (0, 1)); - - // Check error handling for zero determinant - let bad_raster = TestRaster { - metadata: RasterMetadata { - width: 10, - height: 20, - upperleft_x: 100.0, - upperleft_y: 200.0, - scale_x: 1.0, - scale_y: 0.0, - skew_x: 0.0, - skew_y: 0.0, - }, - }; + let raster = TestRaster::new(100.0, 200.0, 1.0, -2.0, 0.25, 0.5); + + assert_eq!(to_raster_coordinate(&raster, 100.0, 200.0).unwrap(), (0, 0)); + assert_eq!( + to_raster_coordinate(&raster, 107.5, 182.5).unwrap(), + (5, 10) + ); + assert_eq!( + 
to_raster_coordinate(&raster, 113.75, 166.5).unwrap(), + (9, 19) + ); + assert_eq!(to_raster_coordinate(&raster, 101.0, 200.5).unwrap(), (1, 0)); + assert_eq!( + to_raster_coordinate(&raster, 100.25, 198.0).unwrap(), + (0, 1) + ); + + // Zero determinant + let bad_raster = TestRaster::new(100.0, 200.0, 1.0, 0.0, 0.0, 0.0); let result = to_raster_coordinate(&bad_raster, 100.0, 200.0); assert!(result.is_err()); assert!(result @@ -266,21 +258,6 @@ mod tests { .contains("determinant is zero.")); } - fn rotation_raster(scale_x: f64, scale_y: f64, skew_x: f64, skew_y: f64) -> TestRaster { - TestRaster { - metadata: RasterMetadata { - width: 10, - height: 20, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x, - scale_y, - skew_x, - skew_y, - }, - } - } - fn test_affine() -> AffineMatrix { AffineMatrix { offset_x: 100.0, @@ -324,11 +301,6 @@ mod tests { }; let result = a.inv_transform(0.0, 0.0); assert!(result.is_err()); - assert!(result - .err() - .unwrap() - .to_string() - .contains("determinant is zero.")); } #[test] @@ -345,23 +317,14 @@ mod tests { } #[test] - fn test_affine_from_metadata() { - let m = RasterMetadata { - width: 10, - height: 20, - upperleft_x: 100.0, - upperleft_y: 200.0, - scale_x: 1.0, - scale_y: -2.0, - skew_x: 0.25, - skew_y: 0.5, - }; - let a = AffineMatrix::from_metadata(&m); + fn test_affine_from_transform() { + let t = [100.0, 1.0, 0.25, 200.0, 0.5, -2.0]; + let a = AffineMatrix::from_transform(&t); assert_eq!(a.offset_x, 100.0); - assert_eq!(a.offset_y, 200.0); assert_eq!(a.scale_x, 1.0); - assert_eq!(a.scale_y, -2.0); assert_eq!(a.skew_x, 0.25); + assert_eq!(a.offset_y, 200.0); assert_eq!(a.skew_y, 0.5); + assert_eq!(a.scale_y, -2.0); } } diff --git a/rust/sedona-raster/src/array.rs b/rust/sedona-raster/src/array.rs index 07a4bce04..e67ebaa39 100644 --- a/rust/sedona-raster/src/array.rs +++ b/rust/sedona-raster/src/array.rs @@ -15,484 +15,326 @@ // specific language governing permissions and limitations // under the License. 
+use std::borrow::Cow; + use arrow_array::{ - Array, BinaryArray, BinaryViewArray, Float64Array, ListArray, StringArray, StringViewArray, - StructArray, UInt32Array, UInt64Array, + Array, BinaryArray, BinaryViewArray, Float64Array, Int64Array, ListArray, StringArray, + StringViewArray, StructArray, UInt32Array, UInt64Array, }; use arrow_schema::ArrowError; -use crate::traits::{ - BandIterator, BandMetadataRef, BandRef, BandsRef, MetadataRef, RasterMetadata, RasterRef, -}; -use sedona_schema::raster::{ - band_indices, band_metadata_indices, metadata_indices, raster_indices, BandDataType, - StorageType, -}; +use crate::traits::{BandRef, NdBuffer, RasterRef}; +use sedona_schema::raster::{band_indices, raster_indices, BandDataType}; -/// Implement MetadataRef for RasterMetadata to allow direct use with builder -impl MetadataRef for RasterMetadata { - fn width(&self) -> u64 { - self.width - } - fn height(&self) -> u64 { - self.height - } - fn upper_left_x(&self) -> f64 { - self.upperleft_x - } - fn upper_left_y(&self) -> f64 { - self.upperleft_y - } - fn scale_x(&self) -> f64 { - self.scale_x - } - fn scale_y(&self) -> f64 { - self.scale_y - } - fn skew_x(&self) -> f64 { - self.skew_x - } - fn skew_y(&self) -> f64 { - self.skew_y - } -} +// --------------------------------------------------------------------------- +// Band implementation (Arrow-backed) +// --------------------------------------------------------------------------- -/// Implementation of MetadataRef for Arrow StructArray -struct MetadataRefImpl<'a> { - width_array: &'a UInt64Array, - height_array: &'a UInt64Array, - upper_left_x_array: &'a Float64Array, - upper_left_y_array: &'a Float64Array, - scale_x_array: &'a Float64Array, - scale_y_array: &'a Float64Array, - skew_x_array: &'a Float64Array, - skew_y_array: &'a Float64Array, - index: usize, +/// Arrow-backed implementation of BandRef for a single band within a raster. 
+struct BandRefImpl<'a> { + // Band metadata arrays (indexed by absolute band row) + dim_names_list: &'a ListArray, + dim_names_values: &'a StringArray, + shape_list: &'a ListArray, + shape_values: &'a UInt64Array, + datatype_array: &'a UInt32Array, + nodata_array: &'a BinaryArray, + strides_list: &'a ListArray, + strides_values: &'a Int64Array, + offset_array: &'a UInt64Array, + outdb_uri_array: &'a StringArray, + data_array: &'a BinaryViewArray, + /// Absolute row index within the flattened bands arrays + band_row: usize, } -impl<'a> MetadataRef for MetadataRefImpl<'a> { - #[inline(always)] - fn width(&self) -> u64 { - self.width_array.value(self.index) - } - - #[inline(always)] - fn height(&self) -> u64 { - self.height_array.value(self.index) - } - - #[inline(always)] - fn upper_left_x(&self) -> f64 { - self.upper_left_x_array.value(self.index) - } - - #[inline(always)] - fn upper_left_y(&self) -> f64 { - self.upper_left_y_array.value(self.index) - } - - #[inline(always)] - fn scale_x(&self) -> f64 { - self.scale_x_array.value(self.index) +impl<'a> BandRef for BandRefImpl<'a> { + fn ndim(&self) -> usize { + self.shape_list.value_length(self.band_row) as usize } - #[inline(always)] - fn scale_y(&self) -> f64 { - self.scale_y_array.value(self.index) + fn dim_names(&self) -> Vec<&str> { + let start = self.dim_names_list.value_offsets()[self.band_row] as usize; + let end = self.dim_names_list.value_offsets()[self.band_row + 1] as usize; + (start..end) + .map(|i| self.dim_names_values.value(i)) + .collect() } - #[inline(always)] - fn skew_x(&self) -> f64 { - self.skew_x_array.value(self.index) + fn shape(&self) -> &[u64] { + let start = self.shape_list.value_offsets()[self.band_row] as usize; + let end = self.shape_list.value_offsets()[self.band_row + 1] as usize; + &self.shape_values.values()[start..end] } - #[inline(always)] - fn skew_y(&self) -> f64 { - self.skew_y_array.value(self.index) + fn data_type(&self) -> BandDataType { + let value = 
self.datatype_array.value(self.band_row); + BandDataType::try_from_u32(value) + .unwrap_or_else(|| panic!("Unknown band data type: {value}")) } -} -/// Implementation of BandMetadataRef for Arrow StructArray -struct BandMetadataRefImpl<'a> { - nodata_array: &'a BinaryArray, - storage_type_array: &'a UInt32Array, - datatype_array: &'a UInt32Array, - outdb_url_array: &'a StringArray, - outdb_band_id_array: &'a UInt32Array, - band_index: usize, -} - -impl<'a> BandMetadataRef for BandMetadataRefImpl<'a> { - fn nodata_value(&self) -> Option<&[u8]> { - if self.nodata_array.is_null(self.band_index) { + fn nodata(&self) -> Option<&[u8]> { + if self.nodata_array.is_null(self.band_row) { None } else { - Some(self.nodata_array.value(self.band_index)) + Some(self.nodata_array.value(self.band_row)) } } - fn storage_type(&self) -> Result { - let value = self.storage_type_array.value(self.band_index); - let storage_type = match value { - 0 => StorageType::InDb, - 1 => StorageType::OutDbRef, - _ => { - return Err(ArrowError::InvalidArgumentError(format!( - "Unknown storage type: {}", - value - ))) - } - }; - Ok(storage_type) - } - - fn data_type(&self) -> Result { - let value = self.datatype_array.value(self.band_index); - let band_data_type = match value { - 1 => BandDataType::UInt8, - 2 => BandDataType::UInt16, - 3 => BandDataType::Int16, - 4 => BandDataType::UInt32, - 5 => BandDataType::Int32, - 6 => BandDataType::Float32, - 7 => BandDataType::Float64, - 8 => BandDataType::UInt64, - 9 => BandDataType::Int64, - 10 => BandDataType::Int8, - _ => { - return Err(ArrowError::InvalidArgumentError(format!( - "Unknown band data type: {}", - self.datatype_array.value(self.band_index) - ))) - } - }; - Ok(band_data_type) - } - - fn outdb_url(&self) -> Option<&str> { - if self.outdb_url_array.is_null(self.band_index) { - None - } else { - Some(self.outdb_url_array.value(self.band_index)) - } - } - - fn outdb_band_id(&self) -> Option { - if self.outdb_band_id_array.is_null(self.band_index) { 
+ fn outdb_uri(&self) -> Option<&str> { + if self.outdb_uri_array.is_null(self.band_row) { None } else { - Some(self.outdb_band_id_array.value(self.band_index)) + Some(self.outdb_uri_array.value(self.band_row)) } } -} -/// Implementation of BandRef for accessing individual band data -struct BandRefImpl<'a> { - band_metadata: BandMetadataRefImpl<'a>, - band_data: &'a [u8], -} + fn nd_buffer(&self) -> Result, ArrowError> { + let strides_start = self.strides_list.value_offsets()[self.band_row] as usize; + let strides_end = self.strides_list.value_offsets()[self.band_row + 1] as usize; -impl<'a> BandRef for BandRefImpl<'a> { - fn metadata(&self) -> &dyn BandMetadataRef { - &self.band_metadata + Ok(NdBuffer { + buffer: self.data_array.value(self.band_row), + shape: self.shape(), + strides: &self.strides_values.values()[strides_start..strides_end], + offset: self.offset_array.value(self.band_row), + data_type: self.data_type(), + }) } - fn data(&self) -> &[u8] { - self.band_data + fn contiguous_data(&self) -> Result, ArrowError> { + // Phase 1: all data is contiguous, so always return Borrowed + Ok(Cow::Borrowed(self.data_array.value(self.band_row))) } } -/// Implementation of BandsRef for accessing all bands in a raster -struct BandsRefImpl<'a> { - bands_list: &'a ListArray, - raster_index: usize, - // Direct references to the metadata and data arrays - nodata_array: &'a BinaryArray, - storage_type_array: &'a UInt32Array, - datatype_array: &'a UInt32Array, - outdb_url_array: &'a StringArray, - outdb_band_id_array: &'a UInt32Array, - band_data_array: &'a BinaryViewArray, -} - -impl<'a> BandsRef for BandsRefImpl<'a> { - fn len(&self) -> usize { - self.bands_list.value_length(self.raster_index) as usize - } - - /// Get a specific band by number (1-based index) - fn band(&self, number: usize) -> Result, ArrowError> { - if number == 0 { - return Err(ArrowError::InvalidArgumentError(format!( - "Invalid band number {number}: band numbers must be 1-based" - ))); - } - // By 
convention, band numbers are 1-based. - // Convert to zero-based index. - let index = number - 1; - if index >= self.len() { - return Err(ArrowError::InvalidArgumentError(format!( - "Band number {} is out of range: this raster has {} bands", - number, - self.len() - ))); - } - - let start = self.bands_list.value_offsets()[self.raster_index] as usize; - let band_row = start + index; - - let band_metadata = BandMetadataRefImpl { - nodata_array: self.nodata_array, - storage_type_array: self.storage_type_array, - datatype_array: self.datatype_array, - outdb_url_array: self.outdb_url_array, - outdb_band_id_array: self.outdb_band_id_array, - band_index: band_row, - }; - - let band_data = self.band_data_array.value(band_row); - - Ok(Box::new(BandRefImpl { - band_metadata, - band_data, - })) - } - - fn iter(&self) -> Box + '_> { - Box::new(BandIteratorImpl { - bands: self, - current: 1, // Start at 1 for 1-based band numbering - }) - } -} +// --------------------------------------------------------------------------- +// Raster implementation (Arrow-backed) +// --------------------------------------------------------------------------- -/// Concrete implementation of BandIterator trait -pub struct BandIteratorImpl<'a> { - bands: &'a dyn BandsRef, - current: usize, +/// Arrow-backed implementation of RasterRef for a single raster row. +pub struct RasterRefImpl<'a> { + raster_struct_array: &'a RasterStructArray<'a>, + raster_index: usize, } -impl<'a> Iterator for BandIteratorImpl<'a> { - type Item = Box; - - fn next(&mut self) -> Option { - // current is 1-based, compare against len() + 1 - if self.current <= self.bands.len() { - let band = self.bands.band(self.current).ok(); // Convert Result to Option - self.current += 1; - band - } else { +impl<'a> RasterRefImpl<'a> { + /// Returns the raw CRS string reference with the array's lifetime. 
+ pub fn crs_str_ref(&self) -> Option<&'a str> { + if self + .raster_struct_array + .crs_array + .is_null(self.raster_index) + { None + } else { + Some(self.raster_struct_array.crs_array.value(self.raster_index)) } } - - fn size_hint(&self) -> (usize, Option) { - // current is 1-based, so remaining calculation needs adjustment - let remaining = self.bands.len().saturating_sub(self.current - 1); - (remaining, Some(remaining)) - } } -impl<'a> BandIterator<'a> for BandIteratorImpl<'a> { - fn len(&self) -> usize { - // current is 1-based, so remaining calculation needs adjustment - self.bands.len().saturating_sub(self.current - 1) +impl<'a> RasterRef for RasterRefImpl<'a> { + fn num_bands(&self) -> usize { + self.raster_struct_array + .bands_list + .value_length(self.raster_index) as usize } -} - -impl ExactSizeIterator for BandIteratorImpl<'_> {} -/// Implementation of RasterRef for complete raster access -pub struct RasterRefImpl<'a> { - metadata: MetadataRefImpl<'a>, - crs: &'a StringViewArray, - bands: BandsRefImpl<'a>, -} - -impl<'a> RasterRefImpl<'a> { - /// Creates a new RasterRefImpl that provides zero-copy access to the raster at the specified index. 
- /// - /// # Arguments - /// * `raster_struct_array` - The Arrow StructArray containing raster data - /// * `raster_index` - The zero-based index of the raster to access - #[inline(always)] - pub fn new(raster_struct_array: &RasterStructArray<'a>, raster_index: usize) -> Self { - let metadata = MetadataRefImpl { - width_array: raster_struct_array.width_array, - height_array: raster_struct_array.height_array, - upper_left_x_array: raster_struct_array.upper_left_x_array, - upper_left_y_array: raster_struct_array.upper_left_y_array, - scale_x_array: raster_struct_array.scale_x_array, - scale_y_array: raster_struct_array.scale_y_array, - skew_x_array: raster_struct_array.skew_x_array, - skew_y_array: raster_struct_array.skew_y_array, - index: raster_index, - }; - - let bands = BandsRefImpl { - bands_list: raster_struct_array.bands_list, - raster_index, - nodata_array: raster_struct_array.band_nodata_array, - storage_type_array: raster_struct_array.band_storage_type_array, - datatype_array: raster_struct_array.band_datatype_array, - outdb_url_array: raster_struct_array.band_outdb_url_array, - outdb_band_id_array: raster_struct_array.band_outdb_band_id_array, - band_data_array: raster_struct_array.band_data_array, - }; - - Self { - metadata, - crs: raster_struct_array.crs, - bands, + fn band(&self, index: usize) -> Option> { + if index >= self.num_bands() { + return None; } + let start = self.raster_struct_array.bands_list.value_offsets()[self.raster_index] as usize; + let band_row = start + index; + Some(Box::new(BandRefImpl { + dim_names_list: self.raster_struct_array.band_dim_names_list, + dim_names_values: self.raster_struct_array.band_dim_names_values, + shape_list: self.raster_struct_array.band_shape_list, + shape_values: self.raster_struct_array.band_shape_values, + datatype_array: self.raster_struct_array.band_datatype_array, + nodata_array: self.raster_struct_array.band_nodata_array, + strides_list: self.raster_struct_array.band_strides_list, + strides_values: 
self.raster_struct_array.band_strides_values, + offset_array: self.raster_struct_array.band_offset_array, + outdb_uri_array: self.raster_struct_array.band_outdb_uri_array, + data_array: self.raster_struct_array.band_data_array, + band_row, + })) } - pub fn crs_str_ref(&self) -> Option<&'a str> { - if self.crs.is_null(self.bands.raster_index) { + fn band_name(&self, index: usize) -> Option<&str> { + if index >= self.num_bands() { + return None; + } + let start = self.raster_struct_array.bands_list.value_offsets()[self.raster_index] as usize; + let band_row = start + index; + if self.raster_struct_array.band_name_array.is_null(band_row) { None } else { - Some(self.crs.value(self.bands.raster_index)) + Some(self.raster_struct_array.band_name_array.value(band_row)) } } -} -impl<'a> RasterRef for RasterRefImpl<'a> { - #[inline(always)] - fn metadata(&self) -> &dyn MetadataRef { - &self.metadata - } - - #[inline(always)] fn crs(&self) -> Option<&str> { self.crs_str_ref() } - #[inline(always)] - fn bands(&self) -> &dyn BandsRef { - &self.bands + fn transform(&self) -> &[f64] { + let start = + self.raster_struct_array.transform_list.value_offsets()[self.raster_index] as usize; + let end = + self.raster_struct_array.transform_list.value_offsets()[self.raster_index + 1] as usize; + debug_assert!( + end - start >= 6, + "transform list must have at least 6 elements for raster {}, got {}", + self.raster_index, + end - start + ); + &self.raster_struct_array.transform_values.values()[start..start + 6] + } + + fn x_dim(&self) -> &str { + self.raster_struct_array + .x_dim_array + .value(self.raster_index) + } + + fn y_dim(&self) -> &str { + self.raster_struct_array + .y_dim_array + .value(self.raster_index) } } -/// Access rasters from the Arrow StructArray +// --------------------------------------------------------------------------- +// RasterStructArray — efficient columnar access to rasters +// --------------------------------------------------------------------------- + +/// 
Access rasters from the Arrow StructArray. /// -/// This provides efficient, zero-copy access to raster data stored in Arrow format. +/// Provides efficient, zero-copy access to N-D raster data stored in Arrow format. pub struct RasterStructArray<'a> { raster_array: &'a StructArray, - width_array: &'a UInt64Array, - height_array: &'a UInt64Array, - upper_left_x_array: &'a Float64Array, - upper_left_y_array: &'a Float64Array, - scale_x_array: &'a Float64Array, - scale_y_array: &'a Float64Array, - skew_x_array: &'a Float64Array, - skew_y_array: &'a Float64Array, - crs: &'a StringViewArray, + // Top-level fields + crs_array: &'a StringViewArray, + transform_list: &'a ListArray, + transform_values: &'a Float64Array, + x_dim_array: &'a StringViewArray, + y_dim_array: &'a StringViewArray, bands_list: &'a ListArray, - band_nodata_array: &'a BinaryArray, - band_storage_type_array: &'a UInt32Array, + // Band-level fields (flattened across all bands in all rasters) + band_name_array: &'a StringArray, + band_dim_names_list: &'a ListArray, + band_dim_names_values: &'a StringArray, + band_shape_list: &'a ListArray, + band_shape_values: &'a UInt64Array, band_datatype_array: &'a UInt32Array, - band_outdb_url_array: &'a StringArray, - band_outdb_band_id_array: &'a UInt32Array, + band_nodata_array: &'a BinaryArray, + band_strides_list: &'a ListArray, + band_strides_values: &'a Int64Array, + band_offset_array: &'a UInt64Array, + band_outdb_uri_array: &'a StringArray, band_data_array: &'a BinaryViewArray, } impl<'a> RasterStructArray<'a> { - /// Create a new RasterStructArray from an existing StructArray + /// Create a new RasterStructArray from an existing StructArray. 
#[inline] pub fn new(raster_array: &'a StructArray) -> Self { - let crs = raster_array + // Top-level fields + let crs_array = raster_array .column(raster_indices::CRS) .as_any() .downcast_ref::() .unwrap(); - - // Extract the metadata arrays for direct access - let metadata_struct = raster_array - .column(raster_indices::METADATA) + let transform_list = raster_array + .column(raster_indices::TRANSFORM) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let width_array = metadata_struct - .column(metadata_indices::WIDTH) + let transform_values = transform_list + .values() .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let height_array = metadata_struct - .column(metadata_indices::HEIGHT) + let x_dim_array = raster_array + .column(raster_indices::X_DIM) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let upper_left_x_array = metadata_struct - .column(metadata_indices::UPPERLEFT_X) + let y_dim_array = raster_array + .column(raster_indices::Y_DIM) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let upper_left_y_array = metadata_struct - .column(metadata_indices::UPPERLEFT_Y) + + // Bands list and nested struct + let bands_list = raster_array + .column(raster_indices::BANDS) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let scale_x_array = metadata_struct - .column(metadata_indices::SCALE_X) + let bands_struct = bands_list + .values() .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let scale_y_array = metadata_struct - .column(metadata_indices::SCALE_Y) + + // Band-level fields + let band_name_array = bands_struct + .column(band_indices::NAME) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let skew_x_array = metadata_struct - .column(metadata_indices::SKEW_X) + let band_dim_names_list = bands_struct + .column(band_indices::DIM_NAMES) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let skew_y_array = metadata_struct - .column(metadata_indices::SKEW_Y) 
+ let band_dim_names_values = band_dim_names_list + .values() .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - - // Extract the band arrays for direct access - let bands_list = raster_array - .column(raster_indices::BANDS) + let band_shape_list = bands_struct + .column(band_indices::SHAPE) .as_any() .downcast_ref::() .unwrap(); - let bands_struct = bands_list + let band_shape_values = band_shape_list .values() .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let band_metadata_struct = bands_struct - .column(band_indices::METADATA) + let band_datatype_array = bands_struct + .column(band_indices::DATA_TYPE) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let band_nodata_array = band_metadata_struct - .column(band_metadata_indices::NODATAVALUE) + let band_nodata_array = bands_struct + .column(band_indices::NODATA) .as_any() .downcast_ref::() .unwrap(); - let band_storage_type_array = band_metadata_struct - .column(band_metadata_indices::STORAGE_TYPE) + let band_strides_list = bands_struct + .column(band_indices::STRIDES) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let band_datatype_array = band_metadata_struct - .column(band_metadata_indices::DATATYPE) + let band_strides_values = band_strides_list + .values() .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let band_outdb_url_array = band_metadata_struct - .column(band_metadata_indices::OUTDB_URL) + let band_offset_array = bands_struct + .column(band_indices::OFFSET) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); - let band_outdb_band_id_array = band_metadata_struct - .column(band_metadata_indices::OUTDB_BAND_ID) + let band_outdb_uri_array = bands_struct + .column(band_indices::OUTDB_URI) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); let band_data_array = bands_struct .column(band_indices::DATA) @@ -502,328 +344,56 @@ impl<'a> RasterStructArray<'a> { Self { raster_array, - width_array, - height_array, - 
upper_left_x_array, - upper_left_y_array, - scale_x_array, - scale_y_array, - skew_x_array, - skew_y_array, - crs, + crs_array, + transform_list, + transform_values, + x_dim_array, + y_dim_array, bands_list, - band_nodata_array, - band_storage_type_array, + band_name_array, + band_dim_names_list, + band_dim_names_values, + band_shape_list, + band_shape_values, band_datatype_array, - band_outdb_url_array, - band_outdb_band_id_array, + band_nodata_array, + band_strides_list, + band_strides_values, + band_offset_array, + band_outdb_uri_array, band_data_array, } } - /// Get the total number of rasters in the array + /// Get the total number of rasters in the array. #[inline(always)] pub fn len(&self) -> usize { self.raster_array.len() } - /// Check if the array is empty + /// Check if the array is empty. #[inline(always)] pub fn is_empty(&self) -> bool { self.raster_array.is_empty() } - /// Get a specific raster by index without consuming the iterator + /// Get a specific raster by index. #[inline(always)] - pub fn get(&self, index: usize) -> Result, ArrowError> { + pub fn get(&'a self, index: usize) -> Result, ArrowError> { if index >= self.raster_array.len() { return Err(ArrowError::InvalidArgumentError(format!( "Invalid raster index: {index}" ))); } - - Ok(RasterRefImpl::new(self, index)) + Ok(RasterRefImpl { + raster_struct_array: self, + raster_index: index, + }) } + /// Check if a raster at the given index is null. 
#[inline(always)] pub fn is_null(&self, index: usize) -> bool { self.raster_array.is_null(index) } } - -#[cfg(test)] -mod tests { - use super::*; - use crate::builder::RasterBuilder; - use crate::traits::{BandMetadata, RasterMetadata}; - use arrow_schema::DataType; - use sedona_schema::raster::{BandDataType, StorageType}; - use sedona_testing::rasters::generate_test_rasters; - - #[test] - fn test_array_basic_functionality() { - // Create a simple raster for testing using the correct API - let mut builder = RasterBuilder::new(10); // capacity - - let metadata = RasterMetadata { - width: 10, - height: 10, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - - let epsg4326 = "EPSG:4326"; - - builder.start_raster(&metadata, Some(epsg4326)).unwrap(); - - let band_metadata = BandMetadata { - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }; - - // Add a single band with some test data using the correct API - builder.start_band(band_metadata.clone()).unwrap(); - let test_data = vec![1u8; 100]; // 10x10 raster with value 1 - builder.band_data_writer().append_value(&test_data); - builder.finish_band().unwrap(); - let result = builder.finish_raster(); - assert!(result.is_ok()); - - let raster_array = builder.finish().unwrap(); - - // Test the array - let rasters = RasterStructArray::new(&raster_array); - - assert_eq!(rasters.len(), 1); - assert!(!rasters.is_empty()); - - let raster = rasters.get(0).unwrap(); - let metadata = raster.metadata(); - - assert_eq!(metadata.width(), 10); - assert_eq!(metadata.height(), 10); - assert_eq!(metadata.scale_x(), 1.0); - assert_eq!(metadata.scale_y(), -1.0); - - let bands = raster.bands(); - assert_eq!(bands.len(), 1); - assert!(!bands.is_empty()); - - // Access band with 1-based band_number - let band = bands.band(1).unwrap(); - assert_eq!(band.data().len(), 100); - 
assert_eq!(band.data()[0], 1u8); - - let band_meta = band.metadata(); - assert_eq!(band_meta.storage_type().unwrap(), StorageType::InDb); - assert_eq!(band_meta.data_type().unwrap(), BandDataType::UInt8); - - let crs = raster.crs().unwrap(); - assert_eq!(crs, epsg4326); - - // Test array over bands - let band_iter: Vec<_> = bands.iter().collect(); - assert_eq!(band_iter.len(), 1); - } - - #[test] - fn test_multi_band_array() { - let mut builder = RasterBuilder::new(3); - - let metadata = RasterMetadata { - width: 5, - height: 5, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - - builder.start_raster(&metadata, None).unwrap(); - - // Add three bands using the correct API - for band_idx in 0..3 { - let band_metadata = BandMetadata { - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }; - - builder.start_band(band_metadata).unwrap(); - let test_data = vec![band_idx as u8; 25]; // 5x5 raster - builder.band_data_writer().append_value(&test_data); - builder.finish_band().unwrap(); - } - - let result = builder.finish_raster(); - assert!(result.is_ok()); - - let raster_array = builder.finish().unwrap(); - - let rasters = RasterStructArray::new(&raster_array); - let raster = rasters.get(0).unwrap(); - let bands = raster.bands(); - - assert_eq!(bands.len(), 3); - - // Test each band has different data - // Use 1-based band numbers - for i in 0..3 { - // Access band with 1-based band_number - let band = bands.band(i + 1).unwrap(); - let expected_value = i as u8; - assert!(band.data().iter().all(|&x| x == expected_value)); - } - - // Test array - let band_values: Vec = bands - .iter() - .enumerate() - .map(|(i, band)| { - assert_eq!(band.data()[0], i as u8); - band.data()[0] - }) - .collect(); - - assert_eq!(band_values, vec![0, 1, 2]); - } - - #[test] - fn test_raster_is_null() { - let raster_array = 
generate_test_rasters(2, Some(1)).unwrap(); - let rasters = RasterStructArray::new(&raster_array); - assert_eq!(rasters.len(), 2); - assert!(!rasters.is_null(0)); - assert!(rasters.is_null(1)); - } - - /// Test that `data_type()` and `storage_type()` return `Err` for invalid values - /// instead of panicking. - #[test] - fn test_invalid_band_metadata_returns_err() { - use arrow_buffer::{OffsetBuffer, ScalarBuffer}; - use sedona_schema::raster::RasterSchema; - use std::sync::Arc; - - // Build a valid single-band raster first - let mut builder = RasterBuilder::new(1); - let metadata = RasterMetadata { - width: 2, - height: 2, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - builder.start_raster(&metadata, None).unwrap(); - let band_meta = BandMetadata { - nodata_value: None, - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }; - builder.start_band(band_meta).unwrap(); - builder.band_data_writer().append_value([1u8; 4]); - builder.finish_band().unwrap(); - builder.finish_raster().unwrap(); - let valid_array = builder.finish().unwrap(); - - // Extract original columns from the valid raster - let metadata_col = valid_array.column(raster_indices::METADATA).clone(); - let crs_col = valid_array.column(raster_indices::CRS).clone(); - let bands_list = valid_array - .column(raster_indices::BANDS) - .as_any() - .downcast_ref::() - .unwrap(); - let bands_struct = bands_list - .values() - .as_any() - .downcast_ref::() - .unwrap(); - let orig_band_meta_struct = bands_struct - .column(band_indices::METADATA) - .as_any() - .downcast_ref::() - .unwrap(); - let band_data_col = bands_struct.column(band_indices::DATA).clone(); - - // Build tampered band metadata with invalid storage_type=99 and datatype=99 - let DataType::Struct(band_metadata_fields) = RasterSchema::band_metadata_type() else { - panic!("Expected struct type for band metadata"); - }; - let 
tampered_band_metadata = StructArray::new( - band_metadata_fields, - vec![ - orig_band_meta_struct - .column(band_metadata_indices::NODATAVALUE) - .clone(), - Arc::new(UInt32Array::from(vec![99u32])), // invalid storage_type - Arc::new(UInt32Array::from(vec![99u32])), // invalid datatype - orig_band_meta_struct - .column(band_metadata_indices::OUTDB_URL) - .clone(), - orig_band_meta_struct - .column(band_metadata_indices::OUTDB_BAND_ID) - .clone(), - ], - None, - ); - - // Rebuild band struct - let DataType::Struct(band_fields) = RasterSchema::band_type() else { - panic!("Expected struct type for band"); - }; - let tampered_band_struct = StructArray::new( - band_fields, - vec![Arc::new(tampered_band_metadata), band_data_col], - None, - ); - - // Rebuild bands list - let DataType::List(band_field) = RasterSchema::bands_type() else { - panic!("Expected list type for bands"); - }; - let tampered_bands_list = ListArray::new( - band_field, - OffsetBuffer::new(ScalarBuffer::from(vec![0i32, 1])), - Arc::new(tampered_band_struct), - None, - ); - - // Rebuild the top-level raster struct - let tampered_raster = StructArray::new( - RasterSchema::fields(), - vec![metadata_col, crs_col, Arc::new(tampered_bands_list)], - None, - ); - - // Read back and verify that data_type() and storage_type() return Err - let rasters = RasterStructArray::new(&tampered_raster); - let raster = rasters.get(0).unwrap(); - let band = raster.bands().band(1).unwrap(); - let band_meta = band.metadata(); - - let storage_err = band_meta.storage_type().unwrap_err(); - assert!(storage_err.to_string().contains("Unknown storage type: 99")); - - let data_type_err = band_meta.data_type().unwrap_err(); - assert!(data_type_err - .to_string() - .contains("Unknown band data type: 99")); - } -} diff --git a/rust/sedona-raster/src/builder.rs b/rust/sedona-raster/src/builder.rs index 3db236cb4..b0d358a94 100644 --- a/rust/sedona-raster/src/builder.rs +++ b/rust/sedona-raster/src/builder.rs @@ -17,325 +17,412 @@ use 
arrow_array::{ builder::{ - BinaryBuilder, BinaryViewBuilder, BooleanBuilder, Float64Builder, StringBuilder, - StringViewBuilder, UInt32Builder, UInt64Builder, + ArrayBuilder, BinaryBuilder, BinaryViewBuilder, BooleanBuilder, Float64Builder, + Int64Builder, StringBuilder, StringViewBuilder, UInt32Builder, UInt64Builder, }, Array, ArrayRef, ListArray, StructArray, }; use arrow_buffer::{OffsetBuffer, ScalarBuffer}; -use arrow_schema::{ArrowError, DataType}; +use arrow_schema::ArrowError; use std::sync::Arc; +use sedona_schema::raster::BandDataType; use sedona_schema::raster::RasterSchema; -use crate::traits::{BandMetadata, MetadataRef}; +use arrow_schema::DataType; -/// Builder for constructing raster arrays with zero-copy band data writing +/// Builder for constructing N-D raster arrays. /// -/// Required steps to build a raster: -/// 1. Create a RasterBuilder with a specified capacity -/// 2. For each raster to add: -/// - Call `start_raster` with the appropriate metadata, CRS -/// - For each band in the raster: -/// - Call `start_band` with the band metadata -/// - Use `band_data_writer` to get a BinaryViewBuilder and write the band data -/// - Call `finish_band` to complete the band -/// - Call `finish_raster` to complete the raster -/// 3. 
After all rasters are added, call `finish` to get the final StructArray +/// # Usage /// -/// Example usage: /// ``` -/// use sedona_raster::traits::{RasterMetadata, BandMetadata}; -/// use sedona_schema::raster::{StorageType, BandDataType}; /// use sedona_raster::builder::RasterBuilder; +/// use sedona_schema::raster::BandDataType; /// /// let mut builder = RasterBuilder::new(1); -/// let metadata = RasterMetadata { -/// width: 100, height: 100, -/// upperleft_x: 0.0, upperleft_y: 0.0, -/// scale_x: 1.0, scale_y: -1.0, -/// skew_x: 0.0, skew_y: 0.0, -/// }; -/// // Start a raster from RasterMetadata struct -/// builder.start_raster(&metadata, Some("EPSG:4326")).unwrap(); /// -/// // Add a band: -/// let band_metadata = BandMetadata { -/// nodata_value: Some(vec![0u8]), -/// storage_type: StorageType::InDb, -/// datatype: BandDataType::UInt8, -/// outdb_url: None, -/// outdb_band_id: None, -/// }; -/// builder.start_band(band_metadata).unwrap(); -/// let band_writer = builder.band_data_writer(); -/// band_writer.append_value(&vec![/* band data bytes */]); -/// builder.finish_band().unwrap(); +/// // 2D raster convenience: sets transform, x_dim="x", y_dim="y" +/// builder.start_raster_2d(100, 100, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, Some("EPSG:4326")).unwrap(); /// -/// // Finish the raster +/// // 2D band convenience: sets dim_names=["y","x"], shape=[h,w], contiguous strides +/// builder.start_band_2d(BandDataType::UInt8, Some(&[0u8])).unwrap(); +/// builder.band_data_writer().append_value(&vec![0u8; 10000]); +/// builder.finish_band().unwrap(); /// builder.finish_raster().unwrap(); /// -/// // Finish building and get the StructArray /// let raster_array = builder.finish().unwrap(); /// ``` pub struct RasterBuilder { - // Metadata fields - width: UInt64Builder, - height: UInt64Builder, - upper_left_x: Float64Builder, - upper_left_y: Float64Builder, - scale_x: Float64Builder, - scale_y: Float64Builder, - skew_x: Float64Builder, - skew_y: Float64Builder, - - // CRS field 
+ // Top-level raster fields crs: StringViewBuilder, - - // Band metadata fields - band_nodata: BinaryBuilder, - band_storage_type: UInt32Builder, + transform_values: Float64Builder, + transform_offsets: Vec, + x_dim: StringViewBuilder, + y_dim: StringViewBuilder, + + // Band fields (flattened across all bands) + band_name: StringBuilder, + band_dim_names_values: StringBuilder, + band_dim_names_offsets: Vec, + band_shape_values: UInt64Builder, + band_shape_offsets: Vec, band_datatype: UInt32Builder, - band_outdb_url: StringBuilder, - band_outdb_band_id: UInt32Builder, - - // Band data field + band_nodata: BinaryBuilder, + band_strides_values: Int64Builder, + band_strides_offsets: Vec, + band_offset: UInt64Builder, + band_outdb_uri: StringBuilder, band_data: BinaryViewBuilder, // List structure tracking band_offsets: Vec, // Track where each raster's bands start/end current_band_count: i32, // Track bands in current raster - raster_validity: BooleanBuilder, // Track which rasters are null + // Current raster state (needed for start_band_2d) + current_width: u64, + current_height: u64, + + // Track band_data count at the start of each band for finish_band validation + band_data_count_at_start: usize, + + raster_validity: BooleanBuilder, } impl RasterBuilder { - /// Create a new raster builder with the specified capacity + /// Create a new raster builder with the specified capacity. 
pub fn new(capacity: usize) -> Self { Self { - // Metadata builders - width: UInt64Builder::with_capacity(capacity), - height: UInt64Builder::with_capacity(capacity), - upper_left_x: Float64Builder::with_capacity(capacity), - upper_left_y: Float64Builder::with_capacity(capacity), - scale_x: Float64Builder::with_capacity(capacity), - scale_y: Float64Builder::with_capacity(capacity), - skew_x: Float64Builder::with_capacity(capacity), - skew_y: Float64Builder::with_capacity(capacity), - - // CRS builder crs: StringViewBuilder::with_capacity(capacity), - - // Band builders - estimate some bands per raster - // The capacity is at raster level, but each raster has multiple bands and - // are large. We may want to add an optional parameter to control expected - // bands per raster or even band size in the future - band_nodata: BinaryBuilder::with_capacity(capacity, capacity), - band_storage_type: UInt32Builder::with_capacity(capacity), + transform_values: Float64Builder::with_capacity(capacity * 6), + transform_offsets: vec![0], + x_dim: StringViewBuilder::with_capacity(capacity), + y_dim: StringViewBuilder::with_capacity(capacity), + + band_name: StringBuilder::with_capacity(capacity, capacity), + band_dim_names_values: StringBuilder::with_capacity(capacity * 2, capacity * 4), + band_dim_names_offsets: vec![0], + band_shape_values: UInt64Builder::with_capacity(capacity * 2), + band_shape_offsets: vec![0], band_datatype: UInt32Builder::with_capacity(capacity), - band_outdb_url: StringBuilder::with_capacity(capacity, capacity), - band_outdb_band_id: UInt32Builder::with_capacity(capacity), + band_nodata: BinaryBuilder::with_capacity(capacity, capacity), + band_strides_values: Int64Builder::with_capacity(capacity * 2), + band_strides_offsets: vec![0], + band_offset: UInt64Builder::with_capacity(capacity), + band_outdb_uri: StringBuilder::with_capacity(capacity, capacity), band_data: BinaryViewBuilder::with_capacity(capacity), - // List tracking band_offsets: vec![0], 
current_band_count: 0, + current_width: 0, + current_height: 0, + + band_data_count_at_start: 0, - // Raster-level validity (keeps track of null rasters) raster_validity: BooleanBuilder::with_capacity(capacity), } } - /// Start a new raster with metadata and optional CRS + /// Start a new raster with explicit N-D parameters. + /// + /// `transform` must be a 6-element GDAL GeoTransform: + /// `[origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]` pub fn start_raster( &mut self, - metadata: &dyn MetadataRef, + transform: &[f64; 6], + x_dim: &str, + y_dim: &str, crs: Option<&str>, ) -> Result<(), ArrowError> { - self.append_metadata_from_ref(metadata)?; - self.append_crs(crs)?; + // Transform + for &v in transform { + self.transform_values.append_value(v); + } + let next = *self.transform_offsets.last().unwrap() + 6; + self.transform_offsets.push(next); + + // Spatial dim names + self.x_dim.append_value(x_dim); + self.y_dim.append_value(y_dim); + + // CRS + match crs { + Some(crs_data) => self.crs.append_value(crs_data), + None => self.crs.append_null(), + } - // Reset band count for this raster self.current_band_count = 0; + self.current_width = 0; + self.current_height = 0; Ok(()) } - /// Start a new band - this must be called before writing band data - pub fn start_band(&mut self, band_metadata: BandMetadata) -> Result<(), ArrowError> { - // Append band metadata - match band_metadata.nodata_value { - Some(nodata) => self.band_nodata.append_value(&nodata), - None => self.band_nodata.append_null(), + /// Convenience: start a 2D raster with the legacy 8-parameter interface. + /// + /// Sets `x_dim="x"`, `y_dim="y"`, and builds the 6-element GDAL transform + /// from the individual parameters. 
+ #[allow(clippy::too_many_arguments)] + pub fn start_raster_2d( + &mut self, + width: u64, + height: u64, + origin_x: f64, + origin_y: f64, + scale_x: f64, + scale_y: f64, + skew_x: f64, + skew_y: f64, + crs: Option<&str>, + ) -> Result<(), ArrowError> { + let transform = [origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]; + self.start_raster(&transform, "x", "y", crs)?; + self.current_width = width; + self.current_height = height; + Ok(()) + } + + /// Start a new band with explicit N-D parameters. + pub fn start_band( + &mut self, + name: Option<&str>, + dim_names: &[&str], + shape: &[u64], + data_type: BandDataType, + nodata: Option<&[u8]>, + outdb_uri: Option<&str>, + ) -> Result<(), ArrowError> { + // Name + match name { + Some(n) => self.band_name.append_value(n), + None => self.band_name.append_null(), + } + + // Dim names + for dn in dim_names { + self.band_dim_names_values.append_value(dn); } + let next = *self.band_dim_names_offsets.last().unwrap() + dim_names.len() as i32; + self.band_dim_names_offsets.push(next); - self.band_storage_type - .append_value(band_metadata.storage_type as u32); - self.band_datatype - .append_value(band_metadata.datatype as u32); + // Shape + for &s in shape { + self.band_shape_values.append_value(s); + } + let next = *self.band_shape_offsets.last().unwrap() + shape.len() as i32; + self.band_shape_offsets.push(next); + + // Data type + self.band_datatype.append_value(data_type as u32); - match band_metadata.outdb_url { - Some(url) => self.band_outdb_url.append_value(&url), - None => self.band_outdb_url.append_null(), + // Nodata + match nodata { + Some(nodata_bytes) => self.band_nodata.append_value(nodata_bytes), + None => self.band_nodata.append_null(), } - match band_metadata.outdb_band_id { - Some(band_id) => self.band_outdb_band_id.append_value(band_id), - None => self.band_outdb_band_id.append_null(), + // Strides: standard C-order contiguous strides + let elem_size = data_type.byte_size() as i64; + let ndim = 
shape.len(); + let mut strides = vec![0i64; ndim]; + if ndim > 0 { + strides[ndim - 1] = elem_size; + for i in (0..ndim - 1).rev() { + strides[i] = strides[i + 1] * shape[i + 1] as i64; + } + } + for &s in &strides { + self.band_strides_values.append_value(s); + } + let next = *self.band_strides_offsets.last().unwrap() + ndim as i32; + self.band_strides_offsets.push(next); + + // Offset (always 0 in Phase 1) + self.band_offset.append_value(0); + + // OutDb URI + match outdb_uri { + Some(uri) => self.band_outdb_uri.append_value(uri), + None => self.band_outdb_uri.append_null(), } self.current_band_count += 1; + self.band_data_count_at_start = self.band_data.len(); Ok(()) } - /// Get direct access to the BinaryViewBuilder for writing the current band's data - /// Must be called after start_band() to write data to the current band + /// Convenience: start a 2D band with `dim_names=["y","x"]` and `shape=[height, width]`. + /// + /// Must be called after `start_raster_2d` which sets the current width/height. + pub fn start_band_2d( + &mut self, + data_type: BandDataType, + nodata: Option<&[u8]>, + ) -> Result<(), ArrowError> { + if self.current_width == 0 && self.current_height == 0 { + return Err(ArrowError::InvalidArgumentError( + "start_band_2d requires prior start_raster_2d (width and height are 0)".into(), + )); + } + self.start_band( + None, + &["y", "x"], + &[self.current_height, self.current_width], + data_type, + nodata, + None, + ) + } + + /// Get direct access to the BinaryViewBuilder for writing the current band's data. pub fn band_data_writer(&mut self) -> &mut BinaryViewBuilder { &mut self.band_data } - /// Finish writing the current band + /// Finish writing the current band. + /// + /// Validates that exactly one data value was appended since `start_band()`. 
pub fn finish_band(&mut self) -> Result<(), ArrowError> { - // Band data should already be written via band_data_writer - // Nothing additional needed here since we're building flat + let current_count = self.band_data.len(); + if current_count != self.band_data_count_at_start + 1 { + return Err(ArrowError::InvalidArgumentError( + format!( + "Expected exactly one band data value per band, but got {} appended since start_band()", + current_count - self.band_data_count_at_start + ), + )); + } Ok(()) } - /// Finish all bands for the current raster + /// Finish all bands for the current raster. pub fn finish_raster(&mut self) -> Result<(), ArrowError> { - // Record the end offset for this raster's bands let next_offset = self.band_offsets.last().unwrap() + self.current_band_count; self.band_offsets.push(next_offset); - self.raster_validity.append_value(true); - Ok(()) } - /// Append raster metadata from a MetadataRef trait object - fn append_metadata_from_ref(&mut self, metadata: &dyn MetadataRef) -> Result<(), ArrowError> { - self.width.append_value(metadata.width()); - self.height.append_value(metadata.height()); - self.upper_left_x.append_value(metadata.upper_left_x()); - self.upper_left_y.append_value(metadata.upper_left_y()); - self.scale_x.append_value(metadata.scale_x()); - self.scale_y.append_value(metadata.scale_y()); - self.skew_x.append_value(metadata.skew_x()); - self.skew_y.append_value(metadata.skew_y()); - - Ok(()) - } - - /// Set the CRS for the current raster - pub fn append_crs(&mut self, crs: Option<&str>) -> Result<(), ArrowError> { - match crs { - Some(crs_data) => self.crs.append_value(crs_data), - None => self.crs.append_null(), + /// Append a null raster. 
+ pub fn append_null(&mut self) -> Result<(), ArrowError> { + // Transform: append 6 zeros + for _ in 0..6 { + self.transform_values.append_value(0.0); } - Ok(()) - } + let next = *self.transform_offsets.last().unwrap() + 6; + self.transform_offsets.push(next); - /// Append a null raster - pub fn append_null(&mut self) -> Result<(), ArrowError> { - // Since metadata fields are non-nullable, provide default values - self.width.append_value(0u64); - self.height.append_value(0u64); - self.upper_left_x.append_value(0.0f64); - self.upper_left_y.append_value(0.0f64); - self.scale_x.append_value(0.0f64); - self.scale_y.append_value(0.0f64); - self.skew_x.append_value(0.0f64); - self.skew_y.append_value(0.0f64); - - // Append null CRS + // Spatial dims: defaults + self.x_dim.append_value("x"); + self.y_dim.append_value("y"); + + // CRS: null self.crs.append_null(); - // No bands for null raster + // No bands let current_offset = *self.band_offsets.last().unwrap(); self.band_offsets.push(current_offset); - // Mark raster as null + // Mark null self.raster_validity.append_null(); Ok(()) } - /// Finish building and return the constructed StructArray + /// Finish building and return the constructed StructArray. 
pub fn finish(mut self) -> Result { - // Build the metadata struct using the schema - let metadata_fields = if let DataType::Struct(fields) = RasterSchema::metadata_type() { - fields - } else { + // Build transform list + let transform_values = self.transform_values.finish(); + let transform_offsets = OffsetBuffer::new(ScalarBuffer::from(self.transform_offsets)); + let DataType::List(transform_field) = RasterSchema::transform_type() else { return Err(ArrowError::SchemaError( - "Expected struct type for metadata".to_string(), + "Expected list type for transform".to_string(), )); }; + let transform_list = ListArray::new( + transform_field, + transform_offsets, + Arc::new(transform_values), + None, + ); - let metadata_arrays: Vec = vec![ - Arc::new(self.width.finish()), - Arc::new(self.height.finish()), - Arc::new(self.upper_left_x.finish()), - Arc::new(self.upper_left_y.finish()), - Arc::new(self.scale_x.finish()), - Arc::new(self.scale_y.finish()), - Arc::new(self.skew_x.finish()), - Arc::new(self.skew_y.finish()), - ]; - let metadata_array = StructArray::new(metadata_fields, metadata_arrays, None); - - // Build the band metadata struct using the schema - let band_metadata_fields = - if let DataType::Struct(fields) = RasterSchema::band_metadata_type() { - fields - } else { - return Err(ArrowError::SchemaError( - "Expected struct type for band metadata".to_string(), - )); - }; - - let band_metadata_arrays: Vec = vec![ - Arc::new(self.band_nodata.finish()), - Arc::new(self.band_storage_type.finish()), - Arc::new(self.band_datatype.finish()), - Arc::new(self.band_outdb_url.finish()), - Arc::new(self.band_outdb_band_id.finish()), - ]; - let band_metadata_array = - StructArray::new(band_metadata_fields, band_metadata_arrays, None); + // Build band dim_names nested list + let dim_names_values = self.band_dim_names_values.finish(); + let dim_names_offsets = OffsetBuffer::new(ScalarBuffer::from(self.band_dim_names_offsets)); + let DataType::List(dim_names_field) = 
RasterSchema::dim_names_type() else { + return Err(ArrowError::SchemaError( + "Expected list type for dim_names".to_string(), + )); + }; + let dim_names_list = ListArray::new( + dim_names_field, + dim_names_offsets, + Arc::new(dim_names_values), + None, + ); - // Build the band struct using the schema - let band_fields = if let DataType::Struct(fields) = RasterSchema::band_type() { - fields - } else { + // Build band shape nested list + let shape_values = self.band_shape_values.finish(); + let shape_offsets = OffsetBuffer::new(ScalarBuffer::from(self.band_shape_offsets)); + let DataType::List(shape_field) = RasterSchema::shape_type() else { + return Err(ArrowError::SchemaError( + "Expected list type for shape".to_string(), + )); + }; + let shape_list = ListArray::new(shape_field, shape_offsets, Arc::new(shape_values), None); + + // Build band strides nested list + let strides_values = self.band_strides_values.finish(); + let strides_offsets = OffsetBuffer::new(ScalarBuffer::from(self.band_strides_offsets)); + let DataType::List(strides_field) = RasterSchema::strides_type() else { + return Err(ArrowError::SchemaError( + "Expected list type for strides".to_string(), + )); + }; + let strides_list = ListArray::new( + strides_field, + strides_offsets, + Arc::new(strides_values), + None, + ); + + // Build band struct + let DataType::Struct(band_fields) = RasterSchema::band_type() else { return Err(ArrowError::SchemaError( "Expected struct type for band".to_string(), )); }; let band_arrays: Vec = vec![ - Arc::new(band_metadata_array), + Arc::new(self.band_name.finish()), + Arc::new(dim_names_list), + Arc::new(shape_list), + Arc::new(self.band_datatype.finish()), + Arc::new(self.band_nodata.finish()), + Arc::new(strides_list), + Arc::new(self.band_offset.finish()), + Arc::new(self.band_outdb_uri.finish()), Arc::new(self.band_data.finish()), ]; - let band_struct_array = StructArray::new(band_fields, band_arrays, None); + let band_struct = StructArray::new(band_fields, 
band_arrays, None); - // Build the bands list array using the schema - let band_field = if let DataType::List(field) = RasterSchema::bands_type() { - field - } else { + // Build bands list + let DataType::List(bands_field) = RasterSchema::bands_type() else { return Err(ArrowError::SchemaError( "Expected list type for bands".to_string(), )); }; + let band_list_offsets = OffsetBuffer::new(ScalarBuffer::from(self.band_offsets)); + let bands_list = + ListArray::new(bands_field, band_list_offsets, Arc::new(band_struct), None); - let offsets = OffsetBuffer::new(ScalarBuffer::from(self.band_offsets)); - let bands_list = ListArray::new(band_field, offsets, Arc::new(band_struct_array), None); - - // Build the final raster struct using the schema + // Build top-level raster struct let raster_fields = RasterSchema::fields(); let raster_arrays: Vec = vec![ - Arc::new(metadata_array), Arc::new(self.crs.finish()), + Arc::new(transform_list), + Arc::new(self.x_dim.finish()), + Arc::new(self.y_dim.finish()), Arc::new(bands_list), ]; @@ -350,500 +437,413 @@ impl RasterBuilder { mod tests { use super::*; use crate::array::RasterStructArray; - use crate::traits::{RasterMetadata, RasterRef}; - use sedona_schema::raster::{BandDataType, StorageType}; + use crate::traits::RasterRef; #[test] - fn test_iterator_basic_functionality() { - // Create a simple raster for testing using the correct API - let mut builder = RasterBuilder::new(10); // capacity - - let metadata = RasterMetadata { - width: 10, - height: 10, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - - let epsg4326 = "EPSG:4326"; - builder.start_raster(&metadata, Some(epsg4326)).unwrap(); - - let band_metadata = BandMetadata { - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }; - - // Add a single band with some test data using the correct API - 
builder.start_band(band_metadata.clone()).unwrap(); - let test_data = vec![1u8; 100]; // 10x10 raster with value 1 - builder.band_data_writer().append_value(&test_data); + fn test_roundtrip_2d_raster() { + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d( + 10, + 20, + 100.0, + 200.0, + 1.0, + -2.0, + 0.25, + 0.5, + Some("EPSG:4326"), + ) + .unwrap(); + builder + .start_band_2d(BandDataType::UInt8, Some(&[255u8])) + .unwrap(); + builder.band_data_writer().append_value(vec![1u8; 200]); builder.finish_band().unwrap(); - let result = builder.finish_raster(); - assert!(result.is_ok()); - - let raster_array = builder.finish().unwrap(); - - // Test the iterator - let rasters = RasterStructArray::new(&raster_array); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); assert_eq!(rasters.len(), 1); - assert!(!rasters.is_empty()); - - let raster = rasters.get(0).unwrap(); - let metadata = raster.metadata(); - - assert_eq!(metadata.width(), 10); - assert_eq!(metadata.height(), 10); - assert_eq!(metadata.scale_x(), 1.0); - assert_eq!(metadata.scale_y(), -1.0); - - let bands = raster.bands(); - assert_eq!(bands.len(), 1); - assert!(!bands.is_empty()); - // Access band with 1-based band_number - let band = bands.band(1).unwrap(); - assert_eq!(band.data().len(), 100); - assert_eq!(band.data()[0], 1u8); - - let band_meta = band.metadata(); - assert_eq!(band_meta.storage_type().unwrap(), StorageType::InDb); - assert_eq!(band_meta.data_type().unwrap(), BandDataType::UInt8); - - let crs = raster.crs().unwrap(); - assert_eq!(crs, epsg4326); - - // Test iterator over bands - let band_iter: Vec<_> = bands.iter().collect(); - assert_eq!(band_iter.len(), 1); + let r = rasters.get(0).unwrap(); + assert_eq!(r.width(), Some(10)); + assert_eq!(r.height(), Some(20)); + assert_eq!(r.transform(), &[100.0, 1.0, 0.25, 200.0, 0.5, -2.0]); + assert_eq!(r.x_dim(), "x"); + assert_eq!(r.y_dim(), "y"); + 
assert_eq!(r.crs(), Some("EPSG:4326")); + assert_eq!(r.num_bands(), 1); + + let band = r.band(0).unwrap(); + assert_eq!(band.ndim(), 2); + assert_eq!(band.dim_names(), vec!["y", "x"]); + assert_eq!(band.shape(), &[20, 10]); + assert_eq!(band.data_type(), BandDataType::UInt8); + assert_eq!(band.nodata(), Some(&[255u8][..])); + assert_eq!(band.contiguous_data().unwrap().len(), 200); } #[test] - fn test_multi_band_iterator() { - let mut builder = RasterBuilder::new(3); - - let metadata = RasterMetadata { - width: 5, - height: 5, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - - builder.start_raster(&metadata, None).unwrap(); - - // Add three bands using the correct API - for band_idx in 0..3 { - let band_metadata = BandMetadata { - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }; - - builder.start_band(band_metadata).unwrap(); - let test_data = vec![band_idx as u8; 25]; // 5x5 raster - builder.band_data_writer().append_value(&test_data); - builder.finish_band().unwrap(); - } - - let result = builder.finish_raster(); - assert!(result.is_ok()); + fn test_roundtrip_multi_band() { + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d(2, 2, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, None) + .unwrap(); - let raster_array = builder.finish().unwrap(); + // Band 0: UInt8 + builder + .start_band_2d(BandDataType::UInt8, Some(&[255u8])) + .unwrap(); + builder.band_data_writer().append_value([1u8, 2, 3, 4]); + builder.finish_band().unwrap(); - let rasters = RasterStructArray::new(&raster_array); - let raster = rasters.get(0).unwrap(); - let bands = raster.bands(); + // Band 1: Float32 + builder.start_band_2d(BandDataType::Float32, None).unwrap(); + let f32_data: Vec = [1.5f32, 2.5, 3.5, 4.5] + .iter() + .flat_map(|v| v.to_le_bytes()) + .collect(); + builder.band_data_writer().append_value(&f32_data); + 
builder.finish_band().unwrap(); - assert_eq!(bands.len(), 3); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); - // Test each band has different data - // Use 1-based band numbers - for i in 0..3 { - // Access band with 1-based band_number - let band = bands.band(i + 1).unwrap(); - let expected_value = i as u8; - assert!(band.data().iter().all(|&x| x == expected_value)); - } + assert_eq!(r.num_bands(), 2); - // Test iterator - let band_values: Vec = bands - .iter() - .enumerate() - .map(|(i, band)| { - assert_eq!(band.data()[0], i as u8); - band.data()[0] - }) - .collect(); + let b0 = r.band(0).unwrap(); + assert_eq!(b0.data_type(), BandDataType::UInt8); + assert_eq!(b0.nodata(), Some(&[255u8][..])); - assert_eq!(band_values, vec![0, 1, 2]); + let b1 = r.band(1).unwrap(); + assert_eq!(b1.data_type(), BandDataType::Float32); + assert_eq!(b1.nodata(), None); } #[test] - fn test_copy_metadata_from_iterator() { - // Create an original raster - let mut source_builder = RasterBuilder::new(10); - - let original_metadata = RasterMetadata { - width: 42, - height: 24, - upperleft_x: -122.0, - upperleft_y: 37.8, - scale_x: 0.1, - scale_y: -0.1, - skew_x: 0.0, - skew_y: 0.0, - }; - - source_builder - .start_raster(&original_metadata, None) + fn test_null_raster() { + let mut builder = RasterBuilder::new(2); + builder + .start_raster_2d(1, 1, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, None) .unwrap(); + builder.start_band_2d(BandDataType::UInt8, None).unwrap(); + builder.band_data_writer().append_value([0u8]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - let band_metadata = BandMetadata { - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }; + builder.append_null().unwrap(); - source_builder.start_band(band_metadata).unwrap(); - let test_data = vec![42u8; 
1008]; // 42x24 raster - source_builder.band_data_writer().append_value(&test_data); - source_builder.finish_band().unwrap(); - source_builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + assert_eq!(rasters.len(), 2); + assert!(!rasters.is_null(0)); + assert!(rasters.is_null(1)); + } - let source_array = source_builder.finish().unwrap(); + #[test] + fn test_nd_band() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster(&transform, "x", "y", None).unwrap(); + + // 3D band: [time=3, y=4, x=5] + builder + .start_band( + Some("temperature"), + &["time", "y", "x"], + &[3, 4, 5], + BandDataType::Float32, + None, + None, + ) + .unwrap(); + let data = vec![0u8; 3 * 4 * 5 * 4]; // 3*4*5 Float32 elements + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - // Create a new raster using metadata from the iterator - let mut target_builder = RasterBuilder::new(10); - let iterator = RasterStructArray::new(&source_array); - let source_raster = iterator.get(0).unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + + assert_eq!(r.band_name(0), Some("temperature")); + let band = r.band(0).unwrap(); + assert_eq!(band.ndim(), 3); + assert_eq!(band.dim_names(), vec!["time", "y", "x"]); + assert_eq!(band.shape(), &[3, 4, 5]); + assert_eq!(band.dim_size("time"), Some(3)); + assert_eq!(band.dim_size("y"), Some(4)); + assert_eq!(band.dim_size("x"), Some(5)); + assert_eq!(band.dim_size("z"), None); + + // Verify strides are standard C-order: [4*5*4, 5*4, 4] = [80, 20, 4] + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.strides, &[80, 20, 4]); + assert_eq!(buf.offset, 0); + } - target_builder - .start_raster(source_raster.metadata(), source_raster.crs()) + #[test] + fn 
test_nonstandard_spatial_dim_names() { + // Zarr-style dataset with lat/lon instead of y/x + let mut builder = RasterBuilder::new(1); + let transform = [10.0, 0.01, 0.0, 50.0, 0.0, -0.01]; + builder + .start_raster(&transform, "longitude", "latitude", Some("EPSG:4326")) .unwrap(); + builder + .start_band( + Some("sst"), + &["latitude", "longitude"], + &[180, 360], + BandDataType::Float32, + None, + None, + ) + .unwrap(); + let data = vec![0u8; 180 * 360 * 4]; + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - // Add new band data while preserving original metadata - let new_band_metadata = BandMetadata { - nodata_value: None, - storage_type: StorageType::InDb, - datatype: BandDataType::UInt16, - outdb_url: None, - outdb_band_id: None, - }; - - target_builder.start_band(new_band_metadata).unwrap(); - let new_data = vec![100u16; 1008]; // Different data, same dimensions - let new_data_bytes: Vec = new_data.iter().flat_map(|&x| x.to_le_bytes()).collect(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); - target_builder - .band_data_writer() - .append_value(&new_data_bytes); - target_builder.finish_band().unwrap(); - target_builder.finish_raster().unwrap(); - - let target_array = target_builder.finish().unwrap(); - - // Verify the metadata was copied correctly - let target_iterator = RasterStructArray::new(&target_array); - let target_raster = target_iterator.get(0).unwrap(); - let target_metadata = target_raster.metadata(); - - // All metadata should match the original - assert_eq!(target_metadata.width(), 42); - assert_eq!(target_metadata.height(), 24); - assert_eq!(target_metadata.upper_left_x(), -122.0); - assert_eq!(target_metadata.upper_left_y(), 37.8); - assert_eq!(target_metadata.scale_x(), 0.1); - assert_eq!(target_metadata.scale_y(), -0.1); - - // But band data and metadata should be different - let target_band = 
target_raster.bands().band(1).unwrap(); - let target_band_meta = target_band.metadata(); - assert_eq!(target_band_meta.data_type().unwrap(), BandDataType::UInt16); - assert!(target_band_meta.nodata_value().is_none()); - assert_eq!(target_band.data().len(), 2016); // 1008 * 2 bytes per u16 - - let result = target_raster.bands().band(0); - assert!(result.is_err(), "Band number 0 should be invalid"); - - let result = target_raster.bands().band(2); - assert!(result.is_err(), "Band number 2 should be out of range"); + assert_eq!(r.x_dim(), "longitude"); + assert_eq!(r.y_dim(), "latitude"); + // width = size of "longitude" dim, height = size of "latitude" dim + assert_eq!(r.width(), Some(360)); + assert_eq!(r.height(), Some(180)); } #[test] - fn test_band_data_types() { - // Create a test raster with bands of different data types + fn test_mixed_dimensionality_bands() { + // One 3D band and one 2D band in the same raster let mut builder = RasterBuilder::new(1); - - let metadata = RasterMetadata { - width: 2, - height: 2, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - - builder.start_raster(&metadata, None).unwrap(); - - // Test all BandDataType variants - let test_cases = vec![ - (BandDataType::UInt8, vec![1u8, 2u8, 3u8, 4u8]), - (BandDataType::Int8, vec![255u8, 254u8, 253u8, 252u8]), // -1, -2, -3, -4 as i8 - ( - BandDataType::UInt16, - vec![1u8, 0u8, 2u8, 0u8, 3u8, 0u8, 4u8, 0u8], - ), // little-endian u16 - ( - BandDataType::Int16, - vec![255u8, 255u8, 254u8, 255u8, 253u8, 255u8, 252u8, 255u8], - ), // little-endian i16 - ( - BandDataType::UInt32, - vec![ - 1u8, 0u8, 0u8, 0u8, 2u8, 0u8, 0u8, 0u8, 3u8, 0u8, 0u8, 0u8, 4u8, 0u8, 0u8, 0u8, - ], - ), // little-endian u32 - ( - BandDataType::Int32, - vec![ - 255u8, 255u8, 255u8, 255u8, 254u8, 255u8, 255u8, 255u8, 253u8, 255u8, 255u8, - 255u8, 252u8, 255u8, 255u8, 255u8, - ], - ), // little-endian i32 - ( - BandDataType::UInt64, - vec![ - 1u8, 0u8, 0u8, 0u8, 0u8, 
0u8, 0u8, 0u8, 2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, - 3u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 4u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, - ], - ), // little-endian u64 - ( - BandDataType::Int64, - vec![ - 255u8, 255u8, 255u8, 255u8, 255u8, 255u8, 255u8, 255u8, 254u8, 255u8, 255u8, - 255u8, 255u8, 255u8, 255u8, 255u8, 253u8, 255u8, 255u8, 255u8, 255u8, 255u8, - 255u8, 255u8, 252u8, 255u8, 255u8, 255u8, 255u8, 255u8, 255u8, 255u8, - ], - ), // little-endian i64: -1, -2, -3, -4 - ( + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster(&transform, "x", "y", None).unwrap(); + + // Band 0: 3D [time=12, y=64, x=64] + builder + .start_band( + Some("temperature"), + &["time", "y", "x"], + &[12, 64, 64], BandDataType::Float32, - vec![ - 0u8, 0u8, 128u8, 63u8, 0u8, 0u8, 0u8, 64u8, 0u8, 0u8, 64u8, 64u8, 0u8, 0u8, - 128u8, 64u8, - ], - ), // little-endian f32: 1.0, 2.0, 3.0, 4.0 - ( + None, + None, + ) + .unwrap(); + let data_3d = vec![0u8; 12 * 64 * 64 * 4]; + builder.band_data_writer().append_value(&data_3d); + builder.finish_band().unwrap(); + + // Band 1: 2D [y=64, x=64] + builder + .start_band( + Some("elevation"), + &["y", "x"], + &[64, 64], BandDataType::Float64, - vec![ - 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 240u8, 63u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, - 64u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 8u8, 64u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, - 16u8, 64u8, - ], - ), // little-endian f64: 1.0, 2.0, 3.0, 4.0 - ]; + None, + None, + ) + .unwrap(); + let data_2d = vec![0u8; 64 * 64 * 8]; + builder.band_data_writer().append_value(&data_2d); + builder.finish_band().unwrap(); - for (expected_data_type, test_data) in test_cases { - let band_metadata = BandMetadata { - nodata_value: None, - storage_type: StorageType::InDb, - datatype: expected_data_type, - outdb_url: None, - outdb_band_id: None, - }; - - builder.start_band(band_metadata).unwrap(); - builder.band_data_writer().append_value(&test_data); - builder.finish_band().unwrap(); - } + builder.finish_raster().unwrap(); + 
let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + + assert_eq!(r.num_bands(), 2); + // width/height derived from band(0) which is 3D + assert_eq!(r.width(), Some(64)); + assert_eq!(r.height(), Some(64)); + + let b0 = r.band(0).unwrap(); + assert_eq!(b0.ndim(), 3); + assert_eq!(b0.dim_names(), vec!["time", "y", "x"]); + assert_eq!(b0.shape(), &[12, 64, 64]); + assert_eq!(b0.dim_size("time"), Some(12)); + + let b1 = r.band(1).unwrap(); + assert_eq!(b1.ndim(), 2); + assert_eq!(b1.dim_names(), vec!["y", "x"]); + assert_eq!(b1.shape(), &[64, 64]); + assert_eq!(b1.dim_size("time"), None); + } + #[test] + fn test_dim_index_lookup() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster(&transform, "x", "y", None).unwrap(); + builder + .start_band( + None, + &["time", "pressure", "y", "x"], + &[6, 10, 32, 32], + BandDataType::Float32, + None, + None, + ) + .unwrap(); + let data = vec![0u8; 6 * 10 * 32 * 32 * 4]; + builder.band_data_writer().append_value(&data); + builder.finish_band().unwrap(); builder.finish_raster().unwrap(); - let raster_array = builder.finish().unwrap(); - - // Test the data type conversion for each band - let iterator = RasterStructArray::new(&raster_array); - let raster = iterator.get(0).unwrap(); - let bands = raster.bands(); - - assert_eq!(bands.len(), 10, "Expected 10 bands for all data types"); - - // Verify each band returns the correct data type - let expected_types = [ - BandDataType::UInt8, - BandDataType::Int8, - BandDataType::UInt16, - BandDataType::Int16, - BandDataType::UInt32, - BandDataType::Int32, - BandDataType::UInt64, - BandDataType::Int64, - BandDataType::Float32, - BandDataType::Float64, - ]; - // i is zero-based index - for (i, expected_type) in expected_types.iter().enumerate() { - // Bands are 1-based band_number - let band = bands.band(i + 1).unwrap(); - let band_metadata = band.metadata(); 
- let actual_type = band_metadata.data_type().unwrap(); - - assert_eq!( - actual_type, *expected_type, - "Band {i} expected data type {expected_type:?}, got {actual_type:?}" - ); - } + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + assert_eq!(band.dim_index("time"), Some(0)); + assert_eq!(band.dim_index("pressure"), Some(1)); + assert_eq!(band.dim_index("y"), Some(2)); + assert_eq!(band.dim_index("x"), Some(3)); + assert_eq!(band.dim_index("wavelength"), None); + + assert_eq!(band.dim_size("time"), Some(6)); + assert_eq!(band.dim_size("pressure"), Some(10)); + assert_eq!(band.dim_size("wavelength"), None); } #[test] - fn test_outdb_metadata_fields() { - // Test creating raster with OutDb reference metadata - let mut builder = RasterBuilder::new(10); - - let metadata = RasterMetadata { - width: 1024, - height: 1024, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; + fn test_contiguous_data_is_borrowed() { + use std::borrow::Cow; + + let mut builder = RasterBuilder::new(1); + builder + .start_raster_2d(4, 4, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, None) + .unwrap(); + builder.start_band_2d(BandDataType::UInt8, None).unwrap(); + builder.band_data_writer().append_value([1u8; 16]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); - builder.start_raster(&metadata, None).unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); - // Test InDb band (should have null OutDb fields) - let indb_band_metadata = BandMetadata { - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }; + let data = band.contiguous_data().unwrap(); + // Phase 1: all data is contiguous, so should be Cow::Borrowed + 
assert!(matches!(data, Cow::Borrowed(_))); + assert_eq!(data.len(), 16); + } - builder.start_band(indb_band_metadata).unwrap(); - let test_data = vec![1u8; 100]; - builder.band_data_writer().append_value(&test_data); + #[test] + fn test_nd_buffer_strides_various_types() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster(&transform, "x", "y", None).unwrap(); + + // UInt8: element size = 1, shape [3, 4] → strides [4, 1] + builder + .start_band(None, &["y", "x"], &[3, 4], BandDataType::UInt8, None, None) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 12]); builder.finish_band().unwrap(); - // Test OutDbRef band (should have OutDb fields populated) - let outdb_band_metadata = BandMetadata { - nodata_value: None, - storage_type: StorageType::OutDbRef, - datatype: BandDataType::Float32, - outdb_url: Some("s3://mybucket/satellite_image.tif".to_string()), - outdb_band_id: Some(2), - }; + // Float64: element size = 8, shape [2, 3, 5] → strides [120, 40, 8] + builder + .start_band( + None, + &["z", "y", "x"], + &[2, 3, 5], + BandDataType::Float64, + None, + None, + ) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8; 2 * 3 * 5 * 8]); + builder.finish_band().unwrap(); - builder.start_band(outdb_band_metadata).unwrap(); - // For OutDbRef, data field could be empty or contain metadata/thumbnail - builder.band_data_writer().append_value([]); + // UInt16: element size = 2, shape [10] → strides [2] + builder + .start_band(None, &["x"], &[10], BandDataType::UInt16, None, None) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 20]); builder.finish_band().unwrap(); builder.finish_raster().unwrap(); - let raster_array = builder.finish().unwrap(); - - // Verify the band metadata - let iterator = RasterStructArray::new(&raster_array); - let raster = iterator.get(0).unwrap(); - let bands = raster.bands(); - - assert_eq!(bands.len(), 2); - - // Test InDb band - let 
indb_band = bands.band(1).unwrap(); - let indb_metadata = indb_band.metadata(); - assert_eq!(indb_metadata.storage_type().unwrap(), StorageType::InDb); - assert_eq!(indb_metadata.data_type().unwrap(), BandDataType::UInt8); - assert!(indb_metadata.outdb_url().is_none()); - assert!(indb_metadata.outdb_band_id().is_none()); - assert_eq!(indb_band.data().len(), 100); - - // Test OutDbRef band - let outdb_band = bands.band(2).unwrap(); - let outdb_metadata = outdb_band.metadata(); - assert_eq!( - outdb_metadata.storage_type().unwrap(), - StorageType::OutDbRef - ); - assert_eq!(outdb_metadata.data_type().unwrap(), BandDataType::Float32); - assert_eq!( - outdb_metadata.outdb_url().unwrap(), - "s3://mybucket/satellite_image.tif" - ); - assert_eq!(outdb_metadata.outdb_band_id().unwrap(), 2); - assert_eq!(outdb_band.data().len(), 0); // Empty data for OutDbRef + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + + let b0 = r.band(0).unwrap(); + let buf0 = b0.nd_buffer().unwrap(); + assert_eq!(buf0.strides, &[4, 1]); // UInt8 [3, 4] + + let b1 = r.band(1).unwrap(); + let buf1 = b1.nd_buffer().unwrap(); + assert_eq!(buf1.strides, &[120, 40, 8]); // Float64 [2, 3, 5] + + let b2 = r.band(2).unwrap(); + let buf2 = b2.nd_buffer().unwrap(); + assert_eq!(buf2.strides, &[2]); // UInt16 [10] } #[test] - fn test_band_access_errors() { - // Create a simple raster with one band + fn test_width_height_no_bands() { let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster(&transform, "x", "y", None).unwrap(); + // No bands added + builder.finish_raster().unwrap(); - let metadata = RasterMetadata { - width: 10, - height: 10, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); - 
builder.start_raster(&metadata, None).unwrap(); + assert_eq!(r.num_bands(), 0); + assert_eq!(r.width(), None); + assert_eq!(r.height(), None); + } - let band_metadata = BandMetadata { - nodata_value: None, - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - outdb_url: None, - outdb_band_id: None, - }; + #[test] + fn test_band_name_nullable() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster(&transform, "x", "y", None).unwrap(); + + // Named band + builder + .start_band( + Some("temperature"), + &["y", "x"], + &[4, 4], + BandDataType::Float32, + None, + None, + ) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 64]); + builder.finish_band().unwrap(); - builder.start_band(band_metadata).unwrap(); - builder.band_data_writer().append_value([1u8; 100]); + // Unnamed band (via start_band_2d which passes None for name) + builder.current_width = 4; + builder.current_height = 4; + builder.start_band_2d(BandDataType::UInt8, None).unwrap(); + builder.band_data_writer().append_value(vec![0u8; 16]); builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); - let raster_array = builder.finish().unwrap(); - let iterator = RasterStructArray::new(&raster_array); - let raster = iterator.get(0).unwrap(); - let bands = raster.bands(); - - // Test invalid band number (0-based) - let result = bands.band(0); - assert!(result.is_err()); - let err = result.err().unwrap().to_string(); - assert!(err.contains("band numbers must be 1-based")); - - // Test out of range band number - let result = bands.band(2); - assert!(result.is_err()); - let err = result.err().unwrap().to_string(); - assert!(err.contains("is out of range")); - - // Test valid band number should still work - let result = bands.band(1); - assert!(result.is_ok()); - let band = result.unwrap(); - 
assert_eq!(band.data().len(), 100); + assert_eq!(r.band_name(0), Some("temperature")); + assert_eq!(r.band_name(1), None); // unnamed + assert_eq!(r.band_name(99), None); // out of range } } diff --git a/rust/sedona-raster/src/display.rs b/rust/sedona-raster/src/display.rs index 400658a0a..475959491 100644 --- a/rust/sedona-raster/src/display.rs +++ b/rust/sedona-raster/src/display.rs @@ -19,7 +19,6 @@ use std::fmt; use crate::affine_transformation::to_world_coordinate; use crate::traits::RasterRef; -use sedona_schema::raster::StorageType; /// Wrapper for formatting a raster reference as a human-readable string. /// @@ -39,33 +38,17 @@ use sedona_schema::raster::StorageType; /// ```text /// [WxH/nbands] @ [xmin ymin xmax ymax] / CRS /// ``` -/// -/// Without CRS: -/// ```text -/// [WxH/nbands] @ [xmin ymin xmax ymax] -/// ``` -/// -/// # Examples -/// -/// ```text -/// [64x32/3] @ [43.08 79.07 171.08 143.07] / OGC:CRS84 -/// [3x4/1] @ [3 2.4 3.84 4.24] skew=(0.06, 0.08) / EPSG:2193 -/// [10x10/1] @ [0 0 10 10] / OGC:CRS84 -/// ``` pub struct RasterDisplay<'a>(pub &'a dyn RasterRef); impl fmt::Display for RasterDisplay<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let raster = self.0; - let metadata = raster.metadata(); - let bands = raster.bands(); - let width = metadata.width(); - let height = metadata.height(); - let nbands = bands.len(); + let width = raster.width().unwrap_or(0); + let height = raster.height().unwrap_or(0); + let nbands = raster.num_bands(); // Compute axis-aligned bounding box from 4 corners in world coordinates. - // This handles both skewed and non-skewed rasters correctly. 
let w = width as i64; let h = height as i64; let (ulx, uly) = to_world_coordinate(raster, 0, 0); @@ -78,26 +61,23 @@ impl fmt::Display for RasterDisplay<'_> { let ymin = uly.min(ury).min(lry).min(lly); let ymax = uly.max(ury).max(lry).max(lly); - let skew_x = metadata.skew_x(); - let skew_y = metadata.skew_y(); + let t = raster.transform(); + let skew_x = t[2]; + let skew_y = t[4]; let has_skew = skew_x != 0.0 || skew_y != 0.0; - let has_outdb = bands - .iter() - .any(|band| matches!(band.metadata().storage_type(), Ok(StorageType::OutDbRef))); + let has_outdb = + (0..nbands).any(|i| raster.band(i).is_some_and(|b| b.outdb_uri().is_some())); - // Write: [WxH/nbands] @ [xmin ymin xmax ymax] write!( f, "[{width}x{height}/{nbands}] @ [{xmin} {ymin} {xmax} {ymax}]" )?; - // Conditionally append skew info when the raster is rotated/skewed if has_skew { write!(f, " skew=({skew_x}, {skew_y})")?; } - // Append CRS if present. For PROJJSON (starts with '{'), show compact placeholder. if let Some(crs) = raster.crs() { if crs.starts_with('{') { write!(f, " / {{...}}")?; @@ -113,51 +93,3 @@ impl fmt::Display for RasterDisplay<'_> { Ok(()) } } - -#[cfg(test)] -mod tests { - use super::*; - use crate::array::RasterStructArray; - use sedona_testing::rasters::generate_test_rasters; - - #[test] - fn display_non_skewed_raster() { - // i=0: w=1, h=2, scale=(0.1, -0.2), skew=(0, 0), CRS=OGC:CRS84 - // Bounds: xmin=1, ymin=1.6, xmax=1.1, ymax=2 - let rasters = generate_test_rasters(1, None).unwrap(); - let raster_array = RasterStructArray::new(&rasters); - let raster = raster_array.get(0).unwrap(); - - let display = format!("{}", RasterDisplay(&raster)); - assert_eq!(display, "[1x2/1] @ [1 1.6 1.1 2] / OGC:CRS84"); - } - - #[test] - fn display_skewed_raster() { - // i=2: w=3, h=4, scale=(0.2, -0.4), skew=(0.06, 0.08), CRS=OGC:CRS84 - // Corners: (3,4), (3.6,4.24), (3.84,2.64), (3.24,2.4) - // AABB: xmin=3, ymin=2.4, xmax=3.84, ymax=4.24 - let rasters = generate_test_rasters(3, 
None).unwrap(); - let raster_array = RasterStructArray::new(&rasters); - let raster = raster_array.get(2).unwrap(); - - let display = format!("{}", RasterDisplay(&raster)); - assert_eq!( - display, - "[3x4/1] @ [3 2.4 3.84 4.24] skew=(0.06, 0.08) / OGC:CRS84" - ); - } - - #[test] - fn display_write_to_fmt_write() { - // Verify RasterDisplay works with any fmt::Write target (e.g., String) - let rasters = generate_test_rasters(1, None).unwrap(); - let raster_array = RasterStructArray::new(&rasters); - let raster = raster_array.get(0).unwrap(); - - let mut buf = String::new(); - use std::fmt::Write; - write!(buf, "{}", RasterDisplay(&raster)).unwrap(); - assert_eq!(buf, "[1x2/1] @ [1 1.6 1.1 2] / OGC:CRS84"); - } -} diff --git a/rust/sedona-raster/src/lib.rs b/rust/sedona-raster/src/lib.rs index 77db0c0dd..2e39a5655 100644 --- a/rust/sedona-raster/src/lib.rs +++ b/rust/sedona-raster/src/lib.rs @@ -19,4 +19,5 @@ pub mod affine_transformation; pub mod array; pub mod builder; pub mod display; +pub mod outdb_uri; pub mod traits; diff --git a/rust/sedona-raster/src/outdb_uri.rs b/rust/sedona-raster/src/outdb_uri.rs new file mode 100644 index 000000000..55b757b37 --- /dev/null +++ b/rust/sedona-raster/src/outdb_uri.rs @@ -0,0 +1,133 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +/// Parsed components of an outdb_uri. +/// +/// The outdb_uri format is `scheme://path#fragment`, e.g.: +/// - `geotiff://s3://bucket/file.tif#band=1` +/// - `zarr://s3://bucket/store#temperature/0.0.0` +/// +/// The scheme determines which loader to dispatch to. +/// The path is the external resource location (what RS_BandPath returns to users). +/// The fragment encodes loader-specific details (band id, chunk coords, etc.). +/// Each loader defines its own fragment convention. +/// +/// TODO: For formats like Zarr that may need complex metadata (array path, chunk +/// coordinates, byte ranges), a simple key-value fragment may not be sufficient. +/// If this becomes a limitation, consider switching the fragment to a JSON object +/// or making the entire outdb_uri a JSON string for those formats. +#[derive(Debug, PartialEq)] +pub struct OutDbUri<'a> { + /// Loader scheme (e.g., "geotiff", "zarr") + pub scheme: &'a str, + /// External resource path (e.g., "s3://bucket/file.tif") + pub path: &'a str, + /// Loader-specific fragment (e.g., "band=1"), or None if absent + pub fragment: Option<&'a str>, +} + +/// Parse an outdb_uri into its components. +/// +/// Returns `None` if the URI doesn't contain `://` (not a valid outdb_uri). 
+/// +/// # Examples +/// ``` +/// use sedona_raster::outdb_uri::parse_outdb_uri; +/// +/// let parsed = parse_outdb_uri("geotiff://s3://bucket/file.tif#band=1").unwrap(); +/// assert_eq!(parsed.scheme, "geotiff"); +/// assert_eq!(parsed.path, "s3://bucket/file.tif"); +/// assert_eq!(parsed.fragment, Some("band=1")); +/// +/// let parsed = parse_outdb_uri("zarr://s3://bucket/store").unwrap(); +/// assert_eq!(parsed.scheme, "zarr"); +/// assert_eq!(parsed.path, "s3://bucket/store"); +/// assert_eq!(parsed.fragment, None); +/// ``` +pub fn parse_outdb_uri(uri: &str) -> Option> { + let scheme_end = uri.find("://")?; + let scheme = &uri[..scheme_end]; + let rest = &uri[scheme_end + 3..]; + + let (path, fragment) = match rest.rfind('#') { + Some(hash_pos) => (&rest[..hash_pos], Some(&rest[hash_pos + 1..])), + None => (rest, None), + }; + + Some(OutDbUri { + scheme, + path, + fragment, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_geotiff_with_fragment() { + let parsed = parse_outdb_uri("geotiff://s3://bucket/file.tif#band=1").unwrap(); + assert_eq!(parsed.scheme, "geotiff"); + assert_eq!(parsed.path, "s3://bucket/file.tif"); + assert_eq!(parsed.fragment, Some("band=1")); + } + + #[test] + fn test_zarr_with_fragment() { + let parsed = parse_outdb_uri("zarr://s3://bucket/store#temperature/0.0.0").unwrap(); + assert_eq!(parsed.scheme, "zarr"); + assert_eq!(parsed.path, "s3://bucket/store"); + assert_eq!(parsed.fragment, Some("temperature/0.0.0")); + } + + #[test] + fn test_no_fragment() { + let parsed = parse_outdb_uri("zarr://s3://bucket/store").unwrap(); + assert_eq!(parsed.scheme, "zarr"); + assert_eq!(parsed.path, "s3://bucket/store"); + assert_eq!(parsed.fragment, None); + } + + #[test] + fn test_local_path() { + let parsed = parse_outdb_uri("geotiff:///data/rasters/dem.tif#band=1").unwrap(); + assert_eq!(parsed.scheme, "geotiff"); + assert_eq!(parsed.path, "/data/rasters/dem.tif"); + assert_eq!(parsed.fragment, Some("band=1")); + } + + 
#[test] + fn test_plain_s3_url_parsed_as_scheme() { + // A plain s3:// URL is technically parseable — s3 becomes the scheme + let parsed = parse_outdb_uri("s3://bucket/file.tif").unwrap(); + assert_eq!(parsed.scheme, "s3"); + assert_eq!(parsed.path, "bucket/file.tif"); + assert_eq!(parsed.fragment, None); + } + + #[test] + fn test_invalid_no_scheme() { + assert!(parse_outdb_uri("/local/path/file.tif").is_none()); + assert!(parse_outdb_uri("just-a-string").is_none()); + } + + #[test] + fn test_invalid_empty() { + assert!(parse_outdb_uri("").is_none()); + } +} diff --git a/rust/sedona-raster/src/traits.rs b/rust/sedona-raster/src/traits.rs index f8541ff33..9091f8879 100644 --- a/rust/sedona-raster/src/traits.rs +++ b/rust/sedona-raster/src/traits.rs @@ -15,112 +15,134 @@ // specific language governing permissions and limitations // under the License. -use arrow_schema::ArrowError; +use std::borrow::Cow; -use sedona_schema::raster::{BandDataType, StorageType}; - -/// Metadata for a raster -#[derive(Debug, Clone)] -pub struct RasterMetadata { - pub width: u64, - pub height: u64, - pub upperleft_x: f64, - pub upperleft_y: f64, - pub scale_x: f64, - pub scale_y: f64, - pub skew_x: f64, - pub skew_y: f64, -} +use arrow_schema::ArrowError; +use sedona_schema::raster::BandDataType; -/// Metadata for a single band -#[derive(Debug, Clone)] -pub struct BandMetadata { - pub nodata_value: Option>, - pub storage_type: StorageType, - pub datatype: BandDataType, - /// URL for OutDb reference (only used when storage_type == OutDbRef) - pub outdb_url: Option, - /// Band ID within the OutDb resource (only used when storage_type == OutDbRef) - pub outdb_band_id: Option, +/// Zero-copy view into a band's N-D data buffer with layout metadata. +/// +/// In Phase 1, strides are always standard C-order contiguous and offset is 0. +/// Phase 2 will introduce non-standard strides for zero-copy slicing. 
+#[derive(Debug)] +pub struct NdBuffer<'a> { + pub buffer: &'a [u8], + pub shape: &'a [u64], + pub strides: &'a [i64], + pub offset: u64, + pub data_type: BandDataType, } -/// Trait for accessing complete raster data +/// Trait for accessing an N-dimensional raster (top level). +/// +/// Replaces the legacy `RasterRef` + `MetadataRef` + `BandsRef` hierarchy with +/// a single flat interface. Bands are 0-indexed. pub trait RasterRef { - /// Raster metadata accessor - fn metadata(&self) -> &dyn MetadataRef; - /// CRS accessor + /// Number of bands/variables + fn num_bands(&self) -> usize; + + /// Access a band by 0-based index + fn band(&self, index: usize) -> Option>; + + /// Band name (e.g., Zarr variable name). None for unnamed bands. + fn band_name(&self, index: usize) -> Option<&str>; + + /// CRS string (PROJJSON, WKT, or authority code). None if not set. fn crs(&self) -> Option<&str>; - /// Bands accessor - fn bands(&self) -> &dyn BandsRef; -} -/// Trait for accessing raster metadata (dimensions, geotransform, bounding box, etc.) 
-pub trait MetadataRef { - /// Width of the raster in pixels - fn width(&self) -> u64; - /// Height of the raster in pixels - fn height(&self) -> u64; - /// X coordinate of the upper-left corner - fn upper_left_x(&self) -> f64; - /// Y coordinate of the upper-left corner - fn upper_left_y(&self) -> f64; - /// X-direction pixel size (scale) - fn scale_x(&self) -> f64; - /// Y-direction pixel size (scale) - fn scale_y(&self) -> f64; - /// X-direction skew/rotation - fn skew_x(&self) -> f64; - /// Y-direction skew/rotation - fn skew_y(&self) -> f64; -} -/// Trait for accessing all bands in a raster -pub trait BandsRef { - /// Number of bands in the raster - fn len(&self) -> usize; - /// Check if no bands are present - fn is_empty(&self) -> bool { - self.len() == 0 + /// 6-element affine transform in GDAL GeoTransform order: + /// `[origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]` + fn transform(&self) -> &[f64]; + + /// Name of the X spatial dimension (e.g., "x", "lon", "easting") + fn x_dim(&self) -> &str; + + /// Name of the Y spatial dimension (e.g., "y", "lat", "northing") + fn y_dim(&self) -> &str; + + /// Width in pixels — size of the X spatial dimension in band(0). + fn width(&self) -> Option { + self.band(0).and_then(|b| b.dim_size(self.x_dim())) + } + + /// Height in pixels — size of the Y spatial dimension in band(0). + fn height(&self) -> Option { + self.band(0).and_then(|b| b.dim_size(self.y_dim())) + } + + /// Look up a band by name. Returns None if no band has that name. 
+ fn band_by_name(&self, name: &str) -> Option> { + (0..self.num_bands()) + .find(|&i| self.band_name(i) == Some(name)) + .and_then(|i| self.band(i)) } - /// Get a specific band by number (returns Error if out of bounds) - /// By convention, band numbers are 1-based - fn band(&self, number: usize) -> Result, ArrowError>; - /// Iterator over all bands - fn iter(&self) -> Box + '_>; } -/// Trait for accessing individual band data +/// Trait for accessing a single band/variable within an N-D raster. +/// +/// This is the consumer interface. Implementations handle storage details. +/// Two data access paths: +/// - `contiguous_data()` — flat row-major bytes for consumers that don't need +/// stride awareness (most RS_* functions, GDAL boundary, serialization). +/// - `nd_buffer()` — raw buffer + shape + strides + offset for stride-aware +/// consumers (numpy zero-copy views, Arrow FFI) that want to avoid copies. pub trait BandRef { - /// Band metadata accessor - fn metadata(&self) -> &dyn BandMetadataRef; - /// Raw band data as bytes (zero-copy access) - fn data(&self) -> &[u8]; -} + // -- Dimension metadata -- -/// Trait for accessing individual band metadata -pub trait BandMetadataRef { - /// No-data value as raw bytes (None if null) - fn nodata_value(&self) -> Option<&[u8]>; - /// Storage type (InDb, OutDbRef, etc) - fn storage_type(&self) -> Result; - /// Band data type (UInt8, Float32, etc.) - fn data_type(&self) -> Result; - /// OutDb URL (only used when storage_type == OutDbRef) - fn outdb_url(&self) -> Option<&str>; - /// OutDb band ID (only used when storage_type == OutDbRef) - fn outdb_band_id(&self) -> Option; - - /// No-data value interpreted as f64.
+ /// Number of dimensions in this band + fn ndim(&self) -> usize; + + /// Dimension names in order (e.g., `["time", "y", "x"]`) + fn dim_names(&self) -> Vec<&str>; + + /// Shape (size of each dimension) + fn shape(&self) -> &[u64]; + + /// Size of a named dimension (None if it doesn't exist) + fn dim_size(&self, name: &str) -> Option { + let idx = self.dim_index(name)?; + Some(self.shape()[idx]) + } + + /// Index of a named dimension (None if it doesn't exist) + fn dim_index(&self, name: &str) -> Option { + self.dim_names().iter().position(|n| *n == name) + } + + // -- Band metadata -- + + /// Data type for all elements in this band + fn data_type(&self) -> BandDataType; + + /// Nodata value as raw bytes (None if not set) + fn nodata(&self) -> Option<&[u8]>; + + /// OutDb URI (None for in-memory bands) + fn outdb_uri(&self) -> Option<&str> { + None + } + + // -- Data access -- + + /// Raw backing buffer + layout. Triggers load for lazy impls. + /// Returns an NdBuffer with shape, strides, offset, and raw byte buffer. + fn nd_buffer(&self) -> Result, ArrowError>; + + /// Contiguous row-major bytes. Zero-copy (`Cow::Borrowed`) when data + /// has standard C-order strides; copies into a new buffer only when + /// strides are non-standard. Most RS_* functions use this. + fn contiguous_data(&self) -> Result, ArrowError>; + + /// Nodata value interpreted as f64. /// /// Returns `Ok(None)` when no nodata value is defined, `Ok(Some(f64))` on - /// success, or an error when the raw bytes have an unexpected length for - /// the band's data type. - fn nodata_value_as_f64(&self) -> Result, ArrowError> { - let bytes = match self.nodata_value() { + /// success, or an error when the raw bytes have an unexpected length.
+ fn nodata_as_f64(&self) -> Result, ArrowError> { + let bytes = match self.nodata() { Some(b) => b, None => return Ok(None), }; - let dt = self.data_type()?; - nodata_bytes_to_f64(bytes, &dt).map(Some) + nodata_bytes_to_f64(bytes, &self.data_type()).map(Some) } } @@ -128,7 +150,7 @@ pub trait BandMetadataRef { /// /// The bytes are expected to be in little-endian order and exactly match the /// byte size of the data type. -fn nodata_bytes_to_f64(bytes: &[u8], dt: &BandDataType) -> Result { +pub fn nodata_bytes_to_f64(bytes: &[u8], dt: &BandDataType) -> Result { macro_rules! read_le { ($t:ty, $n:expr) => {{ let arr: [u8; $n] = bytes.try_into().map_err(|_| { @@ -173,15 +195,6 @@ fn nodata_bytes_to_f64(bytes: &[u8], dt: &BandDataType) -> Result: Iterator> { - fn len(&self) -> usize; - /// Check if there are no more bands - fn is_empty(&self) -> bool { - self.len() == 0 - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/rust/sedona-schema/src/raster.rs b/rust/sedona-schema/src/raster.rs index b5b8745c4..16216db0c 100644 --- a/rust/sedona-schema/src/raster.rs +++ b/rust/sedona-schema/src/raster.rs @@ -16,34 +16,33 @@ // under the License. use arrow_schema::{DataType, Field, FieldRef, Fields}; -/// Schema for storing raster data in Apache Arrow format. -/// Utilizing nested structs and lists to represent raster metadata and bands. +/// Schema for storing N-dimensional raster data in Apache Arrow format. +/// +/// Each raster has a CRS, an affine transform, explicit spatial dimension names +/// (`x_dim`, `y_dim`), and a list of bands. Each band is an N-D chunk with named +/// dimensions, a shape, and optional strides for zero-copy slicing. +/// +/// Legacy 2D rasters are represented as bands with `dim_names=["y","x"]` and +/// `shape=[height, width]`. #[derive(Debug, PartialEq, Clone)] pub struct RasterSchema; + impl RasterSchema { /// Returns the top-level fields for the raster schema structure. 
pub fn fields() -> Fields { Fields::from(vec![ - Field::new(column::METADATA, Self::metadata_type(), false), - Field::new(column::CRS, Self::crs_type(), true), // Optional: may be inferred from data + Field::new(column::CRS, Self::crs_type(), true), + Field::new(column::TRANSFORM, Self::transform_type(), false), + Field::new(column::X_DIM, DataType::Utf8View, false), + Field::new(column::Y_DIM, DataType::Utf8View, false), Field::new(column::BANDS, Self::bands_type(), true), ]) } - /// Raster metadata schema - pub fn metadata_type() -> DataType { - DataType::Struct(Fields::from(vec![ - // Raster dimensions - Field::new(column::WIDTH, DataType::UInt64, false), - Field::new(column::HEIGHT, DataType::UInt64, false), - // Geospatial transformation parameters - Field::new(column::UPPERLEFT_X, DataType::Float64, false), - Field::new(column::UPPERLEFT_Y, DataType::Float64, false), - Field::new(column::SCALE_X, DataType::Float64, false), - Field::new(column::SCALE_Y, DataType::Float64, false), - Field::new(column::SKEW_X, DataType::Float64, false), - Field::new(column::SKEW_Y, DataType::Float64, false), - ])) + /// Affine transform schema — 6-element GDAL GeoTransform: + /// `[origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]` + pub fn transform_type() -> DataType { + DataType::List(FieldRef::new(Field::new("item", DataType::Float64, false))) } /// Bands list schema @@ -55,32 +54,37 @@ impl RasterSchema { ))) } - /// Individual band schema + /// Individual band schema — flattened N-D band with dimension metadata pub fn band_type() -> DataType { DataType::Struct(Fields::from(vec![ - Field::new(column::METADATA, Self::band_metadata_type(), false), - Field::new(column::DATA, Self::band_data_type(), false), + Field::new(column::NAME, DataType::Utf8, true), + Field::new(column::DIM_NAMES, Self::dim_names_type(), false), + Field::new(column::SHAPE, Self::shape_type(), false), + Field::new(column::DATATYPE, DataType::UInt32, false), + Field::new(column::NODATA, 
DataType::Binary, true), + Field::new(column::STRIDES, Self::strides_type(), false), + Field::new(column::OFFSET, DataType::UInt64, false), + Field::new(column::OUTDB_URI, DataType::Utf8, true), + Field::new(column::DATA, DataType::BinaryView, false), ])) } - /// Band metadata schema - pub fn band_metadata_type() -> DataType { - DataType::Struct(Fields::from(vec![ - Field::new(column::NODATAVALUE, DataType::Binary, true), // Optional: null means no nodata value specified - Field::new(column::STORAGE_TYPE, DataType::UInt32, false), - Field::new(column::DATATYPE, DataType::UInt32, false), - // OutDb reference fields - only used when storage_type == OutDbRef - Field::new(column::OUTDB_URL, DataType::Utf8, true), - Field::new(column::OUTDB_BAND_ID, DataType::UInt32, true), - ])) + /// Dimension names list type + pub fn dim_names_type() -> DataType { + DataType::List(FieldRef::new(Field::new("item", DataType::Utf8, false))) + } + + /// Shape list type (per-dimension sizes) + pub fn shape_type() -> DataType { + DataType::List(FieldRef::new(Field::new("item", DataType::UInt64, false))) } - /// Band data schema - stores the actual raster pixel data as a binary blob - pub fn band_data_type() -> DataType { - DataType::BinaryView + /// Strides list type (per-dimension byte strides) + pub fn strides_type() -> DataType { + DataType::List(FieldRef::new(Field::new("item", DataType::Int64, false))) } - /// Coordinate Reference System (CRS) schema - stores CRS as JSON string (PROJ or WKT format) + /// Coordinate Reference System (CRS) schema — stores CRS as a string (PROJJSON, WKT, or authority code) pub fn crs_type() -> DataType { DataType::Utf8View } @@ -116,6 +120,23 @@ impl BandDataType { } } + /// Try to convert from a u32 discriminant value.
+ pub fn try_from_u32(value: u32) -> Option { + match value { + 1 => Some(BandDataType::UInt8), + 2 => Some(BandDataType::UInt16), + 3 => Some(BandDataType::Int16), + 4 => Some(BandDataType::UInt32), + 5 => Some(BandDataType::Int32), + 6 => Some(BandDataType::Float32), + 7 => Some(BandDataType::Float64), + 8 => Some(BandDataType::UInt64), + 9 => Some(BandDataType::Int64), + 10 => Some(BandDataType::Int8), + _ => None, + } + } + /// Java/Sedona-compatible pixel type name (e.g. `"UNSIGNED_8BITS"`). pub fn pixel_type_name(&self) -> &'static str { match self { @@ -125,7 +146,6 @@ impl BandDataType { BandDataType::Int32 => "SIGNED_32BITS", BandDataType::Float32 => "REAL_32BITS", BandDataType::Float64 => "REAL_64BITS", - // Extra types present in Rust but not in Java Sedona BandDataType::UInt32 => "UNSIGNED_32BITS", BandDataType::UInt64 => "UNSIGNED_64BITS", BandDataType::Int64 => "SIGNED_64BITS", @@ -134,230 +154,118 @@ impl BandDataType { } } -/// Storage strategy for raster band data within Apache Arrow arrays. -/// -/// This enum defines how raster data is physically stored and accessed: -/// -/// **InDb**: Raster data is embedded directly in the Arrow array as binary blobs. -/// - Self-contained, no external dependencies, fast access for small-medium rasters -/// - Increases Arrow array size, memory usage grows and copy times increase with raster size -/// - Best for: Tiles, thumbnails, processed results, small rasters (<10MB per band) -/// -/// **OutDbRef**: Raster data is stored externally with references in the Arrow array. 
-/// - Keeps Arrow arrays lightweight, supports massive rasters, enables lazy loading -/// - Requires external storage management, potential for broken references -/// - Best for: Large satellite imagery, time series data, cloud-native workflows -/// - Supported backends: S3, GCS, Azure Blob, local filesystem, HTTP endpoints -#[repr(u16)] -#[derive(Clone, Debug, PartialEq, Eq, Hash, Copy)] -pub enum StorageType { - InDb = 0, - OutDbRef = 1, -} - /// Hard-coded column indices for performant access to nested struct fields. /// These indices must match the exact order defined in the RasterSchema methods. -/// -/// Using compile-time constants avoids string lookups and provides type safety -/// when accessing nested struct fields in Arrow arrays. -pub mod metadata_indices { - pub const WIDTH: usize = 0; - pub const HEIGHT: usize = 1; - pub const UPPERLEFT_X: usize = 2; - pub const UPPERLEFT_Y: usize = 3; - pub const SCALE_X: usize = 4; - pub const SCALE_Y: usize = 5; - pub const SKEW_X: usize = 6; - pub const SKEW_Y: usize = 7; -} - -pub mod band_metadata_indices { - pub const NODATAVALUE: usize = 0; - pub const STORAGE_TYPE: usize = 1; - pub const DATATYPE: usize = 2; - pub const OUTDB_URL: usize = 3; - pub const OUTDB_BAND_ID: usize = 4; +pub mod raster_indices { + pub const CRS: usize = 0; + pub const TRANSFORM: usize = 1; + pub const X_DIM: usize = 2; + pub const Y_DIM: usize = 3; + pub const BANDS: usize = 4; } pub mod band_indices { - pub const METADATA: usize = 0; - pub const DATA: usize = 1; -} - -pub mod raster_indices { - pub const METADATA: usize = 0; - pub const CRS: usize = 1; - pub const BANDS: usize = 2; + pub const NAME: usize = 0; + pub const DIM_NAMES: usize = 1; + pub const SHAPE: usize = 2; + pub const DATA_TYPE: usize = 3; + pub const NODATA: usize = 4; + pub const STRIDES: usize = 5; + pub const OFFSET: usize = 6; + pub const OUTDB_URI: usize = 7; + pub const DATA: usize = 8; } /// Column name constants used throughout the raster schema 
definition. -/// These string constants ensure consistency across schema creation and field access. pub mod column { - pub const METADATA: &str = "metadata"; + // Top-level raster fields + pub const CRS: &str = "crs"; + pub const TRANSFORM: &str = "transform"; + pub const X_DIM: &str = "x_dim"; + pub const Y_DIM: &str = "y_dim"; pub const BANDS: &str = "bands"; pub const BAND: &str = "band"; - pub const DATA: &str = "data"; - - // Raster metadata fields - pub const WIDTH: &str = "width"; - pub const HEIGHT: &str = "height"; - pub const UPPERLEFT_X: &str = "upperleft_x"; - pub const UPPERLEFT_Y: &str = "upperleft_y"; - pub const SCALE_X: &str = "scale_x"; - pub const SCALE_Y: &str = "scale_y"; - pub const SKEW_X: &str = "skew_x"; - pub const SKEW_Y: &str = "skew_y"; - // Raster CRS field - pub const CRS: &str = "crs"; - // Band metadata fields - pub const NODATAVALUE: &str = "nodata_value"; - pub const STORAGE_TYPE: &str = "storage_type"; + // Band fields + pub const NAME: &str = "name"; + pub const DIM_NAMES: &str = "dim_names"; + pub const SHAPE: &str = "shape"; pub const DATATYPE: &str = "data_type"; - pub const OUTDB_URL: &str = "outdb_url"; - pub const OUTDB_BAND_ID: &str = "outdb_band_id"; + pub const NODATA: &str = "nodata"; + pub const STRIDES: &str = "strides"; + pub const OFFSET: &str = "offset"; + pub const OUTDB_URI: &str = "outdb_uri"; + pub const DATA: &str = "data"; } #[cfg(test)] mod tests { use super::*; - /// Tests that the top-level raster schema has the expected number and names of fields. 
+ #[test] fn test_raster_schema_fields() { let fields = RasterSchema::fields(); - assert_eq!(fields.len(), 3); - assert_eq!(fields[0].name(), column::METADATA); - assert_eq!(fields[1].name(), column::CRS); - assert_eq!(fields[2].name(), column::BANDS); + assert_eq!(fields.len(), 5); + assert_eq!(fields[0].name(), column::CRS); + assert_eq!(fields[1].name(), column::TRANSFORM); + assert_eq!(fields[2].name(), column::X_DIM); + assert_eq!(fields[3].name(), column::Y_DIM); + assert_eq!(fields[4].name(), column::BANDS); } - /// Comprehensive test to verify all hard-coded indices match the actual schema. - /// This ensures that performance optimizations using direct index access remain valid - /// when the schema structure changes. #[test] fn test_hardcoded_indices_match_schema() { // Test raster-level indices let raster_fields = RasterSchema::fields(); - assert_eq!(raster_fields.len(), 3, "Expected exactly 3 raster fields"); - assert_eq!( - raster_fields[raster_indices::METADATA].name(), - column::METADATA, - "Raster metadata index mismatch" - ); + assert_eq!(raster_fields.len(), 5, "Expected exactly 5 raster fields"); assert_eq!( raster_fields[raster_indices::CRS].name(), column::CRS, "Raster CRS index mismatch" ); + assert_eq!( + raster_fields[raster_indices::TRANSFORM].name(), + column::TRANSFORM, + "Raster TRANSFORM index mismatch" + ); + assert_eq!( + raster_fields[raster_indices::X_DIM].name(), + column::X_DIM, + "Raster X_DIM index mismatch" + ); + assert_eq!( + raster_fields[raster_indices::Y_DIM].name(), + column::Y_DIM, + "Raster Y_DIM index mismatch" + ); assert_eq!( raster_fields[raster_indices::BANDS].name(), column::BANDS, "Raster BANDS index mismatch" ); - // Test metadata indices - let metadata_type = RasterSchema::metadata_type(); - if let DataType::Struct(metadata_fields) = metadata_type { - assert_eq!( - metadata_fields.len(), - 8, - "Expected exactly 8 metadata fields" - ); - assert_eq!( - metadata_fields[metadata_indices::WIDTH].name(), - 
column::WIDTH, - "Metadata width index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::HEIGHT].name(), - column::HEIGHT, - "Metadata height index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::UPPERLEFT_X].name(), - column::UPPERLEFT_X, - "Metadata upperleft_x index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::UPPERLEFT_Y].name(), - column::UPPERLEFT_Y, - "Metadata upperleft_y index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::SCALE_X].name(), - column::SCALE_X, - "Metadata scale_x index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::SCALE_Y].name(), - column::SCALE_Y, - "Metadata scale_y index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::SKEW_X].name(), - column::SKEW_X, - "Metadata skew_x index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::SKEW_Y].name(), - column::SKEW_Y, - "Metadata skew_y index mismatch" - ); - } else { - panic!("Expected Struct type for metadata"); - } - - // Test band metadata indices - let band_metadata_type = RasterSchema::band_metadata_type(); - if let DataType::Struct(band_metadata_fields) = band_metadata_type { - assert_eq!( - band_metadata_fields.len(), - 5, - "Expected exactly 5 band metadata fields" - ); - assert_eq!( - band_metadata_fields[band_metadata_indices::NODATAVALUE].name(), - column::NODATAVALUE, - "Band metadata nodatavalue index mismatch" - ); - assert_eq!( - band_metadata_fields[band_metadata_indices::STORAGE_TYPE].name(), - column::STORAGE_TYPE, - "Band metadata storage_type index mismatch" - ); - assert_eq!( - band_metadata_fields[band_metadata_indices::DATATYPE].name(), - column::DATATYPE, - "Band metadata datatype index mismatch" - ); - assert_eq!( - band_metadata_fields[band_metadata_indices::OUTDB_URL].name(), - column::OUTDB_URL, - "Band metadata outdb_url index mismatch" - ); - assert_eq!( - band_metadata_fields[band_metadata_indices::OUTDB_BAND_ID].name(), - column::OUTDB_BAND_ID, - 
"Band metadata outdb_band_id index mismatch" - ); - } else { - panic!("Expected Struct type for band metadata"); - } - // Test band indices let band_type = RasterSchema::band_type(); if let DataType::Struct(band_fields) = band_type { - assert_eq!(band_fields.len(), 2, "Expected exactly 2 band fields"); + assert_eq!(band_fields.len(), 9, "Expected exactly 9 band fields"); + assert_eq!(band_fields[band_indices::NAME].name(), column::NAME); assert_eq!( - band_fields[band_indices::METADATA].name(), - column::METADATA, - "Band metadata index mismatch" + band_fields[band_indices::DIM_NAMES].name(), + column::DIM_NAMES ); + assert_eq!(band_fields[band_indices::SHAPE].name(), column::SHAPE); assert_eq!( - band_fields[band_indices::DATA].name(), - column::DATA, - "Band data index mismatch" + band_fields[band_indices::DATA_TYPE].name(), + column::DATATYPE ); + assert_eq!(band_fields[band_indices::NODATA].name(), column::NODATA); + assert_eq!(band_fields[band_indices::STRIDES].name(), column::STRIDES); + assert_eq!(band_fields[band_indices::OFFSET].name(), column::OFFSET); + assert_eq!( + band_fields[band_indices::OUTDB_URI].name(), + column::OUTDB_URI + ); + assert_eq!(band_fields[band_indices::DATA].name(), column::DATA); } else { panic!("Expected Struct type for band"); } @@ -377,6 +285,48 @@ mod tests { assert_eq!(BandDataType::Float64.byte_size(), 8); } + #[test] + fn test_band_data_type_try_from_u32() { + assert_eq!(BandDataType::try_from_u32(1), Some(BandDataType::UInt8)); + assert_eq!(BandDataType::try_from_u32(2), Some(BandDataType::UInt16)); + assert_eq!(BandDataType::try_from_u32(3), Some(BandDataType::Int16)); + assert_eq!(BandDataType::try_from_u32(4), Some(BandDataType::UInt32)); + assert_eq!(BandDataType::try_from_u32(5), Some(BandDataType::Int32)); + assert_eq!(BandDataType::try_from_u32(6), Some(BandDataType::Float32)); + assert_eq!(BandDataType::try_from_u32(7), Some(BandDataType::Float64)); + assert_eq!(BandDataType::try_from_u32(8), 
Some(BandDataType::UInt64)); + assert_eq!(BandDataType::try_from_u32(9), Some(BandDataType::Int64)); + assert_eq!(BandDataType::try_from_u32(10), Some(BandDataType::Int8)); + assert_eq!(BandDataType::try_from_u32(0), None); + assert_eq!(BandDataType::try_from_u32(11), None); + assert_eq!(BandDataType::try_from_u32(u32::MAX), None); + } + + #[test] + fn test_band_data_type_roundtrip_u32() { + // Verify that discriminant → try_from_u32 round-trips for all variants + let all_types = [ + BandDataType::UInt8, + BandDataType::UInt16, + BandDataType::Int16, + BandDataType::UInt32, + BandDataType::Int32, + BandDataType::Float32, + BandDataType::Float64, + BandDataType::UInt64, + BandDataType::Int64, + BandDataType::Int8, + ]; + for dt in all_types { + let value = dt as u32; + assert_eq!( + BandDataType::try_from_u32(value), + Some(dt), + "Round-trip failed for {dt:?} (discriminant {value})" + ); + } + } + #[test] fn test_band_data_type_pixel_type_name() { assert_eq!(BandDataType::UInt8.pixel_type_name(), "UNSIGNED_8BITS"); diff --git a/rust/sedona-testing/src/benchmark_util.rs b/rust/sedona-testing/src/benchmark_util.rs index 0a998a12c..91e968e97 100644 --- a/rust/sedona-testing/src/benchmark_util.rs +++ b/rust/sedona-testing/src/benchmark_util.rs @@ -970,8 +970,7 @@ mod test { let rasters = RasterStructArray::new(raster_array); assert_eq!(rasters.len(), ROWS_PER_BATCH); let raster = rasters.get(0).unwrap(); - let metadata = raster.metadata(); - assert_eq!(metadata.width(), 10); - assert_eq!(metadata.height(), 5); + assert_eq!(raster.width(), Some(10)); + assert_eq!(raster.height(), Some(5)); } } diff --git a/rust/sedona-testing/src/rasters.rs b/rust/sedona-testing/src/rasters.rs index d30940473..834a7df70 100644 --- a/rust/sedona-testing/src/rasters.rs +++ b/rust/sedona-testing/src/rasters.rs @@ -19,12 +19,12 @@ use datafusion_common::Result; use fastrand::Rng; use sedona_raster::array::RasterStructArray; use sedona_raster::builder::RasterBuilder; -use 
sedona_raster::traits::{BandMetadata, RasterMetadata, RasterRef}; +use sedona_raster::traits::RasterRef; use sedona_schema::crs::lnglat; -use sedona_schema::raster::{BandDataType, StorageType}; +use sedona_schema::raster::BandDataType; -/// Generate a StructArray of rasters with sequentially increasing dimensions and pixel values -/// These tiny rasters are to provide fast, easy and predictable test data for unit tests. +/// Generate a StructArray of rasters with sequentially increasing dimensions and pixel values. +/// These tiny rasters provide fast, easy and predictable test data for unit tests. pub fn generate_test_rasters( count: usize, null_raster_index: Option, @@ -32,34 +32,28 @@ pub fn generate_test_rasters( let mut builder = RasterBuilder::new(count); let crs = lnglat().unwrap().to_crs_string(); for i in 0..count { - // If a null raster index is specified and that matches the current index, - // append a null raster if matches!(null_raster_index, Some(index) if index == i) { builder.append_null()?; continue; } - let raster_metadata = RasterMetadata { - width: i as u64 + 1, - height: i as u64 + 2, - upperleft_x: i as f64 + 1.0, - upperleft_y: i as f64 + 2.0, - scale_x: i.max(1) as f64 * 0.1, - scale_y: i.max(1) as f64 * -0.2, - skew_x: i as f64 * 0.03, - skew_y: i as f64 * 0.04, - }; - builder.start_raster(&raster_metadata, Some(&crs))?; - builder.start_band(BandMetadata { - datatype: BandDataType::UInt16, - nodata_value: Some(vec![0u8; 2]), - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - })?; + let width = i as u64 + 1; + let height = i as u64 + 2; + builder.start_raster_2d( + width, + height, + i as f64 + 1.0, // origin_x + i as f64 + 2.0, // origin_y + i.max(1) as f64 * 0.1, // scale_x + i.max(1) as f64 * -0.2, // scale_y + i as f64 * 0.03, // skew_x + i as f64 * 0.04, // skew_y + Some(&crs), + )?; + builder.start_band_2d(BandDataType::UInt16, Some(&[0u8, 0u8]))?; let pixel_count = (i + 1) * (i + 2); // width * height - 
let mut band_data = Vec::with_capacity(pixel_count * 2); // 2 bytes per u16 + let mut band_data = Vec::with_capacity(pixel_count * 2); for pixel_value in 0..pixel_count as u16 { band_data.extend_from_slice(&pixel_value.to_le_bytes()); } @@ -72,11 +66,8 @@ pub fn generate_test_rasters( Ok(builder.finish()?) } -/// Generates a set of tiled rasters arranged in a grid -/// - Each raster tile has specified dimensions and random pixel values -/// - Each raster has 3 bands which can be interpreted as RGB values -/// and the result can be visualized as a mosaic of tiles. -/// - There are nodata values at the 4 corners of the overall mosaic. +/// Generates a set of tiled rasters arranged in a grid. +/// Each raster has 3 bands (RGB) with random pixel values. pub fn generate_tiled_rasters( tile_size: (usize, usize), number_of_tiles: (usize, usize), @@ -98,38 +89,25 @@ pub fn generate_tiled_rasters( let origin_x = (tile_x * tile_width) as f64; let origin_y = (tile_y * tile_height) as f64; - let raster_metadata = RasterMetadata { - width: tile_width as u64, - height: tile_height as u64, - upperleft_x: origin_x, - upperleft_y: origin_y, - scale_x: 1.0, - scale_y: 1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - - raster_builder.start_raster(&raster_metadata, Some(&crs))?; + raster_builder.start_raster_2d( + tile_width as u64, + tile_height as u64, + origin_x, + origin_y, + 1.0, + 1.0, + 0.0, + 0.0, + Some(&crs), + )?; for _ in 0..band_count { - // Set a nodata value appropriate for the data type let nodata_value = get_nodata_value_for_type(&data_type); - let nodata_value_bytes = nodata_value.clone(); - let band_metadata = BandMetadata { - nodata_value, - storage_type: StorageType::InDb, - datatype: data_type, - outdb_url: None, - outdb_band_id: None, - }; - - raster_builder.start_band(band_metadata)?; + raster_builder.start_band_2d(data_type, nodata_value.as_deref())?; let pixel_count = tile_width * tile_height; - - // Determine which corner position (if any) should have nodata in 
this tile let corner_position = get_corner_position(tile_x, tile_y, x_tiles, y_tiles, tile_width, tile_height); let band_data = generate_random_band_data( @@ -152,31 +130,14 @@ pub fn generate_tiled_rasters( } /// Builds a 1x1 single-band raster with a non-invertible geotransform (zero scales and skews). -/// Useful for testing error handling of inverse affine transforms. pub fn build_noninvertible_raster() -> StructArray { let mut builder = RasterBuilder::new(1); - let metadata = RasterMetadata { - width: 1, - height: 1, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 0.0, - scale_y: 0.0, - skew_x: 0.0, - skew_y: 0.0, - }; let crs = lnglat().unwrap().to_crs_string(); builder - .start_raster(&metadata, Some(&crs)) + .start_raster_2d(1, 1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, Some(&crs)) .expect("start raster"); builder - .start_band(BandMetadata { - datatype: BandDataType::UInt8, - nodata_value: None, - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - }) + .start_band_2d(BandDataType::UInt8, None) .expect("start band"); builder.band_data_writer().append_value([0u8]); builder.finish_band().expect("finish band"); @@ -193,61 +154,37 @@ pub fn raster_from_single_band( crs: Option<&str>, ) -> StructArray { let mut builder = RasterBuilder::new(1); - let metadata = RasterMetadata { - width: width as u64, - height: height as u64, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - }; - - builder.start_raster(&metadata, crs).expect("start raster"); builder - .start_band(BandMetadata { - datatype: data_type, - nodata_value: None, - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - }) - .expect("start band"); + .start_raster_2d( + width as u64, + height as u64, + 0.0, + 0.0, + 1.0, + -1.0, + 0.0, + 0.0, + crs, + ) + .expect("start raster"); + builder.start_band_2d(data_type, None).expect("start band"); builder.band_data_writer().append_value(band_bytes); 
builder.finish_band().expect("finish band"); builder.finish_raster().expect("finish raster"); - builder.finish().expect("finish") } /// Builds a single raster with 3 bands of different types for testing multi-band operations. -/// Band 1: UInt8 (nodata=255), Band 2: UInt16 (nodata=0), Band 3: Float32 (no nodata). -/// Each band is 2x2 pixels. pub fn generate_multi_band_raster() -> StructArray { let mut builder = RasterBuilder::new(1); let crs = lnglat().unwrap().to_crs_string(); - let metadata = RasterMetadata { - width: 2, - height: 2, - upperleft_x: 10.0, - upperleft_y: 20.0, - scale_x: 0.5, - scale_y: -0.5, - skew_x: 0.0, - skew_y: 0.0, - }; - builder.start_raster(&metadata, Some(&crs)).unwrap(); + builder + .start_raster_2d(2, 2, 10.0, 20.0, 0.5, -0.5, 0.0, 0.0, Some(&crs)) + .unwrap(); // Band 1: UInt8, nodata=255 builder - .start_band(BandMetadata { - datatype: BandDataType::UInt8, - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - }) + .start_band_2d(BandDataType::UInt8, Some(&[255u8])) .unwrap(); builder .band_data_writer() @@ -256,13 +193,7 @@ pub fn generate_multi_band_raster() -> StructArray { // Band 2: UInt16, nodata=0 builder - .start_band(BandMetadata { - datatype: BandDataType::UInt16, - nodata_value: Some(vec![0u8, 0u8]), - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - }) + .start_band_2d(BandDataType::UInt16, Some(&[0u8, 0u8])) .unwrap(); let band2_data: Vec = [100u16, 200u16, 300u16, 400u16] .iter() @@ -272,15 +203,7 @@ pub fn generate_multi_band_raster() -> StructArray { builder.finish_band().unwrap(); // Band 3: Float32, no nodata - builder - .start_band(BandMetadata { - datatype: BandDataType::Float32, - nodata_value: None, - storage_type: StorageType::InDb, - outdb_url: None, - outdb_band_id: None, - }) - .unwrap(); + builder.start_band_2d(BandDataType::Float32, None).unwrap(); let band3_data: Vec = [1.5f32, 2.5f32, 3.5f32, 4.5f32] .iter() 
.flat_map(|v| v.to_le_bytes()) @@ -292,8 +215,6 @@ pub fn generate_multi_band_raster() -> StructArray { builder.finish().unwrap() } -/// Determine if this tile contains a corner of the overall grid and return its position -/// Returns Some(position) if this tile contains a corner, None otherwise fn get_corner_position( tile_x: usize, tile_y: usize, @@ -302,19 +223,15 @@ fn get_corner_position( tile_width: usize, tile_height: usize, ) -> Option { - // Top-left corner (tile 0,0, pixel 0) if tile_x == 0 && tile_y == 0 { return Some(0); } - // Top-right corner (tile x_tiles-1, 0, pixel tile_width-1) if tile_x == x_tiles - 1 && tile_y == 0 { return Some(tile_width - 1); } - // Bottom-left corner (tile 0, y_tiles-1, pixel (tile_height-1)*tile_width) if tile_x == 0 && tile_y == y_tiles - 1 { return Some((tile_height - 1) * tile_width); } - // Bottom-right corner (tile x_tiles-1, y_tiles-1, pixel tile_height*tile_width-1) if tile_x == x_tiles - 1 && tile_y == y_tiles - 1 { return Some(tile_height * tile_width - 1); } @@ -328,8 +245,6 @@ fn generate_random_band_data( corner_position: Option, rng: &mut Rng, ) -> Vec { - /// Generate random band data for a given pixel type and set the corner pixel - /// to the nodata value if applicable. macro_rules! gen_band { ($byte_size:expr, $rng_expr:expr) => {{ let byte_size: usize = $byte_size; @@ -376,7 +291,7 @@ fn get_nodata_value_for_type(data_type: &BandDataType) -> Option> { } } -/// Compare two RasterStructArrays for equality +/// Compare two RasterStructArrays for equality. pub fn assert_raster_arrays_equal( raster_array1: &RasterStructArray, raster_array2: &RasterStructArray, @@ -394,86 +309,72 @@ pub fn assert_raster_arrays_equal( } } -/// Compare two rasters for equality +/// Compare two rasters for equality. 
pub fn assert_raster_equal(raster1: &impl RasterRef, raster2: &impl RasterRef) { - // Compare metadata - let meta1 = raster1.metadata(); - let meta2 = raster2.metadata(); - assert_eq!(meta1.width(), meta2.width(), "Raster widths do not match"); - assert_eq!( - meta1.height(), - meta2.height(), - "Raster heights do not match" - ); assert_eq!( - meta1.upper_left_x(), - meta2.upper_left_x(), - "Raster upper left x does not match" + raster1.width(), + raster2.width(), + "Raster widths do not match" ); assert_eq!( - meta1.upper_left_y(), - meta2.upper_left_y(), - "Raster upper left y does not match" + raster1.height(), + raster2.height(), + "Raster heights do not match" ); assert_eq!( - meta1.scale_x(), - meta2.scale_x(), - "Raster scale x does not match" + raster1.transform(), + raster2.transform(), + "Raster transforms do not match" ); assert_eq!( - meta1.scale_y(), - meta2.scale_y(), - "Raster scale y does not match" + raster1.x_dim(), + raster2.x_dim(), + "Raster x_dim does not match" ); assert_eq!( - meta1.skew_x(), - meta2.skew_x(), - "Raster skew x does not match" + raster1.y_dim(), + raster2.y_dim(), + "Raster y_dim does not match" ); assert_eq!( - meta1.skew_y(), - meta2.skew_y(), - "Raster skew y does not match" + raster1.num_bands(), + raster2.num_bands(), + "Number of bands do not match" ); - // Compare bands - let bands1 = raster1.bands(); - let bands2 = raster2.bands(); - assert_eq!(bands1.len(), bands2.len(), "Number of bands do not match"); + for band_index in 0..raster1.num_bands() { + let band1 = raster1 + .band(band_index) + .unwrap_or_else(|| panic!("Band {band_index} missing from raster1")); + let band2 = raster2 + .band(band_index) + .unwrap_or_else(|| panic!("Band {band_index} missing from raster2")); - for band_index in 0..bands1.len() { - let band1 = bands1.band(band_index + 1).unwrap(); - let band2 = bands2.band(band_index + 1).unwrap(); - - let band_meta1 = band1.metadata(); - let band_meta2 = band2.metadata(); assert_eq!( - 
band_meta1.data_type().unwrap(), - band_meta2.data_type().unwrap(), - "Band data types do not match" + band1.data_type(), + band2.data_type(), + "Band {band_index} data types do not match" ); assert_eq!( - band_meta1.nodata_value(), - band_meta2.nodata_value(), - "Band nodata values do not match" + band1.nodata(), + band2.nodata(), + "Band {band_index} nodata values do not match" ); assert_eq!( - band_meta1.storage_type().unwrap(), - band_meta2.storage_type().unwrap(), - "Band storage types do not match" + band1.dim_names(), + band2.dim_names(), + "Band {band_index} dim_names do not match" ); assert_eq!( - band_meta1.outdb_url(), - band_meta2.outdb_url(), - "Band outdb URLs do not match" + band1.shape(), + band2.shape(), + "Band {band_index} shapes do not match" ); assert_eq!( - band_meta1.outdb_band_id(), - band_meta2.outdb_band_id(), - "Band outdb band IDs do not match" + band1.contiguous_data().unwrap().as_ref(), + band2.contiguous_data().unwrap().as_ref(), + "Band {band_index} data does not match" ); - - assert_eq!(band1.data(), band2.data(), "Band data does not match"); } } @@ -492,29 +393,24 @@ mod tests { for i in 0..count { let raster = raster_array.get(i).unwrap(); - let metadata = raster.metadata(); - assert_eq!(metadata.width(), i as u64 + 1); - assert_eq!(metadata.height(), i as u64 + 2); - assert_eq!(metadata.upper_left_x(), i as f64 + 1.0); - assert_eq!(metadata.upper_left_y(), i as f64 + 2.0); - assert_eq!(metadata.scale_x(), (i.max(1) as f64) * 0.1); - assert_eq!(metadata.scale_y(), (i.max(1) as f64) * -0.2); - assert_eq!(metadata.skew_x(), (i as f64) * 0.03); - assert_eq!(metadata.skew_y(), (i as f64) * 0.04); - - let bands = raster.bands(); - let band = bands.band(1).unwrap(); - let band_metadata = band.metadata(); - assert_eq!(band_metadata.data_type().unwrap(), BandDataType::UInt16); - assert_eq!(band_metadata.nodata_value(), Some(&[0u8, 0u8][..])); - assert_eq!(band_metadata.storage_type().unwrap(), StorageType::InDb); - 
assert_eq!(band_metadata.outdb_url(), None); - assert_eq!(band_metadata.outdb_band_id(), None); - - let band_data = band.data(); - let expected_pixel_count = (i + 1) * (i + 2); // width * height - - // Convert raw bytes back to u16 values for comparison + assert_eq!(raster.width(), Some(i as u64 + 1)); + assert_eq!(raster.height(), Some(i as u64 + 2)); + + let t = raster.transform(); + assert_eq!(t[0], i as f64 + 1.0); // origin_x + assert_eq!(t[3], i as f64 + 2.0); // origin_y + assert_eq!(t[1], (i.max(1) as f64) * 0.1); // scale_x + assert_eq!(t[5], (i.max(1) as f64) * -0.2); // scale_y + assert_eq!(t[2], (i as f64) * 0.03); // skew_x + assert_eq!(t[4], (i as f64) * 0.04); // skew_y + + assert_eq!(raster.num_bands(), 1); + let band = raster.band(0).unwrap(); + assert_eq!(band.data_type(), BandDataType::UInt16); + assert_eq!(band.nodata(), Some(&[0u8, 0u8][..])); + + let band_data = band.contiguous_data().unwrap(); + let expected_pixel_count = (i + 1) * (i + 2); let mut actual_pixel_values = Vec::new(); for chunk in band_data.chunks_exact(2) { let value = u16::from_le_bytes([chunk[0], chunk[1]]); @@ -533,32 +429,52 @@ mod tests { let struct_array = generate_tiled_rasters(tile_size, number_of_tiles, data_type, Some(43)).unwrap(); let raster_array = RasterStructArray::new(&struct_array); - assert_eq!(raster_array.len(), 16); // 4x4 tiles + assert_eq!(raster_array.len(), 16); for i in 0..16 { let raster = raster_array.get(i).unwrap(); - let metadata = raster.metadata(); - assert_eq!(metadata.width(), 64); - assert_eq!(metadata.height(), 64); - assert_eq!(metadata.upper_left_x(), ((i % 4) * 64) as f64); - assert_eq!(metadata.upper_left_y(), ((i / 4) * 64) as f64); - let bands = raster.bands(); - assert_eq!(bands.len(), 3); + assert_eq!(raster.width(), Some(64)); + assert_eq!(raster.height(), Some(64)); + let t = raster.transform(); + assert_eq!(t[0], ((i % 4) * 64) as f64); // origin_x + assert_eq!(t[3], ((i / 4) * 64) as f64); // origin_y + 
assert_eq!(raster.num_bands(), 3); for band_index in 0..3 { - let band = bands.band(band_index + 1).unwrap(); - let band_metadata = band.metadata(); - assert_eq!(band_metadata.data_type().unwrap(), BandDataType::UInt8); - assert_eq!(band_metadata.storage_type().unwrap(), StorageType::InDb); - let band_data = band.data(); - assert_eq!(band_data.len(), 64 * 64); // 4096 pixels + let band = raster.band(band_index).unwrap(); + assert_eq!(band.data_type(), BandDataType::UInt8); + assert_eq!(band.contiguous_data().unwrap().len(), 64 * 64); } } } + #[test] + fn test_generate_multi_band_raster() { + let struct_array = generate_multi_band_raster(); + let raster_array = RasterStructArray::new(&struct_array); + assert_eq!(raster_array.len(), 1); + + let raster = raster_array.get(0).unwrap(); + assert_eq!(raster.width(), Some(2)); + assert_eq!(raster.height(), Some(2)); + assert_eq!(raster.num_bands(), 3); + + let b1 = raster.band(0).unwrap(); + assert_eq!(b1.data_type(), BandDataType::UInt8); + assert_eq!(b1.nodata(), Some(&[255u8][..])); + assert_eq!(b1.contiguous_data().unwrap().as_ref(), &[1u8, 2, 3, 4]); + + let b2 = raster.band(1).unwrap(); + assert_eq!(b2.data_type(), BandDataType::UInt16); + assert_eq!(b2.nodata(), Some(&[0u8, 0][..])); + + let b3 = raster.band(2).unwrap(); + assert_eq!(b3.data_type(), BandDataType::Float32); + assert_eq!(b3.nodata(), None); + } + #[test] fn test_raster_arrays_equal() { let raster_array1 = generate_test_rasters(3, None).unwrap(); let raster_struct_array1 = RasterStructArray::new(&raster_array1); - // Test that identical arrays are equal assert_raster_arrays_equal(&raster_struct_array1, &raster_struct_array1); } @@ -567,8 +483,6 @@ mod tests { fn test_raster_arrays_not_equal() { let raster_array1 = generate_test_rasters(3, None).unwrap(); let raster_struct_array1 = RasterStructArray::new(&raster_array1); - - // Test that arrays with different lengths are not equal let raster_array2 = generate_test_rasters(4, None).unwrap(); let 
raster_struct_array2 = RasterStructArray::new(&raster_array2); assert_raster_arrays_equal(&raster_struct_array1, &raster_struct_array2); @@ -578,65 +492,33 @@ mod tests { fn test_raster_equal() { let raster_array1 = generate_tiled_rasters((256, 256), (1, 1), BandDataType::UInt8, Some(43)).unwrap(); - let raster1 = RasterStructArray::new(&raster_array1).get(0).unwrap(); - - // Assert that the rasters are equal to themselves + let rsa = RasterStructArray::new(&raster_array1); + let raster1 = rsa.get(0).unwrap(); assert_raster_equal(&raster1, &raster1); } #[test] - #[should_panic = "Band data does not match"] + #[should_panic = "Band 0 data does not match"] fn test_raster_different_band_data() { let raster_array1 = generate_tiled_rasters((128, 128), (1, 1), BandDataType::UInt8, Some(43)).unwrap(); let raster_array2 = generate_tiled_rasters((128, 128), (1, 1), BandDataType::UInt8, Some(47)).unwrap(); - - let raster1 = RasterStructArray::new(&raster_array1).get(0).unwrap(); - let raster2 = RasterStructArray::new(&raster_array2).get(0).unwrap(); + let rsa1 = RasterStructArray::new(&raster_array1); + let rsa2 = RasterStructArray::new(&raster_array2); + let raster1 = rsa1.get(0).unwrap(); + let raster2 = rsa2.get(0).unwrap(); assert_raster_equal(&raster1, &raster2); } #[test] - fn test_generate_multi_band_raster() { - let struct_array = generate_multi_band_raster(); - let raster_array = RasterStructArray::new(&struct_array); - assert_eq!(raster_array.len(), 1); - - let raster = raster_array.get(0).unwrap(); - let metadata = raster.metadata(); - assert_eq!(metadata.width(), 2); - assert_eq!(metadata.height(), 2); - assert_eq!(metadata.upper_left_x(), 10.0); - assert_eq!(metadata.upper_left_y(), 20.0); - - let bands = raster.bands(); - assert_eq!(bands.len(), 3); - - // Band 1: UInt8, nodata=255 - let b1 = bands.band(1).unwrap(); - assert_eq!(b1.metadata().data_type().unwrap(), BandDataType::UInt8); - assert_eq!(b1.metadata().nodata_value(), Some(&[255u8][..])); - 
assert_eq!(b1.data(), &[1u8, 2, 3, 4]); - - // Band 2: UInt16, nodata=0 - let b2 = bands.band(2).unwrap(); - assert_eq!(b2.metadata().data_type().unwrap(), BandDataType::UInt16); - assert_eq!(b2.metadata().nodata_value(), Some(&[0u8, 0][..])); - - // Band 3: Float32, no nodata - let b3 = bands.band(3).unwrap(); - assert_eq!(b3.metadata().data_type().unwrap(), BandDataType::Float32); - assert_eq!(b3.metadata().nodata_value(), None); - } - - #[test] - #[should_panic = "Raster upper left x does not match"] + #[should_panic = "Raster transforms do not match"] fn test_raster_different_metadata() { let raster_array = generate_tiled_rasters((128, 128), (2, 1), BandDataType::UInt8, Some(43)).unwrap(); - let raster1 = RasterStructArray::new(&raster_array).get(0).unwrap(); - let raster2 = RasterStructArray::new(&raster_array).get(1).unwrap(); + let rsa = RasterStructArray::new(&raster_array); + let raster1 = rsa.get(0).unwrap(); + let raster2 = rsa.get(1).unwrap(); assert_raster_equal(&raster1, &raster2); } }