From 8acd976eea694952908e1e79e4c64f52ddff1a88 Mon Sep 17 00:00:00 2001 From: WanKun Date: Wed, 15 Apr 2026 17:27:10 +0800 Subject: [PATCH 1/2] Ignore null values when copying velox buffer to arrow --- cpp/velox/shuffle/VeloxHashShuffleWriter.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/velox/shuffle/VeloxHashShuffleWriter.cc b/cpp/velox/shuffle/VeloxHashShuffleWriter.cc index f57a0a624dc5..0eb00821e6e5 100644 --- a/cpp/velox/shuffle/VeloxHashShuffleWriter.cc +++ b/cpp/velox/shuffle/VeloxHashShuffleWriter.cc @@ -111,7 +111,7 @@ arrow::Status collectFlatVectorBufferStringView( auto* rawLength = reinterpret_cast(lengthBuffer->mutable_data()); uint64_t offset = 0; for (int32_t i = 0; i < flatVector->size(); i++) { - auto length = rawValues[i].size(); + auto length = flatVector->isNullAt(i) ? 0 : rrawValues[i].size(); *rawLength++ = length; offset += length; } @@ -120,6 +120,9 @@ arrow::Status collectFlatVectorBufferStringView( ARROW_ASSIGN_OR_RAISE(auto valueBuffer, arrow::AllocateResizableBuffer(offset, pool)); auto raw = reinterpret_cast(valueBuffer->mutable_data()); for (int32_t i = 0; i < flatVector->size(); i++) { + if (flatVector->isNullAt(i)) { + continue; + } gluten::fastCopy(raw, rawValues[i].data(), rawValues[i].size()); raw += rawValues[i].size(); } From 80d21ce81e3b45828039eb61309924ca208939ff Mon Sep 17 00:00:00 2001 From: WanKun Date: Wed, 15 Apr 2026 18:13:53 +0800 Subject: [PATCH 2/2] Ignore null values when copying velox buffer to arrow --- cpp/velox/shuffle/VeloxHashShuffleWriter.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/velox/shuffle/VeloxHashShuffleWriter.cc b/cpp/velox/shuffle/VeloxHashShuffleWriter.cc index 0eb00821e6e5..6024f04ab19b 100644 --- a/cpp/velox/shuffle/VeloxHashShuffleWriter.cc +++ b/cpp/velox/shuffle/VeloxHashShuffleWriter.cc @@ -111,7 +111,7 @@ arrow::Status collectFlatVectorBufferStringView( auto* rawLength = reinterpret_cast(lengthBuffer->mutable_data()); uint64_t offset = 0; 
for (int32_t i = 0; i < flatVector->size(); i++) { - auto length = flatVector->isNullAt(i) ? 0 : rrawValues[i].size(); + auto length = flatVector->isNullAt(i) ? 0 : rawValues[i].size(); *rawLength++ = length; offset += length; }