Skip to content

Commit b03ccb2

Browse files
duckdblabs-bot authored and staticlibs committed
Update vendored DuckDB sources to be5250ba48
1 parent 309bd5b commit b03ccb2

40 files changed

+673
-224
lines changed

src/duckdb/extension/parquet/parquet_reader.cpp

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -504,7 +504,8 @@ static bool IsGeometryType(const SchemaElement &s_ele, const ParquetFileMetadata
504504
// geoparquet types have to be at the root of the schema, and have to be present in the kv metadata.
505505
const auto is_at_root = depth == 1;
506506
const auto is_in_gpq_metadata = metadata.geo_metadata && metadata.geo_metadata->IsGeometryColumn(s_ele.name);
507-
const auto is_leaf = s_ele.num_children == 0;
507+
// A leaf node has a type set (as per Parquet spec)
508+
const auto is_leaf = s_ele.__isset.type;
508509
const auto is_geoparquet_geom = is_at_root && is_in_gpq_metadata && is_leaf;
509510

510511
if (is_geoparquet_geom) {
@@ -557,11 +558,23 @@ ParquetColumnSchema ParquetReader::ParseSchemaRecursive(idx_t depth, idx_t max_d
557558
ParquetColumnSchemaType::GEOMETRY);
558559
}
559560

560-
if (s_ele.__isset.num_children && s_ele.num_children > 0) { // inner node
561+
// Determine if this is an inner node
562+
// According to Parquet spec: nodes without 'type' set are inner nodes (groups)
563+
// For backwards compatibility with non-standard files, also check the old condition
564+
bool is_inner_node = !s_ele.__isset.type || (s_ele.__isset.num_children && s_ele.num_children > 0);
565+
566+
if (is_inner_node && s_ele.__isset.type) {
567+
// This case handles non-standard files where both type and num_children are set
568+
// Prioritize num_children if set and > 0
569+
is_inner_node = s_ele.__isset.num_children && s_ele.num_children > 0;
570+
}
571+
572+
if (is_inner_node) { // inner node
561573
vector<ParquetColumnSchema> child_schemas;
562574

563575
idx_t c_idx = 0;
564-
while (c_idx < NumericCast<idx_t>(s_ele.num_children)) {
576+
idx_t num_children = (s_ele.__isset.num_children) ? NumericCast<idx_t>(s_ele.num_children) : 0;
577+
while (c_idx < num_children) {
565578
next_schema_idx++;
566579

567580
auto child_schema =

src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -264,7 +264,7 @@ void LogicalUpdate::BindExtraColumns(TableCatalogEntry &table, LogicalGet &get,
264264
found_columns.insert(update.columns[i]);
265265
}
266266
}
267-
if (found_column_count > 0 && found_column_count != bound_columns.size()) {
267+
if (found_column_count != bound_columns.size()) {
268268
// columns that were required are not all part of the UPDATE
269269
// add them to the scan and update set
270270
for (auto &physical_id : bound_columns) {

src/duckdb/src/catalog/default/default_functions.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -164,7 +164,7 @@ static const DefaultMacro internal_macros[] = {
164164
{DEFAULT_SCHEMA, "days_in_month", {"date", nullptr}, {{nullptr, nullptr}}, "day(last_day(date))"},
165165

166166
// timestamptz functions
167-
{DEFAULT_SCHEMA, "ago", {"interval", nullptr}, {{nullptr, nullptr}}, "current_timestamp - interval"},
167+
{DEFAULT_SCHEMA, "ago", {"i", nullptr}, {{nullptr, nullptr}}, "current_timestamp - i::interval"},
168168

169169
// regexp functions
170170
{DEFAULT_SCHEMA, "regexp_split_to_table", {"text", "pattern", nullptr}, {{nullptr, nullptr}}, "unnest(string_split_regex(text, pattern))"},

src/duckdb/src/common/sort/sorted_run_merger.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -824,13 +824,13 @@ unique_ptr<SortedRun> SortedRunMergerLocalState::TemplatedMaterializePartition(S
824824
//===--------------------------------------------------------------------===//
825825
SortedRunMerger::SortedRunMerger(const Sort &sort_p, vector<unique_ptr<SortedRun>> &&sorted_runs_p,
826826
idx_t partition_size_p, bool external_p, bool is_index_sort_p)
827-
: db(*sort_p.context.db), sort(sort_p), sorted_runs(std::move(sorted_runs_p)),
827+
: scheduler(TaskScheduler::GetScheduler(*sort_p.context.db)), sort(sort_p), sorted_runs(std::move(sorted_runs_p)),
828828
total_count(SortedRunsTotalCount(sorted_runs)), partition_size(partition_size_p), external(external_p),
829829
is_index_sort(is_index_sort_p) {
830830
}
831831

832832
SortedRunMerger::~SortedRunMerger() {
833-
ParallelDestroyTask<decltype(sorted_runs)>::Schedule(db, sorted_runs);
833+
ParallelDestroyTask<decltype(sorted_runs)>::Schedule(scheduler, sorted_runs);
834834
}
835835

836836
unique_ptr<LocalSourceState> SortedRunMerger::GetLocalSourceState(ExecutionContext &,

src/duckdb/src/common/types/row/tuple_data_collection.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ using ValidityBytes = TupleDataLayout::ValidityBytes;
1616

1717
TupleDataCollection::TupleDataCollection(BufferManager &buffer_manager, shared_ptr<TupleDataLayout> layout_ptr_p,
1818
MemoryTag tag_p, shared_ptr<ArenaAllocator> stl_allocator_p)
19-
: db(buffer_manager.GetDatabase()),
19+
: scheduler(TaskScheduler::GetScheduler(buffer_manager.GetDatabase())),
2020
stl_allocator(stl_allocator_p ? std::move(stl_allocator_p)
2121
: make_shared_ptr<ArenaAllocator>(buffer_manager.GetBufferAllocator())),
2222
layout_ptr(std::move(layout_ptr_p)), layout(*layout_ptr), tag(tag_p),
@@ -32,7 +32,7 @@ TupleDataCollection::TupleDataCollection(ClientContext &context, shared_ptr<Tupl
3232
}
3333

3434
TupleDataCollection::~TupleDataCollection() {
35-
ParallelDestroyTask<decltype(segments)>::Schedule(db, segments);
35+
ParallelDestroyTask<decltype(segments)>::Schedule(scheduler, segments);
3636
}
3737

3838
void TupleDataCollection::Initialize() {

src/duckdb/src/execution/index/art/art.cpp

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -894,20 +894,23 @@ string ART::GetConstraintViolationMessage(VerifyExistenceType verify_type, idx_t
894894
void ART::TransformToDeprecated() {
895895
auto idx = Node::GetAllocatorIdx(NType::PREFIX);
896896
auto &block_manager = (*allocators)[idx]->block_manager;
897-
unsafe_unique_ptr<FixedSizeAllocator> deprecated_allocator;
898-
897+
unsafe_unique_ptr<FixedSizeAllocator> deprecated_allocator = nullptr;
899898
if (prefix_count != Prefix::DEPRECATED_COUNT) {
900899
auto prefix_size = NumericCast<idx_t>(Prefix::DEPRECATED_COUNT) + NumericCast<idx_t>(Prefix::METADATA_SIZE);
901900
deprecated_allocator = make_unsafe_uniq<FixedSizeAllocator>(prefix_size, block_manager);
902901
}
903902

903+
unique_ptr<TransformToDeprecatedState> state =
904+
make_uniq<TransformToDeprecatedState>(std::move(deprecated_allocator));
905+
904906
// Transform all leaves, and possibly the prefixes.
905907
if (tree.HasMetadata()) {
906-
Node::TransformToDeprecated(*this, tree, deprecated_allocator);
908+
Node::TransformToDeprecated(*this, tree, *state);
907909
}
908910

909911
// Replace the prefix allocator with the deprecated allocator.
910-
if (deprecated_allocator) {
912+
if (state->HasAllocator()) {
913+
deprecated_allocator = state->TakeAllocator();
911914
prefix_count = Prefix::DEPRECATED_COUNT;
912915

913916
D_ASSERT((*allocators)[idx]->Empty());

src/duckdb/src/execution/index/art/art_merger.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -255,7 +255,7 @@ void ARTMerger::MergePrefixes(NodeEntry &entry) {
255255

256256
Prefix l_prefix(art, entry.left, true);
257257
Prefix r_prefix(art, entry.right, true);
258-
const auto count = Prefix::Count(art);
258+
const auto count = art.PrefixCount();
259259

260260
// Find a byte at pos where the prefixes differ.
261261
// If they match up to max_count, then pos stays invalid.
Lines changed: 63 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,63 @@
1+
#include "duckdb/execution/index/art/const_prefix_handle.hpp"
2+
3+
#include "duckdb/execution/index/art/art.hpp"
4+
#include "duckdb/execution/index/art/node.hpp"
5+
6+
namespace duckdb {
7+
8+
// Builds a read-only view over the prefix segment that `node` refers to.
// Layout as used in this file: the prefix bytes live at data[0 .. PrefixCount()),
// the number of used bytes is stored at data[PrefixCount()], and the child Node
// follows immediately after that count byte.
ConstPrefixHandle::ConstPrefixHandle(const ART &art, const Node node)
9+
// Fetch the segment handle from the PREFIX allocator; `data` below points into it.
: segment_handle(Node::GetAllocator(art, PREFIX).GetHandle(node)) {
10+
data = segment_handle.GetPtr();
11+
// Skip the prefix bytes plus the single count byte to reach the child node.
child = reinterpret_cast<Node *>(data + art.PrefixCount() + 1);
12+
// Read-only: don't mark segment as modified
13+
}
14+
15+
// Returns the byte stored at index art.PrefixCount() of this segment, i.e. the
// slot directly after the prefix bytes, which the rest of this file treats as
// the number of prefix bytes in use.
uint8_t ConstPrefixHandle::GetCount(const ART &art) const {
16+
return data[art.PrefixCount()];
17+
}
18+
19+
// Returns the raw byte at offset `pos` within the segment.
// No bounds check is performed here; the caller must pass a valid offset.
uint8_t ConstPrefixHandle::GetByte(const idx_t pos) const {
20+
return data[pos];
21+
}
22+
23+
// Renders the prefix starting at `node` as text, followed by a newline and the
// string form of the node that `ref` refers to once iteration has finished.
// `options` controls indentation, ASCII rendering and key-depth tracking.
string ConstPrefixHandle::ToString(ART &art, const Node &node, const ToStringOptions &options) {
24+
// Appends `n` spaces to `str`.
auto indent = [](string &str, const idx_t n) {
25+
str.append(n, ' ');
26+
};
27+
// Printable ASCII (32..126) is shown as a character, but only outside a gate
// and when display_ascii is set; every other byte goes through to_string.
auto format_byte = [&](const uint8_t byte) {
28+
if (!options.inside_gate && options.display_ascii && byte >= 32 && byte <= 126) {
29+
return string(1, static_cast<char>(byte));
30+
}
31+
return to_string(byte);
32+
};
33+
34+
string str = "";
35+
indent(str, options.indent_level);
36+
reference<const Node> ref(node);
37+
// Copy the options so key_depth can be advanced for the child without touching `options`.
ToStringOptions child_options = options;
38+
// NOTE(review): Iterator is defined elsewhere; it presumably invokes the callback for
// each prefix segment and leaves `ref` at the node after the prefix - confirm.
Iterator(art, ref, true, [&](const ConstPrefixHandle &handle) {
39+
str += "Prefix: |";
40+
// data[PrefixCount()] is the count byte, so this walks only the bytes in use.
for (idx_t i = 0; i < handle.data[art.PrefixCount()]; i++) {
41+
str += format_byte(handle.data[i]) + "|";
42+
// When key_path is set, key_depth is incremented once per prefix byte.
if (options.key_path) {
43+
child_options.key_depth++;
44+
}
45+
}
46+
});
47+
48+
auto child = ref.get().ToString(art, child_options);
49+
return str + "\n" + child;
50+
}
51+
52+
// Checks every prefix segment handed to the callback: its count byte must be
// non-zero and must not exceed art.PrefixCount(). Afterwards Verify is called
// on the node that `ref` refers to. The checks use D_ASSERT.
void ConstPrefixHandle::Verify(ART &art, const Node &node) {
53+
reference<const Node> ref(node);
54+
55+
// NOTE(review): Iterator is defined elsewhere; it presumably visits each prefix
// segment and leaves `ref` at the node after the prefix - confirm.
Iterator(art, ref, true, [&](const ConstPrefixHandle &handle) {
56+
D_ASSERT(handle.data[art.PrefixCount()] != 0);
57+
D_ASSERT(handle.data[art.PrefixCount()] <= art.PrefixCount());
58+
});
59+
60+
ref.get().Verify(art);
61+
}
62+
63+
} // namespace duckdb

src/duckdb/src/execution/index/art/iterator.cpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,7 @@ void Iterator::FindMinimum(const Node &node) {
115115
// Traverse the prefix.
116116
if (ref.get().GetType() == NType::PREFIX) {
117117
Prefix prefix(art, ref.get());
118-
for (idx_t i = 0; i < prefix.data[Prefix::Count(art)]; i++) {
118+
for (idx_t i = 0; i < prefix.data[art.PrefixCount()]; i++) {
119119
current_key.Push(prefix.data[i]);
120120
if (status == GateStatus::GATE_SET) {
121121
row_id[nested_depth] = prefix.data[i];
@@ -195,13 +195,13 @@ bool Iterator::LowerBound(const Node &node, const ARTKey &key, const bool equal)
195195

196196
// Push back all prefix bytes.
197197
Prefix prefix(art, ref.get());
198-
for (idx_t i = 0; i < prefix.data[Prefix::Count(art)]; i++) {
198+
for (idx_t i = 0; i < prefix.data[art.PrefixCount()]; i++) {
199199
current_key.Push(prefix.data[i]);
200200
}
201201
nodes.emplace(ref.get(), 0);
202202

203203
// We compare the prefix bytes with the key bytes.
204-
for (idx_t i = 0; i < prefix.data[Prefix::Count(art)]; i++) {
204+
for (idx_t i = 0; i < prefix.data[art.PrefixCount()]; i++) {
205205
// We found a prefix byte that is less than its corresponding key byte.
206206
// I.e., the subsequent node is lesser than the key. Thus, the next node
207207
// is the lower bound.
@@ -219,7 +219,7 @@ bool Iterator::LowerBound(const Node &node, const ARTKey &key, const bool equal)
219219
}
220220

221221
// The prefix matches the key. Move to the child and update depth.
222-
depth += prefix.data[Prefix::Count(art)];
222+
depth += prefix.data[art.PrefixCount()];
223223
ref = *prefix.ptr;
224224
}
225225
// Should always have a node with metadata.
@@ -278,7 +278,7 @@ void Iterator::PopNode() {
278278
} else {
279279
// Pop all prefix bytes and the node.
280280
Prefix prefix(art, nodes.top().node);
281-
auto prefix_byte_count = prefix.data[Prefix::Count(art)];
281+
auto prefix_byte_count = prefix.data[art.PrefixCount()];
282282
current_key.Pop(prefix_byte_count);
283283

284284
if (status == GateStatus::GATE_SET) {

src/duckdb/src/execution/index/art/node.cpp

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,9 @@
1111
#include "duckdb/execution/index/art/node256.hpp"
1212
#include "duckdb/execution/index/art/node256_leaf.hpp"
1313
#include "duckdb/execution/index/art/node48.hpp"
14+
#include "duckdb/execution/index/art/const_prefix_handle.hpp"
1415
#include "duckdb/execution/index/art/prefix.hpp"
16+
#include "duckdb/execution/index/art/prefix_handle.hpp"
1517
#include "duckdb/storage/table_io_manager.hpp"
1618

1719
namespace duckdb {
@@ -357,8 +359,7 @@ bool Node::IsAnyLeaf() const {
357359
// TransformToDeprecated
358360
//===--------------------------------------------------------------------===//
359361

360-
void Node::TransformToDeprecated(ART &art, Node &node,
361-
unsafe_unique_ptr<FixedSizeAllocator> &deprecated_prefix_allocator) {
362+
void Node::TransformToDeprecated(ART &art, Node &node, TransformToDeprecatedState &state) {
362363
D_ASSERT(node.HasMetadata());
363364

364365
if (node.GetGateStatus() == GateStatus::GATE_SET) {
@@ -369,19 +370,19 @@ void Node::TransformToDeprecated(ART &art, Node &node,
369370
auto type = node.GetType();
370371
switch (type) {
371372
case NType::PREFIX:
372-
return Prefix::TransformToDeprecated(art, node, deprecated_prefix_allocator);
373+
return PrefixHandle::TransformToDeprecated(art, node, state);
373374
case NType::LEAF_INLINED:
374375
return;
375376
case NType::LEAF:
376377
return;
377378
case NType::NODE_4:
378-
return TransformToDeprecatedInternal(art, InMemoryRef<Node4>(art, node, type), deprecated_prefix_allocator);
379+
return TransformToDeprecatedInternal(art, InMemoryRef<Node4>(art, node, type), state);
379380
case NType::NODE_16:
380-
return TransformToDeprecatedInternal(art, InMemoryRef<Node16>(art, node, type), deprecated_prefix_allocator);
381+
return TransformToDeprecatedInternal(art, InMemoryRef<Node16>(art, node, type), state);
381382
case NType::NODE_48:
382-
return TransformToDeprecatedInternal(art, InMemoryRef<Node48>(art, node, type), deprecated_prefix_allocator);
383+
return TransformToDeprecatedInternal(art, InMemoryRef<Node48>(art, node, type), state);
383384
case NType::NODE_256:
384-
return TransformToDeprecatedInternal(art, InMemoryRef<Node256>(art, node, type), deprecated_prefix_allocator);
385+
return TransformToDeprecatedInternal(art, InMemoryRef<Node256>(art, node, type), state);
385386
default:
386387
throw InternalException("invalid node type for TransformToDeprecated: %d", type);
387388
}
@@ -402,7 +403,7 @@ void Node::Verify(ART &art) const {
402403
Leaf::DeprecatedVerify(art, *this);
403404
return;
404405
case NType::PREFIX: {
405-
Prefix::Verify(art, *this);
406+
ConstPrefixHandle::Verify(art, *this);
406407
return;
407408
}
408409
default:
@@ -496,7 +497,7 @@ string Node::ToString(ART &art, const ToStringOptions &options) const {
496497
case NType::PREFIX: {
497498
ToStringOptions prefix_options = options;
498499
prefix_options.inside_gate = propagate_gate;
499-
string str = Prefix::ToString(art, *this, prefix_options);
500+
string str = ConstPrefixHandle::ToString(art, *this, prefix_options);
500501
if (is_gate) {
501502
string s = "";
502503
indent(s, options.indent_level);

0 commit comments

Comments
 (0)