diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml
index ba2e534f..69f0a58c 100644
--- a/.github/workflows/MainDistributionPipeline.yml
+++ b/.github/workflows/MainDistributionPipeline.yml
@@ -14,21 +14,21 @@ concurrency:
 jobs:
   duckdb-stable-build:
     name: Build extension binaries
-    uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
+    uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.3
     with:
       extension_name: httpfs
-      duckdb_version: v1.4.2
-      ci_tools_version: main
+      duckdb_version: v1.4.3
+      ci_tools_version: v1.4.3
 
   duckdb-stable-deploy:
     name: Deploy extension binaries
     needs: duckdb-stable-build
-    uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@main
+    uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@v1.4.3
     secrets: inherit
     with:
       extension_name: httpfs
-      duckdb_version: v1.4.2
-      ci_tools_version: main
+      duckdb_version: v1.4.3
+      ci_tools_version: v1.4.3
       deploy_latest: ${{ startsWith(github.ref, 'refs/heads/v') }}
       deploy_versioned: ${{ startsWith(github.ref, 'refs/heads/v') || github.ref == 'refs/heads/main' }}
diff --git a/duckdb b/duckdb
index 68d7555f..d1dc88f9 160000
--- a/duckdb
+++ b/duckdb
@@ -1 +1 @@
-Subproject commit 68d7555f68bd25c1a251ccca2e6338949c33986a
+Subproject commit d1dc88f950d456d72493df452dabdcd13aa413dd
diff --git a/extension-ci-tools b/extension-ci-tools
index ba18d4f1..5b96f639 160000
--- a/extension-ci-tools
+++ b/extension-ci-tools
@@ -1 +1 @@
-Subproject commit ba18d4f106a6cc1d5597f442bac06a1d7db098ef
+Subproject commit 5b96f6390b7eb51ef395c81e26b3627c2049ec28
diff --git a/src/create_secret_functions.cpp b/src/create_secret_functions.cpp
index b94f7f24..be306433 100644
--- a/src/create_secret_functions.cpp
+++ b/src/create_secret_functions.cpp
@@ -118,6 +118,14 @@ unique_ptr<BaseSecret> CreateS3SecretFunctions::CreateSecretFunctionInternal(Cli
 		secret->secret_map["bearer_token"] = named_param.second.ToString();
 		// Mark it as sensitive
 		secret->redact_keys.insert("bearer_token");
+	} else if (lower_name == "http_proxy") {
+		secret->secret_map["http_proxy"] = named_param.second;
+	} else if (lower_name == "http_proxy_password") {
+		secret->secret_map["http_proxy_password"] = named_param.second;
+	} else if (lower_name == "http_proxy_username") {
+		secret->secret_map["http_proxy_username"] = named_param.second;
+	} else if (lower_name == "extra_http_headers") {
+		secret->secret_map["extra_http_headers"] = named_param.second;
 	} else {
 		throw InvalidInputException("Unknown named parameter passed to CreateSecretFunctionInternal: " +
 		                            lower_name);
@@ -200,6 +208,12 @@ void CreateS3SecretFunctions::SetBaseNamedParams(CreateSecretFunction &function,
 	// Whether a secret refresh attempt should be made when the secret appears to be incorrect
 	function.named_parameters["refresh"] = LogicalType::VARCHAR;
 
+	// Params for HTTP configuration
+	function.named_parameters["http_proxy"] = LogicalType::VARCHAR;
+	function.named_parameters["http_proxy_password"] = LogicalType::VARCHAR;
+	function.named_parameters["http_proxy_username"] = LogicalType::VARCHAR;
+	function.named_parameters["extra_http_headers"] = LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR);
+
 	// Refresh Modes
 	// - auto
 	// - disabled
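Illustrative usage, not part of the patch: the named parameters registered above let S3-style secrets carry the same HTTP options an HTTP secret already accepts. A minimal SQL sketch, with placeholder key, proxy, and header values:

CREATE SECRET my_s3 (
    TYPE S3,
    KEY_ID 'my_key',
    SECRET 'my_secret',
    HTTP_PROXY 'localhost:3128',
    HTTP_PROXY_USERNAME 'proxy_user',
    HTTP_PROXY_PASSWORD 'proxy_pass',
    EXTRA_HTTP_HEADERS MAP{
        'CustomHeader': 'some-value'
    }
);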
diff --git a/src/httpfs.cpp b/src/httpfs.cpp
index a11af95b..1f4c1329 100644
--- a/src/httpfs.cpp
+++ b/src/httpfs.cpp
@@ -20,6 +20,8 @@
 #include
 #include
 
+#include "s3fs.hpp"
+
 namespace duckdb {
 
 shared_ptr<HTTPUtil> HTTPFSUtil::GetHTTPUtil(optional_ptr<FileOpener> opener) {
@@ -34,7 +36,7 @@ unique_ptr<HTTPParams> HTTPFSUtil::InitializeParameters(optional_ptr
     auto result = make_uniq<HTTPFSParams>(*this);
     result->Initialize(opener);
 
-    // No point in continueing without an opener
+    // No point in continuing without an opener
     if (!opener) {
         return std::move(result);
     }
@@ -65,23 +67,41 @@
         }
     }
 
+    unique_ptr<KeyValueSecretReader> settings_reader;
+    if (info && !S3FileSystem::TryGetPrefix(info->file_path).empty()) {
+        // This is an S3-type URL: also consult the S3-compatible secret types, not just the HTTP secret
+        const char *s3_secret_types[] = {"s3", "r2", "gcs", "aws", "http"};
+
+        idx_t secret_type_count = 5;
+        Value merge_http_secret_into_s3_request;
+        FileOpener::TryGetCurrentSetting(opener, "merge_http_secret_into_s3_request",
+                                         merge_http_secret_into_s3_request);
+
+        if (!merge_http_secret_into_s3_request.IsNull() && !merge_http_secret_into_s3_request.GetValue<bool>()) {
+            // Drop the http secret from the lookup
+            secret_type_count = 4;
+        }
+        settings_reader = make_uniq<KeyValueSecretReader>(*opener, info, s3_secret_types, secret_type_count);
+    } else {
+        settings_reader = make_uniq<KeyValueSecretReader>(*opener, info, "http");
+    }
+
     // HTTP Secret lookups
-    KeyValueSecretReader settings_reader(*opener, info, "http");
     string proxy_setting;
-    if (settings_reader.TryGetSecretKey("http_proxy", proxy_setting) && !proxy_setting.empty()) {
+    if (settings_reader->TryGetSecretKey("http_proxy", proxy_setting) && !proxy_setting.empty()) {
         idx_t port;
         string host;
         HTTPUtil::ParseHTTPProxyHost(proxy_setting, host, port);
         result->http_proxy = host;
         result->http_proxy_port = port;
     }
-    settings_reader.TryGetSecretKey("http_proxy_username", result->http_proxy_username);
-    settings_reader.TryGetSecretKey("http_proxy_password", result->http_proxy_password);
-    settings_reader.TryGetSecretKey("bearer_token", result->bearer_token);
+    settings_reader->TryGetSecretKey("http_proxy_username", result->http_proxy_username);
+    settings_reader->TryGetSecretKey("http_proxy_password", result->http_proxy_password);
+    settings_reader->TryGetSecretKey("bearer_token", result->bearer_token);
 
     Value extra_headers;
-    if (settings_reader.TryGetSecretKey("extra_http_headers", extra_headers)) {
+    if (settings_reader->TryGetSecretKey("extra_http_headers", extra_headers)) {
         auto children = MapValue::GetChildren(extra_headers);
         for (const auto &child : children) {
             auto kv = StructValue::GetChildren(child);
@@ -115,6 +135,14 @@ static void AddUserAgentIfAvailable(HTTPFSParams &http_params, HTTPHeaders &head
     }
 }
 
+static void AddHandleHeaders(HTTPFileHandle &handle, HTTPHeaders &header_map) {
+    // Inject headers from the http param extra_headers into the request
+    for (auto &header : handle.http_params.extra_headers) {
+        header_map[header.first] = header.second;
+    }
+    handle.http_params.pre_merged_headers = true;
+}
+
 unique_ptr<HTTPResponse> HTTPFileSystem::PostRequest(FileHandle &handle, string url, HTTPHeaders header_map,
                                                      string &buffer_out, char *buffer_in, idx_t buffer_in_len,
                                                      string params) {
@@ -122,6 +150,7 @@
     auto &http_util = hfh.http_params.http_util;
 
     AddUserAgentIfAvailable(hfh.http_params, header_map);
+    AddHandleHeaders(hfh, header_map);
 
     PostRequestInfo post_request(url, header_map, hfh.http_params, const_data_ptr_cast(buffer_in), buffer_in_len);
     auto result = http_util.Request(post_request);
@@ -135,6 +164,7 @@ unique_ptr<HTTPResponse> HTTPFileSystem::PutRequest(FileHandle &handle, string u
     auto &http_util = hfh.http_params.http_util;
 
     AddUserAgentIfAvailable(hfh.http_params, header_map);
+    AddHandleHeaders(hfh, header_map);
 
     string content_type = "application/octet-stream";
     PutRequestInfo put_request(url, header_map, hfh.http_params, (const_data_ptr_t)buffer_in, buffer_in_len,
@@ -147,6 +177,7 @@ unique_ptr<HTTPResponse> HTTPFileSystem::HeadRequest(FileHandle &handle, string
     auto &http_util = hfh.http_params.http_util;
 
     AddUserAgentIfAvailable(hfh.http_params, header_map);
+    AddHandleHeaders(hfh, header_map);
 
     auto http_client = hfh.GetClient();
 
@@ -162,6 +193,7 @@ unique_ptr<HTTPResponse> HTTPFileSystem::DeleteRequest(FileHandle &handle, strin
     auto &http_util = hfh.http_params.http_util;
 
     AddUserAgentIfAvailable(hfh.http_params, header_map);
+    AddHandleHeaders(hfh, header_map);
 
     auto http_client = hfh.GetClient();
     DeleteRequestInfo delete_request(url, header_map, hfh.http_params);
@@ -187,6 +219,7 @@ unique_ptr<HTTPResponse> HTTPFileSystem::GetRequest(FileHandle &handle, string u
     auto &http_util = hfh.http_params.http_util;
 
     AddUserAgentIfAvailable(hfh.http_params, header_map);
+    AddHandleHeaders(hfh, header_map);
 
     D_ASSERT(hfh.cached_file_handle);
 
@@ -238,6 +271,7 @@ unique_ptr<HTTPResponse> HTTPFileSystem::GetRangeRequest(FileHandle &handle, str
     auto &http_util = hfh.http_params.http_util;
 
     AddUserAgentIfAvailable(hfh.http_params, header_map);
+    AddHandleHeaders(hfh, header_map);
 
     // send the Range header to read only subset of file
     string range_expr = "bytes=" + to_string(file_offset) + "-" + to_string(file_offset + buffer_out_len - 1);
@@ -399,14 +433,49 @@ unique_ptr<FileHandle> HTTPFileSystem::OpenFileExtended(const OpenFileInfo &file
     return std::move(handle);
 }
 
+void HTTPFileHandle::AddStatistics(idx_t read_offset, idx_t read_length, idx_t read_duration) {
+    range_request_statistics.push_back({read_offset, read_length, read_duration});
+}
+
+void HTTPFileHandle::AdaptReadBufferSize(idx_t next_read_offset) {
+    D_ASSERT(!SkipBuffer());
+    if (range_request_statistics.empty()) {
+        return; // No requests yet - nothing to do
+    }
+
+    const auto &last_read = range_request_statistics.back();
+    if (last_read.offset + last_read.length != next_read_offset) {
+        return; // Not reading sequentially
+    }
+
+    if (read_buffer.GetSize() >= MAXIMUM_READ_BUFFER_LEN) {
+        return; // Already at maximum size
+    }
+
+    // Grow the buffer
+    // TODO: can use statistics to estimate per-byte and round-trip cost using least squares, and do something smarter
+    read_buffer = read_buffer.GetAllocator()->Allocate(read_buffer.GetSize() * 2);
+}
+
 bool HTTPFileSystem::TryRangeRequest(FileHandle &handle, string url, HTTPHeaders header_map, idx_t file_offset,
                                      char *buffer_out, idx_t buffer_out_len) {
+    auto &hfh = handle.Cast<HTTPFileHandle>();
+
+    const auto timestamp_before = Timestamp::GetCurrentTimestamp();
     auto res = GetRangeRequest(handle, url, header_map, file_offset, buffer_out, buffer_out_len);
     if (res) {
         // Request succeeded TODO: fix upstream that 206 is not considered success
         if (res->Success() || res->status == HTTPStatusCode::PartialContent_206 ||
             res->status == HTTPStatusCode::Accepted_202) {
+
+            if (!hfh.flags.RequireParallelAccess()) {
+                // Update range request statistics
+                const auto duration =
+                    NumericCast<idx_t>(Timestamp::GetCurrentTimestamp().value - timestamp_before.value);
+                hfh.AddStatistics(file_offset, buffer_out_len, duration);
+            }
+
             return true;
         }
 
@@ -438,6 +507,9 @@ bool HTTPFileSystem::ReadInternal(FileHandle &handle, void *buffer, int64_t nr_b
         if (!hfh.cached_file_handle->Initialized()) {
             throw InternalException("Cached file not initialized properly");
         }
+        if (hfh.cached_file_handle->GetSize() < location + nr_bytes) {
+            throw InternalException("Cached file length can't satisfy the requested Read");
+        }
         memcpy(buffer, hfh.cached_file_handle->GetData() + location, nr_bytes);
         DUCKDB_LOG_FILE_SYSTEM_READ(handle, nr_bytes, location);
         hfh.file_offset = location + nr_bytes;
@@ -448,8 +520,7 @@ bool HTTPFileSystem::ReadInternal(FileHandle &handle, void *buffer, int64_t nr_b
     idx_t buffer_offset = 0;
 
     // Don't buffer when DirectIO is set or when we are doing parallel reads
-    bool skip_buffer = hfh.flags.DirectIO() || hfh.flags.RequireParallelAccess();
-    if (skip_buffer && to_read > 0) {
+    if (hfh.SkipBuffer() && to_read > 0) {
         if (!TryRangeRequest(hfh, hfh.path, {}, location, (char *)buffer, to_read)) {
             return false;
         }
@@ -494,7 +565,7 @@ bool HTTPFileSystem::ReadInternal(FileHandle &handle, void *buffer, int64_t nr_b
     }
 
     if (to_read > 0 && hfh.buffer_available == 0) {
-        auto new_buffer_available = MinValue<idx_t>(hfh.READ_BUFFER_LEN, hfh.length - start_offset);
+        auto new_buffer_available = MinValue<idx_t>(hfh.read_buffer.GetSize(), hfh.length - start_offset);
 
         // Bypass buffer if we read more than buffer size
         if (to_read > new_buffer_available) {
@@ -507,6 +578,8 @@ bool HTTPFileSystem::ReadInternal(FileHandle &handle, void *buffer, int64_t nr_b
             start_offset += to_read;
             break;
         } else {
+            hfh.AdaptReadBufferSize(start_offset);
+            new_buffer_available = MinValue<idx_t>(hfh.read_buffer.GetSize(), hfh.length - start_offset);
             if (!TryRangeRequest(hfh, hfh.path, {}, start_offset, (char *)hfh.read_buffer.get(),
                                  new_buffer_available)) {
                 return false;
@@ -729,7 +802,8 @@ void HTTPFileHandle::LoadFileInfo() {
         return;
     } else {
         // HEAD request fail, use Range request for another try (read only one byte)
-        if (flags.OpenForReading() && res->status != HTTPStatusCode::NotFound_404 && res->status != HTTPStatusCode::MovedPermanently_301) {
+        if (flags.OpenForReading() && res->status != HTTPStatusCode::NotFound_404 &&
+            res->status != HTTPStatusCode::MovedPermanently_301) {
             auto range_res = hfs.GetRangeRequest(*this, path, {}, 0, nullptr, 2);
             if (range_res->status != HTTPStatusCode::PartialContent_206 &&
                 range_res->status != HTTPStatusCode::Accepted_202 && range_res->status != HTTPStatusCode::OK_200) {
@@ -774,6 +848,14 @@ void HTTPFileHandle::TryAddLogger(FileOpener &opener) {
     }
 }
 
+void HTTPFileHandle::AllocateReadBuffer(optional_ptr<FileOpener> opener) {
+    D_ASSERT(!SkipBuffer());
+    D_ASSERT(!read_buffer.IsSet());
+    auto &allocator = opener && opener->TryGetClientContext() ? BufferAllocator::Get(*opener->TryGetClientContext())
+                                                              : Allocator::DefaultAllocator();
+    read_buffer = allocator.Allocate(INITIAL_READ_BUFFER_LEN);
+}
+
 void HTTPFileHandle::Initialize(optional_ptr<FileOpener> opener) {
     auto &hfs = file_system.Cast<HTTPFileSystem>();
     http_params.state = HTTPState::TryGetState(opener);
@@ -803,8 +885,8 @@ void HTTPFileHandle::Initialize(optional_ptr<FileOpener> opener) {
         length = value.length;
         etag = value.etag;
 
-        if (flags.OpenForReading()) {
-            read_buffer = duckdb::unique_ptr<data_t[]>(new data_t[READ_BUFFER_LEN]);
+        if (flags.OpenForReading() && !SkipBuffer()) {
+            AllocateReadBuffer(opener);
         }
         return;
     }
@@ -822,8 +904,10 @@ void HTTPFileHandle::Initialize(optional_ptr<FileOpener> opener) {
         current_cache->Insert(path, {length, last_modified, etag});
     }
 
-    // Initialize the read buffer now that we know the file exists
-    read_buffer = duckdb::unique_ptr<data_t[]>(new data_t[READ_BUFFER_LEN]);
+    if (!SkipBuffer()) {
+        // Initialize the read buffer now that we know the file exists
+        AllocateReadBuffer(opener);
+    }
 }
 
 // If we're writing to a file, we might as well remove it from the cache
diff --git a/src/httpfs_curl_client.cpp b/src/httpfs_curl_client.cpp
index a7e4637a..d4e39bf5 100644
--- a/src/httpfs_curl_client.cpp
+++ b/src/httpfs_curl_client.cpp
@@ -119,7 +119,7 @@ class HTTPFSCurlClient : public HTTPClient {
         Initialize(http_params);
     }
     void Initialize(HTTPParams &http_p) override {
-        HTTPFSParams &http_params = (HTTPFSParams&)http_p;
+        HTTPFSParams &http_params = (HTTPFSParams &)http_p;
         auto bearer_token = "";
         if (!http_params.bearer_token.empty()) {
             bearer_token = http_params.bearer_token.c_str();
diff --git a/src/httpfs_extension.cpp b/src/httpfs_extension.cpp
index 9070621f..b61e5154 100644
--- a/src/httpfs_extension.cpp
+++ b/src/httpfs_extension.cpp
@@ -96,6 +96,9 @@ static void LoadInternal(ExtensionLoader &loader) {
     config.AddExtensionOption("hf_max_per_page", "Debug option to limit number of items returned in list requests",
                               LogicalType::UBIGINT, Value::UBIGINT(0));
 
+    config.AddExtensionOption("merge_http_secret_into_s3_request", "Merges http secret params into S3 requests",
+                              LogicalType::BOOLEAN, Value(true));
+
     auto callback_httpfs_client_implementation = [](ClientContext &context, SetScope scope, Value &parameter) {
         auto &config = DBConfig::GetConfig(context);
         string value = StringValue::Get(parameter);
diff --git a/src/httpfs_httplib_client.cpp b/src/httpfs_httplib_client.cpp
index cf1b8540..3a94cb82 100644
--- a/src/httpfs_httplib_client.cpp
+++ b/src/httpfs_httplib_client.cpp
@@ -12,7 +12,7 @@ class HTTPFSClient : public HTTPClient {
         Initialize(http_params);
     }
     void Initialize(HTTPParams &http_p) override {
-        HTTPFSParams &http_params = (HTTPFSParams&)http_p;
+        HTTPFSParams &http_params = (HTTPFSParams &)http_p;
         client->set_follow_location(http_params.follow_location);
         client->set_keep_alive(http_params.keep_alive);
         if (!http_params.ca_cert_file.empty()) {
@@ -106,18 +106,26 @@ class HTTPFSClient : public HTTPClient {
             info.buffer_out += string(data, data_length);
             return true;
         };
+        // First assign the body; this is the body that will be uploaded
         req.body.assign(const_char_ptr_cast(info.buffer_in), info.buffer_in_len);
-        return TransformResult(client->send(req));
+        auto transformed_req = TransformResult(client->send(req));
+        // Then, after the actual request, re-assign the original POST body to the returned response object
+        transformed_req->body.assign(const_char_ptr_cast(info.buffer_in), info.buffer_in_len);
+        return std::move(transformed_req);
     }
 
 private:
     duckdb_httplib_openssl::Headers TransformHeaders(const HTTPHeaders &header_map, const HTTPParams &params) {
+        auto &httpfs_params = params.Cast<HTTPFSParams>();
+
         duckdb_httplib_openssl::Headers headers;
         for (auto &entry : header_map) {
             headers.insert(entry);
         }
-        for (auto &entry : params.extra_headers) {
-            headers.insert(entry);
+        if (!httpfs_params.pre_merged_headers) {
+            for (auto &entry : params.extra_headers) {
+                headers.insert(entry);
+            }
         }
         return headers;
     }
diff --git a/src/include/httpfs.hpp b/src/include/httpfs.hpp
index 804968a1..c6baf131 100644
--- a/src/include/httpfs.hpp
+++ b/src/include/httpfs.hpp
@@ -85,8 +85,13 @@ class HTTPFileHandle : public FileHandle {
     std::mutex mu;
 
     // Read buffer
-    duckdb::unique_ptr<data_t[]> read_buffer;
-    constexpr static idx_t READ_BUFFER_LEN = 1000000;
+    AllocatedData read_buffer;
+    constexpr static idx_t INITIAL_READ_BUFFER_LEN = 1048576;
+    constexpr static idx_t MAXIMUM_READ_BUFFER_LEN = 33554432;
+
+    // Track range requests and adaptively resize read_buffer based on range_request_statistics
+    void AddStatistics(idx_t read_offset, idx_t read_length, idx_t read_duration);
+    void AdaptReadBufferSize(idx_t next_read_offset);
 
     void AddHeaders(HTTPHeaders &map);
 
@@ -95,6 +100,22 @@ class HTTPFileHandle : public FileHandle {
     // Return the client for re-use
     void StoreClient(unique_ptr<HTTPClient> client);
 
+    // Whether to bypass the read buffer
+    bool SkipBuffer() const {
+        return flags.DirectIO() || flags.RequireParallelAccess();
+    }
+
+private:
+    void AllocateReadBuffer(optional_ptr<FileOpener> opener);
+
+    // Statistics that are used to adaptively grow the read_buffer
+    struct RangeRequestStatistics {
+        idx_t offset;
+        idx_t length;
+        idx_t duration;
+    };
+    vector<RangeRequestStatistics> range_request_statistics;
+
 public:
     void Close() override {
     }
diff --git a/src/include/httpfs_client.hpp b/src/include/httpfs_client.hpp
index ab462cd7..4fa9b17a 100644
--- a/src/include/httpfs_client.hpp
+++ b/src/include/httpfs_client.hpp
@@ -27,6 +27,8 @@ struct HTTPFSParams : public HTTPParams {
     bool unsafe_disable_etag_checks {false};
     shared_ptr<HTTPState> state;
     string user_agent = {""};
+    bool pre_merged_headers = false;
+
     // Additional fields need to be appended at the end and need to be propagated to duckdb-wasm
     // TODO: make this unnecessary
 };
diff --git a/src/include/s3fs.hpp b/src/include/s3fs.hpp
index a7e933ea..153dfeb5 100644
--- a/src/include/s3fs.hpp
+++ b/src/include/s3fs.hpp
@@ -212,6 +212,8 @@ class S3FileSystem : public HTTPFileSystem {
     static string UrlEncode(const string &input, bool encode_slash = false);
     static string UrlDecode(string input);
 
+    static string TryGetPrefix(const string &url);
+
     // Uploads the contents of write_buffer to S3.
     // Note: caller is responsible to not call this method twice on the same buffer
     static void UploadBuffer(S3FileHandle &file_handle, shared_ptr<S3WriteBuffer> write_buffer);
 
@@ -238,6 +240,7 @@ class S3FileSystem : public HTTPFileSystem {
 protected:
     static void NotifyUploadsInProgress(S3FileHandle &file_handle);
+    static string GetPrefix(const string &url);
 
     duckdb::unique_ptr<HTTPFileHandle> CreateHandle(const OpenFileInfo &file, FileOpenFlags flags,
                                                     optional_ptr<FileOpener> opener) override;
diff --git a/src/s3fs.cpp b/src/s3fs.cpp
index b858ac3c..9f016750 100644
--- a/src/s3fs.cpp
+++ b/src/s3fs.cpp
@@ -625,15 +625,22 @@ void S3FileSystem::ReadQueryParams(const string &url_query_param, S3AuthParams &
     }
 }
 
-static string GetPrefix(string url) {
+string S3FileSystem::TryGetPrefix(const string &url) {
     const string prefixes[] = {"s3://", "s3a://", "s3n://", "gcs://", "gs://", "r2://"};
     for (auto &prefix : prefixes) {
-        if (StringUtil::StartsWith(url, prefix)) {
+        if (StringUtil::StartsWith(StringUtil::Lower(url), prefix)) {
             return prefix;
         }
     }
-    throw IOException("URL needs to start with s3://, gcs:// or r2://");
-    return string();
+    return {};
+}
+
+string S3FileSystem::GetPrefix(const string &url) {
+    auto prefix = TryGetPrefix(url);
+    if (prefix.empty()) {
+        throw IOException("URL needs to start with s3://, gcs:// or r2://");
+    }
+    return prefix;
 }
 
 ParsedS3Url S3FileSystem::S3UrlParse(string url, S3AuthParams &params) {
@@ -916,8 +923,6 @@ void S3FileHandle::Initialize(optional_ptr<FileOpener> opener) {
         HTTPFileHandle::Initialize(opener);
     }
 
-    auto &s3fs = file_system.Cast<S3FileSystem>();
-
     if (flags.OpenForWriting()) {
         auto aws_minimum_part_size = 5242880; // 5 MiB https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
         auto max_part_count = config_params.max_parts_per_file;
diff --git a/test/sql/copy/s3/http_secret.test b/test/sql/copy/s3/http_secret.test
index ea526c6b..9b627cf5 100644
--- a/test/sql/copy/s3/http_secret.test
+++ b/test/sql/copy/s3/http_secret.test
@@ -1,5 +1,5 @@
 # name: test/sql/copy/s3/http_secret.test
-# description: Test http secret
+# description: Test HTTP secret params and their use in S3 requests
 # group: [s3]
 
 require parquet
@@ -28,17 +28,71 @@ COPY (SELECT 'value-1' as value) TO 's3://test-bucket/http-secret-test/test.parq
 statement ok
 PRAGMA enable_verification
 
-# Create some wonky headers
+# Add an HTTP secret with an extra header
 statement ok
 CREATE SECRET http3 (
-  TYPE HTTP,
+    TYPE HTTP,
     EXTRA_HTTP_HEADERS MAP{
-        'Authorization': 'Im very important',
         'CustomHeader': 'fliepflap'
     }
 );
 
+statement ok
+CALL enable_logging('HTTP');
+
+query I
+FROM 's3://test-bucket/http-secret-test/test.parquet'
+----
+value-1
+
+# Note that this header is now added to all requests
+query I
+SELECT distinct request.headers['CustomHeader'] FROM duckdb_logs_parsed('HTTP')
+----
+fliepflap
+
+statement ok
+CALL truncate_duckdb_logs()
+
+# Disabling this setting stops the HTTP secret headers from being merged into S3 requests
+statement ok
+SET merge_http_secret_into_s3_request=false
+
+query I
+FROM 's3://test-bucket/http-secret-test/test.parquet'
+----
+value-1
+
+query I
+SELECT distinct request.headers['CustomHeader'] FROM duckdb_logs_parsed('HTTP')
+----
+NULL
+
+statement ok
+CALL truncate_duckdb_logs()
+
+# The header can still be set directly in the S3 secret
+statement ok
+CREATE SECRET (
+    TYPE S3,
+    PROVIDER config,
+    KEY_ID '${AWS_ACCESS_KEY_ID}',
+    SECRET '${AWS_SECRET_ACCESS_KEY}',
+    REGION '${AWS_DEFAULT_REGION}',
+    ENDPOINT '${DUCKDB_S3_ENDPOINT}',
+    USE_SSL '${DUCKDB_S3_USE_SSL}',
+    EXTRA_HTTP_HEADERS MAP{
+        'CustomHeader': 'fliepflap'
+    }
+)
+
 query I
 FROM 's3://test-bucket/http-secret-test/test.parquet'
 ----
 value-1
+
+# Now the header is back in the request logs
+query I
+SELECT distinct request.headers['CustomHeader'] FROM duckdb_logs_parsed('HTTP')
+----
+fliepflap
\ No newline at end of file
diff --git a/test/sql/crypto/test_openssl_crypto.test b/test/sql/crypto/test_openssl_crypto.test
index 3af3924c..95270518 100644
--- a/test/sql/crypto/test_openssl_crypto.test
+++ b/test/sql/crypto/test_openssl_crypto.test
@@ -1,6 +1,6 @@
-# name: test/sql/attach/attach_encryption_fallback_readonly.test
+# name: test/sql/crypto/test_openssl_crypto.test
 # description: Test the openssl based crypto util
-# group: [attach]
+# group: [crypto]
 
 require httpfs
 
diff --git a/test/sql/json/table/internal_issue_6807.test_slow b/test/sql/json/table/internal_issue_6807.test_slow
new file mode 100644
index 00000000..9241c8e9
--- /dev/null
+++ b/test/sql/json/table/internal_issue_6807.test_slow
@@ -0,0 +1,18 @@
+# name: test/sql/json/table/internal_issue_6807.test_slow
+# description: Test adaptive read buffer growth for sequential reads
+# group: [table]
+
+require json
+
+require httpfs
+
+statement ok
+CALL enable_logging('HTTP');
+
+statement ok
+CREATE TABLE T AS FROM 'https://data.gharchive.org/2023-02-08-0.json.gz';
+
+query I
+SELECT count(*) FROM duckdb_logs_parsed('HTTP') WHERE request.type = 'GET' GROUP BY request.type;
+----
+9
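Illustrative usage, not part of the patch: the merge_http_secret_into_s3_request option added in httpfs_extension.cpp defaults to true, so extra headers from an HTTP secret are also attached to s3://, gcs:// and r2:// requests, which is what the updated http_secret.test checks. A minimal SQL sketch of opting out, with a placeholder header value:

CREATE SECRET http_headers (
    TYPE HTTP,
    EXTRA_HTTP_HEADERS MAP{
        'CustomHeader': 'some-value'
    }
);
-- With the default setting, this header is sent on both http(s):// and S3-style requests.
SET merge_http_secret_into_s3_request = false;
-- After this, the HTTP secret is only consulted for plain http(s):// requests.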