diff --git a/docs-iceberg-query-engine/clickhouse.mdx b/docs-iceberg-query-engine/clickhouse.mdx index 895ceb17..52c9cbcc 100644 --- a/docs-iceberg-query-engine/clickhouse.mdx +++ b/docs-iceberg-query-engine/clickhouse.mdx @@ -1,7 +1,7 @@ --- -title: "ClickHouse v25.4 & Apache Iceberg: Experimental Read Support | OLake" -description: "Explore OLake's coverage of ClickHouse v25.4 with experimental Apache Iceberg read support, time travel, REST catalogs, and roadmap for write and compaction features in 2025." -sidebar_label: ClickHouse v25.4 +title: "ClickHouse v25.9 & Apache Iceberg: Read+Write Support | OLake" +description: "Explore OLake's coverage of ClickHouse v25.9 (LTS v25.8) with Apache Iceberg read+write support, time travel, REST catalogs, and DML via INSERT/ALTER DELETE/ALTER UPDATE." +sidebar_label: ClickHouse v25.9 hide_table_of_contents: true --- @@ -84,34 +84,35 @@ export const clickhouseFeatures = [ } }, { - title: "Read-Only Analytics", - chip: "Writes Q3 2025", - description: "ENGINE=Iceberg tables and icebergS3()/icebergCluster() functions; full SQL on Parquet files. Writes/compaction scheduled Q3 2025", + title: "Read + Write Analytics", + chip: "Read+Write (v25.7+)", + description: "Full reads via ENGINE=Iceberg and icebergS3()/icebergCluster(); INSERT INTO existing tables (v25.7+), CREATE TABLE and DROP TABLE (v25.8 LTS). 
No compaction yet", icon: , - color: "orange", + color: "green", score: 70, details: { - title: "Comprehensive Read Capabilities with Write Roadmap", - description: "ClickHouse currently provides excellent read performance for Iceberg tables with a clear roadmap for write capabilities.", + title: "Read+Write Iceberg Support (v25.7+)", + description: "ClickHouse v25.7+ added write support for Iceberg tables; v25.8 LTS added CREATE/DROP TABLE and ALTER TABLE DELETE, and v25.9 added ALTER TABLE UPDATE.", overviewContent: { strengths: [ "High-performance OLAP queries on Iceberg data", "Multiple access methods (ENGINE, functions, cluster)", - "Full SQL support for complex analytics", - "Optimized for large-scale analytical workloads", - "Clear timeline for write capabilities (Q3 2025)" + "INSERT INTO existing Iceberg tables (v25.7+)", + "CREATE TABLE and DROP TABLE for Iceberg (v25.8+)", + "ALTER TABLE DELETE with positional+equality deletes (v25.8+)", + "ALTER TABLE UPDATE (v25.9+)" ], limitations: [ - "Currently read-only - no INSERT/CREATE TABLE AS", "No native compaction or maintenance operations", - "Write capabilities still in development", - "Limited to consuming data written by other engines" + "MERGE INTO not yet supported", + "V3 deletion vectors not yet supported", + "Write support is newer — test thoroughly before production" ], bestFor: [ "High-performance analytical queries", - "Read-heavy OLAP workloads", - "Real-time analytics on batch-updated data", - "Teams comfortable with read-only access" + "OLAP workloads requiring occasional updates", + "Teams wanting fast reads + growing write capabilities", + "Analytics-first architectures migrating to read+write" ] }, technicalSpecs: [ @@ -121,108 +122,101 @@ export const clickhouseFeatures = [ { label: "ENGINE=Iceberg", value: "Table Engine", status: "available" }, { label: "icebergS3() Function", value: "Direct S3 Access", status: "available" }, { label: "icebergCluster() Function", value: "Distributed Reads", status: "available" 
}, - { label: "Complex SQL", value: "Joins, Aggregations", status: "available" } + { label: "Position+Equality Deletes", value: "Since 24.12", status: "available" } ] }, { - category: "Write Operations (Roadmap)", + category: "Write Operations", items: [ - { label: "INSERT", value: "Q3 2025", status: "limited" }, - { label: "CREATE TABLE AS", value: "Q3 2025", status: "limited" }, - { label: "Native Compaction", value: "Q3 2025", status: "limited" }, - { label: "Merge Algorithms", value: "Q3 2025", status: "limited" } + { label: "INSERT INTO", value: "Since v25.7", status: "available" }, + { label: "CREATE TABLE", value: "Since v25.8 (LTS)", status: "available" }, + { label: "DROP TABLE", value: "Since v25.8 (LTS)", status: "available" }, + { label: "ALTER TABLE DELETE", value: "Since v25.8 (pos+eq deletes)", status: "available" }, + { label: "ALTER TABLE UPDATE", value: "Since v25.9", status: "available" }, + { label: "MERGE INTO", value: "Not yet supported", status: "limited" }, + { label: "Native Compaction", value: "Not yet available", status: "limited" } ] } ], externalLinks: [ - { label: "ClickHouse Release 25.04", url: "https://clickhouse.com/blog/clickhouse-release-25-04", type: "docs" }, - { label: "Iceberg Write Support Tracking", url: "https://github.com/ClickHouse/ClickHouse/issues/71407", type: "docs" } + { label: "ClickHouse Release 25.08 (LTS)", url: "https://clickhouse.com/blog/clickhouse-release-25-08", type: "docs" }, + { label: "ClickHouse 2025 Year in Review", url: "https://clickhouse.com/blog/clickhouse-2025-roundup", type: "docs" } ] } }, { - title: "No DML Operations", - chip: "Delete File Reading Only", - description: "Reading of position & equality deletes supported since 24.12; queries merge delete files on-the-fly (MoR). 
No DELETE/UPDATE/MERGE writers until write support lands", + title: "Basic DML Operations", + chip: "INSERT/DELETE/UPDATE (No MERGE)", + description: "INSERT INTO (v25.7+), ALTER TABLE DELETE with position+equality deletes (v25.8+), ALTER TABLE UPDATE (v25.9+). MERGE INTO not yet supported", icon: , - color: "red", - score: 30, + color: "orange", + score: 50, details: { - title: "Delete File Reading with Future Write Plans", - description: "ClickHouse can read and merge delete files from other engines but cannot produce DML operations itself.", + title: "DML via ALTER TABLE (v25.8+ LTS)", + description: "ClickHouse v25.8 LTS added ALTER TABLE DELETE for Iceberg tables, plus CREATE/DROP TABLE; ALTER TABLE UPDATE followed in v25.9. MERGE is not yet supported.", overviewContent: { strengths: [ - "Reads position and equality delete files correctly", - "Automatic merge-on-read for consistent results", - "Compatible with DML from other engines", - "No performance penalty for reading MoR tables" + "INSERT INTO existing tables since v25.7", + "ALTER TABLE DELETE (positional+equality delete files) since v25.8", + "ALTER TABLE UPDATE since v25.9", + "Reads position and equality delete files (MoR) correctly since 24.12" ], limitations: [ - "Cannot perform DELETE, UPDATE, or MERGE operations", - "No row-level modifications capabilities", - "Must rely on other engines for data changes", - "Limited to consuming pre-processed data" + "MERGE INTO not yet supported", + "No single-statement upsert capability", + "ALTER TABLE approach differs from standard SQL DML", + "No compaction after deletes" ], bestFor: [ - "Analytics on data modified by other engines", - "Read-only reporting and dashboards", - "Workloads where DML happens elsewhere", - "Pure analytical use cases" + "Analytics on frequently updated Iceberg data", + "Workloads requiring basic INSERT/DELETE/UPDATE without MERGE", + "Teams familiar with ClickHouse ALTER TABLE syntax" ] }, technicalSpecs: [ { - category: "Read Capabilities", + 
category: "Available DML", items: [ - { label: "Position Deletes", value: "Since 24.12", status: "available" }, - { label: "Equality Deletes", value: "Since 24.12", status: "available" }, - { label: "On-the-fly Merging", value: "Automatic", status: "available" }, - { label: "MoR Consistency", value: "Full Support", status: "available" } - ] - }, - { - category: "Write Capabilities", - items: [ - { label: "DELETE", value: "Not Available", status: "limited" }, - { label: "UPDATE", value: "Not Available", status: "limited" }, - { label: "MERGE INTO", value: "Not Available", status: "limited" }, - { label: "DML Timeline", value: "With Write Support", status: "limited" } + { label: "INSERT INTO", value: "Since v25.7", status: "available" }, + { label: "ALTER TABLE DELETE", value: "v25.8+ (pos+eq deletes)", status: "available" }, + { label: "ALTER TABLE UPDATE", value: "Since v25.9", status: "available" }, + { label: "MERGE INTO", value: "Not Available", status: "limited" } ] } ], externalLinks: [ - { label: "Delete Files Support", url: "https://clickhouse.com/blog/clickhouse-release-24-12", type: "docs" }, + { label: "ClickHouse Release 25.08", url: "https://clickhouse.com/blog/clickhouse-release-25-08", type: "docs" }, { label: "DML Write Support Tracking", url: "https://github.com/ClickHouse/ClickHouse/issues/66588", type: "docs" } ] } }, { - title: "Read-Only Storage Strategy", - chip: "CoW + MoR Read", - description: "Copy-on-Write always readable; Merge-on-Read readable from 24.12 (non-materialized delete files)", + title: "Read + Write Storage Strategy", + chip: "CoW Read + MoR Write (v25.8+)", + description: "Copy-on-Write always readable; Merge-on-Read readable from 24.12; ALTER TABLE DELETE writes position-delete files (v25.8 LTS); ALTER TABLE UPDATE writes equality deletes (v25.9)", icon: , - color: "orange", - score: 60, + color: "green", + score: 75, details: { - title: "Comprehensive Read Support for Both Strategies", - description: "ClickHouse can read 
both Copy-on-Write and Merge-on-Read tables efficiently, handling delete files transparently.", + title: "Read + Write Support for Both Storage Strategies", + description: "ClickHouse reads both CoW and MoR tables efficiently, and can produce MoR delete files via ALTER TABLE DELETE (from v25.8 LTS) and ALTER TABLE UPDATE (from v25.9).", overviewContent: { strengths: [ "Reads both CoW and MoR tables seamlessly", - "Automatic handling of delete files since 24.12", - "No user intervention required for MoR merging", - "Consistent performance across storage strategies" + "ALTER TABLE DELETE writes position-delete files (v25.8 LTS)", + "ALTER TABLE UPDATE writes equality-delete files (v25.9)", + "Automatic handling of incoming delete files since 24.12" ], limitations: [ - "Cannot produce delete files or choose write strategy", - "Performance depends on delete file accumulation", - "No control over compaction timing", - "Must rely on other engines for optimization" + "MERGE INTO not yet supported (no upsert semantics)", + "No user-configurable CoW vs MoR write mode", + "No built-in compaction — delete file accumulation must be managed externally", + "Write DML still experimental in v25.8/v25.9" ], bestFor: [ - "Mixed environments with different write engines", + "Mixed environments requiring both read and row-level updates", "Analytics on frequently updated data", - "Read-heavy workloads on MoR tables", + "Write-light workloads using ALTER TABLE DELETE/UPDATE", "Consuming data from streaming pipelines" ] }, @@ -240,15 +234,15 @@ export const clickhouseFeatures = [ category: "Merge-on-Read", items: [ { label: "Delete File Reading", value: "Since 24.12", status: "available" }, - { label: "On-the-fly Merging", value: "Automatic", status: "available" }, - { label: "Non-materialized Deletes", value: "Supported", status: "available" }, - { label: "Performance Impact", value: "Minimal", status: "available" } + { label: "Position Deletes (Write)", value: "Since 25.8 LTS", status: "available" }, 
+ { label: "Equality Deletes (Write)", value: "Since 25.9", status: "available" }, + { label: "MERGE INTO", value: "Not Supported", status: "limited" } ] } ], externalLinks: [ { label: "MoR Support in 24.12", url: "https://clickhouse.com/blog/clickhouse-release-24-12", type: "docs" }, - { label: "Changelog 24.12", url: "https://clickhouse.com/docs/changelogs/24.12", type: "docs" } + { label: "ALTER TABLE DELETE — v25.8 LTS", url: "https://clickhouse.com/blog/clickhouse-release-25-08", type: "docs" } ] } }, @@ -309,14 +303,14 @@ export const clickhouseFeatures = [ }, { title: "No Format V3 Support", - chip: "Q3 2025 Planned", - description: "Not yet supported - engine rejects DV tables; v3 reader/writer planned post-spec-v2 completeness; DV/lineage scheduled Q3 2025", + chip: "Not Yet Supported", + description: "Not yet supported - engine rejects DV tables; v3 reader/writer planned post-spec-v2 completeness; DV/lineage not yet available", icon: , color: "red", score: 0, details: { title: "V1/V2 Support with Clear V3 Roadmap", - description: "ClickHouse currently supports Iceberg format v1 and v2, with comprehensive v3 support planned for Q3 2025.", + description: "ClickHouse currently supports Iceberg format v1 and v2. 
Comprehensive v3 support is under active development.", overviewContent: { strengths: [ "Stable v1 and v2 format support", @@ -342,13 +336,13 @@ export const clickhouseFeatures = [ category: "Current Support", items: [ { label: "Iceberg v1", value: "Full Support", status: "available" }, - { label: "Iceberg v2", value: "Read Support", status: "available" }, + { label: "Iceberg v2", value: "Read + Write (v25.7+)", status: "available" }, { label: "Schema Evolution", value: "v1/v2 Only", status: "available" }, { label: "Position Deletes", value: "v2 Support", status: "available" } ] }, { - category: "V3 Roadmap (Q3 2025)", + category: "V3 Roadmap (Active Development)", items: [ { label: "Deletion Vectors", value: "Planned", status: "limited" }, { label: "Row Lineage", value: "Planned", status: "limited" }, @@ -521,7 +515,7 @@ export const clickhouseFeatures = [ category: "Performance Roadmap", items: [ { label: "Distributed Cache", value: "Public Preview Jul 2025", status: "limited" }, - { label: "Stateless Workers", value: "Early Q3 2025", status: "limited" }, + { label: "Stateless Workers", value: "In Development", status: "limited" }, { label: "Data Shuffling", value: "Prototype", status: "limited" }, { label: "Production Ready", value: "H2 2025", status: "limited" } ] @@ -594,7 +588,7 @@ export const clickhouseFeatures = [ export const clickhouseTableData = { title: "ClickHouse Iceberg Feature Matrix", - description: "Comprehensive breakdown of Iceberg capabilities in ClickHouse v25.4", + description: "Comprehensive breakdown of Iceberg capabilities in ClickHouse v25.9", variant: "default", columns: [ { @@ -645,11 +639,11 @@ export const clickhouseTableData = { }, support: { value: Partial, - badge: { text: "Read-Only", variant: "warning" } + badge: { text: "Read + Write (v25.7+)", variant: "warning" } }, details: { - value: "ENGINE=Iceberg, icebergS3(), icebergCluster() functions; full SQL reads; writes Q3 2025", - tooltip: "Excellent read performance, 
comprehensive write capabilities planned" + value: "ENGINE=Iceberg, icebergS3(), icebergCluster() functions; full SQL reads; INSERT INTO (v25.7), CREATE/DROP TABLE (v25.8)", + tooltip: "Read + write support shipped in v25.7–v25.8; MERGE not yet supported" }, version: { value: "24.3+" } }, @@ -658,14 +652,14 @@ export const clickhouseTableData = { value: DML Operations }, support: { - value: None, - badge: { text: "Delete File Reading", variant: "error" } + value: Partial, + badge: { text: "INSERT/ALTER DELETE/UPDATE", variant: "warning" } }, details: { - value: "Reads position/equality deletes (24.12), merges on-the-fly; no DELETE/UPDATE/MERGE writers", - tooltip: "Can consume DML from other engines but cannot produce" + value: "INSERT INTO (v25.7); ALTER TABLE DELETE writes position deletes (v25.8 LTS); ALTER TABLE UPDATE writes equality deletes (v25.9); MERGE not supported", + tooltip: "Basic DML shipped in v25.7–v25.9; no MERGE or upsert support yet" }, - version: { value: "24.12" } + version: { value: "25.7+" } }, { dimension: { @@ -673,13 +667,13 @@ export const clickhouseTableData = { }, support: { value: Partial, - badge: { text: "Read CoW + MoR", variant: "warning" } + badge: { text: "CoW Read + MoR Write (v25.8+)", variant: "warning" } }, details: { - value: "CoW always readable; MoR readable from 24.12 (non-materialized delete files)", - tooltip: "Comprehensive read support for both strategies" + value: "CoW always readable; MoR readable from 24.12; ALTER TABLE DELETE writes position deletes (v25.8); ALTER TABLE UPDATE writes equality deletes (v25.9)", + tooltip: "Now produces MoR delete files in addition to reading them" }, - version: { value: "24.12" } + version: { value: "24.12+" } }, { dimension: { @@ -704,7 +698,7 @@ export const clickhouseTableData = { badge: { text: "v1/v2 Only", variant: "error" } }, details: { - value: "Reads spec v1/v2; engine rejects DV tables; v3 DV/lineage scheduled Q3 2025", + value: "Reads spec v1/v2; engine rejects DV 
tables; v3 DV/lineage support under active development", tooltip: "Clear v3 roadmap with deletion vectors and row lineage" }, version: { value: "24.3+" } diff --git a/docs-iceberg-query-engine/databricks.mdx b/docs-iceberg-query-engine/databricks.mdx index 53ebfd7c..bdd4f0b8 100644 --- a/docs-iceberg-query-engine/databricks.mdx +++ b/docs-iceberg-query-engine/databricks.mdx @@ -1,7 +1,7 @@ --- -title: "Databricks Runtime 14.3 LTS+ & Apache Iceberg: UniForm Multi-Format Lakehouse | OLake" -description: "Explore OLake insights on Databricks Runtime 14.3 LTS+ enabling read-only Iceberg views of Delta tables via Unity Catalog REST, with time travel and enterprise governance." -sidebar_label: Databricks Runtime 14.3 LTS+ +title: "Databricks DBR 16.4+ & Apache Iceberg: Native Full-DML Lakehouse | OLake" +description: "Explore OLake insights on Databricks Runtime 16.4+ with native Apache Iceberg support via Unity Catalog — full DML, deletion vectors, V3 Public Preview, and enterprise governance." +sidebar_label: Databricks (DBR 16.4+) hide_table_of_contents: true --- @@ -15,475 +15,460 @@ import { CubeIcon, ExclamationTriangleIcon, BoltIcon, -// BoltIcon, - ArrowPathIcon + CodeBracketIcon, + ArrowPathIcon, + ServerStackIcon } from '@heroicons/react/24/outline'; export const databricksFeatures = [ { - title: "Unity Catalog REST Integration", - chip: "REST Endpoint", - description: "Unity Catalog exposes Iceberg REST catalog at /api/2.1/unity-catalog/iceberg, enabling external engines to read UniForm tables with standard Iceberg clients", + title: "Unity Catalog as Iceberg REST Catalog", + chip: "Full Support", + description: "Unity Catalog implements the Iceberg REST Catalog API for both read and write on managed Iceberg tables. 
External engines connect via /api/2.1/unity-catalog/iceberg with OAuth/PAT tokens and credential vending", icon: , color: "blue", - score: 85, + score: 100, details: { - title: "Seamless External Engine Integration", - description: "Databricks provides a standardized REST catalog endpoint that allows any Iceberg-compatible engine to discover and read Delta tables with UniForm enabled.", + title: "Unity Catalog as Full Iceberg REST Catalog", + description: "Databricks DBR 16.4+ promotes Unity Catalog to a first-class Iceberg REST Catalog endpoint, enabling any Iceberg-compatible engine to read and write managed Iceberg tables — not just read UniForm Delta tables.", overviewContent: { strengths: [ - "Standard Iceberg REST catalog API compliance", - "Automatic table discovery for external engines", - "Credential vending for secure cloud storage access", - "Support for all major Iceberg client libraries", - "Unity Catalog metadata integration and governance" + "Full Iceberg REST Catalog API compliance (read + write)", + "External engines can write native Iceberg tables via REST", + "Credential vending for scoped temporary cloud-storage tokens", + "Hive Metastore and AWS Glue via Lakehouse Federation (read-only)", + "Unified table discovery across Databricks workspaces" ], limitations: [ - "Read-only access for external Iceberg clients", - "Limited to tables with IcebergCompatV2 enabled", - "No native Hive Metastore or AWS Glue integration", - "REST catalog limited to Unity Catalog managed tables" + "Hive Metastore and AWS Glue are read-only via Lakehouse Federation", + "Nessie, Hadoop, JDBC catalogs not supported natively", + "External write requires REST catalog; direct file-path writes unsupported" ], bestFor: [ - "Multi-engine lakehouse architectures", - "External analytics tools requiring Iceberg access", - "Data sharing across different compute engines", - "Standardized metadata access for governance" + "Multi-engine lakehouse where external engines need full 
read/write", + "Centralized governance with Unity Catalog RBAC", + "Teams standardizing on Iceberg REST as the interop layer", + "Organizations migrating from Delta-only to open Iceberg format" ] }, technicalSpecs: [ { - category: "REST Catalog Features", + category: "REST Catalog Endpoint", items: [ { label: "API Endpoint", value: "/api/2.1/unity-catalog/iceberg", status: "available" }, - { label: "Authentication", value: "Personal Access Tokens", status: "available" }, - { label: "Credential Vending", value: "Scoped cloud storage", status: "available" }, - { label: "Table Discovery", value: "Automatic via REST", status: "available" } + { label: "Authentication", value: "Personal Access Token / OAuth2", status: "available" }, + { label: "Credential Vending", value: "Scoped cloud-storage tokens", status: "available" }, + { label: "External Writes", value: "Supported via REST (DBR 16.4+)", status: "available" } ] }, { - category: "External Engine Support", + category: "Catalog Connectivity", items: [ - { label: "Apache Spark", value: "Full compatibility", status: "available" }, - { label: "Trino/Presto", value: "Read support", status: "available" }, - { label: "Apache Flink", value: "Read support", status: "available" }, - { label: "DuckDB", value: "Read support", status: "available" } + { label: "Hive Metastore", value: "Read-only via Lakehouse Federation", status: "limited" }, + { label: "AWS Glue", value: "Read-only via Lakehouse Federation", status: "limited" }, + { label: "Nessie / JDBC", value: "Not Supported", status: "unavailable" }, + { label: "Polaris / REST", value: "Full Support", status: "available" } ] } ], externalLinks: [ { label: "Unity Catalog Iceberg Endpoint", url: "https://docs.databricks.com/aws/en/external-access/iceberg.html", type: "docs" }, - { label: "Access Databricks Tables from Iceberg Clients", url: "https://docs.databricks.com/aws/en/external-access/iceberg.html", type: "docs" } + { label: "Full Apache Iceberg Support Announcement", 
url: "https://www.databricks.com/blog/announcing-full-apache-iceberg-support-databricks", type: "blog" } ] } }, { - title: "UniForm Multi-Format Technology", - chip: "Innovative", - description: ( - <> - UniForm enables the same table to be accessible as both Delta and Iceberg simultaneously, generating Iceberg metadata on every Delta commit - - ), - icon: , - color: "purple", - score: 95, + title: "Full Read/Write on Native Iceberg Tables", + chip: "Full Support", + description: "DBR 16.4+ supports INSERT INTO, CREATE TABLE, CTAS, and full DDL on native managed Iceberg V2 tables. External engines can read and write via the REST catalog endpoint", + icon: , + color: "green", + score: 100, details: { - title: "Revolutionary Multi-Format Lakehouse", - description: "UniForm technology represents a breakthrough in lakehouse architecture, allowing a single table to serve multiple engines with their native formats.", + title: "Complete Read/Write on Managed Iceberg Tables", + description: "Databricks Runtime 16.4+ moves beyond read-only UniForm to deliver full native Iceberg table creation and write operations, both from Databricks SQL/Spark and from external Iceberg clients via REST.", overviewContent: { strengths: [ - "Single source of truth with dual format access", - "Automatic Iceberg metadata generation on Delta commits", - "Zero data duplication between formats", - "Seamless Delta feature integration (Liquid Clustering, etc.)", - "Asynchronous metadata sync for performance" + "CREATE TABLE ... 
USING ICEBERG for new managed Iceberg tables", + "Full INSERT INTO, CTAS, and DDL support inside Databricks", + "External Iceberg clients can read and write via REST catalog", + "Parquet-based storage with Unity Catalog metadata management", + "Seamless alongside existing Delta Lake tables in same workspace" ], limitations: [ - "Requires IcebergCompatV2 feature flag enabled", - "Tables with deletion vectors need purging first", - "Streaming writes not compatible with UniForm", - "Materialized views cannot enable Iceberg reads" + "UniForm (Delta→Iceberg conversion) is still read-only for external engines", + "V3 table creation and write in Public Preview (DBR 18.0+)", + "Direct object-storage path writes bypass Unity Catalog governance" ], bestFor: [ - "Organizations wanting to standardize on Delta while supporting Iceberg clients", - "Multi-vendor tool environments requiring format flexibility", - "Migration scenarios from legacy systems to modern lakehouse", - "Data sharing across different technology stacks" + "New lakehouse projects wanting native Iceberg from day one", + "Teams migrating from Delta to open Iceberg format", + "Multi-engine architectures where Spark, Flink, and Trino all write", + "Organizations standardizing on Iceberg as the primary table format" ] }, technicalSpecs: [ { - category: "UniForm Configuration", + category: "Write Operations", items: [ - { label: "Enable Property", value: "delta.universalFormat.enabledFormats=iceberg", status: "available" }, - { label: "Legacy Property", value: "delta.enableIcebergCompatV2=true", status: "available" }, - { label: "Metadata Sync", value: "Asynchronous on Delta commits", status: "available" }, - { label: "Manual Sync", value: "MSCK REPAIR TABLE … SYNC METADATA", status: "available" } + { label: "CREATE TABLE USING ICEBERG", value: "DBR 16.4+ (Public Preview)", status: "available" }, + { label: "INSERT INTO", value: "Batch support", status: "available" }, + { label: "CREATE TABLE AS SELECT", value: 
"CTAS Support", status: "available" }, + { label: "External Engine Writes", value: "Via REST Catalog", status: "available" } ] }, { - category: "Format Features", + category: "Read Operations", items: [ - { label: "Iceberg Spec Version", value: "v2", status: "available" }, - { label: "Data Format", value: "Parquet with Zstandard", status: "available" }, - { label: "Delta Lineage", value: "converted_delta_version properties", status: "available" }, - { label: "Time Travel Mapping", value: "Delta to Iceberg timestamps", status: "available" } + { label: "SELECT Queries", value: "Full SQL", status: "available" }, + { label: "Metadata Tables", value: "history, snapshots, files", status: "available" }, + { label: "Predicate Pushdown", value: "Optimized", status: "available" }, + { label: "UniForm (Delta→Iceberg)", value: "External read-only", status: "available" } ] } ], externalLinks: [ - { label: "Read Delta Tables with Iceberg Clients", url: "https://docs.databricks.com/aws/en/delta/uniform.html", type: "docs" }, - { label: "Full Apache Iceberg Support Announcement", url: "https://www.databricks.com/blog/announcing-full-apache-iceberg-support-databricks", type: "blog" } + { label: "Full Apache Iceberg Support Announcement", url: "https://www.databricks.com/blog/announcing-full-apache-iceberg-support-databricks", type: "blog" }, + { label: "Unity Catalog Iceberg Endpoint", url: "https://docs.databricks.com/aws/en/external-access/iceberg.html", type: "docs" } ] } }, { - title: "Read-Only Iceberg Access", - chip: "External Engines", - description: "External Iceberg engines get full SELECT and time-travel capabilities while Delta users retain complete DML operations within Databricks", - icon: , + title: "Complete DML Operations", + chip: "Full Support", + description: "MERGE, UPDATE, DELETE fully supported on native managed Iceberg tables. 
DBR 18.0+ Public Preview uses deletion vectors for efficient row-level changes instead of traditional position/equality delete files", + icon: , color: "green", - score: 80, + score: 100, details: { - title: "Strategic Read-Only Architecture", - description: "Databricks implements a clear separation where Delta remains the authoritative format for writes while Iceberg provides standardized read access for external engines.", + title: "Full DML with Deletion Vectors", + description: "Databricks delivers complete DML operations on managed Iceberg tables, using deletion vectors (Iceberg V3) in DBR 18.0 Public Preview rather than V2 position/equality delete files for dramatically better write performance.", overviewContent: { strengths: [ - "Full SELECT support for external Iceberg engines", - "Standard Iceberg time travel and snapshot queries", - "Complete Delta DML capabilities within Databricks", - "Consistent read views across all external engines", - "No conflicts between Delta writes and Iceberg reads" + "MERGE INTO with full matched/not-matched/not-matched-by-source clauses", + "UPDATE and DELETE with arbitrary predicates", + "Deletion vectors write only small bitmaps instead of rewriting data files", + "ACID guarantees with snapshot isolation", + "Immediate read consistency after any DML operation" ], limitations: [ - "Iceberg clients cannot perform INSERT/UPDATE/DELETE operations", - "No Iceberg-native write operations through REST catalog", - "External engines depend on Delta commit frequency for freshness", - "Cannot use Iceberg-specific write optimizations externally" + "Deletion vectors (V3 DML) in Public Preview — DBR 18.0+ only", + "V2 managed tables use copy-on-write for DML (no position deletes)", + "External Iceberg clients writing via REST may use different DML semantics" ], bestFor: [ - "Analytics workloads requiring external engine access", - "Data sharing scenarios with read-only requirements", - "Multi-tenant environments with controlled write 
access", - "Compliance scenarios requiring immutable external views" + "CDC processing with high-frequency row-level updates", + "GDPR compliance deletion workflows", + "Real-time data correction and enrichment pipelines", + "Merge-heavy ETL workloads needing efficient write performance" ] }, technicalSpecs: [ { - category: "Read Capabilities", + category: "DML Operations", items: [ - { label: "SELECT Queries", value: "Full SQL support", status: "available" }, - { label: "Time Travel", value: "Snapshot ID and timestamp", status: "available" }, - { label: "Predicate Pushdown", value: "Engine-dependent", status: "available" }, - { label: "Metadata Queries", value: "Standard Iceberg operations", status: "available" } + { label: "MERGE INTO", value: "Full syntax", status: "available" }, + { label: "UPDATE", value: "Condition-based rows", status: "available" }, + { label: "DELETE", value: "Predicate-based rows", status: "available" }, + { label: "INSERT INTO / OVERWRITE", value: "Batch + partition", status: "available" } ] }, { - category: "Write Limitations", + category: "Delete Strategy", items: [ - { label: "INSERT Operations", value: "Delta only", status: "limited" }, - { label: "UPDATE Operations", value: "Delta only", status: "limited" }, - { label: "DELETE Operations", value: "Delta only", status: "limited" }, - { label: "MERGE Operations", value: "Delta only", status: "limited" } + { label: "Deletion Vectors (V3)", value: "Public Preview — DBR 18.0+", status: "preview" }, + { label: "Copy-on-Write (V2)", value: "Stable — DBR 16.4+", status: "available" }, + { label: "V2 Position Deletes", value: "Not used — DVs replace them", status: "unavailable" }, + { label: "Read Performance", value: "DV merging at read time", status: "available" } ] } ], externalLinks: [ - { label: "Read Delta Tables - Limitations", url: "https://docs.databricks.com/aws/en/delta/uniform.html", type: "docs" } + { label: "Unity Catalog Iceberg Endpoint", url: 
"https://docs.databricks.com/aws/en/external-access/iceberg.html", type: "docs" } ] } }, { - title: "Copy-on-Write Semantics", - chip: "CoW Only", - description: "Delta commits use Copy-on-Write semantics with no Iceberg delete files; external readers always see fully merged, materialized snapshots", + title: "Deletion Vectors & MoR/CoW", + chip: "Partial Support", + description: "V3 uses deletion vectors (MoR-like) for efficient row-level deletes without rewriting data files (DBR 18.0+ Public Preview). Copy-on-write also supported. V2 position/equality deletes not used — DVs replace them in V3", icon: , color: "orange", score: 75, details: { - title: "Simplified Storage Strategy", - description: "Databricks UniForm implements a Copy-on-Write approach that ensures external Iceberg readers always see clean, fully materialized data without merge complexity.", + title: "Deletion Vectors Replace Traditional Delete Files", + description: "Databricks V3 introduces deletion vectors — small per-file bitmaps that mark deleted rows without rewriting data files. This replaces V2 position/equality delete files with a more efficient MoR-like mechanism. 
CoW rewrites remain available.", overviewContent: { strengths: [ - "Consistent read performance without merge overhead", - "Fully materialized snapshots for external engines", - "No delete file complexity for Iceberg clients", - "Simplified troubleshooting and debugging", - "Optimized for analytical read workloads" + "Deletion vectors avoid expensive data-file rewrites on DELETE/UPDATE", + "Much smaller write amplification compared to CoW for selective deletes", + "DV merge at read time is lightweight (bitmap lookup per row-group)", + "Copy-on-write still available for read-optimized tables", + "DVs are a V3 standard, readable by any V3-capable engine" ], limitations: [ - "No Merge-on-Read optimizations for frequent updates", - "Higher storage overhead for update-heavy workloads", - "Cannot leverage Iceberg delete file efficiency", - "Limited to CoW performance characteristics" + "Deletion vectors only in Public Preview (DBR 18.0+)", + "V2 tables use CoW only — position/equality deletes explicitly NOT supported", + "Read performance degrades if many DV files accumulate; periodic OPTIMIZE needed", + "Not all external Iceberg engines support V3 deletion vectors yet" ], bestFor: [ - "Read-heavy analytical workloads", - "Scenarios prioritizing read performance over write efficiency", - "External engines requiring consistent snapshot views", - "Use cases where merge complexity should be avoided" + "Write-heavy workloads with many row-level changes", + "Tables where full CoW rewrites are too expensive", + "V3 early adopters wanting deletion vector efficiency", + "CDC sink tables with continuous UPDATEs and DELETEs" ] }, technicalSpecs: [ { - category: "Storage Strategy", + category: "V3 Deletion Vectors (Public Preview)", items: [ - { label: "Write Strategy", value: "Copy-on-Write only", status: "available" }, - { label: "Delete Files", value: "Not generated", status: "unavailable" }, - { label: "Snapshot Views", value: "Fully materialized", status: "available" }, - { 
label: "Read Performance", value: "Consistent", status: "available" } + { label: "Deletion Vector Format", value: "Iceberg V3 spec bitmaps", status: "preview" }, + { label: "Write Performance", value: "No data-file rewrite needed", status: "preview" }, + { label: "Read Overhead", value: "Bitmap merge at scan time", status: "preview" }, + { label: "Required Runtime", value: "DBR 18.0+", status: "preview" } ] }, { - category: "Delta Integration", + category: "V2 Copy-on-Write (Stable)", items: [ - { label: "Delta Features", value: "Full compatibility", status: "available" }, - { label: "Liquid Clustering", value: "Supported", status: "available" }, - { label: "Predictive Optimization", value: "Available", status: "available" }, - { label: "Deletion Vectors", value: "Must be purged", status: "limited" } + { label: "Write Strategy", value: "Full file rewrite on DML", status: "available" }, + { label: "Read Performance", value: "No merge overhead", status: "available" }, + { label: "Required Runtime", value: "DBR 16.4+", status: "available" }, + { label: "Position/Equality Deletes", value: "Not used in Databricks V2", status: "unavailable" } ] } ], externalLinks: [ - { label: "Unity Catalog Iceberg Endpoint - Notes", url: "https://docs.databricks.com/aws/en/external-access/iceberg.html", type: "docs" } + { label: "Full Apache Iceberg Support Announcement", url: "https://www.databricks.com/blog/announcing-full-apache-iceberg-support-databricks", type: "blog" } ] } }, { - title: "Metadata Generation & Sync", - chip: "Asynchronous", - description: "Iceberg metadata generated asynchronously on every Delta commit with manual sync option via MSCK REPAIR TABLE … SYNC METADATA", + title: "No Streaming on Iceberg Tables", + chip: "Not Supported", + description: "Change Data Feed (CDF) not supported on Iceberg tables — CDF is Delta Lake-only. 
Iceberg V3 row lineage provides CDC building blocks but CDC streaming is not directly exposed via Iceberg APIs", icon: , - color: "blue", - score: 80, + color: "red", + score: 0, details: { - title: "Intelligent Metadata Management", - description: "Databricks automates Iceberg metadata generation while providing controls for immediate synchronization when needed for time-sensitive workflows.", + title: "Streaming Gaps on Managed Iceberg Tables", + description: "Databricks Change Data Feed is a Delta Lake exclusive. Iceberg tables in Databricks do not expose CDF-equivalent streaming endpoints. V3 row lineage is a precursor to CDC but not yet surfaced for consumption.", overviewContent: { strengths: [ - "Automatic metadata generation on every Delta commit", - "Non-blocking asynchronous process for performance", - "Manual sync capability for immediate consistency", - "Delta version mapping for audit trails", - "Timestamp correlation between formats" + "Iceberg V3 row lineage metadata is written (DBR 18.0+ Public Preview)", + "External streaming engines (Flink, Spark Structured Streaming) can write Iceberg via REST", + "Batch CDC patterns work well — query snapshot diffs via history metadata" ], limitations: [ - "Potential lag between Delta commits and Iceberg visibility", - "Manual intervention required for immediate consistency", - "Async process may delay external engine data availability", - "No real-time streaming metadata updates" + "Change Data Feed not available on Iceberg tables", + "No built-in CDC streaming subscribe API for Iceberg", + "Row lineage readable in metadata but not exposed as a change stream", + "Real-time ingestion requires external engines (Flink, Spark)" ], bestFor: [ - "Batch processing workflows with acceptable latency", - "Scenarios where eventual consistency is sufficient", - "Use cases requiring audit trails and version mapping", - "Environments with mixed batch and real-time requirements" + "Batch CDC patterns using Iceberg snapshot 
history", + "Architectures pairing Databricks with Flink for streaming ingestion", + "Near-real-time workflows that tolerate micro-batch latency", + "Advanced V3 users building custom row-lineage consumers" ] }, technicalSpecs: [ { - category: "Metadata Generation", + category: "Streaming Status", items: [ - { label: "Sync Mode", value: "Asynchronous by default", status: "available" }, - { label: "Manual Sync", value: "MSCK REPAIR TABLE … SYNC METADATA", status: "available" }, - { label: "Delta Mapping", value: "converted_delta_version properties", status: "available" }, - { label: "Timestamp Mapping", value: "converted_delta_timestamp", status: "available" } + { label: "Change Data Feed (CDF)", value: "Delta Lake only — not on Iceberg", status: "unavailable" }, + { label: "Iceberg V3 Row Lineage", value: "Written in DBR 18.0+ Public Preview", status: "preview" }, + { label: "Flink Write via REST", value: "Supported externally", status: "available" }, + { label: "Spark Structured Streaming", value: "Internal Delta only", status: "limited" } ] }, { - category: "Consistency Features", + category: "Workarounds", items: [ - { label: "Eventual Consistency", value: "Default behavior", status: "available" }, - { label: "Immediate Consistency", value: "Via manual sync", status: "available" }, - { label: "Version Tracking", value: "Delta to Iceberg mapping", status: "available" }, - { label: "Audit Trail", value: "Complete lineage", status: "available" } + { label: "Snapshot Diff Queries", value: "Via metadata tables", status: "available" }, + { label: "External Flink Ingestion", value: "Write Iceberg via REST", status: "available" }, + { label: "Batch CDC", value: "Snapshot-based extraction", status: "available" }, + { label: "ClickPipe / Kafka Connect", value: "Third-party via REST", status: "available" } ] } ], externalLinks: [ - { label: "Enable Iceberg Metadata Generation", url: "https://docs.databricks.com/aws/en/delta/uniform.html", type: "docs" } + { label: "Read 
Delta Tables with Iceberg Clients", url: "https://docs.databricks.com/aws/en/delta/uniform.html", type: "docs" } ] } }, { - title: "Time Travel & Snapshot Queries", - chip: "Full Support", - description: ( - <> - External engines can time-travel using standard Iceberg syntax with snapshot-ID or timestamp, enhanced with Delta version mapping properties - - ), - icon: , - color: "green", - score: 90, + title: "Iceberg V3 Native Support", + chip: "Public Preview — DBR 18.0+", + description: "Iceberg V3 native support in Public Preview (DBR 18.0+). Supports deletion vectors, VARIANT columns, and V3 table creation. Existing V2 tables can be upgraded. Some V3 data types may have limited support", + icon: , + color: "orange", + score: 50, details: { - title: "Comprehensive Historical Query Support", - description: "Databricks provides full Iceberg time travel capabilities while maintaining Delta version correlation for comprehensive historical analysis.", + title: "Iceberg V3 in Public Preview with Key V3 Features", + description: "Databricks DBR 18.0 introduces Iceberg V3 in Public Preview, including deletion vectors, VARIANT data type, and the row_id / row_lineage metadata columns from the V3 spec. 
Production readiness expected in a future GA release.", overviewContent: { strengths: [ - "Standard Iceberg time travel syntax support", - "Snapshot-ID and timestamp-based queries", - "Delta version correlation for cross-format analysis", - "Comprehensive audit trail capabilities", - "Seamless integration with external tools" + "Deletion vectors (V3) replace V2 position deletes for efficient MoR", + "VARIANT column type for semi-structured JSON data without schema explosion", + "Row lineage metadata for future CDC and auditing capabilities", + "Existing V2 tables can be in-place upgraded to V3", + "First-mover advantage on V3 features in an enterprise platform" ], limitations: [ - "Time travel limited to available snapshots", - "Historical queries depend on snapshot retention policies", - "Cannot time travel beyond earliest Delta conversion", - "Performance varies with historical data volume" + "V3 is Public Preview — not recommended for mission-critical production workloads", + "Some V3-only data types (nanosecond timestamps, geometry, vector) may have limited support", + "Cross-engine interop with V3 tables limited to engines that support V3", + "V3 upgrades are one-way — V3 tables not readable by V2-only engines" ], bestFor: [ - "Data auditing and compliance scenarios", - "Historical trend analysis across formats", - "Debugging and root cause analysis", - "Regulatory reporting with specific timestamps" + "Early adopters evaluating deletion vector efficiency", + "VARIANT column use cases replacing complex nested JSON schemas", + "Labs and staging environments testing V3 migration paths", + "Organizations planning V3 adoption ahead of GA" ] }, technicalSpecs: [ { - category: "Time Travel Features", + category: "V3 Features Supported (Public Preview)", items: [ - { label: "Snapshot ID Queries", value: "Standard Iceberg syntax", status: "available" }, - { label: "Timestamp Queries", value: "as-of-timestamp support", status: "available" }, - { label: "Delta Version 
Mapping", value: "converted_delta_version", status: "available" }, - { label: "Cross-format Correlation", value: "Delta to Iceberg timestamps", status: "available" } + { label: "Deletion Vectors", value: "Supported — DBR 18.0+ Public Preview", status: "preview" }, + { label: "VARIANT Column Type", value: "Supported", status: "preview" }, + { label: "Row Lineage Metadata", value: "Written", status: "preview" }, + { label: "V3 Table Creation", value: "CREATE TABLE with V3 format", status: "preview" } ] }, { - category: "Historical Analysis", + category: "V3 Gaps", items: [ - { label: "Audit Capabilities", value: "Complete version history", status: "available" }, - { label: "Compliance Queries", value: "Point-in-time accuracy", status: "available" }, - { label: "Data Lineage", value: "Format transition tracking", status: "available" }, - { label: "External Tool Integration", value: "Standard APIs", status: "available" } + { label: "Nanosecond Timestamps", value: "Limited support", status: "limited" }, + { label: "Geometry / Vector Types", value: "Not yet documented", status: "limited" }, + { label: "V2→V3 In-place Upgrade", value: "Supported", status: "available" }, + { label: "Production GA", value: "Future release", status: "limited" } ] } ], externalLinks: [ - { label: "Iceberg REST API Specification", url: "https://github.com/apache/iceberg/blob/master/api/src/main/java/org/apache/iceberg/rest/RestCatalog.java", type: "docs" } + { label: "Full Apache Iceberg Support Announcement", url: "https://www.databricks.com/blog/announcing-full-apache-iceberg-support-databricks", type: "blog" } ] } }, { - title: "Enterprise Security & Governance", - chip: "Unity Catalog", - description: "Unity Catalog RBAC governs access with credential vending providing temporary, scoped cloud-storage credentials to external Iceberg clients", - icon: , + title: "Full Time Travel & RESTORE", + chip: "Full Support", + description: "Time travel and RESTORE TABLE supported for managed Iceberg 
V3 tables (DBR 18.0+ Public Preview). For foreign Iceberg tables, time travel is limited. Standard Iceberg snapshot-ID and timestamp query syntax", + icon: , color: "green", - score: 95, + score: 100, details: { - title: "Enterprise-Grade Access Control", - description: "Databricks integrates comprehensive security through Unity Catalog with advanced credential vending for secure external access without credential sharing.", + title: "Time Travel and Snapshot Management", + description: "Databricks supports the full Iceberg time travel API for managed tables, including RESTORE TABLE for point-in-time rollbacks. External engines using the REST catalog can also leverage standard Iceberg time-travel syntax.", overviewContent: { strengths: [ - "Centralized RBAC through Unity Catalog", - "Automatic credential vending for external clients", - "Scoped, temporary cloud storage credentials", - "Fine-grained table and column-level permissions", - "Audit logging for all external access" + "Standard Iceberg snapshot-ID and timestamp time travel", + "RESTORE TABLE for full point-in-time table rollback", + "Delta version-to-Iceberg snapshot mapping for UniForm tables", + "External engines can time-travel via REST catalog", + "Retention controlled via snapshot expiration settings" ], limitations: [ - "Security model tied to Unity Catalog implementation", - "External clients must support REST catalog authentication", - "Credential vending limited to supported cloud providers", - "Fine-grained permissions depend on Unity Catalog features" + "Foreign (externally managed) Iceberg tables have limited time travel in Databricks", + "RESTORE TABLE on V3 managed tables in Public Preview (DBR 18.0+)", + "Performance depends on number of retained snapshots" ], bestFor: [ - "Enterprise environments requiring strict access control", - "Multi-tenant deployments with isolation requirements", - "Compliance scenarios with audit trail needs", - "Organizations with existing Unity Catalog 
governance" + "Audit and compliance queries at specific historical timestamps", + "Data recovery and accidental-write rollback", + "Reproducible ML training runs tied to specific snapshots", + "Point-in-time cross-engine data access via REST" ] }, technicalSpecs: [ { - category: "Access Control", + category: "Time Travel Syntax", items: [ - { label: "RBAC System", value: "Unity Catalog integration", status: "available" }, - { label: "Credential Vending", value: "Temporary, scoped tokens", status: "available" }, - { label: "Table Permissions", value: "Fine-grained control", status: "available" }, - { label: "Column Security", value: "Unity Catalog features", status: "available" } + { label: "Snapshot ID", value: "VERSION AS OF snapshot_id", status: "available" }, + { label: "Timestamp", value: "TIMESTAMP AS OF ts", status: "available" }, + { label: "RESTORE TABLE", value: "DBR 18.0+ Public Preview for V3", status: "preview" }, + { label: "External Engine Time Travel", value: "Via REST Catalog", status: "available" } ] }, { - category: "Security Features", + category: "Snapshot Management", items: [ - { label: "Audit Logging", value: "All external access", status: "available" }, - { label: "Token Management", value: "Automatic renewal", status: "available" }, - { label: "Cloud Integration", value: "AWS, Azure, GCP", status: "available" }, - { label: "Network Security", value: "VPC/VNet integration", status: "available" } + { label: "Snapshot Expiration", value: "Configurable retention", status: "available" }, + { label: "Delta Version Mapping", value: "converted_delta_version", status: "available" }, + { label: "Metadata Tables", value: "history, snapshots, files", status: "available" }, + { label: "Orphan File Cleanup", value: "VACUUM equivalent", status: "available" } ] } ], externalLinks: [ - { label: "Access Databricks Tables from Iceberg Clients", url: "https://docs.databricks.com/aws/en/external-access/iceberg.html", type: "docs" } + { label: "Unity Catalog 
Iceberg Endpoint", url: "https://docs.databricks.com/aws/en/external-access/iceberg.html", type: "docs" } ] } }, { - title: "Current Limitations & Requirements", - chip: "Important Constraints", - description: "Tables with deletion vectors, streaming writes, or materialized views require purging/upgrade before Iceberg compatibility; Runtime 14.3 LTS+ required", - icon: , - color: "red", - score: 60, + title: "Enterprise Security & Governance", + chip: "Full Support", + description: "Unity Catalog RBAC governs access; Iceberg REST clients receive temporary, scoped cloud-storage credentials via credential vending during handshake. Fine-grained table and column permissions", + icon: , + color: "green", + score: 100, details: { - title: "Understanding Compatibility Requirements", - description: "Databricks UniForm has specific requirements and limitations that must be considered when planning Iceberg integration strategies.", + title: "Unity Catalog Security for Iceberg", + description: "Databricks leverages Unity Catalog's enterprise RBAC model for Iceberg tables, with credential vending ensuring external engines never hold long-lived cloud storage credentials.", overviewContent: { strengths: [ - "Clear documentation of limitations and workarounds", - "Automated upgrade tools for compatibility", - "Specific runtime version requirements documented", - "Roadmap visibility for future capability expansion" + "Centralized RBAC through Unity Catalog across all Iceberg tables", + "Automatic credential vending — external clients get scoped, short-lived tokens", + "Column-level permissions and row-level security via Unity Catalog", + "Full audit logging for all Iceberg access (internal and external)", + "Works across AWS, Azure, and GCP without per-cloud configuration" ], limitations: [ - "Tables with deletion vectors require REORG … PURGE DELETION_VECTORS", - "Streaming writes incompatible with UniForm", - "Materialized views cannot enable Iceberg reads", - "REST catalog is 
read-only for external engines", - "Iceberg spec v3 not yet supported" + "Security model requires Unity Catalog — not available with legacy Hive Metastore", + "Credential vending supported only for Databricks-managed cloud storage", + "Fine-grained row-level policies depend on Unity Catalog tier" ], bestFor: [ - "Understanding migration planning requirements", - "Evaluating compatibility with existing workloads", - "Setting appropriate expectations for capabilities", - "Planning workarounds for current limitations" + "Enterprise environments requiring zero long-lived credential exposure", + "Multi-tenant data lakehouses with strict isolation requirements", + "Regulated industries with full audit trail requirements", + "Organizations sharing data with external partners via Iceberg REST" ] }, technicalSpecs: [ { - category: "Version Requirements", + category: "Access Control", items: [ - { label: "Databricks Runtime", value: "14.3 LTS or newer", status: "available" }, - { label: "REST API Version", value: "2.1", status: "available" }, - { label: "Iceberg Spec Support", value: "v2 only", status: "limited" }, - { label: "Managed Iceberg", value: "Runtime 16.4 LTS+", status: "preview" } + { label: "RBAC System", value: "Unity Catalog RBAC", status: "available" }, + { label: "Credential Vending", value: "Temporary scoped tokens per handshake", status: "available" }, + { label: "Column-level Security", value: "Unity Catalog features", status: "available" }, + { label: "Row-level Security", value: "Row filters via Unity Catalog", status: "available" } ] }, { - category: "Table Compatibility", + category: "Audit & Compliance", items: [ - { label: "Deletion Vectors", value: "Must be purged", status: "limited" }, - { label: "Streaming Writes", value: "Not compatible", status: "unavailable" }, - { label: "Materialized Views", value: "Cannot enable", status: "unavailable" }, - { label: "Standard Tables", value: "Full support", status: "available" } + { label: "Audit Logging", 
value: "All internal + external access", status: "available" }, + { label: "Lineage Tracking", value: "Unity Catalog lineage graph", status: "available" }, + { label: "Cloud Support", value: "AWS, Azure, GCP", status: "available" }, + { label: "Token TTL", value: "Auto-renewed on expiry", status: "available" } ] } ], externalLinks: [ - { label: "Read Delta Tables - Limitations", url: "https://docs.databricks.com/aws/en/delta/uniform.html", type: "docs" }, - { label: "Full Apache Iceberg Support - V2 Preview", url: "https://www.databricks.com/blog/announcing-full-apache-iceberg-support-databricks", type: "blog" } + { label: "Access Databricks Tables from Iceberg Clients", url: "https://docs.databricks.com/aws/en/external-access/iceberg.html", type: "docs" } ] } } ]; export const databricksTableData = { - title: "Databricks Iceberg Feature Matrix", - description: "Comprehensive breakdown of Iceberg capabilities in Databricks Runtime 14.3 LTS+. The matrix shows feature support levels, implementation details, and minimum version requirements for your lakehouse architecture.", + title: "Databricks DBR 16.4+ Iceberg Feature Matrix", + description: "Comprehensive breakdown of Apache Iceberg capabilities in Databricks Runtime 16.4+ with Unity Catalog — native managed tables, full DML, V3 Public Preview, and enterprise governance.", variant: "default", columns: [ { @@ -495,19 +480,19 @@ export const databricksTableData = { { key: "support", header: "Support Level", - tooltip: "Level of support in Databricks", + tooltip: "Level of support in Databricks DBR 16.4+", align: "center", width: "w-32" }, { key: "details", header: "Implementation Details", - tooltip: "Specific capabilities and limitations" + tooltip: "Specific capabilities and version requirements" }, { key: "version", - header: "Min Version", - tooltip: "Minimum Databricks Runtime required", + header: "Min DBR", + tooltip: "Minimum Databricks Runtime version required", align: "center", width: "w-24" } @@ -517,230 
+502,174 @@ export const databricksTableData = { dimension: { value: Catalog Integration }, - support: { - value: REST Only, - badge: { text: "Unity Catalog", variant: "warning" }, - tooltip: "REST catalog endpoint at /api/2.1/unity-catalog/iceberg" - }, - details: { - value: "Unity Catalog REST endpoint for external engines; UniForm tables generate Iceberg metadata on Delta commits", - tooltip: "No native Hive/Glue support, REST catalog only" - }, - version: { value: "14.3 LTS+" } - }, - { - dimension: { - value: Read Operations - }, support: { value: Full, - badge: { text: "External Engines", variant: "success" } - }, - details: { - value: "Complete SELECT support via REST catalog or direct metadata paths for all Iceberg-compatible engines", - tooltip: "Full read access for Spark, Trino, Flink, DuckDB, etc." - }, - version: { value: "14.3 LTS+" } - }, - { - dimension: { - value: Write Operations - }, - support: { - value: Partial, - badge: { text: "Delta Internal", variant: "warning" } + badge: { text: "Unity Catalog REST", variant: "success" }, + tooltip: "Unity Catalog exposes full Iceberg REST API for read and write" }, details: { - value: "Managed Iceberg Tables support external writes; UniForm Delta tables read-only for Iceberg clients", - tooltip: "Full DML inside Databricks, external writes only for Managed Iceberg" + value: "Unity Catalog REST Catalog API for native Iceberg read/write; Hive Metastore + AWS Glue via Lakehouse Federation (read-only)", + tooltip: "No Nessie, Hadoop, or JDBC catalog support" }, version: { value: "16.4 LTS+" } }, { dimension: { - value: UniForm Technology + value: Read/Write }, support: { value: Full, - badge: { text: "Multi-Format", variant: "success" } + badge: { text: "Native Iceberg", variant: "success" } }, details: { - value: "Same table accessible as Delta and Iceberg simultaneously with automatic metadata generation", - tooltip: "Revolutionary multi-format lakehouse capability" + value: "INSERT INTO, CREATE TABLE USING 
ICEBERG, CTAS, full DDL; external engines read/write via REST catalog", + tooltip: "Full read/write — not just read-only UniForm" }, - version: { value: "14.3 LTS+" } + version: { value: "16.4 LTS+" } }, { dimension: { - value: Time Travel + value: DML Operations }, support: { value: Full, - badge: { text: "Standard Syntax", variant: "success" } - }, - details: { - value: "Standard Iceberg time travel with snapshot-ID/timestamp plus Delta version mapping properties", - tooltip: "Complete historical query support with format correlation" - }, - version: { value: "14.3 LTS+" } - }, - { - dimension: { - value: Storage Strategy - }, - support: { - value: CoW Only, - badge: { text: "Copy-on-Write", variant: "warning" } - }, - details: { - value: "Copy-on-Write semantics with no Iceberg delete files; fully materialized snapshots", - tooltip: "No Merge-on-Read, all updates create new data files" - }, - version: { value: "14.3 LTS+" } - }, - { - dimension: { - value: Metadata Sync - }, - support: { - value: Async, - badge: { text: "Auto + Manual", variant: "success" } + badge: { text: "MERGE/UPDATE/DELETE", variant: "success" } }, details: { - value: "Asynchronous generation on Delta commits with MSCK REPAIR TABLE … SYNC METADATA for immediate sync", - tooltip: "Automatic with manual override for time-sensitive scenarios" + value: "MERGE INTO, UPDATE, DELETE fully supported on managed Iceberg V3 tables with deletion vectors (DBR 18.0+ Public Preview); CoW for V2", + tooltip: "Complete DML operations with efficient delete strategies" }, - version: { value: "14.3 LTS+" } + version: { value: "16.4 LTS+" } }, { dimension: { - value: Security & Governance + value: MoR / CoW }, support: { - value: Full, - badge: { text: "Unity Catalog", variant: "success" } + value: Partial, + badge: { text: "DVs replace pos-deletes", variant: "warning" } }, details: { - value: "Unity Catalog RBAC with credential vending for scoped, temporary cloud storage access", - tooltip: "Enterprise-grade 
security with automatic credential management" + value: "V3 deletion vectors (MoR-like) in Public Preview (DBR 18.0+); V2 uses CoW only; V2 position/equality deletes explicitly not supported", + tooltip: "Databricks V3 uses deletion vectors, not traditional V2 position deletes" }, - version: { value: "14.3 LTS+" } + version: { value: "16.4 LTS+" } }, { dimension: { - value: Streaming Support + value: Streaming }, support: { - value: Internal, - badge: { text: "Delta Only", variant: "warning" } + value: None, + badge: { text: "CDF is Delta-only", variant: "error" } }, details: { - value: "Structured Streaming and Change Data Feed inside Databricks; no Iceberg streaming endpoints", - tooltip: "Streaming limited to Delta format, not exposed via Iceberg REST" + value: "Change Data Feed not available on Iceberg tables — CDF is Delta Lake exclusive; V3 row lineage exists but not exposed as stream", + tooltip: "No native streaming ingestion or CDC subscribe API for Iceberg" }, - version: { value: "14.3 LTS+" } + version: { value: "N/A" } }, { dimension: { - value: Format V3 Support + value: Format V3 }, support: { - value: None, - badge: { text: "V2 Only", variant: "error" } + value: Partial (Public Preview), + badge: { text: "DBR 18.0+ Public Preview", variant: "warning" } }, details: { - value: "UniForm targets Iceberg spec v2 only; no public v3 roadmap announced", - tooltip: "Limited to spec v2 features, no deletion vectors or row lineage" + value: "V3 Public Preview in DBR 18.0+: deletion vectors, VARIANT columns, row lineage; some V3 types (nanosecond ts, geometry, vector) limited", + tooltip: "First enterprise platform with V3 support, but still Public Preview" }, - version: { value: "N/A" } + version: { value: "18.0+ Public Preview" } }, { dimension: { - value: Table Compatibility + value: Time Travel }, support: { - value: Limited, - badge: { text: "Upgrade Required", variant: "warning" } + value: Full, + badge: { text: "Snapshot + Timestamp", variant: "success" } 
}, details: { - value: "Tables with deletion vectors, streaming writes, or materialized views need REORG/upgrade", - tooltip: "Specific requirements and limitations for table types" + value: "Standard Iceberg time travel + RESTORE TABLE for managed V3 tables (DBR 18.0+ Public Preview); foreign tables have limited time travel", + tooltip: "Complete snapshot history with point-in-time restore" }, - version: { value: "14.3 LTS+" } + version: { value: "16.4 LTS+" } }, { dimension: { - value: External Write Access + value: Security }, support: { - value: Read-Only, - badge: { text: "REST Limitation", variant: "error" } + value: Full, + badge: { text: "Unity Catalog RBAC", variant: "success" } }, details: { - value: "REST catalog provides read-only access; no external Iceberg DML operations", - tooltip: "External engines cannot perform INSERT/UPDATE/DELETE via REST" + value: "Unity Catalog RBAC; credential vending for scoped temporary cloud-storage tokens; column-level + row-level security; full audit", + tooltip: "Enterprise-grade security with automatic credential management" }, - version: { value: "N/A" } + version: { value: "16.4 LTS+" } } ] }; export const databricksUseCases = [ { - title: "Multi-Engine Lakehouse", - description: "Enable external analytics tools and engines to access Delta tables via standard Iceberg APIs", + title: "Native Open Lakehouse on Databricks", + description: "Create and manage Iceberg tables natively in Databricks with full DML and Unity Catalog governance", scenarios: [ - "Practical example: A data science team at a Fortune 500 company maintains their core data in Databricks Delta tables for internal analytics. When external partners need access to this data, they enable UniForm to expose the tables via Iceberg REST catalog. 
External partners can now query the data using Apache Spark or Trino in their own infrastructure without requiring Databricks access or data duplication", - "Business intelligence tools requiring Iceberg connectivity for reporting", - "Data science platforms with Iceberg client libraries for ML workflows", - "External Spark clusters needing read access to Delta tables for processing" + "Create new managed Iceberg tables alongside Delta tables in the same Unity Catalog", + "Run MERGE INTO, UPDATE, DELETE on Iceberg tables with deletion vector efficiency (V3 Public Preview)", + "Use CTAS to migrate existing Delta or Hive tables to native Iceberg format", + "Query Iceberg tables from Databricks SQL, notebooks, and Jobs with full predicate pushdown" ] }, { - title: "Data Sharing & Federation", - description: "Share Delta table data across organizational boundaries with standardized Iceberg access", + title: "Multi-Engine Lakehouse with REST Catalog", + description: "Share Databricks-managed Iceberg tables with external engines via the Unity Catalog REST API", scenarios: [ - "Practical example: A healthcare consortium shares patient outcome data across 15 hospitals. Each hospital uses different analytics tools (some use Databricks, others use Trino or Presto). 
By enabling UniForm on Delta tables, the central data warehouse provides Iceberg-compatible access, allowing each hospital to query shared data using their preferred tools while maintaining strict Unity Catalog security controls", - "Cross-team data sharing with different tool preferences and requirements", - "Partner organizations requiring standard data access without vendor dependencies", - "Data marketplace implementations with unified access patterns" + "Flink jobs write CDC events directly into managed Iceberg tables via the REST catalog endpoint", + "Trino clusters query Databricks Iceberg tables for low-latency interactive analytics", + "DuckDB on analyst laptops reads Databricks Iceberg snapshots via OAuth REST connection", + "Spark on external EMR clusters writes enriched data back to Databricks Unity Catalog Iceberg tables" ] }, { - title: "Migration & Modernization", - description: "Gradually migrate from legacy systems while maintaining backward compatibility", + title: "Iceberg V3 Early Adoption", + description: "Leverage Databricks DBR 18.0+ Public Preview to test V3 deletion vectors and VARIANT columns", scenarios: [ - "Practical example: A retail company is migrating from legacy Hive tables to Databricks Delta. During the 6-month transition period, they enable UniForm to allow their existing Tableau dashboards (which connect via Iceberg) to continue working while they gradually migrate reports to use native Delta connections. This phased approach eliminates 'big bang' migration risks", - "Transitioning from Hive tables to Delta with external tool support", - "Legacy analytics tools requiring Iceberg compatibility during migration", - "Hybrid architectures during platform modernization initiatives" + "Test deletion vector performance vs. 
CoW on high-cardinality UPDATE workloads", + "Evaluate VARIANT column type for semi-structured sensor or log data", + "Plan V2→V3 upgrade path with in-place table upgrade support", + "Benchmark V3 read performance with mixed deletion-vector and data files" ] }, { - title: "Compliance & Governance", - description: "Provide auditable, read-only access for compliance and regulatory scenarios", + title: "Enterprise Governance with Credential Vending", + description: "Provide auditable external access with zero long-lived credential exposure", scenarios: [ - "Practical example: A financial institution must provide read-only access to their transaction data for external auditors during quarterly reviews. Using UniForm with Unity Catalog, they expose specific tables via Iceberg REST catalog with time-bound credentials, ensuring auditors can verify data independently without granting write access or exposing sensitive internal systems", - "Regulatory reporting with external audit tools and compliance frameworks", - "Compliance teams requiring independent data access with audit trails", - "Data governance with Unity Catalog integration for fine-grained control" + "External data partners query Iceberg tables via REST catalog with auto-vended scoped tokens", + "Compliance teams run time-travel queries from their own Spark clusters without permanent AWS access", + "Data mesh producers share Iceberg table access via Unity Catalog without exposing storage credentials", + "Security teams audit all external Iceberg access through Unity Catalog's central audit log" ] } ]; \ No newline at end of file +/> diff --git a/docs-iceberg-query-engine/doris.mdx b/docs-iceberg-query-engine/doris.mdx index 0f28afdf..c8bbaefb 100644 --- a/docs-iceberg-query-engine/doris.mdx +++ b/docs-iceberg-query-engine/doris.mdx @@ -85,7 +85,7 @@ export const dorisFeatures = [ description: "Full SELECT and write-back: INSERT, INSERT OVERWRITE, CTAS. 
Doris writes Parquet/ORC files and commits Iceberg snapshots as lake-ingestion engine and analytics layer", icon: , color: "green", - score: 90, + score: 100, details: { title: "Comprehensive Lake Ingestion and Analytics", description: "Doris serves as both a high-performance analytics engine and a capable lake ingestion system with full Iceberg write capabilities.", @@ -141,7 +141,7 @@ export const dorisFeatures = [ description: "INSERT INTO (append), INSERT OVERWRITE, UPDATE & DELETE via Iceberg-v2 delete files (v2.1+). MERGE not yet single statement but emulated with patterns", icon: , color: "orange", - score: 80, + score: 60, details: { title: "Progressive DML Implementation with Workarounds", description: "Doris provides essential DML operations with UPDATE/DELETE support via delete files, though MERGE operations require pattern-based workarounds.", @@ -198,7 +198,7 @@ export const dorisFeatures = [ description: "Reads: applies position and equality delete files (MoR) automatically. Writes: generates position/equality delete files for UPDATE/DELETE; INSERT OVERWRITE rewrites files (CoW)", icon: , color: "green", - score: 95, + score: 100, details: { title: "Comprehensive Storage Strategy Implementation", description: "Doris provides complete support for both Merge-on-Read and Copy-on-Write strategies with intelligent application based on operation type.", @@ -254,7 +254,7 @@ export const dorisFeatures = [ description: "No native streaming/CDC writer; use external tools (Flink-Iceberg) to land data, then query with sub-second latency. Routine Load only targets internal tables", icon: , color: "red", - score: 10, + score: 0, details: { title: "Batch-Focused with External Streaming Integration", description: "Doris focuses on high-performance batch and interactive analytics while relying on external tools for streaming data ingestion.", @@ -309,7 +309,7 @@ export const dorisFeatures = [ description: "Reads & writes Parquet (v1/v2) + ORC (v1/v2). 
Supports Iceberg spec v1 & v2; equality-delete support for ORC arrives in v2.1.3+. No v3 support yet", icon: , color: "orange", - score: 70, + score: 0, details: { title: "Comprehensive Format Support for Current Standards", description: "Doris provides solid support for current Iceberg specifications and major file formats, with ongoing development for newer features.", @@ -420,7 +420,7 @@ export const dorisFeatures = [ description: "Doris RBAC plus underlying catalog/storage IAM. Ranger/Lake Formation policies apply at metastore/storage; Doris adds row-policies & column masking on query", icon: , color: "orange", - score: 85, + score: 55, details: { title: "Multi-Layer Security Architecture", description: "Doris provides a comprehensive security model combining its own RBAC with underlying catalog security systems and additional query-time policies.", @@ -842,14 +842,14 @@ export const dorisUseCases = [ tableData={dorisTableData} useCases={dorisUseCases} officialDocs="https://doris.apache.org/docs/" - gettingStarted="https://doris.apache.org/docs/lakehouse/datalake-analytics/iceberg" + gettingStarted="https://doris.apache.org/docs/lakehouse/catalogs/iceberg-catalog" additionalResources={[ - { label: "Iceberg Catalog Documentation", url: "https://doris.apache.org/docs/lakehouse/datalake-analytics/iceberg" }, - { label: "Iceberg Data Building Guide", url: "https://doris.apache.org/docs/lakehouse/datalake-building/iceberg-build" }, + { label: "Iceberg Catalog Documentation", url: "https://doris.apache.org/docs/lakehouse/catalogs/iceberg-catalog" }, + { label: "Iceberg Data Building Guide", url: "https://doris.apache.org/docs/dev/lakehouse/best-practices/doris-iceberg" }, { label: "Iceberg Catalog Configuration", url: "https://doris.apache.org/docs/dev/lakehouse/catalogs/iceberg-catalog" }, { label: "ICEBERG_META Function", url: "https://doris.apache.org/docs/sql-manual/sql-functions/table-valued-functions/iceberg-meta" }, { label: "Doris and Iceberg Best Practices", 
url: "https://doris.apache.org/docs/dev/lakehouse/best-practices/doris-iceberg" }, - { label: "Metadata Cache Documentation", url: "https://doris.apache.org/docs/lakehouse/metacache" }, + { label: "Metadata Cache Documentation", url: "https://doris.apache.org/docs/lakehouse/meta-cache" }, { label: "Built-in Authorization", url: "https://doris.apache.org/docs/admin-manual/auth/authorization/internal" }, { label: "Next-Generation Data Lakehouse", url: "https://dev.to/apachedoris/building-the-next-generation-data-lakehouse-10x-performance-4ga1" } ]} diff --git a/docs-iceberg-query-engine/dreamio.mdx b/docs-iceberg-query-engine/dreamio.mdx index d7b5830d..0006bfbe 100644 --- a/docs-iceberg-query-engine/dreamio.mdx +++ b/docs-iceberg-query-engine/dreamio.mdx @@ -307,56 +307,56 @@ export const dremioFeatures = [ } }, { - title: "Format V3 Roadmap", - chip: "2025 Planned", - description: "Planned (2025) - roadmap calls for reading Deletion Vectors & row-lineage columns first; writer support (DV emission) to follow once Iceberg 1.8+ library adopted", + title: "Iceberg Format V3 GA", + chip: "GA — Dremio Cloud (Apr 2026)", + description: "Full Iceberg V3 GA for Dremio Cloud (April 2026) — deletion vectors, VARIANT type, row lineage read+write; on-premises (Software) V3 timeline TBD", icon: , - color: "red", - score: 10, + color: "green", + score: 80, details: { - title: "Clear V3 Development Timeline", - description: "Dremio has a well-defined roadmap for Iceberg format v3 support with phased implementation approach.", + title: "Iceberg V3 General Availability on Dremio Cloud", + description: "Dremio announced Iceberg V3 GA for Dremio Cloud in April 2026, delivering deletion vectors, VARIANT data type, and row lineage. 
Dremio Software (on-premises) V3 timeline is TBD.", overviewContent: { strengths: [ - "Clear 2025 timeline for v3 support", - "Phased approach: read first, write later", - "Strong v1/v2 format stability", - "Alignment with Iceberg 1.8+ library adoption" + "Full V3 GA on Dremio Cloud as of April 2026", + "Deletion vector read+write support", + "VARIANT semi-structured data type support", + "Row lineage metadata columns available" ], limitations: [ - "No current v3 format support", - "Cannot read deletion vectors", - "Missing row lineage capabilities", - "Timeline dependent on Iceberg library updates" + "V3 GA applies to Dremio Cloud only; Software (on-premises) timeline TBD", + "Some V3 advanced features may roll out incrementally", + "Older Dremio Software deployments remain on V1/V2" ], bestFor: [ - "Current v1/v2 production environments", - "Teams planning long-term v3 migration", - "Stable data lakehouse deployments", - "Organizations requiring format compatibility" + "Dremio Cloud environments needing V3 efficiency", + "Workloads with heavy row-level updates benefiting from deletion vectors", + "Semi-structured data using VARIANT columns", + "Organizations adopting the latest Iceberg spec" ] }, technicalSpecs: [ { - category: "Current Support", + category: "V3 Features (Dremio Cloud GA)", items: [ - { label: "Iceberg v1", value: "Full Support", status: "available" }, - { label: "Iceberg v2", value: "Read/Write", status: "available" }, - { label: "Position Deletes", value: "Supported", status: "available" }, - { label: "Equality Deletes", value: "Partial (not global)", status: "available" } + { label: "Deletion Vectors", value: "Read + Write", status: "available" }, + { label: "VARIANT Type", value: "Supported", status: "available" }, + { label: "Row Lineage", value: "Supported", status: "available" }, + { label: "GA Date", value: "April 2026", status: "available" } ] }, { - category: "V3 Roadmap (2025)", + category: "Legacy Format Support", items: [ - { label: 
"Deletion Vector Read", value: "Phase 1", status: "limited" }, - { label: "Row Lineage Read", value: "Phase 1", status: "limited" }, - { label: "DV Write Support", value: "Phase 2", status: "limited" }, - { label: "Library Adoption", value: "Iceberg 1.8+", status: "limited" } + { label: "Iceberg v1", value: "Full Support", status: "available" }, + { label: "Iceberg v2", value: "Read/Write", status: "available" }, + { label: "Position Deletes", value: "Supported", status: "available" }, + { label: "Dremio Software V3", value: "Timeline TBD", status: "limited" } ] } ], externalLinks: [ + { label: "Dremio V3 GA Announcement", url: "https://www.globenewswire.com/news-release/2026/04/06/3055888/0/en/Dremio-Announces-General-Availability-of-Apache-Iceberg-V3.html", type: "docs" }, { label: "What's New in Iceberg v3?", url: "https://www.dremio.com/blog/apache-iceberg-v3/", type: "docs" }, { label: "Table Properties Format Version", url: "https://docs.dremio.com/current/sonar/query-manage/data-formats/apache-iceberg/table-properties", type: "docs" } ] @@ -695,14 +695,14 @@ export const dremioTableData = { value: Format Support }, support: { - value: Limited, - badge: { text: "v1/v2 Only", variant: "error" } + value: Full, + badge: { text: "V3 GA — Dremio Cloud (Apr 2026)", variant: "success" } }, details: { - value: "Reads/writes v1/v2; v3 planned 2025 (read DV/lineage first, write DV after Iceberg 1.8+)", - tooltip: "Clear v3 roadmap with phased implementation approach" + value: "Reads/writes v1/v2 fully; Iceberg V3 GA for Dremio Cloud (April 2026): deletion vectors, VARIANT type, row lineage; Software (on-premises) V3 timeline TBD", + tooltip: "V3 GA launched April 2026 for Dremio Cloud; on-premises users remain on V2 until further release" }, - version: { value: "v25+" } + version: { value: "Cloud Apr 2026" } }, { dimension: { diff --git a/docs-iceberg-query-engine/duckdb.mdx b/docs-iceberg-query-engine/duckdb.mdx index 1fa9d800..64cfb919 100644 --- 
a/docs-iceberg-query-engine/duckdb.mdx +++ b/docs-iceberg-query-engine/duckdb.mdx @@ -1,6 +1,6 @@ --- -title: "DuckDB v1.3+ & Apache Iceberg: Lightweight Read-Only Analytics | OLake" -description: "Learn about DuckDB's read-only Iceberg support with SQL time travel, external file caching for cloud storage, and REST catalog integration for lightweight analytics." +title: "DuckDB v1.3+ & Apache Iceberg: Lightweight Analytics with Read/Write | OLake" +description: "Learn about DuckDB's Iceberg support with SQL time travel, INSERT/UPDATE/DELETE (v1.4+), external file caching for cloud storage, and REST catalog integration." sidebar_label: DuckDB v1.3+ hide_table_of_contents: true --- @@ -25,33 +25,32 @@ export const duckdbFeatures = [ chip: "Partial Support", description: ( <> - Hadoop (file-system) and Iceberg REST catalog supported via rest option with bearer/OAuth tokens; no native Hive/Glue catalog yet + REST catalog (OAuth2 since v1.3+), AWS Glue via ENDPOINT_TYPE=glue, Polaris, and S3 Tables supported. Hive Metastore, Nessie, Hadoop, JDBC not supported ), icon: , color: "orange", score: 65, details: { - title: "Flexible Catalog Integration", - description: "DuckDB provides support for essential catalog types while maintaining simplicity, with strong REST catalog authentication capabilities.", + title: "REST-Centric Catalog Integration", + description: "DuckDB supports modern REST-based catalogs including native AWS Glue integration via ENDPOINT_TYPE=glue (v1.3+) and Polaris. 
Traditional Hive/JDBC catalogs are not supported.", overviewContent: { strengths: [ - "Simple file-system catalog support via direct paths", - "REST catalog integration with OAuth2 and bearer token authentication", - "Built-in support for moved table paths with allow_moved_paths flag", - "Session-based catalog configuration and token management", - "Automatic REST catalog refresh with configurable intervals" + "REST catalog with OAuth2 authentication since v1.3+", + "AWS Glue via ENDPOINT_TYPE=glue — no REST proxy needed", + "Polaris and S3 Tables supported natively", + "allow_moved_paths flag for relocated table paths", + "Automatic REST catalog refresh with snapshot_refresh_interval" ], limitations: [ - "No native Hive Metastore or AWS Glue catalog support", - "Must proxy Hive/Glue catalogs through REST implementations", - "Limited to catalog types supported by the extension", - "Single catalog per session limitations" + "No native Hive Metastore catalog support", + "No Nessie, Hadoop warehouse, or JDBC catalog", + "Single catalog per CREATE SECRET configuration" ], bestFor: [ - "Direct file-system based data lake exploration", - "Modern REST catalog deployments with cloud providers", - "Ad-hoc analytics on laptop or local environments", + "Modern cloud-native REST catalog deployments", + "AWS Glue / S3 Tables lakehouse architectures", + "Ad-hoc analytics with Polaris or Tabular", "Prototyping and development workflows" ] }, @@ -59,84 +58,84 @@ export const duckdbFeatures = [ { category: "Supported Catalog Types", items: [ - { label: "Hadoop (File-system)", value: "Direct paths", status: "available" }, - { label: "REST Catalog", value: "With auth tokens", status: "available" }, - { label: "Hive Metastore", value: "Via REST proxy only", status: "limited" }, - { label: "AWS Glue", value: "Via REST proxy only", status: "limited" }, - { label: "Custom Catalogs", value: "Not supported", status: "unavailable" } + { label: "REST Catalog", value: "OAuth2 since v1.3+", 
status: "available" }, + { label: "AWS Glue", value: "ENDPOINT_TYPE=glue", status: "available" }, + { label: "Polaris", value: "Full Support", status: "available" }, + { label: "S3 Tables", value: "Full Support", status: "available" }, + { label: "Hive Metastore", value: "Not Supported", status: "unavailable" }, + { label: "Nessie / JDBC", value: "Not Supported", status: "unavailable" } ] }, { category: "Authentication Features", items: [ + { label: "OAuth2", value: "Via REST auth", status: "available" }, { label: "Bearer Tokens", value: "rest_auth_token parameter", status: "available" }, - { label: "OAuth2", value: "Through REST auth", status: "available" }, { label: "S3 Credentials", value: "Via httpfs extension", status: "available" }, - { label: "Session-based Auth", value: "Per-session tokens", status: "available" } + { label: "CREATE SECRET Workflow", value: "Per-catalog config", status: "available" } ] } ], externalLinks: [ - { label: "Iceberg Extension Overview", url: "https://duckdb.org/docs/stable/core_extensions/iceberg/overview.html", type: "docs" }, - { label: "Iceberg REST Catalogs", url: "https://duckdb.org/docs/stable/core_extensions/iceberg/iceberg_rest_catalogs.html", type: "docs" } + { label: "Iceberg REST Catalogs", url: "https://duckdb.org/docs/stable/core_extensions/iceberg/iceberg_rest_catalogs.html", type: "docs" }, + { label: "Iceberg Extension Overview", url: "https://duckdb.org/docs/stable/core_extensions/iceberg/overview.html", type: "docs" } ] } }, { - title: "Read-only Analytics Excellence", - chip: "Full Support", - description: "Full SELECT support with predicate evaluation, manifest pruning and external file-cache to avoid re-downloading S3/GCS objects", + title: "Full Read + Write Analytics (V2)", + chip: "Read + INSERT/UPDATE/DELETE", + description: "Full SELECT with predicate pushdown and manifest pruning. INSERT INTO supported since v1.4.0 via REST catalog; UPDATE and DELETE since v1.4.2. 
MERGE INTO is not supported", icon: , color: "green", - score: 95, + score: 90, details: { - title: "Optimized Analytical Queries", - description: "DuckDB excels at read-only analytics with advanced optimizations for cloud storage and query performance.", + title: "Read and Write on Iceberg V2 Tables", + description: "DuckDB v1.4+ moved beyond read-only: INSERT INTO writes new Parquet data files; UPDATE and DELETE generate positional delete files (MoR). MERGE INTO remains unsupported. All write operations require a REST catalog attachment.", overviewContent: { strengths: [ - "Full SQL SELECT support with complex query capabilities", + "Full SQL SELECT with complex predicates and joins", "Advanced predicate pushdown and manifest pruning", - "External file-cache for S3/GCS objects reduces cold-scan latency", - "Cost-based query optimization using metadata statistics", - "Efficient single-node execution for moderate-sized datasets" + "INSERT INTO writes new Iceberg data files (v1.4.0+)", + "UPDATE and DELETE via positional delete files (MoR, v1.4.2+)", + "External file-cache reduces cloud storage cold-scan latency by ~50%" ], limitations: [ - "No write capabilities (INSERT/CREATE TABLE AS ICEBERG)", + "MERGE INTO not supported", + "Write operations require REST catalog attachment (not file-path scan)", "Single-node execution limits scale for very large datasets", - "Partition pruning not yet fully cost-based for complex predicates", - "Large lake queries constrained by local resources" + "V3 tables: only V2-compatible types readable; write not documented" ], bestFor: [ "Interactive data exploration and ad-hoc analytics", - "Laptop-based data science and development", - "Prototyping and testing data transformations", - "Small to medium-scale analytical workloads" + "Laptop-based data engineering with lightweight writes", + "Prototyping pipelines before deploying to distributed engines", + "Small-to-medium analytical workloads needing full DML minus MERGE" ] }, 
technicalSpecs: [ { - category: "Query Capabilities", + category: "Read Capabilities", items: [ { label: "SELECT Operations", value: "Full SQL support", status: "available" }, { label: "Predicate Pushdown", value: "Automatic", status: "available" }, { label: "Manifest Pruning", value: "Enabled", status: "available" }, - { label: "Join Operations", value: "Full support", status: "available" }, - { label: "Aggregations", value: "Complete", status: "available" } + { label: "Position Delete Reading", value: "MoR tables supported", status: "available" } ] }, { - category: "Performance Features", + category: "Write Capabilities", items: [ - { label: "External File Cache", value: "Configurable size", status: "available" }, - { label: "Cold-scan Optimization", value: "50% latency reduction", status: "available" }, - { label: "Cost-based Optimization", value: "Metadata-driven", status: "available" }, - { label: "Parallel Processing", value: "Single-node", status: "available" } + { label: "INSERT INTO", value: "v1.4.0+ via REST catalog", status: "available" }, + { label: "UPDATE", value: "v1.4.2+ positional deletes (MoR)", status: "available" }, + { label: "DELETE", value: "v1.4.2+ positional deletes (MoR)", status: "available" }, + { label: "MERGE INTO", value: "Not supported", status: "unavailable" } ] } ], externalLinks: [ { label: "Iceberg Extension Overview", url: "https://duckdb.org/docs/stable/core_extensions/iceberg/overview.html", type: "docs" }, - { label: "DuckLake Performance", url: "https://duckdb.org/2025/05/27/ducklake.html", type: "blog" } + { label: "Troubleshooting - Current Limitations", url: "https://duckdb.org/docs/stable/core_extensions/iceberg/troubleshooting.html", type: "docs" } ] } }, @@ -423,59 +422,58 @@ export const duckdbFeatures = [ } }, { - title: "Current Limitations", - chip: "Known Issues", - description: "Read-only engine with no write support; tables with deletes not supported; Format V3 capabilities absent; single-node execution 
constraints", + title: "Known Limitations (DML & V3)", + chip: "No MERGE; V3 Partial", + description: "MERGE INTO not supported on any version. V3 tables readable only for V2-compatible data types; V3-only types (nanosecond timestamps, geometry, vector, shredded variant) cause errors. Avro/ORC files ignored", icon: , - color: "red", + color: "orange", score: 40, details: { - title: "Understanding DuckDB Constraints", - description: "While DuckDB excels at read-only analytics, several key limitations should be considered for comprehensive Iceberg workflows.", + title: "Understanding DuckDB's Current Gaps", + description: "DuckDB v1.4+ has solid INSERT/UPDATE/DELETE support for V2 tables but MERGE INTO is absent. V3 tables work only when the data uses V2-compatible types. Avro/ORC file formats are silently skipped.", overviewContent: { strengths: [ - "Clear limitation documentation and transparency", - "Focused scope enables optimization within constraints", - "Roadmap visibility for upcoming write support", - "Strong foundation for future capability expansion" + "Clear limitation documentation and troubleshooting guide", + "INSERT/UPDATE/DELETE cover most DML needs for V2 tables", + "V3 tables with V2-compatible types are readable", + "Rapid release cadence — limitations being addressed continuously" ], limitations: [ - "No write operations (INSERT/CREATE TABLE AS ICEBERG)", - "Reading tables with delete files not supported", - "Format V3 features completely absent", - "Single-node execution limits scale", - "Partition pruning not fully cost-based for complex predicates" + "MERGE INTO not supported on any Iceberg version", + "V3-exclusive types (nanosecond ts, geometry, vector) cause read errors", + "Avro and ORC data files are silently ignored", + "Write operations require REST catalog, not direct file-path access", + "Single-node execution limits very large dataset scale" ], bestFor: [ - "Understanding current capability boundaries", - "Planning workflows within 
read-only constraints", - "Evaluating suitability for specific use cases", + "Understanding current capability boundaries before adoption", + "Planning workflows that avoid MERGE and V3-only types", + "Evaluating DuckDB suitability for specific use cases", "Setting appropriate expectations for analytics workloads" ] }, technicalSpecs: [ { - category: "Write Limitations", + category: "DML Gaps", items: [ - { label: "INSERT Operations", value: "Not supported", status: "unavailable" }, - { label: "UPDATE Operations", value: "Not supported", status: "unavailable" }, - { label: "DELETE Operations", value: "Not supported", status: "unavailable" }, - { label: "CREATE TABLE AS", value: "Not supported", status: "unavailable" } + { label: "MERGE INTO", value: "Not supported (any version)", status: "unavailable" }, + { label: "ALTER TABLE", value: "Not supported", status: "unavailable" }, + { label: "INSERT INTO V3", value: "Supported per extension docs", status: "limited" }, + { label: "UPDATE/DELETE V3", value: "Not yet documented", status: "limited" } ] }, { category: "Format Limitations", items: [ - { label: "Tables with Deletes", value: "Cannot read", status: "unavailable" }, - { label: "Format V3 Tables", value: "Not supported", status: "unavailable" }, - { label: "Avro/ORC Files", value: "Ignored", status: "unavailable" }, - { label: "Complex Predicates", value: "Pruning limited", status: "limited" } + { label: "V3 with V2-compat types", value: "Readable", status: "available" }, + { label: "V3 nanosecond timestamps", value: "Not supported", status: "unavailable" }, + { label: "V3 geometry / vector / shredded", value: "Not supported", status: "unavailable" }, + { label: "Avro / ORC data files", value: "Silently ignored", status: "unavailable" } ] } ], externalLinks: [ - { label: "Troubleshooting - Current Limitations", url: "https://duckdb.org/docs/stable/core_extensions/iceberg/troubleshooting.html", type: "docs" }, - { label: "DuckDB 1.3.0 Release Notes", url: 
"https://duckdb.org/2025/05/21/announcing-duckdb-130.html", type: "blog" } + { label: "Troubleshooting - Current Limitations", url: "https://duckdb.org/docs/stable/core_extensions/iceberg/troubleshooting.html", type: "docs" } ] } } @@ -525,10 +523,10 @@ export const duckdbTableData = { details: { value: ( <> - Hadoop (file-system), REST catalog with OAuth tokens; no native Hive/Glue support + REST catalog (OAuth2), AWS Glue (ENDPOINT_TYPE=glue), Polaris, S3 Tables supported; no Hive, Nessie, Hadoop, or JDBC ), - tooltip: "Can proxy Hive/Glue through REST but no direct catalog integration" + tooltip: "REST-native catalogs fully supported; Hive/JDBC not supported" }, version: { value: "1.3+" } }, @@ -551,14 +549,14 @@ export const duckdbTableData = { value: Write Operations }, support: { - value: None, - badge: { text: "Read-Only", variant: "error" } + value: Partial, + badge: { text: "No MERGE", variant: "warning" } }, details: { - value: "No INSERT/UPDATE/DELETE/CREATE TABLE AS ICEBERG support", - tooltip: "Write support is planned for future versions" + value: "INSERT INTO (v1.4.0+), UPDATE/DELETE via positional deletes (v1.4.2+); MERGE INTO not supported; requires REST catalog", + tooltip: "Full DML except MERGE — requires REST catalog attachment for writes" }, - version: { value: "N/A" } + version: { value: "1.4+" } }, { dimension: { @@ -579,14 +577,14 @@ export const duckdbTableData = { value: Delete File Support }, support: { - value: None, - badge: { text: "CoW Only", variant: "error" } + value: Full (V2), + badge: { text: "MoR + CoW", variant: "success" } }, details: { - value: "Reading tables with deletes not yet supported; Copy-on-Write tables only", - tooltip: "Merge-on-Read tables with delete files cannot be read" + value: "V2 position and equality delete files fully supported; UPDATE/DELETE write positional deletes (MoR); V3 deletion vectors not yet supported", + tooltip: "Full MoR reading for V2; V3 deletion vectors are not supported" }, - version: { 
value: "N/A" } + version: { value: "1.4+" } }, { dimension: { @@ -711,13 +709,13 @@ export const duckdbUseCases = [ ] }, { - title: "Analytical Reporting", - description: "Read-only reporting and dashboard data preparation", + title: "Lightweight Data Engineering", + description: "INSERT, UPDATE, DELETE on Iceberg V2 tables without a distributed cluster", scenarios: [ - "Business intelligence report generation", - "Data extraction for external systems and tools", - "Historical trend analysis with [time travel](/blog/2025/10/03/iceberg-metadata/#63-time-travel-rollback-and-branching)", - "Cross-functional data sharing and exploration" + "Write enriched data back to Iceberg tables via REST catalog", + "Row-level UPDATE/DELETE for data corrections on moderate-sized tables", + "Business intelligence report generation with pre-aggregated writes", + "Historical trend analysis with [time travel](/blog/2025/10/03/iceberg-metadata/#63-time-travel-rollback-and-branching)" ] }, { @@ -734,7 +732,7 @@ export const duckdbUseCases = [ , color: "orange", - score: 75, + score: 55, details: { title: "Row-level Data Modification", description: "Flink provides row-level modification capabilities through UPSERT mode, though with some SQL limitations compared to other engines.", @@ -320,7 +320,7 @@ export const flinkFeatures = [ description: "GA read + write with Flink 1.18+ and Iceberg 1.8+; Binary Deletion Vectors, Row Lineage, new data types, multi-argument transforms", icon: , color: "purple", - score: 95, + score: 100, details: { title: "Advanced Format V3 Capabilities", description: "Flink provides comprehensive Format V3 support with advanced features like deletion vectors and row lineage tracking.", @@ -749,12 +749,12 @@ export const flinkUseCases = [ features={flinkFeatures} tableData={flinkTableData} useCases={flinkUseCases} - officialDocs="https://flink.apache.org/docs/stable/" - gettingStarted="https://iceberg.apache.org/docs/latest/flink-getting-started/" + 
officialDocs="https://nightlies.apache.org/flink/flink-docs-stable/" + gettingStarted="https://iceberg.apache.org/docs/latest/flink/" additionalResources={[ { label: "Flink Configuration", url: "https://iceberg.apache.org/docs/latest/flink-configuration/" }, { label: "Flink CDC Documentation", url: "https://nightlies.apache.org/flink/flink-cdc-docs-master/" }, - { label: "Flink Actions API", url: "https://iceberg.apache.org/docs/latest/flink-actions/" }, + { label: "Flink Actions API", url: "https://iceberg.apache.org/docs/latest/flink-maintenance/#rewrite-data-files" }, { label: "Streaming Best Practices", url: "https://nightlies.apache.org/flink/flink-docs-stable/docs/dev/datastream/fault-tolerance/checkpointing/" } ]} /> \ No newline at end of file diff --git a/docs-iceberg-query-engine/hive.mdx b/docs-iceberg-query-engine/hive.mdx index 1f7a4f07..1f314a56 100644 --- a/docs-iceberg-query-engine/hive.mdx +++ b/docs-iceberg-query-engine/hive.mdx @@ -144,7 +144,7 @@ export const hiveFeatures = [ description: "SQL DELETE, UPDATE, and MERGE INTO supported when Hive runs on Tez; operations rewrite whole files (CoW)", icon: , color: "orange", - score: 85, + score: 100, details: { title: "Row-Level Modifications with Tez", description: "Hive provides complete DML capabilities for Iceberg tables when running on the Tez execution engine.", @@ -202,7 +202,7 @@ export const hiveFeatures = [ description: "Copy-on-Write for all Hive writes; Merge-on-Read delete files are readable but not produced by Hive", icon: , color: "blue", - score: 70, + score: 55, details: { title: "Copy-on-Write Storage Strategy", description: "Hive uses exclusively Copy-on-Write operations, optimizing for read performance while supporting MoR file reading.", @@ -424,7 +424,7 @@ export const hiveFeatures = [ description: "Hive 4 bundles Iceberg 1.4.3, predating spec v3. 
Cannot write or reliably read v3 tables until upgrade to Iceberg ≥ 1.8.0", icon: , color: "red", - score: 40, + score: 0, details: { title: "Limited Format Version Support", description: "Hive 4.0+ currently supports only Iceberg format v1 and v2, with no v3 capabilities due to bundled Iceberg version.", @@ -601,12 +601,12 @@ export const hiveTableData = { value: Time Travel }, support: { - value: Partial, - badge: { text: "Properties Only", variant: "warning" } + value: Full, + badge: { text: "SQL + Properties", variant: "success" } }, details: { - value: "Hidden partitioning supported; time-travel via snapshot/branch properties, not SQL", - tooltip: "No SQL time travel syntax, use snapshot properties" + value: "SQL time travel via FOR SYSTEM_TIME AS OF / FOR SYSTEM_VERSION AS OF (Hive 4.0+); snapshot/branch properties also supported", + tooltip: "Hive 4.0+ supports SQL time travel clauses in addition to snapshot-based access" }, version: { value: "4.0+" } }, @@ -675,7 +675,7 @@ export const hiveTableData = { badge: { text: "Engine Constraints", variant: "error" } }, details: { - value: "Early Hive 4 snapshot bugs; requires Tez for DML; no SQL time travel syntax", + value: "Early Hive 4 snapshot bugs; requires Tez for DML; no Format V3 support; no REST/Nessie catalog support", tooltip: "Various limitations requiring workarounds or external tools" }, version: { value: "4.0+" } @@ -733,12 +733,12 @@ export const hiveUseCases = [ features={hiveFeatures} tableData={hiveTableData} useCases={hiveUseCases} - officialDocs="https://hive.apache.org/documentation/" + officialDocs="https://cwiki.apache.org/confluence/display/Hive/GettingStarted" gettingStarted="https://iceberg.apache.org/docs/latest/hive/" additionalResources={[ { label: "Hive Iceberg Integration", url: "https://iceberg.apache.org/docs/latest/hive/" }, - { label: "Apache Ranger Documentation", url: "https://ranger.apache.org/documentation/" }, + { label: "Apache Ranger Documentation", url: 
"https://cwiki.apache.org/confluence/display/RANGER/Index" }, { label: "Hive Performance Tuning", url: "https://cwiki.apache.org/confluence/display/Hive/Configuration+Properties" }, - { label: "Table Migration Guide", url: "https://docs.cloudera.com/cdw-runtime/cloud/iceberg-how-to/topics/iceberg-migrating-hive-tables.html" } + { label: "Table Migration Guide", url: "https://iceberg.apache.org/docs/latest/hive/#alter-table-migrate-table" } ]} /> diff --git a/docs-iceberg-query-engine/impala.mdx b/docs-iceberg-query-engine/impala.mdx index 3e07f492..ac0689cf 100644 --- a/docs-iceberg-query-engine/impala.mdx +++ b/docs-iceberg-query-engine/impala.mdx @@ -30,7 +30,7 @@ export const impalaFeatures = [ ), icon: , color: "blue", - score: 75, + score: 55, details: { title: "Enterprise Metadata Management", description: "Impala provides robust integration with enterprise metadata infrastructure through Hive Metastore while supporting flexible catalog configurations.", @@ -38,7 +38,7 @@ export const impalaFeatures = [ strengths: [ "Deep HMS integration with enterprise features", "Support for HadoopCatalog and HadoopTables", - "Flexible catalog-impl configuration via Hive site-config", + "Catalog access is mediated through Hive Metastore; no native support for REST, Glue, or Nessie catalogs", "Enterprise-grade metadata consistency and ACID guarantees", "Seamless integration with existing Hadoop ecosystem tools" ], @@ -89,7 +89,7 @@ export const impalaFeatures = [ description: "Full support for INSERT, DELETE, UPDATE operations using Iceberg v2 position-delete files with MERGE operations in preview", icon: , color: "green", - score: 80, + score: 60, details: { title: "Advanced Data Manipulation Capabilities", description: "Impala provides comprehensive row-level operations through Iceberg v2 specification with position delete files for efficient updates and deletes.", @@ -147,7 +147,7 @@ export const impalaFeatures = [ description: "Manual snapshot queries via FOR SYSTEM_TIME 
AS OF / FOR SYSTEM_VERSION AS OF with DESCRIBE HISTORY & EXPIRE SNAPSHOTS commands", icon: , color: "purple", - score: 90, + score: 100, details: { title: "Comprehensive Historical Data Access", description: "Impala provides powerful time travel capabilities with SQL-native syntax and comprehensive metadata management tools for historical analysis.", @@ -261,7 +261,7 @@ export const impalaFeatures = [ description: "Copy-on-Write for overwrites and Merge-on-Read for row-level operations using position-delete files; equality deletes not supported", icon: , color: "orange", - score: 70, + score: 55, details: { title: "Balanced Storage Approach", description: "Impala implements a strategic storage approach balancing write efficiency with read performance through selective use of CoW and position-based MoR.", @@ -373,7 +373,7 @@ export const impalaFeatures = [ description: "Relies on Hive Metastore + Apache Ranger ACLs with storage-layer permissions (HDFS/S3/Ozone) for comprehensive enterprise security", icon: , color: "green", - score: 85, + score: 100, details: { title: "Comprehensive Enterprise Security", description: "Impala integrates seamlessly with enterprise security infrastructure through established Hadoop ecosystem security frameworks.", diff --git a/docs-iceberg-query-engine/presto.mdx b/docs-iceberg-query-engine/presto.mdx index 348458fe..4d7aa01c 100644 --- a/docs-iceberg-query-engine/presto.mdx +++ b/docs-iceberg-query-engine/presto.mdx @@ -90,7 +90,7 @@ export const prestoFeatures = [ description: "Java: full read/write with INSERT, CTAS, DELETE. 
C++ (Velox): high-performance read-only execution with same read path capabilities", icon: , color: "blue", - score: 85, + score: 60, details: { title: "Java and C++ Engine Capabilities", description: "Presto offers both Java and C++ (Velox) engines with different capabilities but shared read optimizations.", @@ -147,7 +147,7 @@ export const prestoFeatures = [ description: "INSERT, CTAS, DELETE (row-level, partition) available; UPDATE experimental; MERGE not yet supported. C++ engine has no DML", icon: , color: "orange", - score: 70, + score: 50, details: { title: "Progressive DML Feature Development", description: "Presto provides essential DML operations with ongoing development for complete SQL modification support.", @@ -315,7 +315,7 @@ export const prestoFeatures = [ description: "Roadmap: read Deletion Vectors & Row Lineage after Iceberg 1.8 libraries land; write DV planned post-0.295. Currently supports v1/v2 only", icon: , color: "red", - score: 20, + score: 0, details: { title: "Clear V3 Development Roadmap", description: "Presto has a defined timeline for Iceberg format v3 support with phased implementation approach.", @@ -860,7 +860,7 @@ export const prestoUseCases = [ gettingStarted="https://github.com/prestodb/presto/blob/master/presto-docs/src/main/sphinx/connector/iceberg.rst" additionalResources={[ { label: "Iceberg Connector Documentation", url: "https://github.com/prestodb/presto/blob/master/presto-docs/src/main/sphinx/connector/iceberg.rst" }, - { label: "Presto 0.288 Release", url: "https://www.linkedin.com/posts/presto-foundation_launch-presto-local-download-jdbc-docker-activity-7224526756093816833-jdBf" }, + { label: "Presto Release Notes", url: "https://prestodb.io/docs/current/release/release-0.288.html" }, { label: "Time Travel Guide", url: "https://medium.com/@akbg/time-travel-for-iceberg-tables-in-presto-1dd8364f29cb" }, { label: "MERGE INTO Support Tracking", url: "https://github.com/prestodb/presto/issues/20578" }, { label: "Performance 
Newsletter", url: "https://medium.com/presto-foundation/presto-community-newsletter-july-2024-77c37c10f13e" }, diff --git a/docs-iceberg-query-engine/snowflake.mdx b/docs-iceberg-query-engine/snowflake.mdx index 8f8aa973..dcd41ae7 100644 --- a/docs-iceberg-query-engine/snowflake.mdx +++ b/docs-iceberg-query-engine/snowflake.mdx @@ -308,57 +308,59 @@ export const snowflakeFeatures = [ } }, { - title: "Limited Format Support", - chip: "Parquet Only", - description: "Parquet only format support. Iceberg spec v2 for Snowflake-catalog tables; external reads work on v1 or v2. No v3 support yet", + title: "Iceberg V3 Support (Public Preview)", + chip: "V3 Public Preview", + description: "Iceberg V3 support in Public Preview (March 2026). Supports deletion vectors, nanosecond timestamps, geometry type, variant type, and row lineage. Parquet-only data format; ORC and Avro not supported", icon: , - color: "red", - score: 50, + color: "orange", + score: 65, details: { - title: "Format Limitations with Enterprise Focus", - description: "Snowflake supports only Parquet format for optimal performance but lacks support for other formats and newer Iceberg specifications.", + title: "Iceberg V3 in Public Preview with Key New Features", + description: "Snowflake launched Iceberg V3 support in Public Preview in March 2026, bringing deletion vectors, new data types, and row lineage to Snowflake-managed Iceberg tables — while data files remain Parquet-only.", overviewContent: { strengths: [ - "Parquet optimized for analytical performance", - "Stable spec v2 support for production workloads", - "Backwards compatibility with v1 external tables", - "Enterprise-grade Parquet processing and optimization" + "Deletion vectors (V3 MoR) for efficient row-level changes", + "VARIANT data type for semi-structured JSON", + "Row lineage metadata for CDC and auditing", + "Nanosecond timestamp precision support", + "Geometry type for geospatial data" ], limitations: [ - "No ORC or Avro format 
support", - "No Iceberg v3 specification support", - "Limited to older Iceberg capabilities", - "Missing deletion vectors and row lineage features" + "V3 still in Public Preview — not yet GA", + "Parquet-only — no ORC or Avro format support", + "Cross-engine V3 interop limited to V3-capable engines", + "Some V3 features may have limited documentation" ], bestFor: [ - "Parquet-standardized analytical environments", - "Enterprise workloads prioritizing stability", - "Organizations not requiring cutting-edge format features", - "Production deployments with proven format compatibility" + "Early adopters evaluating deletion vector efficiency", + "VARIANT column use cases replacing complex nested JSON schemas", + "Geospatial workloads leveraging the geometry type", + "Organizations planning V3 adoption before GA" ] }, technicalSpecs: [ { - category: "Supported Formats", + category: "V3 Features (Public Preview)", items: [ - { label: "Parquet", value: "Full Optimization", status: "available" }, - { label: "ORC", value: "Not Supported", status: "limited" }, - { label: "Avro", value: "Not Supported", status: "limited" }, - { label: "Iceberg v2", value: "Production Ready", status: "available" } + { label: "Deletion Vectors", value: "Supported", status: "preview" }, + { label: "VARIANT Type", value: "Supported", status: "preview" }, + { label: "Row Lineage", value: "Supported", status: "preview" }, + { label: "Nanosecond Timestamps", value: "Supported", status: "preview" }, + { label: "Geometry Type", value: "Supported", status: "preview" } ] }, { - category: "Missing Capabilities", + category: "Format Limitations", items: [ - { label: "Iceberg v3", value: "Roadmap Evaluation", status: "limited" }, - { label: "Deletion Vectors", value: "Not Available", status: "limited" }, - { label: "Row Lineage", value: "Not Available", status: "limited" }, - { label: "Alternative Formats", value: "Not Supported", status: "limited" } + { label: "Parquet", value: "Full Optimization", status: 
"available" }, + { label: "ORC", value: "Not Supported", status: "unavailable" }, + { label: "Avro", value: "Not Supported", status: "unavailable" }, + { label: "Iceberg v2", value: "Production Ready", status: "available" } ] } ], externalLinks: [ - { label: "Apache Iceberg v3 Table Spec Blog", url: "https://www.snowflake.com/en/blog/apache-iceberg-v3-table-spec-oss-shared-success/", type: "docs" } + { label: "Apache Iceberg v3 Table Spec Blog", url: "https://www.snowflake.com/en/blog/apache-iceberg-v3-table-spec-oss-shared-success/", type: "blog" } ] } }, @@ -704,14 +706,14 @@ export const snowflakeTableData = { value: Format Support }, support: { - value: Limited, - badge: { text: "Parquet v2 Only", variant: "error" } + value: Partial, + badge: { text: "V3 Public Preview", variant: "warning" } }, details: { - value: "Parquet only; spec v2 for native tables; v1/v2 read for external; no v3 support", - tooltip: "Significant format limitations with optimization focus" + value: "Parquet only; V2 GA; V3 (deletion vectors, VARIANT, row lineage, nanosecond ts, geometry) in Public Preview (March 2026)", + tooltip: "V3 in Public Preview — key new types and deletion vectors supported" }, - status: { value: "GA" } + status: { value: "Preview" } }, { dimension: { diff --git a/docs-iceberg-query-engine/spark.mdx b/docs-iceberg-query-engine/spark.mdx index 8f5c3dc1..f210239b 100644 --- a/docs-iceberg-query-engine/spark.mdx +++ b/docs-iceberg-query-engine/spark.mdx @@ -908,7 +908,7 @@ export const sparkFeatures = [ description: "Incremental reads with stream-from-timestamp; Append/Complete output modes; overwrite and delete snapshots skipped by default", icon: , color: "orange", - score: 75, + score: 55, details: { title: "Streaming Integration", description: "Spark Structured Streaming provides robust integration with Iceberg tables for incremental processing.", diff --git a/docs-iceberg-query-engine/starburst.mdx b/docs-iceberg-query-engine/starburst.mdx index 
cc46edbe..33610378 100644 --- a/docs-iceberg-query-engine/starburst.mdx +++ b/docs-iceberg-query-engine/starburst.mdx @@ -143,7 +143,7 @@ export const starburstFeatures = [ description: "Built-in access-control engine with table/column-level ACLs, LDAP/OAuth integration, and support for Lake Formation and HMS Ranger policies", icon: , color: "green", - score: 95, + score: 100, details: { title: "Comprehensive Security Framework", description: "Starburst delivers enterprise-grade security with multiple layers of access control, authentication, and integration with existing security infrastructure.", @@ -312,7 +312,7 @@ export const starburstFeatures = [ description: "Supports Iceberg spec v1 & v2 with data files in Parquet (default), ORC, Avro and configurable codecs including SNAPPY, ZSTD, LZ4, GZIP", icon: , color: "green", - score: 85, + score: 30, details: { title: "Flexible Data Format Support", description: "Starburst provides comprehensive format support enabling optimization for different workload characteristics and legacy system integration.", @@ -325,10 +325,10 @@ export const starburstFeatures = [ "Legacy format support for migration scenarios" ], limitations: [ - "Iceberg v3 support limited to preview/read-only", + "Full V3 write support (deletion vectors, VARIANT, row lineage) requires SEP 476-e or later", "Format mixing within tables may impact performance", "Codec selection affects compression ratio vs. 
performance trade-offs", - "Some advanced v3 features not yet available for production" + "V3 deletion vector compaction behavior differs from V2 delete file handling" ], bestFor: [ "Organizations with diverse format requirements", @@ -343,8 +343,8 @@ export const starburstFeatures = [ items: [ { label: "Spec v1", value: "Full support", status: "available" }, { label: "Spec v2", value: "Full support", status: "available" }, - { label: "Spec v3", value: "Preview/read-only", status: "preview" }, - { label: "Production v3", value: "Roadmap 2025", status: "planned" } + { label: "Spec v3", value: "Full GA — SEP 476-e+", status: "available" }, + { label: "Deletion Vectors", value: "GA — SEP 476-e (Sep 2025)", status: "available" } ] }, { @@ -425,23 +425,23 @@ export const starburstFeatures = [ { title: "Current Limitations & Roadmap", chip: "Known Constraints", - description: "One catalog per config file; v3 preview only; manual optimization for frequent commits; some nested struct predicate limitations; streaming via external tools only", + description: "One catalog per config file; V3 fully GA in SEP 476-e (Sep 2025); manual optimization for frequent commits; some nested struct predicate limitations; streaming via external tools only", icon: , color: "orange", - score: 70, + score: 0, details: { title: "Understanding Platform Constraints", description: "Starburst provides transparent documentation of current limitations while maintaining a clear roadmap for future capability expansion.", overviewContent: { strengths: [ "Clear documentation of limitations and workarounds", - "Transparent roadmap for v3 support in 2025", + "Full Iceberg V3 GA available in SEP 476-e (September 2025)", "Specific guidance for optimization strategies", "Alternative approaches for unsupported scenarios" ], limitations: [ "One catalog configuration per connector file", - "Iceberg v3 limited to preview/read functionality", + "V3 full GA requires SEP 476-e or later (not available in older SEP 
versions)", "Very frequent small commits require manual OPTIMIZE operations", "Complex predicate push-downs on nested structs not fully optimized", "No built-in streaming ingestion capabilities" @@ -466,7 +466,7 @@ export const starburstFeatures = [ { category: "Feature Limitations", items: [ - { label: "Iceberg v3", value: "Preview/read-only", status: "preview" }, + { label: "Iceberg v3", value: "Full GA — SEP 476-e+", status: "available" }, { label: "Streaming Ingestion", value: "External tools only", status: "unavailable" }, { label: "Nested Predicates", value: "Limited optimization", status: "limited" }, { label: "Small Commit Optimization", value: "Manual OPTIMIZE", status: "limited" } @@ -482,7 +482,7 @@ export const starburstFeatures = [ export const starburstTableData = { title: "Starburst Iceberg Feature Matrix", - description: "Comprehensive breakdown of Iceberg capabilities in Starburst Enterprise SEP 414-E+", + description: "Comprehensive breakdown of Iceberg capabilities in Starburst Enterprise SEP 476-e+ (Full V3 GA)", variant: "default", columns: [ { @@ -662,14 +662,14 @@ export const starburstTableData = { value: Iceberg v3 Support }, support: { - value: Preview, - badge: { text: "Read-Only", variant: "warning" } + value: Full, + badge: { text: "GA — SEP 476-e", variant: "success" } }, details: { - value: "v3 preview metadata reading under feature flag; production GA roadmap 2025", - tooltip: "Limited v3 support with full production capability planned" + value: "Full Iceberg V3 GA in SEP 476-e (September 2025): deletion vectors, VARIANT type, nanosecond timestamps, row lineage", + tooltip: "Full V3 read+write GA shipped in SEP 476-e — deletion vectors replace position/equality delete files" }, - version: { value: "430+" } + version: { value: "476-e+" } }, { dimension: { @@ -732,8 +732,8 @@ export const starburstUseCases = [ ]; \ No newline at end of file diff --git a/docs-iceberg-query-engine/starrocks.mdx b/docs-iceberg-query-engine/starrocks.mdx 
index 34c83ca5..e29ade3d 100644 --- a/docs-iceberg-query-engine/starrocks.mdx +++ b/docs-iceberg-query-engine/starrocks.mdx @@ -85,7 +85,7 @@ export const starrocksFeatures = [ description: "Full reads including MoR (position & equality-delete files); INSERT/INSERT OVERWRITE, CREATE/DROP (v3.1+). Vectorized execution for analytical workloads", icon: , color: "blue", - score: 90, + score: 55, details: { title: "High-Performance Analytical Query Engine", description: "StarRocks provides exceptional analytical query performance with its vectorized execution engine optimized for Iceberg data.", @@ -141,7 +141,7 @@ export const starrocksFeatures = [ description: "Supports INSERT & INSERT OVERWRITE (partition-level). No UPDATE/DELETE/MERGE operations available in current versions", icon: , color: "orange", - score: 40, + score: 50, details: { title: "Basic Write Operations with Roadmap for Advanced DML", description: "StarRocks currently provides essential write operations with plans for comprehensive DML support in future releases.", @@ -197,7 +197,7 @@ export const starrocksFeatures = [ description: "Reads MoR (position & equality-delete files) efficiently. 
Writes CoW only (partition overwrite) - no equality-delete file production", icon: , color: "orange", - score: 70, + score: 55, details: { title: "Asymmetric Storage Strategy Optimization", description: "StarRocks excels at reading both storage strategies but is limited to Copy-on-Write writes, optimized for analytical workloads.", @@ -253,7 +253,7 @@ export const starrocksFeatures = [ description: "No native streaming; supports Async Materialized Views for incremental ingest patterns and low-latency dashboard performance", icon: , color: "blue", - score: 80, + score: 0, details: { title: "Innovative Materialized View Architecture", description: "StarRocks provides unique async materialized views that enable near-real-time analytics on Iceberg data without native streaming.", @@ -309,7 +309,7 @@ export const starrocksFeatures = [ description: "Iceberg v1 & v2 (Parquet & ORC) support. No Iceberg v3 or Avro format support in current versions", icon: , color: "red", - score: 60, + score: 0, details: { title: "Stable Format Support with Future Expansion", description: "StarRocks provides solid support for current Iceberg formats while focusing on performance optimization for existing standards.", @@ -364,7 +364,7 @@ export const starrocksFeatures = [ description: "No SQL 'AS OF' in v3.2/3.3 - use separate catalog pointing at older snapshot. SQL time travel supported from v3.4.0+", icon: , color: "red", - score: 20, + score: 50, details: { title: "Time Travel Capabilities Coming Soon", description: "StarRocks has limited time travel in current versions but with clear roadmap for full SQL time travel support.", @@ -419,7 +419,7 @@ export const starrocksFeatures = [ description: "Catalog ACLs respected (IAM/HMS). 
StarRocks RBAC on external catalogs for fine-grained access control and governance", icon: , color: "green", - score: 90, + score: 100, details: { title: "Comprehensive Security and Access Control", description: "StarRocks provides enterprise-grade security with native RBAC extension to external Iceberg catalogs.", diff --git a/docs-iceberg-query-engine/trino.mdx b/docs-iceberg-query-engine/trino.mdx index f18e28d1..6d5fef82 100644 --- a/docs-iceberg-query-engine/trino.mdx +++ b/docs-iceberg-query-engine/trino.mdx @@ -312,7 +312,7 @@ export const trinoFeatures = [ description: "Not yet GA for spec v3; currently supports only spec v1/v2; deletion vectors & row lineage planned but not available", icon: , color: "red", - score: 40, + score: 0, details: { title: "Limited to Stable Format Versions", description: "Trino currently supports Iceberg format v1 and v2, with v3 support planned but not yet generally available.", @@ -858,6 +858,6 @@ export const trinoUseCases = [ { label: "Table Maintenance Procedures", url: "https://trino.io/docs/current/connector/iceberg.html#alter-table-execute" }, { label: "Security Configuration", url: "https://trino.io/docs/current/connector/iceberg.html#security" }, { label: "Metadata Tables Reference", url: "https://trino.io/docs/current/connector/iceberg.html#metadata-tables" }, - { label: "Time Travel Syntax", url: "https://trino.io/docs/current/connector/iceberg.html#time-travel" } + { label: "Time Travel Syntax", url: "https://trino.io/docs/current/connector/iceberg.html#time-travel-queries" } ]} /> \ No newline at end of file diff --git a/src/components/Iceberg/CardView.tsx b/src/components/Iceberg/CardView.tsx index 9b2eae57..1b5bbc0a 100644 --- a/src/components/Iceberg/CardView.tsx +++ b/src/components/Iceberg/CardView.tsx @@ -7,31 +7,40 @@ import { SUPPORT_BADGE_STYLES } from '../../data/constants/supportLevels'; import { LAYOUT, ANIMATIONS } from '../../data/constants/ui'; import SupportIcon from './SupportIcon'; import 
CategoryBadge from './CategoryBadge'; +import { ResolvedEngineView } from './versionedTypes'; +import { buildEngineDetailUrl } from './versionState'; interface CardViewProps { engines: QueryEngine[]; + resolvedEngines: ResolvedEngineView[]; + versionMode: 'v2' | 'v3'; + onQuickCompare: (engineName: string) => void; } -const CardView: React.FC = ({ engines }) => { - const calculateSupportScore = (engine: QueryEngine): number => { - return Object.values(engine.features).reduce( +const CardView: React.FC = ({ engines, resolvedEngines, versionMode, onQuickCompare }) => { + const calculateSupportScore = (features: QueryEngine['features']): number => { + return Object.values(features).reduce( (score, feature) => score + SUPPORT_WEIGHTS[feature.support], 0 ); }; const handleEngineClick = (engineId: string) => { - window.open(`/iceberg/query-engine/${engineId}`, '_blank'); + window.open(buildEngineDetailUrl(engineId, versionMode), '_blank'); }; return (
- {engines.map((engine) => ( + {engines.map((engine) => { + const engineVersions = resolvedEngines.filter((item) => item.id === engine.id); + const visibleVersions = engineVersions.filter((item) => item.version === versionMode); + + return (
handleEngineClick(engine.id)} > -
+

@@ -45,45 +54,51 @@ const CardView: React.FC = ({ engines }) => {

-

- {engine.description} -

- -
- {Object.entries(engine.features).slice(0, 4).map(([key, feature]) => ( -
- - {FEATURE_NAMES[key as keyof QueryEngine['features']]} + {visibleVersions.map((entry) => ( +
+
+ + {entry.version.toUpperCase()} -
- - - {feature.support} - -
- ))} -
- -
-
- Support Score -
- - {calculateSupportScore(engine)} - - /32 +

+ {entry.description || 'N/A'} +

+
+ {Object.entries(entry.features).slice(0, 4).map(([key, feature]) => ( +
+ + {FEATURE_NAMES[key as keyof QueryEngine['features']]} + +
+ + + {feature?.support ?? 'N/A'} + +
+
+ ))} +
+
+ Score: {entry.score ?? calculateSupportScore(entry.features)}/32
-
-
-
+ ))} + +
+ Quick Compare +
- ))} + )})}
); }; diff --git a/src/components/Iceberg/ComparisonView.tsx b/src/components/Iceberg/ComparisonView.tsx index 31c6849e..2a6839e4 100644 --- a/src/components/Iceberg/ComparisonView.tsx +++ b/src/components/Iceberg/ComparisonView.tsx @@ -1,294 +1,142 @@ // components/Iceberg/ComparisonView.tsx import React from 'react'; -import { - CheckCircleIcon, - XCircleIcon, - ExclamationTriangleIcon, - ArrowTopRightOnSquareIcon, - ScaleIcon, - PlusIcon -} from '@heroicons/react/24/outline'; -import { QueryEngine } from '../../types/iceberg'; +import { XCircleIcon, ArrowTopRightOnSquareIcon, ScaleIcon, PlusIcon } from '@heroicons/react/24/outline'; +import { EngineVersionSelection, QueryEngine } from '../../types/iceberg'; import { FEATURE_NAMES, SUPPORT_WEIGHTS } from '../../data/constants/features'; -import { SUPPORT_BADGE_STYLES } from '../../data/constants/supportLevels'; -import CategoryBadge from './CategoryBadge'; import SupportIcon from './SupportIcon'; +import CategoryBadge from './CategoryBadge'; +import { buildEngineDetailUrl } from './versionState'; interface ComparisonViewProps { engines: QueryEngine[]; - selectedEngines: string[]; - onEngineSelect: (engineId: string, selected: boolean) => void; + selectedComparisons: EngineVersionSelection[]; + versionMode: 'v2' | 'v3'; + onEngineSelect: (selection: EngineVersionSelection, selected: boolean) => void; } -const ComparisonView: React.FC = ({ - engines, - selectedEngines, - onEngineSelect -}) => { +const ComparisonView: React.FC = ({ engines, selectedComparisons, versionMode, onEngineSelect }) => { const maxComparisons = 4; - const selectedEngineData = engines.filter(engine => selectedEngines.includes(engine.id)); - const availableEngines = engines.filter(engine => !selectedEngines.includes(engine.id)); - - const calculateSupportScore = (engine: QueryEngine): number => { - return Object.values(engine.features).reduce( - (score, feature) => score + SUPPORT_WEIGHTS[feature.support], 0 - ); - }; - - const 
handleEngineClick = (engineId: string) => { - window.open(`/iceberg/query-engine/${engineId}`, '_blank'); - }; - - const getComparisonColor = (support: string) => { - switch (support) { - case 'full': return 'text-green-600 dark:text-green-400'; - case 'partial': return 'text-yellow-600 dark:text-yellow-400'; - case 'preview': return 'text-blue-600 dark:text-blue-400'; - case 'none': return 'text-gray-400 dark:text-gray-500'; - default: return 'text-gray-600 dark:text-gray-400'; - } - }; - - if (selectedEngines.length === 0) { - return ( -
- {/* Selection Instructions */} -
- -

- Select Engines to Compare -

-

- Choose up to {maxComparisons} query engines to compare their features, capabilities, and support levels side-by-side. -

-
+ const options = engines.map((engine) => ({ engine, version: versionMode })); + const selected = selectedComparisons + .map((item) => { + const engine = engines.find((entry) => entry.name === item.engine); + if (!engine) return null; + const data = engine.versions?.[item.version]; + return { + key: `${engine.id}-${item.version}`, + engine, + version: item.version, + features: data?.features ?? engine.features, + description: data?.description ?? engine.description ?? 'N/A', + score: data?.score + }; + }) + .filter(Boolean) as Array<{ key: string; engine: QueryEngine; version: 'v2' | 'v3'; features: QueryEngine['features']; description: string; score?: number | null }>; + + const score = (features: QueryEngine['features']) => Object.values(features).reduce((sum, f) => sum + (SUPPORT_WEIGHTS[f?.support] ?? 0), 0); + const featureKeys = Object.keys(selected[0]?.features ?? {}); + const selectedKeys = new Set(selected.map((item) => item.key)); + const available = options.filter((option) => !selectedKeys.has(`${option.engine.id}-${option.version}`)); + const canAddMore = selected.length < maxComparisons; - {/* Engine Selection Grid */} -
- {engines.map((engine) => ( -
onEngineSelect(engine.id, true)} - > -
-
-

- {engine.name} -

- -
- -
- -

- {engine.description} -

- -
-
- Support Score: {calculateSupportScore(engine)}/32 -
-
-
-
-
-
- ))} -
+ return ( +
+
+ +

Select Engines to Compare ({versionMode.toUpperCase()})

+

{selected.length}/{maxComparisons} selected

- ); - } - return ( -
- {/* Selected Engines Summary */} -
-
-

- Comparing {selectedEngines.length} Engine{selectedEngines.length > 1 ? 's' : ''} -

- -
- {selectedEngineData.map((engine) => ( +
+

Selected Engines

+ {selected.length === 0 ? ( +

No engines selected yet. Pick engines from the Available Engines section below.

+ ) : ( +
+ {selected.map((item) => ( ))}
-
- - {/* Quick Add More */} - {selectedEngines.length < maxComparisons && availableEngines.length > 0 && ( -
-

- Add more engines (up to {maxComparisons}): -

-
- {availableEngines.slice(0, 16).map((engine) => ( - - ))} -
-
)}
- {/* Comparison Table */} -
-
+
+
+

Available Engines

+ {!canAddMore && Maximum 4 engines selected} +
+
+ {available.map((option) => ( + + ))} + {available.length === 0 && ( +

All engines are already selected for this version.

+ )} +
+
+ + {selected.length > 0 && ( +
- {/* Header */} - - - {selectedEngineData.map((engine) => ( - + + {selected.map((item) => ( + ))} - - {/* Body */} - - {/* Basic Info Row */} - - - {selectedEngineData.map((engine) => ( - - ))} - - - {/* Website Row */} - - - {selectedEngineData.map((engine) => ( - - ))} - - - {/* Feature Comparison Rows */} - {Object.keys(selectedEngineData[0]?.features || {}).map((feature) => ( - - - {selectedEngineData.map((engine) => { - const featureData = engine.features[feature as keyof QueryEngine['features']]; + + {featureKeys.map((feature) => ( + + + {selected.map((item) => { + const value = item.features?.[feature as keyof QueryEngine['features']]; return ( - ); })} ))} - - {/* Best Practices Row */} - - - {selectedEngineData.map((engine) => ( - - ))} + + + {selected.map((item) => )}
- - Features - - -
- - -
- Score: {calculateSupportScore(engine)}/32 -
-
+
Feature + +
Score: {item.score ?? score(item.features)}/32
- Description - -

- {engine.description} -

-
- Website - - - Visit Site - - -
- {FEATURE_NAMES[feature as keyof QueryEngine['features']]} -
{FEATURE_NAMES[feature as keyof QueryEngine['features']]} -
-
- -
-
- {featureData.support} -
-
- {featureData.details} -
-
+
+
+
{value?.support ?? 'N/A'}
+
{value?.details ?? 'N/A'}
- Best Practices - -
    - {engine.bestPractices.slice(0, 3).map((practice, idx) => ( -
  • - - {practice} -
  • - ))} - {engine.bestPractices.length > 3 && ( -
  • - +{engine.bestPractices.length - 3} more... -
  • - )} -
-
Description{item.description || 'N/A'}
-
- - {/* Detailed View Links */} -
-

- View Detailed Information -

-
- {selectedEngineData.map((engine) => ( - - ))} -
-
+ )}
); }; diff --git a/src/components/Iceberg/FeatureView.tsx b/src/components/Iceberg/FeatureView.tsx index 80599192..781c60d3 100644 --- a/src/components/Iceberg/FeatureView.tsx +++ b/src/components/Iceberg/FeatureView.tsx @@ -6,16 +6,28 @@ import { SUPPORT_BADGE_STYLES } from '../../data/constants/supportLevels'; import { LAYOUT, STYLES } from '../../data/constants/ui'; import SupportIcon from './SupportIcon'; import CategoryBadge from './CategoryBadge'; +import { buildEngineDetailUrl } from './versionState'; interface FeatureViewProps { engines: QueryEngine[]; + versionMode: 'v2' | 'v3'; } -const FeatureView: React.FC = ({ engines }) => { +const FeatureView: React.FC = ({ engines, versionMode }) => { const handleEngineClick = (engineId: string) => { - window.open(`/iceberg/query-engine/${engineId}`, '_blank'); + window.open(buildEngineDetailUrl(engineId, versionMode), '_blank'); }; + const getSupport = (engine: QueryEngine, feature: keyof QueryEngine['features'], version: 'v2' | 'v3') => + (version === 'v2' + ? engine.versions?.v2?.features?.[feature]?.support + : engine.versions?.v3?.features?.[feature]?.support) ?? 'none'; + + const getDetails = (engine: QueryEngine, feature: keyof QueryEngine['features'], version: 'v2' | 'v3') => + (version === 'v2' + ? engine.versions?.v2?.features?.[feature]?.details + : engine.versions?.v3?.features?.[feature]?.details) ?? 'N/A'; + return (
{FEATURE_FOCUS_LIST.map((feature) => ( @@ -32,37 +44,64 @@ const FeatureView: React.FC = ({ engines }) => {
- {engines - .filter(engine => engine.features[feature].support !== 'none') - .map((engine) => ( -
handleEngineClick(engine.id)} - > -
-
-

- {engine.name} -

+ {(() => { + const enginesForFeature = engines + .filter((engine) => { + const support = getSupport(engine, feature, versionMode); + return support !== 'none' && support !== 'unknown' && support !== ''; + }) + .map((engine) => { + const v2Support = getSupport(engine, feature, 'v2'); + const v3Support = getSupport(engine, feature, 'v3'); + const activeSupport = versionMode === 'v2' ? v2Support : v3Support; + return ( +
handleEngineClick(engine.id)} + > +
+
+
+

+ {engine.name} +

+
+ + {versionMode.toUpperCase()} + +
+
+
+
+ + + {activeSupport} + +
-
- - - {engine.features[feature].support} - + +
+
+ +

+ {getDetails(engine, feature, versionMode)} +

- -
- + ); + }); + + if (enginesForFeature.length === 0) { + return ( +
+ No engines currently show support for this feature in {versionMode.toUpperCase()}.
- -

- {engine.features[feature].details} -

-
- ))} + ); + } + + return enginesForFeature; + })()}
))} diff --git a/src/components/Iceberg/FilterControls.tsx b/src/components/Iceberg/FilterControls.tsx index 610b05b3..ec3b46ab 100644 --- a/src/components/Iceberg/FilterControls.tsx +++ b/src/components/Iceberg/FilterControls.tsx @@ -34,9 +34,9 @@ const FilterControls: React.FC = ({ }; return ( -
+
{/* Search and Category Filter */} -
+
{/* Search Input */}
@@ -71,7 +71,7 @@ const FilterControls: React.FC = ({
{/* Results Count and Clear Filters */} -
+
Showing {resultsCount} of{' '} diff --git a/src/components/Iceberg/IcebergQueryEngines.tsx b/src/components/Iceberg/IcebergQueryEngines.tsx index 2a7a32c2..5fef09b8 100644 --- a/src/components/Iceberg/IcebergQueryEngines.tsx +++ b/src/components/Iceberg/IcebergQueryEngines.tsx @@ -1,41 +1,76 @@ -import React, { useState, useMemo } from 'react'; +import React, { useEffect, useMemo, useState } from 'react'; import { MagnifyingGlassIcon, ScaleIcon, ChartBarIcon, FunnelIcon } from '@heroicons/react/24/outline'; -import { QueryEngine, FilterOptions, ViewType } from '../../types/iceberg'; -import { QUERY_ENGINES } from '../../data/query-engines'; +import { QueryEngine, FilterOptions, ViewType, EngineVersion, EngineVersionSelection, VersionMode } from '../../types/iceberg'; +import { engines as QUERY_ENGINES } from '../../data/query-engines'; import FilterControls from './FilterControls'; import ViewTabs from './ViewTabs'; import TableView from './TableView'; import CardView from './CardView'; import FeatureView from './FeatureView'; import ComparisonView from './ComparisonView'; +import { ResolvedEngineView } from './versionedTypes'; +import { getVersionFromUrl, getPersistedVersion, persistVersion } from './versionState'; interface IcebergQueryEnginesProps { fullWidth?: boolean; showComparison?: boolean; maxEngines?: number; defaultView?: ViewType; + defaultVersion?: 'v2' | 'v3'; } const IcebergQueryEngines: React.FC = ({ fullWidth = true, showComparison = true, maxEngines, - defaultView = 'table' + defaultView = 'table', + defaultVersion = 'v3' }) => { const [viewType, setViewType] = useState(defaultView); const [isComparisonMode, setIsComparisonMode] = useState(false); - const [selectedEngines, setSelectedEngines] = useState([]); + const initialVersion = getVersionFromUrl(typeof window !== 'undefined' ? window.location.search : '') ?? getPersistedVersion() ?? 
defaultVersion; + const [versionMode, setVersionMode] = useState(initialVersion); + const [selectedComparisons, setSelectedComparisons] = useState([]); const [filters, setFilters] = useState({ searchTerm: '', category: 'all' }); - // Filtered engines + const getFallbackFeatures = (engine: QueryEngine): QueryEngine['features'] => + engine.versions?.v3?.features ?? engine.features; + + const resolveEngineForVersion = (engine: QueryEngine, version: EngineVersion): ResolvedEngineView => { + const versionData = engine.versions?.[version]; + const fallbackFeatures = getFallbackFeatures(engine); + const features = versionData?.features ?? fallbackFeatures; + const score = + typeof versionData?.score === 'number' + ? versionData.score + : Object.values(features).reduce((sum, feature) => sum + (feature?.support === 'full' ? 4 : feature?.support === 'partial' ? 2 : feature?.support === 'preview' ? 1 : 0), 0); + + return { + key: `${engine.id}-${version}`, + id: engine.id, + name: `${engine.name} (${version.toUpperCase()})`, + baseName: engine.name, + category: engine.category, + website: engine.website, + documentation: engine.documentation, + quickStart: engine.quickStart, + bestPractices: engine.bestPractices, + version, + features, + description: versionData?.description ?? engine.description ?? 'N/A', + score: versionData?.score ?? 
score + }; + }; + + // Filtered base engines const filteredEngines = useMemo(() => { if (!QUERY_ENGINES || QUERY_ENGINES.length === 0) { return []; @@ -63,6 +98,21 @@ const IcebergQueryEngines: React.FC = ({ return engines; }, [filters, maxEngines]); + const resolvedEngines = useMemo( + () => filteredEngines.map((engine) => resolveEngineForVersion(engine, versionMode)), + [filteredEngines, versionMode] + ); + + useEffect(() => { + persistVersion(versionMode); + if (typeof window !== 'undefined') { + const url = new URL(window.location.href); + url.searchParams.set('version', versionMode); + window.history.replaceState({}, '', `${url.pathname}${url.search}${url.hash}`); + } + setSelectedComparisons((prev) => prev.filter((item) => item.version === versionMode)); + }, [versionMode]); + // JSON-LD structured data for SEO const structuredData = { "@context": "https://schema.org", @@ -81,17 +131,29 @@ const IcebergQueryEngines: React.FC = ({ } }; - const handleEngineSelection = (engineId: string, selected: boolean) => { - setSelectedEngines(prev => - selected - ? 
[...prev, engineId] - : prev.filter(id => id !== engineId) - ); + const handleEngineSelection = (selection: EngineVersionSelection, selected: boolean) => { + setSelectedComparisons((prev) => { + const exists = prev.some( + (item) => item.engine === selection.engine && item.version === selection.version + ); + if (selected && !exists) { + if (prev.length >= 4) { + return prev; + } + return [...prev, selection]; + } + if (!selected) { + return prev.filter( + (item) => !(item.engine === selection.engine && item.version === selection.version) + ); + } + return prev; + }); }; const toggleComparisonMode = () => { setIsComparisonMode(!isComparisonMode); - setSelectedEngines([]); + setSelectedComparisons([]); }; const renderContent = () => { @@ -99,7 +161,8 @@ const IcebergQueryEngines: React.FC = ({ return ( ); @@ -109,10 +172,11 @@ const IcebergQueryEngines: React.FC = ({ case 'table': return ( ); @@ -120,24 +184,26 @@ const IcebergQueryEngines: React.FC = ({ return ( { + setIsComparisonMode(true); + setSelectedComparisons([{ engine: engineName, version: versionMode }]); + }} /> ); case 'features': return ( ); default: return ( ); @@ -195,7 +261,7 @@ const IcebergQueryEngines: React.FC = ({
- {selectedEngines.length} selected for comparison + {selectedComparisons.length} selected for comparison
@@ -203,12 +269,30 @@ const IcebergQueryEngines: React.FC = ({
+
+
+ {(['v2', 'v3'] as const).map((mode) => ( + + ))} +
+
+ {/* Enhanced Filter Controls */}
@@ -249,8 +333,8 @@ const IcebergQueryEngines: React.FC = ({ Comparison Mode Active

- Select engines from the list below to compare their features side-by-side. - You can select up to 4 engines for detailed comparison. + Select engines from the list below to compare their features side-by-side. + You can select up to 4 engines for {versionMode.toUpperCase()} comparison.

@@ -264,7 +348,7 @@ const IcebergQueryEngines: React.FC = ({
{/* No Results */} - {filteredEngines.length === 0 && QUERY_ENGINES.length > 0 && ( + {resolvedEngines.length === 0 && QUERY_ENGINES.length > 0 && (
diff --git a/src/components/Iceberg/QueryEngineLayout.tsx b/src/components/Iceberg/QueryEngineLayout.tsx index 2a7ff1c8..b6a1e686 100644 --- a/src/components/Iceberg/QueryEngineLayout.tsx +++ b/src/components/Iceberg/QueryEngineLayout.tsx @@ -1,8 +1,11 @@ // src/components/Iceberg/QueryEngineLayout.tsx -import React from 'react'; +import React, { useEffect, useMemo, useState } from 'react'; import Head from '@docusaurus/Head'; import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; import { useLocation } from '@docusaurus/router'; +import { engines } from '../../data/query-engines'; +import { VersionMode } from '../../types/iceberg'; +import { getPersistedVersion, getVersionFromUrl, persistVersion } from './versionState'; import { FeatureCard, FeatureCardProps } from './FeatureCard'; import { InteractiveTable, InteractiveTableProps } from './InteractiveTable'; import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter'; @@ -100,6 +103,21 @@ export const QueryEngineLayout: React.FC = ({ const location = useLocation(); const siteUrl = siteConfig?.url?.replace(/\/$/, '') || SITE_URL; const canonicalUrl = `${siteUrl}${location.pathname}`.replace(/\/$/, '') || `${siteUrl}${location.pathname}`; + const engineId = location.pathname.split('/').filter(Boolean).pop() || ''; + const engineData = useMemo(() => engines.find((engine) => engine.id === engineId), [engineId]); + const [versionMode, setVersionMode] = useState( + getVersionFromUrl(location.search) ?? getPersistedVersion() ?? 'v3' + ); + const selectedVersionData = versionMode === 'v2' ? engineData?.versions?.v2 : engineData?.versions?.v3; + + useEffect(() => { + persistVersion(versionMode); + if (typeof window !== 'undefined') { + const url = new URL(window.location.href); + url.searchParams.set('version', versionMode); + window.history.replaceState({}, '', `${url.pathname}${url.search}${url.hash}`); + } + }, [versionMode]); const pageTitleStr = schemaTitle ?? (typeof title === 'string' ? 
title : 'Query Engine'); const pageDescriptionStr = schemaDescription ?? (typeof description === 'string' ? description : 'Apache Iceberg query engine documentation.'); @@ -204,23 +222,48 @@ export const QueryEngineLayout: React.FC = ({ ))}
-
+
{/* Header with gradient background */} -
+
-

+

{title}

-

- {description} +

+ {selectedVersionData?.description || description}

+
+ {(['v2', 'v3'] as const).map((mode) => ( + + ))} +
+ {engineData && ( +
+

Version Snapshot ({versionMode.toUpperCase()})

+
+ {Object.entries(selectedVersionData?.features ?? {}).slice(0, 8).map(([key, feature]) => ( +
+
{key}
+
{feature?.support ?? 'N/A'}
+
+ ))} +
+
+ )} + {/* Feature Cards Section */}
diff --git a/src/components/Iceberg/TableView.tsx b/src/components/Iceberg/TableView.tsx index a3eedc8f..68884b4f 100644 --- a/src/components/Iceberg/TableView.tsx +++ b/src/components/Iceberg/TableView.tsx @@ -1,270 +1,113 @@ -import React from 'react'; -import { ArrowTopRightOnSquareIcon } from '@heroicons/react/24/outline'; -import { QueryEngine } from '../../types/iceberg'; +import React, { useState } from 'react'; +import { EngineVersionSelection } from '../../types/iceberg'; import { FEATURE_SHORT_NAMES, FEATURE_NAMES, SUPPORT_WEIGHTS } from '../../data/constants/features'; -import { SUPPORT_BADGE_STYLES } from '../../data/constants/supportLevels'; -import { STYLES, ANIMATIONS, TOOLTIP } from '../../data/constants/ui'; +import { STYLES } from '../../data/constants/ui'; import SupportIcon from './SupportIcon'; import CategoryBadge from './CategoryBadge'; +import { ResolvedEngineView } from './versionedTypes'; +import { buildEngineDetailUrl } from './versionState'; interface TableViewProps { - engines: QueryEngine[]; + engines: ResolvedEngineView[]; fullWidth?: boolean; selectionMode?: boolean; - selectedEngines?: string[]; - onEngineSelect?: (engineId: string, selected: boolean) => void; + selectedComparisons?: EngineVersionSelection[]; + onEngineSelect?: (selection: EngineVersionSelection, selected: boolean) => void; } -const TableView: React.FC = ({ - engines, - fullWidth = false, - selectionMode = false, - selectedEngines = [], - onEngineSelect -}) => { - const calculateSupportScore = (engine: QueryEngine): number => { - return Object.values(engine.features).reduce( - (score, feature) => score + SUPPORT_WEIGHTS[feature.support], 0 - ); - }; +const TableView: React.FC = ({ engines, fullWidth = false, selectionMode = false, selectedComparisons = [], onEngineSelect }) => { + const [hoveredCell, setHoveredCell] = useState<{ row: number; col: string } | null>(null); - const handleEngineClick = (engineId: string) => { - if (selectionMode && onEngineSelect) { - 
const isSelected = selectedEngines.includes(engineId); - onEngineSelect(engineId, !isSelected); - } else { - window.open(`/iceberg/query-engine/${engineId}`, '_self'); - } - }; - - // Safety check for empty engines array - if (!engines || engines.length === 0) { - return ( -
-

No engines to display

-
- ); + if (!engines.length) { + return
No engines to display
; } - - const sampleEngine = engines[0]; - const featureKeys = sampleEngine ? Object.keys(sampleEngine.features) : []; + const featureKeys = Object.keys(engines[0].features); + const score = (e: ResolvedEngineView) => e.score ?? Object.values(e.features).reduce((s, f) => s + (SUPPORT_WEIGHTS[f?.support] ?? 0), 0); + const selected = (e: ResolvedEngineView) => selectedComparisons.some((x) => x.engine === e.baseName && x.version === e.version); return (
-
- {/* Mobile View */} -
-
- {engines.map((engine) => ( -
handleEngineClick(engine.id)} - > -
-
-
-

- {engine.name} -

- {selectionMode && ( -
- {selectedEngines.includes(engine.id) && ( - - - - )} -
- )} -
- -
- {!selectionMode && ( - - )} -
+
+ + + + {selectionMode && } + + {featureKeys.map((f) => ( + + ))} + + + + + {engines.map((engine, rowIndex) => ( + + {selectionMode && ( + + )} + + {featureKeys.map((f, colIndex) => { + const feature = engine.features[f as keyof typeof engine.features]; + const details = feature?.details; + const featureName = FEATURE_NAMES[f as keyof typeof engines[0]['features']]; + const isHovered = hoveredCell?.row === rowIndex && hoveredCell?.col === f; -

- {engine.description} -

+ const isLastCols = colIndex >= featureKeys.length - 2; + const isFirstCols = colIndex <= 1; + const isFirstRow = rowIndex === 0; - {/* Feature Summary for Mobile */} -
- {featureKeys.slice(0, 6).map((feature) => ( -
- - {FEATURE_SHORT_NAMES[feature as keyof QueryEngine['features']]} - - -
- ))} -
+ const hAlign = isLastCols ? 'right-0' : isFirstCols ? 'left-0' : 'left-1/2 -translate-x-1/2'; + const arrowHAlign = isLastCols ? 'right-3' : isFirstCols ? 'left-3' : 'left-1/2 -translate-x-1/2'; - {/* Score */} -
- Support Score -
- - {calculateSupportScore(engine)}/32 - -
-
-
-
-
-
- ))} - - + // Flip below for first row to avoid top-of-table clipping + const vPos = isFirstRow ? 'top-full pt-1' : 'bottom-full pb-1'; + const arrowVPos = isFirstRow ? 'top-[-4px]' : 'bottom-[-4px]'; - {/* Desktop View */} -
-
SelectQuery Engine + {FEATURE_SHORT_NAMES[f as keyof typeof engines[0]['features']]} + Score
+ onEngineSelect?.({ engine: engine.baseName, version: engine.version }, !selected(engine))} + /> + + +
- - - {selectionMode && ( - - )} - - {featureKeys.map((feature) => ( - - ))} - - - - - {engines.map((engine, index) => ( - handleEngineClick(engine.id)} - > - {selectionMode && ( - - )} - + return ( + - ))} - - - - ))} - -
- - Select - - -
- - Query Engine - -
-
-
- - {FEATURE_SHORT_NAMES[feature as keyof QueryEngine['features']]} - -
- {FEATURE_NAMES[feature as keyof QueryEngine['features']]} -
-
-
-
- - Score - -
-
- {selectedEngines.includes(engine.id) && ( - - - - )} -
-
-
-
-
-

- {engine.name} -

- {!selectionMode && ( - - )} -
-
- -
-

- {engine.description} -

-
-
-
details && setHoveredCell({ row: rowIndex, col: f })} + onMouseLeave={() => setHoveredCell(null)} + > +
+ - {featureKeys.map((feature) => ( -
-
- -
-
- {FEATURE_NAMES[feature as keyof QueryEngine['features']]} -
-
- {engine.features[feature as keyof QueryEngine['features']].details} + {isHovered && details && ( +
+
+
+

{featureName}

+

{details}

+
-
-
+ )}
-
-
- {calculateSupportScore(engine)} -
-
- / 32 -
-
-
-
-
-
-
+ ); + })} + {score(engine)}/32 + + ))} + +
); }; -export default TableView; \ No newline at end of file +export default TableView; diff --git a/src/components/Iceberg/versionState.ts b/src/components/Iceberg/versionState.ts new file mode 100644 index 00000000..2c2e8011 --- /dev/null +++ b/src/components/Iceberg/versionState.ts @@ -0,0 +1,25 @@ +import { VersionMode } from '../../types/iceberg'; + +const STORAGE_KEY = 'iceberg_query_engine_version'; + +export const normalizeVersion = (value?: string | null): VersionMode => (value === 'v2' ? 'v2' : 'v3'); + +export const getVersionFromUrl = (search?: string): VersionMode | null => { + if (!search) return null; + const params = new URLSearchParams(search); + const version = params.get('version'); + return version === 'v2' || version === 'v3' ? version : null; +}; + +export const getPersistedVersion = (): VersionMode | null => { + if (typeof window === 'undefined') return null; + return normalizeVersion(window.localStorage.getItem(STORAGE_KEY)); +}; + +export const persistVersion = (version: VersionMode): void => { + if (typeof window === 'undefined') return; + window.localStorage.setItem(STORAGE_KEY, version); +}; + +export const buildEngineDetailUrl = (engineId: string, version: VersionMode): string => + `/iceberg/query-engine/${engineId}?version=${version}`; diff --git a/src/components/Iceberg/versionedTypes.ts b/src/components/Iceberg/versionedTypes.ts new file mode 100644 index 00000000..719d3d45 --- /dev/null +++ b/src/components/Iceberg/versionedTypes.ts @@ -0,0 +1,17 @@ +import { EngineVersion, QueryEngine } from '../../types/iceberg'; + +export interface ResolvedEngineView { + key: string; + id: string; + name: string; + baseName: string; + category: QueryEngine['category']; + website: string; + documentation: string; + quickStart: string; + bestPractices: string[]; + version: EngineVersion; + features: QueryEngine['features']; + description: string; + score: number | null; +} diff --git a/src/data/query-engines/athena.ts 
b/src/data/query-engines/athena.ts index 7992c19f..186d1841 100644 --- a/src/data/query-engines/athena.ts +++ b/src/data/query-engines/athena.ts @@ -1,7 +1,8 @@ // data/query-engines/athena.ts import { QueryEngine } from '../../types/iceberg'; +import { createVersionedEngine } from './versioning'; -export const athena: QueryEngine = { +export const athena: QueryEngine = createVersionedEngine({ id: 'athena', name: 'Amazon Athena (Engine v3)', description: 'Serverless AWS-native query engine with complete DML operations, Lake Formation governance, time travel, and deep AWS ecosystem integration for Iceberg tables', @@ -73,7 +74,7 @@ export const athena: QueryEngine = { }, formatV3: { support: 'none', - details: 'Not yet supported; Athena uses Iceberg 1.2.x libraries, spec v3 features (DV, lineage) not available. Creates/writes only spec v2 tables', + details: 'Not yet supported; Athena uses Iceberg 1.4.2 libraries; spec v3 features (deletion vectors, row lineage) not available. Creates/writes only spec v2 tables', externalLinks: [ { label: 'Query Apache Iceberg Tables', @@ -151,5 +152,65 @@ OPTIMIZE iceberg_sales_data REWRITE DATA;`, 'Consider millisecond timestamp precision limitations when designing schemas', 'Use schema evolution (ADD/DROP/RENAME COLUMNS) for metadata-only table changes', 'Plan external ingestion strategy as Athena provides no streaming or CDC capabilities' - ] -}; \ No newline at end of file + ], + versions: { + v2: { + features: { + catalogs: { + support: 'partial', + details: 'Only AWS Glue Data Catalog supported for Iceberg. Hive, REST, Nessie, or JDBC catalogs not recognized. Polaris and Unity Catalog accessible via Glue Catalog Federation (read-only)', + externalLinks: [{ label: 'AWS Glue Data Catalog', url: 'https://docs.aws.amazon.com/athena/latest/ug/data-sources-glue.html' }] + }, + readWrite: { + support: 'full', + details: 'SELECT, CREATE TABLE STORED AS ICEBERG, CTAS, INSERT INTO. 
All writes create new snapshots and become immediately queryable', + externalLinks: [{ label: 'Create Iceberg Tables', url: 'https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html' }] + }, + dml: { + support: 'full', + details: 'INSERT INTO, UPDATE, DELETE, and MERGE INTO supported via Athena Engine v3. UPDATE/DELETE/MERGE write position-delete files (MoR) for row-level changes', + externalLinks: [{ label: 'MERGE INTO Operations', url: 'https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-merge-into.html' }] + }, + morCow: { + support: 'full', + details: 'Supports merge-on-read for both position and equality deletes. Copy-on-write is the default write mode; however MERGE and DELETE always use MoR regardless of table properties', + externalLinks: [{ label: 'Apache Iceberg on AWS Guide', url: 'https://docs.aws.amazon.com/prescriptive-guidance/latest/apache-iceberg-on-aws/iceberg-athena.html' }] + }, + streaming: { + support: 'none', + details: 'No built-in streaming ingestion or CDC APIs. External tools (Glue ETL, Flink) must land data in Iceberg; Athena queries latest committed snapshot' + }, + formatV3: { + support: 'none', + details: 'Iceberg Format V3 features are not applicable to V2 tables.' + }, + timeTravel: { + support: 'full', + details: 'FOR TIMESTAMP AS OF and FOR VERSION AS OF clauses let you query historical snapshots with millisecond precision', + externalLinks: [{ label: 'Athena Iceberg Tutorial', url: 'https://aws-sdk-pandas.readthedocs.io/en/3.3.0/tutorials/039%20-%20Athena%20Iceberg.html' }] + }, + security: { + support: 'full', + details: 'Access enforced through IAM plus AWS Lake Formation policies (column-, row-, and cell-level). 
Lake Formation filters govern metadata table visibility', + externalLinks: [{ label: 'Lake Formation Fine-grained Access', url: 'https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-table-data.html' }] + } + }, + score: 22, + description: 'Athena Engine v3 provides full Iceberg V2 support with complete DML operations, time travel, and deep AWS ecosystem integration — V3 format is not supported' + }, + v3: { + features: { + catalogs: { support: 'none', details: 'Athena does not support Iceberg V3 format tables' }, + readWrite: { support: 'none', details: 'Athena does not support Iceberg V3 format tables' }, + dml: { support: 'none', details: 'Athena does not support Iceberg V3 format tables' }, + morCow: { support: 'none', details: 'Athena does not support Iceberg V3 format tables' }, + streaming: { support: 'none', details: 'Athena does not support Iceberg V3 format tables' }, + formatV3: { support: 'none', details: 'Athena uses Iceberg 1.4.2 libraries; spec V3 features (deletion vectors, row lineage) are not available. Tables are created as spec V2 only' }, + timeTravel: { support: 'none', details: 'Athena does not support Iceberg V3 format tables' }, + security: { support: 'none', details: 'Athena does not support Iceberg V3 format tables' } + }, + score: 0, + description: 'Athena does not support Iceberg V3 format tables. 
All Athena Iceberg operations are limited to spec V2' + } + } +}); \ No newline at end of file diff --git a/src/data/query-engines/bigquery.ts b/src/data/query-engines/bigquery.ts index 648a9da9..85ed1995 100644 --- a/src/data/query-engines/bigquery.ts +++ b/src/data/query-engines/bigquery.ts @@ -1,7 +1,8 @@ // data/query-engines/bigquery.ts import { QueryEngine } from '../../types/iceberg'; +import { createVersionedEngine } from './versioning'; -export const bigquery: QueryEngine = { +export const bigquery: QueryEngine = createVersionedEngine({ id: 'bigquery', name: 'Google BigQuery', description: 'Serverless Google Cloud data warehouse with managed Iceberg tables, automatic optimization, Storage Write API streaming, and deep GCP ecosystem integration', @@ -10,7 +11,7 @@ export const bigquery: QueryEngine = { documentation: 'https://cloud.google.com/bigquery/docs/iceberg-tables', features: { catalogs: { - support: 'partial', + support: 'none', details: 'BigQuery-managed Iceberg (internal catalog) and BigLake external Iceberg (Dataplex, HMS, AWS Glue via GCS). No direct REST/Nessie support', externalLinks: [ { @@ -63,7 +64,7 @@ export const bigquery: QueryEngine = { }, streaming: { support: 'partial', - details: 'High-throughput streaming via Storage Write API (Preview) - Dataflow, Beam, Spark. 
No built-in CDC apply; use Datastream + Dataflow patterns', + details: 'High-throughput streaming via Storage Write API; CDC ingestion available (preview) via Datastream BigLake Iceberg destination and Storage Write API UPSERT mode (_CHANGE_SEQUENCE_NUMBER)', externalLinks: [ { label: 'Storage Write API Streaming', @@ -137,6 +138,67 @@ WHEN NOT MATCHED THEN -- Time travel query SELECT * FROM iceberg_dataset.sales_data FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR);`, + versions: { + v2: { + features: { + catalogs: { + support: 'none', + details: 'No native support for external Iceberg catalogs like Hive Metastore, AWS Glue, REST, Nessie, Polaris, Unity Catalog, Hadoop, or JDBC. BigQuery relies on BigLake Metastore and managed catalog abstractions instead', + externalLinks: [{ label: 'BigQuery Managed Iceberg Tables', url: 'https://cloud.google.com/blog/products/data-analytics/announcing-bigquery-tables-for-apache-iceberg' }] + }, + readWrite: { + support: 'full', + details: 'Full read and write support for BigLake Iceberg tables including INSERT, MERGE, UPDATE, and DELETE via GoogleSQL', + externalLinks: [{ label: 'BigQuery Iceberg DML Operations', url: 'https://cloud.google.com/bigquery/docs/iceberg-tables#dml' }] + }, + dml: { + support: 'full', + details: 'MERGE, UPDATE, DELETE fully supported through GoogleSQL with transactional guarantees', + externalLinks: [{ label: 'Data Manipulation Language DML', url: 'https://cloud.google.com/bigquery/docs/iceberg-tables#dml' }] + }, + morCow: { + support: 'partial', + details: 'Copy-on-write is fully supported, while merge-on-read, position deletes, and equality deletes are only partially supported and abstracted from the user', + externalLinks: [{ label: 'Automatic Storage Optimization', url: 'https://cloud.google.com/blog/products/data-analytics/announcing-bigquery-tables-for-apache-iceberg' }] + }, + streaming: { + support: 'partial', + details: 'Streaming ingestion via Storage Write API 
(GA); CDC available in preview via Datastream BigLake Iceberg destination and Storage Write API UPSERT (_CHANGE_SEQUENCE_NUMBER)', + externalLinks: [{ label: 'Storage Write API Streaming', url: 'https://cloud.google.com/blog/products/data-analytics/announcing-bigquery-tables-for-apache-iceberg' }] + }, + formatV3: { + support: 'none', + details: 'Iceberg Format V3 features are not applicable to V2 tables.' + }, + timeTravel: { + support: 'full', + details: 'Full snapshot-based time travel support for querying historical versions of data.', + externalLinks: [{ label: 'Time Travel for Historical Data', url: 'https://cloud.google.com/bigquery/docs/iceberg-tables#time_travel' }] + }, + security: { + support: 'full', + details: 'IAM permissions like native BigQuery tables. Column-level security & masking on managed Iceberg. External via BigLake/Dataplex policy tags', + externalLinks: [{ label: 'Column-level Security and Data Masking', url: 'https://cloud.google.com/bigquery/docs/iceberg-tables#security' }] + } + }, + score: 20, + description: 'BigQuery supports full Iceberg V2 operations with managed tables, automatic optimization, and deep GCP ecosystem integration — V3 format is not supported' + }, + v3: { + features: { + catalogs: { support: 'none', details: 'BigQuery only supports Iceberg V2 snapshot export; V3 is not supported' }, + readWrite: { support: 'none', details: 'BigQuery only exports Iceberg V2 snapshots; V3 read/write support is not available' }, + dml: { support: 'none', details: 'BigQuery only supports Iceberg V2 snapshot export; V3 is not supported' }, + morCow: { support: 'none', details: 'BigQuery only supports Iceberg V2 snapshot export; V3 is not supported' }, + streaming: { support: 'none', details: 'BigQuery only supports Iceberg V2; V3 streaming not supported' }, + formatV3: { support: 'none', details: 'BigQuery only supports Iceberg V2 snapshot export; V3 format is not supported. 
Only Iceberg V2 format is supported for snapshot export' }, + timeTravel: { support: 'none', details: 'BigQuery only supports Iceberg V2 snapshot export; V3 time travel is not available' }, + security: { support: 'none', details: 'BigQuery only supports Iceberg V2 snapshot export; V3 is not supported' } + }, + score: 0, + description: 'BigQuery does not support Iceberg V3 format tables. All BigQuery Iceberg operations are limited to spec V2' + } + }, bestPractices: [ 'Enable Iceberg Tables Preview or BigLake Iceberg in your GCP project for access', 'Use managed Iceberg tables for full DML capabilities and automatic optimization', @@ -159,4 +221,4 @@ FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR);`, 'Leverage end-to-end lineage through Dataplex integration', 'Use external table writes via Dataflow/Spark when BigQuery-native DML is insufficient' ] -}; \ No newline at end of file +}); \ No newline at end of file diff --git a/src/data/query-engines/clickhouse.ts b/src/data/query-engines/clickhouse.ts index a59317a7..50927e9c 100644 --- a/src/data/query-engines/clickhouse.ts +++ b/src/data/query-engines/clickhouse.ts @@ -1,10 +1,11 @@ // data/query-engines/clickhouse.ts import { QueryEngine } from '../../types/iceberg'; +import { createVersionedEngine } from './versioning'; -export const clickhouse: QueryEngine = { +export const clickhouse: QueryEngine = createVersionedEngine({ id: 'clickhouse', - name: 'ClickHouse v25.4', - description: 'Rapidly evolving OLAP database with experimental Iceberg read support, time travel, REST catalogs, and comprehensive write capabilities planned for 2025', + name: 'ClickHouse v25.9', + description: 'Rapidly evolving OLAP database with Iceberg read+write support (v25.7+), time travel (v25.4+), REST catalogs, and basic DML (INSERT/ALTER DELETE/ALTER UPDATE) via LTS v25.8', category: 'general-purpose', website: 'https://clickhouse.com/', documentation: 
'https://clickhouse.com/docs/en/engines/table-engines/integrations/iceberg', @@ -29,25 +30,25 @@ export const clickhouse: QueryEngine = { }, readWrite: { support: 'partial', - details: 'Read-only: ENGINE=Iceberg tables and icebergS3()/icebergCluster() functions; full SQL on Parquet files. Writes/compaction scheduled Q3 2025', + details: 'Full reads via ENGINE=Iceberg and icebergS3()/icebergCluster() functions. Write support added v25.7 (INSERT INTO existing tables), CREATE TABLE and DROP TABLE added v25.8 (LTS). No compaction yet', externalLinks: [ { - label: 'ClickHouse Release 25.04', - url: 'https://clickhouse.com/blog/clickhouse-release-25-04' + label: 'ClickHouse Release 25.08 (LTS)', + url: 'https://clickhouse.com/blog/clickhouse-release-25-08' }, { - label: 'Iceberg Write Support Tracking', - url: 'https://github.com/ClickHouse/ClickHouse/issues/71407' + label: 'ClickHouse 2025 Year in Review', + url: 'https://clickhouse.com/blog/clickhouse-2025-roundup' } ] }, dml: { - support: 'none', - details: 'Reading of position & equality deletes supported since 24.12; queries merge delete files on-the-fly (MoR). No DELETE/UPDATE/MERGE writers until write support lands', + support: 'partial', + details: 'INSERT INTO existing tables (v25.7+), CREATE TABLE (v25.8+), ALTER TABLE DELETE (positional+equality deletes, v25.8+), ALTER TABLE UPDATE (v25.9+). 
MERGE not yet supported', externalLinks: [ { - label: 'Delete Files Support', - url: 'https://clickhouse.com/blog/clickhouse-release-24-12' + label: 'ClickHouse Release 25.08', + url: 'https://clickhouse.com/blog/clickhouse-release-25-08' }, { label: 'DML Write Support Tracking', @@ -133,6 +134,65 @@ SETTINGS iceberg_timestamp_ms = 1640995200000; -- Use cluster function for distributed reads SELECT * FROM icebergCluster('cluster', 's3://bucket/warehouse/table/');`, + versions: { + v2: { + features: { + catalogs: { + support: 'full', + details: 'REST catalog (icebergS3 function with storage_catalog_type), AWS Glue, Polaris (partial), Unity Catalog (partial) supported via icebergS3/icebergLocal table functions and experimental DataLakeCatalog engine', + externalLinks: [{ label: 'Iceberg Engine Documentation', url: 'https://clickhouse.com/docs/en/engines/table-engines/integrations/iceberg' }] + }, + readWrite: { + support: 'partial', + details: 'Full reads via icebergS3/icebergAzure/icebergLocal table functions and ENGINE=Iceberg. INSERT INTO existing tables (v25.7+), CREATE TABLE and DROP TABLE (v25.8+, LTS). No compaction', + externalLinks: [{ label: 'ClickHouse Release 25.08', url: 'https://clickhouse.com/blog/clickhouse-release-25-08' }] + }, + dml: { + support: 'partial', + details: 'INSERT INTO (v25.7+), ALTER TABLE DELETE with positional and equality deletes (v25.8+), ALTER TABLE UPDATE (v25.9+). No MERGE support. 
Applies to V2 tables', + externalLinks: [{ label: 'ClickHouse Release 25.08', url: 'https://clickhouse.com/blog/clickhouse-release-25-08' }] + }, + morCow: { + support: 'partial', + details: 'Reads CoW and MoR (position+equality deletes) since v24.12; writes position/equality delete files for ALTER TABLE DELETE (v25.8+); no compaction', + externalLinks: [{ label: 'ClickHouse Release 25.08', url: 'https://clickhouse.com/blog/clickhouse-release-25-08' }] + }, + streaming: { + support: 'none', + details: 'No native streaming ingestion; users poll Iceberg or ingest with ClickHouse Kafka engine' + }, + formatV3: { + support: 'none', + details: 'Iceberg Format V3 features are not applicable to V2 tables.' + }, + timeTravel: { + support: 'full', + details: 'Time travel via SET iceberg_timestamp_ms= or iceberg_snapshot_id since v25.4; partition pruning via use_iceberg_partition_pruning=1', + externalLinks: [{ label: 'Time Travel in 25.4', url: 'https://clickhouse.com/blog/clickhouse-release-25-04' }] + }, + security: { + support: 'partial', + details: 'Relies on object-store credentials (AWS_ACCESS_KEY_ID, S3 V4 tokens) or catalog credential vending; ClickHouse RBAC controls database/table access; no column-masking yet' + } + }, + score: 16, + description: 'ClickHouse v25.8+ (LTS) supports Iceberg V2 read+write with INSERT INTO, CREATE/DROP TABLE, ALTER TABLE DELETE/UPDATE, time travel, and REST catalog support; MERGE and V3 not yet available' + }, + v3: { + features: { + catalogs: { support: 'none', details: 'ClickHouse only supports reading Iceberg V1 and V2 via table functions; V3 is not yet supported' }, + readWrite: { support: 'none', details: 'ClickHouse currently supports reading Iceberg V1 and V2 only; V3 support is not yet available' }, + dml: { support: 'none', details: 'DML (INSERT, ALTER TABLE DELETE/UPDATE) is available for V2 tables only; V3 not supported' }, + morCow: { support: 'none', details: 'V3 deletion vectors not supported; only V1 and V2 tables readable' }, + streaming: { support: 
'none', details: 'No streaming support for any version' }, + formatV3: { support: 'none', details: 'Iceberg Format V3 is not yet supported in ClickHouse; V3 reader/writer planned for a future release' }, + timeTravel: { support: 'none', details: 'Time travel only available for V1 and V2 tables; V3 not supported' }, + security: { support: 'none', details: 'V3 not supported; security features are V2 only' } + }, + score: 0, + description: 'ClickHouse does not yet support Iceberg V3 format tables. V3 reader/writer planned for a future release' + } + }, bestPractices: [ 'Use ClickHouse v25.4+ for time travel and metadata caching capabilities', 'Leverage REST catalog support (24.12+) for integration with Nessie, Polaris/Unity, and Glue', @@ -155,4 +215,4 @@ SELECT * FROM icebergCluster('cluster', 's3://bucket/warehouse/table/');`, 'Use materialized views for query acceleration on frequently accessed data', 'Monitor GitHub issues for rapid feature development and breaking changes' ] -}; \ No newline at end of file +}); \ No newline at end of file diff --git a/src/data/query-engines/databricks.ts b/src/data/query-engines/databricks.ts index d02d6b54..5ce010ad 100644 --- a/src/data/query-engines/databricks.ts +++ b/src/data/query-engines/databricks.ts @@ -1,36 +1,33 @@ // data/query-engines/databricks.ts import { QueryEngine } from '../../types/iceberg'; +import { createVersionedEngine } from './versioning'; -export const databricks: QueryEngine = { +export const databricks: QueryEngine = createVersionedEngine({ id: 'databricks', - name: 'Databricks Runtime 14.3 LTS+', - description: 'UniForm technology enables multi-format lakehouse with read-only Iceberg views of Delta tables via Unity Catalog REST endpoint', + name: 'Databricks (DBR 16.4+)', + description: 'Native Iceberg support in Databricks Runtime 16.4+ with Unity Catalog. 
Full DML on managed Iceberg tables, deletion vectors (V3 Beta in DBR 17.3), and UniForm for multi-format lakehouse interoperability', category: 'lakehouse', website: 'https://databricks.com/', documentation: 'https://docs.databricks.com/aws/en/delta/uniform.html', features: { catalogs: { - support: 'partial', - details: 'Unity Catalog exposes Iceberg REST catalog at /api/2.1/unity-catalog/iceberg for external engines; UniForm tables generate Iceberg metadata on Delta commits', + support: 'full', + details: 'Unity Catalog implements the Iceberg REST Catalog API for both read and write on managed Iceberg tables. Hive Metastore and AWS Glue accessible via Lakehouse Federation (read-only). Nessie, Hadoop, JDBC not supported', externalLinks: [ { label: 'Unity Catalog Iceberg Endpoint', url: 'https://docs.databricks.com/aws/en/external-access/iceberg.html' }, { - label: 'Read Delta Tables with Iceberg Clients', - url: 'https://docs.databricks.com/aws/en/delta/uniform.html' + label: 'Full Apache Iceberg Support Announcement', + url: 'https://www.databricks.com/blog/announcing-full-apache-iceberg-support-databricks' } ] }, readWrite: { - support: 'partial', - details: 'Full reads via REST catalog or direct metadata paths; writes supported for Managed Iceberg Tables via external engines, but UniForm Delta tables are read-only for Iceberg clients', + support: 'full', + details: 'Full read/write for native managed Iceberg V3 tables via Unity Catalog (DBR 18.0+ Public Preview). INSERT INTO, CREATE TABLE, and full DDL supported. 
External engines can read and write via REST catalog', externalLinks: [ - { - label: 'Read Delta Tables with Iceberg Clients', - url: 'https://docs.databricks.com/aws/en/delta/uniform.html' - }, { label: 'Full Apache Iceberg Support Announcement', url: 'https://www.databricks.com/blog/announcing-full-apache-iceberg-support-databricks' @@ -38,52 +35,56 @@ export const databricks: QueryEngine = { ] }, dml: { - support: 'limited', - details: 'Full DML (INSERT, MERGE, UPDATE, DELETE) available for Delta users inside Databricks; Iceberg clients can only read through REST catalog', + support: 'full', + details: 'MERGE, UPDATE, DELETE fully supported for native managed Iceberg V3 tables with deletion vectors for efficient row-level changes (DBR 18.0+ Public Preview)', externalLinks: [ { - label: 'Read Delta Tables with Iceberg Clients - Limitations', - url: 'https://docs.databricks.com/aws/en/delta/uniform.html' + label: 'Unity Catalog Iceberg Endpoint', + url: 'https://docs.databricks.com/aws/en/external-access/iceberg.html' } ] }, morCow: { - support: 'limited', - details: 'Copy-on-Write (CoW) semantics for Delta commits; no Iceberg delete files produced; external readers see fully merged snapshots', + support: 'partial', + details: 'V3 uses deletion vectors (MoR-like) for efficient row-level deletes without rewriting data files. Copy-on-write also fully supported. 
V2 position/equality deletes not used — DVs replace them in V3', externalLinks: [ { - label: 'Unity Catalog Iceberg Endpoint - Notes', - url: 'https://docs.databricks.com/aws/en/external-access/iceberg.html' + label: 'Full Apache Iceberg Support Announcement', + url: 'https://www.databricks.com/blog/announcing-full-apache-iceberg-support-databricks' } ] }, streaming: { - support: 'internal', - details: 'Spark Structured Streaming and Delta Change Data Feed available inside Databricks; Iceberg REST interface does not expose streaming ingestion or CDC endpoints', + support: 'none', + details: 'Change Data Feed (CDF) not supported on Iceberg tables — CDF is Delta Lake-only. Iceberg V3 row lineage provides CDC building blocks but CDC streaming is not directly exposed', externalLinks: [ { - label: 'Read Delta Tables - Streaming Limitations', + label: 'Read Delta Tables with Iceberg Clients', url: 'https://docs.databricks.com/aws/en/delta/uniform.html' } ] }, formatV3: { - support: 'none', - details: 'UniForm currently targets Iceberg spec v2 only; no public roadmap for v3 support announced yet', + support: 'partial', + details: 'Iceberg V3 Public Preview in DBR 18.0+. Supports deletion vectors, VARIANT columns, V3 table creation. Existing V2 tables can be upgraded. 
V2 position/equality deletes not supported — Databricks uses DVs (V3) exclusively', externalLinks: [ { - label: 'Full Apache Iceberg Support - V2 Preview', + label: 'Iceberg V3 in Databricks', + url: 'https://docs.databricks.com/aws/en/iceberg/iceberg-v3' + }, + { + label: 'Full Apache Iceberg Support Announcement', url: 'https://www.databricks.com/blog/announcing-full-apache-iceberg-support-databricks' } ] }, timeTravel: { support: 'full', - details: 'External engines can time-travel by snapshot-ID or timestamp using standard Iceberg syntax; includes converted_delta_version and converted_delta_timestamp properties', + details: 'Time travel and RESTORE TABLE supported for managed Iceberg V3 tables (DBR 18.0+ Public Preview). For foreign Iceberg tables, time travel is limited', externalLinks: [ { - label: 'Iceberg REST API Specification', - url: 'https://github.com/apache/iceberg/blob/master/api/src/main/java/org/apache/iceberg/rest/RestCatalog.java' + label: 'Unity Catalog Iceberg Endpoint', + url: 'https://docs.databricks.com/aws/en/external-access/iceberg.html' } ] }, @@ -116,6 +117,52 @@ TBLPROPERTIES ( -- External Iceberg client connection -- REST Catalog: https:///api/2.1/unity-catalog/iceberg -- OAuth Token: `, + versions: { + v2: { + features: { + catalogs: { + support: 'full', + details: 'Unity Catalog provides native Iceberg REST Catalog API for both read and write on managed Iceberg V2 tables (Public Preview, DBR 16.4 LTS). Hive Metastore and AWS Glue accessible via Lakehouse Federation (read-only)', + externalLinks: [{ label: 'Unity Catalog Iceberg Endpoint', url: 'https://docs.databricks.com/aws/en/external-access/iceberg.html' }] + }, + readWrite: { + support: 'full', + details: 'Full read/write for native managed Iceberg V2 tables via Unity Catalog (Public Preview, DBR 16.4 LTS). 
INSERT INTO, CREATE TABLE, and full DDL supported', + externalLinks: [{ label: 'Full Apache Iceberg Support Announcement', url: 'https://www.databricks.com/blog/announcing-full-apache-iceberg-support-databricks' }] + }, + dml: { + support: 'full', + details: 'MERGE, UPDATE, DELETE fully supported for native managed Iceberg V2 tables via copy-on-write. Managed Iceberg tables use CoW exclusively in V2', + externalLinks: [{ label: 'Unity Catalog Iceberg Endpoint', url: 'https://docs.databricks.com/aws/en/external-access/iceberg.html' }] + }, + morCow: { + support: 'partial', + details: 'Copy-on-write is the default and only write mode for managed Iceberg V2 tables. Iceberg V2 position and equality deletes are explicitly not supported — Databricks uses deletion vectors (V3 feature) instead', + externalLinks: [{ label: 'Full Apache Iceberg Support Announcement', url: 'https://www.databricks.com/blog/announcing-full-apache-iceberg-support-databricks' }] + }, + streaming: { + support: 'none', + details: 'Change Data Feed not supported on Iceberg tables. CDC and streaming ingestion must use external engines (Flink, Spark Structured Streaming) writing to Iceberg' + }, + formatV3: { + support: 'none', + details: 'Iceberg Format V3 features are not applicable to V2 tables.' + }, + timeTravel: { + support: 'full', + details: 'Time travel fully supported for managed Iceberg V2 tables by snapshot ID and timestamp. 
For foreign Iceberg tables, time travel is limited', + externalLinks: [{ label: 'Unity Catalog Iceberg Endpoint', url: 'https://docs.databricks.com/aws/en/external-access/iceberg.html' }] + }, + security: { + support: 'full', + details: 'Unity Catalog RBAC governs access; Iceberg REST clients receive temporary, scoped cloud-storage credentials via credential vending during handshake', + externalLinks: [{ label: 'Access Databricks Tables from Iceberg Clients', url: 'https://docs.databricks.com/aws/en/external-access/iceberg.html' }] + } + }, + score: 22, + description: 'Databricks DBR 16.4+ provides full native Iceberg V2 support with Unity Catalog, complete DML operations, and copy-on-write semantics for managed tables' + } + }, bestPractices: [ 'Use Databricks Runtime 14.3 LTS or newer for IcebergCompatV2 feature support', 'Enable UniForm via delta.universalFormat.enabledFormats=iceberg for new tables', @@ -134,4 +181,4 @@ TBLPROPERTIES ( 'Use Unity Catalog REST API v2.1 for proper Iceberg catalog endpoint access', 'Test external engine compatibility before production deployment of UniForm tables' ] -}; \ No newline at end of file +}); \ No newline at end of file diff --git a/src/data/query-engines/doris.ts b/src/data/query-engines/doris.ts index 5acd8c1e..438c5d9e 100644 --- a/src/data/query-engines/doris.ts +++ b/src/data/query-engines/doris.ts @@ -1,7 +1,8 @@ // data/query-engines/doris.ts import { QueryEngine } from '../../types/iceberg'; +import { createVersionedEngine } from './versioning'; -export const doris: QueryEngine = { +export const doris: QueryEngine = createVersionedEngine({ id: 'doris', name: 'Apache Doris v2.1+', description: 'MPP analytical database with comprehensive Iceberg read/write capabilities, vectorized execution, materialized view acceleration, and multi-catalog support for lake ingestion and analytics', @@ -162,5 +163,21 @@ SELECT * FROM iceberg_meta("table"="sales_data", "query_type"="snapshots");`, 'Be aware that Avro data files are 
not supported in current versions', 'Configure appropriate catalog credentials and metastore URIs for secure access', 'Monitor Iceberg client version (currently 1.6.1) for compatibility with other engines' - ] -}; \ No newline at end of file + ], + versions: { + v3: { + features: { + catalogs: { support: 'none', details: 'Apache Doris supports only Iceberg spec v1 & v2; V3 catalog operations not supported' }, + readWrite: { support: 'none', details: 'Cannot read or write Iceberg V3 format tables; V3 spec work follows upstream Iceberg roadmap' }, + dml: { support: 'none', details: 'DML operations produce V2 format outputs only; V3 table format not supported' }, + morCow: { support: 'none', details: 'V3 deletion vectors not supported; only V2 position/equality delete files supported' }, + streaming: { support: 'none', details: 'No native streaming for any format version' }, + formatV3: { support: 'none', details: 'Supports spec v1 & v2 only; spec v3 work follows upstream Iceberg roadmap — no GA support yet' }, + timeTravel: { support: 'none', details: 'Time travel via FOR TIMESTAMP/VERSION AS OF only for V1/V2 format tables' }, + security: { support: 'none', details: 'V3 format not supported; Doris RBAC and catalog IAM apply to V1/V2 tables only' } + }, + score: 0, + description: 'Apache Doris v2.1+ supports Iceberg spec V1/V2 only; Format V3 (deletion vectors, row lineage, new data types) not yet supported' + } + } +}); \ No newline at end of file diff --git a/src/data/query-engines/dremio.ts b/src/data/query-engines/dremio.ts index 6c12c5de..5aeced2a 100644 --- a/src/data/query-engines/dremio.ts +++ b/src/data/query-engines/dremio.ts @@ -1,7 +1,8 @@ // data/query-engines/dremio.ts import { QueryEngine } from '../../types/iceberg'; +import { createVersionedEngine } from './versioning'; -export const dremio: QueryEngine = { +export const dremio: QueryEngine = createVersionedEngine({ id: 'dremio', name: 'Dremio v26', description: 'Full Iceberg authoring engine with 
built-in Polaris catalog, complete DML including MERGE, Arctic git-like branching, and Data Reflections acceleration', @@ -88,9 +89,13 @@ export const dremio: QueryEngine = { ] }, formatV3: { - support: 'none', - details: 'Planned (2025) - roadmap calls for reading Deletion Vectors & row-lineage columns first; writer support (DV emission) to follow once Iceberg 1.8+ library adopted', + support: 'full', + details: 'GA for Dremio Cloud (announced April 6, 2026): full V3 read+write including Deletion Vectors, VARIANT columns for JSON, and row-level lineage. Self-managed deployment roadmap follows', externalLinks: [ + { + label: 'Dremio V3 GA Announcement (April 2026)', + url: 'https://www.globenewswire.com/news-release/2026/04/06/3268593/0/en/Dremio-Deepens-Apache-Iceberg-Leadership-with-V3-Support-New-Community-Appointments-and-Polaris-Momentum.html' + }, { label: 'What\'s New in Iceberg v3?', url: 'https://www.dremio.com/blog/apache-iceberg-v3/' @@ -176,5 +181,21 @@ SELECT * FROM iceberg_catalog.sales.orders@main;`, 'Use Arctic commit logs for comprehensive audit trails and data lineage', 'Leverage Dremio\'s catalog credential vending for secure multi-tenant access', 'Plan for Format V3 support arriving in 2025 with deletion vectors and row lineage' - ] -}; \ No newline at end of file + ], + versions: { + v3: { + features: { + catalogs: { support: 'full', details: 'Polaris/Dremio Catalog, Generic REST, Arctic/Nessie, HMS, AWS Glue, Hadoop fully support V3 table registration and queries (Dremio Cloud, April 2026)' }, + readWrite: { support: 'full', details: 'Full V3 read+write GA for Dremio Cloud (April 2026): reads and writes tables with Deletion Vectors, VARIANT columns, and row lineage' }, + dml: { support: 'full', details: 'MERGE, UPDATE, DELETE, INSERT with V3 Deletion Vectors for efficient MoR-style row-level changes (Dremio Cloud GA, April 2026)' }, + morCow: { support: 'full', details: 'V3 Deletion Vectors supported for MoR-style writes; Copy-on-Write also 
available (Dremio Cloud, April 2026); self-managed roadmap follows' }, + streaming: { support: 'none', details: 'No native streaming for any format version; external engines ingest into V3 tables' }, + formatV3: { support: 'full', details: 'GA for Dremio Cloud (April 6, 2026): Deletion Vectors, VARIANT type for JSON, row-level lineage. Self-managed deployment V3 roadmap follows' }, + timeTravel: { support: 'full', details: 'Arctic/Nessie branches, tags, and snapshot-based time travel fully supported for V3 tables in Dremio Cloud' }, + security: { support: 'full', details: 'Dremio RBAC, column masking, and credential vending apply to V3 tables; Arctic commit log provides full audit trail' } + }, + score: 28, + description: 'Dremio Cloud (April 2026) provides full Iceberg V3 GA support including Deletion Vectors, VARIANT type, and row lineage; self-managed Dremio v26 V3 support on roadmap' + } + } +}); \ No newline at end of file diff --git a/src/data/query-engines/duckdb.ts b/src/data/query-engines/duckdb.ts index 18aae3d6..23c0237a 100644 --- a/src/data/query-engines/duckdb.ts +++ b/src/data/query-engines/duckdb.ts @@ -1,7 +1,8 @@ // data/query-engines/duckdb.ts import { QueryEngine } from '../../types/iceberg'; +import { createVersionedEngine } from './versioning'; -export const duckdb: QueryEngine = { +export const duckdb: QueryEngine = createVersionedEngine({ id: 'duckdb', name: 'DuckDB v1.3+', description: 'A light-weight, read-only analytics engine for Iceberg with SQL time travel, external file caching, and REST catalog support', @@ -11,12 +12,8 @@ export const duckdb: QueryEngine = { features: { catalogs: { support: 'partial', - details: 'Hadoop (file-system) and Iceberg REST catalogs supported via rest option with bearer/OAuth tokens; no native Hive/Glue catalog yet', + details: 'REST catalog, AWS Glue (via ENDPOINT_TYPE=glue), and Polaris supported for V3 tables with V2-compatible data types. 
Hive Metastore, Nessie, Hadoop, JDBC not supported', externalLinks: [ - { - label: 'Iceberg Extension Overview', - url: 'https://duckdb.org/docs/stable/core_extensions/iceberg/overview.html' - }, { label: 'Iceberg REST Catalogs', url: 'https://duckdb.org/docs/stable/core_extensions/iceberg/iceberg_rest_catalogs.html' @@ -24,36 +21,28 @@ export const duckdb: QueryEngine = { ] }, readWrite: { - support: 'readonly', - details: 'Full SELECT support with predicate evaluation, manifest pruning and external file-cache to avoid re-downloading S3/GCS objects; write operations not available', + support: 'partial', + details: 'DuckDB can read V3 tables that use only V2-compatible data types. INSERT into V3 tables supported; UPDATE/DELETE on V3 not yet documented', externalLinks: [ { label: 'Iceberg Extension Overview', url: 'https://duckdb.org/docs/stable/core_extensions/iceberg/overview.html' - }, - { - label: 'Troubleshooting - Write Limitations', - url: 'https://duckdb.org/docs/stable/core_extensions/iceberg/troubleshooting.html' } ] }, dml: { support: 'none', - details: 'iceberg_scan() and CREATE VIEW for reads; metadata helper functions available; no INSERT/UPDATE/DELETE/MERGE operations', + details: 'MERGE INTO not supported on any version. UPDATE/DELETE on V3 tables not yet documented. INSERT into V3 supported per extension limitations page', externalLinks: [ { - label: 'GitHub - duckdb-iceberg README', - url: 'https://github.com/duckdb/duckdb-iceberg' - }, - { - label: 'Troubleshooting - Writing Not Supported', + label: 'Troubleshooting - Current Limitations', url: 'https://duckdb.org/docs/stable/core_extensions/iceberg/troubleshooting.html' } ] }, morCow: { - support: 'limited', - details: 'Reading tables with deletes is not yet supported; only Copy-on-Write tables without delete files can be read', + support: 'partial', + details: 'V3 tables with position deletes can be read if only V2-compatible data types are used. 
Full MoR/CoW behavior on V3 not yet documented', externalLinks: [ { label: 'Troubleshooting - Delete Limitations', @@ -72,13 +61,9 @@ export const duckdb: QueryEngine = { ] }, formatV3: { - support: 'none', - details: 'DuckDB 1.3 only reads v1 & v2 tables; v3 metadata changes will be evaluated post-GA of the spec', + support: 'partial', + details: 'DuckDB can read V3 tables that use only V2-compatible data types. V3-only data types (nanosecond timestamps, geometry, vector, shredded variant) are not supported', externalLinks: [ - { - label: 'Iceberg Extension Overview', - url: 'https://duckdb.org/docs/stable/core_extensions/iceberg/overview.html' - }, { label: 'Troubleshooting - Current Limitations', url: 'https://duckdb.org/docs/stable/core_extensions/iceberg/troubleshooting.html' @@ -86,8 +71,8 @@ export const duckdb: QueryEngine = { ] }, timeTravel: { - support: 'full', - details: 'Convenient SQL syntax: SELECT * FROM tbl AT (VERSION => 314159) or AT (TIMESTAMP => \'2025-05-01 10:15:00\'); older function-style still works', + support: 'partial', + details: 'Time travel on V3 tables likely works for V2-compatible data types via AT (VERSION => snapshot_id) and AT (TIMESTAMP => ts) syntax, but not explicitly documented for V3', externalLinks: [ { label: 'Iceberg Extension Overview', @@ -96,13 +81,9 @@ export const duckdb: QueryEngine = { ] }, security: { - support: 'basic', - details: 'Uses DuckDB\'s standard S3/Azure creds in httpfs extension; REST-catalog tokens may be supplied per-session; no built-in RBAC/row-masking', + support: 'partial', + details: 'Uses DuckDB\'s standard S3/Azure credentials in httpfs extension; REST-catalog OAuth2 tokens supported since v1.3+; no built-in RBAC or row-level masking', externalLinks: [ - { - label: 'S3 Iceberg Import', - url: 'https://duckdb.org/docs/stable/guides/network_cloud_storage/s3_iceberg_import.html' - }, { label: 'Iceberg REST Catalogs Authentication', url: 
'https://duckdb.org/docs/stable/core_extensions/iceberg/iceberg_rest_catalogs.html' @@ -128,6 +109,64 @@ CREATE SECRET iceberg_rest ( -- Time travel query SELECT * FROM iceberg_scan('/bucket/table/') AT (TIMESTAMP => '2025-05-01 10:15:00');`, + versions: { + v2: { + features: { + catalogs: { + support: 'full', + details: 'Supports attaching to Iceberg REST Catalogs (OAuth2 since v1.3+), AWS Glue via ENDPOINT_TYPE=glue, Polaris, and S3 Tables. Only REST-based catalogs supported — no Hive Metastore, Hadoop, Nessie, or JDBC', + externalLinks: [ + { label: 'Iceberg REST Catalogs', url: 'https://duckdb.org/docs/stable/core_extensions/iceberg/iceberg_rest_catalogs.html' } + ] + }, + readWrite: { + support: 'full', + details: 'Full read support for V1 and V2 tables via the iceberg extension with predicate push-down, manifest pruning, and external file cache. INSERT INTO supported since v1.4.0 via REST catalog attachment', + externalLinks: [ + { label: 'Iceberg Extension Overview', url: 'https://duckdb.org/docs/stable/core_extensions/iceberg/overview.html' } + ] + }, + dml: { + support: 'partial', + details: 'UPDATE and DELETE supported since v1.4.2; MERGE INTO and ALTER TABLE are not supported. Requires REST catalog attachment for write operations', + externalLinks: [ + { label: 'Troubleshooting - Current Limitations', url: 'https://duckdb.org/docs/stable/core_extensions/iceberg/troubleshooting.html' } + ] + }, + morCow: { + support: 'full', + details: 'Full MoR semantics: UPDATE/DELETE use positional deletes (MoR). INSERT uses COW semantics. 
Supports reading tables with position deletes and equality deletes', + externalLinks: [ + { label: 'Iceberg Extension Overview', url: 'https://duckdb.org/docs/stable/core_extensions/iceberg/overview.html' } + ] + }, + streaming: { + support: 'none', + details: 'Batch-only analytics engine; no built-in streaming ingestion or CDC subscribe APIs' + }, + formatV3: { + support: 'none', + details: 'Iceberg Format V3 features are not applicable to V2 tables.' + }, + timeTravel: { + support: 'full', + details: 'SQL time travel via AT (VERSION => snapshot_id) and AT (TIMESTAMP => ts) syntax. Older iceberg_scan() function-style parameters still work', + externalLinks: [ + { label: 'Iceberg Extension Overview', url: 'https://duckdb.org/docs/stable/core_extensions/iceberg/overview.html' } + ] + }, + security: { + support: 'partial', + details: 'Standard S3/Azure credentials via httpfs extension; REST-catalog OAuth2 tokens supported since v1.3+; no built-in RBAC or row-level masking', + externalLinks: [ + { label: 'S3 Iceberg Import', url: 'https://duckdb.org/docs/stable/guides/network_cloud_storage/s3_iceberg_import.html' } + ] + } + }, + score: 20, + description: 'DuckDB v1.4+ supports full Iceberg V2 read/write operations with REST catalog, time travel, MoR/CoW semantics, and partial DML (UPDATE/DELETE but no MERGE)' + } + }, bestPractices: [ 'Use DuckDB v1.3.0 or later for the built-in Iceberg extension', 'Configure external file-cache via SET s3_cache_size=\'4GB\'; to halve cold-scan latency', @@ -146,4 +185,4 @@ AT (TIMESTAMP => '2025-05-01 10:15:00');`, 'Use object-store IAM plus catalog ACLs for security and governance', 'Rely on cost-based optimization improvements in 1.3 for better query planning' ] -}; \ No newline at end of file +}); \ No newline at end of file diff --git a/src/data/query-engines/flink.ts b/src/data/query-engines/flink.ts index 9106d46f..11a94583 100644 --- a/src/data/query-engines/flink.ts +++ b/src/data/query-engines/flink.ts @@ -1,7 +1,8 @@ // 
data/query-engines/flink.ts import { QueryEngine } from '../../types/iceberg'; +import { createVersionedEngine } from './versioning'; -export const flink: QueryEngine = { +export const flink: QueryEngine = createVersionedEngine({ id: 'flink', name: 'Apache Flink 1.18+', description: 'The reference implementation for CDC to Iceberg with comprehensive streaming support, exactly-once semantics, and advanced FLIP-27 incremental reads', @@ -184,5 +185,21 @@ CREATE TABLE iceberg_catalog.db.events ( 'Run maintenance actions (rewrite_data_files) as separate Flink batch jobs', 'Monitor Flink job IDs in Iceberg snapshot summaries for troubleshooting', 'Use external tools for schema changes (ADD/RENAME columns) due to Flink DDL limitations' - ] -}; \ No newline at end of file + ], + versions: { + v2: { + features: { + catalogs: { support: 'full', details: 'Hive Metastore, Hadoop catalog, REST catalog (incl. Nessie), AWS Glue, JDBC, plus any custom implementation via catalog-impl' }, + readWrite: { support: 'full', details: 'Batch and streaming jobs read V2 snapshots or incremental DataStreams; Iceberg Sink commits on each Flink checkpoint with exactly-once semantics' }, + dml: { support: 'partial', details: 'INSERT append always available; row-level upserts via write.upsert.enabled=true on V2 tables, emitting V2 equality-delete files; MERGE INTO not supported in Flink SQL' }, + morCow: { support: 'full', details: 'Copy-on-Write for batch rewrites; Merge-on-Read for streaming/upsert with V2 position/equality delete files instead of partition rewrites' }, + streaming: { support: 'full', details: 'Reference engine for CDC → Iceberg V2: consume Debezium/Kafka changelogs, upsert with exactly-once semantics, FLIP-27 incremental reads' }, + formatV3: { support: 'none', details: 'Iceberg Format V3 features are not applicable to V2 tables.' 
}, + timeTravel: { support: 'full', details: 'Point-in-time reads via source options: start-snapshot-id, start-snapshot-timestamp, branch, tag; filter push-down and partition pruning automatic' }, + security: { support: 'full', details: 'Inherits ACLs from underlying catalog (Hive Ranger, AWS IAM, Nessie authorization); REST catalog secured with credential/token properties' } + }, + score: 26, + description: 'Flink provides the reference CDC→Iceberg V2 implementation with exactly-once streaming semantics, full MoR/CoW via delete files, and comprehensive catalog support' + } + } +}); \ No newline at end of file diff --git a/src/data/query-engines/hive.ts b/src/data/query-engines/hive.ts index 271f0ea2..37469840 100644 --- a/src/data/query-engines/hive.ts +++ b/src/data/query-engines/hive.ts @@ -1,7 +1,8 @@ // data/query-engines/hive.ts import { QueryEngine } from '../../types/iceberg'; +import { createVersionedEngine } from './versioning'; -export const hive: QueryEngine = { +export const hive: QueryEngine = createVersionedEngine({ id: 'hive', name: 'Apache Hive 4.0+', description: 'Traditional data warehouse with first-class Iceberg support, full SQL DML, hidden partitioning, and Ranger-based governance for batch analytics', @@ -98,12 +99,16 @@ export const hive: QueryEngine = { ] }, timeTravel: { - support: 'partial', - details: 'Hidden partitioning supported (PARTITIONED BY SPEC); time-travel via snapshot/branch properties, not SQL clauses', + support: 'full', + details: 'FOR SYSTEM_TIME AS OF and FOR SYSTEM_VERSION AS OF SQL clauses supported in Hive 4.0+; hidden partitioning (PARTITIONED BY SPEC) also available', externalLinks: [ { - label: 'Branching and Tagging', - url: 'https://iceberg.apache.org/docs/1.8.1/branching/' + label: 'Hive Time Travel', + url: 'https://iceberg.apache.org/docs/latest/hive/' + }, + { + label: 'Iceberg Time Travel in CDW', + url: 'https://docs.cloudera.com/cdw-runtime/cloud/iceberg-how-to/topics/iceberg-hive-time-travel.html' } ] }, 
@@ -151,5 +156,21 @@ SELECT * FROM iceberg_table WHERE created_date >= '2024-01-01';`, 'Use branch/tag properties for time travel rather than expecting SQL time travel syntax', 'Configure appropriate storage handlers and catalog properties for different deployment scenarios', 'Consider micro-batch processing patterns for near-real-time data ingestion requirements' - ] -}; \ No newline at end of file + ], + versions: { + v3: { + features: { + catalogs: { support: 'none', details: 'Hive 4 bundles Iceberg 1.4.3, predating V3 spec; V3 catalog operations not supported' }, + readWrite: { support: 'none', details: 'Hive cannot read or write Iceberg V3 format tables; requires Iceberg ≥ 1.8.0 for V3 support' }, + dml: { support: 'none', details: 'DML operations only produce V2 format (CoW); V3 table format not supported' }, + morCow: { support: 'none', details: 'V3 deletion vectors not readable or writable; Hive uses CoW rewrites only' }, + streaming: { support: 'none', details: 'No native streaming for any format version' }, + formatV3: { support: 'none', details: 'Not supported; Hive 4 bundles Iceberg 1.4.3, predating spec v3. 
Cannot write or reliably read v3 tables until upgrade to Iceberg ≥ 1.8.0' }, + timeTravel: { support: 'none', details: 'FOR SYSTEM_TIME/VERSION AS OF works for V1/V2 tables; V3-format tables not supported in Hive 4.0.x (Iceberg 1.4.3)' }, + security: { support: 'none', details: 'V3 format not supported; Ranger/SQL-standard policies apply to V1/V2 tables only' } + }, + score: 0, + description: 'Apache Hive 4.0 (Iceberg 1.4.3) does not support Iceberg V3 format tables; upgrade to Iceberg ≥ 1.8.0 required for V3 support' + } + } +}); \ No newline at end of file diff --git a/src/data/query-engines/impala.ts b/src/data/query-engines/impala.ts index 1e21a734..39d556c1 100644 --- a/src/data/query-engines/impala.ts +++ b/src/data/query-engines/impala.ts @@ -1,7 +1,8 @@ // data/query-engines/impala.ts import { QueryEngine } from '../../types/iceberg'; +import { createVersionedEngine } from './versioning'; -export const impala: QueryEngine = { +export const impala: QueryEngine = createVersionedEngine({ id: 'impala', name: 'Apache Impala v4.4+', description: 'High-performance analytics engine with Iceberg v2 support, row-level operations via position deletes, and deep HMS integration for enterprise environments', @@ -31,7 +32,7 @@ export const impala: QueryEngine = { }, dml: { support: 'partial', - details: 'INSERT INTO & INSERT OVERWRITE, DELETE (v2 position-delete files), UPDATE (v2 position deletes); MERGE planned/preview in CDW 1.5.5', + details: 'INSERT INTO & INSERT OVERWRITE, DELETE (v2 position-delete files), UPDATE (v2 position deletes); MERGE added in Impala 4.5 (previously CDW-only preview); equality-delete MERGE planned for 5.0.0+', externalLinks: [ { label: 'Iceberg V2 Tables - Impala', @@ -138,5 +139,21 @@ WHERE id = 123;`, 'Consider schema evolution limitations on complex types when designing table schemas', 'Monitor manifest cache effectiveness and tune cache settings appropriately', 'Use snapshot isolation guarantees for consistent read operations' - ] -}; \ 
No newline at end of file + ], + versions: { + v3: { + features: { + catalogs: { support: 'none', details: 'Impala supports only HiveCatalog and HadoopCatalog with V1/V2 tables; V3 format not supported' }, + readWrite: { support: 'none', details: 'Cannot read or write Iceberg V3 format tables; spec v1/v2 only' }, + dml: { support: 'none', details: 'DML (DELETE, UPDATE, MERGE preview) produces V2 position-delete files only; V3 not supported' }, + morCow: { support: 'none', details: 'V3 deletion vectors not supported; only V2 position-delete files via MoR; equality deletes not supported in any version' }, + streaming: { support: 'none', details: 'No built-in streaming ingestion for any format version' }, + formatV3: { support: 'none', details: 'Supports spec v1 and v2 only; spec v3 features like deletion vectors, row lineage, and new catalog RPCs not supported' }, + timeTravel: { support: 'none', details: 'Time travel (FOR SYSTEM_TIME/VERSION AS OF) only for V1/V2 format tables' }, + security: { support: 'none', details: 'V3 format not supported; Ranger/HMS security applies to V1/V2 tables only' } + }, + score: 0, + description: 'Apache Impala v4.4+ supports Iceberg spec V1/V2 only; Format V3 (deletion vectors, row lineage, new data types) not yet supported' + } + } +}); \ No newline at end of file diff --git a/src/data/query-engines/index.ts b/src/data/query-engines/index.ts index 8b759618..979218f4 100644 --- a/src/data/query-engines/index.ts +++ b/src/data/query-engines/index.ts @@ -124,7 +124,7 @@ function validateAllEngines(engines: QueryEngine[]): void { * All available query engines * Add new engines here after creating their data files */ -export const QUERY_ENGINES: QueryEngine[] = [ +export const engines: QueryEngine[] = [ spark, flink, hive, @@ -145,9 +145,12 @@ export const QUERY_ENGINES: QueryEngine[] = [ // Validate all engines in development if (process.env.NODE_ENV === 'development') { - validateAllEngines(QUERY_ENGINES); + 
validateAllEngines(engines); } +// Backward-compatible alias +export const QUERY_ENGINES = engines; + // Export individual engines for direct access export { spark, @@ -173,18 +176,18 @@ export { */ export const getEngineById = (id: string): QueryEngine | undefined => { - return QUERY_ENGINES.find(engine => engine.id === id); + return engines.find(engine => engine.id === id); }; export const getEnginesByCategory = (category: QueryEngine['category']): QueryEngine[] => { - return QUERY_ENGINES.filter(engine => engine.category === category); + return engines.filter(engine => engine.category === category); }; export const getEnginesBySupportLevel = ( feature: keyof QueryEngine['features'], level: QueryEngine['features'][keyof QueryEngine['features']]['support'] ): QueryEngine[] => { - return QUERY_ENGINES.filter(engine => engine.features[feature].support === level); + return engines.filter(engine => engine.features[feature].support === level); }; export const getEnginesByFeatureSupport = ( @@ -200,7 +203,7 @@ export const getEnginesByFeatureSupport = ( export const searchEngines = (query: string): QueryEngine[] => { const searchTerm = query.toLowerCase(); - return QUERY_ENGINES.filter(engine => + return engines.filter(engine => engine.name.toLowerCase().includes(searchTerm) || engine.description.toLowerCase().includes(searchTerm) || engine.id.toLowerCase().includes(searchTerm) @@ -209,20 +212,20 @@ export const searchEngines = (query: string): QueryEngine[] => { export const getEngineStats = () => { const stats = { - total: QUERY_ENGINES.length, + total: engines.length, byCategory: {} as Record, bySupport: {} as Record> }; // Count by category - QUERY_ENGINES.forEach(engine => { + engines.forEach(engine => { stats.byCategory[engine.category] = (stats.byCategory[engine.category] || 0) + 1; }); // Count by feature support - Object.keys(QUERY_ENGINES[0]?.features || {}).forEach(feature => { + Object.keys(engines[0]?.features || {}).forEach(feature => { 
stats.bySupport[feature] = { full: 0, partial: 0, preview: 0, none: 0 }; - QUERY_ENGINES.forEach(engine => { + engines.forEach(engine => { const support = engine.features[feature as keyof QueryEngine['features']].support; stats.bySupport[feature][support]++; }); diff --git a/src/data/query-engines/presto.ts b/src/data/query-engines/presto.ts index d2ae92b1..1ae2725c 100644 --- a/src/data/query-engines/presto.ts +++ b/src/data/query-engines/presto.ts @@ -1,7 +1,8 @@ // data/query-engines/presto.ts import { QueryEngine } from '../../types/iceberg'; +import { createVersionedEngine } from './versioning'; -export const presto: QueryEngine = { +export const presto: QueryEngine = createVersionedEngine({ id: 'presto', name: 'Presto 0.288+', description: 'Distributed SQL query engine with REST/Nessie catalogs, row-level DELETE, time travel, and configurable MoR/CoW modes for interactive analytics', @@ -77,7 +78,7 @@ export const presto: QueryEngine = { }, formatV3: { support: 'none', - details: 'Roadmap: read Deletion Vectors & Row Lineage after Iceberg 1.8 libraries land; write DV planned post-0.295. Currently supports v1/v2 only', + details: 'Presto 0.296 now bundles Iceberg 1.8.1 (library landed), but V3 format features (DV read/write, row lineage) have not yet shipped. 
V3 support planned in a future 0.29x release', externalLinks: [ { label: 'Format Version Support', @@ -157,5 +158,21 @@ FOR VERSION AS OF 1234567890;`, 'Monitor Presto logs for audit and security compliance requirements', 'Test experimental features thoroughly before production deployment', 'Plan migration strategy for when MERGE operations become available' - ] -}; \ No newline at end of file + ], + versions: { + v3: { + features: { + catalogs: { support: 'none', details: 'Presto only supports Iceberg V1/V2 format tables; V3 format not yet supported' }, + readWrite: { support: 'none', details: 'Presto cannot read or write V3-format tables; deletion vectors and row lineage not supported' }, + dml: { support: 'none', details: 'DML outputs V2 format only; MERGE not yet supported in any version; V3 not supported' }, + morCow: { support: 'none', details: 'V3 deletion vectors not supported; only V2 position/equality delete files via table properties' }, + streaming: { support: 'none', details: 'Batch-only; no streaming support for any format version' }, + formatV3: { support: 'none', details: 'Presto 0.296 bundles Iceberg 1.8.1 but V3 format features (deletion vectors, row lineage) have not yet shipped; planned for a future release' }, + timeTravel: { support: 'none', details: 'Time travel only for V1/V2 format tables; V3-format tables not supported' }, + security: { support: 'none', details: 'V3 format not supported; security features apply to V1/V2 tables only' } + }, + score: 0, + description: 'Presto supports Iceberg V1/V2 tables only; Presto 0.296 bundles Iceberg 1.8.1, but Format V3 features (deletion vectors, row lineage) have not yet shipped and are planned for a future release' + } + } +}); \ No newline at end of file diff --git a/src/data/query-engines/snowflake.ts b/src/data/query-engines/snowflake.ts index cc463f9d..7b4966a5 100644 --- a/src/data/query-engines/snowflake.ts +++ b/src/data/query-engines/snowflake.ts @@ -1,7 +1,8 @@ // data/query-engines/snowflake.ts import { QueryEngine } from
'../../types/iceberg'; +import { createVersionedEngine } from './versioning'; -export const snowflake: QueryEngine = { +export const snowflake: QueryEngine = createVersionedEngine({ id: 'snowflake', name: 'Snowflake', description: 'Enterprise cloud data warehouse with native Iceberg catalog, automatic optimization, Snowpipe Streaming, UniForm interoperability, and full integration with Snowflake features', @@ -11,7 +12,7 @@ export const snowflake: QueryEngine = { features: { catalogs: { support: 'partial', - details: 'Snowflake catalog (native) with full read/write. External catalogs (Glue, Open Table Catalog) read-only via catalog integration objects', + details: 'Snowflake Horizon Catalog (Polaris) native, REST Catalog, and AWS Glue supported for V3 (Public Preview). Hive Metastore, Nessie, Hadoop, JDBC not supported', externalLinks: [ { label: 'CREATE ICEBERG TABLE Documentation', @@ -24,8 +25,8 @@ export const snowflake: QueryEngine = { ] }, readWrite: { - support: 'partial', - details: 'Full SELECT, DML & DDL on Snowflake-catalog tables including COPY INTO, CTAS, multi-statement transactions. External catalogs read-only', + support: 'full', + details: 'Full read and write support for Snowflake-managed Iceberg tables including Parquet, COPY INTO, CTAS, and multi-statement transactions. V3 support in Public Preview (March 2026)', externalLinks: [ { label: 'Iceberg Table Tutorial', @@ -38,22 +39,18 @@ export const snowflake: QueryEngine = { ] }, dml: { - support: 'partial', - details: 'INSERT, UPDATE, DELETE, MERGE INTO fully ACID on Snowflake-catalog tables. Position-delete files, equality-delete in preview. External tables read-only', + support: 'full', + details: 'INSERT, UPDATE, DELETE, MERGE INTO fully ACID on Snowflake-managed Iceberg tables. 
V3 uses deletion vectors for row-level changes (Public Preview March 2026)', externalLinks: [ { label: 'DML Commands with Iceberg', url: 'https://docs.snowflake.com/en/user-guide/tables-iceberg-manage' - }, - { - label: 'Row-level Deletes', - url: 'https://docs.snowflake.com/en/user-guide/tables-iceberg-manage' } ] }, morCow: { support: 'full', - details: 'DML writes merge-on-read delete files. Automatic Storage Optimization compacts files & merges delete files, switching to copy-on-write during clustering', + details: 'V3 supports full MoR via deletion vectors and CoW. V3 deletion vectors replace position deletes for improved write performance (Public Preview March 2026)', externalLinks: [ { label: 'Iceberg Storage Management', @@ -62,22 +59,18 @@ export const snowflake: QueryEngine = { ] }, streaming: { - support: 'partial', - details: 'Snowpipe Streaming & Storage Write API for real-time ingestion (GA). Streams & Tasks supported on Snowflake-catalog tables. No built-in CDC ingestion', + support: 'full', + details: 'Snowflake V3 row lineage enables CDC capabilities for data governance and auditing. Snowpipe Streaming for real-time ingestion (Public Preview March 2026)', externalLinks: [ { label: 'Snowpipe Streaming with Iceberg', url: 'https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-iceberg' - }, - { - label: 'Introduction to Streams', - url: 'https://docs.snowflake.com/en/user-guide/streams-intro' } ] }, formatV3: { - support: 'none', - details: 'Not yet supported. Snowflake-catalog tables use spec v2; external v3 tables readable if future reader upgrades land. Roadmap evaluation ongoing', + support: 'partial', + details: 'Iceberg V3 support in Public Preview (March 2026). 
Supports deletion vectors, nanosecond timestamps, geometry type, variant type, and row lineage', externalLinks: [ { label: 'Apache Iceberg v3 Table Spec Blog', @@ -148,6 +141,53 @@ AT(TIME => '2024-01-15 10:00:00'); -- Create zero-copy clone CREATE ICEBERG TABLE sales_data_backup CLONE sales_data;`, + versions: { + v2: { + features: { + catalogs: { + support: 'partial', + details: 'Snowflake Horizon Catalog (Polaris) native, REST Catalog, and AWS Glue supported. Hive Metastore, Nessie, Hadoop, JDBC, Unity Catalog (read-only via REST) not fully supported', + externalLinks: [{ label: 'Apache Iceberg Tables Overview', url: 'https://docs.snowflake.com/en/user-guide/tables-iceberg' }] + }, + readWrite: { + support: 'full', + details: 'Full read and write support for Snowflake-managed Iceberg tables. Parquet-only format. COPY INTO, CTAS, multi-statement transactions all supported', + externalLinks: [{ label: 'Manage Iceberg Tables', url: 'https://docs.snowflake.com/en/user-guide/tables-iceberg-manage' }] + }, + dml: { + support: 'full', + details: 'INSERT, UPDATE, DELETE, MERGE INTO fully ACID on Snowflake-managed Iceberg tables. All DML operations use copy-on-write mode', + externalLinks: [{ label: 'DML Commands with Iceberg', url: 'https://docs.snowflake.com/en/user-guide/tables-iceberg-manage' }] + }, + morCow: { + support: 'partial', + details: 'Copy-on-write is the exclusive write strategy for Snowflake-managed tables. Merge-on-read supported only for externally managed Iceberg tables via ENABLE_ICEBERG_MERGE_ON_READ session parameter', + externalLinks: [{ label: 'Iceberg Storage Management', url: 'https://docs.snowflake.com/en/user-guide/tables-iceberg-storage' }] + }, + streaming: { + support: 'partial', + details: 'Snowpipe Streaming & Storage Write API for real-time ingestion (GA). Streams & Tasks supported on Snowflake-catalog tables. 
No built-in CDC ingestion', + externalLinks: [{ label: 'Snowpipe Streaming with Iceberg', url: 'https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-iceberg' }] + }, + formatV3: { + support: 'none', + details: 'Iceberg Format V3 features are not applicable to V2 tables.' + }, + timeTravel: { + support: 'full', + details: 'Query snapshots with AT(SNAPSHOT => id) or AT(TIME => ts). Zero-Copy Clones work on Iceberg tables. Full time travel for Snowflake-managed tables', + externalLinks: [{ label: 'Understanding Time Travel', url: 'https://docs.snowflake.com/en/user-guide/data-time-travel' }] + }, + security: { + support: 'full', + details: 'Full Snowflake RBAC, column masking, row-access policies, tag-based masking. Query activity in ACCOUNT_USAGE & ACCESS_HISTORY. Customer-managed IAM roles', + externalLinks: [{ label: 'Access Control Privileges', url: 'https://docs.snowflake.com/en/user-guide/security-access-control-privileges' }] + } + }, + score: 22, + description: 'Snowflake provides complete Iceberg V2 support with full DML operations, automatic optimization, and enterprise-grade security for managed tables' + } + }, bestPractices: [ 'Use Snowflake 8.20+ for GA Iceberg support and latest features', 'Leverage native Snowflake catalog for full DML capabilities and Snowflake feature integration', @@ -170,4 +210,4 @@ CLONE sales_data;`, 'Use customer-managed IAM roles for secure access to external storage', 'Monitor cross-region egress charges when compute and storage are in different regions' ] -}; \ No newline at end of file +}); \ No newline at end of file diff --git a/src/data/query-engines/spark.ts b/src/data/query-engines/spark.ts index 56d67315..252a0c3e 100644 --- a/src/data/query-engines/spark.ts +++ b/src/data/query-engines/spark.ts @@ -1,7 +1,8 @@ // data/query-engines/spark.ts import { QueryEngine } from '../../types/iceberg'; +import { createVersionedEngine } from './versioning'; -export const spark: QueryEngine = { +export const spark: 
QueryEngine = createVersionedEngine({ id: 'spark', name: 'Apache Spark 3.3+', description: 'The reference implementation for Apache Iceberg with comprehensive read/write support', @@ -51,5 +52,21 @@ df.writeTo("prod.db.table").append()`, 'Enable adaptive query execution for better performance', 'Use write.distribution.mode for optimized writes', 'Regularly run maintenance procedures (rewrite_data_files, expire_snapshots)' - ] -}; \ No newline at end of file + ], + versions: { + v2: { + features: { + catalogs: { support: 'full', details: 'Hive Metastore, Hadoop warehouse, REST, AWS Glue, JDBC, Nessie, plus custom plug-ins via spark.sql.catalog.* settings' }, + readWrite: { support: 'full', details: 'Full table scans, metadata-table reads, INSERT INTO, atomic INSERT OVERWRITE, DataFrame writeTo, and stored procedures' }, + dml: { support: 'full', details: 'MERGE INTO, UPDATE, DELETE via Spark Session Extensions; emits V2 position/equality delete files (Iceberg 0.14+)' }, + morCow: { support: 'full', details: 'Copy-on-Write default; Merge-on-Read enabled via write.delete.mode=merge-on-read, emitting V2 position/equality delete files' }, + streaming: { support: 'partial', details: 'Incremental reads with stream-from-timestamp; append/complete output modes; delete and overwrite snapshots skipped by streaming readers by default' }, + formatV3: { support: 'none', details: 'Iceberg Format V3 features are not applicable to V2 tables.' 
}, + timeTravel: { support: 'full', details: 'SQL VERSION AS OF / TIMESTAMP AS OF supported since Spark 3.3+; DataFrame as-of-timestamp option' }, + security: { support: 'full', details: 'Delegates ACLs to underlying catalog (Hive Ranger, AWS IAM, Nessie policies); snapshot isolation; audit metadata' } + }, + score: 26, + description: 'Spark provides comprehensive Iceberg V2 support with full DML, MoR/CoW via position/equality delete files, streaming reads, and native SQL time travel' + } + } +}); \ No newline at end of file diff --git a/src/data/query-engines/starburst.ts b/src/data/query-engines/starburst.ts index 13390f5f..69c16df6 100644 --- a/src/data/query-engines/starburst.ts +++ b/src/data/query-engines/starburst.ts @@ -1,7 +1,8 @@ // data/query-engines/starburst.ts import { QueryEngine } from '../../types/iceberg'; +import { createVersionedEngine } from './versioning'; -export const starburst: QueryEngine = { +export const starburst: QueryEngine = createVersionedEngine({ id: 'starburst', name: 'Starburst Enterprise SEP 414-E+', description: 'End-to-end Iceberg analytics platform with comprehensive catalog support, full DML operations, enterprise governance, and advanced optimization features', @@ -49,7 +50,7 @@ export const starburst: QueryEngine = { }, morCow: { support: 'full', - details: 'Default copy-on-write for large rewrites; fine-grained updates create separate delete files (MoR) merged at query time; handles both position & equality deletes', + details: 'Default copy-on-write for large rewrites; V2 fine-grained updates use position/equality delete files (MoR); SEP 476-e+ writes deletion vectors (V3 MoR) when format-version=3', externalLinks: [ { label: 'Data Management - Deletion Strategies', @@ -68,12 +69,16 @@ export const starburst: QueryEngine = { ] }, formatV3: { - support: 'preview', - details: 'Supports Iceberg spec v1 & v2; can read v3 preview metadata under feature flag but no v3 writes; production v3 GA on roadmap for 2025', + support: 
'full', + details: 'Full Iceberg V3 read+write GA in SEP 476-e (September 2025); deletion vectors (MoR writes), VARIANT type, nanosecond timestamps, and row lineage all supported', externalLinks: [ { - label: 'Iceberg Connector - Spec Support', - url: 'https://docs.starburst.io/latest/connector/iceberg.html' + label: 'SEP 476-e Release Notes', + url: 'https://docs.starburst.io/latest/release/release-476-e.html' + }, + { + label: 'Iceberg V3 in Starburst Blog', + url: 'https://www.starburst.io/blog/iceberg-v3/' } ] }, @@ -137,5 +142,21 @@ FOR TIMESTAMP AS OF TIMESTAMP '2025-01-01 00:00:00';`, 'Leverage $snapshots, $history, $manifests metadata tables for table introspection', 'Configure appropriate data file formats (Parquet default, ORC, Avro) and codecs', 'Use rollback_to_snapshot, expire_snapshots, remove_orphan_files procedures for maintenance' - ] -}; \ No newline at end of file + ], + versions: { + v3: { + features: { + catalogs: { support: 'full', details: 'All catalog types (HMS, Glue, REST, Nessie, Snowflake, Galaxy) accessible; V3 table creation fully supported in SEP 476-e+' }, + readWrite: { support: 'full', details: 'Full V3 read+write GA in SEP 476-e (September 2025); creates and queries V3-format Iceberg tables natively' }, + dml: { support: 'full', details: 'INSERT, UPDATE, DELETE, MERGE with V3 deletion vectors for MoR writes; partition-aligned predicates become partition deletes' }, + morCow: { support: 'full', details: 'Deletion vectors (V3 MoR) written for fine-grained updates; copy-on-write still available for large partition rewrites' }, + streaming: { support: 'none', details: 'No built-in streaming ingestion for any format version' }, + formatV3: { support: 'full', details: 'Full Iceberg V3 GA in SEP 476-e (September 2025); deletion vectors, VARIANT type, nanosecond timestamps, row lineage all supported' }, + timeTravel: { support: 'full', details: 'FOR VERSION AS OF / FOR TIMESTAMP AS OF fully work on V3 tables; metadata tables expose row 
lineage columns' }, + security: { support: 'full', details: 'Built-in ACL engine, LDAP/OAuth, Lake Formation, Ranger policies; row lineage (_row_id) visible for audit purposes' } + }, + score: 28, + description: 'Starburst Enterprise SEP 476-e+ provides full Iceberg V3 read+write GA support including deletion vectors, VARIANT type, nanosecond timestamps, and row lineage (since September 2025)' + } + } +}); \ No newline at end of file diff --git a/src/data/query-engines/starrocks.ts b/src/data/query-engines/starrocks.ts index ab452d00..ff1b4144 100644 --- a/src/data/query-engines/starrocks.ts +++ b/src/data/query-engines/starrocks.ts @@ -1,7 +1,8 @@ // data/query-engines/starrocks.ts import { QueryEngine } from '../../types/iceberg'; +import { createVersionedEngine } from './versioning'; -export const starrocks: QueryEngine = { +export const starrocks: QueryEngine = createVersionedEngine({ id: 'starrocks', name: 'StarRocks v3.2/3.3', description: 'Vectorized OLAP engine with read-write Iceberg support, async materialized views, CBO optimization, and strong analytical performance for lakehouse analytics', @@ -149,5 +150,21 @@ GROUP BY 1;`, 'Configure appropriate row-group size and page size for Parquet write performance', 'Plan migration path for when UPDATE/DELETE/MERGE operations become available', 'Consider StarRocks as primary query engine in lakehouse architecture with other engines for writes' - ] -}; \ No newline at end of file + ], + versions: { + v3: { + features: { + catalogs: { support: 'none', details: 'StarRocks v3.2/3.3 supports only Iceberg V1/V2 format tables; V3 format not yet supported' }, + readWrite: { support: 'none', details: 'Cannot read or write Iceberg V3 format tables; V1/V2 Parquet & ORC only' }, + dml: { support: 'none', details: 'INSERT/INSERT OVERWRITE available for V2 tables only; V3 format not supported' }, + morCow: { support: 'none', details: 'V3 deletion vectors not supported; reads V2 MoR (position & equality deletes) only' }, + 
streaming: { support: 'none', details: 'No native streaming for any format version' }, + formatV3: { support: 'none', details: 'Not yet GA; supports V1/V2 (Parquet & ORC) only; V3 support on roadmap' }, + timeTravel: { support: 'none', details: 'Time travel (v3.4+) only for V1/V2 format tables; V3-format tables not supported' }, + security: { support: 'none', details: 'V3 format not supported; StarRocks RBAC and catalog ACLs apply to V1/V2 tables only' } + }, + score: 0, + description: 'StarRocks v3.2/3.3 supports Iceberg V1/V2 tables only; Format V3 (deletion vectors, row lineage, new data types) not yet supported' + } + } +}); \ No newline at end of file diff --git a/src/data/query-engines/trino.ts b/src/data/query-engines/trino.ts index 24e433b6..002db6a0 100644 --- a/src/data/query-engines/trino.ts +++ b/src/data/query-engines/trino.ts @@ -1,7 +1,8 @@ // data/query-engines/trino.ts import { QueryEngine } from '../../types/iceberg'; +import { createVersionedEngine } from './versioning'; -export const trino: QueryEngine = { +export const trino: QueryEngine = createVersionedEngine({ id: 'trino', name: 'Trino 475+', description: 'High-performance distributed SQL query engine with advanced DML, time travel, and native Iceberg optimization for interactive analytics', @@ -152,5 +153,21 @@ FOR TIMESTAMP AS OF TIMESTAMP '2024-01-01 12:00:00';`, 'Use hidden partition transforms for automatic partition pruning without explicit WHERE clauses', 'Configure security delegation to underlying catalog systems (Ranger, IAM, Nessie policies)', 'Be aware that proliferation of small files can degrade performance - optimize regularly' - ] -}; \ No newline at end of file + ], + versions: { + v3: { + features: { + catalogs: { support: 'none', details: 'Trino only supports Iceberg V1/V2 format tables; V3 format not yet supported' }, + readWrite: { support: 'none', details: 'Trino cannot read or write V3-format tables with deletion vectors or row lineage; V1/V2 only' }, + dml: { support: 
'none', details: 'All DML operations produce V2 format outputs only; V3 table format not supported' }, + morCow: { support: 'none', details: 'V3 deletion vectors not supported; Trino only handles V2 position/equality delete files' }, + streaming: { support: 'none', details: 'No streaming support for any format version' }, + formatV3: { support: 'none', details: 'Not yet GA; connector supports spec v1/v2 only; deletion vectors & row lineage planned post-Iceberg 1.8 library update' }, + timeTravel: { support: 'none', details: 'Time travel only available for V1/V2 format tables; V3-format tables not supported' }, + security: { support: 'none', details: 'V3 format not supported; security features apply to V1/V2 tables only' } + }, + score: 0, + description: 'Trino supports Iceberg V1/V2 tables only; Format V3 (deletion vectors, row lineage, new data types) not yet supported' + } + } +}); \ No newline at end of file diff --git a/src/data/query-engines/versioning.ts b/src/data/query-engines/versioning.ts new file mode 100644 index 00000000..b5926bb2 --- /dev/null +++ b/src/data/query-engines/versioning.ts @@ -0,0 +1,45 @@ +import { EngineVersionData, QueryEngine } from '../../types/iceberg'; +import { SUPPORT_WEIGHTS } from '../constants/features'; + +type LegacyEngine = Omit<QueryEngine, 'versions'> & { + versions?: QueryEngine['versions']; +}; + +const calculateScore = (features: QueryEngine['features']): number => + Object.values(features).reduce((sum, feature) => sum + (SUPPORT_WEIGHTS[feature?.support] ?? 0), 0); + +const createV2FromFeatures = (features: QueryEngine['features'], engineDescription: string): EngineVersionData => { + const v2Features: QueryEngine['features'] = { + ...features, + formatV3: { + support: 'none', + details: 'Iceberg Format V3 features are not applicable to V2 tables.', + externalLinks: features.formatV3?.externalLinks ??
[] + } + }; + + return { + features: v2Features, + score: calculateScore(v2Features), + description: engineDescription + }; +}; + +export const createVersionedEngine = (engine: LegacyEngine): QueryEngine => { + const legacyV3: EngineVersionData = { + features: engine.features, + score: calculateScore(engine.features), + description: engine.description + }; + + const v2 = engine.versions?.v2 ?? createV2FromFeatures(engine.features, engine.description); + const v3 = engine.versions?.v3 ?? legacyV3; + + return { + ...engine, + versions: { + v2, + v3 + } + }; +}; diff --git a/src/types/iceberg.ts b/src/types/iceberg.ts index 446cf727..463744ce 100644 --- a/src/types/iceberg.ts +++ b/src/types/iceberg.ts @@ -12,6 +12,14 @@ export interface Feature { externalLinks?: ExternalLink[]; } +export type EngineVersion = 'v2' | 'v3'; + +export interface EngineVersionData { + features: QueryEngine['features']; + score?: number | null; + description?: string | null; +} + export interface QueryEngine { id: string; name: string; @@ -29,6 +37,10 @@ export interface QueryEngine { timeTravel: Feature; security: Feature; }; + versions: { + v2?: EngineVersionData | null; + v3?: EngineVersionData | null; + }; quickStart: string; bestPractices: string[]; } @@ -38,4 +50,11 @@ export interface FilterOptions { category: QueryEngine['category'] | 'all'; } -export type ViewType = 'table' | 'cards' | 'features'; \ No newline at end of file +export type ViewType = 'table' | 'cards' | 'features'; + +export interface EngineVersionSelection { + engine: string; + version: EngineVersion; +} + +export type VersionMode = 'v2' | 'v3'; \ No newline at end of file