-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.example.yaml
More file actions
118 lines (109 loc) · 5.12 KB
/
Copy pathconfig.example.yaml
File metadata and controls
118 lines (109 loc) · 5.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# pg2iceberg config example. Mirrors the Go reference's YAML shape;
# operators can drop their existing pg2iceberg.yaml in unchanged.
# Fields that the Rust port doesn't yet consume (query mode,
# control-plane metadata, materializer cycle knobs, etc.) are still
# parsed cleanly and ignored.
tables:
  # Schema is discovered from `information_schema.columns` +
  # `pg_index` at startup — same path the Go reference uses. PK
  # columns are picked up automatically when the source has a
  # PRIMARY KEY constraint. Fall back to explicit `columns:`
  # below only when the source lacks a PK or you want to override
  # the discovered shape.
  - name: public.orders
    # primary_key: [id]              # optional — overrides the
    #                                # discovered PK
    # columns:                       # optional — overrides discovery
    #   - { name: id, pg_type: int4 }
    #   - { name: qty, pg_type: int8, nullable: true }
    # iceberg:
    #   partition:                   # Iceberg partition spec
    #     - day(created_at)          # day/month/year/hour
    #     - bucket[16](id)           # murmur3_x86_32 % N
    #     - region                   # identity
    #     - truncate[4](name)        # first W code points (strings)
    #                                # or floor to multiple of W (ints)
    #
    # All seven transforms work end-to-end (identity, year/month/day/hour,
    # bucket[N], truncate[W] — including over Numeric columns).
    #
    # Operational note for `Delete`: when partition source columns
    # are NOT in the primary key, configure `ALTER TABLE ... REPLICA
    # IDENTITY FULL` on the Postgres source so logical replication
    # carries old-row partition values. Otherwise deletes on
    # partitioned tables error with `partition column missing`.
# Where changes are read from: the replication mode, the Postgres
# connection, and the logical-replication publication/slot names.
source:
  mode: logical  # "logical" or "query"
  postgres:
    host: db.example.com
    port: 5432
    database: src
    user: pg2iceberg
    password: REDACTED
    # sslmode accepts "disable" | "require" | "verify-ca" | "verify-full".
    # The Rust port treats anything non-disable as webpki-roots
    # verification (mTLS / custom CA / hostname-only flavors are
    # follow-ons).
    sslmode: require
  logical:
    publication_name: pg2iceberg_pub
    slot_name: pg2iceberg_slot
    # standby_interval: 10s
# Destination settings: REST catalog endpoint and auth, object-store
# credentials, flush cadence, compaction thresholds, and the optional
# `pg2iceberg maintain` knobs.
sink:
  catalog_uri: https://catalog.example.com
  # Catalog auth flavor: "" (none) | "sigv4" (AWS Glue) | "bearer" |
  # "oauth2". Only "bearer" and "" are currently exercised against
  # Polaris/Tabular/Snowflake-managed-catalog and the Iceberg REST
  # reference; "sigv4" + "oauth2" plumb through but aren't yet
  # tested end-to-end.
  catalog_auth: bearer
  catalog_token: REDACTED-bearer-token
  # catalog_client_id: ...
  # catalog_client_secret: ...
  # Credential mode for staging blobs:
  #   "static" (default) — explicit S3 keys below.
  #   "iam"              — instance profile / AWS SSO / env-var chain
  #                        (object_store::aws::AmazonS3Builder::from_env).
  #   "vended"           — temporary creds from the catalog's LoadTable
  #                        response. NOT YET WIRED in the Rust port; will
  #                        error at startup (see plan §Phase 7).
  credential_mode: static
  warehouse: s3://my-warehouse/staged
  namespace: default
  s3_endpoint: https://s3.us-east-1.amazonaws.com
  s3_access_key: REDACTED
  s3_secret_key: REDACTED
  s3_region: us-east-1
  flush_interval: 10s
  flush_rows: 1000
  # Compaction thresholds — names match Go. Compaction runs at the end
  # of every materializer cycle, gated by these counts. Set
  # `target_file_size: 0` to disable compaction entirely.
  #
  # Output files are partition-aware (each output file carries a single
  # partition tuple). Equality deletes are applied inline at compaction
  # time; surviving rows are deduped and folded into the new files.
  compaction_data_files: 8
  compaction_delete_files: 4
  target_file_size: 134217728  # 128 MiB
  # `pg2iceberg maintain` knobs. The subcommand runs in two phases:
  # snapshot expiry (drops snapshots older than retention; never the
  # current one) followed by orphan-file cleanup (deletes blobs under
  # `materialized_prefix` that no live snapshot references and that
  # are older than grace). Operators typically run it via cron or a
  # Kubernetes CronJob; either way each run is one-shot — there is no
  # in-process scheduler.
  # Format matches Go's time.ParseDuration ("168h", "30m", "90s").
  # Go's parser has no day unit, so write 7 days as "168h", not "7d".
  # maintenance_retention: 168h
  # maintenance_grace: 30m
  # materialized_prefix: materialized/  # must match the path scheme
  #                                     # the materializer writes to
  # Free-form REST catalog props passthrough. Useful for
  # vendor-specific settings the Go config doesn't have a dedicated
  # field for.
  # catalog_props:
  #   "header.X-Custom-Header": "value"
# Coordinator state: which Postgres hosts the coordinator schema and
# what that schema is called.
state:
  # Optional dedicated PG for the _pg2iceberg.* coordinator schema.
  # If absent, the source PG hosts it (matches Go).
  # postgres_url: postgres://coord_user:secret@coord-db.example.com/coord?sslmode=require
  coordinator_schema: _pg2iceberg
  # group: default