Skip to content

Commit 47eff9e

Browse files
authored
Merge pull request #33848 from teskje/catalog-remove-protobuf
catalog-protos: remove protobuf dependency
2 parents faeae15 + 6d93396 commit 47eff9e

36 files changed

+23102
-14082
lines changed

Cargo.lock

Lines changed: 1 addition & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/buf.yaml

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -15,32 +15,6 @@ breaking:
1515
use:
1616
- WIRE
1717
ignore:
18-
# reason: does currently not require backward-compatibility
19-
- catalog-protos/protos/objects.proto
20-
# reason: does currently not require backward-compatibility
21-
- catalog-protos/protos/objects_v67.proto
22-
# reason: does currently not require backward-compatibility
23-
- catalog-protos/protos/objects_v68.proto
24-
# reason: does currently not require backward-compatibility
25-
- catalog-protos/protos/objects_v69.proto
26-
# reason: does currently not require backward-compatibility
27-
- catalog-protos/protos/objects_v70.proto
28-
# reason: does currently not require backward-compatibility
29-
- catalog-protos/protos/objects_v71.proto
30-
# reason: does currently not require backward-compatibility
31-
- catalog-protos/protos/objects_v72.proto
32-
# reason: does currently not require backward-compatibility
33-
- catalog-protos/protos/objects_v73.proto
34-
# reason: does currently not require backward-compatibility
35-
- catalog-protos/protos/objects_v74.proto
36-
# reason: does currently not require backward-compatibility
37-
- catalog-protos/protos/objects_v75.proto
38-
# reason: does currently not require backward-compatibility
39-
- catalog-protos/protos/objects_v76.proto
40-
# reason: does currently not require backward-compatibility
41-
- catalog-protos/protos/objects_v77.proto
42-
# reason: does currently not require backward-compatibility
43-
- catalog-protos/protos/objects_v78.proto
4418
# reason: Ignore because plans are currently not persisted.
4519
- expr/src/scalar.proto
4620
# reason: we very carefully evolve these protobuf definitions

src/catalog-protos/Cargo.toml

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,25 +16,19 @@ mz-proto = { path = "../proto" }
1616
mz-repr = { path = "../repr" }
1717
mz-sql = { path = "../sql" }
1818
mz-storage-types = { path = "../storage-types" }
19-
paste = "1.0.11"
19+
num_enum = "0.7.4"
2020
proptest = { version = "1.9.0", default-features = false, features = ["std"] }
21-
proptest-derive = { version = "0.7.0", features = ["boxed_union"] }
22-
prost = "0.13.5"
21+
proptest-derive = "0.7.0"
2322
serde = { version = "1.0.219", features = ["derive"] }
2423
workspace-hack = { version = "0.0.0", path = "../workspace-hack", optional = true }
2524

2625
[dev-dependencies]
27-
mz-build-tools = { path = "../build-tools", default-features = false }
2826
mz-ore = { path = "../ore", features = ["test"] }
29-
mz-proto = { path = "../proto" }
30-
proptest = { version = "1.9.0", default-features = false, features = ["std"] }
3127
similar-asserts = "1.7"
3228

3329
[build-dependencies]
3430
anyhow = "1.0.100"
3531
md-5 = "0.10.6"
36-
mz-build-tools = { path = "../build-tools", default-features = false, features = ["protobuf-src"] }
37-
prost-build = "0.13.5"
3832
serde = { version = "1.0.219", features = ["derive"] }
3933
serde_json = "1.0.145"
4034

src/catalog-protos/build.rs

Lines changed: 28 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -9,47 +9,46 @@
99

1010
use std::collections::BTreeMap;
1111
use std::fs;
12-
use std::io::{BufReader, Write};
12+
use std::io::Write;
13+
use std::path::PathBuf;
1314

1415
use anyhow::Context;
1516
use md5::{Digest, Md5};
1617
use serde::{Deserialize, Serialize};
1718

18-
/// The path of a protobuf file and its [`md5`] hash.
19+
/// The path of an object definition file and its [`md5`] hash.
1920
///
2021
/// We store a hash of all the files to make sure they don't accidentally change, which would
2122
/// invalidate our snapshotted types, and could silently introduce bugs.
2223
#[derive(Debug, Clone, Deserialize, Serialize)]
23-
struct ProtoHash {
24+
struct ObjectsHash {
2425
name: String,
2526
md5: String,
2627
}
2728

28-
const PROTO_DIRECTORY: &str = "protos";
29-
const PROTO_HASHES: &str = "protos/hashes.json";
29+
const OBJECTS_HASHES: &str = "objects_hashes.json";
3030

3131
fn main() -> anyhow::Result<()> {
32-
println!("cargo:rerun-if-changed={PROTO_DIRECTORY}");
32+
let crate_root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
3333

3434
// Read in the persisted hashes from disk.
35-
let hashes = fs::File::open(PROTO_HASHES).context("opening proto hashes")?;
36-
let reader = BufReader::new(&hashes);
37-
let hashes: Vec<ProtoHash> = serde_json::from_reader(reader)?;
35+
let hashes_path = crate_root.join(OBJECTS_HASHES);
36+
let hashes_json = fs::read_to_string(&hashes_path)?;
37+
let hashes: Vec<ObjectsHash> = serde_json::from_str(&hashes_json)?;
3838
let mut persisted: BTreeMap<String, String> =
3939
hashes.into_iter().map(|e| (e.name, e.md5)).collect();
4040

41-
// Discover all of the protobuf files on disk.
42-
let protos: BTreeMap<String, String> = fs::read_dir(PROTO_DIRECTORY)?
41+
// Discover all of the object definition files on disk.
42+
let src_dir = crate_root.join("src");
43+
let objects: BTreeMap<String, String> = fs::read_dir(src_dir)?
4344
// If we fail to read one file, fail everything.
4445
.collect::<Result<Vec<_>, _>>()?
4546
.into_iter()
46-
// Filter to only files with the .proto extension.
47+
// Filter to only files of the form `objects*.rs`.
4748
.filter(|entry| {
48-
entry
49-
.path()
50-
.extension()
51-
.map(|e| e.to_string_lossy().contains("proto"))
52-
.unwrap_or(false)
49+
let name = entry.file_name();
50+
let s = name.to_string_lossy();
51+
s.starts_with("objects") && s.ends_with(".rs")
5352
})
5453
.map(|file| {
5554
let path = file.path();
@@ -71,34 +70,34 @@ fn main() -> anyhow::Result<()> {
7170
})
7271
.collect();
7372

74-
// After validating our hashes we'll re-write the file if any new protos
73+
// After validating our hashes we'll re-write the file if any new object definitions
7574
// have been added.
76-
let mut to_persist: Vec<ProtoHash> = Vec::new();
75+
let mut to_persist: Vec<ObjectsHash> = Vec::new();
7776
let mut any_new = false;
7877

7978
// Check the persisted hashes against what we just read in from disk.
80-
for (name, hash) in protos {
79+
for (name, hash) in objects {
8180
match persisted.remove(&name) {
8281
// Hashes have changed!
8382
Some(og_hash) if hash != og_hash => {
8483
anyhow::bail!(error_message(og_hash, hash, name));
8584
}
86-
// Found a proto file on disk that we didn't have persisted, we'll just persist it.
85+
// Found an objects file on disk that we didn't have persisted, we'll just persist it.
8786
None => {
88-
to_persist.push(ProtoHash { name, md5: hash });
87+
to_persist.push(ObjectsHash { name, md5: hash });
8988
any_new = true;
9089
}
9190
// We match!
92-
Some(_) => to_persist.push(ProtoHash { name, md5: hash }),
91+
Some(_) => to_persist.push(ObjectsHash { name, md5: hash }),
9392
}
9493
}
9594

96-
// Check if there are any proto files we should have had hashes for, but didn't exist.
95+
// Check if there are any objects files that we have persisted hashes for, but that don't exist on disk.
9796
if !persisted.is_empty() {
9897
anyhow::bail!("Have persisted hashes, but no files on disk? {persisted:#?}");
9998
}
10099

101-
// Write the hashes back out to disk if and only if there are new protos. We
100+
// Write the hashes back out to disk if and only if there are new object definitions. We
102101
// don't do this unconditionally or we'll get stuck in a rebuild loop:
103102
// executing this build script will change the mtime on the hashes file,
104103
// which will force the next compile to rebuild the crate, even if nothing
@@ -107,103 +106,23 @@ fn main() -> anyhow::Result<()> {
107106
let mut file = fs::File::options()
108107
.write(true)
109108
.truncate(true)
110-
.open(PROTO_HASHES)
109+
.open(hashes_path)
111110
.context("opening hashes file to write")?;
112111
serde_json::to_writer_pretty(&mut file, &to_persist).context("persisting hashes")?;
113112
write!(&mut file, "\n").context("writing newline")?;
114113
}
115114

116-
// Generate protos!
117-
let paths: Vec<_> = to_persist
118-
.iter()
119-
.map(|entry| format!("protos/{}", entry.name))
120-
.collect();
121-
122-
const ATTR: &str = "#[derive(Eq, PartialOrd, Ord, ::serde::Serialize, ::serde::Deserialize)]";
123-
const ARBITRARY_ATTR: &str = "#[derive(::proptest_derive::Arbitrary)]";
124-
125-
// 'as' is okay here because we're using it to define the type of the empty slice, which is
126-
// necessary since the method takes the slice as a generic arg.
127-
#[allow(clippy::as_conversions)]
128-
// DO NOT change how JSON serialization works for these objects. The catalog relies on the JSON
129-
// serialization of these objects remaining stable for a specific objects_vX version. If you
130-
// want to change the JSON serialization format then follow these steps:
131-
//
132-
// 1. Create a new version of the `objects.proto` file.
133-
// 2. Update the path of .proto files given to this compile block so that it is only the
134-
// previous .proto files.
135-
// 3. Add a new `prost_build::Config::new()...compile_protos(...)` block that only compiles
136-
// the new and all future .proto files with the changed JSON serialization.
137-
//
138-
// Once we delete all the `.proto` that use the old JSON serialization, then we can delete
139-
// the compile block for them as well.
140-
prost_build::Config::new()
141-
.protoc_executable(mz_build_tools::protoc())
142-
.btree_map(["."])
143-
.bytes(["."])
144-
.message_attribute(".", ATTR)
145-
// Note(parkmycar): This is annoying, but we need to manually specify each oneof so we can
146-
// get them to implement Eq, PartialEq, and Ord. If you define a new oneof you should add
147-
// it here.
148-
.enum_attribute("CatalogItem.value", ATTR)
149-
.enum_attribute("ClusterConfig.variant", ATTR)
150-
.enum_attribute("GlobalId.value", ATTR)
151-
.enum_attribute("CatalogItemId.value", ATTR)
152-
.enum_attribute("ClusterId.value", ATTR)
153-
.enum_attribute("DatabaseId.value", ATTR)
154-
.enum_attribute("SchemaId.value", ATTR)
155-
.enum_attribute("ReplicaId.value", ATTR)
156-
.enum_attribute("RoleId.value", ATTR)
157-
.enum_attribute("NetworkPolicyId.value", ATTR)
158-
.enum_attribute("NetworkPolicyRule.action", ATTR)
159-
.enum_attribute("NetworkPolicyRule.direction", ATTR)
160-
.enum_attribute("ReplicaConfig.location", ATTR)
161-
.enum_attribute("AuditLogEventV1.details", ATTR)
162-
.enum_attribute("AuditLogKey.event", ATTR)
163-
.enum_attribute("StorageUsageKey.usage", ATTR)
164-
.enum_attribute("ResolvedDatabaseSpecifier.value", ATTR)
165-
.enum_attribute("CommentKey.object", ATTR)
166-
.enum_attribute("CommentKey.sub_component", ATTR)
167-
.enum_attribute("ResolvedDatabaseSpecifier.spec", ATTR)
168-
.enum_attribute("SchemaSpecifier.spec", ATTR)
169-
.enum_attribute("RoleVars.Entry.val", ATTR)
170-
.enum_attribute("StateUpdateKind.kind", ATTR)
171-
.enum_attribute("ClusterScheduleOptionValue.value", ATTR)
172-
.enum_attribute("ClusterSchedule.value", ATTR)
173-
.enum_attribute("CreateOrDropClusterReplicaReasonV1.reason", ATTR)
174-
.enum_attribute("RefreshDecisionWithReasonV1.decision", ATTR)
175-
.enum_attribute("RefreshDecisionWithReasonV2.decision", ATTR)
176-
// Serialize/deserialize the top-level enum in the persist-backed
177-
// catalog as "internally tagged"[^1] to set up persist pushdown
178-
// statistics for success.
179-
//
180-
// [^1]: https://serde.rs/enum-representations.html#internally-tagged
181-
.enum_attribute("StateUpdateKind.kind", "#[serde(tag = \"kind\")]")
182-
// We derive Arbitrary for all protobuf types for wire compatibility testing.
183-
.message_attribute(".", ARBITRARY_ATTR)
184-
.enum_attribute(".", ARBITRARY_ATTR)
185-
.compile_protos(
186-
&paths,
187-
&[ /*
188-
This is purposefully empty, and we should never
189-
add any includes because we don't want to allow
190-
our protos to have dependencies. This allows us
191-
to ensure our snapshots can't silently change.
192-
*/
193-
] as &[&str],
194-
)?;
195-
196115
Ok(())
197116
}
198117

199118
/// A (hopefully) helpful error message that describes what to do when the hashes differ.
200119
fn error_message(og_hash: String, hash: String, filename: String) -> String {
201-
let title = "Hashes changed for the persisted protobuf files!";
120+
let title = "Hashes changed for the persisted object definition files!";
202121
let body1 = format!(
203-
"If you changed '{filename}' without first making a snapshot, then you need to copy '{filename}' and rename it with a suffix like '_vX.proto'."
122+
"If you changed '{filename}' without first making a snapshot, then you need to copy '{filename}' and rename it with a suffix like '_vX.rs'."
204123
);
205124
let body2 = format!(
206-
"Otherwise you can update the hash for '{filename}' in '{PROTO_HASHES}' to be '{hash}'."
125+
"Otherwise you can update the hash for '{filename}' in '{OBJECTS_HASHES}' to be '{hash}'."
207126
);
208127
let hashes = format!("persisted_hash({og_hash}) != current_hash({hash})\nFile: {filename}");
209128

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
[
2+
{
3+
"name": "objects.rs",
4+
"md5": "26ae04e683a5462c3998d8f9d8d6a0fa"
5+
},
6+
{
7+
"name": "objects_v67.rs",
8+
"md5": "6f9b22cb3147dd2a685fe27b4cd016ee"
9+
},
10+
{
11+
"name": "objects_v68.rs",
12+
"md5": "d218ef6aabce8dc418ffde99d27dec03"
13+
},
14+
{
15+
"name": "objects_v69.rs",
16+
"md5": "55cb125ee8c1348acc1e129f41903931"
17+
},
18+
{
19+
"name": "objects_v70.rs",
20+
"md5": "767b231f11679065126c8d83ae6a763b"
21+
},
22+
{
23+
"name": "objects_v71.rs",
24+
"md5": "767b231f11679065126c8d83ae6a763b"
25+
},
26+
{
27+
"name": "objects_v72.rs",
28+
"md5": "767b231f11679065126c8d83ae6a763b"
29+
},
30+
{
31+
"name": "objects_v73.rs",
32+
"md5": "6e2c445773820d0579ff3d66f1fac7e8"
33+
},
34+
{
35+
"name": "objects_v74.rs",
36+
"md5": "b87d99e30098642f98b0bb1927516baa"
37+
},
38+
{
39+
"name": "objects_v75.rs",
40+
"md5": "67a4a7da3e3f7cb1fd01963d5b118b01"
41+
},
42+
{
43+
"name": "objects_v76.rs",
44+
"md5": "26ae04e683a5462c3998d8f9d8d6a0fa"
45+
},
46+
{
47+
"name": "objects_v77.rs",
48+
"md5": "26ae04e683a5462c3998d8f9d8d6a0fa"
49+
},
50+
{
51+
"name": "objects_v78.rs",
52+
"md5": "26ae04e683a5462c3998d8f9d8d6a0fa"
53+
}
54+
]

0 commit comments

Comments
 (0)