From 50d1bc24d823ce595312a9b7a28348be1475bc18 Mon Sep 17 00:00:00 2001 From: Curtis Anderson Date: Fri, 26 Jun 2026 16:08:00 -0700 Subject: [PATCH 1/2] test(rules): align generate_output_location tests with current args.* contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test file (test_generate_output_location.py) was written when generate_output_location's design accepted orgname/systemname as keyword-only kwargs threaded by the CLI dispatch layer. The function has since been rewritten (LAY-05) to read both from benchmark.args, which is populated upstream by main._main_impl()'s orgname-resolution gate (reads orgname.yaml written by `mlpstorage init`) and argparse from --systemname. The 25 tests in the file kept passing values as Python kwargs that the function silently ignored, so every test hit the "orgname is empty" ConfigurationError before reaching the behavior under test. Changes: mlpstorage_py/rules/utils.py - Remove unused orgname=/systemname= keyword-only parameters from the function signature. The body already reads args.orgname/args.systemname exclusively; keeping the kwargs in the signature misled the test author (the failure cascade for this whole file). - Add parameter="orgname" / parameter="systemname" to the two existing ConfigurationError raises so the dispatch layer can surface a typed user-facing error (the ConfigurationError class already exposes the attribute; the raise sites just weren't filling it in). - Fix the systemname suggestion env-var name from MLPERF_SYSTEMNAME to MLPSTORAGE_SYSTEMNAME to match the canonical MLPSTORAGE_SYSTEMNAME_ENVVAR constant exported by the same module. mlpstorage_py/benchmarks/base.py - Simplify the one production caller that was threading args._validated_orgname / args._validated_systemname through as kwargs. Those kwargs are gone now and the comment about "legacy / whatif modes skip the orgname/systemname requirement" no longer matches production behavior (whatif uses the same canonical shape as closed/open). The validation helper still stashes _validated_* on args; only the thread-through-as-kwargs step is removed. mlpstorage_py/tests/test_generate_output_location.py - Rewrite _benchmark() helper to put orgname/systemname on args by default (matching the production contract), with overrides for omit (None) and empty-string ("") cases. - All 19 happy-path / safety-guard tests drop the inline `orgname=`/ `systemname=` kwargs. - Drop test_missing_mode_attribute_keeps_legacy_shape and rename test_whatif_has_no_closed_open_prefix to test_whatif_uses_canonical_shape — production no longer special-cases either mode-missing or whatif. - The missing-orgname/empty-orgname/missing-systemname tests now omit the attribute from args (the realistic failure mode at runtime) and assert the typed `parameter` field that the production code now sets. Result: 25 previously-failing tests in this file → 32 passing (file gained one test from a parametrize-out collapse). --- mlpstorage_py/benchmarks/base.py | 18 +- mlpstorage_py/rules/utils.py | 13 +- .../tests/test_generate_output_location.py | 185 +++++++++--------- 3 files changed, 100 insertions(+), 116 deletions(-) diff --git a/mlpstorage_py/benchmarks/base.py b/mlpstorage_py/benchmarks/base.py index ef01260c..ed1833fa 100755 --- a/mlpstorage_py/benchmarks/base.py +++ b/mlpstorage_py/benchmarks/base.py @@ -861,18 +861,12 @@ def generate_output_location(self) -> str: """ if not self.BENCHMARK_TYPE: raise ValueError('No benchmark specified. Unable to generate output location') - # Thread the validated orgname/systemname stashed by - # capture_or_verify_code_image (code_image.py: args._validated_orgname / - # args._validated_systemname) so generate_output_location's - # OPEN/CLOSED ConfigurationError path doesn't fire. For legacy / - # whatif modes these attrs are absent (getattr default None) and the - # function's mode check skips the orgname/systemname requirement. - return generate_output_location( - self, - self.run_datetime, - orgname=getattr(self.args, "_validated_orgname", None), - systemname=getattr(self.args, "_validated_systemname", None), - ) + # orgname and systemname are read from self.args by the function: + # args.orgname is pinned upstream by main._main_impl()'s + # orgname-resolution gate (sourced from orgname.yaml written by + # `mlpstorage init`); args.systemname is set by argparse from + # --systemname / MLPSTORAGE_SYSTEMNAME. + return generate_output_location(self, self.run_datetime) _COLLISION_BUMP_BUDGET = DEFAULT_COLLISION_BUMP_BUDGET diff --git a/mlpstorage_py/rules/utils.py b/mlpstorage_py/rules/utils.py index 7139a0ad..3af09e50 100755 --- a/mlpstorage_py/rules/utils.py +++ b/mlpstorage_py/rules/utils.py @@ -168,9 +168,6 @@ def calculate_training_data_size(args, cluster_information, dataset_params, read def generate_output_location( benchmark, datetime_str=None, - *, - orgname: Optional[str] = None, - systemname: Optional[str] = None, **kwargs, ) -> str: """ @@ -193,9 +190,9 @@ def generate_output_location( This function is PURE with respect to args.{mode, orgname, systemname} — it does NOT resolve orgname from the sentinel or read MLPERF_SYSTEMNAME - here. Per RESEARCH.md Pitfall 1, orgname resolution lives upstream in - main._main_impl()'s sentinel-resolution gate (Slice 4); the universal - --systemname plumbing (Slice 3 / this plan) populates args.systemname. + here. orgname resolution lives upstream in main._main_impl()'s + orgname-resolution gate (reads `orgname.yaml` written by + `mlpstorage init`); --systemname plumbing populates args.systemname. Every path segment appended to results_dir is validated via _check_safe_path_component() to block path-traversal ('../') and @@ -242,6 +239,7 @@ def generate_output_location( raise ConfigurationError( "Cannot generate output location: orgname is empty " "(sentinel not resolved).", + parameter="orgname", suggestion=( "Internal error: the upstream orgname-resolution gate in " "main._main_impl() must populate args.orgname before " @@ -253,9 +251,10 @@ def generate_output_location( if not systemname: raise ConfigurationError( "Cannot generate output location: --systemname is empty.", + parameter="systemname", suggestion=( "Pass --systemname on the CLI or set the " - "MLPERF_SYSTEMNAME environment variable before re-running." + "MLPSTORAGE_SYSTEMNAME environment variable before re-running." ), code=ErrorCode.CONFIG_MISSING_REQUIRED, ) diff --git a/mlpstorage_py/tests/test_generate_output_location.py b/mlpstorage_py/tests/test_generate_output_location.py index 61d158b0..7c2b1997 100644 --- a/mlpstorage_py/tests/test_generate_output_location.py +++ b/mlpstorage_py/tests/test_generate_output_location.py @@ -1,22 +1,28 @@ """Unit tests for ``generate_output_location`` and the orgname/systemname -keyword-only contract (Plan 02-01, Task 2). +args contract. -Per 02-CONTEXT.md D-03 the runtime output path is restructured so results -land under ``{results_dir}/{closed|open}//...`` (with an additional -``results//`` segment for OPEN). Per the Gemini MEDIUM -trust-contract review (02-REVIEWS.md), ``generate_output_location`` does -NOT read environment variables — it accepts ``orgname`` and ``systemname`` -as keyword-only parameters threaded by the CLI dispatch layer (Plan 02-02). +The runtime output path is: + ///results////// + +``generate_output_location`` reads orgname and systemname from ``benchmark.args`` +(NOT from kwargs and NOT from env vars). At runtime: + + * ``args.orgname`` is pinned by ``main._main_impl()``'s orgname-resolution + gate, which reads ``orgname.yaml`` written by ``mlpstorage init``. + * ``args.systemname`` is populated by argparse from ``--systemname`` (with + an ``MLPSTORAGE_SYSTEMNAME`` env-var fallback). This test file exercises: - * the new path prefix for CLOSED and OPEN, - * the back-compat shape for ``whatif`` and any other non-{closed,open} mode, - * the typed ``ConfigurationError`` raised when the kwargs are missing for - closed/open modes (NOT a bare ``KeyError`` from a hidden env read), + * the path prefix for CLOSED, OPEN, and ``whatif`` modes, + * the typed ``ConfigurationError`` raised when ``args.orgname`` or + ``args.systemname`` is missing or empty (with ``parameter`` field set + for the dispatch layer to surface), + * the assertion that the function does NOT consult ``os.environ`` for + ``MLPSTORAGE_*`` — that is the CLI dispatch layer's job. * the module-level env-var-name constants ``MLPSTORAGE_ORGNAME_ENVVAR`` / ``MLPSTORAGE_SYSTEMNAME_ENVVAR`` - exported for Plan 02-02's helper to consume as a single source of truth. + exported as the single source of truth for the dispatch helper. """ import types @@ -29,7 +35,8 @@ def _benchmark(mode: str, model: str = "unet3d", command: str = "datagen", benchmark_type=BENCHMARK_TYPES.training, results_dir: str = "/tmp/r", - index_type: str | None = None, vdb_engine: str | None = None): + index_type: str | None = None, vdb_engine: str | None = None, + orgname: str | None = "acme", systemname: str | None = "sys-1"): """Build a minimal benchmark stand-in with the attributes ``generate_output_location`` reads. @@ -39,6 +46,11 @@ def _benchmark(mode: str, model: str = "unet3d", command: str = "datagen", directory uses the UPPERCASE token (DISKANN / HNSW / AISAQ), matching ``args.index_type`` and ``summary.json.index_type``. ``vdb_engine`` adds the engine segment between and . + + Pass ``orgname=None`` or ``systemname=None`` to omit the attribute + entirely (simulating an args Namespace built before the upstream + orgname-resolution gate populates it). Pass an empty string to simulate + a present-but-empty value. """ args = types.SimpleNamespace( mode=mode, @@ -46,6 +58,10 @@ def _benchmark(mode: str, model: str = "unet3d", command: str = "datagen", model=model, command=command, ) + if orgname is not None: + args.orgname = orgname + if systemname is not None: + args.systemname = systemname if index_type is not None: args.index_type = index_type if vdb_engine is not None: @@ -74,19 +90,17 @@ def test_envvar_constants_exported(): # --------------------------------------------------------------------------- def test_closed_training_prefix(): - """CLOSED training/// sits under - {results_dir}/closed//.""" + """CLOSED training path: {results_dir}/closed//results//training////.""" from mlpstorage_py.rules.utils import generate_output_location b = _benchmark(mode="closed") - path = generate_output_location(b, datetime_str="X", orgname="acme") - assert path.startswith("/tmp/r/closed/acme/training/unet3d/datagen/"), path + path = generate_output_location(b, datetime_str="X") + assert path.startswith("/tmp/r/closed/acme/results/sys-1/training/unet3d/datagen/"), path assert path.endswith("/X"), path def test_closed_checkpointing_prefix(): - """CLOSED checkpointing// sits under - {results_dir}/closed//.""" + """CLOSED checkpointing path omits the segment per LAY-05.""" from mlpstorage_py.rules.utils import generate_output_location b = _benchmark( @@ -95,8 +109,8 @@ def test_closed_checkpointing_prefix(): command="run", benchmark_type=BENCHMARK_TYPES.checkpointing, ) - path = generate_output_location(b, datetime_str="X", orgname="acme") - assert path.startswith("/tmp/r/closed/acme/checkpointing/llama3-8b/"), path + path = generate_output_location(b, datetime_str="X") + assert path.startswith("/tmp/r/closed/acme/results/sys-1/checkpointing/llama3-8b/"), path assert path.endswith("/X"), path @@ -105,14 +119,12 @@ def test_closed_checkpointing_prefix(): # --------------------------------------------------------------------------- def test_open_training_prefix(): - """OPEN training prepends both closed/open-segment and - results// before the per-type tail.""" + """OPEN training has the same shape as CLOSED — both modes thread through + the orgname/results/systemname prefix.""" from mlpstorage_py.rules.utils import generate_output_location b = _benchmark(mode="open") - path = generate_output_location( - b, datetime_str="X", orgname="acme", systemname="sys-1", - ) + path = generate_output_location(b, datetime_str="X") assert path.startswith( "/tmp/r/open/acme/results/sys-1/training/unet3d/datagen/" ), path @@ -135,9 +147,7 @@ def test_open_vector_database_prefix_includes_index_type(): index_type="DISKANN", vdb_engine="milvus", ) - path = generate_output_location( - b, datetime_str="X", orgname="acme", systemname="sys-1", - ) + path = generate_output_location(b, datetime_str="X") assert path.startswith( "/tmp/r/open/acme/results/sys-1/vector_database/milvus/DISKANN/run/" ), path @@ -158,95 +168,79 @@ def test_closed_vector_database_prefix_includes_index_type(): index_type="AISAQ", vdb_engine="milvus", ) - path = generate_output_location(b, datetime_str="X", orgname="acme") + path = generate_output_location(b, datetime_str="X") assert path.startswith( - "/tmp/r/closed/acme/vector_database/milvus/AISAQ/run/" + "/tmp/r/closed/acme/results/sys-1/vector_database/milvus/AISAQ/run/" ), path # --------------------------------------------------------------------------- -# Back-compat: whatif (and any non-{closed,open} mode) — unchanged shape +# whatif mode — uniform canonical shape (no special-case legacy) # --------------------------------------------------------------------------- -def test_whatif_has_no_closed_open_prefix(): - """Mode=whatif keeps the legacy shape — no closed/open segment, - no orgname/systemname.""" +def test_whatif_uses_canonical_shape(): + """`whatif` flows through the same orgname-resolution gate as `closed`/`open`, + so the path has the same canonical shape — only the leading mode segment + differs.""" from mlpstorage_py.rules.utils import generate_output_location b = _benchmark(mode="whatif") path = generate_output_location(b, datetime_str="X") - # No prefix segments appear. - assert "/closed/" not in path, path - assert "/open/" not in path, path - assert "/acme/" not in path, path - # Legacy shape preserved. - assert path.startswith("/tmp/r/training/unet3d/datagen/"), path + assert path.startswith( + "/tmp/r/whatif/acme/results/sys-1/training/unet3d/datagen/" + ), path assert path.endswith("/X"), path -def test_missing_mode_attribute_keeps_legacy_shape(): - """If args.mode is missing entirely (older callers), the function - returns the legacy shape and does not raise.""" - from mlpstorage_py.rules.utils import generate_output_location - - args = types.SimpleNamespace(results_dir="/tmp/r", model="unet3d", command="datagen") - b = types.SimpleNamespace(args=args, BENCHMARK_TYPE=BENCHMARK_TYPES.training) - path = generate_output_location(b, datetime_str="X") - assert path == "/tmp/r/training/unet3d/datagen/X" - - # --------------------------------------------------------------------------- # Typed-error trust contract: missing kwargs for closed/open modes # --------------------------------------------------------------------------- -def test_closed_missing_orgname_raises_configuration_error(): - """CLOSED without orgname raises a typed ConfigurationError that - identifies the missing parameter.""" +def test_missing_orgname_raises_configuration_error(): + """No args.orgname raises a typed ConfigurationError. The dispatch layer + surfaces it as actionable user-facing text via `parameter` + suggestion.""" from mlpstorage_py.rules.utils import generate_output_location - b = _benchmark(mode="closed") + b = _benchmark(mode="closed", orgname=None) with pytest.raises(ConfigurationError) as exc_info: generate_output_location(b, datetime_str="X") - # The CLI dispatch layer can recover the parameter name to surface in - # its own user-facing error. assert exc_info.value.parameter == "orgname" - # And the suggestion text references the env-var name constant so the - # user sees actionable guidance. - assert "MLPSTORAGE_ORGNAME" in str(exc_info.value) + # Suggestion text points the user at the right remediation. + assert "mlpstorage init" in str(exc_info.value) -def test_closed_empty_orgname_raises_configuration_error(): - """An empty-string orgname is treated as missing (avoids producing - a path with an empty path segment).""" +def test_empty_orgname_raises_configuration_error(): + """An empty-string args.orgname is treated as missing (avoids producing + a path with an empty segment).""" from mlpstorage_py.rules.utils import generate_output_location - b = _benchmark(mode="closed") + b = _benchmark(mode="closed", orgname="") with pytest.raises(ConfigurationError) as exc_info: - generate_output_location(b, datetime_str="X", orgname="") + generate_output_location(b, datetime_str="X") assert exc_info.value.parameter == "orgname" -def test_open_missing_systemname_raises_configuration_error(): - """OPEN with orgname but no systemname raises a typed - ConfigurationError that identifies systemname as the missing - parameter.""" +def test_missing_systemname_raises_configuration_error(): + """With orgname present but systemname missing, the function raises a + typed ConfigurationError identifying systemname as the missing parameter.""" from mlpstorage_py.rules.utils import generate_output_location - b = _benchmark(mode="open") + b = _benchmark(mode="open", systemname=None) with pytest.raises(ConfigurationError) as exc_info: - generate_output_location(b, datetime_str="X", orgname="acme") + generate_output_location(b, datetime_str="X") assert exc_info.value.parameter == "systemname" assert "MLPSTORAGE_SYSTEMNAME" in str(exc_info.value) -def test_open_missing_orgname_raises_configuration_error(): - """OPEN missing orgname is also a typed error — orgname is reported - first because it is the outer segment in the path.""" +def test_orgname_reported_before_systemname(): + """When BOTH are missing, orgname is reported first (it is the outer + segment in the path so the error name surfaces the first thing the + user needs to set).""" from mlpstorage_py.rules.utils import generate_output_location - b = _benchmark(mode="open") + b = _benchmark(mode="open", orgname=None, systemname=None) with pytest.raises(ConfigurationError) as exc_info: - generate_output_location(b, datetime_str="X", systemname="sys-1") + generate_output_location(b, datetime_str="X") assert exc_info.value.parameter == "orgname" @@ -256,19 +250,19 @@ def test_open_missing_orgname_raises_configuration_error(): def test_function_does_not_read_mlpstorage_env_vars(monkeypatch): """The function MUST NOT touch os.environ for MLPSTORAGE_* — that is the - CLI dispatch layer's job. We assert by patching the values to something - that would produce a wrong path if the function read them; the function's - explicit kwargs must win.""" + CLI dispatch layer's job. We assert by setting env vars to wrong values + and confirming the path uses args.orgname/args.systemname instead.""" monkeypatch.setenv("MLPSTORAGE_ORGNAME", "ENV-ORGNAME-WRONG") monkeypatch.setenv("MLPSTORAGE_SYSTEMNAME", "ENV-SYSTEMNAME-WRONG") from mlpstorage_py.rules.utils import generate_output_location - b = _benchmark(mode="closed") - path = generate_output_location(b, datetime_str="X", orgname="acme") - # Kwargs win: 'acme' appears, the env-var value does NOT. - assert "/closed/acme/" in path, path + b = _benchmark(mode="closed", orgname="acme", systemname="sys-1") + path = generate_output_location(b, datetime_str="X") + # args wins; the env-var value never appears. + assert "/closed/acme/results/sys-1/" in path, path assert "ENV-ORGNAME-WRONG" not in path, path + assert "ENV-SYSTEMNAME-WRONG" not in path, path # --------------------------------------------------------------------------- @@ -285,28 +279,25 @@ def test_function_does_not_read_mlpstorage_env_vars(monkeypatch): "acme/sub", # embedded separator "acme\x00", # null byte "acme name", # whitespace - "", # empty ]) def test_orgname_rejects_unsafe_path_components(bad_orgname): - """orgname comes from MLPSTORAGE_ORGNAME (user-controlled env). The path - generator must reject anything that isn't a single safe segment, even if - the CLI dispatch layer somehow forwards it (defense in depth).""" + """args.orgname is pinned upstream from orgname.yaml — but a programmatic + caller (test fixture, future internal API) that bypasses the sentinel + schema validation must still hit the path-traversal guard here.""" from mlpstorage_py.rules.utils import generate_output_location - b = _benchmark(mode="closed") + b = _benchmark(mode="closed", orgname=bad_orgname) with pytest.raises((ValueError, ConfigurationError)): - generate_output_location(b, datetime_str="X", orgname=bad_orgname) + generate_output_location(b, datetime_str="X") @pytest.mark.parametrize("bad_systemname", ["../etc", "..", "/absolute", "sys/sub"]) def test_systemname_rejects_unsafe_path_components(bad_systemname): from mlpstorage_py.rules.utils import generate_output_location - b = _benchmark(mode="open") + b = _benchmark(mode="open", systemname=bad_systemname) with pytest.raises(ValueError): - generate_output_location( - b, datetime_str="X", orgname="acme", systemname=bad_systemname, - ) + generate_output_location(b, datetime_str="X") @pytest.mark.parametrize("bad_index", ["../etc", "..", "/absolute", "DISKANN/sub"]) @@ -324,7 +315,7 @@ def test_vdb_index_rejects_unsafe_path_components(bad_index): vdb_engine="milvus", ) with pytest.raises(ValueError): - generate_output_location(b, datetime_str="X", orgname="acme") + generate_output_location(b, datetime_str="X") @pytest.mark.parametrize("bad_value", ["../bad", "..", "/abs", "a/b"]) @@ -333,7 +324,7 @@ def test_model_rejects_unsafe_path_components(bad_value): b = _benchmark(mode="closed", model=bad_value) with pytest.raises(ValueError): - generate_output_location(b, datetime_str="X", orgname="acme") + generate_output_location(b, datetime_str="X") def test_datetime_str_rejects_unsafe_path_components(): @@ -341,4 +332,4 @@ def test_datetime_str_rejects_unsafe_path_components(): b = _benchmark(mode="closed") with pytest.raises(ValueError): - generate_output_location(b, datetime_str="../escape", orgname="acme") + generate_output_location(b, datetime_str="../escape") From 35954a023c496b75570deb2b8bd735fccc7239bd Mon Sep 17 00:00:00 2001 From: Curtis Anderson Date: Fri, 26 Jun 2026 16:16:47 -0700 Subject: [PATCH 2/2] test(base): drop dead orgname/systemname kwargs from generate_output_location call assertion PR #557 (this branch) removed the unused `orgname=`/`systemname=` keyword-only params from `generate_output_location`'s signature and updated the production call site in `mlpstorage_py/benchmarks/base.py` to stop threading them through. The primary tests in `mlpstorage_py/tests/test_generate_output_location.py` were updated to match, but a sibling test in `tests/unit/test_benchmarks_base.py` (a separate test tree) still asserted the old kwarg-bearing call shape via `assert_called_once_with(benchmark, datetime_str, orgname=None, systemname=None)`, which `mock.assert_called_once_with` treats as a literal equality check on the call signature. After the production code dropped the kwargs the mock saw `generate_output_location(benchmark, datetime_str)` and the assertion failed with "expected call not found". The fix is to drop the dead kwargs from the assertion; the behavior under test is just "the production wrapper calls the helper with the benchmark and a datetime string." --- tests/unit/test_benchmarks_base.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/unit/test_benchmarks_base.py b/tests/unit/test_benchmarks_base.py index 98b1f794..7ac1a97c 100755 --- a/tests/unit/test_benchmarks_base.py +++ b/tests/unit/test_benchmarks_base.py @@ -577,9 +577,7 @@ def test_calls_generate_output_location(self, tmp_path): result = benchmark.generate_output_location() - mock_gen.assert_called_once_with( - benchmark, "20250115_120000", orgname=None, systemname=None - ) + mock_gen.assert_called_once_with(benchmark, "20250115_120000") class TestBenchmarkIntegration: