Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions nam/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import abc as _abc
import logging as _logging
import math as _math
import wave as _wave
from collections import namedtuple as _namedtuple
from copy import deepcopy as _deepcopy
Expand Down Expand Up @@ -361,10 +362,30 @@ def scale(self) -> float:
def _apply_wavenet(self, model_dict: dict):
    """
    Apply the output scale to an exported WaveNet dict, in place.

    Multiplies `config.head_scale` and the last entry of `weights` by
    `self._scale`, then hands the dict to `_adjust_metadata_loudness`
    so the loudness metadata follows the new scale.
    """
    factor = self._scale
    config = model_dict["config"]
    config["head_scale"] *= factor
    weights = model_dict["weights"]
    # The trailing weight is scaled together with `config.head_scale`.
    weights[-1] *= factor
    self._adjust_metadata_loudness(model_dict)

def _apply_slimmable_container(self, model_dict: dict):
    """
    Apply this hook to a SlimmableContainer dict, in place.

    Every entry of `config.submodels` has its `model` dict passed to
    `self.apply`; afterwards the container dict itself is handed to
    `_adjust_metadata_loudness`.
    """
    submodels = model_dict["config"]["submodels"]
    for entry in submodels:
        self.apply(entry["model"])
    # Container-level metadata is updated last, after all submodels.
    self._adjust_metadata_loudness(model_dict)

def _adjust_metadata_loudness(self, model_dict: dict) -> None:
    """
    Shift `metadata.loudness` to describe the compensated model that
    this hook just wrote into `config.head_scale`.

    WaveNet (no top-level head) and SlimmableContainer outputs are
    linear in `head_scale`, so the dB adjustment is exact:

        loudness_new = loudness_old + 20 * log10(|self._scale|)

    Only the magnitude of the scale matters: a negative scale flips the
    polarity of the output, which does not change its level in dB.

    `metadata.gain` is a normalized compression heuristic that is
    invariant under uniform output scaling, so it is not adjusted.

    :param model_dict: Exported model dict, modified in place. It is left
        untouched when it has no `metadata` dict, or when
        `metadata["loudness"]` is missing or `None`.
    :raises ValueError: If `self._scale` is zero, since the loudness of a
        silenced model cannot be expressed as a finite dB offset.
    """
    metadata = model_dict.get("metadata")
    if not isinstance(metadata, dict):
        return
    loudness = metadata.get("loudness")
    if loudness is None:
        # Nothing to shift: either no loudness was recorded, or it is an
        # unset placeholder.
        return
    magnitude = abs(self._scale)
    if magnitude == 0:
        raise ValueError(
            "Cannot adjust metadata loudness for an output scale of zero"
        )
    metadata["loudness"] = loudness + 20.0 * _math.log10(magnitude)

def __init__(
self,
Expand Down
84 changes: 84 additions & 0 deletions tests/test_nam/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,90 @@ def _t_apply_delay_int(self, n: int, delay: int):
return x_out, y_out


class TestScaleOutputHookLoudnessCompensation:
"""
`_ScaleOutputHook` undoes a dataset y_scale on export by scaling
`head_scale` (and the duplicated `weights[-1]`). `metadata.loudness`
must move along with it so it describes the compensated model that the
plugin actually loads — not the pre-compensation snapshot taken inside
`_get_export_dict()`.

Output of WaveNet (no top-level head) and SlimmableContainer is linear
in `head_scale`, so the dB adjustment is exact:

loudness_new = loudness_old + 20 * log10(scale)
"""

@staticmethod
def _hook(scale: float) -> data.Dataset._ScaleOutputHook:
    """Build the `_ScaleOutputHook` under test for the given scale."""
    hook_cls = data.Dataset._ScaleOutputHook
    return hook_cls(scale=scale)

def test_wavenet_loudness_shifts_in_db_by_scale(self):
    """
    Applying the hook to a WaveNet dict moves `metadata.loudness` by
    `20 * log10(scale)` and leaves `metadata.gain` as it was.
    """
    factor = 2.0
    exported = {
        "architecture": "WaveNet",
        "config": {"head_scale": 0.01},
        "metadata": {"loudness": -20.0, "gain": 0.5},
        "weights": [0.01],
    }
    self._hook(factor).apply(exported)
    metadata = exported["metadata"]
    expected_loudness = -20.0 + 20.0 * math.log10(factor)
    assert metadata["loudness"] == pytest.approx(expected_loudness)
    # gain is invariant under uniform output scaling
    assert metadata["gain"] == 0.5

def test_slimmable_container_shifts_container_and_submodels(self):
scale = 0.5
container = {
"architecture": "SlimmableContainer",
"metadata": {"loudness": -18.0, "gain": 0.4},

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eek, these aren't supposed to be the averages of the values in the submodels 😅

It's just a test, but I think I'd prefer for these to track the values of the highest-quality submodel. If that's not happening already, then that's a bug that should also be squashed.

Ideally, there'd be validation (i.e. Pydantic) to enforce this.

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not the end of the world if it's already happening elsewhere though--I have to admit I'm not sure I know the answer off the top of my head.

"config": {
"submodels": [
{
"max_value": 0.5,
"model": {
"architecture": "WaveNet",
"config": {"head_scale": 0.01},
"metadata": {"loudness": -19.0, "gain": 0.3},
"weights": [0.01],
},
},
{
"max_value": 1.0,
"model": {
"architecture": "WaveNet",
"config": {"head_scale": 0.01},
"metadata": {"loudness": -17.0, "gain": 0.5},
"weights": [0.01],
},
},
]
},
}
self._hook(scale).apply(container)
offset = 20.0 * math.log10(scale)
assert container["metadata"]["loudness"] == pytest.approx(-18.0 + offset)
assert container["config"]["submodels"][0]["model"]["metadata"][
"loudness"
] == pytest.approx(-19.0 + offset)
assert container["config"]["submodels"][1]["model"]["metadata"][
"loudness"
] == pytest.approx(-17.0 + offset)
assert container["metadata"]["gain"] == 0.4

def test_no_op_when_loudness_metadata_absent(self):
    """
    Hook is robust when called on a dict without loudness metadata.

    `head_scale` is still scaled, and no `metadata` key is created.
    """
    exported = {
        "architecture": "WaveNet",
        "config": {"head_scale": 0.01},
        "weights": [0.01],
    }
    hook = self._hook(2.0)
    hook.apply(exported)
    assert "metadata" not in exported
    scaled_head = exported["config"]["head_scale"]
    assert scaled_head == pytest.approx(0.02)


class TestWav(object):
tolerance = 1e-6

Expand Down
39 changes: 39 additions & 0 deletions tests/test_nam/test_models/test_packed_wavenet.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,45 @@ def test_packed_export_writes_slimmable_container(tmp_path):
_assert_container_contains_two_wavenets(container)


def test_packed_export_refreshes_loudness_after_head_scale_compensation(tmp_path):
"""
When an export hook scales `head_scale` (e.g. the dataset normalization
handshake), the exported `metadata.loudness` must describe the *compensated*
model that will be loaded at inference, not the pre-compensation snapshot.

Output of WaveNet (no top-level head) and SlimmableContainer is linear in
`head_scale`, so loudness moves by `20 * log10(scale)` exactly.
"""
import math as _math

model = _PackedWaveNet.init_from_config({**_packed_config(), "sample_rate": 48_000})
pre_container = model.export_container(tmp_path)
pre_container_loudness = pre_container["metadata"]["loudness"]
pre_submodel_loudnesses = [
entry["model"]["metadata"]["loudness"]
for entry in pre_container["config"]["submodels"]
]

scale = 2.0
model.export_model_dict_post_hooks.append(_data.Dataset._ScaleOutputHook(scale=scale))
post_container = model.export_container(tmp_path)

offset_db = 20.0 * _math.log10(scale)
assert post_container["metadata"]["loudness"] == _pytest.approx(
pre_container_loudness + offset_db, abs=1e-3
)
for entry, pre_loudness in zip(
post_container["config"]["submodels"], pre_submodel_loudnesses
):
assert entry["model"]["metadata"]["loudness"] == _pytest.approx(
pre_loudness + offset_db, abs=1e-3
)
# head_scale was actually compensated on disk
assert entry["model"]["config"]["head_scale"] == _pytest.approx(
0.25 * scale

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where's the 0.25 come from? Line 26 I think?

Can you make this a constant in this file (e.g. `_DEFAULT_HEAD_SCALE`), or pass it as an argument to `_packed_config`, so that it's not a literal?

)


def test_packed_export_applies_model_dict_post_hooks(tmp_path):
model = _PackedWaveNet.init_from_config({**_packed_config(), "sample_rate": 48_000})
model.export_model_dict_post_hooks.append(_data.Dataset._ScaleOutputHook(scale=2.0))
Expand Down
Loading