diff --git a/src/linkml_map/cli/cli.py b/src/linkml_map/cli/cli.py index 69081559..7c01c600 100644 --- a/src/linkml_map/cli/cli.py +++ b/src/linkml_map/cli/cli.py @@ -364,7 +364,7 @@ def compile( tr = ObjectTransformer() tr.source_schemaview = sv tr.load_transformer_specification(transformer_specification) - result = compiler.compile(tr.specification) + result = compiler.compile(tr.derived_specification) # dump as-is, no encoding dump_output(result.serialization, None, output) diff --git a/src/linkml_map/compiler/compiler.py b/src/linkml_map/compiler/compiler.py index a2630a98..7ad1c12a 100644 --- a/src/linkml_map/compiler/compiler.py +++ b/src/linkml_map/compiler/compiler.py @@ -64,11 +64,13 @@ class Compiler(ABC): def compile(self, specification: TransformationSpecification) -> CompiledSpecification: """ - Transform source object into an instance of the target class. + Compile a resolved transformation specification into an alternative representation. - :param specification: - :return: + :param specification: A fully resolved specification (e.g. from + ``Transformer.derived_specification``). Must not be ``None``. + :return: The compiled specification. """ + assert specification is not None, "compile() requires a resolved specification" s = self._compile_header(specification) for chunk in self._compile_iterator(specification): s += chunk diff --git a/src/linkml_map/compiler/python_compiler.py b/src/linkml_map/compiler/python_compiler.py index 7fa57448..28f2c3a4 100644 --- a/src/linkml_map/compiler/python_compiler.py +++ b/src/linkml_map/compiler/python_compiler.py @@ -1,5 +1,4 @@ from collections.abc import Iterator -from copy import deepcopy from dataclasses import dataclass from jinja2 import Template @@ -9,7 +8,6 @@ ClassDerivation, TransformationSpecification, ) -from linkml_map.inference.inference import induce_missing_values CD_TEMPLATE = """ {% macro gen_slot_derivation_value(sd, var) -%} @@ -88,8 +86,6 @@ def _compile_header(self, specification: TransformationSpecification) -> str: return s def _compile_iterator(self, specification: TransformationSpecification) -> Iterator[str]: - specification = deepcopy(specification) - induce_missing_values(specification, self.source_schemaview) for cd in specification.class_derivations: yield from self._compiled_class_derivations_iter(cd) diff --git a/src/linkml_map/transformer/transformer.py b/src/linkml_map/transformer/transformer.py index 04cf07e5..310c8bd8 100644 --- a/src/linkml_map/transformer/transformer.py +++ b/src/linkml_map/transformer/transformer.py @@ -41,7 +41,21 @@ class Transformer(ABC): an instance of a source class, making use of a specification. This is an abstract class. Different implementations will - subclass this + subclass this. + + Specification normalization has two phases: + + 1. **Load-time normalization** (``_normalize_spec_dict``): Structural fixes + applied to a raw dict before Pydantic instantiation — YAML quirk handling, + ``$ref`` expansion, dict-to-list conversion. Does not require a source schema. + All entry points (``load_transformer_specification``, + ``create_transformer_specification``, ``Session``, ``loaders``) go through + this single method. + + 2. **Schema-bind-time induction** (``derived_specification``): Semantic defaults + inferred from the source schema — ``populated_from``, ``range``, foreign-key + resolution. Runs lazily on first access to ``derived_specification`` and + requires ``source_schemaview`` to be set. """ specification: TransformationSpecification = None @@ -218,6 +232,16 @@ def _apply_source_schema_patches(self) -> None: @property def derived_specification(self) -> TransformationSpecification | None: + """Return the specification with schema-inferred defaults filled in. + + Creates a deep copy of ``self.specification``, applies any source schema + patches, then calls ``induce_missing_values`` to fill in ``populated_from``, + ``range``, and other fields that require knowledge of the source schema. + The result is cached for subsequent access. + + This is the second phase of normalization — see the class docstring for + the full two-phase pipeline. + """ if self._derived_specification is None: if self.specification is None: return None diff --git a/tests/input/examples/personinfo_basic/output/personinfo_compiled.md b/tests/input/examples/personinfo_basic/output/personinfo_compiled.md index d368f3a5..0a0b1d93 100644 --- a/tests/input/examples/personinfo_basic/output/personinfo_compiled.md +++ b/tests/input/examples/personinfo_basic/output/personinfo_compiled.md @@ -7,9 +7,9 @@ | Target | Target Range | Source | Source Range | Info | | ------ | ------ | ---- | ---- | ---- | -| agents | | None | persons | . | . | +| agents | | Agent | persons | . | . | -### Entity `<-` None +### Entity `<-` Entity | Target | Target Range | Source | Source Range | Info | | ------ | ------ | ---- | ---- | ---- | @@ -18,23 +18,23 @@ | Target | Target Range | Source | Source Range | Info | | ------ | ------ | ---- | ---- | ---- | -| id | | None | None | . | . | +| id | | None | id | . | . | | label | | None | name | . | . | | age | | None | [expression] | . | . | -| primary_email | | None | None | . | . | +| primary_email | | None | primary_email | . | . | | secondary_email | | None | [expression] | . | . | | gender | | None | [expression] | . | . | | driving_since | | None | [expression] | . | . | | first_known_event | | None | [expression] | . | . | | death_date | | None | [expression] | . | . | -| current_address | | None | None | . | . | -| has_familial_relationships | | None | has_familial_relationships | . | . | +| current_address | | Address | current_address | . | . | +| has_familial_relationships | | FamilialRelationship | has_familial_relationships | . | . | -### Job `<-` None +### Job `<-` Job | Target | Target Range | Source | Source Range | Info | | ------ | ------ | ---- | ---- | ---- | -| type | | None | None | . | . | +| type | | None | type | . | . | | current | | None | [expression] | . | . | ### Address `<-` Address @@ -42,17 +42,17 @@ | Target | Target Range | Source | Source Range | Info | | ------ | ------ | ---- | ---- | ---- | | address_of | | None | [expression] | . | . | -| street | | None | None | . | . | -| city | | None | None | . | . | +| street | | None | street | . | . | +| city | | None | city | . | . | ### FamilialRelationship `<-` FamilialRelationship | Target | Target Range | Source | Source Range | Info | | ------ | ------ | ---- | ---- | ---- | -| type | | None | None | . | . | -| related_to | | None | None | . | . | +| type | | None | type | . | . | +| related_to | | Agent | related_to | . | . | -### SequenceFeature `<-` None +### SequenceFeature `<-` SequenceFeature | Target | Target Range | Source | Source Range | Info | | ------ | ------ | ---- | ---- | ---- | @@ -62,9 +62,9 @@ | Target | Target Range | Source | Source Range | Info | | ------ | ------ | ---- | ---- | ---- | -| id | | None | None | . | . | -| creator | | None | None | . | . | -| license | | None | None | . | . | +| id | | None | id | . | . | +| creator | | None | creator | . | . | +| license | | None | license | . | . | | subject_id | | None | [expression] | . | . | | subject_name | | None | [expression] | . | . | | object_id | | None | [expression] | . | . | diff --git a/tests/test_compiler/test_python_compiler.py b/tests/test_compiler/test_python_compiler.py index e059c644..607e672a 100644 --- a/tests/test_compiler/test_python_compiler.py +++ b/tests/test_compiler/test_python_compiler.py @@ -8,7 +8,7 @@ import tests.input.examples.personinfo_basic.model.personinfo_model as src from linkml_map.compiler.python_compiler import PythonCompiler -from linkml_map.utils.loaders import load_specification +from linkml_map.transformer.object_transformer import ObjectTransformer from tests import SCHEMA1, SPECIFICATION @@ -24,8 +24,9 @@ def compiler() -> PythonCompiler: def test_compile(compiler: PythonCompiler) -> None: """Basic test of Python Compiler functionality.""" - spec = load_specification(SPECIFICATION) - pycode = compiler.compile(spec) + tr = ObjectTransformer(source_schemaview=SchemaView(SCHEMA1)) + tr.load_transformer_specification(SPECIFICATION) + pycode = compiler.compile(tr.derived_specification) # TODO: include imports so that code compiles print(pycode.serialization) mod = compile_python(pycode.serialization) diff --git a/tests/test_compliance/test_compliance_suite.py b/tests/test_compliance/test_compliance_suite.py index aec0a888..a9bb1811 100644 --- a/tests/test_compliance/test_compliance_suite.py +++ b/tests/test_compliance/test_compliance_suite.py @@ -154,15 +154,6 @@ def map_object( :param raises_error: if not None, the expected error to be raised during transformation :return: state object including transformed object plus intermediate objects """ - pc = PythonCompiler(source_schemaview=source_sv) - python_code = pc.compile(spec) - logger.debug(f"Python Code: {python_code}\n\n") - # TODO: enable this - # print("Python Code (Generated)\n\n") - # print("```python") - # print(python_code.serialization) - # print("```\n") - # mod = python_code.module schema_mapper = SchemaMapper(source_schemaview=source_sv) target_schema = schema_mapper.derive_schema(spec) target_sv = SchemaView(yaml_dumper.dumps(target_schema)) @@ -170,6 +161,9 @@ def map_object( mapper = ObjectTransformer(source_schemaview=source_sv, specification=spec) else: mapper = ObjectTransformer(specification=spec) + pc = PythonCompiler(source_schemaview=source_sv) + python_code = pc.compile(mapper.derived_specification or spec) + logger.debug(f"Python Code: {python_code}\n\n") if index: mapper.index(source_object, target=source_root) if raises_error: