diff --git a/php-transformer/src/HtmlToBlocks/HtmlTransformer.php b/php-transformer/src/HtmlToBlocks/HtmlTransformer.php index adb60df..bf3584a 100644 --- a/php-transformer/src/HtmlToBlocks/HtmlTransformer.php +++ b/php-transformer/src/HtmlToBlocks/HtmlTransformer.php @@ -1483,7 +1483,6 @@ private function convertElement(DOMElement $element, array &$fallbacks, bool $ca $controls = $this->formControls($element); $readableFormBlock = $this->readableFormBlockFromForm($element, true); - $boundedHtml = $this->boundedFallbackHtml($this->safeFallbackHtml($element)); $this->recordRuntimeIsland($element, 'form', 'form_requires_runtime', 'server_or_client_form_handler', array( 'form' => $this->formMetadata($element), 'controls' => $controls, @@ -1502,28 +1501,7 @@ private function convertElement(DOMElement $element, array &$fallbacks, bool $ca // Carrying the generic control list (tag/type/name/required/options) // keeps the transformer free of any provider or plugin knowledge. if ( null === $readableFormBlock || $this->formHasDataEntryControls($element) ) { - $fallbacks[] = FallbackDiagnostic::build(array( - 'type' => 'html', - 'reason' => 'form_requires_runtime', - 'diagnostic_code' => 'html_form_fallback', - 'message' => 'Form HTML requires runtime behavior and was preserved as safe fallback metadata.', - 'source_format' => 'html', - 'tag' => $tagName, - 'selector' => $this->elementSelector($element), - 'attributes' => $this->htmlAttributes($element), - 'form' => $this->formMetadata($element), - 'context' => $this->sourceContext($element), - 'classification' => $this->fallbackEmitter->classifyFallbackSubtree($element), - 'events' => $this->eventMetadata($element), - 'readable_blocks' => null !== $readableFormBlock ? array( $readableFormBlock ) : array(), - 'controls' => $controls, - 'control_count' => count($controls), - 'text_length' => strlen(trim($element->textContent ?? 
'')), - 'child_count' => $this->childElementCount($element), - 'html' => $boundedHtml['html'], - 'html_bytes' => $boundedHtml['bytes'], - 'html_truncated' => $boundedHtml['truncated'], - ), $this->fallbackProvenance); + $fallbacks[] = $this->formFallbackFinding($element, $readableFormBlock); } return $readableFormBlock; @@ -1537,6 +1515,19 @@ private function convertElement(DOMElement $element, array &$fallbacks, bool $ca } if ( in_array($tagName, array( 'article', 'aside', 'body', 'center', 'div', 'footer', 'header', 'main', 'nav', 'section' ), true) ) { + // Div-based pseudo-form (issue #315 follow-up): some signup/contact + // widgets pair data-entry controls with a submit-like control inside a + // plain container and never wrap them in a
. Without a + // element the form-detection path above never fires, so the controls + // flatten into prose plus a dead button. When the tightest container + // pairs a data-entry control with a submit-like control (and no real + // owns the subtree), emit the SAME html_form_fallback finding so + // the downstream materializer treats it identically to a real form. The + // readable content still renders below; this only adds the finding. + if ( $this->isDivBasedPseudoForm($element) ) { + $fallbacks[] = $this->formFallbackFinding($element, $this->readableFormBlockFromForm($element, true)); + } + $logo = $this->logoPattern->match( $element, fn (DOMElement $sourceElement): array => $this->presentationAttributes($sourceElement), @@ -4648,6 +4639,190 @@ private function formRequiresRuntimePreservation(DOMElement $form): bool || $this->formHasDataEntryControls($form); } + /** + * Build the shared html_form_fallback finding (issue #315) for an element that + * behaves as a form. Both the real path and the div-based pseudo-form + * path emit through here so the downstream materializer receives an identical + * shape (controls, form metadata, classification, bounded HTML) regardless of + * whether the source markup used a element. 
+     *
+     * @param array|null $readableFormBlock Block produced by readableFormBlockFromForm() for this
+     *                                      element, or null; it becomes the only entry of
+     *                                      'readable_blocks' (an empty list when null).
+     * @return array Finding returned by FallbackDiagnostic::build().
+     */
+    private function formFallbackFinding(DOMElement $element, ?array $readableFormBlock): array
+    {
+        $controlList  = $this->formControls($element);
+        $safeHtml     = $this->safeFallbackHtml($element);
+        $fallbackHtml = $this->boundedFallbackHtml($safeHtml);
+
+        // A missing readable block yields an empty list, never a list holding null.
+        $readableBlocks = array();
+        if ( null !== $readableFormBlock ) {
+            $readableBlocks[] = $readableFormBlock;
+        }
+
+        // Key order and helper call order are kept stable so both emitters
+        // (real form path and pseudo-form path) produce the same shape.
+        $finding = array(
+            'type' => 'html',
+            'reason' => 'form_requires_runtime',
+            'diagnostic_code' => 'html_form_fallback',
+            'message' => 'Form HTML requires runtime behavior and was preserved as safe fallback metadata.',
+            'source_format' => 'html',
+            'tag' => strtolower($element->tagName),
+            'selector' => $this->elementSelector($element),
+            'attributes' => $this->htmlAttributes($element),
+            'form' => $this->formMetadata($element),
+            'context' => $this->sourceContext($element),
+            'classification' => $this->fallbackEmitter->classifyFallbackSubtree($element),
+            'events' => $this->eventMetadata($element),
+            'readable_blocks' => $readableBlocks,
+            'controls' => $controlList,
+            'control_count' => count($controlList),
+            'text_length' => strlen(trim($element->textContent ?? '')),
+            'child_count' => $this->childElementCount($element),
+            'html' => $fallbackHtml['html'],
+            'html_bytes' => $fallbackHtml['bytes'],
+            'html_truncated' => $fallbackHtml['truncated'],
+        );
+
+        return FallbackDiagnostic::build($finding, $this->fallbackProvenance);
+    }
+
+    /**
+     * Whether a non-form container behaves as a form: it is the tightest
+     * container that pairs at least one data-entry control with a submit-like
+     * control, and no real form element owns the subtree.
+     *
+     * Structural only — the signal is "data-entry control + submit-like control in
+     * one bounded container", never a fixture id/class/name. Conservative: a lone
+     * search box or a stray input with no submit control never qualifies, and a
+     * subtree owned by a real form element (as ancestor or descendant) is left to
+     * the form-element path so the finding is emitted exactly once.
+     */
+    private function isDivBasedPseudoForm(DOMElement $element): bool
+    {
+        if ( 'form' === strtolower($element->tagName) ) {
+            return false;
+        }
+
+        // A real form element as ancestor or descendant owns the controls; let
+        // the form-element path emit the finding so it is never double-counted.
+        if ( $this->hasFormAncestor($element) ) {
+            return false;
+        }
+        if ( 0 < $element->getElementsByTagName('form')->length ) {
+            return false;
+        }
+
+        if ( ! $this->containerPairsDataEntryWithSubmit($element) ) {
+            return false;
+        }
+
+        // Tags for which convertElement() runs this check. Only a descendant with
+        // one of these tags can emit the finding itself, so only such a descendant
+        // may take it over. Keep this list in sync with the container branch of
+        // convertElement().
+        $containerTags = array( 'article', 'aside', 'body', 'center', 'div', 'footer', 'header', 'main', 'nav', 'section' );
+
+        // Bound the container to the tightest one: if a descendant container also
+        // pairs the controls, defer to it so a wrapper does not swallow a nested
+        // pseudo-form (and sibling pseudo-forms each emit their own finding).
+        // Non-container wrappers (p, span, label, fieldset, li, td, ...) are
+        // skipped: deferring to them would drop the finding entirely, because the
+        // pseudo-form check is not run for those tags at the convertElement() call site.
+        foreach ( $element->getElementsByTagName('*') as $descendant ) {
+            if ( ! $descendant instanceof DOMElement ) {
+                continue;
+            }
+            if ( ! in_array(strtolower($descendant->tagName), $containerTags, true) ) {
+                continue;
+            }
+            if ( ! $this->isFormControlElement($descendant)
+                && $this->containerPairsDataEntryWithSubmit($descendant) ) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * Whether a container holds at least one data-entry control AND at least one
+     * submit-like control. Reuses the issue #315 control-detection helpers
+     * (formControlElements / isDataEntryControl) so detection stays in one place.
+     */
+    private function containerPairsDataEntryWithSubmit(DOMElement $element): bool
+    {
+        $hasDataEntry = false;
+        $hasSubmit = false;
+
+        foreach ( $this->formControlElements($element) as $control ) {
+            // Data-entry is tested first, so a control counts as submit-like only
+            // when it is not a data-entry anchor.
+            if ( $this->isPseudoFormDataEntryControl($control) ) {
+                $hasDataEntry = true;
+            } elseif ( $this->isSubmitLikeControl($control) ) {
+                $hasSubmit = true;
+            }
+
+            // Stop scanning as soon as both halves of the pairing are present.
+            if ( $hasDataEntry && $hasSubmit ) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * A data-entry control that anchors a pseudo-form. Reuses #315's
+     * isDataEntryControl and additionally excludes search inputs, which already
+     * have dedicated standalone-search handling and should not be promoted into a
+     * form fallback.
+ */ + private function isPseudoFormDataEntryControl(DOMElement $control): bool + { + return $this->isDataEntryControl($control) && 'search' !== $this->formControlType($control); + } + + /** + * Whether a control submits a form: an explicit submit/image control, or a + * button/input whose text/value/type/class/id/name/aria carries submit, + * subscribe, sign-up, or send semantics. A plain " + }, + "expected_blocks": [ + { "path": "blocks.0", "name": "core/group" } + ], + "expected_fallbacks": [ + { "type": "html", "reason": "form_requires_runtime", "diagnostic_code": "html_form_fallback" } + ], + "expect": [ + { "path": "status", "assert": "equals", "value": "success" }, + { "path": "blocks", "assert": "count", "count": 1 }, + { "path": "fallbacks", "assert": "count", "count": 1 }, + { "path": "fallbacks.0.diagnostic_code", "assert": "equals", "value": "html_form_fallback" }, + { "path": "fallbacks.0.reason", "assert": "equals", "value": "form_requires_runtime" }, + { "path": "fallbacks.0.tag", "assert": "equals", "value": "div" }, + { "path": "fallbacks.0.control_count", "assert": "equals", "value": 2 }, + { "path": "fallbacks.0.controls.0.tag", "assert": "equals", "value": "input" }, + { "path": "fallbacks.0.controls.0.type", "assert": "equals", "value": "email" }, + { "path": "fallbacks.0.controls.0.name", "assert": "equals", "value": "email" }, + { "path": "fallbacks.0.controls.0.required", "assert": "equals", "value": true }, + { "path": "fallbacks.0.controls.1.tag", "assert": "equals", "value": "button" }, + { "path": "fallbacks.0.controls.1.type", "assert": "equals", "value": "submit" }, + { "path": "serialized_blocks", "assert": "contains", "value": "Email address: you@example.com (required)" }, + { "path": "serialized_blocks", "assert": "contains", "value": "Subscribe" } + ] +} diff --git a/php-transformer/tests/fixtures/parity/html-div-search-box-not-a-form.json b/php-transformer/tests/fixtures/parity/html-div-search-box-not-a-form.json new file mode 
100644 index 0000000..7761b22 --- /dev/null +++ b/php-transformer/tests/fixtures/parity/html-div-search-box-not-a-form.json @@ -0,0 +1,22 @@ +{ + "schema": "blocks-engine/php-transformer/parity-fixture/v1", + "name": "html-div-search-box-not-a-form", + "description": "A div holding a search input and a search submit button is a search widget, not a data-entry form, so div-based pseudo-form detection stays silent and emits no html_form_fallback finding.", + "source_reference": { + "repo": "php-transformer", + "path": "tests/fixtures/parity/html-div-search-box-not-a-form.json", + "notes": "Search inputs have dedicated standalone-search handling and must not be promoted into a form fallback; pseudo-form detection only counts non-search data-entry controls as the form anchor." + }, + "legacy_comparison": { + "skip": true, + "reason": "This upstream primitive fixture has no downstream legacy comparison." + }, + "operation": "html_transformer.transform", + "input": { + "content": "
" + }, + "expect": [ + { "path": "status", "assert": "equals", "value": "success" }, + { "path": "fallbacks", "assert": "count", "count": 0 } + ] +} diff --git a/php-transformer/tests/fixtures/parity/html-div-text-links-not-a-form.json b/php-transformer/tests/fixtures/parity/html-div-text-links-not-a-form.json new file mode 100644 index 0000000..d11e0bf --- /dev/null +++ b/php-transformer/tests/fixtures/parity/html-div-text-links-not-a-form.json @@ -0,0 +1,24 @@ +{ + "schema": "blocks-engine/php-transformer/parity-fixture/v1", + "name": "html-div-text-links-not-a-form", + "description": "A plain content div holding only text and links carries no data-entry/submit control pairing, so div-based pseudo-form detection stays silent and no html_form_fallback finding is emitted.", + "source_reference": { + "repo": "php-transformer", + "path": "tests/fixtures/parity/html-div-text-links-not-a-form.json", + "notes": "Guards the conservative bound: ordinary containers (and lone inputs or stray links) must not be promoted into form fallbacks; the input+submit pairing is required." + }, + "legacy_comparison": { + "skip": true, + "reason": "This upstream primitive fixture has no downstream legacy comparison." + }, + "operation": "html_transformer.transform", + "input": { + "content": "

Read our latest stories for distillery updates.

About us
" + }, + "expect": [ + { "path": "status", "assert": "equals", "value": "success" }, + { "path": "fallbacks", "assert": "count", "count": 0 }, + { "path": "serialized_blocks", "assert": "not_contains", "value": "html_form_fallback" }, + { "path": "serialized_blocks", "assert": "contains", "value": "latest stories" } + ] +} diff --git a/php-transformer/tests/fixtures/parity/html-real-form-single-form-fallback.json b/php-transformer/tests/fixtures/parity/html-real-form-single-form-fallback.json new file mode 100644 index 0000000..0934355 --- /dev/null +++ b/php-transformer/tests/fixtures/parity/html-real-form-single-form-fallback.json @@ -0,0 +1,30 @@ +{ + "schema": "blocks-engine/php-transformer/parity-fixture/v1", + "name": "html-real-form-single-form-fallback", + "description": "A real with the same email + submit pairing as the div pseudo-form still emits exactly one html_form_fallback finding (tag form) and is never double-counted by the new div-based pseudo-form detection.", + "source_reference": { + "repo": "php-transformer", + "path": "tests/fixtures/parity/html-real-form-single-form-fallback.json", + "notes": "Guards against double emission: the path owns the subtree, so the div-based pseudo-form detection must not also fire for the form or any wrapper around it." + }, + "legacy_comparison": { + "skip": true, + "reason": "This upstream primitive fixture has no downstream legacy comparison." + }, + "operation": "html_transformer.transform", + "input": { + "content": "
" + }, + "expected_fallbacks": [ + { "type": "html", "reason": "form_requires_runtime", "diagnostic_code": "html_form_fallback" } + ], + "expect": [ + { "path": "status", "assert": "equals", "value": "success" }, + { "path": "fallbacks", "assert": "count", "count": 1 }, + { "path": "fallbacks.0.diagnostic_code", "assert": "equals", "value": "html_form_fallback" }, + { "path": "fallbacks.0.tag", "assert": "equals", "value": "form" }, + { "path": "fallbacks.0.control_count", "assert": "equals", "value": 2 }, + { "path": "source_reports.runtime_islands", "assert": "count", "count": 1 }, + { "path": "source_reports.runtime_islands.0.kind", "assert": "equals", "value": "form" } + ] +}