Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
221 changes: 198 additions & 23 deletions php-transformer/src/HtmlToBlocks/HtmlTransformer.php
Original file line number Diff line number Diff line change
Expand Up @@ -1483,7 +1483,6 @@ private function convertElement(DOMElement $element, array &$fallbacks, bool $ca

$controls = $this->formControls($element);
$readableFormBlock = $this->readableFormBlockFromForm($element, true);
$boundedHtml = $this->boundedFallbackHtml($this->safeFallbackHtml($element));
$this->recordRuntimeIsland($element, 'form', 'form_requires_runtime', 'server_or_client_form_handler', array(
'form' => $this->formMetadata($element),
'controls' => $controls,
Expand All @@ -1502,28 +1501,7 @@ private function convertElement(DOMElement $element, array &$fallbacks, bool $ca
// Carrying the generic control list (tag/type/name/required/options)
// keeps the transformer free of any provider or plugin knowledge.
if ( null === $readableFormBlock || $this->formHasDataEntryControls($element) ) {
$fallbacks[] = FallbackDiagnostic::build(array(
'type' => 'html',
'reason' => 'form_requires_runtime',
'diagnostic_code' => 'html_form_fallback',
'message' => 'Form HTML requires runtime behavior and was preserved as safe fallback metadata.',
'source_format' => 'html',
'tag' => $tagName,
'selector' => $this->elementSelector($element),
'attributes' => $this->htmlAttributes($element),
'form' => $this->formMetadata($element),
'context' => $this->sourceContext($element),
'classification' => $this->fallbackEmitter->classifyFallbackSubtree($element),
'events' => $this->eventMetadata($element),
'readable_blocks' => null !== $readableFormBlock ? array( $readableFormBlock ) : array(),
'controls' => $controls,
'control_count' => count($controls),
'text_length' => strlen(trim($element->textContent ?? '')),
'child_count' => $this->childElementCount($element),
'html' => $boundedHtml['html'],
'html_bytes' => $boundedHtml['bytes'],
'html_truncated' => $boundedHtml['truncated'],
), $this->fallbackProvenance);
$fallbacks[] = $this->formFallbackFinding($element, $readableFormBlock);
}

return $readableFormBlock;
Expand All @@ -1537,6 +1515,19 @@ private function convertElement(DOMElement $element, array &$fallbacks, bool $ca
}

if ( in_array($tagName, array( 'article', 'aside', 'body', 'center', 'div', 'footer', 'header', 'main', 'nav', 'section' ), true) ) {
// Div-based pseudo-form (issue #315 follow-up): some signup/contact
// widgets pair data-entry controls with a submit-like control inside a
// plain container and never wrap them in a <form>. Without a <form>
// element the form-detection path above never fires, so the controls
// flatten into prose plus a dead button. When the tightest container
// pairs a data-entry control with a submit-like control (and no real
// <form> owns the subtree), emit the SAME html_form_fallback finding so
// the downstream materializer treats it identically to a real form. The
// readable content still renders below; this only adds the finding.
if ( $this->isDivBasedPseudoForm($element) ) {
$fallbacks[] = $this->formFallbackFinding($element, $this->readableFormBlockFromForm($element, true));
}

$logo = $this->logoPattern->match(
$element,
fn (DOMElement $sourceElement): array => $this->presentationAttributes($sourceElement),
Expand Down Expand Up @@ -4648,6 +4639,190 @@ private function formRequiresRuntimePreservation(DOMElement $form): bool
|| $this->formHasDataEntryControls($form);
}

/**
 * Assemble the html_form_fallback diagnostic for an element that is being
 * treated as a form.
 *
 * The real <form> branch and the div-based pseudo-form branch both call this
 * method, so the finding carries the same keys (controls, form metadata,
 * classification, bounded HTML, ...) whichever kind of markup produced it.
 * The reported `tag` is the lower-cased tag name of the element passed in.
 *
 * @param DOMElement                $element           Element whose subtree is described by the finding.
 * @param array<string, mixed>|null $readableFormBlock Readable block for the element, or null when there is
 *                                                     none; exposed as a one-item list under `readable_blocks`
 *                                                     (an empty list when null).
 * @return array<string, mixed> The value returned by FallbackDiagnostic::build().
 */
private function formFallbackFinding(DOMElement $element, ?array $readableFormBlock): array
{
    // Helper calls stay in the same order as before: controls first, then
    // the bounded HTML snapshot, then the per-key metadata lookups.
    $controlList = $this->formControls($element);
    $snapshot    = $this->boundedFallbackHtml($this->safeFallbackHtml($element));

    $readableBlocks = array();
    if ( null !== $readableFormBlock ) {
        $readableBlocks = array( $readableFormBlock );
    }

    $trimmedText = trim($element->textContent ?? '');

    $finding = array(
        'type'            => 'html',
        'reason'          => 'form_requires_runtime',
        'diagnostic_code' => 'html_form_fallback',
        'message'         => 'Form HTML requires runtime behavior and was preserved as safe fallback metadata.',
        'source_format'   => 'html',
        'tag'             => strtolower($element->tagName),
        'selector'        => $this->elementSelector($element),
        'attributes'      => $this->htmlAttributes($element),
        'form'            => $this->formMetadata($element),
        'context'         => $this->sourceContext($element),
        'classification'  => $this->fallbackEmitter->classifyFallbackSubtree($element),
        'events'          => $this->eventMetadata($element),
        'readable_blocks' => $readableBlocks,
        'controls'        => $controlList,
        'control_count'   => count($controlList),
        // Byte length of the trimmed text content (strlen, not a character count).
        'text_length'     => strlen($trimmedText),
        'child_count'     => $this->childElementCount($element),
        'html'            => $snapshot['html'],
        'html_bytes'      => $snapshot['bytes'],
        'html_truncated'  => $snapshot['truncated'],
    );

    return FallbackDiagnostic::build($finding, $this->fallbackProvenance);
}

/**
 * Whether a non-<form> container behaves as a form: it is the tightest
 * container that pairs at least one data-entry control with a submit-like
 * control, and no real <form> owns the subtree.
 *
 * Structural only — the signal is "data-entry control + submit-like control in
 * one bounded container", never a fixture id/class/name. Conservative: a
 * container without the pairing never qualifies, and a subtree owned by a real
 * <form> (as ancestor or descendant) is left to the <form> path so the finding
 * is emitted exactly once.
 *
 * "Tightest" is resolved only against descendant *containers* — the same tag
 * set convertElement() runs this check on. A pairing descendant of any other
 * tag (p, span, label, fieldset, li, ...) is never evaluated as a pseudo-form
 * itself, so deferring to it would drop the finding entirely; in that case
 * this container is the tightest one that can report it.
 *
 * @param DOMElement $element Candidate container.
 * @return bool True when $element should emit the pseudo-form finding.
 */
private function isDivBasedPseudoForm(DOMElement $element): bool
{
    if ( 'form' === strtolower($element->tagName) ) {
        return false;
    }

    // A real <form> ancestor or descendant owns the controls; let the <form>
    // path emit the finding so it is never double-counted.
    if ( $this->hasFormAncestor($element) ) {
        return false;
    }
    if ( 0 < $element->getElementsByTagName('form')->length ) {
        return false;
    }

    if ( ! $this->containerPairsDataEntryWithSubmit($element) ) {
        return false;
    }

    // Must mirror the container tag list guarding the pseudo-form check in
    // convertElement(): only these descendants can emit their own finding.
    $containerTags = array( 'article', 'aside', 'body', 'center', 'div', 'footer', 'header', 'main', 'nav', 'section' );

    // Bound the container to the tightest one: if a descendant container also
    // pairs the controls, defer to it so a wrapper does not swallow a nested
    // pseudo-form (and sibling pseudo-forms each emit their own finding).
    foreach ( $element->getElementsByTagName('*') as $descendant ) {
        if ( ! $descendant instanceof DOMElement ) {
            continue;
        }

        // Cheap tag test first; skips the subtree scan for non-containers.
        if ( ! in_array(strtolower($descendant->tagName), $containerTags, true) ) {
            continue;
        }

        if ( ! $this->isFormControlElement($descendant)
            && $this->containerPairsDataEntryWithSubmit($descendant) ) {
            return false;
        }
    }

    return true;
}

/**
 * Report whether a container holds both roles a pseudo-form needs: at least
 * one pseudo-form data-entry control and at least one submit-like control.
 *
 * Walks the controls returned by formControlElements() and stops as soon as
 * both roles have been seen. A control counted as data entry is not also
 * tested as a submit control.
 *
 * @param DOMElement $element Container to inspect.
 * @return bool True once both roles are found; false if the controls run out first.
 */
private function containerPairsDataEntryWithSubmit(DOMElement $element): bool
{
    $seenEntry  = false;
    $seenSubmit = false;

    foreach ( $this->formControlElements($element) as $candidate ) {
        if ( $this->isPseudoFormDataEntryControl($candidate) ) {
            $seenEntry = true;
        } elseif ( $this->isSubmitLikeControl($candidate) ) {
            $seenSubmit = true;
        } else {
            // Neither role: the flags are unchanged, so the pair test below
            // could not newly succeed for this control.
            continue;
        }

        if ( $seenEntry && $seenSubmit ) {
            return true;
        }
    }

    return false;
}

/**
 * Whether a control can anchor a pseudo-form: it must pass
 * isDataEntryControl() and its formControlType() must not be "search".
 *
 * Search inputs are excluded so that a search box paired with a button is not
 * promoted into a form fallback.
 *
 * @param DOMElement $control Control to classify.
 * @return bool True for a non-search data-entry control.
 */
private function isPseudoFormDataEntryControl(DOMElement $control): bool
{
    if ( ! $this->isDataEntryControl($control) ) {
        return false;
    }

    return 'search' !== $this->formControlType($control);
}

/**
 * Whether a control acts as a form's submit control.
 *
 * Only <button> and <input> elements are considered. The decision is driven by
 * the value of formControlType():
 *  - "submit" or "image"  => always submit-like;
 *  - "reset"              => never submit-like;
 *  - an <input> of any type other than "button" => not submit-like (these are
 *    data-entry inputs, not clickable controls);
 *  - anything else (a button-typed <input>, or a <button> of another type)
 *    => decided by hasSubmitSemantics().
 *
 * NOTE(review): a <button> with no type attribute is expected to be reported
 * as "submit" by formControlType() and so qualify directly — confirm there.
 *
 * @param DOMElement $control Control to classify.
 * @return bool True when the control is treated as a submit control.
 */
private function isSubmitLikeControl(DOMElement $control): bool
{
    $tag = strtolower($control->tagName);
    if ( ! in_array($tag, array( 'button', 'input' ), true) ) {
        return false;
    }

    $controlType = $this->formControlType($control);

    // Arms are tested top to bottom; the first true condition wins.
    return match ( true ) {
        in_array($controlType, array( 'submit', 'image' ), true) => true,
        'reset' === $controlType                                 => false,
        'input' === $tag && 'button' !== $controlType            => false,
        default                                                  => $this->hasSubmitSemantics($control),
    };
}

/**
 * Whether a control's visible text or identifying attributes mention a
 * submit-style action.
 *
 * The text content plus the value, class, id, name and aria-label attributes
 * are joined with spaces, lower-cased, and searched for any of a fixed set of
 * generic terms. Matching is by substring, so a term embedded in a longer word
 * also counts.
 *
 * @param DOMElement $control Control to inspect.
 * @return bool True when any term occurs in the combined text.
 */
private function hasSubmitSemantics(DOMElement $control): bool
{
    $signals = array(
        $control->textContent ?? '',
        $this->attr($control, 'value'),
        $this->attr($control, 'class'),
        $this->attr($control, 'id'),
        $this->attr($control, 'name'),
        $this->attr($control, 'aria-label'),
    );
    $combined = strtolower(implode(' ', $signals));

    $vocabulary = array( 'submit', 'subscribe', 'sign up', 'sign-up', 'signup', 'send' );

    $matched = false;
    foreach ( $vocabulary as $term ) {
        if ( str_contains($combined, $term) ) {
            $matched = true;
            break;
        }
    }

    return $matched;
}

/**
 * Whether any element ancestor of $element is a <form>.
 *
 * Climbs parentNode links while the parent is a DOMElement, comparing each
 * lower-cased tag name with "form". The element itself is not tested, and the
 * climb ends at the first parent that is not a DOMElement.
 *
 * @param DOMElement $element Element whose ancestors are inspected.
 * @return bool True when a <form> ancestor is found.
 */
private function hasFormAncestor(DOMElement $element): bool
{
    $ancestor = $element->parentNode;

    while ( $ancestor instanceof DOMElement ) {
        if ( 'form' === strtolower($ancestor->tagName) ) {
            return true;
        }

        $ancestor = $ancestor->parentNode;
    }

    return false;
}

/**
* Whether a form collects user input through at least one data-entry control.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{
"schema": "blocks-engine/php-transformer/parity-fixture/v1",
"name": "html-div-pseudo-form-materializable",
"description": "Surfaces a div-based pseudo-form (a non-<form> container pairing a data-entry control with a submit-like control) as the same materializable html_form_fallback finding a real <form> produces, so a downstream consumer renders it as a working form instead of dropping it to prose plus a dead button.",
"source_reference": {
"repo": "php-transformer",
"path": "tests/fixtures/parity/html-div-pseudo-form-materializable.json",
"notes": "Newsletter/signup widgets are frequently built without a <form> element (a div holding an email input and a Subscribe button). The issue #315 form-detection path keys off the <form> element, so such pseudo-forms are missed entirely; this extends detection to the structural input+submit pairing."
},
"legacy_comparison": {
"skip": true,
"reason": "This upstream primitive fixture has no downstream legacy comparison."
},
"operation": "html_transformer.transform",
"input": {
"content": "<main><div class=\"signup\"><label for=\"nl\">Email address</label><input type=\"email\" id=\"nl\" name=\"email\" placeholder=\"you@example.com\" required><button>Subscribe</button></div></main>"
},
"expected_blocks": [
{ "path": "blocks.0", "name": "core/group" }
],
"expected_fallbacks": [
{ "type": "html", "reason": "form_requires_runtime", "diagnostic_code": "html_form_fallback" }
],
"expect": [
{ "path": "status", "assert": "equals", "value": "success" },
{ "path": "blocks", "assert": "count", "count": 1 },
{ "path": "fallbacks", "assert": "count", "count": 1 },
{ "path": "fallbacks.0.diagnostic_code", "assert": "equals", "value": "html_form_fallback" },
{ "path": "fallbacks.0.reason", "assert": "equals", "value": "form_requires_runtime" },
{ "path": "fallbacks.0.tag", "assert": "equals", "value": "div" },
{ "path": "fallbacks.0.control_count", "assert": "equals", "value": 2 },
{ "path": "fallbacks.0.controls.0.tag", "assert": "equals", "value": "input" },
{ "path": "fallbacks.0.controls.0.type", "assert": "equals", "value": "email" },
{ "path": "fallbacks.0.controls.0.name", "assert": "equals", "value": "email" },
{ "path": "fallbacks.0.controls.0.required", "assert": "equals", "value": true },
{ "path": "fallbacks.0.controls.1.tag", "assert": "equals", "value": "button" },
{ "path": "fallbacks.0.controls.1.type", "assert": "equals", "value": "submit" },
{ "path": "serialized_blocks", "assert": "contains", "value": "Email address: you@example.com (required)" },
{ "path": "serialized_blocks", "assert": "contains", "value": "Subscribe" }
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"schema": "blocks-engine/php-transformer/parity-fixture/v1",
"name": "html-div-search-box-not-a-form",
"description": "A div holding a search input and a search submit button is a search widget, not a data-entry form, so div-based pseudo-form detection stays silent and emits no html_form_fallback finding.",
"source_reference": {
"repo": "php-transformer",
"path": "tests/fixtures/parity/html-div-search-box-not-a-form.json",
"notes": "Search inputs have dedicated standalone-search handling and must not be promoted into a form fallback; pseudo-form detection only counts non-search data-entry controls as the form anchor."
},
"legacy_comparison": {
"skip": true,
"reason": "This upstream primitive fixture has no downstream legacy comparison."
},
"operation": "html_transformer.transform",
"input": {
"content": "<main><div class=\"searchbar\"><input type=\"search\" name=\"q\" placeholder=\"Search\"><button type=\"submit\">Search</button></div></main>"
},
"expect": [
{ "path": "status", "assert": "equals", "value": "success" },
{ "path": "fallbacks", "assert": "count", "count": 0 }
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"schema": "blocks-engine/php-transformer/parity-fixture/v1",
"name": "html-div-text-links-not-a-form",
"description": "A plain content div holding only text and links carries no data-entry/submit control pairing, so div-based pseudo-form detection stays silent and no html_form_fallback finding is emitted.",
"source_reference": {
"repo": "php-transformer",
"path": "tests/fixtures/parity/html-div-text-links-not-a-form.json",
"notes": "Guards the conservative bound: ordinary containers (and lone inputs or stray links) must not be promoted into form fallbacks; the input+submit pairing is required."
},
"legacy_comparison": {
"skip": true,
"reason": "This upstream primitive fixture has no downstream legacy comparison."
},
"operation": "html_transformer.transform",
"input": {
"content": "<main><div class=\"promo\"><p>Read our <a href=\"/blog\">latest stories</a> for distillery updates.</p><a href=\"/about\">About us</a></div></main>"
},
"expect": [
{ "path": "status", "assert": "equals", "value": "success" },
{ "path": "fallbacks", "assert": "count", "count": 0 },
{ "path": "serialized_blocks", "assert": "not_contains", "value": "html_form_fallback" },
{ "path": "serialized_blocks", "assert": "contains", "value": "latest stories" }
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"schema": "blocks-engine/php-transformer/parity-fixture/v1",
"name": "html-real-form-single-form-fallback",
"description": "A real <form> with the same email + submit pairing as the div pseudo-form still emits exactly one html_form_fallback finding (tag form) and is never double-counted by the new div-based pseudo-form detection.",
"source_reference": {
"repo": "php-transformer",
"path": "tests/fixtures/parity/html-real-form-single-form-fallback.json",
"notes": "Guards against double emission: the <form> path owns the subtree, so the div-based pseudo-form detection must not also fire for the form or any wrapper around it."
},
"legacy_comparison": {
"skip": true,
"reason": "This upstream primitive fixture has no downstream legacy comparison."
},
"operation": "html_transformer.transform",
"input": {
"content": "<main><div class=\"wrapper\"><form class=\"signup\"><label for=\"e\">Email address</label><input type=\"email\" id=\"e\" name=\"email\" placeholder=\"you@example.com\" required><button>Subscribe</button></form></div></main>"
},
"expected_fallbacks": [
{ "type": "html", "reason": "form_requires_runtime", "diagnostic_code": "html_form_fallback" }
],
"expect": [
{ "path": "status", "assert": "equals", "value": "success" },
{ "path": "fallbacks", "assert": "count", "count": 1 },
{ "path": "fallbacks.0.diagnostic_code", "assert": "equals", "value": "html_form_fallback" },
{ "path": "fallbacks.0.tag", "assert": "equals", "value": "form" },
{ "path": "fallbacks.0.control_count", "assert": "equals", "value": 2 },
{ "path": "source_reports.runtime_islands", "assert": "count", "count": 1 },
{ "path": "source_reports.runtime_islands.0.kind", "assert": "equals", "value": "form" }
]
}
Loading