it('suggests leading with a direct answer when the first substantial paragraph is contextual', () => {
  // The first substantial paragraph opens with "This", which is not an answer-quality opener.
  // The second paragraph opens with a capitalised subject and is long enough to count as an
  // answer paragraph, so the suggestion (which requires answerCount > 0) fires.
  const content = `# About aeo.js

This page explains the background, positioning, scope, and implementation details that teams should understand before adopting the library in their projects.

Aeo.js generates machine-readable answer-engine assets for websites at build time, including llms.txt, schema data, ai-index metadata, sitemap, and JSON-LD structured data.`;
  const result = scorePageCitability(makePage(content));
  const answerFirstHint = result.hints.find(h => h.message.includes('Lead with a direct'));
  expect(answerFirstHint).toBeDefined();
  expect(answerFirstHint?.line).toBe(3);
});

it('does not add an answer-first hint when zero answer paragraphs exist (warning covers it)', () => {
  // With no answer-quality paragraphs at all, the standalone warning already tells the user
  // to add some, so a "lead with one" hint would be redundant noise.
  const content = `# About aeo.js

This page explains the background, positioning, scope, and history that teams should understand before adopting the library in their projects.`;
  const result = scorePageCitability(makePage(content));
  const warning = result.hints.find(h => h.message.includes('No direct answer paragraphs'));
  const suggestion = result.hints.find(h => h.message.includes('Lead with a direct'));
  expect(warning).toBeDefined();
  expect(suggestion).toBeUndefined();
});

it('does not add an answer-first hint when the first substantial paragraph is direct', () => {
  const content = `# About aeo.js

Aeo.js generates machine-readable answer-engine assets for websites at build time, including llms.txt files, schema data, and AI index metadata for crawler discovery.

This page explains the background, positioning, and implementation details for teams adopting the library.`;
  const result = scorePageCitability(makePage(content));
  const answerFirstHint = result.hints.find(h => h.message.includes('Lead with a direct'));
  expect(answerFirstHint).toBeUndefined();
});

it('does not add an answer-first hint when the opener exceeds 200 words but is a direct answer', () => {
  // Regression test: the "Lead with a direct answer" suggestion previously gated on
  // isAnswerQualityParagraph, which caps at 200 words. A 200+ word opener that starts with a
  // capitalised subject is still a direct answer; suggesting a split is the job of the
  // long-paragraph check.
  //
  // Two things must hold for this test to exercise that path:
  //   1. the opener really is longer than 200 words (10 words x 21 sentences = 210), and
  //   2. some other paragraph is answer-quality, so answerCount > 0 and the suggestion
  //      branch is actually reached.
  const sentence = 'Aeo.js generates machine-readable answer-engine assets for websites at build time.';
  // join(' ') rather than repeat() so the opener carries no trailing whitespace.
  const longOpener = Array.from({ length: 21 }, () => sentence).join(' ');
  expect(longOpener.split(/\s+/).length).toBeGreaterThan(200);

  const content = `# About aeo.js

${longOpener}

Teams adopting the library in production environments get llms.txt files, schema data, sitemap entries, and JSON-LD structured data generated automatically at build time.`;
  const result = scorePageCitability(makePage(content));
  const answerFirstHint = result.hints.find(h => h.message.includes('Lead with a direct'));
  expect(answerFirstHint).toBeUndefined();
});
/**
 * Decides whether a paragraph reads as a direct, self-contained answer.
 *
 * A paragraph qualifies when both conditions hold:
 * - it contains between 20 and 200 whitespace-separated words (inclusive);
 * - it opens with an uppercase ASCII letter followed by a lowercase one, and that opening
 *   word is not one of This/That/These/Those/It/They/We/He/She/I.
 *
 * @param text - Paragraph text. The opening check is applied to the string exactly as given,
 *   so text with leading whitespace never qualifies.
 * @returns `true` when the paragraph satisfies both the length and the opening criteria.
 */
function isAnswerQualityParagraph(text: string): boolean {
  const minWords = 20;
  const maxWords = 200;

  // Count on the trimmed text: split(/\s+/) emits an empty token for leading or trailing
  // whitespace (and [''] for an empty string), which would inflate the count by one and
  // shift both length boundaries.
  const trimmed = text.trim();
  const wordCount = trimmed === '' ? 0 : trimmed.split(/\s+/).length;
  if (wordCount < minWords || wordCount > maxWords) {
    return false;
  }

  // The opener must look like a named subject ...
  if (!/^[A-Z][a-z]/.test(text)) {
    return false;
  }
  // ... and must not be a pronoun or demonstrative that leans on earlier context.
  return !/^(This|That|These|Those|It|They|We|He|She|I)\b/.test(text);
}