Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions src/core/citability.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,61 @@ describe('scorePageCitability', () => {
expect(longHint).toBeDefined();
});

it('suggests leading with a direct answer when the first substantial paragraph is contextual', () => {
  // The paragraph on line 3 opens with "This", so it is not an answer-quality opener.
  // The paragraph after it opens with a capitalized noun and is long enough to count as
  // an answer paragraph, which satisfies the answerCount > 0 precondition of the suggestion.
  const heading = '# About aeo.js';
  const contextualOpener =
    'This page explains the background, positioning, scope, and implementation details that teams should understand before adopting the library in their projects.';
  const directAnswer =
    'Aeo.js generates machine-readable answer-engine assets for websites at build time, including llms.txt, schema data, ai-index metadata, sitemap, and JSON-LD structured data.';
  const page = makePage([heading, contextualOpener, directAnswer].join('\n\n'));

  const { hints } = scorePageCitability(page);
  const leadHint = hints.find(hint => hint.message.includes('Lead with a direct'));

  expect(leadHint).toBeDefined();
  expect(leadHint?.line).toBe(3);
});

it('does not add an answer-first hint when zero answer paragraphs exist (warning covers it)', () => {
  // With no answer-quality paragraph anywhere on the page, the standalone warning already
  // asks the user to add one; an additional "lead with one" suggestion would only repeat it.
  const heading = '# About aeo.js';
  const contextualOnly =
    'This page explains the background, positioning, scope, and history that teams should understand before adopting the library in their projects.';
  const page = makePage([heading, contextualOnly].join('\n\n'));

  const { hints } = scorePageCitability(page);
  const zeroAnswerWarning = hints.find(hint => hint.message.includes('No direct answer paragraphs'));
  const leadSuggestion = hints.find(hint => hint.message.includes('Lead with a direct'));

  expect(zeroAnswerWarning).toBeDefined();
  expect(leadSuggestion).toBeUndefined();
});

it('does not add an answer-first hint when the first substantial paragraph is direct', () => {
  // The direct, subject-first paragraph comes before the "This page explains…" context,
  // so the page already leads with an answer.
  const heading = '# About aeo.js';
  const directAnswer =
    'Aeo.js generates machine-readable answer-engine assets for websites at build time, including llms.txt files, schema data, and AI index metadata for crawler discovery.';
  const trailingContext =
    'This page explains the background, positioning, and implementation details for teams adopting the library.';
  const page = makePage([heading, directAnswer, trailingContext].join('\n\n'));

  const { hints } = scorePageCitability(page);
  const leadHint = hints.find(hint => hint.message.includes('Lead with a direct'));

  expect(leadHint).toBeUndefined();
});

it('does not add an answer-first hint when the opener exceeds 200 words but is a direct answer', () => {
  // Regression test. The "Lead with a direct answer" suggestion previously gated on
  // isAnswerQualityParagraph, which caps at 200 words, so a well-formed 200+ word opener
  // that starts with a capital noun was mis-flagged. Splitting such an opener is a separate
  // concern handled by the long-paragraph check.
  //
  // Two conditions must hold for this test to actually exercise that path:
  //  1. The opener is over 200 words: the sentence below is 10 words, repeated 21 times = 210.
  //  2. answerCount > 0, otherwise the suggestion is skipped before the opener is inspected.
  //     The opener is too long to count as an answer paragraph, so the second paragraph must
  //     itself be answer-quality: 20+ words, opening with a capitalized word that is not a
  //     pronoun.
  const sentence = 'Aeo.js generates machine-readable answer-engine assets for websites at build time.';
  const longOpener = Array.from({ length: 21 }, () => sentence).join(' ');
  const content = `# About aeo.js

${longOpener}

Teams adopting the library in production environments get llms.txt, schema data, AI index metadata, sitemaps, and JSON-LD structured data generated automatically at build time.`;
  const result = scorePageCitability(makePage(content));

  // Guard against a vacuous pass: if the zero-answer warning is present, answerCount was 0
  // and the answer-first branch never ran.
  const zeroAnswerWarning = result.hints.find(h => h.message.includes('No direct answer paragraphs'));
  expect(zeroAnswerWarning).toBeUndefined();

  const answerFirstHint = result.hints.find(h => h.message.includes('Lead with a direct'));
  expect(answerFirstHint).toBeUndefined();
});

it('detects context-dependent paragraphs', () => {
const content = `As mentioned above, our platform is great.

Expand Down
41 changes: 36 additions & 5 deletions src/core/citability.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,7 @@ function scoreAnswerBlocks(content: string, hints: ContentHint[]): CitabilityDim
// Skip headings and very short paragraphs
if (para.text.startsWith('#') || words < 15) continue;

// Good answer: starts with capital letter (not pronoun), 20-200 words
const startsWithSubject = /^[A-Z][a-z]/.test(para.text) && !/^(This|That|These|Those|It|They|We|He|She|I)\b/.test(para.text);
const goodLength = words >= 20 && words <= 200;

if (startsWithSubject && goodLength) {
if (isAnswerQualityParagraph(para.text)) {
answerCount++;
}
}
Expand All @@ -106,6 +102,33 @@ function scoreAnswerBlocks(content: string, hints: ContentHint[]): CitabilityDim
hints.push({ type: 'warning', message: 'No direct answer paragraphs found — add self-contained factual paragraphs that start with a clear subject' });
}

// Only suggest "lead with a direct answer" when at least one answer paragraph already
// exists on the page — otherwise the existing zero-answer warning above covers the
// same action item. Match the 20-word threshold used by isAnswerQualityParagraph so a
// 15–19-word direct opening doesn't false-positive against itself.
//
// We apply only the opening-word half of isAnswerQualityParagraph, inverted: a paragraph
// "starts contextually" when it does NOT open with a capitalized word, or DOES open with
// a pronoun. The helper's 200-word upper bound is deliberately not applied here, because
// it would mis-flag a well-formed 201+ word opener as needing rephrasing — the
// long-paragraph hint below is the right place to suggest splitting it.
if (answerCount > 0) {
const firstSubstantialParagraph = paragraphs.find(
para => !para.text.startsWith('#') && para.text.split(/\s+/).length >= 20
);
if (firstSubstantialParagraph) {
const startsContextually =
!/^[A-Z][a-z]/.test(firstSubstantialParagraph.text) ||
/^(This|That|These|Those|It|They|We|He|She|I)\b/.test(firstSubstantialParagraph.text);
if (startsContextually) {
hints.push({
type: 'suggestion',
message: 'Lead with a direct, self-contained answer paragraph before background context',
line: firstSubstantialParagraph.line,
});
}
}
}

// Flag long paragraphs
for (const para of paragraphs) {
const words = para.text.split(/\s+/).length;
Expand All @@ -117,6 +140,14 @@ function scoreAnswerBlocks(content: string, hints: ContentHint[]): CitabilityDim
return { name: 'Answer Blocks', score, maxScore: 25, details: `${answerCount} answer-quality paragraphs found` };
}

/**
 * Reports whether a paragraph qualifies as a direct "answer" paragraph.
 *
 * A paragraph qualifies when both of the following hold:
 * - it opens with an uppercase ASCII letter followed by a lowercase one, and its first
 *   word is not one of the listed pronouns/demonstratives (This, That, These, ...);
 * - splitting it on runs of whitespace yields between 20 and 200 tokens, inclusive.
 *
 * @param text - The paragraph text to evaluate.
 * @returns `true` when both the opening and the length criteria are met.
 */
function isAnswerQualityParagraph(text: string): boolean {
  const opensWithCapitalizedWord = /^[A-Z][a-z]/.test(text);
  if (!opensWithCapitalizedWord) {
    return false;
  }

  const opensWithPronoun = /^(This|That|These|Those|It|They|We|He|She|I)\b/.test(text);
  if (opensWithPronoun) {
    return false;
  }

  // Token count is taken as-is from the split; the text is not trimmed first.
  const tokenCount = text.split(/\s+/).length;
  return tokenCount >= 20 && tokenCount <= 200;
}

/**
* Dimension 2: Self-Containment (0-25)
* Penalizes paragraphs that depend on surrounding context.
Expand Down