dgunning · dgunning · May 25, 2026 · May 21, 2026 · dgunning · May 25, 2026
diff --git a/edgar/_filings.py b/edgar/_filings.py
@@ -2034,13 +2034,14 @@ def open(self):
     @lru_cache(maxsize=1)
     def sections(self) -> List[str]:
         html = self.html()
-        if html is None:
-            raise ValueError(
-                f"Filing {self.accession_no} ({self.form}, filed {self.filing_date}) "
-                "has no HTML primary document — likely a pre-2001 SGML/text submission. "
-                "Use filing.text() to access the raw submission content."
-            )
-        return html_sections(html)
+        if html is not None:
+            return html_sections(html)
+        # Old text-only filings (pre-2002): split on <PAGE> markers or blank lines, keep chunks >= 50 chars.
+        text = self.text()
+        if not text:
+            return []
+        chunks = [c.strip() for c in re.split(r"<PAGE>|\n\s*\n", text) if len(c.strip()) >= 50]
+        return chunks if chunks else [text]  # no chunk met the length floor: return the text whole
 
     @cached_property
     def __get_bm25_search_index(self):
@@ -2083,29 +2084,19 @@ def grep(self, pattern: str, *, regex: bool = False, document: Optional[str] = N
         from edgar.search.grep import GrepResult, _grep_text
 
         all_matches = []
+        found_any_text = False
 
         try:
             attachments = self.attachments
         except Exception:
-            return GrepResult(pattern, [])
+            attachments = []
 
         for attachment in attachments:
-            # Filter by document if specified
-            if document:
-                if document.lower() == "primary":
-                    if attachment.sequence_number != "1":
-                        continue
-                else:
-                    # Match by document_type (e.g. "EX-10.1") or document filename
-                    doc_type = (attachment.document_type or "").upper()
-                    if document.upper() not in doc_type and document.lower() not in (attachment.document or "").lower():
-                        continue
-
-            # Skip binary/non-text attachments
+            if document and not self._attachment_matches(attachment, document):
+                continue
             if attachment.empty or attachment.is_binary():
                 continue
 
-            # Get text content
             try:
                 text = attachment.text()
             except Exception as e:
@@ -2115,20 +2106,49 @@ def grep(self, pattern: str, *, regex: bool = False, document: Optional[str] = N
             if not text:
                 continue
 
-            # Determine location label
-            doc_type = attachment.document_type or ""
-            if attachment.sequence_number == "1":
-                location = "primary"
-            elif doc_type:
-                location = doc_type
-            else:
-                location = attachment.document or f"doc-{attachment.sequence_number}"
+            found_any_text = True
+            location = self._attachment_location(attachment)
+            all_matches.extend(_grep_text(text, pattern, location, regex=regex))
 
-            matches = _grep_text(text, pattern, location, regex=regex)
-            all_matches.extend(matches)
+        # Old text filings: SGML decomposition yields empty attachment shells, so fall back to filing.text().
+        if not found_any_text and (document is None or document.lower() == "primary"):
+            all_matches.extend(self._grep_filing_text(pattern, regex))
 
         return GrepResult(pattern, all_matches)
 
+    @staticmethod
+    def _attachment_matches(attachment, document: str) -> bool:
+        """Whether `attachment` satisfies grep()'s `document` filter."""
+        if document.lower() == "primary":
+            return attachment.sequence_number == "1"  # "primary" selects sequence number "1" (compared as a string)
+        doc_type = (attachment.document_type or "").upper()  # a missing document_type is treated as ""
+        if document.upper() in doc_type:  # case-insensitive substring match on the document type
+            return True
+        return document.lower() in (attachment.document or "").lower()  # else substring match on attachment.document
+
+    @staticmethod
+    def _attachment_location(attachment) -> str:
+        """Label for an attachment in grep result locations."""
+        if attachment.sequence_number == "1":  # sequence number "1" is always labelled "primary"
+            return "primary"
+        if attachment.document_type:  # otherwise prefer a non-empty document type
+            return attachment.document_type
+        return attachment.document or f"doc-{attachment.sequence_number}"  # else document, else "doc-<sequence>"
+
+    def _grep_filing_text(self, pattern: str, regex: bool) -> list:
+        """Grep the combined filing text as a 'primary' document.
+
+        Used by grep() when no attachment yields usable text — covers older
+        plain-text filings whose SGML decomposition emits empty shells.
+        """
+        from edgar.search.grep import _grep_text  # function-local import, as grep() does
+        try:
+            text = self.text()
+        except Exception as e:  # best effort: an extraction failure yields no matches rather than an error
+            log.debug(f"grep: could not extract filing text: {e}")
+            return []
+        return _grep_text(text, pattern, "primary", regex=regex) if text else []  # empty/None text -> no matches
+
     @property
     def filing_url(self) -> str:
         return f"{self.base_dir}/{self.document.document}"

diff --git a/tests/issues/regression/test_issue_819_search_grep_on_text_filings.py b/tests/issues/regression/test_issue_819_search_grep_on_text_filings.py
@@ -0,0 +1,60 @@
+"""Regression test for #819 — search()/grep() failed on plain-text filings."""
+
+import pytest
+
+from edgar import Company
+
+
+PCG_TEXT_10K_ACCESSION = "0000929624-00-000321"
+
+
+def _pcg_oldest_10k():
+    filings = Company("PCG").get_filings(form="10-K")
+    target = next(  # first filing with the pinned accession number, or None if absent
+        (f for f in filings if f.accession_no == PCG_TEXT_10K_ACCESSION),
+        None,
+    )
+    assert target is not None, f"PCG 10-K {PCG_TEXT_10K_ACCESSION} missing from EDGAR results"
+    return target  # NOTE(review): chosen by accession number; nothing here checks it is the oldest 10-K
+
+
+@pytest.mark.network
+def test_text_filing_html_is_none_but_text_is_populated():
+    filing = _pcg_oldest_10k()
+    assert filing.html() is None  # precondition: this filing exposes no HTML
+    text = filing.text()
+    assert text is not None
+    assert len(text) > 100_000  # NOTE(review): loose size floor; presumably far below the real length — confirm
+    assert "employees" in text.lower()  # the term the search/grep tests in this file look for
+
+
+@pytest.mark.network
+def test_sections_falls_back_to_text_for_plain_text_filings():
+    filing = _pcg_oldest_10k()
+    sections = filing.sections()
+    assert isinstance(sections, list)
+    assert len(sections) > 1  # more than one chunk: the text was actually split
+    assert any("employees" in s.lower() for s in sections)  # the term is still present after chunking
+
+
+@pytest.mark.network
+def test_search_works_on_plain_text_filing():
+    filing = _pcg_oldest_10k()
+    results = filing.search("employees")  # must not raise on a filing without HTML
+    assert results is not None
+    assert len(results) >= 1  # at least one hit for the term
+
+
+@pytest.mark.network
+def test_grep_works_on_plain_text_filing():
+    filing = _pcg_oldest_10k()
+    matches = filing.grep("employees")  # no document filter, so the filing-text fallback is eligible
+    assert len(matches) >= 1  # at least one match for the term
+
+
+@pytest.mark.network
+def test_search_grep_still_work_on_html_filings():
+    newest = Company("PCG").get_filings(form="10-K")[0]  # NOTE(review): assumes index 0 is the most recent filing
+    assert newest.html() is not None  # precondition: this filing does have HTML
+    assert len(newest.search("employees")) > 0
+    assert len(newest.grep("employees")) > 0
diff --git a/tests/issues/regression/test_issue_819_text_filing_search.py b/tests/issues/regression/test_issue_819_text_filing_search.py