Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions edgar/thirteenf/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,11 @@ def holdings(self):
# No conversion needed - aggregate directly (saves 15 MB copy + 30ms)
df = infotable

# Aggregate by CUSIP
# Group by CUSIP + PutCall so option positions stay distinct from equity (GH #824).
group_keys = ['Cusip']
if 'PutCall' in df.columns:
group_keys.append('PutCall')

agg_dict = {}

# Keep first value for ID columns
Expand All @@ -445,13 +449,23 @@ def holdings(self):
if col in df.columns:
agg_dict[col] = 'sum'

# Handle Type and PutCall - keep if consistent across managers, otherwise use first
for col in ['Type', 'PutCall']:
if col in df.columns:
agg_dict[col] = 'first' # Use first value (typically consistent per CUSIP)
# Type keeps first; PutCall is now a grouping key so it's excluded from agg_dict.
if 'Type' in df.columns:
agg_dict['Type'] = 'first'

# Group by CUSIP and aggregate
holdings = df.groupby('Cusip', as_index=False).agg(agg_dict)
holdings = df.groupby(group_keys, as_index=False).agg(agg_dict)

# Restore PutCall column position. pandas groupby() with PutCall as a key
# places it as the second column (right after Cusip), silently shifting
# the column layout. This breaks positional column access, table
# rendering order, and notebook code with hardcoded indices. Re-insert
# PutCall after Ticker to preserve the pre-aggregation column contract.
if 'PutCall' in holdings.columns:
cols = [c for c in holdings.columns if c != 'PutCall']
insert_after = 'Ticker' if 'Ticker' in cols else (id_cols[-1] if id_cols[-1] in cols else cols[-1])
idx = cols.index(insert_after) + 1
cols.insert(idx, 'PutCall')
holdings = holdings[cols]

# Optimize dtypes for low-cardinality columns (saves ~1-2 MB)
# Include potential fillna values in categories for rendering compatibility
Expand All @@ -461,9 +475,10 @@ def holdings(self):
categories=['Shares', 'Principal', '-']
)
if 'PutCall' in holdings.columns:
# SEC XML emits title-case ('Put', 'Call'); uppercase categories silently dropped values.
holdings['PutCall'] = pd.Categorical(
holdings['PutCall'],
categories=['', 'PUT', 'CALL']
categories=['', 'Put', 'Call']
)

# Sort by value descending
Expand Down
69 changes: 69 additions & 0 deletions tests/issues/regression/test_issue_824_13f_putcall_aggregation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Regression test for #824 — 13F-HR holdings merged Put/Call rows into equity."""

import pytest

from edgar import Filing
from edgar.thirteenf import ThirteenF


SG_CAPITAL_13F_HR_A = {
"form": "13F-HR/A",
"filing_date": "2024-06-07",
"company": "SG Capital Management LLC",
"cik": 1510099,
"accession_no": "0001172661-24-002551",
}


def _sg_capital_thirteenf() -> ThirteenF:
return ThirteenF(Filing(**SG_CAPITAL_13F_HR_A))


@pytest.mark.network
def test_holdings_preserves_put_call_rows_separately():
thirteenf = _sg_capital_thirteenf()
holdings = thirteenf.holdings
assert holdings is not None
puts = holdings.query("PutCall == 'Put'")
assert len(puts) == 3
# Puts must reference distinct CUSIPs from any aggregated equity rows (they're separate positions)
put_cusips = set(puts["Cusip"])
assert len(put_cusips) == 3


@pytest.mark.network
def test_holdings_putcall_values_use_title_case():
thirteenf = _sg_capital_thirteenf()
holdings = thirteenf.holdings
putcall_values = set(holdings["PutCall"].dropna().astype(str))
# SEC XML emits 'Put' / 'Call' title case; categorical must accept those (not uppercase)
assert putcall_values.issubset({"", "Put", "Call"})
assert "Put" in putcall_values


@pytest.mark.network
def test_infotable_unchanged_baseline():
"""Ensure the raw infotable still shows 3 Puts (issue's stated ground truth)."""
thirteenf = _sg_capital_thirteenf()
puts = thirteenf.infotable.query("PutCall == 'Put'")
assert len(puts) == 3


@pytest.mark.network
def test_putcall_column_position_preserved():
"""PutCall must remain immediately after Ticker, not get bumped by groupby.

pandas groupby(['Cusip', 'PutCall'], as_index=False) places PutCall right
after Cusip in the output — which silently shifts the column layout. Any
positional column access (iloc, hardcoded notebook indices) and the table
rendering order depend on PutCall sitting after Ticker. Reviewed in #828.
"""
thirteenf = _sg_capital_thirteenf()
holdings = thirteenf.holdings
cols = list(holdings.columns)
assert "PutCall" in cols
assert "Ticker" in cols
# PutCall must sit immediately after Ticker (contract, not just "somewhere later")
assert cols.index("PutCall") == cols.index("Ticker") + 1, (
f"PutCall position regressed: expected immediately after Ticker, got cols={cols}"
)
43 changes: 26 additions & 17 deletions tests/thirteenf/test_holdings_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,26 +55,32 @@ def test_holdings_aggregates_multi_manager_filing(state_street_13f_infotable, st

@pytest.mark.network
def test_holdings_aggregation_math_correct(state_street_13f_infotable, state_street_13f_holdings):
"""Test that holdings correctly sums values across managers for same CUSIP."""
"""Test that holdings correctly sums values across managers for same (CUSIP, PutCall)."""
infotable = state_street_13f_infotable
holdings = state_street_13f_holdings

# Find a CUSIP that appears multiple times in infotable
cusip_counts = infotable['Cusip'].value_counts()
multi_entry_cusips = cusip_counts[cusip_counts > 1].index.tolist()
# GH #824: aggregation key is (CUSIP, PutCall), not CUSIP alone — option positions
# stay distinct from equity for the same security.
group_cols = ['Cusip', 'PutCall'] if 'PutCall' in infotable.columns else ['Cusip']
group_counts = infotable.groupby(group_cols).size()
multi_entry_groups = group_counts[group_counts > 1].index.tolist()

# Note: State Street may or may not have multi-entry CUSIPs, so make test flexible
if len(multi_entry_cusips) > 0:
# Test first multi-entry CUSIP
test_cusip = multi_entry_cusips[0]
if len(multi_entry_groups) > 0:
test_key = multi_entry_groups[0]

# Get all infotable rows for this CUSIP
infotable_rows = infotable[infotable['Cusip'] == test_cusip]
if isinstance(test_key, tuple):
mask_info = (infotable['Cusip'] == test_key[0]) & (infotable['PutCall'] == test_key[1])
mask_hold = (holdings['Cusip'] == test_key[0]) & (holdings['PutCall'] == test_key[1])
test_label = f"CUSIP {test_key[0]} (PutCall={test_key[1]!r})"
else:
mask_info = infotable['Cusip'] == test_key
mask_hold = holdings['Cusip'] == test_key
test_label = f"CUSIP {test_key}"

# Get holdings row for this CUSIP
holdings_row = holdings[holdings['Cusip'] == test_cusip]
infotable_rows = infotable[mask_info]
holdings_row = holdings[mask_hold]

assert len(holdings_row) == 1, f"holdings should have exactly 1 row for CUSIP {test_cusip}"
assert len(holdings_row) == 1, f"holdings should have exactly 1 row for {test_label}"

# Verify aggregation sums
for col in ['SharesPrnAmount', 'Value', 'SoleVoting', 'SharedVoting', 'NonVoting']:
Expand All @@ -90,13 +96,13 @@ def test_holdings_aggregation_math_correct(state_street_13f_infotable, state_str
assert holdings_row['Issuer'].iloc[0] == infotable_rows['Issuer'].iloc[0], \
"Issuer should be preserved"

print(f"\n✓ Aggregation math verified for CUSIP {test_cusip}:")
print(f"\n✓ Aggregation math verified for {test_label}:")
print(f" - infotable entries: {len(infotable_rows)}")
print(f" - Managers: {infotable_rows['OtherManager'].tolist()}")
print(f" - Individual values: {infotable_rows['Value'].tolist()}")
print(f" - Aggregated value: {holdings_row['Value'].iloc[0]}")
else:
print("\n✓ No multi-entry CUSIPs in this filing (all single-manager holdings)")
print("\n✓ No multi-entry groups in this filing (all single-manager holdings)")


@pytest.mark.network
Expand Down Expand Up @@ -128,8 +134,11 @@ def test_holdings_preserves_all_securities(state_street_13f_infotable, state_str
assert infotable_cusips == holdings_cusips, \
"holdings should include all unique securities from infotable"

assert len(holdings_cusips) == len(holdings), \
"holdings should have one row per unique CUSIP"
# GH #824: holdings has one row per (CUSIP, PutCall) so equity + Put + Call on
# the same security produce separate rows. len(holdings) >= unique CUSIP count.
group_cols = ['Cusip', 'PutCall'] if 'PutCall' in holdings.columns else ['Cusip']
assert len(holdings) == len(holdings.groupby(group_cols)), \
"holdings should have one row per unique (CUSIP, PutCall) combination"

print(f"\n✓ All securities preserved:")
print(f" - Unique securities: {len(holdings_cusips)}")
Expand Down
Loading