From f07a300d85547daf75b2ee579e6fedef80588341 Mon Sep 17 00:00:00 2001 From: javi11 Date: Wed, 20 May 2026 19:51:38 +0200 Subject: [PATCH 01/30] feat(iso): concat Blu-ray main feature across clips and discs Long Blu-ray releases split the main feature two ways: across multiple M2TS clips within a disc (joined by BDMV/PLAYLIST/*.mpls), and across multiple discs in one NZB (e.g. AVATAR_FIRE_AND_ASH_DISC_1 / _DISC_2). The importer previously kept only the single largest M2TS per ISO, which both dropped the rest of the movie within a disc and treated each disc as an unrelated file. Now ExpandISOContents (shared between rar and sevenzip aggregators) parses the main MPLS playlist on each ISO, reads the 9660 PVD volume label, groups ISOs by stripped base name with a DISC|CD|PART suffix regex, and emits a single Content whose NestedSources chain spans every M2TS in disc-then-playlist order. The metadata layer's existing nested multi-reader produces one seamless seekable virtual file. Non-BDMV discs and unparseable playlists fall back to the legacy largest-file behaviour so nothing regresses. 
--- internal/importer/archive/iso/bluray.go | 116 +++++++ internal/importer/archive/iso/bluray_test.go | 122 ++++++++ internal/importer/archive/iso/mpls.go | 108 +++++++ internal/importer/archive/iso/mpls_test.go | 167 ++++++++++ internal/importer/archive/iso/processor.go | 83 +++-- internal/importer/archive/iso/types.go | 13 + internal/importer/archive/iso/volume.go | 30 ++ internal/importer/archive/iso/volume_test.go | 70 +++++ internal/importer/archive/iso_expansion.go | 296 ++++++++++++++++++ .../importer/archive/iso_expansion_test.go | 214 +++++++++++++ internal/importer/archive/rar/aggregator.go | 78 +---- .../importer/archive/sevenzip/aggregator.go | 79 +---- 12 files changed, 1189 insertions(+), 187 deletions(-) create mode 100644 internal/importer/archive/iso/bluray.go create mode 100644 internal/importer/archive/iso/bluray_test.go create mode 100644 internal/importer/archive/iso/mpls.go create mode 100644 internal/importer/archive/iso/mpls_test.go create mode 100644 internal/importer/archive/iso/volume.go create mode 100644 internal/importer/archive/iso/volume_test.go create mode 100644 internal/importer/archive/iso_expansion.go create mode 100644 internal/importer/archive/iso_expansion_test.go diff --git a/internal/importer/archive/iso/bluray.go b/internal/importer/archive/iso/bluray.go new file mode 100644 index 000000000..e2ce4d73b --- /dev/null +++ b/internal/importer/archive/iso/bluray.go @@ -0,0 +1,116 @@ +package iso + +import ( + "io" + "sort" + "strings" +) + +// MainFeaturePlaylist is the result of analysing a Blu-ray's BDMV. +// Streams is the ordered list of M2TS file entries that, concatenated, +// form the main feature; the slice is empty if no parseable playlist +// was found. +type MainFeaturePlaylist struct { + PlaylistName string // e.g. 
"00800.MPLS" — for logging only + DurationTicks int64 // sum of (OUT-IN) at 45 kHz + Streams []isoFileEntry // ordered M2TS entries +} + +// ResolveMainFeature inspects the entries returned by ListISOFiles for a +// Blu-ray (BDMV) structure and returns the playlist that represents the +// main movie. Returns nil if the disc is not BDMV, has no .mpls, or no +// playlist resolves to a non-empty M2TS sequence. +// +// Selection heuristic: pick the playlist with the longest total +// presentation duration. Ties break on PlayItem count (more clips wins), +// then lexicographically smallest filename for determinism. +// +// Failures parsing individual playlists are non-fatal — we skip them and +// keep evaluating the rest, mirroring how every Blu-ray player tolerates +// malformed entries in BDMV/PLAYLIST/. +func ResolveMainFeature(rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlaylist { + // Build an index of all M2TS streams by their 5-digit clip stem (the + // part MPLS references). M2TS files live at BDMV/STREAM/.M2TS + // case-insensitively. + streamByClip := make(map[string]isoFileEntry) + var playlistEntries []isoFileEntry + for _, f := range files { + up := strings.ToUpper(f.path) + switch { + case strings.HasPrefix(up, "BDMV/PLAYLIST/") && strings.HasSuffix(up, ".MPLS"): + playlistEntries = append(playlistEntries, f) + case strings.HasPrefix(up, "BDMV/STREAM/") && strings.HasSuffix(up, ".M2TS"): + base := up[len("BDMV/STREAM/") : len(up)-len(".M2TS")] + streamByClip[base] = f + } + } + if len(playlistEntries) == 0 || len(streamByClip) == 0 { + return nil + } + + // Deterministic order: shorter filenames (and lexicographic ties) win + // the tie-break later. 
+ sort.Slice(playlistEntries, func(i, j int) bool { + return playlistEntries[i].path < playlistEntries[j].path + }) + + var best *MainFeaturePlaylist + for _, pe := range playlistEntries { + data, err := readISOFile(rs, pe) + if err != nil { + continue + } + pl, err := ParseMPLS(data) + if err != nil { + continue + } + + // Resolve clip names → M2TS entries, in playlist order. + streams := make([]isoFileEntry, 0, len(pl.PlayItems)) + for _, it := range pl.PlayItems { + entry, ok := streamByClip[strings.ToUpper(it.ClipName)] + if !ok { + continue + } + streams = append(streams, entry) + } + if len(streams) == 0 { + continue + } + + cand := &MainFeaturePlaylist{ + PlaylistName: pe.path, + DurationTicks: pl.DurationTicks(), + Streams: streams, + } + if best == nil || isBetterPlaylist(cand, best, len(pl.PlayItems), len(best.Streams)) { + best = cand + } + } + return best +} + +// isBetterPlaylist returns true when cand should replace best. +// Comparison: longer duration > more PlayItems > earlier filename. +// The filename tie-break relies on playlistEntries being sorted before +// iteration so the smaller path is seen first; we therefore only swap +// when strictly better. +func isBetterPlaylist(cand, best *MainFeaturePlaylist, candItems, bestItems int) bool { + if cand.DurationTicks != best.DurationTicks { + return cand.DurationTicks > best.DurationTicks + } + return candItems > bestItems +} + +// readISOFile reads the full contents of one isoFileEntry from rs. +// MPLS files are tiny (~KBs), so a one-shot read is fine. 
+func readISOFile(rs io.ReadSeeker, e isoFileEntry) ([]byte, error) { + if _, err := rs.Seek(int64(e.lba)*iso9660SectorSize, io.SeekStart); err != nil { + return nil, err + } + buf := make([]byte, e.size) + if _, err := io.ReadFull(rs, buf); err != nil { + return nil, err + } + return buf, nil +} diff --git a/internal/importer/archive/iso/bluray_test.go b/internal/importer/archive/iso/bluray_test.go new file mode 100644 index 000000000..e548f61a4 --- /dev/null +++ b/internal/importer/archive/iso/bluray_test.go @@ -0,0 +1,122 @@ +package iso + +import ( + "bytes" + "io" + "testing" +) + +// makeImage assembles an in-memory disc image by placing each piece of +// data at the sector index given in its key. The returned reader can be +// used as if it were a real ISO read-seeker. +func makeImage(t *testing.T, pieces map[uint32][]byte) io.ReadSeeker { + t.Helper() + var maxSect uint32 + for s, b := range pieces { + end := s + uint32((len(b)+iso9660SectorSize-1)/iso9660SectorSize) + if end > maxSect { + maxSect = end + } + } + if maxSect == 0 { + maxSect = 1 + } + img := make([]byte, int(maxSect)*iso9660SectorSize) + for s, b := range pieces { + copy(img[int(s)*iso9660SectorSize:], b) + } + return bytes.NewReader(img) +} + +func TestResolveMainFeature(t *testing.T) { + t.Parallel() + + t.Run("picks longest playlist", func(t *testing.T) { + t.Parallel() + // Two playlists: + // 00001.MPLS → 1 clip, short (extras playlist) + // 00800.MPLS → 3 clips, long (main feature) + short := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00010", InTime: 0, OutTime: 45000}, + }, nil) + long := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 90 * 45000}, + {ClipName: "00002", InTime: 0, OutTime: 60 * 45000}, + {ClipName: "00003", InTime: 0, OutTime: 30 * 45000}, + }, nil) + + rs := makeImage(t, map[uint32][]byte{ + 100: short, + 110: long, + }) + + // File listing: two playlists and four M2TS clips (one extra). 
+ files := []isoFileEntry{ + {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: uint64(len(short))}, + {path: "BDMV/PLAYLIST/00800.MPLS", lba: 110, size: uint64(len(long))}, + {path: "BDMV/STREAM/00001.M2TS", lba: 200, size: 1_000_000}, + {path: "BDMV/STREAM/00002.M2TS", lba: 300, size: 2_000_000}, + {path: "BDMV/STREAM/00003.M2TS", lba: 400, size: 3_000_000}, + {path: "BDMV/STREAM/00010.M2TS", lba: 500, size: 500_000}, + } + + got := ResolveMainFeature(rs, files) + if got == nil { + t.Fatal("ResolveMainFeature returned nil") + } + if got.PlaylistName != "BDMV/PLAYLIST/00800.MPLS" { + t.Errorf("PlaylistName = %q, want 00800.MPLS", got.PlaylistName) + } + if len(got.Streams) != 3 { + t.Fatalf("Streams len = %d, want 3", len(got.Streams)) + } + wantOrder := []string{"BDMV/STREAM/00001.M2TS", "BDMV/STREAM/00002.M2TS", "BDMV/STREAM/00003.M2TS"} + for i, s := range got.Streams { + if s.path != wantOrder[i] { + t.Errorf("Streams[%d].path = %q, want %q", i, s.path, wantOrder[i]) + } + } + }) + + t.Run("non-BDMV disc returns nil", func(t *testing.T) { + t.Parallel() + files := []isoFileEntry{ + {path: "movie.mkv", lba: 100, size: 1_000_000}, + } + if got := ResolveMainFeature(bytes.NewReader(make([]byte, 16*iso9660SectorSize)), files); got != nil { + t.Errorf("expected nil for non-BDMV disc, got %+v", got) + } + }) + + t.Run("BDMV with no parseable MPLS returns nil", func(t *testing.T) { + t.Parallel() + rs := makeImage(t, map[uint32][]byte{ + 100: []byte("not a real mpls"), + }) + files := []isoFileEntry{ + {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: 15}, + {path: "BDMV/STREAM/00001.M2TS", lba: 200, size: 1_000_000}, + } + if got := ResolveMainFeature(rs, files); got != nil { + t.Errorf("expected nil for unparseable MPLS, got %+v", got) + } + }) + + t.Run("playlist referencing missing M2TS yields nil", func(t *testing.T) { + t.Parallel() + // Playlist references a clip that has no corresponding M2TS entry. 
// MPLS (Blu-ray PlayList) is a fixed binary format defined by the BDA spec.
// We only parse the fields needed to identify the main feature playlist and
// its ordered list of M2TS clips: the clip_information_file_name for each
// PlayItem and the IN/OUT presentation times used to estimate duration.

// mplsHeaderSize is the fixed prefix length: 4 magic + 4 version +
// 4 PlayList offset + 4 PlayListMark offset + 4 ExtensionData offset.
const mplsHeaderSize = 20

const (
	// mplsPlayListHeaderSize is the PlayList prefix that precedes the first
	// PlayItem: length(4) + reserved(2) + numPlayItems(2) + numSubPaths(2).
	mplsPlayListHeaderSize = 10
	// mplsPlayItemMinBody is the number of PlayItem body bytes (after its
	// 2-byte length field) needed to reach the end of OUT_time.
	mplsPlayItemMinBody = 20
)

// MPLSPlayItem describes one entry in a PlayList.
type MPLSPlayItem struct {
	// ClipName is the 5-character clip_information_file_name (e.g. "00001").
	// The corresponding stream lives at BDMV/STREAM/<ClipName>.M2TS.
	ClipName string
	// InTime and OutTime are 45 kHz presentation timestamps. Duration in
	// ticks is OutTime - InTime; convert to seconds by dividing by 45000.
	InTime  uint32
	OutTime uint32
}

// MPLSPlayList is the parsed view of a single .mpls file.
type MPLSPlayList struct {
	Version   string // e.g. "0100", "0200", "0300"
	PlayItems []MPLSPlayItem
}

// DurationTicks returns the sum of (OutTime-InTime) across PlayItems in
// 45 kHz ticks. PlayItems whose OutTime does not exceed InTime contribute
// nothing, so a malformed item can never make the total wrap around.
func (p *MPLSPlayList) DurationTicks() int64 {
	var total int64
	for _, it := range p.PlayItems {
		if it.OutTime > it.InTime {
			total += int64(it.OutTime - it.InTime)
		}
	}
	return total
}

// ParseMPLS decodes a .mpls file. All multi-byte integers are big-endian
// per the BDA spec. Sub-paths, the STN table, and per-angle alternates
// are skipped — each PlayItem's leading length field is used to advance
// past everything that is not needed.
//
// An error is returned for a truncated header, a wrong magic, or any
// offset/length that points outside data. Offsets and lengths read from
// the file are validated in 64-bit arithmetic so that hostile 32-bit values
// cannot wrap on platforms where int is 32 bits wide.
func ParseMPLS(data []byte) (*MPLSPlayList, error) {
	if len(data) < mplsHeaderSize {
		return nil, errors.New("mpls: truncated header")
	}
	if string(data[0:4]) != "MPLS" {
		return nil, fmt.Errorf("mpls: bad magic %q", data[0:4])
	}
	version := string(data[4:8])

	fileSize := int64(len(data))
	playListOff := binary.BigEndian.Uint32(data[8:12])
	off := int64(playListOff)
	if off < mplsHeaderSize || off+mplsPlayListHeaderSize > fileSize {
		return nil, fmt.Errorf("mpls: PlayList offset %d out of range (file size %d)", playListOff, len(data))
	}

	// PlayList header: length(4) + reserved(2) + numPlayItems(2) + numSubPaths(2)
	pl := data[int(off):]
	playListLen := binary.BigEndian.Uint32(pl[0:4])
	// The length field excludes its own 4 bytes.
	bodyLen := 4 + int64(playListLen)
	if off+bodyLen > fileSize {
		return nil, fmt.Errorf("mpls: PlayList length %d exceeds file size", playListLen)
	}
	numPlayItems := int(binary.BigEndian.Uint16(pl[6:8]))
	plBody := pl[:int(bodyLen)] // safe: bodyLen <= len(pl) was just verified

	// Never pre-allocate more entries than could physically fit in the
	// PlayList, whatever the header claims.
	capHint := numPlayItems
	if fit := len(plBody) / (2 + mplsPlayItemMinBody); fit < capHint {
		capHint = fit
	}
	items := make([]MPLSPlayItem, 0, capHint)

	// PlayItems start right after the PlayList header.
	cursor := mplsPlayListHeaderSize
	for i := 0; i < numPlayItems; i++ {
		if cursor+2 > len(plBody) {
			return nil, fmt.Errorf("mpls: PlayItem %d header out of range", i)
		}
		// PlayItem length excludes the 2-byte length field itself.
		itemLen := int(binary.BigEndian.Uint16(plBody[cursor : cursor+2]))
		itemStart := cursor + 2
		itemEnd := itemStart + itemLen
		if itemEnd > len(plBody) {
			return nil, fmt.Errorf("mpls: PlayItem %d length %d overruns PlayList", i, itemLen)
		}
		// Fixed PlayItem layout we care about:
		//   +0   5  clip_information_file_name (e.g. "00001")
		//   +5   4  clip_codec_identifier ("M2TS")
		//   +9   2  flags incl. is_multi_angle / connection_condition
		//   +11  1  ref_to_STC_id
		//   +12  4  IN_time  (45 kHz)
		//   +16  4  OUT_time (45 kHz)
		if itemLen < mplsPlayItemMinBody {
			return nil, fmt.Errorf("mpls: PlayItem %d too short (len=%d)", i, itemLen)
		}
		body := plBody[itemStart:itemEnd]
		items = append(items, MPLSPlayItem{
			ClipName: string(body[0:5]),
			InTime:   binary.BigEndian.Uint32(body[12:16]),
			OutTime:  binary.BigEndian.Uint32(body[16:20]),
		})
		// Jump by the declared length so any trailing multi-angle / STN
		// data inside this PlayItem is skipped.
		cursor = itemEnd
	}

	return &MPLSPlayList{Version: version, PlayItems: items}, nil
}
+ var playItemsBuf []byte + for i, it := range items { + if len(it.ClipName) != 5 { + t.Fatalf("item %d: ClipName must be 5 chars", i) + } + body := make([]byte, 20) + copy(body[0:5], it.ClipName) + copy(body[5:9], "M2TS") + // flags (2) + ref_to_STC_id (1) left zero + binary.BigEndian.PutUint32(body[12:16], it.InTime) + binary.BigEndian.PutUint32(body[16:20], it.OutTime) + // Inject the multi-angle tail into the first item only — the parser + // must skip past it via the length field without misaligning the + // next item. + if i == 0 && multiAngleTail != nil { + body = append(body, multiAngleTail...) + } + // PlayItem length excludes its own 2-byte length prefix. + lenPrefix := make([]byte, 2) + binary.BigEndian.PutUint16(lenPrefix, uint16(len(body))) + playItemsBuf = append(playItemsBuf, lenPrefix...) + playItemsBuf = append(playItemsBuf, body...) + } + + // PlayList header: length(4)+reserved(2)+numPI(2)+numSub(2)+playItems + plHeader := make([]byte, 10) + // length excludes its own 4-byte field + binary.BigEndian.PutUint32(plHeader[0:4], uint32(6+len(playItemsBuf))) + binary.BigEndian.PutUint16(plHeader[6:8], uint16(len(items))) + // numSubPaths left zero + + playList := append(plHeader, playItemsBuf...) + + // File header: 4 magic + 4 version + 4 PL offset + 4 PLMark + 4 ExtData + hdr := make([]byte, mplsHeaderSize) + copy(hdr[0:4], "MPLS") + copy(hdr[4:8], version) + binary.BigEndian.PutUint32(hdr[8:12], uint32(mplsHeaderSize)) + // PlayListMark & ExtensionData offsets unused; leave zero. + + return append(hdr, playList...) 
+} + +func TestParseMPLS(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + data []byte + wantErr bool + wantItems []MPLSPlayItem + wantTicks int64 + }{ + { + name: "single PlayItem", + data: buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 1000, OutTime: 91000}, + }, nil), + wantItems: []MPLSPlayItem{{ClipName: "00001", InTime: 1000, OutTime: 91000}}, + wantTicks: 90000, // 2s at 45kHz + }, + { + name: "five PlayItems (main feature shape)", + data: buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 45000}, + {ClipName: "00002", InTime: 0, OutTime: 45000}, + {ClipName: "00003", InTime: 0, OutTime: 45000}, + {ClipName: "00004", InTime: 0, OutTime: 45000}, + {ClipName: "00005", InTime: 0, OutTime: 45000}, + }, nil), + wantItems: []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 45000}, + {ClipName: "00002", InTime: 0, OutTime: 45000}, + {ClipName: "00003", InTime: 0, OutTime: 45000}, + {ClipName: "00004", InTime: 0, OutTime: 45000}, + {ClipName: "00005", InTime: 0, OutTime: 45000}, + }, + wantTicks: 5 * 45000, + }, + { + name: "multi-angle PlayItem (tail must be skipped)", + // The tail simulates angle-count + alt-angle records appended + // after the fixed PlayItem prefix. The parser only consumes the + // first 20 bytes and uses the length field to skip past the + // rest, so item 2 must still parse cleanly. 
+ data: buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 45000}, + {ClipName: "00002", InTime: 0, OutTime: 90000}, + }, []byte{ + 0x02, // num_angles + 0x00, // is_different_audios flags + '0', '0', '0', '0', '7', 'M', '2', 'T', 'S', 0x00, // one alt angle entry (10 bytes) + }), + wantItems: []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 45000}, + {ClipName: "00002", InTime: 0, OutTime: 90000}, + }, + wantTicks: 45000 + 90000, + }, + { + name: "wrong magic", + data: []byte("NOTMPLS-padding-here-padding-here"), + wantErr: true, + }, + { + name: "truncated header", + data: []byte("MPLS"), + wantErr: true, + }, + { + name: "PlayList offset out of range", + data: func() []byte { + b := make([]byte, mplsHeaderSize) + copy(b[0:4], "MPLS") + copy(b[4:8], "0200") + binary.BigEndian.PutUint32(b[8:12], 9999) + return b + }(), + wantErr: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + got, err := ParseMPLS(tc.data) + if tc.wantErr { + if err == nil { + t.Fatalf("expected error, got nil") + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(got.PlayItems) != len(tc.wantItems) { + t.Fatalf("PlayItems len = %d, want %d", len(got.PlayItems), len(tc.wantItems)) + } + for i, it := range got.PlayItems { + if it != tc.wantItems[i] { + t.Errorf("PlayItem[%d] = %+v, want %+v", i, it, tc.wantItems[i]) + } + } + if d := got.DurationTicks(); d != tc.wantTicks { + t.Errorf("DurationTicks = %d, want %d", d, tc.wantTicks) + } + }) + } +} diff --git a/internal/importer/archive/iso/processor.go b/internal/importer/archive/iso/processor.go index 10c5fd7a1..340b4acc3 100644 --- a/internal/importer/archive/iso/processor.go +++ b/internal/importer/archive/iso/processor.go @@ -11,61 +11,80 @@ import ( "github.com/javi11/altmount/internal/pool" ) -// AnalyzeISOContent enumerates all allowed media files inside the given ISO source -// and returns ISOFileContent entries with 
Usenet segment mappings. -func AnalyzeISOContent( +// AnalyzeISO inspects the given ISO source and returns: +// - the volume label (for multi-disc grouping), +// - the filtered list of inner files (Files), +// - the ordered MainFeature M2TS list when the ISO is a Blu-ray with a +// resolvable playlist (nil otherwise). +// +// allowedExtensions only filters Files. MainFeature is always returned for +// BDMV discs regardless of the extension list — its existence is the +// signal callers use to opt into virtual concatenation. +func AnalyzeISO( ctx context.Context, src ISOSource, poolManager pool.Manager, maxPrefetch int, readTimeout time.Duration, allowedExtensions []string, -) ([]ISOFileContent, error) { +) (*AnalyzedISO, error) { rs, closer, err := NewISOReadSeeker(ctx, src, poolManager, maxPrefetch, readTimeout) if err != nil { return nil, fmt.Errorf("iso: creating read seeker for %q: %w", src.Filename, err) } defer closer.Close() - files, err := ListISOFiles(rs) + entries, err := ListISOFiles(rs) if err != nil { return nil, fmt.Errorf("iso: listing files in %q: %w", src.Filename, err) } - var result []ISOFileContent - for _, entry := range files { - if !isAllowedFile(entry.path, int64(entry.size), allowedExtensions) { + out := &AnalyzedISO{VolumeLabel: ReadVolumeLabel(rs)} + + for _, e := range entries { + if !isAllowedFile(e.path, int64(e.size), allowedExtensions) { continue } + out.Files = append(out.Files, buildFileContent(src, e)) + } - isoOffset := int64(entry.lba) * iso9660SectorSize - - fc := ISOFileContent{ - InternalPath: entry.path, - Filename: filepath.Base(entry.path), - Size: int64(entry.size), + if mf := ResolveMainFeature(rs, entries); mf != nil { + out.DurationTicks = mf.DurationTicks + for _, e := range mf.Streams { + out.MainFeature = append(out.MainFeature, buildFileContent(src, e)) } + } - if len(src.AesKey) == 0 { - // Unencrypted: slice segments to cover exactly this file's bytes - sliced, _ := sliceSegmentsForRange(src.Segments, isoOffset, 
int64(entry.size)) - fc.Segments = sliced - } else { - // Encrypted: create a NestedSource so the VFS can decrypt and seek - fc.NestedSource = &ISONestedSource{ - Segments: src.Segments, - AesKey: src.AesKey, - AesIV: src.AesIV, - InnerOffset: isoOffset, - InnerLength: int64(entry.size), - InnerVolumeSize: src.Size, - } - } + return out, nil +} - result = append(result, fc) +// buildFileContent turns one ISO directory entry into an ISOFileContent, +// slicing or referencing the source's Usenet segments according to whether +// the ISO is encrypted. +func buildFileContent(src ISOSource, e isoFileEntry) ISOFileContent { + isoOffset := int64(e.lba) * iso9660SectorSize + fc := ISOFileContent{ + InternalPath: e.path, + Filename: filepath.Base(e.path), + Size: int64(e.size), } - - return result, nil + if len(src.AesKey) == 0 { + // Unencrypted: pre-slice segments so this content stands alone. + sliced, _ := sliceSegmentsForRange(src.Segments, isoOffset, int64(e.size)) + fc.Segments = sliced + } else { + // Encrypted: AES-CBC requires the full inner volume + offset so + // the cipher can chain IVs from the start of the ISO. + fc.NestedSource = &ISONestedSource{ + Segments: src.Segments, + AesKey: src.AesKey, + AesIV: src.AesIV, + InnerOffset: isoOffset, + InnerLength: int64(e.size), + InnerVolumeSize: src.Size, + } + } + return fc } // isAllowedFile returns true if the file extension is in the allowed list. diff --git a/internal/importer/archive/iso/types.go b/internal/importer/archive/iso/types.go index 53e514672..b755fe1eb 100644 --- a/internal/importer/archive/iso/types.go +++ b/internal/importer/archive/iso/types.go @@ -33,3 +33,16 @@ type ISONestedSource struct { InnerLength int64 // file size InnerVolumeSize int64 // ISO total decrypted size } + +// AnalyzedISO is the full result of inspecting one ISO image. Files mirrors +// what AnalyzeISOContent has always returned (all media files with extension +// filtering applied). 
MainFeature, when non-nil, is the ordered M2TS list +// that forms the Blu-ray main feature according to BDMV/PLAYLIST/*.mpls — +// this is the slice callers should concatenate to produce a single playable +// virtual file. +type AnalyzedISO struct { + VolumeLabel string + Files []ISOFileContent + MainFeature []ISOFileContent // nil for non-BDMV / unparseable playlists + DurationTicks int64 // sum of (OUT-IN) of MainFeature at 45 kHz +} diff --git a/internal/importer/archive/iso/volume.go b/internal/importer/archive/iso/volume.go new file mode 100644 index 000000000..f2db56574 --- /dev/null +++ b/internal/importer/archive/iso/volume.go @@ -0,0 +1,30 @@ +package iso + +import ( + "io" + "strings" +) + +// ReadVolumeLabel returns the ISO 9660 Volume Identifier from the Primary +// Volume Descriptor at sector 16. Hybrid Blu-ray discs always carry a +// 9660 PVD even when the active filesystem is UDF, so this works for both +// plain ISOs and BD images. +// +// Returns an empty string if the descriptor is missing or invalid — callers +// fall back to the ISO filename for disc-group keying. +func ReadVolumeLabel(rs io.ReadSeeker) string { + pvd := make([]byte, iso9660SectorSize) + if _, err := rs.Seek(16*iso9660SectorSize, io.SeekStart); err != nil { + return "" + } + if _, err := io.ReadFull(rs, pvd); err != nil { + return "" + } + // Type 1 = Primary Volume Descriptor; identifier "CD001" at +1. + if pvd[0] != 1 || string(pvd[1:6]) != "CD001" { + return "" + } + // Volume identifier: 32 bytes of a-characters at offset 40, space-padded. 
+ label := strings.TrimRight(string(pvd[40:72]), " \x00") + return label +} diff --git a/internal/importer/archive/iso/volume_test.go b/internal/importer/archive/iso/volume_test.go new file mode 100644 index 000000000..f8aeac1a4 --- /dev/null +++ b/internal/importer/archive/iso/volume_test.go @@ -0,0 +1,70 @@ +package iso + +import ( + "bytes" + "io" + "testing" +) + +// buildPVD constructs a 17-sector buffer with a synthetic Primary Volume +// Descriptor placed at sector 16. The remaining bytes are zero-filled. +func buildPVD(label string, typeCode byte, identifier string) io.ReadSeeker { + buf := make([]byte, 17*iso9660SectorSize) + pvd := buf[16*iso9660SectorSize:] + pvd[0] = typeCode + copy(pvd[1:6], identifier) + // Volume identifier field is 32 bytes, space-padded. + field := make([]byte, 32) + for i := range field { + field[i] = ' ' + } + copy(field, label) + copy(pvd[40:72], field) + return bytes.NewReader(buf) +} + +func TestReadVolumeLabel(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + rs io.ReadSeeker + want string + }{ + { + name: "Avatar disc 1 label", + rs: buildPVD("AVATAR_FIRE_AND_ASH_DISC_1", 1, "CD001"), + want: "AVATAR_FIRE_AND_ASH_DISC_1", + }, + { + name: "padded short label trimmed", + rs: buildPVD("FOO", 1, "CD001"), + want: "FOO", + }, + { + name: "wrong type code", + rs: buildPVD("ANYTHING", 2, "CD001"), + want: "", + }, + { + name: "wrong identifier", + rs: buildPVD("ANYTHING", 1, "BAD!?"), + want: "", + }, + { + name: "short input (no sector 16)", + rs: bytes.NewReader(make([]byte, 1024)), + want: "", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + got := ReadVolumeLabel(tc.rs) + if got != tc.want { + t.Errorf("ReadVolumeLabel = %q, want %q", got, tc.want) + } + }) + } +} diff --git a/internal/importer/archive/iso_expansion.go b/internal/importer/archive/iso_expansion.go new file mode 100644 index 000000000..daa8e4fce --- /dev/null +++ 
b/internal/importer/archive/iso_expansion.go @@ -0,0 +1,296 @@ +package archive + +import ( + "context" + "fmt" + "log/slog" + "path/filepath" + "regexp" + "sort" + "strconv" + "strings" + "time" + + "github.com/javi11/altmount/internal/importer/archive/iso" + "github.com/javi11/altmount/internal/pool" +) + +// analyzedISO bundles an ISO Content with its inspection result and its +// place in a multi-disc grouping. Used internally by ExpandISOContents. +type analyzedISO struct { + src Content // original ISO Content (for fallback / metadata) + analyzed *iso.AnalyzedISO // result of iso.AnalyzeISO + discNum int // parsed disc number; 0 when label has no disc suffix + groupKey string // base name stripped of any DISC/CD/PART suffix +} + +// ExpandISOContents replaces .iso entries in contents with the media they +// contain, applying two Blu-ray-aware optimisations on top of the legacy +// "pick the largest file" behaviour: +// +// 1. Within a disc, if BDMV/PLAYLIST/*.mpls identifies a main feature +// spanning multiple M2TS clips, the clips are virtually concatenated +// into one Content via NestedSources — the player sees a single file. +// 2. Across discs in the same archive group (e.g. DISC_1 and DISC_2 ISOs +// in one NZB), discs sharing a stripped volume label are merged so +// the cross-disc movie also plays as one file. +// +// Non-ISO entries pass through unchanged. Per-ISO errors are non-fatal: +// on failure the original .iso Content is kept so downstream still has +// something to work with. 
+func ExpandISOContents( + ctx context.Context, + expand bool, + contents []Content, + poolManager pool.Manager, + maxPrefetch int, + readTimeout time.Duration, + allowedExtensions []string, +) ([]Content, error) { + if !expand { + return contents, nil + } + + var ( + result []Content + groups = make(map[string][]analyzedISO) + groupKeys []string + ) + + for _, c := range contents { + if c.IsDirectory || strings.ToLower(filepath.Ext(c.Filename)) != ".iso" { + result = append(result, c) + continue + } + + src := iso.ISOSource{ + Filename: c.Filename, + Segments: c.Segments, + AesKey: c.AesKey, + AesIV: c.AesIV, + Size: c.Size, + } + a, err := iso.AnalyzeISO(ctx, src, poolManager, maxPrefetch, readTimeout, allowedExtensions) + if err != nil { + slog.WarnContext(ctx, "Failed to analyze ISO content, keeping ISO as-is", + "file", c.Filename, "error", err) + result = append(result, c) + continue + } + if len(a.Files) == 0 && len(a.MainFeature) == 0 { + result = append(result, c) + continue + } + + key, discNum := discGroupKey(a.VolumeLabel, c.Filename) + entry := analyzedISO{src: c, analyzed: a, discNum: discNum, groupKey: key} + if _, exists := groups[key]; !exists { + groupKeys = append(groupKeys, key) + } + groups[key] = append(groups[key], entry) + } + + sort.Strings(groupKeys) // deterministic output order + for _, key := range groupKeys { + g := groups[key] + sort.SliceStable(g, func(i, j int) bool { return g[i].discNum < g[j].discNum }) + + // Concatenate main features only when *every* member of the group + // has one — mixing BDMV and non-BDMV in a single group is almost + // always a false grouping, so fall back to per-disc handling. 
+ allHaveMainFeature := true + for _, e := range g { + if len(e.analyzed.MainFeature) == 0 { + allHaveMainFeature = false + break + } + } + + if allHaveMainFeature { + merged, ok := buildMainFeatureContent(ctx, key, g) + if ok { + result = append(result, merged) + continue + } + } + + // Fallback: legacy per-ISO largest-file selection. + for _, e := range g { + nc, ok := buildLargestFileContent(e.src, e.analyzed.Files) + if !ok { + result = append(result, e.src) + continue + } + result = append(result, nc) + } + } + + return result, nil +} + +// buildMainFeatureContent concatenates every member's MainFeature into a +// single Content whose NestedSources chain spans every M2TS in disc and +// playlist order. Returns (zero, false) when, after conversion, the chain +// is empty. +func buildMainFeatureContent(ctx context.Context, groupKey string, g []analyzedISO) (Content, bool) { + var ( + sources []NestedSource + totalSize int64 + firstISOName string + nzbdavID string + ) + for _, e := range g { + if firstISOName == "" { + firstISOName = e.src.Filename + nzbdavID = e.src.NzbdavID + } + for _, fc := range e.analyzed.MainFeature { + ns := isoFileContentToNestedSource(fc) + if ns.InnerLength <= 0 { + continue + } + sources = append(sources, ns) + totalSize += ns.InnerLength + } + } + if len(sources) == 0 { + return Content{}, false + } + + filename := mainFeatureFilename(groupKey, firstISOName) + slog.InfoContext(ctx, "Built Blu-ray main-feature virtual file", + "group", groupKey, + "discs", len(g), + "clips", len(sources), + "size_bytes", totalSize, + "filename", filename, + ) + + return Content{ + InternalPath: filename, + Filename: filename, + Size: totalSize, + PackedSize: totalSize, + NzbdavID: nzbdavID, + NestedSources: sources, + ISOExpansionIndex: 1, + }, true +} + +// buildLargestFileContent reproduces the pre-existing "pick the single +// biggest file inside the ISO" behaviour. Kept as a fallback for ISOs +// that have no BDMV main feature. 
+func buildLargestFileContent(src Content, files []iso.ISOFileContent) (Content, bool) { + if len(files) == 0 { + return Content{}, false + } + sort.Slice(files, func(i, j int) bool { return files[i].Size > files[j].Size }) + f := files[0] + nc := Content{ + InternalPath: f.InternalPath, + Filename: f.Filename, + Size: f.Size, + PackedSize: f.Size, + NzbdavID: src.NzbdavID, + ISOExpansionIndex: 1, + } + if f.NestedSource != nil { + nc.NestedSources = []NestedSource{isoFileContentToNestedSource(f)} + } else { + nc.Segments = f.Segments + } + return nc, true +} + +// isoFileContentToNestedSource converts an ISOFileContent into a +// NestedSource. For unencrypted ISOs the segments are already sliced to +// cover exactly this file, so InnerOffset is 0 and InnerVolumeSize equals +// the file size (unused when AesKey is empty — see +// MetadataVirtualFile.createNestedSourceReader). +func isoFileContentToNestedSource(fc iso.ISOFileContent) NestedSource { + if fc.NestedSource != nil { + return NestedSource{ + Segments: fc.NestedSource.Segments, + AesKey: fc.NestedSource.AesKey, + AesIV: fc.NestedSource.AesIV, + InnerOffset: fc.NestedSource.InnerOffset, + InnerLength: fc.NestedSource.InnerLength, + InnerVolumeSize: fc.NestedSource.InnerVolumeSize, + } + } + return NestedSource{ + Segments: fc.Segments, + InnerOffset: 0, + InnerLength: fc.Size, + InnerVolumeSize: fc.Size, + } +} + +// discSuffixPattern matches volume labels like "AVATAR_FIRE_AND_ASH_DISC_1", +// "MOVIE-CD2", "TITLE PART 3", etc. Capture 1 is the stripped base name, +// capture 2 is the disc identifier (numeric or single letter). +var discSuffixPattern = regexp.MustCompile(`(?i)^(.+?)[ _\-]*(?:disc|cd|part|d|side)[ _\-]*([0-9]+|[a-z])$`) + +// discGroupKey computes the disc-grouping key and parsed disc number for +// an ISO. It prefers the volume label and falls back to the ISO filename +// (without extension) when the label is empty or doesn't match a disc +// pattern. 
Single-disc ISOs return key=the normalised label (or filename stem), discNum=0. +func discGroupKey(label, isoFilename string) (string, int) { + candidates := []string{label} + if isoFilename != "" { + candidates = append(candidates, strings.TrimSuffix(isoFilename, filepath.Ext(isoFilename))) + } + for _, c := range candidates { + c = strings.TrimSpace(c) + if c == "" { + continue + } + if m := discSuffixPattern.FindStringSubmatch(c); m != nil { + base := normaliseGroupKey(m[1]) + return base, parseDiscNumber(m[2]) + } + } + for _, c := range candidates { + c = strings.TrimSpace(c) + if c != "" { + return normaliseGroupKey(c), 0 + } + } + return "", 0 +} + +func normaliseGroupKey(s string) string { + s = strings.TrimSpace(s) + s = strings.Trim(s, "_- ") + return strings.ToUpper(s) +} + +// parseDiscNumber turns "1" → 1, "2" → 2, "A" → 1, "B" → 2, etc. +func parseDiscNumber(s string) int { + if n, err := strconv.Atoi(s); err == nil { + return n + } + if len(s) == 1 { + c := strings.ToUpper(s)[0] + if c >= 'A' && c <= 'Z' { + return int(c-'A') + 1 + } + } + return 0 +} + +// mainFeatureFilename derives a sensible filename for the virtual concat. +// Downstream renaming (see rar/sevenzip aggregator post-processing) will +// usually replace the base name with the NZB release name; we only need a +// valid .m2ts extension here. 
+func mainFeatureFilename(groupKey, isoFilename string) string { + const ext = ".m2ts" + if groupKey != "" { + return fmt.Sprintf("%s%s", groupKey, ext) + } + if isoFilename != "" { + stem := strings.TrimSuffix(isoFilename, filepath.Ext(isoFilename)) + return stem + ext + } + return "main_feature" + ext +} diff --git a/internal/importer/archive/iso_expansion_test.go b/internal/importer/archive/iso_expansion_test.go new file mode 100644 index 000000000..e009e615d --- /dev/null +++ b/internal/importer/archive/iso_expansion_test.go @@ -0,0 +1,214 @@ +package archive + +import ( + "context" + "testing" + + "github.com/javi11/altmount/internal/importer/archive/iso" + metapb "github.com/javi11/altmount/internal/metadata/proto" +) + +func TestDiscGroupKey(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + label string + filename string + wantKey string + wantNum int + }{ + {"avatar disc 1 label", "AVATAR_FIRE_AND_ASH_DISC_1", "any.iso", "AVATAR_FIRE_AND_ASH", 1}, + {"avatar disc 2 label", "AVATAR_FIRE_AND_ASH_DISC_2", "any.iso", "AVATAR_FIRE_AND_ASH", 2}, + {"compact DISC2", "MOVIE_DISC2", "any.iso", "MOVIE", 2}, + {"CD suffix", "MOVIE-CD1", "any.iso", "MOVIE", 1}, + {"PART suffix with spaces", "TITLE PART 3", "any.iso", "TITLE", 3}, + {"letter disc identifier B → 2", "FOO_DISC_B", "any.iso", "FOO", 2}, + {"no suffix → solo", "PLAIN_MOVIE", "any.iso", "PLAIN_MOVIE", 0}, + {"empty label falls back to filename stem", "", "MyMovie_Disc_1.iso", "MYMOVIE", 1}, + {"empty label and weird filename", "", "thing.iso", "THING", 0}, + {"only label has disc, filename plain", "X_DISC_2", "anything.iso", "X", 2}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + gotKey, gotNum := discGroupKey(tc.label, tc.filename) + if gotKey != tc.wantKey || gotNum != tc.wantNum { + t.Errorf("discGroupKey(%q,%q) = (%q,%d), want (%q,%d)", + tc.label, tc.filename, gotKey, gotNum, tc.wantKey, tc.wantNum) + } + }) + } +} + +func 
TestParseDiscNumber(t *testing.T) { + t.Parallel() + + cases := map[string]int{ + "1": 1, + "2": 2, + "10": 10, + "A": 1, + "a": 1, + "B": 2, + "": 0, + "AB": 0, + "foo": 0, + } + for in, want := range cases { + if got := parseDiscNumber(in); got != want { + t.Errorf("parseDiscNumber(%q) = %d, want %d", in, got, want) + } + } +} + +func TestIsoFileContentToNestedSource(t *testing.T) { + t.Parallel() + + t.Run("unencrypted uses pre-sliced segments", func(t *testing.T) { + t.Parallel() + segs := []*metapb.SegmentData{ + {Id: "a", StartOffset: 0, EndOffset: 99, SegmentSize: 100}, + } + fc := iso.ISOFileContent{ + Filename: "00001.m2ts", + Size: 100, + Segments: segs, + } + ns := isoFileContentToNestedSource(fc) + if len(ns.Segments) != 1 || ns.InnerLength != 100 || ns.InnerOffset != 0 { + t.Fatalf("unexpected NestedSource: %+v", ns) + } + if len(ns.AesKey) != 0 { + t.Errorf("AesKey should be empty, got %v", ns.AesKey) + } + }) + + t.Run("encrypted carries offset and key", func(t *testing.T) { + t.Parallel() + segs := []*metapb.SegmentData{ + {Id: "outer", StartOffset: 0, EndOffset: 99999, SegmentSize: 100000}, + } + fc := iso.ISOFileContent{ + Filename: "00001.m2ts", + Size: 2048, + NestedSource: &iso.ISONestedSource{ + Segments: segs, + AesKey: []byte("0123456789abcdef0123456789abcdef"), + AesIV: []byte("0123456789abcdef"), + InnerOffset: 1024, + InnerLength: 2048, + InnerVolumeSize: 99999, + }, + } + ns := isoFileContentToNestedSource(fc) + if ns.InnerOffset != 1024 || ns.InnerLength != 2048 || ns.InnerVolumeSize != 99999 { + t.Fatalf("unexpected NestedSource offsets: %+v", ns) + } + if len(ns.AesKey) == 0 { + t.Error("AesKey should be carried through for encrypted source") + } + }) +} + +func TestBuildMainFeatureContent_TwoDiscs(t *testing.T) { + t.Parallel() + + // Helper to make a fake ISO main-feature ISOFileContent with given size + // and a single-segment outer slice (segment values are not interpreted + // by buildMainFeatureContent — only Size and the source 
attributes + // matter for the assembled NestedSources chain). + mkClip := func(name string, size int64) iso.ISOFileContent { + return iso.ISOFileContent{ + Filename: name, + Size: size, + Segments: []*metapb.SegmentData{ + {Id: name, StartOffset: 0, EndOffset: size - 1, SegmentSize: size}, + }, + } + } + + disc1 := analyzedISO{ + src: Content{Filename: "AVATAR_DISC_1.iso", NzbdavID: "nzb-1"}, + analyzed: &iso.AnalyzedISO{ + VolumeLabel: "AVATAR_DISC_1", + MainFeature: []iso.ISOFileContent{ + mkClip("00001.m2ts", 10_000_000), + mkClip("00002.m2ts", 20_000_000), + }, + }, + discNum: 1, + groupKey: "AVATAR", + } + disc2 := analyzedISO{ + src: Content{Filename: "AVATAR_DISC_2.iso", NzbdavID: "nzb-2"}, + analyzed: &iso.AnalyzedISO{ + VolumeLabel: "AVATAR_DISC_2", + MainFeature: []iso.ISOFileContent{ + mkClip("00003.m2ts", 30_000_000), + }, + }, + discNum: 2, + groupKey: "AVATAR", + } + + got, ok := buildMainFeatureContent(context.Background(), "AVATAR", []analyzedISO{disc1, disc2}) + if !ok { + t.Fatal("buildMainFeatureContent returned ok=false") + } + if got.ISOExpansionIndex != 1 { + t.Errorf("ISOExpansionIndex = %d, want 1", got.ISOExpansionIndex) + } + if got.NzbdavID != "nzb-1" { + t.Errorf("NzbdavID = %q, want nzb-1 (from first disc)", got.NzbdavID) + } + if len(got.NestedSources) != 3 { + t.Fatalf("NestedSources count = %d, want 3 (2 clips from disc 1 + 1 clip from disc 2)", len(got.NestedSources)) + } + wantSize := int64(10_000_000 + 20_000_000 + 30_000_000) + if got.Size != wantSize { + t.Errorf("Size = %d, want %d", got.Size, wantSize) + } + if got.PackedSize != wantSize { + t.Errorf("PackedSize = %d, want %d", got.PackedSize, wantSize) + } + // Order must follow disc-then-playlist (disc1.clip1, disc1.clip2, disc2.clip3). 
+ wantOrder := []int64{10_000_000, 20_000_000, 30_000_000} + for i, ns := range got.NestedSources { + if ns.InnerLength != wantOrder[i] { + t.Errorf("NestedSources[%d].InnerLength = %d, want %d", i, ns.InnerLength, wantOrder[i]) + } + } + if got.Filename != "AVATAR.m2ts" { + t.Errorf("Filename = %q, want AVATAR.m2ts", got.Filename) + } +} + +func TestBuildLargestFileContent(t *testing.T) { + t.Parallel() + + files := []iso.ISOFileContent{ + {Filename: "small.mkv", Size: 500, Segments: []*metapb.SegmentData{ + {Id: "s", StartOffset: 0, EndOffset: 499, SegmentSize: 500}, + }}, + {Filename: "big.mkv", Size: 5_000_000, Segments: []*metapb.SegmentData{ + {Id: "b", StartOffset: 0, EndOffset: 4_999_999, SegmentSize: 5_000_000}, + }}, + } + src := Content{Filename: "thing.iso", NzbdavID: "id-1"} + + got, ok := buildLargestFileContent(src, files) + if !ok { + t.Fatal("buildLargestFileContent returned ok=false") + } + if got.Filename != "big.mkv" { + t.Errorf("Filename = %q, want big.mkv (largest)", got.Filename) + } + if got.ISOExpansionIndex != 1 { + t.Errorf("ISOExpansionIndex = %d, want 1", got.ISOExpansionIndex) + } + if got.NzbdavID != "id-1" { + t.Errorf("NzbdavID = %q, want id-1", got.NzbdavID) + } +} diff --git a/internal/importer/archive/rar/aggregator.go b/internal/importer/archive/rar/aggregator.go index 52f8d4871..91138405c 100644 --- a/internal/importer/archive/rar/aggregator.go +++ b/internal/importer/archive/rar/aggregator.go @@ -16,7 +16,6 @@ import ( "github.com/javi11/altmount/internal/encryption/aes" "github.com/javi11/altmount/internal/importer/archive" - "github.com/javi11/altmount/internal/importer/archive/iso" "github.com/javi11/altmount/internal/importer/filesystem" "github.com/javi11/altmount/internal/importer/parser" "github.com/javi11/altmount/internal/importer/utils" @@ -209,7 +208,7 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { } // Expand ISO files found inside the RAR archive into their inner media files - 
rarContents, err := expandISOContents(ctx, expandBlurayIso, rarContents, poolManager, maxPrefetch, readTimeout, allowedFileExtensions) + rarContents, err := archive.ExpandISOContents(ctx, expandBlurayIso, rarContents, poolManager, maxPrefetch, readTimeout, allowedFileExtensions) if err != nil { slog.WarnContext(ctx, "ISO expansion failed, proceeding without ISO contents", "error", err) } @@ -474,81 +473,6 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { return nil } -// expandISOContents replaces any .iso Content entries with the media files found -// inside them. Non-ISO entries are passed through unchanged. Per-file errors are -// non-fatal: on failure the original ISO Content is kept. -func expandISOContents( - ctx context.Context, - expand bool, - contents []Content, - poolManager pool.Manager, - maxPrefetch int, - readTimeout time.Duration, - allowedExtensions []string, -) ([]Content, error) { - if !expand { - return contents, nil - } - var result []Content - for _, c := range contents { - if c.IsDirectory || strings.ToLower(filepath.Ext(c.Filename)) != ".iso" { - result = append(result, c) - continue - } - - src := iso.ISOSource{ - Filename: c.Filename, - Segments: c.Segments, - AesKey: c.AesKey, - AesIV: c.AesIV, - Size: c.Size, - } - - isoFiles, err := iso.AnalyzeISOContent(ctx, src, poolManager, maxPrefetch, readTimeout, allowedExtensions) - if err != nil { - slog.WarnContext(ctx, "Failed to analyze ISO content, keeping ISO as-is", - "file", c.Filename, "error", err) - result = append(result, c) - continue - } - - if len(isoFiles) == 0 { - result = append(result, c) - continue - } - - // Sort ISO files by size descending so the largest (main feature) gets index 1. - sort.Slice(isoFiles, func(i, j int) bool { - return isoFiles[i].Size > isoFiles[j].Size - }) - - // Keep only the largest file (index 0 after sort); discard smaller streams. 
- f := isoFiles[0] - nc := Content{ - InternalPath: f.InternalPath, - Filename: f.Filename, - Size: f.Size, - PackedSize: f.Size, // raw ISO data — packed == unpacked - NzbdavID: c.NzbdavID, - ISOExpansionIndex: 1, - } - if f.NestedSource != nil { - nc.NestedSources = []NestedSource{{ - Segments: f.NestedSource.Segments, - AesKey: f.NestedSource.AesKey, - AesIV: f.NestedSource.AesIV, - InnerOffset: f.NestedSource.InnerOffset, - InnerLength: f.NestedSource.InnerLength, - InnerVolumeSize: f.NestedSource.InnerVolumeSize, - }} - } else { - nc.Segments = f.Segments - } - result = append(result, nc) - } - return result, nil -} - // GroupArchivesByBaseName groups ParsedFiles by their RAR base name (case-insensitive). // Returns groups in deterministic order (sorted by base name) for testability. func GroupArchivesByBaseName(files []parser.ParsedFile) [][]parser.ParsedFile { diff --git a/internal/importer/archive/sevenzip/aggregator.go b/internal/importer/archive/sevenzip/aggregator.go index f0214a294..4fbabce91 100644 --- a/internal/importer/archive/sevenzip/aggregator.go +++ b/internal/importer/archive/sevenzip/aggregator.go @@ -6,7 +6,6 @@ import ( "log/slog" "os" "path/filepath" - "sort" "strings" "sync/atomic" "time" @@ -14,7 +13,6 @@ import ( concpool "github.com/sourcegraph/conc/pool" "github.com/javi11/altmount/internal/importer/archive" - "github.com/javi11/altmount/internal/importer/archive/iso" "github.com/javi11/altmount/internal/importer/filesystem" "github.com/javi11/altmount/internal/importer/parser" "github.com/javi11/altmount/internal/importer/utils" @@ -186,7 +184,7 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { slog.InfoContext(ctx, "Successfully analyzed 7zip archive content", "files_in_archive", len(sevenZipContents)) // Expand ISO files found inside the 7zip archive into their inner media files - sevenZipContents, err = expandISOContents(ctx, expandBlurayIso, sevenZipContents, poolManager, maxPrefetch, readTimeout, 
allowedFileExtensions) + sevenZipContents, err = archive.ExpandISOContents(ctx, expandBlurayIso, sevenZipContents, poolManager, maxPrefetch, readTimeout, allowedFileExtensions) if err != nil { slog.WarnContext(ctx, "ISO expansion failed, proceeding without ISO contents", "error", err) } @@ -445,81 +443,6 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { return nil } -// expandISOContents replaces any .iso Content entries with the media files found -// inside them. Non-ISO entries are passed through unchanged. Per-file errors are -// non-fatal: on failure the original ISO Content is kept. -func expandISOContents( - ctx context.Context, - expand bool, - contents []Content, - poolManager pool.Manager, - maxPrefetch int, - readTimeout time.Duration, - allowedExtensions []string, -) ([]Content, error) { - if !expand { - return contents, nil - } - var result []Content - for _, c := range contents { - if c.IsDirectory || strings.ToLower(filepath.Ext(c.Filename)) != ".iso" { - result = append(result, c) - continue - } - - src := iso.ISOSource{ - Filename: c.Filename, - Segments: c.Segments, - AesKey: c.AesKey, - AesIV: c.AesIV, - Size: c.Size, - } - - isoFiles, err := iso.AnalyzeISOContent(ctx, src, poolManager, maxPrefetch, readTimeout, allowedExtensions) - if err != nil { - slog.WarnContext(ctx, "Failed to analyze ISO content, keeping ISO as-is", - "file", c.Filename, "error", err) - result = append(result, c) - continue - } - - if len(isoFiles) == 0 { - result = append(result, c) - continue - } - - // Sort ISO files by size descending so the largest (main feature) gets index 1. - sort.Slice(isoFiles, func(i, j int) bool { - return isoFiles[i].Size > isoFiles[j].Size - }) - - // Keep only the largest file (index 0 after sort); discard smaller streams. 
- f := isoFiles[0] - nc := Content{ - InternalPath: f.InternalPath, - Filename: f.Filename, - Size: f.Size, - PackedSize: f.Size, // raw ISO data — packed == unpacked - NzbdavID: c.NzbdavID, - ISOExpansionIndex: 1, - } - if f.NestedSource != nil { - nc.NestedSources = []NestedSource{{ - Segments: f.NestedSource.Segments, - AesKey: f.NestedSource.AesKey, - AesIV: f.NestedSource.AesIV, - InnerOffset: f.NestedSource.InnerOffset, - InnerLength: f.NestedSource.InnerLength, - InnerVolumeSize: f.NestedSource.InnerVolumeSize, - }} - } else { - nc.Segments = f.Segments - } - result = append(result, nc) - } - return result, nil -} - // normalizeArchiveReleaseFilename aligns the filename to the NZB basename while keeping the original extension. func normalizeArchiveReleaseFilename(nzbFilename, originalFilename string) string { releaseName := nzbtrim.TrimNzbExtension(nzbFilename) From b782ea2ba49ebe41b5af76a203ab5c8f0c5e7b30 Mon Sep 17 00:00:00 2001 From: javi11 Date: Wed, 20 May 2026 20:17:26 +0200 Subject: [PATCH 02/30] fix(iso): index BDMV/STREAM/SSIF/*.SSIF for 3D Blu-ray main features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a 3D-only Blu-ray release (e.g. AVATAR_FIRE_AND_ASH_3D), the main feature playlist references clips that exist only as SSIF files in BDMV/STREAM/SSIF/ — the M2TS directory holds short extras. The previous resolver indexed only M2TS, so the long 3D playlist failed to resolve any clips and a short extras playlist won by default, producing a ~177 MB virtual file for a movie whose NZB carries ~88 GB of source data. Resolve clip names against M2TS first (preserves the smaller, more compatible 2D version on hybrid 3D releases) and fall back to SSIF when only it can satisfy the playlist. Two new test cases cover the 3D-only-with-SSIF and hybrid-prefers-M2TS paths. 
--- internal/importer/archive/iso/bluray.go | 32 +++++--- internal/importer/archive/iso/bluray_test.go | 82 ++++++++++++++++++++ 2 files changed, 104 insertions(+), 10 deletions(-) diff --git a/internal/importer/archive/iso/bluray.go b/internal/importer/archive/iso/bluray.go index e2ce4d73b..43b0b10c8 100644 --- a/internal/importer/archive/iso/bluray.go +++ b/internal/importer/archive/iso/bluray.go @@ -29,22 +29,31 @@ type MainFeaturePlaylist struct { // keep evaluating the rest, mirroring how every Blu-ray player tolerates // malformed entries in BDMV/PLAYLIST/. func ResolveMainFeature(rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlaylist { - // Build an index of all M2TS streams by their 5-digit clip stem (the - // part MPLS references). M2TS files live at BDMV/STREAM/.M2TS - // case-insensitively. - streamByClip := make(map[string]isoFileEntry) + // Build per-clip indexes. M2TS streams live at BDMV/STREAM/.M2TS + // and carry the 2D version (or the only version on a 2D disc). SSIF + // streams live at BDMV/STREAM/SSIF/.SSIF and carry the + // stereoscopic interleaved 3D version — on 3D-only Blu-ray releases + // the main feature playlist references SSIF clips, while the M2TS + // directory holds only extras. We prefer M2TS when both exist (smaller + // bytes, universal playback) and fall back to SSIF when only it + // resolves the playlist's clip names. 
+ m2tsByClip := make(map[string]isoFileEntry) + ssifByClip := make(map[string]isoFileEntry) var playlistEntries []isoFileEntry for _, f := range files { up := strings.ToUpper(f.path) switch { case strings.HasPrefix(up, "BDMV/PLAYLIST/") && strings.HasSuffix(up, ".MPLS"): playlistEntries = append(playlistEntries, f) + case strings.HasPrefix(up, "BDMV/STREAM/SSIF/") && strings.HasSuffix(up, ".SSIF"): + base := up[len("BDMV/STREAM/SSIF/") : len(up)-len(".SSIF")] + ssifByClip[base] = f case strings.HasPrefix(up, "BDMV/STREAM/") && strings.HasSuffix(up, ".M2TS"): base := up[len("BDMV/STREAM/") : len(up)-len(".M2TS")] - streamByClip[base] = f + m2tsByClip[base] = f } } - if len(playlistEntries) == 0 || len(streamByClip) == 0 { + if len(playlistEntries) == 0 || (len(m2tsByClip) == 0 && len(ssifByClip) == 0) { return nil } @@ -65,14 +74,17 @@ func ResolveMainFeature(rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlay continue } - // Resolve clip names → M2TS entries, in playlist order. + // Resolve clip names in playlist order, preferring M2TS over SSIF. 
streams := make([]isoFileEntry, 0, len(pl.PlayItems)) for _, it := range pl.PlayItems { - entry, ok := streamByClip[strings.ToUpper(it.ClipName)] - if !ok { + name := strings.ToUpper(it.ClipName) + if entry, ok := m2tsByClip[name]; ok { + streams = append(streams, entry) continue } - streams = append(streams, entry) + if entry, ok := ssifByClip[name]; ok { + streams = append(streams, entry) + } } if len(streams) == 0 { continue diff --git a/internal/importer/archive/iso/bluray_test.go b/internal/importer/archive/iso/bluray_test.go index e548f61a4..e788bafe9 100644 --- a/internal/importer/archive/iso/bluray_test.go +++ b/internal/importer/archive/iso/bluray_test.go @@ -102,6 +102,88 @@ func TestResolveMainFeature(t *testing.T) { } }) + t.Run("3D BD: playlist resolves against SSIF when M2TS missing", func(t *testing.T) { + t.Parallel() + // Avatar-2-style 3D-only release: BDMV/STREAM/*.M2TS holds only + // extras (tiny). The real main feature lives in BDMV/STREAM/SSIF/ + // and is referenced by its own MPLS. The resolver must index SSIF + // so the long playlist resolves and wins. 
+ extras := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00010", InTime: 0, OutTime: 90 * 45000}, // 90s extra + }, nil) + mainFeature3D := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00100", InTime: 0, OutTime: 60 * 60 * 45000}, + {ClipName: "00101", InTime: 0, OutTime: 60 * 60 * 45000}, + {ClipName: "00102", InTime: 0, OutTime: 12 * 60 * 45000}, // 132 min total + }, nil) + + rs := makeImage(t, map[uint32][]byte{ + 100: extras, + 110: mainFeature3D, + }) + + files := []isoFileEntry{ + {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: uint64(len(extras))}, + {path: "BDMV/PLAYLIST/00800.MPLS", lba: 110, size: uint64(len(mainFeature3D))}, + // Only the extras live as M2TS: + {path: "BDMV/STREAM/00010.M2TS", lba: 200, size: 50_000_000}, + // Main feature is SSIF only: + {path: "BDMV/STREAM/SSIF/00100.SSIF", lba: 300, size: 25_000_000_000}, + {path: "BDMV/STREAM/SSIF/00101.SSIF", lba: 400, size: 25_000_000_000}, + {path: "BDMV/STREAM/SSIF/00102.SSIF", lba: 500, size: 5_000_000_000}, + } + + got := ResolveMainFeature(rs, files) + if got == nil { + t.Fatal("ResolveMainFeature returned nil — SSIF index missing?") + } + if got.PlaylistName != "BDMV/PLAYLIST/00800.MPLS" { + t.Errorf("PlaylistName = %q, want 00800.MPLS (3D main feature)", got.PlaylistName) + } + if len(got.Streams) != 3 { + t.Fatalf("Streams len = %d, want 3 SSIF clips", len(got.Streams)) + } + wantOrder := []string{ + "BDMV/STREAM/SSIF/00100.SSIF", + "BDMV/STREAM/SSIF/00101.SSIF", + "BDMV/STREAM/SSIF/00102.SSIF", + } + for i, s := range got.Streams { + if s.path != wantOrder[i] { + t.Errorf("Streams[%d].path = %q, want %q", i, s.path, wantOrder[i]) + } + } + }) + + t.Run("hybrid 3D BD: prefers M2TS over SSIF when both exist", func(t *testing.T) { + t.Parallel() + // Both 2D MPLS (refs M2TS) and 3D MPLS (refs SSIF) point at clips + // of the same name. With both files present, the M2TS version is + // the right pick: smaller bytes, universal playback. 
The resolver + // should select it even if the 3D playlist is marginally longer. + mainFeature := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00100", InTime: 0, OutTime: 60 * 60 * 45000}, + }, nil) + rs := makeImage(t, map[uint32][]byte{100: mainFeature}) + + files := []isoFileEntry{ + {path: "BDMV/PLAYLIST/00800.MPLS", lba: 100, size: uint64(len(mainFeature))}, + {path: "BDMV/STREAM/00100.M2TS", lba: 200, size: 20_000_000_000}, + {path: "BDMV/STREAM/SSIF/00100.SSIF", lba: 300, size: 40_000_000_000}, + } + + got := ResolveMainFeature(rs, files) + if got == nil { + t.Fatal("ResolveMainFeature returned nil") + } + if len(got.Streams) != 1 { + t.Fatalf("Streams len = %d, want 1", len(got.Streams)) + } + if got.Streams[0].path != "BDMV/STREAM/00100.M2TS" { + t.Errorf("picked %q, want M2TS over SSIF", got.Streams[0].path) + } + }) + t.Run("playlist referencing missing M2TS yields nil", func(t *testing.T) { t.Parallel() // Playlist references a clip that has no corresponding M2TS entry. From 919ecad7b3c51cc07b0964f4fabda931ef0c87ce Mon Sep 17 00:00:00 2001 From: javi11 Date: Fri, 22 May 2026 08:57:02 +0200 Subject: [PATCH 03/30] chore(iso): instrument BDMV resolver with [DEBUG-isobd] tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A repeated 88GB-NZB run is still producing a 177MB virtual file with clips=2 — byte-identical to the pre-SSIF-fix output. Three hypotheses remain: stale binary, 'no actual SSIF in this BDMV' (release uses M2TS only), or SSIF lives at a non-standard path. Add one summary log per ISO (total files, playlist count, M2TS and SSIF clip counts, 12 sample paths) and one log per evaluated MPLS (resolved clip count, unresolved count, duration ticks, summed stream bytes) plus one 'picked' line. All prefixed with [DEBUG-isobd] for cheap cleanup and to confirm the new binary is live (the prefix won't appear in prior builds). 
--- internal/importer/archive/iso/bluray.go | 49 +++++++++++++++++++- internal/importer/archive/iso/bluray_test.go | 13 +++--- internal/importer/archive/iso/processor.go | 2 +- 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/internal/importer/archive/iso/bluray.go b/internal/importer/archive/iso/bluray.go index 43b0b10c8..f129a0bd4 100644 --- a/internal/importer/archive/iso/bluray.go +++ b/internal/importer/archive/iso/bluray.go @@ -1,7 +1,9 @@ package iso import ( + "context" "io" + "log/slog" "sort" "strings" ) @@ -28,7 +30,7 @@ type MainFeaturePlaylist struct { // Failures parsing individual playlists are non-fatal — we skip them and // keep evaluating the rest, mirroring how every Blu-ray player tolerates // malformed entries in BDMV/PLAYLIST/. -func ResolveMainFeature(rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlaylist { +func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlaylist { // Build per-clip indexes. M2TS streams live at BDMV/STREAM/.M2TS // and carry the 2D version (or the only version on a 2D disc). SSIF // streams live at BDMV/STREAM/SSIF/.SSIF and carry the @@ -53,6 +55,17 @@ func ResolveMainFeature(rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlay m2tsByClip[base] = f } } + // [DEBUG-isobd] One-shot summary of what the resolver actually sees in + // this ISO. Distinct prefix lets us confirm the live binary includes + // this instrumentation and lets users grep their logs cleanly. 
+ slog.InfoContext(ctx, "[DEBUG-isobd] bdmv scan", + "total_files", len(files), + "playlists", len(playlistEntries), + "m2ts_clips", len(m2tsByClip), + "ssif_clips", len(ssifByClip), + "sample_paths", samplePaths(files, 12), + ) + if len(playlistEntries) == 0 || (len(m2tsByClip) == 0 && len(ssifByClip) == 0) { return nil } @@ -86,6 +99,21 @@ func ResolveMainFeature(rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlay streams = append(streams, entry) } } + // [DEBUG-isobd] Per-playlist evaluation so we can see which mpls + // resolved how many clips and why a given candidate won or lost. + var totalSize int64 + for _, s := range streams { + totalSize += int64(s.size) + } + slog.InfoContext(ctx, "[DEBUG-isobd] mpls evaluated", + "name", pe.path, + "items", len(pl.PlayItems), + "resolved_clips", len(streams), + "unresolved", len(pl.PlayItems)-len(streams), + "duration_ticks", pl.DurationTicks(), + "streams_total_bytes", totalSize, + ) + if len(streams) == 0 { continue } @@ -99,9 +127,28 @@ func ResolveMainFeature(rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlay best = cand } } + if best != nil { + slog.InfoContext(ctx, "[DEBUG-isobd] main feature picked", + "playlist", best.PlaylistName, + "clips", len(best.Streams), + "duration_ticks", best.DurationTicks, + ) + } return best } +// samplePaths returns up to max paths from files, intended for diagnostic +// logging. The list is taken in iteration order — not sorted — so the user +// sees what ListISOFiles actually emitted. +func samplePaths(files []isoFileEntry, max int) []string { + n := min(len(files), max) + out := make([]string, 0, n) + for i := range n { + out = append(out, files[i].path) + } + return out +} + // isBetterPlaylist returns true when cand should replace best. // Comparison: longer duration > more PlayItems > earlier filename. 
// The filename tie-break relies on playlistEntries being sorted before diff --git a/internal/importer/archive/iso/bluray_test.go b/internal/importer/archive/iso/bluray_test.go index e788bafe9..30c983b2c 100644 --- a/internal/importer/archive/iso/bluray_test.go +++ b/internal/importer/archive/iso/bluray_test.go @@ -2,6 +2,7 @@ package iso import ( "bytes" + "context" "io" "testing" ) @@ -60,7 +61,7 @@ func TestResolveMainFeature(t *testing.T) { {path: "BDMV/STREAM/00010.M2TS", lba: 500, size: 500_000}, } - got := ResolveMainFeature(rs, files) + got := ResolveMainFeature(context.Background(), rs, files) if got == nil { t.Fatal("ResolveMainFeature returned nil") } @@ -83,7 +84,7 @@ func TestResolveMainFeature(t *testing.T) { files := []isoFileEntry{ {path: "movie.mkv", lba: 100, size: 1_000_000}, } - if got := ResolveMainFeature(bytes.NewReader(make([]byte, 16*iso9660SectorSize)), files); got != nil { + if got := ResolveMainFeature(context.Background(), bytes.NewReader(make([]byte, 16*iso9660SectorSize)), files); got != nil { t.Errorf("expected nil for non-BDMV disc, got %+v", got) } }) @@ -97,7 +98,7 @@ func TestResolveMainFeature(t *testing.T) { {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: 15}, {path: "BDMV/STREAM/00001.M2TS", lba: 200, size: 1_000_000}, } - if got := ResolveMainFeature(rs, files); got != nil { + if got := ResolveMainFeature(context.Background(), rs, files); got != nil { t.Errorf("expected nil for unparseable MPLS, got %+v", got) } }) @@ -133,7 +134,7 @@ func TestResolveMainFeature(t *testing.T) { {path: "BDMV/STREAM/SSIF/00102.SSIF", lba: 500, size: 5_000_000_000}, } - got := ResolveMainFeature(rs, files) + got := ResolveMainFeature(context.Background(), rs, files) if got == nil { t.Fatal("ResolveMainFeature returned nil — SSIF index missing?") } @@ -172,7 +173,7 @@ func TestResolveMainFeature(t *testing.T) { {path: "BDMV/STREAM/SSIF/00100.SSIF", lba: 300, size: 40_000_000_000}, } - got := ResolveMainFeature(rs, files) + got := 
ResolveMainFeature(context.Background(), rs, files) if got == nil { t.Fatal("ResolveMainFeature returned nil") } @@ -197,7 +198,7 @@ func TestResolveMainFeature(t *testing.T) { {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: uint64(len(data))}, {path: "BDMV/STREAM/00001.M2TS", lba: 200, size: 1_000_000}, } - if got := ResolveMainFeature(rs, files); got != nil { + if got := ResolveMainFeature(context.Background(), rs, files); got != nil { t.Errorf("expected nil when MPLS references unknown clip, got %+v", got) } }) diff --git a/internal/importer/archive/iso/processor.go b/internal/importer/archive/iso/processor.go index 340b4acc3..82ad56505 100644 --- a/internal/importer/archive/iso/processor.go +++ b/internal/importer/archive/iso/processor.go @@ -48,7 +48,7 @@ func AnalyzeISO( out.Files = append(out.Files, buildFileContent(src, e)) } - if mf := ResolveMainFeature(rs, entries); mf != nil { + if mf := ResolveMainFeature(ctx, rs, entries); mf != nil { out.DurationTicks = mf.DurationTicks for _, e := range mf.Streams { out.MainFeature = append(out.MainFeature, buildFileContent(src, e)) From 3aa16dad42ad08611d65099e8026cc2e6e81f625 Mon Sep 17 00:00:00 2001 From: javi11 Date: Fri, 22 May 2026 09:22:27 +0200 Subject: [PATCH 04/30] chore(iso): extend [DEBUG-isobd] scan with size distribution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Real-ISO run shows all 38 playlists with items=1, max duration 80s, max stream bytes 141MB — yet the NZB carries ~88GB across 2 ISOs. Either ListISOFiles is dropping huge files (UDF alloc-type 2/3 not handled) or reading wrong sizes for them. Add to the bdmv-scan log: - sum of every file size (across all entries) - sum of M2TS-only and SSIF-only sizes - the 6 largest files with human-readable sizes One log line will distinguish 'sizes truncated', 'big files missing', and 'release is genuinely tiny'. 
--- internal/importer/archive/iso/bluray.go | 51 +++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/internal/importer/archive/iso/bluray.go b/internal/importer/archive/iso/bluray.go index f129a0bd4..10e1c8cf7 100644 --- a/internal/importer/archive/iso/bluray.go +++ b/internal/importer/archive/iso/bluray.go @@ -2,6 +2,7 @@ package iso import ( "context" + "fmt" "io" "log/slog" "sort" @@ -58,11 +59,28 @@ func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEn // [DEBUG-isobd] One-shot summary of what the resolver actually sees in // this ISO. Distinct prefix lets us confirm the live binary includes // this instrumentation and lets users grep their logs cleanly. + var ( + allSum, m2tsSum, ssifSum int64 + biggest = topNBySize(files, 6) + ) + for _, f := range files { + allSum += int64(f.size) + } + for _, f := range m2tsByClip { + m2tsSum += int64(f.size) + } + for _, f := range ssifByClip { + ssifSum += int64(f.size) + } slog.InfoContext(ctx, "[DEBUG-isobd] bdmv scan", "total_files", len(files), "playlists", len(playlistEntries), "m2ts_clips", len(m2tsByClip), "ssif_clips", len(ssifByClip), + "all_files_sum_bytes", allSum, + "m2ts_sum_bytes", m2tsSum, + "ssif_sum_bytes", ssifSum, + "top6_largest", biggest, "sample_paths", samplePaths(files, 12), ) @@ -149,6 +167,39 @@ func samplePaths(files []isoFileEntry, max int) []string { return out } +// topNBySize returns "path=size" entries for the n largest files. Used by +// diagnostic logging to reveal whether the ISO actually contains the +// multi-GB clips a real Blu-ray main feature would have. 
+func topNBySize(files []isoFileEntry, n int) []string { + if len(files) == 0 || n <= 0 { + return nil + } + cp := make([]isoFileEntry, len(files)) + copy(cp, files) + sort.Slice(cp, func(i, j int) bool { return cp[i].size > cp[j].size }) + k := min(len(cp), n) + out := make([]string, 0, k) + for i := range k { + out = append(out, cp[i].path+"="+formatBytes(int64(cp[i].size))) + } + return out +} + +// formatBytes renders a byte count compactly for log readability. +// Uses base-2 units (KiB, MiB, GiB) for clarity. +func formatBytes(b int64) string { + const unit = 1024 + if b < unit { + return fmt.Sprintf("%dB", b) + } + div, exp := int64(unit), 0 + for n := b / unit; n >= unit; n /= unit { + div *= unit + exp++ + } + return fmt.Sprintf("%.1f%ciB", float64(b)/float64(div), "KMGTPE"[exp]) +} + // isBetterPlaylist returns true when cand should replace best. // Comparison: longer duration > more PlayItems > earlier filename. // The filename tie-break relies on playlistEntries being sorted before From ec8a0483298497002ddff7ffb736b8717504157d Mon Sep 17 00:00:00 2001 From: javi11 Date: Fri, 22 May 2026 11:27:29 +0200 Subject: [PATCH 05/30] chore(iso): log ISO-size vs walker-coverage for each ISO Real run shows all_files_sum_bytes=1.13 GiB across 295 files, biggest single file 135 MiB. NZB is 88 GiB across 2 ISOs. Need to know whether src.Size (claimed ISO bytes from the outer RAR archive) matches the sum of what ListISOFiles enumerated, or whether the walker is missing multi-GB files. One [DEBUG-isobd] iso analyse line per ISO now prints filename, iso_size, listed_files, listed_sum, and coverage_pct so the discrepancy is impossible to miss. 
--- internal/importer/archive/iso/processor.go | 37 ++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/internal/importer/archive/iso/processor.go b/internal/importer/archive/iso/processor.go index 82ad56505..dc4903610 100644 --- a/internal/importer/archive/iso/processor.go +++ b/internal/importer/archive/iso/processor.go @@ -3,6 +3,7 @@ package iso import ( "context" "fmt" + "log/slog" "path/filepath" "strings" "time" @@ -39,6 +40,25 @@ func AnalyzeISO( return nil, fmt.Errorf("iso: listing files in %q: %w", src.Filename, err) } + // [DEBUG-isobd] Compare the ISO's claimed size (from the outer archive) + // against the sum of every file ListISOFiles returned. A huge ratio + // difference means the walker is silently skipping big files — + // almost certainly the multi-GB BDMV main-feature clips whose UDF + // allocation descriptors use a type our walker doesn't decode. + var listedSum int64 + for _, e := range entries { + listedSum += int64(e.size) + } + slog.InfoContext(ctx, "[DEBUG-isobd] iso analyse", + "filename", src.Filename, + "iso_size_bytes", src.Size, + "iso_size", formatBytes(src.Size), + "listed_files", len(entries), + "listed_sum_bytes", listedSum, + "listed_sum", formatBytes(listedSum), + "coverage_pct", coveragePercent(listedSum, src.Size), + ) + out := &AnalyzedISO{VolumeLabel: ReadVolumeLabel(rs)} for _, e := range entries { @@ -58,6 +78,23 @@ func AnalyzeISO( return out, nil } +// coveragePercent returns (listed/total)*100, clamped to [0, 999]. Used +// only by diagnostic logging so the user can see at a glance whether +// ListISOFiles is enumerating the whole ISO or only a fraction. 
+func coveragePercent(listed, total int64) int64 { + if total <= 0 { + return -1 + } + pct := listed * 100 / total + if pct < 0 { + return 0 + } + if pct > 999 { + return 999 + } + return pct +} + // buildFileContent turns one ISO directory entry into an ISOFileContent, // slicing or referencing the source's Usenet segments according to whether // the ISO is encrypted. From 62162e82d1184dadb3cdf9787d9265aacb31742b Mon Sep 17 00:00:00 2001 From: javi11 Date: Fri, 22 May 2026 11:41:10 +0200 Subject: [PATCH 06/30] fix(iso): read full directory extent in UDF walker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause of the 'main feature M2TS files invisible' bug. udfReadDirEntries parsed every File Identifier Descriptor in a directory but only ever read the FIRST 2048-byte sector of each allocation descriptor's extent — even when the extent's ad.length claimed it spanned many sectors. A Blu-ray BDMV/STREAM/ directory with ~2500 FIDs (~30 KiB of FID data) lost every entry past the first sector, including the multi-GB main-feature clips 00016/00017/00022/00023/00028/00029 and the corresponding SSIF files. Local repro against AVATAR_FIRE_AND_ASH_3D_DISC_1.iso (37 GiB): - Before: listed_files=298 sum=1.16 GiB coverage=3.1% (no clip >135 MiB) - After: listed_files=2523 sum=74 GiB (00022.m2ts=17 GiB ✓) Fix factors readMetaExtent / readICBExtent helpers that walk every sector of an extent until ad.length is exhausted. Both fail-soft on EOF so a malformed image returns partial data rather than aborting the import. The pre-existing TestUDFReadDirEntriesShortADClampsExtentLength was pinning the BUGGY behaviour (it asserted the walker would truncate to one sector); renamed to TestUDFReadDirEntriesTruncatedExtent and now asserts the new contract: when an extent claims more sectors than the image contains, the walker returns whatever data it could read without an error. 
Adds fs_local_test.go: an ALTMOUNT_LOCAL_ISO= gated integration test that catches this class of bug instantly against a real ISO. Skipped in CI. Also strips the [DEBUG-isobd] / [DEBUG-walk] instrumentation added during the investigation and tones the resolver / processor logs down to one production-grade INFO line per ISO and per main-feature pick. --- internal/importer/archive/iso/bluray.go | 93 +------------------ internal/importer/archive/iso/fs.go | 84 ++++++++++++++--- .../importer/archive/iso/fs_local_test.go | 76 +++++++++++++++ internal/importer/archive/iso/fs_test.go | 12 ++- internal/importer/archive/iso/processor.go | 34 +------ 5 files changed, 161 insertions(+), 138 deletions(-) create mode 100644 internal/importer/archive/iso/fs_local_test.go diff --git a/internal/importer/archive/iso/bluray.go b/internal/importer/archive/iso/bluray.go index 10e1c8cf7..a6ab8f64d 100644 --- a/internal/importer/archive/iso/bluray.go +++ b/internal/importer/archive/iso/bluray.go @@ -2,7 +2,6 @@ package iso import ( "context" - "fmt" "io" "log/slog" "sort" @@ -56,34 +55,6 @@ func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEn m2tsByClip[base] = f } } - // [DEBUG-isobd] One-shot summary of what the resolver actually sees in - // this ISO. Distinct prefix lets us confirm the live binary includes - // this instrumentation and lets users grep their logs cleanly. 
- var ( - allSum, m2tsSum, ssifSum int64 - biggest = topNBySize(files, 6) - ) - for _, f := range files { - allSum += int64(f.size) - } - for _, f := range m2tsByClip { - m2tsSum += int64(f.size) - } - for _, f := range ssifByClip { - ssifSum += int64(f.size) - } - slog.InfoContext(ctx, "[DEBUG-isobd] bdmv scan", - "total_files", len(files), - "playlists", len(playlistEntries), - "m2ts_clips", len(m2tsByClip), - "ssif_clips", len(ssifByClip), - "all_files_sum_bytes", allSum, - "m2ts_sum_bytes", m2tsSum, - "ssif_sum_bytes", ssifSum, - "top6_largest", biggest, - "sample_paths", samplePaths(files, 12), - ) - if len(playlistEntries) == 0 || (len(m2tsByClip) == 0 && len(ssifByClip) == 0) { return nil } @@ -117,21 +88,6 @@ func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEn streams = append(streams, entry) } } - // [DEBUG-isobd] Per-playlist evaluation so we can see which mpls - // resolved how many clips and why a given candidate won or lost. - var totalSize int64 - for _, s := range streams { - totalSize += int64(s.size) - } - slog.InfoContext(ctx, "[DEBUG-isobd] mpls evaluated", - "name", pe.path, - "items", len(pl.PlayItems), - "resolved_clips", len(streams), - "unresolved", len(pl.PlayItems)-len(streams), - "duration_ticks", pl.DurationTicks(), - "streams_total_bytes", totalSize, - ) - if len(streams) == 0 { continue } @@ -146,60 +102,15 @@ func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEn } } if best != nil { - slog.InfoContext(ctx, "[DEBUG-isobd] main feature picked", + slog.InfoContext(ctx, "Blu-ray main feature playlist resolved", "playlist", best.PlaylistName, "clips", len(best.Streams), - "duration_ticks", best.DurationTicks, + "duration_seconds", best.DurationTicks/45000, ) } return best } -// samplePaths returns up to max paths from files, intended for diagnostic -// logging. The list is taken in iteration order — not sorted — so the user -// sees what ListISOFiles actually emitted. 
-func samplePaths(files []isoFileEntry, max int) []string { - n := min(len(files), max) - out := make([]string, 0, n) - for i := range n { - out = append(out, files[i].path) - } - return out -} - -// topNBySize returns "path=size" entries for the n largest files. Used by -// diagnostic logging to reveal whether the ISO actually contains the -// multi-GB clips a real Blu-ray main feature would have. -func topNBySize(files []isoFileEntry, n int) []string { - if len(files) == 0 || n <= 0 { - return nil - } - cp := make([]isoFileEntry, len(files)) - copy(cp, files) - sort.Slice(cp, func(i, j int) bool { return cp[i].size > cp[j].size }) - k := min(len(cp), n) - out := make([]string, 0, k) - for i := range k { - out = append(out, cp[i].path+"="+formatBytes(int64(cp[i].size))) - } - return out -} - -// formatBytes renders a byte count compactly for log readability. -// Uses base-2 units (KiB, MiB, GiB) for clarity. -func formatBytes(b int64) string { - const unit = 1024 - if b < unit { - return fmt.Sprintf("%dB", b) - } - div, exp := int64(unit), 0 - for n := b / unit; n >= unit; n /= unit { - div *= unit - exp++ - } - return fmt.Sprintf("%.1f%ciB", float64(b)/float64(div), "KMGTPE"[exp]) -} - // isBetterPlaylist returns true when cand should replace best. // Comparison: longer duration > more PlayItems > earlier filename. // The filename tie-break relies on playlistEntries being sorted before diff --git a/internal/importer/archive/iso/fs.go b/internal/importer/archive/iso/fs.go index ace50d531..62c277f4d 100644 --- a/internal/importer/archive/iso/fs.go +++ b/internal/importer/archive/iso/fs.go @@ -328,6 +328,66 @@ func udfResolveICB(loc udfLBA, metaMap []udfMetaSpan, partStart uint32) (uint32, return udfResolveMetaBlock(loc.block, metaMap, partStart) } +// readMetaExtent reads a contiguous extent of `length` bytes starting at +// logical metadata block `startBlock`, walking sector by sector through +// the metaMap so multi-sector extents (e.g. 
a 26 KiB directory) are +// returned in full. Without this, callers that read only the first +// 2048-byte sector silently lose every entry past the first sector — the +// root cause of the "main-feature M2TS files missing from listing" bug. +func readMetaExtent(rs io.ReadSeeker, startBlock uint32, length int, metaMap []udfMetaSpan, partStart uint32) ([]byte, error) { + if length <= 0 { + return nil, nil + } + out := make([]byte, 0, length) + remaining := length + for b := uint32(0); remaining > 0; b++ { + ps, err := udfResolveMetaBlock(startBlock+b, metaMap, partStart) + if err != nil { + return nil, err + } + _, sector, err := udfReadTag(rs, ps) + if err != nil { + // Malformed image (e.g. extent claims more sectors than exist): + // return what we successfully read rather than failing the + // entire walk. Callers parse partial directory data correctly. + return out, nil + } + take := min(remaining, len(sector)) + out = append(out, sector[:take]...) + remaining -= take + } + return out, nil +} + +// readICBExtent is the long_ad analogue of readMetaExtent: walks blocks +// by incrementing the logical-block field inside the ICB long_ad. +func readICBExtent(rs io.ReadSeeker, loc udfLBA, length int, metaMap []udfMetaSpan, partStart uint32) ([]byte, error) { + if length <= 0 { + return nil, nil + } + out := make([]byte, 0, length) + remaining := length + cur := loc + for remaining > 0 { + ps, err := udfResolveICB(cur, metaMap, partStart) + if err != nil { + return nil, err + } + _, sector, err := udfReadTag(rs, ps) + if err != nil { + // Malformed image (e.g. extent claims more sectors than exist): + // return what we successfully read rather than failing the + // entire walk. Callers parse partial directory data correctly. + return out, nil + } + take := min(remaining, len(sector)) + out = append(out, sector[:take]...) 
+ remaining -= take + cur.block++ + } + return out, nil +} + // udfReadDirEntries reads all File Identifier Descriptor records from a // File Entry at physSect. func udfReadDirEntries(rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, partStart uint32) ([]udfDirEntry, error) { @@ -360,21 +420,22 @@ func udfReadDirEntries(rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, case 3: // inline dirData = buf[allocDescOff : allocDescOff+allocDescLen] case 0: // short_ad + // A single allocation descriptor describes an extent that can span + // many 2048-byte sectors. The previous version of this code read + // only the first sector and truncated the rest of the extent, + // silently dropping every directory entry past ~30 FIDs — which is + // why BDMV/STREAM/ on a real Blu-ray (~300 entries, ~26 KiB) lost + // every main-feature M2TS clip. We now walk the full extent. for off := 0; off+8 <= allocDescLen; off += 8 { ad := udfParseShortAD(buf[allocDescOff:], off) if ad.length == 0 { break } - ps, rerr := udfResolveMetaBlock(ad.block, metaMap, partStart) - if rerr != nil { - return nil, rerr - } - _, sector, rerr := udfReadTag(rs, ps) + data, rerr := readMetaExtent(rs, ad.block, int(ad.length), metaMap, partStart) if rerr != nil { return nil, rerr } - take := min(int(ad.length), len(sector)) - dirData = append(dirData, sector[:take]...) + dirData = append(dirData, data...) } case 1: // long_ad for off := 0; off+16 <= allocDescLen; off += 16 { @@ -382,16 +443,11 @@ func udfReadDirEntries(rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, if ad.length == 0 { break } - ps, rerr := udfResolveICB(ad.loc, metaMap, partStart) - if rerr != nil { - return nil, rerr - } - _, sector, rerr := udfReadTag(rs, ps) + data, rerr := readICBExtent(rs, ad.loc, int(ad.length), metaMap, partStart) if rerr != nil { return nil, rerr } - take := min(int(ad.length), len(sector)) - dirData = append(dirData, sector[:take]...) + dirData = append(dirData, data...) 
} } diff --git a/internal/importer/archive/iso/fs_local_test.go b/internal/importer/archive/iso/fs_local_test.go new file mode 100644 index 000000000..b6f599ad1 --- /dev/null +++ b/internal/importer/archive/iso/fs_local_test.go @@ -0,0 +1,76 @@ +package iso + +import ( + "fmt" + "os" + "sort" + "testing" +) + +// TestLocalISO_DiscoverBigFiles is a manual integration test: it walks a +// real Blu-ray ISO from local disk and dumps a size-sorted summary. Skipped +// unless ALTMOUNT_LOCAL_ISO is set, so CI stays unaffected. +// +// Set ALTMOUNT_LOCAL_ISO=/abs/path/to.iso to run, e.g.: +// +// ALTMOUNT_LOCAL_ISO=/Volumes/.../DISC_1.iso go test \ +// ./internal/importer/archive/iso/... -run TestLocalISO -v +func TestLocalISO_DiscoverBigFiles(t *testing.T) { + path := os.Getenv("ALTMOUNT_LOCAL_ISO") + if path == "" { + t.Skip("ALTMOUNT_LOCAL_ISO not set") + } + f, err := os.Open(path) + if err != nil { + t.Fatalf("open %s: %v", path, err) + } + defer f.Close() + + stat, _ := f.Stat() + t.Logf("ISO: %s size=%d (%.2f GiB)", path, stat.Size(), float64(stat.Size())/(1<<30)) + + entries, err := ListISOFiles(f) + if err != nil { + t.Fatalf("ListISOFiles: %v", err) + } + + var sum int64 + for _, e := range entries { + sum += int64(e.size) + } + t.Logf("listed_files=%d listed_sum=%d (%.2f GiB) coverage=%.1f%%", + len(entries), sum, float64(sum)/(1<<30), 100*float64(sum)/float64(stat.Size())) + + // Top 25 by size — should match `ls -laS BDMV/STREAM/` if walker is sane. + sort.Slice(entries, func(i, j int) bool { return entries[i].size > entries[j].size }) + t.Logf("top 25 by size:") + for i, e := range entries { + if i >= 25 { + break + } + t.Logf(" %s size=%d (%.2f MiB) lba=%d", e.path, e.size, float64(e.size)/(1<<20), e.lba) + } + + // Sanity sentinels for the Avatar disc 1 main-feature clips. Each one + // is >1 GiB on disc, so if any are absent the walker dropped them. 
+ want := []string{"BDMV/STREAM/00016.m2ts", "BDMV/STREAM/00022.m2ts", "BDMV/STREAM/00028.m2ts"} + have := make(map[string]uint64, len(entries)) + for _, e := range entries { + have[e.path] = e.size + } + for _, w := range want { + size, ok := have[w] + if !ok { + t.Errorf("missing %s — walker dropped this file", w) + continue + } + if size < 1<<30 { + t.Errorf("%s reported size=%d (%.2f MiB), want >1 GiB", + w, size, float64(size)/(1<<20)) + } + } + + if t.Failed() { + fmt.Println(">>> walker is dropping big files; this is the bug") + } +} diff --git a/internal/importer/archive/iso/fs_test.go b/internal/importer/archive/iso/fs_test.go index c03e1c954..1b2bfe380 100644 --- a/internal/importer/archive/iso/fs_test.go +++ b/internal/importer/archive/iso/fs_test.go @@ -6,7 +6,17 @@ import ( "testing" ) -func TestUDFReadDirEntriesShortADClampsExtentLength(t *testing.T) { +// TestUDFReadDirEntriesTruncatedExtent locks in the fix for the bug where +// a directory's allocation descriptor advertised an extent spanning +// multiple sectors but the walker read only the first sector and silently +// dropped every entry past it (~ the reason the Avatar BDMV main-feature +// M2TS files were invisible). Two assertions: +// - readMetaExtent must keep reading sectors until ad.length is +// satisfied (the fix); +// - if a sector read fails because the image is shorter than ad.length, +// the walk returns partial data without an error so a malformed ISO +// can't fail the entire import. 
+func TestUDFReadDirEntriesTruncatedExtent(t *testing.T) { image := make([]byte, iso9660SectorSize*21) dirICBSector := image[10*iso9660SectorSize : 11*iso9660SectorSize] binary.LittleEndian.PutUint16(dirICBSector[0:2], 261) diff --git a/internal/importer/archive/iso/processor.go b/internal/importer/archive/iso/processor.go index dc4903610..2220b582b 100644 --- a/internal/importer/archive/iso/processor.go +++ b/internal/importer/archive/iso/processor.go @@ -40,23 +40,10 @@ func AnalyzeISO( return nil, fmt.Errorf("iso: listing files in %q: %w", src.Filename, err) } - // [DEBUG-isobd] Compare the ISO's claimed size (from the outer archive) - // against the sum of every file ListISOFiles returned. A huge ratio - // difference means the walker is silently skipping big files — - // almost certainly the multi-GB BDMV main-feature clips whose UDF - // allocation descriptors use a type our walker doesn't decode. - var listedSum int64 - for _, e := range entries { - listedSum += int64(e.size) - } - slog.InfoContext(ctx, "[DEBUG-isobd] iso analyse", + slog.InfoContext(ctx, "ISO analysed", "filename", src.Filename, "iso_size_bytes", src.Size, - "iso_size", formatBytes(src.Size), - "listed_files", len(entries), - "listed_sum_bytes", listedSum, - "listed_sum", formatBytes(listedSum), - "coverage_pct", coveragePercent(listedSum, src.Size), + "files", len(entries), ) out := &AnalyzedISO{VolumeLabel: ReadVolumeLabel(rs)} @@ -78,23 +65,6 @@ func AnalyzeISO( return out, nil } -// coveragePercent returns (listed/total)*100, clamped to [0, 999]. Used -// only by diagnostic logging so the user can see at a glance whether -// ListISOFiles is enumerating the whole ISO or only a fraction. 
-func coveragePercent(listed, total int64) int64 { - if total <= 0 { - return -1 - } - pct := listed * 100 / total - if pct < 0 { - return 0 - } - if pct > 999 { - return 999 - } - return pct -} - // buildFileContent turns one ISO directory entry into an ISOFileContent, // slicing or referencing the source's Usenet segments according to whether // the ISO is encrypted. From 775871a3ac15c054a497997f1f25d9da8d5d6c16 Mon Sep 17 00:00:00 2001 From: javi11 Date: Fri, 22 May 2026 11:53:19 +0200 Subject: [PATCH 07/30] fix(iso): walk every extent of multi-extent UDF files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The directory-listing fix exposed a second latent bug downstream: the walker only stored ONE allocation descriptor's LBA per file even though huge Blu-ray clips are split across hundreds of extents (Avatar's 00022.m2ts: 945, 00023.m2ts: 945, 00028.m2ts: 294, 00016.m2ts: 238). For every multi-extent file, downstream reads of bytes past the first extent's length returned wrong sectors (whatever happened to live next to extent 1 on disc) instead of the file's real data — silent corruption ~50× the size of the visible bug. Changes: - isoFileEntry now carries []isoExtent instead of a single lba field. - collectFileExtents() walks every inline AD and chases Allocation Extent Descriptor (UDF tag 258) chains so files with more ADs than fit in the FE sector are fully enumerated. Caps total extent bytes at info_length so a malformed FE can't yield more data than the file claims. - ISOFileContent gains a Sources []ISONestedSource slice (one per extent) and drops the single-Segments / single-NestedSource fields. - buildFileContent emits one ISONestedSource per extent: unencrypted ISOs pre-slice outer segments to cover each extent; encrypted ISOs keep the full outer segments and seek via InnerOffset (AES-CBC IV chain still anchors at byte 0 of the outer ISO). 
- archive.isoFileContentToNestedSource → isoFileContentToNestedSources fans the slice out into one archive.NestedSource per extent. - buildMainFeatureContent and buildLargestFileContent thread the multi-source path so the final concat Content carries every extent of every clip in disc-then-playlist order. Verified against the real Avatar disc 1 ISO via fs_local_test.go: 00022.m2ts: 945 extents, sum-of-extent-lengths == 17 GiB info_length. TestLocalISO_DiscoverBigFiles asserts >=2 extents and full coverage for the sentinel big-clip set. --- internal/importer/archive/iso/bluray.go | 24 +- internal/importer/archive/iso/bluray_test.go | 49 ++-- internal/importer/archive/iso/fs.go | 194 ++++++++++++++-- .../importer/archive/iso/fs_local_test.go | 212 +++++++++++++++++- internal/importer/archive/iso/processor.go | 48 ++-- internal/importer/archive/iso/types.go | 29 ++- internal/importer/archive/iso_expansion.go | 55 +++-- .../importer/archive/iso_expansion_test.go | 99 +++++--- 8 files changed, 567 insertions(+), 143 deletions(-) diff --git a/internal/importer/archive/iso/bluray.go b/internal/importer/archive/iso/bluray.go index a6ab8f64d..18d47237f 100644 --- a/internal/importer/archive/iso/bluray.go +++ b/internal/importer/archive/iso/bluray.go @@ -123,15 +123,21 @@ func isBetterPlaylist(cand, best *MainFeaturePlaylist, candItems, bestItems int) return candItems > bestItems } -// readISOFile reads the full contents of one isoFileEntry from rs. -// MPLS files are tiny (~KBs), so a one-shot read is fine. +// readISOFile reads the full contents of one isoFileEntry from rs, +// concatenating bytes across every on-disc extent. MPLS files are tiny +// (~KBs) and almost always single-extent, but multi-extent MPLS is +// legal so we iterate. 
func readISOFile(rs io.ReadSeeker, e isoFileEntry) ([]byte, error) { - if _, err := rs.Seek(int64(e.lba)*iso9660SectorSize, io.SeekStart); err != nil { - return nil, err - } - buf := make([]byte, e.size) - if _, err := io.ReadFull(rs, buf); err != nil { - return nil, err + out := make([]byte, 0, e.size) + for _, ext := range e.extents { + if _, err := rs.Seek(int64(ext.lba)*iso9660SectorSize, io.SeekStart); err != nil { + return nil, err + } + chunk := make([]byte, ext.length) + if _, err := io.ReadFull(rs, chunk); err != nil { + return nil, err + } + out = append(out, chunk...) } - return buf, nil + return out, nil } diff --git a/internal/importer/archive/iso/bluray_test.go b/internal/importer/archive/iso/bluray_test.go index 30c983b2c..42b3c02c7 100644 --- a/internal/importer/archive/iso/bluray_test.go +++ b/internal/importer/archive/iso/bluray_test.go @@ -7,6 +7,15 @@ import ( "testing" ) +// mkEntry builds a single-extent isoFileEntry — the common case for tests. +func mkEntry(path string, lba uint32, size uint64) isoFileEntry { + return isoFileEntry{ + path: path, + size: size, + extents: []isoExtent{{lba: lba, length: size}}, + } +} + // makeImage assembles an in-memory disc image by placing each piece of // data at the sector index given in its key. The returned reader can be // used as if it were a real ISO read-seeker. @@ -53,12 +62,12 @@ func TestResolveMainFeature(t *testing.T) { // File listing: two playlists and four M2TS clips (one extra). 
files := []isoFileEntry{ - {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: uint64(len(short))}, - {path: "BDMV/PLAYLIST/00800.MPLS", lba: 110, size: uint64(len(long))}, - {path: "BDMV/STREAM/00001.M2TS", lba: 200, size: 1_000_000}, - {path: "BDMV/STREAM/00002.M2TS", lba: 300, size: 2_000_000}, - {path: "BDMV/STREAM/00003.M2TS", lba: 400, size: 3_000_000}, - {path: "BDMV/STREAM/00010.M2TS", lba: 500, size: 500_000}, + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, uint64(len(short))), + mkEntry("BDMV/PLAYLIST/00800.MPLS", 110, uint64(len(long))), + mkEntry("BDMV/STREAM/00001.M2TS", 200, 1_000_000), + mkEntry("BDMV/STREAM/00002.M2TS", 300, 2_000_000), + mkEntry("BDMV/STREAM/00003.M2TS", 400, 3_000_000), + mkEntry("BDMV/STREAM/00010.M2TS", 500, 500_000), } got := ResolveMainFeature(context.Background(), rs, files) @@ -82,7 +91,7 @@ func TestResolveMainFeature(t *testing.T) { t.Run("non-BDMV disc returns nil", func(t *testing.T) { t.Parallel() files := []isoFileEntry{ - {path: "movie.mkv", lba: 100, size: 1_000_000}, + mkEntry("movie.mkv", 100, 1_000_000), } if got := ResolveMainFeature(context.Background(), bytes.NewReader(make([]byte, 16*iso9660SectorSize)), files); got != nil { t.Errorf("expected nil for non-BDMV disc, got %+v", got) @@ -95,8 +104,8 @@ func TestResolveMainFeature(t *testing.T) { 100: []byte("not a real mpls"), }) files := []isoFileEntry{ - {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: 15}, - {path: "BDMV/STREAM/00001.M2TS", lba: 200, size: 1_000_000}, + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, 15), + mkEntry("BDMV/STREAM/00001.M2TS", 200, 1_000_000), } if got := ResolveMainFeature(context.Background(), rs, files); got != nil { t.Errorf("expected nil for unparseable MPLS, got %+v", got) @@ -124,14 +133,14 @@ func TestResolveMainFeature(t *testing.T) { }) files := []isoFileEntry{ - {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: uint64(len(extras))}, - {path: "BDMV/PLAYLIST/00800.MPLS", lba: 110, size: uint64(len(mainFeature3D))}, + 
mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, uint64(len(extras))), + mkEntry("BDMV/PLAYLIST/00800.MPLS", 110, uint64(len(mainFeature3D))), // Only the extras live as M2TS: - {path: "BDMV/STREAM/00010.M2TS", lba: 200, size: 50_000_000}, + mkEntry("BDMV/STREAM/00010.M2TS", 200, 50_000_000), // Main feature is SSIF only: - {path: "BDMV/STREAM/SSIF/00100.SSIF", lba: 300, size: 25_000_000_000}, - {path: "BDMV/STREAM/SSIF/00101.SSIF", lba: 400, size: 25_000_000_000}, - {path: "BDMV/STREAM/SSIF/00102.SSIF", lba: 500, size: 5_000_000_000}, + mkEntry("BDMV/STREAM/SSIF/00100.SSIF", 300, 25_000_000_000), + mkEntry("BDMV/STREAM/SSIF/00101.SSIF", 400, 25_000_000_000), + mkEntry("BDMV/STREAM/SSIF/00102.SSIF", 500, 5_000_000_000), } got := ResolveMainFeature(context.Background(), rs, files) @@ -168,9 +177,9 @@ func TestResolveMainFeature(t *testing.T) { rs := makeImage(t, map[uint32][]byte{100: mainFeature}) files := []isoFileEntry{ - {path: "BDMV/PLAYLIST/00800.MPLS", lba: 100, size: uint64(len(mainFeature))}, - {path: "BDMV/STREAM/00100.M2TS", lba: 200, size: 20_000_000_000}, - {path: "BDMV/STREAM/SSIF/00100.SSIF", lba: 300, size: 40_000_000_000}, + mkEntry("BDMV/PLAYLIST/00800.MPLS", 100, uint64(len(mainFeature))), + mkEntry("BDMV/STREAM/00100.M2TS", 200, 20_000_000_000), + mkEntry("BDMV/STREAM/SSIF/00100.SSIF", 300, 40_000_000_000), } got := ResolveMainFeature(context.Background(), rs, files) @@ -195,8 +204,8 @@ func TestResolveMainFeature(t *testing.T) { 100: data, }) files := []isoFileEntry{ - {path: "BDMV/PLAYLIST/00001.MPLS", lba: 100, size: uint64(len(data))}, - {path: "BDMV/STREAM/00001.M2TS", lba: 200, size: 1_000_000}, + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, uint64(len(data))), + mkEntry("BDMV/STREAM/00001.M2TS", 200, 1_000_000), } if got := ResolveMainFeature(context.Background(), rs, files); got != nil { t.Errorf("expected nil when MPLS references unknown clip, got %+v", got) diff --git a/internal/importer/archive/iso/fs.go b/internal/importer/archive/iso/fs.go index 
62c277f4d..d63c6e23d 100644 --- a/internal/importer/archive/iso/fs.go +++ b/internal/importer/archive/iso/fs.go @@ -10,11 +10,32 @@ import ( const iso9660SectorSize = 2048 -// isoFileEntry is one non-directory file returned by ListISOFiles. +// isoFileEntry is one non-directory file returned by ListISOFiles. The +// file's data on disc may be split across multiple contiguous extents +// — Blu-ray main-feature M2TS files routinely use hundreds of extents +// chained via Allocation Extent Descriptors. extents is in disc order; +// concatenating their bytes yields the complete file. type isoFileEntry struct { - path string // full path within ISO (e.g. "BDMV/STREAM/00001.M2TS") - lba uint32 - size uint64 + path string + size uint64 + extents []isoExtent +} + +// firstLBA returns the start LBA of the file's first extent. Callers +// that only need a starting sector (e.g. reading a small MPLS file +// known to be single-extent) can use this. +func (e isoFileEntry) firstLBA() uint32 { + if len(e.extents) == 0 { + return 0 + } + return e.extents[0].lba +} + +// isoExtent is one contiguous run of sectors on disc that contributes +// length bytes to the logical file. +type isoExtent struct { + lba uint32 + length uint64 } // ───────────────────────────────────────────────────────────────────────────── @@ -100,7 +121,13 @@ func iso9660WalkAll(rs io.ReadSeeker, dirLBA uint32, dirSize uint64, prefix stri } result = append(result, sub...) } else { - result = append(result, isoFileEntry{path: entryPath, lba: e.lba, size: e.size}) + // ISO 9660 stores file data in a single contiguous extent. + // (Interleave mode exists but is essentially never used.) 
+ result = append(result, isoFileEntry{ + path: entryPath, + size: e.size, + extents: []isoExtent{{lba: e.lba, length: e.size}}, + }) } } return result, nil @@ -605,24 +632,155 @@ func udfWalkAll(rs io.ReadSeeker, dirICB udfLongAD, metaMap []udfMetaSpan, partS allocDescLen = len(feBuf) - allocDescOff } - var fileLBA uint32 - switch allocType { - case 0: - if allocDescLen >= 8 { - ad := udfParseShortAD(feBuf[allocDescOff:], 0) - fileLBA = partStart + ad.block + extents := collectFileExtents(rs, feBuf[allocDescOff:allocDescOff+allocDescLen], allocType, metaMap, partStart, infoLen, fePhys) + if len(extents) == 0 { + continue + } + result = append(result, isoFileEntry{ + path: entryPath, + size: infoLen, + extents: extents, + }) + } + return result, nil +} + +// collectFileExtents walks the allocation descriptors of a UDF File Entry +// (or Extended File Entry), following Allocation Extent Descriptor chains +// when the inline AD area is exhausted, and returns one isoExtent per +// recorded data extent in disc order. +// +// allocType is the lower 3 bits of the FE's ICBTag flags: +// +// 0 → short_ad (8 bytes each) +// 1 → long_ad (16 bytes each) +// 2 → extended ad (20 bytes; rare, parsed as a long_ad prefix here) +// 3 → file data embedded in the FE itself (small files) +// +// The high 2 bits of each AD's length field encode the AD "type": +// +// 0 → recorded & allocated extent (real data — emit) +// 1 → not recorded, allocated (sparse — skip, file should not see this on BD) +// 2 → not recorded, not allocated (hole — skip) +// 3 → next AD points at a continuation Allocation Extent Descriptor +// (tag 258) holding more ADs; chase the chain +// +// embeddedFEPhys is only meaningful for allocType 3 (it's the FE's own +// physical sector, which holds the file data inline). Embedded files are +// currently skipped, so no extent is emitted for them. 
+func collectFileExtents(rs io.ReadSeeker, inlineADs []byte, allocType byte, metaMap []udfMetaSpan, partStart uint32, infoLen uint64, embeddedFEPhys uint32) []isoExtent { + if allocType == 3 { + // Embedded data — a single "extent" pointing at the FE sector + // itself with the inline-AD area treated as the file data. We + // can't emit a usable LBA for slicing because the data isn't + // sector-aligned. Skip for now; BD streams never use embedded. + return nil + } + var step int + switch allocType { + case 0: + step = 8 + case 1: + step = 16 + case 2: + step = 20 // first 16 bytes are a long_ad; trailing 4 bytes are impl-use + default: + return nil + } + + var extents []isoExtent + chase := inlineADs + safety := 0 + for { + safety++ + if safety > 4096 { + break // pathological — bail to avoid runaway IO + } + var chain *udfLongAD + for off := 0; off+step <= len(chase); off += step { + lenField := binary.LittleEndian.Uint32(chase[off:]) + adType := lenField >> 30 + adLen := lenField & 0x3FFFFFFF + if adLen == 0 && adType != 3 { + break + } + if adType == 3 { + var loc udfLongAD + switch step { + case 8: + // short_ad continuation: the 4 bytes after length + // are the next AED's logical block; partition is + // implicit (same as parent). + loc = udfLongAD{length: adLen, loc: udfLBA{block: binary.LittleEndian.Uint32(chase[off+4:])}} + default: + loc = udfParseLongAD(chase, off) + } + chain = &loc + break + } + if adType != 0 { + // Type 1 (allocated but not recorded) and type 2 (hole) + // don't carry real bytes. Skip — BD streams shouldn't + // have these in practice. 
+ continue } - case 1: - if allocDescLen >= 16 { - ad := udfParseLongAD(feBuf[allocDescOff:], 0) - fileLBA, _ = udfResolveICB(ad.loc, metaMap, partStart) + var lba uint32 + switch step { + case 8: + ad := udfParseShortAD(chase, off) + resolved, err := udfResolveMetaBlock(ad.block, metaMap, partStart) + if err != nil { + continue + } + lba = resolved + default: + ad := udfParseLongAD(chase, off) + resolved, err := udfResolveICB(ad.loc, metaMap, partStart) + if err != nil { + continue + } + lba = resolved } + extents = append(extents, isoExtent{lba: lba, length: uint64(adLen)}) + } + if chain == nil { + break + } + ps, err := udfResolveICB(chain.loc, metaMap, partStart) + if err != nil { + break + } + _, aedBuf, err := udfReadTag(rs, ps) + if err != nil { + break } - if fileLBA > 0 { - result = append(result, isoFileEntry{path: entryPath, lba: fileLBA, size: infoLen}) + // Allocation Extent Descriptor layout: 16-byte tag + 4-byte + // previous-AED pointer + 4-byte length-of-allocation-descriptors, + // then the ADs themselves. + if len(aedBuf) < 24 { + break } + nextLen := int(binary.LittleEndian.Uint32(aedBuf[20:24])) + if nextLen <= 0 || 24+nextLen > len(aedBuf) { + break + } + chase = aedBuf[24 : 24+nextLen] } - return result, nil + + // Defensive: cap the total extent bytes at the FE's info_length so a + // malformed disc with mis-sized ADs can't return more bytes than the + // file legitimately contains. 
+ var total uint64 + for i := range extents { + if total+extents[i].length > infoLen { + extents[i].length = infoLen - total + extents = extents[:i+1] + break + } + total += extents[i].length + } + _ = embeddedFEPhys + return extents } // ListISOFiles walks the ISO 9660/UDF filesystem and returns all non-directory diff --git a/internal/importer/archive/iso/fs_local_test.go b/internal/importer/archive/iso/fs_local_test.go index b6f599ad1..23693efeb 100644 --- a/internal/importer/archive/iso/fs_local_test.go +++ b/internal/importer/archive/iso/fs_local_test.go @@ -1,6 +1,7 @@ package iso import ( + "encoding/binary" "fmt" "os" "sort" @@ -48,25 +49,41 @@ func TestLocalISO_DiscoverBigFiles(t *testing.T) { if i >= 25 { break } - t.Logf(" %s size=%d (%.2f MiB) lba=%d", e.path, e.size, float64(e.size)/(1<<20), e.lba) + t.Logf(" %s size=%d (%.2f MiB) extents=%d first_lba=%d", + e.path, e.size, float64(e.size)/(1<<20), len(e.extents), e.firstLBA()) } - // Sanity sentinels for the Avatar disc 1 main-feature clips. Each one - // is >1 GiB on disc, so if any are absent the walker dropped them. + // Sanity sentinels for the Avatar disc 1 main-feature clips. Each is + // >1 GiB and uses many on-disc extents (00022.m2ts has ~945). Assert + // the file is present, the size is right, AND the extents slice fully + // covers it — otherwise downstream concat reads wrong bytes past the + // first extent. 
want := []string{"BDMV/STREAM/00016.m2ts", "BDMV/STREAM/00022.m2ts", "BDMV/STREAM/00028.m2ts"} - have := make(map[string]uint64, len(entries)) + have := make(map[string]isoFileEntry, len(entries)) for _, e := range entries { - have[e.path] = e.size + have[e.path] = e } for _, w := range want { - size, ok := have[w] + e, ok := have[w] if !ok { t.Errorf("missing %s — walker dropped this file", w) continue } - if size < 1<<30 { + if e.size < 1<<30 { t.Errorf("%s reported size=%d (%.2f MiB), want >1 GiB", - w, size, float64(size)/(1<<20)) + w, e.size, float64(e.size)/(1<<20)) + } + if len(e.extents) < 2 { + t.Errorf("%s has only %d extents — expected multi-extent (BD main-feature clips fragment heavily)", + w, len(e.extents)) + } + var covered uint64 + for _, ext := range e.extents { + covered += ext.length + } + if covered != e.size { + t.Errorf("%s: sum of extent lengths = %d but file size = %d (delta %d)", + w, covered, e.size, int64(e.size)-int64(covered)) } } @@ -74,3 +91,182 @@ func TestLocalISO_DiscoverBigFiles(t *testing.T) { fmt.Println(">>> walker is dropping big files; this is the bug") } } + +// TestLocalISO_CountExtentsForBigFiles probes each entry's File Entry on the +// real ISO and reports how many allocation descriptors a file's data uses. +// The walker today reads only the first AD — if any of the multi-GiB main- +// feature clips reports >1 AD, downstream byte reads past the first extent +// will hit wrong sectors. Gated on ALTMOUNT_LOCAL_ISO same as the discovery +// test. +func TestLocalISO_CountExtentsForBigFiles(t *testing.T) { + path := os.Getenv("ALTMOUNT_LOCAL_ISO") + if path == "" { + t.Skip("ALTMOUNT_LOCAL_ISO not set") + } + f, err := os.Open(path) + if err != nil { + t.Fatalf("open: %v", err) + } + defer f.Close() + + partStart, metaMap, rootICB, err := udfSetup(f) + if err != nil { + t.Fatalf("udfSetup: %v", err) + } + + // Re-walk to get entries plus their ICB so we can re-read each FE and + // count its allocation descriptors. 
We can't reuse ListISOFiles output + // directly because isoFileEntry discards the ICB. + type probed struct { + path string + size uint64 + ads int // allocation descriptors observed (= number of on-disc extents) + alloc byte + } + + var probedAll []probed + var walk func(dirICB udfLongAD, prefix string) + walk = func(dirICB udfLongAD, prefix string) { + physSect, e := udfResolveICB(dirICB.loc, metaMap, partStart) + if e != nil { + return + } + entries, e := udfReadDirEntries(f, physSect, metaMap, partStart) + if e != nil { + return + } + for _, ent := range entries { + p := ent.name + if prefix != "" { + p = prefix + "/" + ent.name + } + if ent.isDir { + walk(ent.icb, p) + continue + } + fePhys, rerr := udfResolveICB(ent.icb.loc, metaMap, partStart) + if rerr != nil { + continue + } + feTag, feBuf, rerr := udfReadTag(f, fePhys) + if rerr != nil || (feTag.id != 261 && feTag.id != 266) { + continue + } + alloc := feBuf[34] & 0x07 + var adOff, adLen int + if feTag.id == 266 { + eaLen := int(binary.LittleEndian.Uint32(feBuf[208:212])) + adLen = int(binary.LittleEndian.Uint32(feBuf[212:216])) + adOff = 216 + eaLen + } else { + eaLen := int(binary.LittleEndian.Uint32(feBuf[168:172])) + adLen = int(binary.LittleEndian.Uint32(feBuf[172:176])) + adOff = 176 + eaLen + } + if adOff+adLen > len(feBuf) { + adLen = len(feBuf) - adOff + } + // Count extents using the UDF rules: high 2 bits of the + // length field encode the AD "type": + // 0 = recorded and allocated (real extent) + // 1 = not recorded, allocated (sparse / zero-fill) + // 2 = not recorded, not allocated (sparse hole) + // 3 = next AD points at a continuation AED sector, follow it + // We count types 0,1,2 as logical extents (each contributes + // length bytes to the file) and chase type 3 into AED chains. 
+ n := 0 + step := 0 + switch alloc { + case 0: + step = 8 + case 1: + step = 16 + case 2: + step = 20 + case 3: + n = 1 // embedded + } + if step > 0 { + countADs := func(buf []byte) (extents int, chain *udfLongAD) { + for off := 0; off+step <= len(buf); off += step { + lenField := binary.LittleEndian.Uint32(buf[off:]) + adType := lenField >> 30 + adLen := lenField & 0x3FFFFFFF + if adLen == 0 && adType != 3 { + break + } + if adType == 3 { + var loc udfLongAD + switch step { + case 8: + loc = udfLongAD{length: adLen, loc: udfLBA{block: binary.LittleEndian.Uint32(buf[off+4:])}} + case 16: + loc = udfParseLongAD(buf, off) + } + return extents, &loc + } + extents++ + } + return extents, nil + } + cnt, chain := countADs(feBuf[adOff : adOff+adLen]) + n = cnt + safety := 0 + for chain != nil && safety < 100 { + safety++ + ps, e := udfResolveICB(chain.loc, metaMap, partStart) + if e != nil { + break + } + _, aedBuf, e := udfReadTag(f, ps) + if e != nil { + break + } + // AED layout: 16-byte tag + 4-byte previous-AED pointer + // + 4-byte length-of-allocation-descriptors + ADs. + if len(aedBuf) < 24 { + break + } + aedLen := int(binary.LittleEndian.Uint32(aedBuf[20:24])) + if aedLen <= 0 || 24+aedLen > len(aedBuf) { + break + } + more, nextChain := countADs(aedBuf[24 : 24+aedLen]) + n += more + chain = nextChain + } + } + probedAll = append(probedAll, probed{ + path: p, + size: binary.LittleEndian.Uint64(feBuf[56:64]), + ads: n, + alloc: alloc, + }) + } + } + walk(rootICB, "") + + // Report the big files specifically + any file with >1 AD. 
+ sort.Slice(probedAll, func(i, j int) bool { return probedAll[i].size > probedAll[j].size }) + t.Logf("top 15 by size (with extent count):") + for i, p := range probedAll { + if i >= 15 { + break + } + t.Logf(" %s size=%d (%.2f MiB) alloc_type=%d extents=%d", + p.path, p.size, float64(p.size)/(1<<20), p.alloc, p.ads) + } + + multi := 0 + for _, p := range probedAll { + if p.ads > 1 { + multi++ + } + } + t.Logf("files with >1 extent: %d / %d", multi, len(probedAll)) + if multi == 0 { + t.Logf("CONCLUSION: all files are contiguous — single-LBA model is sufficient for this ISO") + } else { + t.Logf("CONCLUSION: fragmentation present — single-LBA walker yields WRONG bytes past extent 1") + } +} diff --git a/internal/importer/archive/iso/processor.go b/internal/importer/archive/iso/processor.go index 2220b582b..71076a293 100644 --- a/internal/importer/archive/iso/processor.go +++ b/internal/importer/archive/iso/processor.go @@ -66,29 +66,43 @@ func AnalyzeISO( } // buildFileContent turns one ISO directory entry into an ISOFileContent, -// slicing or referencing the source's Usenet segments according to whether -// the ISO is encrypted. +// emitting one ISONestedSource per on-disc extent. Concatenating the +// sources' byte ranges yields the complete file. This is the path that +// previously fed BAD bytes for multi-extent files like Avatar's 17 GiB +// 00022.m2ts (945 extents) — only the first extent's data was correct. func buildFileContent(src ISOSource, e isoFileEntry) ISOFileContent { - isoOffset := int64(e.lba) * iso9660SectorSize fc := ISOFileContent{ InternalPath: e.path, Filename: filepath.Base(e.path), Size: int64(e.size), + Sources: make([]ISONestedSource, 0, len(e.extents)), } - if len(src.AesKey) == 0 { - // Unencrypted: pre-slice segments so this content stands alone. 
- sliced, _ := sliceSegmentsForRange(src.Segments, isoOffset, int64(e.size)) - fc.Segments = sliced - } else { - // Encrypted: AES-CBC requires the full inner volume + offset so - // the cipher can chain IVs from the start of the ISO. - fc.NestedSource = &ISONestedSource{ - Segments: src.Segments, - AesKey: src.AesKey, - AesIV: src.AesIV, - InnerOffset: isoOffset, - InnerLength: int64(e.size), - InnerVolumeSize: src.Size, + for _, ext := range e.extents { + isoOffset := int64(ext.lba) * iso9660SectorSize + extLen := int64(ext.length) + if len(src.AesKey) == 0 { + // Unencrypted: pre-slice outer segments to cover this extent + // only. The downstream nested reader treats InnerOffset as + // an offset within the (already-sliced) segment chain. + sliced, _ := sliceSegmentsForRange(src.Segments, isoOffset, extLen) + fc.Sources = append(fc.Sources, ISONestedSource{ + Segments: sliced, + InnerOffset: 0, + InnerLength: extLen, + InnerVolumeSize: extLen, + }) + } else { + // Encrypted: AES-CBC needs the IV chain from byte 0 of the + // outer ISO, so every source gets the full outer segments + // and the cipher seeks via InnerOffset. + fc.Sources = append(fc.Sources, ISONestedSource{ + Segments: src.Segments, + AesKey: src.AesKey, + AesIV: src.AesIV, + InnerOffset: isoOffset, + InnerLength: extLen, + InnerVolumeSize: src.Size, + }) } } return fc diff --git a/internal/importer/archive/iso/types.go b/internal/importer/archive/iso/types.go index b755fe1eb..09e0aad5d 100644 --- a/internal/importer/archive/iso/types.go +++ b/internal/importer/archive/iso/types.go @@ -11,27 +11,34 @@ type ISOSource struct { Size int64 // Decrypted ISO size } -// ISOFileContent represents one file found inside the ISO. +// ISOFileContent represents one file found inside the ISO. The file's +// data may be split across multiple on-disc extents (Blu-ray main-feature +// M2TS files routinely use hundreds), so Sources is a slice of inner +// sources in disc order. 
Concatenating their byte ranges yields the +// complete file content. type ISOFileContent struct { InternalPath string // e.g. "BDMV/STREAM/00001.m2ts" Filename string // Base filename - Size int64 // File size in bytes + Size int64 // Total file size in bytes (sum of Sources.InnerLength) NzbdavID string // Carried from parent archive Content - // Unencrypted case: Segments sliced to cover exactly this file - Segments []*metapb.SegmentData - // Encrypted case: nil Segments + populated NestedSource - NestedSource *ISONestedSource + Sources []ISONestedSource } -// ISONestedSource holds everything needed to decrypt and seek into the ISO -// for a single inner file. +// ISONestedSource is one extent of an inner file. For unencrypted ISOs, +// Segments is pre-sliced to cover exactly this extent and AesKey is nil +// (InnerOffset is 0, InnerLength equals the extent length). For encrypted +// ISOs, AesKey/AesIV are populated, Segments cover the full outer ISO, +// InnerOffset is the byte offset of this extent within the decrypted +// ISO, and InnerVolumeSize is the full decrypted ISO size — the cipher +// chain needs to start at byte 0 so multi-extent encrypted reads use +// the same outer-ISO data with different inner offsets. type ISONestedSource struct { Segments []*metapb.SegmentData AesKey []byte AesIV []byte - InnerOffset int64 // lba * 2048 - InnerLength int64 // file size - InnerVolumeSize int64 // ISO total decrypted size + InnerOffset int64 + InnerLength int64 + InnerVolumeSize int64 } // AnalyzedISO is the full result of inspecting one ISO image. 
Files mirrors diff --git a/internal/importer/archive/iso_expansion.go b/internal/importer/archive/iso_expansion.go index daa8e4fce..d9d9fbbf0 100644 --- a/internal/importer/archive/iso_expansion.go +++ b/internal/importer/archive/iso_expansion.go @@ -145,12 +145,13 @@ func buildMainFeatureContent(ctx context.Context, groupKey string, g []analyzedI nzbdavID = e.src.NzbdavID } for _, fc := range e.analyzed.MainFeature { - ns := isoFileContentToNestedSource(fc) - if ns.InnerLength <= 0 { - continue + for _, ns := range isoFileContentToNestedSources(fc) { + if ns.InnerLength <= 0 { + continue + } + sources = append(sources, ns) + totalSize += ns.InnerLength } - sources = append(sources, ns) - totalSize += ns.InnerLength } } if len(sources) == 0 { @@ -194,36 +195,30 @@ func buildLargestFileContent(src Content, files []iso.ISOFileContent) (Content, NzbdavID: src.NzbdavID, ISOExpansionIndex: 1, } - if f.NestedSource != nil { - nc.NestedSources = []NestedSource{isoFileContentToNestedSource(f)} - } else { - nc.Segments = f.Segments + nc.NestedSources = isoFileContentToNestedSources(f) + if len(nc.NestedSources) == 0 { + return Content{}, false } return nc, true } -// isoFileContentToNestedSource converts an ISOFileContent into a -// NestedSource. For unencrypted ISOs the segments are already sliced to -// cover exactly this file, so InnerOffset is 0 and InnerVolumeSize equals -// the file size (unused when AesKey is empty — see -// MetadataVirtualFile.createNestedSourceReader). 
-func isoFileContentToNestedSource(fc iso.ISOFileContent) NestedSource { - if fc.NestedSource != nil { - return NestedSource{ - Segments: fc.NestedSource.Segments, - AesKey: fc.NestedSource.AesKey, - AesIV: fc.NestedSource.AesIV, - InnerOffset: fc.NestedSource.InnerOffset, - InnerLength: fc.NestedSource.InnerLength, - InnerVolumeSize: fc.NestedSource.InnerVolumeSize, - } - } - return NestedSource{ - Segments: fc.Segments, - InnerOffset: 0, - InnerLength: fc.Size, - InnerVolumeSize: fc.Size, +// isoFileContentToNestedSources fans an ISOFileContent's on-disc extents +// out into one NestedSource per extent, preserving disc order. Concatenating +// the resulting sources yields the file's bytes — the multi-extent fix +// for Blu-ray main-feature M2TS files lives here. +func isoFileContentToNestedSources(fc iso.ISOFileContent) []NestedSource { + out := make([]NestedSource, 0, len(fc.Sources)) + for _, s := range fc.Sources { + out = append(out, NestedSource{ + Segments: s.Segments, + AesKey: s.AesKey, + AesIV: s.AesIV, + InnerOffset: s.InnerOffset, + InnerLength: s.InnerLength, + InnerVolumeSize: s.InnerVolumeSize, + }) } + return out } // discSuffixPattern matches volume labels like "AVATAR_FIRE_AND_ASH_DISC_1", diff --git a/internal/importer/archive/iso_expansion_test.go b/internal/importer/archive/iso_expansion_test.go index e009e615d..bff8bfd14 100644 --- a/internal/importer/archive/iso_expansion_test.go +++ b/internal/importer/archive/iso_expansion_test.go @@ -63,50 +63,78 @@ func TestParseDiscNumber(t *testing.T) { } } -func TestIsoFileContentToNestedSource(t *testing.T) { +func TestIsoFileContentToNestedSources(t *testing.T) { t.Parallel() - t.Run("unencrypted uses pre-sliced segments", func(t *testing.T) { + t.Run("single unencrypted extent → one NestedSource", func(t *testing.T) { t.Parallel() - segs := []*metapb.SegmentData{ - {Id: "a", StartOffset: 0, EndOffset: 99, SegmentSize: 100}, - } fc := iso.ISOFileContent{ Filename: "00001.m2ts", Size: 100, - Segments: 
segs, + Sources: []iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{{Id: "a", StartOffset: 0, EndOffset: 99, SegmentSize: 100}}, + InnerOffset: 0, + InnerLength: 100, + InnerVolumeSize: 100, + }}, } - ns := isoFileContentToNestedSource(fc) - if len(ns.Segments) != 1 || ns.InnerLength != 100 || ns.InnerOffset != 0 { - t.Fatalf("unexpected NestedSource: %+v", ns) + got := isoFileContentToNestedSources(fc) + if len(got) != 1 { + t.Fatalf("want 1 source, got %d", len(got)) } - if len(ns.AesKey) != 0 { - t.Errorf("AesKey should be empty, got %v", ns.AesKey) + if got[0].InnerLength != 100 || got[0].InnerOffset != 0 || len(got[0].AesKey) != 0 { + t.Fatalf("unexpected NestedSource: %+v", got[0]) } }) - t.Run("encrypted carries offset and key", func(t *testing.T) { + t.Run("multi-extent file → one NestedSource per extent in order", func(t *testing.T) { t.Parallel() - segs := []*metapb.SegmentData{ - {Id: "outer", StartOffset: 0, EndOffset: 99999, SegmentSize: 100000}, + // The bug we just fixed: a 17 GiB M2TS spans hundreds of extents. + // Each extent must become its own NestedSource so the downstream + // concat reader stitches them in disc order. 
+ fc := iso.ISOFileContent{ + Filename: "00022.m2ts", + Size: 30, + Sources: []iso.ISONestedSource{ + {Segments: []*metapb.SegmentData{{Id: "e1"}}, InnerLength: 10}, + {Segments: []*metapb.SegmentData{{Id: "e2"}}, InnerLength: 10}, + {Segments: []*metapb.SegmentData{{Id: "e3"}}, InnerLength: 10}, + }, } + got := isoFileContentToNestedSources(fc) + if len(got) != 3 { + t.Fatalf("want 3 sources, got %d", len(got)) + } + wantIDs := []string{"e1", "e2", "e3"} + for i, ns := range got { + if len(ns.Segments) != 1 || ns.Segments[0].Id != wantIDs[i] { + t.Errorf("source %d: want segment id %q, got %+v", i, wantIDs[i], ns.Segments) + } + } + }) + + t.Run("encrypted source carries key + IV through", func(t *testing.T) { + t.Parallel() fc := iso.ISOFileContent{ Filename: "00001.m2ts", Size: 2048, - NestedSource: &iso.ISONestedSource{ - Segments: segs, + Sources: []iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{{Id: "outer", StartOffset: 0, EndOffset: 99999, SegmentSize: 100000}}, AesKey: []byte("0123456789abcdef0123456789abcdef"), AesIV: []byte("0123456789abcdef"), InnerOffset: 1024, InnerLength: 2048, InnerVolumeSize: 99999, - }, + }}, + } + got := isoFileContentToNestedSources(fc) + if len(got) != 1 { + t.Fatalf("want 1 source, got %d", len(got)) } - ns := isoFileContentToNestedSource(fc) - if ns.InnerOffset != 1024 || ns.InnerLength != 2048 || ns.InnerVolumeSize != 99999 { - t.Fatalf("unexpected NestedSource offsets: %+v", ns) + if got[0].InnerOffset != 1024 || got[0].InnerLength != 2048 || got[0].InnerVolumeSize != 99999 { + t.Fatalf("offsets mangled: %+v", got[0]) } - if len(ns.AesKey) == 0 { + if len(got[0].AesKey) == 0 { t.Error("AesKey should be carried through for encrypted source") } }) @@ -123,9 +151,13 @@ func TestBuildMainFeatureContent_TwoDiscs(t *testing.T) { return iso.ISOFileContent{ Filename: name, Size: size, - Segments: []*metapb.SegmentData{ - {Id: name, StartOffset: 0, EndOffset: size - 1, SegmentSize: size}, - }, + Sources: 
[]iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{ + {Id: name, StartOffset: 0, EndOffset: size - 1, SegmentSize: size}, + }, + InnerLength: size, + InnerVolumeSize: size, + }}, } } @@ -188,13 +220,20 @@ func TestBuildMainFeatureContent_TwoDiscs(t *testing.T) { func TestBuildLargestFileContent(t *testing.T) { t.Parallel() + mkFile := func(name string, size int64, segID string) iso.ISOFileContent { + return iso.ISOFileContent{ + Filename: name, + Size: size, + Sources: []iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{{Id: segID, StartOffset: 0, EndOffset: size - 1, SegmentSize: size}}, + InnerLength: size, + InnerVolumeSize: size, + }}, + } + } files := []iso.ISOFileContent{ - {Filename: "small.mkv", Size: 500, Segments: []*metapb.SegmentData{ - {Id: "s", StartOffset: 0, EndOffset: 499, SegmentSize: 500}, - }}, - {Filename: "big.mkv", Size: 5_000_000, Segments: []*metapb.SegmentData{ - {Id: "b", StartOffset: 0, EndOffset: 4_999_999, SegmentSize: 5_000_000}, - }}, + mkFile("small.mkv", 500, "s"), + mkFile("big.mkv", 5_000_000, "b"), } src := Content{Filename: "thing.iso", NzbdavID: "id-1"} From a7cfcc17f2c7dd6212e060e5b2c9ef3b538b2308 Mon Sep 17 00:00:00 2001 From: javi11 Date: Sun, 24 May 2026 17:29:24 +0200 Subject: [PATCH 08/30] perf(iso): coalesce physically contiguous UDF extents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A BD3D SSIF often emits a dozen separate UDF allocation descriptors for what's a single contiguous run of sectors on disc. After the multi- extent fix, each AD became its own NestedSource — bloating the proto metadata, the validation-sample surface, and the per-file open-handle count for what is logically one extent. coalesceExtents merges adjacent extents whose physical sectors follow the previous extent's last sector. 
Measured against the real Avatar disc 1 ISO: - BDMV/STREAM/SSIF/00022.ssif (22 GiB): 23 extents -> 2 - BDMV/STREAM/SSIF/00028.ssif (7 GiB): 7 extents -> 1 - BDMV/STREAM/SSIF/00016.ssif (6 GiB): 6 extents -> 1 M2TS files keep their full extent list because BD authoring genuinely interleaves the M2TS clips with the SSIF dependent-view data on disc. Note: the recent import failure ("not a valid ISO 9660 or UDF image" on disc 1, segment "44c89668..." unreachable during validation) is a Usenet-side issue — disc 2 analysed cleanly in 30 seconds with the same code path; disc 1 timed out reading its first sectors for 9 minutes before giving up. The coalescing change reduces the surface where transient flakes can bite but cannot eliminate it. --- internal/importer/archive/iso/fs.go | 35 +++++++++++++ .../importer/archive/iso/fs_local_test.go | 50 +++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/internal/importer/archive/iso/fs.go b/internal/importer/archive/iso/fs.go index d63c6e23d..31415fbda 100644 --- a/internal/importer/archive/iso/fs.go +++ b/internal/importer/archive/iso/fs.go @@ -779,10 +779,45 @@ func collectFileExtents(rs io.ReadSeeker, inlineADs []byte, allocType byte, meta } total += extents[i].length } + + // Coalesce physically contiguous extents — many BD3D SSIF files have + // dozens of small ADs that sit right next to each other on disc. The + // underlying bytes are one contiguous run; merging the ADs collapses + // the NestedSources count proportionally (Avatar SSIF: 23 → 2) and + // shrinks both the metadata proto and the validation surface. + extents = coalesceExtents(extents) _ = embeddedFEPhys return extents } +// coalesceExtents merges adjacent extents whose physical sectors are +// contiguous (next.lba == prev.lba + prev.length/sector). Returns the +// possibly-shorter slice in disc order. 
A file whose extents are +// physically scattered (typical for BD M2TS clips interleaved with SSIF +// dependent-view data) is returned unchanged. +func coalesceExtents(in []isoExtent) []isoExtent { + if len(in) <= 1 { + return in + } + out := make([]isoExtent, 0, len(in)) + cur := in[0] + for i := 1; i < len(in); i++ { + next := in[i] + // length must be a whole number of sectors for the contiguity + // arithmetic to apply; if it isn't (final partial sector of a + // file), fall through and start a new run after. + if cur.length%iso9660SectorSize == 0 && + next.lba == cur.lba+uint32(cur.length/iso9660SectorSize) { + cur.length += next.length + continue + } + out = append(out, cur) + cur = next + } + out = append(out, cur) + return out +} + // ListISOFiles walks the ISO 9660/UDF filesystem and returns all non-directory // entries. It tries UDF first (correct 64-bit sizes, authoritative for Blu-ray) // and falls back to ISO 9660 for plain discs without UDF. diff --git a/internal/importer/archive/iso/fs_local_test.go b/internal/importer/archive/iso/fs_local_test.go index 23693efeb..1c52d65b0 100644 --- a/internal/importer/archive/iso/fs_local_test.go +++ b/internal/importer/archive/iso/fs_local_test.go @@ -270,3 +270,53 @@ func TestLocalISO_CountExtentsForBigFiles(t *testing.T) { t.Logf("CONCLUSION: fragmentation present — single-LBA walker yields WRONG bytes past extent 1") } } + +// TestLocalISO_CountAdjacentExtents checks whether multi-extent files have +// physically contiguous extents that could be coalesced. If yes, segment +// count downstream can be reduced dramatically — the importer hit +// total_segments_to_validate=888,903 on this NZB precisely because every +// AD became its own NestedSource even when adjacent ADs sat next to each +// other on disc. 
+func TestLocalISO_CountAdjacentExtents(t *testing.T) { + path := os.Getenv("ALTMOUNT_LOCAL_ISO") + if path == "" { + t.Skip("ALTMOUNT_LOCAL_ISO not set") + } + f, err := os.Open(path) + if err != nil { + t.Fatalf("open: %v", err) + } + defer f.Close() + + entries, err := ListISOFiles(f) + if err != nil { + t.Fatalf("list: %v", err) + } + sort.Slice(entries, func(i, j int) bool { return entries[i].size > entries[j].size }) + + const lookAt = 15 + for i, e := range entries { + if i >= lookAt { + break + } + if len(e.extents) <= 1 { + continue + } + // Count adjacent runs (where next.lba == this.lba + this.length/sector). + adjacent := 0 + distinctRuns := 1 + for j := 1; j < len(e.extents); j++ { + prev := e.extents[j-1] + next := e.extents[j] + expectedNextLBA := prev.lba + uint32(prev.length/iso9660SectorSize) + if next.lba == expectedNextLBA { + adjacent++ + } else { + distinctRuns++ + } + } + t.Logf(" %s: extents=%d adjacent_pairs=%d distinct_runs=%d coalesce_ratio=%.1fx", + e.path, len(e.extents), adjacent, distinctRuns, + float64(len(e.extents))/float64(distinctRuns)) + } +} From 86321ea79cf00f1ddc1c8fda39e48cf32a25e370 Mon Sep 17 00:00:00 2001 From: javi11 Date: Sun, 24 May 2026 18:31:25 +0200 Subject: [PATCH 09/30] feat(importer): add bare-ISO content bridge + partition helper --- internal/importer/iso_expand.go | 40 ++++++++++++++++++ internal/importer/iso_expand_test.go | 63 ++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 internal/importer/iso_expand.go create mode 100644 internal/importer/iso_expand_test.go diff --git a/internal/importer/iso_expand.go b/internal/importer/iso_expand.go new file mode 100644 index 000000000..d4367ea1c --- /dev/null +++ b/internal/importer/iso_expand.go @@ -0,0 +1,40 @@ +package importer + +import ( + "path/filepath" + "strings" + + "github.com/javi11/altmount/internal/importer/archive" + "github.com/javi11/altmount/internal/importer/parser" +) + +// parsedFileToISOContent adapts a 
parser.ParsedFile (a bare .iso entry +// in an NZB) to archive.Content so archive.ExpandISOContents can analyse +// it. Mirrors the field mapping rar/processor.go applies to RAR-wrapped +// ISOs, minus RAR-specific InternalPath/PackedSize bookkeeping (bare ISO +// is not packed, so PackedSize == Size). +func parsedFileToISOContent(pf parser.ParsedFile) archive.Content { + return archive.Content{ + Filename: pf.Filename, + Size: pf.Size, + PackedSize: pf.Size, // bare ISO is not packed + NzbdavID: pf.NzbdavID, + Segments: pf.Segments, + AesKey: pf.AesKey, + AesIV: pf.AesIv, + } +} + +// partitionISOFiles splits a regularFiles slice into the .iso entries +// (case-insensitive) and everything else, preserving original order in +// both outputs. +func partitionISOFiles(files []parser.ParsedFile) (isos, rest []parser.ParsedFile) { + for _, f := range files { + if strings.EqualFold(filepath.Ext(f.Filename), ".iso") { + isos = append(isos, f) + } else { + rest = append(rest, f) + } + } + return isos, rest +} diff --git a/internal/importer/iso_expand_test.go b/internal/importer/iso_expand_test.go new file mode 100644 index 000000000..34f5176af --- /dev/null +++ b/internal/importer/iso_expand_test.go @@ -0,0 +1,63 @@ +package importer + +import ( + "testing" + + "github.com/javi11/altmount/internal/importer/parser" + metapb "github.com/javi11/altmount/internal/metadata/proto" +) + +func TestParsedFileToISOContent_MapsAllFields(t *testing.T) { + pf := parser.ParsedFile{ + Filename: "Movie_DISC_1.iso", + Size: 42_949_672_960, // 40 GiB + NzbdavID: "abc-123", + Segments: []*metapb.SegmentData{ + {Id: "msg1@", StartOffset: 0, EndOffset: 750_000, SegmentSize: 750_000}, + }, + AesKey: []byte{0xAA, 0xBB}, + AesIv: []byte{0xCC, 0xDD}, + } + + got := parsedFileToISOContent(pf) + + if got.Filename != "Movie_DISC_1.iso" { + t.Errorf("Filename = %q, want Movie_DISC_1.iso", got.Filename) + } + if got.Size != 42_949_672_960 { + t.Errorf("Size = %d, want 42949672960", got.Size) + } + if 
got.PackedSize != 42_949_672_960 { + t.Errorf("PackedSize = %d, want 42949672960 (bare ISO is unpacked)", got.PackedSize) + } + if got.NzbdavID != "abc-123" { + t.Errorf("NzbdavID = %q, want abc-123", got.NzbdavID) + } + if len(got.Segments) != 1 || got.Segments[0].Id != "msg1@" { + t.Errorf("Segments not preserved: %#v", got.Segments) + } + if string(got.AesKey) != "\xAA\xBB" || string(got.AesIV) != "\xCC\xDD" { + t.Errorf("AES key/IV not preserved") + } +} + +func TestPartitionISOFiles_SeparatesISOFromRest(t *testing.T) { + files := []parser.ParsedFile{ + {Filename: "Movie_DISC_1.iso"}, + {Filename: "readme.txt"}, + {Filename: "Movie_DISC_2.ISO"}, + {Filename: "extras.mkv"}, + } + + isos, rest := partitionISOFiles(files) + + if len(isos) != 2 { + t.Fatalf("isos = %d, want 2", len(isos)) + } + if isos[0].Filename != "Movie_DISC_1.iso" || isos[1].Filename != "Movie_DISC_2.ISO" { + t.Errorf("isos = %+v", isos) + } + if len(rest) != 2 || rest[0].Filename != "readme.txt" || rest[1].Filename != "extras.mkv" { + t.Errorf("rest = %+v", rest) + } +} From 41c6b595b2680870ec9181e9bcad7966cc6c044e Mon Sep 17 00:00:00 2001 From: javi11 Date: Sun, 24 May 2026 18:34:32 +0200 Subject: [PATCH 10/30] docs(importer): note AesIv/AesIV casing mismatch in ISO bridge --- internal/importer/iso_expand.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/importer/iso_expand.go b/internal/importer/iso_expand.go index d4367ea1c..d0aa7a634 100644 --- a/internal/importer/iso_expand.go +++ b/internal/importer/iso_expand.go @@ -21,7 +21,7 @@ func parsedFileToISOContent(pf parser.ParsedFile) archive.Content { NzbdavID: pf.NzbdavID, Segments: pf.Segments, AesKey: pf.AesKey, - AesIV: pf.AesIv, + AesIV: pf.AesIv, // parser uses AesIv (lowercase v); archive.Content uses AesIV } } From ad5245642480cdc2177bbf5154b83ae6eed04094 Mon Sep 17 00:00:00 2001 From: javi11 Date: Sun, 24 May 2026 18:37:14 +0200 Subject: [PATCH 11/30] refactor(archive): expose NewFileMetadataFromContent 
for non-RAR callers Extract the Content -> FileMetadata mapping body (previously duplicated in rar.CreateFileMetadataFromRarContent and sevenzip.CreateFileMetadataFromSevenZipContent) into a shared package-level function archive.NewFileMetadataFromContent. Both processor methods now delegate to the shared function so the Processor interfaces and all existing callers (aggregator.go, test mocks) keep working unchanged. Behaviour is byte-for-byte preserved: same Status default, same AES handling, same NestedSegmentSource copy loop. This prepares Task 3 (ISO expansion) to persist FileMetadata for files discovered inside bare ISOs without depending on the RAR or 7z packages. --- internal/importer/archive/content_metadata.go | 59 +++++++++++++++ .../importer/archive/content_metadata_test.go | 74 +++++++++++++++++++ internal/importer/archive/rar/processor.go | 38 +--------- .../importer/archive/sevenzip/processor.go | 38 +--------- 4 files changed, 141 insertions(+), 68 deletions(-) create mode 100644 internal/importer/archive/content_metadata.go create mode 100644 internal/importer/archive/content_metadata_test.go diff --git a/internal/importer/archive/content_metadata.go b/internal/importer/archive/content_metadata.go new file mode 100644 index 000000000..f3afa915b --- /dev/null +++ b/internal/importer/archive/content_metadata.go @@ -0,0 +1,59 @@ +package archive + +import ( + "time" + + metapb "github.com/javi11/altmount/internal/metadata/proto" +) + +// NewFileMetadataFromContent creates a FileMetadata from a Content (with its NestedSources) +// for the metadata system. It mirrors the conversion previously inlined inside +// rar.CreateFileMetadataFromRarContent and sevenzip.CreateFileMetadataFromSevenZipContent +// so that non-RAR/non-7z callers (e.g. ISO expansion) can produce equivalent metadata. +// +// Behaviour: +// - Sets CreatedAt/ModifiedAt to time.Now().Unix(). +// - Defaults Status to FILE_STATUS_HEALTHY. +// - Copies SegmentData from content.Segments. 
+// - When content.AesKey is non-empty, sets Encryption=AES with key/iv. +// - Appends one NestedSegmentSource per content.NestedSources entry. +func NewFileMetadataFromContent( + content Content, + sourceNzbPath string, + releaseDate int64, + nzbdavId string, +) *metapb.FileMetadata { + now := time.Now().Unix() + + meta := &metapb.FileMetadata{ + FileSize: content.Size, + SourceNzbPath: sourceNzbPath, + Status: metapb.FileStatus_FILE_STATUS_HEALTHY, + CreatedAt: now, + ModifiedAt: now, + SegmentData: content.Segments, + ReleaseDate: releaseDate, + NzbdavId: nzbdavId, + } + + // Set AES encryption if keys are present (single-layer encrypted archive) + if len(content.AesKey) > 0 { + meta.Encryption = metapb.Encryption_AES + meta.AesKey = content.AesKey + meta.AesIv = content.AesIV + } + + // Populate nested sources for encrypted nested archive files + for _, ns := range content.NestedSources { + meta.NestedSources = append(meta.NestedSources, &metapb.NestedSegmentSource{ + Segments: ns.Segments, + AesKey: ns.AesKey, + AesIv: ns.AesIV, + InnerOffset: ns.InnerOffset, + InnerLength: ns.InnerLength, + InnerVolumeSize: ns.InnerVolumeSize, + }) + } + + return meta +} diff --git a/internal/importer/archive/content_metadata_test.go b/internal/importer/archive/content_metadata_test.go new file mode 100644 index 000000000..9be716d7e --- /dev/null +++ b/internal/importer/archive/content_metadata_test.go @@ -0,0 +1,74 @@ +package archive + +import ( + "testing" + + metapb "github.com/javi11/altmount/internal/metadata/proto" +) + +func TestNewFileMetadataFromContent_PreservesNestedSources(t *testing.T) { + c := Content{ + Filename: "main_feature.m2ts", + Size: 100, + Segments: []*metapb.SegmentData{{Id: "outer@"}}, + NestedSources: []NestedSource{ + {InnerOffset: 0, InnerLength: 40, Segments: []*metapb.SegmentData{{Id: "a@"}}}, + {InnerOffset: 0, InnerLength: 60, Segments: []*metapb.SegmentData{{Id: "b@"}}}, + }, + } + + got := NewFileMetadataFromContent(c, "/path/to.nzb", 
1234567890, "nzbdav-id-1") + + if got.FileSize != 100 { + t.Errorf("FileSize = %d, want 100", got.FileSize) + } + if got.SourceNzbPath != "/path/to.nzb" { + t.Errorf("SourceNzbPath = %q, want %q", got.SourceNzbPath, "/path/to.nzb") + } + if got.ReleaseDate != 1234567890 { + t.Errorf("ReleaseDate = %d, want 1234567890", got.ReleaseDate) + } + if got.NzbdavId != "nzbdav-id-1" { + t.Errorf("NzbdavId = %q, want %q", got.NzbdavId, "nzbdav-id-1") + } + if got.Status != metapb.FileStatus_FILE_STATUS_HEALTHY { + t.Errorf("Status = %v, want FILE_STATUS_HEALTHY", got.Status) + } + if len(got.SegmentData) != 1 || got.SegmentData[0].Id != "outer@" { + t.Errorf("SegmentData not preserved: %+v", got.SegmentData) + } + if len(got.NestedSources) != 2 { + t.Fatalf("NestedSources = %d, want 2", len(got.NestedSources)) + } + if got.NestedSources[0].InnerLength != 40 || got.NestedSources[1].InnerLength != 60 { + t.Errorf("NestedSources lengths wrong: %+v", got.NestedSources) + } + if got.NestedSources[0].Segments[0].Id != "a@" || got.NestedSources[1].Segments[0].Id != "b@" { + t.Errorf("NestedSources segment ids wrong: %+v", got.NestedSources) + } + // No AES key on Content → no encryption on metadata + if got.Encryption != metapb.Encryption_NONE { + t.Errorf("Encryption = %v, want NONE (no AES key on content)", got.Encryption) + } +} + +func TestNewFileMetadataFromContent_SetsAESWhenKeyPresent(t *testing.T) { + c := Content{ + Filename: "encrypted.bin", + Size: 50, + AesKey: []byte{0x01, 0x02, 0x03}, + AesIV: []byte{0x10, 0x20, 0x30}, + } + + got := NewFileMetadataFromContent(c, "", 0, "") + + if got.Encryption != metapb.Encryption_AES { + t.Errorf("Encryption = %v, want AES", got.Encryption) + } + if string(got.AesKey) != string(c.AesKey) { + t.Errorf("AesKey not propagated") + } + if string(got.AesIv) != string(c.AesIV) { + t.Errorf("AesIv not propagated") + } +} diff --git a/internal/importer/archive/rar/processor.go b/internal/importer/archive/rar/processor.go index 
3566508b9..79ca93643 100644 --- a/internal/importer/archive/rar/processor.go +++ b/internal/importer/archive/rar/processor.go @@ -36,46 +36,16 @@ func NewProcessor(poolManager pool.Manager, configGetter config.ConfigGetter) Pr } } -// CreateFileMetadataFromRarContent creates FileMetadata from RarContent for the metadata system +// CreateFileMetadataFromRarContent creates FileMetadata from RarContent for the metadata system. +// Delegates to archive.NewFileMetadataFromContent so the mapping stays shared with +// non-RAR callers (e.g. ISO expansion). func (rh *rarProcessor) CreateFileMetadataFromRarContent( Content Content, sourceNzbPath string, releaseDate int64, nzbdavId string, ) *metapb.FileMetadata { - now := time.Now().Unix() - - meta := &metapb.FileMetadata{ - FileSize: Content.Size, - SourceNzbPath: sourceNzbPath, - Status: metapb.FileStatus_FILE_STATUS_HEALTHY, - CreatedAt: now, - ModifiedAt: now, - SegmentData: Content.Segments, - ReleaseDate: releaseDate, - NzbdavId: nzbdavId, - } - - // Set AES encryption if keys are present (single-layer encrypted RAR) - if len(Content.AesKey) > 0 { - meta.Encryption = metapb.Encryption_AES - meta.AesKey = Content.AesKey - meta.AesIv = Content.AesIV - } - - // Populate nested sources for encrypted nested RAR files - for _, ns := range Content.NestedSources { - meta.NestedSources = append(meta.NestedSources, &metapb.NestedSegmentSource{ - Segments: ns.Segments, - AesKey: ns.AesKey, - AesIv: ns.AesIV, - InnerOffset: ns.InnerOffset, - InnerLength: ns.InnerLength, - InnerVolumeSize: ns.InnerVolumeSize, - }) - } - - return meta + return archive.NewFileMetadataFromContent(Content, sourceNzbPath, releaseDate, nzbdavId) } // AnalyzeRarContentFromNzb analyzes a RAR archive directly from NZB data without downloading diff --git a/internal/importer/archive/sevenzip/processor.go b/internal/importer/archive/sevenzip/processor.go index 6284358d2..349979fe5 100644 --- a/internal/importer/archive/sevenzip/processor.go +++ 
b/internal/importer/archive/sevenzip/processor.go @@ -58,46 +58,16 @@ var ( rarNumericPattern = rar.NumericPattern ) -// CreateFileMetadataFromSevenZipContent creates FileMetadata from SevenZipContent for the metadata system +// CreateFileMetadataFromSevenZipContent creates FileMetadata from SevenZipContent for the metadata system. +// Delegates to archive.NewFileMetadataFromContent so the mapping stays shared with +// non-7z callers (e.g. ISO expansion). func (sz *sevenZipProcessor) CreateFileMetadataFromSevenZipContent( content Content, sourceNzbPath string, releaseDate int64, nzbdavId string, ) *metapb.FileMetadata { - now := time.Now().Unix() - - meta := &metapb.FileMetadata{ - FileSize: content.Size, - SourceNzbPath: sourceNzbPath, - Status: metapb.FileStatus_FILE_STATUS_HEALTHY, - CreatedAt: now, - ModifiedAt: now, - SegmentData: content.Segments, - ReleaseDate: releaseDate, - NzbdavId: nzbdavId, - } - - // Set AES encryption if keys are present - if len(content.AesKey) > 0 { - meta.Encryption = metapb.Encryption_AES - meta.AesKey = content.AesKey - meta.AesIv = content.AesIV - } - - // Populate nested sources for encrypted nested RAR files - for _, ns := range content.NestedSources { - meta.NestedSources = append(meta.NestedSources, &metapb.NestedSegmentSource{ - Segments: ns.Segments, - AesKey: ns.AesKey, - AesIv: ns.AesIV, - InnerOffset: ns.InnerOffset, - InnerLength: ns.InnerLength, - InnerVolumeSize: ns.InnerVolumeSize, - }) - } - - return meta + return archive.NewFileMetadataFromContent(content, sourceNzbPath, releaseDate, nzbdavId) } // deriveAESKey derives the AES encryption key from a password using the 7-zip algorithm From 0b877aa4b5868c17a2a0a363db88a1615be3421e Mon Sep 17 00:00:00 2001 From: javi11 Date: Sun, 24 May 2026 18:43:31 +0200 Subject: [PATCH 12/30] feat(importer): orchestrate bare-ISO Blu-ray expansion --- internal/importer/iso_expand.go | 82 +++++++++++++++++++++++ internal/importer/iso_expand_test.go | 97 ++++++++++++++++++++++++++++ 2 
files changed, 179 insertions(+) diff --git a/internal/importer/iso_expand.go b/internal/importer/iso_expand.go index d0aa7a634..4c9130635 100644 --- a/internal/importer/iso_expand.go +++ b/internal/importer/iso_expand.go @@ -1,11 +1,16 @@ package importer import ( + "context" + "fmt" + "log/slog" + "path" "path/filepath" "strings" "github.com/javi11/altmount/internal/importer/archive" "github.com/javi11/altmount/internal/importer/parser" + metapb "github.com/javi11/altmount/internal/metadata/proto" ) // parsedFileToISOContent adapts a parser.ParsedFile (a bare .iso entry @@ -38,3 +43,80 @@ func partitionISOFiles(files []parser.ParsedFile) (isos, rest []parser.ParsedFil } return isos, rest } + +// expandBareISODeps lets the orchestrator be tested without an NNTP pool +// or a real metadata service. Production wiring constructs these from +// the Processor's existing collaborators. +type expandBareISODeps struct { + expand func(ctx context.Context, enabled bool, contents []archive.Content) ([]archive.Content, error) + writeMetadata func(virtualPath string, meta *metapb.FileMetadata) error + // enabled is the resolved value of Import.ExpandBlurayIso. Pulled + // out of deps so tests can flip it without touching config. + enabled bool +} + +// expandBareISOFiles peels .iso entries out of regularFiles, runs the +// existing archive.ExpandISOContents over them (which handles single-disc +// playlist resolution AND multi-disc DISC_N grouping), writes each +// expanded Content as a FileMetadata under virtualDir, and returns the +// list of written virtual paths plus the remaining (non-ISO + unchanged) +// files for normal dispatch. +// +// When no .iso files are present, returns (nil, regularFiles, nil). +// When deps.enabled is false, archive.ExpandISOContents returns the +// inputs unchanged; in that case we push the ISOs back into `remaining` +// so processSingleFile/processMultiFile handle them as raw .iso bytes. 
+// +// Pairing-by-position note: archive.ExpandISOContents appends exactly one +// Content per input ISO when no multi-disc merging happens, so the i-th +// expanded output corresponds to isos[i]. When multi-disc merging DOES +// happen (group of N discs collapses into 1 Content), every entry in the +// returned slice has NestedSources populated — the per-index fallback +// branch (which references isos[i]) is therefore never taken in that case. +func expandBareISOFiles( + ctx context.Context, + deps expandBareISODeps, + regularFiles []parser.ParsedFile, + virtualDir string, + releaseName string, +) (written []string, remaining []parser.ParsedFile, err error) { + isos, rest := partitionISOFiles(regularFiles) + if len(isos) == 0 { + return nil, regularFiles, nil + } + + in := make([]archive.Content, 0, len(isos)) + for _, pf := range isos { + in = append(in, parsedFileToISOContent(pf)) + } + + expanded, err := deps.expand(ctx, deps.enabled, in) + if err != nil { + return nil, nil, fmt.Errorf("expand bare ISOs: %w", err) + } + + for i, c := range expanded { + if c.ISOExpansionIndex == 0 && len(c.NestedSources) == 0 { + // Untransformed — fall back to standard processing. + remaining = append(remaining, isos[i]) + continue + } + // Task 4 wiring will supply real sourceNzbPath/releaseDate values; + // for now plumb empty strings/zero — see archive.NewFileMetadataFromContent + // signature. + meta := archive.NewFileMetadataFromContent(c, "", 0, c.NzbdavID) + virtualPath := path.Join(virtualDir, c.Filename) + if err := deps.writeMetadata(virtualPath, meta); err != nil { + return written, nil, fmt.Errorf("write metadata %q: %w", virtualPath, err) + } + written = append(written, virtualPath) + slog.InfoContext(ctx, "Expanded bare ISO into virtual file", + "release", releaseName, + "path", virtualPath, + "size", c.Size, + "nested_sources", len(c.NestedSources), + ) + } + remaining = append(remaining, rest...) 
+ return written, remaining, nil +} diff --git a/internal/importer/iso_expand_test.go b/internal/importer/iso_expand_test.go index 34f5176af..5dfcdcaa1 100644 --- a/internal/importer/iso_expand_test.go +++ b/internal/importer/iso_expand_test.go @@ -1,8 +1,10 @@ package importer import ( + "context" "testing" + "github.com/javi11/altmount/internal/importer/archive" "github.com/javi11/altmount/internal/importer/parser" metapb "github.com/javi11/altmount/internal/metadata/proto" ) @@ -61,3 +63,98 @@ func TestPartitionISOFiles_SeparatesISOFromRest(t *testing.T) { t.Errorf("rest = %+v", rest) } } + +func TestExpandBareISOFiles_NoISOs_ReturnsInputUntouched(t *testing.T) { + files := []parser.ParsedFile{{Filename: "a.mkv"}, {Filename: "b.mp4"}} + written, rest, err := expandBareISOFiles(context.Background(), expandBareISODeps{ + expand: func(ctx context.Context, _ bool, _ []archive.Content) ([]archive.Content, error) { + t.Fatal("expand should not be called when no .iso present") + return nil, nil + }, + }, files, "vdir", "movie") + if err != nil { + t.Fatalf("err = %v", err) + } + if len(written) != 0 { + t.Errorf("written = %v, want []", written) + } + if len(rest) != 2 { + t.Errorf("rest = %d, want 2", len(rest)) + } +} + +func TestExpandBareISOFiles_OneISO_BluRayPath_WritesMergedMetadata(t *testing.T) { + files := []parser.ParsedFile{ + {Filename: "movie.iso", Size: 25_000_000_000}, + {Filename: "readme.txt"}, + } + expandCalled := false + deps := expandBareISODeps{ + expand: func(ctx context.Context, enabled bool, in []archive.Content) ([]archive.Content, error) { + expandCalled = true + if !enabled { + t.Error("expand called with enabled=false") + } + if len(in) != 1 || in[0].Filename != "movie.iso" { + t.Errorf("unexpected expand input: %+v", in) + } + return []archive.Content{{ + Filename: "MOVIE.m2ts", + Size: 20_000_000_000, + NestedSources: []archive.NestedSource{ + {InnerOffset: 0, InnerLength: 10_000_000_000}, + {InnerOffset: 0, InnerLength: 10_000_000_000}, 
+ }, + }}, nil + }, + writeMetadata: func(virtualPath string, _ *metapb.FileMetadata) error { + if virtualPath != "vdir/MOVIE.m2ts" { + t.Errorf("virtualPath = %q, want vdir/MOVIE.m2ts", virtualPath) + } + return nil + }, + enabled: true, + } + + written, rest, err := expandBareISOFiles(context.Background(), deps, files, "vdir", "movie") + if err != nil { + t.Fatalf("err = %v", err) + } + if !expandCalled { + t.Error("expand was never called") + } + if len(written) != 1 || written[0] != "vdir/MOVIE.m2ts" { + t.Errorf("written = %v", written) + } + if len(rest) != 1 || rest[0].Filename != "readme.txt" { + t.Errorf("rest = %v", rest) + } +} + +func TestExpandBareISOFiles_Disabled_StillPeelsButFallsBack(t *testing.T) { + files := []parser.ParsedFile{{Filename: "movie.iso", Size: 1000}} + deps := expandBareISODeps{ + enabled: false, + expand: func(ctx context.Context, enabled bool, in []archive.Content) ([]archive.Content, error) { + if enabled { + t.Error("expand should be called with enabled=false") + } + // archive.ExpandISOContents with expand=false returns input unchanged. 
+ return in, nil + }, + writeMetadata: func(string, *metapb.FileMetadata) error { + t.Fatal("writeMetadata should not be called when bare ISO is unchanged") + return nil + }, + } + written, rest, err := expandBareISOFiles(context.Background(), deps, files, "vdir", "movie") + if err != nil { + t.Fatalf("err = %v", err) + } + if len(written) != 0 { + t.Errorf("written = %v, want [] (no metadata should be written when expansion is gated off)", written) + } + if len(rest) != 1 || rest[0].Filename != "movie.iso" { + t.Errorf("rest = %+v, want the original .iso pushed back for normal dispatch", rest) + } +} From 0c07087c02d69ce70556f804c67fbecda4f1ed92 Mon Sep 17 00:00:00 2001 From: javi11 Date: Sun, 24 May 2026 18:47:36 +0200 Subject: [PATCH 13/30] docs(importer): clarify isos[i] safety and test failure message --- internal/importer/iso_expand.go | 3 +++ internal/importer/iso_expand_test.go | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/internal/importer/iso_expand.go b/internal/importer/iso_expand.go index 4c9130635..597ed87ca 100644 --- a/internal/importer/iso_expand.go +++ b/internal/importer/iso_expand.go @@ -98,6 +98,9 @@ func expandBareISOFiles( for i, c := range expanded { if c.ISOExpansionIndex == 0 && len(c.NestedSources) == 0 { // Untransformed — fall back to standard processing. + // len(expanded) <= len(isos) is guaranteed by archive.ExpandISOContents: + // it appends one Content per input ISO on passthrough and ≤ one per + // group on success. Index isos[i] is therefore safe here. 
remaining = append(remaining, isos[i]) continue } diff --git a/internal/importer/iso_expand_test.go b/internal/importer/iso_expand_test.go index 5dfcdcaa1..b70364b2e 100644 --- a/internal/importer/iso_expand_test.go +++ b/internal/importer/iso_expand_test.go @@ -137,7 +137,7 @@ func TestExpandBareISOFiles_Disabled_StillPeelsButFallsBack(t *testing.T) { enabled: false, expand: func(ctx context.Context, enabled bool, in []archive.Content) ([]archive.Content, error) { if enabled { - t.Error("expand should be called with enabled=false") + t.Error("expand was called with enabled=true; want enabled=false") } // archive.ExpandISOContents with expand=false returns input unchanged. return in, nil From d6cee97390d14d4bea61d69270fee2b3385c16cf Mon Sep 17 00:00:00 2001 From: javi11 Date: Mon, 25 May 2026 20:48:38 +0200 Subject: [PATCH 14/30] feat(iso): surface silent drops in UDF enumeration as WARN logs The UDF walker previously had seven sites where it silently dropped a file from its listing (continue/break with no log), making it impossible to diagnose missing files like BDMV/STREAM/00022.m2ts on Avatar disc 1. Thread context.Context through ListISOFiles -> udfWalkAll -> udfReadDirEntries -> collectFileExtents and emit slog.WarnContext at every silent drop site with the file path and a distinct reason. Behavior is unchanged; only diagnostics are added. A new in-memory test (TestUDFWalk_LogsWhenFileICBHasUnknownTag) drives the "unexpected tag" branch and asserts a WARN line is emitted with the file path and bogus tag id. 
--- internal/importer/archive/iso/fs.go | 57 ++++++++-- .../importer/archive/iso/fs_local_test.go | 106 +++++++++++++++++- internal/importer/archive/iso/fs_test.go | 3 +- internal/importer/archive/iso/processor.go | 2 +- 4 files changed, 152 insertions(+), 16 deletions(-) diff --git a/internal/importer/archive/iso/fs.go b/internal/importer/archive/iso/fs.go index 31415fbda..087ec5aaf 100644 --- a/internal/importer/archive/iso/fs.go +++ b/internal/importer/archive/iso/fs.go @@ -1,9 +1,11 @@ package iso import ( + "context" "encoding/binary" "fmt" "io" + "log/slog" "strings" "unicode/utf16" ) @@ -416,8 +418,11 @@ func readICBExtent(rs io.ReadSeeker, loc udfLBA, length int, metaMap []udfMetaSp } // udfReadDirEntries reads all File Identifier Descriptor records from a -// File Entry at physSect. -func udfReadDirEntries(rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, partStart uint32) ([]udfDirEntry, error) { +// File Entry at physSect. ctx is threaded purely for symmetry with the +// rest of the UDF walk so future warn-log hooks can use it without +// changing the signature again. +func udfReadDirEntries(ctx context.Context, rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, partStart uint32) ([]udfDirEntry, error) { + _ = ctx tag, buf, err := udfReadTag(rs, physSect) if err != nil { return nil, fmt.Errorf("reading dir ICB at %d: %w", physSect, err) @@ -587,12 +592,12 @@ func udfSetup(rs io.ReadSeeker) (partStart uint32, metaMap []udfMetaSpan, rootIC } // udfWalkAll recursively lists all non-directory files in a UDF filesystem. 
-func udfWalkAll(rs io.ReadSeeker, dirICB udfLongAD, metaMap []udfMetaSpan, partStart uint32, prefix string) ([]isoFileEntry, error) { +func udfWalkAll(ctx context.Context, rs io.ReadSeeker, dirICB udfLongAD, metaMap []udfMetaSpan, partStart uint32, prefix string) ([]isoFileEntry, error) { physSect, err := udfResolveICB(dirICB.loc, metaMap, partStart) if err != nil { return nil, err } - entries, err := udfReadDirEntries(rs, physSect, metaMap, partStart) + entries, err := udfReadDirEntries(ctx, rs, physSect, metaMap, partStart) if err != nil { return nil, err } @@ -603,16 +608,25 @@ func udfWalkAll(rs io.ReadSeeker, dirICB udfLongAD, metaMap []udfMetaSpan, partS entryPath = prefix + "/" + e.name } if e.isDir { - sub, _ := udfWalkAll(rs, e.icb, metaMap, partStart, entryPath) + sub, _ := udfWalkAll(ctx, rs, e.icb, metaMap, partStart, entryPath) result = append(result, sub...) continue } fePhys, rerr := udfResolveICB(e.icb.loc, metaMap, partStart) if rerr != nil { + slog.WarnContext(ctx, "UDF: ICB resolve failed, dropping file from listing", + "path", entryPath, "icb_block", e.icb.loc.block, "error", rerr) continue } feTag, feBuf, rerr := udfReadTag(rs, fePhys) - if rerr != nil || (feTag.id != 261 && feTag.id != 266) { + if rerr != nil { + slog.WarnContext(ctx, "UDF: file ICB read failed, dropping file from listing", + "path", entryPath, "phys_sector", fePhys, "error", rerr) + continue + } + if feTag.id != 261 && feTag.id != 266 { + slog.WarnContext(ctx, "UDF: file ICB has unexpected tag, dropping file from listing", + "path", entryPath, "tag_id", feTag.id) continue } infoLen := binary.LittleEndian.Uint64(feBuf[56:64]) @@ -632,8 +646,10 @@ func udfWalkAll(rs io.ReadSeeker, dirICB udfLongAD, metaMap []udfMetaSpan, partS allocDescLen = len(feBuf) - allocDescOff } - extents := collectFileExtents(rs, feBuf[allocDescOff:allocDescOff+allocDescLen], allocType, metaMap, partStart, infoLen, fePhys) + extents := collectFileExtents(ctx, rs, 
feBuf[allocDescOff:allocDescOff+allocDescLen], allocType, metaMap, partStart, infoLen, fePhys) if len(extents) == 0 { + slog.WarnContext(ctx, "UDF: collectFileExtents returned 0 extents, dropping file from listing", + "path", entryPath, "info_length", infoLen, "alloc_type", allocType) continue } result = append(result, isoFileEntry{ @@ -668,7 +684,7 @@ func udfWalkAll(rs io.ReadSeeker, dirICB udfLongAD, metaMap []udfMetaSpan, partS // embeddedFEPhys is only meaningful for allocType 3 (it's the FE's own // physical sector — the file data is inline at allocDescOff of that // sector, so we materialise a single synthetic extent pointing at it). -func collectFileExtents(rs io.ReadSeeker, inlineADs []byte, allocType byte, metaMap []udfMetaSpan, partStart uint32, infoLen uint64, embeddedFEPhys uint32) []isoExtent { +func collectFileExtents(ctx context.Context, rs io.ReadSeeker, inlineADs []byte, allocType byte, metaMap []udfMetaSpan, partStart uint32, infoLen uint64, embeddedFEPhys uint32) []isoExtent { if allocType == 3 { // Embedded data — a single "extent" pointing at the FE sector // itself with the inline-AD area treated as the file data. We @@ -748,20 +764,37 @@ func collectFileExtents(rs io.ReadSeeker, inlineADs []byte, allocType byte, meta } ps, err := udfResolveICB(chain.loc, metaMap, partStart) if err != nil { + slog.WarnContext(ctx, "UDF: AED chain truncated", + "reason", "icb resolve failed", + "extents_so_far", len(extents), + "error", err) break } _, aedBuf, err := udfReadTag(rs, ps) if err != nil { + slog.WarnContext(ctx, "UDF: AED chain truncated", + "reason", "tag read failed", + "extents_so_far", len(extents), + "error", err) break } // Allocation Extent Descriptor layout: 16-byte tag + 4-byte // previous-AED pointer + 4-byte length-of-allocation-descriptors, // then the ADs themselves. 
if len(aedBuf) < 24 { + slog.WarnContext(ctx, "UDF: AED chain truncated", + "reason", "aed buffer too short", + "extents_so_far", len(extents), + "buf_len", len(aedBuf)) break } nextLen := int(binary.LittleEndian.Uint32(aedBuf[20:24])) if nextLen <= 0 || 24+nextLen > len(aedBuf) { + slog.WarnContext(ctx, "UDF: AED chain truncated", + "reason", "aed length out of range", + "extents_so_far", len(extents), + "next_len", nextLen, + "buf_len", len(aedBuf)) break } chase = aedBuf[24 : 24+nextLen] @@ -820,11 +853,13 @@ func coalesceExtents(in []isoExtent) []isoExtent { // ListISOFiles walks the ISO 9660/UDF filesystem and returns all non-directory // entries. It tries UDF first (correct 64-bit sizes, authoritative for Blu-ray) -// and falls back to ISO 9660 for plain discs without UDF. -func ListISOFiles(rs io.ReadSeeker) ([]isoFileEntry, error) { +// and falls back to ISO 9660 for plain discs without UDF. ctx is threaded +// through the UDF walk so silent-drop sites can emit slog.WarnContext logs +// for diagnosis without polluting the io.ReadSeeker signature. 
+func ListISOFiles(ctx context.Context, rs io.ReadSeeker) ([]isoFileEntry, error) { // Try UDF first (handles Blu-ray and modern discs with correct 64-bit sizes) if partStart, metaMap, rootICB, err := udfSetup(rs); err == nil { - files, err := udfWalkAll(rs, rootICB, metaMap, partStart, "") + files, err := udfWalkAll(ctx, rs, rootICB, metaMap, partStart, "") if err == nil && len(files) > 0 { return files, nil } diff --git a/internal/importer/archive/iso/fs_local_test.go b/internal/importer/archive/iso/fs_local_test.go index 1c52d65b0..efbdf3a6a 100644 --- a/internal/importer/archive/iso/fs_local_test.go +++ b/internal/importer/archive/iso/fs_local_test.go @@ -1,13 +1,113 @@ package iso import ( + "bytes" + "context" "encoding/binary" + "encoding/json" "fmt" + "log/slog" "os" "sort" + "strings" "testing" ) +// TestUDFWalk_LogsWhenFileICBHasUnknownTag drives a synthetic UDF blob with +// one directory containing one File Identifier Descriptor (BOGUS.M2TS) whose +// ICB points at a sector containing an invalid descriptor tag (id=999, not +// 261/266). The walker must: +// +// 1. drop the file from its returned listing (silent today, kept silent); +// 2. emit exactly one slog.WarnContext line naming the file and the bogus +// tag id so operators can see why a file vanished. +// +// This locks in the diagnostic behavior added by Task 6: every silent drop +// site in udfWalkAll / collectFileExtents now logs at WARN level before +// continuing or breaking. +func TestUDFWalk_LogsWhenFileICBHasUnknownTag(t *testing.T) { + // Capture default slog output into a buffer for assertions. + var buf bytes.Buffer + prev := slog.Default() + slog.SetDefault(slog.New(slog.NewJSONHandler(&buf, &slog.HandlerOptions{Level: slog.LevelDebug}))) + t.Cleanup(func() { slog.SetDefault(prev) }) + + // Build a minimal in-memory blob: 32 sectors of zeros, with custom + // content at sector 10 (directory FE) and sector 20 (bogus tag). 
+ const dirSector = 10 + const bogusSector = 20 + image := make([]byte, iso9660SectorSize*32) + + // Sector 10: a UDF File Entry (tag 261) acting as a directory whose + // allocation type is 3 (inline), so udfReadDirEntries reads the FID + // straight out of buf[allocDescOff : allocDescOff+allocDescLen]. + dir := image[dirSector*iso9660SectorSize : (dirSector+1)*iso9660SectorSize] + binary.LittleEndian.PutUint16(dir[0:2], 261) // tag.id = 261 (File Entry) + dir[34] = 3 // icbtag.flags lower 3 bits = 3 (inline) + // FE plain (tag 261) AD-area header at buf[168..176]. + binary.LittleEndian.PutUint32(dir[168:172], 0) // L_EA (extended attrs length) + binary.LittleEndian.PutUint32(dir[172:176], 52) // L_AD (alloc-desc length, == one padded FID) + + // FID at dir[176..]: file identifier descriptor for BOGUS.M2TS + // pointing its ICB long_ad at sector `bogusSector`. + fid := dir[176:] + name := "BOGUS.M2TS" // 10 ASCII bytes + binary.LittleEndian.PutUint16(fid[0:2], 257) // FID tag id + fid[18] = 0 // file characteristics: regular file, neither parent nor deleted + fid[19] = byte(1 + len(name)) // L_FI (comp byte + ASCII chars) + binary.LittleEndian.PutUint32(fid[20:24], 2048) // long_ad.length + binary.LittleEndian.PutUint32(fid[24:28], bogusSector) + binary.LittleEndian.PutUint16(fid[28:30], 0) // long_ad.partition (0 → partStart-relative) + binary.LittleEndian.PutUint16(fid[36:38], 0) // L_IU (impl-use length) + fid[38] = 8 // CS0 compression code (8 = ASCII) + copy(fid[39:39+len(name)], name) + // Padded record length (38 header + 11 name = 49, padded to 52). We + // leave the trailing 3 bytes as zeros from the make(). + + // Sector 20: descriptor tag with the deliberately-bogus id 999. 
+ bogus := image[bogusSector*iso9660SectorSize : (bogusSector+1)*iso9660SectorSize] + binary.LittleEndian.PutUint16(bogus[0:2], 999) + + dirICB := udfLongAD{length: iso9660SectorSize, loc: udfLBA{block: dirSector, part: 0}} + entries, err := udfWalkAll(context.Background(), bytes.NewReader(image), dirICB, nil, 0, "") + if err != nil { + t.Fatalf("udfWalkAll: %v", err) + } + if len(entries) != 0 { + t.Fatalf("expected empty listing (bogus file should be dropped); got %d entries: %+v", len(entries), entries) + } + + // Inspect captured slog output. Parse line by line as JSON and count + // matches; the test fails if not exactly one matching WARN was emitted. + var matches int + for _, line := range strings.Split(strings.TrimRight(buf.String(), "\n"), "\n") { + if line == "" { + continue + } + var rec map[string]any + if err := json.Unmarshal([]byte(line), &rec); err != nil { + t.Fatalf("non-JSON log line %q: %v", line, err) + } + if rec["level"] != "WARN" { + continue + } + // Both path and tag_id must be set to disambiguate from any + // other (future) WARN site in the walk. + if rec["path"] != "BOGUS.M2TS" { + continue + } + // JSON-decoded numbers come back as float64; compare via that. + if v, ok := rec["tag_id"].(float64); !ok || int(v) != 999 { + continue + } + matches++ + } + if matches != 1 { + t.Fatalf("want exactly 1 matching WARN line (path=BOGUS.M2TS tag_id=999), got %d. Full log:\n%s", + matches, buf.String()) + } +} + // TestLocalISO_DiscoverBigFiles is a manual integration test: it walks a // real Blu-ray ISO from local disk and dumps a size-sorted summary. Skipped // unless ALTMOUNT_LOCAL_ISO is set, so CI stays unaffected. 
@@ -30,7 +130,7 @@ func TestLocalISO_DiscoverBigFiles(t *testing.T) { stat, _ := f.Stat() t.Logf("ISO: %s size=%d (%.2f GiB)", path, stat.Size(), float64(stat.Size())/(1<<30)) - entries, err := ListISOFiles(f) + entries, err := ListISOFiles(context.Background(), f) if err != nil { t.Fatalf("ListISOFiles: %v", err) } @@ -131,7 +231,7 @@ func TestLocalISO_CountExtentsForBigFiles(t *testing.T) { if e != nil { return } - entries, e := udfReadDirEntries(f, physSect, metaMap, partStart) + entries, e := udfReadDirEntries(context.Background(), f, physSect, metaMap, partStart) if e != nil { return } @@ -288,7 +388,7 @@ func TestLocalISO_CountAdjacentExtents(t *testing.T) { } defer f.Close() - entries, err := ListISOFiles(f) + entries, err := ListISOFiles(context.Background(), f) if err != nil { t.Fatalf("list: %v", err) } diff --git a/internal/importer/archive/iso/fs_test.go b/internal/importer/archive/iso/fs_test.go index 1b2bfe380..79d0474db 100644 --- a/internal/importer/archive/iso/fs_test.go +++ b/internal/importer/archive/iso/fs_test.go @@ -2,6 +2,7 @@ package iso import ( "bytes" + "context" "encoding/binary" "testing" ) @@ -26,7 +27,7 @@ func TestUDFReadDirEntriesTruncatedExtent(t *testing.T) { binary.LittleEndian.PutUint32(dirICBSector[176:180], 2796) binary.LittleEndian.PutUint32(dirICBSector[180:184], 20) - entries, err := udfReadDirEntries(bytes.NewReader(image), 10, nil, 0) + entries, err := udfReadDirEntries(context.Background(), bytes.NewReader(image), 10, nil, 0) if err != nil { t.Fatalf("udfReadDirEntries() error = %v", err) } diff --git a/internal/importer/archive/iso/processor.go b/internal/importer/archive/iso/processor.go index 71076a293..bd9ec5738 100644 --- a/internal/importer/archive/iso/processor.go +++ b/internal/importer/archive/iso/processor.go @@ -35,7 +35,7 @@ func AnalyzeISO( } defer closer.Close() - entries, err := ListISOFiles(rs) + entries, err := ListISOFiles(ctx, rs) if err != nil { return nil, fmt.Errorf("iso: listing files in %q: %w", 
src.Filename, err) } From 330dd69d7db5cb03746b3734ffe41838d3c46636 Mon Sep 17 00:00:00 2001 From: javi11 Date: Mon, 25 May 2026 20:52:49 +0200 Subject: [PATCH 15/30] docs(iso): clarify ctx threading rationale and slog test parallel-safety --- internal/importer/archive/iso/fs.go | 6 +++--- internal/importer/archive/iso/fs_local_test.go | 6 +++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/internal/importer/archive/iso/fs.go b/internal/importer/archive/iso/fs.go index 087ec5aaf..e958b5969 100644 --- a/internal/importer/archive/iso/fs.go +++ b/internal/importer/archive/iso/fs.go @@ -418,9 +418,9 @@ func readICBExtent(rs io.ReadSeeker, loc udfLBA, length int, metaMap []udfMetaSp } // udfReadDirEntries reads all File Identifier Descriptor records from a -// File Entry at physSect. ctx is threaded purely for symmetry with the -// rest of the UDF walk so future warn-log hooks can use it without -// changing the signature again. +// File Entry at physSect. ctx is threaded for upcoming Indirect Entry +// (tag 248) follow logic that will emit a debug log on each redirect, +// and as a hook for future warn-log additions in this function. func udfReadDirEntries(ctx context.Context, rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, partStart uint32) ([]udfDirEntry, error) { _ = ctx tag, buf, err := udfReadTag(rs, physSect) diff --git a/internal/importer/archive/iso/fs_local_test.go b/internal/importer/archive/iso/fs_local_test.go index efbdf3a6a..ee5c821aa 100644 --- a/internal/importer/archive/iso/fs_local_test.go +++ b/internal/importer/archive/iso/fs_local_test.go @@ -26,7 +26,11 @@ import ( // site in udfWalkAll / collectFileExtents now logs at WARN level before // continuing or breaking. func TestUDFWalk_LogsWhenFileICBHasUnknownTag(t *testing.T) { - // Capture default slog output into a buffer for assertions. + // Capture default slog output into a buffer for assertions. NOTE: this + // test mutates the process-wide default slog logger. 
Do NOT call + // t.Parallel() here, and do not parallelise any other test in this + // package that touches slog output, or log lines will bleed between + // tests and the matches==1 assertion below will flake. var buf bytes.Buffer prev := slog.Default() slog.SetDefault(slog.New(slog.NewJSONHandler(&buf, &slog.HandlerOptions{Level: slog.LevelDebug}))) From 05976c86db0ab3d2a84b49910376f6fc5b62d2c3 Mon Sep 17 00:00:00 2001 From: javi11 Date: Mon, 25 May 2026 20:56:15 +0200 Subject: [PATCH 16/30] fix(iso): follow UDF Indirect Entry (tag 248) chains in file and directory enumeration --- internal/importer/archive/iso/fs.go | 40 +++++- .../importer/archive/iso/fs_local_test.go | 134 ++++++++++++++++++ 2 files changed, 171 insertions(+), 3 deletions(-) diff --git a/internal/importer/archive/iso/fs.go b/internal/importer/archive/iso/fs.go index e958b5969..4c360e056 100644 --- a/internal/importer/archive/iso/fs.go +++ b/internal/importer/archive/iso/fs.go @@ -207,6 +207,35 @@ func udfReadTag(rs io.ReadSeeker, sectorNum uint32) (udfTag, []byte, error) { return t, buf, nil } +// udfFollowIndirect resolves a chain of Indirect Entries (tag 248) +// starting at physSect and returns the physical sector of the real +// File Entry plus its tag and raw buffer. Per UDF §14.7 an Indirect +// Entry is a 16-byte descriptor tag + 20-byte ICBTag + 16-byte +// long_ad at offset 36. Depth-capped at 16 to bound runaway on a +// malformed disc that points an Indirect Entry chain back at itself. 
+func udfFollowIndirect(ctx context.Context, rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, partStart uint32) (uint32, udfTag, []byte, error) { + for depth := 0; depth < 16; depth++ { + tag, buf, err := udfReadTag(rs, physSect) + if err != nil { + return 0, udfTag{}, nil, err + } + if tag.id != 248 { + return physSect, tag, buf, nil + } + if len(buf) < 36+16 { + return 0, udfTag{}, nil, fmt.Errorf("udf: indirect entry at sector %d too short", physSect) + } + next := udfParseLongAD(buf, 36) + resolved, err := udfResolveICB(next.loc, metaMap, partStart) + if err != nil { + return 0, udfTag{}, nil, fmt.Errorf("udf: resolving indirect ICB: %w", err) + } + slog.DebugContext(ctx, "UDF: followed Indirect Entry", "from", physSect, "to", resolved, "depth", depth) + physSect = resolved + } + return 0, udfTag{}, nil, fmt.Errorf("udf: indirect entry chain exceeds depth cap (16)") +} + // udfParseLongAD parses a long_ad from buf[off:]. func udfParseLongAD(buf []byte, off int) udfLongAD { length := binary.LittleEndian.Uint32(buf[off:]) @@ -422,8 +451,9 @@ func readICBExtent(rs io.ReadSeeker, loc udfLBA, length int, metaMap []udfMetaSp // (tag 248) follow logic that will emit a debug log on each redirect, // and as a hook for future warn-log additions in this function. func udfReadDirEntries(ctx context.Context, rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, partStart uint32) ([]udfDirEntry, error) { - _ = ctx - tag, buf, err := udfReadTag(rs, physSect) + // Transparently traverse any Indirect Entry (tag 248) chain on a + // directory ICB. udfFollowIndirect emits a Debug log per redirect. 
+ physSect, tag, buf, err := udfFollowIndirect(ctx, rs, physSect, metaMap, partStart) if err != nil { return nil, fmt.Errorf("reading dir ICB at %d: %w", physSect, err) } @@ -618,7 +648,11 @@ func udfWalkAll(ctx context.Context, rs io.ReadSeeker, dirICB udfLongAD, metaMap "path", entryPath, "icb_block", e.icb.loc.block, "error", rerr) continue } - feTag, feBuf, rerr := udfReadTag(rs, fePhys) + // Transparently follow any Indirect Entry (tag 248) chain. fePhys + // is reassigned to the resolved post-redirect sector so the + // downstream collectFileExtents call uses the real FE for any + // embedded-data ("embeddedFEPhys") accounting. + fePhys, feTag, feBuf, rerr := udfFollowIndirect(ctx, rs, fePhys, metaMap, partStart) if rerr != nil { slog.WarnContext(ctx, "UDF: file ICB read failed, dropping file from listing", "path", entryPath, "phys_sector", fePhys, "error", rerr) diff --git a/internal/importer/archive/iso/fs_local_test.go b/internal/importer/archive/iso/fs_local_test.go index ee5c821aa..f8464b64a 100644 --- a/internal/importer/archive/iso/fs_local_test.go +++ b/internal/importer/archive/iso/fs_local_test.go @@ -112,6 +112,140 @@ func TestUDFWalk_LogsWhenFileICBHasUnknownTag(t *testing.T) { } } +// TestUDFWalk_FollowsIndirectEntryChain drives a synthetic UDF blob where +// a file's ICB points at a chain of Indirect Entries (tag 248, per UDF +// §14.7, as used by ICB strategy type 4096) before reaching the real +// File Entry. The walker must transparently follow the chain and surface +// the file with its real size and extents. +// +// Two sub-cases: +// - "single_hop": FID → IE(248) → FE(261) +// - "multi_hop": FID → IE(248) → IE(248) → FE(261) +// +// Each Indirect Entry is laid out per UDF §14.7: +// +// bytes 0..15 descriptor tag (id = 248) +// bytes 16..35 ICBTag (20 bytes; zeros here, strategy etc. 
not validated) +// bytes 36..51 long_ad (16 bytes) → next ICB in chain +func TestUDFWalk_FollowsIndirectEntryChain(t *testing.T) { + // buildImage constructs an in-memory UDF blob and returns it along with + // the directory ICB. The chain layout: + // FID(MOVIE.M2TS) → IE@hops[0] → IE@hops[1] → ... → FE@feSector + // where the file's data extent lives at dataSector with size dataSize. + buildImage := func(t *testing.T, hops []uint32, feSector, dataSector uint32, dataSize uint32) ([]byte, udfLongAD) { + t.Helper() + const dirSector = 10 + // Size the image to comfortably cover all referenced sectors. + maxSector := feSector + if dataSector > maxSector { + maxSector = dataSector + } + for _, h := range hops { + if h > maxSector { + maxSector = h + } + } + image := make([]byte, iso9660SectorSize*int(maxSector+2)) + + // Directory FE at dirSector — same pattern as the test above: + // tag 261, allocType 3 (inline), one FID for MOVIE.M2TS. + dir := image[dirSector*iso9660SectorSize : (dirSector+1)*iso9660SectorSize] + binary.LittleEndian.PutUint16(dir[0:2], 261) // File Entry + dir[34] = 3 // inline alloc type + binary.LittleEndian.PutUint32(dir[168:172], 0) + binary.LittleEndian.PutUint32(dir[172:176], 52) // one padded FID + + fid := dir[176:] + name := "MOVIE.M2TS" // 10 ASCII bytes → recLen 38+11=49 → padded 52 + binary.LittleEndian.PutUint16(fid[0:2], 257) // FID + fid[18] = 0 // regular file + fid[19] = byte(1 + len(name)) // L_FI + binary.LittleEndian.PutUint32(fid[20:24], 2048) // long_ad.length → hops[0] sector + binary.LittleEndian.PutUint32(fid[24:28], hops[0]) + binary.LittleEndian.PutUint16(fid[28:30], 0) // partition 0 → partStart-relative + binary.LittleEndian.PutUint16(fid[36:38], 0) // L_IU + fid[38] = 8 // CS0 ASCII + copy(fid[39:39+len(name)], name) + + // Indirect Entries: each tag-248 sector points to the next. 
+ for i, hop := range hops { + ie := image[hop*iso9660SectorSize : (hop+1)*iso9660SectorSize] + binary.LittleEndian.PutUint16(ie[0:2], 248) // Indirect Entry tag + // bytes 16..35 are ICBTag — leave zeroed (not validated). + // long_ad at offset 36: length(4)+block(4)+part(2)+implUse(2) + var nextSector uint32 + if i+1 < len(hops) { + nextSector = hops[i+1] + } else { + nextSector = feSector + } + binary.LittleEndian.PutUint32(ie[36:40], 2048) // length + binary.LittleEndian.PutUint32(ie[40:44], nextSector) // block + binary.LittleEndian.PutUint16(ie[44:46], 0) // partition + } + + // Real File Entry at feSector: tag 261, allocType 0 (short_ad), + // one short_ad pointing at dataSector with the file size. + fe := image[feSector*iso9660SectorSize : (feSector+1)*iso9660SectorSize] + binary.LittleEndian.PutUint16(fe[0:2], 261) // File Entry + fe[34] = 0 // allocType 0 = short_ad + binary.LittleEndian.PutUint64(fe[56:64], uint64(dataSize)) + binary.LittleEndian.PutUint32(fe[168:172], 0) // L_EA + binary.LittleEndian.PutUint32(fe[172:176], 8) // L_AD = one short_ad + binary.LittleEndian.PutUint32(fe[176:180], dataSize) // short_ad.length (adType 0 in high 2 bits) + binary.LittleEndian.PutUint32(fe[180:184], dataSector) // short_ad.block + + dirICB := udfLongAD{length: iso9660SectorSize, loc: udfLBA{block: dirSector, part: 0}} + return image, dirICB + } + + assertFound := func(t *testing.T, entries []isoFileEntry, wantSize uint64, wantLBA uint32) { + t.Helper() + if len(entries) != 1 { + t.Fatalf("want exactly 1 entry, got %d: %+v", len(entries), entries) + } + got := entries[0] + if got.path != "MOVIE.M2TS" { + t.Errorf("path: want MOVIE.M2TS, got %q", got.path) + } + if got.size != wantSize { + t.Errorf("size: want %d, got %d", wantSize, got.size) + } + if len(got.extents) != 1 { + t.Fatalf("extents: want 1, got %d (%+v)", len(got.extents), got.extents) + } + if got.extents[0].lba != wantLBA { + t.Errorf("extents[0].lba: want %d, got %d", wantLBA, got.extents[0].lba) + 
} + } + + t.Run("single_hop", func(t *testing.T) { + const ieSector = 20 + const feSector = 30 + const dataSector = 40 + const dataSize = 4096 + image, dirICB := buildImage(t, []uint32{ieSector}, feSector, dataSector, dataSize) + entries, err := udfWalkAll(context.Background(), bytes.NewReader(image), dirICB, nil, 0, "") + if err != nil { + t.Fatalf("udfWalkAll: %v", err) + } + assertFound(t, entries, dataSize, dataSector) + }) + + t.Run("multi_hop", func(t *testing.T) { + // FID → IE@20 → IE@25 → FE@30 → data@40 + const feSector = 30 + const dataSector = 40 + const dataSize = 4096 + image, dirICB := buildImage(t, []uint32{20, 25}, feSector, dataSector, dataSize) + entries, err := udfWalkAll(context.Background(), bytes.NewReader(image), dirICB, nil, 0, "") + if err != nil { + t.Fatalf("udfWalkAll: %v", err) + } + assertFound(t, entries, dataSize, dataSector) + }) +} + // TestLocalISO_DiscoverBigFiles is a manual integration test: it walks a // real Blu-ray ISO from local disk and dumps a size-sorted summary. Skipped // unless ALTMOUNT_LOCAL_ISO is set, so CI stays unaffected. From 3e34d7fc10801d0f8d493b06f717514475e87af1 Mon Sep 17 00:00:00 2001 From: javi11 Date: Mon, 25 May 2026 20:59:59 +0200 Subject: [PATCH 17/30] refactor(iso): named depth-cap constant, wrap indirect read error, modernise loops --- internal/importer/archive/iso/fs.go | 15 ++++++++++----- internal/importer/archive/iso/fs_local_test.go | 11 +++-------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/internal/importer/archive/iso/fs.go b/internal/importer/archive/iso/fs.go index 4c360e056..31d76f604 100644 --- a/internal/importer/archive/iso/fs.go +++ b/internal/importer/archive/iso/fs.go @@ -207,17 +207,22 @@ func udfReadTag(rs io.ReadSeeker, sectorNum uint32) (udfTag, []byte, error) { return t, buf, nil } +// udfMaxIndirectDepth caps how many Indirect Entry (tag 248) hops +// udfFollowIndirect will traverse before declaring a malformed chain. 
+// 16 is a conservative importer-side bound, not a limit taken from the UDF spec or another implementation. +const udfMaxIndirectDepth = 16 + // udfFollowIndirect resolves a chain of Indirect Entries (tag 248) // starting at physSect and returns the physical sector of the real // File Entry plus its tag and raw buffer. Per UDF §14.7 an Indirect // Entry is a 16-byte descriptor tag + 20-byte ICBTag + 16-byte -// long_ad at offset 36. Depth-capped at 16 to bound runaway on a -// malformed disc that points an Indirect Entry chain back at itself. +// long_ad at offset 36. Depth-capped to bound runaway on a malformed +// disc that points an Indirect Entry chain back at itself. func udfFollowIndirect(ctx context.Context, rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, partStart uint32) (uint32, udfTag, []byte, error) { - for depth := 0; depth < 16; depth++ { + for depth := range udfMaxIndirectDepth { tag, buf, err := udfReadTag(rs, physSect) if err != nil { - return 0, udfTag{}, nil, err + return 0, udfTag{}, nil, fmt.Errorf("udf: reading indirect entry at sector %d: %w", physSect, err) } if tag.id != 248 { return physSect, tag, buf, nil @@ -233,7 +238,7 @@ func udfFollowIndirect(ctx context.Context, rs io.ReadSeeker, physSect uint32, m slog.DebugContext(ctx, "UDF: followed Indirect Entry", "from", physSect, "to", resolved, "depth", depth) physSect = resolved } - return 0, udfTag{}, nil, fmt.Errorf("udf: indirect entry chain exceeds depth cap (16)") + return 0, udfTag{}, nil, fmt.Errorf("udf: indirect entry chain exceeds depth cap (%d)", udfMaxIndirectDepth) } // udfParseLongAD parses a long_ad from buf[off:]. 
diff --git a/internal/importer/archive/iso/fs_local_test.go b/internal/importer/archive/iso/fs_local_test.go index f8464b64a..3f927fb5d 100644 --- a/internal/importer/archive/iso/fs_local_test.go +++ b/internal/importer/archive/iso/fs_local_test.go @@ -84,7 +84,7 @@ func TestUDFWalk_LogsWhenFileICBHasUnknownTag(t *testing.T) { // Inspect captured slog output. 
Parse line by line as JSON and count // matches; the test fails if not exactly one matching WARN was emitted. var matches int - for _, line := range strings.Split(strings.TrimRight(buf.String(), "\n"), "\n") { + for line := range strings.SplitSeq(strings.TrimRight(buf.String(), "\n"), "\n") { if line == "" { continue } @@ -136,14 +136,9 @@ func TestUDFWalk_FollowsIndirectEntryChain(t *testing.T) { t.Helper() const dirSector = 10 // Size the image to comfortably cover all referenced sectors. - maxSector := feSector - if dataSector > maxSector { - maxSector = dataSector - } + maxSector := max(feSector, dataSector) for _, h := range hops { - if h > maxSector { - maxSector = h - } + maxSector = max(maxSector, h) } image := make([]byte, iso9660SectorSize*int(maxSector+2)) From 5782cdc92d46e69c15fe681dc96653bf59c907ff Mon Sep 17 00:00:00 2001 From: javi11 Date: Tue, 26 May 2026 10:12:46 +0200 Subject: [PATCH 18/30] feat(iso): preserve underlying parse errors in ListISOFiles fallback --- internal/importer/archive/iso/fs.go | 44 +++++++++++++------ .../importer/archive/iso/fs_local_test.go | 25 +++++++++++ 2 files changed, 55 insertions(+), 14 deletions(-) diff --git a/internal/importer/archive/iso/fs.go b/internal/importer/archive/iso/fs.go index 31d76f604..3f2ade877 100644 --- a/internal/importer/archive/iso/fs.go +++ b/internal/importer/archive/iso/fs.go @@ -896,24 +896,40 @@ func coalesceExtents(in []isoExtent) []isoExtent { // through the UDF walk so silent-drop sites can emit slog.WarnContext logs // for diagnosis without polluting the io.ReadSeeker signature. 
func ListISOFiles(ctx context.Context, rs io.ReadSeeker) ([]isoFileEntry, error) { - // Try UDF first (handles Blu-ray and modern discs with correct 64-bit sizes) - if partStart, metaMap, rootICB, err := udfSetup(rs); err == nil { + // Track the underlying reason both layers failed so the combined-failure + // error message can point an operator at the actual cause (transient + // network read, malformed structure, unrecognised version, ...). + var udfErr, isoErr error + + // Try UDF first (handles Blu-ray and modern discs with correct 64-bit sizes). + if partStart, metaMap, rootICB, err := udfSetup(rs); err != nil { + udfErr = err + } else { files, err := udfWalkAll(ctx, rs, rootICB, metaMap, partStart, "") - if err == nil && len(files) > 0 { + switch { + case err != nil: + udfErr = fmt.Errorf("walk: %w", err) + case len(files) == 0: + udfErr = fmt.Errorf("walk returned no files") + default: return files, nil } } - // Fall back to ISO 9660 + + // Fall back to ISO 9660. pvd := make([]byte, iso9660SectorSize) - if _, err := rs.Seek(16*iso9660SectorSize, io.SeekStart); err == nil { - if _, err := io.ReadFull(rs, pvd); err == nil { - if pvd[0] == 1 && string(pvd[1:6]) == "CD001" { - rootRec := pvd[156:] - dirLBA := binary.LittleEndian.Uint32(rootRec[2:6]) - dirSize := uint64(binary.LittleEndian.Uint32(rootRec[10:14])) - return iso9660WalkAll(rs, dirLBA, dirSize, "") - } - } + if _, err := rs.Seek(16*iso9660SectorSize, io.SeekStart); err != nil { + isoErr = fmt.Errorf("seek PVD: %w", err) + } else if _, err := io.ReadFull(rs, pvd); err != nil { + isoErr = fmt.Errorf("read PVD: %w", err) + } else if pvd[0] != 1 || string(pvd[1:6]) != "CD001" { + isoErr = fmt.Errorf("invalid PVD header (type=%d magic=%q)", pvd[0], pvd[1:6]) + } else { + rootRec := pvd[156:] + dirLBA := binary.LittleEndian.Uint32(rootRec[2:6]) + dirSize := uint64(binary.LittleEndian.Uint32(rootRec[10:14])) + return iso9660WalkAll(rs, dirLBA, dirSize, "") } - return nil, fmt.Errorf("iso: not a valid ISO 
9660 or UDF image") + + return nil, fmt.Errorf("iso: not a valid ISO 9660 or UDF image (udf: %v; iso9660: %v)", udfErr, isoErr) } diff --git a/internal/importer/archive/iso/fs_local_test.go b/internal/importer/archive/iso/fs_local_test.go index 3f927fb5d..35d1442eb 100644 --- a/internal/importer/archive/iso/fs_local_test.go +++ b/internal/importer/archive/iso/fs_local_test.go @@ -553,3 +553,28 @@ func TestLocalISO_CountAdjacentExtents(t *testing.T) { float64(len(e.extents))/float64(distinctRuns)) } } + +// TestListISOFiles_PreservesBothUnderlyingErrors drives ListISOFiles with a +// blob that is neither a valid UDF nor a valid ISO 9660 image. The function +// historically returned a single opaque "not a valid ISO 9660 or UDF image" +// error which hid the actual cause — Task 9 changed it to wrap both the +// underlying UDF error and the ISO 9660 fallback error so operators can +// distinguish transient network failures from genuine structural problems. +func TestListISOFiles_PreservesBothUnderlyingErrors(t *testing.T) { + // 600 KB of zeros — large enough to satisfy reads at both the UDF + // AVDP sector (256 → byte 524288) and the ISO 9660 PVD sector + // (16 → byte 32768), but the bytes don't form valid descriptors + // for either format. 
+ blob := make([]byte, 600*1024) + _, err := ListISOFiles(context.Background(), bytes.NewReader(blob)) + if err == nil { + t.Fatal("expected error from ListISOFiles on an invalid blob") + } + msg := err.Error() + if !strings.Contains(msg, "udf:") { + t.Errorf("error must mention the underlying UDF failure (substring \"udf:\") — got: %q", msg) + } + if !strings.Contains(msg, "iso9660:") { + t.Errorf("error must mention the underlying ISO 9660 failure (substring \"iso9660:\") — got: %q", msg) + } +} From 53194c8ac6047eebed5f3ea7236a282adb7f17a9 Mon Sep 17 00:00:00 2001 From: javi11 Date: Tue, 26 May 2026 10:27:25 +0200 Subject: [PATCH 19/30] feat(iso): honour context cancellation in UDF walk and AED chain Today udfWalkAll has no ctx.Err() check between files, so cancellation only surfaces when the next sector read times out at the NNTP layer. On a degraded network this can stretch a normal ~16ms/file walk into minutes per ISO. Same for the AED-chain loop in collectFileExtents. Add a ctx.Err() check at the top of each loop: - udfWalkAll: returns the partial result + the cancellation error immediately. iso_expansion.go already treats any error from the walk as 'keep ISO as-is', so no caller change needed. - collectFileExtents: returns []isoExtent (no error), so emit a WARN in the existing 'AED chain truncated' style and break out of the chain cleanly with whatever extents we have. New test TestUDFWalk_StopsWhenContextCanceled builds a 3-FID synthetic UDF blob, cancels the ctx before calling the walker, and asserts that udfWalkAll returns context.Canceled within 100ms with an empty result (i.e. no file ICB was read past the cancel point). 
--- internal/importer/archive/iso/fs.go | 10 +++ .../importer/archive/iso/fs_local_test.go | 71 +++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/internal/importer/archive/iso/fs.go b/internal/importer/archive/iso/fs.go index 3f2ade877..5a755e7b8 100644 --- a/internal/importer/archive/iso/fs.go +++ b/internal/importer/archive/iso/fs.go @@ -638,6 +638,9 @@ func udfWalkAll(ctx context.Context, rs io.ReadSeeker, dirICB udfLongAD, metaMap } var result []isoFileEntry for _, e := range entries { + if err := ctx.Err(); err != nil { + return result, err + } entryPath := e.name if prefix != "" { entryPath = prefix + "/" + e.name @@ -747,6 +750,13 @@ func collectFileExtents(ctx context.Context, rs io.ReadSeeker, inlineADs []byte, chase := inlineADs safety := 0 for { + if err := ctx.Err(); err != nil { + slog.WarnContext(ctx, "UDF: AED chain truncated", + "reason", "context canceled", + "extents_so_far", len(extents), + "error", err) + break + } safety++ if safety > 4096 { break // pathological — bail to avoid runaway IO diff --git a/internal/importer/archive/iso/fs_local_test.go b/internal/importer/archive/iso/fs_local_test.go index 35d1442eb..36d9d024f 100644 --- a/internal/importer/archive/iso/fs_local_test.go +++ b/internal/importer/archive/iso/fs_local_test.go @@ -5,12 +5,14 @@ import ( "context" "encoding/binary" "encoding/json" + "errors" "fmt" "log/slog" "os" "sort" "strings" "testing" + "time" ) // TestUDFWalk_LogsWhenFileICBHasUnknownTag drives a synthetic UDF blob with @@ -578,3 +580,72 @@ func TestListISOFiles_PreservesBothUnderlyingErrors(t *testing.T) { t.Errorf("error must mention the underlying ISO 9660 failure (substring \"iso9660:\") — got: %q", msg) } } + +// TestUDFWalk_StopsWhenContextCanceled builds a synthetic UDF blob whose +// directory contains 3 regular FIDs, then calls udfWalkAll with an +// already-canceled context. The walker must: +// +// 1. observe ctx.Err() before processing any file's ICB, +// 2. 
return context.Canceled (or a wrapping error) within 100 ms, +// 3. return an empty result slice (no file processed past the check). +// +// This locks in Task 11 behavior: cancellation propagates immediately +// from the entries-loop, instead of waiting for the next sector read +// to time out at the NNTP layer. +func TestUDFWalk_StopsWhenContextCanceled(t *testing.T) { + const dirSector = 10 + // Three FIDs of 52 bytes each = 156 bytes of allocation descriptors. + const fidLen = 52 + const numFiles = 3 + image := make([]byte, iso9660SectorSize*32) + + dir := image[dirSector*iso9660SectorSize : (dirSector+1)*iso9660SectorSize] + binary.LittleEndian.PutUint16(dir[0:2], 261) // tag.id = 261 (File Entry) + dir[34] = 3 // inline alloc type + binary.LittleEndian.PutUint32(dir[168:172], 0) + binary.LittleEndian.PutUint32(dir[172:176], fidLen*numFiles) // L_AD = 3 padded FIDs + + // Write 3 FIDs back-to-back at dir[176..]. Each points at a unique + // sector containing a tag-261 FE with a single short_ad; that the + // walker NEVER reads these is exactly what this test asserts. 
+ for i := 0; i < numFiles; i++ { + off := 176 + i*fidLen + fid := dir[off : off+fidLen] + name := fmt.Sprintf("FILE%d.M2TS", i) // 10-11 ASCII bytes + binary.LittleEndian.PutUint16(fid[0:2], 257) + fid[18] = 0 + fid[19] = byte(1 + len(name)) + binary.LittleEndian.PutUint32(fid[20:24], 2048) + binary.LittleEndian.PutUint32(fid[24:28], uint32(20+i)) // points at sectors 20,21,22 + binary.LittleEndian.PutUint16(fid[28:30], 0) + binary.LittleEndian.PutUint16(fid[36:38], 0) + fid[38] = 8 + copy(fid[39:39+len(name)], name) + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() // canceled before the call — ctx.Err() != nil on entry + + dirICB := udfLongAD{length: iso9660SectorSize, loc: udfLBA{block: dirSector, part: 0}} + + done := make(chan struct{}) + var entries []isoFileEntry + var err error + go func() { + entries, err = udfWalkAll(ctx, bytes.NewReader(image), dirICB, nil, 0, "") + close(done) + }() + + select { + case <-done: + case <-time.After(100 * time.Millisecond): + t.Fatal("udfWalkAll did not return within 100ms of a canceled ctx — cancellation is not being honored at the entries-loop") + } + + if !errors.Is(err, context.Canceled) { + t.Fatalf("want err wrapping context.Canceled, got: %v", err) + } + if len(entries) != 0 { + t.Fatalf("want empty result on cancel before any file processed, got %d entries: %+v", len(entries), entries) + } +} From 98b92246b186a0ef7476d79ca5ea20b10eedc093 Mon Sep 17 00:00:00 2001 From: javi11 Date: Tue, 26 May 2026 10:30:47 +0200 Subject: [PATCH 20/30] refactor(iso): cancellation log at Debug, partial-return doc, test hardening --- internal/importer/archive/iso/fs.go | 12 +++++++++++- internal/importer/archive/iso/fs_local_test.go | 16 +++++++++++++--- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/internal/importer/archive/iso/fs.go b/internal/importer/archive/iso/fs.go index 5a755e7b8..824d99a45 100644 --- a/internal/importer/archive/iso/fs.go +++ b/internal/importer/archive/iso/fs.go @@ 
-638,6 +638,11 @@ func udfWalkAll(ctx context.Context, rs io.ReadSeeker, dirICB udfLongAD, metaMap } var result []isoFileEntry for _, e := range entries { + // Return whatever was collected so far along with the cancel error. + // The caller (AnalyzeISO -> iso_expansion.go) treats any non-nil + // error as "keep ISO as-is", so partial vs nil doesn't change the + // outcome — but preserving the slice gives downstream debug logs + // an accurate count of what was enumerated before the cancel. if err := ctx.Err(); err != nil { return result, err } @@ -751,7 +756,12 @@ func collectFileExtents(ctx context.Context, rs io.ReadSeeker, inlineADs []byte, safety := 0 for { if err := ctx.Err(); err != nil { - slog.WarnContext(ctx, "UDF: AED chain truncated", + // Cancellation is a normal operator-initiated event (shutdown, + // per-ISO deadline, user-cancelled import) — log at Debug so + // it doesn't pollute monitoring dashboards. The peer WARN sites + // in this loop stay at WARN because they indicate genuinely + // corrupt or unreachable AEDs. + slog.DebugContext(ctx, "UDF: AED chain truncated", "reason", "context canceled", "extents_so_far", len(extents), "error", err) diff --git a/internal/importer/archive/iso/fs_local_test.go b/internal/importer/archive/iso/fs_local_test.go index 36d9d024f..3fbcc4885 100644 --- a/internal/importer/archive/iso/fs_local_test.go +++ b/internal/importer/archive/iso/fs_local_test.go @@ -608,7 +608,7 @@ func TestUDFWalk_StopsWhenContextCanceled(t *testing.T) { // Write 3 FIDs back-to-back at dir[176..]. Each points at a unique // sector containing a tag-261 FE with a single short_ad; that the // walker NEVER reads these is exactly what this test asserts. 
- for i := 0; i < numFiles; i++ { + for i := range numFiles { off := 176 + i*fidLen fid := dir[off : off+fidLen] name := fmt.Sprintf("FILE%d.M2TS", i) // 10-11 ASCII bytes @@ -636,15 +636,25 @@ func TestUDFWalk_StopsWhenContextCanceled(t *testing.T) { close(done) }() + // 1-second deadline: the function should return in microseconds since + // ctx.Err() is checked before any I/O, but goroutine scheduling on a + // loaded CI runner can add tens of milliseconds. 1s is plenty of + // headroom while still failing fast if the cancellation check is + // genuinely missing. select { case <-done: - case <-time.After(100 * time.Millisecond): - t.Fatal("udfWalkAll did not return within 100ms of a canceled ctx — cancellation is not being honored at the entries-loop") + case <-time.After(1 * time.Second): + t.Fatal("udfWalkAll did not return within 1s of a canceled ctx — cancellation is not being honored at the entries-loop") } if !errors.Is(err, context.Canceled) { t.Fatalf("want err wrapping context.Canceled, got: %v", err) } + // The ctx check fires at the top of the loop BEFORE any FID is + // processed, so result is empty here. If cancel had happened + // mid-walk a non-empty partial result would also be valid — the + // production contract (udfWalkAll returns "what was collected so + // far" plus the cancel error) tolerates both shapes. if len(entries) != 0 { t.Fatalf("want empty result on cancel before any file processed, got %d entries: %+v", len(entries), entries) } From 1f2e6b50161df7c0c9fb0c19c549674866182573 Mon Sep 17 00:00:00 2001 From: javi11 Date: Tue, 26 May 2026 10:39:33 +0200 Subject: [PATCH 21/30] feat(iso): bound AnalyzeISO with a configurable per-ISO timeout A degraded NNTP provider could stall iso.AnalyzeISO for 9+ minutes per disc, blocking the whole importer. 
Wrap each AnalyzeISO call in a hard context.WithTimeout (default 120s, knob: Import.IsoAnalyzeTimeoutSeconds) so the existing fallback at iso_expansion.go takes over within a bounded window instead of waiting indefinitely. --- internal/config/accessors.go | 10 +++ internal/config/manager.go | 5 +- internal/importer/archive/iso/processor.go | 24 ++++++ .../importer/archive/iso/processor_test.go | 80 +++++++++++++++++++ internal/importer/archive/iso_expansion.go | 3 +- internal/importer/archive/rar/aggregator.go | 4 +- .../importer/archive/sevenzip/aggregator.go | 4 +- internal/importer/processor.go | 2 + 8 files changed, 128 insertions(+), 4 deletions(-) create mode 100644 internal/importer/archive/iso/processor_test.go diff --git a/internal/config/accessors.go b/internal/config/accessors.go index ccabbbec8..6ec1174d1 100644 --- a/internal/config/accessors.go +++ b/internal/config/accessors.go @@ -126,6 +126,16 @@ func (c *Config) GetReadTimeout() time.Duration { return time.Duration(c.GetReadTimeoutSeconds()) * time.Second } +// GetIsoAnalyzeTimeout returns the per-ISO analyse deadline with a 120s +// default fallback. This bounds the entire iso.AnalyzeISO walk so a +// degraded NNTP provider cannot stall the importer indefinitely. +func (c *Config) GetIsoAnalyzeTimeout() time.Duration { + if c.Import.IsoAnalyzeTimeoutSeconds == nil || *c.Import.IsoAnalyzeTimeoutSeconds <= 0 { + return 120 * time.Second + } + return time.Duration(*c.Import.IsoAnalyzeTimeoutSeconds) * time.Second +} + // GetMetadataBackupKeep returns the number of metadata backups to keep with a default fallback. 
func (c *Config) GetMetadataBackupKeep() int { if c.Metadata.Backup.KeepBackups <= 0 { diff --git a/internal/config/manager.go b/internal/config/manager.go index d708fcdc3..810a6e021 100644 --- a/internal/config/manager.go +++ b/internal/config/manager.go @@ -265,6 +265,7 @@ type ImportConfig struct { MaxDownloadPrefetch int `yaml:"max_download_prefetch" mapstructure:"max_download_prefetch" json:"max_download_prefetch"` SegmentSamplePercentage int `yaml:"segment_sample_percentage" mapstructure:"segment_sample_percentage" json:"segment_sample_percentage"` ReadTimeoutSeconds int `yaml:"read_timeout_seconds" mapstructure:"read_timeout_seconds" json:"read_timeout_seconds"` + IsoAnalyzeTimeoutSeconds *int `yaml:"iso_analyze_timeout_seconds" mapstructure:"iso_analyze_timeout_seconds" json:"iso_analyze_timeout_seconds,omitempty"` ImportStrategy ImportStrategy `yaml:"import_strategy" mapstructure:"import_strategy" json:"import_strategy"` ImportDir *string `yaml:"import_dir" mapstructure:"import_dir" json:"import_dir,omitempty"` WatchDir *string `yaml:"watch_dir" mapstructure:"watch_dir" json:"watch_dir,omitempty"` @@ -1247,6 +1248,7 @@ func DefaultConfig(configDir ...string) *Config { watchIntervalSeconds := 10 // Default watch interval failedItemRetentionHours := 24 // Default: auto-remove failed items after 24 hours historyRetentionDays := 90 // Default: auto-remove import history after 90 days (3 months) + isoAnalyzeTimeoutSeconds := 120 // Default: 120s hard cap per ISO analyse (prevents stuck NNTP from stalling import for 9+ minutes) cleanupAutomaticImportFailure := false metadataBackupEnabled := false failureMaskingEnabled := false @@ -1378,7 +1380,8 @@ func DefaultConfig(configDir ...string) *Config { MaxImportConnections: 5, // Default: 5 concurrent NNTP connections for validation and archive processing MaxDownloadPrefetch: 10, // Default: 10 segments prefetched ahead for archive analysis SegmentSamplePercentage: 1, // Default: 1% segment sampling - 
ReadTimeoutSeconds: 300, // Default: 5 minutes read timeout + ReadTimeoutSeconds: 300, // Default: 5 minutes read timeout + IsoAnalyzeTimeoutSeconds: &isoAnalyzeTimeoutSeconds, ImportStrategy: ImportStrategyNone, // Default: no import strategy (direct import) ImportDir: nil, // No default import directory WatchDir: nil, diff --git a/internal/importer/archive/iso/processor.go b/internal/importer/archive/iso/processor.go index bd9ec5738..c6fdd3b9f 100644 --- a/internal/importer/archive/iso/processor.go +++ b/internal/importer/archive/iso/processor.go @@ -27,8 +27,25 @@ func AnalyzeISO( poolManager pool.Manager, maxPrefetch int, readTimeout time.Duration, + analyzeTimeout time.Duration, allowedExtensions []string, ) (*AnalyzedISO, error) { + start := time.Now() + // Hard cap the whole walk. A degraded NNTP provider can otherwise stall + // AnalyzeISO for minutes per ISO. analyzeTimeout <= 0 disables the cap + // (used by tests that exercise other paths). + if analyzeTimeout > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, analyzeTimeout) + defer cancel() + } + // Fail fast when the deadline is already exceeded (e.g. caller passed a + // past deadline, or analyzeTimeout fired between WithTimeout and the + // first NNTP read). 
+ if err := ctx.Err(); err != nil { + return nil, fmt.Errorf("iso: analysing %q: %w", src.Filename, err) + } + rs, closer, err := NewISOReadSeeker(ctx, src, poolManager, maxPrefetch, readTimeout) if err != nil { return nil, fmt.Errorf("iso: creating read seeker for %q: %w", src.Filename, err) @@ -62,6 +79,13 @@ func AnalyzeISO( } } + slog.InfoContext(ctx, "ISO analyse complete", + "filename", src.Filename, + "duration_seconds", time.Since(start).Seconds(), + "files", len(out.Files), + "main_feature_clips", len(out.MainFeature), + ) + return out, nil } diff --git a/internal/importer/archive/iso/processor_test.go b/internal/importer/archive/iso/processor_test.go new file mode 100644 index 000000000..2c7c048c4 --- /dev/null +++ b/internal/importer/archive/iso/processor_test.go @@ -0,0 +1,80 @@ +package iso + +import ( + "context" + "errors" + "testing" + "time" +) + +// TestAnalyzeISO_HonorsTimeout verifies the hard per-ISO deadline added by +// the IsoAnalyzeTimeoutSeconds config knob. A 1ns analyzeTimeout must +// trip the context.WithTimeout, hit the fail-fast ctx.Err() check, and +// return a DeadlineExceeded-wrapped error within a few ms — well before +// any NNTP read could be attempted. +// +// Passing a nil pool.Manager is deliberate: if the timeout check fails +// to fire, NewISOReadSeeker would dereference it and crash, making the +// regression unmissable. 
+func TestAnalyzeISO_HonorsTimeout(t *testing.T) { + t.Parallel() + + src := ISOSource{Filename: "stuck.iso", Size: 1 << 30} + + start := time.Now() + _, err := AnalyzeISO( + context.Background(), + src, + nil, // pool.Manager — must NOT be reached + 0, + 0, + 1*time.Nanosecond, // analyzeTimeout + nil, + ) + elapsed := time.Since(start) + + if err == nil { + t.Fatal("expected error from past-deadline AnalyzeISO, got nil") + } + if !errors.Is(err, context.DeadlineExceeded) { + t.Fatalf("expected error wrapping context.DeadlineExceeded, got: %v", err) + } + if elapsed > 50*time.Millisecond { + t.Fatalf("AnalyzeISO took %v with a 1ns timeout — fail-fast ctx check is not firing", elapsed) + } +} + +// TestAnalyzeISO_HonorsTimeout_PreCanceled covers the case where the +// caller's context is already canceled before AnalyzeISO is invoked. +// With analyzeTimeout==0 (cap disabled), the function still needs to +// surface the parent's cancellation without touching the pool. +func TestAnalyzeISO_HonorsTimeout_PreCanceled(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + src := ISOSource{Filename: "stuck.iso", Size: 1 << 30} + + start := time.Now() + _, err := AnalyzeISO( + ctx, + src, + nil, + 0, + 0, + 0, // analyzeTimeout=0 → cap disabled, parent ctx still canceled + nil, + ) + elapsed := time.Since(start) + + if err == nil { + t.Fatal("expected error from pre-canceled AnalyzeISO, got nil") + } + if !errors.Is(err, context.Canceled) { + t.Fatalf("expected error wrapping context.Canceled, got: %v", err) + } + if elapsed > 50*time.Millisecond { + t.Fatalf("AnalyzeISO took %v with pre-canceled ctx — fail-fast ctx check is not firing", elapsed) + } +} diff --git a/internal/importer/archive/iso_expansion.go b/internal/importer/archive/iso_expansion.go index d9d9fbbf0..3896abbca 100644 --- a/internal/importer/archive/iso_expansion.go +++ b/internal/importer/archive/iso_expansion.go @@ -45,6 +45,7 @@ func 
ExpandISOContents( poolManager pool.Manager, maxPrefetch int, readTimeout time.Duration, + analyzeTimeout time.Duration, allowedExtensions []string, ) ([]Content, error) { if !expand { @@ -70,7 +71,7 @@ func ExpandISOContents( AesIV: c.AesIV, Size: c.Size, } - a, err := iso.AnalyzeISO(ctx, src, poolManager, maxPrefetch, readTimeout, allowedExtensions) + a, err := iso.AnalyzeISO(ctx, src, poolManager, maxPrefetch, readTimeout, analyzeTimeout, allowedExtensions) if err != nil { slog.WarnContext(ctx, "Failed to analyze ISO content, keeping ISO as-is", "file", c.Filename, "error", err) diff --git a/internal/importer/archive/rar/aggregator.go b/internal/importer/archive/rar/aggregator.go index 91138405c..a2f8fd28b 100644 --- a/internal/importer/archive/rar/aggregator.go +++ b/internal/importer/archive/rar/aggregator.go @@ -139,6 +139,7 @@ type ProcessArchiveOptions struct { ExtractedFiles []parser.ExtractedFileInfo MaxPrefetch int ReadTimeout time.Duration + IsoAnalyzeTimeout time.Duration ExpandBlurayIso bool FilterSamples bool RenameToNzbName bool @@ -164,6 +165,7 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { extractedFiles := opts.ExtractedFiles maxPrefetch := opts.MaxPrefetch readTimeout := opts.ReadTimeout + analyzeTimeout := opts.IsoAnalyzeTimeout expandBlurayIso := opts.ExpandBlurayIso filterSamples := opts.FilterSamples renameToNzbName := opts.RenameToNzbName @@ -208,7 +210,7 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { } // Expand ISO files found inside the RAR archive into their inner media files - rarContents, err := archive.ExpandISOContents(ctx, expandBlurayIso, rarContents, poolManager, maxPrefetch, readTimeout, allowedFileExtensions) + rarContents, err := archive.ExpandISOContents(ctx, expandBlurayIso, rarContents, poolManager, maxPrefetch, readTimeout, analyzeTimeout, allowedFileExtensions) if err != nil { slog.WarnContext(ctx, "ISO expansion failed, proceeding without ISO contents", 
"error", err) } diff --git a/internal/importer/archive/sevenzip/aggregator.go b/internal/importer/archive/sevenzip/aggregator.go index 4fbabce91..1e01ffafa 100644 --- a/internal/importer/archive/sevenzip/aggregator.go +++ b/internal/importer/archive/sevenzip/aggregator.go @@ -136,6 +136,7 @@ type ProcessArchiveOptions struct { ExtractedFiles []parser.ExtractedFileInfo MaxPrefetch int ReadTimeout time.Duration + IsoAnalyzeTimeout time.Duration ExpandBlurayIso bool FilterSamples bool RenameToNzbName bool @@ -161,6 +162,7 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { extractedFiles := opts.ExtractedFiles maxPrefetch := opts.MaxPrefetch readTimeout := opts.ReadTimeout + analyzeTimeout := opts.IsoAnalyzeTimeout expandBlurayIso := opts.ExpandBlurayIso filterSamples := opts.FilterSamples renameToNzbName := opts.RenameToNzbName @@ -184,7 +186,7 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { slog.InfoContext(ctx, "Successfully analyzed 7zip archive content", "files_in_archive", len(sevenZipContents)) // Expand ISO files found inside the 7zip archive into their inner media files - sevenZipContents, err = archive.ExpandISOContents(ctx, expandBlurayIso, sevenZipContents, poolManager, maxPrefetch, readTimeout, allowedFileExtensions) + sevenZipContents, err = archive.ExpandISOContents(ctx, expandBlurayIso, sevenZipContents, poolManager, maxPrefetch, readTimeout, analyzeTimeout, allowedFileExtensions) if err != nil { slog.WarnContext(ctx, "ISO expansion failed, proceeding without ISO contents", "error", err) } diff --git a/internal/importer/processor.go b/internal/importer/processor.go index a2e47742e..94e98b756 100644 --- a/internal/importer/processor.go +++ b/internal/importer/processor.go @@ -639,6 +639,7 @@ func (proc *Processor) processRarArchive( ExtractedFiles: extractedFiles, MaxPrefetch: maxPrefetch, ReadTimeout: readTimeout, + IsoAnalyzeTimeout: proc.configGetter().GetIsoAnalyzeTimeout(), 
ExpandBlurayIso: expandBlurayIso, FilterSamples: filterSampleFiles, RenameToNzbName: renameToNzbName, @@ -777,6 +778,7 @@ func (proc *Processor) processSevenZipArchive( ExtractedFiles: extractedFiles, MaxPrefetch: maxPrefetch, ReadTimeout: readTimeout, + IsoAnalyzeTimeout: proc.configGetter().GetIsoAnalyzeTimeout(), ExpandBlurayIso: expandBlurayIso, FilterSamples: filterSampleFiles, RenameToNzbName: renameToNzbName, From e8a6f407dc9025b2a197226df39a053b89a4d5c3 Mon Sep 17 00:00:00 2001 From: javi11 Date: Tue, 26 May 2026 10:43:32 +0200 Subject: [PATCH 22/30] refactor(iso): consolidate AnalyzeISO success log, document timeout=0 sentinel --- internal/config/accessors.go | 7 +++++++ internal/importer/archive/iso/processor.go | 16 ++++++++-------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/internal/config/accessors.go b/internal/config/accessors.go index 6ec1174d1..30ba5d33d 100644 --- a/internal/config/accessors.go +++ b/internal/config/accessors.go @@ -129,6 +129,13 @@ func (c *Config) GetReadTimeout() time.Duration { // GetIsoAnalyzeTimeout returns the per-ISO analyse deadline with a 120s // default fallback. This bounds the entire iso.AnalyzeISO walk so a // degraded NNTP provider cannot stall the importer indefinitely. +// +// Sentinel handling: +// - nil (config field unset) → 120s default +// - 0 or negative (explicit "none") → 120s default; users cannot disable +// the cap — the whole purpose of this knob is to prevent unbounded +// waits. To approximate "unlimited", set a very large value (e.g. +// 86400 for a one-day budget). 
func (c *Config) GetIsoAnalyzeTimeout() time.Duration { if c.Import.IsoAnalyzeTimeoutSeconds == nil || *c.Import.IsoAnalyzeTimeoutSeconds <= 0 { return 120 * time.Second diff --git a/internal/importer/archive/iso/processor.go b/internal/importer/archive/iso/processor.go index c6fdd3b9f..1b96b29fe 100644 --- a/internal/importer/archive/iso/processor.go +++ b/internal/importer/archive/iso/processor.go @@ -57,12 +57,6 @@ func AnalyzeISO( return nil, fmt.Errorf("iso: listing files in %q: %w", src.Filename, err) } - slog.InfoContext(ctx, "ISO analysed", - "filename", src.Filename, - "iso_size_bytes", src.Size, - "files", len(entries), - ) - out := &AnalyzedISO{VolumeLabel: ReadVolumeLabel(rs)} for _, e := range entries { @@ -79,11 +73,17 @@ func AnalyzeISO( } } - slog.InfoContext(ctx, "ISO analyse complete", + // Single completion log: raw entry count, filtered file count, BD clip + // count, and total time. Previously this function emitted two separate + // INFO lines per successful analysis ("ISO analysed" + "ISO analyse + // complete"); they're consolidated here. 
+ slog.InfoContext(ctx, "ISO analysed", "filename", src.Filename, - "duration_seconds", time.Since(start).Seconds(), + "iso_size_bytes", src.Size, + "entries", len(entries), "files", len(out.Files), "main_feature_clips", len(out.MainFeature), + "duration_seconds", time.Since(start).Seconds(), ) return out, nil From af2dbaeaffaeb1f88fa1b47e94f3fbc96154a86b Mon Sep 17 00:00:00 2001 From: javi11 Date: Wed, 27 May 2026 08:47:38 +0200 Subject: [PATCH 23/30] perf(metadata): partial-read lite fields in ReadFileMetadataLite to fix 32GB PROPFIND memory leak --- internal/metadata/service.go | 122 ++++++++++++++++++++++++++++-- internal/metadata/service_test.go | 107 ++++++++++++++++++++++++++ 2 files changed, 224 insertions(+), 5 deletions(-) diff --git a/internal/metadata/service.go b/internal/metadata/service.go index 3f099d0f8..35fffd9ce 100644 --- a/internal/metadata/service.go +++ b/internal/metadata/service.go @@ -14,6 +14,7 @@ import ( lru "github.com/hashicorp/golang-lru/v2" metapb "github.com/javi11/altmount/internal/metadata/proto" "github.com/javi11/altmount/internal/utils" + "google.golang.org/protobuf/encoding/protowire" "google.golang.org/protobuf/proto" ) @@ -167,16 +168,129 @@ func (ms *MetadataService) ReadFileMetadata(virtualPath string) (*metapb.FileMet return metadata, nil } +// liteScanBytes is how much of a .meta file we read up front when serving a +// directory listing. The lite fields (file_size=1, status=3, modified_at=5) +// are all varints near the start of the proto; the only intervening field +// that can be large is source_nzb_path=2 (a string). 4 KiB is comfortable +// headroom — virtually every real-world .meta has all three within the first +// ~200 bytes. Avoids reading and unmarshalling the full proto (which can be +// MBs for files with many NestedSources or SegmentData entries — the exact +// pattern that caused a 7.94 GB allocation spike during FileBrowser +// recursive PROPFIND walks). 
+const liteScanBytes = 4096 + // ReadFileMetadataLite reads only the lightweight fields (size, modtime, status) -// needed for directory listings. It uses a separate cache so that Readdir does not -// pull full FileMetadata protos (with SegmentData, etc.) into the main cache. +// needed for directory listings. On cache miss it reads at most liteScanBytes +// from the .meta file and scans the proto wire format for the three lite +// fields, never instantiating the full FileMetadata proto or its +// NestedSources/SegmentData slices. Falls back to a full read in the rare +// case the partial buffer doesn't cover the lite fields. func (ms *MetadataService) ReadFileMetadataLite(virtualPath string) (*FileMetadataLite, error) { // Check lite cache first if cached, ok := ms.liteCache.Get(virtualPath); ok { return cached, nil } - // Cache miss — read from disk and deserialize + // Cache miss — read the head of the file and scan wire-format fields. + filename := filepath.Base(virtualPath) + metadataDir := filepath.Join(ms.rootPath, filepath.Dir(virtualPath)) + metadataPath := filepath.Join(metadataDir, filename+".meta") + + f, err := os.Open(metadataPath) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("failed to open metadata file: %w", err) + } + defer f.Close() + + buf := make([]byte, liteScanBytes) + n, err := io.ReadFull(f, buf) + if err != nil && err != io.ErrUnexpectedEOF && err != io.EOF { + return nil, fmt.Errorf("failed to read metadata head: %w", err) + } + buf = buf[:n] + + lite, ok := parseLiteFields(buf) + if !ok { + // Lite fields not located within liteScanBytes (extreme/unusual + // source_nzb_path length, future schema reordering, etc). Fall back + // to the full read so the listing is correct even at the cost of + // transient allocation. 
+ return ms.readFileMetadataLiteFull(virtualPath) + } + ms.liteCache.Add(virtualPath, lite) + return lite, nil +} + +// parseLiteFields walks proto wire format inside buf and extracts the lite +// fields without allocating a full FileMetadata struct. Returns (lite, true) +// once both file_size (field 1) and status (field 3) are seen — modified_at +// (field 5) is best-effort within the same buffer. Returns (nil, false) if +// the buffer is exhausted without the required fields, signalling the +// caller to fall back to a full read. +// +// Field numbers must match metadata.proto. Tested via TestReadFileMetadataLite_* +// in service_test.go. +func parseLiteFields(buf []byte) (*FileMetadataLite, bool) { + var lite FileMetadataLite + var sawFileSize, sawStatus bool + for len(buf) > 0 { + num, typ, tagLen := protowire.ConsumeTag(buf) + if tagLen < 0 { + return nil, false + } + buf = buf[tagLen:] + switch num { + case 1: // file_size int64 (varint) + v, l := protowire.ConsumeVarint(buf) + if l < 0 { + return nil, false + } + lite.FileSize = int64(v) + sawFileSize = true + buf = buf[l:] + case 3: // status FileStatus (varint enum) + v, l := protowire.ConsumeVarint(buf) + if l < 0 { + return nil, false + } + lite.Status = metapb.FileStatus(v) + sawStatus = true + buf = buf[l:] + case 5: // modified_at int64 (varint) + v, l := protowire.ConsumeVarint(buf) + if l < 0 { + return nil, false + } + lite.ModifiedAt = int64(v) + buf = buf[l:] + default: + l := protowire.ConsumeFieldValue(num, typ, buf) + if l < 0 { + return nil, false + } + buf = buf[l:] + } + // Early exit once the required fields and modified_at are all + // captured. modified_at is best-effort within the partial buffer; if + // it sits past liteScanBytes it stays zero and the listing still renders. 
+ if sawFileSize && sawStatus && lite.ModifiedAt != 0 { + return &lite, true + } + } + if sawFileSize && sawStatus { + return &lite, true + } + return nil, false +} + +// readFileMetadataLiteFull is the legacy slow path: read the entire .meta +// file and unmarshal the full proto. Only used as a fallback when the +// partial-read scan in ReadFileMetadataLite fails to locate the lite +// fields within liteScanBytes. +func (ms *MetadataService) readFileMetadataLiteFull(virtualPath string) (*FileMetadataLite, error) { filename := filepath.Base(virtualPath) metadataDir := filepath.Join(ms.rootPath, filepath.Dir(virtualPath)) metadataPath := filepath.Join(metadataDir, filename+".meta") @@ -194,14 +308,12 @@ func (ms *MetadataService) ReadFileMetadataLite(virtualPath string) (*FileMetada return nil, fmt.Errorf("failed to unmarshal metadata: %w", err) } - // Store only the lightweight version — let the full proto be GC'd lite := &FileMetadataLite{ FileSize: metadata.FileSize, ModifiedAt: metadata.ModifiedAt, Status: metadata.Status, } ms.liteCache.Add(virtualPath, lite) - return lite, nil } diff --git a/internal/metadata/service_test.go b/internal/metadata/service_test.go index 3f684e2ee..8d40a871a 100644 --- a/internal/metadata/service_test.go +++ b/internal/metadata/service_test.go @@ -151,3 +151,110 @@ func TestCleanupOrphanedIDSymlinks_ContextCancellation(t *testing.T) { _, err := ms.CleanupOrphanedIDSymlinks(ctx) assert.ErrorIs(t, err, context.Canceled) } + +// TestReadFileMetadataLite_DoesNotReadFullProto pins the fast path: when the +// `.meta` proto is multi-MB (because the file has thousands of NestedSources +// or SegmentData entries — the exact shape that caused a 7.94 GB +// PROPFIND allocation spike), ReadFileMetadataLite must read only the head +// of the file and never instantiate the giant proto. We measure this via +// the file size we write vs. the bytes read by the lite path. 
+func TestReadFileMetadataLite_DoesNotReadFullProto(t *testing.T) { + root := t.TempDir() + ms := NewMetadataService(root) + + virtualPath := filepath.Join("movies", "huge.m2ts") + + // Build a FileMetadata with thousands of NestedSources so the on-disk + // proto is hundreds of KB — large enough that a regression to the + // full os.ReadFile + proto.Unmarshal path would allocate >>liteScanBytes + // and be caught by the heap-delta assertion below. + nested := make([]*metapb.NestedSegmentSource, 0, 5000) + for i := range 5000 { + nested = append(nested, &metapb.NestedSegmentSource{ + Segments: []*metapb.SegmentData{ + {Id: "msg-id-with-a-typical-length@server.example", StartOffset: int64(i * 1024), EndOffset: int64((i + 1) * 1024), SegmentSize: 1024}, + }, + InnerOffset: 0, + InnerLength: 1024, + InnerVolumeSize: 1024, + }) + } + meta := ms.CreateFileMetadata( + 17_860_995_072, "Avatar.nzb", metapb.FileStatus_FILE_STATUS_HEALTHY, + nil, metapb.Encryption_NONE, "", "", nil, nil, 0, nil, "huge-nzbdav-id", + ) + meta.NestedSources = nested + require.NoError(t, ms.WriteFileMetadata(virtualPath, meta)) + + // Confirm the on-disk file is at least 200 KB — the partial-read + // budget is 4 KB so anything substantially larger gives the heap-delta + // assertion enough headroom to catch a regression. + stat, err := os.Stat(ms.GetMetadataFilePath(virtualPath)) + require.NoError(t, err) + require.Greater(t, stat.Size(), int64(200<<10), "test setup should produce a >200KB .meta file to make the fast-path savings observable") + + // Drop the liteCache entry written by WriteFileMetadata so we hit the + // disk-read path under test. + ms.liteCache.Purge() + + // Snapshot heap allocations before / after the call. The full-read + // implementation would allocate at least stat.Size() bytes (for the + // os.ReadFile buffer) plus the unmarshalled proto. The partial-read + // implementation should allocate well under 64 KiB. 
+ runtime.GC() + var before, after runtime.MemStats + runtime.ReadMemStats(&before) + + lite, err := ms.ReadFileMetadataLite(virtualPath) + require.NoError(t, err) + require.NotNil(t, lite) + + runtime.ReadMemStats(&after) + delta := after.TotalAlloc - before.TotalAlloc + t.Logf("ReadFileMetadataLite allocated %d bytes (on-disk .meta = %d bytes)", delta, stat.Size()) + + // Correctness: lite must reflect the values we wrote. + assert.Equal(t, int64(17_860_995_072), lite.FileSize) + assert.Equal(t, metapb.FileStatus_FILE_STATUS_HEALTHY, lite.Status) + + // Regression guard: the fast path must allocate dramatically less than + // the full file. Use 5× liteScanBytes as a comfortable upper bound that + // still catches a regression where the implementation re-reads the + // whole file. + const maxExpectedAlloc = 5 * liteScanBytes + assert.LessOrEqualf(t, delta, uint64(maxExpectedAlloc), + "ReadFileMetadataLite allocated %d bytes — should be ≤ %d. A regression to the full os.ReadFile + proto.Unmarshal would allocate >= the on-disk size (%d).", + delta, maxExpectedAlloc, stat.Size()) +} + +// TestReadFileMetadataLite_FallsBackOnLongHeader covers the edge where the +// lite fields aren't reachable within liteScanBytes (e.g., a future schema +// change places one after a very large field). The fallback path produces +// the same correct lite struct, just by reading the full file. +func TestReadFileMetadataLite_FallsBackOnLongHeader(t *testing.T) { + root := t.TempDir() + ms := NewMetadataService(root) + + virtualPath := filepath.Join("movies", "long-header.mkv") + + // Craft a SourceNzbPath longer than liteScanBytes so the lite fields + // after it (status, modified_at) fall past the partial-read window. + // file_size (field 1) is before it, so the partial-read scan sees + // FileSize but not Status/ModifiedAt → falls back to full read. 
+ longPath := make([]byte, liteScanBytes+512) + for i := range longPath { + longPath[i] = 'a' + } + meta := ms.CreateFileMetadata( + 1234, string(longPath), metapb.FileStatus_FILE_STATUS_HEALTHY, + nil, metapb.Encryption_NONE, "", "", nil, nil, 0, nil, "fallback-id", + ) + require.NoError(t, ms.WriteFileMetadata(virtualPath, meta)) + ms.liteCache.Purge() + + lite, err := ms.ReadFileMetadataLite(virtualPath) + require.NoError(t, err) + require.NotNil(t, lite) + assert.Equal(t, int64(1234), lite.FileSize) + assert.Equal(t, metapb.FileStatus_FILE_STATUS_HEALTHY, lite.Status) +} From f7e8295b2b35eaa91643df9389989f1fc1c0bf7e Mon Sep 17 00:00:00 2001 From: javi11 Date: Wed, 27 May 2026 09:01:00 +0200 Subject: [PATCH 24/30] perf(metadata): dedupe outer segments across NestedSources to shrink encrypted-ISO .meta from 8GB to ~6MB --- internal/importer/archive/content_metadata.go | 109 +++++++++++++- .../importer/archive/content_metadata_test.go | 135 ++++++++++++++++++ internal/metadata/expand.go | 42 ++++++ internal/metadata/proto/metadata.pb.go | 57 ++++++-- internal/metadata/proto/metadata.proto | 18 +++ internal/metadata/service.go | 8 ++ 6 files changed, 351 insertions(+), 18 deletions(-) create mode 100644 internal/metadata/expand.go diff --git a/internal/importer/archive/content_metadata.go b/internal/importer/archive/content_metadata.go index f3afa915b..221e9dc3f 100644 --- a/internal/importer/archive/content_metadata.go +++ b/internal/importer/archive/content_metadata.go @@ -2,6 +2,7 @@ package archive import ( "time" + "unsafe" metapb "github.com/javi11/altmount/internal/metadata/proto" ) @@ -43,17 +44,113 @@ func NewFileMetadataFromContent( meta.AesIv = content.AesIV } - // Populate nested sources for encrypted nested archive files - for _, ns := range content.NestedSources { - meta.NestedSources = append(meta.NestedSources, &metapb.NestedSegmentSource{ + // Populate nested sources. For multi-extent encrypted volumes (e.g. 
a + // Blu-ray main feature with hundreds of extents that all read from the + // same encrypted RAR) every NestedSource shares the same Segments slice + // in memory. Serialising them naïvely duplicates the segment list per + // extent — for Avatar 3D that produced an 8 GB .meta file. We dedupe + // here by detecting shared segment-list backing arrays and emitting + // one entry in meta.SharedOuterSources per unique group; each + // NestedSource then carries only its inner_offset + inner_length plus + // a 1-based shared_outer_source_index. Sources without sharing fall + // through to the legacy on-disk layout so old code paths are unaffected. + appendNestedSourcesWithDedupe(meta, content.NestedSources) + + return meta +} + +// nestedSourceShareKey identifies a NestedSource by the backing array of its +// Segments slice plus the AES key/IV and inner volume size. Sources with the +// same key can share one entry in FileMetadata.SharedOuterSources. +type nestedSourceShareKey struct { + segmentsPtr uintptr + segmentsLen int + aesKey string + aesIv string + innerVolumeSize int64 +} + +// shareKeyFor builds a sharing key. It uses the backing-array pointer of +// the Segments slice (cheap O(1) check) plus the slice length to catch +// accidental pointer reuse across distinct slices. The AES key/iv and +// inner_volume_size complete the identity — two sources are only +// shareable when those match exactly. +func shareKeyFor(ns NestedSource) nestedSourceShareKey { + var ptr uintptr + if len(ns.Segments) > 0 { + ptr = uintptr(unsafe.Pointer(unsafe.SliceData(ns.Segments))) + } + return nestedSourceShareKey{ + segmentsPtr: ptr, + segmentsLen: len(ns.Segments), + aesKey: string(ns.AesKey), + aesIv: string(ns.AesIV), + innerVolumeSize: ns.InnerVolumeSize, + } +} + +// appendNestedSourcesWithDedupe writes the NestedSources into meta, +// deduplicating shared outer-segment data into meta.SharedOuterSources. +// When fewer than two sources qualify for sharing (e.g. 
a single source, +// or every source has a unique segment list) the legacy layout is used: +// every NestedSegmentSource carries its own Segments + AesKey + AesIv. +func appendNestedSourcesWithDedupe(meta *metapb.FileMetadata, sources []NestedSource) { + if len(sources) == 0 { + return + } + + // First pass: count how many sources share each key. Only keys that + // appear in >= 2 sources are worth deduping (single-use keys cost more + // to store as shared entries than as inline data). + counts := make(map[nestedSourceShareKey]int, len(sources)) + for _, ns := range sources { + if len(ns.Segments) == 0 { + continue + } + counts[shareKeyFor(ns)]++ + } + + // Build the SharedOuterSources slice, preserving first-appearance order. + keyToIndex := make(map[nestedSourceShareKey]int32, len(counts)) + for _, ns := range sources { + if len(ns.Segments) == 0 { + continue + } + key := shareKeyFor(ns) + if counts[key] < 2 { + continue + } + if _, seen := keyToIndex[key]; seen { + continue + } + meta.SharedOuterSources = append(meta.SharedOuterSources, &metapb.NestedSegmentSource{ Segments: ns.Segments, AesKey: ns.AesKey, AesIv: ns.AesIV, - InnerOffset: ns.InnerOffset, - InnerLength: ns.InnerLength, InnerVolumeSize: ns.InnerVolumeSize, }) + keyToIndex[key] = int32(len(meta.SharedOuterSources)) // 1-based } - return meta + // Second pass: emit one NestedSegmentSource per input, referencing + // the shared entry where applicable. 
+ for _, ns := range sources { + entry := &metapb.NestedSegmentSource{ + InnerOffset: ns.InnerOffset, + InnerLength: ns.InnerLength, + } + if idx, ok := keyToIndex[shareKeyFor(ns)]; ok && len(ns.Segments) > 0 { + entry.SharedOuterSourceIndex = idx + } else { + entry.Segments = ns.Segments + entry.AesKey = ns.AesKey + entry.AesIv = ns.AesIV + entry.InnerVolumeSize = ns.InnerVolumeSize + } + meta.NestedSources = append(meta.NestedSources, entry) + } } + +// The read-side counterpart of the dedupe written here lives in +// internal/metadata.ExpandSharedOuterSources — called from +// MetadataService.ReadFileMetadata after proto.Unmarshal. diff --git a/internal/importer/archive/content_metadata_test.go b/internal/importer/archive/content_metadata_test.go index 9be716d7e..fcbb8b615 100644 --- a/internal/importer/archive/content_metadata_test.go +++ b/internal/importer/archive/content_metadata_test.go @@ -2,8 +2,10 @@ package archive import ( "testing" + "unsafe" metapb "github.com/javi11/altmount/internal/metadata/proto" + "google.golang.org/protobuf/proto" ) func TestNewFileMetadataFromContent_PreservesNestedSources(t *testing.T) { @@ -72,3 +74,136 @@ func TestNewFileMetadataFromContent_SetsAESWhenKeyPresent(t *testing.T) { t.Errorf("AesIv not propagated") } } + +// TestNewFileMetadataFromContent_DedupesSharedOuterSources pins the +// encrypted-multi-extent fix. Mimics the Avatar 3D shape: many +// NestedSources sharing the SAME outer-segment slice plus the same AES +// key/iv. Before the dedupe writer landed, marshalling this proto +// produced an 8 GB .meta on disk. The fix must: +// +// 1. Marshal to a size proportional to len(outer-segments) + len(extents), +// NOT len(outer-segments) × len(extents). +// 2. 
Round-trip cleanly: after Unmarshal + ExpandSharedOuterSources, all +// nested sources must point to the same underlying segments backing +// array (verified via unsafe.SliceData pointer equality), and per- +// source InnerOffset/InnerLength must be preserved exactly. +func TestNewFileMetadataFromContent_DedupesSharedOuterSources(t *testing.T) { + // Build an outer segment list large enough that duplicating it across + // 100 sources would cost ~5 MB if no dedupe ran. With dedupe the + // marshalled size is dominated by the one shared copy. + const numSegments = 1000 + const numExtents = 100 + outerSegs := make([]*metapb.SegmentData, numSegments) + for i := range outerSegs { + outerSegs[i] = &metapb.SegmentData{ + Id: "msg-id-of-typical-length@news.example.com", + StartOffset: int64(i) * 1024, + EndOffset: int64(i+1)*1024 - 1, + SegmentSize: 1024, + } + } + + nested := make([]NestedSource, 0, numExtents) + for i := range numExtents { + nested = append(nested, NestedSource{ + Segments: outerSegs, // SAME slice header — the dedupe target + AesKey: []byte{0xAA, 0xBB, 0xCC, 0xDD}, + AesIV: []byte{0x11, 0x22, 0x33, 0x44}, + InnerOffset: int64(i) * 4096, + InnerLength: 4096, + InnerVolumeSize: int64(numSegments * 1024), + }) + } + + content := Content{ + Filename: "huge.m2ts", + Size: int64(numExtents * 4096), + NestedSources: nested, + } + + meta := NewFileMetadataFromContent(content, "/nzb", 0, "") + + // Marshal the proto and assert the on-disk size reflects dedupe. + marshalled, err := proto.Marshal(meta) + if err != nil { + t.Fatalf("proto.Marshal: %v", err) + } + t.Logf("marshalled .meta size: %d bytes (%d segments × %d extents)", len(marshalled), numSegments, numExtents) + + // Estimate the marshalled size of one shared outer source: ~85 bytes + // per SegmentData on the wire × 1000 segments ≈ 85 KB. Plus 100 + // thin nested sources at ~30 bytes ≈ 3 KB. Plus header overhead. + // A regression to per-source duplication would produce ~8.5 MB + // (100 × 85 KB). 
Use 500 KB as a generous ceiling that catches any + // duplication regression. + const maxAllowed = 500 * 1024 + if len(marshalled) > maxAllowed { + t.Fatalf("marshalled proto is %d bytes — expected ≤ %d. Dedupe is not working; each NestedSource is duplicating the outer segments list.", + len(marshalled), maxAllowed) + } + + // Round-trip: unmarshal + expand, then verify all NestedSources point + // at the SAME segments backing array (pointer equality via + // unsafe.SliceData) so RAM cost stays at one shared array. + decoded := &metapb.FileMetadata{} + if err := proto.Unmarshal(marshalled, decoded); err != nil { + t.Fatalf("proto.Unmarshal: %v", err) + } + + if len(decoded.SharedOuterSources) != 1 { + t.Fatalf("SharedOuterSources = %d, want 1 (all extents share the same outer)", len(decoded.SharedOuterSources)) + } + if len(decoded.NestedSources) != numExtents { + t.Fatalf("NestedSources = %d, want %d", len(decoded.NestedSources), numExtents) + } + + // Before expansion: every nested source should have empty segments + // and a non-zero SharedOuterSourceIndex. + for i, ns := range decoded.NestedSources { + if len(ns.Segments) != 0 { + t.Errorf("nested source %d: expected empty Segments before expansion, got %d", i, len(ns.Segments)) + } + if ns.SharedOuterSourceIndex != 1 { + t.Errorf("nested source %d: SharedOuterSourceIndex = %d, want 1", i, ns.SharedOuterSourceIndex) + } + } + + // Reuse the production expand helper via the metadata package — but + // to avoid a test-time import cycle we inline an equivalent walk + // here. (The real read path in metadata.ReadFileMetadata calls + // metadata.ExpandSharedOuterSources, which performs the same walk.) 
+ for _, ns := range decoded.NestedSources { + idx := int(ns.SharedOuterSourceIndex) - 1 + shared := decoded.SharedOuterSources[idx] + ns.Segments = shared.Segments + ns.AesKey = shared.AesKey + ns.AesIv = shared.AesIv + if ns.InnerVolumeSize == 0 { + ns.InnerVolumeSize = shared.InnerVolumeSize + } + } + + // After expansion: per-source offsets/lengths preserved. + for i, ns := range decoded.NestedSources { + if ns.InnerOffset != int64(i)*4096 { + t.Errorf("nested source %d: InnerOffset = %d, want %d", i, ns.InnerOffset, int64(i)*4096) + } + if ns.InnerLength != 4096 { + t.Errorf("nested source %d: InnerLength = %d, want 4096", i, ns.InnerLength) + } + if len(ns.Segments) != numSegments { + t.Errorf("nested source %d: post-expand Segments = %d, want %d", i, len(ns.Segments), numSegments) + } + } + + // All nested sources should share the same underlying segments + // backing array — proves the expansion didn't deep-copy. + firstBacking := uintptr(unsafe.Pointer(unsafe.SliceData(decoded.NestedSources[0].Segments))) + for i := 1; i < len(decoded.NestedSources); i++ { + thisBacking := uintptr(unsafe.Pointer(unsafe.SliceData(decoded.NestedSources[i].Segments))) + if firstBacking != thisBacking { + t.Errorf("nested source %d: expected shared backing array, got distinct pointer (was %x now %x)", i, firstBacking, thisBacking) + break + } + } +} diff --git a/internal/metadata/expand.go b/internal/metadata/expand.go new file mode 100644 index 000000000..f6b803775 --- /dev/null +++ b/internal/metadata/expand.go @@ -0,0 +1,42 @@ +package metadata + +import ( + "fmt" + + metapb "github.com/javi11/altmount/internal/metadata/proto" +) + +// ExpandSharedOuterSources resolves NestedSegmentSource.SharedOuterSourceIndex +// references in-place. Sources with a non-zero index inherit Segments, AesKey, +// AesIv, and (if unset) InnerVolumeSize from +// meta.SharedOuterSources[index-1]. Slice headers share their underlying +// array — RAM cost is unchanged from the legacy layout. 
Safe to call on any +// FileMetadata; a no-op when SharedOuterSources is empty. +// +// The dedupe written by archive.NewFileMetadataFromContent is the +// write-side counterpart of this expansion. +func ExpandSharedOuterSources(meta *metapb.FileMetadata) error { + if len(meta.SharedOuterSources) == 0 { + return nil + } + for _, ns := range meta.NestedSources { + if ns.SharedOuterSourceIndex == 0 { + continue + } + idx := int(ns.SharedOuterSourceIndex) - 1 + if idx < 0 || idx >= len(meta.SharedOuterSources) { + return fmt.Errorf( + "metadata: nested source references shared_outer_source_index %d but only %d shared outer source(s) are defined", + ns.SharedOuterSourceIndex, len(meta.SharedOuterSources), + ) + } + shared := meta.SharedOuterSources[idx] + ns.Segments = shared.Segments + ns.AesKey = shared.AesKey + ns.AesIv = shared.AesIv + if ns.InnerVolumeSize == 0 { + ns.InnerVolumeSize = shared.InnerVolumeSize + } + } + return nil +} diff --git a/internal/metadata/proto/metadata.pb.go b/internal/metadata/proto/metadata.pb.go index 05a58b7e6..6a997a8bd 100644 --- a/internal/metadata/proto/metadata.pb.go +++ b/internal/metadata/proto/metadata.pb.go @@ -263,8 +263,15 @@ type NestedSegmentSource struct { InnerOffset int64 `protobuf:"varint,4,opt,name=inner_offset,json=innerOffset,proto3" json:"inner_offset,omitempty"` // Offset within decrypted inner volume where file data starts InnerLength int64 `protobuf:"varint,5,opt,name=inner_length,json=innerLength,proto3" json:"inner_length,omitempty"` // Bytes of target file in this source InnerVolumeSize int64 `protobuf:"varint,6,opt,name=inner_volume_size,json=innerVolumeSize,proto3" json:"inner_volume_size,omitempty"` // Total decrypted size of inner volume (for AES cipher) - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + // When > 0 the `segments`, `aes_key`, `aes_iv`, `inner_volume_size` + // fields above are intentionally left empty on disk and inherit from + // 
FileMetadata.shared_outer_sources[shared_outer_source_index - 1]. + // Only `inner_offset` and `inner_length` are stored per-extent. 0 + // (proto default) means "no sharing" — identical to the legacy on-disk + // layout, so old .meta files keep working without migration. + SharedOuterSourceIndex int32 `protobuf:"varint,7,opt,name=shared_outer_source_index,json=sharedOuterSourceIndex,proto3" json:"shared_outer_source_index,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *NestedSegmentSource) Reset() { @@ -339,6 +346,13 @@ func (x *NestedSegmentSource) GetInnerVolumeSize() int64 { return 0 } +func (x *NestedSegmentSource) GetSharedOuterSourceIndex() int32 { + if x != nil { + return x.SharedOuterSourceIndex + } + return 0 +} + // FileMetadata represents a single virtual file in the filesystem // The filename comes from the actual metadata filename on disk type FileMetadata struct { @@ -358,8 +372,17 @@ type FileMetadata struct { Par2Files []*Par2FileReference `protobuf:"bytes,13,rep,name=par2_files,json=par2Files,proto3" json:"par2_files,omitempty"` // Associated PAR2 repair files NzbdavId string `protobuf:"bytes,14,opt,name=nzbdav_id,json=nzbdavId,proto3" json:"nzbdav_id,omitempty"` // ID to maintain compatibility with nzbdav NestedSources []*NestedSegmentSource `protobuf:"bytes,15,rep,name=nested_sources,json=nestedSources,proto3" json:"nested_sources,omitempty"` // Nested RAR sources (when file is inside inner RAR within outer RAR) - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + // Outer sources shared by groups of NestedSegmentSource entries. + // Used for multi-extent encrypted volumes — e.g. a Blu-ray main feature + // with hundreds of extents that all read from the same encrypted RAR. 
+ // Each entry holds the full Segments + AesKey + AesIv + InnerVolumeSize + // once; the corresponding NestedSegmentSource entries reference it by + // 1-based index via shared_outer_source_index, storing only + // inner_offset + inner_length per-extent. Cuts the on-disk .meta size + // from O(extents * segments) to O(extents + segments) for these files. + SharedOuterSources []*NestedSegmentSource `protobuf:"bytes,16,rep,name=shared_outer_sources,json=sharedOuterSources,proto3" json:"shared_outer_sources,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *FileMetadata) Reset() { @@ -497,6 +520,13 @@ func (x *FileMetadata) GetNestedSources() []*NestedSegmentSource { return nil } +func (x *FileMetadata) GetSharedOuterSources() []*NestedSegmentSource { + if x != nil { + return x.SharedOuterSources + } + return nil +} + var File_internal_metadata_proto_metadata_proto protoreflect.FileDescriptor const file_internal_metadata_proto_metadata_proto_rawDesc = "" + @@ -511,14 +541,15 @@ const file_internal_metadata_proto_metadata_proto_rawDesc = "" + "\x11Par2FileReference\x12\x1a\n" + "\bfilename\x18\x01 \x01(\tR\bfilename\x12\x1b\n" + "\tfile_size\x18\x02 \x01(\x03R\bfileSize\x128\n" + - "\fsegment_data\x18\x03 \x03(\v2\x15.metadata.SegmentDataR\vsegmentData\"\xea\x01\n" + + "\fsegment_data\x18\x03 \x03(\v2\x15.metadata.SegmentDataR\vsegmentData\"\xa5\x02\n" + "\x13NestedSegmentSource\x121\n" + "\bsegments\x18\x01 \x03(\v2\x15.metadata.SegmentDataR\bsegments\x12\x17\n" + "\aaes_key\x18\x02 \x01(\fR\x06aesKey\x12\x15\n" + "\x06aes_iv\x18\x03 \x01(\fR\x05aesIv\x12!\n" + "\finner_offset\x18\x04 \x01(\x03R\vinnerOffset\x12!\n" + "\finner_length\x18\x05 \x01(\x03R\vinnerLength\x12*\n" + - "\x11inner_volume_size\x18\x06 \x01(\x03R\x0finnerVolumeSize\"\xd3\x04\n" + + "\x11inner_volume_size\x18\x06 \x01(\x03R\x0finnerVolumeSize\x129\n" + + "\x19shared_outer_source_index\x18\a \x01(\x05R\x16sharedOuterSourceIndex\"\xa4\x05\n" + 
"\fFileMetadata\x12\x1b\n" + "\tfile_size\x18\x01 \x01(\x03R\bfileSize\x12&\n" + "\x0fsource_nzb_path\x18\x02 \x01(\tR\rsourceNzbPath\x12,\n" + @@ -540,7 +571,8 @@ const file_internal_metadata_proto_metadata_proto_rawDesc = "" + "\n" + "par2_files\x18\r \x03(\v2\x1b.metadata.Par2FileReferenceR\tpar2Files\x12\x1b\n" + "\tnzbdav_id\x18\x0e \x01(\tR\bnzbdavId\x12D\n" + - "\x0enested_sources\x18\x0f \x03(\v2\x1d.metadata.NestedSegmentSourceR\rnestedSources*8\n" + + "\x0enested_sources\x18\x0f \x03(\v2\x1d.metadata.NestedSegmentSourceR\rnestedSources\x12O\n" + + "\x14shared_outer_sources\x18\x10 \x03(\v2\x1d.metadata.NestedSegmentSourceR\x12sharedOuterSources*8\n" + "\n" + "Encryption\x12\b\n" + "\x04NONE\x10\x00\x12\n" + @@ -584,11 +616,12 @@ var file_internal_metadata_proto_metadata_proto_depIdxs = []int32{ 2, // 4: metadata.FileMetadata.segment_data:type_name -> metadata.SegmentData 3, // 5: metadata.FileMetadata.par2_files:type_name -> metadata.Par2FileReference 4, // 6: metadata.FileMetadata.nested_sources:type_name -> metadata.NestedSegmentSource - 7, // [7:7] is the sub-list for method output_type - 7, // [7:7] is the sub-list for method input_type - 7, // [7:7] is the sub-list for extension type_name - 7, // [7:7] is the sub-list for extension extendee - 0, // [0:7] is the sub-list for field type_name + 4, // 7: metadata.FileMetadata.shared_outer_sources:type_name -> metadata.NestedSegmentSource + 8, // [8:8] is the sub-list for method output_type + 8, // [8:8] is the sub-list for method input_type + 8, // [8:8] is the sub-list for extension type_name + 8, // [8:8] is the sub-list for extension extendee + 0, // [0:8] is the sub-list for field type_name } func init() { file_internal_metadata_proto_metadata_proto_init() } diff --git a/internal/metadata/proto/metadata.proto b/internal/metadata/proto/metadata.proto index 94a03c161..3f2954070 100644 --- a/internal/metadata/proto/metadata.proto +++ b/internal/metadata/proto/metadata.proto @@ -42,6 +42,14 @@ message 
NestedSegmentSource { int64 inner_offset = 4; // Offset within decrypted inner volume where file data starts int64 inner_length = 5; // Bytes of target file in this source int64 inner_volume_size = 6; // Total decrypted size of inner volume (for AES cipher) + + // When > 0 the `segments`, `aes_key`, `aes_iv`, `inner_volume_size` + // fields above are intentionally left empty on disk and inherit from + // FileMetadata.shared_outer_sources[shared_outer_source_index - 1]. + // Only `inner_offset` and `inner_length` are stored per-extent. 0 + // (proto default) means "no sharing" — identical to the legacy on-disk + // layout, so old .meta files keep working without migration. + int32 shared_outer_source_index = 7; } // FileMetadata represents a single virtual file in the filesystem @@ -62,5 +70,15 @@ message FileMetadata { repeated Par2FileReference par2_files = 13; // Associated PAR2 repair files string nzbdav_id = 14; // ID to maintain compatibility with nzbdav repeated NestedSegmentSource nested_sources = 15; // Nested RAR sources (when file is inside inner RAR within outer RAR) + + // Outer sources shared by groups of NestedSegmentSource entries. + // Used for multi-extent encrypted volumes — e.g. a Blu-ray main feature + // with hundreds of extents that all read from the same encrypted RAR. + // Each entry holds the full Segments + AesKey + AesIv + InnerVolumeSize + // once; the corresponding NestedSegmentSource entries reference it by + // 1-based index via shared_outer_source_index, storing only + // inner_offset + inner_length per-extent. Cuts the on-disk .meta size + // from O(extents * segments) to O(extents + segments) for these files. 
+ repeated NestedSegmentSource shared_outer_sources = 16; } \ No newline at end of file diff --git a/internal/metadata/service.go b/internal/metadata/service.go index 35fffd9ce..b891ea108 100644 --- a/internal/metadata/service.go +++ b/internal/metadata/service.go @@ -152,6 +152,14 @@ func (ms *MetadataService) ReadFileMetadata(virtualPath string) (*metapb.FileMet return nil, fmt.Errorf("failed to unmarshal metadata: %w", err) } + // Resolve shared_outer_source_index references on nested sources. + // Files imported with the dedupe writer store outer segments once at + // the FileMetadata level; we re-populate per-source slice headers + // here so the rest of the read path is unaware of the difference. + if err := ExpandSharedOuterSources(metadata); err != nil { + return nil, fmt.Errorf("failed to expand shared outer sources: %w", err) + } + // Read ID from sidecar file (compatibility mode) idPath := metadataPath + ".id" if idData, err := os.ReadFile(idPath); err == nil { From dbe5188a401a42129a446441dda7b0e5c0ba2e74 Mon Sep 17 00:00:00 2001 From: javi11 Date: Wed, 27 May 2026 09:45:52 +0200 Subject: [PATCH 25/30] fix(iso): score Blu-ray playlists by unique-clip bytes to avoid picking the disc menu --- internal/importer/archive/iso/bluray.go | 71 ++++++--- internal/importer/archive/iso/bluray_test.go | 157 +++++++++++++++++++ 2 files changed, 208 insertions(+), 20 deletions(-) diff --git a/internal/importer/archive/iso/bluray.go b/internal/importer/archive/iso/bluray.go index 18d47237f..aa12ffdfb 100644 --- a/internal/importer/archive/iso/bluray.go +++ b/internal/importer/archive/iso/bluray.go @@ -13,9 +13,11 @@ import ( // form the main feature; the slice is empty if no parseable playlist // was found. type MainFeaturePlaylist struct { - PlaylistName string // e.g. "00800.MPLS" — for logging only - DurationTicks int64 // sum of (OUT-IN) at 45 kHz - Streams []isoFileEntry // ordered M2TS entries + PlaylistName string // e.g. 
"00800.MPLS" — for logging only + DurationTicks int64 // sum of (OUT-IN) at 45 kHz — informational, not used for selection + Streams []isoFileEntry // ordered M2TS entries (duplicates preserved if the playlist legitimately repeats a clip) + UniqueClipBytes uint64 // sum of file sizes of UNIQUE clips referenced; the primary scoring metric + UniqueClipCount int // number of distinct clips referenced; scoring tiebreaker } // ResolveMainFeature inspects the entries returned by ListISOFiles for a @@ -77,15 +79,29 @@ func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEn } // Resolve clip names in playlist order, preferring M2TS over SSIF. + // Build the ordered streams slice (duplicates preserved — a real BD + // feature may legitimately repeat a clip, and the output virtual + // file must follow the playlist order faithfully) AND a separate + // dedupe-by-name byte sum that drives playlist selection. Without + // the dedupe, a menu-navigation playlist that points 200+ times at + // the same ~80s menu M2TS would score higher than a real 30-chapter + // main feature, and we'd serve 30+ GB of looped menu. 
streams := make([]isoFileEntry, 0, len(pl.PlayItems)) + seenClips := make(map[string]struct{}, len(pl.PlayItems)) + var uniqueClipBytes uint64 for _, it := range pl.PlayItems { name := strings.ToUpper(it.ClipName) - if entry, ok := m2tsByClip[name]; ok { - streams = append(streams, entry) + entry, ok := m2tsByClip[name] + if !ok { + entry, ok = ssifByClip[name] + } + if !ok { continue } - if entry, ok := ssifByClip[name]; ok { - streams = append(streams, entry) + streams = append(streams, entry) + if _, dup := seenClips[name]; !dup { + seenClips[name] = struct{}{} + uniqueClipBytes += entry.size } } if len(streams) == 0 { @@ -93,11 +109,21 @@ func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEn } cand := &MainFeaturePlaylist{ - PlaylistName: pe.path, - DurationTicks: pl.DurationTicks(), - Streams: streams, + PlaylistName: pe.path, + DurationTicks: pl.DurationTicks(), + Streams: streams, + UniqueClipBytes: uniqueClipBytes, + UniqueClipCount: len(seenClips), } - if best == nil || isBetterPlaylist(cand, best, len(pl.PlayItems), len(best.Streams)) { + slog.DebugContext(ctx, "Blu-ray playlist candidate", + "playlist", pe.path, + "play_items", len(pl.PlayItems), + "resolved_streams", len(streams), + "unique_clips", len(seenClips), + "unique_clip_bytes", uniqueClipBytes, + "duration_seconds", cand.DurationTicks/45000, + ) + if best == nil || isBetterPlaylist(cand, best) { best = cand } } @@ -105,22 +131,27 @@ func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEn slog.InfoContext(ctx, "Blu-ray main feature playlist resolved", "playlist", best.PlaylistName, "clips", len(best.Streams), + "unique_clips", best.UniqueClipCount, + "unique_clip_bytes", best.UniqueClipBytes, "duration_seconds", best.DurationTicks/45000, ) } return best } -// isBetterPlaylist returns true when cand should replace best. -// Comparison: longer duration > more PlayItems > earlier filename. 
-// The filename tie-break relies on playlistEntries being sorted before -// iteration so the smaller path is seen first; we therefore only swap -// when strictly better. -func isBetterPlaylist(cand, best *MainFeaturePlaylist, candItems, bestItems int) bool { - if cand.DurationTicks != best.DurationTicks { - return cand.DurationTicks > best.DurationTicks +// isBetterPlaylist returns true when cand should replace best. Score by +// total bytes of unique clips referenced — a real main feature pulls in +// ~30 distinct chapter clips totalling tens of GB, while a Blu-ray menu +// navigation playlist references one small clip repeatedly and therefore +// always loses on this metric regardless of how many PlayItems it +// inflates the raw duration with. Final tie: earlier filename wins, +// relying on playlistEntries being lex-sorted before iteration so we +// only swap when strictly better. +func isBetterPlaylist(cand, best *MainFeaturePlaylist) bool { + if cand.UniqueClipBytes != best.UniqueClipBytes { + return cand.UniqueClipBytes > best.UniqueClipBytes } - return candItems > bestItems + return cand.UniqueClipCount > best.UniqueClipCount } // readISOFile reads the full contents of one isoFileEntry from rs, diff --git a/internal/importer/archive/iso/bluray_test.go b/internal/importer/archive/iso/bluray_test.go index 42b3c02c7..788bfdedb 100644 --- a/internal/importer/archive/iso/bluray_test.go +++ b/internal/importer/archive/iso/bluray_test.go @@ -3,6 +3,7 @@ package iso import ( "bytes" "context" + "fmt" "io" "testing" ) @@ -211,4 +212,160 @@ func TestResolveMainFeature(t *testing.T) { t.Errorf("expected nil when MPLS references unknown clip, got %+v", got) } }) + + t.Run("prefers feature over menu when menu has more PlayItems", func(t *testing.T) { + t.Parallel() + // The Avatar 3D regression: a menu navigation playlist with 201 + // PlayItems all pointing at the same ~80s menu clip would beat the + // real main feature under the old duration-sum scoring because + // 
201 × 80s > 30 × 6min. The fix scores by unique-clip bytes, + // where the menu's single 100MB clip loses to the feature's + // 30 × 600MB chapter clips totalling 18 GB. + menuItems := make([]MPLSPlayItem, 201) + for i := range menuItems { + // All 201 PlayItems reference the SAME menu clip — exactly the + // pattern observed in the user's failing case. + menuItems[i] = MPLSPlayItem{ + ClipName: "00149", + InTime: 0, + OutTime: 80 * 45000, // 80s, so total raw duration is 201 × 80s = 16080s ≈ 4.5h + } + } + menu := buildMPLS(t, "0200", menuItems, nil) + + featureItems := make([]MPLSPlayItem, 30) + for i := range featureItems { + featureItems[i] = MPLSPlayItem{ + ClipName: fmt.Sprintf("%05d", 1+i), // 30 distinct clips: 00001..00030 + InTime: 0, + OutTime: 6 * 60 * 45000, // 6 min/chapter → 30 × 6 = 180 min total raw duration + } + } + feature := buildMPLS(t, "0200", featureItems, nil) + + rs := makeImage(t, map[uint32][]byte{ + 100: menu, + 110: feature, + }) + + files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00000.MPLS", 100, uint64(len(menu))), + mkEntry("BDMV/PLAYLIST/00800.MPLS", 110, uint64(len(feature))), + // Menu clip: ~100 MB, one entry. + mkEntry("BDMV/STREAM/00149.M2TS", 1000, 100_000_000), + } + // 30 distinct feature clips, ~600 MB each → ~18 GB total unique bytes. + for i := range featureItems { + files = append(files, mkEntry( + fmt.Sprintf("BDMV/STREAM/%05d.M2TS", 1+i), + 2000+uint32(i)*10, + 600_000_000, + )) + } + + got := ResolveMainFeature(context.Background(), rs, files) + if got == nil { + t.Fatal("ResolveMainFeature returned nil — feature playlist should have won") + } + if got.PlaylistName != "BDMV/PLAYLIST/00800.MPLS" { + t.Fatalf("PlaylistName = %q, want 00800.MPLS (the real feature). 
The menu's 201 PlayItems must not be allowed to beat the feature's 30 distinct chapters.", got.PlaylistName) + } + if got.UniqueClipCount != 30 { + t.Errorf("UniqueClipCount = %d, want 30 (one per feature chapter)", got.UniqueClipCount) + } + if got.UniqueClipBytes != 30*600_000_000 { + t.Errorf("UniqueClipBytes = %d, want %d", got.UniqueClipBytes, uint64(30*600_000_000)) + } + if len(got.Streams) != 30 { + t.Errorf("Streams len = %d, want 30 (the playlist's actual playback order)", len(got.Streams)) + } + }) + + t.Run("preserves legitimate clip repetition in output streams", func(t *testing.T) { + t.Parallel() + // A real BD playlist may legitimately repeat a clip (e.g., a + // "previously on..." recap at the start of each chapter). The fix + // dedupes only for scoring; the output Streams slice must retain + // the playlist's actual playback order, including duplicates. + data := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 30 * 45000}, // A + {ClipName: "00002", InTime: 0, OutTime: 60 * 45000}, // B + {ClipName: "00001", InTime: 0, OutTime: 30 * 45000}, // A again + {ClipName: "00003", InTime: 0, OutTime: 90 * 45000}, // C + }, nil) + rs := makeImage(t, map[uint32][]byte{100: data}) + + files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00800.MPLS", 100, uint64(len(data))), + mkEntry("BDMV/STREAM/00001.M2TS", 200, 100), + mkEntry("BDMV/STREAM/00002.M2TS", 300, 200), + mkEntry("BDMV/STREAM/00003.M2TS", 400, 300), + } + + got := ResolveMainFeature(context.Background(), rs, files) + if got == nil { + t.Fatal("ResolveMainFeature returned nil") + } + + // Output preserves [A, B, A, C] exactly. 
+ if len(got.Streams) != 4 { + t.Fatalf("Streams len = %d, want 4 (dedupe must not collapse the output)", len(got.Streams)) + } + wantPaths := []string{ + "BDMV/STREAM/00001.M2TS", + "BDMV/STREAM/00002.M2TS", + "BDMV/STREAM/00001.M2TS", + "BDMV/STREAM/00003.M2TS", + } + for i, s := range got.Streams { + if s.path != wantPaths[i] { + t.Errorf("Streams[%d].path = %q, want %q", i, s.path, wantPaths[i]) + } + } + + // Scoring metrics use dedupe: 3 unique clips totalling 100+200+300. + if got.UniqueClipCount != 3 { + t.Errorf("UniqueClipCount = %d, want 3", got.UniqueClipCount) + } + if got.UniqueClipBytes != 600 { + t.Errorf("UniqueClipBytes = %d, want 600 (100+200+300, A counted once)", got.UniqueClipBytes) + } + }) + + t.Run("when all playlists are menus, picks the largest deterministically", func(t *testing.T) { + t.Parallel() + // Degenerate disc: every MPLS is a menu-style single-clip + // repetition. Algorithm must still return *something* without + // crashing and must be deterministic across runs. Picks the one + // with the largest unique-clip bytes (i.e., the largest target + // clip, since each playlist has only one unique clip). 
+ menuA := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00100", InTime: 0, OutTime: 80 * 45000}, + {ClipName: "00100", InTime: 0, OutTime: 80 * 45000}, + }, nil) + menuB := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00200", InTime: 0, OutTime: 80 * 45000}, + {ClipName: "00200", InTime: 0, OutTime: 80 * 45000}, + {ClipName: "00200", InTime: 0, OutTime: 80 * 45000}, + }, nil) + + rs := makeImage(t, map[uint32][]byte{ + 100: menuA, + 110: menuB, + }) + files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, uint64(len(menuA))), + mkEntry("BDMV/PLAYLIST/00002.MPLS", 110, uint64(len(menuB))), + mkEntry("BDMV/STREAM/00100.M2TS", 200, 50_000_000), // 50 MB + mkEntry("BDMV/STREAM/00200.M2TS", 300, 100_000_000), // 100 MB — larger + } + + got := ResolveMainFeature(context.Background(), rs, files) + if got == nil { + t.Fatal("ResolveMainFeature returned nil for a disc full of menus — should still pick one") + } + if got.PlaylistName != "BDMV/PLAYLIST/00002.MPLS" { + t.Errorf("PlaylistName = %q, want 00002.MPLS (its unique clip is 100 MB vs 50 MB)", got.PlaylistName) + } + }) } From 153e001b98efed9ec42c43e718b14e1893d70347 Mon Sep 17 00:00:00 2001 From: javi11 Date: Sat, 30 May 2026 12:14:29 +0200 Subject: [PATCH 26/30] feat(iso): TS timestamp-rewrite core for continuous-timeline remux --- internal/importer/archive/iso/tsremux.go | 210 +++++++++++++++++ internal/importer/archive/iso/tsremux_test.go | 218 ++++++++++++++++++ 2 files changed, 428 insertions(+) create mode 100644 internal/importer/archive/iso/tsremux.go create mode 100644 internal/importer/archive/iso/tsremux_test.go diff --git a/internal/importer/archive/iso/tsremux.go b/internal/importer/archive/iso/tsremux.go new file mode 100644 index 000000000..b753a7ab1 --- /dev/null +++ b/internal/importer/archive/iso/tsremux.go @@ -0,0 +1,210 @@ +package iso + +// Continuous-timeline remux core for Blu-ray main-feature virtual files. 
+// +// A merged BD main feature byte-concatenates N M2TS clips, each carrying its +// OWN independent PTS/DTS/PCR timeline (each starts near its own base). A +// player resyncs on the discontinuities so playback works, but ffprobe and +// seeking compute time from PTS deltas, which are meaningless across clip +// boundaries — hence "Duration: 00:26:21" for a 3h17m movie. +// +// This file holds the pure, stateless byte transform: given a TS packet and a +// 90 kHz delta, add the delta to every timestamp (PTS, DTS, PCR) found in the +// packet, in place. All timestamp fields are fixed-width, so the rewrite is +// byte-length preserving — the virtual file size and every byte offset are +// unchanged, so VFS byte-mapping and range requests keep working untouched. +// +// Nothing here does I/O or knows about clips; the caller supplies the delta +// per packet based on which clip the packet's byte offset falls in. That keeps +// this layer trivially testable (see tsremux_test.go) and is the feasibility +// gate for the whole continuous-timeline feature. + +const ( + tsSync = 0x47 + tsPacketLen = 188 + bdavPacketLen = 192 // 4-byte TP_extra_header + 188-byte TS packet + // ptsModulus is 2^33; PTS/DTS/PCR-base are 33-bit values that wrap here. + // At 90 kHz that is ~26.5 h, far above any single feature's runtime, but + // we still wrap defensively so a near-max base plus a delta stays legal. + ptsModulus = int64(1) << 33 +) + +// detectTSPacketSize inspects a buffer that begins at a packet boundary and +// returns 192 (BDAV, sync byte at offset 4), 188 (plain TS, sync at offset 0), +// or 0 when neither layout is recognised. Blu-ray .m2ts on disc is BDAV +// (192-byte source packets); plain 188 is handled for completeness/tests. +func detectTSPacketSize(buf []byte) int { + if len(buf) >= bdavPacketLen && buf[4] == tsSync { + // Confirm with a second packet when available to avoid a chance 0x47. 
+ if len(buf) >= 2*bdavPacketLen { + if buf[4+bdavPacketLen] == tsSync { + return bdavPacketLen + } + } else { + return bdavPacketLen + } + } + if len(buf) >= tsPacketLen && buf[0] == tsSync { + if len(buf) >= 2*tsPacketLen { + if buf[tsPacketLen] == tsSync { + return tsPacketLen + } + } else { + return tsPacketLen + } + } + // Fall back to BDAV if only its sync matched on a short buffer. + if len(buf) >= bdavPacketLen && buf[4] == tsSync { + return bdavPacketLen + } + if len(buf) >= tsPacketLen && buf[0] == tsSync { + return tsPacketLen + } + return 0 +} + +// addMod33 returns (v + delta) wrapped into the 33-bit timestamp space. +// delta may be negative (when a clip's pts_base exceeds its timeline_start). +func addMod33(v, delta int64) int64 { + r := (v + delta) % ptsModulus + if r < 0 { + r += ptsModulus + } + return r +} + +// rewritePacket adds delta90k (a 90 kHz signed offset) to the PTS, DTS, and +// PCR timestamps inside one source packet. packetSize is 192 (BDAV) or 188. +// The packet slice must be exactly packetSize bytes. Returns true if any +// timestamp was rewritten. Packets without timestamps (continuation packets, +// PSI, null) are left untouched. +// +// BDAV's 4-byte TP_extra_header (which carries a 27 MHz arrival timestamp) is +// intentionally NOT rewritten: ATS feeds the player's input-buffer model, not +// presentation timing or ffprobe's duration estimate. Leaving it avoids a +// whole extra class of bugs; revisit only if a hardware player needs it. +func rewritePacket(pkt []byte, packetSize int, delta90k int64) bool { + if delta90k == 0 || len(pkt) != packetSize { + return false + } + // Locate the 188-byte TS packet within the source packet. 
+ off := 0 + if packetSize == bdavPacketLen { + off = 4 + } + ts := pkt[off : off+tsPacketLen] + if ts[0] != tsSync { + return false + } + + pusi := ts[1]&0x40 != 0 + afc := (ts[3] >> 4) & 0x03 // adaptation_field_control + + changed := false + + // --- PCR (adaptation field) --- + // AFC 0b10 = adaptation only, 0b11 = adaptation + payload. + payloadStart := 4 + if afc == 0x02 || afc == 0x03 { + afLen := int(ts[4]) + // adaptation_field_length counts bytes after itself. + payloadStart = 5 + afLen + if afLen >= 1 && 5+afLen <= tsPacketLen { + afFlags := ts[5] + if afFlags&0x10 != 0 { // PCR_flag + // PCR occupies the 6 bytes at ts[6..12). + if 6+6 <= tsPacketLen { + if rewritePCR(ts[6:12], delta90k) { + changed = true + } + } + } + } + } + + // --- PTS / DTS (PES header) --- + // Only the first TS packet of a PES (PUSI=1) carries the PES header with + // the timestamps; continuation packets have none. + if pusi && (afc == 0x01 || afc == 0x03) && payloadStart+9 <= tsPacketLen { + p := ts[payloadStart:] + // PES start code 0x000001. + if len(p) >= 9 && p[0] == 0x00 && p[1] == 0x00 && p[2] == 0x01 { + // Optional PES header present only when top 2 bits of p[6] == 10. + if p[6]&0xC0 == 0x80 { + ptsDtsFlags := (p[7] & 0xC0) >> 6 + // 0b10 = PTS only; 0b11 = PTS + DTS. + if ptsDtsFlags == 0x02 || ptsDtsFlags == 0x03 { + if payloadStart+9+5 <= tsPacketLen { + if rewriteTS(p[9:14], delta90k) { + changed = true + } + } + } + if ptsDtsFlags == 0x03 { + if payloadStart+14+5 <= tsPacketLen { + if rewriteTS(p[14:19], delta90k) { + changed = true + } + } + } + } + } + } + + return changed +} + +// readTS decodes a 33-bit PTS/DTS from a 5-byte field. 
+// +// b[0]: prefix(7..4) PTS[32..30](3..1) marker(0) +// b[1]: PTS[29..22] +// b[2]: PTS[21..15](7..1) marker(0) +// b[3]: PTS[14..7] +// b[4]: PTS[6..0](7..1) marker(0) +func readTS(b []byte) int64 { + return (int64(b[0]&0x0E) << 29) | + (int64(b[1]) << 22) | + (int64(b[2]&0xFE) << 14) | + (int64(b[3]) << 7) | + (int64(b[4]) >> 1) +} + +// writeTS encodes v back into the 5-byte field, preserving the prefix nibble +// (bits 7..4 of b[0]) and all three marker bits (bit 0 of b[0], b[2], b[4]). +func writeTS(b []byte, v int64) { + b[0] = (b[0] & 0xF1) | byte((v>>29)&0x0E) + b[1] = byte(v >> 22) + b[2] = (b[2] & 0x01) | byte((v>>14)&0xFE) + b[3] = byte(v >> 7) + b[4] = (b[4] & 0x01) | byte((v<<1)&0xFE) +} + +// rewriteTS adds delta to a PTS/DTS field in place. +func rewriteTS(b []byte, delta int64) bool { + writeTS(b, addMod33(readTS(b), delta)) + return true +} + +// rewritePCR adds delta (90 kHz) to a 6-byte PCR field. The 27 MHz PCR value +// is base*300 + ext; adding delta90k*300 is equivalent to adding delta90k to +// base and leaving ext untouched. +// +// b[0..3] + top bit of b[4] : program_clock_reference_base (33 bits) +// b[4] bits 6..1 : reserved +// b[4] bit 0 + b[5] : program_clock_reference_extension (9 bits) +func rewritePCR(b []byte, delta90k int64) bool { + base := (int64(b[0]) << 25) | + (int64(b[1]) << 17) | + (int64(b[2]) << 9) | + (int64(b[3]) << 1) | + (int64(b[4]) >> 7) + base = addMod33(base, delta90k) + b[0] = byte(base >> 25) + b[1] = byte(base >> 17) + b[2] = byte(base >> 9) + b[3] = byte(base >> 1) + // Preserve b[4] low 7 bits (reserved + ext high bit); set bit 7 = base LSB. + b[4] = byte((base&0x01)<<7) | (b[4] & 0x7F) + // b[5] (ext low byte) unchanged. 
+ return true +} diff --git a/internal/importer/archive/iso/tsremux_test.go b/internal/importer/archive/iso/tsremux_test.go new file mode 100644 index 000000000..9c3caa885 --- /dev/null +++ b/internal/importer/archive/iso/tsremux_test.go @@ -0,0 +1,218 @@ +package iso + +import "testing" + +// --- synthetic BDAV packet builders ----------------------------------------- + +// newBDAVPacket returns a zeroed 192-byte BDAV source packet with the sync +// byte set. afc selects adaptation_field_control (0x01 payload-only, +// 0x03 adaptation+payload). pusi sets payload_unit_start_indicator. +func newBDAVPacket(pid uint16, pusi bool, afc byte) []byte { + p := make([]byte, bdavPacketLen) + ts := p[4:] // 188-byte TS packet + ts[0] = tsSync + ts[1] = byte(pid>>8) & 0x1F + if pusi { + ts[1] |= 0x40 + } + ts[2] = byte(pid) + ts[3] = (afc << 4) // scrambling 00, CC 0 + return p +} + +// setPTS writes a PTS-only PES header into a payload-only BDAV packet and +// returns the packet. The PES header is written at byte 8 of the source +// packet (4-byte TP_extra_header + 4-byte TS header). +func setPTS(p []byte, pts int64) []byte { + pl := p[8:] // payload of the 188-byte TS packet (BDAV off 4 + TS header 4) + pl[0], pl[1], pl[2] = 0x00, 0x00, 0x01 + pl[3] = 0xE0 // video stream_id + pl[4], pl[5] = 0x00, 0x00 + pl[6] = 0x80 // marker '10', no flags + pl[7] = 0x80 // PTS_DTS_flags = '10' (PTS only) + pl[8] = 0x05 // PES_header_data_length + // Seed the PTS field prefix nibble (0010) + marker bits, then encode. + pl[9] = 0x21 // 0010 ...1 + pl[11] = 0x01 // marker + pl[13] = 0x01 // marker + writeTS(pl[9:14], pts) + return p +} + +// setPTSDTS writes a PTS+DTS PES header. 
+func setPTSDTS(p []byte, pts, dts int64) []byte { + pl := p[8:] + pl[0], pl[1], pl[2] = 0x00, 0x00, 0x01 + pl[3] = 0xE0 + pl[4], pl[5] = 0x00, 0x00 + pl[6] = 0x80 + pl[7] = 0xC0 // PTS_DTS_flags = '11' + pl[8] = 0x0A // 10 bytes (PTS+DTS) + pl[9] = 0x31 // prefix 0011 for PTS-when-DTS-present + pl[11], pl[13] = 0x01, 0x01 + writeTS(pl[9:14], pts) + pl[14] = 0x11 // prefix 0001 for DTS + pl[16], pl[18] = 0x01, 0x01 + writeTS(pl[14:19], dts) + return p +} + +// setPCR writes a PCR into an adaptation+payload BDAV packet (afc 0x03). +func setPCR(p []byte, pcrBase int64) []byte { + ts := p[4:] + // adaptation_field_length: 1 (flags) + 6 (PCR) = 7. + ts[4] = 7 + ts[5] = 0x10 // PCR_flag + b := ts[6:12] + b[0] = byte(pcrBase >> 25) + b[1] = byte(pcrBase >> 17) + b[2] = byte(pcrBase >> 9) + b[3] = byte(pcrBase >> 1) + b[4] = byte((pcrBase&0x01)<<7) // ext = 0 + b[5] = 0x00 + return p +} + +func readPCRBase(p []byte) int64 { + b := p[4:][6:12] + return (int64(b[0]) << 25) | (int64(b[1]) << 17) | (int64(b[2]) << 9) | + (int64(b[3]) << 1) | (int64(b[4]) >> 7) +} + +// --- tests ------------------------------------------------------------------ + +func TestReadWriteTS_RoundTrip(t *testing.T) { + cases := []int64{0, 1, 90000, 1048500, ptsModulus - 1, (1 << 32) + 12345} + for _, want := range cases { + b := []byte{0x21, 0x00, 0x01, 0x00, 0x01} // prefix + markers + writeTS(b, want) + got := readTS(b) + if got != want { + t.Errorf("round-trip PTS: wrote %d, read %d", want, got) + } + // Marker bits must be preserved (bit 0 of b[0], b[2], b[4]). + if b[0]&0x01 != 0x01 || b[2]&0x01 != 0x01 || b[4]&0x01 != 0x01 { + t.Errorf("marker bits clobbered for %d: % x", want, b) + } + // Prefix nibble preserved. 
+ if b[0]&0xF0 != 0x20 { + t.Errorf("prefix nibble clobbered for %d: %#x", want, b[0]) + } + } +} + +func TestRewritePacket_PTSOnly(t *testing.T) { + const base = int64(1048500) // ~11.65s + const delta = int64(90000) // +1s + p := setPTS(newBDAVPacket(0x100, true, 0x01), base) + if !rewritePacket(p, bdavPacketLen, delta) { + t.Fatal("rewritePacket reported no change for a PTS packet") + } + got := readTS(p[8:][9:14]) + if got != base+delta { + t.Errorf("PTS after rewrite = %d, want %d", got, base+delta) + } +} + +func TestRewritePacket_PTSDTS_andPCR(t *testing.T) { + const pts = int64(900000) + const dts = int64(810000) + const delta = int64(45000) + p := setPTSDTS(newBDAVPacket(0x100, true, 0x01), pts, dts) + rewritePacket(p, bdavPacketLen, delta) + if g := readTS(p[8:][9:14]); g != pts+delta { + t.Errorf("PTS = %d, want %d", g, pts+delta) + } + if g := readTS(p[8:][14:19]); g != dts+delta { + t.Errorf("DTS = %d, want %d", g, dts+delta) + } + + const pcrBase = int64(1234567) + pc := setPCR(newBDAVPacket(0x100, false, 0x03), pcrBase) + rewritePacket(pc, bdavPacketLen, delta) + if g := readPCRBase(pc); g != pcrBase+delta { + t.Errorf("PCR base = %d, want %d", g, pcrBase+delta) + } +} + +func TestRewritePacket_NoTimestampLeavesUnchanged(t *testing.T) { + // Continuation packet (PUSI=0, payload-only, no PES header). + p := newBDAVPacket(0x100, false, 0x01) + cp := make([]byte, len(p)) + copy(cp, p) + if rewritePacket(p, bdavPacketLen, 90000) { + t.Error("rewritePacket changed a packet with no timestamps") + } + for i := range p { + if p[i] != cp[i] { + t.Fatalf("byte %d changed in a no-timestamp packet", i) + } + } +} + +// TestRewrite_TwoClipsBecomeMonotonic is the FEASIBILITY GATE: two clips with +// independent PTS bases, byte-concatenated, become a single monotonic timeline +// after per-clip delta rewriting, and last−first equals the sum of the clips' +// durations. This proves the whole continuous-timeline approach before any +// metadata/VFS plumbing is built. 
+func TestRewrite_TwoClipsBecomeMonotonic(t *testing.T) { + const hz = 90000 + // Clip 0: base 11.65s, 3 packets spaced 1s, duration 30s. + // Clip 1: base 0.5s, 3 packets spaced 1s, duration 20s. + clip0Base := int64(11.65 * hz) + clip1Base := int64(0.5 * hz) + clip0Dur := int64(30 * hz) + clip1Dur := int64(20 * hz) + + mkClip := func(base int64, n int) [][]byte { + out := make([][]byte, n) + for i := range n { + out[i] = setPTS(newBDAVPacket(0x100, true, 0x01), base+int64(i)*hz) + } + return out + } + clip0 := mkClip(clip0Base, 3) + clip1 := mkClip(clip1Base, 3) + + // timeline_start: clip0 keeps its own base (start the file at 11.65s), + // clip1 begins where clip0's authored duration ends. + timelineStart0 := clip0Base + timelineStart1 := clip0Base + clip0Dur + delta0 := timelineStart0 - clip0Base // 0 + delta1 := timelineStart1 - clip1Base + + var ptsSeq []int64 + for _, p := range clip0 { + rewritePacket(p, bdavPacketLen, delta0) + ptsSeq = append(ptsSeq, readTS(p[8:][9:14])) + } + for _, p := range clip1 { + rewritePacket(p, bdavPacketLen, delta1) + ptsSeq = append(ptsSeq, readTS(p[8:][9:14])) + } + + // Strictly monotonic across the whole concatenation. + for i := 1; i < len(ptsSeq); i++ { + if ptsSeq[i] <= ptsSeq[i-1] { + t.Fatalf("PTS not monotonic at index %d: %d <= %d (full seq: %v)", i, ptsSeq[i], ptsSeq[i-1], ptsSeq) + } + } + + // ffprobe-style duration estimate: last − first. The last packet sits at + // timelineStart1 + 2s; first at clip0Base. Their delta must equal the + // real elapsed time across the unified timeline. + first := ptsSeq[0] + last := ptsSeq[len(ptsSeq)-1] + wantSpan := (timelineStart1 + 2*hz) - clip0Base + if last-first != wantSpan { + t.Errorf("timeline span = %d ticks, want %d", last-first, wantSpan) + } + + // Clip 1's first packet must land exactly at timelineStart1, proving it + // was lifted off its own 0.5s base onto the unified timeline rather than + // resetting (which is what breaks ffprobe today). 
+ if ptsSeq[3] != timelineStart1 { + t.Errorf("clip1 first PTS = %d, want timelineStart1 %d", ptsSeq[3], timelineStart1) + } + _ = clip1Dur // documented as clip 1's authored length; not needed past timelineStart1 +} From dbcf1d353153e660753df63d2d5d94ee1e635abe Mon Sep 17 00:00:00 2001 From: javi11 Date: Sat, 30 May 2026 12:23:18 +0200 Subject: [PATCH 27/30] feat(iso): persist per-clip boundary table for continuous-timeline remux --- internal/importer/archive/common.go | 14 +++ internal/importer/archive/content_metadata.go | 10 ++ internal/importer/archive/iso/bluray.go | 17 +++ internal/importer/archive/iso/processor.go | 11 +- internal/importer/archive/iso/types.go | 7 ++ internal/importer/archive/iso_expansion.go | 41 ++++++- .../importer/archive/iso_expansion_test.go | 99 ++++++++++++++++ internal/metadata/proto/metadata.pb.go | 106 +++++++++++++++--- internal/metadata/proto/metadata.proto | 19 ++++ 9 files changed, 307 insertions(+), 17 deletions(-) diff --git a/internal/importer/archive/common.go b/internal/importer/archive/common.go index 8da0f2dc1..f130c8ac3 100644 --- a/internal/importer/archive/common.go +++ b/internal/importer/archive/common.go @@ -62,6 +62,20 @@ type Content struct { // are sorted by size descending (1 = largest / main feature). // Zero means this Content did not come from an ISO. ISOExpansionIndex int `json:"iso_expansion_index,omitempty"` + // ClipBoundaries is the per-clip timeline table for a byte-concatenated + // multi-clip Blu-ray main feature. Empty for everything else. At read + // time a TS filter adds each clip's Delta90k to the timestamps inside + // its byte range to build one continuous timeline. + ClipBoundaries []ClipBoundary `json:"clip_boundaries,omitempty"` +} + +// ClipBoundary mirrors metapb.ClipBoundary at the archive layer: one clip in a +// concatenated multi-clip BD main feature. 
ByteLen is the clip's size in the +// virtual file; Delta90k is the signed 90 kHz timeline offset for packets +// inside this clip's byte range. +type ClipBoundary struct { + ByteLen int64 `json:"byte_len"` + Delta90k int64 `json:"delta_90k"` } // GetContentSegmentCount returns the total number of segments for a Content, diff --git a/internal/importer/archive/content_metadata.go b/internal/importer/archive/content_metadata.go index 221e9dc3f..c49a8b8e3 100644 --- a/internal/importer/archive/content_metadata.go +++ b/internal/importer/archive/content_metadata.go @@ -44,6 +44,16 @@ func NewFileMetadataFromContent( meta.AesIv = content.AesIV } + // Carry the per-clip timeline table for multi-clip BD main features. + // Empty for everything else, which keeps the read-path remux filter + // disabled for all other files. + for _, cb := range content.ClipBoundaries { + meta.ClipBoundaries = append(meta.ClipBoundaries, &metapb.ClipBoundary{ + ByteLen: cb.ByteLen, + Delta_90K: cb.Delta90k, + }) + } + // Populate nested sources. For multi-extent encrypted volumes (e.g. a // Blu-ray main feature with hundreds of extents that all read from the // same encrypted RAR) every NestedSource shares the same Segments slice diff --git a/internal/importer/archive/iso/bluray.go b/internal/importer/archive/iso/bluray.go index aa12ffdfb..1da605939 100644 --- a/internal/importer/archive/iso/bluray.go +++ b/internal/importer/archive/iso/bluray.go @@ -18,6 +18,11 @@ type MainFeaturePlaylist struct { Streams []isoFileEntry // ordered M2TS entries (duplicates preserved if the playlist legitimately repeats a clip) UniqueClipBytes uint64 // sum of file sizes of UNIQUE clips referenced; the primary scoring metric UniqueClipCount int // number of distinct clips referenced; scoring tiebreaker + // ClipInTimes and ClipDurations are parallel to Streams: the MPLS + // PlayItem IN_time and (OUT−IN) for each stream, in 45 kHz ticks. They + // drive the continuous-timeline remux of the concatenated clips. 
+ ClipInTimes []int64 + ClipDurations []int64 } // ResolveMainFeature inspects the entries returned by ListISOFiles for a @@ -87,6 +92,8 @@ func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEn // the same ~80s menu M2TS would score higher than a real 30-chapter // main feature, and we'd serve 30+ GB of looped menu. streams := make([]isoFileEntry, 0, len(pl.PlayItems)) + inTimes := make([]int64, 0, len(pl.PlayItems)) + durations := make([]int64, 0, len(pl.PlayItems)) seenClips := make(map[string]struct{}, len(pl.PlayItems)) var uniqueClipBytes uint64 for _, it := range pl.PlayItems { @@ -99,6 +106,14 @@ func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEn continue } streams = append(streams, entry) + // Per-clip timing, parallel to streams (45 kHz). OUT may be < IN + // on malformed entries; clamp the span to 0 in that case. + var dur int64 + if it.OutTime > it.InTime { + dur = int64(it.OutTime - it.InTime) + } + inTimes = append(inTimes, int64(it.InTime)) + durations = append(durations, dur) if _, dup := seenClips[name]; !dup { seenClips[name] = struct{}{} uniqueClipBytes += entry.size @@ -114,6 +129,8 @@ func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEn Streams: streams, UniqueClipBytes: uniqueClipBytes, UniqueClipCount: len(seenClips), + ClipInTimes: inTimes, + ClipDurations: durations, } slog.DebugContext(ctx, "Blu-ray playlist candidate", "playlist", pe.path, diff --git a/internal/importer/archive/iso/processor.go b/internal/importer/archive/iso/processor.go index 1b96b29fe..358660f0a 100644 --- a/internal/importer/archive/iso/processor.go +++ b/internal/importer/archive/iso/processor.go @@ -68,8 +68,15 @@ func AnalyzeISO( if mf := ResolveMainFeature(ctx, rs, entries); mf != nil { out.DurationTicks = mf.DurationTicks - for _, e := range mf.Streams { - out.MainFeature = append(out.MainFeature, buildFileContent(src, e)) + for i, e := range mf.Streams { + fc := 
buildFileContent(src, e) + // Carry per-clip MPLS timing (45 kHz) for the continuous-timeline + // remux. ClipInTimes/ClipDurations are parallel to Streams. + if i < len(mf.ClipInTimes) { + fc.InTimeTicks = mf.ClipInTimes[i] + fc.DurationTicks = mf.ClipDurations[i] + } + out.MainFeature = append(out.MainFeature, fc) } } diff --git a/internal/importer/archive/iso/types.go b/internal/importer/archive/iso/types.go index 09e0aad5d..9425ec9cf 100644 --- a/internal/importer/archive/iso/types.go +++ b/internal/importer/archive/iso/types.go @@ -22,6 +22,13 @@ type ISOFileContent struct { Size int64 // Total file size in bytes (sum of Sources.InnerLength) NzbdavID string // Carried from parent archive Content Sources []ISONestedSource + // InTimeTicks and DurationTicks are the MPLS PlayItem IN_time and + // (OUT−IN) for this clip, in 45 kHz ticks. Populated only for + // MainFeature clips; zero otherwise. They drive the continuous-timeline + // remux: InTimeTicks is the clip's own PTS base (×2 → 90 kHz), + // DurationTicks is its authored span. + InTimeTicks int64 + DurationTicks int64 } // ISONestedSource is one extent of an inner file. For unencrypted ISOs, diff --git a/internal/importer/archive/iso_expansion.go b/internal/importer/archive/iso_expansion.go index 3896abbca..153563afe 100644 --- a/internal/importer/archive/iso_expansion.go +++ b/internal/importer/archive/iso_expansion.go @@ -140,31 +140,69 @@ func buildMainFeatureContent(ctx context.Context, groupKey string, g []analyzedI firstISOName string nzbdavID string ) + // Per-clip timeline table for the continuous-timeline remux. We walk + // clips in output order across every disc, building a running 90 kHz + // timeline: clip 0 keeps its native base (delta 0); each later clip is + // lifted to start where the cumulative authored duration places it. 
+ // timeline_start_90k[k] = base0_90k + 2 * Σ_{j metadata.SegmentData @@ -616,12 +693,13 @@ var file_internal_metadata_proto_metadata_proto_depIdxs = []int32{ 2, // 4: metadata.FileMetadata.segment_data:type_name -> metadata.SegmentData 3, // 5: metadata.FileMetadata.par2_files:type_name -> metadata.Par2FileReference 4, // 6: metadata.FileMetadata.nested_sources:type_name -> metadata.NestedSegmentSource - 4, // 7: metadata.FileMetadata.shared_outer_sources:type_name -> metadata.NestedSegmentSource - 8, // [8:8] is the sub-list for method output_type - 8, // [8:8] is the sub-list for method input_type - 8, // [8:8] is the sub-list for extension type_name - 8, // [8:8] is the sub-list for extension extendee - 0, // [0:8] is the sub-list for field type_name + 5, // 7: metadata.FileMetadata.clip_boundaries:type_name -> metadata.ClipBoundary + 4, // 8: metadata.FileMetadata.shared_outer_sources:type_name -> metadata.NestedSegmentSource + 9, // [9:9] is the sub-list for method output_type + 9, // [9:9] is the sub-list for method input_type + 9, // [9:9] is the sub-list for extension type_name + 9, // [9:9] is the sub-list for extension extendee + 0, // [0:9] is the sub-list for field type_name } func init() { file_internal_metadata_proto_metadata_proto_init() } @@ -635,7 +713,7 @@ func file_internal_metadata_proto_metadata_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_internal_metadata_proto_metadata_proto_rawDesc), len(file_internal_metadata_proto_metadata_proto_rawDesc)), NumEnums: 2, - NumMessages: 4, + NumMessages: 5, NumExtensions: 0, NumServices: 0, }, diff --git a/internal/metadata/proto/metadata.proto b/internal/metadata/proto/metadata.proto index 3f2954070..30c3ca65d 100644 --- a/internal/metadata/proto/metadata.proto +++ b/internal/metadata/proto/metadata.proto @@ -52,6 +52,16 @@ message NestedSegmentSource { int32 shared_outer_source_index = 7; } +// ClipBoundary is one clip in a 
byte-concatenated multi-clip BD main feature. +// byte_len is the clip's size in the virtual file (a whole number of 192-byte +// BDAV source packets). delta_90k is the signed 90 kHz offset added to PTS/DTS +// (and to the 90 kHz base of each PCR) for packets inside this +// clip's byte range, lifting the clip onto the unified continuous timeline. +message ClipBoundary { + int64 byte_len = 1; + int64 delta_90k = 2; +} + // FileMetadata represents a single virtual file in the filesystem // The filename comes from the actual metadata filename on disk message FileMetadata { @@ -71,6 +81,15 @@ message FileMetadata { string nzbdav_id = 14; // ID to maintain compatibility with nzbdav repeated NestedSegmentSource nested_sources = 15; // Nested RAR sources (when file is inside inner RAR within outer RAR) + // Per-clip timeline table for Blu-ray main-feature virtual files that + // byte-concatenate multiple M2TS clips (each with its own independent + // PTS/DTS/PCR base). At read time a TS-aware filter adds each clip's + // delta_90k to the timestamps inside that clip's byte range, producing a + // single continuous timeline so ffprobe/players report the correct + // duration and seek accurately. Empty for every other file type, which + // disables the filter entirely (zero overhead, total safety). + repeated ClipBoundary clip_boundaries = 17; + // Outer sources shared by groups of NestedSegmentSource entries. // Used for multi-extent encrypted volumes — e.g. a Blu-ray main feature // with hundreds of extents that all read from the same encrypted RAR. 
From 92af9841a753eb2995a94ecca97911468f02766d Mon Sep 17 00:00:00 2001 From: javi11 Date: Sat, 30 May 2026 12:29:30 +0200 Subject: [PATCH 28/30] feat(nzbfs): apply continuous-timeline TS remux on ISO-merged BD reads --- .../nzbfilesystem/metadata_remote_file.go | 47 +++- .../archive/iso => nzbfilesystem}/tsremux.go | 2 +- internal/nzbfilesystem/tsremux_reader.go | 238 ++++++++++++++++++ internal/nzbfilesystem/tsremux_reader_test.go | 175 +++++++++++++ .../iso => nzbfilesystem}/tsremux_test.go | 2 +- 5 files changed, 460 insertions(+), 4 deletions(-) rename internal/{importer/archive/iso => nzbfilesystem}/tsremux.go (99%) create mode 100644 internal/nzbfilesystem/tsremux_reader.go create mode 100644 internal/nzbfilesystem/tsremux_reader_test.go rename internal/{importer/archive/iso => nzbfilesystem}/tsremux_test.go (99%) diff --git a/internal/nzbfilesystem/metadata_remote_file.go b/internal/nzbfilesystem/metadata_remote_file.go index 8d2c027b9..93bb59275 100644 --- a/internal/nzbfilesystem/metadata_remote_file.go +++ b/internal/nzbfilesystem/metadata_remote_file.go @@ -253,8 +253,9 @@ func (mrf *MetadataRemoteFile) OpenFile(ctx context.Context, name string) (bool, Salt: fileMeta.Salt, AesKey: fileMeta.AesKey, AesIv: fileMeta.AesIv, - SegmentData: fileMeta.SegmentData, - NestedSources: fileMeta.NestedSources, + SegmentData: fileMeta.SegmentData, + NestedSources: fileMeta.NestedSources, + ClipBoundaries: fileMeta.ClipBoundaries, } // Create a metadata-based virtual file handle @@ -766,6 +767,10 @@ type fileHandleMeta struct { AesIv []byte SegmentData []*metapb.SegmentData NestedSources []*metapb.NestedSegmentSource + // ClipBoundaries is the per-clip timeline table for a multi-clip BD main + // feature. Non-empty enables the continuous-timeline TS remux on reads; + // empty (every other file) bypasses it entirely. 
+ ClipBoundaries []*metapb.ClipBoundary } // MetadataVirtualFile implements afero.File for metadata-backed virtual files @@ -790,6 +795,11 @@ type MetadataVirtualFile struct { segmentStore usenet.SegmentStore // optional segment cache segmentIndexOnce sync.Once // guards lazy init of segmentIndex + // clipSpans is the lazily-built absolute byte-range + delta table for the + // continuous-timeline remux, derived once from meta.ClipBoundaries. + clipSpans []clipSpan + clipSpansOnce sync.Once + // Reader state and position tracking reader io.ReadCloser readerInitialized bool @@ -1051,6 +1061,16 @@ func (mvf *MetadataVirtualFile) ReadAtContext(readCtx context.Context, p []byte, // createReaderAtOffset creates an independent reader for reading at a specific offset. // This reader is self-contained and can be used concurrently with other readers. func (mvf *MetadataVirtualFile) createReaderAtOffset(start, end int64) (io.ReadCloser, error) { + reader, err := mvf.createRawReaderAtOffset(start, end) + if err != nil { + return nil, err + } + return mvf.maybeWrapRemux(reader, start), nil +} + +// createRawReaderAtOffset builds the underlying reader for [start,end] without +// the continuous-timeline remux wrapper. +func (mvf *MetadataVirtualFile) createRawReaderAtOffset(start, end int64) (io.ReadCloser, error) { if mvf.poolManager == nil { return nil, ErrNoUsenetPool } @@ -1072,6 +1092,23 @@ func (mvf *MetadataVirtualFile) createReaderAtOffset(start, end int64) (io.ReadC return mvf.createUsenetReader(mvf.ctx, start, end) } +// maybeWrapRemux wraps reader in a continuous-timeline TS remux when the file +// carries a per-clip boundary table (multi-clip BD main feature). startOff is +// the absolute file offset of reader's first byte. For every other file the +// table is empty and reader is returned unchanged (zero overhead). 
+func (mvf *MetadataVirtualFile) maybeWrapRemux(reader io.ReadCloser, startOff int64) io.ReadCloser { + if len(mvf.meta.ClipBoundaries) == 0 { + return reader + } + mvf.clipSpansOnce.Do(func() { + mvf.clipSpans = buildClipSpans(mvf.meta.ClipBoundaries) + }) + if len(mvf.clipSpans) == 0 { + return reader + } + return newTSRemuxReader(reader, mvf.clipSpans, startOff) +} + // createEncryptedReaderAtOffset creates an encrypted reader for a specific offset range func (mvf *MetadataVirtualFile) createEncryptedReaderAtOffset(start, end int64) (io.ReadCloser, error) { switch mvf.meta.Encryption { @@ -1345,6 +1382,12 @@ func (mvf *MetadataVirtualFile) ensureReader() error { mvf.reader = ur } + // Apply the continuous-timeline remux for multi-clip BD main features. + // No-op (returns the same reader) for every other file. The reader yields + // bytes from absolute offset `start`, which the wrapper needs for packet + // framing and per-clip delta selection. + mvf.reader = mvf.maybeWrapRemux(mvf.reader, start) + mvf.readerInitialized = true return nil } diff --git a/internal/importer/archive/iso/tsremux.go b/internal/nzbfilesystem/tsremux.go similarity index 99% rename from internal/importer/archive/iso/tsremux.go rename to internal/nzbfilesystem/tsremux.go index b753a7ab1..11db981c4 100644 --- a/internal/importer/archive/iso/tsremux.go +++ b/internal/nzbfilesystem/tsremux.go @@ -1,4 +1,4 @@ -package iso +package nzbfilesystem // Continuous-timeline remux core for Blu-ray main-feature virtual files. 
// diff --git a/internal/nzbfilesystem/tsremux_reader.go b/internal/nzbfilesystem/tsremux_reader.go new file mode 100644 index 000000000..402c5feb8 --- /dev/null +++ b/internal/nzbfilesystem/tsremux_reader.go @@ -0,0 +1,238 @@ +package nzbfilesystem + +import ( + "bytes" + "io" + + metapb "github.com/javi11/altmount/internal/metadata/proto" +) + +// clipSpan is one clip's absolute byte range in the virtual file plus the +// 90 kHz timeline delta to add to every timestamp inside it. +type clipSpan struct { + start int64 // inclusive absolute byte offset + end int64 // inclusive absolute byte offset (start + byteLen - 1) + delta int64 // 90 kHz offset added to PTS/DTS/PCR-base of packets in this clip +} + +// buildClipSpans turns the proto ClipBoundary table (byte_len + delta per clip, +// in output order) into absolute byte ranges via a prefix sum. Returns nil +// when the table is empty, which keeps the remux disabled. +func buildClipSpans(boundaries []*metapb.ClipBoundary) []clipSpan { + if len(boundaries) == 0 { + return nil + } + spans := make([]clipSpan, 0, len(boundaries)) + var off int64 + for _, b := range boundaries { + if b.ByteLen <= 0 { + continue + } + spans = append(spans, clipSpan{start: off, end: off + b.ByteLen - 1, delta: b.Delta_90K}) + off += b.ByteLen + } + if len(spans) == 0 { + return nil + } + return spans +} + +// tsRemuxReader wraps an underlying reader that yields the bytes of a +// byte-concatenated multi-clip Blu-ray main feature starting at absolute offset +// startOff. As bytes stream through, it frames them into BDAV/TS source packets +// (aligned to each clip's byte start) and adds that clip's 90 kHz delta to the +// PTS/DTS/PCR timestamps, producing a single continuous timeline. The transform +// is byte-length preserving, so the wrapper is a drop-in io.ReadCloser that does +// not change offsets or sizes. +// +// It is a streaming reader: it buffers across Read calls so packet framing is +// maintained for an entire sequential run. 
Only the leading bytes of a read +// that starts mid-packet are passed through unrewritten (their timestamps, if +// any, live in the packet header before startOff); every fully-streamed packet +// is rewritten. +type tsRemuxReader struct { + inner io.ReadCloser + spans []clipSpan + absPos int64 // absolute offset of the next byte to pull from inner + packetSize int // 192 (BDAV) or 188; 0 until detected + disabled bool // true if the stream isn't recognisable TS → pure passthrough + out bytes.Buffer // rewritten bytes ready to deliver + probe []byte // bytes read for packet-size detection, not yet framed +} + +// newTSRemuxReader wraps inner. startOff is the absolute file offset of inner's +// first byte. spans must be non-empty (callers gate on that). +func newTSRemuxReader(inner io.ReadCloser, spans []clipSpan, startOff int64) *tsRemuxReader { + return &tsRemuxReader{inner: inner, spans: spans, absPos: startOff} +} + +func (r *tsRemuxReader) Close() error { return r.inner.Close() } + +// clipFor returns the span containing absolute offset off, or nil if past the +// last clip (then bytes are passed through raw). +func (r *tsRemuxReader) clipFor(off int64) *clipSpan { + // Binary search: find the last span whose start <= off. + lo, hi := 0, len(r.spans)-1 + idx := -1 + for lo <= hi { + mid := (lo + hi) / 2 + if r.spans[mid].start <= off { + idx = mid + lo = mid + 1 + } else { + hi = mid - 1 + } + } + if idx < 0 || off > r.spans[idx].end { + return nil + } + return &r.spans[idx] +} + +func (r *tsRemuxReader) Read(p []byte) (int, error) { + if len(p) == 0 { + return 0, nil + } + // Fill `out` until it can satisfy the request or inner is exhausted. 
+ for r.out.Len() < len(p) { + if err := r.fill(); err != nil { + if r.out.Len() > 0 { + break // deliver what we have; surface the error on the next call + } + n, _ := r.out.Read(p) + return n, err + } + } + return r.out.Read(p) +} + +// fill pulls the next chunk from inner, rewrites it if it is a complete +// packet aligned within its clip, and appends it to out. Returns io.EOF when +// inner is exhausted. +func (r *tsRemuxReader) fill() error { + // Detect packet size once from the head of the stream. + if r.packetSize == 0 && !r.disabled { + if err := r.detect(); err != nil { + return err + } + if r.disabled { + // detect() already moved any probed bytes into out as passthrough. + return nil + } + } + + if r.disabled { + return r.passthrough() + } + + clip := r.clipFor(r.absPos) + if clip == nil { + // Past the last clip (shouldn't happen for a well-formed table) — + // stream the remainder unmodified. + return r.passthrough() + } + + // Bytes remaining to the next packet boundary within this clip. + intoClip := r.absPos - clip.start + rem := r.packetSize - int(intoClip%int64(r.packetSize)) + aligned := rem == r.packetSize + want := rem + // Never read across a clip boundary in one chunk. + if r.absPos+int64(want) > clip.end+1 { + want = int(clip.end + 1 - r.absPos) + aligned = false // a clip whose length isn't a packet multiple: tail passthrough + } + + chunk := make([]byte, want) + nr, err := io.ReadFull(r.inner, chunk) + chunk = chunk[:nr] + if nr > 0 { + if aligned && nr == r.packetSize { + rewritePacket(chunk, r.packetSize, clip.delta) + } + r.out.Write(chunk) + r.absPos += int64(nr) + } + if err == io.ErrUnexpectedEOF { + err = io.EOF + } + return err +} + +// passthrough copies a chunk from inner to out without rewriting. 
+func (r *tsRemuxReader) passthrough() error { + chunk := make([]byte, 64*1024) + nr, err := r.inner.Read(chunk) + if nr > 0 { + r.out.Write(chunk[:nr]) + r.absPos += int64(nr) + } + return err +} + +// detect reads up to two packets' worth from inner to determine the packet +// size, then frames from there. If the stream isn't recognisable TS, it sets +// disabled and emits whatever was probed as passthrough so no bytes are lost. +func (r *tsRemuxReader) detect() error { + // Read enough to cover two BDAV packets for a confident detection. + const probeLen = 2 * bdavPacketLen + buf := make([]byte, probeLen) + nr, err := io.ReadFull(r.inner, buf) + buf = buf[:nr] + r.probe = buf + if nr == 0 { + if err == io.ErrUnexpectedEOF { + err = io.EOF + } + return err + } + + ps := detectTSPacketSize(buf) + if ps == 0 { + // Not TS we understand — disable rewriting, stream raw. + r.disabled = true + r.out.Write(buf) + r.absPos += int64(nr) + r.probe = nil + return nil + } + r.packetSize = ps + + // Frame the probed bytes packet-by-packet (they begin at r.absPos, which + // is the reader's start — assumed packet-aligned for the head read; if it + // isn't, the leading mid-packet bytes are emitted raw by the generic path). + consumed := 0 + for consumed+ps <= len(buf) { + clip := r.clipFor(r.absPos) + pkt := buf[consumed : consumed+ps] + intoClip := r.absPos - clipStartOrZero(clip) + if clip != nil && intoClip%int64(ps) == 0 && r.absPos+int64(ps) <= clip.end+1 { + rewritePacket(pkt, ps, clip.delta) + } + r.out.Write(pkt) + r.absPos += int64(ps) + consumed += ps + } + // Any trailing partial-packet bytes from the probe: stash so the next + // fill() reads the rest of that packet and frames correctly. Simplest: + // emit them raw (they are at most ps-1 bytes; a real stream's next read + // continues the packet, but to keep framing simple at the probe seam we + // pass these through). 
For BDAV with a packet-aligned start this branch + // is never taken (probeLen is a multiple of 192). + if consumed < len(buf) { + r.out.Write(buf[consumed:]) + r.absPos += int64(len(buf) - consumed) + } + r.probe = nil + if err == io.ErrUnexpectedEOF { + err = nil // partial probe is fine; more may follow + } + return err +} + +func clipStartOrZero(c *clipSpan) int64 { + if c == nil { + return 0 + } + return c.start +} diff --git a/internal/nzbfilesystem/tsremux_reader_test.go b/internal/nzbfilesystem/tsremux_reader_test.go new file mode 100644 index 000000000..b2dd0717d --- /dev/null +++ b/internal/nzbfilesystem/tsremux_reader_test.go @@ -0,0 +1,175 @@ +package nzbfilesystem + +import ( + "bytes" + "io" + "testing" + + metapb "github.com/javi11/altmount/internal/metadata/proto" +) + +// memReadCloser serves a byte slice as an io.ReadCloser. +type memReadCloser struct{ r *bytes.Reader } + +func newMem(b []byte) *memReadCloser { return &memReadCloser{r: bytes.NewReader(b)} } +func (m *memReadCloser) Read(p []byte) (int, error) { return m.r.Read(p) } +func (m *memReadCloser) Close() error { return nil } + +// buildTwoClipStream builds a raw byte concatenation of two clips of BDAV +// packets (each packet carrying a PTS), plus the clipSpans that lift them onto +// one continuous timeline keeping clip 0's native base. Returns the raw bytes, +// the spans, and the expected monotonic PTS sequence after rewrite. 
+func buildTwoClipStream(t *testing.T) (raw []byte, spans []clipSpan, wantPTS []int64) { + t.Helper() + const hz = 90000 + clip0Base := int64(11.65 * hz) + clip1Base := int64(0.5 * hz) + clip0Dur := int64(30 * hz) + + var buf bytes.Buffer + mk := func(base int64, n int) { + for i := range n { + buf.Write(setPTS(newBDAVPacket(0x100, true, 0x01), base+int64(i)*hz)) + } + } + mk(clip0Base, 4) // clip 0: 4 packets + clip0Len := int64(buf.Len()) + mk(clip1Base, 3) // clip 1: 3 packets + total := int64(buf.Len()) + + base0 := clip0Base + timelineStart1 := base0 + clip0Dur + spans = []clipSpan{ + {start: 0, end: clip0Len - 1, delta: base0 - clip0Base}, // 0 + {start: clip0Len, end: total - 1, delta: timelineStart1 - clip1Base}, // lift clip1 + } + + // Expected PTS after rewrite. + for i := range 4 { + wantPTS = append(wantPTS, clip0Base+int64(i)*hz) // delta 0 + } + for i := range 3 { + wantPTS = append(wantPTS, timelineStart1+int64(i)*hz) + } + return buf.Bytes(), spans, wantPTS +} + +// ptsAtPacket decodes the PTS from the n-th 192-byte BDAV packet in b. +func ptsAtPacket(b []byte, n int) int64 { + pkt := b[n*bdavPacketLen : (n+1)*bdavPacketLen] + // PES payload at TS offset 4 → BDAV offset 8; PTS at payload+9. 
+ return readTS(pkt[8:][9:14]) +} + +func TestTSRemuxReader_FullReadMonotonic(t *testing.T) { + raw, spans, wantPTS := buildTwoClipStream(t) + + out, err := io.ReadAll(newTSRemuxReader(newMem(raw), spans, 0)) + if err != nil { + t.Fatalf("ReadAll: %v", err) + } + if len(out) != len(raw) { + t.Fatalf("output length %d != input length %d (must be byte-preserving)", len(out), len(raw)) + } + npkt := len(out) / bdavPacketLen + var prev int64 = -1 + for i := range npkt { + got := ptsAtPacket(out, i) + if got != wantPTS[i] { + t.Errorf("packet %d PTS = %d, want %d", i, got, wantPTS[i]) + } + if got <= prev { + t.Errorf("PTS not monotonic at packet %d: %d <= %d", i, got, prev) + } + prev = got + } +} + +// TestTSRemuxReader_ChunkSizeInvariant: the rewritten output must be identical +// regardless of the Read buffer size the caller uses (streaming determinism). +func TestTSRemuxReader_ChunkSizeInvariant(t *testing.T) { + raw, spans, _ := buildTwoClipStream(t) + full, _ := io.ReadAll(newTSRemuxReader(newMem(raw), spans, 0)) + + for _, chunk := range []int{1, 7, 100, 192, 193, 1000} { + r := newTSRemuxReader(newMem(raw), spans, 0) + var got bytes.Buffer + p := make([]byte, chunk) + for { + n, err := r.Read(p) + got.Write(p[:n]) + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("chunk %d: read error %v", chunk, err) + } + } + if !bytes.Equal(got.Bytes(), full) { + t.Errorf("chunk size %d produced different bytes than full read", chunk) + } + } +} + +// TestTSRemuxReader_RangeDeterminism is the critical property for HTTP range +// requests: a wrapper started at an arbitrary packet-aligned mid-stream offset +// must produce exactly the same bytes as the corresponding slice of the full +// rewrite. This guarantees seeks/range GETs see a consistent timeline. 
+func TestTSRemuxReader_RangeDeterminism(t *testing.T) { + raw, spans, _ := buildTwoClipStream(t) + full, _ := io.ReadAll(newTSRemuxReader(newMem(raw), spans, 0)) + + for startPkt := 0; startPkt*bdavPacketLen < len(raw); startPkt++ { + startOff := int64(startPkt * bdavPacketLen) + // Underlying reader yields bytes from startOff onward. + r := newTSRemuxReader(newMem(raw[startOff:]), spans, startOff) + got, err := io.ReadAll(r) + if err != nil { + t.Fatalf("startOff %d: %v", startOff, err) + } + want := full[startOff:] + if !bytes.Equal(got, want) { + t.Errorf("startOff %d: range read differs from full-rewrite slice", startOff) + } + } +} + +// TestTSRemuxReader_NonTSPassthrough: a stream that isn't recognisable TS is +// passed through byte-for-byte (disabled mode), never corrupted. +func TestTSRemuxReader_NonTSPassthrough(t *testing.T) { + raw := bytes.Repeat([]byte{0x11, 0x22, 0x33, 0x44}, 500) // no 0x47 sync grid + spans := []clipSpan{{start: 0, end: int64(len(raw)) - 1, delta: 90000}} + out, err := io.ReadAll(newTSRemuxReader(newMem(raw), spans, 0)) + if err != nil { + t.Fatalf("ReadAll: %v", err) + } + if !bytes.Equal(out, raw) { + t.Error("non-TS stream was modified; expected byte-for-byte passthrough") + } +} + +func TestBuildClipSpans(t *testing.T) { + // Empty → nil (remux disabled). + if buildClipSpans(nil) != nil { + t.Error("buildClipSpans(nil) should be nil") + } + // Prefix sums turn (byte_len, delta) into absolute [start,end] ranges. 
+ spans := buildClipSpans([]*metapb.ClipBoundary{ + {ByteLen: 100, Delta_90K: 0}, + {ByteLen: 50, Delta_90K: 91000}, + {ByteLen: 200, Delta_90K: 272000}, + }) + want := []clipSpan{ + {start: 0, end: 99, delta: 0}, + {start: 100, end: 149, delta: 91000}, + {start: 150, end: 349, delta: 272000}, + } + if len(spans) != len(want) { + t.Fatalf("got %d spans, want %d", len(spans), len(want)) + } + for i := range want { + if spans[i] != want[i] { + t.Errorf("span %d = %+v, want %+v", i, spans[i], want[i]) + } + } +} diff --git a/internal/importer/archive/iso/tsremux_test.go b/internal/nzbfilesystem/tsremux_test.go similarity index 99% rename from internal/importer/archive/iso/tsremux_test.go rename to internal/nzbfilesystem/tsremux_test.go index 9c3caa885..74c0e85b9 100644 --- a/internal/importer/archive/iso/tsremux_test.go +++ b/internal/nzbfilesystem/tsremux_test.go @@ -1,4 +1,4 @@ -package iso +package nzbfilesystem import "testing" From 8e483bbc98e3dee4f2c83872c12155c5c67df04a Mon Sep 17 00:00:00 2001 From: javi11 Date: Sat, 30 May 2026 18:07:56 +0200 Subject: [PATCH 29/30] fix(nzbfs): frame TS packets from clip grid so seek/ephemeral reads remux correctly The streaming remux disabled itself on any unaligned start because it probed for the packet sync at byte 0/4 of the read offset. ffprobe seeks to a non-packet-aligned near-EOF offset to estimate duration, so the tail was served raw and the duration stayed wrong. Derive packet framing from the known clip byte grid (BDAV-192) instead of probing; pass leading mid-packet payload bytes through and rewrite full packets from the next boundary. Adds unaligned-start determinism coverage that reproduced the bug. 
--- internal/nzbfilesystem/tsremux.go | 34 ----- internal/nzbfilesystem/tsremux_reader.go | 122 +++++------------- internal/nzbfilesystem/tsremux_reader_test.go | 32 ++++- 3 files changed, 60 insertions(+), 128 deletions(-) diff --git a/internal/nzbfilesystem/tsremux.go b/internal/nzbfilesystem/tsremux.go index 11db981c4..a81550a61 100644 --- a/internal/nzbfilesystem/tsremux.go +++ b/internal/nzbfilesystem/tsremux.go @@ -29,40 +29,6 @@ const ( ptsModulus = int64(1) << 33 ) -// detectTSPacketSize inspects a buffer that begins at a packet boundary and -// returns 192 (BDAV, sync byte at offset 4), 188 (plain TS, sync at offset 0), -// or 0 when neither layout is recognised. Blu-ray .m2ts on disc is BDAV -// (192-byte source packets); plain 188 is handled for completeness/tests. -func detectTSPacketSize(buf []byte) int { - if len(buf) >= bdavPacketLen && buf[4] == tsSync { - // Confirm with a second packet when available to avoid a chance 0x47. - if len(buf) >= 2*bdavPacketLen { - if buf[4+bdavPacketLen] == tsSync { - return bdavPacketLen - } - } else { - return bdavPacketLen - } - } - if len(buf) >= tsPacketLen && buf[0] == tsSync { - if len(buf) >= 2*tsPacketLen { - if buf[tsPacketLen] == tsSync { - return tsPacketLen - } - } else { - return tsPacketLen - } - } - // Fall back to BDAV if only its sync matched on a short buffer. - if len(buf) >= bdavPacketLen && buf[4] == tsSync { - return bdavPacketLen - } - if len(buf) >= tsPacketLen && buf[0] == tsSync { - return tsPacketLen - } - return 0 -} - // addMod33 returns (v + delta) wrapped into the 33-bit timestamp space. // delta may be negative (when a clip's pts_base exceeds its timeline_start). 
func addMod33(v, delta int64) int64 { diff --git a/internal/nzbfilesystem/tsremux_reader.go b/internal/nzbfilesystem/tsremux_reader.go index 402c5feb8..4ac04ded5 100644 --- a/internal/nzbfilesystem/tsremux_reader.go +++ b/internal/nzbfilesystem/tsremux_reader.go @@ -51,13 +51,13 @@ func buildClipSpans(boundaries []*metapb.ClipBoundary) []clipSpan { // any, live in the packet header before startOff); every fully-streamed packet // is rewritten. type tsRemuxReader struct { - inner io.ReadCloser - spans []clipSpan - absPos int64 // absolute offset of the next byte to pull from inner - packetSize int // 192 (BDAV) or 188; 0 until detected - disabled bool // true if the stream isn't recognisable TS → pure passthrough - out bytes.Buffer // rewritten bytes ready to deliver - probe []byte // bytes read for packet-size detection, not yet framed + inner io.ReadCloser + spans []clipSpan + absPos int64 // absolute offset of the next byte to pull from inner + packetSize int // 192 (BDAV); fixed for BD main features + disabled bool // true if the stream isn't recognisable TS → pure passthrough + syncChecked bool // whether the first aligned packet's sync byte was validated + out bytes.Buffer // rewritten bytes ready to deliver } // newTSRemuxReader wraps inner. startOff is the absolute file offset of inner's @@ -106,24 +106,24 @@ func (r *tsRemuxReader) Read(p []byte) (int, error) { return r.out.Read(p) } -// fill pulls the next chunk from inner, rewrites it if it is a complete -// packet aligned within its clip, and appends it to out. Returns io.EOF when -// inner is exhausted. +// fill pulls the next chunk from inner, rewrites it if it is a complete packet +// aligned within its clip, and appends it to out. Returns io.EOF when inner is +// exhausted. +// +// Packet framing is derived from the CLIP grid (each clip's bytes start at +// clip.start and are a whole number of 192-byte BDAV source packets), NOT from +// probing the stream head. 
This is what makes the wrapper correct for reads +// that begin at an arbitrary (unaligned) offset — e.g. ffprobe seeking to +// near-EOF to estimate duration. A start that lands mid-packet emits the +// leading partial bytes raw, then frames full packets from the next boundary. func (r *tsRemuxReader) fill() error { - // Detect packet size once from the head of the stream. - if r.packetSize == 0 && !r.disabled { - if err := r.detect(); err != nil { - return err - } - if r.disabled { - // detect() already moved any probed bytes into out as passthrough. - return nil - } - } - if r.disabled { return r.passthrough() } + if r.packetSize == 0 { + // BD main features (the only files with a clip table) are BDAV-192. + r.packetSize = bdavPacketLen + } clip := r.clipFor(r.absPos) if clip == nil { @@ -148,7 +148,18 @@ func (r *tsRemuxReader) fill() error { chunk = chunk[:nr] if nr > 0 { if aligned && nr == r.packetSize { - rewritePacket(chunk, r.packetSize, clip.delta) + // Validate the first aligned packet looks like BDAV TS; if not, + // the stream isn't what we expect (wrong decryption, plain TS, + // non-media) so disable rewriting rather than corrupt bytes. + if !r.syncChecked { + r.syncChecked = true + if chunk[4] != tsSync { + r.disabled = true + } + } + if !r.disabled { + rewritePacket(chunk, r.packetSize, clip.delta) + } } r.out.Write(chunk) r.absPos += int64(nr) @@ -169,70 +180,3 @@ func (r *tsRemuxReader) passthrough() error { } return err } - -// detect reads up to two packets' worth from inner to determine the packet -// size, then frames from there. If the stream isn't recognisable TS, it sets -// disabled and emits whatever was probed as passthrough so no bytes are lost. -func (r *tsRemuxReader) detect() error { - // Read enough to cover two BDAV packets for a confident detection. 
- const probeLen = 2 * bdavPacketLen - buf := make([]byte, probeLen) - nr, err := io.ReadFull(r.inner, buf) - buf = buf[:nr] - r.probe = buf - if nr == 0 { - if err == io.ErrUnexpectedEOF { - err = io.EOF - } - return err - } - - ps := detectTSPacketSize(buf) - if ps == 0 { - // Not TS we understand — disable rewriting, stream raw. - r.disabled = true - r.out.Write(buf) - r.absPos += int64(nr) - r.probe = nil - return nil - } - r.packetSize = ps - - // Frame the probed bytes packet-by-packet (they begin at r.absPos, which - // is the reader's start — assumed packet-aligned for the head read; if it - // isn't, the leading mid-packet bytes are emitted raw by the generic path). - consumed := 0 - for consumed+ps <= len(buf) { - clip := r.clipFor(r.absPos) - pkt := buf[consumed : consumed+ps] - intoClip := r.absPos - clipStartOrZero(clip) - if clip != nil && intoClip%int64(ps) == 0 && r.absPos+int64(ps) <= clip.end+1 { - rewritePacket(pkt, ps, clip.delta) - } - r.out.Write(pkt) - r.absPos += int64(ps) - consumed += ps - } - // Any trailing partial-packet bytes from the probe: stash so the next - // fill() reads the rest of that packet and frames correctly. Simplest: - // emit them raw (they are at most ps-1 bytes; a real stream's next read - // continues the packet, but to keep framing simple at the probe seam we - // pass these through). For BDAV with a packet-aligned start this branch - // is never taken (probeLen is a multiple of 192). 
- if consumed < len(buf) { - r.out.Write(buf[consumed:]) - r.absPos += int64(len(buf) - consumed) - } - r.probe = nil - if err == io.ErrUnexpectedEOF { - err = nil // partial probe is fine; more may follow - } - return err -} - -func clipStartOrZero(c *clipSpan) int64 { - if c == nil { - return 0 - } - return c.start -} diff --git a/internal/nzbfilesystem/tsremux_reader_test.go b/internal/nzbfilesystem/tsremux_reader_test.go index b2dd0717d..97248f27a 100644 --- a/internal/nzbfilesystem/tsremux_reader_test.go +++ b/internal/nzbfilesystem/tsremux_reader_test.go @@ -119,17 +119,39 @@ func TestTSRemuxReader_RangeDeterminism(t *testing.T) { raw, spans, _ := buildTwoClipStream(t) full, _ := io.ReadAll(newTSRemuxReader(newMem(raw), spans, 0)) + // Packet-aligned starts. for startPkt := 0; startPkt*bdavPacketLen < len(raw); startPkt++ { startOff := int64(startPkt * bdavPacketLen) - // Underlying reader yields bytes from startOff onward. r := newTSRemuxReader(newMem(raw[startOff:]), spans, startOff) got, err := io.ReadAll(r) if err != nil { - t.Fatalf("startOff %d: %v", startOff, err) + t.Fatalf("aligned startOff %d: %v", startOff, err) } - want := full[startOff:] - if !bytes.Equal(got, want) { - t.Errorf("startOff %d: range read differs from full-rewrite slice", startOff) + if want := full[startOff:]; !bytes.Equal(got, want) { + t.Errorf("aligned startOff %d: range read differs from full-rewrite slice", startOff) + } + } + + // UNALIGNED starts in packet payload — this is what ffprobe does when it + // seeks to near-EOF to estimate duration. The OLD code disabled rewriting + // on any unaligned start, leaving the tail (and thus the measured + // duration) wrong; this is the regression guard. The leading mid-packet + // bytes are payload (rewrite only touches header timestamp fields), so the + // output must still byte-match the full-rewrite slice. 
+ for startPkt := 0; startPkt*bdavPacketLen < len(raw); startPkt++ { + for _, intoPkt := range []int64{100, 150, 188} { // all past the PTS field + startOff := int64(startPkt*bdavPacketLen) + intoPkt + if startOff >= int64(len(raw)) { + continue + } + r := newTSRemuxReader(newMem(raw[startOff:]), spans, startOff) + got, err := io.ReadAll(r) + if err != nil { + t.Fatalf("unaligned startOff %d: %v", startOff, err) + } + if want := full[startOff:]; !bytes.Equal(got, want) { + t.Errorf("unaligned startOff %d: range read differs from full-rewrite slice (tail left un-rewritten?)", startOff) + } } } } From 084dce2bda74f570bb7bc27e1a29e8ea3c74537a Mon Sep 17 00:00:00 2001 From: javi11 Date: Sat, 30 May 2026 18:40:16 +0200 Subject: [PATCH 30/30] feat(iso): report import progress during ISO analysis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ISO analysis (filesystem walk + Blu-ray playlist resolution over NNTP) can take tens of seconds, during which the queue item's progress bar sat frozen and—for RAR/7z-wrapped ISOs—mislabeled as "Analyzing archive". Thread a progress.Tracker end-to-end through the ISO analysis chain so the bar advances with an "Analyzing ISO" stage: - progress.Tracker gains a nil-safe Slice(idx,count) helper that carves a child tracker covering one Nth of the parent's range. - ExpandISOContents/AnalyzeISO/ResolveMainFeature accept a tracker; ResolveMainFeature reports per-playlist progress (each .mpls is an NNTP round-trip), ExpandISOContents gives each ISO its slice of the band. - Bare ISOs (processor.go) get a dedicated 10->30 tracker. - RAR/7z aggregators derive an "Analyzing ISO" tracker from the archive tracker via Slice(0,1).WithStage without mutating it; archives with no ISO emit no updates, so the common case is unchanged. 
--- internal/importer/archive/iso/bluray.go | 10 +- internal/importer/archive/iso/bluray_test.go | 81 ++++++++++++++-- internal/importer/archive/iso/processor.go | 4 +- .../importer/archive/iso/processor_test.go | 2 + internal/importer/archive/iso_expansion.go | 20 +++- internal/importer/archive/rar/aggregator.go | 14 ++- .../importer/archive/sevenzip/aggregator.go | 14 ++- internal/importer/iso_expand.go | 7 +- internal/importer/iso_expand_test.go | 56 ++++++++++- internal/importer/processor.go | 82 ++++++++++++++-- internal/progress/tracker.go | 18 ++++ internal/progress/tracker_test.go | 93 +++++++++++++++++++ 12 files changed, 371 insertions(+), 30 deletions(-) create mode 100644 internal/progress/tracker_test.go diff --git a/internal/importer/archive/iso/bluray.go b/internal/importer/archive/iso/bluray.go index 1da605939..be0e79f40 100644 --- a/internal/importer/archive/iso/bluray.go +++ b/internal/importer/archive/iso/bluray.go @@ -6,6 +6,8 @@ import ( "log/slog" "sort" "strings" + + "github.com/javi11/altmount/internal/progress" ) // MainFeaturePlaylist is the result of analysing a Blu-ray's BDMV. @@ -37,7 +39,7 @@ type MainFeaturePlaylist struct { // Failures parsing individual playlists are non-fatal — we skip them and // keep evaluating the rest, mirroring how every Blu-ray player tolerates // malformed entries in BDMV/PLAYLIST/. -func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEntry) *MainFeaturePlaylist { +func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEntry, progressTracker *progress.Tracker) *MainFeaturePlaylist { // Build per-clip indexes. M2TS streams live at BDMV/STREAM/.M2TS // and carry the 2D version (or the only version on a 2D disc). 
SSIF // streams live at BDMV/STREAM/SSIF/.SSIF and carry the @@ -73,7 +75,11 @@ func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEn }) var best *MainFeaturePlaylist - for _, pe := range playlistEntries { + for idx, pe := range playlistEntries { + // Report progress per playlist examined. Reading and parsing each + // .mpls is an NNTP round-trip, so this is the granular signal that + // keeps the queue item's bar moving during BD analysis. nil-safe. + progressTracker.Update(idx+1, len(playlistEntries)) data, err := readISOFile(rs, pe) if err != nil { continue diff --git a/internal/importer/archive/iso/bluray_test.go b/internal/importer/archive/iso/bluray_test.go index 788bfdedb..10ec2ccd3 100644 --- a/internal/importer/archive/iso/bluray_test.go +++ b/internal/importer/archive/iso/bluray_test.go @@ -6,8 +6,26 @@ import ( "fmt" "io" "testing" + + "github.com/javi11/altmount/internal/progress" ) +// recordingBroadcaster captures progress updates for assertions in tests. +type recordingBroadcaster struct { + percentages []int + stages []string +} + +func (rb *recordingBroadcaster) UpdateProgress(_ int, percentage int) { + rb.percentages = append(rb.percentages, percentage) + rb.stages = append(rb.stages, "") +} + +func (rb *recordingBroadcaster) UpdateProgressWithStage(_ int, percentage int, stage string) { + rb.percentages = append(rb.percentages, percentage) + rb.stages = append(rb.stages, stage) +} + // mkEntry builds a single-extent isoFileEntry — the common case for tests. 
func mkEntry(path string, lba uint32, size uint64) isoFileEntry { return isoFileEntry{ @@ -71,7 +89,7 @@ func TestResolveMainFeature(t *testing.T) { mkEntry("BDMV/STREAM/00010.M2TS", 500, 500_000), } - got := ResolveMainFeature(context.Background(), rs, files) + got := ResolveMainFeature(context.Background(), rs, files, nil) if got == nil { t.Fatal("ResolveMainFeature returned nil") } @@ -89,12 +107,57 @@ func TestResolveMainFeature(t *testing.T) { } }) + t.Run("reports progress per playlist", func(t *testing.T) { + t.Parallel() + short := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00010", InTime: 0, OutTime: 45000}, + }, nil) + long := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 90 * 45000}, + {ClipName: "00002", InTime: 0, OutTime: 60 * 45000}, + }, nil) + rs := makeImage(t, map[uint32][]byte{100: short, 110: long}) + files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, uint64(len(short))), + mkEntry("BDMV/PLAYLIST/00800.MPLS", 110, uint64(len(long))), + mkEntry("BDMV/STREAM/00001.M2TS", 200, 1_000_000), + mkEntry("BDMV/STREAM/00002.M2TS", 300, 2_000_000), + mkEntry("BDMV/STREAM/00010.M2TS", 500, 500_000), + } + + rb := &recordingBroadcaster{} + tracker := progress.NewTracker(rb, 7, 10, 30).WithStage("Analyzing ISO") + + if got := ResolveMainFeature(context.Background(), rs, files, tracker); got == nil { + t.Fatal("ResolveMainFeature returned nil") + } + + // Two playlists → at least one update; every update must carry the + // stage, stay inside [10,30], and be non-decreasing. 
+ if len(rb.percentages) == 0 { + t.Fatal("expected progress updates, got none") + } + prev := -1 + for i, p := range rb.percentages { + if rb.stages[i] != "Analyzing ISO" { + t.Errorf("update %d stage = %q, want %q", i, rb.stages[i], "Analyzing ISO") + } + if p < 10 || p > 30 { + t.Errorf("update %d percentage = %d, want within [10,30]", i, p) + } + if p < prev { + t.Errorf("update %d percentage = %d decreased from %d", i, p, prev) + } + prev = p + } + }) + t.Run("non-BDMV disc returns nil", func(t *testing.T) { t.Parallel() files := []isoFileEntry{ mkEntry("movie.mkv", 100, 1_000_000), } - if got := ResolveMainFeature(context.Background(), bytes.NewReader(make([]byte, 16*iso9660SectorSize)), files); got != nil { + if got := ResolveMainFeature(context.Background(), bytes.NewReader(make([]byte, 16*iso9660SectorSize)), files, nil); got != nil { t.Errorf("expected nil for non-BDMV disc, got %+v", got) } }) @@ -108,7 +171,7 @@ func TestResolveMainFeature(t *testing.T) { mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, 15), mkEntry("BDMV/STREAM/00001.M2TS", 200, 1_000_000), } - if got := ResolveMainFeature(context.Background(), rs, files); got != nil { + if got := ResolveMainFeature(context.Background(), rs, files, nil); got != nil { t.Errorf("expected nil for unparseable MPLS, got %+v", got) } }) @@ -144,7 +207,7 @@ func TestResolveMainFeature(t *testing.T) { mkEntry("BDMV/STREAM/SSIF/00102.SSIF", 500, 5_000_000_000), } - got := ResolveMainFeature(context.Background(), rs, files) + got := ResolveMainFeature(context.Background(), rs, files, nil) if got == nil { t.Fatal("ResolveMainFeature returned nil — SSIF index missing?") } @@ -183,7 +246,7 @@ func TestResolveMainFeature(t *testing.T) { mkEntry("BDMV/STREAM/SSIF/00100.SSIF", 300, 40_000_000_000), } - got := ResolveMainFeature(context.Background(), rs, files) + got := ResolveMainFeature(context.Background(), rs, files, nil) if got == nil { t.Fatal("ResolveMainFeature returned nil") } @@ -208,7 +271,7 @@ func 
TestResolveMainFeature(t *testing.T) { mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, uint64(len(data))), mkEntry("BDMV/STREAM/00001.M2TS", 200, 1_000_000), } - if got := ResolveMainFeature(context.Background(), rs, files); got != nil { + if got := ResolveMainFeature(context.Background(), rs, files, nil); got != nil { t.Errorf("expected nil when MPLS references unknown clip, got %+v", got) } }) @@ -263,7 +326,7 @@ func TestResolveMainFeature(t *testing.T) { )) } - got := ResolveMainFeature(context.Background(), rs, files) + got := ResolveMainFeature(context.Background(), rs, files, nil) if got == nil { t.Fatal("ResolveMainFeature returned nil — feature playlist should have won") } @@ -302,7 +365,7 @@ func TestResolveMainFeature(t *testing.T) { mkEntry("BDMV/STREAM/00003.M2TS", 400, 300), } - got := ResolveMainFeature(context.Background(), rs, files) + got := ResolveMainFeature(context.Background(), rs, files, nil) if got == nil { t.Fatal("ResolveMainFeature returned nil") } @@ -360,7 +423,7 @@ func TestResolveMainFeature(t *testing.T) { mkEntry("BDMV/STREAM/00200.M2TS", 300, 100_000_000), // 100 MB — larger } - got := ResolveMainFeature(context.Background(), rs, files) + got := ResolveMainFeature(context.Background(), rs, files, nil) if got == nil { t.Fatal("ResolveMainFeature returned nil for a disc full of menus — should still pick one") } diff --git a/internal/importer/archive/iso/processor.go b/internal/importer/archive/iso/processor.go index 358660f0a..bc54da47f 100644 --- a/internal/importer/archive/iso/processor.go +++ b/internal/importer/archive/iso/processor.go @@ -10,6 +10,7 @@ import ( metapb "github.com/javi11/altmount/internal/metadata/proto" "github.com/javi11/altmount/internal/pool" + "github.com/javi11/altmount/internal/progress" ) // AnalyzeISO inspects the given ISO source and returns: @@ -29,6 +30,7 @@ func AnalyzeISO( readTimeout time.Duration, analyzeTimeout time.Duration, allowedExtensions []string, + progressTracker *progress.Tracker, ) 
(*AnalyzedISO, error) { start := time.Now() // Hard cap the whole walk. A degraded NNTP provider can otherwise stall @@ -66,7 +68,7 @@ func AnalyzeISO( out.Files = append(out.Files, buildFileContent(src, e)) } - if mf := ResolveMainFeature(ctx, rs, entries); mf != nil { + if mf := ResolveMainFeature(ctx, rs, entries, progressTracker); mf != nil { out.DurationTicks = mf.DurationTicks for i, e := range mf.Streams { fc := buildFileContent(src, e) diff --git a/internal/importer/archive/iso/processor_test.go b/internal/importer/archive/iso/processor_test.go index 2c7c048c4..58bc48f63 100644 --- a/internal/importer/archive/iso/processor_test.go +++ b/internal/importer/archive/iso/processor_test.go @@ -30,6 +30,7 @@ func TestAnalyzeISO_HonorsTimeout(t *testing.T) { 0, 1*time.Nanosecond, // analyzeTimeout nil, + nil, // progressTracker ) elapsed := time.Since(start) @@ -65,6 +66,7 @@ func TestAnalyzeISO_HonorsTimeout_PreCanceled(t *testing.T) { 0, 0, // analyzeTimeout=0 → cap disabled, parent ctx still canceled nil, + nil, // progressTracker ) elapsed := time.Since(start) diff --git a/internal/importer/archive/iso_expansion.go b/internal/importer/archive/iso_expansion.go index 153563afe..6b934ebb0 100644 --- a/internal/importer/archive/iso_expansion.go +++ b/internal/importer/archive/iso_expansion.go @@ -13,6 +13,7 @@ import ( "github.com/javi11/altmount/internal/importer/archive/iso" "github.com/javi11/altmount/internal/pool" + "github.com/javi11/altmount/internal/progress" ) // analyzedISO bundles an ISO Content with its inspection result and its @@ -47,6 +48,7 @@ func ExpandISOContents( readTimeout time.Duration, analyzeTimeout time.Duration, allowedExtensions []string, + progressTracker *progress.Tracker, ) ([]Content, error) { if !expand { return contents, nil @@ -58,6 +60,16 @@ func ExpandISOContents( groupKeys []string ) + // Count the ISO entries up front so each can be given an equal slice of + // the progress tracker's range; isoIdx walks the ISOs as we process 
them. + numISOs := 0 + for _, c := range contents { + if !c.IsDirectory && strings.ToLower(filepath.Ext(c.Filename)) == ".iso" { + numISOs++ + } + } + isoIdx := 0 + for _, c := range contents { if c.IsDirectory || strings.ToLower(filepath.Ext(c.Filename)) != ".iso" { result = append(result, c) @@ -71,7 +83,13 @@ func ExpandISOContents( AesIV: c.AesIV, Size: c.Size, } - a, err := iso.AnalyzeISO(ctx, src, poolManager, maxPrefetch, readTimeout, analyzeTimeout, allowedExtensions) + // Give this ISO its slice of the overall range so per-playlist + // updates inside AnalyzeISO stay within [isoIdx, isoIdx+1] of the + // band; bump the parent to the slice boundary once it completes so + // even non-BDMV ISOs (no playlist loop) advance the bar. + a, err := iso.AnalyzeISO(ctx, src, poolManager, maxPrefetch, readTimeout, analyzeTimeout, allowedExtensions, progressTracker.Slice(isoIdx, numISOs)) + isoIdx++ + progressTracker.Update(isoIdx, numISOs) if err != nil { slog.WarnContext(ctx, "Failed to analyze ISO content, keeping ISO as-is", "file", c.Filename, "error", err) diff --git a/internal/importer/archive/rar/aggregator.go b/internal/importer/archive/rar/aggregator.go index a2f8fd28b..64bbf759c 100644 --- a/internal/importer/archive/rar/aggregator.go +++ b/internal/importer/archive/rar/aggregator.go @@ -209,8 +209,18 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { rarContents = append(rarContents, r.contents...) } - // Expand ISO files found inside the RAR archive into their inner media files - rarContents, err := archive.ExpandISOContents(ctx, expandBlurayIso, rarContents, poolManager, maxPrefetch, readTimeout, analyzeTimeout, allowedFileExtensions) + // Expand ISO files found inside the RAR archive into their inner media + // files. ISO analysis (filesystem walk + Blu-ray playlist resolution over + // NNTP) can take tens of seconds, so it gets its own progress label. 
+ // Slice(0,1) copies the archive tracker at the same range without mutating + // it (RAR header analysis above is already done); WithStage relabels the + // copy. For archives with no ISO, ExpandISOContents emits no updates, so + // the common case is unaffected. + var isoProgressTracker *progress.Tracker + if archiveProgressTracker != nil { + isoProgressTracker = archiveProgressTracker.Slice(0, 1).WithStage("Analyzing ISO") + } + rarContents, err := archive.ExpandISOContents(ctx, expandBlurayIso, rarContents, poolManager, maxPrefetch, readTimeout, analyzeTimeout, allowedFileExtensions, isoProgressTracker) if err != nil { slog.WarnContext(ctx, "ISO expansion failed, proceeding without ISO contents", "error", err) } diff --git a/internal/importer/archive/sevenzip/aggregator.go b/internal/importer/archive/sevenzip/aggregator.go index 1e01ffafa..01ae85258 100644 --- a/internal/importer/archive/sevenzip/aggregator.go +++ b/internal/importer/archive/sevenzip/aggregator.go @@ -185,8 +185,18 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { slog.InfoContext(ctx, "Successfully analyzed 7zip archive content", "files_in_archive", len(sevenZipContents)) - // Expand ISO files found inside the 7zip archive into their inner media files - sevenZipContents, err = archive.ExpandISOContents(ctx, expandBlurayIso, sevenZipContents, poolManager, maxPrefetch, readTimeout, analyzeTimeout, allowedFileExtensions) + // Expand ISO files found inside the 7zip archive into their inner media + // files. ISO analysis (filesystem walk + Blu-ray playlist resolution over + // NNTP) can take tens of seconds, so it gets its own progress label. + // Slice(0,1) copies the archive tracker at the same range without mutating + // it (7z header analysis above is already done); WithStage relabels the + // copy. For archives with no ISO, ExpandISOContents emits no updates, so + // the common case is unaffected. 
+ var isoProgressTracker *progress.Tracker + if archiveProgressTracker != nil { + isoProgressTracker = archiveProgressTracker.Slice(0, 1).WithStage("Analyzing ISO") + } + sevenZipContents, err = archive.ExpandISOContents(ctx, expandBlurayIso, sevenZipContents, poolManager, maxPrefetch, readTimeout, analyzeTimeout, allowedFileExtensions, isoProgressTracker) if err != nil { slog.WarnContext(ctx, "ISO expansion failed, proceeding without ISO contents", "error", err) } diff --git a/internal/importer/iso_expand.go b/internal/importer/iso_expand.go index 597ed87ca..cb9e920e8 100644 --- a/internal/importer/iso_expand.go +++ b/internal/importer/iso_expand.go @@ -79,6 +79,8 @@ func expandBareISOFiles( regularFiles []parser.ParsedFile, virtualDir string, releaseName string, + sourceNzbPath string, + releaseDate int64, ) (written []string, remaining []parser.ParsedFile, err error) { isos, rest := partitionISOFiles(regularFiles) if len(isos) == 0 { @@ -104,10 +106,7 @@ func expandBareISOFiles( remaining = append(remaining, isos[i]) continue } - // Task 4 wiring will supply real sourceNzbPath/releaseDate values; - // for now plumb empty strings/zero — see archive.NewFileMetadataFromContent - // signature. 
- meta := archive.NewFileMetadataFromContent(c, "", 0, c.NzbdavID) + meta := archive.NewFileMetadataFromContent(c, sourceNzbPath, releaseDate, c.NzbdavID) virtualPath := path.Join(virtualDir, c.Filename) if err := deps.writeMetadata(virtualPath, meta); err != nil { return written, nil, fmt.Errorf("write metadata %q: %w", virtualPath, err) diff --git a/internal/importer/iso_expand_test.go b/internal/importer/iso_expand_test.go index b70364b2e..461cf5730 100644 --- a/internal/importer/iso_expand_test.go +++ b/internal/importer/iso_expand_test.go @@ -71,7 +71,7 @@ func TestExpandBareISOFiles_NoISOs_ReturnsInputUntouched(t *testing.T) { t.Fatal("expand should not be called when no .iso present") return nil, nil }, - }, files, "vdir", "movie") + }, files, "vdir", "movie", "", 0) if err != nil { t.Fatalf("err = %v", err) } @@ -116,7 +116,7 @@ func TestExpandBareISOFiles_OneISO_BluRayPath_WritesMergedMetadata(t *testing.T) enabled: true, } - written, rest, err := expandBareISOFiles(context.Background(), deps, files, "vdir", "movie") + written, rest, err := expandBareISOFiles(context.Background(), deps, files, "vdir", "movie", "", 0) if err != nil { t.Fatalf("err = %v", err) } @@ -147,7 +147,7 @@ func TestExpandBareISOFiles_Disabled_StillPeelsButFallsBack(t *testing.T) { return nil }, } - written, rest, err := expandBareISOFiles(context.Background(), deps, files, "vdir", "movie") + written, rest, err := expandBareISOFiles(context.Background(), deps, files, "vdir", "movie", "", 0) if err != nil { t.Fatalf("err = %v", err) } @@ -158,3 +158,53 @@ func TestExpandBareISOFiles_Disabled_StillPeelsButFallsBack(t *testing.T) { t.Errorf("rest = %+v, want the original .iso pushed back for normal dispatch", rest) } } + +// TestExpandBareISOFiles_PropagatesSourceNzbPathAndReleaseDate asserts the +// orchestrator threads sourceNzbPath and releaseDate through to the +// FileMetadata produced via archive.NewFileMetadataFromContent. 
Without +// this, downstream consumers (history, repair, etc.) lose the link back +// to the originating NZB post. +func TestExpandBareISOFiles_PropagatesSourceNzbPathAndReleaseDate(t *testing.T) { + files := []parser.ParsedFile{{Filename: "movie.iso", Size: 1000}} + + const wantSourceNzbPath = "/incoming/Movie.1080p.BluRay.nzb" + const wantReleaseDate int64 = 1_234_567_890 + + var capturedMeta *metapb.FileMetadata + deps := expandBareISODeps{ + enabled: true, + expand: func(ctx context.Context, _ bool, _ []archive.Content) ([]archive.Content, error) { + return []archive.Content{{ + Filename: "MOVIE.m2ts", + Size: 900, + NestedSources: []archive.NestedSource{ + {InnerOffset: 0, InnerLength: 900}, + }, + }}, nil + }, + writeMetadata: func(_ string, meta *metapb.FileMetadata) error { + capturedMeta = meta + return nil + }, + } + + written, _, err := expandBareISOFiles( + context.Background(), deps, files, "vdir", "movie", + wantSourceNzbPath, wantReleaseDate, + ) + if err != nil { + t.Fatalf("err = %v", err) + } + if len(written) != 1 { + t.Fatalf("written = %v, want 1 entry", written) + } + if capturedMeta == nil { + t.Fatal("writeMetadata was never invoked") + } + if capturedMeta.SourceNzbPath != wantSourceNzbPath { + t.Errorf("SourceNzbPath = %q, want %q", capturedMeta.SourceNzbPath, wantSourceNzbPath) + } + if capturedMeta.ReleaseDate != wantReleaseDate { + t.Errorf("ReleaseDate = %d, want %d", capturedMeta.ReleaseDate, wantReleaseDate) + } +} diff --git a/internal/importer/processor.go b/internal/importer/processor.go index 94e98b756..63945d08d 100644 --- a/internal/importer/processor.go +++ b/internal/importer/processor.go @@ -14,6 +14,7 @@ import ( "github.com/javi11/altmount/internal/config" "github.com/javi11/altmount/internal/database" + "github.com/javi11/altmount/internal/importer/archive" "github.com/javi11/altmount/internal/importer/archive/rar" "github.com/javi11/altmount/internal/importer/archive/sevenzip" 
"github.com/javi11/altmount/internal/importer/filesystem" @@ -22,6 +23,7 @@ import ( "github.com/javi11/altmount/internal/importer/singlefile" "github.com/javi11/altmount/internal/importer/utils/nzbtrim" "github.com/javi11/altmount/internal/metadata" + metapb "github.com/javi11/altmount/internal/metadata/proto" "github.com/javi11/altmount/internal/nzbfile" "github.com/javi11/altmount/internal/pool" "github.com/javi11/altmount/internal/progress" @@ -263,30 +265,98 @@ func (proc *Processor) ProcessNzbFile(ctx context.Context, filePath, relativePat // Step 5: Process based on file type var result string var writtenPaths []string + + // Bare-ISO Blu-ray expansion. ISOs posted directly to Usenet (without + // RAR/7z wrapping) are classified as NzbTypeSingleFile/NzbTypeMultiFile + // by the parser and would otherwise bypass archive.ExpandISOContents. + // Peel them out here, run the same expansion the RAR/7z aggregators run, + // persist each expanded virtual file, and feed the remainder back into + // normal dispatch. STRM imports skip this path: they have no NNTP + // segments and the pool guard above explicitly excludes them. + if parsed.Type != parser.NzbTypeStrm { + importCfg := cfg.Import + expandEnabled := true + if importCfg.ExpandBlurayIso != nil { + expandEnabled = *importCfg.ExpandBlurayIso + } + isoMaxPrefetch := importCfg.MaxDownloadPrefetch + isoReadTimeout := time.Duration(importCfg.ReadTimeoutSeconds) * time.Second + if isoReadTimeout == 0 { + isoReadTimeout = 5 * time.Minute + } + + var isoReleaseDate int64 + if len(regularFiles) > 0 { + isoReleaseDate = regularFiles[0].ReleaseDate.Unix() + } + + // Progress tracker for the bare-ISO analysis phase. It fills the band + // between "Identifying files" (10%) and "Validating segments" (30%), + // which would otherwise sit frozen while the ISO filesystem walk and + // Blu-ray playlist resolution run over NNTP. Gated on subscribers to + // avoid overhead when nobody is watching (mirrors the RAR/7z path). 
+ var isoTracker *progress.Tracker + if proc.broadcaster != nil && proc.broadcaster.HasSubscribers() { + isoTracker = proc.broadcaster.CreateTracker(queueID, 10, 30).WithStage("Analyzing ISO") + } + + isoWritten, expandedRegularFiles, isoErr := expandBareISOFiles(ctx, expandBareISODeps{ + enabled: expandEnabled, + expand: func(ctx context.Context, enabled bool, contents []archive.Content) ([]archive.Content, error) { + return archive.ExpandISOContents(ctx, enabled, contents, + proc.poolManager, isoMaxPrefetch, isoReadTimeout, cfg.GetIsoAnalyzeTimeout(), allowedExtensions, isoTracker) + }, + writeMetadata: func(virtualPath string, meta *metapb.FileMetadata) error { + return proc.metadataService.WriteFileMetadata(virtualPath, meta) + }, + }, regularFiles, virtualDir, proc.getCleanNzbName(parsed.Path, queueID), parsed.Path, isoReleaseDate) + if isoErr != nil { + return "", writtenPaths, NewNonRetryableError("bare-ISO expansion failed", isoErr) + } + writtenPaths = append(writtenPaths, isoWritten...) + regularFiles = expandedRegularFiles + + // If bare-ISO expansion consumed every regular file and there are no + // archive files, dispatch has nothing left to do. Return the first + // expanded virtual path so callers get a meaningful result; the + // "no files" error path lives in processSingleFile and would otherwise + // trigger spuriously. + if len(regularFiles) == 0 && len(archiveFiles) == 0 && len(isoWritten) > 0 { + proc.updateProgress(queueID, 100) + return isoWritten[0], writtenPaths, nil + } + } + + // dispatchPaths holds whatever the per-type handlers wrote so we can + // merge it with any ISO-derived paths accumulated above. Handlers + // already return their full set of written paths (including "DIR:" + // prefixed cleanup markers) so we just concatenate. 
+ var dispatchPaths []string switch parsed.Type { case parser.NzbTypeSingleFile: proc.updateProgressWithStage(queueID, 30, "Validating segments") - result, writtenPaths, err = proc.processSingleFile(ctx, virtualDir, regularFiles, par2Files, parsed.Path, queueID, maxConnections, allowedExtensions, proc.validationTimeout, category, metadata, downloadID) + result, dispatchPaths, err = proc.processSingleFile(ctx, virtualDir, regularFiles, par2Files, parsed.Path, queueID, maxConnections, allowedExtensions, proc.validationTimeout, category, metadata, downloadID) case parser.NzbTypeMultiFile: proc.updateProgressWithStage(queueID, 30, "Validating segments") - result, writtenPaths, err = proc.processMultiFile(ctx, virtualDir, regularFiles, par2Files, parsed.Path, queueID, maxConnections, allowedExtensions, proc.validationTimeout, category, metadata, downloadID) + result, dispatchPaths, err = proc.processMultiFile(ctx, virtualDir, regularFiles, par2Files, parsed.Path, queueID, maxConnections, allowedExtensions, proc.validationTimeout, category, metadata, downloadID) case parser.NzbTypeRarArchive: proc.updateProgressWithStage(queueID, 15, "Analyzing archive") - result, writtenPaths, err = proc.processRarArchive(ctx, virtualDir, regularFiles, archiveFiles, parsed, queueID, maxConnections, allowedExtensions, proc.validationTimeout, parsed.ExtractedFiles, category, metadata, downloadID) + result, dispatchPaths, err = proc.processRarArchive(ctx, virtualDir, regularFiles, archiveFiles, parsed, queueID, maxConnections, allowedExtensions, proc.validationTimeout, parsed.ExtractedFiles, category, metadata, downloadID) case parser.NzbType7zArchive: proc.updateProgressWithStage(queueID, 15, "Analyzing archive") - result, writtenPaths, err = proc.processSevenZipArchive(ctx, virtualDir, regularFiles, archiveFiles, parsed, queueID, maxConnections, allowedExtensions, proc.validationTimeout, parsed.ExtractedFiles, category, metadata, downloadID) + result, dispatchPaths, err = 
proc.processSevenZipArchive(ctx, virtualDir, regularFiles, archiveFiles, parsed, queueID, maxConnections, allowedExtensions, proc.validationTimeout, parsed.ExtractedFiles, category, metadata, downloadID) case parser.NzbTypeStrm: proc.updateProgressWithStage(queueID, 30, "Validating segments") - result, writtenPaths, err = proc.processSingleFile(ctx, virtualDir, regularFiles, par2Files, parsed.Path, queueID, maxConnections, allowedExtensions, proc.validationTimeout, category, metadata, downloadID) + result, dispatchPaths, err = proc.processSingleFile(ctx, virtualDir, regularFiles, par2Files, parsed.Path, queueID, maxConnections, allowedExtensions, proc.validationTimeout, category, metadata, downloadID) default: - return "", nil, NewNonRetryableError(fmt.Sprintf("unknown file type: %s", parsed.Type), nil) + return "", writtenPaths, NewNonRetryableError(fmt.Sprintf("unknown file type: %s", parsed.Type), nil) } + writtenPaths = append(writtenPaths, dispatchPaths...) // Update progress: complete if err == nil { diff --git a/internal/progress/tracker.go b/internal/progress/tracker.go index c845fe2d4..38c4e459f 100644 --- a/internal/progress/tracker.go +++ b/internal/progress/tracker.go @@ -40,6 +40,24 @@ func (pt *Tracker) WithStage(stage string) *Tracker { return pt } +// Slice returns a child tracker covering segment idx of count equal slices of +// this tracker's [min,max] range. Useful for dividing a range across a known +// number of sequential sub-operations (e.g. one slice per ISO across a +// multi-disc group). Safe on a nil receiver (returns nil). +func (pt *Tracker) Slice(idx, count int) *Tracker { + if pt == nil || count <= 0 { + return nil + } + span := pt.maxPercent - pt.minPercent + return &Tracker{ + queueID: pt.queueID, + broadcaster: pt.broadcaster, + minPercent: pt.minPercent + idx*span/count, + maxPercent: pt.minPercent + (idx+1)*span/count, + stage: pt.stage, + } +} + // Update reports progress within the configured percentage range. 
// Safe to call on a nil receiver (no-op). func (pt *Tracker) Update(current, total int) { diff --git a/internal/progress/tracker_test.go b/internal/progress/tracker_test.go new file mode 100644 index 000000000..1f0865aad --- /dev/null +++ b/internal/progress/tracker_test.go @@ -0,0 +1,93 @@ +package progress + +import "testing" + +// recordingBroadcaster captures every progress update for assertions. +type recordingBroadcaster struct { + updates []recordedUpdate +} + +type recordedUpdate struct { + queueID int + percentage int + stage string +} + +func (rb *recordingBroadcaster) UpdateProgress(queueID, percentage int) { + rb.updates = append(rb.updates, recordedUpdate{queueID: queueID, percentage: percentage}) +} + +func (rb *recordingBroadcaster) UpdateProgressWithStage(queueID, percentage int, stage string) { + rb.updates = append(rb.updates, recordedUpdate{queueID: queueID, percentage: percentage, stage: stage}) +} + +func TestTrackerSlice(t *testing.T) { + t.Parallel() + + rb := &recordingBroadcaster{} + base := NewTracker(rb, 7, 10, 30).WithStage("Analyzing ISO") + + tests := []struct { + name string + idx, count int + wantMin, wantMax int + wantNil bool + }{ + {name: "first of two", idx: 0, count: 2, wantMin: 10, wantMax: 20}, + {name: "second of two", idx: 1, count: 2, wantMin: 20, wantMax: 30}, + {name: "single slice is full range", idx: 0, count: 1, wantMin: 10, wantMax: 30}, + {name: "zero count is nil", idx: 0, count: 0, wantNil: true}, + {name: "negative count is nil", idx: 0, count: -3, wantNil: true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := base.Slice(tt.idx, tt.count) + if tt.wantNil { + if got != nil { + t.Fatalf("Slice(%d,%d) = %+v, want nil", tt.idx, tt.count, got) + } + return + } + if got == nil { + t.Fatalf("Slice(%d,%d) = nil, want non-nil", tt.idx, tt.count) + } + if got.minPercent != tt.wantMin || got.maxPercent != tt.wantMax { + t.Errorf("Slice(%d,%d) range = [%d,%d], want [%d,%d]", + tt.idx, 
tt.count, got.minPercent, got.maxPercent, tt.wantMin, tt.wantMax) + } + // Child inherits queueID, broadcaster, and stage. + if got.queueID != base.queueID || got.broadcaster != base.broadcaster || got.stage != base.stage { + t.Errorf("Slice did not inherit parent identity/stage: %+v", got) + } + }) + } +} + +func TestTrackerSliceNilReceiver(t *testing.T) { + t.Parallel() + + var nilTracker *Tracker + if got := nilTracker.Slice(0, 2); got != nil { + t.Fatalf("nil receiver Slice = %+v, want nil", got) + } + // And Update on the nil result must be a safe no-op. + nilTracker.Slice(0, 2).Update(1, 2) +} + +func TestTrackerSliceUpdateMapsIntoSubRange(t *testing.T) { + t.Parallel() + + rb := &recordingBroadcaster{} + base := NewTracker(rb, 7, 10, 30).WithStage("Analyzing ISO") + + // Second of two ISOs spans [20,30]; a half-complete update lands at 25. + base.Slice(1, 2).Update(1, 2) + + if len(rb.updates) != 1 { + t.Fatalf("got %d updates, want 1", len(rb.updates)) + } + u := rb.updates[0] + if u.queueID != 7 || u.percentage != 25 || u.stage != "Analyzing ISO" { + t.Fatalf("update = %+v, want {7 25 Analyzing ISO}", u) + } +}