diff --git a/internal/config/accessors.go b/internal/config/accessors.go index ccabbbec8..30ba5d33d 100644 --- a/internal/config/accessors.go +++ b/internal/config/accessors.go @@ -126,6 +126,23 @@ func (c *Config) GetReadTimeout() time.Duration { return time.Duration(c.GetReadTimeoutSeconds()) * time.Second } +// GetIsoAnalyzeTimeout returns the per-ISO analyse deadline with a 120s +// default fallback. This bounds the entire iso.AnalyzeISO walk so a +// degraded NNTP provider cannot stall the importer indefinitely. +// +// Sentinel handling: +// - nil (config field unset) → 120s default +// - 0 or negative (explicit "none") → 120s default; users cannot disable +// the cap — the whole purpose of this knob is to prevent unbounded +// waits. To approximate "unlimited", set a very large value (e.g. +// 86400 for a one-day budget). +func (c *Config) GetIsoAnalyzeTimeout() time.Duration { + if c.Import.IsoAnalyzeTimeoutSeconds == nil || *c.Import.IsoAnalyzeTimeoutSeconds <= 0 { + return 120 * time.Second + } + return time.Duration(*c.Import.IsoAnalyzeTimeoutSeconds) * time.Second +} + // GetMetadataBackupKeep returns the number of metadata backups to keep with a default fallback. 
func (c *Config) GetMetadataBackupKeep() int { if c.Metadata.Backup.KeepBackups <= 0 { diff --git a/internal/config/manager.go b/internal/config/manager.go index d708fcdc3..810a6e021 100644 --- a/internal/config/manager.go +++ b/internal/config/manager.go @@ -265,6 +265,7 @@ type ImportConfig struct { MaxDownloadPrefetch int `yaml:"max_download_prefetch" mapstructure:"max_download_prefetch" json:"max_download_prefetch"` SegmentSamplePercentage int `yaml:"segment_sample_percentage" mapstructure:"segment_sample_percentage" json:"segment_sample_percentage"` ReadTimeoutSeconds int `yaml:"read_timeout_seconds" mapstructure:"read_timeout_seconds" json:"read_timeout_seconds"` + IsoAnalyzeTimeoutSeconds *int `yaml:"iso_analyze_timeout_seconds" mapstructure:"iso_analyze_timeout_seconds" json:"iso_analyze_timeout_seconds,omitempty"` ImportStrategy ImportStrategy `yaml:"import_strategy" mapstructure:"import_strategy" json:"import_strategy"` ImportDir *string `yaml:"import_dir" mapstructure:"import_dir" json:"import_dir,omitempty"` WatchDir *string `yaml:"watch_dir" mapstructure:"watch_dir" json:"watch_dir,omitempty"` @@ -1247,6 +1248,7 @@ func DefaultConfig(configDir ...string) *Config { watchIntervalSeconds := 10 // Default watch interval failedItemRetentionHours := 24 // Default: auto-remove failed items after 24 hours historyRetentionDays := 90 // Default: auto-remove import history after 90 days (3 months) + isoAnalyzeTimeoutSeconds := 120 // Default: 120s hard cap per ISO analyse (prevents stuck NNTP from stalling import for 9+ minutes) cleanupAutomaticImportFailure := false metadataBackupEnabled := false failureMaskingEnabled := false @@ -1378,7 +1380,8 @@ func DefaultConfig(configDir ...string) *Config { MaxImportConnections: 5, // Default: 5 concurrent NNTP connections for validation and archive processing MaxDownloadPrefetch: 10, // Default: 10 segments prefetched ahead for archive analysis SegmentSamplePercentage: 1, // Default: 1% segment sampling - 
ReadTimeoutSeconds: 300, // Default: 5 minutes read timeout + ReadTimeoutSeconds: 300, // Default: 5 minutes read timeout + IsoAnalyzeTimeoutSeconds: &isoAnalyzeTimeoutSeconds, ImportStrategy: ImportStrategyNone, // Default: no import strategy (direct import) ImportDir: nil, // No default import directory WatchDir: nil, diff --git a/internal/importer/archive/common.go b/internal/importer/archive/common.go index 8da0f2dc1..f130c8ac3 100644 --- a/internal/importer/archive/common.go +++ b/internal/importer/archive/common.go @@ -62,6 +62,20 @@ type Content struct { // are sorted by size descending (1 = largest / main feature). // Zero means this Content did not come from an ISO. ISOExpansionIndex int `json:"iso_expansion_index,omitempty"` + // ClipBoundaries is the per-clip timeline table for a byte-concatenated + // multi-clip Blu-ray main feature. Empty for everything else. At read + // time a TS filter adds each clip's Delta90k to the timestamps inside + // its byte range to build one continuous timeline. + ClipBoundaries []ClipBoundary `json:"clip_boundaries,omitempty"` +} + +// ClipBoundary mirrors metapb.ClipBoundary at the archive layer: one clip in a +// concatenated multi-clip BD main feature. ByteLen is the clip's size in the +// virtual file; Delta90k is the signed 90 kHz timeline offset for packets +// inside this clip's byte range. 
+type ClipBoundary struct { + ByteLen int64 `json:"byte_len"` + Delta90k int64 `json:"delta_90k"` } // GetContentSegmentCount returns the total number of segments for a Content, diff --git a/internal/importer/archive/content_metadata.go b/internal/importer/archive/content_metadata.go new file mode 100644 index 000000000..c49a8b8e3 --- /dev/null +++ b/internal/importer/archive/content_metadata.go @@ -0,0 +1,166 @@ +package archive + +import ( + "time" + "unsafe" + + metapb "github.com/javi11/altmount/internal/metadata/proto" +) + +// NewFileMetadataFromContent creates a FileMetadata from a Content (with its NestedSources) +// for the metadata system. It mirrors the conversion previously inlined inside +// rar.CreateFileMetadataFromRarContent and sevenzip.CreateFileMetadataFromSevenZipContent +// so that non-RAR/non-7z callers (e.g. ISO expansion) can produce equivalent metadata. +// +// Behaviour: +// - Sets CreatedAt/ModifiedAt to time.Now().Unix(). +// - Defaults Status to FILE_STATUS_HEALTHY. +// - Copies SegmentData from content.Segments. +// - When content.AesKey is non-empty, sets Encryption=AES with key/iv. +// - Appends one NestedSegmentSource per content.NestedSources entry. +func NewFileMetadataFromContent( + content Content, + sourceNzbPath string, + releaseDate int64, + nzbdavId string, +) *metapb.FileMetadata { + now := time.Now().Unix() + + meta := &metapb.FileMetadata{ + FileSize: content.Size, + SourceNzbPath: sourceNzbPath, + Status: metapb.FileStatus_FILE_STATUS_HEALTHY, + CreatedAt: now, + ModifiedAt: now, + SegmentData: content.Segments, + ReleaseDate: releaseDate, + NzbdavId: nzbdavId, + } + + // Set AES encryption if keys are present (single-layer encrypted archive) + if len(content.AesKey) > 0 { + meta.Encryption = metapb.Encryption_AES + meta.AesKey = content.AesKey + meta.AesIv = content.AesIV + } + + // Carry the per-clip timeline table for multi-clip BD main features. 
+ // Empty for everything else, which keeps the read-path remux filter + // disabled for all other files. + for _, cb := range content.ClipBoundaries { + meta.ClipBoundaries = append(meta.ClipBoundaries, &metapb.ClipBoundary{ + ByteLen: cb.ByteLen, + Delta_90K: cb.Delta90k, + }) + } + + // Populate nested sources. For multi-extent encrypted volumes (e.g. a + // Blu-ray main feature with hundreds of extents that all read from the + // same encrypted RAR) every NestedSource shares the same Segments slice + // in memory. Serialising them naïvely duplicates the segment list per + // extent — for Avatar 3D that produced an 8 GB .meta file. We dedupe + // here by detecting shared segment-list backing arrays and emitting + // one entry in meta.SharedOuterSources per unique group; each + // NestedSource then carries only its inner_offset + inner_length plus + // a 1-based shared_outer_source_index. Sources without sharing fall + // through to the legacy on-disk layout so old code paths are unaffected. + appendNestedSourcesWithDedupe(meta, content.NestedSources) + + return meta +} + +// nestedSourceShareKey identifies a NestedSource by the backing array of its +// Segments slice plus the AES key/IV and inner volume size. Sources with the +// same key can share one entry in FileMetadata.SharedOuterSources. +type nestedSourceShareKey struct { + segmentsPtr uintptr + segmentsLen int + aesKey string + aesIv string + innerVolumeSize int64 +} + +// shareKeyFor builds a sharing key. It uses the backing-array pointer of +// the Segments slice (cheap O(1) check) plus the slice length to catch +// accidental pointer reuse across distinct slices. The AES key/iv and +// inner_volume_size complete the identity — two sources are only +// shareable when those match exactly. 
+func shareKeyFor(ns NestedSource) nestedSourceShareKey { + var ptr uintptr + if len(ns.Segments) > 0 { + ptr = uintptr(unsafe.Pointer(unsafe.SliceData(ns.Segments))) + } + return nestedSourceShareKey{ + segmentsPtr: ptr, + segmentsLen: len(ns.Segments), + aesKey: string(ns.AesKey), + aesIv: string(ns.AesIV), + innerVolumeSize: ns.InnerVolumeSize, + } +} + +// appendNestedSourcesWithDedupe writes the NestedSources into meta, +// deduplicating shared outer-segment data into meta.SharedOuterSources. +// When fewer than two sources qualify for sharing (e.g. a single source, +// or every source has a unique segment list) the legacy layout is used: +// every NestedSegmentSource carries its own Segments + AesKey + AesIv. +func appendNestedSourcesWithDedupe(meta *metapb.FileMetadata, sources []NestedSource) { + if len(sources) == 0 { + return + } + + // First pass: count how many sources share each key. Only keys that + // appear in >= 2 sources are worth deduping (single-use keys cost more + // to store as shared entries than as inline data). + counts := make(map[nestedSourceShareKey]int, len(sources)) + for _, ns := range sources { + if len(ns.Segments) == 0 { + continue + } + counts[shareKeyFor(ns)]++ + } + + // Build the SharedOuterSources slice, preserving first-appearance order. + keyToIndex := make(map[nestedSourceShareKey]int32, len(counts)) + for _, ns := range sources { + if len(ns.Segments) == 0 { + continue + } + key := shareKeyFor(ns) + if counts[key] < 2 { + continue + } + if _, seen := keyToIndex[key]; seen { + continue + } + meta.SharedOuterSources = append(meta.SharedOuterSources, &metapb.NestedSegmentSource{ + Segments: ns.Segments, + AesKey: ns.AesKey, + AesIv: ns.AesIV, + InnerVolumeSize: ns.InnerVolumeSize, + }) + keyToIndex[key] = int32(len(meta.SharedOuterSources)) // 1-based + } + + // Second pass: emit one NestedSegmentSource per input, referencing + // the shared entry where applicable. 
+ for _, ns := range sources { + entry := &metapb.NestedSegmentSource{ + InnerOffset: ns.InnerOffset, + InnerLength: ns.InnerLength, + } + if idx, ok := keyToIndex[shareKeyFor(ns)]; ok && len(ns.Segments) > 0 { + entry.SharedOuterSourceIndex = idx + } else { + entry.Segments = ns.Segments + entry.AesKey = ns.AesKey + entry.AesIv = ns.AesIV + entry.InnerVolumeSize = ns.InnerVolumeSize + } + meta.NestedSources = append(meta.NestedSources, entry) + } +} + +// The read-side counterpart of the dedupe written here lives in +// internal/metadata.ExpandSharedOuterSources — called from +// MetadataService.ReadFileMetadata after proto.Unmarshal. diff --git a/internal/importer/archive/content_metadata_test.go b/internal/importer/archive/content_metadata_test.go new file mode 100644 index 000000000..fcbb8b615 --- /dev/null +++ b/internal/importer/archive/content_metadata_test.go @@ -0,0 +1,209 @@ +package archive + +import ( + "testing" + "unsafe" + + metapb "github.com/javi11/altmount/internal/metadata/proto" + "google.golang.org/protobuf/proto" +) + +func TestNewFileMetadataFromContent_PreservesNestedSources(t *testing.T) { + c := Content{ + Filename: "main_feature.m2ts", + Size: 100, + Segments: []*metapb.SegmentData{{Id: "outer@"}}, + NestedSources: []NestedSource{ + {InnerOffset: 0, InnerLength: 40, Segments: []*metapb.SegmentData{{Id: "a@"}}}, + {InnerOffset: 0, InnerLength: 60, Segments: []*metapb.SegmentData{{Id: "b@"}}}, + }, + } + + got := NewFileMetadataFromContent(c, "/path/to.nzb", 1234567890, "nzbdav-id-1") + + if got.FileSize != 100 { + t.Errorf("FileSize = %d, want 100", got.FileSize) + } + if got.SourceNzbPath != "/path/to.nzb" { + t.Errorf("SourceNzbPath = %q, want %q", got.SourceNzbPath, "/path/to.nzb") + } + if got.ReleaseDate != 1234567890 { + t.Errorf("ReleaseDate = %d, want 1234567890", got.ReleaseDate) + } + if got.NzbdavId != "nzbdav-id-1" { + t.Errorf("NzbdavId = %q, want %q", got.NzbdavId, "nzbdav-id-1") + } + if got.Status != 
metapb.FileStatus_FILE_STATUS_HEALTHY { + t.Errorf("Status = %v, want FILE_STATUS_HEALTHY", got.Status) + } + if len(got.SegmentData) != 1 || got.SegmentData[0].Id != "outer@" { + t.Errorf("SegmentData not preserved: %+v", got.SegmentData) + } + if len(got.NestedSources) != 2 { + t.Fatalf("NestedSources = %d, want 2", len(got.NestedSources)) + } + if got.NestedSources[0].InnerLength != 40 || got.NestedSources[1].InnerLength != 60 { + t.Errorf("NestedSources lengths wrong: %+v", got.NestedSources) + } + if got.NestedSources[0].Segments[0].Id != "a@" || got.NestedSources[1].Segments[0].Id != "b@" { + t.Errorf("NestedSources segment ids wrong: %+v", got.NestedSources) + } + // No AES key on Content → no encryption on metadata + if got.Encryption != metapb.Encryption_NONE { + t.Errorf("Encryption = %v, want NONE (no AES key on content)", got.Encryption) + } +} + +func TestNewFileMetadataFromContent_SetsAESWhenKeyPresent(t *testing.T) { + c := Content{ + Filename: "encrypted.bin", + Size: 50, + AesKey: []byte{0x01, 0x02, 0x03}, + AesIV: []byte{0x10, 0x20, 0x30}, + } + + got := NewFileMetadataFromContent(c, "", 0, "") + + if got.Encryption != metapb.Encryption_AES { + t.Errorf("Encryption = %v, want AES", got.Encryption) + } + if string(got.AesKey) != string(c.AesKey) { + t.Errorf("AesKey not propagated") + } + if string(got.AesIv) != string(c.AesIV) { + t.Errorf("AesIv not propagated") + } +} + +// TestNewFileMetadataFromContent_DedupesSharedOuterSources pins the +// encrypted-multi-extent fix. Mimics the Avatar 3D shape: many +// NestedSources sharing the SAME outer-segment slice plus the same AES +// key/iv. Before the dedupe writer landed, marshalling this proto +// produced an 8 GB .meta on disk. The fix must: +// +// 1. Marshal to a size proportional to len(outer-segments) + len(extents), +// NOT len(outer-segments) × len(extents). +// 2. 
Round-trip cleanly: after Unmarshal + ExpandSharedOuterSources, all +// nested sources must point to the same underlying segments backing +// array (verified via unsafe.SliceData pointer equality), and per- +// source InnerOffset/InnerLength must be preserved exactly. +func TestNewFileMetadataFromContent_DedupesSharedOuterSources(t *testing.T) { + // Build an outer segment list large enough that duplicating it across + // 100 sources would cost ~5 MB if no dedupe ran. With dedupe the + // marshalled size is dominated by the one shared copy. + const numSegments = 1000 + const numExtents = 100 + outerSegs := make([]*metapb.SegmentData, numSegments) + for i := range outerSegs { + outerSegs[i] = &metapb.SegmentData{ + Id: "msg-id-of-typical-length@news.example.com", + StartOffset: int64(i) * 1024, + EndOffset: int64(i+1)*1024 - 1, + SegmentSize: 1024, + } + } + + nested := make([]NestedSource, 0, numExtents) + for i := range numExtents { + nested = append(nested, NestedSource{ + Segments: outerSegs, // SAME slice header — the dedupe target + AesKey: []byte{0xAA, 0xBB, 0xCC, 0xDD}, + AesIV: []byte{0x11, 0x22, 0x33, 0x44}, + InnerOffset: int64(i) * 4096, + InnerLength: 4096, + InnerVolumeSize: int64(numSegments * 1024), + }) + } + + content := Content{ + Filename: "huge.m2ts", + Size: int64(numExtents * 4096), + NestedSources: nested, + } + + meta := NewFileMetadataFromContent(content, "/nzb", 0, "") + + // Marshal the proto and assert the on-disk size reflects dedupe. + marshalled, err := proto.Marshal(meta) + if err != nil { + t.Fatalf("proto.Marshal: %v", err) + } + t.Logf("marshalled .meta size: %d bytes (%d segments × %d extents)", len(marshalled), numSegments, numExtents) + + // Estimate the marshalled size of one shared outer source: ~85 bytes + // per SegmentData on the wire × 1000 segments ≈ 85 KB. Plus 100 + // thin nested sources at ~30 bytes ≈ 3 KB. Plus header overhead. + // A regression to per-source duplication would produce ~8.5 MB + // (100 × 85 KB). 
Use 500 KB as a generous ceiling that catches any + // duplication regression. + const maxAllowed = 500 * 1024 + if len(marshalled) > maxAllowed { + t.Fatalf("marshalled proto is %d bytes — expected ≤ %d. Dedupe is not working; each NestedSource is duplicating the outer segments list.", + len(marshalled), maxAllowed) + } + + // Round-trip: unmarshal + expand, then verify all NestedSources point + // at the SAME segments backing array (pointer equality via + // unsafe.SliceData) so RAM cost stays at one shared array. + decoded := &metapb.FileMetadata{} + if err := proto.Unmarshal(marshalled, decoded); err != nil { + t.Fatalf("proto.Unmarshal: %v", err) + } + + if len(decoded.SharedOuterSources) != 1 { + t.Fatalf("SharedOuterSources = %d, want 1 (all extents share the same outer)", len(decoded.SharedOuterSources)) + } + if len(decoded.NestedSources) != numExtents { + t.Fatalf("NestedSources = %d, want %d", len(decoded.NestedSources), numExtents) + } + + // Before expansion: every nested source should have empty segments + // and a non-zero SharedOuterSourceIndex. + for i, ns := range decoded.NestedSources { + if len(ns.Segments) != 0 { + t.Errorf("nested source %d: expected empty Segments before expansion, got %d", i, len(ns.Segments)) + } + if ns.SharedOuterSourceIndex != 1 { + t.Errorf("nested source %d: SharedOuterSourceIndex = %d, want 1", i, ns.SharedOuterSourceIndex) + } + } + + // Reuse the production expand helper via the metadata package — but + // to avoid a test-time import cycle we inline an equivalent walk + // here. (The real read path in metadata.ReadFileMetadata calls + // metadata.ExpandSharedOuterSources, which performs the same walk.) 
+ for _, ns := range decoded.NestedSources { + idx := int(ns.SharedOuterSourceIndex) - 1 + shared := decoded.SharedOuterSources[idx] + ns.Segments = shared.Segments + ns.AesKey = shared.AesKey + ns.AesIv = shared.AesIv + if ns.InnerVolumeSize == 0 { + ns.InnerVolumeSize = shared.InnerVolumeSize + } + } + + // After expansion: per-source offsets/lengths preserved. + for i, ns := range decoded.NestedSources { + if ns.InnerOffset != int64(i)*4096 { + t.Errorf("nested source %d: InnerOffset = %d, want %d", i, ns.InnerOffset, int64(i)*4096) + } + if ns.InnerLength != 4096 { + t.Errorf("nested source %d: InnerLength = %d, want 4096", i, ns.InnerLength) + } + if len(ns.Segments) != numSegments { + t.Errorf("nested source %d: post-expand Segments = %d, want %d", i, len(ns.Segments), numSegments) + } + } + + // All nested sources should share the same underlying segments + // backing array — proves the expansion didn't deep-copy. + firstBacking := uintptr(unsafe.Pointer(unsafe.SliceData(decoded.NestedSources[0].Segments))) + for i := 1; i < len(decoded.NestedSources); i++ { + thisBacking := uintptr(unsafe.Pointer(unsafe.SliceData(decoded.NestedSources[i].Segments))) + if firstBacking != thisBacking { + t.Errorf("nested source %d: expected shared backing array, got distinct pointer (was %x now %x)", i, firstBacking, thisBacking) + break + } + } +} diff --git a/internal/importer/archive/iso/bluray.go b/internal/importer/archive/iso/bluray.go new file mode 100644 index 000000000..be0e79f40 --- /dev/null +++ b/internal/importer/archive/iso/bluray.go @@ -0,0 +1,197 @@ +package iso + +import ( + "context" + "io" + "log/slog" + "sort" + "strings" + + "github.com/javi11/altmount/internal/progress" +) + +// MainFeaturePlaylist is the result of analysing a Blu-ray's BDMV. +// Streams is the ordered list of M2TS file entries that, concatenated, +// form the main feature; the slice is empty if no parseable playlist +// was found. 
+type MainFeaturePlaylist struct { + PlaylistName string // e.g. "00800.MPLS" — for logging only + DurationTicks int64 // sum of (OUT-IN) at 45 kHz — informational, not used for selection + Streams []isoFileEntry // ordered M2TS entries (duplicates preserved if the playlist legitimately repeats a clip) + UniqueClipBytes uint64 // sum of file sizes of UNIQUE clips referenced; the primary scoring metric + UniqueClipCount int // number of distinct clips referenced; scoring tiebreaker + // ClipInTimes and ClipDurations are parallel to Streams: the MPLS + // PlayItem IN_time and (OUT−IN) for each stream, in 45 kHz ticks. They + // drive the continuous-timeline remux of the concatenated clips. + ClipInTimes []int64 + ClipDurations []int64 +} + +// ResolveMainFeature inspects the entries returned by ListISOFiles for a +// Blu-ray (BDMV) structure and returns the playlist that represents the +// main movie. Returns nil if the disc is not BDMV, has no .mpls, or no +// playlist resolves to a non-empty M2TS sequence. +// +// Selection heuristic: pick the playlist with the longest total +// presentation duration. Ties break on PlayItem count (more clips wins), +// then lexicographically smallest filename for determinism. +// +// Failures parsing individual playlists are non-fatal — we skip them and +// keep evaluating the rest, mirroring how every Blu-ray player tolerates +// malformed entries in BDMV/PLAYLIST/. +func ResolveMainFeature(ctx context.Context, rs io.ReadSeeker, files []isoFileEntry, progressTracker *progress.Tracker) *MainFeaturePlaylist { + // Build per-clip indexes. M2TS streams live at BDMV/STREAM/.M2TS + // and carry the 2D version (or the only version on a 2D disc). SSIF + // streams live at BDMV/STREAM/SSIF/.SSIF and carry the + // stereoscopic interleaved 3D version — on 3D-only Blu-ray releases + // the main feature playlist references SSIF clips, while the M2TS + // directory holds only extras. 
We prefer M2TS when both exist (smaller + // bytes, universal playback) and fall back to SSIF when only it + // resolves the playlist's clip names. + m2tsByClip := make(map[string]isoFileEntry) + ssifByClip := make(map[string]isoFileEntry) + var playlistEntries []isoFileEntry + for _, f := range files { + up := strings.ToUpper(f.path) + switch { + case strings.HasPrefix(up, "BDMV/PLAYLIST/") && strings.HasSuffix(up, ".MPLS"): + playlistEntries = append(playlistEntries, f) + case strings.HasPrefix(up, "BDMV/STREAM/SSIF/") && strings.HasSuffix(up, ".SSIF"): + base := up[len("BDMV/STREAM/SSIF/") : len(up)-len(".SSIF")] + ssifByClip[base] = f + case strings.HasPrefix(up, "BDMV/STREAM/") && strings.HasSuffix(up, ".M2TS"): + base := up[len("BDMV/STREAM/") : len(up)-len(".M2TS")] + m2tsByClip[base] = f + } + } + if len(playlistEntries) == 0 || (len(m2tsByClip) == 0 && len(ssifByClip) == 0) { + return nil + } + + // Deterministic order: shorter filenames (and lexicographic ties) win + // the tie-break later. + sort.Slice(playlistEntries, func(i, j int) bool { + return playlistEntries[i].path < playlistEntries[j].path + }) + + var best *MainFeaturePlaylist + for idx, pe := range playlistEntries { + // Report progress per playlist examined. Reading and parsing each + // .mpls is an NNTP round-trip, so this is the granular signal that + // keeps the queue item's bar moving during BD analysis. nil-safe. + progressTracker.Update(idx+1, len(playlistEntries)) + data, err := readISOFile(rs, pe) + if err != nil { + continue + } + pl, err := ParseMPLS(data) + if err != nil { + continue + } + + // Resolve clip names in playlist order, preferring M2TS over SSIF. + // Build the ordered streams slice (duplicates preserved — a real BD + // feature may legitimately repeat a clip, and the output virtual + // file must follow the playlist order faithfully) AND a separate + // dedupe-by-name byte sum that drives playlist selection. 
Without + // the dedupe, a menu-navigation playlist that points 200+ times at + // the same ~80s menu M2TS would score higher than a real 30-chapter + // main feature, and we'd serve 30+ GB of looped menu. + streams := make([]isoFileEntry, 0, len(pl.PlayItems)) + inTimes := make([]int64, 0, len(pl.PlayItems)) + durations := make([]int64, 0, len(pl.PlayItems)) + seenClips := make(map[string]struct{}, len(pl.PlayItems)) + var uniqueClipBytes uint64 + for _, it := range pl.PlayItems { + name := strings.ToUpper(it.ClipName) + entry, ok := m2tsByClip[name] + if !ok { + entry, ok = ssifByClip[name] + } + if !ok { + continue + } + streams = append(streams, entry) + // Per-clip timing, parallel to streams (45 kHz). OUT may be < IN + // on malformed entries; clamp the span to 0 in that case. + var dur int64 + if it.OutTime > it.InTime { + dur = int64(it.OutTime - it.InTime) + } + inTimes = append(inTimes, int64(it.InTime)) + durations = append(durations, dur) + if _, dup := seenClips[name]; !dup { + seenClips[name] = struct{}{} + uniqueClipBytes += entry.size + } + } + if len(streams) == 0 { + continue + } + + cand := &MainFeaturePlaylist{ + PlaylistName: pe.path, + DurationTicks: pl.DurationTicks(), + Streams: streams, + UniqueClipBytes: uniqueClipBytes, + UniqueClipCount: len(seenClips), + ClipInTimes: inTimes, + ClipDurations: durations, + } + slog.DebugContext(ctx, "Blu-ray playlist candidate", + "playlist", pe.path, + "play_items", len(pl.PlayItems), + "resolved_streams", len(streams), + "unique_clips", len(seenClips), + "unique_clip_bytes", uniqueClipBytes, + "duration_seconds", cand.DurationTicks/45000, + ) + if best == nil || isBetterPlaylist(cand, best) { + best = cand + } + } + if best != nil { + slog.InfoContext(ctx, "Blu-ray main feature playlist resolved", + "playlist", best.PlaylistName, + "clips", len(best.Streams), + "unique_clips", best.UniqueClipCount, + "unique_clip_bytes", best.UniqueClipBytes, + "duration_seconds", best.DurationTicks/45000, + ) + } + 
return best +} + +// isBetterPlaylist returns true when cand should replace best. Score by +// total bytes of unique clips referenced — a real main feature pulls in +// ~30 distinct chapter clips totalling tens of GB, while a Blu-ray menu +// navigation playlist references one small clip repeatedly and therefore +// always loses on this metric regardless of how many PlayItems it +// inflates the raw duration with. Final tie: earlier filename wins, +// relying on playlistEntries being lex-sorted before iteration so we +// only swap when strictly better. +func isBetterPlaylist(cand, best *MainFeaturePlaylist) bool { + if cand.UniqueClipBytes != best.UniqueClipBytes { + return cand.UniqueClipBytes > best.UniqueClipBytes + } + return cand.UniqueClipCount > best.UniqueClipCount +} + +// readISOFile reads the full contents of one isoFileEntry from rs, +// concatenating bytes across every on-disc extent. MPLS files are tiny +// (~KBs) and almost always single-extent, but multi-extent MPLS is +// legal so we iterate. +func readISOFile(rs io.ReadSeeker, e isoFileEntry) ([]byte, error) { + out := make([]byte, 0, e.size) + for _, ext := range e.extents { + if _, err := rs.Seek(int64(ext.lba)*iso9660SectorSize, io.SeekStart); err != nil { + return nil, err + } + chunk := make([]byte, ext.length) + if _, err := io.ReadFull(rs, chunk); err != nil { + return nil, err + } + out = append(out, chunk...) + } + return out, nil +} diff --git a/internal/importer/archive/iso/bluray_test.go b/internal/importer/archive/iso/bluray_test.go new file mode 100644 index 000000000..10ec2ccd3 --- /dev/null +++ b/internal/importer/archive/iso/bluray_test.go @@ -0,0 +1,434 @@ +package iso + +import ( + "bytes" + "context" + "fmt" + "io" + "testing" + + "github.com/javi11/altmount/internal/progress" +) + +// recordingBroadcaster captures progress updates for assertions in tests. 
+type recordingBroadcaster struct { + percentages []int + stages []string +} + +func (rb *recordingBroadcaster) UpdateProgress(_ int, percentage int) { + rb.percentages = append(rb.percentages, percentage) + rb.stages = append(rb.stages, "") +} + +func (rb *recordingBroadcaster) UpdateProgressWithStage(_ int, percentage int, stage string) { + rb.percentages = append(rb.percentages, percentage) + rb.stages = append(rb.stages, stage) +} + +// mkEntry builds a single-extent isoFileEntry — the common case for tests. +func mkEntry(path string, lba uint32, size uint64) isoFileEntry { + return isoFileEntry{ + path: path, + size: size, + extents: []isoExtent{{lba: lba, length: size}}, + } +} + +// makeImage assembles an in-memory disc image by placing each piece of +// data at the sector index given in its key. The returned reader can be +// used as if it were a real ISO read-seeker. +func makeImage(t *testing.T, pieces map[uint32][]byte) io.ReadSeeker { + t.Helper() + var maxSect uint32 + for s, b := range pieces { + end := s + uint32((len(b)+iso9660SectorSize-1)/iso9660SectorSize) + if end > maxSect { + maxSect = end + } + } + if maxSect == 0 { + maxSect = 1 + } + img := make([]byte, int(maxSect)*iso9660SectorSize) + for s, b := range pieces { + copy(img[int(s)*iso9660SectorSize:], b) + } + return bytes.NewReader(img) +} + +func TestResolveMainFeature(t *testing.T) { + t.Parallel() + + t.Run("picks longest playlist", func(t *testing.T) { + t.Parallel() + // Two playlists: + // 00001.MPLS → 1 clip, short (extras playlist) + // 00800.MPLS → 3 clips, long (main feature) + short := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00010", InTime: 0, OutTime: 45000}, + }, nil) + long := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 90 * 45000}, + {ClipName: "00002", InTime: 0, OutTime: 60 * 45000}, + {ClipName: "00003", InTime: 0, OutTime: 30 * 45000}, + }, nil) + + rs := makeImage(t, map[uint32][]byte{ + 100: short, + 110: long, + }) + + // 
File listing: two playlists and four M2TS clips (one extra). + files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, uint64(len(short))), + mkEntry("BDMV/PLAYLIST/00800.MPLS", 110, uint64(len(long))), + mkEntry("BDMV/STREAM/00001.M2TS", 200, 1_000_000), + mkEntry("BDMV/STREAM/00002.M2TS", 300, 2_000_000), + mkEntry("BDMV/STREAM/00003.M2TS", 400, 3_000_000), + mkEntry("BDMV/STREAM/00010.M2TS", 500, 500_000), + } + + got := ResolveMainFeature(context.Background(), rs, files, nil) + if got == nil { + t.Fatal("ResolveMainFeature returned nil") + } + if got.PlaylistName != "BDMV/PLAYLIST/00800.MPLS" { + t.Errorf("PlaylistName = %q, want 00800.MPLS", got.PlaylistName) + } + if len(got.Streams) != 3 { + t.Fatalf("Streams len = %d, want 3", len(got.Streams)) + } + wantOrder := []string{"BDMV/STREAM/00001.M2TS", "BDMV/STREAM/00002.M2TS", "BDMV/STREAM/00003.M2TS"} + for i, s := range got.Streams { + if s.path != wantOrder[i] { + t.Errorf("Streams[%d].path = %q, want %q", i, s.path, wantOrder[i]) + } + } + }) + + t.Run("reports progress per playlist", func(t *testing.T) { + t.Parallel() + short := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00010", InTime: 0, OutTime: 45000}, + }, nil) + long := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 90 * 45000}, + {ClipName: "00002", InTime: 0, OutTime: 60 * 45000}, + }, nil) + rs := makeImage(t, map[uint32][]byte{100: short, 110: long}) + files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, uint64(len(short))), + mkEntry("BDMV/PLAYLIST/00800.MPLS", 110, uint64(len(long))), + mkEntry("BDMV/STREAM/00001.M2TS", 200, 1_000_000), + mkEntry("BDMV/STREAM/00002.M2TS", 300, 2_000_000), + mkEntry("BDMV/STREAM/00010.M2TS", 500, 500_000), + } + + rb := &recordingBroadcaster{} + tracker := progress.NewTracker(rb, 7, 10, 30).WithStage("Analyzing ISO") + + if got := ResolveMainFeature(context.Background(), rs, files, tracker); got == nil { + t.Fatal("ResolveMainFeature returned 
nil") + } + + // Two playlists → at least one update; every update must carry the + // stage, stay inside [10,30], and be non-decreasing. + if len(rb.percentages) == 0 { + t.Fatal("expected progress updates, got none") + } + prev := -1 + for i, p := range rb.percentages { + if rb.stages[i] != "Analyzing ISO" { + t.Errorf("update %d stage = %q, want %q", i, rb.stages[i], "Analyzing ISO") + } + if p < 10 || p > 30 { + t.Errorf("update %d percentage = %d, want within [10,30]", i, p) + } + if p < prev { + t.Errorf("update %d percentage = %d decreased from %d", i, p, prev) + } + prev = p + } + }) + + t.Run("non-BDMV disc returns nil", func(t *testing.T) { + t.Parallel() + files := []isoFileEntry{ + mkEntry("movie.mkv", 100, 1_000_000), + } + if got := ResolveMainFeature(context.Background(), bytes.NewReader(make([]byte, 16*iso9660SectorSize)), files, nil); got != nil { + t.Errorf("expected nil for non-BDMV disc, got %+v", got) + } + }) + + t.Run("BDMV with no parseable MPLS returns nil", func(t *testing.T) { + t.Parallel() + rs := makeImage(t, map[uint32][]byte{ + 100: []byte("not a real mpls"), + }) + files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, 15), + mkEntry("BDMV/STREAM/00001.M2TS", 200, 1_000_000), + } + if got := ResolveMainFeature(context.Background(), rs, files, nil); got != nil { + t.Errorf("expected nil for unparseable MPLS, got %+v", got) + } + }) + + t.Run("3D BD: playlist resolves against SSIF when M2TS missing", func(t *testing.T) { + t.Parallel() + // Avatar-2-style 3D-only release: BDMV/STREAM/*.M2TS holds only + // extras (tiny). The real main feature lives in BDMV/STREAM/SSIF/ + // and is referenced by its own MPLS. The resolver must index SSIF + // so the long playlist resolves and wins. 
+ extras := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00010", InTime: 0, OutTime: 90 * 45000}, // 90s extra + }, nil) + mainFeature3D := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00100", InTime: 0, OutTime: 60 * 60 * 45000}, + {ClipName: "00101", InTime: 0, OutTime: 60 * 60 * 45000}, + {ClipName: "00102", InTime: 0, OutTime: 12 * 60 * 45000}, // 132 min total + }, nil) + + rs := makeImage(t, map[uint32][]byte{ + 100: extras, + 110: mainFeature3D, + }) + + files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, uint64(len(extras))), + mkEntry("BDMV/PLAYLIST/00800.MPLS", 110, uint64(len(mainFeature3D))), + // Only the extras live as M2TS: + mkEntry("BDMV/STREAM/00010.M2TS", 200, 50_000_000), + // Main feature is SSIF only: + mkEntry("BDMV/STREAM/SSIF/00100.SSIF", 300, 25_000_000_000), + mkEntry("BDMV/STREAM/SSIF/00101.SSIF", 400, 25_000_000_000), + mkEntry("BDMV/STREAM/SSIF/00102.SSIF", 500, 5_000_000_000), + } + + got := ResolveMainFeature(context.Background(), rs, files, nil) + if got == nil { + t.Fatal("ResolveMainFeature returned nil — SSIF index missing?") + } + if got.PlaylistName != "BDMV/PLAYLIST/00800.MPLS" { + t.Errorf("PlaylistName = %q, want 00800.MPLS (3D main feature)", got.PlaylistName) + } + if len(got.Streams) != 3 { + t.Fatalf("Streams len = %d, want 3 SSIF clips", len(got.Streams)) + } + wantOrder := []string{ + "BDMV/STREAM/SSIF/00100.SSIF", + "BDMV/STREAM/SSIF/00101.SSIF", + "BDMV/STREAM/SSIF/00102.SSIF", + } + for i, s := range got.Streams { + if s.path != wantOrder[i] { + t.Errorf("Streams[%d].path = %q, want %q", i, s.path, wantOrder[i]) + } + } + }) + + t.Run("hybrid 3D BD: prefers M2TS over SSIF when both exist", func(t *testing.T) { + t.Parallel() + // Both 2D MPLS (refs M2TS) and 3D MPLS (refs SSIF) point at clips + // of the same name. With both files present, the M2TS version is + // the right pick: smaller bytes, universal playback. 
The resolver + // should select it even if the 3D playlist is marginally longer. + mainFeature := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00100", InTime: 0, OutTime: 60 * 60 * 45000}, + }, nil) + rs := makeImage(t, map[uint32][]byte{100: mainFeature}) + + files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00800.MPLS", 100, uint64(len(mainFeature))), + mkEntry("BDMV/STREAM/00100.M2TS", 200, 20_000_000_000), + mkEntry("BDMV/STREAM/SSIF/00100.SSIF", 300, 40_000_000_000), + } + + got := ResolveMainFeature(context.Background(), rs, files, nil) + if got == nil { + t.Fatal("ResolveMainFeature returned nil") + } + if len(got.Streams) != 1 { + t.Fatalf("Streams len = %d, want 1", len(got.Streams)) + } + if got.Streams[0].path != "BDMV/STREAM/00100.M2TS" { + t.Errorf("picked %q, want M2TS over SSIF", got.Streams[0].path) + } + }) + + t.Run("playlist referencing missing M2TS yields nil", func(t *testing.T) { + t.Parallel() + // Playlist references a clip that has no corresponding M2TS entry. + data := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "99999", InTime: 0, OutTime: 45000}, + }, nil) + rs := makeImage(t, map[uint32][]byte{ + 100: data, + }) + files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, uint64(len(data))), + mkEntry("BDMV/STREAM/00001.M2TS", 200, 1_000_000), + } + if got := ResolveMainFeature(context.Background(), rs, files, nil); got != nil { + t.Errorf("expected nil when MPLS references unknown clip, got %+v", got) + } + }) + + t.Run("prefers feature over menu when menu has more PlayItems", func(t *testing.T) { + t.Parallel() + // The Avatar 3D regression: a menu navigation playlist with 201 + // PlayItems all pointing at the same ~80s menu clip would beat the + // real main feature under the old duration-sum scoring because + // 201 × 80s > 30 × 6min. The fix scores by unique-clip bytes, + // where the menu's single 100MB clip loses to the feature's + // 30 × 600MB chapter clips totalling 18 GB. 
+ menuItems := make([]MPLSPlayItem, 201) + for i := range menuItems { + // All 201 PlayItems reference the SAME menu clip — exactly the + // pattern observed in the user's failing case. + menuItems[i] = MPLSPlayItem{ + ClipName: "00149", + InTime: 0, + OutTime: 80 * 45000, // 80s, so total raw duration is 201 × 80s = 16200s ≈ 4.5h + } + } + menu := buildMPLS(t, "0200", menuItems, nil) + + featureItems := make([]MPLSPlayItem, 30) + for i := range featureItems { + featureItems[i] = MPLSPlayItem{ + ClipName: fmt.Sprintf("%05d", 1+i), // 30 distinct clips: 00001..00030 + InTime: 0, + OutTime: 6 * 60 * 45000, // 6 min/chapter → 30 × 6 = 180 min total raw duration + } + } + feature := buildMPLS(t, "0200", featureItems, nil) + + rs := makeImage(t, map[uint32][]byte{ + 100: menu, + 110: feature, + }) + + files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00000.MPLS", 100, uint64(len(menu))), + mkEntry("BDMV/PLAYLIST/00800.MPLS", 110, uint64(len(feature))), + // Menu clip: ~100 MB, one entry. + mkEntry("BDMV/STREAM/00149.M2TS", 1000, 100_000_000), + } + // 30 distinct feature clips, ~600 MB each → ~18 GB total unique bytes. + for i := range featureItems { + files = append(files, mkEntry( + fmt.Sprintf("BDMV/STREAM/%05d.M2TS", 1+i), + 2000+uint32(i)*10, + 600_000_000, + )) + } + + got := ResolveMainFeature(context.Background(), rs, files, nil) + if got == nil { + t.Fatal("ResolveMainFeature returned nil — feature playlist should have won") + } + if got.PlaylistName != "BDMV/PLAYLIST/00800.MPLS" { + t.Fatalf("PlaylistName = %q, want 00800.MPLS (the real feature). 
The menu's 201 PlayItems must not be allowed to beat the feature's 30 distinct chapters.", got.PlaylistName) + } + if got.UniqueClipCount != 30 { + t.Errorf("UniqueClipCount = %d, want 30 (one per feature chapter)", got.UniqueClipCount) + } + if got.UniqueClipBytes != 30*600_000_000 { + t.Errorf("UniqueClipBytes = %d, want %d", got.UniqueClipBytes, uint64(30*600_000_000)) + } + if len(got.Streams) != 30 { + t.Errorf("Streams len = %d, want 30 (the playlist's actual playback order)", len(got.Streams)) + } + }) + + t.Run("preserves legitimate clip repetition in output streams", func(t *testing.T) { + t.Parallel() + // A real BD playlist may legitimately repeat a clip (e.g., a + // "previously on..." recap at the start of each chapter). The fix + // dedupes only for scoring; the output Streams slice must retain + // the playlist's actual playback order, including duplicates. + data := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 30 * 45000}, // A + {ClipName: "00002", InTime: 0, OutTime: 60 * 45000}, // B + {ClipName: "00001", InTime: 0, OutTime: 30 * 45000}, // A again + {ClipName: "00003", InTime: 0, OutTime: 90 * 45000}, // C + }, nil) + rs := makeImage(t, map[uint32][]byte{100: data}) + + files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00800.MPLS", 100, uint64(len(data))), + mkEntry("BDMV/STREAM/00001.M2TS", 200, 100), + mkEntry("BDMV/STREAM/00002.M2TS", 300, 200), + mkEntry("BDMV/STREAM/00003.M2TS", 400, 300), + } + + got := ResolveMainFeature(context.Background(), rs, files, nil) + if got == nil { + t.Fatal("ResolveMainFeature returned nil") + } + + // Output preserves [A, B, A, C] exactly. 
+ if len(got.Streams) != 4 { + t.Fatalf("Streams len = %d, want 4 (dedupe must not collapse the output)", len(got.Streams)) + } + wantPaths := []string{ + "BDMV/STREAM/00001.M2TS", + "BDMV/STREAM/00002.M2TS", + "BDMV/STREAM/00001.M2TS", + "BDMV/STREAM/00003.M2TS", + } + for i, s := range got.Streams { + if s.path != wantPaths[i] { + t.Errorf("Streams[%d].path = %q, want %q", i, s.path, wantPaths[i]) + } + } + + // Scoring metrics use dedupe: 3 unique clips totalling 100+200+300. + if got.UniqueClipCount != 3 { + t.Errorf("UniqueClipCount = %d, want 3", got.UniqueClipCount) + } + if got.UniqueClipBytes != 600 { + t.Errorf("UniqueClipBytes = %d, want 600 (100+200+300, A counted once)", got.UniqueClipBytes) + } + }) + + t.Run("when all playlists are menus, picks the largest deterministically", func(t *testing.T) { + t.Parallel() + // Degenerate disc: every MPLS is a menu-style single-clip + // repetition. Algorithm must still return *something* without + // crashing and must be deterministic across runs. Picks the one + // with the largest unique-clip bytes (i.e., the largest target + // clip, since each playlist has only one unique clip). 
+ menuA := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00100", InTime: 0, OutTime: 80 * 45000}, + {ClipName: "00100", InTime: 0, OutTime: 80 * 45000}, + }, nil) + menuB := buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00200", InTime: 0, OutTime: 80 * 45000}, + {ClipName: "00200", InTime: 0, OutTime: 80 * 45000}, + {ClipName: "00200", InTime: 0, OutTime: 80 * 45000}, + }, nil) + + rs := makeImage(t, map[uint32][]byte{ + 100: menuA, + 110: menuB, + }) + files := []isoFileEntry{ + mkEntry("BDMV/PLAYLIST/00001.MPLS", 100, uint64(len(menuA))), + mkEntry("BDMV/PLAYLIST/00002.MPLS", 110, uint64(len(menuB))), + mkEntry("BDMV/STREAM/00100.M2TS", 200, 50_000_000), // 50 MB + mkEntry("BDMV/STREAM/00200.M2TS", 300, 100_000_000), // 100 MB — larger + } + + got := ResolveMainFeature(context.Background(), rs, files, nil) + if got == nil { + t.Fatal("ResolveMainFeature returned nil for a disc full of menus — should still pick one") + } + if got.PlaylistName != "BDMV/PLAYLIST/00002.MPLS" { + t.Errorf("PlaylistName = %q, want 00002.MPLS (its unique clip is 100 MB vs 50 MB)", got.PlaylistName) + } + }) +} diff --git a/internal/importer/archive/iso/fs.go b/internal/importer/archive/iso/fs.go index ace50d531..824d99a45 100644 --- a/internal/importer/archive/iso/fs.go +++ b/internal/importer/archive/iso/fs.go @@ -1,20 +1,43 @@ package iso import ( + "context" "encoding/binary" "fmt" "io" + "log/slog" "strings" "unicode/utf16" ) const iso9660SectorSize = 2048 -// isoFileEntry is one non-directory file returned by ListISOFiles. +// isoFileEntry is one non-directory file returned by ListISOFiles. The +// file's data on disc may be split across multiple contiguous extents +// — Blu-ray main-feature M2TS files routinely use hundreds of extents +// chained via Allocation Extent Descriptors. extents is in disc order; +// concatenating their bytes yields the complete file. type isoFileEntry struct { - path string // full path within ISO (e.g. 
"BDMV/STREAM/00001.M2TS") - lba uint32 - size uint64 + path string + size uint64 + extents []isoExtent +} + +// firstLBA returns the start LBA of the file's first extent. Callers +// that only need a starting sector (e.g. reading a small MPLS file +// known to be single-extent) can use this. +func (e isoFileEntry) firstLBA() uint32 { + if len(e.extents) == 0 { + return 0 + } + return e.extents[0].lba +} + +// isoExtent is one contiguous run of sectors on disc that contributes +// length bytes to the logical file. +type isoExtent struct { + lba uint32 + length uint64 } // ───────────────────────────────────────────────────────────────────────────── @@ -100,7 +123,13 @@ func iso9660WalkAll(rs io.ReadSeeker, dirLBA uint32, dirSize uint64, prefix stri } result = append(result, sub...) } else { - result = append(result, isoFileEntry{path: entryPath, lba: e.lba, size: e.size}) + // ISO 9660 stores file data in a single contiguous extent. + // (Interleave mode exists but is essentially never used.) + result = append(result, isoFileEntry{ + path: entryPath, + size: e.size, + extents: []isoExtent{{lba: e.lba, length: e.size}}, + }) } } return result, nil @@ -178,6 +207,40 @@ func udfReadTag(rs io.ReadSeeker, sectorNum uint32) (udfTag, []byte, error) { return t, buf, nil } +// udfMaxIndirectDepth caps how many Indirect Entry (tag 248) hops +// udfFollowIndirect will traverse before declaring a malformed chain. +// 16 matches Linux kernel UDF (fs/udf/inode.c) and libisofs convention. +const udfMaxIndirectDepth = 16 + +// udfFollowIndirect resolves a chain of Indirect Entries (tag 248) +// starting at physSect and returns the physical sector of the real +// File Entry plus its tag and raw buffer. Per UDF §14.7 an Indirect +// Entry is a 16-byte descriptor tag + 20-byte ICBTag + 16-byte +// long_ad at offset 36. Depth-capped to bound runaway on a malformed +// disc that points an Indirect Entry chain back at itself. 
+func udfFollowIndirect(ctx context.Context, rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, partStart uint32) (uint32, udfTag, []byte, error) { + for depth := range udfMaxIndirectDepth { + tag, buf, err := udfReadTag(rs, physSect) + if err != nil { + return 0, udfTag{}, nil, fmt.Errorf("udf: reading indirect entry at sector %d: %w", physSect, err) + } + if tag.id != 248 { + return physSect, tag, buf, nil + } + if len(buf) < 36+16 { + return 0, udfTag{}, nil, fmt.Errorf("udf: indirect entry at sector %d too short", physSect) + } + next := udfParseLongAD(buf, 36) + resolved, err := udfResolveICB(next.loc, metaMap, partStart) + if err != nil { + return 0, udfTag{}, nil, fmt.Errorf("udf: resolving indirect ICB: %w", err) + } + slog.DebugContext(ctx, "UDF: followed Indirect Entry", "from", physSect, "to", resolved, "depth", depth) + physSect = resolved + } + return 0, udfTag{}, nil, fmt.Errorf("udf: indirect entry chain exceeds depth cap (%d)", udfMaxIndirectDepth) +} + // udfParseLongAD parses a long_ad from buf[off:]. func udfParseLongAD(buf []byte, off int) udfLongAD { length := binary.LittleEndian.Uint32(buf[off:]) @@ -328,10 +391,74 @@ func udfResolveICB(loc udfLBA, metaMap []udfMetaSpan, partStart uint32) (uint32, return udfResolveMetaBlock(loc.block, metaMap, partStart) } +// readMetaExtent reads a contiguous extent of `length` bytes starting at +// logical metadata block `startBlock`, walking sector by sector through +// the metaMap so multi-sector extents (e.g. a 26 KiB directory) are +// returned in full. Without this, callers that read only the first +// 2048-byte sector silently lose every entry past the first sector — the +// root cause of the "main-feature M2TS files missing from listing" bug. 
+func readMetaExtent(rs io.ReadSeeker, startBlock uint32, length int, metaMap []udfMetaSpan, partStart uint32) ([]byte, error) { + if length <= 0 { + return nil, nil + } + out := make([]byte, 0, length) + remaining := length + for b := uint32(0); remaining > 0; b++ { + ps, err := udfResolveMetaBlock(startBlock+b, metaMap, partStart) + if err != nil { + return nil, err + } + _, sector, err := udfReadTag(rs, ps) + if err != nil { + // Malformed image (e.g. extent claims more sectors than exist): + // return what we successfully read rather than failing the + // entire walk. Callers parse partial directory data correctly. + return out, nil + } + take := min(remaining, len(sector)) + out = append(out, sector[:take]...) + remaining -= take + } + return out, nil +} + +// readICBExtent is the long_ad analogue of readMetaExtent: walks blocks +// by incrementing the logical-block field inside the ICB long_ad. +func readICBExtent(rs io.ReadSeeker, loc udfLBA, length int, metaMap []udfMetaSpan, partStart uint32) ([]byte, error) { + if length <= 0 { + return nil, nil + } + out := make([]byte, 0, length) + remaining := length + cur := loc + for remaining > 0 { + ps, err := udfResolveICB(cur, metaMap, partStart) + if err != nil { + return nil, err + } + _, sector, err := udfReadTag(rs, ps) + if err != nil { + // Malformed image (e.g. extent claims more sectors than exist): + // return what we successfully read rather than failing the + // entire walk. Callers parse partial directory data correctly. + return out, nil + } + take := min(remaining, len(sector)) + out = append(out, sector[:take]...) + remaining -= take + cur.block++ + } + return out, nil +} + // udfReadDirEntries reads all File Identifier Descriptor records from a -// File Entry at physSect. -func udfReadDirEntries(rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, partStart uint32) ([]udfDirEntry, error) { - tag, buf, err := udfReadTag(rs, physSect) +// File Entry at physSect. 
ctx is threaded for upcoming Indirect Entry +// (tag 248) follow logic that will emit a debug log on each redirect, +// and as a hook for future warn-log additions in this function. +func udfReadDirEntries(ctx context.Context, rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, partStart uint32) ([]udfDirEntry, error) { + // Transparently traverse any Indirect Entry (tag 248) chain on a + // directory ICB. udfFollowIndirect emits a Debug log per redirect. + physSect, tag, buf, err := udfFollowIndirect(ctx, rs, physSect, metaMap, partStart) if err != nil { return nil, fmt.Errorf("reading dir ICB at %d: %w", physSect, err) } @@ -360,21 +487,22 @@ func udfReadDirEntries(rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, case 3: // inline dirData = buf[allocDescOff : allocDescOff+allocDescLen] case 0: // short_ad + // A single allocation descriptor describes an extent that can span + // many 2048-byte sectors. The previous version of this code read + // only the first sector and truncated the rest of the extent, + // silently dropping every directory entry past ~30 FIDs — which is + // why BDMV/STREAM/ on a real Blu-ray (~300 entries, ~26 KiB) lost + // every main-feature M2TS clip. We now walk the full extent. for off := 0; off+8 <= allocDescLen; off += 8 { ad := udfParseShortAD(buf[allocDescOff:], off) if ad.length == 0 { break } - ps, rerr := udfResolveMetaBlock(ad.block, metaMap, partStart) - if rerr != nil { - return nil, rerr - } - _, sector, rerr := udfReadTag(rs, ps) + data, rerr := readMetaExtent(rs, ad.block, int(ad.length), metaMap, partStart) if rerr != nil { return nil, rerr } - take := min(int(ad.length), len(sector)) - dirData = append(dirData, sector[:take]...) + dirData = append(dirData, data...) 
} case 1: // long_ad for off := 0; off+16 <= allocDescLen; off += 16 { @@ -382,16 +510,11 @@ func udfReadDirEntries(rs io.ReadSeeker, physSect uint32, metaMap []udfMetaSpan, if ad.length == 0 { break } - ps, rerr := udfResolveICB(ad.loc, metaMap, partStart) + data, rerr := readICBExtent(rs, ad.loc, int(ad.length), metaMap, partStart) if rerr != nil { return nil, rerr } - _, sector, rerr := udfReadTag(rs, ps) - if rerr != nil { - return nil, rerr - } - take := min(int(ad.length), len(sector)) - dirData = append(dirData, sector[:take]...) + dirData = append(dirData, data...) } } @@ -504,32 +627,53 @@ func udfSetup(rs io.ReadSeeker) (partStart uint32, metaMap []udfMetaSpan, rootIC } // udfWalkAll recursively lists all non-directory files in a UDF filesystem. -func udfWalkAll(rs io.ReadSeeker, dirICB udfLongAD, metaMap []udfMetaSpan, partStart uint32, prefix string) ([]isoFileEntry, error) { +func udfWalkAll(ctx context.Context, rs io.ReadSeeker, dirICB udfLongAD, metaMap []udfMetaSpan, partStart uint32, prefix string) ([]isoFileEntry, error) { physSect, err := udfResolveICB(dirICB.loc, metaMap, partStart) if err != nil { return nil, err } - entries, err := udfReadDirEntries(rs, physSect, metaMap, partStart) + entries, err := udfReadDirEntries(ctx, rs, physSect, metaMap, partStart) if err != nil { return nil, err } var result []isoFileEntry for _, e := range entries { + // Return whatever was collected so far along with the cancel error. + // The caller (AnalyzeISO -> iso_expansion.go) treats any non-nil + // error as "keep ISO as-is", so partial vs nil doesn't change the + // outcome — but preserving the slice gives downstream debug logs + // an accurate count of what was enumerated before the cancel. 
+ if err := ctx.Err(); err != nil { + return result, err + } entryPath := e.name if prefix != "" { entryPath = prefix + "/" + e.name } if e.isDir { - sub, _ := udfWalkAll(rs, e.icb, metaMap, partStart, entryPath) + sub, _ := udfWalkAll(ctx, rs, e.icb, metaMap, partStart, entryPath) result = append(result, sub...) continue } fePhys, rerr := udfResolveICB(e.icb.loc, metaMap, partStart) if rerr != nil { + slog.WarnContext(ctx, "UDF: ICB resolve failed, dropping file from listing", + "path", entryPath, "icb_block", e.icb.loc.block, "error", rerr) + continue + } + // Transparently follow any Indirect Entry (tag 248) chain. fePhys + // is reassigned to the resolved post-redirect sector so the + // downstream collectFileExtents call uses the real FE for any + // embedded-data ("embeddedFEPhys") accounting. + fePhys, feTag, feBuf, rerr := udfFollowIndirect(ctx, rs, fePhys, metaMap, partStart) + if rerr != nil { + slog.WarnContext(ctx, "UDF: file ICB read failed, dropping file from listing", + "path", entryPath, "phys_sector", fePhys, "error", rerr) continue } - feTag, feBuf, rerr := udfReadTag(rs, fePhys) - if rerr != nil || (feTag.id != 261 && feTag.id != 266) { + if feTag.id != 261 && feTag.id != 266 { + slog.WarnContext(ctx, "UDF: file ICB has unexpected tag, dropping file from listing", + "path", entryPath, "tag_id", feTag.id) continue } infoLen := binary.LittleEndian.Uint64(feBuf[56:64]) @@ -549,48 +693,263 @@ func udfWalkAll(rs io.ReadSeeker, dirICB udfLongAD, metaMap []udfMetaSpan, partS allocDescLen = len(feBuf) - allocDescOff } - var fileLBA uint32 - switch allocType { - case 0: - if allocDescLen >= 8 { - ad := udfParseShortAD(feBuf[allocDescOff:], 0) - fileLBA = partStart + ad.block + extents := collectFileExtents(ctx, rs, feBuf[allocDescOff:allocDescOff+allocDescLen], allocType, metaMap, partStart, infoLen, fePhys) + if len(extents) == 0 { + slog.WarnContext(ctx, "UDF: collectFileExtents returned 0 extents, dropping file from listing", + "path", entryPath, 
"info_length", infoLen, "alloc_type", allocType) + continue + } + result = append(result, isoFileEntry{ + path: entryPath, + size: infoLen, + extents: extents, + }) + } + return result, nil +} + +// collectFileExtents walks the allocation descriptors of a UDF File Entry +// (or Extended File Entry), following Allocation Extent Descriptor chains +// when the inline AD area is exhausted, and returns one isoExtent per +// recorded data extent in disc order. +// +// allocType is the lower 3 bits of the FE's ICBTag flags: +// +// 0 → short_ad (8 bytes each) +// 1 → long_ad (16 bytes each) +// 2 → extended ad (20 bytes; rare, treated as short_ad-prefix here) +// 3 → file data embedded in the FE itself (small files) +// +// The high 2 bits of each AD's length field encode the AD "type": +// +// 0 → recorded & allocated extent (real data — emit) +// 1 → not recorded, allocated (sparse — skip, file should not see this on BD) +// 2 → not recorded, not allocated (hole — skip) +// 3 → next AD points at a continuation Allocation Extent Descriptor +// (tag 258) holding more ADs; chase the chain +// +// embeddedFEPhys is only meaningful for allocType 3 (it's the FE's own +// physical sector — the file data is inline at allocDescOff of that +// sector, so we materialise a single synthetic extent pointing at it). +func collectFileExtents(ctx context.Context, rs io.ReadSeeker, inlineADs []byte, allocType byte, metaMap []udfMetaSpan, partStart uint32, infoLen uint64, embeddedFEPhys uint32) []isoExtent { + if allocType == 3 { + // Embedded data — a single "extent" pointing at the FE sector + // itself with the inline-AD area treated as the file data. We + // can't emit a usable LBA for slicing because the data isn't + // sector-aligned. Skip for now; BD streams never use embedded. 
+ return nil + } + var step int + switch allocType { + case 0: + step = 8 + case 1: + step = 16 + case 2: + step = 20 // first 16 bytes are a long_ad; trailing 4 bytes are impl-use + default: + return nil + } + + var extents []isoExtent + chase := inlineADs + safety := 0 + for { + if err := ctx.Err(); err != nil { + // Cancellation is a normal operator-initiated event (shutdown, + // per-ISO deadline, user-cancelled import) — log at Debug so + // it doesn't pollute monitoring dashboards. The peer WARN sites + // in this loop stay at WARN because they indicate genuinely + // corrupt or unreachable AEDs. + slog.DebugContext(ctx, "UDF: AED chain truncated", + "reason", "context canceled", + "extents_so_far", len(extents), + "error", err) + break + } + safety++ + if safety > 4096 { + break // pathological — bail to avoid runaway IO + } + var chain *udfLongAD + for off := 0; off+step <= len(chase); off += step { + lenField := binary.LittleEndian.Uint32(chase[off:]) + adType := lenField >> 30 + adLen := lenField & 0x3FFFFFFF + if adLen == 0 && adType != 3 { + break } - case 1: - if allocDescLen >= 16 { - ad := udfParseLongAD(feBuf[allocDescOff:], 0) - fileLBA, _ = udfResolveICB(ad.loc, metaMap, partStart) + if adType == 3 { + var loc udfLongAD + switch step { + case 8: + // short_ad continuation: the 4 bytes after length + // are the next AED's logical block; partition is + // implicit (same as parent). + loc = udfLongAD{length: adLen, loc: udfLBA{block: binary.LittleEndian.Uint32(chase[off+4:])}} + default: + loc = udfParseLongAD(chase, off) + } + chain = &loc + break } + if adType != 0 { + // Type 1 (allocated but not recorded) and type 2 (hole) + // don't carry real bytes. Skip — BD streams shouldn't + // have these in practice. 
+ continue + } + var lba uint32 + switch step { + case 8: + ad := udfParseShortAD(chase, off) + resolved, err := udfResolveMetaBlock(ad.block, metaMap, partStart) + if err != nil { + continue + } + lba = resolved + default: + ad := udfParseLongAD(chase, off) + resolved, err := udfResolveICB(ad.loc, metaMap, partStart) + if err != nil { + continue + } + lba = resolved + } + extents = append(extents, isoExtent{lba: lba, length: uint64(adLen)}) } - if fileLBA > 0 { - result = append(result, isoFileEntry{path: entryPath, lba: fileLBA, size: infoLen}) + if chain == nil { + break } + ps, err := udfResolveICB(chain.loc, metaMap, partStart) + if err != nil { + slog.WarnContext(ctx, "UDF: AED chain truncated", + "reason", "icb resolve failed", + "extents_so_far", len(extents), + "error", err) + break + } + _, aedBuf, err := udfReadTag(rs, ps) + if err != nil { + slog.WarnContext(ctx, "UDF: AED chain truncated", + "reason", "tag read failed", + "extents_so_far", len(extents), + "error", err) + break + } + // Allocation Extent Descriptor layout: 16-byte tag + 4-byte + // previous-AED pointer + 4-byte length-of-allocation-descriptors, + // then the ADs themselves. + if len(aedBuf) < 24 { + slog.WarnContext(ctx, "UDF: AED chain truncated", + "reason", "aed buffer too short", + "extents_so_far", len(extents), + "buf_len", len(aedBuf)) + break + } + nextLen := int(binary.LittleEndian.Uint32(aedBuf[20:24])) + if nextLen <= 0 || 24+nextLen > len(aedBuf) { + slog.WarnContext(ctx, "UDF: AED chain truncated", + "reason", "aed length out of range", + "extents_so_far", len(extents), + "next_len", nextLen, + "buf_len", len(aedBuf)) + break + } + chase = aedBuf[24 : 24+nextLen] } - return result, nil + + // Defensive: cap the total extent bytes at the FE's info_length so a + // malformed disc with mis-sized ADs can't return more bytes than the + // file legitimately contains. 
+ var total uint64 + for i := range extents { + if total+extents[i].length > infoLen { + extents[i].length = infoLen - total + extents = extents[:i+1] + break + } + total += extents[i].length + } + + // Coalesce physically contiguous extents — many BD3D SSIF files have + // dozens of small ADs that sit right next to each other on disc. The + // underlying bytes are one contiguous run; merging the ADs collapses + // the NestedSources count proportionally (Avatar SSIF: 23 → 2) and + // shrinks both the metadata proto and the validation surface. + extents = coalesceExtents(extents) + _ = embeddedFEPhys + return extents +} + +// coalesceExtents merges adjacent extents whose physical sectors are +// contiguous (next.lba == prev.lba + prev.length/sector). Returns the +// possibly-shorter slice in disc order. A file whose extents are +// physically scattered (typical for BD M2TS clips interleaved with SSIF +// dependent-view data) is returned unchanged. +func coalesceExtents(in []isoExtent) []isoExtent { + if len(in) <= 1 { + return in + } + out := make([]isoExtent, 0, len(in)) + cur := in[0] + for i := 1; i < len(in); i++ { + next := in[i] + // length must be a whole number of sectors for the contiguity + // arithmetic to apply; if it isn't (final partial sector of a + // file), fall through and start a new run after. + if cur.length%iso9660SectorSize == 0 && + next.lba == cur.lba+uint32(cur.length/iso9660SectorSize) { + cur.length += next.length + continue + } + out = append(out, cur) + cur = next + } + out = append(out, cur) + return out } // ListISOFiles walks the ISO 9660/UDF filesystem and returns all non-directory // entries. It tries UDF first (correct 64-bit sizes, authoritative for Blu-ray) -// and falls back to ISO 9660 for plain discs without UDF. 
-func ListISOFiles(rs io.ReadSeeker) ([]isoFileEntry, error) { - // Try UDF first (handles Blu-ray and modern discs with correct 64-bit sizes) - if partStart, metaMap, rootICB, err := udfSetup(rs); err == nil { - files, err := udfWalkAll(rs, rootICB, metaMap, partStart, "") - if err == nil && len(files) > 0 { +// and falls back to ISO 9660 for plain discs without UDF. ctx is threaded +// through the UDF walk so silent-drop sites can emit slog.WarnContext logs +// for diagnosis without polluting the io.ReadSeeker signature. +func ListISOFiles(ctx context.Context, rs io.ReadSeeker) ([]isoFileEntry, error) { + // Track the underlying reason both layers failed so the combined-failure + // error message can point an operator at the actual cause (transient + // network read, malformed structure, unrecognised version, ...). + var udfErr, isoErr error + + // Try UDF first (handles Blu-ray and modern discs with correct 64-bit sizes). + if partStart, metaMap, rootICB, err := udfSetup(rs); err != nil { + udfErr = err + } else { + files, err := udfWalkAll(ctx, rs, rootICB, metaMap, partStart, "") + switch { + case err != nil: + udfErr = fmt.Errorf("walk: %w", err) + case len(files) == 0: + udfErr = fmt.Errorf("walk returned no files") + default: return files, nil } } - // Fall back to ISO 9660 + + // Fall back to ISO 9660. 
pvd := make([]byte, iso9660SectorSize) - if _, err := rs.Seek(16*iso9660SectorSize, io.SeekStart); err == nil { - if _, err := io.ReadFull(rs, pvd); err == nil { - if pvd[0] == 1 && string(pvd[1:6]) == "CD001" { - rootRec := pvd[156:] - dirLBA := binary.LittleEndian.Uint32(rootRec[2:6]) - dirSize := uint64(binary.LittleEndian.Uint32(rootRec[10:14])) - return iso9660WalkAll(rs, dirLBA, dirSize, "") - } - } + if _, err := rs.Seek(16*iso9660SectorSize, io.SeekStart); err != nil { + isoErr = fmt.Errorf("seek PVD: %w", err) + } else if _, err := io.ReadFull(rs, pvd); err != nil { + isoErr = fmt.Errorf("read PVD: %w", err) + } else if pvd[0] != 1 || string(pvd[1:6]) != "CD001" { + isoErr = fmt.Errorf("invalid PVD header (type=%d magic=%q)", pvd[0], pvd[1:6]) + } else { + rootRec := pvd[156:] + dirLBA := binary.LittleEndian.Uint32(rootRec[2:6]) + dirSize := uint64(binary.LittleEndian.Uint32(rootRec[10:14])) + return iso9660WalkAll(rs, dirLBA, dirSize, "") } - return nil, fmt.Errorf("iso: not a valid ISO 9660 or UDF image") + + return nil, fmt.Errorf("iso: not a valid ISO 9660 or UDF image (udf: %v; iso9660: %v)", udfErr, isoErr) } diff --git a/internal/importer/archive/iso/fs_local_test.go b/internal/importer/archive/iso/fs_local_test.go new file mode 100644 index 000000000..3fbcc4885 --- /dev/null +++ b/internal/importer/archive/iso/fs_local_test.go @@ -0,0 +1,661 @@ +package iso + +import ( + "bytes" + "context" + "encoding/binary" + "encoding/json" + "errors" + "fmt" + "log/slog" + "os" + "sort" + "strings" + "testing" + "time" +) + +// TestUDFWalk_LogsWhenFileICBHasUnknownTag drives a synthetic UDF blob with +// one directory containing one File Identifier Descriptor (BOGUS.M2TS) whose +// ICB points at a sector containing an invalid descriptor tag (id=999, not +// 261/266). The walker must: +// +// 1. drop the file from its returned listing (silent today, kept silent); +// 2. 
emit exactly one slog.WarnContext line naming the file and the bogus +// tag id so operators can see why a file vanished. +// +// This locks in the diagnostic behavior added by Task 6: every silent drop +// site in udfWalkAll / collectFileExtents now logs at WARN level before +// continuing or breaking. +func TestUDFWalk_LogsWhenFileICBHasUnknownTag(t *testing.T) { + // Capture default slog output into a buffer for assertions. NOTE: this + // test mutates the process-wide default slog logger. Do NOT call + // t.Parallel() here, and do not parallelise any other test in this + // package that touches slog output, or log lines will bleed between + // tests and the matches==1 assertion below will flake. + var buf bytes.Buffer + prev := slog.Default() + slog.SetDefault(slog.New(slog.NewJSONHandler(&buf, &slog.HandlerOptions{Level: slog.LevelDebug}))) + t.Cleanup(func() { slog.SetDefault(prev) }) + + // Build a minimal in-memory blob: 32 sectors of zeros, with custom + // content at sector 10 (directory FE) and sector 20 (bogus tag). + const dirSector = 10 + const bogusSector = 20 + image := make([]byte, iso9660SectorSize*32) + + // Sector 10: a UDF File Entry (tag 261) acting as a directory whose + // allocation type is 3 (inline), so udfReadDirEntries reads the FID + // straight out of buf[allocDescOff : allocDescOff+allocDescLen]. + dir := image[dirSector*iso9660SectorSize : (dirSector+1)*iso9660SectorSize] + binary.LittleEndian.PutUint16(dir[0:2], 261) // tag.id = 261 (File Entry) + dir[34] = 3 // icbtag.flags lower 3 bits = 3 (inline) + // FE plain (tag 261) AD-area header at buf[168..176]. + binary.LittleEndian.PutUint32(dir[168:172], 0) // L_EA (extended attrs length) + binary.LittleEndian.PutUint32(dir[172:176], 52) // L_AD (alloc-desc length, == one padded FID) + + // FID at dir[176..]: file identifier descriptor for BOGUS.M2TS + // pointing its ICB long_ad at sector `bogusSector`. 
+ fid := dir[176:] + name := "BOGUS.M2TS" // 10 ASCII bytes + binary.LittleEndian.PutUint16(fid[0:2], 257) // FID tag id + fid[18] = 0 // file characteristics: regular file, neither parent nor deleted + fid[19] = byte(1 + len(name)) // L_FI (comp byte + ASCII chars) + binary.LittleEndian.PutUint32(fid[20:24], 2048) // long_ad.length + binary.LittleEndian.PutUint32(fid[24:28], bogusSector) + binary.LittleEndian.PutUint16(fid[28:30], 0) // long_ad.partition (0 → partStart-relative) + binary.LittleEndian.PutUint16(fid[36:38], 0) // L_IU (impl-use length) + fid[38] = 8 // CS0 compression code (8 = ASCII) + copy(fid[39:39+len(name)], name) + // Padded record length (38 header + 11 name = 49, padded to 52). We + // leave the trailing 3 bytes as zeros from the make(). + + // Sector 20: descriptor tag with the deliberately-bogus id 999. + bogus := image[bogusSector*iso9660SectorSize : (bogusSector+1)*iso9660SectorSize] + binary.LittleEndian.PutUint16(bogus[0:2], 999) + + dirICB := udfLongAD{length: iso9660SectorSize, loc: udfLBA{block: dirSector, part: 0}} + entries, err := udfWalkAll(context.Background(), bytes.NewReader(image), dirICB, nil, 0, "") + if err != nil { + t.Fatalf("udfWalkAll: %v", err) + } + if len(entries) != 0 { + t.Fatalf("expected empty listing (bogus file should be dropped); got %d entries: %+v", len(entries), entries) + } + + // Inspect captured slog output. Parse line by line as JSON and count + // matches; the test fails if not exactly one matching WARN was emitted. + var matches int + for line := range strings.SplitSeq(strings.TrimRight(buf.String(), "\n"), "\n") { + if line == "" { + continue + } + var rec map[string]any + if err := json.Unmarshal([]byte(line), &rec); err != nil { + t.Fatalf("non-JSON log line %q: %v", line, err) + } + if rec["level"] != "WARN" { + continue + } + // Both path and tag_id must be set to disambiguate from any + // other (future) WARN site in the walk. 
+ if rec["path"] != "BOGUS.M2TS" { + continue + } + // JSON-decoded numbers come back as float64; compare via that. + if v, ok := rec["tag_id"].(float64); !ok || int(v) != 999 { + continue + } + matches++ + } + if matches != 1 { + t.Fatalf("want exactly 1 matching WARN line (path=BOGUS.M2TS tag_id=999), got %d. Full log:\n%s", + matches, buf.String()) + } +} + +// TestUDFWalk_FollowsIndirectEntryChain drives a synthetic UDF blob where +// a file's ICB points at a chain of Indirect Entries (tag 248, per UDF +// §14.7 Strategy Type 4 multi-FE indirection) before reaching the real +// File Entry. The walker must transparently follow the chain and surface +// the file with its real size and extents. +// +// Two sub-cases: +// - "single_hop": FID → IE(248) → FE(261) +// - "multi_hop": FID → IE(248) → IE(248) → FE(261) +// +// Each Indirect Entry is laid out per UDF §14.7: +// +// bytes 0..15 descriptor tag (id = 248) +// bytes 16..35 ICBTag (20 bytes; zeros here, strategy etc. not validated) +// bytes 36..51 long_ad (16 bytes) → next ICB in chain +func TestUDFWalk_FollowsIndirectEntryChain(t *testing.T) { + // buildImage constructs an in-memory UDF blob and returns it along with + // the directory ICB. The chain layout: + // FID(MOVIE.M2TS) → IE@hops[0] → IE@hops[1] → ... → FE@feSector + // where the file's data extent lives at dataSector with size dataSize. + buildImage := func(t *testing.T, hops []uint32, feSector, dataSector uint32, dataSize uint32) ([]byte, udfLongAD) { + t.Helper() + const dirSector = 10 + // Size the image to comfortably cover all referenced sectors. + maxSector := max(feSector, dataSector) + for _, h := range hops { + maxSector = max(maxSector, h) + } + image := make([]byte, iso9660SectorSize*int(maxSector+2)) + + // Directory FE at dirSector — same pattern as the test above: + // tag 261, allocType 3 (inline), one FID for MOVIE.M2TS. 
+ dir := image[dirSector*iso9660SectorSize : (dirSector+1)*iso9660SectorSize] + binary.LittleEndian.PutUint16(dir[0:2], 261) // File Entry + dir[34] = 3 // inline alloc type + binary.LittleEndian.PutUint32(dir[168:172], 0) + binary.LittleEndian.PutUint32(dir[172:176], 52) // one padded FID + + fid := dir[176:] + name := "MOVIE.M2TS" // 10 ASCII bytes → recLen 38+11=49 → padded 52 + binary.LittleEndian.PutUint16(fid[0:2], 257) // FID + fid[18] = 0 // regular file + fid[19] = byte(1 + len(name)) // L_FI + binary.LittleEndian.PutUint32(fid[20:24], 2048) // long_ad.length → hops[0] sector + binary.LittleEndian.PutUint32(fid[24:28], hops[0]) + binary.LittleEndian.PutUint16(fid[28:30], 0) // partition 0 → partStart-relative + binary.LittleEndian.PutUint16(fid[36:38], 0) // L_IU + fid[38] = 8 // CS0 ASCII + copy(fid[39:39+len(name)], name) + + // Indirect Entries: each tag-248 sector points to the next. + for i, hop := range hops { + ie := image[hop*iso9660SectorSize : (hop+1)*iso9660SectorSize] + binary.LittleEndian.PutUint16(ie[0:2], 248) // Indirect Entry tag + // bytes 16..35 are ICBTag — leave zeroed (not validated). + // long_ad at offset 36: length(4)+block(4)+part(2)+implUse(2) + var nextSector uint32 + if i+1 < len(hops) { + nextSector = hops[i+1] + } else { + nextSector = feSector + } + binary.LittleEndian.PutUint32(ie[36:40], 2048) // length + binary.LittleEndian.PutUint32(ie[40:44], nextSector) // block + binary.LittleEndian.PutUint16(ie[44:46], 0) // partition + } + + // Real File Entry at feSector: tag 261, allocType 0 (short_ad), + // one short_ad pointing at dataSector with the file size. 
+ fe := image[feSector*iso9660SectorSize : (feSector+1)*iso9660SectorSize] + binary.LittleEndian.PutUint16(fe[0:2], 261) // File Entry + fe[34] = 0 // allocType 0 = short_ad + binary.LittleEndian.PutUint64(fe[56:64], uint64(dataSize)) + binary.LittleEndian.PutUint32(fe[168:172], 0) // L_EA + binary.LittleEndian.PutUint32(fe[172:176], 8) // L_AD = one short_ad + binary.LittleEndian.PutUint32(fe[176:180], dataSize) // short_ad.length (adType 0 in high 2 bits) + binary.LittleEndian.PutUint32(fe[180:184], dataSector) // short_ad.block + + dirICB := udfLongAD{length: iso9660SectorSize, loc: udfLBA{block: dirSector, part: 0}} + return image, dirICB + } + + assertFound := func(t *testing.T, entries []isoFileEntry, wantSize uint64, wantLBA uint32) { + t.Helper() + if len(entries) != 1 { + t.Fatalf("want exactly 1 entry, got %d: %+v", len(entries), entries) + } + got := entries[0] + if got.path != "MOVIE.M2TS" { + t.Errorf("path: want MOVIE.M2TS, got %q", got.path) + } + if got.size != wantSize { + t.Errorf("size: want %d, got %d", wantSize, got.size) + } + if len(got.extents) != 1 { + t.Fatalf("extents: want 1, got %d (%+v)", len(got.extents), got.extents) + } + if got.extents[0].lba != wantLBA { + t.Errorf("extents[0].lba: want %d, got %d", wantLBA, got.extents[0].lba) + } + } + + t.Run("single_hop", func(t *testing.T) { + const ieSector = 20 + const feSector = 30 + const dataSector = 40 + const dataSize = 4096 + image, dirICB := buildImage(t, []uint32{ieSector}, feSector, dataSector, dataSize) + entries, err := udfWalkAll(context.Background(), bytes.NewReader(image), dirICB, nil, 0, "") + if err != nil { + t.Fatalf("udfWalkAll: %v", err) + } + assertFound(t, entries, dataSize, dataSector) + }) + + t.Run("multi_hop", func(t *testing.T) { + // FID → IE@20 → IE@25 → FE@30 → data@40 + const feSector = 30 + const dataSector = 40 + const dataSize = 4096 + image, dirICB := buildImage(t, []uint32{20, 25}, feSector, dataSector, dataSize) + entries, err := 
udfWalkAll(context.Background(), bytes.NewReader(image), dirICB, nil, 0, "") + if err != nil { + t.Fatalf("udfWalkAll: %v", err) + } + assertFound(t, entries, dataSize, dataSector) + }) +} + +// TestLocalISO_DiscoverBigFiles is a manual integration test: it walks a +// real Blu-ray ISO from local disk and dumps a size-sorted summary. Skipped +// unless ALTMOUNT_LOCAL_ISO is set, so CI stays unaffected. +// +// Set ALTMOUNT_LOCAL_ISO=/abs/path/to.iso to run, e.g.: +// +// ALTMOUNT_LOCAL_ISO=/Volumes/.../DISC_1.iso go test \ +// ./internal/importer/archive/iso/... -run TestLocalISO -v +func TestLocalISO_DiscoverBigFiles(t *testing.T) { + path := os.Getenv("ALTMOUNT_LOCAL_ISO") + if path == "" { + t.Skip("ALTMOUNT_LOCAL_ISO not set") + } + f, err := os.Open(path) + if err != nil { + t.Fatalf("open %s: %v", path, err) + } + defer f.Close() + + stat, _ := f.Stat() + t.Logf("ISO: %s size=%d (%.2f GiB)", path, stat.Size(), float64(stat.Size())/(1<<30)) + + entries, err := ListISOFiles(context.Background(), f) + if err != nil { + t.Fatalf("ListISOFiles: %v", err) + } + + var sum int64 + for _, e := range entries { + sum += int64(e.size) + } + t.Logf("listed_files=%d listed_sum=%d (%.2f GiB) coverage=%.1f%%", + len(entries), sum, float64(sum)/(1<<30), 100*float64(sum)/float64(stat.Size())) + + // Top 25 by size — should match `ls -laS BDMV/STREAM/` if walker is sane. + sort.Slice(entries, func(i, j int) bool { return entries[i].size > entries[j].size }) + t.Logf("top 25 by size:") + for i, e := range entries { + if i >= 25 { + break + } + t.Logf(" %s size=%d (%.2f MiB) extents=%d first_lba=%d", + e.path, e.size, float64(e.size)/(1<<20), len(e.extents), e.firstLBA()) + } + + // Sanity sentinels for the Avatar disc 1 main-feature clips. Each is + // >1 GiB and uses many on-disc extents (00022.m2ts has ~945). Assert + // the file is present, the size is right, AND the extents slice fully + // covers it — otherwise downstream concat reads wrong bytes past the + // first extent. 
+ want := []string{"BDMV/STREAM/00016.m2ts", "BDMV/STREAM/00022.m2ts", "BDMV/STREAM/00028.m2ts"} + have := make(map[string]isoFileEntry, len(entries)) + for _, e := range entries { + have[e.path] = e + } + for _, w := range want { + e, ok := have[w] + if !ok { + t.Errorf("missing %s — walker dropped this file", w) + continue + } + if e.size < 1<<30 { + t.Errorf("%s reported size=%d (%.2f MiB), want >1 GiB", + w, e.size, float64(e.size)/(1<<20)) + } + if len(e.extents) < 2 { + t.Errorf("%s has only %d extents — expected multi-extent (BD main-feature clips fragment heavily)", + w, len(e.extents)) + } + var covered uint64 + for _, ext := range e.extents { + covered += ext.length + } + if covered != e.size { + t.Errorf("%s: sum of extent lengths = %d but file size = %d (delta %d)", + w, covered, e.size, int64(e.size)-int64(covered)) + } + } + + if t.Failed() { + fmt.Println(">>> walker is dropping big files; this is the bug") + } +} + +// TestLocalISO_CountExtentsForBigFiles probes each entry's File Entry on the +// real ISO and reports how many allocation descriptors a file's data uses. +// The walker today reads only the first AD — if any of the multi-GiB main- +// feature clips reports >1 AD, downstream byte reads past the first extent +// will hit wrong sectors. Gated on ALTMOUNT_LOCAL_ISO same as the discovery +// test. +func TestLocalISO_CountExtentsForBigFiles(t *testing.T) { + path := os.Getenv("ALTMOUNT_LOCAL_ISO") + if path == "" { + t.Skip("ALTMOUNT_LOCAL_ISO not set") + } + f, err := os.Open(path) + if err != nil { + t.Fatalf("open: %v", err) + } + defer f.Close() + + partStart, metaMap, rootICB, err := udfSetup(f) + if err != nil { + t.Fatalf("udfSetup: %v", err) + } + + // Re-walk to get entries plus their ICB so we can re-read each FE and + // count its allocation descriptors. We can't reuse ListISOFiles output + // directly because isoFileEntry discards the ICB. 
+ type probed struct { + path string + size uint64 + ads int // allocation descriptors observed (= number of on-disc extents) + alloc byte + } + + var probedAll []probed + var walk func(dirICB udfLongAD, prefix string) + walk = func(dirICB udfLongAD, prefix string) { + physSect, e := udfResolveICB(dirICB.loc, metaMap, partStart) + if e != nil { + return + } + entries, e := udfReadDirEntries(context.Background(), f, physSect, metaMap, partStart) + if e != nil { + return + } + for _, ent := range entries { + p := ent.name + if prefix != "" { + p = prefix + "/" + ent.name + } + if ent.isDir { + walk(ent.icb, p) + continue + } + fePhys, rerr := udfResolveICB(ent.icb.loc, metaMap, partStart) + if rerr != nil { + continue + } + feTag, feBuf, rerr := udfReadTag(f, fePhys) + if rerr != nil || (feTag.id != 261 && feTag.id != 266) { + continue + } + alloc := feBuf[34] & 0x07 + var adOff, adLen int + if feTag.id == 266 { + eaLen := int(binary.LittleEndian.Uint32(feBuf[208:212])) + adLen = int(binary.LittleEndian.Uint32(feBuf[212:216])) + adOff = 216 + eaLen + } else { + eaLen := int(binary.LittleEndian.Uint32(feBuf[168:172])) + adLen = int(binary.LittleEndian.Uint32(feBuf[172:176])) + adOff = 176 + eaLen + } + if adOff+adLen > len(feBuf) { + adLen = len(feBuf) - adOff + } + // Count extents using the UDF rules: high 2 bits of the + // length field encode the AD "type": + // 0 = recorded and allocated (real extent) + // 1 = not recorded, allocated (sparse / zero-fill) + // 2 = not recorded, not allocated (sparse hole) + // 3 = next AD points at a continuation AED sector, follow it + // We count types 0,1,2 as logical extents (each contributes + // length bytes to the file) and chase type 3 into AED chains. 
+ n := 0 + step := 0 + switch alloc { + case 0: + step = 8 + case 1: + step = 16 + case 2: + step = 20 + case 3: + n = 1 // embedded + } + if step > 0 { + countADs := func(buf []byte) (extents int, chain *udfLongAD) { + for off := 0; off+step <= len(buf); off += step { + lenField := binary.LittleEndian.Uint32(buf[off:]) + adType := lenField >> 30 + adLen := lenField & 0x3FFFFFFF + if adLen == 0 && adType != 3 { + break + } + if adType == 3 { + var loc udfLongAD + switch step { + case 8: + loc = udfLongAD{length: adLen, loc: udfLBA{block: binary.LittleEndian.Uint32(buf[off+4:])}} + case 16: + loc = udfParseLongAD(buf, off) + } + return extents, &loc + } + extents++ + } + return extents, nil + } + cnt, chain := countADs(feBuf[adOff : adOff+adLen]) + n = cnt + safety := 0 + for chain != nil && safety < 100 { + safety++ + ps, e := udfResolveICB(chain.loc, metaMap, partStart) + if e != nil { + break + } + _, aedBuf, e := udfReadTag(f, ps) + if e != nil { + break + } + // AED layout: 16-byte tag + 4-byte previous-AED pointer + // + 4-byte length-of-allocation-descriptors + ADs. + if len(aedBuf) < 24 { + break + } + aedLen := int(binary.LittleEndian.Uint32(aedBuf[20:24])) + if aedLen <= 0 || 24+aedLen > len(aedBuf) { + break + } + more, nextChain := countADs(aedBuf[24 : 24+aedLen]) + n += more + chain = nextChain + } + } + probedAll = append(probedAll, probed{ + path: p, + size: binary.LittleEndian.Uint64(feBuf[56:64]), + ads: n, + alloc: alloc, + }) + } + } + walk(rootICB, "") + + // Report the big files specifically + any file with >1 AD. 
+ sort.Slice(probedAll, func(i, j int) bool { return probedAll[i].size > probedAll[j].size }) + t.Logf("top 15 by size (with extent count):") + for i, p := range probedAll { + if i >= 15 { + break + } + t.Logf(" %s size=%d (%.2f MiB) alloc_type=%d extents=%d", + p.path, p.size, float64(p.size)/(1<<20), p.alloc, p.ads) + } + + multi := 0 + for _, p := range probedAll { + if p.ads > 1 { + multi++ + } + } + t.Logf("files with >1 extent: %d / %d", multi, len(probedAll)) + if multi == 0 { + t.Logf("CONCLUSION: all files are contiguous — single-LBA model is sufficient for this ISO") + } else { + t.Logf("CONCLUSION: fragmentation present — single-LBA walker yields WRONG bytes past extent 1") + } +} + +// TestLocalISO_CountAdjacentExtents checks whether multi-extent files have +// physically contiguous extents that could be coalesced. If yes, segment +// count downstream can be reduced dramatically — the importer hit +// total_segments_to_validate=888,903 on this NZB precisely because every +// AD became its own NestedSource even when adjacent ADs sat next to each +// other on disc. +func TestLocalISO_CountAdjacentExtents(t *testing.T) { + path := os.Getenv("ALTMOUNT_LOCAL_ISO") + if path == "" { + t.Skip("ALTMOUNT_LOCAL_ISO not set") + } + f, err := os.Open(path) + if err != nil { + t.Fatalf("open: %v", err) + } + defer f.Close() + + entries, err := ListISOFiles(context.Background(), f) + if err != nil { + t.Fatalf("list: %v", err) + } + sort.Slice(entries, func(i, j int) bool { return entries[i].size > entries[j].size }) + + const lookAt = 15 + for i, e := range entries { + if i >= lookAt { + break + } + if len(e.extents) <= 1 { + continue + } + // Count adjacent runs (where next.lba == this.lba + this.length/sector). 
+ adjacent := 0 + distinctRuns := 1 + for j := 1; j < len(e.extents); j++ { + prev := e.extents[j-1] + next := e.extents[j] + expectedNextLBA := prev.lba + uint32(prev.length/iso9660SectorSize) + if next.lba == expectedNextLBA { + adjacent++ + } else { + distinctRuns++ + } + } + t.Logf(" %s: extents=%d adjacent_pairs=%d distinct_runs=%d coalesce_ratio=%.1fx", + e.path, len(e.extents), adjacent, distinctRuns, + float64(len(e.extents))/float64(distinctRuns)) + } +} + +// TestListISOFiles_PreservesBothUnderlyingErrors drives ListISOFiles with a +// blob that is neither a valid UDF nor a valid ISO 9660 image. The function +// historically returned a single opaque "not a valid ISO 9660 or UDF image" +// error which hid the actual cause — Task 9 changed it to wrap both the +// underlying UDF error and the ISO 9660 fallback error so operators can +// distinguish transient network failures from genuine structural problems. +func TestListISOFiles_PreservesBothUnderlyingErrors(t *testing.T) { + // 600 KB of zeros — large enough to satisfy reads at both the UDF + // AVDP sector (256 → byte 524288) and the ISO 9660 PVD sector + // (16 → byte 32768), but the bytes don't form valid descriptors + // for either format. + blob := make([]byte, 600*1024) + _, err := ListISOFiles(context.Background(), bytes.NewReader(blob)) + if err == nil { + t.Fatal("expected error from ListISOFiles on an invalid blob") + } + msg := err.Error() + if !strings.Contains(msg, "udf:") { + t.Errorf("error must mention the underlying UDF failure (substring \"udf:\") — got: %q", msg) + } + if !strings.Contains(msg, "iso9660:") { + t.Errorf("error must mention the underlying ISO 9660 failure (substring \"iso9660:\") — got: %q", msg) + } +} + +// TestUDFWalk_StopsWhenContextCanceled builds a synthetic UDF blob whose +// directory contains 3 regular FIDs, then calls udfWalkAll with an +// already-canceled context. The walker must: +// +// 1. observe ctx.Err() before processing any file's ICB, +// 2. 
return context.Canceled (or a wrapping error) within 100 ms, +// 3. return an empty result slice (no file processed past the check). +// +// This locks in Task 11 behavior: cancellation propagates immediately +// from the entries-loop, instead of waiting for the next sector read +// to time out at the NNTP layer. +func TestUDFWalk_StopsWhenContextCanceled(t *testing.T) { + const dirSector = 10 + // Three FIDs of 52 bytes each = 156 bytes of allocation descriptors. + const fidLen = 52 + const numFiles = 3 + image := make([]byte, iso9660SectorSize*32) + + dir := image[dirSector*iso9660SectorSize : (dirSector+1)*iso9660SectorSize] + binary.LittleEndian.PutUint16(dir[0:2], 261) // tag.id = 261 (File Entry) + dir[34] = 3 // inline alloc type + binary.LittleEndian.PutUint32(dir[168:172], 0) + binary.LittleEndian.PutUint32(dir[172:176], fidLen*numFiles) // L_AD = 3 padded FIDs + + // Write 3 FIDs back-to-back at dir[176..]. Each points at a unique + // sector containing a tag-261 FE with a single short_ad; that the + // walker NEVER reads these is exactly what this test asserts. 
+ for i := range numFiles { + off := 176 + i*fidLen + fid := dir[off : off+fidLen] + name := fmt.Sprintf("FILE%d.M2TS", i) // 10-11 ASCII bytes + binary.LittleEndian.PutUint16(fid[0:2], 257) + fid[18] = 0 + fid[19] = byte(1 + len(name)) + binary.LittleEndian.PutUint32(fid[20:24], 2048) + binary.LittleEndian.PutUint32(fid[24:28], uint32(20+i)) // points at sectors 20,21,22 + binary.LittleEndian.PutUint16(fid[28:30], 0) + binary.LittleEndian.PutUint16(fid[36:38], 0) + fid[38] = 8 + copy(fid[39:39+len(name)], name) + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() // canceled before the call — ctx.Err() != nil on entry + + dirICB := udfLongAD{length: iso9660SectorSize, loc: udfLBA{block: dirSector, part: 0}} + + done := make(chan struct{}) + var entries []isoFileEntry + var err error + go func() { + entries, err = udfWalkAll(ctx, bytes.NewReader(image), dirICB, nil, 0, "") + close(done) + }() + + // 1-second deadline: the function should return in microseconds since + // ctx.Err() is checked before any I/O, but goroutine scheduling on a + // loaded CI runner can add tens of milliseconds. 1s is plenty of + // headroom while still failing fast if the cancellation check is + // genuinely missing. + select { + case <-done: + case <-time.After(1 * time.Second): + t.Fatal("udfWalkAll did not return within 1s of a canceled ctx — cancellation is not being honored at the entries-loop") + } + + if !errors.Is(err, context.Canceled) { + t.Fatalf("want err wrapping context.Canceled, got: %v", err) + } + // The ctx check fires at the top of the loop BEFORE any FID is + // processed, so result is empty here. If cancel had happened + // mid-walk a non-empty partial result would also be valid — the + // production contract (udfWalkAll returns "what was collected so + // far" plus the cancel error) tolerates both shapes. 
+ if len(entries) != 0 { + t.Fatalf("want empty result on cancel before any file processed, got %d entries: %+v", len(entries), entries) + } +} diff --git a/internal/importer/archive/iso/fs_test.go b/internal/importer/archive/iso/fs_test.go index c03e1c954..79d0474db 100644 --- a/internal/importer/archive/iso/fs_test.go +++ b/internal/importer/archive/iso/fs_test.go @@ -2,11 +2,22 @@ package iso import ( "bytes" + "context" "encoding/binary" "testing" ) -func TestUDFReadDirEntriesShortADClampsExtentLength(t *testing.T) { +// TestUDFReadDirEntriesTruncatedExtent locks in the fix for the bug where +// a directory's allocation descriptor advertised an extent spanning +// multiple sectors but the walker read only the first sector and silently +// dropped every entry past it (~ the reason the Avatar BDMV main-feature +// M2TS files were invisible). Two assertions: +// - readMetaExtent must keep reading sectors until ad.length is +// satisfied (the fix); +// - if a sector read fails because the image is shorter than ad.length, +// the walk returns partial data without an error so a malformed ISO +// can't fail the entire import. 
+func TestUDFReadDirEntriesTruncatedExtent(t *testing.T) { image := make([]byte, iso9660SectorSize*21) dirICBSector := image[10*iso9660SectorSize : 11*iso9660SectorSize] binary.LittleEndian.PutUint16(dirICBSector[0:2], 261) @@ -16,7 +27,7 @@ func TestUDFReadDirEntriesShortADClampsExtentLength(t *testing.T) { binary.LittleEndian.PutUint32(dirICBSector[176:180], 2796) binary.LittleEndian.PutUint32(dirICBSector[180:184], 20) - entries, err := udfReadDirEntries(bytes.NewReader(image), 10, nil, 0) + entries, err := udfReadDirEntries(context.Background(), bytes.NewReader(image), 10, nil, 0) if err != nil { t.Fatalf("udfReadDirEntries() error = %v", err) } diff --git a/internal/importer/archive/iso/mpls.go b/internal/importer/archive/iso/mpls.go new file mode 100644 index 000000000..141d7a023 --- /dev/null +++ b/internal/importer/archive/iso/mpls.go @@ -0,0 +1,108 @@ +package iso + +import ( + "encoding/binary" + "errors" + "fmt" +) + +// MPLS (Blu-ray PlayList) is a fixed binary format defined by the BDA spec. +// We only parse the fields needed to identify the main feature playlist and +// its ordered list of M2TS clips: the clip_information_file_name for each +// PlayItem and the IN/OUT presentation times used to estimate duration. + +// mplsHeaderSize is the fixed prefix length: 4 magic + 4 version + +// 4 PlayList offset + 4 PlayListMark offset + 4 ExtensionData offset. +const mplsHeaderSize = 20 + +// MPLSPlayItem describes one entry in a PlayList. +type MPLSPlayItem struct { + // ClipName is the 5-character clip_information_file_name (e.g. "00001"). + // The corresponding stream lives at BDMV/STREAM/.M2TS. + ClipName string + // InTime and OutTime are 45 kHz presentation timestamps. Duration in + // ticks is OutTime - InTime; convert to seconds by dividing by 45000. + InTime uint32 + OutTime uint32 +} + +// MPLSPlayList is the parsed view of a single .mpls file. +type MPLSPlayList struct { + Version string // e.g. 
"0100", "0200", "0300" + PlayItems []MPLSPlayItem +} + +// DurationTicks returns the sum of (OutTime-InTime) across PlayItems in +// 45 kHz ticks. This is the standard proxy for "longest playlist = +// main feature" used by every Blu-ray player. +func (p *MPLSPlayList) DurationTicks() int64 { + var total int64 + for _, it := range p.PlayItems { + if it.OutTime > it.InTime { + total += int64(it.OutTime - it.InTime) + } + } + return total +} + +// ParseMPLS decodes a .mpls file. All multi-byte integers are big-endian +// per the BDA spec. Sub-paths, the STN table, and per-angle alternates +// are skipped — we use each PlayItem's leading length field to advance +// past everything we don't need. +func ParseMPLS(data []byte) (*MPLSPlayList, error) { + if len(data) < mplsHeaderSize { + return nil, errors.New("mpls: truncated header") + } + if string(data[0:4]) != "MPLS" { + return nil, fmt.Errorf("mpls: bad magic %q", data[0:4]) + } + version := string(data[4:8]) + playListOff := binary.BigEndian.Uint32(data[8:12]) + if int(playListOff) < mplsHeaderSize || int(playListOff)+10 > len(data) { + return nil, fmt.Errorf("mpls: PlayList offset %d out of range (file size %d)", playListOff, len(data)) + } + + // PlayList header: length(4) + reserved(2) + numPlayItems(2) + numSubPaths(2) + pl := data[playListOff:] + playListLen := binary.BigEndian.Uint32(pl[0:4]) + if int(playListOff)+4+int(playListLen) > len(data) { + return nil, fmt.Errorf("mpls: PlayList length %d exceeds file size", playListLen) + } + numPlayItems := binary.BigEndian.Uint16(pl[6:8]) + + items := make([]MPLSPlayItem, 0, numPlayItems) + // PlayItems start after the 10-byte PlayList header. + cursor := 10 + plBody := pl[:4+int(playListLen)] + for i := range int(numPlayItems) { + if cursor+2 > len(plBody) { + return nil, fmt.Errorf("mpls: PlayItem %d header out of range", i) + } + // PlayItem length excludes the 2-byte length field itself. 
+ itemLen := int(binary.BigEndian.Uint16(plBody[cursor : cursor+2])) + itemStart := cursor + 2 + itemEnd := itemStart + itemLen + if itemEnd > len(plBody) { + return nil, fmt.Errorf("mpls: PlayItem %d length %d overruns PlayList", i, itemLen) + } + // Fixed PlayItem layout we care about: + // +0 5 clip_information_file_name (e.g. "00001") + // +5 4 clip_codec_identifier ("M2TS") + // +9 2 flags incl. is_multi_angle / connection_condition + // +11 1 ref_to_STC_id + // +12 4 IN_time (45 kHz) + // +16 4 OUT_time (45 kHz) + if itemLen < 20 { + return nil, fmt.Errorf("mpls: PlayItem %d too short (len=%d)", i, itemLen) + } + body := plBody[itemStart:itemEnd] + items = append(items, MPLSPlayItem{ + ClipName: string(body[0:5]), + InTime: binary.BigEndian.Uint32(body[12:16]), + OutTime: binary.BigEndian.Uint32(body[16:20]), + }) + cursor = itemEnd + } + + return &MPLSPlayList{Version: version, PlayItems: items}, nil +} diff --git a/internal/importer/archive/iso/mpls_test.go b/internal/importer/archive/iso/mpls_test.go new file mode 100644 index 000000000..0df3b4da8 --- /dev/null +++ b/internal/importer/archive/iso/mpls_test.go @@ -0,0 +1,167 @@ +package iso + +import ( + "encoding/binary" + "testing" +) + +// buildMPLS constructs a synthetic .mpls byte stream containing the given +// PlayItems. Each PlayItem is laid out at its minimum legal size (20 bytes +// body + 2-byte length prefix). multiAngleTail, when non-nil, is appended +// inside the first PlayItem to exercise the length-prefixed skip logic. +func buildMPLS(t *testing.T, version string, items []MPLSPlayItem, multiAngleTail []byte) []byte { + t.Helper() + if len(version) != 4 { + t.Fatalf("version must be 4 bytes, got %q", version) + } + + // Build PlayItems body. 
+ var playItemsBuf []byte + for i, it := range items { + if len(it.ClipName) != 5 { + t.Fatalf("item %d: ClipName must be 5 chars", i) + } + body := make([]byte, 20) + copy(body[0:5], it.ClipName) + copy(body[5:9], "M2TS") + // flags (2) + ref_to_STC_id (1) left zero + binary.BigEndian.PutUint32(body[12:16], it.InTime) + binary.BigEndian.PutUint32(body[16:20], it.OutTime) + // Inject the multi-angle tail into the first item only — the parser + // must skip past it via the length field without misaligning the + // next item. + if i == 0 && multiAngleTail != nil { + body = append(body, multiAngleTail...) + } + // PlayItem length excludes its own 2-byte length prefix. + lenPrefix := make([]byte, 2) + binary.BigEndian.PutUint16(lenPrefix, uint16(len(body))) + playItemsBuf = append(playItemsBuf, lenPrefix...) + playItemsBuf = append(playItemsBuf, body...) + } + + // PlayList header: length(4)+reserved(2)+numPI(2)+numSub(2)+playItems + plHeader := make([]byte, 10) + // length excludes its own 4-byte field + binary.BigEndian.PutUint32(plHeader[0:4], uint32(6+len(playItemsBuf))) + binary.BigEndian.PutUint16(plHeader[6:8], uint16(len(items))) + // numSubPaths left zero + + playList := append(plHeader, playItemsBuf...) + + // File header: 4 magic + 4 version + 4 PL offset + 4 PLMark + 4 ExtData + hdr := make([]byte, mplsHeaderSize) + copy(hdr[0:4], "MPLS") + copy(hdr[4:8], version) + binary.BigEndian.PutUint32(hdr[8:12], uint32(mplsHeaderSize)) + // PlayListMark & ExtensionData offsets unused; leave zero. + + return append(hdr, playList...) 
+} + +func TestParseMPLS(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + data []byte + wantErr bool + wantItems []MPLSPlayItem + wantTicks int64 + }{ + { + name: "single PlayItem", + data: buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 1000, OutTime: 91000}, + }, nil), + wantItems: []MPLSPlayItem{{ClipName: "00001", InTime: 1000, OutTime: 91000}}, + wantTicks: 90000, // 2s at 45kHz + }, + { + name: "five PlayItems (main feature shape)", + data: buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 45000}, + {ClipName: "00002", InTime: 0, OutTime: 45000}, + {ClipName: "00003", InTime: 0, OutTime: 45000}, + {ClipName: "00004", InTime: 0, OutTime: 45000}, + {ClipName: "00005", InTime: 0, OutTime: 45000}, + }, nil), + wantItems: []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 45000}, + {ClipName: "00002", InTime: 0, OutTime: 45000}, + {ClipName: "00003", InTime: 0, OutTime: 45000}, + {ClipName: "00004", InTime: 0, OutTime: 45000}, + {ClipName: "00005", InTime: 0, OutTime: 45000}, + }, + wantTicks: 5 * 45000, + }, + { + name: "multi-angle PlayItem (tail must be skipped)", + // The tail simulates angle-count + alt-angle records appended + // after the fixed PlayItem prefix. The parser only consumes the + // first 20 bytes and uses the length field to skip past the + // rest, so item 2 must still parse cleanly. 
+ data: buildMPLS(t, "0200", []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 45000}, + {ClipName: "00002", InTime: 0, OutTime: 90000}, + }, []byte{ + 0x02, // num_angles + 0x00, // is_different_audios flags + '0', '0', '0', '0', '7', 'M', '2', 'T', 'S', 0x00, // one alt angle entry (10 bytes) + }), + wantItems: []MPLSPlayItem{ + {ClipName: "00001", InTime: 0, OutTime: 45000}, + {ClipName: "00002", InTime: 0, OutTime: 90000}, + }, + wantTicks: 45000 + 90000, + }, + { + name: "wrong magic", + data: []byte("NOTMPLS-padding-here-padding-here"), + wantErr: true, + }, + { + name: "truncated header", + data: []byte("MPLS"), + wantErr: true, + }, + { + name: "PlayList offset out of range", + data: func() []byte { + b := make([]byte, mplsHeaderSize) + copy(b[0:4], "MPLS") + copy(b[4:8], "0200") + binary.BigEndian.PutUint32(b[8:12], 9999) + return b + }(), + wantErr: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + got, err := ParseMPLS(tc.data) + if tc.wantErr { + if err == nil { + t.Fatalf("expected error, got nil") + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(got.PlayItems) != len(tc.wantItems) { + t.Fatalf("PlayItems len = %d, want %d", len(got.PlayItems), len(tc.wantItems)) + } + for i, it := range got.PlayItems { + if it != tc.wantItems[i] { + t.Errorf("PlayItem[%d] = %+v, want %+v", i, it, tc.wantItems[i]) + } + } + if d := got.DurationTicks(); d != tc.wantTicks { + t.Errorf("DurationTicks = %d, want %d", d, tc.wantTicks) + } + }) + } +} diff --git a/internal/importer/archive/iso/processor.go b/internal/importer/archive/iso/processor.go index 10c5fd7a1..bc54da47f 100644 --- a/internal/importer/archive/iso/processor.go +++ b/internal/importer/archive/iso/processor.go @@ -3,69 +3,142 @@ package iso import ( "context" "fmt" + "log/slog" "path/filepath" "strings" "time" metapb "github.com/javi11/altmount/internal/metadata/proto" 
"github.com/javi11/altmount/internal/pool" + "github.com/javi11/altmount/internal/progress" ) -// AnalyzeISOContent enumerates all allowed media files inside the given ISO source -// and returns ISOFileContent entries with Usenet segment mappings. -func AnalyzeISOContent( +// AnalyzeISO inspects the given ISO source and returns: +// - the volume label (for multi-disc grouping), +// - the filtered list of inner files (Files), +// - the ordered MainFeature M2TS list when the ISO is a Blu-ray with a +// resolvable playlist (nil otherwise). +// +// allowedExtensions only filters Files. MainFeature is always returned for +// BDMV discs regardless of the extension list — its existence is the +// signal callers use to opt into virtual concatenation. +func AnalyzeISO( ctx context.Context, src ISOSource, poolManager pool.Manager, maxPrefetch int, readTimeout time.Duration, + analyzeTimeout time.Duration, allowedExtensions []string, -) ([]ISOFileContent, error) { + progressTracker *progress.Tracker, +) (*AnalyzedISO, error) { + start := time.Now() + // Hard cap the whole walk. A degraded NNTP provider can otherwise stall + // AnalyzeISO for minutes per ISO. analyzeTimeout <= 0 disables the cap + // (used by tests that exercise other paths). + if analyzeTimeout > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, analyzeTimeout) + defer cancel() + } + // Fail fast when the deadline is already exceeded (e.g. caller passed a + // past deadline, or analyzeTimeout fired between WithTimeout and the + // first NNTP read). 
+ if err := ctx.Err(); err != nil { + return nil, fmt.Errorf("iso: analysing %q: %w", src.Filename, err) + } + rs, closer, err := NewISOReadSeeker(ctx, src, poolManager, maxPrefetch, readTimeout) if err != nil { return nil, fmt.Errorf("iso: creating read seeker for %q: %w", src.Filename, err) } defer closer.Close() - files, err := ListISOFiles(rs) + entries, err := ListISOFiles(ctx, rs) if err != nil { return nil, fmt.Errorf("iso: listing files in %q: %w", src.Filename, err) } - var result []ISOFileContent - for _, entry := range files { - if !isAllowedFile(entry.path, int64(entry.size), allowedExtensions) { + out := &AnalyzedISO{VolumeLabel: ReadVolumeLabel(rs)} + + for _, e := range entries { + if !isAllowedFile(e.path, int64(e.size), allowedExtensions) { continue } + out.Files = append(out.Files, buildFileContent(src, e)) + } - isoOffset := int64(entry.lba) * iso9660SectorSize - - fc := ISOFileContent{ - InternalPath: entry.path, - Filename: filepath.Base(entry.path), - Size: int64(entry.size), + if mf := ResolveMainFeature(ctx, rs, entries, progressTracker); mf != nil { + out.DurationTicks = mf.DurationTicks + for i, e := range mf.Streams { + fc := buildFileContent(src, e) + // Carry per-clip MPLS timing (45 kHz) for the continuous-timeline + // remux. ClipInTimes/ClipDurations are parallel to Streams. + if i < len(mf.ClipInTimes) { + fc.InTimeTicks = mf.ClipInTimes[i] + fc.DurationTicks = mf.ClipDurations[i] + } + out.MainFeature = append(out.MainFeature, fc) } + } + // Single completion log: raw entry count, filtered file count, BD clip + // count, and total time. Previously this function emitted two separate + // INFO lines per successful analysis ("ISO analysed" + "ISO analyse + // complete"); they're consolidated here. 
+ slog.InfoContext(ctx, "ISO analysed", + "filename", src.Filename, + "iso_size_bytes", src.Size, + "entries", len(entries), + "files", len(out.Files), + "main_feature_clips", len(out.MainFeature), + "duration_seconds", time.Since(start).Seconds(), + ) + + return out, nil +} + +// buildFileContent turns one ISO directory entry into an ISOFileContent, +// emitting one ISONestedSource per on-disc extent. Concatenating the +// sources' byte ranges yields the complete file. This is the path that +// previously fed BAD bytes for multi-extent files like Avatar's 17 GiB +// 00022.m2ts (945 extents) — only the first extent's data was correct. +func buildFileContent(src ISOSource, e isoFileEntry) ISOFileContent { + fc := ISOFileContent{ + InternalPath: e.path, + Filename: filepath.Base(e.path), + Size: int64(e.size), + Sources: make([]ISONestedSource, 0, len(e.extents)), + } + for _, ext := range e.extents { + isoOffset := int64(ext.lba) * iso9660SectorSize + extLen := int64(ext.length) if len(src.AesKey) == 0 { - // Unencrypted: slice segments to cover exactly this file's bytes - sliced, _ := sliceSegmentsForRange(src.Segments, isoOffset, int64(entry.size)) - fc.Segments = sliced + // Unencrypted: pre-slice outer segments to cover this extent + // only. The downstream nested reader treats InnerOffset as + // an offset within the (already-sliced) segment chain. + sliced, _ := sliceSegmentsForRange(src.Segments, isoOffset, extLen) + fc.Sources = append(fc.Sources, ISONestedSource{ + Segments: sliced, + InnerOffset: 0, + InnerLength: extLen, + InnerVolumeSize: extLen, + }) } else { - // Encrypted: create a NestedSource so the VFS can decrypt and seek - fc.NestedSource = &ISONestedSource{ + // Encrypted: AES-CBC needs the IV chain from byte 0 of the + // outer ISO, so every source gets the full outer segments + // and the cipher seeks via InnerOffset. 
+ fc.Sources = append(fc.Sources, ISONestedSource{ Segments: src.Segments, AesKey: src.AesKey, AesIV: src.AesIV, InnerOffset: isoOffset, - InnerLength: int64(entry.size), + InnerLength: extLen, InnerVolumeSize: src.Size, - } + }) } - - result = append(result, fc) } - - return result, nil + return fc } // isAllowedFile returns true if the file extension is in the allowed list. diff --git a/internal/importer/archive/iso/processor_test.go b/internal/importer/archive/iso/processor_test.go new file mode 100644 index 000000000..58bc48f63 --- /dev/null +++ b/internal/importer/archive/iso/processor_test.go @@ -0,0 +1,82 @@ +package iso + +import ( + "context" + "errors" + "testing" + "time" +) + +// TestAnalyzeISO_HonorsTimeout verifies the hard per-ISO deadline added by +// the IsoAnalyzeTimeoutSeconds config knob. A 1ns analyseTimeout must +// trip the context.WithTimeout, hit the fail-fast ctx.Err() check, and +// return a DeadlineExceeded-wrapped error within a few ms — well before +// any NNTP read could be attempted. +// +// Passing a nil pool.Manager is deliberate: if the timeout check fails +// to fire, NewISOReadSeeker would dereference it and crash, making the +// regression unmissable. 
+func TestAnalyzeISO_HonorsTimeout(t *testing.T) { + t.Parallel() + + src := ISOSource{Filename: "stuck.iso", Size: 1 << 30} + + start := time.Now() + _, err := AnalyzeISO( + context.Background(), + src, + nil, // pool.Manager — must NOT be reached + 0, + 0, + 1*time.Nanosecond, // analyzeTimeout + nil, + nil, // progressTracker + ) + elapsed := time.Since(start) + + if err == nil { + t.Fatal("expected error from past-deadline AnalyzeISO, got nil") + } + if !errors.Is(err, context.DeadlineExceeded) { + t.Fatalf("expected error wrapping context.DeadlineExceeded, got: %v", err) + } + if elapsed > 50*time.Millisecond { + t.Fatalf("AnalyzeISO took %v with a 1ns timeout — fail-fast ctx check is not firing", elapsed) + } +} + +// TestAnalyzeISO_HonorsTimeout_PreCanceled covers the case where the +// caller's context is already canceled before AnalyzeISO is invoked. +// With analyzeTimeout==0 (cap disabled), the function still needs to +// surface the parent's cancellation without touching the pool. 
+func TestAnalyzeISO_HonorsTimeout_PreCanceled(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + src := ISOSource{Filename: "stuck.iso", Size: 1 << 30} + + start := time.Now() + _, err := AnalyzeISO( + ctx, + src, + nil, + 0, + 0, + 0, // analyzeTimeout=0 → cap disabled, parent ctx still canceled + nil, + nil, // progressTracker + ) + elapsed := time.Since(start) + + if err == nil { + t.Fatal("expected error from pre-canceled AnalyzeISO, got nil") + } + if !errors.Is(err, context.Canceled) { + t.Fatalf("expected error wrapping context.Canceled, got: %v", err) + } + if elapsed > 50*time.Millisecond { + t.Fatalf("AnalyzeISO took %v with pre-canceled ctx — fail-fast ctx check is not firing", elapsed) + } +} diff --git a/internal/importer/archive/iso/types.go b/internal/importer/archive/iso/types.go index 53e514672..9425ec9cf 100644 --- a/internal/importer/archive/iso/types.go +++ b/internal/importer/archive/iso/types.go @@ -11,25 +11,52 @@ type ISOSource struct { Size int64 // Decrypted ISO size } -// ISOFileContent represents one file found inside the ISO. +// ISOFileContent represents one file found inside the ISO. The file's +// data may be split across multiple on-disc extents (Blu-ray main-feature +// M2TS files routinely use hundreds), so Sources is a slice of inner +// sources in disc order. Concatenating their byte ranges yields the +// complete file content. type ISOFileContent struct { InternalPath string // e.g. 
"BDMV/STREAM/00001.m2ts" Filename string // Base filename - Size int64 // File size in bytes + Size int64 // Total file size in bytes (sum of Sources.InnerLength) NzbdavID string // Carried from parent archive Content - // Unencrypted case: Segments sliced to cover exactly this file - Segments []*metapb.SegmentData - // Encrypted case: nil Segments + populated NestedSource - NestedSource *ISONestedSource + Sources []ISONestedSource + // InTimeTicks and DurationTicks are the MPLS PlayItem IN_time and + // (OUT−IN) for this clip, in 45 kHz ticks. Populated only for + // MainFeature clips; zero otherwise. They drive the continuous-timeline + // remux: InTimeTicks is the clip's own PTS base (×2 → 90 kHz), + // DurationTicks is its authored span. + InTimeTicks int64 + DurationTicks int64 } -// ISONestedSource holds everything needed to decrypt and seek into the ISO -// for a single inner file. +// ISONestedSource is one extent of an inner file. For unencrypted ISOs, +// Segments is pre-sliced to cover exactly this extent and AesKey is nil +// (InnerOffset is 0, InnerLength equals the extent length). For encrypted +// ISOs, AesKey/AesIV are populated, Segments cover the full outer ISO, +// InnerOffset is the byte offset of this extent within the decrypted +// ISO, and InnerVolumeSize is the full decrypted ISO size — the cipher +// chain needs to start at byte 0 so multi-extent encrypted reads use +// the same outer-ISO data with different inner offsets. type ISONestedSource struct { Segments []*metapb.SegmentData AesKey []byte AesIV []byte - InnerOffset int64 // lba * 2048 - InnerLength int64 // file size - InnerVolumeSize int64 // ISO total decrypted size + InnerOffset int64 + InnerLength int64 + InnerVolumeSize int64 +} + +// AnalyzedISO is the full result of inspecting one ISO image. Files mirrors +// what AnalyzeISOContent has always returned (all media files with extension +// filtering applied). 
MainFeature, when non-nil, is the ordered M2TS list +// that forms the Blu-ray main feature according to BDMV/PLAYLIST/*.mpls — +// this is the slice callers should concatenate to produce a single playable +// virtual file. +type AnalyzedISO struct { + VolumeLabel string + Files []ISOFileContent + MainFeature []ISOFileContent // nil for non-BDMV / unparseable playlists + DurationTicks int64 // sum of (OUT-IN) of MainFeature at 45 kHz } diff --git a/internal/importer/archive/iso/volume.go b/internal/importer/archive/iso/volume.go new file mode 100644 index 000000000..f2db56574 --- /dev/null +++ b/internal/importer/archive/iso/volume.go @@ -0,0 +1,30 @@ +package iso + +import ( + "io" + "strings" +) + +// ReadVolumeLabel returns the ISO 9660 Volume Identifier from the Primary +// Volume Descriptor at sector 16. Hybrid Blu-ray discs always carry a +// 9660 PVD even when the active filesystem is UDF, so this works for both +// plain ISOs and BD images. +// +// Returns an empty string if the descriptor is missing or invalid — callers +// fall back to the ISO filename for disc-group keying. +func ReadVolumeLabel(rs io.ReadSeeker) string { + pvd := make([]byte, iso9660SectorSize) + if _, err := rs.Seek(16*iso9660SectorSize, io.SeekStart); err != nil { + return "" + } + if _, err := io.ReadFull(rs, pvd); err != nil { + return "" + } + // Type 1 = Primary Volume Descriptor; identifier "CD001" at +1. + if pvd[0] != 1 || string(pvd[1:6]) != "CD001" { + return "" + } + // Volume identifier: 32 bytes of a-characters at offset 40, space-padded. 
+ label := strings.TrimRight(string(pvd[40:72]), " \x00") + return label +} diff --git a/internal/importer/archive/iso/volume_test.go b/internal/importer/archive/iso/volume_test.go new file mode 100644 index 000000000..f8aeac1a4 --- /dev/null +++ b/internal/importer/archive/iso/volume_test.go @@ -0,0 +1,70 @@ +package iso + +import ( + "bytes" + "io" + "testing" +) + +// buildPVD constructs a 17-sector buffer with a synthetic Primary Volume +// Descriptor placed at sector 16. The remaining bytes are zero-filled. +func buildPVD(label string, typeCode byte, identifier string) io.ReadSeeker { + buf := make([]byte, 17*iso9660SectorSize) + pvd := buf[16*iso9660SectorSize:] + pvd[0] = typeCode + copy(pvd[1:6], identifier) + // Volume identifier field is 32 bytes, space-padded. + field := make([]byte, 32) + for i := range field { + field[i] = ' ' + } + copy(field, label) + copy(pvd[40:72], field) + return bytes.NewReader(buf) +} + +func TestReadVolumeLabel(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + rs io.ReadSeeker + want string + }{ + { + name: "Avatar disc 1 label", + rs: buildPVD("AVATAR_FIRE_AND_ASH_DISC_1", 1, "CD001"), + want: "AVATAR_FIRE_AND_ASH_DISC_1", + }, + { + name: "padded short label trimmed", + rs: buildPVD("FOO", 1, "CD001"), + want: "FOO", + }, + { + name: "wrong type code", + rs: buildPVD("ANYTHING", 2, "CD001"), + want: "", + }, + { + name: "wrong identifier", + rs: buildPVD("ANYTHING", 1, "BAD!?"), + want: "", + }, + { + name: "short input (no sector 16)", + rs: bytes.NewReader(make([]byte, 1024)), + want: "", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + got := ReadVolumeLabel(tc.rs) + if got != tc.want { + t.Errorf("ReadVolumeLabel = %q, want %q", got, tc.want) + } + }) + } +} diff --git a/internal/importer/archive/iso_expansion.go b/internal/importer/archive/iso_expansion.go new file mode 100644 index 000000000..6b934ebb0 --- /dev/null +++ 
b/internal/importer/archive/iso_expansion.go @@ -0,0 +1,349 @@ +package archive + +import ( + "context" + "fmt" + "log/slog" + "path/filepath" + "regexp" + "sort" + "strconv" + "strings" + "time" + + "github.com/javi11/altmount/internal/importer/archive/iso" + "github.com/javi11/altmount/internal/pool" + "github.com/javi11/altmount/internal/progress" +) + +// analyzedISO bundles an ISO Content with its inspection result and its +// place in a multi-disc grouping. Used internally by ExpandISOContents. +type analyzedISO struct { + src Content // original ISO Content (for fallback / metadata) + analyzed *iso.AnalyzedISO // result of iso.AnalyzeISO + discNum int // parsed disc number; 0 when label has no disc suffix + groupKey string // base name stripped of any DISC/CD/PART suffix +} + +// ExpandISOContents replaces .iso entries in contents with the media they +// contain, applying two Blu-ray-aware optimisations on top of the legacy +// "pick the largest file" behaviour: +// +// 1. Within a disc, if BDMV/PLAYLIST/*.mpls identifies a main feature +// spanning multiple M2TS clips, the clips are virtually concatenated +// into one Content via NestedSources — the player sees a single file. +// 2. Across discs in the same archive group (e.g. DISC_1 and DISC_2 ISOs +// in one NZB), discs sharing a stripped volume label are merged so +// the cross-disc movie also plays as one file. +// +// Non-ISO entries pass through unchanged. Per-ISO errors are non-fatal: +// on failure the original .iso Content is kept so downstream still has +// something to work with. 
+func ExpandISOContents( + ctx context.Context, + expand bool, + contents []Content, + poolManager pool.Manager, + maxPrefetch int, + readTimeout time.Duration, + analyzeTimeout time.Duration, + allowedExtensions []string, + progressTracker *progress.Tracker, +) ([]Content, error) { + if !expand { + return contents, nil + } + + var ( + result []Content + groups = make(map[string][]analyzedISO) + groupKeys []string + ) + + // Count the ISO entries up front so each can be given an equal slice of + // the progress tracker's range; isoIdx walks the ISOs as we process them. + numISOs := 0 + for _, c := range contents { + if !c.IsDirectory && strings.ToLower(filepath.Ext(c.Filename)) == ".iso" { + numISOs++ + } + } + isoIdx := 0 + + for _, c := range contents { + if c.IsDirectory || strings.ToLower(filepath.Ext(c.Filename)) != ".iso" { + result = append(result, c) + continue + } + + src := iso.ISOSource{ + Filename: c.Filename, + Segments: c.Segments, + AesKey: c.AesKey, + AesIV: c.AesIV, + Size: c.Size, + } + // Give this ISO its slice of the overall range so per-playlist + // updates inside AnalyzeISO stay within [isoIdx, isoIdx+1] of the + // band; bump the parent to the slice boundary once it completes so + // even non-BDMV ISOs (no playlist loop) advance the bar. 
+ a, err := iso.AnalyzeISO(ctx, src, poolManager, maxPrefetch, readTimeout, analyzeTimeout, allowedExtensions, progressTracker.Slice(isoIdx, numISOs)) + isoIdx++ + progressTracker.Update(isoIdx, numISOs) + if err != nil { + slog.WarnContext(ctx, "Failed to analyze ISO content, keeping ISO as-is", + "file", c.Filename, "error", err) + result = append(result, c) + continue + } + if len(a.Files) == 0 && len(a.MainFeature) == 0 { + result = append(result, c) + continue + } + + key, discNum := discGroupKey(a.VolumeLabel, c.Filename) + entry := analyzedISO{src: c, analyzed: a, discNum: discNum, groupKey: key} + if _, exists := groups[key]; !exists { + groupKeys = append(groupKeys, key) + } + groups[key] = append(groups[key], entry) + } + + sort.Strings(groupKeys) // deterministic output order + for _, key := range groupKeys { + g := groups[key] + sort.SliceStable(g, func(i, j int) bool { return g[i].discNum < g[j].discNum }) + + // Concatenate main features only when *every* member of the group + // has one — mixing BDMV and non-BDMV in a single group is almost + // always a false grouping, so fall back to per-disc handling. + allHaveMainFeature := true + for _, e := range g { + if len(e.analyzed.MainFeature) == 0 { + allHaveMainFeature = false + break + } + } + + if allHaveMainFeature { + merged, ok := buildMainFeatureContent(ctx, key, g) + if ok { + result = append(result, merged) + continue + } + } + + // Fallback: legacy per-ISO largest-file selection. + for _, e := range g { + nc, ok := buildLargestFileContent(e.src, e.analyzed.Files) + if !ok { + result = append(result, e.src) + continue + } + result = append(result, nc) + } + } + + return result, nil +} + +// buildMainFeatureContent concatenates every member's MainFeature into a +// single Content whose NestedSources chain spans every M2TS in disc and +// playlist order. Returns (zero, false) when, after conversion, the chain +// is empty. 
+func buildMainFeatureContent(ctx context.Context, groupKey string, g []analyzedISO) (Content, bool) { + var ( + sources []NestedSource + totalSize int64 + firstISOName string + nzbdavID string + ) + // Per-clip timeline table for the continuous-timeline remux. We walk + // clips in output order across every disc, building a running 90 kHz + // timeline: clip 0 keeps its native base (delta 0); each later clip is + // lifted to start where the cumulative authored duration places it. + // timeline_start_90k[k] = base0_90k + 2 * Σ_{j files[j].Size }) + f := files[0] + nc := Content{ + InternalPath: f.InternalPath, + Filename: f.Filename, + Size: f.Size, + PackedSize: f.Size, + NzbdavID: src.NzbdavID, + ISOExpansionIndex: 1, + } + nc.NestedSources = isoFileContentToNestedSources(f) + if len(nc.NestedSources) == 0 { + return Content{}, false + } + return nc, true +} + +// isoFileContentToNestedSources fans an ISOFileContent's on-disc extents +// out into one NestedSource per extent, preserving disc order. Concating +// the resulting sources yields the file's bytes — the multi-extent fix +// for Blu-ray main-feature M2TS files lives here. +func isoFileContentToNestedSources(fc iso.ISOFileContent) []NestedSource { + out := make([]NestedSource, 0, len(fc.Sources)) + for _, s := range fc.Sources { + out = append(out, NestedSource{ + Segments: s.Segments, + AesKey: s.AesKey, + AesIV: s.AesIV, + InnerOffset: s.InnerOffset, + InnerLength: s.InnerLength, + InnerVolumeSize: s.InnerVolumeSize, + }) + } + return out +} + +// discSuffixPattern matches volume labels like "AVATAR_FIRE_AND_ASH_DISC_1", +// "MOVIE-CD2", "TITLE PART 3", etc. Capture 1 is the stripped base name, +// capture 2 is the disc identifier (numeric or single letter). +var discSuffixPattern = regexp.MustCompile(`(?i)^(.+?)[ _\-]*(?:disc|cd|part|d|side)[ _\-]*([0-9]+|[a-z])$`) + +// discGroupKey computes the disc-grouping key and parsed disc number for +// an ISO. 
It prefers the volume label and falls back to the ISO filename +// (without extension) when the label is empty or doesn't match a disc +// pattern. Single-disc ISOs return key=, discNum=0. +func discGroupKey(label, isoFilename string) (string, int) { + candidates := []string{label} + if isoFilename != "" { + candidates = append(candidates, strings.TrimSuffix(isoFilename, filepath.Ext(isoFilename))) + } + for _, c := range candidates { + c = strings.TrimSpace(c) + if c == "" { + continue + } + if m := discSuffixPattern.FindStringSubmatch(c); m != nil { + base := normaliseGroupKey(m[1]) + return base, parseDiscNumber(m[2]) + } + } + for _, c := range candidates { + c = strings.TrimSpace(c) + if c != "" { + return normaliseGroupKey(c), 0 + } + } + return "", 0 +} + +func normaliseGroupKey(s string) string { + s = strings.TrimSpace(s) + s = strings.Trim(s, "_- ") + return strings.ToUpper(s) +} + +// parseDiscNumber turns "1" → 1, "2" → 2, "A" → 1, "B" → 2, etc. +func parseDiscNumber(s string) int { + if n, err := strconv.Atoi(s); err == nil { + return n + } + if len(s) == 1 { + c := strings.ToUpper(s)[0] + if c >= 'A' && c <= 'Z' { + return int(c-'A') + 1 + } + } + return 0 +} + +// mainFeatureFilename derives a sensible filename for the virtual concat. +// Downstream renaming (see rar/sevenzip aggregator post-processing) will +// usually replace the base name with the NZB release name; we only need a +// valid .m2ts extension here. 
+func mainFeatureFilename(groupKey, isoFilename string) string { + const ext = ".m2ts" + if groupKey != "" { + return fmt.Sprintf("%s%s", groupKey, ext) + } + if isoFilename != "" { + stem := strings.TrimSuffix(isoFilename, filepath.Ext(isoFilename)) + return stem + ext + } + return "main_feature" + ext +} diff --git a/internal/importer/archive/iso_expansion_test.go b/internal/importer/archive/iso_expansion_test.go new file mode 100644 index 000000000..b070e94ea --- /dev/null +++ b/internal/importer/archive/iso_expansion_test.go @@ -0,0 +1,352 @@ +package archive + +import ( + "context" + "testing" + + "github.com/javi11/altmount/internal/importer/archive/iso" + metapb "github.com/javi11/altmount/internal/metadata/proto" +) + +func TestDiscGroupKey(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + label string + filename string + wantKey string + wantNum int + }{ + {"avatar disc 1 label", "AVATAR_FIRE_AND_ASH_DISC_1", "any.iso", "AVATAR_FIRE_AND_ASH", 1}, + {"avatar disc 2 label", "AVATAR_FIRE_AND_ASH_DISC_2", "any.iso", "AVATAR_FIRE_AND_ASH", 2}, + {"compact DISC2", "MOVIE_DISC2", "any.iso", "MOVIE", 2}, + {"CD suffix", "MOVIE-CD1", "any.iso", "MOVIE", 1}, + {"PART suffix with spaces", "TITLE PART 3", "any.iso", "TITLE", 3}, + {"letter disc identifier B → 2", "FOO_DISC_B", "any.iso", "FOO", 2}, + {"no suffix → solo", "PLAIN_MOVIE", "any.iso", "PLAIN_MOVIE", 0}, + {"empty label falls back to filename stem", "", "MyMovie_Disc_1.iso", "MYMOVIE", 1}, + {"empty label and weird filename", "", "thing.iso", "THING", 0}, + {"only label has disc, filename plain", "X_DISC_2", "anything.iso", "X", 2}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + gotKey, gotNum := discGroupKey(tc.label, tc.filename) + if gotKey != tc.wantKey || gotNum != tc.wantNum { + t.Errorf("discGroupKey(%q,%q) = (%q,%d), want (%q,%d)", + tc.label, tc.filename, gotKey, gotNum, tc.wantKey, tc.wantNum) + } + }) + } +} + +func 
TestParseDiscNumber(t *testing.T) { + t.Parallel() + + cases := map[string]int{ + "1": 1, + "2": 2, + "10": 10, + "A": 1, + "a": 1, + "B": 2, + "": 0, + "AB": 0, + "foo": 0, + } + for in, want := range cases { + if got := parseDiscNumber(in); got != want { + t.Errorf("parseDiscNumber(%q) = %d, want %d", in, got, want) + } + } +} + +func TestIsoFileContentToNestedSources(t *testing.T) { + t.Parallel() + + t.Run("single unencrypted extent → one NestedSource", func(t *testing.T) { + t.Parallel() + fc := iso.ISOFileContent{ + Filename: "00001.m2ts", + Size: 100, + Sources: []iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{{Id: "a", StartOffset: 0, EndOffset: 99, SegmentSize: 100}}, + InnerOffset: 0, + InnerLength: 100, + InnerVolumeSize: 100, + }}, + } + got := isoFileContentToNestedSources(fc) + if len(got) != 1 { + t.Fatalf("want 1 source, got %d", len(got)) + } + if got[0].InnerLength != 100 || got[0].InnerOffset != 0 || len(got[0].AesKey) != 0 { + t.Fatalf("unexpected NestedSource: %+v", got[0]) + } + }) + + t.Run("multi-extent file → one NestedSource per extent in order", func(t *testing.T) { + t.Parallel() + // The bug we just fixed: a 17 GiB M2TS spans hundreds of extents. + // Each extent must become its own NestedSource so the downstream + // concat reader stitches them in disc order. 
+ fc := iso.ISOFileContent{ + Filename: "00022.m2ts", + Size: 30, + Sources: []iso.ISONestedSource{ + {Segments: []*metapb.SegmentData{{Id: "e1"}}, InnerLength: 10}, + {Segments: []*metapb.SegmentData{{Id: "e2"}}, InnerLength: 10}, + {Segments: []*metapb.SegmentData{{Id: "e3"}}, InnerLength: 10}, + }, + } + got := isoFileContentToNestedSources(fc) + if len(got) != 3 { + t.Fatalf("want 3 sources, got %d", len(got)) + } + wantIDs := []string{"e1", "e2", "e3"} + for i, ns := range got { + if len(ns.Segments) != 1 || ns.Segments[0].Id != wantIDs[i] { + t.Errorf("source %d: want segment id %q, got %+v", i, wantIDs[i], ns.Segments) + } + } + }) + + t.Run("encrypted source carries key + IV through", func(t *testing.T) { + t.Parallel() + fc := iso.ISOFileContent{ + Filename: "00001.m2ts", + Size: 2048, + Sources: []iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{{Id: "outer", StartOffset: 0, EndOffset: 99999, SegmentSize: 100000}}, + AesKey: []byte("0123456789abcdef0123456789abcdef"), + AesIV: []byte("0123456789abcdef"), + InnerOffset: 1024, + InnerLength: 2048, + InnerVolumeSize: 99999, + }}, + } + got := isoFileContentToNestedSources(fc) + if len(got) != 1 { + t.Fatalf("want 1 source, got %d", len(got)) + } + if got[0].InnerOffset != 1024 || got[0].InnerLength != 2048 || got[0].InnerVolumeSize != 99999 { + t.Fatalf("offsets mangled: %+v", got[0]) + } + if len(got[0].AesKey) == 0 { + t.Error("AesKey should be carried through for encrypted source") + } + }) +} + +func TestBuildMainFeatureContent_TwoDiscs(t *testing.T) { + t.Parallel() + + // Helper to make a fake ISO main-feature ISOFileContent with given size + // and a single-segment outer slice (segment values are not interpreted + // by buildMainFeatureContent — only Size and the source attributes + // matter for the assembled NestedSources chain). 
+ mkClip := func(name string, size int64) iso.ISOFileContent { + return iso.ISOFileContent{ + Filename: name, + Size: size, + Sources: []iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{ + {Id: name, StartOffset: 0, EndOffset: size - 1, SegmentSize: size}, + }, + InnerLength: size, + InnerVolumeSize: size, + }}, + } + } + + disc1 := analyzedISO{ + src: Content{Filename: "AVATAR_DISC_1.iso", NzbdavID: "nzb-1"}, + analyzed: &iso.AnalyzedISO{ + VolumeLabel: "AVATAR_DISC_1", + MainFeature: []iso.ISOFileContent{ + mkClip("00001.m2ts", 10_000_000), + mkClip("00002.m2ts", 20_000_000), + }, + }, + discNum: 1, + groupKey: "AVATAR", + } + disc2 := analyzedISO{ + src: Content{Filename: "AVATAR_DISC_2.iso", NzbdavID: "nzb-2"}, + analyzed: &iso.AnalyzedISO{ + VolumeLabel: "AVATAR_DISC_2", + MainFeature: []iso.ISOFileContent{ + mkClip("00003.m2ts", 30_000_000), + }, + }, + discNum: 2, + groupKey: "AVATAR", + } + + got, ok := buildMainFeatureContent(context.Background(), "AVATAR", []analyzedISO{disc1, disc2}) + if !ok { + t.Fatal("buildMainFeatureContent returned ok=false") + } + if got.ISOExpansionIndex != 1 { + t.Errorf("ISOExpansionIndex = %d, want 1", got.ISOExpansionIndex) + } + if got.NzbdavID != "nzb-1" { + t.Errorf("NzbdavID = %q, want nzb-1 (from first disc)", got.NzbdavID) + } + if len(got.NestedSources) != 3 { + t.Fatalf("NestedSources count = %d, want 3 (2 clips from disc 1 + 1 clip from disc 2)", len(got.NestedSources)) + } + wantSize := int64(10_000_000 + 20_000_000 + 30_000_000) + if got.Size != wantSize { + t.Errorf("Size = %d, want %d", got.Size, wantSize) + } + if got.PackedSize != wantSize { + t.Errorf("PackedSize = %d, want %d", got.PackedSize, wantSize) + } + // Order must follow disc-then-playlist (disc1.clip1, disc1.clip2, disc2.clip3). 
+ wantOrder := []int64{10_000_000, 20_000_000, 30_000_000} + for i, ns := range got.NestedSources { + if ns.InnerLength != wantOrder[i] { + t.Errorf("NestedSources[%d].InnerLength = %d, want %d", i, ns.InnerLength, wantOrder[i]) + } + } + if got.Filename != "AVATAR.m2ts" { + t.Errorf("Filename = %q, want AVATAR.m2ts", got.Filename) + } +} + +// TestBuildMainFeatureContent_ClipBoundaries verifies the continuous-timeline +// table: each clip's Delta90k lifts its native PTS base (InTimeTicks*2) onto a +// running 90 kHz timeline that advances by each clip's authored span +// (DurationTicks*2), with clip 0 keeping its native base (delta 0). The table +// spans both discs in output order. +func TestBuildMainFeatureContent_ClipBoundaries(t *testing.T) { + t.Parallel() + + // mkTimed builds a one-extent clip with MPLS timing (45 kHz). + mkTimed := func(name string, size, inTime, durTicks int64) iso.ISOFileContent { + return iso.ISOFileContent{ + Filename: name, + Size: size, + Sources: []iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{{Id: name, EndOffset: size - 1, SegmentSize: size}}, + InnerLength: size, + }}, + InTimeTicks: inTime, + DurationTicks: durTicks, + } + } + + disc1 := analyzedISO{ + src: Content{Filename: "M_DISC_1.iso", NzbdavID: "nzb-1"}, + groupKey: "M", discNum: 1, + analyzed: &iso.AnalyzedISO{MainFeature: []iso.ISOFileContent{ + mkTimed("00001.m2ts", 10_000_000, 1000, 45000), // base90k 2000, span90k 90000 + mkTimed("00002.m2ts", 20_000_000, 500, 90000), // base90k 1000, span90k 180000 + }}, + } + disc2 := analyzedISO{ + src: Content{Filename: "M_DISC_2.iso", NzbdavID: "nzb-2"}, + groupKey: "M", discNum: 2, + analyzed: &iso.AnalyzedISO{MainFeature: []iso.ISOFileContent{ + mkTimed("00003.m2ts", 30_000_000, 0, 45000), // base90k 0, span90k 90000 + }}, + } + + got, ok := buildMainFeatureContent(context.Background(), "M", []analyzedISO{disc1, disc2}) + if !ok { + t.Fatal("ok=false") + } + if len(got.ClipBoundaries) != 3 { + t.Fatalf("ClipBoundaries 
= %d, want 3", len(got.ClipBoundaries)) + } + + // base0_90k = 2000. + // clip0: tlStart 2000, delta 0; cum→90000 + // clip1: tlStart 2000+90000=92000, delta 92000-1000=91000; cum→270000 + // clip2: tlStart 2000+270000=272000, delta 272000-0=272000 + wantByteLen := []int64{10_000_000, 20_000_000, 30_000_000} + wantDelta := []int64{0, 91000, 272000} + for i, cb := range got.ClipBoundaries { + if cb.ByteLen != wantByteLen[i] { + t.Errorf("clip %d ByteLen = %d, want %d", i, cb.ByteLen, wantByteLen[i]) + } + if cb.Delta90k != wantDelta[i] { + t.Errorf("clip %d Delta90k = %d, want %d", i, cb.Delta90k, wantDelta[i]) + } + } + + // Σ ByteLen must equal the file size, so the boundary prefix-sums align + // exactly with the flattened NestedSources byte layout. + var sumBytes int64 + for _, cb := range got.ClipBoundaries { + sumBytes += cb.ByteLen + } + if sumBytes != got.Size { + t.Errorf("Σ ClipBoundaries.ByteLen = %d, want file size %d", sumBytes, got.Size) + } +} + +// TestBuildMainFeatureContent_NoTimingNoBoundaries: when MPLS timing is absent +// (all zero), no clip-boundary table is attached, so the read-path remux +// filter stays disabled and the file is served as a plain byte concatenation. 
+func TestBuildMainFeatureContent_NoTimingNoBoundaries(t *testing.T) { + t.Parallel() + mkClip := func(name string, size int64) iso.ISOFileContent { + return iso.ISOFileContent{ + Filename: name, Size: size, + Sources: []iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{{Id: name, EndOffset: size - 1, SegmentSize: size}}, + InnerLength: size, + }}, + } + } + d := analyzedISO{ + src: Content{Filename: "X.iso"}, groupKey: "X", + analyzed: &iso.AnalyzedISO{MainFeature: []iso.ISOFileContent{mkClip("a", 100), mkClip("b", 200)}}, + } + got, ok := buildMainFeatureContent(context.Background(), "X", []analyzedISO{d}) + if !ok { + t.Fatal("ok=false") + } + if len(got.ClipBoundaries) != 0 { + t.Errorf("ClipBoundaries = %d, want 0 when no MPLS timing present", len(got.ClipBoundaries)) + } +} + +func TestBuildLargestFileContent(t *testing.T) { + t.Parallel() + + mkFile := func(name string, size int64, segID string) iso.ISOFileContent { + return iso.ISOFileContent{ + Filename: name, + Size: size, + Sources: []iso.ISONestedSource{{ + Segments: []*metapb.SegmentData{{Id: segID, StartOffset: 0, EndOffset: size - 1, SegmentSize: size}}, + InnerLength: size, + InnerVolumeSize: size, + }}, + } + } + files := []iso.ISOFileContent{ + mkFile("small.mkv", 500, "s"), + mkFile("big.mkv", 5_000_000, "b"), + } + src := Content{Filename: "thing.iso", NzbdavID: "id-1"} + + got, ok := buildLargestFileContent(src, files) + if !ok { + t.Fatal("buildLargestFileContent returned ok=false") + } + if got.Filename != "big.mkv" { + t.Errorf("Filename = %q, want big.mkv (largest)", got.Filename) + } + if got.ISOExpansionIndex != 1 { + t.Errorf("ISOExpansionIndex = %d, want 1", got.ISOExpansionIndex) + } + if got.NzbdavID != "id-1" { + t.Errorf("NzbdavID = %q, want id-1", got.NzbdavID) + } +} diff --git a/internal/importer/archive/rar/aggregator.go b/internal/importer/archive/rar/aggregator.go index 52f8d4871..64bbf759c 100644 --- a/internal/importer/archive/rar/aggregator.go +++ 
b/internal/importer/archive/rar/aggregator.go @@ -16,7 +16,6 @@ import ( "github.com/javi11/altmount/internal/encryption/aes" "github.com/javi11/altmount/internal/importer/archive" - "github.com/javi11/altmount/internal/importer/archive/iso" "github.com/javi11/altmount/internal/importer/filesystem" "github.com/javi11/altmount/internal/importer/parser" "github.com/javi11/altmount/internal/importer/utils" @@ -140,6 +139,7 @@ type ProcessArchiveOptions struct { ExtractedFiles []parser.ExtractedFileInfo MaxPrefetch int ReadTimeout time.Duration + IsoAnalyzeTimeout time.Duration ExpandBlurayIso bool FilterSamples bool RenameToNzbName bool @@ -165,6 +165,7 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { extractedFiles := opts.ExtractedFiles maxPrefetch := opts.MaxPrefetch readTimeout := opts.ReadTimeout + analyzeTimeout := opts.IsoAnalyzeTimeout expandBlurayIso := opts.ExpandBlurayIso filterSamples := opts.FilterSamples renameToNzbName := opts.RenameToNzbName @@ -208,8 +209,18 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { rarContents = append(rarContents, r.contents...) } - // Expand ISO files found inside the RAR archive into their inner media files - rarContents, err := expandISOContents(ctx, expandBlurayIso, rarContents, poolManager, maxPrefetch, readTimeout, allowedFileExtensions) + // Expand ISO files found inside the RAR archive into their inner media + // files. ISO analysis (filesystem walk + Blu-ray playlist resolution over + // NNTP) can take tens of seconds, so it gets its own progress label. + // Slice(0,1) copies the archive tracker at the same range without mutating + // it (RAR header analysis above is already done); WithStage relabels the + // copy. For archives with no ISO, ExpandISOContents emits no updates, so + // the common case is unaffected. 
+ var isoProgressTracker *progress.Tracker + if archiveProgressTracker != nil { + isoProgressTracker = archiveProgressTracker.Slice(0, 1).WithStage("Analyzing ISO") + } + rarContents, err := archive.ExpandISOContents(ctx, expandBlurayIso, rarContents, poolManager, maxPrefetch, readTimeout, analyzeTimeout, allowedFileExtensions, isoProgressTracker) if err != nil { slog.WarnContext(ctx, "ISO expansion failed, proceeding without ISO contents", "error", err) } @@ -474,81 +485,6 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { return nil } -// expandISOContents replaces any .iso Content entries with the media files found -// inside them. Non-ISO entries are passed through unchanged. Per-file errors are -// non-fatal: on failure the original ISO Content is kept. -func expandISOContents( - ctx context.Context, - expand bool, - contents []Content, - poolManager pool.Manager, - maxPrefetch int, - readTimeout time.Duration, - allowedExtensions []string, -) ([]Content, error) { - if !expand { - return contents, nil - } - var result []Content - for _, c := range contents { - if c.IsDirectory || strings.ToLower(filepath.Ext(c.Filename)) != ".iso" { - result = append(result, c) - continue - } - - src := iso.ISOSource{ - Filename: c.Filename, - Segments: c.Segments, - AesKey: c.AesKey, - AesIV: c.AesIV, - Size: c.Size, - } - - isoFiles, err := iso.AnalyzeISOContent(ctx, src, poolManager, maxPrefetch, readTimeout, allowedExtensions) - if err != nil { - slog.WarnContext(ctx, "Failed to analyze ISO content, keeping ISO as-is", - "file", c.Filename, "error", err) - result = append(result, c) - continue - } - - if len(isoFiles) == 0 { - result = append(result, c) - continue - } - - // Sort ISO files by size descending so the largest (main feature) gets index 1. - sort.Slice(isoFiles, func(i, j int) bool { - return isoFiles[i].Size > isoFiles[j].Size - }) - - // Keep only the largest file (index 0 after sort); discard smaller streams. 
- f := isoFiles[0] - nc := Content{ - InternalPath: f.InternalPath, - Filename: f.Filename, - Size: f.Size, - PackedSize: f.Size, // raw ISO data — packed == unpacked - NzbdavID: c.NzbdavID, - ISOExpansionIndex: 1, - } - if f.NestedSource != nil { - nc.NestedSources = []NestedSource{{ - Segments: f.NestedSource.Segments, - AesKey: f.NestedSource.AesKey, - AesIV: f.NestedSource.AesIV, - InnerOffset: f.NestedSource.InnerOffset, - InnerLength: f.NestedSource.InnerLength, - InnerVolumeSize: f.NestedSource.InnerVolumeSize, - }} - } else { - nc.Segments = f.Segments - } - result = append(result, nc) - } - return result, nil -} - // GroupArchivesByBaseName groups ParsedFiles by their RAR base name (case-insensitive). // Returns groups in deterministic order (sorted by base name) for testability. func GroupArchivesByBaseName(files []parser.ParsedFile) [][]parser.ParsedFile { diff --git a/internal/importer/archive/rar/processor.go b/internal/importer/archive/rar/processor.go index 3566508b9..79ca93643 100644 --- a/internal/importer/archive/rar/processor.go +++ b/internal/importer/archive/rar/processor.go @@ -36,46 +36,16 @@ func NewProcessor(poolManager pool.Manager, configGetter config.ConfigGetter) Pr } } -// CreateFileMetadataFromRarContent creates FileMetadata from RarContent for the metadata system +// CreateFileMetadataFromRarContent creates FileMetadata from RarContent for the metadata system. +// Delegates to archive.NewFileMetadataFromContent so the mapping stays shared with +// non-RAR callers (e.g. ISO expansion). 
func (rh *rarProcessor) CreateFileMetadataFromRarContent( Content Content, sourceNzbPath string, releaseDate int64, nzbdavId string, ) *metapb.FileMetadata { - now := time.Now().Unix() - - meta := &metapb.FileMetadata{ - FileSize: Content.Size, - SourceNzbPath: sourceNzbPath, - Status: metapb.FileStatus_FILE_STATUS_HEALTHY, - CreatedAt: now, - ModifiedAt: now, - SegmentData: Content.Segments, - ReleaseDate: releaseDate, - NzbdavId: nzbdavId, - } - - // Set AES encryption if keys are present (single-layer encrypted RAR) - if len(Content.AesKey) > 0 { - meta.Encryption = metapb.Encryption_AES - meta.AesKey = Content.AesKey - meta.AesIv = Content.AesIV - } - - // Populate nested sources for encrypted nested RAR files - for _, ns := range Content.NestedSources { - meta.NestedSources = append(meta.NestedSources, &metapb.NestedSegmentSource{ - Segments: ns.Segments, - AesKey: ns.AesKey, - AesIv: ns.AesIV, - InnerOffset: ns.InnerOffset, - InnerLength: ns.InnerLength, - InnerVolumeSize: ns.InnerVolumeSize, - }) - } - - return meta + return archive.NewFileMetadataFromContent(Content, sourceNzbPath, releaseDate, nzbdavId) } // AnalyzeRarContentFromNzb analyzes a RAR archive directly from NZB data without downloading diff --git a/internal/importer/archive/sevenzip/aggregator.go b/internal/importer/archive/sevenzip/aggregator.go index f0214a294..01ae85258 100644 --- a/internal/importer/archive/sevenzip/aggregator.go +++ b/internal/importer/archive/sevenzip/aggregator.go @@ -6,7 +6,6 @@ import ( "log/slog" "os" "path/filepath" - "sort" "strings" "sync/atomic" "time" @@ -14,7 +13,6 @@ import ( concpool "github.com/sourcegraph/conc/pool" "github.com/javi11/altmount/internal/importer/archive" - "github.com/javi11/altmount/internal/importer/archive/iso" "github.com/javi11/altmount/internal/importer/filesystem" "github.com/javi11/altmount/internal/importer/parser" "github.com/javi11/altmount/internal/importer/utils" @@ -138,6 +136,7 @@ type ProcessArchiveOptions struct { 
ExtractedFiles []parser.ExtractedFileInfo MaxPrefetch int ReadTimeout time.Duration + IsoAnalyzeTimeout time.Duration ExpandBlurayIso bool FilterSamples bool RenameToNzbName bool @@ -163,6 +162,7 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { extractedFiles := opts.ExtractedFiles maxPrefetch := opts.MaxPrefetch readTimeout := opts.ReadTimeout + analyzeTimeout := opts.IsoAnalyzeTimeout expandBlurayIso := opts.ExpandBlurayIso filterSamples := opts.FilterSamples renameToNzbName := opts.RenameToNzbName @@ -185,8 +185,18 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { slog.InfoContext(ctx, "Successfully analyzed 7zip archive content", "files_in_archive", len(sevenZipContents)) - // Expand ISO files found inside the 7zip archive into their inner media files - sevenZipContents, err = expandISOContents(ctx, expandBlurayIso, sevenZipContents, poolManager, maxPrefetch, readTimeout, allowedFileExtensions) + // Expand ISO files found inside the 7zip archive into their inner media + // files. ISO analysis (filesystem walk + Blu-ray playlist resolution over + // NNTP) can take tens of seconds, so it gets its own progress label. + // Slice(0,1) copies the archive tracker at the same range without mutating + // it (7z header analysis above is already done); WithStage relabels the + // copy. For archives with no ISO, ExpandISOContents emits no updates, so + // the common case is unaffected. 
+ var isoProgressTracker *progress.Tracker + if archiveProgressTracker != nil { + isoProgressTracker = archiveProgressTracker.Slice(0, 1).WithStage("Analyzing ISO") + } + sevenZipContents, err = archive.ExpandISOContents(ctx, expandBlurayIso, sevenZipContents, poolManager, maxPrefetch, readTimeout, analyzeTimeout, allowedFileExtensions, isoProgressTracker) if err != nil { slog.WarnContext(ctx, "ISO expansion failed, proceeding without ISO contents", "error", err) } @@ -445,81 +455,6 @@ func ProcessArchive(ctx context.Context, opts ProcessArchiveOptions) error { return nil } -// expandISOContents replaces any .iso Content entries with the media files found -// inside them. Non-ISO entries are passed through unchanged. Per-file errors are -// non-fatal: on failure the original ISO Content is kept. -func expandISOContents( - ctx context.Context, - expand bool, - contents []Content, - poolManager pool.Manager, - maxPrefetch int, - readTimeout time.Duration, - allowedExtensions []string, -) ([]Content, error) { - if !expand { - return contents, nil - } - var result []Content - for _, c := range contents { - if c.IsDirectory || strings.ToLower(filepath.Ext(c.Filename)) != ".iso" { - result = append(result, c) - continue - } - - src := iso.ISOSource{ - Filename: c.Filename, - Segments: c.Segments, - AesKey: c.AesKey, - AesIV: c.AesIV, - Size: c.Size, - } - - isoFiles, err := iso.AnalyzeISOContent(ctx, src, poolManager, maxPrefetch, readTimeout, allowedExtensions) - if err != nil { - slog.WarnContext(ctx, "Failed to analyze ISO content, keeping ISO as-is", - "file", c.Filename, "error", err) - result = append(result, c) - continue - } - - if len(isoFiles) == 0 { - result = append(result, c) - continue - } - - // Sort ISO files by size descending so the largest (main feature) gets index 1. - sort.Slice(isoFiles, func(i, j int) bool { - return isoFiles[i].Size > isoFiles[j].Size - }) - - // Keep only the largest file (index 0 after sort); discard smaller streams. 
- f := isoFiles[0] - nc := Content{ - InternalPath: f.InternalPath, - Filename: f.Filename, - Size: f.Size, - PackedSize: f.Size, // raw ISO data — packed == unpacked - NzbdavID: c.NzbdavID, - ISOExpansionIndex: 1, - } - if f.NestedSource != nil { - nc.NestedSources = []NestedSource{{ - Segments: f.NestedSource.Segments, - AesKey: f.NestedSource.AesKey, - AesIV: f.NestedSource.AesIV, - InnerOffset: f.NestedSource.InnerOffset, - InnerLength: f.NestedSource.InnerLength, - InnerVolumeSize: f.NestedSource.InnerVolumeSize, - }} - } else { - nc.Segments = f.Segments - } - result = append(result, nc) - } - return result, nil -} - // normalizeArchiveReleaseFilename aligns the filename to the NZB basename while keeping the original extension. func normalizeArchiveReleaseFilename(nzbFilename, originalFilename string) string { releaseName := nzbtrim.TrimNzbExtension(nzbFilename) diff --git a/internal/importer/archive/sevenzip/processor.go b/internal/importer/archive/sevenzip/processor.go index 6284358d2..349979fe5 100644 --- a/internal/importer/archive/sevenzip/processor.go +++ b/internal/importer/archive/sevenzip/processor.go @@ -58,46 +58,16 @@ var ( rarNumericPattern = rar.NumericPattern ) -// CreateFileMetadataFromSevenZipContent creates FileMetadata from SevenZipContent for the metadata system +// CreateFileMetadataFromSevenZipContent creates FileMetadata from SevenZipContent for the metadata system. +// Delegates to archive.NewFileMetadataFromContent so the mapping stays shared with +// non-7z callers (e.g. ISO expansion). 
func (sz *sevenZipProcessor) CreateFileMetadataFromSevenZipContent( content Content, sourceNzbPath string, releaseDate int64, nzbdavId string, ) *metapb.FileMetadata { - now := time.Now().Unix() - - meta := &metapb.FileMetadata{ - FileSize: content.Size, - SourceNzbPath: sourceNzbPath, - Status: metapb.FileStatus_FILE_STATUS_HEALTHY, - CreatedAt: now, - ModifiedAt: now, - SegmentData: content.Segments, - ReleaseDate: releaseDate, - NzbdavId: nzbdavId, - } - - // Set AES encryption if keys are present - if len(content.AesKey) > 0 { - meta.Encryption = metapb.Encryption_AES - meta.AesKey = content.AesKey - meta.AesIv = content.AesIV - } - - // Populate nested sources for encrypted nested RAR files - for _, ns := range content.NestedSources { - meta.NestedSources = append(meta.NestedSources, &metapb.NestedSegmentSource{ - Segments: ns.Segments, - AesKey: ns.AesKey, - AesIv: ns.AesIV, - InnerOffset: ns.InnerOffset, - InnerLength: ns.InnerLength, - InnerVolumeSize: ns.InnerVolumeSize, - }) - } - - return meta + return archive.NewFileMetadataFromContent(content, sourceNzbPath, releaseDate, nzbdavId) } // deriveAESKey derives the AES encryption key from a password using the 7-zip algorithm diff --git a/internal/importer/iso_expand.go b/internal/importer/iso_expand.go new file mode 100644 index 000000000..cb9e920e8 --- /dev/null +++ b/internal/importer/iso_expand.go @@ -0,0 +1,124 @@ +package importer + +import ( + "context" + "fmt" + "log/slog" + "path" + "path/filepath" + "strings" + + "github.com/javi11/altmount/internal/importer/archive" + "github.com/javi11/altmount/internal/importer/parser" + metapb "github.com/javi11/altmount/internal/metadata/proto" +) + +// parsedFileToISOContent adapts a parser.ParsedFile (a bare .iso entry +// in an NZB) to archive.Content so archive.ExpandISOContents can analyse +// it. 
Mirrors the field mapping rar/processor.go applies to RAR-wrapped +// ISOs, minus RAR-specific InternalPath/PackedSize bookkeeping (bare ISO +// is not packed, so PackedSize == Size). +func parsedFileToISOContent(pf parser.ParsedFile) archive.Content { + return archive.Content{ + Filename: pf.Filename, + Size: pf.Size, + PackedSize: pf.Size, // bare ISO is not packed + NzbdavID: pf.NzbdavID, + Segments: pf.Segments, + AesKey: pf.AesKey, + AesIV: pf.AesIv, // parser uses AesIv (lowercase v); archive.Content uses AesIV + } +} + +// partitionISOFiles splits a regularFiles slice into the .iso entries +// (case-insensitive) and everything else, preserving original order in +// both outputs. +func partitionISOFiles(files []parser.ParsedFile) (isos, rest []parser.ParsedFile) { + for _, f := range files { + if strings.EqualFold(filepath.Ext(f.Filename), ".iso") { + isos = append(isos, f) + } else { + rest = append(rest, f) + } + } + return isos, rest +} + +// expandBareISODeps lets the orchestrator be tested without an NNTP pool +// or a real metadata service. Production wiring constructs these from +// the Processor's existing collaborators. +type expandBareISODeps struct { + expand func(ctx context.Context, enabled bool, contents []archive.Content) ([]archive.Content, error) + writeMetadata func(virtualPath string, meta *metapb.FileMetadata) error + // enabled is the resolved value of Import.ExpandBlurayIso. Pulled + // out of deps so tests can flip it without touching config. + enabled bool +} + +// expandBareISOFiles peels .iso entries out of regularFiles, runs the +// existing archive.ExpandISOContents over them (which handles single-disc +// playlist resolution AND multi-disc DISC_N grouping), writes each +// expanded Content as a FileMetadata under virtualDir, and returns the +// list of written virtual paths plus the remaining (non-ISO + unchanged) +// files for normal dispatch. +// +// When no .iso files are present, returns (nil, regularFiles, nil). 
+// When deps.enabled is false, archive.ExpandISOContents returns the +// inputs unchanged; in that case we push the ISOs back into `remaining` +// so processSingleFile/processMultiFile handle them as raw .iso bytes. +// +// Pairing-by-position note: archive.ExpandISOContents appends exactly one +// Content per input ISO when no multi-disc merging happens, so the i-th +// expanded output corresponds to isos[i]. When multi-disc merging DOES +// happen (group of N discs collapses into 1 Content), every entry in the +// returned slice has NestedSources populated — the per-index fallback +// branch (which references isos[i]) is therefore never taken in that case. +func expandBareISOFiles( + ctx context.Context, + deps expandBareISODeps, + regularFiles []parser.ParsedFile, + virtualDir string, + releaseName string, + sourceNzbPath string, + releaseDate int64, +) (written []string, remaining []parser.ParsedFile, err error) { + isos, rest := partitionISOFiles(regularFiles) + if len(isos) == 0 { + return nil, regularFiles, nil + } + + in := make([]archive.Content, 0, len(isos)) + for _, pf := range isos { + in = append(in, parsedFileToISOContent(pf)) + } + + expanded, err := deps.expand(ctx, deps.enabled, in) + if err != nil { + return nil, nil, fmt.Errorf("expand bare ISOs: %w", err) + } + + for i, c := range expanded { + if c.ISOExpansionIndex == 0 && len(c.NestedSources) == 0 { + // Untransformed — fall back to standard processing. + // len(expanded) <= len(isos) is guaranteed by archive.ExpandISOContents: + // it appends one Content per input ISO on passthrough and ≤ one per + // group on success. Index isos[i] is therefore safe here. 
+ remaining = append(remaining, isos[i]) + continue + } + meta := archive.NewFileMetadataFromContent(c, sourceNzbPath, releaseDate, c.NzbdavID) + virtualPath := path.Join(virtualDir, c.Filename) + if err := deps.writeMetadata(virtualPath, meta); err != nil { + return written, nil, fmt.Errorf("write metadata %q: %w", virtualPath, err) + } + written = append(written, virtualPath) + slog.InfoContext(ctx, "Expanded bare ISO into virtual file", + "release", releaseName, + "path", virtualPath, + "size", c.Size, + "nested_sources", len(c.NestedSources), + ) + } + remaining = append(remaining, rest...) + return written, remaining, nil +} diff --git a/internal/importer/iso_expand_test.go b/internal/importer/iso_expand_test.go new file mode 100644 index 000000000..461cf5730 --- /dev/null +++ b/internal/importer/iso_expand_test.go @@ -0,0 +1,210 @@ +package importer + +import ( + "context" + "testing" + + "github.com/javi11/altmount/internal/importer/archive" + "github.com/javi11/altmount/internal/importer/parser" + metapb "github.com/javi11/altmount/internal/metadata/proto" +) + +func TestParsedFileToISOContent_MapsAllFields(t *testing.T) { + pf := parser.ParsedFile{ + Filename: "Movie_DISC_1.iso", + Size: 42_949_672_960, // 40 GiB + NzbdavID: "abc-123", + Segments: []*metapb.SegmentData{ + {Id: "msg1@", StartOffset: 0, EndOffset: 750_000, SegmentSize: 750_000}, + }, + AesKey: []byte{0xAA, 0xBB}, + AesIv: []byte{0xCC, 0xDD}, + } + + got := parsedFileToISOContent(pf) + + if got.Filename != "Movie_DISC_1.iso" { + t.Errorf("Filename = %q, want Movie_DISC_1.iso", got.Filename) + } + if got.Size != 42_949_672_960 { + t.Errorf("Size = %d, want 42949672960", got.Size) + } + if got.PackedSize != 42_949_672_960 { + t.Errorf("PackedSize = %d, want 42949672960 (bare ISO is unpacked)", got.PackedSize) + } + if got.NzbdavID != "abc-123" { + t.Errorf("NzbdavID = %q, want abc-123", got.NzbdavID) + } + if len(got.Segments) != 1 || got.Segments[0].Id != "msg1@" { + t.Errorf("Segments not 
preserved: %#v", got.Segments) + } + if string(got.AesKey) != "\xAA\xBB" || string(got.AesIV) != "\xCC\xDD" { + t.Errorf("AES key/IV not preserved") + } +} + +func TestPartitionISOFiles_SeparatesISOFromRest(t *testing.T) { + files := []parser.ParsedFile{ + {Filename: "Movie_DISC_1.iso"}, + {Filename: "readme.txt"}, + {Filename: "Movie_DISC_2.ISO"}, + {Filename: "extras.mkv"}, + } + + isos, rest := partitionISOFiles(files) + + if len(isos) != 2 { + t.Fatalf("isos = %d, want 2", len(isos)) + } + if isos[0].Filename != "Movie_DISC_1.iso" || isos[1].Filename != "Movie_DISC_2.ISO" { + t.Errorf("isos = %+v", isos) + } + if len(rest) != 2 || rest[0].Filename != "readme.txt" || rest[1].Filename != "extras.mkv" { + t.Errorf("rest = %+v", rest) + } +} + +func TestExpandBareISOFiles_NoISOs_ReturnsInputUntouched(t *testing.T) { + files := []parser.ParsedFile{{Filename: "a.mkv"}, {Filename: "b.mp4"}} + written, rest, err := expandBareISOFiles(context.Background(), expandBareISODeps{ + expand: func(ctx context.Context, _ bool, _ []archive.Content) ([]archive.Content, error) { + t.Fatal("expand should not be called when no .iso present") + return nil, nil + }, + }, files, "vdir", "movie", "", 0) + if err != nil { + t.Fatalf("err = %v", err) + } + if len(written) != 0 { + t.Errorf("written = %v, want []", written) + } + if len(rest) != 2 { + t.Errorf("rest = %d, want 2", len(rest)) + } +} + +func TestExpandBareISOFiles_OneISO_BluRayPath_WritesMergedMetadata(t *testing.T) { + files := []parser.ParsedFile{ + {Filename: "movie.iso", Size: 25_000_000_000}, + {Filename: "readme.txt"}, + } + expandCalled := false + deps := expandBareISODeps{ + expand: func(ctx context.Context, enabled bool, in []archive.Content) ([]archive.Content, error) { + expandCalled = true + if !enabled { + t.Error("expand called with enabled=false") + } + if len(in) != 1 || in[0].Filename != "movie.iso" { + t.Errorf("unexpected expand input: %+v", in) + } + return []archive.Content{{ + Filename: "MOVIE.m2ts", + 
Size: 20_000_000_000, + NestedSources: []archive.NestedSource{ + {InnerOffset: 0, InnerLength: 10_000_000_000}, + {InnerOffset: 0, InnerLength: 10_000_000_000}, + }, + }}, nil + }, + writeMetadata: func(virtualPath string, _ *metapb.FileMetadata) error { + if virtualPath != "vdir/MOVIE.m2ts" { + t.Errorf("virtualPath = %q, want vdir/MOVIE.m2ts", virtualPath) + } + return nil + }, + enabled: true, + } + + written, rest, err := expandBareISOFiles(context.Background(), deps, files, "vdir", "movie", "", 0) + if err != nil { + t.Fatalf("err = %v", err) + } + if !expandCalled { + t.Error("expand was never called") + } + if len(written) != 1 || written[0] != "vdir/MOVIE.m2ts" { + t.Errorf("written = %v", written) + } + if len(rest) != 1 || rest[0].Filename != "readme.txt" { + t.Errorf("rest = %v", rest) + } +} + +func TestExpandBareISOFiles_Disabled_StillPeelsButFallsBack(t *testing.T) { + files := []parser.ParsedFile{{Filename: "movie.iso", Size: 1000}} + deps := expandBareISODeps{ + enabled: false, + expand: func(ctx context.Context, enabled bool, in []archive.Content) ([]archive.Content, error) { + if enabled { + t.Error("expand was called with enabled=true; want enabled=false") + } + // archive.ExpandISOContents with expand=false returns input unchanged. 
+ return in, nil + }, + writeMetadata: func(string, *metapb.FileMetadata) error { + t.Fatal("writeMetadata should not be called when bare ISO is unchanged") + return nil + }, + } + written, rest, err := expandBareISOFiles(context.Background(), deps, files, "vdir", "movie", "", 0) + if err != nil { + t.Fatalf("err = %v", err) + } + if len(written) != 0 { + t.Errorf("written = %v, want [] (no metadata should be written when expansion is gated off)", written) + } + if len(rest) != 1 || rest[0].Filename != "movie.iso" { + t.Errorf("rest = %+v, want the original .iso pushed back for normal dispatch", rest) + } +} + +// TestExpandBareISOFiles_PropagatesSourceNzbPathAndReleaseDate asserts the +// orchestrator threads sourceNzbPath and releaseDate through to the +// FileMetadata produced via archive.NewFileMetadataFromContent. Without +// this, downstream consumers (history, repair, etc.) lose the link back +// to the originating NZB post. +func TestExpandBareISOFiles_PropagatesSourceNzbPathAndReleaseDate(t *testing.T) { + files := []parser.ParsedFile{{Filename: "movie.iso", Size: 1000}} + + const wantSourceNzbPath = "/incoming/Movie.1080p.BluRay.nzb" + const wantReleaseDate int64 = 1_234_567_890 + + var capturedMeta *metapb.FileMetadata + deps := expandBareISODeps{ + enabled: true, + expand: func(ctx context.Context, _ bool, _ []archive.Content) ([]archive.Content, error) { + return []archive.Content{{ + Filename: "MOVIE.m2ts", + Size: 900, + NestedSources: []archive.NestedSource{ + {InnerOffset: 0, InnerLength: 900}, + }, + }}, nil + }, + writeMetadata: func(_ string, meta *metapb.FileMetadata) error { + capturedMeta = meta + return nil + }, + } + + written, _, err := expandBareISOFiles( + context.Background(), deps, files, "vdir", "movie", + wantSourceNzbPath, wantReleaseDate, + ) + if err != nil { + t.Fatalf("err = %v", err) + } + if len(written) != 1 { + t.Fatalf("written = %v, want 1 entry", written) + } + if capturedMeta == nil { + t.Fatal("writeMetadata was never 
invoked") + } + if capturedMeta.SourceNzbPath != wantSourceNzbPath { + t.Errorf("SourceNzbPath = %q, want %q", capturedMeta.SourceNzbPath, wantSourceNzbPath) + } + if capturedMeta.ReleaseDate != wantReleaseDate { + t.Errorf("ReleaseDate = %d, want %d", capturedMeta.ReleaseDate, wantReleaseDate) + } +} diff --git a/internal/importer/processor.go b/internal/importer/processor.go index a2e47742e..63945d08d 100644 --- a/internal/importer/processor.go +++ b/internal/importer/processor.go @@ -14,6 +14,7 @@ import ( "github.com/javi11/altmount/internal/config" "github.com/javi11/altmount/internal/database" + "github.com/javi11/altmount/internal/importer/archive" "github.com/javi11/altmount/internal/importer/archive/rar" "github.com/javi11/altmount/internal/importer/archive/sevenzip" "github.com/javi11/altmount/internal/importer/filesystem" @@ -22,6 +23,7 @@ import ( "github.com/javi11/altmount/internal/importer/singlefile" "github.com/javi11/altmount/internal/importer/utils/nzbtrim" "github.com/javi11/altmount/internal/metadata" + metapb "github.com/javi11/altmount/internal/metadata/proto" "github.com/javi11/altmount/internal/nzbfile" "github.com/javi11/altmount/internal/pool" "github.com/javi11/altmount/internal/progress" @@ -263,30 +265,98 @@ func (proc *Processor) ProcessNzbFile(ctx context.Context, filePath, relativePat // Step 5: Process based on file type var result string var writtenPaths []string + + // Bare-ISO Blu-ray expansion. ISOs posted directly to Usenet (without + // RAR/7z wrapping) are classified as NzbTypeSingleFile/NzbTypeMultiFile + // by the parser and would otherwise bypass archive.ExpandISOContents. + // Peel them out here, run the same expansion the RAR/7z aggregators run, + // persist each expanded virtual file, and feed the remainder back into + // normal dispatch. STRM imports skip this path: they have no NNTP + // segments and the pool guard above explicitly excludes them. 
+ if parsed.Type != parser.NzbTypeStrm { + importCfg := cfg.Import + expandEnabled := true + if importCfg.ExpandBlurayIso != nil { + expandEnabled = *importCfg.ExpandBlurayIso + } + isoMaxPrefetch := importCfg.MaxDownloadPrefetch + isoReadTimeout := time.Duration(importCfg.ReadTimeoutSeconds) * time.Second + if isoReadTimeout == 0 { + isoReadTimeout = 5 * time.Minute + } + + var isoReleaseDate int64 + if len(regularFiles) > 0 { + isoReleaseDate = regularFiles[0].ReleaseDate.Unix() + } + + // Progress tracker for the bare-ISO analysis phase. It fills the band + // between "Identifying files" (10%) and "Validating segments" (30%), + // which would otherwise sit frozen while the ISO filesystem walk and + // Blu-ray playlist resolution run over NNTP. Gated on subscribers to + // avoid overhead when nobody is watching (mirrors the RAR/7z path). + var isoTracker *progress.Tracker + if proc.broadcaster != nil && proc.broadcaster.HasSubscribers() { + isoTracker = proc.broadcaster.CreateTracker(queueID, 10, 30).WithStage("Analyzing ISO") + } + + isoWritten, expandedRegularFiles, isoErr := expandBareISOFiles(ctx, expandBareISODeps{ + enabled: expandEnabled, + expand: func(ctx context.Context, enabled bool, contents []archive.Content) ([]archive.Content, error) { + return archive.ExpandISOContents(ctx, enabled, contents, + proc.poolManager, isoMaxPrefetch, isoReadTimeout, cfg.GetIsoAnalyzeTimeout(), allowedExtensions, isoTracker) + }, + writeMetadata: func(virtualPath string, meta *metapb.FileMetadata) error { + return proc.metadataService.WriteFileMetadata(virtualPath, meta) + }, + }, regularFiles, virtualDir, proc.getCleanNzbName(parsed.Path, queueID), parsed.Path, isoReleaseDate) + if isoErr != nil { + return "", writtenPaths, NewNonRetryableError("bare-ISO expansion failed", isoErr) + } + writtenPaths = append(writtenPaths, isoWritten...) 
+ regularFiles = expandedRegularFiles + + // If bare-ISO expansion consumed every regular file and there are no + // archive files, dispatch has nothing left to do. Return the first + // expanded virtual path so callers get a meaningful result; the + // "no files" error path lives in processSingleFile and would otherwise + // trigger spuriously. + if len(regularFiles) == 0 && len(archiveFiles) == 0 && len(isoWritten) > 0 { + proc.updateProgress(queueID, 100) + return isoWritten[0], writtenPaths, nil + } + } + + // dispatchPaths holds whatever the per-type handlers wrote so we can + // merge it with any ISO-derived paths accumulated above. Handlers + // already return their full set of written paths (including "DIR:" + // prefixed cleanup markers) so we just concatenate. + var dispatchPaths []string switch parsed.Type { case parser.NzbTypeSingleFile: proc.updateProgressWithStage(queueID, 30, "Validating segments") - result, writtenPaths, err = proc.processSingleFile(ctx, virtualDir, regularFiles, par2Files, parsed.Path, queueID, maxConnections, allowedExtensions, proc.validationTimeout, category, metadata, downloadID) + result, dispatchPaths, err = proc.processSingleFile(ctx, virtualDir, regularFiles, par2Files, parsed.Path, queueID, maxConnections, allowedExtensions, proc.validationTimeout, category, metadata, downloadID) case parser.NzbTypeMultiFile: proc.updateProgressWithStage(queueID, 30, "Validating segments") - result, writtenPaths, err = proc.processMultiFile(ctx, virtualDir, regularFiles, par2Files, parsed.Path, queueID, maxConnections, allowedExtensions, proc.validationTimeout, category, metadata, downloadID) + result, dispatchPaths, err = proc.processMultiFile(ctx, virtualDir, regularFiles, par2Files, parsed.Path, queueID, maxConnections, allowedExtensions, proc.validationTimeout, category, metadata, downloadID) case parser.NzbTypeRarArchive: proc.updateProgressWithStage(queueID, 15, "Analyzing archive") - result, writtenPaths, err = 
proc.processRarArchive(ctx, virtualDir, regularFiles, archiveFiles, parsed, queueID, maxConnections, allowedExtensions, proc.validationTimeout, parsed.ExtractedFiles, category, metadata, downloadID) + result, dispatchPaths, err = proc.processRarArchive(ctx, virtualDir, regularFiles, archiveFiles, parsed, queueID, maxConnections, allowedExtensions, proc.validationTimeout, parsed.ExtractedFiles, category, metadata, downloadID) case parser.NzbType7zArchive: proc.updateProgressWithStage(queueID, 15, "Analyzing archive") - result, writtenPaths, err = proc.processSevenZipArchive(ctx, virtualDir, regularFiles, archiveFiles, parsed, queueID, maxConnections, allowedExtensions, proc.validationTimeout, parsed.ExtractedFiles, category, metadata, downloadID) + result, dispatchPaths, err = proc.processSevenZipArchive(ctx, virtualDir, regularFiles, archiveFiles, parsed, queueID, maxConnections, allowedExtensions, proc.validationTimeout, parsed.ExtractedFiles, category, metadata, downloadID) case parser.NzbTypeStrm: proc.updateProgressWithStage(queueID, 30, "Validating segments") - result, writtenPaths, err = proc.processSingleFile(ctx, virtualDir, regularFiles, par2Files, parsed.Path, queueID, maxConnections, allowedExtensions, proc.validationTimeout, category, metadata, downloadID) + result, dispatchPaths, err = proc.processSingleFile(ctx, virtualDir, regularFiles, par2Files, parsed.Path, queueID, maxConnections, allowedExtensions, proc.validationTimeout, category, metadata, downloadID) default: - return "", nil, NewNonRetryableError(fmt.Sprintf("unknown file type: %s", parsed.Type), nil) + return "", writtenPaths, NewNonRetryableError(fmt.Sprintf("unknown file type: %s", parsed.Type), nil) } + writtenPaths = append(writtenPaths, dispatchPaths...) 
// Update progress: complete if err == nil { @@ -639,6 +709,7 @@ func (proc *Processor) processRarArchive( ExtractedFiles: extractedFiles, MaxPrefetch: maxPrefetch, ReadTimeout: readTimeout, + IsoAnalyzeTimeout: proc.configGetter().GetIsoAnalyzeTimeout(), ExpandBlurayIso: expandBlurayIso, FilterSamples: filterSampleFiles, RenameToNzbName: renameToNzbName, @@ -777,6 +848,7 @@ func (proc *Processor) processSevenZipArchive( ExtractedFiles: extractedFiles, MaxPrefetch: maxPrefetch, ReadTimeout: readTimeout, + IsoAnalyzeTimeout: proc.configGetter().GetIsoAnalyzeTimeout(), ExpandBlurayIso: expandBlurayIso, FilterSamples: filterSampleFiles, RenameToNzbName: renameToNzbName, diff --git a/internal/metadata/expand.go b/internal/metadata/expand.go new file mode 100644 index 000000000..f6b803775 --- /dev/null +++ b/internal/metadata/expand.go @@ -0,0 +1,42 @@ +package metadata + +import ( + "fmt" + + metapb "github.com/javi11/altmount/internal/metadata/proto" +) + +// ExpandSharedOuterSources resolves NestedSegmentSource.SharedOuterSourceIndex +// references in-place. Sources with a non-zero index inherit Segments, AesKey, +// AesIv, and (if unset) InnerVolumeSize from +// meta.SharedOuterSources[index-1]. Slice headers share their underlying +// array — RAM cost is unchanged from the legacy layout. Safe to call on any +// FileMetadata; a no-op when SharedOuterSources is empty. +// +// The dedupe written by archive.NewFileMetadataFromContent is the +// write-side counterpart of this expansion. 
+func ExpandSharedOuterSources(meta *metapb.FileMetadata) error { + if len(meta.SharedOuterSources) == 0 { + return nil + } + for _, ns := range meta.NestedSources { + if ns.SharedOuterSourceIndex == 0 { + continue + } + idx := int(ns.SharedOuterSourceIndex) - 1 + if idx < 0 || idx >= len(meta.SharedOuterSources) { + return fmt.Errorf( + "metadata: nested source references shared_outer_source_index %d but only %d shared outer source(s) are defined", + ns.SharedOuterSourceIndex, len(meta.SharedOuterSources), + ) + } + shared := meta.SharedOuterSources[idx] + ns.Segments = shared.Segments + ns.AesKey = shared.AesKey + ns.AesIv = shared.AesIv + if ns.InnerVolumeSize == 0 { + ns.InnerVolumeSize = shared.InnerVolumeSize + } + } + return nil +} diff --git a/internal/metadata/proto/metadata.pb.go b/internal/metadata/proto/metadata.pb.go index 05a58b7e6..b1af3db9b 100644 --- a/internal/metadata/proto/metadata.pb.go +++ b/internal/metadata/proto/metadata.pb.go @@ -263,8 +263,15 @@ type NestedSegmentSource struct { InnerOffset int64 `protobuf:"varint,4,opt,name=inner_offset,json=innerOffset,proto3" json:"inner_offset,omitempty"` // Offset within decrypted inner volume where file data starts InnerLength int64 `protobuf:"varint,5,opt,name=inner_length,json=innerLength,proto3" json:"inner_length,omitempty"` // Bytes of target file in this source InnerVolumeSize int64 `protobuf:"varint,6,opt,name=inner_volume_size,json=innerVolumeSize,proto3" json:"inner_volume_size,omitempty"` // Total decrypted size of inner volume (for AES cipher) - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + // When > 0 the `segments`, `aes_key`, `aes_iv`, `inner_volume_size` + // fields above are intentionally left empty on disk and inherit from + // FileMetadata.shared_outer_sources[shared_outer_source_index - 1]. + // Only `inner_offset` and `inner_length` are stored per-extent. 
0 + // (proto default) means "no sharing" — identical to the legacy on-disk + // layout, so old .meta files keep working without migration. + SharedOuterSourceIndex int32 `protobuf:"varint,7,opt,name=shared_outer_source_index,json=sharedOuterSourceIndex,proto3" json:"shared_outer_source_index,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *NestedSegmentSource) Reset() { @@ -339,6 +346,70 @@ func (x *NestedSegmentSource) GetInnerVolumeSize() int64 { return 0 } +func (x *NestedSegmentSource) GetSharedOuterSourceIndex() int32 { + if x != nil { + return x.SharedOuterSourceIndex + } + return 0 +} + +// ClipBoundary is one clip in a byte-concatenated multi-clip BD main feature. +// byte_len is the clip's size in the virtual file (a whole number of 192-byte +// BDAV source packets). delta_90k is the signed 90 kHz offset added to PTS/DTS +// (and delta_90k to the 90 kHz-equivalent of PCR base) for packets inside this +// clip's byte range, lifting the clip onto the unified continuous timeline. 
+type ClipBoundary struct { + state protoimpl.MessageState `protogen:"open.v1"` + ByteLen int64 `protobuf:"varint,1,opt,name=byte_len,json=byteLen,proto3" json:"byte_len,omitempty"` + Delta_90K int64 `protobuf:"varint,2,opt,name=delta_90k,json=delta90k,proto3" json:"delta_90k,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ClipBoundary) Reset() { + *x = ClipBoundary{} + mi := &file_internal_metadata_proto_metadata_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ClipBoundary) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ClipBoundary) ProtoMessage() {} + +func (x *ClipBoundary) ProtoReflect() protoreflect.Message { + mi := &file_internal_metadata_proto_metadata_proto_msgTypes[3] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ClipBoundary.ProtoReflect.Descriptor instead. 
+func (*ClipBoundary) Descriptor() ([]byte, []int) { + return file_internal_metadata_proto_metadata_proto_rawDescGZIP(), []int{3} +} + +func (x *ClipBoundary) GetByteLen() int64 { + if x != nil { + return x.ByteLen + } + return 0 +} + +func (x *ClipBoundary) GetDelta_90K() int64 { + if x != nil { + return x.Delta_90K + } + return 0 +} + // FileMetadata represents a single virtual file in the filesystem // The filename comes from the actual metadata filename on disk type FileMetadata struct { @@ -358,13 +429,30 @@ type FileMetadata struct { Par2Files []*Par2FileReference `protobuf:"bytes,13,rep,name=par2_files,json=par2Files,proto3" json:"par2_files,omitempty"` // Associated PAR2 repair files NzbdavId string `protobuf:"bytes,14,opt,name=nzbdav_id,json=nzbdavId,proto3" json:"nzbdav_id,omitempty"` // ID to maintain compatibility with nzbdav NestedSources []*NestedSegmentSource `protobuf:"bytes,15,rep,name=nested_sources,json=nestedSources,proto3" json:"nested_sources,omitempty"` // Nested RAR sources (when file is inside inner RAR within outer RAR) - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + // Per-clip timeline table for Blu-ray main-feature virtual files that + // byte-concatenate multiple M2TS clips (each with its own independent + // PTS/DTS/PCR base). At read time a TS-aware filter adds each clip's + // delta_90k to the timestamps inside that clip's byte range, producing a + // single continuous timeline so ffprobe/players report the correct + // duration and seek accurately. Empty for every other file type, which + // disables the filter entirely (zero overhead, total safety). + ClipBoundaries []*ClipBoundary `protobuf:"bytes,17,rep,name=clip_boundaries,json=clipBoundaries,proto3" json:"clip_boundaries,omitempty"` + // Outer sources shared by groups of NestedSegmentSource entries. + // Used for multi-extent encrypted volumes — e.g. a Blu-ray main feature + // with hundreds of extents that all read from the same encrypted RAR. 
+ // Each entry holds the full Segments + AesKey + AesIv + InnerVolumeSize + // once; the corresponding NestedSegmentSource entries reference it by + // 1-based index via shared_outer_source_index, storing only + // inner_offset + inner_length per-extent. Cuts the on-disk .meta size + // from O(extents * segments) to O(extents + segments) for these files. + SharedOuterSources []*NestedSegmentSource `protobuf:"bytes,16,rep,name=shared_outer_sources,json=sharedOuterSources,proto3" json:"shared_outer_sources,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *FileMetadata) Reset() { *x = FileMetadata{} - mi := &file_internal_metadata_proto_metadata_proto_msgTypes[3] + mi := &file_internal_metadata_proto_metadata_proto_msgTypes[4] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -376,7 +464,7 @@ func (x *FileMetadata) String() string { func (*FileMetadata) ProtoMessage() {} func (x *FileMetadata) ProtoReflect() protoreflect.Message { - mi := &file_internal_metadata_proto_metadata_proto_msgTypes[3] + mi := &file_internal_metadata_proto_metadata_proto_msgTypes[4] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -389,7 +477,7 @@ func (x *FileMetadata) ProtoReflect() protoreflect.Message { // Deprecated: Use FileMetadata.ProtoReflect.Descriptor instead. 
func (*FileMetadata) Descriptor() ([]byte, []int) { - return file_internal_metadata_proto_metadata_proto_rawDescGZIP(), []int{3} + return file_internal_metadata_proto_metadata_proto_rawDescGZIP(), []int{4} } func (x *FileMetadata) GetFileSize() int64 { @@ -497,6 +585,20 @@ func (x *FileMetadata) GetNestedSources() []*NestedSegmentSource { return nil } +func (x *FileMetadata) GetClipBoundaries() []*ClipBoundary { + if x != nil { + return x.ClipBoundaries + } + return nil +} + +func (x *FileMetadata) GetSharedOuterSources() []*NestedSegmentSource { + if x != nil { + return x.SharedOuterSources + } + return nil +} + var File_internal_metadata_proto_metadata_proto protoreflect.FileDescriptor const file_internal_metadata_proto_metadata_proto_rawDesc = "" + @@ -511,14 +613,18 @@ const file_internal_metadata_proto_metadata_proto_rawDesc = "" + "\x11Par2FileReference\x12\x1a\n" + "\bfilename\x18\x01 \x01(\tR\bfilename\x12\x1b\n" + "\tfile_size\x18\x02 \x01(\x03R\bfileSize\x128\n" + - "\fsegment_data\x18\x03 \x03(\v2\x15.metadata.SegmentDataR\vsegmentData\"\xea\x01\n" + + "\fsegment_data\x18\x03 \x03(\v2\x15.metadata.SegmentDataR\vsegmentData\"\xa5\x02\n" + "\x13NestedSegmentSource\x121\n" + "\bsegments\x18\x01 \x03(\v2\x15.metadata.SegmentDataR\bsegments\x12\x17\n" + "\aaes_key\x18\x02 \x01(\fR\x06aesKey\x12\x15\n" + "\x06aes_iv\x18\x03 \x01(\fR\x05aesIv\x12!\n" + "\finner_offset\x18\x04 \x01(\x03R\vinnerOffset\x12!\n" + "\finner_length\x18\x05 \x01(\x03R\vinnerLength\x12*\n" + - "\x11inner_volume_size\x18\x06 \x01(\x03R\x0finnerVolumeSize\"\xd3\x04\n" + + "\x11inner_volume_size\x18\x06 \x01(\x03R\x0finnerVolumeSize\x129\n" + + "\x19shared_outer_source_index\x18\a \x01(\x05R\x16sharedOuterSourceIndex\"F\n" + + "\fClipBoundary\x12\x19\n" + + "\bbyte_len\x18\x01 \x01(\x03R\abyteLen\x12\x1b\n" + + "\tdelta_90k\x18\x02 \x01(\x03R\bdelta90k\"\xe5\x05\n" + "\fFileMetadata\x12\x1b\n" + "\tfile_size\x18\x01 \x01(\x03R\bfileSize\x12&\n" + "\x0fsource_nzb_path\x18\x02 
\x01(\tR\rsourceNzbPath\x12,\n" + @@ -540,7 +646,9 @@ const file_internal_metadata_proto_metadata_proto_rawDesc = "" + "\n" + "par2_files\x18\r \x03(\v2\x1b.metadata.Par2FileReferenceR\tpar2Files\x12\x1b\n" + "\tnzbdav_id\x18\x0e \x01(\tR\bnzbdavId\x12D\n" + - "\x0enested_sources\x18\x0f \x03(\v2\x1d.metadata.NestedSegmentSourceR\rnestedSources*8\n" + + "\x0enested_sources\x18\x0f \x03(\v2\x1d.metadata.NestedSegmentSourceR\rnestedSources\x12?\n" + + "\x0fclip_boundaries\x18\x11 \x03(\v2\x16.metadata.ClipBoundaryR\x0eclipBoundaries\x12O\n" + + "\x14shared_outer_sources\x18\x10 \x03(\v2\x1d.metadata.NestedSegmentSourceR\x12sharedOuterSources*8\n" + "\n" + "Encryption\x12\b\n" + "\x04NONE\x10\x00\x12\n" + @@ -567,14 +675,15 @@ func file_internal_metadata_proto_metadata_proto_rawDescGZIP() []byte { } var file_internal_metadata_proto_metadata_proto_enumTypes = make([]protoimpl.EnumInfo, 2) -var file_internal_metadata_proto_metadata_proto_msgTypes = make([]protoimpl.MessageInfo, 4) +var file_internal_metadata_proto_metadata_proto_msgTypes = make([]protoimpl.MessageInfo, 5) var file_internal_metadata_proto_metadata_proto_goTypes = []any{ (Encryption)(0), // 0: metadata.Encryption (FileStatus)(0), // 1: metadata.FileStatus (*SegmentData)(nil), // 2: metadata.SegmentData (*Par2FileReference)(nil), // 3: metadata.Par2FileReference (*NestedSegmentSource)(nil), // 4: metadata.NestedSegmentSource - (*FileMetadata)(nil), // 5: metadata.FileMetadata + (*ClipBoundary)(nil), // 5: metadata.ClipBoundary + (*FileMetadata)(nil), // 6: metadata.FileMetadata } var file_internal_metadata_proto_metadata_proto_depIdxs = []int32{ 2, // 0: metadata.Par2FileReference.segment_data:type_name -> metadata.SegmentData @@ -584,11 +693,13 @@ var file_internal_metadata_proto_metadata_proto_depIdxs = []int32{ 2, // 4: metadata.FileMetadata.segment_data:type_name -> metadata.SegmentData 3, // 5: metadata.FileMetadata.par2_files:type_name -> metadata.Par2FileReference 4, // 6: 
metadata.FileMetadata.nested_sources:type_name -> metadata.NestedSegmentSource - 7, // [7:7] is the sub-list for method output_type - 7, // [7:7] is the sub-list for method input_type - 7, // [7:7] is the sub-list for extension type_name - 7, // [7:7] is the sub-list for extension extendee - 0, // [0:7] is the sub-list for field type_name + 5, // 7: metadata.FileMetadata.clip_boundaries:type_name -> metadata.ClipBoundary + 4, // 8: metadata.FileMetadata.shared_outer_sources:type_name -> metadata.NestedSegmentSource + 9, // [9:9] is the sub-list for method output_type + 9, // [9:9] is the sub-list for method input_type + 9, // [9:9] is the sub-list for extension type_name + 9, // [9:9] is the sub-list for extension extendee + 0, // [0:9] is the sub-list for field type_name } func init() { file_internal_metadata_proto_metadata_proto_init() } @@ -602,7 +713,7 @@ func file_internal_metadata_proto_metadata_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_internal_metadata_proto_metadata_proto_rawDesc), len(file_internal_metadata_proto_metadata_proto_rawDesc)), NumEnums: 2, - NumMessages: 4, + NumMessages: 5, NumExtensions: 0, NumServices: 0, }, diff --git a/internal/metadata/proto/metadata.proto b/internal/metadata/proto/metadata.proto index 94a03c161..30c3ca65d 100644 --- a/internal/metadata/proto/metadata.proto +++ b/internal/metadata/proto/metadata.proto @@ -42,6 +42,24 @@ message NestedSegmentSource { int64 inner_offset = 4; // Offset within decrypted inner volume where file data starts int64 inner_length = 5; // Bytes of target file in this source int64 inner_volume_size = 6; // Total decrypted size of inner volume (for AES cipher) + + // When > 0 the `segments`, `aes_key`, `aes_iv`, `inner_volume_size` + // fields above are intentionally left empty on disk and inherit from + // FileMetadata.shared_outer_sources[shared_outer_source_index - 1]. 
+ // Only `inner_offset` and `inner_length` are stored per-extent. 0 + // (proto default) means "no sharing" — identical to the legacy on-disk + // layout, so old .meta files keep working without migration. + int32 shared_outer_source_index = 7; +} + +// ClipBoundary is one clip in a byte-concatenated multi-clip BD main feature. +// byte_len is the clip's size in the virtual file (a whole number of 192-byte +// BDAV source packets). delta_90k is the signed 90 kHz offset added to PTS/DTS +// (and delta_90k to the 90 kHz-equivalent of PCR base) for packets inside this +// clip's byte range, lifting the clip onto the unified continuous timeline. +message ClipBoundary { + int64 byte_len = 1; + int64 delta_90k = 2; } // FileMetadata represents a single virtual file in the filesystem @@ -62,5 +80,24 @@ message FileMetadata { repeated Par2FileReference par2_files = 13; // Associated PAR2 repair files string nzbdav_id = 14; // ID to maintain compatibility with nzbdav repeated NestedSegmentSource nested_sources = 15; // Nested RAR sources (when file is inside inner RAR within outer RAR) + + // Per-clip timeline table for Blu-ray main-feature virtual files that + // byte-concatenate multiple M2TS clips (each with its own independent + // PTS/DTS/PCR base). At read time a TS-aware filter adds each clip's + // delta_90k to the timestamps inside that clip's byte range, producing a + // single continuous timeline so ffprobe/players report the correct + // duration and seek accurately. Empty for every other file type, which + // disables the filter entirely (zero overhead, total safety). + repeated ClipBoundary clip_boundaries = 17; + + // Outer sources shared by groups of NestedSegmentSource entries. + // Used for multi-extent encrypted volumes — e.g. a Blu-ray main feature + // with hundreds of extents that all read from the same encrypted RAR. 
+ // Each entry holds the full Segments + AesKey + AesIv + InnerVolumeSize + // once; the corresponding NestedSegmentSource entries reference it by + // 1-based index via shared_outer_source_index, storing only + // inner_offset + inner_length per-extent. Cuts the on-disk .meta size + // from O(extents * segments) to O(extents + segments) for these files. + repeated NestedSegmentSource shared_outer_sources = 16; } \ No newline at end of file diff --git a/internal/metadata/service.go b/internal/metadata/service.go index 3f099d0f8..b891ea108 100644 --- a/internal/metadata/service.go +++ b/internal/metadata/service.go @@ -14,6 +14,7 @@ import ( lru "github.com/hashicorp/golang-lru/v2" metapb "github.com/javi11/altmount/internal/metadata/proto" "github.com/javi11/altmount/internal/utils" + "google.golang.org/protobuf/encoding/protowire" "google.golang.org/protobuf/proto" ) @@ -151,6 +152,14 @@ func (ms *MetadataService) ReadFileMetadata(virtualPath string) (*metapb.FileMet return nil, fmt.Errorf("failed to unmarshal metadata: %w", err) } + // Resolve shared_outer_source_index references on nested sources. + // Files imported with the dedupe writer store outer segments once at + // the FileMetadata level; we re-populate per-source slice headers + // here so the rest of the read path is unaware of the difference. + if err := ExpandSharedOuterSources(metadata); err != nil { + return nil, fmt.Errorf("failed to expand shared outer sources: %w", err) + } + // Read ID from sidecar file (compatibility mode) idPath := metadataPath + ".id" if idData, err := os.ReadFile(idPath); err == nil { @@ -167,16 +176,129 @@ func (ms *MetadataService) ReadFileMetadata(virtualPath string) (*metapb.FileMet return metadata, nil } +// liteScanBytes is how much of a .meta file we read up front when serving a +// directory listing. 
The lite fields (file_size=1, status=3, modified_at=5) +// are all varints near the start of the proto; the only intervening field +// that can be large is source_nzb_path=2 (a string). 4 KiB is comfortable +// headroom — virtually every real-world .meta has all three within the first +// ~200 bytes. Avoids reading and unmarshalling the full proto (which can be +// MBs for files with many NestedSources or SegmentData entries — the exact +// pattern that caused a 7.94 GB allocation spike during FileBrowser +// recursive PROPFIND walks). +const liteScanBytes = 4096 + // ReadFileMetadataLite reads only the lightweight fields (size, modtime, status) -// needed for directory listings. It uses a separate cache so that Readdir does not -// pull full FileMetadata protos (with SegmentData, etc.) into the main cache. +// needed for directory listings. On cache miss it reads at most liteScanBytes +// from the .meta file and scans the proto wire format for the three lite +// fields, never instantiating the full FileMetadata proto or its +// NestedSources/SegmentData slices. Falls back to a full read in the rare +// case the partial buffer doesn't cover the lite fields. func (ms *MetadataService) ReadFileMetadataLite(virtualPath string) (*FileMetadataLite, error) { // Check lite cache first if cached, ok := ms.liteCache.Get(virtualPath); ok { return cached, nil } - // Cache miss — read from disk and deserialize + // Cache miss — read the head of the file and scan wire-format fields. 
+ filename := filepath.Base(virtualPath) + metadataDir := filepath.Join(ms.rootPath, filepath.Dir(virtualPath)) + metadataPath := filepath.Join(metadataDir, filename+".meta") + + f, err := os.Open(metadataPath) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("failed to open metadata file: %w", err) + } + defer f.Close() + + buf := make([]byte, liteScanBytes) + n, err := io.ReadFull(f, buf) + if err != nil && err != io.ErrUnexpectedEOF && err != io.EOF { + return nil, fmt.Errorf("failed to read metadata head: %w", err) + } + buf = buf[:n] + + lite, ok := parseLiteFields(buf) + if !ok { + // Lite fields not located within liteScanBytes (extreme/unusual + // source_nzb_path length, future schema reordering, etc). Fall back + // to the full read so the listing is correct even at the cost of + // transient allocation. + return ms.readFileMetadataLiteFull(virtualPath) + } + ms.liteCache.Add(virtualPath, lite) + return lite, nil +} + +// parseLiteFields walks proto wire format inside buf and extracts the lite +// fields without allocating a full FileMetadata struct. Returns (lite, true) +// once both file_size (field 1) and status (field 3) are seen — modified_at +// (field 5) is best-effort within the same buffer. Returns (nil, false) if +// the buffer is exhausted without the required fields, signalling the +// caller to fall back to a full read. +// +// Field numbers must match metadata.proto. Tested via TestReadFileMetadataLite_* +// in service_test.go. 
+func parseLiteFields(buf []byte) (*FileMetadataLite, bool) { + var lite FileMetadataLite + var sawFileSize, sawStatus bool + for len(buf) > 0 { + num, typ, tagLen := protowire.ConsumeTag(buf) + if tagLen < 0 { + return nil, false + } + buf = buf[tagLen:] + switch num { + case 1: // file_size int64 (varint) + v, l := protowire.ConsumeVarint(buf) + if l < 0 { + return nil, false + } + lite.FileSize = int64(v) + sawFileSize = true + buf = buf[l:] + case 3: // status FileStatus (varint enum) + v, l := protowire.ConsumeVarint(buf) + if l < 0 { + return nil, false + } + lite.Status = metapb.FileStatus(v) + sawStatus = true + buf = buf[l:] + case 5: // modified_at int64 (varint) + v, l := protowire.ConsumeVarint(buf) + if l < 0 { + return nil, false + } + lite.ModifiedAt = int64(v) + buf = buf[l:] + default: + l := protowire.ConsumeFieldValue(num, typ, buf) + if l < 0 { + return nil, false + } + buf = buf[l:] + } + // Early exit once required fields are captured. modified_at is + // best-effort within the partial buffer; if it sits past + // liteScanBytes it stays zero and the listing still renders. + if sawFileSize && sawStatus && lite.ModifiedAt != 0 { + return &lite, true + } + } + if sawFileSize && sawStatus { + return &lite, true + } + return nil, false +} + +// readFileMetadataLiteFull is the legacy slow path: read the entire .meta +// file and unmarshal the full proto. Only used as a fallback when the +// partial-read scan in ReadFileMetadataLite fails to locate the lite +// fields within liteScanBytes. 
+func (ms *MetadataService) readFileMetadataLiteFull(virtualPath string) (*FileMetadataLite, error) { filename := filepath.Base(virtualPath) metadataDir := filepath.Join(ms.rootPath, filepath.Dir(virtualPath)) metadataPath := filepath.Join(metadataDir, filename+".meta") @@ -194,14 +316,12 @@ func (ms *MetadataService) ReadFileMetadataLite(virtualPath string) (*FileMetada return nil, fmt.Errorf("failed to unmarshal metadata: %w", err) } - // Store only the lightweight version — let the full proto be GC'd lite := &FileMetadataLite{ FileSize: metadata.FileSize, ModifiedAt: metadata.ModifiedAt, Status: metadata.Status, } ms.liteCache.Add(virtualPath, lite) - return lite, nil } diff --git a/internal/metadata/service_test.go b/internal/metadata/service_test.go index 3f684e2ee..8d40a871a 100644 --- a/internal/metadata/service_test.go +++ b/internal/metadata/service_test.go @@ -151,3 +151,110 @@ func TestCleanupOrphanedIDSymlinks_ContextCancellation(t *testing.T) { _, err := ms.CleanupOrphanedIDSymlinks(ctx) assert.ErrorIs(t, err, context.Canceled) } + +// TestReadFileMetadataLite_DoesNotReadFullProto pins the fast path: when the +// `.meta` proto is multi-MB (because the file has thousands of NestedSources +// or SegmentData entries — the exact shape that caused a 7.94 GB +// PROPFIND allocation spike), ReadFileMetadataLite must read only the head +// of the file and never instantiate the giant proto. We measure this via +// the file size we write vs. the bytes read by the lite path. +func TestReadFileMetadataLite_DoesNotReadFullProto(t *testing.T) { + root := t.TempDir() + ms := NewMetadataService(root) + + virtualPath := filepath.Join("movies", "huge.m2ts") + + // Build a FileMetadata with thousands of NestedSources so the on-disk + // proto is hundreds of KB — large enough that a regression to the + // full os.ReadFile + proto.Unmarshal path would allocate >>liteScanBytes + // and be caught by the heap-delta assertion below. 
+ nested := make([]*metapb.NestedSegmentSource, 0, 5000) + for i := range 5000 { + nested = append(nested, &metapb.NestedSegmentSource{ + Segments: []*metapb.SegmentData{ + {Id: "msg-id-with-a-typical-length@server.example", StartOffset: int64(i * 1024), EndOffset: int64((i + 1) * 1024), SegmentSize: 1024}, + }, + InnerOffset: 0, + InnerLength: 1024, + InnerVolumeSize: 1024, + }) + } + meta := ms.CreateFileMetadata( + 17_860_995_072, "Avatar.nzb", metapb.FileStatus_FILE_STATUS_HEALTHY, + nil, metapb.Encryption_NONE, "", "", nil, nil, 0, nil, "huge-nzbdav-id", + ) + meta.NestedSources = nested + require.NoError(t, ms.WriteFileMetadata(virtualPath, meta)) + + // Confirm the on-disk file is at least 200 KB — the partial-read + // budget is 4 KB so anything substantially larger gives the heap-delta + // assertion enough headroom to catch a regression. + stat, err := os.Stat(ms.GetMetadataFilePath(virtualPath)) + require.NoError(t, err) + require.Greater(t, stat.Size(), int64(200<<10), "test setup should produce a >200KB .meta file to make the fast-path savings observable") + + // Drop the liteCache entry written by WriteFileMetadata so we hit the + // disk-read path under test. + ms.liteCache.Purge() + + // Snapshot heap allocations before / after the call. The full-read + // implementation would allocate at least stat.Size() bytes (for the + // os.ReadFile buffer) plus the unmarshalled proto. The partial-read + // implementation should allocate well under 64 KiB. + runtime.GC() + var before, after runtime.MemStats + runtime.ReadMemStats(&before) + + lite, err := ms.ReadFileMetadataLite(virtualPath) + require.NoError(t, err) + require.NotNil(t, lite) + + runtime.ReadMemStats(&after) + delta := after.TotalAlloc - before.TotalAlloc + t.Logf("ReadFileMetadataLite allocated %d bytes (on-disk .meta = %d bytes)", delta, stat.Size()) + + // Correctness: lite must reflect the values we wrote. 
+ assert.Equal(t, int64(17_860_995_072), lite.FileSize) + assert.Equal(t, metapb.FileStatus_FILE_STATUS_HEALTHY, lite.Status) + + // Regression guard: the fast path must allocate dramatically less than + // the full file. Use 5× liteScanBytes as a comfortable upper bound that + // still catches a regression where the implementation re-reads the + // whole file. + const maxExpectedAlloc = 5 * liteScanBytes + assert.LessOrEqualf(t, delta, uint64(maxExpectedAlloc), + "ReadFileMetadataLite allocated %d bytes — should be ≤ %d. A regression to the full os.ReadFile + proto.Unmarshal would allocate >= the on-disk size (%d).", + delta, maxExpectedAlloc, stat.Size()) +} + +// TestReadFileMetadataLite_FallsBackOnLongHeader covers the edge where the +// lite fields aren't reachable within liteScanBytes (e.g., a future schema +// change places one after a very large field). The fallback path produces +// the same correct lite struct, just by reading the full file. +func TestReadFileMetadataLite_FallsBackOnLongHeader(t *testing.T) { + root := t.TempDir() + ms := NewMetadataService(root) + + virtualPath := filepath.Join("movies", "long-header.mkv") + + // Craft a SourceNzbPath longer than liteScanBytes so the lite fields + // after it (status, modified_at) fall past the partial-read window. + // file_size (field 1) is before it, so the partial-read scan sees + // FileSize but not Status/ModifiedAt → falls back to full read. 
+ longPath := make([]byte, liteScanBytes+512) + for i := range longPath { + longPath[i] = 'a' + } + meta := ms.CreateFileMetadata( + 1234, string(longPath), metapb.FileStatus_FILE_STATUS_HEALTHY, + nil, metapb.Encryption_NONE, "", "", nil, nil, 0, nil, "fallback-id", + ) + require.NoError(t, ms.WriteFileMetadata(virtualPath, meta)) + ms.liteCache.Purge() + + lite, err := ms.ReadFileMetadataLite(virtualPath) + require.NoError(t, err) + require.NotNil(t, lite) + assert.Equal(t, int64(1234), lite.FileSize) + assert.Equal(t, metapb.FileStatus_FILE_STATUS_HEALTHY, lite.Status) +} diff --git a/internal/nzbfilesystem/metadata_remote_file.go b/internal/nzbfilesystem/metadata_remote_file.go index 8d2c027b9..93bb59275 100644 --- a/internal/nzbfilesystem/metadata_remote_file.go +++ b/internal/nzbfilesystem/metadata_remote_file.go @@ -253,8 +253,9 @@ func (mrf *MetadataRemoteFile) OpenFile(ctx context.Context, name string) (bool, Salt: fileMeta.Salt, AesKey: fileMeta.AesKey, AesIv: fileMeta.AesIv, - SegmentData: fileMeta.SegmentData, - NestedSources: fileMeta.NestedSources, + SegmentData: fileMeta.SegmentData, + NestedSources: fileMeta.NestedSources, + ClipBoundaries: fileMeta.ClipBoundaries, } // Create a metadata-based virtual file handle @@ -766,6 +767,10 @@ type fileHandleMeta struct { AesIv []byte SegmentData []*metapb.SegmentData NestedSources []*metapb.NestedSegmentSource + // ClipBoundaries is the per-clip timeline table for a multi-clip BD main + // feature. Non-empty enables the continuous-timeline TS remux on reads; + // empty (every other file) bypasses it entirely. 
+ ClipBoundaries []*metapb.ClipBoundary } // MetadataVirtualFile implements afero.File for metadata-backed virtual files @@ -790,6 +795,11 @@ type MetadataVirtualFile struct { segmentStore usenet.SegmentStore // optional segment cache segmentIndexOnce sync.Once // guards lazy init of segmentIndex + // clipSpans is the lazily-built absolute byte-range + delta table for the + // continuous-timeline remux, derived once from meta.ClipBoundaries. + clipSpans []clipSpan + clipSpansOnce sync.Once + // Reader state and position tracking reader io.ReadCloser readerInitialized bool @@ -1051,6 +1061,16 @@ func (mvf *MetadataVirtualFile) ReadAtContext(readCtx context.Context, p []byte, // createReaderAtOffset creates an independent reader for reading at a specific offset. // This reader is self-contained and can be used concurrently with other readers. func (mvf *MetadataVirtualFile) createReaderAtOffset(start, end int64) (io.ReadCloser, error) { + reader, err := mvf.createRawReaderAtOffset(start, end) + if err != nil { + return nil, err + } + return mvf.maybeWrapRemux(reader, start), nil +} + +// createRawReaderAtOffset builds the underlying reader for [start,end] without +// the continuous-timeline remux wrapper. +func (mvf *MetadataVirtualFile) createRawReaderAtOffset(start, end int64) (io.ReadCloser, error) { if mvf.poolManager == nil { return nil, ErrNoUsenetPool } @@ -1072,6 +1092,23 @@ func (mvf *MetadataVirtualFile) createReaderAtOffset(start, end int64) (io.ReadC return mvf.createUsenetReader(mvf.ctx, start, end) } +// maybeWrapRemux wraps reader in a continuous-timeline TS remux when the file +// carries a per-clip boundary table (multi-clip BD main feature). startOff is +// the absolute file offset of reader's first byte. For every other file the +// table is empty and reader is returned unchanged (zero overhead). 
+func (mvf *MetadataVirtualFile) maybeWrapRemux(reader io.ReadCloser, startOff int64) io.ReadCloser { + if len(mvf.meta.ClipBoundaries) == 0 { + return reader + } + mvf.clipSpansOnce.Do(func() { + mvf.clipSpans = buildClipSpans(mvf.meta.ClipBoundaries) + }) + if len(mvf.clipSpans) == 0 { + return reader + } + return newTSRemuxReader(reader, mvf.clipSpans, startOff) +} + // createEncryptedReaderAtOffset creates an encrypted reader for a specific offset range func (mvf *MetadataVirtualFile) createEncryptedReaderAtOffset(start, end int64) (io.ReadCloser, error) { switch mvf.meta.Encryption { @@ -1345,6 +1382,12 @@ func (mvf *MetadataVirtualFile) ensureReader() error { mvf.reader = ur } + // Apply the continuous-timeline remux for multi-clip BD main features. + // No-op (returns the same reader) for every other file. The reader yields + // bytes from absolute offset `start`, which the wrapper needs for packet + // framing and per-clip delta selection. + mvf.reader = mvf.maybeWrapRemux(mvf.reader, start) + mvf.readerInitialized = true return nil } diff --git a/internal/nzbfilesystem/tsremux.go b/internal/nzbfilesystem/tsremux.go new file mode 100644 index 000000000..a81550a61 --- /dev/null +++ b/internal/nzbfilesystem/tsremux.go @@ -0,0 +1,176 @@ +package nzbfilesystem + +// Continuous-timeline remux core for Blu-ray main-feature virtual files. +// +// A merged BD main feature byte-concatenates N M2TS clips, each carrying its +// OWN independent PTS/DTS/PCR timeline (each starts near its own base). A +// player resyncs on the discontinuities so playback works, but ffprobe and +// seeking compute time from PTS deltas, which are meaningless across clip +// boundaries — hence "Duration: 00:26:21" for a 3h17m movie. +// +// This file holds the pure, stateless byte transform: given a TS packet and a +// 90 kHz delta, add the delta to every timestamp (PTS, DTS, PCR) found in the +// packet, in place. 
// All timestamp fields are fixed-width, so the rewrite is
// byte-length preserving — the virtual file size and every byte offset are
// unchanged, so VFS byte-mapping and range requests keep working untouched.
//
// Nothing here does I/O or knows about clips; the caller supplies the delta
// per packet based on which clip the packet's byte offset falls in. That keeps
// this layer trivially testable (see tsremux_test.go) and is the feasibility
// gate for the whole continuous-timeline feature.

// Packet framing constants and the timestamp wrap-around modulus.
const (
	// tsSync is the value expected in the first byte of a TS packet.
	tsSync = 0x47
	// tsPacketLen is the length of a plain TS packet.
	tsPacketLen = 188
	// bdavPacketLen is the length of a BDAV source packet: a 4-byte
	// TP_extra_header followed by a 188-byte TS packet.
	bdavPacketLen = 192
	// ptsModulus is 2^33; PTS/DTS/PCR-base are 33-bit values that wrap here.
	// At 90 kHz that is ~26.5 h, far above any single feature's runtime, but
	// we still wrap defensively so a near-max base plus a delta stays legal.
	ptsModulus int64 = 1 << 33
)

// addMod33 adds delta to v and folds the sum into the 33-bit timestamp range
// [0, ptsModulus). A negative delta is allowed (a clip's pts_base can exceed
// its timeline_start).
func addMod33(v, delta int64) int64 {
	wrapped := (v + delta) % ptsModulus
	if wrapped >= 0 {
		return wrapped
	}
	// Go's % keeps the sign of the dividend, so a negative remainder is
	// shifted up by one modulus to land back in range.
	return wrapped + ptsModulus
}

// rewritePacket adds delta90k (a 90 kHz signed offset) to the PTS, DTS, and
// PCR timestamps inside one source packet. packetSize is 192 (BDAV) or 188.
// The packet slice must be exactly packetSize bytes. Returns true if any
// timestamp was rewritten. Packets without timestamps (continuation packets,
// PSI, null) are left untouched.
//
// BDAV's 4-byte TP_extra_header (which carries a 27 MHz arrival timestamp) is
// intentionally NOT rewritten: ATS feeds the player's input-buffer model, not
// presentation timing or ffprobe's duration estimate. Leaving it avoids a
// whole extra class of bugs; revisit only if a hardware player needs it.
+func rewritePacket(pkt []byte, packetSize int, delta90k int64) bool { + if delta90k == 0 || len(pkt) != packetSize { + return false + } + // Locate the 188-byte TS packet within the source packet. + off := 0 + if packetSize == bdavPacketLen { + off = 4 + } + ts := pkt[off : off+tsPacketLen] + if ts[0] != tsSync { + return false + } + + pusi := ts[1]&0x40 != 0 + afc := (ts[3] >> 4) & 0x03 // adaptation_field_control + + changed := false + + // --- PCR (adaptation field) --- + // AFC 0b10 = adaptation only, 0b11 = adaptation + payload. + payloadStart := 4 + if afc == 0x02 || afc == 0x03 { + afLen := int(ts[4]) + // adaptation_field_length counts bytes after itself. + payloadStart = 5 + afLen + if afLen >= 1 && 5+afLen <= tsPacketLen { + afFlags := ts[5] + if afFlags&0x10 != 0 { // PCR_flag + // PCR occupies the 6 bytes at ts[6..12). + if 6+6 <= tsPacketLen { + if rewritePCR(ts[6:12], delta90k) { + changed = true + } + } + } + } + } + + // --- PTS / DTS (PES header) --- + // Only the first TS packet of a PES (PUSI=1) carries the PES header with + // the timestamps; continuation packets have none. + if pusi && (afc == 0x01 || afc == 0x03) && payloadStart+9 <= tsPacketLen { + p := ts[payloadStart:] + // PES start code 0x000001. + if len(p) >= 9 && p[0] == 0x00 && p[1] == 0x00 && p[2] == 0x01 { + // Optional PES header present only when top 2 bits of p[6] == 10. + if p[6]&0xC0 == 0x80 { + ptsDtsFlags := (p[7] & 0xC0) >> 6 + // 0b10 = PTS only; 0b11 = PTS + DTS. + if ptsDtsFlags == 0x02 || ptsDtsFlags == 0x03 { + if payloadStart+9+5 <= tsPacketLen { + if rewriteTS(p[9:14], delta90k) { + changed = true + } + } + } + if ptsDtsFlags == 0x03 { + if payloadStart+14+5 <= tsPacketLen { + if rewriteTS(p[14:19], delta90k) { + changed = true + } + } + } + } + } + } + + return changed +} + +// readTS decodes a 33-bit PTS/DTS from a 5-byte field. 
+// +// b[0]: prefix(7..4) PTS[32..30](3..1) marker(0) +// b[1]: PTS[29..22] +// b[2]: PTS[21..15](7..1) marker(0) +// b[3]: PTS[14..7] +// b[4]: PTS[6..0](7..1) marker(0) +func readTS(b []byte) int64 { + return (int64(b[0]&0x0E) << 29) | + (int64(b[1]) << 22) | + (int64(b[2]&0xFE) << 14) | + (int64(b[3]) << 7) | + (int64(b[4]) >> 1) +} + +// writeTS encodes v back into the 5-byte field, preserving the prefix nibble +// (bits 7..4 of b[0]) and all three marker bits (bit 0 of b[0], b[2], b[4]). +func writeTS(b []byte, v int64) { + b[0] = (b[0] & 0xF1) | byte((v>>29)&0x0E) + b[1] = byte(v >> 22) + b[2] = (b[2] & 0x01) | byte((v>>14)&0xFE) + b[3] = byte(v >> 7) + b[4] = (b[4] & 0x01) | byte((v<<1)&0xFE) +} + +// rewriteTS adds delta to a PTS/DTS field in place. +func rewriteTS(b []byte, delta int64) bool { + writeTS(b, addMod33(readTS(b), delta)) + return true +} + +// rewritePCR adds delta (90 kHz) to a 6-byte PCR field. The 27 MHz PCR value +// is base*300 + ext; adding delta90k*300 is equivalent to adding delta90k to +// base and leaving ext untouched. +// +// b[0..3] + top bit of b[4] : program_clock_reference_base (33 bits) +// b[4] bits 6..1 : reserved +// b[4] bit 0 + b[5] : program_clock_reference_extension (9 bits) +func rewritePCR(b []byte, delta90k int64) bool { + base := (int64(b[0]) << 25) | + (int64(b[1]) << 17) | + (int64(b[2]) << 9) | + (int64(b[3]) << 1) | + (int64(b[4]) >> 7) + base = addMod33(base, delta90k) + b[0] = byte(base >> 25) + b[1] = byte(base >> 17) + b[2] = byte(base >> 9) + b[3] = byte(base >> 1) + // Preserve b[4] low 7 bits (reserved + ext high bit); set bit 7 = base LSB. + b[4] = byte((base&0x01)<<7) | (b[4] & 0x7F) + // b[5] (ext low byte) unchanged. 
+ return true +} diff --git a/internal/nzbfilesystem/tsremux_reader.go b/internal/nzbfilesystem/tsremux_reader.go new file mode 100644 index 000000000..4ac04ded5 --- /dev/null +++ b/internal/nzbfilesystem/tsremux_reader.go @@ -0,0 +1,182 @@ +package nzbfilesystem + +import ( + "bytes" + "io" + + metapb "github.com/javi11/altmount/internal/metadata/proto" +) + +// clipSpan is one clip's absolute byte range in the virtual file plus the +// 90 kHz timeline delta to add to every timestamp inside it. +type clipSpan struct { + start int64 // inclusive absolute byte offset + end int64 // inclusive absolute byte offset (start + byteLen - 1) + delta int64 // 90 kHz offset added to PTS/DTS/PCR-base of packets in this clip +} + +// buildClipSpans turns the proto ClipBoundary table (byte_len + delta per clip, +// in output order) into absolute byte ranges via a prefix sum. Returns nil +// when the table is empty, which keeps the remux disabled. +func buildClipSpans(boundaries []*metapb.ClipBoundary) []clipSpan { + if len(boundaries) == 0 { + return nil + } + spans := make([]clipSpan, 0, len(boundaries)) + var off int64 + for _, b := range boundaries { + if b.ByteLen <= 0 { + continue + } + spans = append(spans, clipSpan{start: off, end: off + b.ByteLen - 1, delta: b.Delta_90K}) + off += b.ByteLen + } + if len(spans) == 0 { + return nil + } + return spans +} + +// tsRemuxReader wraps an underlying reader that yields the bytes of a +// byte-concatenated multi-clip Blu-ray main feature starting at absolute offset +// startOff. As bytes stream through, it frames them into BDAV/TS source packets +// (aligned to each clip's byte start) and adds that clip's 90 kHz delta to the +// PTS/DTS/PCR timestamps, producing a single continuous timeline. The transform +// is byte-length preserving, so the wrapper is a drop-in io.ReadCloser that does +// not change offsets or sizes. 
+// +// It is a streaming reader: it buffers across Read calls so packet framing is +// maintained for an entire sequential run. Only the leading bytes of a read +// that starts mid-packet are passed through unrewritten (their timestamps, if +// any, live in the packet header before startOff); every fully-streamed packet +// is rewritten. +type tsRemuxReader struct { + inner io.ReadCloser + spans []clipSpan + absPos int64 // absolute offset of the next byte to pull from inner + packetSize int // 192 (BDAV); fixed for BD main features + disabled bool // true if the stream isn't recognisable TS → pure passthrough + syncChecked bool // whether the first aligned packet's sync byte was validated + out bytes.Buffer // rewritten bytes ready to deliver +} + +// newTSRemuxReader wraps inner. startOff is the absolute file offset of inner's +// first byte. spans must be non-empty (callers gate on that). +func newTSRemuxReader(inner io.ReadCloser, spans []clipSpan, startOff int64) *tsRemuxReader { + return &tsRemuxReader{inner: inner, spans: spans, absPos: startOff} +} + +func (r *tsRemuxReader) Close() error { return r.inner.Close() } + +// clipFor returns the span containing absolute offset off, or nil if past the +// last clip (then bytes are passed through raw). +func (r *tsRemuxReader) clipFor(off int64) *clipSpan { + // Binary search: find the last span whose start <= off. + lo, hi := 0, len(r.spans)-1 + idx := -1 + for lo <= hi { + mid := (lo + hi) / 2 + if r.spans[mid].start <= off { + idx = mid + lo = mid + 1 + } else { + hi = mid - 1 + } + } + if idx < 0 || off > r.spans[idx].end { + return nil + } + return &r.spans[idx] +} + +func (r *tsRemuxReader) Read(p []byte) (int, error) { + if len(p) == 0 { + return 0, nil + } + // Fill `out` until it can satisfy the request or inner is exhausted. 
+ for r.out.Len() < len(p) { + if err := r.fill(); err != nil { + if r.out.Len() > 0 { + break // deliver what we have; surface the error on the next call + } + n, _ := r.out.Read(p) + return n, err + } + } + return r.out.Read(p) +} + +// fill pulls the next chunk from inner, rewrites it if it is a complete packet +// aligned within its clip, and appends it to out. Returns io.EOF when inner is +// exhausted. +// +// Packet framing is derived from the CLIP grid (each clip's bytes start at +// clip.start and are a whole number of 192-byte BDAV source packets), NOT from +// probing the stream head. This is what makes the wrapper correct for reads +// that begin at an arbitrary (unaligned) offset — e.g. ffprobe seeking to +// near-EOF to estimate duration. A start that lands mid-packet emits the +// leading partial bytes raw, then frames full packets from the next boundary. +func (r *tsRemuxReader) fill() error { + if r.disabled { + return r.passthrough() + } + if r.packetSize == 0 { + // BD main features (the only files with a clip table) are BDAV-192. + r.packetSize = bdavPacketLen + } + + clip := r.clipFor(r.absPos) + if clip == nil { + // Past the last clip (shouldn't happen for a well-formed table) — + // stream the remainder unmodified. + return r.passthrough() + } + + // Bytes remaining to the next packet boundary within this clip. + intoClip := r.absPos - clip.start + rem := r.packetSize - int(intoClip%int64(r.packetSize)) + aligned := rem == r.packetSize + want := rem + // Never read across a clip boundary in one chunk. 
+ if r.absPos+int64(want) > clip.end+1 { + want = int(clip.end + 1 - r.absPos) + aligned = false // a clip whose length isn't a packet multiple: tail passthrough + } + + chunk := make([]byte, want) + nr, err := io.ReadFull(r.inner, chunk) + chunk = chunk[:nr] + if nr > 0 { + if aligned && nr == r.packetSize { + // Validate the first aligned packet looks like BDAV TS; if not, + // the stream isn't what we expect (wrong decryption, plain TS, + // non-media) so disable rewriting rather than corrupt bytes. + if !r.syncChecked { + r.syncChecked = true + if chunk[4] != tsSync { + r.disabled = true + } + } + if !r.disabled { + rewritePacket(chunk, r.packetSize, clip.delta) + } + } + r.out.Write(chunk) + r.absPos += int64(nr) + } + if err == io.ErrUnexpectedEOF { + err = io.EOF + } + return err +} + +// passthrough copies a chunk from inner to out without rewriting. +func (r *tsRemuxReader) passthrough() error { + chunk := make([]byte, 64*1024) + nr, err := r.inner.Read(chunk) + if nr > 0 { + r.out.Write(chunk[:nr]) + r.absPos += int64(nr) + } + return err +} diff --git a/internal/nzbfilesystem/tsremux_reader_test.go b/internal/nzbfilesystem/tsremux_reader_test.go new file mode 100644 index 000000000..97248f27a --- /dev/null +++ b/internal/nzbfilesystem/tsremux_reader_test.go @@ -0,0 +1,197 @@ +package nzbfilesystem + +import ( + "bytes" + "io" + "testing" + + metapb "github.com/javi11/altmount/internal/metadata/proto" +) + +// memReadCloser serves a byte slice as an io.ReadCloser. +type memReadCloser struct{ r *bytes.Reader } + +func newMem(b []byte) *memReadCloser { return &memReadCloser{r: bytes.NewReader(b)} } +func (m *memReadCloser) Read(p []byte) (int, error) { return m.r.Read(p) } +func (m *memReadCloser) Close() error { return nil } + +// buildTwoClipStream builds a raw byte concatenation of two clips of BDAV +// packets (each packet carrying a PTS), plus the clipSpans that lift them onto +// one continuous timeline keeping clip 0's native base. 
Returns the raw bytes, +// the spans, and the expected monotonic PTS sequence after rewrite. +func buildTwoClipStream(t *testing.T) (raw []byte, spans []clipSpan, wantPTS []int64) { + t.Helper() + const hz = 90000 + clip0Base := int64(11.65 * hz) + clip1Base := int64(0.5 * hz) + clip0Dur := int64(30 * hz) + + var buf bytes.Buffer + mk := func(base int64, n int) { + for i := range n { + buf.Write(setPTS(newBDAVPacket(0x100, true, 0x01), base+int64(i)*hz)) + } + } + mk(clip0Base, 4) // clip 0: 4 packets + clip0Len := int64(buf.Len()) + mk(clip1Base, 3) // clip 1: 3 packets + total := int64(buf.Len()) + + base0 := clip0Base + timelineStart1 := base0 + clip0Dur + spans = []clipSpan{ + {start: 0, end: clip0Len - 1, delta: base0 - clip0Base}, // 0 + {start: clip0Len, end: total - 1, delta: timelineStart1 - clip1Base}, // lift clip1 + } + + // Expected PTS after rewrite. + for i := range 4 { + wantPTS = append(wantPTS, clip0Base+int64(i)*hz) // delta 0 + } + for i := range 3 { + wantPTS = append(wantPTS, timelineStart1+int64(i)*hz) + } + return buf.Bytes(), spans, wantPTS +} + +// ptsAtPacket decodes the PTS from the n-th 192-byte BDAV packet in b. +func ptsAtPacket(b []byte, n int) int64 { + pkt := b[n*bdavPacketLen : (n+1)*bdavPacketLen] + // PES payload at TS offset 4 → BDAV offset 8; PTS at payload+9. 
+ return readTS(pkt[8:][9:14]) +} + +func TestTSRemuxReader_FullReadMonotonic(t *testing.T) { + raw, spans, wantPTS := buildTwoClipStream(t) + + out, err := io.ReadAll(newTSRemuxReader(newMem(raw), spans, 0)) + if err != nil { + t.Fatalf("ReadAll: %v", err) + } + if len(out) != len(raw) { + t.Fatalf("output length %d != input length %d (must be byte-preserving)", len(out), len(raw)) + } + npkt := len(out) / bdavPacketLen + var prev int64 = -1 + for i := range npkt { + got := ptsAtPacket(out, i) + if got != wantPTS[i] { + t.Errorf("packet %d PTS = %d, want %d", i, got, wantPTS[i]) + } + if got <= prev { + t.Errorf("PTS not monotonic at packet %d: %d <= %d", i, got, prev) + } + prev = got + } +} + +// TestTSRemuxReader_ChunkSizeInvariant: the rewritten output must be identical +// regardless of the Read buffer size the caller uses (streaming determinism). +func TestTSRemuxReader_ChunkSizeInvariant(t *testing.T) { + raw, spans, _ := buildTwoClipStream(t) + full, _ := io.ReadAll(newTSRemuxReader(newMem(raw), spans, 0)) + + for _, chunk := range []int{1, 7, 100, 192, 193, 1000} { + r := newTSRemuxReader(newMem(raw), spans, 0) + var got bytes.Buffer + p := make([]byte, chunk) + for { + n, err := r.Read(p) + got.Write(p[:n]) + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("chunk %d: read error %v", chunk, err) + } + } + if !bytes.Equal(got.Bytes(), full) { + t.Errorf("chunk size %d produced different bytes than full read", chunk) + } + } +} + +// TestTSRemuxReader_RangeDeterminism is the critical property for HTTP range +// requests: a wrapper started at an arbitrary packet-aligned mid-stream offset +// must produce exactly the same bytes as the corresponding slice of the full +// rewrite. This guarantees seeks/range GETs see a consistent timeline. +func TestTSRemuxReader_RangeDeterminism(t *testing.T) { + raw, spans, _ := buildTwoClipStream(t) + full, _ := io.ReadAll(newTSRemuxReader(newMem(raw), spans, 0)) + + // Packet-aligned starts. 
+ for startPkt := 0; startPkt*bdavPacketLen < len(raw); startPkt++ { + startOff := int64(startPkt * bdavPacketLen) + r := newTSRemuxReader(newMem(raw[startOff:]), spans, startOff) + got, err := io.ReadAll(r) + if err != nil { + t.Fatalf("aligned startOff %d: %v", startOff, err) + } + if want := full[startOff:]; !bytes.Equal(got, want) { + t.Errorf("aligned startOff %d: range read differs from full-rewrite slice", startOff) + } + } + + // UNALIGNED starts in packet payload — this is what ffprobe does when it + // seeks to near-EOF to estimate duration. The OLD code disabled rewriting + // on any unaligned start, leaving the tail (and thus the measured + // duration) wrong; this is the regression guard. The leading mid-packet + // bytes are payload (rewrite only touches header timestamp fields), so the + // output must still byte-match the full-rewrite slice. + for startPkt := 0; startPkt*bdavPacketLen < len(raw); startPkt++ { + for _, intoPkt := range []int64{100, 150, 188} { // all past the PTS field + startOff := int64(startPkt*bdavPacketLen) + intoPkt + if startOff >= int64(len(raw)) { + continue + } + r := newTSRemuxReader(newMem(raw[startOff:]), spans, startOff) + got, err := io.ReadAll(r) + if err != nil { + t.Fatalf("unaligned startOff %d: %v", startOff, err) + } + if want := full[startOff:]; !bytes.Equal(got, want) { + t.Errorf("unaligned startOff %d: range read differs from full-rewrite slice (tail left un-rewritten?)", startOff) + } + } + } +} + +// TestTSRemuxReader_NonTSPassthrough: a stream that isn't recognisable TS is +// passed through byte-for-byte (disabled mode), never corrupted. 
+func TestTSRemuxReader_NonTSPassthrough(t *testing.T) { + raw := bytes.Repeat([]byte{0x11, 0x22, 0x33, 0x44}, 500) // no 0x47 sync grid + spans := []clipSpan{{start: 0, end: int64(len(raw)) - 1, delta: 90000}} + out, err := io.ReadAll(newTSRemuxReader(newMem(raw), spans, 0)) + if err != nil { + t.Fatalf("ReadAll: %v", err) + } + if !bytes.Equal(out, raw) { + t.Error("non-TS stream was modified; expected byte-for-byte passthrough") + } +} + +func TestBuildClipSpans(t *testing.T) { + // Empty → nil (remux disabled). + if buildClipSpans(nil) != nil { + t.Error("buildClipSpans(nil) should be nil") + } + // Prefix sums turn (byte_len, delta) into absolute [start,end] ranges. + spans := buildClipSpans([]*metapb.ClipBoundary{ + {ByteLen: 100, Delta_90K: 0}, + {ByteLen: 50, Delta_90K: 91000}, + {ByteLen: 200, Delta_90K: 272000}, + }) + want := []clipSpan{ + {start: 0, end: 99, delta: 0}, + {start: 100, end: 149, delta: 91000}, + {start: 150, end: 349, delta: 272000}, + } + if len(spans) != len(want) { + t.Fatalf("got %d spans, want %d", len(spans), len(want)) + } + for i := range want { + if spans[i] != want[i] { + t.Errorf("span %d = %+v, want %+v", i, spans[i], want[i]) + } + } +} diff --git a/internal/nzbfilesystem/tsremux_test.go b/internal/nzbfilesystem/tsremux_test.go new file mode 100644 index 000000000..74c0e85b9 --- /dev/null +++ b/internal/nzbfilesystem/tsremux_test.go @@ -0,0 +1,218 @@ +package nzbfilesystem + +import "testing" + +// --- synthetic BDAV packet builders ----------------------------------------- + +// newBDAVPacket returns a zeroed 192-byte BDAV source packet with the sync +// byte set. afc selects adaptation_field_control (0x01 payload-only, +// 0x03 adaptation+payload). pusi sets payload_unit_start_indicator. 
func newBDAVPacket(pid uint16, pusi bool, afc byte) []byte {
	p := make([]byte, bdavPacketLen)
	ts := p[4:] // 188-byte TS packet
	ts[0] = tsSync
	ts[1] = byte(pid>>8) & 0x1F // upper 5 bits of the PID
	if pusi {
		ts[1] |= 0x40
	}
	ts[2] = byte(pid) // lower 8 bits of the PID
	ts[3] = (afc << 4) // scrambling 00, CC 0
	return p
}

// setPTS writes a PTS-only PES header into a payload-only BDAV packet and
// returns the packet. The PES header is written at p[8], i.e. right after the
// 4-byte TP_extra_header and the 4-byte TS header, so the PTS field lands at
// p[8:][9:14].
func setPTS(p []byte, pts int64) []byte {
	pl := p[8:] // payload of the 188-byte TS packet (BDAV off 4 + TS header 4)
	pl[0], pl[1], pl[2] = 0x00, 0x00, 0x01
	pl[3] = 0xE0 // video stream_id
	pl[4], pl[5] = 0x00, 0x00
	pl[6] = 0x80 // marker '10', no flags
	pl[7] = 0x80 // PTS_DTS_flags = '10' (PTS only)
	pl[8] = 0x05 // PES_header_data_length
	// Seed the PTS field prefix nibble (0010) + marker bits, then encode.
	pl[9] = 0x21  // 0010 ...1
	pl[11] = 0x01 // marker
	pl[13] = 0x01 // marker
	writeTS(pl[9:14], pts)
	return p
}

// setPTSDTS writes a PTS+DTS PES header into a payload-only BDAV packet and
// returns the packet. The PTS occupies p[8:][9:14] and the DTS p[8:][14:19].
func setPTSDTS(p []byte, pts, dts int64) []byte {
	pl := p[8:]
	pl[0], pl[1], pl[2] = 0x00, 0x00, 0x01
	pl[3] = 0xE0
	pl[4], pl[5] = 0x00, 0x00
	pl[6] = 0x80
	pl[7] = 0xC0 // PTS_DTS_flags = '11'
	pl[8] = 0x0A // 10 bytes (PTS+DTS)
	pl[9] = 0x31 // prefix 0011 for PTS-when-DTS-present
	pl[11], pl[13] = 0x01, 0x01
	writeTS(pl[9:14], pts)
	pl[14] = 0x11 // prefix 0001 for DTS
	pl[16], pl[18] = 0x01, 0x01
	writeTS(pl[14:19], dts)
	return p
}

// setPCR writes a PCR into an adaptation+payload BDAV packet (afc 0x03).
// Only the 33-bit base is encoded; the extension and reserved bits are zero.
func setPCR(p []byte, pcrBase int64) []byte {
	ts := p[4:]
	// adaptation_field_length: 1 (flags) + 6 (PCR) = 7.
	ts[4] = 7
	ts[5] = 0x10 // PCR_flag
	b := ts[6:12]
	b[0] = byte(pcrBase >> 25)
	b[1] = byte(pcrBase >> 17)
	b[2] = byte(pcrBase >> 9)
	b[3] = byte(pcrBase >> 1)
	b[4] = byte((pcrBase&0x01)<<7) // ext = 0
	b[5] = 0x00
	return p
}

// readPCRBase decodes the 33-bit PCR base from the PCR field of a BDAV packet
// laid out as setPCR writes it (PCR at bytes 6..12 of the TS packet).
func readPCRBase(p []byte) int64 {
	b := p[4:][6:12]
	return (int64(b[0]) << 25) | (int64(b[1]) << 17) | (int64(b[2]) << 9) |
		(int64(b[3]) << 1) | (int64(b[4]) >> 7)
}

// --- tests ------------------------------------------------------------------

// TestReadWriteTS_RoundTrip checks that writeTS followed by readTS returns the
// value written, and that writeTS leaves the marker bits and the prefix nibble
// of the 5-byte field intact.
func TestReadWriteTS_RoundTrip(t *testing.T) {
	cases := []int64{0, 1, 90000, 1048500, ptsModulus - 1, (1 << 32) + 12345}
	for _, want := range cases {
		b := []byte{0x21, 0x00, 0x01, 0x00, 0x01} // prefix + markers
		writeTS(b, want)
		got := readTS(b)
		if got != want {
			t.Errorf("round-trip PTS: wrote %d, read %d", want, got)
		}
		// Marker bits must be preserved (bit 0 of b[0], b[2], b[4]).
		if b[0]&0x01 != 0x01 || b[2]&0x01 != 0x01 || b[4]&0x01 != 0x01 {
			t.Errorf("marker bits clobbered for %d: % x", want, b)
		}
		// Prefix nibble preserved.
		if b[0]&0xF0 != 0x20 {
			t.Errorf("prefix nibble clobbered for %d: %#x", want, b[0])
		}
	}
}

// TestRewritePacket_PTSOnly checks that a PTS-only packet is reported as
// changed and that its PTS moves by exactly the requested delta.
func TestRewritePacket_PTSOnly(t *testing.T) {
	const base = int64(1048500) // ~11.65s
	const delta = int64(90000)  // +1s
	p := setPTS(newBDAVPacket(0x100, true, 0x01), base)
	if !rewritePacket(p, bdavPacketLen, delta) {
		t.Fatal("rewritePacket reported no change for a PTS packet")
	}
	got := readTS(p[8:][9:14])
	if got != base+delta {
		t.Errorf("PTS after rewrite = %d, want %d", got, base+delta)
	}
}

// TestRewritePacket_PTSDTS_andPCR checks that PTS and DTS in one packet, and
// the PCR base in a separate adaptation-field packet, each move by the delta.
func TestRewritePacket_PTSDTS_andPCR(t *testing.T) {
	const pts = int64(900000)
	const dts = int64(810000)
	const delta = int64(45000)
	p := setPTSDTS(newBDAVPacket(0x100, true, 0x01), pts, dts)
	rewritePacket(p, bdavPacketLen, delta)
	if g := readTS(p[8:][9:14]); g != pts+delta {
		t.Errorf("PTS = %d, want %d", g, pts+delta)
	}
	if g := readTS(p[8:][14:19]); g != dts+delta {
		t.Errorf("DTS = %d, want %d", g, dts+delta)
	}

	const pcrBase = int64(1234567)
	pc := setPCR(newBDAVPacket(0x100, false, 0x03), pcrBase)
	rewritePacket(pc, bdavPacketLen, delta)
	if g := readPCRBase(pc); g != pcrBase+delta {
		t.Errorf("PCR base = %d, want %d", g, pcrBase+delta)
	}
}

// TestRewritePacket_NoTimestampLeavesUnchanged checks that a packet without
// any timestamp is reported as unchanged and is byte-identical afterwards.
func TestRewritePacket_NoTimestampLeavesUnchanged(t *testing.T) {
	// Continuation packet (PUSI=0, payload-only, no PES header).
	p := newBDAVPacket(0x100, false, 0x01)
	cp := make([]byte, len(p))
	copy(cp, p)
	if rewritePacket(p, bdavPacketLen, 90000) {
		t.Error("rewritePacket changed a packet with no timestamps")
	}
	for i := range p {
		if p[i] != cp[i] {
			t.Fatalf("byte %d changed in a no-timestamp packet", i)
		}
	}
}

// TestRewrite_TwoClipsBecomeMonotonic is the FEASIBILITY GATE: two clips with
// independent PTS bases, byte-concatenated, become a single monotonic timeline
// after per-clip delta rewriting, and last−first equals the real elapsed time
// across the unified timeline. This proves the whole continuous-timeline
// approach before any metadata/VFS plumbing is built.
+func TestRewrite_TwoClipsBecomeMonotonic(t *testing.T) { + const hz = 90000 + // Clip 0: base 11.65s, 3 packets spaced 1s, duration 30s. + // Clip 1: base 0.5s, 3 packets spaced 1s, duration 20s. + clip0Base := int64(11.65 * hz) + clip1Base := int64(0.5 * hz) + clip0Dur := int64(30 * hz) + clip1Dur := int64(20 * hz) + + mkClip := func(base int64, n int) [][]byte { + out := make([][]byte, n) + for i := range n { + out[i] = setPTS(newBDAVPacket(0x100, true, 0x01), base+int64(i)*hz) + } + return out + } + clip0 := mkClip(clip0Base, 3) + clip1 := mkClip(clip1Base, 3) + + // timeline_start: clip0 keeps its own base (start the file at 11.65s), + // clip1 begins where clip0's authored duration ends. + timelineStart0 := clip0Base + timelineStart1 := clip0Base + clip0Dur + delta0 := timelineStart0 - clip0Base // 0 + delta1 := timelineStart1 - clip1Base + + var ptsSeq []int64 + for _, p := range clip0 { + rewritePacket(p, bdavPacketLen, delta0) + ptsSeq = append(ptsSeq, readTS(p[8:][9:14])) + } + for _, p := range clip1 { + rewritePacket(p, bdavPacketLen, delta1) + ptsSeq = append(ptsSeq, readTS(p[8:][9:14])) + } + + // Strictly monotonic across the whole concatenation. + for i := 1; i < len(ptsSeq); i++ { + if ptsSeq[i] <= ptsSeq[i-1] { + t.Fatalf("PTS not monotonic at index %d: %d <= %d (full seq: %v)", i, ptsSeq[i], ptsSeq[i-1], ptsSeq) + } + } + + // ffprobe-style duration estimate: last − first. The last packet sits at + // timelineStart1 + 2s; first at clip0Base. Their delta must equal the + // real elapsed time across the unified timeline. + first := ptsSeq[0] + last := ptsSeq[len(ptsSeq)-1] + wantSpan := (timelineStart1 + 2*hz) - clip0Base + if last-first != wantSpan { + t.Errorf("timeline span = %d ticks, want %d", last-first, wantSpan) + } + + // Clip 1's first packet must land exactly at timelineStart1, proving it + // was lifted off its own 0.5s base onto the unified timeline rather than + // resetting (which is what breaks ffprobe today). 
+ if ptsSeq[3] != timelineStart1 { + t.Errorf("clip1 first PTS = %d, want timelineStart1 %d", ptsSeq[3], timelineStart1) + } + _ = clip1Dur // documented as clip 1's authored length; not needed past timelineStart1 +} diff --git a/internal/progress/tracker.go b/internal/progress/tracker.go index c845fe2d4..38c4e459f 100644 --- a/internal/progress/tracker.go +++ b/internal/progress/tracker.go @@ -40,6 +40,24 @@ func (pt *Tracker) WithStage(stage string) *Tracker { return pt } +// Slice returns a child tracker covering segment idx of count equal slices of +// this tracker's [min,max] range. Useful for dividing a range across a known +// number of sequential sub-operations (e.g. one slice per ISO across a +// multi-disc group). Safe on a nil receiver (returns nil). +func (pt *Tracker) Slice(idx, count int) *Tracker { + if pt == nil || count <= 0 { + return nil + } + span := pt.maxPercent - pt.minPercent + return &Tracker{ + queueID: pt.queueID, + broadcaster: pt.broadcaster, + minPercent: pt.minPercent + idx*span/count, + maxPercent: pt.minPercent + (idx+1)*span/count, + stage: pt.stage, + } +} + // Update reports progress within the configured percentage range. // Safe to call on a nil receiver (no-op). func (pt *Tracker) Update(current, total int) { diff --git a/internal/progress/tracker_test.go b/internal/progress/tracker_test.go new file mode 100644 index 000000000..1f0865aad --- /dev/null +++ b/internal/progress/tracker_test.go @@ -0,0 +1,93 @@ +package progress + +import "testing" + +// recordingBroadcaster captures every progress update for assertions. 
+type recordingBroadcaster struct { + updates []recordedUpdate +} + +type recordedUpdate struct { + queueID int + percentage int + stage string +} + +func (rb *recordingBroadcaster) UpdateProgress(queueID, percentage int) { + rb.updates = append(rb.updates, recordedUpdate{queueID: queueID, percentage: percentage}) +} + +func (rb *recordingBroadcaster) UpdateProgressWithStage(queueID, percentage int, stage string) { + rb.updates = append(rb.updates, recordedUpdate{queueID: queueID, percentage: percentage, stage: stage}) +} + +func TestTrackerSlice(t *testing.T) { + t.Parallel() + + rb := &recordingBroadcaster{} + base := NewTracker(rb, 7, 10, 30).WithStage("Analyzing ISO") + + tests := []struct { + name string + idx, count int + wantMin, wantMax int + wantNil bool + }{ + {name: "first of two", idx: 0, count: 2, wantMin: 10, wantMax: 20}, + {name: "second of two", idx: 1, count: 2, wantMin: 20, wantMax: 30}, + {name: "single slice is full range", idx: 0, count: 1, wantMin: 10, wantMax: 30}, + {name: "zero count is nil", idx: 0, count: 0, wantNil: true}, + {name: "negative count is nil", idx: 0, count: -3, wantNil: true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := base.Slice(tt.idx, tt.count) + if tt.wantNil { + if got != nil { + t.Fatalf("Slice(%d,%d) = %+v, want nil", tt.idx, tt.count, got) + } + return + } + if got == nil { + t.Fatalf("Slice(%d,%d) = nil, want non-nil", tt.idx, tt.count) + } + if got.minPercent != tt.wantMin || got.maxPercent != tt.wantMax { + t.Errorf("Slice(%d,%d) range = [%d,%d], want [%d,%d]", + tt.idx, tt.count, got.minPercent, got.maxPercent, tt.wantMin, tt.wantMax) + } + // Child inherits queueID, broadcaster, and stage. 
+ if got.queueID != base.queueID || got.broadcaster != base.broadcaster || got.stage != base.stage { + t.Errorf("Slice did not inherit parent identity/stage: %+v", got) + } + }) + } +} + +func TestTrackerSliceNilReceiver(t *testing.T) { + t.Parallel() + + var nilTracker *Tracker + if got := nilTracker.Slice(0, 2); got != nil { + t.Fatalf("nil receiver Slice = %+v, want nil", got) + } + // And Update on the nil result must be a safe no-op. + nilTracker.Slice(0, 2).Update(1, 2) +} + +func TestTrackerSliceUpdateMapsIntoSubRange(t *testing.T) { + t.Parallel() + + rb := &recordingBroadcaster{} + base := NewTracker(rb, 7, 10, 30).WithStage("Analyzing ISO") + + // Second of two ISOs spans [20,30]; a half-complete update lands at 25. + base.Slice(1, 2).Update(1, 2) + + if len(rb.updates) != 1 { + t.Fatalf("got %d updates, want 1", len(rb.updates)) + } + u := rb.updates[0] + if u.queueID != 7 || u.percentage != 25 || u.stage != "Analyzing ISO" { + t.Fatalf("update = %+v, want {7 25 Analyzing ISO}", u) + } +}