Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 54 additions & 6 deletions src/Microsoft.ComponentDetection.Common/DockerService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ namespace Microsoft.ComponentDetection.Common;
using System.Threading.Tasks;
using Docker.DotNet;
using Docker.DotNet.Models;
using Microsoft.ComponentDetection.Common.Telemetry;
using Microsoft.ComponentDetection.Common.Telemetry.Records;
using Microsoft.ComponentDetection.Contracts;
using Microsoft.ComponentDetection.Contracts.BcdeModels;
Expand Down Expand Up @@ -38,6 +39,7 @@ public async Task<bool> CanPingDockerAsync(CancellationToken cancellationToken =
catch (Exception e)
{
this.logger.LogError(e, "Failed to ping docker");
cancellationToken.ThrowIfCancellationRequested();
return false;
}
}
Expand All @@ -59,6 +61,7 @@ public async Task<bool> CanRunLinuxContainersAsync(CancellationToken cancellatio
catch (Exception e)
{
record.ExceptionMessage = e.Message;
cancellationToken.ThrowIfCancellationRequested();
}

return false;
Expand All @@ -79,6 +82,7 @@ public async Task<bool> ImageExistsLocallyAsync(string image, CancellationToken
catch (Exception e)
{
record.ExceptionMessage = e.Message;
cancellationToken.ThrowIfCancellationRequested();
return false;
}
}
Expand Down Expand Up @@ -114,6 +118,7 @@ public async Task<bool> TryPullImageAsync(string image, CancellationToken cancel
catch (Exception e)
{
record.ExceptionMessage = e.Message;
cancellationToken.ThrowIfCancellationRequested();
return false;
}
}
Expand Down Expand Up @@ -178,6 +183,7 @@ public async Task<ContainerDetails> InspectImageAsync(string image, Cancellation
catch (Exception e)
{
record.ExceptionMessage = e.Message;
cancellationToken.ThrowIfCancellationRequested();
return null;
}
}
Expand Down Expand Up @@ -207,6 +213,12 @@ public async Task<ContainerDetails> InspectImageAsync(string image, Cancellation
var stream = await AttachContainerAsync(container.ID, cancellationToken);
await StartContainerAsync(container.ID, cancellationToken);

this.logger.LogInformation("Container {ContainerId} started for image {Image}, reading output...", container.ID, image);

// Flush telemetry before the long-running ReadOutput so we get mid-scan
// data in App Insights even if the process hangs during the read.
TelemetryRelay.Instance.FlushCurrentTelemetry();

Comment thread
AMaini503 marked this conversation as resolved.
var (stdout, stderr) = await ReadContainerOutputAsync(stream, container.ID, image, cancellationToken);

record.Stdout = stdout;
Expand All @@ -217,13 +229,33 @@ public async Task<ContainerDetails> InspectImageAsync(string image, Cancellation
finally
{
// Best-effort container cleanup with a bounded timeout.
// RemoveContainerAsync already handles not-found, but we must guard against
// the Docker daemon hanging on container removal (e.g. when the container
// process is stuck), which would block the detector indefinitely.
// Use Task.WhenAny as belt-and-suspenders: even if Docker.DotNet's HTTP
// pipeline doesn't honor the CTS (e.g. kernel-level socket blocking),
// we abandon the removal rather than hanging indefinitely.
this.logger.LogInformation("Removing container {ContainerId}...", container.ID);
using var removeCts = new CancellationTokenSource(TimeSpan.FromSeconds(30));
try
{
await RemoveContainerAsync(container.ID, removeCts.Token);
var removeTask = RemoveContainerAsync(container.ID, removeCts.Token);
var removeTimeout = Task.Delay(TimeSpan.FromSeconds(30), CancellationToken.None);

if (await Task.WhenAny(removeTask, removeTimeout) == removeTimeout)
{
this.logger.LogWarning(
"RemoveContainerAsync timed out for container {ContainerId}; abandoning cleanup",
container.ID);

// Observe the abandoned task to prevent unobserved task exceptions
_ = removeTask.ContinueWith(
static _ => { },
CancellationToken.None,
TaskContinuationOptions.OnlyOnFaulted,
TaskScheduler.Default);
}
else
{
await removeTask; // Observe any exception from completed task
}
}
catch (Exception ex)
{
Expand All @@ -232,6 +264,8 @@ public async Task<ContainerDetails> InspectImageAsync(string image, Cancellation
"Failed to remove container {ContainerId}; abandoning cleanup",
container.ID);
}
Comment thread
AMaini503 marked this conversation as resolved.

this.logger.LogInformation("Container {ContainerId} cleanup complete", container.ID);
}
}

Expand Down Expand Up @@ -264,8 +298,22 @@ public async Task<ContainerDetails> InspectImageAsync(string image, Cancellation
{
record.WasCancelled = true;

// Dispose the stream to unblock any pending read operation
stream.Dispose();
// Dispose the stream to unblock any pending read operation.
// Run it fire-and-forget: if the underlying socket close() blocks
// (e.g. Docker daemon in kernel D-state), we don't want to hang here.
_ = Task.Run(
() =>
{
try
{
stream.Dispose();
}
catch
{
// best effort
}
},
CancellationToken.None);
Comment thread
AMaini503 marked this conversation as resolved.

// Observe the readTask to prevent unobserved task exceptions.
// Running any continuation automatically marks the exception as observed.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,25 @@ public void PostTelemetryRecord(IDetectionTelemetryRecord record)
}
}

/// <summary>
/// Calls <c>Flush()</c> on every telemetry service held by this relay, leaving the services running.
/// Intended for critical checkpoints, so that buffered telemetry is delivered even if the process hangs afterwards.
/// </summary>
/// <remarks>
/// A failure while flushing one service is swallowed and the remaining services are still flushed.
/// </remarks>
public void FlushCurrentTelemetry()
{
    foreach (var telemetryService in this.telemetryServices)
    {
        try
        {
            telemetryService.Flush();
        }
        catch
        {
            // Deliberately ignored: a telemetry failure must never take the application down.
        }
    }
}
Comment thread
AMaini503 marked this conversation as resolved.

/// <summary>
/// Disables the sending of telemetry and flushes any messages out of the queue for each service.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ ILogger<LinuxContainerDetector> logger
private const LinuxScannerScope DefaultScanScope = LinuxScannerScope.AllLayers;

private const string LocalImageMountPoint = "/image";
private const int HeartbeatIntervalSeconds = 60;

// Base image annotations from ADO dockerTask
private const string BaseImageRefAnnotation = "image.base.ref.name";
Expand Down Expand Up @@ -99,19 +100,32 @@ public async Task<IndividualDetectorScanResult> ExecuteDetectorAsync(
using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
timeoutCts.CancelAfter(GetTimeout(request.DetectorArgs));

if (!await this.dockerService.CanRunLinuxContainersAsync(timeoutCts.Token))
{
using var record = new LinuxContainerDetectorUnsupportedOs
{
Os = RuntimeInformation.OSDescription,
};
this.logger.LogInformation("Linux containers are not available on this host.");
return EmptySuccessfulScan();
}

var results = Enumerable.Empty<ImageScanningResult>();

// Heartbeat timer: logs every 60s while the detector is running.
// If the process dies silently (OOM, SIGKILL), the heartbeat stops
// and we know approximately when it happened from the last log entry.
var scanStart = DateTime.UtcNow;
using var heartbeat = new Timer(
_ => this.logger.LogInformation(
"LinuxContainerDetector heartbeat — still scanning ({ElapsedSeconds}s elapsed)",
(DateTime.UtcNow - scanStart).TotalSeconds),
state: null,
dueTime: TimeSpan.FromSeconds(HeartbeatIntervalSeconds),
period: TimeSpan.FromSeconds(HeartbeatIntervalSeconds));

try
{
if (!await this.dockerService.CanRunLinuxContainersAsync(timeoutCts.Token))
{
using var record = new LinuxContainerDetectorUnsupportedOs
{
Os = RuntimeInformation.OSDescription,
};
this.logger.LogInformation("Linux containers are not available on this host.");
return EmptySuccessfulScan();
}

results = await this.ProcessImagesAsync(
allImages,
request.ComponentRecorder,
Expand All @@ -132,6 +146,10 @@ public async Task<IndividualDetectorScanResult> ExecuteDetectorAsync(
this.logger.LogError(e, "Unexpected error during Linux container image scanning");
}

this.logger.LogInformation(
"LinuxContainerDetector completed after {ElapsedSeconds}s",
(DateTime.UtcNow - scanStart).TotalSeconds);

return new IndividualDetectorScanResult
{
ContainerDetails = results
Expand Down
Loading