Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions pkg/tools/builtin/shell.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,27 @@ func (h *shellHandler) RunShell(ctx context.Context, params RunShellArgs) (*tool
return h.runNativeCommand(timeoutCtx, ctx, params.Cmd, cwd, timeout), nil
}

// waitDelayAfterShellExit caps how long cmd.Wait() blocks on stdout/stderr
// copy goroutines after the direct shell child has exited.
//
// When cmd.Stdout/Stderr are not *os.File, Go's exec package creates OS pipes
// and spawns copy goroutines; cmd.Wait() only returns after *both* the child
// exits and those goroutines see EOF on the pipes. If the command backgrounds
// a grandchild (e.g. `docker run ... &`, `sleep 10 &`) that inherits the pipe
// fds, the pipes stay open and Wait() blocks until the configured timeout.
//
// cmd.WaitDelay tells Go to force-close the pipes and return this long after
// the direct child has exited, letting the grandchild keep running while the
// tool call returns promptly. A short delay is plenty because any output the
// shell itself produced is already flushed by the time it exits.
const waitDelayAfterShellExit = 500 * time.Millisecond

func (h *shellHandler) runNativeCommand(timeoutCtx, ctx context.Context, command, cwd string, timeout time.Duration) *tools.ToolCallResult {
cmd := exec.Command(h.shell, append(h.shellArgsPrefix, command)...)
cmd.Env = h.env
cmd.Dir = cwd
cmd.SysProcAttr = platformSpecificSysProcAttr()
cmd.WaitDelay = waitDelayAfterShellExit

var outBuf bytes.Buffer
cmd.Stdout = &outBuf
Expand Down
72 changes: 72 additions & 0 deletions pkg/tools/builtin/shell_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ package builtin

import (
"os"
"os/exec"
"runtime"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -177,3 +180,72 @@ func TestShellTool_RelativeCwdResolvesAgainstWorkingDir(t *testing.T) {
assert.Contains(t, result.Output, subdir,
"relative cwd must resolve against the configured workingDir, not the process cwd")
}

// Regression test for a shell-tool hang caused by backgrounded grandchildren.
//
// A command like `sleep 10 &` makes the shell exit immediately, but the
// backgrounded sleep inherits stdout/stderr. Without cmd.WaitDelay, Go's
// exec.Cmd.Wait() blocks reading the pipe until the configured timeout,
// which makes the tool call hang (observed in eval runs where the agent
// launched a server with `docker run ... &`).
//
// With the WaitDelay safeguard the tool must return within a small fraction
// of the configured timeout.
func TestShellTool_BackgroundedChildDoesNotBlockReturn(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("POSIX shell backgrounding semantics; skipped on Windows")
}

tool := NewShellTool(nil, &config.RuntimeConfig{Config: config.Config{WorkingDir: t.TempDir()}})

start := time.Now()
result, err := tool.handler.RunShell(t.Context(), RunShellArgs{
// sleep inherits stdout/stderr from the shell and holds the pipe
// open for 30s. The tool must return as soon as the shell exits.
Cmd: "sleep 30 &",
Timeout: 20,
})
elapsed := time.Since(start)

require.NoError(t, err)
require.NotNil(t, result)
assert.Less(t, elapsed, 5*time.Second,
"shell tool must return promptly when the command backgrounds a child "+
"that inherits stdout/stderr; elapsed=%s", elapsed)
}

// Even when the backgrounded child detaches into its own session (so the
// shell tool's process-group kill cannot reach it on timeout), cmd.WaitDelay
// must still allow the tool call to return.
func TestShellTool_DetachedBackgroundedChildDoesNotBlockReturn(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("POSIX shell backgrounding semantics; skipped on Windows")
}
if _, err := exec.LookPath("setsid"); err != nil {
t.Skip("setsid not available")
}

tool := NewShellTool(nil, &config.RuntimeConfig{Config: config.Config{WorkingDir: t.TempDir()}})

done := make(chan struct{})
var result *tools.ToolCallResult
var err error
go func() {
defer close(done)
result, err = tool.handler.RunShell(t.Context(), RunShellArgs{
// setsid places sleep in its own session/process group, so the
// process-group kill fallback in the timeout path cannot reach
// it. Only cmd.WaitDelay can unblock Wait() here.
Cmd: "setsid sleep 30 &",
Timeout: 20,
})
}()

select {
case <-done:
require.NoError(t, err)
require.NotNil(t, result)
case <-time.After(10 * time.Second):
t.Fatal("shell tool hung when command backgrounded a detached child")
}
}
Loading