Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Apps/CLI/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [3.5.3] - 2026-06-13

### Fixed
- Public CLI, agent, MCP, and API guidance now treats runtime element IDs as opaque strings to copy exactly instead of implying role-specific ID shapes. Thanks @coygeek for #194.
- JSON-only `peekaboo see` runs without `--path` now keep required screenshots in snapshot storage instead of leaving files on Desktop or exposing their temporary paths. Thanks @coygeek for #196.
- Background element/query/coordinate clicks now pin actions to the requested process and exact window, reject mismatched window/PID selectors and unverifiable snapshots, invalidate implicit latest snapshots without deleting history, and no longer require Event Synthesizing when Accessibility completes the click.
- App launch, open, and inventory commands now use the selected runtime host, fixing sandboxed LaunchServices failures; launch/open preserve `--no-focus` and caller-relative app paths, relaunch preflights and keeps quit/wait/launch in one daemon-held transaction, build-scoped fallback daemons remain reusable and controllable across native/Rosetta execution and executable upgrades, incompatible legacy hosts no longer force sandboxed local fallback, and inventory ignores unrelated input overrides.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ extension ClickCommand: CommanderSignatureProviding {
),
.commandOption(
"on",
help: "Element ID to click (e.g., B1, T2)",
help: "Opaque element ID copied from current see or inspect-ui output",
long: "on"
),
.commandOption(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ extension ClickCommand {

💡 Hints:
• Run 'peekaboo see' first to capture UI elements
Check that the element ID is correct (e.g., B1, T2)
Copy the opaque element ID exactly from current see or inspect-ui output
• Element may have disappeared or changed
"""
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ struct ClickCommand: ErrorHandlingCommand, OutputFormattable, RuntimeOptionsConf
@Option(help: "Snapshot ID, or 'latest' (uses latest if not specified)")
var snapshot: String?

@Option(help: "Element ID to click (e.g., B1, T2)")
@Option(help: "Opaque element ID copied from current see or inspect-ui output")
var on: String?

@Option(name: .customLong("id"), help: "Element ID to click (alias for --on)")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -259,11 +259,11 @@ extension DragCommand: ParsableCommand {
Execute click-and-drag operations for moving elements, selecting text, or dragging files.

EXAMPLES:
peekaboo drag --from B1 --to T2
peekaboo drag --from "$SOURCE_ID" --to "$TARGET_ID"
peekaboo drag --from-coords "100,200" --to-coords "400,300"
peekaboo drag --from B1 --to-app Trash
peekaboo drag --from S1 --to-coords "500,250" --duration 2000
peekaboo drag --from T1 --to T5 --modifiers shift
peekaboo drag --from "$SOURCE_ID" --to-app Trash
peekaboo drag --from "$SOURCE_ID" --to-coords "500,250" --duration 2000
peekaboo drag --from "$SOURCE_ID" --to "$TARGET_ID" --modifiers shift
""",
version: "2.0.0",
showHelpOnEmptyInvocation: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ extension MoveCommand: ParsableCommand {
EXAMPLES:
peekaboo move 100,200 # Move to coordinates
peekaboo move --to "Submit Button" # Move to element by text
peekaboo move --on B3 # Move to element by ID
peekaboo move --on "$ELEMENT_ID" # ID copied from current output
peekaboo move 500,300 --smooth # Smooth movement
peekaboo move --center # Move to screen center

Expand Down Expand Up @@ -84,7 +84,7 @@ extension MoveCommand: CommanderSignatureProviding {
),
.commandOption(
"on",
help: "Element ID to move to (e.g., B1, T2)",
help: "Opaque element ID copied from current see or inspect-ui output",
long: "on"
),
.commandOption(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ struct MoveCommand: ErrorHandlingCommand, OutputFormattable {
@Option(help: "Move to element by text/label")
var to: String?

@Option(help: "Element ID to move to (e.g., B1, T2)")
@Option(help: "Opaque element ID copied from current see or inspect-ui output")
var on: String?

@Option(name: .customLong("id"), help: "Element ID to move to (alias for --on)")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ extension PerformActionCommand: ParsableCommand {
Invokes an accessibility action without synthesizing a mouse or keyboard event.

EXAMPLES:
peekaboo perform-action --on B1 --action AXPress
peekaboo perform-action --on "$ELEMENT_ID" --action AXPress
peekaboo perform-action --on Stepper --action AXIncrement
""",
showHelpOnEmptyInvocation: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ extension SetValueCommand: ParsableCommand {
Sets a settable accessibility value without synthesizing keystrokes.

EXAMPLES:
peekaboo set-value "hello" --on T1
peekaboo set-value "hello" --on "$ELEMENT_ID"
peekaboo set-value "42" --on "Search"
""",
showHelpOnEmptyInvocation: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,20 +249,20 @@ extension SwipeCommand: ParsableCommand {

EXAMPLES:
# Swipe between UI elements
peekaboo swipe --from B1 --to B5 --snapshot 12345
peekaboo swipe --from "$SOURCE_ID" --to "$TARGET_ID" --snapshot "$SNAPSHOT_ID"

# Swipe with coordinates
peekaboo swipe --from-coords 100,200 --to-coords 300,400

# Mixed mode: element to coordinates
peekaboo swipe --from T1 --to-coords 500,300 --duration 1000
peekaboo swipe --from "$SOURCE_ID" --to-coords 500,300 --duration 1000

# Slow swipe for precise gesture
peekaboo swipe --from-coords 50,50 --to-coords 400,400 --duration 2000

USAGE:
You can specify source and destination using either:
- Element IDs from a previous 'see' command
- Opaque element IDs copied from current 'see' or 'inspect-ui' output
- Direct coordinates
- A mix of both

Expand Down
8 changes: 8 additions & 0 deletions Apps/CLI/Tests/CoreCLITests/CommandHelpRendererTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ struct CommandHelpRendererTests {
#expect(!help.contains("<alsoText>"))
#expect(!help.contains("<logLevel>"))
}

@Test
func `interaction help describes element IDs as opaque`() {
    // The click and move help text must describe element IDs as opaque and must
    // not contain any sample ID made of one of B/T/M/S followed by digits.
    let renderedHelp = [ClickCommand.helpMessage(), MoveCommand.helpMessage()]
    for text in renderedHelp {
        let roleShapedID = text.range(of: #"\b[BTMS]\d+\b"#, options: .regularExpression)
        #expect(text.contains("Opaque element ID"))
        #expect(roleShapedID == nil)
    }
}
}

private struct SampleHelpCommand: ParsableCommand {
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## [3.5.3] - 2026-06-13

### Fixed
- Public CLI, agent, MCP, and API guidance now treats runtime element IDs as opaque strings to copy exactly instead of implying role-specific ID shapes. Thanks @coygeek for #194.
- Sparkle no longer advertises the unpublished 3.5.3 release whose public app download returns 404; the entry will return through the normal release flow when the release is published. Thanks @bcharleson for #199.
- JSON-only `peekaboo see` runs without `--path` now keep required screenshots in snapshot storage instead of leaving files on Desktop or exposing their temporary paths. Thanks @coygeek for #196.
- Watch captures now honor stop requests during transient ScreenCaptureKit retry backoff instead of waiting out the full delay. Thanks @SebTardif for #193.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ public struct DetectedElements: Sendable, Codable {

/// A detected UI element
public struct DetectedElement: Sendable, Codable {
/// Unique identifier (e.g., "B1", "T2")
/// Opaque identifier returned by element detection.
public let id: String

/// Element type
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import PeekabooFoundation

/// Target for click operations
public enum ClickTarget: Sendable, Codable {
/// Click on element by ID (e.g., "B1")
/// Click an element by its opaque detected ID.
case elementId(String)

/// Click at specific coordinates
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import PeekabooFoundation
*
* // Click by element ID
* try await clickService.click(
* target: .elementId("B1"),
* target: .elementId(detectedElement.id),
* clickType: .single,
* snapshotId: "snapshot_123"
* )
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ extension UIAutomationService {
*
* ## Click Targeting
* Three targeting modes are supported:
* - **Element ID**: Click on a specific detected element (e.g., "B1", "T3")
* - **Element ID**: Click on a specific element using its opaque detected ID
* - **Query**: Find element by text content or accessibility label
* - **Coordinates**: Click at exact screen coordinates
*
Expand All @@ -128,7 +128,7 @@ extension UIAutomationService {
* ```swift
* // Click on detected element
* try await automation.click(
* target: .elementId("B1"),
* target: .elementId(detectedElement.id),
* clickType: .single,
* snapshotId: "snapshot_123"
* )
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ extension UIAutomationService {
* // Type into specific element with clearing
* try await automation.type(
* text: "Hello World!",
* target: "T1",
* target: detectedElement.id,
* clearExisting: true,
* typingDelay: 50,
* snapshotId: "snapshot_123"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ import PeekabooFoundation
* )
*
* // Perform automation
* try await automation.click(target: .elementId("B1"), clickType: .single, snapshotId: "snapshot_123")
* try await automation.type(text: "Hello World", target: "T1", clearExisting: true, snapshotId: "snapshot_123")
* try await automation.click(
* target: .elementId(button.id), clickType: .single, snapshotId: "snapshot_123")
* try await automation.type(
* text: "Hello World", target: textField.id, clearExisting: true, snapshotId: "snapshot_123")
* ```
*
* - Important: Requires Screen Recording and Accessibility permissions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public struct ClickTool: MCPTool {
"""),
"on": SchemaBuilder.string(
description: """
Optional. Element ID to click (e.g., B1, T2) from `see` or `inspect_ui` output.
Optional. Opaque element ID copied exactly from current `see` or `inspect_ui` output.
"""),
"coords": SchemaBuilder.string(
description: """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ public struct PerformActionTool: MCPTool {
SchemaBuilder.object(
properties: [
"on": SchemaBuilder.string(
description: "Element ID from `see` or `inspect_ui` output, such as B1, or a query string."),
description: "Opaque element ID copied exactly from current `see` or `inspect_ui` output, " +
"or a query string."),
"action": SchemaBuilder.string(
description: "Accessibility action name to invoke, e.g. AXPress, AXShowMenu, AXIncrement."),
"snapshot": SchemaBuilder.string(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ struct SeeSummaryBuilder {
lines.append("")
lines.append(contentsOf: self.elementSection())
lines.append("")
lines.append("Use element IDs (B1, T1, etc.) with click, type, and other interaction commands.")
lines.append("Copy opaque element IDs exactly into click, type, and other interaction commands.")
return lines.joined(separator: "\n")
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ public struct SeeTool: MCPTool {
"""
Captures a screenshot of the active UI and generates an element map.

Returns Peekaboo element IDs (B1 for buttons, T1 for text fields, etc.) that can be
used with interaction commands and creates/updates a snapshot that tracks UI state.
Returns opaque Peekaboo element IDs that can be passed unchanged to interaction commands.
Do not infer an element's role or type from the shape of its ID. Creates or updates a
snapshot that tracks UI state.
\(PeekabooMCPVersion.banner) using openai/gpt-5.5
and anthropic/claude-opus-4-8.
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ public struct SetValueTool: MCPTool {
SchemaBuilder.object(
properties: [
"on": SchemaBuilder.string(
description: "Element ID from `see` or `inspect_ui` output, such as T1, or a query string."),
description: "Opaque element ID copied exactly from current `see` or `inspect_ui` output, " +
"or a query string."),
"value": SchemaBuilder.anyOf(
[
SchemaBuilder.string(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public enum UIAutomationToolDefinitions {
ParameterDefinition(
name: "on",
type: .string,
description: "Element ID to click (e.g., B1, T2) from `see` or `inspect_ui` output",
description: "Opaque element ID copied exactly from current `see` or `inspect_ui` output",
required: false),
ParameterDefinition(
name: "coords",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public enum ToolRegistry {

EXAMPLE
peekaboo click --foreground --wait-for 1500 --double \"Submit\"
peekaboo click --on B2 --foreground --space-switch
peekaboo click --on "$ELEMENT_ID" --foreground --space-switch

TROUBLESHOOTING
If the element isn't found, refresh the snapshot with a fresh observation (`peekaboo see`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,27 @@ struct AgentToolDescriptionTests {
}
}

@Test
@MainActor
func `Agent tools treat element IDs as opaque`() throws {
    let agentService = try PeekabooAgentService(services: PeekabooServices())

    // Pair each tool name with its full guidance text: the tool description
    // followed by every parameter description, one per line.
    let guidanceByTool = agentService.createAgentTools().map { tool in
        let parameterText = tool.parameters.properties.values.map(\.description)
        let combined = ([tool.description] + parameterText).joined(separator: "\n")
        return (name: tool.name, text: combined)
    }

    // No tool may show a sample ID made of one of B/T/M/S followed by digits.
    for entry in guidanceByTool {
        let roleShapedID = entry.text.range(of: #"\b[BTMS]\d+\b"#, options: .regularExpression)
        #expect(
            roleShapedID == nil,
            "Tool '\(entry.name)' must not imply that element ID shape encodes element role.")
    }

    // The click tool must mention "opaque" (case-insensitively) somewhere in its guidance.
    let clickGuidance = guidanceByTool.first(where: { $0.name == "click" })?.text
    #expect(clickGuidance?.localizedCaseInsensitiveContains("opaque") == true)
}

@Test
@MainActor
func `Shell tool has quoting examples`() {
Expand Down
13 changes: 9 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,20 @@ Peekaboo brings high-fidelity screen capture, AI analysis, and complete GUI auto
# Capture full screen at Retina scale and save to Desktop
peekaboo image --mode screen --retina --path ~/Desktop/screen.png

# Click a button by label (captures, resolves, and clicks in one go)
peekaboo see --app Safari --json | jq -r '.data.snapshot_id' | read SNAPSHOT
# Capture current UI state, then copy its snapshot and opaque element IDs exactly
peekaboo see --app Safari --json
SNAPSHOT="<snapshot-id>"
TEXT_FIELD_ID="<text-field-id>"
BUTTON_ID="<button-id>"

# Click a button by label
peekaboo click "Reload this page" --snapshot "$SNAPSHOT"

# Directly set a text field value when the accessibility value is settable
peekaboo set-value --on T1 --value "hello" --snapshot "$SNAPSHOT"
peekaboo set-value "hello" --on "$TEXT_FIELD_ID" --snapshot "$SNAPSHOT"

# Invoke a named accessibility action on an element
peekaboo perform-action --on B1 --action AXPress --snapshot "$SNAPSHOT"
peekaboo perform-action --on "$BUTTON_ID" --action AXPress --snapshot "$SNAPSHOT"

# Run a natural-language automation
peekaboo agent "Open Notes and create a TODO list with three items"
Expand Down
6 changes: 3 additions & 3 deletions docs/commands/click.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ read_when:
| Flag | Description |
| --- | --- |
| `[query]` | Optional positional text query (case-insensitive substring match). |
| `--on <id>` / `--id <id>` | Target a specific Peekaboo element ID (e.g., `B1`, `T2`). |
| `--on <id>` / `--id <id>` | Target an opaque Peekaboo element ID copied exactly from current `see` or `inspect-ui` output. |
| `--coords x,y` | Click coordinates. With target flags, coordinates are relative to the resolved target window; without target flags, they are global screen coordinates. |
| `--global-coords` | Treat `--coords` as global screen coordinates even when target flags are supplied. |
| `--snapshot <id>` | Reuse a prior snapshot; defaults to `services.snapshots.getMostRecentSnapshot()` when omitted. |
Expand All @@ -40,8 +40,8 @@ read_when:

## Examples
```bash
# Click the "Send" button (ID from a previous `see` run)
peekaboo click --on B12
# Click the "Send" button using an ID copied from current `see` output
peekaboo click --on "$ELEMENT_ID"

# Fuzzy search + extra wait for a slow dialog using foreground delivery
peekaboo click "Allow" --foreground --wait-for 8000 --space-switch
Expand Down
2 changes: 1 addition & 1 deletion docs/commands/perform-action.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ read_when:

```bash
peekaboo see --app Calculator
peekaboo perform-action --on B7 --action AXPress --snapshot <snapshot-id>
peekaboo perform-action --on "$ELEMENT_ID" --action AXPress --snapshot "$SNAPSHOT_ID"

peekaboo perform-action --on Stepper --action AXIncrement
```
2 changes: 1 addition & 1 deletion docs/commands/set-value.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ read_when:

```bash
peekaboo see --app TextEdit
peekaboo set-value "hello" --on T1 --snapshot <snapshot-id>
peekaboo set-value "hello" --on "$ELEMENT_ID" --snapshot "$SNAPSHOT_ID"

peekaboo set-value "42" --on "Search"
```
2 changes: 1 addition & 1 deletion docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ Environment variables:
CLI override:

```bash
peekaboo click --on B1 --input-strategy actionFirst
peekaboo click --on "$ELEMENT_ID" --input-strategy actionFirst
```

## Logging & Troubleshooting
Expand Down
2 changes: 1 addition & 1 deletion docs/focus.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ peekaboo type "Hello world"
peekaboo scroll --direction down
peekaboo menu click --app Safari --item "New Tab"
peekaboo hotkey --keys "cmd,s"
peekaboo drag --from B1 --to T2
peekaboo drag --from "$SOURCE_ID" --to "$TARGET_ID"
```

### Default Behavior
Expand Down
2 changes: 1 addition & 1 deletion docs/integrations/subprocess.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ Always use `--no-remote --capture-engine cg` for capture commands:
peekaboo see --app Safari --no-remote --capture-engine cg --json

# Click element (doesn't need workaround, but safe to include)
peekaboo click --on B1 --no-remote
peekaboo click --on "$ELEMENT_ID" --no-remote

# Type text (doesn't need workaround, but safe to include)
peekaboo type --text "Hello" --no-remote
Expand Down
Loading
Loading