diff --git a/Apps/CLI/CHANGELOG.md b/Apps/CLI/CHANGELOG.md index 3b3de8b84..889ac80f8 100644 --- a/Apps/CLI/CHANGELOG.md +++ b/Apps/CLI/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [3.5.3] - 2026-06-13 ### Fixed +- Public CLI, agent, MCP, and API guidance now treats runtime element IDs as opaque strings to copy exactly instead of implying role-specific ID shapes. Thanks @coygeek for #194. - JSON-only `peekaboo see` runs without `--path` now keep required screenshots in snapshot storage instead of leaving files on Desktop or exposing their temporary paths. Thanks @coygeek for #196. - Background element/query/coordinate clicks now pin actions to the requested process and exact window, reject mismatched window/PID selectors and unverifiable snapshots, invalidate implicit latest snapshots without deleting history, and no longer require Event Synthesizing when Accessibility completes the click. - App launch, open, and inventory commands now use the selected runtime host, fixing sandboxed LaunchServices failures; launch/open preserve `--no-focus` and caller-relative app paths, relaunch preflights and keeps quit/wait/launch in one daemon-held transaction, build-scoped fallback daemons remain reusable and controllable across native/Rosetta execution and executable upgrades, incompatible legacy hosts no longer force sandboxed local fallback, and inventory ignores unrelated input overrides. 
diff --git a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/ClickCommand+CommanderMetadata.swift b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/ClickCommand+CommanderMetadata.swift index 52d467abd..99c2ce0ae 100644 --- a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/ClickCommand+CommanderMetadata.swift +++ b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/ClickCommand+CommanderMetadata.swift @@ -54,7 +54,7 @@ extension ClickCommand: CommanderSignatureProviding { ), .commandOption( "on", - help: "Element ID to click (e.g., B1, T2)", + help: "Opaque element ID copied from current see or inspect-ui output", long: "on" ), .commandOption( diff --git a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/ClickCommand+Validation.swift b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/ClickCommand+Validation.swift index 04f6fbe15..8ae4ab501 100644 --- a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/ClickCommand+Validation.swift +++ b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/ClickCommand+Validation.swift @@ -48,7 +48,7 @@ extension ClickCommand { 💡 Hints: • Run 'peekaboo see' first to capture UI elements - • Check that the element ID is correct (e.g., B1, T2) + • Copy the opaque element ID exactly from current see or inspect-ui output • Element may have disappeared or changed """ } diff --git a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/ClickCommand.swift b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/ClickCommand.swift index 819c663c7..f70032e5e 100644 --- a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/ClickCommand.swift +++ b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/ClickCommand.swift @@ -14,7 +14,7 @@ struct ClickCommand: ErrorHandlingCommand, OutputFormattable, RuntimeOptionsConf @Option(help: "Snapshot ID, or 'latest' (uses latest if not specified)") var snapshot: String? 
- @Option(help: "Element ID to click (e.g., B1, T2)") + @Option(help: "Opaque element ID copied from current see or inspect-ui output") var on: String? @Option(name: .customLong("id"), help: "Element ID to click (alias for --on)") diff --git a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/DragCommand.swift b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/DragCommand.swift index f0bf48a9e..5a600151c 100644 --- a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/DragCommand.swift +++ b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/DragCommand.swift @@ -259,11 +259,11 @@ extension DragCommand: ParsableCommand { Execute click-and-drag operations for moving elements, selecting text, or dragging files. EXAMPLES: - peekaboo drag --from B1 --to T2 + peekaboo drag --from "$SOURCE_ID" --to "$TARGET_ID" peekaboo drag --from-coords "100,200" --to-coords "400,300" - peekaboo drag --from B1 --to-app Trash - peekaboo drag --from S1 --to-coords "500,250" --duration 2000 - peekaboo drag --from T1 --to T5 --modifiers shift + peekaboo drag --from "$SOURCE_ID" --to-app Trash + peekaboo drag --from "$SOURCE_ID" --to-coords "500,250" --duration 2000 + peekaboo drag --from "$SOURCE_ID" --to "$TARGET_ID" --modifiers shift """, version: "2.0.0", showHelpOnEmptyInvocation: true diff --git a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/MoveCommand+CommanderMetadata.swift b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/MoveCommand+CommanderMetadata.swift index 031ccb0e3..349b078cb 100644 --- a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/MoveCommand+CommanderMetadata.swift +++ b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/MoveCommand+CommanderMetadata.swift @@ -16,7 +16,7 @@ extension MoveCommand: ParsableCommand { EXAMPLES: peekaboo move 100,200 # Move to coordinates peekaboo move --to "Submit Button" # Move to element by text - peekaboo move --on B3 # Move to element by ID + peekaboo move --on "$ELEMENT_ID" # ID copied from current output peekaboo move 500,300 
--smooth # Smooth movement peekaboo move --center # Move to screen center @@ -84,7 +84,7 @@ extension MoveCommand: CommanderSignatureProviding { ), .commandOption( "on", - help: "Element ID to move to (e.g., B1, T2)", + help: "Opaque element ID copied from current see or inspect-ui output", long: "on" ), .commandOption( diff --git a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/MoveCommand.swift b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/MoveCommand.swift index 633268366..76d0fc2fc 100644 --- a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/MoveCommand.swift +++ b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/MoveCommand.swift @@ -17,7 +17,7 @@ struct MoveCommand: ErrorHandlingCommand, OutputFormattable { @Option(help: "Move to element by text/label") var to: String? - @Option(help: "Element ID to move to (e.g., B1, T2)") + @Option(help: "Opaque element ID copied from current see or inspect-ui output") var on: String? @Option(name: .customLong("id"), help: "Element ID to move to (alias for --on)") diff --git a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/PerformActionCommand.swift b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/PerformActionCommand.swift index c4c43be88..2715de971 100644 --- a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/PerformActionCommand.swift +++ b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/PerformActionCommand.swift @@ -116,7 +116,7 @@ extension PerformActionCommand: ParsableCommand { Invokes an accessibility action without synthesizing a mouse or keyboard event. 
EXAMPLES: - peekaboo perform-action --on B1 --action AXPress + peekaboo perform-action --on "$ELEMENT_ID" --action AXPress peekaboo perform-action --on Stepper --action AXIncrement """, showHelpOnEmptyInvocation: true diff --git a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/SetValueCommand.swift b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/SetValueCommand.swift index 89a58bd82..c65e1e162 100644 --- a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/SetValueCommand.swift +++ b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/SetValueCommand.swift @@ -119,7 +119,7 @@ extension SetValueCommand: ParsableCommand { Sets a settable accessibility value without synthesizing keystrokes. EXAMPLES: - peekaboo set-value "hello" --on T1 + peekaboo set-value "hello" --on "$ELEMENT_ID" peekaboo set-value "42" --on "Search" """, showHelpOnEmptyInvocation: true diff --git a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/SwipeCommand.swift b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/SwipeCommand.swift index d897907c6..92281e504 100644 --- a/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/SwipeCommand.swift +++ b/Apps/CLI/Sources/PeekabooCLI/Commands/Interaction/SwipeCommand.swift @@ -249,20 +249,20 @@ extension SwipeCommand: ParsableCommand { EXAMPLES: # Swipe between UI elements - peekaboo swipe --from B1 --to B5 --snapshot 12345 + peekaboo swipe --from "$SOURCE_ID" --to "$TARGET_ID" --snapshot "$SNAPSHOT_ID" # Swipe with coordinates peekaboo swipe --from-coords 100,200 --to-coords 300,400 # Mixed mode: element to coordinates - peekaboo swipe --from T1 --to-coords 500,300 --duration 1000 + peekaboo swipe --from "$SOURCE_ID" --to-coords 500,300 --duration 1000 # Slow swipe for precise gesture peekaboo swipe --from-coords 50,50 --to-coords 400,400 --duration 2000 USAGE: You can specify source and destination using either: - - Element IDs from a previous 'see' command + - Opaque element IDs copied from current 'see' or 'inspect-ui' output - Direct 
coordinates - A mix of both diff --git a/Apps/CLI/Tests/CoreCLITests/CommandHelpRendererTests.swift b/Apps/CLI/Tests/CoreCLITests/CommandHelpRendererTests.swift index 79d81db68..8f5b9cfec 100644 --- a/Apps/CLI/Tests/CoreCLITests/CommandHelpRendererTests.swift +++ b/Apps/CLI/Tests/CoreCLITests/CommandHelpRendererTests.swift @@ -21,6 +21,14 @@ struct CommandHelpRendererTests { #expect(!help.contains("")) #expect(!help.contains("")) } + + @Test + func `interaction help describes element IDs as opaque`() { + for help in [ClickCommand.helpMessage(), MoveCommand.helpMessage()] { + #expect(help.contains("Opaque element ID")) + #expect(help.range(of: #"\b[BTMS]\d+\b"#, options: .regularExpression) == nil) + } + } } private struct SampleHelpCommand: ParsableCommand { diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d0a5bf02..f36bd441b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## [3.5.3] - 2026-06-13 ### Fixed +- Public CLI, agent, MCP, and API guidance now treats runtime element IDs as opaque strings to copy exactly instead of implying role-specific ID shapes. Thanks @coygeek for #194. - Sparkle no longer advertises the unpublished 3.5.3 release whose public app download returns 404; the entry will return through the normal release flow when the release is published. Thanks @bcharleson for #199. - JSON-only `peekaboo see` runs without `--path` now keep required screenshots in snapshot storage instead of leaving files on Desktop or exposing their temporary paths. Thanks @coygeek for #196. - Watch captures now honor stop requests during transient ScreenCaptureKit retry backoff instead of waiting out the full delay. Thanks @SebTardif for #193. 
diff --git a/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/Core/Protocols/ElementDetectionModels.swift b/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/Core/Protocols/ElementDetectionModels.swift index 5d322576e..821abbcba 100644 --- a/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/Core/Protocols/ElementDetectionModels.swift +++ b/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/Core/Protocols/ElementDetectionModels.swift @@ -78,7 +78,7 @@ public struct DetectedElements: Sendable, Codable { /// A detected UI element public struct DetectedElement: Sendable, Codable { - /// Unique identifier (e.g., "B1", "T2") + /// Opaque identifier returned by element detection. public let id: String /// Element type diff --git a/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/Core/Protocols/UIAutomationOperationModels.swift b/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/Core/Protocols/UIAutomationOperationModels.swift index 9c0668250..48c7434c0 100644 --- a/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/Core/Protocols/UIAutomationOperationModels.swift +++ b/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/Core/Protocols/UIAutomationOperationModels.swift @@ -4,7 +4,7 @@ import PeekabooFoundation /// Target for click operations public enum ClickTarget: Sendable, Codable { - /// Click on element by ID (e.g., "B1") + /// Click an element by its opaque detected ID. 
case elementId(String) /// Click at specific coordinates diff --git a/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/UI/ClickService.swift b/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/UI/ClickService.swift index 9b606112b..dd9da0666 100644 --- a/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/UI/ClickService.swift +++ b/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/UI/ClickService.swift @@ -24,7 +24,7 @@ import PeekabooFoundation * * // Click by element ID * try await clickService.click( - * target: .elementId("B1"), + * target: .elementId(detectedElement.id), * clickType: .single, * snapshotId: "snapshot_123" * ) diff --git a/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/UI/UIAutomationService+Operations.swift b/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/UI/UIAutomationService+Operations.swift index 96202a77f..0300354e2 100644 --- a/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/UI/UIAutomationService+Operations.swift +++ b/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/UI/UIAutomationService+Operations.swift @@ -103,7 +103,7 @@ extension UIAutomationService { * * ## Click Targeting * Three targeting modes are supported: - * - **Element ID**: Click on a specific detected element (e.g., "B1", "T3") + * - **Element ID**: Click on a specific element using its opaque detected ID * - **Query**: Find element by text content or accessibility label * - **Coordinates**: Click at exact screen coordinates * @@ -128,7 +128,7 @@ extension UIAutomationService { * ```swift * // Click on detected element * try await automation.click( - * target: .elementId("B1"), + * target: .elementId(detectedElement.id), * clickType: .single, * snapshotId: "snapshot_123" * ) diff --git a/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/UI/UIAutomationService+TypingOperations.swift 
b/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/UI/UIAutomationService+TypingOperations.swift index 7f7444d2f..bc743b6c4 100644 --- a/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/UI/UIAutomationService+TypingOperations.swift +++ b/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/UI/UIAutomationService+TypingOperations.swift @@ -47,7 +47,7 @@ extension UIAutomationService { * // Type into specific element with clearing * try await automation.type( * text: "Hello World!", - * target: "T1", + * target: detectedElement.id, * clearExisting: true, * typingDelay: 50, * snapshotId: "snapshot_123" diff --git a/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/UI/UIAutomationService.swift b/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/UI/UIAutomationService.swift index 73916f42b..785c061fa 100644 --- a/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/UI/UIAutomationService.swift +++ b/Core/PeekabooAutomationKit/Sources/PeekabooAutomationKit/Services/UI/UIAutomationService.swift @@ -27,8 +27,10 @@ import PeekabooFoundation * ) * * // Perform automation - * try await automation.click(target: .elementId("B1"), clickType: .single, snapshotId: "snapshot_123") - * try await automation.type(text: "Hello World", target: "T1", clearExisting: true, snapshotId: "snapshot_123") + * try await automation.click( + * target: .elementId(button.id), clickType: .single, snapshotId: "snapshot_123") + * try await automation.type( + * text: "Hello World", target: textField.id, clearExisting: true, snapshotId: "snapshot_123") * ``` * * - Important: Requires Screen Recording and Accessibility permissions diff --git a/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/ClickTool.swift b/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/ClickTool.swift index 25e91fb2d..63f9bf634 100644 --- a/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/ClickTool.swift +++ 
b/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/ClickTool.swift @@ -30,7 +30,7 @@ public struct ClickTool: MCPTool { """), "on": SchemaBuilder.string( description: """ - Optional. Element ID to click (e.g., B1, T2) from `see` or `inspect_ui` output. + Optional. Opaque element ID copied exactly from current `see` or `inspect_ui` output. """), "coords": SchemaBuilder.string( description: """ diff --git a/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/PerformActionTool.swift b/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/PerformActionTool.swift index 2a35f2d16..7562522d6 100644 --- a/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/PerformActionTool.swift +++ b/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/PerformActionTool.swift @@ -23,7 +23,8 @@ public struct PerformActionTool: MCPTool { SchemaBuilder.object( properties: [ "on": SchemaBuilder.string( - description: "Element ID from `see` or `inspect_ui` output, such as B1, or a query string."), + description: "Opaque element ID copied exactly from current `see` or `inspect_ui` output, " + + "or a query string."), "action": SchemaBuilder.string( description: "Accessibility action name to invoke, e.g. AXPress, AXShowMenu, AXIncrement."), "snapshot": SchemaBuilder.string( diff --git a/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/SeeTool+Types.swift b/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/SeeTool+Types.swift index b705cf0b1..82012be35 100644 --- a/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/SeeTool+Types.swift +++ b/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/SeeTool+Types.swift @@ -50,7 +50,7 @@ struct SeeSummaryBuilder { lines.append("") lines.append(contentsOf: self.elementSection()) lines.append("") - lines.append("Use element IDs (B1, T1, etc.) 
with click, type, and other interaction commands.") + lines.append("Copy opaque element IDs exactly into click, type, and other interaction commands.") return lines.joined(separator: "\n") } diff --git a/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/SeeTool.swift b/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/SeeTool.swift index e4e8f79cf..bd4f403b5 100644 --- a/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/SeeTool.swift +++ b/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/SeeTool.swift @@ -21,8 +21,9 @@ public struct SeeTool: MCPTool { """ Captures a screenshot of the active UI and generates an element map. - Returns Peekaboo element IDs (B1 for buttons, T1 for text fields, etc.) that can be - used with interaction commands and creates/updates a snapshot that tracks UI state. + Returns opaque Peekaboo element IDs that can be passed unchanged to interaction commands. + Do not infer an element's role or type from the shape of its ID. Creates or updates a + snapshot that tracks UI state. \(PeekabooMCPVersion.banner) using openai/gpt-5.5 and anthropic/claude-opus-4-8. 
""" diff --git a/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/SetValueTool.swift b/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/SetValueTool.swift index 09e19a0ee..4aef287f4 100644 --- a/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/SetValueTool.swift +++ b/Core/PeekabooCore/Sources/PeekabooAgentRuntime/MCP/Tools/SetValueTool.swift @@ -23,7 +23,8 @@ public struct SetValueTool: MCPTool { SchemaBuilder.object( properties: [ "on": SchemaBuilder.string( - description: "Element ID from `see` or `inspect_ui` output, such as T1, or a query string."), + description: "Opaque element ID copied exactly from current `see` or `inspect_ui` output, " + + "or a query string."), "value": SchemaBuilder.anyOf( [ SchemaBuilder.string(), diff --git a/Core/PeekabooCore/Sources/PeekabooAgentRuntime/ToolRegistry/ToolDefinitions.swift b/Core/PeekabooCore/Sources/PeekabooAgentRuntime/ToolRegistry/ToolDefinitions.swift index d2c00be72..f83fdb519 100644 --- a/Core/PeekabooCore/Sources/PeekabooAgentRuntime/ToolRegistry/ToolDefinitions.swift +++ b/Core/PeekabooCore/Sources/PeekabooAgentRuntime/ToolRegistry/ToolDefinitions.swift @@ -76,7 +76,7 @@ public enum UIAutomationToolDefinitions { ParameterDefinition( name: "on", type: .string, - description: "Element ID to click (e.g., B1, T2) from `see` or `inspect_ui` output", + description: "Opaque element ID copied exactly from current `see` or `inspect_ui` output", required: false), ParameterDefinition( name: "coords", diff --git a/Core/PeekabooCore/Sources/PeekabooAgentRuntime/ToolRegistry/ToolRegistry.swift b/Core/PeekabooCore/Sources/PeekabooAgentRuntime/ToolRegistry/ToolRegistry.swift index af432fdd8..69a62ced2 100644 --- a/Core/PeekabooCore/Sources/PeekabooAgentRuntime/ToolRegistry/ToolRegistry.swift +++ b/Core/PeekabooCore/Sources/PeekabooAgentRuntime/ToolRegistry/ToolRegistry.swift @@ -58,7 +58,7 @@ public enum ToolRegistry { EXAMPLE peekaboo click --foreground --wait-for 1500 --double \"Submit\" - peekaboo 
click --on B2 --foreground --space-switch + peekaboo click --on "$ELEMENT_ID" --foreground --space-switch TROUBLESHOOTING If the element isn't found, refresh the snapshot with a fresh observation (`peekaboo see` diff --git a/Core/PeekabooCore/Tests/PeekabooTests/AgentToolDescriptionTests.swift b/Core/PeekabooCore/Tests/PeekabooTests/AgentToolDescriptionTests.swift index 48011d2eb..32d45a774 100644 --- a/Core/PeekabooCore/Tests/PeekabooTests/AgentToolDescriptionTests.swift +++ b/Core/PeekabooCore/Tests/PeekabooTests/AgentToolDescriptionTests.swift @@ -144,6 +144,27 @@ struct AgentToolDescriptionTests { } } + @Test + @MainActor + func `Agent tools treat element IDs as opaque`() throws { + let service = try PeekabooAgentService(services: PeekabooServices()) + let agentTools = service.createAgentTools() + + for tool in agentTools { + let parameterDescriptions = tool.parameters.properties.values.map(\.description) + let guidance = ([tool.description] + parameterDescriptions).joined(separator: "\n") + + #expect( + guidance.range(of: #"\b[BTMS]\d+\b"#, options: .regularExpression) == nil, + "Tool '\(tool.name)' must not imply that element ID shape encodes element role.") + } + + let clickGuidance = agentTools.first(where: { $0.name == "click" }).map { tool in + ([tool.description] + tool.parameters.properties.values.map(\.description)).joined(separator: "\n") + } + #expect(clickGuidance?.localizedCaseInsensitiveContains("opaque") == true) + } + @Test @MainActor func `Shell tool has quoting examples`() { diff --git a/README.md b/README.md index 4b508b276..5471044e3 100644 --- a/README.md +++ b/README.md @@ -41,15 +41,20 @@ Peekaboo brings high-fidelity screen capture, AI analysis, and complete GUI auto # Capture full screen at Retina scale and save to Desktop peekaboo image --mode screen --retina --path ~/Desktop/screen.png -# Click a button by label (captures, resolves, and clicks in one go) -peekaboo see --app Safari --json | jq -r '.data.snapshot_id' | read SNAPSHOT +# 
Capture current UI state, then copy its snapshot and opaque element IDs exactly +peekaboo see --app Safari --json +SNAPSHOT="<snapshot-id>" +TEXT_FIELD_ID="<text-field-element-id>" +BUTTON_ID="<button-element-id>" + +# Click a button by label peekaboo click --on "Reload this page" --snapshot "$SNAPSHOT" # Directly set a text field value when the accessibility value is settable -peekaboo set-value --on T1 --value "hello" --snapshot "$SNAPSHOT" +peekaboo set-value --on "$TEXT_FIELD_ID" --value "hello" --snapshot "$SNAPSHOT" # Invoke a named accessibility action on an element -peekaboo perform-action --on B1 --action AXPress --snapshot "$SNAPSHOT" +peekaboo perform-action --on "$BUTTON_ID" --action AXPress --snapshot "$SNAPSHOT" # Run a natural-language automation peekaboo agent "Open Notes and create a TODO list with three items" diff --git a/docs/commands/click.md b/docs/commands/click.md index 636ef5ca3..3dd40e230 100644 --- a/docs/commands/click.md +++ b/docs/commands/click.md @@ -13,7 +13,7 @@ read_when: | Flag | Description | | --- | --- | | `[query]` | Optional positional text query (case-insensitive substring match). | -| `--on <id>` / `--id <id>` | Target a specific Peekaboo element ID (e.g., `B1`, `T2`). | +| `--on <id>` / `--id <id>` | Target an opaque Peekaboo element ID copied exactly from current `see` or `inspect-ui` output. | | `--coords x,y` | Click coordinates. With target flags, coordinates are relative to the resolved target window; without target flags, they are global screen coordinates. | | `--global-coords` | Treat `--coords` as global screen coordinates even when target flags are supplied. | | `--snapshot <id>` | Reuse a prior snapshot; defaults to `services.snapshots.getMostRecentSnapshot()` when omitted. 
| @@ -40,8 +40,8 @@ read_when: ## Examples ```bash -# Click the "Send" button (ID from a previous `see` run) -peekaboo click --on B12 +# Click the "Send" button using an ID copied from current `see` output +peekaboo click --on "$ELEMENT_ID" # Fuzzy search + extra wait for a slow dialog using foreground delivery peekaboo click "Allow" --foreground --wait-for 8000 --space-switch diff --git a/docs/commands/perform-action.md b/docs/commands/perform-action.md index b31edce90..19deb01bf 100644 --- a/docs/commands/perform-action.md +++ b/docs/commands/perform-action.md @@ -27,7 +27,7 @@ read_when: ```bash peekaboo see --app Calculator -peekaboo perform-action --on B7 --action AXPress --snapshot <id> +peekaboo perform-action --on "$ELEMENT_ID" --action AXPress --snapshot <id> peekaboo perform-action --on Stepper --action AXIncrement ``` diff --git a/docs/commands/set-value.md b/docs/commands/set-value.md index eca78fb25..355603490 100644 --- a/docs/commands/set-value.md +++ b/docs/commands/set-value.md @@ -28,7 +28,7 @@ read_when: ```bash peekaboo see --app TextEdit -peekaboo set-value "hello" --on T1 --snapshot <id> +peekaboo set-value "hello" --on "$ELEMENT_ID" --snapshot <id> peekaboo set-value "42" --on "Search" ``` diff --git a/docs/configuration.md b/docs/configuration.md index f84577168..7f1740153 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -132,7 +132,7 @@ Environment variables: CLI override: ```bash -peekaboo click --on B1 --input-strategy actionFirst +peekaboo click --on "$ELEMENT_ID" --input-strategy actionFirst ``` ## Logging & Troubleshooting diff --git a/docs/focus.md b/docs/focus.md index c88b819ed..629b51ce0 100644 --- a/docs/focus.md +++ b/docs/focus.md @@ -69,7 +69,7 @@ peekaboo type "Hello world" peekaboo scroll --direction down peekaboo menu click --app Safari --item "New Tab" peekaboo hotkey --keys "cmd,s" -peekaboo drag --from B1 --to T2 +peekaboo drag --from "$SOURCE_ID" --to "$TARGET_ID" ``` ### Default Behavior diff --git 
a/docs/integrations/subprocess.md b/docs/integrations/subprocess.md index faa385845..74d5b1f15 100644 --- a/docs/integrations/subprocess.md +++ b/docs/integrations/subprocess.md @@ -106,7 +106,7 @@ Always use `--no-remote --capture-engine cg` for capture commands: peekaboo see --app Safari --no-remote --capture-engine cg --json # Click element (doesn't need workaround, but safe to include) -peekaboo click --on B1 --no-remote +peekaboo click --on "$ELEMENT_ID" --no-remote # Type text (doesn't need workaround, but safe to include) peekaboo type --text "Hello" --no-remote diff --git a/docs/logging-guide.md b/docs/logging-guide.md index c1049140a..b91505a2f 100644 --- a/docs/logging-guide.md +++ b/docs/logging-guide.md @@ -29,7 +29,7 @@ Use the `--verbose` or `-v` flag with any command: ```bash peekaboo see --app Safari --verbose -peekaboo click --on B1 --verbose +peekaboo click --on "$ELEMENT_ID" --verbose ``` ### Environment Variable @@ -106,12 +106,12 @@ $ peekaboo see --app Safari --verbose ### Debugging Element Not Found ```bash -$ peekaboo click --on B99 --verbose +$ peekaboo click --on "$ELEMENT_ID" --verbose [2025-01-06T08:05:24.123Z] VERBOSE [Snapshot]: Resolving snapshot {explicitId=null} [2025-01-06T08:05:24.124Z] VERBOSE [Snapshot]: Found valid snapshots {count=1, latest=12345} -[2025-01-06T08:05:24.125Z] VERBOSE [ElementSearch]: Looking for element {id=B99, snapshotId=12345} +[2025-01-06T08:05:24.125Z] VERBOSE [ElementSearch]: Looking for element {id=<element-id>, snapshotId=12345} [2025-01-06T08:05:24.126Z] VERBOSE [ElementSearch]: Loading snapshot map from cache -[2025-01-06T08:05:24.127Z] ERROR [ElementSearch]: Element not found in snapshot {id=B99, availableIds=[B1,B2,B3,T1,T2]} +[2025-01-06T08:05:24.127Z] ERROR [ElementSearch]: Element not found in snapshot {id=<element-id>} ``` ### Performance Analysis diff --git a/docs/testing/tools.md b/docs/testing/tools.md index 958d2d9b9..aee29ed5c 100644 --- a/docs/testing/tools.md +++ b/docs/testing/tools.md @@ -253,7 +253,7 @@ The 
following subsections spell out the concrete steps, required Playground surf - **Log capture**: `./Apps/Playground/scripts/playground-log.sh -c Click --last 10m --all -o "$LOG_ROOT/click-$(date +%s).log"`. - **Test cases**: 1. Query-based click: `polter peekaboo -- click "Single Click"` (expect `Click` log + counter increment). - 2. ID-based click: `polter peekaboo -- click --on B1 --snapshot ` targeting `single-click-button`. + 2. ID-based click: copy the opaque ID from current `see` output, then run `polter peekaboo -- click --on "$ELEMENT_ID" --snapshot ` targeting `single-click-button`. 3. Coordinate click: `polter peekaboo -- click --coords 400,400 --foreground` hitting the nested area. 4. Coordinate validation: `polter peekaboo -- click --coords , --json-output` should fail with `VALIDATION_ERROR` (no crash). 5. Error path: attempt to click disabled button and confirm descriptive `elementNotFound` guidance. @@ -264,7 +264,7 @@ The following subsections spell out the concrete steps, required Playground surf - `polter peekaboo -- click "Single Click" --snapshot ` succeeded but targeted Ghostty (click hit terminal input); highlighting importance of focusing Playground first. - `polter peekaboo -- app switch --to Playground` followed by `polter peekaboo -- click --on elem_6 --snapshot 263F8CD6-...` successfully hit the “View Logs” button (Playground log recorded the click). - Coordinate click `--coords 600,500` succeeded (see log); attempting `--on elem_disabled` produced expected `elementNotFound` error. - - IDs like `B1` are not stable in this build; rely on `elem_*` IDs from the `see` output. + - Element IDs are opaque and unstable; always copy the exact ID from current `see` output. - **2025-12-17 Controls Fixture add-on**: - Open “Controls Fixture” via `⌘⌃3`, then drive checkboxes + segmented control by clicking snapshot IDs (`--on elem_…`) captured from `see`. 
- **Important**: ControlsView is scrollable; after any `scroll`, re-run `see` before clicking elements further down (otherwise snapshot coordinates can be stale). diff --git a/docs/visualizer.md b/docs/visualizer.md index 711640c38..9c65a3c87 100644 --- a/docs/visualizer.md +++ b/docs/visualizer.md @@ -190,7 +190,7 @@ Peekaboo.app still respects user-facing toggles via `PeekabooSettings`; the coor ### Element Detection (See) 👁️ - **Effect**: All detected elements briefly highlight -- **Style**: Colored overlays with IDs (B1, T1, etc.) +- **Style**: Colored overlays labeled with opaque element IDs - **Animation**: Fade in with slight scale - **Duration**: 2 seconds before fade diff --git a/scripts/docs-lint.mjs b/scripts/docs-lint.mjs index ac9a14d29..b29a351eb 100644 --- a/scripts/docs-lint.mjs +++ b/scripts/docs-lint.mjs @@ -19,6 +19,8 @@ const staleCliPatterns = [ [/--label\b/, 'use positional query text or `--on`'], [/--at\b/, 'use `--coords`'], [/--ticks\b/, 'use `--amount`'], + [/--(?:on|from|to)\s+[`"']?[BTMS]\d+\b/, 'use an opaque element ID copied from current output'], + [/element IDs?[^\n]*[`"']?[BTMS]\d+\b/i, 'describe element IDs as opaque'], ]; const staleDocsPatterns = [ [/mcp-capture-meta/i, 'remove stale native MCP capture metadata references'], diff --git a/skills/peekaboo/SKILL.md b/skills/peekaboo/SKILL.md index 260d4a62b..76da7ff7c 100644 --- a/skills/peekaboo/SKILL.md +++ b/skills/peekaboo/SKILL.md @@ -151,7 +151,7 @@ ruby -e 'h=File.read("skills/peekaboo/SKILL.md").split(/^---\s*$/,3)[1]; keys=h. ! 
rg -n '^allowed-tools:' skills/peekaboo/SKILL.md pnpm run build:cli BIN="$(swift build --package-path Apps/CLI --show-bin-path)/peekaboo"; "$BIN" --version -"$BIN" click --help | rg -- '--foreground|--focus-background|--input-strategy|B1, T2' +"$BIN" click --help | rg -- '--foreground|--focus-background|--input-strategy|Opaque element ID' "$BIN" see --help | rg -- '--json|--annotate|--app|--no-web-focus' "$BIN" inspect-ui --help | rg 'inspect_ui|--app-target|--snapshot|--json' git diff --check -- skills/peekaboo/SKILL.md docs/agent-skill.md docs/commands/see.md docs/automation.md scripts/docs-lint.mjs