Files
claude-code-sourcemap/restored-src/node_modules/@ant/computer-use-mcp/src/mcpServer.ts

314 lines
12 KiB
TypeScript

/**
* MCP server factory + session-context binder.
*
* Two entry points:
*
* `bindSessionContext` — the wrapper closure. Takes a `ComputerUseSessionContext`
* (getters + callbacks backed by host session state), returns a dispatcher.
* Reusable by both the MCP CallTool handler here AND Cowork's
* `InternalServerDefinition.handleToolCall` (which doesn't go through MCP).
* This replaces the duplicated wrapper closures in apps/desktop/…/serverDef.ts
* and the Claude Code CLI's CU host wrapper — both did the same thing: build `ComputerUseOverrides`
* fresh from getters, call `handleToolCall`, stash screenshot, merge permissions.
*
* `createComputerUseMcpServer` — the Server object. When `context` is provided,
* the CallTool handler is real (uses `bindSessionContext`). When not, it's the
* legacy stub that returns a not-wired error. The tool-schema ListTools handler
* is the same either way.
*/
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
import {
CallToolRequestSchema,
ListToolsRequestSchema,
} from "@modelcontextprotocol/sdk/types.js";
import type { ScreenshotResult } from "./executor.js";
import type { CuCallToolResult } from "./toolCalls.js";
import {
defersLockAcquire,
handleToolCall,
resetMouseButtonHeld,
} from "./toolCalls.js";
import { buildComputerUseTools } from "./tools.js";
import type {
AppGrant,
ComputerUseHostAdapter,
ComputerUseOverrides,
ComputerUseSessionContext,
CoordinateMode,
CuGrantFlags,
CuPermissionResponse,
} from "./types.js";
import { DEFAULT_GRANT_FLAGS } from "./types.js";
const DEFAULT_LOCK_HELD_MESSAGE =
"Another Claude session is currently using the computer. Wait for that " +
"session to finish, or find a non-computer-use approach.";
/**
* Dedupe `granted` into `existing` on bundleId, spread truthy-only flags over
* defaults+existing. Truthy-only: a subsequent `request_access` that doesn't
* request clipboard can't revoke an earlier clipboard grant — revocation lives
* in a Settings page, not here.
*
* Same merge both hosts implemented independently today.
*/
function mergePermissionResponse(
existing: readonly AppGrant[],
existingFlags: CuGrantFlags,
response: CuPermissionResponse,
): { apps: AppGrant[]; flags: CuGrantFlags } {
const seen = new Set(existing.map((a) => a.bundleId));
const apps = [
...existing,
...response.granted.filter((g) => !seen.has(g.bundleId)),
];
const truthyFlags = Object.fromEntries(
Object.entries(response.flags).filter(([, v]) => v === true),
);
const flags: CuGrantFlags = {
...DEFAULT_GRANT_FLAGS,
...existingFlags,
...truthyFlags,
};
return { apps, flags };
}
/**
* Bind session state to a reusable dispatcher. The returned function is the
* wrapper closure: async lock gate → build overrides fresh → `handleToolCall`
* → stash screenshot → strip piggybacked fields.
*
* The last-screenshot blob is held in a closure cell here (not on `ctx`), so
* hosts don't need to guarantee `ctx` object identity across calls — they just
* need to hold onto the returned dispatcher. Cowork caches per
* `InternalServerContext` in a WeakMap; the CLI host constructs once at server creation.
*/
export function bindSessionContext(
adapter: ComputerUseHostAdapter,
coordinateMode: CoordinateMode,
ctx: ComputerUseSessionContext,
): (name: string, args: unknown) => Promise<CuCallToolResult> {
const { logger, serverName } = adapter;
// Screenshot blob persists here across calls — NOT on `ctx`. Hosts hold
// onto the returned dispatcher; that's the identity that matters.
let lastScreenshot: ScreenshotResult | undefined;
const wrapPermission = ctx.onPermissionRequest
? async (
req: Parameters<NonNullable<typeof ctx.onPermissionRequest>>[0],
signal: AbortSignal,
): Promise<CuPermissionResponse> => {
const response = await ctx.onPermissionRequest!(req, signal);
const { apps, flags } = mergePermissionResponse(
ctx.getAllowedApps(),
ctx.getGrantFlags(),
response,
);
logger.debug(
`[${serverName}] permission result: granted=${response.granted.length} denied=${response.denied.length}`,
);
ctx.onAllowedAppsChanged?.(apps, flags);
return response;
}
: undefined;
const wrapTeachPermission = ctx.onTeachPermissionRequest
? async (
req: Parameters<NonNullable<typeof ctx.onTeachPermissionRequest>>[0],
signal: AbortSignal,
): Promise<CuPermissionResponse> => {
const response = await ctx.onTeachPermissionRequest!(req, signal);
logger.debug(
`[${serverName}] teach permission result: granted=${response.granted.length} denied=${response.denied.length}`,
);
// Teach doesn't request grant flags — preserve existing.
const { apps } = mergePermissionResponse(
ctx.getAllowedApps(),
ctx.getGrantFlags(),
response,
);
ctx.onAllowedAppsChanged?.(apps, {
...DEFAULT_GRANT_FLAGS,
...ctx.getGrantFlags(),
});
return response;
}
: undefined;
return async (name, args) => {
// ─── Async lock gate ─────────────────────────────────────────────────
// Replaces the sync Gate-3 in `handleToolCall` — we pass
// `checkCuLock: undefined` below so it no-ops. Hosts with
// cross-process locks (O_EXCL file) await the real primitive here
// instead of pre-computing + feeding a fake sync result.
if (ctx.checkCuLock) {
const lock = await ctx.checkCuLock();
if (lock.holder !== undefined && !lock.isSelf) {
const text =
ctx.formatLockHeldMessage?.(lock.holder) ?? DEFAULT_LOCK_HELD_MESSAGE;
return {
content: [{ type: "text", text }],
isError: true,
telemetry: { error_kind: "cu_lock_held" },
};
}
if (lock.holder === undefined && !defersLockAcquire(name)) {
await ctx.acquireCuLock?.();
// Re-check: the awaits above yield the microtask queue, so another
// session's check+acquire can interleave with ours. Hosts where
// acquire is a no-op when already held (Cowork's CuLockManager) give
// no signal that we lost — verify we're now the holder before
// proceeding. The CLI's O_EXCL file lock would surface this as a throw from
// acquire instead; this re-check is a belt-and-suspenders for that
// path too.
const recheck = await ctx.checkCuLock();
if (recheck.holder !== undefined && !recheck.isSelf) {
const text =
ctx.formatLockHeldMessage?.(recheck.holder) ??
DEFAULT_LOCK_HELD_MESSAGE;
return {
content: [{ type: "text", text }],
isError: true,
telemetry: { error_kind: "cu_lock_held" },
};
}
// Fresh holder → any prior session's mouseButtonHeld is stale.
// Mirrors what Gate-3 does on the acquire branch. After the
// re-check so we only clear module state when we actually won.
resetMouseButtonHeld();
}
}
// ─── Build overrides fresh ───────────────────────────────────────────
// Blob-first; dims-fallback with base64:"" when the closure cell is
// unset (cross-respawn). scaleCoord reads dims; pixelCompare sees "" →
// isEmpty → skip.
const dimsFallback = lastScreenshot
? undefined
: ctx.getLastScreenshotDims?.();
// Per-call AbortController for dialog dismissal. Aborted in `finally` —
// if handleToolCall finishes (MCP timeout, throw) before the user
// answers, the host's dialog handler sees the abort and tears down.
const dialogAbort = new AbortController();
const overrides: ComputerUseOverrides = {
allowedApps: [...ctx.getAllowedApps()],
grantFlags: ctx.getGrantFlags(),
userDeniedBundleIds: ctx.getUserDeniedBundleIds(),
coordinateMode,
selectedDisplayId: ctx.getSelectedDisplayId(),
displayPinnedByModel: ctx.getDisplayPinnedByModel?.(),
displayResolvedForApps: ctx.getDisplayResolvedForApps?.(),
lastScreenshot:
lastScreenshot ??
(dimsFallback ? { ...dimsFallback, base64: "" } : undefined),
onPermissionRequest: wrapPermission
? (req) => wrapPermission(req, dialogAbort.signal)
: undefined,
onTeachPermissionRequest: wrapTeachPermission
? (req) => wrapTeachPermission(req, dialogAbort.signal)
: undefined,
onAppsHidden: ctx.onAppsHidden,
getClipboardStash: ctx.getClipboardStash,
onClipboardStashChanged: ctx.onClipboardStashChanged,
onResolvedDisplayUpdated: ctx.onResolvedDisplayUpdated,
onDisplayPinned: ctx.onDisplayPinned,
onDisplayResolvedForApps: ctx.onDisplayResolvedForApps,
onTeachModeActivated: ctx.onTeachModeActivated,
onTeachStep: ctx.onTeachStep,
onTeachWorking: ctx.onTeachWorking,
getTeachModeActive: ctx.getTeachModeActive,
// Undefined → handleToolCall's sync Gate-3 no-ops. The async gate
// above already ran.
checkCuLock: undefined,
acquireCuLock: undefined,
isAborted: ctx.isAborted,
};
logger.debug(
`[${serverName}] tool=${name} allowedApps=${overrides.allowedApps.length} coordMode=${coordinateMode}`,
);
// ─── Dispatch ────────────────────────────────────────────────────────
try {
const result = await handleToolCall(adapter, name, args, overrides);
if (result.screenshot) {
lastScreenshot = result.screenshot;
const { base64: _blob, ...dims } = result.screenshot;
logger.debug(`[${serverName}] screenshot dims: ${JSON.stringify(dims)}`);
ctx.onScreenshotCaptured?.(dims);
}
return result;
} finally {
dialogAbort.abort();
}
};
}
export function createComputerUseMcpServer(
adapter: ComputerUseHostAdapter,
coordinateMode: CoordinateMode,
context?: ComputerUseSessionContext,
): Server {
const { serverName, logger } = adapter;
const server = new Server(
{ name: serverName, version: "0.1.3" },
{ capabilities: { tools: {}, logging: {} } },
);
const tools = buildComputerUseTools(
adapter.executor.capabilities,
coordinateMode,
);
server.setRequestHandler(ListToolsRequestSchema, async () =>
adapter.isDisabled() ? { tools: [] } : { tools },
);
if (context) {
const dispatch = bindSessionContext(adapter, coordinateMode, context);
server.setRequestHandler(
CallToolRequestSchema,
async (request): Promise<CallToolResult> => {
const { screenshot: _s, telemetry: _t, ...result } = await dispatch(
request.params.name,
request.params.arguments ?? {},
);
return result;
},
);
return server;
}
// Legacy: no context → stub handler. Reached only if something calls the
// server over MCP transport WITHOUT going through a binder (a wiring
// regression). Clear error instead of silent failure.
server.setRequestHandler(
CallToolRequestSchema,
async (request): Promise<CallToolResult> => {
logger.warn(
`[${serverName}] tool call "${request.params.name}" reached the stub handler — no session context bound. Per-session state unavailable.`,
);
return {
content: [
{
type: "text",
text: "This computer-use server instance is not wired to a session. Per-session app permissions are not available on this code path.",
},
],
isError: true,
};
},
);
return server;
}